"Fossies" - the Fresh Open Source Software Archive

Member "cb2bib-2.0.1/xpdf/pdftohtml.cc" (12 Feb 2021, 5002 Bytes) of package /linux/privat/cb2bib-2.0.1.tar.gz:


As a special service, "Fossies" has tried to format the requested source page into HTML using (guessed) C and C++ source code syntax highlighting (style: standard), with prefixed line numbers and a code-folding option. Alternatively, you can view or download the uninterpreted source code file. For more information about "pdftohtml.cc", see the Fossies "Dox" file reference documentation.

    1 //========================================================================
    2 //
    3 // Modified pdftohtml (constans@molspaces.com, 2014)
    4 //
    5 // pdftohtml.cc
    6 //
    7 // Copyright 2005 Glyph & Cog, LLC
    8 //
    9 //========================================================================
   10 
   11 #include <aconf.h>
   12 #include <stdio.h>
   13 #include <stdlib.h>
   14 #include "parseargs.h"
   15 #include "gmem.h"
   16 #include "gfile.h"
   17 #include "GString.h"
   18 #include "GlobalParams.h"
   19 #include "PDFDoc.h"
   20 #include "HTMLGen.h"
   21 #include "Error.h"
   22 #include "ErrorCodes.h"
   23 #include "config.h"
   24 
   25 //------------------------------------------------------------------------
   26 
   27 static int firstPage = 1;
   28 static int lastPage = 0;
   29 static GBool skipInvisible = gFalse;
   30 static char ownerPassword[33] = "\001";
   31 static char userPassword[33] = "\001";
   32 static GBool quiet = gFalse;
   33 static char cfgFileName[256] = "";
   34 static GBool printVersion = gFalse;
   35 static GBool printHelp = gFalse;
   36 
   37 static ArgDesc argDesc[] =
   38 {
   39     {
   40         "-f", argInt, &firstPage, 0, "first page to convert"
   41     },
   42     {
   43         "-l", argInt, &lastPage, 0, "last page to convert"
   44     },
   45     {
   46         "-skipinvisible", argFlag, &skipInvisible, 0, "do not draw invisible text"
   47     },
   48     {
   49         "-opw", argString, ownerPassword, sizeof(ownerPassword), "owner password (for encrypted files)"
   50     },
   51     {
   52         "-upw", argString, userPassword, sizeof(userPassword), "user password (for encrypted files)"
   53     },
   54     {
   55         "-q", argFlag, &quiet, 0, "don't print any messages or errors"
   56     },
   57     {
   58         "-cfg", argString, cfgFileName, sizeof(cfgFileName), "configuration file to use in place of .xpdfrc"
   59     },
   60     {
   61         "-v", argFlag, &printVersion, 0, "print copyright and version info"
   62     },
   63     {
   64         "-h", argFlag, &printHelp, 0, "print usage information"
   65     },
   66     {
   67         "-help", argFlag, &printHelp, 0, "print usage information"
   68     },
   69     {
   70         "--help", argFlag, &printHelp, 0, "print usage information"
   71     },
   72     {
   73         "-?", argFlag, &printHelp, 0, "print usage information"
   74     },
   75     {NULL}
   76 };
   77 
   78 //------------------------------------------------------------------------
   79 
   80 static int writeToFile(void* file, const char* data, int size)
   81 {
   82     return (int)fwrite(data, 1, size, (FILE*)file);
   83 }
   84 
   85 int main(int argc, char* argv[])
   86 {
   87     PDFDoc* doc;
   88     GString* fileName;
   89     GString* ownerPW, *userPW;
   90     HTMLGen* htmlGen;
   91     GString* htmlFileName;
   92     FILE* htmlFile;
   93     int pg, err, exitCode;
   94     GBool ok;
   95 
   96     exitCode = 99;
   97 
   98     // parse args
   99     ok = parseArgs(argDesc, &argc, argv);
  100     if (!ok || argc != 3 || printVersion || printHelp)
  101     {
  102         fprintf(stderr, "\npdf2cb (a modified pdftohtml version %s for cb2bib)\n", xpdfVersion);
  103         fprintf(stderr, "\n%s\n", xpdfCopyright);
  104         if (!printVersion)
  105         {
  106             printUsage("pdf2cb", "<PDF-file> <TXT-file>", argDesc);
  107         }
  108         goto err0;
  109     }
  110     fileName = new GString(argv[1]);
  111     htmlFileName = new GString(argv[2]);
  112 
  113     // read config file
  114     globalParams = new GlobalParams(cfgFileName);
  115     if (quiet)
  116     {
  117         globalParams->setErrQuiet(quiet);
  118     }
  119     globalParams->setupBaseFonts(NULL);
  120     globalParams->setTextEncoding("UTF-8");
  121 
  122     // open PDF file
  123     if (ownerPassword[0] != '\001')
  124         ownerPW = new GString(ownerPassword);
  125     else
  126         ownerPW = NULL;
  127     if (userPassword[0] != '\001')
  128         userPW = new GString(userPassword);
  129     else
  130         userPW = NULL;
  131     doc = new PDFDoc(fileName, ownerPW, userPW);
  132     if (userPW)
  133         delete userPW;
  134     if (ownerPW)
  135         delete ownerPW;
  136     if (!doc->isOk())
  137     {
  138         exitCode = 1;
  139         goto err1;
  140     }
  141 
  142     // check for copy permission
  143     if (!doc->okToCopy())
  144     {
  145         error(errNotAllowed, -1, "Copying of text from this document is not allowed.");
  146         exitCode = 3;
  147         goto err1;
  148     }
  149 
  150     // get page range
  151     if (firstPage < 1)
  152     {
  153         firstPage = 1;
  154     }
  155     if (lastPage < 1 || lastPage > doc->getNumPages())
  156     {
  157         lastPage = doc->getNumPages();
  158     }
  159 
  160     // set up the HTMLGen object
  161     htmlGen = new HTMLGen();
  162     if (!htmlGen->isOk())
  163     {
  164         exitCode = 99;
  165         goto err1;
  166     }
  167     htmlGen->setDrawInvisibleText(!skipInvisible);
  168     htmlGen->startDoc(doc);
  169 
  170     if (!(htmlFile = fopen(htmlFileName->getCString(), "wb")))
  171     {
  172         error(errIO, -1, "Couldn't open TXT file '{0:t}'", htmlFileName);
  173         goto err2;
  174     }
  175 
  176     // convert the pages
  177     for (pg = firstPage; pg <= lastPage; ++pg)
  178     {
  179         err = htmlGen->convertPage(pg, writeToFile, htmlFile);
  180 
  181         if (err != errNone)
  182         {
  183             error(errIO, -1, "Error converting page {0:d}", pg);
  184             exitCode = 2;
  185             goto err2;
  186         }
  187     }
  188 
  189     fclose(htmlFile);
  190 
  191     exitCode = 0;
  192 
  193     // clean up
  194 err2:
  195     delete htmlFileName;
  196     delete htmlGen;
  197 err1:
  198     delete doc;
  199     delete globalParams;
  200 err0:
  201 
  202     // check for memory leaks
  203     Object::memCheck(stderr);
  204     gMemReport(stderr);
  205 
  206     return exitCode;
  207 }