"Fossies" - the Fresh Open Source Software Archive

Member "xpdf-4.04/xpdf/PDFDoc.cc" (18 Apr 2022, 16078 Bytes) of package /linux/misc/xpdf-4.04.tar.gz:


As a special service, "Fossies" has tried to format the requested source page as HTML using (guessed) C and C++ source code syntax highlighting (style: standard), with prefixed line numbers and a code folding option. Alternatively, you can view or download the uninterpreted source code file here. For more information about "PDFDoc.cc", see the Fossies "Dox" file reference documentation and the latest Fossies "Diffs" side-by-side code changes report: 4.03_vs_4.04.

    1 //========================================================================
    2 //
    3 // PDFDoc.cc
    4 //
    5 // Copyright 1996-2003 Glyph & Cog, LLC
    6 //
    7 //========================================================================
    8 
    9 #include <aconf.h>
   10 
   11 #ifdef USE_GCC_PRAGMAS
   12 #pragma implementation
   13 #endif
   14 
   15 #include <stdio.h>
   16 #include <stdlib.h>
   17 #include <stddef.h>
   18 #include <string.h>
   19 #ifdef _WIN32
   20 #  include <windows.h>
   21 #endif
   22 #include "gmempp.h"
   23 #include "GString.h"
   24 #include "gfile.h"
   25 #include "config.h"
   26 #include "GlobalParams.h"
   27 #include "Page.h"
   28 #include "Catalog.h"
   29 #include "Stream.h"
   30 #include "XRef.h"
   31 #include "Link.h"
   32 #include "OutputDev.h"
   33 #include "Error.h"
   34 #include "ErrorCodes.h"
   35 #include "Lexer.h"
   36 #include "Parser.h"
   37 #include "SecurityHandler.h"
   38 #include "UTF8.h"
   39 #ifndef DISABLE_OUTLINE
   40 #include "Outline.h"
   41 #endif
   42 #include "OptionalContent.h"
   43 #include "PDFDoc.h"
   44 
   45 //------------------------------------------------------------------------
   46 
   47 #define headerSearchSize 1024   // number of bytes read from the start of
   48                 //   the stream when searching for "%PDF-"
   49 
   50 // Mode strings for opening the PDF file.  On Windows the extra "N" flag
   51 // avoids sharing the file with child processes, which can cause problems.
   52 #ifdef _WIN32
   53 #  define fopenReadMode "rbN"
   54 #  define wfopenReadMode L"rbN"
   55 #else
   56 #  define fopenReadMode "rb"
   57 #endif
   58 
   59 //------------------------------------------------------------------------
   60 // PDFDoc
   61 //------------------------------------------------------------------------
   62 
   63 PDFDoc::PDFDoc(GString *fileNameA, GString *ownerPassword,
   64            GString *userPassword, PDFCore *coreA) {
   65   Object obj;
   66   GString *fileName1, *fileName2;
   67 #ifdef _WIN32
   68   int n, i;
   69 #endif
   70 
   71   init(coreA);
   72 
   73   fileName = fileNameA;
   74 #ifdef _WIN32
   75   n = fileName->getLength();
   76   fileNameU = (wchar_t *)gmallocn(n + 1, sizeof(wchar_t));
   77   for (i = 0; i < n; ++i) {
   78     fileNameU[i] = (wchar_t)(fileName->getChar(i) & 0xff);
   79   }
   80   fileNameU[n] = L'\0';
   81 #endif
   82 
   83   fileName1 = fileName;
   84 
   85   // try to open file
   86   fileName2 = NULL;
   87 #ifdef VMS
   88   if (!(file = fopen(fileName1->getCString(), fopenReadMode, "ctx=stm"))) {
   89     error(errIO, -1, "Couldn't open file '{0:t}'", fileName1);
   90     errCode = errOpenFile;
   91     return;
   92   }
   93 #else
   94   if (!(file = fopen(fileName1->getCString(), fopenReadMode))) {
   95     fileName2 = fileName->copy();
   96     fileName2->lowerCase();
   97     if (!(file = fopen(fileName2->getCString(), fopenReadMode))) {
   98       fileName2->upperCase();
   99       if (!(file = fopen(fileName2->getCString(), fopenReadMode))) {
  100     error(errIO, -1, "Couldn't open file '{0:t}'", fileName);
  101     delete fileName2;
  102     errCode = errOpenFile;
  103     return;
  104       }
  105     }
  106     delete fileName2;
  107   }
  108 #endif
  109 
  110   // create stream
  111   obj.initNull();
  112   str = new FileStream(file, 0, gFalse, 0, &obj);
  113 
  114   ok = setup(ownerPassword, userPassword);
  115 }
  116 
  117 #ifdef _WIN32
  118 PDFDoc::PDFDoc(wchar_t *fileNameA, int fileNameLen, GString *ownerPassword,
  119            GString *userPassword, PDFCore *coreA) {
  120   OSVERSIONINFO version;
  121   Object obj;
  122   int i;
  123 
  124   init(coreA);
  125 
  126   // handle a Windows shortcut
  127   wchar_t wPath[winMaxLongPath + 1];
  128   int n = fileNameLen < winMaxLongPath ? fileNameLen : winMaxLongPath;
  129   memcpy(wPath, fileNameA, n * sizeof(wchar_t));
  130   wPath[n] = L'\0';
  131   readWindowsShortcut(wPath, winMaxLongPath + 1);
  132   int wPathLen = (int)wcslen(wPath);
  133 
  134   // save both Unicode and 8-bit copies of the file name
  135   fileName = new GString();
  136   fileNameU = (wchar_t *)gmallocn(wPathLen + 1, sizeof(wchar_t));
  137   memcpy(fileNameU, wPath, (wPathLen + 1) * sizeof(wchar_t));
  138   for (i = 0; i < wPathLen; ++i) {
  139     fileName->append((char)fileNameA[i]);
  140   }
  141 
  142   // try to open file
  143   // NB: _wfopen is only available in NT
  144   version.dwOSVersionInfoSize = sizeof(version);
  145   GetVersionEx(&version);
  146   if (version.dwPlatformId == VER_PLATFORM_WIN32_NT) {
  147     file = _wfopen(fileNameU, wfopenReadMode);
  148   } else {
  149     file = fopen(fileName->getCString(), fopenReadMode);
  150   }
  151   if (!file) {
  152     error(errIO, -1, "Couldn't open file '{0:t}'", fileName);
  153     errCode = errOpenFile;
  154     return;
  155   }
  156 
  157   // create stream
  158   obj.initNull();
  159   str = new FileStream(file, 0, gFalse, 0, &obj);
  160 
  161   ok = setup(ownerPassword, userPassword);
  162 }
  163 #endif
  164 
  165 PDFDoc::PDFDoc(char *fileNameA, GString *ownerPassword,
  166            GString *userPassword, PDFCore *coreA) {
  167 #ifdef _WIN32
  168   OSVERSIONINFO version;
  169 #endif
  170   Object obj;
  171 #ifdef _WIN32
  172   Unicode u;
  173   int i, j;
  174 #endif
  175 
  176   init(coreA);
  177 
  178   fileName = new GString(fileNameA);
  179 
  180 #if defined(_WIN32)
  181   wchar_t wPath[winMaxLongPath + 1];
  182   i = 0;
  183   j = 0;
  184   while (j < winMaxLongPath && getUTF8(fileName, &i, &u)) {
  185     wPath[j++] = (wchar_t)u;
  186   }
  187   wPath[j] = L'\0';
  188   readWindowsShortcut(wPath, winMaxLongPath + 1);
  189   int wPathLen = (int)wcslen(wPath);
  190 
  191   fileNameU = (wchar_t *)gmallocn(wPathLen + 1, sizeof(wchar_t));
  192   memcpy(fileNameU, wPath, (wPathLen + 1) * sizeof(wchar_t));
  193 
  194   // NB: _wfopen is only available in NT
  195   version.dwOSVersionInfoSize = sizeof(version);
  196   GetVersionEx(&version);
  197   if (version.dwPlatformId == VER_PLATFORM_WIN32_NT) {
  198     file = _wfopen(fileNameU, wfopenReadMode);
  199   } else {
  200     file = fopen(fileName->getCString(), fopenReadMode);
  201   }
  202 #elif defined(VMS)
  203   file = fopen(fileName->getCString(), fopenReadMode, "ctx=stm");
  204 #else
  205   file = fopen(fileName->getCString(), fopenReadMode);
  206 #endif
  207 
  208   if (!file) {
  209     error(errIO, -1, "Couldn't open file '{0:t}'", fileName);
  210     errCode = errOpenFile;
  211     return;
  212   }
  213 
  214   // create stream
  215   obj.initNull();
  216   str = new FileStream(file, 0, gFalse, 0, &obj);
  217 
  218   ok = setup(ownerPassword, userPassword);
  219 }
  220 
  221 PDFDoc::PDFDoc(BaseStream *strA, GString *ownerPassword,
  222            GString *userPassword, PDFCore *coreA) {
  223 #ifdef _WIN32
  224   int n, i;
  225 #endif
  226 
  227   init(coreA);
  228 
  229   if (strA->getFileName()) {
  230     fileName = strA->getFileName()->copy();
  231 #ifdef _WIN32
  232     n = fileName->getLength();
  233     fileNameU = (wchar_t *)gmallocn(n + 1, sizeof(wchar_t));
  234     for (i = 0; i < n; ++i) {
  235       fileNameU[i] = (wchar_t)(fileName->getChar(i) & 0xff);
  236     }
  237     fileNameU[n] = L'\0';
  238 #endif
  239   } else {
  240     fileName = NULL;
  241 #ifdef _WIN32
  242     fileNameU = NULL;
  243 #endif
  244   }
  245   str = strA;
  246   ok = setup(ownerPassword, userPassword);
  247 }
  248 
  249 void PDFDoc::init(PDFCore *coreA) {
  250   ok = gFalse;
  251   errCode = errNone;
  252   core = coreA;
  253   file = NULL;
  254   str = NULL;
  255   xref = NULL;
  256   catalog = NULL;
  257 #ifndef DISABLE_OUTLINE
  258   outline = NULL;
  259 #endif
  260   optContent = NULL;
  261 }
  262 
  263 GBool PDFDoc::setup(GString *ownerPassword, GString *userPassword) {
  264 
  265   str->reset();
  266 
  267   // check header
  268   checkHeader();
  269 
  270   // read the xref and catalog
  271   if (!PDFDoc::setup2(ownerPassword, userPassword, gFalse)) {
  272     if (errCode == errDamaged || errCode == errBadCatalog) {
  273       // try repairing the xref table
  274       error(errSyntaxWarning, -1,
  275         "PDF file is damaged - attempting to reconstruct xref table...");
  276       if (!PDFDoc::setup2(ownerPassword, userPassword, gTrue)) {
  277     return gFalse;
  278       }
  279     } else {
  280       return gFalse;
  281     }
  282   }
  283 
  284 #ifndef DISABLE_OUTLINE
  285   // read outline
  286   outline = new Outline(catalog->getOutline(), xref);
  287 #endif
  288 
  289   // read the optional content info
  290   optContent = new OptionalContent(this);
  291 
  292 
  293   // done
  294   return gTrue;
  295 }
  296 
  297 GBool PDFDoc::setup2(GString *ownerPassword, GString *userPassword,
  298              GBool repairXRef) {
  299   // read xref table
  300   xref = new XRef(str, repairXRef);
  301   if (!xref->isOk()) {
  302     error(errSyntaxError, -1, "Couldn't read xref table");
  303     errCode = xref->getErrorCode();
  304     delete xref;
  305     xref = NULL;
  306     return gFalse;
  307   }
  308 
  309   // check for encryption
  310   if (!checkEncryption(ownerPassword, userPassword)) {
  311     errCode = errEncrypted;
  312     delete xref;
  313     xref = NULL;
  314     return gFalse;
  315   }
  316 
  317   // read catalog
  318   catalog = new Catalog(this);
  319   if (!catalog->isOk()) {
  320     error(errSyntaxError, -1, "Couldn't read page catalog");
  321     errCode = errBadCatalog;
  322     delete catalog;
  323     catalog = NULL;
  324     delete xref;
  325     xref = NULL;
  326     return gFalse;
  327   }
  328 
  329   return gTrue;
  330 }
  331 
  332 PDFDoc::~PDFDoc() {
  333   if (optContent) {
  334     delete optContent;
  335   }
  336 #ifndef DISABLE_OUTLINE
  337   if (outline) {
  338     delete outline;
  339   }
  340 #endif
  341   if (catalog) {
  342     delete catalog;
  343   }
  344   if (xref) {
  345     delete xref;
  346   }
  347   if (str) {
  348     delete str;
  349   }
  350   if (file) {
  351     fclose(file);
  352   }
  353   if (fileName) {
  354     delete fileName;
  355   }
  356 #ifdef _WIN32
  357   if (fileNameU) {
  358     gfree(fileNameU);
  359   }
  360 #endif
  361 }
  362 
  363 // Check for a PDF header on this stream.  Skip past some garbage
  364 // if necessary.
  365 void PDFDoc::checkHeader() {
  366   char hdrBuf[headerSearchSize+1];
  367   char *p;
  368   int i;
  369 
  370   pdfVersion = 0;
  371   memset(hdrBuf, 0, headerSearchSize + 1);
  372   str->getBlock(hdrBuf, headerSearchSize);
  373   for (i = 0; i < headerSearchSize - 5; ++i) {
  374     if (!strncmp(&hdrBuf[i], "%PDF-", 5)) {
  375       break;
  376     }
  377   }
  378   if (i >= headerSearchSize - 5) {
  379     error(errSyntaxWarning, -1, "May not be a PDF file (continuing anyway)");
  380     return;
  381   }
  382   str->moveStart(i);
  383   if (!(p = strtok(&hdrBuf[i+5], " \t\n\r"))) {
  384     error(errSyntaxWarning, -1, "May not be a PDF file (continuing anyway)");
  385     return;
  386   }
  387   pdfVersion = atof(p);
  388   if (!(hdrBuf[i+5] >= '0' && hdrBuf[i+5] <= '9') ||
  389       pdfVersion > supportedPDFVersionNum + 0.0001) {
  390     error(errSyntaxWarning, -1,
  391       "PDF version {0:s} -- xpdf supports version {1:s} (continuing anyway)",
  392       p, supportedPDFVersionStr);
  393   }
  394 }
  395 
  396 GBool PDFDoc::checkEncryption(GString *ownerPassword, GString *userPassword) {
  397   Object encrypt;
  398   GBool encrypted;
  399   SecurityHandler *secHdlr;
  400   GBool ret;
  401 
  402   xref->getTrailerDict()->dictLookup("Encrypt", &encrypt);
  403   if ((encrypted = encrypt.isDict())) {
  404     if ((secHdlr = SecurityHandler::make(this, &encrypt))) {
  405       if (secHdlr->isUnencrypted()) {
  406     // no encryption
  407     ret = gTrue;
  408       } else if (secHdlr->checkEncryption(ownerPassword, userPassword)) {
  409     // authorization succeeded
  410         xref->setEncryption(secHdlr->getPermissionFlags(),
  411                 secHdlr->getOwnerPasswordOk(),
  412                 secHdlr->getFileKey(),
  413                 secHdlr->getFileKeyLength(),
  414                 secHdlr->getEncVersion(),
  415                 secHdlr->getEncAlgorithm());
  416     ret = gTrue;
  417       } else {
  418     // authorization failed
  419     ret = gFalse;
  420       }
  421       delete secHdlr;
  422     } else {
  423       // couldn't find the matching security handler
  424       ret = gFalse;
  425     }
  426   } else {
  427     // document is not encrypted
  428     ret = gTrue;
  429   }
  430   encrypt.free();
  431   return ret;
  432 }
  433 
  434 void PDFDoc::displayPage(OutputDev *out, int page,
  435              double hDPI, double vDPI, int rotate,
  436              GBool useMediaBox, GBool crop, GBool printing,
  437              GBool (*abortCheckCbk)(void *data),
  438              void *abortCheckCbkData) {
  439   if (globalParams->getPrintCommands()) {
  440     printf("***** page %d *****\n", page);
  441   }
  442   catalog->getPage(page)->display(out, hDPI, vDPI,
  443                   rotate, useMediaBox, crop, printing,
  444                   abortCheckCbk, abortCheckCbkData);
  445 }
  446 
  447 void PDFDoc::displayPages(OutputDev *out, int firstPage, int lastPage,
  448               double hDPI, double vDPI, int rotate,
  449               GBool useMediaBox, GBool crop, GBool printing,
  450               GBool (*abortCheckCbk)(void *data),
  451               void *abortCheckCbkData) {
  452   int page;
  453 
  454   for (page = firstPage; page <= lastPage; ++page) {
  455     if (globalParams->getPrintStatusInfo()) {
  456       fflush(stderr);
  457       printf("[processing page %d]\n", page);
  458       fflush(stdout);
  459     }
  460     displayPage(out, page, hDPI, vDPI, rotate, useMediaBox, crop, printing,
  461         abortCheckCbk, abortCheckCbkData);
  462     catalog->doneWithPage(page);
  463   }
  464 }
  465 
  466 void PDFDoc::displayPageSlice(OutputDev *out, int page,
  467                   double hDPI, double vDPI, int rotate,
  468                   GBool useMediaBox, GBool crop, GBool printing,
  469                   int sliceX, int sliceY, int sliceW, int sliceH,
  470                   GBool (*abortCheckCbk)(void *data),
  471                   void *abortCheckCbkData) {
  472   catalog->getPage(page)->displaySlice(out, hDPI, vDPI,
  473                        rotate, useMediaBox, crop,
  474                        sliceX, sliceY, sliceW, sliceH,
  475                        printing,
  476                        abortCheckCbk, abortCheckCbkData);
  477 }
  478 
  479 Links *PDFDoc::getLinks(int page) {
  480   return catalog->getPage(page)->getLinks();
  481 }
  482 
  483 void PDFDoc::processLinks(OutputDev *out, int page) {
  484   catalog->getPage(page)->processLinks(out);
  485 }
  486 
  487 #ifndef DISABLE_OUTLINE
  488 int PDFDoc::getOutlineTargetPage(OutlineItem *outlineItem) {
  489   LinkAction *action;
  490   LinkActionKind kind;
  491   LinkDest *dest;
  492   GString *namedDest;
  493   Ref pageRef;
  494   int pg;
  495 
  496   if (outlineItem->pageNum >= 0) {
  497     return outlineItem->pageNum;
  498   }
  499   if (!(action = outlineItem->getAction())) {
  500     outlineItem->pageNum = 0;
  501     return 0;
  502   }
  503   kind = action->getKind();
  504   if (kind != actionGoTo) {
  505     outlineItem->pageNum = 0;
  506     return 0;
  507   }
  508   if ((dest = ((LinkGoTo *)action)->getDest())) {
  509     dest = dest->copy();
  510   } else if ((namedDest = ((LinkGoTo *)action)->getNamedDest())) {
  511     dest = findDest(namedDest);
  512   }
  513   pg = 0;
  514   if (dest) {
  515     if (dest->isPageRef()) {
  516       pageRef = dest->getPageRef();
  517       pg = findPage(pageRef.num, pageRef.gen);
  518     } else {
  519       pg = dest->getPageNum();
  520     }
  521     delete dest;
  522   }
  523   outlineItem->pageNum = pg;
  524   return pg;
  525 }
  526 #endif
  527 
  528 GBool PDFDoc::isLinearized() {
  529   Parser *parser;
  530   Object obj1, obj2, obj3, obj4, obj5;
  531   GBool lin;
  532 
  533   lin = gFalse;
  534   obj1.initNull();
  535   parser = new Parser(xref,
  536          new Lexer(xref,
  537            str->makeSubStream(str->getStart(), gFalse, 0, &obj1)),
  538          gTrue);
  539   parser->getObj(&obj1);
  540   parser->getObj(&obj2);
  541   parser->getObj(&obj3);
  542   parser->getObj(&obj4);
  543   if (obj1.isInt() && obj2.isInt() && obj3.isCmd("obj") &&
  544       obj4.isDict()) {
  545     obj4.dictLookup("Linearized", &obj5);
  546     if (obj5.isNum() && obj5.getNum() > 0) {
  547       lin = gTrue;
  548     }
  549     obj5.free();
  550   }
  551   obj4.free();
  552   obj3.free();
  553   obj2.free();
  554   obj1.free();
  555   delete parser;
  556   return lin;
  557 }
  558 
  559 GBool PDFDoc::saveAs(GString *name) {
  560   FILE *f;
  561   char buf[4096];
  562   int n;
  563 
  564   if (!(f = fopen(name->getCString(), "wb"))) {
  565     error(errIO, -1, "Couldn't open file '{0:t}'", name);
  566     return gFalse;
  567   }
  568   str->reset();
  569   while ((n = str->getBlock(buf, sizeof(buf))) > 0) {
  570     fwrite(buf, 1, n, f);
  571   }
  572   str->close();
  573   fclose(f);
  574   return gTrue;
  575 }
  576 
  577 GBool PDFDoc::saveEmbeddedFile(int idx, const char *path) {
  578   FILE *f;
  579   GBool ret;
  580 
  581   if (!(f = fopen(path, "wb"))) {
  582     return gFalse;
  583   }
  584   ret = saveEmbeddedFile2(idx, f);
  585   fclose(f);
  586   return ret;
  587 }
  588 
  589 GBool PDFDoc::saveEmbeddedFileU(int idx, const char *path) {
  590   FILE *f;
  591   GBool ret;
  592 
  593   if (!(f = openFile(path, "wb"))) {
  594     return gFalse;
  595   }
  596   ret = saveEmbeddedFile2(idx, f);
  597   fclose(f);
  598   return ret;
  599 }
  600 
  601 #ifdef _WIN32
  602 GBool PDFDoc::saveEmbeddedFile(int idx, const wchar_t *path, int pathLen) {
  603   FILE *f;
  604   OSVERSIONINFO version;
  605   wchar_t path2w[winMaxLongPath + 1];
  606   char path2c[MAX_PATH + 1];
  607   int i;
  608   GBool ret;
  609 
  610   // NB: _wfopen is only available in NT
  611   version.dwOSVersionInfoSize = sizeof(version);
  612   GetVersionEx(&version);
  613   if (version.dwPlatformId == VER_PLATFORM_WIN32_NT) {
  614     for (i = 0; i < pathLen && i < winMaxLongPath; ++i) {
  615       path2w[i] = path[i];
  616     }
  617     path2w[i] = 0;
  618     f = _wfopen(path2w, L"wb");
  619   } else {
  620     for (i = 0; i < pathLen && i < MAX_PATH; ++i) {
  621       path2c[i] = (char)path[i];
  622     }
  623     path2c[i] = 0;
  624     f = fopen(path2c, "wb");
  625   }
  626   if (!f) {
  627     return gFalse;
  628   }
  629   ret = saveEmbeddedFile2(idx, f);
  630   fclose(f);
  631   return ret;
  632 }
  633 #endif
  634 
  635 GBool PDFDoc::saveEmbeddedFile2(int idx, FILE *f) {
  636   Object strObj;
  637   char buf[4096];
  638   int n;
  639 
  640   if (!catalog->getEmbeddedFileStreamObj(idx, &strObj)) {
  641     return gFalse;
  642   }
  643   strObj.streamReset();
  644   while ((n = strObj.streamGetBlock(buf, sizeof(buf))) > 0) {
  645     fwrite(buf, 1, n, f);
  646   }
  647   strObj.streamClose();
  648   strObj.free();
  649   return gTrue;
  650 }
  651 
  652 char *PDFDoc::getEmbeddedFileMem(int idx, int *size) {
  653   Object strObj;
  654   char *buf;
  655   int bufSize, sizeInc, n;
  656 
  657   if (!catalog->getEmbeddedFileStreamObj(idx, &strObj)) {
  658     return NULL;
  659   }
  660   strObj.streamReset();
  661   bufSize = 0;
  662   buf = NULL;
  663   do {
  664     sizeInc = bufSize ? bufSize : 1024;
  665     if (bufSize > INT_MAX - sizeInc) {
  666       error(errIO, -1, "embedded file is too large");
  667       *size = 0;
  668       return NULL;
  669     }
  670     buf = (char *)grealloc(buf, bufSize + sizeInc);
  671     n = strObj.streamGetBlock(buf + bufSize, sizeInc);
  672     bufSize += n;
  673   } while (n == sizeInc);
  674   strObj.streamClose();
  675   strObj.free();
  676   *size = bufSize;
  677   return buf;
  678 }
  679