"Fossies" - the Fresh Open Source Software Archive

Member "xpdf-4.04/xpdf/PDFDoc.cc" (18 Apr 2022, 16078 Bytes) of package /linux/misc/xpdf-4.04.tar.gz:


As a special service, "Fossies" has tried to format the requested source page into HTML using (guessed) C and C++ source code syntax highlighting (style: standard), with prefixed line numbers and a code folding option. Alternatively, you can view or download the uninterpreted source code file here.

    1 //========================================================================
    2 //
    3 // PDFDoc.cc
    4 //
    5 // Copyright 1996-2003 Glyph & Cog, LLC
    6 //
    7 //========================================================================
    8 
    9 #include <aconf.h>
   10 
   11 #ifdef USE_GCC_PRAGMAS
   12 #pragma implementation
   13 #endif
   14 
   15 #include <stdio.h>
   16 #include <stdlib.h>
   17 #include <stddef.h>
   18 #include <string.h>
   19 #ifdef _WIN32
   20 #  include <windows.h>
   21 #endif
   22 #include "gmempp.h"
   23 #include "GString.h"
   24 #include "gfile.h"
   25 #include "config.h"
   26 #include "GlobalParams.h"
   27 #include "Page.h"
   28 #include "Catalog.h"
   29 #include "Stream.h"
   30 #include "XRef.h"
   31 #include "Link.h"
   32 #include "OutputDev.h"
   33 #include "Error.h"
   34 #include "ErrorCodes.h"
   35 #include "Lexer.h"
   36 #include "Parser.h"
   37 #include "SecurityHandler.h"
   38 #include "UTF8.h"
   39 #ifndef DISABLE_OUTLINE
   40 #include "Outline.h"
   41 #endif
   42 #include "OptionalContent.h"
   43 #include "PDFDoc.h"
   44 
   45 //------------------------------------------------------------------------
   46 
   47 #define headerSearchSize 1024   // read this many bytes at beginning of
   48                 //   file to look for '%PDF'
   49 
   50 // Avoid sharing files with child processes on Windows, where sharing
   51 // can cause problems.
   52 #ifdef _WIN32
   53 #  define fopenReadMode "rbN"
   54 #  define wfopenReadMode L"rbN"
   55 #else
   56 #  define fopenReadMode "rb"
   57 #endif
   58 
   59 //------------------------------------------------------------------------
   60 // PDFDoc
   61 //------------------------------------------------------------------------
   62 
   63 PDFDoc::PDFDoc(GString *fileNameA, GString *ownerPassword,
   64            GString *userPassword, PDFCore *coreA) {
   65   Object obj;
   66   GString *fileName1, *fileName2;
   67 #ifdef _WIN32
   68   int n, i;
   69 #endif
   70 
   71   init(coreA);
   72 
   73   fileName = fileNameA;
   74 #ifdef _WIN32
   75   n = fileName->getLength();
   76   fileNameU = (wchar_t *)gmallocn(n + 1, sizeof(wchar_t));
   77   for (i = 0; i < n; ++i) {
   78     fileNameU[i] = (wchar_t)(fileName->getChar(i) & 0xff);
   79   }
   80   fileNameU[n] = L'\0';
   81 #endif
   82 
   83   fileName1 = fileName;
   84 
   85   // try to open file
   86   fileName2 = NULL;
   87 #ifdef VMS
   88   if (!(file = fopen(fileName1->getCString(), fopenReadMode, "ctx=stm"))) {
   89     error(errIO, -1, "Couldn't open file '{0:t}'", fileName1);
   90     errCode = errOpenFile;
   91     return;
   92   }
   93 #else
   94   if (!(file = fopen(fileName1->getCString(), fopenReadMode))) {
   95     fileName2 = fileName->copy();
   96     fileName2->lowerCase();
   97     if (!(file = fopen(fileName2->getCString(), fopenReadMode))) {
   98       fileName2->upperCase();
   99       if (!(file = fopen(fileName2->getCString(), fopenReadMode))) {
  100     error(errIO, -1, "Couldn't open file '{0:t}'", fileName);
  101     delete fileName2;
  102     errCode = errOpenFile;
  103     return;
  104       }
  105     }
  106     delete fileName2;
  107   }
  108 #endif
  109 
  110   // create stream
  111   obj.initNull();
  112   str = new FileStream(file, 0, gFalse, 0, &obj);
  113 
  114   ok = setup(ownerPassword, userPassword);
  115 }
  116 
  117 #ifdef _WIN32
  118 PDFDoc::PDFDoc(wchar_t *fileNameA, int fileNameLen, GString *ownerPassword,
  119            GString *userPassword, PDFCore *coreA) {
  120   OSVERSIONINFO version;
  121   Object obj;
  122   int i;
  123 
  124   init(coreA);
  125 
  126   // handle a Windows shortcut
  127   wchar_t wPath[winMaxLongPath + 1];
  128   int n = fileNameLen < winMaxLongPath ? fileNameLen : winMaxLongPath;
  129   memcpy(wPath, fileNameA, n * sizeof(wchar_t));
  130   wPath[n] = L'\0';
  131   readWindowsShortcut(wPath, winMaxLongPath + 1);
  132   int wPathLen = (int)wcslen(wPath);
  133 
  134   // save both Unicode and 8-bit copies of the file name
  135   fileName = new GString();
  136   fileNameU = (wchar_t *)gmallocn(wPathLen + 1, sizeof(wchar_t));
  137   memcpy(fileNameU, wPath, (wPathLen + 1) * sizeof(wchar_t));
  138   for (i = 0; i < wPathLen; ++i) {
  139     fileName->append((char)fileNameA[i]);
  140   }
  141 
  142   // try to open file
  143   // NB: _wfopen is only available in NT
  144   version.dwOSVersionInfoSize = sizeof(version);
  145   GetVersionEx(&version);
  146   if (version.dwPlatformId == VER_PLATFORM_WIN32_NT) {
  147     file = _wfopen(fileNameU, wfopenReadMode);
  148   } else {
  149     file = fopen(fileName->getCString(), fopenReadMode);
  150   }
  151   if (!file) {
  152     error(errIO, -1, "Couldn't open file '{0:t}'", fileName);
  153     errCode = errOpenFile;
  154     return;
  155   }
  156 
  157   // create stream
  158   obj.initNull();
  159   str = new FileStream(file, 0, gFalse, 0, &obj);
  160 
  161   ok = setup(ownerPassword, userPassword);
  162 }
  163 #endif
  164 
  165 PDFDoc::PDFDoc(char *fileNameA, GString *ownerPassword,
  166            GString *userPassword, PDFCore *coreA) {
  167 #ifdef _WIN32
  168   OSVERSIONINFO version;
  169 #endif
  170   Object obj;
  171 #ifdef _WIN32
  172   Unicode u;
  173   int i, j;
  174 #endif
  175 
  176   init(coreA);
  177 
  178   fileName = new GString(fileNameA);
  179 
  180 #if defined(_WIN32)
  181   wchar_t wPath[winMaxLongPath + 1];
  182   i = 0;
  183   j = 0;
  184   while (j < winMaxLongPath && getUTF8(fileName, &i, &u)) {
  185     wPath[j++] = (wchar_t)u;
  186   }
  187   wPath[j] = L'\0';
  188   readWindowsShortcut(wPath, winMaxLongPath + 1);
  189   int wPathLen = (int)wcslen(wPath);
  190 
  191   fileNameU = (wchar_t *)gmallocn(wPathLen + 1, sizeof(wchar_t));
  192   memcpy(fileNameU, wPath, (wPathLen + 1) * sizeof(wchar_t));
  193 
  194   // NB: _wfopen is only available in NT
  195   version.dwOSVersionInfoSize = sizeof(version);
  196   GetVersionEx(&version);
  197   if (version.dwPlatformId == VER_PLATFORM_WIN32_NT) {
  198     file = _wfopen(fileNameU, wfopenReadMode);
  199   } else {
  200     file = fopen(fileName->getCString(), fopenReadMode);
  201   }
  202 #elif defined(VMS)
  203   file = fopen(fileName->getCString(), fopenReadMode, "ctx=stm");
  204 #else
  205   file = fopen(fileName->getCString(), fopenReadMode);
  206 #endif
  207 
  208   if (!file) {
  209     error(errIO, -1, "Couldn't open file '{0:t}'", fileName);
  210     errCode = errOpenFile;
  211     return;
  212   }
  213 
  214   // create stream
  215   obj.initNull();
  216   str = new FileStream(file, 0, gFalse, 0, &obj);
  217 
  218   ok = setup(ownerPassword, userPassword);
  219 }
  220 
  221 PDFDoc::PDFDoc(BaseStream *strA, GString *ownerPassword,
  222            GString *userPassword, PDFCore *coreA) {
  223 #ifdef _WIN32
  224   int n, i;
  225 #endif
  226 
  227   init(coreA);
  228 
  229   if (strA->getFileName()) {
  230     fileName = strA->getFileName()->copy();
  231 #ifdef _WIN32
  232     n = fileName->getLength();
  233     fileNameU = (wchar_t *)gmallocn(n + 1, sizeof(wchar_t));
  234     for (i = 0; i < n; ++i) {
  235       fileNameU[i] = (wchar_t)(fileName->getChar(i) & 0xff);
  236     }
  237     fileNameU[n] = L'\0';
  238 #endif
  239   } else {
  240     fileName = NULL;
  241 #ifdef _WIN32
  242     fileNameU = NULL;
  243 #endif
  244   }
  245   str = strA;
  246   ok = setup(ownerPassword, userPassword);
  247 }
  248 
  249 void PDFDoc::init(PDFCore *coreA) {
  250   ok = gFalse;
  251   errCode = errNone;
  252   core = coreA;
  253   file = NULL;
  254   str = NULL;
  255   xref = NULL;
  256   catalog = NULL;
  257 #ifndef DISABLE_OUTLINE
  258   outline = NULL;
  259 #endif
  260   optContent = NULL;
  261 }
  262 
  263 GBool PDFDoc::setup(GString *ownerPassword, GString *userPassword) {
  264 
  265   str->reset();
  266 
  267   // check header
  268   checkHeader();
  269 
  270   // read the xref and catalog
  271   if (!PDFDoc::setup2(ownerPassword, userPassword, gFalse)) {
  272     if (errCode == errDamaged || errCode == errBadCatalog) {
  273       // try repairing the xref table
  274       error(errSyntaxWarning, -1,
  275         "PDF file is damaged - attempting to reconstruct xref table...");
  276       if (!PDFDoc::setup2(ownerPassword, userPassword, gTrue)) {
  277     return gFalse;
  278       }
  279     } else {
  280       return gFalse;
  281     }
  282   }
  283 
  284 #ifndef DISABLE_OUTLINE
  285   // read outline
  286   outline = new Outline(catalog->getOutline(), xref);
  287 #endif
  288 
  289   // read the optional content info
  290   optContent = new OptionalContent(this);
  291 
  292 
  293   // done
  294   return gTrue;
  295 }
  296 
  297 GBool PDFDoc::setup2(GString *ownerPassword, GString *userPassword,
  298              GBool repairXRef) {
  299   // read xref table
  300   xref = new XRef(str, repairXRef);
  301   if (!xref->isOk()) {
  302     error(errSyntaxError, -1, "Couldn't read xref table");
  303     errCode = xref->getErrorCode();
  304     delete xref;
  305     xref = NULL;
  306     return gFalse;
  307   }
  308 
  309   // check for encryption
  310   if (!checkEncryption(ownerPassword, userPassword)) {
  311     errCode = errEncrypted;
  312     delete xref;
  313     xref = NULL;
  314     return gFalse;
  315   }
  316 
  317   // read catalog
  318   catalog = new Catalog(this);
  319   if (!catalog->isOk()) {
  320     error(errSyntaxError, -1, "Couldn't read page catalog");
  321     errCode = errBadCatalog;
  322     delete catalog;
  323     catalog = NULL;
  324     delete xref;
  325     xref = NULL;
  326     return gFalse;
  327   }
  328 
  329   return gTrue;
  330 }
  331 
  332 PDFDoc::~PDFDoc() {
  333   if (optContent) {
  334     delete optContent;
  335   }
  336 #ifndef DISABLE_OUTLINE
  337   if (outline) {
  338     delete outline;
  339   }
  340 #endif
  341   if (catalog) {
  342     delete catalog;
  343   }
  344   if (xref) {
  345     delete xref;
  346   }
  347   if (str) {
  348     delete str;
  349   }
  350   if (file) {
  351     fclose(file);
  352   }
  353   if (fileName) {
  354     delete fileName;
  355   }
  356 #ifdef _WIN32
  357   if (fileNameU) {
  358     gfree(fileNameU);
  359   }
  360 #endif
  361 }
  362 
  363 // Check for a PDF header on this stream.  Skip past some garbage
  364 // if necessary.
  365 void PDFDoc::checkHeader() {
  366   char hdrBuf[headerSearchSize+1];
  367   char *p;
  368   int i;
  369 
  370   pdfVersion = 0;
  371   memset(hdrBuf, 0, headerSearchSize + 1);
  372   str->getBlock(hdrBuf, headerSearchSize);
  373   for (i = 0; i < headerSearchSize - 5; ++i) {
  374     if (!strncmp(&hdrBuf[i], "%PDF-", 5)) {
  375       break;
  376     }
  377   }
  378   if (i >= headerSearchSize - 5) {
  379     error(errSyntaxWarning, -1, "May not be a PDF file (continuing anyway)");
  380     return;
  381   }
  382   str->moveStart(i);
  383   if (!(p = strtok(&hdrBuf[i+5], " \t\n\r"))) {
  384     error(errSyntaxWarning, -1, "May not be a PDF file (continuing anyway)");
  385     return;
  386   }
  387   pdfVersion = atof(p);
  388   if (!(hdrBuf[i+5] >= '0' && hdrBuf[i+5] <= '9') ||
  389       pdfVersion > supportedPDFVersionNum + 0.0001) {
  390     error(errSyntaxWarning, -1,
  391       "PDF version {0:s} -- xpdf supports version {1:s} (continuing anyway)",
  392       p, supportedPDFVersionStr);
  393   }
  394 }
  395 
  396 GBool PDFDoc::checkEncryption(GString *ownerPassword, GString *userPassword) {
  397   Object encrypt;
  398   GBool encrypted;
  399   SecurityHandler *secHdlr;
  400   GBool ret;
  401 
  402   xref->getTrailerDict()->dictLookup("Encrypt", &encrypt);
  403   if ((encrypted = encrypt.isDict())) {
  404     if ((secHdlr = SecurityHandler::make(this, &encrypt))) {
  405       if (secHdlr->isUnencrypted()) {
  406     // no encryption
  407     ret = gTrue;
  408       } else if (secHdlr->checkEncryption(ownerPassword, userPassword)) {
  409     // authorization succeeded
  410         xref->setEncryption(secHdlr->getPermissionFlags(),
  411                 secHdlr->getOwnerPasswordOk(),
  412                 secHdlr->getFileKey(),
  413                 secHdlr->getFileKeyLength(),
  414                 secHdlr->getEncVersion(),
  415                 secHdlr->getEncAlgorithm());
  416     ret = gTrue;
  417       } else {
  418     // authorization failed
  419     ret = gFalse;
  420       }
  421       delete secHdlr;
  422     } else {
  423       // couldn't find the matching security handler
  424       ret = gFalse;
  425     }
  426   } else {
  427     // document is not encrypted
  428     ret = gTrue;
  429   }
  430   encrypt.free();
  431   return ret;
  432 }
  433 
  434 void PDFDoc::displayPage(OutputDev *out, int page,
  435              double hDPI, double vDPI, int rotate,
  436              GBool useMediaBox, GBool crop, GBool printing,
  437              GBool (*abortCheckCbk)(void *data),
  438              void *abortCheckCbkData) {
  439   if (globalParams->getPrintCommands()) {
  440     printf("***** page %d *****\n", page);
  441   }
  442   catalog->getPage(page)->display(out, hDPI, vDPI,
  443                   rotate, useMediaBox, crop, printing,
  444                   abortCheckCbk, abortCheckCbkData);
  445 }
  446 
  447 void PDFDoc::displayPages(OutputDev *out, int firstPage, int lastPage,
  448               double hDPI, double vDPI, int rotate,
  449               GBool useMediaBox, GBool crop, GBool printing,
  450               GBool (*abortCheckCbk)(void *data),
  451               void *abortCheckCbkData) {
  452   int page;
  453 
  454   for (page = firstPage; page <= lastPage; ++page) {
  455     if (globalParams->getPrintStatusInfo()) {
  456       fflush(stderr);
  457       printf("[processing page %d]\n", page);
  458       fflush(stdout);
  459     }
  460     displayPage(out, page, hDPI, vDPI, rotate, useMediaBox, crop, printing,
  461         abortCheckCbk, abortCheckCbkData);
  462     catalog->doneWithPage(page);
  463   }
  464 }
  465 
  466 void PDFDoc::displayPageSlice(OutputDev *out, int page,
  467                   double hDPI, double vDPI, int rotate,
  468                   GBool useMediaBox, GBool crop, GBool printing,
  469                   int sliceX, int sliceY, int sliceW, int sliceH,
  470                   GBool (*abortCheckCbk)(void *data),
  471                   void *abortCheckCbkData) {
  472   catalog->getPage(page)->displaySlice(out, hDPI, vDPI,
  473                        rotate, useMediaBox, crop,
  474                        sliceX, sliceY, sliceW, sliceH,
  475                        printing,
  476                        abortCheckCbk, abortCheckCbkData);
  477 }
  478 
  479 Links *PDFDoc::getLinks(int page) {
  480   return catalog->getPage(page)->getLinks();
  481 }
  482 
  483 void PDFDoc::processLinks(OutputDev *out, int page) {
  484   catalog->getPage(page)->processLinks(out);
  485 }
  486 
  487 #ifndef DISABLE_OUTLINE
  488 int PDFDoc::getOutlineTargetPage(OutlineItem *outlineItem) {
  489   LinkAction *action;
  490   LinkActionKind kind;
  491   LinkDest *dest;
  492   GString *namedDest;
  493   Ref pageRef;
  494   int pg;
  495 
  496   if (outlineItem->pageNum >= 0) {
  497     return outlineItem->pageNum;
  498   }
  499   if (!(action = outlineItem->getAction())) {
  500     outlineItem->pageNum = 0;
  501     return 0;
  502   }
  503   kind = action->getKind();
  504   if (kind != actionGoTo) {
  505     outlineItem->pageNum = 0;
  506     return 0;
  507   }
  508   if ((dest = ((LinkGoTo *)action)->getDest())) {
  509     dest = dest->copy();
  510   } else if ((namedDest = ((LinkGoTo *)action)->getNamedDest())) {
  511     dest = findDest(namedDest);
  512   }
  513   pg = 0;
  514   if (dest) {
  515     if (dest->isPageRef()) {
  516       pageRef = dest->getPageRef();
  517       pg = findPage(pageRef.num, pageRef.gen);
  518     } else {
  519       pg = dest->getPageNum();
  520     }
  521     delete dest;
  522   }
  523   outlineItem->pageNum = pg;
  524   return pg;
  525 }
  526 #endif
  527 
  528 GBool PDFDoc::isLinearized() {
  529   Parser *parser;
  530   Object obj1, obj2, obj3, obj4, obj5;
  531   GBool lin;
  532 
  533   lin = gFalse;
  534   obj1.initNull();
  535   parser = new Parser(xref,
  536          new Lexer(xref,
  537            str->makeSubStream(str->getStart(), gFalse, 0, &obj1)),
  538          gTrue);
  539   parser->getObj(&obj1);
  540   parser->getObj(&obj2);
  541   parser->getObj(&obj3);
  542   parser->getObj(&obj4);
  543   if (obj1.isInt() && obj2.isInt() && obj3.isCmd("obj") &&
  544       obj4.isDict()) {
  545     obj4.dictLookup("Linearized", &obj5);
  546     if (obj5.isNum() && obj5.getNum() > 0) {
  547       lin = gTrue;
  548     }
  549     obj5.free();
  550   }
  551   obj4.free();
  552   obj3.free();
  553   obj2.free();
  554   obj1.free();
  555   delete parser;
  556   return lin;
  557 }
  558 
  559 GBool PDFDoc::saveAs(GString *name) {
  560   FILE *f;
  561   char buf[4096];
  562   int n;
  563 
  564   if (!(f = fopen(name->getCString(), "wb"))) {
  565     error(errIO, -1, "Couldn't open file '{0:t}'", name);
  566     return gFalse;
  567   }
  568   str->reset();
  569   while ((n = str->getBlock(buf, sizeof(buf))) > 0) {
  570     fwrite(buf, 1, n, f);
  571   }
  572   str->close();
  573   fclose(f);
  574   return gTrue;
  575 }
  576 
  577 GBool PDFDoc::saveEmbeddedFile(int idx, const char *path) {
  578   FILE *f;
  579   GBool ret;
  580 
  581   if (!(f = fopen(path, "wb"))) {
  582     return gFalse;
  583   }
  584   ret = saveEmbeddedFile2(idx, f);
  585   fclose(f);
  586   return ret;
  587 }
  588 
  589 GBool PDFDoc::saveEmbeddedFileU(int idx, const char *path) {
  590   FILE *f;
  591   GBool ret;
  592 
  593   if (!(f = openFile(path, "wb"))) {
  594     return gFalse;
  595   }
  596   ret = saveEmbeddedFile2(idx, f);
  597   fclose(f);
  598   return ret;
  599 }
  600 
  601 #ifdef _WIN32
  602 GBool PDFDoc::saveEmbeddedFile(int idx, const wchar_t *path, int pathLen) {
  603   FILE *f;
  604   OSVERSIONINFO version;
  605   wchar_t path2w[winMaxLongPath + 1];
  606   char path2c[MAX_PATH + 1];
  607   int i;
  608   GBool ret;
  609 
  610   // NB: _wfopen is only available in NT
  611   version.dwOSVersionInfoSize = sizeof(version);
  612   GetVersionEx(&version);
  613   if (version.dwPlatformId == VER_PLATFORM_WIN32_NT) {
  614     for (i = 0; i < pathLen && i < winMaxLongPath; ++i) {
  615       path2w[i] = path[i];
  616     }
  617     path2w[i] = 0;
  618     f = _wfopen(path2w, L"wb");
  619   } else {
  620     for (i = 0; i < pathLen && i < MAX_PATH; ++i) {
  621       path2c[i] = (char)path[i];
  622     }
  623     path2c[i] = 0;
  624     f = fopen(path2c, "wb");
  625   }
  626   if (!f) {
  627     return gFalse;
  628   }
  629   ret = saveEmbeddedFile2(idx, f);
  630   fclose(f);
  631   return ret;
  632 }
  633 #endif
  634 
  635 GBool PDFDoc::saveEmbeddedFile2(int idx, FILE *f) {
  636   Object strObj;
  637   char buf[4096];
  638   int n;
  639 
  640   if (!catalog->getEmbeddedFileStreamObj(idx, &strObj)) {
  641     return gFalse;
  642   }
  643   strObj.streamReset();
  644   while ((n = strObj.streamGetBlock(buf, sizeof(buf))) > 0) {
  645     fwrite(buf, 1, n, f);
  646   }
  647   strObj.streamClose();
  648   strObj.free();
  649   return gTrue;
  650 }
  651 
  652 char *PDFDoc::getEmbeddedFileMem(int idx, int *size) {
  653   Object strObj;
  654   char *buf;
  655   int bufSize, sizeInc, n;
  656 
  657   if (!catalog->getEmbeddedFileStreamObj(idx, &strObj)) {
  658     return NULL;
  659   }
  660   strObj.streamReset();
  661   bufSize = 0;
  662   buf = NULL;
  663   do {
  664     sizeInc = bufSize ? bufSize : 1024;
  665     if (bufSize > INT_MAX - sizeInc) {
  666       error(errIO, -1, "embedded file is too large");
  667       *size = 0;
  668       return NULL;
  669     }
  670     buf = (char *)grealloc(buf, bufSize + sizeInc);
  671     n = strObj.streamGetBlock(buf + bufSize, sizeInc);
  672     bufSize += n;
  673   } while (n == sizeInc);
  674   strObj.streamClose();
  675   strObj.free();
  676   *size = bufSize;
  677   return buf;
  678 }
  679