"Fossies" - the Fresh Open Source Software Archive 
Member "xpdf-4.04/xpdf/PDFDoc.cc" (18 Apr 2022, 16078 Bytes) of package /linux/misc/xpdf-4.04.tar.gz:
As a special service "Fossies" has tried to format the requested source page into HTML format using (guessed) C and C++ source code syntax highlighting (style:
standard) with prefixed line numbers and
code folding option.
Alternatively, you can
view or
download the uninterpreted source code file here.
For more information about "PDFDoc.cc" see the
Fossies "Dox" file reference documentation and the latest
Fossies "Diffs" side-by-side code changes report:
4.03_vs_4.04.
1 //========================================================================
2 //
3 // PDFDoc.cc
4 //
5 // Copyright 1996-2003 Glyph & Cog, LLC
6 //
7 //========================================================================
8
9 #include <aconf.h>
10
11 #ifdef USE_GCC_PRAGMAS
12 #pragma implementation
13 #endif
14
15 #include <stdio.h>
16 #include <stdlib.h>
17 #include <stddef.h>
18 #include <string.h>
19 #ifdef _WIN32
20 # include <windows.h>
21 #endif
22 #include "gmempp.h"
23 #include "GString.h"
24 #include "gfile.h"
25 #include "config.h"
26 #include "GlobalParams.h"
27 #include "Page.h"
28 #include "Catalog.h"
29 #include "Stream.h"
30 #include "XRef.h"
31 #include "Link.h"
32 #include "OutputDev.h"
33 #include "Error.h"
34 #include "ErrorCodes.h"
35 #include "Lexer.h"
36 #include "Parser.h"
37 #include "SecurityHandler.h"
38 #include "UTF8.h"
39 #ifndef DISABLE_OUTLINE
40 #include "Outline.h"
41 #endif
42 #include "OptionalContent.h"
43 #include "PDFDoc.h"
44
45 //------------------------------------------------------------------------
46
47 #define headerSearchSize 1024 // read this many bytes at beginning of
48 // file to look for '%PDF'
49
50 // Avoid sharing files with child processes on Windows, where sharing
51 // can cause problems.
52 #ifdef _WIN32
53 # define fopenReadMode "rbN"
54 # define wfopenReadMode L"rbN"
55 #else
56 # define fopenReadMode "rb"
57 #endif
58
59 //------------------------------------------------------------------------
60 // PDFDoc
61 //------------------------------------------------------------------------
62
63 PDFDoc::PDFDoc(GString *fileNameA, GString *ownerPassword,
64 GString *userPassword, PDFCore *coreA) {
65 Object obj;
66 GString *fileName1, *fileName2;
67 #ifdef _WIN32
68 int n, i;
69 #endif
70
71 init(coreA);
72
73 fileName = fileNameA;
74 #ifdef _WIN32
75 n = fileName->getLength();
76 fileNameU = (wchar_t *)gmallocn(n + 1, sizeof(wchar_t));
77 for (i = 0; i < n; ++i) {
78 fileNameU[i] = (wchar_t)(fileName->getChar(i) & 0xff);
79 }
80 fileNameU[n] = L'\0';
81 #endif
82
83 fileName1 = fileName;
84
85 // try to open file
86 fileName2 = NULL;
87 #ifdef VMS
88 if (!(file = fopen(fileName1->getCString(), fopenReadMode, "ctx=stm"))) {
89 error(errIO, -1, "Couldn't open file '{0:t}'", fileName1);
90 errCode = errOpenFile;
91 return;
92 }
93 #else
94 if (!(file = fopen(fileName1->getCString(), fopenReadMode))) {
95 fileName2 = fileName->copy();
96 fileName2->lowerCase();
97 if (!(file = fopen(fileName2->getCString(), fopenReadMode))) {
98 fileName2->upperCase();
99 if (!(file = fopen(fileName2->getCString(), fopenReadMode))) {
100 error(errIO, -1, "Couldn't open file '{0:t}'", fileName);
101 delete fileName2;
102 errCode = errOpenFile;
103 return;
104 }
105 }
106 delete fileName2;
107 }
108 #endif
109
110 // create stream
111 obj.initNull();
112 str = new FileStream(file, 0, gFalse, 0, &obj);
113
114 ok = setup(ownerPassword, userPassword);
115 }
116
117 #ifdef _WIN32
118 PDFDoc::PDFDoc(wchar_t *fileNameA, int fileNameLen, GString *ownerPassword,
119 GString *userPassword, PDFCore *coreA) {
120 OSVERSIONINFO version;
121 Object obj;
122 int i;
123
124 init(coreA);
125
126 // handle a Windows shortcut
127 wchar_t wPath[winMaxLongPath + 1];
128 int n = fileNameLen < winMaxLongPath ? fileNameLen : winMaxLongPath;
129 memcpy(wPath, fileNameA, n * sizeof(wchar_t));
130 wPath[n] = L'\0';
131 readWindowsShortcut(wPath, winMaxLongPath + 1);
132 int wPathLen = (int)wcslen(wPath);
133
134 // save both Unicode and 8-bit copies of the file name
135 fileName = new GString();
136 fileNameU = (wchar_t *)gmallocn(wPathLen + 1, sizeof(wchar_t));
137 memcpy(fileNameU, wPath, (wPathLen + 1) * sizeof(wchar_t));
138 for (i = 0; i < wPathLen; ++i) {
139 fileName->append((char)fileNameA[i]);
140 }
141
142 // try to open file
143 // NB: _wfopen is only available in NT
144 version.dwOSVersionInfoSize = sizeof(version);
145 GetVersionEx(&version);
146 if (version.dwPlatformId == VER_PLATFORM_WIN32_NT) {
147 file = _wfopen(fileNameU, wfopenReadMode);
148 } else {
149 file = fopen(fileName->getCString(), fopenReadMode);
150 }
151 if (!file) {
152 error(errIO, -1, "Couldn't open file '{0:t}'", fileName);
153 errCode = errOpenFile;
154 return;
155 }
156
157 // create stream
158 obj.initNull();
159 str = new FileStream(file, 0, gFalse, 0, &obj);
160
161 ok = setup(ownerPassword, userPassword);
162 }
163 #endif
164
165 PDFDoc::PDFDoc(char *fileNameA, GString *ownerPassword,
166 GString *userPassword, PDFCore *coreA) {
167 #ifdef _WIN32
168 OSVERSIONINFO version;
169 #endif
170 Object obj;
171 #ifdef _WIN32
172 Unicode u;
173 int i, j;
174 #endif
175
176 init(coreA);
177
178 fileName = new GString(fileNameA);
179
180 #if defined(_WIN32)
181 wchar_t wPath[winMaxLongPath + 1];
182 i = 0;
183 j = 0;
184 while (j < winMaxLongPath && getUTF8(fileName, &i, &u)) {
185 wPath[j++] = (wchar_t)u;
186 }
187 wPath[j] = L'\0';
188 readWindowsShortcut(wPath, winMaxLongPath + 1);
189 int wPathLen = (int)wcslen(wPath);
190
191 fileNameU = (wchar_t *)gmallocn(wPathLen + 1, sizeof(wchar_t));
192 memcpy(fileNameU, wPath, (wPathLen + 1) * sizeof(wchar_t));
193
194 // NB: _wfopen is only available in NT
195 version.dwOSVersionInfoSize = sizeof(version);
196 GetVersionEx(&version);
197 if (version.dwPlatformId == VER_PLATFORM_WIN32_NT) {
198 file = _wfopen(fileNameU, wfopenReadMode);
199 } else {
200 file = fopen(fileName->getCString(), fopenReadMode);
201 }
202 #elif defined(VMS)
203 file = fopen(fileName->getCString(), fopenReadMode, "ctx=stm");
204 #else
205 file = fopen(fileName->getCString(), fopenReadMode);
206 #endif
207
208 if (!file) {
209 error(errIO, -1, "Couldn't open file '{0:t}'", fileName);
210 errCode = errOpenFile;
211 return;
212 }
213
214 // create stream
215 obj.initNull();
216 str = new FileStream(file, 0, gFalse, 0, &obj);
217
218 ok = setup(ownerPassword, userPassword);
219 }
220
221 PDFDoc::PDFDoc(BaseStream *strA, GString *ownerPassword,
222 GString *userPassword, PDFCore *coreA) {
223 #ifdef _WIN32
224 int n, i;
225 #endif
226
227 init(coreA);
228
229 if (strA->getFileName()) {
230 fileName = strA->getFileName()->copy();
231 #ifdef _WIN32
232 n = fileName->getLength();
233 fileNameU = (wchar_t *)gmallocn(n + 1, sizeof(wchar_t));
234 for (i = 0; i < n; ++i) {
235 fileNameU[i] = (wchar_t)(fileName->getChar(i) & 0xff);
236 }
237 fileNameU[n] = L'\0';
238 #endif
239 } else {
240 fileName = NULL;
241 #ifdef _WIN32
242 fileNameU = NULL;
243 #endif
244 }
245 str = strA;
246 ok = setup(ownerPassword, userPassword);
247 }
248
249 void PDFDoc::init(PDFCore *coreA) {
250 ok = gFalse;
251 errCode = errNone;
252 core = coreA;
253 file = NULL;
254 str = NULL;
255 xref = NULL;
256 catalog = NULL;
257 #ifndef DISABLE_OUTLINE
258 outline = NULL;
259 #endif
260 optContent = NULL;
261 }
262
263 GBool PDFDoc::setup(GString *ownerPassword, GString *userPassword) {
264
265 str->reset();
266
267 // check header
268 checkHeader();
269
270 // read the xref and catalog
271 if (!PDFDoc::setup2(ownerPassword, userPassword, gFalse)) {
272 if (errCode == errDamaged || errCode == errBadCatalog) {
273 // try repairing the xref table
274 error(errSyntaxWarning, -1,
275 "PDF file is damaged - attempting to reconstruct xref table...");
276 if (!PDFDoc::setup2(ownerPassword, userPassword, gTrue)) {
277 return gFalse;
278 }
279 } else {
280 return gFalse;
281 }
282 }
283
284 #ifndef DISABLE_OUTLINE
285 // read outline
286 outline = new Outline(catalog->getOutline(), xref);
287 #endif
288
289 // read the optional content info
290 optContent = new OptionalContent(this);
291
292
293 // done
294 return gTrue;
295 }
296
297 GBool PDFDoc::setup2(GString *ownerPassword, GString *userPassword,
298 GBool repairXRef) {
299 // read xref table
300 xref = new XRef(str, repairXRef);
301 if (!xref->isOk()) {
302 error(errSyntaxError, -1, "Couldn't read xref table");
303 errCode = xref->getErrorCode();
304 delete xref;
305 xref = NULL;
306 return gFalse;
307 }
308
309 // check for encryption
310 if (!checkEncryption(ownerPassword, userPassword)) {
311 errCode = errEncrypted;
312 delete xref;
313 xref = NULL;
314 return gFalse;
315 }
316
317 // read catalog
318 catalog = new Catalog(this);
319 if (!catalog->isOk()) {
320 error(errSyntaxError, -1, "Couldn't read page catalog");
321 errCode = errBadCatalog;
322 delete catalog;
323 catalog = NULL;
324 delete xref;
325 xref = NULL;
326 return gFalse;
327 }
328
329 return gTrue;
330 }
331
332 PDFDoc::~PDFDoc() {
333 if (optContent) {
334 delete optContent;
335 }
336 #ifndef DISABLE_OUTLINE
337 if (outline) {
338 delete outline;
339 }
340 #endif
341 if (catalog) {
342 delete catalog;
343 }
344 if (xref) {
345 delete xref;
346 }
347 if (str) {
348 delete str;
349 }
350 if (file) {
351 fclose(file);
352 }
353 if (fileName) {
354 delete fileName;
355 }
356 #ifdef _WIN32
357 if (fileNameU) {
358 gfree(fileNameU);
359 }
360 #endif
361 }
362
363 // Check for a PDF header on this stream. Skip past some garbage
364 // if necessary.
365 void PDFDoc::checkHeader() {
366 char hdrBuf[headerSearchSize+1];
367 char *p;
368 int i;
369
370 pdfVersion = 0;
371 memset(hdrBuf, 0, headerSearchSize + 1);
372 str->getBlock(hdrBuf, headerSearchSize);
373 for (i = 0; i < headerSearchSize - 5; ++i) {
374 if (!strncmp(&hdrBuf[i], "%PDF-", 5)) {
375 break;
376 }
377 }
378 if (i >= headerSearchSize - 5) {
379 error(errSyntaxWarning, -1, "May not be a PDF file (continuing anyway)");
380 return;
381 }
382 str->moveStart(i);
383 if (!(p = strtok(&hdrBuf[i+5], " \t\n\r"))) {
384 error(errSyntaxWarning, -1, "May not be a PDF file (continuing anyway)");
385 return;
386 }
387 pdfVersion = atof(p);
388 if (!(hdrBuf[i+5] >= '0' && hdrBuf[i+5] <= '9') ||
389 pdfVersion > supportedPDFVersionNum + 0.0001) {
390 error(errSyntaxWarning, -1,
391 "PDF version {0:s} -- xpdf supports version {1:s} (continuing anyway)",
392 p, supportedPDFVersionStr);
393 }
394 }
395
396 GBool PDFDoc::checkEncryption(GString *ownerPassword, GString *userPassword) {
397 Object encrypt;
398 GBool encrypted;
399 SecurityHandler *secHdlr;
400 GBool ret;
401
402 xref->getTrailerDict()->dictLookup("Encrypt", &encrypt);
403 if ((encrypted = encrypt.isDict())) {
404 if ((secHdlr = SecurityHandler::make(this, &encrypt))) {
405 if (secHdlr->isUnencrypted()) {
406 // no encryption
407 ret = gTrue;
408 } else if (secHdlr->checkEncryption(ownerPassword, userPassword)) {
409 // authorization succeeded
410 xref->setEncryption(secHdlr->getPermissionFlags(),
411 secHdlr->getOwnerPasswordOk(),
412 secHdlr->getFileKey(),
413 secHdlr->getFileKeyLength(),
414 secHdlr->getEncVersion(),
415 secHdlr->getEncAlgorithm());
416 ret = gTrue;
417 } else {
418 // authorization failed
419 ret = gFalse;
420 }
421 delete secHdlr;
422 } else {
423 // couldn't find the matching security handler
424 ret = gFalse;
425 }
426 } else {
427 // document is not encrypted
428 ret = gTrue;
429 }
430 encrypt.free();
431 return ret;
432 }
433
434 void PDFDoc::displayPage(OutputDev *out, int page,
435 double hDPI, double vDPI, int rotate,
436 GBool useMediaBox, GBool crop, GBool printing,
437 GBool (*abortCheckCbk)(void *data),
438 void *abortCheckCbkData) {
439 if (globalParams->getPrintCommands()) {
440 printf("***** page %d *****\n", page);
441 }
442 catalog->getPage(page)->display(out, hDPI, vDPI,
443 rotate, useMediaBox, crop, printing,
444 abortCheckCbk, abortCheckCbkData);
445 }
446
447 void PDFDoc::displayPages(OutputDev *out, int firstPage, int lastPage,
448 double hDPI, double vDPI, int rotate,
449 GBool useMediaBox, GBool crop, GBool printing,
450 GBool (*abortCheckCbk)(void *data),
451 void *abortCheckCbkData) {
452 int page;
453
454 for (page = firstPage; page <= lastPage; ++page) {
455 if (globalParams->getPrintStatusInfo()) {
456 fflush(stderr);
457 printf("[processing page %d]\n", page);
458 fflush(stdout);
459 }
460 displayPage(out, page, hDPI, vDPI, rotate, useMediaBox, crop, printing,
461 abortCheckCbk, abortCheckCbkData);
462 catalog->doneWithPage(page);
463 }
464 }
465
466 void PDFDoc::displayPageSlice(OutputDev *out, int page,
467 double hDPI, double vDPI, int rotate,
468 GBool useMediaBox, GBool crop, GBool printing,
469 int sliceX, int sliceY, int sliceW, int sliceH,
470 GBool (*abortCheckCbk)(void *data),
471 void *abortCheckCbkData) {
472 catalog->getPage(page)->displaySlice(out, hDPI, vDPI,
473 rotate, useMediaBox, crop,
474 sliceX, sliceY, sliceW, sliceH,
475 printing,
476 abortCheckCbk, abortCheckCbkData);
477 }
478
479 Links *PDFDoc::getLinks(int page) {
480 return catalog->getPage(page)->getLinks();
481 }
482
483 void PDFDoc::processLinks(OutputDev *out, int page) {
484 catalog->getPage(page)->processLinks(out);
485 }
486
487 #ifndef DISABLE_OUTLINE
488 int PDFDoc::getOutlineTargetPage(OutlineItem *outlineItem) {
489 LinkAction *action;
490 LinkActionKind kind;
491 LinkDest *dest;
492 GString *namedDest;
493 Ref pageRef;
494 int pg;
495
496 if (outlineItem->pageNum >= 0) {
497 return outlineItem->pageNum;
498 }
499 if (!(action = outlineItem->getAction())) {
500 outlineItem->pageNum = 0;
501 return 0;
502 }
503 kind = action->getKind();
504 if (kind != actionGoTo) {
505 outlineItem->pageNum = 0;
506 return 0;
507 }
508 if ((dest = ((LinkGoTo *)action)->getDest())) {
509 dest = dest->copy();
510 } else if ((namedDest = ((LinkGoTo *)action)->getNamedDest())) {
511 dest = findDest(namedDest);
512 }
513 pg = 0;
514 if (dest) {
515 if (dest->isPageRef()) {
516 pageRef = dest->getPageRef();
517 pg = findPage(pageRef.num, pageRef.gen);
518 } else {
519 pg = dest->getPageNum();
520 }
521 delete dest;
522 }
523 outlineItem->pageNum = pg;
524 return pg;
525 }
526 #endif
527
528 GBool PDFDoc::isLinearized() {
529 Parser *parser;
530 Object obj1, obj2, obj3, obj4, obj5;
531 GBool lin;
532
533 lin = gFalse;
534 obj1.initNull();
535 parser = new Parser(xref,
536 new Lexer(xref,
537 str->makeSubStream(str->getStart(), gFalse, 0, &obj1)),
538 gTrue);
539 parser->getObj(&obj1);
540 parser->getObj(&obj2);
541 parser->getObj(&obj3);
542 parser->getObj(&obj4);
543 if (obj1.isInt() && obj2.isInt() && obj3.isCmd("obj") &&
544 obj4.isDict()) {
545 obj4.dictLookup("Linearized", &obj5);
546 if (obj5.isNum() && obj5.getNum() > 0) {
547 lin = gTrue;
548 }
549 obj5.free();
550 }
551 obj4.free();
552 obj3.free();
553 obj2.free();
554 obj1.free();
555 delete parser;
556 return lin;
557 }
558
559 GBool PDFDoc::saveAs(GString *name) {
560 FILE *f;
561 char buf[4096];
562 int n;
563
564 if (!(f = fopen(name->getCString(), "wb"))) {
565 error(errIO, -1, "Couldn't open file '{0:t}'", name);
566 return gFalse;
567 }
568 str->reset();
569 while ((n = str->getBlock(buf, sizeof(buf))) > 0) {
570 fwrite(buf, 1, n, f);
571 }
572 str->close();
573 fclose(f);
574 return gTrue;
575 }
576
577 GBool PDFDoc::saveEmbeddedFile(int idx, const char *path) {
578 FILE *f;
579 GBool ret;
580
581 if (!(f = fopen(path, "wb"))) {
582 return gFalse;
583 }
584 ret = saveEmbeddedFile2(idx, f);
585 fclose(f);
586 return ret;
587 }
588
589 GBool PDFDoc::saveEmbeddedFileU(int idx, const char *path) {
590 FILE *f;
591 GBool ret;
592
593 if (!(f = openFile(path, "wb"))) {
594 return gFalse;
595 }
596 ret = saveEmbeddedFile2(idx, f);
597 fclose(f);
598 return ret;
599 }
600
601 #ifdef _WIN32
602 GBool PDFDoc::saveEmbeddedFile(int idx, const wchar_t *path, int pathLen) {
603 FILE *f;
604 OSVERSIONINFO version;
605 wchar_t path2w[winMaxLongPath + 1];
606 char path2c[MAX_PATH + 1];
607 int i;
608 GBool ret;
609
610 // NB: _wfopen is only available in NT
611 version.dwOSVersionInfoSize = sizeof(version);
612 GetVersionEx(&version);
613 if (version.dwPlatformId == VER_PLATFORM_WIN32_NT) {
614 for (i = 0; i < pathLen && i < winMaxLongPath; ++i) {
615 path2w[i] = path[i];
616 }
617 path2w[i] = 0;
618 f = _wfopen(path2w, L"wb");
619 } else {
620 for (i = 0; i < pathLen && i < MAX_PATH; ++i) {
621 path2c[i] = (char)path[i];
622 }
623 path2c[i] = 0;
624 f = fopen(path2c, "wb");
625 }
626 if (!f) {
627 return gFalse;
628 }
629 ret = saveEmbeddedFile2(idx, f);
630 fclose(f);
631 return ret;
632 }
633 #endif
634
635 GBool PDFDoc::saveEmbeddedFile2(int idx, FILE *f) {
636 Object strObj;
637 char buf[4096];
638 int n;
639
640 if (!catalog->getEmbeddedFileStreamObj(idx, &strObj)) {
641 return gFalse;
642 }
643 strObj.streamReset();
644 while ((n = strObj.streamGetBlock(buf, sizeof(buf))) > 0) {
645 fwrite(buf, 1, n, f);
646 }
647 strObj.streamClose();
648 strObj.free();
649 return gTrue;
650 }
651
652 char *PDFDoc::getEmbeddedFileMem(int idx, int *size) {
653 Object strObj;
654 char *buf;
655 int bufSize, sizeInc, n;
656
657 if (!catalog->getEmbeddedFileStreamObj(idx, &strObj)) {
658 return NULL;
659 }
660 strObj.streamReset();
661 bufSize = 0;
662 buf = NULL;
663 do {
664 sizeInc = bufSize ? bufSize : 1024;
665 if (bufSize > INT_MAX - sizeInc) {
666 error(errIO, -1, "embedded file is too large");
667 *size = 0;
668 return NULL;
669 }
670 buf = (char *)grealloc(buf, bufSize + sizeInc);
671 n = strObj.streamGetBlock(buf + bufSize, sizeInc);
672 bufSize += n;
673 } while (n == sizeInc);
674 strObj.streamClose();
675 strObj.free();
676 *size = bufSize;
677 return buf;
678 }
679