"Fossies" - the Fresh Open Source Software Archive 
Member "xpdf-4.04/xpdf/PDFDoc.cc" (18 Apr 2022, 16078 Bytes) of package /linux/misc/xpdf-4.04.tar.gz:
As a special service, "Fossies" has tried to format the requested source page into HTML format using (guessed) C and C++ source code syntax highlighting (style: standard) with prefixed line numbers and a code folding option.
Alternatively, you can view or download the uninterpreted source code file here.
1 //========================================================================
2 //
3 // PDFDoc.cc
4 //
5 // Copyright 1996-2003 Glyph & Cog, LLC
6 //
7 //========================================================================
8
9 #include <aconf.h>
10
11 #ifdef USE_GCC_PRAGMAS
12 #pragma implementation
13 #endif
14
15 #include <stdio.h>
16 #include <stdlib.h>
17 #include <stddef.h>
18 #include <string.h>
19 #ifdef _WIN32
20 # include <windows.h>
21 #endif
22 #include "gmempp.h"
23 #include "GString.h"
24 #include "gfile.h"
25 #include "config.h"
26 #include "GlobalParams.h"
27 #include "Page.h"
28 #include "Catalog.h"
29 #include "Stream.h"
30 #include "XRef.h"
31 #include "Link.h"
32 #include "OutputDev.h"
33 #include "Error.h"
34 #include "ErrorCodes.h"
35 #include "Lexer.h"
36 #include "Parser.h"
37 #include "SecurityHandler.h"
38 #include "UTF8.h"
39 #ifndef DISABLE_OUTLINE
40 #include "Outline.h"
41 #endif
42 #include "OptionalContent.h"
43 #include "PDFDoc.h"
44
45 //------------------------------------------------------------------------
46
// Number of bytes read from the beginning of the file when searching
// for the '%PDF' header marker.
#define headerSearchSize 1024

// Mode strings used when opening the PDF file for reading.  On
// Windows the 'N' flag is added to avoid sharing files with child
// processes, where sharing can cause problems.
#if !defined(_WIN32)
#  define fopenReadMode "rb"
#else
#  define fopenReadMode "rbN"
#  define wfopenReadMode L"rbN"
#endif
58
59 //------------------------------------------------------------------------
60 // PDFDoc
61 //------------------------------------------------------------------------
62
63 PDFDoc::PDFDoc(GString *fileNameA, GString *ownerPassword,
64 GString *userPassword, PDFCore *coreA) {
65 Object obj;
66 GString *fileName1, *fileName2;
67 #ifdef _WIN32
68 int n, i;
69 #endif
70
71 init(coreA);
72
73 fileName = fileNameA;
74 #ifdef _WIN32
75 n = fileName->getLength();
76 fileNameU = (wchar_t *)gmallocn(n + 1, sizeof(wchar_t));
77 for (i = 0; i < n; ++i) {
78 fileNameU[i] = (wchar_t)(fileName->getChar(i) & 0xff);
79 }
80 fileNameU[n] = L'\0';
81 #endif
82
83 fileName1 = fileName;
84
85 // try to open file
86 fileName2 = NULL;
87 #ifdef VMS
88 if (!(file = fopen(fileName1->getCString(), fopenReadMode, "ctx=stm"))) {
89 error(errIO, -1, "Couldn't open file '{0:t}'", fileName1);
90 errCode = errOpenFile;
91 return;
92 }
93 #else
94 if (!(file = fopen(fileName1->getCString(), fopenReadMode))) {
95 fileName2 = fileName->copy();
96 fileName2->lowerCase();
97 if (!(file = fopen(fileName2->getCString(), fopenReadMode))) {
98 fileName2->upperCase();
99 if (!(file = fopen(fileName2->getCString(), fopenReadMode))) {
100 error(errIO, -1, "Couldn't open file '{0:t}'", fileName);
101 delete fileName2;
102 errCode = errOpenFile;
103 return;
104 }
105 }
106 delete fileName2;
107 }
108 #endif
109
110 // create stream
111 obj.initNull();
112 str = new FileStream(file, 0, gFalse, 0, &obj);
113
114 ok = setup(ownerPassword, userPassword);
115 }
116
#ifdef _WIN32
// Construct a document from a wide-character (Windows) file name of
// <fileNameLen> characters.  The name is copied into a local buffer
// (truncated to winMaxLongPath characters) and passed through
// readWindowsShortcut(), which may rewrite the buffer.  Both a
// wide-character copy (fileNameU) and an 8-bit copy (fileName) of the
// resulting path are stored.  On failure to open the file, errCode is
// set to errOpenFile and ok is left false; otherwise the result of
// setup() is stored in ok.
PDFDoc::PDFDoc(wchar_t *fileNameA, int fileNameLen, GString *ownerPassword,
               GString *userPassword, PDFCore *coreA) {
  OSVERSIONINFO version;
  Object obj;
  int i;

  init(coreA);

  // handle a Windows shortcut
  wchar_t wPath[winMaxLongPath + 1];
  int n = fileNameLen < winMaxLongPath ? fileNameLen : winMaxLongPath;
  if (n < 0) {
    // a negative length would turn into a huge memcpy size
    n = 0;
  }
  memcpy(wPath, fileNameA, n * sizeof(wchar_t));
  wPath[n] = L'\0';
  readWindowsShortcut(wPath, winMaxLongPath + 1);
  int wPathLen = (int)wcslen(wPath);

  // save both Unicode and 8-bit copies of the file name
  fileName = new GString();
  fileNameU = (wchar_t *)gmallocn(wPathLen + 1, sizeof(wchar_t));
  memcpy(fileNameU, wPath, (wPathLen + 1) * sizeof(wchar_t));
  for (i = 0; i < wPathLen; ++i) {
    // wPathLen is the length of wPath, not of fileNameA, so the
    // characters must come from wPath: indexing fileNameA here could
    // read past the caller's buffer and would not match fileNameU
    fileName->append((char)wPath[i]);
  }

  // try to open file
  // NB: _wfopen is only available in NT
  version.dwOSVersionInfoSize = sizeof(version);
  GetVersionEx(&version);
  if (version.dwPlatformId == VER_PLATFORM_WIN32_NT) {
    file = _wfopen(fileNameU, wfopenReadMode);
  } else {
    file = fopen(fileName->getCString(), fopenReadMode);
  }
  if (!file) {
    error(errIO, -1, "Couldn't open file '{0:t}'", fileName);
    errCode = errOpenFile;
    return;
  }

  // create stream
  obj.initNull();
  str = new FileStream(file, 0, gFalse, 0, &obj);

  ok = setup(ownerPassword, userPassword);
}
#endif
164
165 PDFDoc::PDFDoc(char *fileNameA, GString *ownerPassword,
166 GString *userPassword, PDFCore *coreA) {
167 #ifdef _WIN32
168 OSVERSIONINFO version;
169 #endif
170 Object obj;
171 #ifdef _WIN32
172 Unicode u;
173 int i, j;
174 #endif
175
176 init(coreA);
177
178 fileName = new GString(fileNameA);
179
180 #if defined(_WIN32)
181 wchar_t wPath[winMaxLongPath + 1];
182 i = 0;
183 j = 0;
184 while (j < winMaxLongPath && getUTF8(fileName, &i, &u)) {
185 wPath[j++] = (wchar_t)u;
186 }
187 wPath[j] = L'\0';
188 readWindowsShortcut(wPath, winMaxLongPath + 1);
189 int wPathLen = (int)wcslen(wPath);
190
191 fileNameU = (wchar_t *)gmallocn(wPathLen + 1, sizeof(wchar_t));
192 memcpy(fileNameU, wPath, (wPathLen + 1) * sizeof(wchar_t));
193
194 // NB: _wfopen is only available in NT
195 version.dwOSVersionInfoSize = sizeof(version);
196 GetVersionEx(&version);
197 if (version.dwPlatformId == VER_PLATFORM_WIN32_NT) {
198 file = _wfopen(fileNameU, wfopenReadMode);
199 } else {
200 file = fopen(fileName->getCString(), fopenReadMode);
201 }
202 #elif defined(VMS)
203 file = fopen(fileName->getCString(), fopenReadMode, "ctx=stm");
204 #else
205 file = fopen(fileName->getCString(), fopenReadMode);
206 #endif
207
208 if (!file) {
209 error(errIO, -1, "Couldn't open file '{0:t}'", fileName);
210 errCode = errOpenFile;
211 return;
212 }
213
214 // create stream
215 obj.initNull();
216 str = new FileStream(file, 0, gFalse, 0, &obj);
217
218 ok = setup(ownerPassword, userPassword);
219 }
220
221 PDFDoc::PDFDoc(BaseStream *strA, GString *ownerPassword,
222 GString *userPassword, PDFCore *coreA) {
223 #ifdef _WIN32
224 int n, i;
225 #endif
226
227 init(coreA);
228
229 if (strA->getFileName()) {
230 fileName = strA->getFileName()->copy();
231 #ifdef _WIN32
232 n = fileName->getLength();
233 fileNameU = (wchar_t *)gmallocn(n + 1, sizeof(wchar_t));
234 for (i = 0; i < n; ++i) {
235 fileNameU[i] = (wchar_t)(fileName->getChar(i) & 0xff);
236 }
237 fileNameU[n] = L'\0';
238 #endif
239 } else {
240 fileName = NULL;
241 #ifdef _WIN32
242 fileNameU = NULL;
243 #endif
244 }
245 str = strA;
246 ok = setup(ownerPassword, userPassword);
247 }
248
249 void PDFDoc::init(PDFCore *coreA) {
250 ok = gFalse;
251 errCode = errNone;
252 core = coreA;
253 file = NULL;
254 str = NULL;
255 xref = NULL;
256 catalog = NULL;
257 #ifndef DISABLE_OUTLINE
258 outline = NULL;
259 #endif
260 optContent = NULL;
261 }
262
263 GBool PDFDoc::setup(GString *ownerPassword, GString *userPassword) {
264
265 str->reset();
266
267 // check header
268 checkHeader();
269
270 // read the xref and catalog
271 if (!PDFDoc::setup2(ownerPassword, userPassword, gFalse)) {
272 if (errCode == errDamaged || errCode == errBadCatalog) {
273 // try repairing the xref table
274 error(errSyntaxWarning, -1,
275 "PDF file is damaged - attempting to reconstruct xref table...");
276 if (!PDFDoc::setup2(ownerPassword, userPassword, gTrue)) {
277 return gFalse;
278 }
279 } else {
280 return gFalse;
281 }
282 }
283
284 #ifndef DISABLE_OUTLINE
285 // read outline
286 outline = new Outline(catalog->getOutline(), xref);
287 #endif
288
289 // read the optional content info
290 optContent = new OptionalContent(this);
291
292
293 // done
294 return gTrue;
295 }
296
297 GBool PDFDoc::setup2(GString *ownerPassword, GString *userPassword,
298 GBool repairXRef) {
299 // read xref table
300 xref = new XRef(str, repairXRef);
301 if (!xref->isOk()) {
302 error(errSyntaxError, -1, "Couldn't read xref table");
303 errCode = xref->getErrorCode();
304 delete xref;
305 xref = NULL;
306 return gFalse;
307 }
308
309 // check for encryption
310 if (!checkEncryption(ownerPassword, userPassword)) {
311 errCode = errEncrypted;
312 delete xref;
313 xref = NULL;
314 return gFalse;
315 }
316
317 // read catalog
318 catalog = new Catalog(this);
319 if (!catalog->isOk()) {
320 error(errSyntaxError, -1, "Couldn't read page catalog");
321 errCode = errBadCatalog;
322 delete catalog;
323 catalog = NULL;
324 delete xref;
325 xref = NULL;
326 return gFalse;
327 }
328
329 return gTrue;
330 }
331
332 PDFDoc::~PDFDoc() {
333 if (optContent) {
334 delete optContent;
335 }
336 #ifndef DISABLE_OUTLINE
337 if (outline) {
338 delete outline;
339 }
340 #endif
341 if (catalog) {
342 delete catalog;
343 }
344 if (xref) {
345 delete xref;
346 }
347 if (str) {
348 delete str;
349 }
350 if (file) {
351 fclose(file);
352 }
353 if (fileName) {
354 delete fileName;
355 }
356 #ifdef _WIN32
357 if (fileNameU) {
358 gfree(fileNameU);
359 }
360 #endif
361 }
362
363 // Check for a PDF header on this stream. Skip past some garbage
364 // if necessary.
365 void PDFDoc::checkHeader() {
366 char hdrBuf[headerSearchSize+1];
367 char *p;
368 int i;
369
370 pdfVersion = 0;
371 memset(hdrBuf, 0, headerSearchSize + 1);
372 str->getBlock(hdrBuf, headerSearchSize);
373 for (i = 0; i < headerSearchSize - 5; ++i) {
374 if (!strncmp(&hdrBuf[i], "%PDF-", 5)) {
375 break;
376 }
377 }
378 if (i >= headerSearchSize - 5) {
379 error(errSyntaxWarning, -1, "May not be a PDF file (continuing anyway)");
380 return;
381 }
382 str->moveStart(i);
383 if (!(p = strtok(&hdrBuf[i+5], " \t\n\r"))) {
384 error(errSyntaxWarning, -1, "May not be a PDF file (continuing anyway)");
385 return;
386 }
387 pdfVersion = atof(p);
388 if (!(hdrBuf[i+5] >= '0' && hdrBuf[i+5] <= '9') ||
389 pdfVersion > supportedPDFVersionNum + 0.0001) {
390 error(errSyntaxWarning, -1,
391 "PDF version {0:s} -- xpdf supports version {1:s} (continuing anyway)",
392 p, supportedPDFVersionStr);
393 }
394 }
395
396 GBool PDFDoc::checkEncryption(GString *ownerPassword, GString *userPassword) {
397 Object encrypt;
398 GBool encrypted;
399 SecurityHandler *secHdlr;
400 GBool ret;
401
402 xref->getTrailerDict()->dictLookup("Encrypt", &encrypt);
403 if ((encrypted = encrypt.isDict())) {
404 if ((secHdlr = SecurityHandler::make(this, &encrypt))) {
405 if (secHdlr->isUnencrypted()) {
406 // no encryption
407 ret = gTrue;
408 } else if (secHdlr->checkEncryption(ownerPassword, userPassword)) {
409 // authorization succeeded
410 xref->setEncryption(secHdlr->getPermissionFlags(),
411 secHdlr->getOwnerPasswordOk(),
412 secHdlr->getFileKey(),
413 secHdlr->getFileKeyLength(),
414 secHdlr->getEncVersion(),
415 secHdlr->getEncAlgorithm());
416 ret = gTrue;
417 } else {
418 // authorization failed
419 ret = gFalse;
420 }
421 delete secHdlr;
422 } else {
423 // couldn't find the matching security handler
424 ret = gFalse;
425 }
426 } else {
427 // document is not encrypted
428 ret = gTrue;
429 }
430 encrypt.free();
431 return ret;
432 }
433
434 void PDFDoc::displayPage(OutputDev *out, int page,
435 double hDPI, double vDPI, int rotate,
436 GBool useMediaBox, GBool crop, GBool printing,
437 GBool (*abortCheckCbk)(void *data),
438 void *abortCheckCbkData) {
439 if (globalParams->getPrintCommands()) {
440 printf("***** page %d *****\n", page);
441 }
442 catalog->getPage(page)->display(out, hDPI, vDPI,
443 rotate, useMediaBox, crop, printing,
444 abortCheckCbk, abortCheckCbkData);
445 }
446
447 void PDFDoc::displayPages(OutputDev *out, int firstPage, int lastPage,
448 double hDPI, double vDPI, int rotate,
449 GBool useMediaBox, GBool crop, GBool printing,
450 GBool (*abortCheckCbk)(void *data),
451 void *abortCheckCbkData) {
452 int page;
453
454 for (page = firstPage; page <= lastPage; ++page) {
455 if (globalParams->getPrintStatusInfo()) {
456 fflush(stderr);
457 printf("[processing page %d]\n", page);
458 fflush(stdout);
459 }
460 displayPage(out, page, hDPI, vDPI, rotate, useMediaBox, crop, printing,
461 abortCheckCbk, abortCheckCbkData);
462 catalog->doneWithPage(page);
463 }
464 }
465
466 void PDFDoc::displayPageSlice(OutputDev *out, int page,
467 double hDPI, double vDPI, int rotate,
468 GBool useMediaBox, GBool crop, GBool printing,
469 int sliceX, int sliceY, int sliceW, int sliceH,
470 GBool (*abortCheckCbk)(void *data),
471 void *abortCheckCbkData) {
472 catalog->getPage(page)->displaySlice(out, hDPI, vDPI,
473 rotate, useMediaBox, crop,
474 sliceX, sliceY, sliceW, sliceH,
475 printing,
476 abortCheckCbk, abortCheckCbkData);
477 }
478
479 Links *PDFDoc::getLinks(int page) {
480 return catalog->getPage(page)->getLinks();
481 }
482
483 void PDFDoc::processLinks(OutputDev *out, int page) {
484 catalog->getPage(page)->processLinks(out);
485 }
486
487 #ifndef DISABLE_OUTLINE
488 int PDFDoc::getOutlineTargetPage(OutlineItem *outlineItem) {
489 LinkAction *action;
490 LinkActionKind kind;
491 LinkDest *dest;
492 GString *namedDest;
493 Ref pageRef;
494 int pg;
495
496 if (outlineItem->pageNum >= 0) {
497 return outlineItem->pageNum;
498 }
499 if (!(action = outlineItem->getAction())) {
500 outlineItem->pageNum = 0;
501 return 0;
502 }
503 kind = action->getKind();
504 if (kind != actionGoTo) {
505 outlineItem->pageNum = 0;
506 return 0;
507 }
508 if ((dest = ((LinkGoTo *)action)->getDest())) {
509 dest = dest->copy();
510 } else if ((namedDest = ((LinkGoTo *)action)->getNamedDest())) {
511 dest = findDest(namedDest);
512 }
513 pg = 0;
514 if (dest) {
515 if (dest->isPageRef()) {
516 pageRef = dest->getPageRef();
517 pg = findPage(pageRef.num, pageRef.gen);
518 } else {
519 pg = dest->getPageNum();
520 }
521 delete dest;
522 }
523 outlineItem->pageNum = pg;
524 return pg;
525 }
526 #endif
527
528 GBool PDFDoc::isLinearized() {
529 Parser *parser;
530 Object obj1, obj2, obj3, obj4, obj5;
531 GBool lin;
532
533 lin = gFalse;
534 obj1.initNull();
535 parser = new Parser(xref,
536 new Lexer(xref,
537 str->makeSubStream(str->getStart(), gFalse, 0, &obj1)),
538 gTrue);
539 parser->getObj(&obj1);
540 parser->getObj(&obj2);
541 parser->getObj(&obj3);
542 parser->getObj(&obj4);
543 if (obj1.isInt() && obj2.isInt() && obj3.isCmd("obj") &&
544 obj4.isDict()) {
545 obj4.dictLookup("Linearized", &obj5);
546 if (obj5.isNum() && obj5.getNum() > 0) {
547 lin = gTrue;
548 }
549 obj5.free();
550 }
551 obj4.free();
552 obj3.free();
553 obj2.free();
554 obj1.free();
555 delete parser;
556 return lin;
557 }
558
559 GBool PDFDoc::saveAs(GString *name) {
560 FILE *f;
561 char buf[4096];
562 int n;
563
564 if (!(f = fopen(name->getCString(), "wb"))) {
565 error(errIO, -1, "Couldn't open file '{0:t}'", name);
566 return gFalse;
567 }
568 str->reset();
569 while ((n = str->getBlock(buf, sizeof(buf))) > 0) {
570 fwrite(buf, 1, n, f);
571 }
572 str->close();
573 fclose(f);
574 return gTrue;
575 }
576
577 GBool PDFDoc::saveEmbeddedFile(int idx, const char *path) {
578 FILE *f;
579 GBool ret;
580
581 if (!(f = fopen(path, "wb"))) {
582 return gFalse;
583 }
584 ret = saveEmbeddedFile2(idx, f);
585 fclose(f);
586 return ret;
587 }
588
589 GBool PDFDoc::saveEmbeddedFileU(int idx, const char *path) {
590 FILE *f;
591 GBool ret;
592
593 if (!(f = openFile(path, "wb"))) {
594 return gFalse;
595 }
596 ret = saveEmbeddedFile2(idx, f);
597 fclose(f);
598 return ret;
599 }
600
#ifdef _WIN32
// Write embedded file number <idx> to the file named by the
// wide-character string <path> of <pathLen> characters.  On NT
// platforms the path is copied (truncated to winMaxLongPath
// characters) and opened with _wfopen; otherwise it is narrowed one
// character at a time (truncated to MAX_PATH) and opened with fopen.
// Returns gFalse if the file cannot be opened, if saveEmbeddedFile2()
// fails, or if closing the file fails.
GBool PDFDoc::saveEmbeddedFile(int idx, const wchar_t *path, int pathLen) {
  FILE *f;
  OSVERSIONINFO version;
  wchar_t path2w[winMaxLongPath + 1];
  char path2c[MAX_PATH + 1];
  int i;
  GBool ret;

  // NB: _wfopen is only available in NT
  version.dwOSVersionInfoSize = sizeof(version);
  GetVersionEx(&version);
  if (version.dwPlatformId == VER_PLATFORM_WIN32_NT) {
    for (i = 0; i < pathLen && i < winMaxLongPath; ++i) {
      path2w[i] = path[i];
    }
    path2w[i] = 0;
    f = _wfopen(path2w, L"wb");
  } else {
    for (i = 0; i < pathLen && i < MAX_PATH; ++i) {
      path2c[i] = (char)path[i];
    }
    path2c[i] = 0;
    f = fopen(path2c, "wb");
  }
  if (!f) {
    return gFalse;
  }
  ret = saveEmbeddedFile2(idx, f);
  // fclose() flushes buffered data; a failure there means the file on
  // disk is incomplete
  if (fclose(f) != 0) {
    ret = gFalse;
  }
  return ret;
}
#endif
634
635 GBool PDFDoc::saveEmbeddedFile2(int idx, FILE *f) {
636 Object strObj;
637 char buf[4096];
638 int n;
639
640 if (!catalog->getEmbeddedFileStreamObj(idx, &strObj)) {
641 return gFalse;
642 }
643 strObj.streamReset();
644 while ((n = strObj.streamGetBlock(buf, sizeof(buf))) > 0) {
645 fwrite(buf, 1, n, f);
646 }
647 strObj.streamClose();
648 strObj.free();
649 return gTrue;
650 }
651
652 char *PDFDoc::getEmbeddedFileMem(int idx, int *size) {
653 Object strObj;
654 char *buf;
655 int bufSize, sizeInc, n;
656
657 if (!catalog->getEmbeddedFileStreamObj(idx, &strObj)) {
658 return NULL;
659 }
660 strObj.streamReset();
661 bufSize = 0;
662 buf = NULL;
663 do {
664 sizeInc = bufSize ? bufSize : 1024;
665 if (bufSize > INT_MAX - sizeInc) {
666 error(errIO, -1, "embedded file is too large");
667 *size = 0;
668 return NULL;
669 }
670 buf = (char *)grealloc(buf, bufSize + sizeInc);
671 n = strObj.streamGetBlock(buf + bufSize, sizeInc);
672 bufSize += n;
673 } while (n == sizeInc);
674 strObj.streamClose();
675 strObj.free();
676 *size = bufSize;
677 return buf;
678 }
679