w32tex
About: TeX Live provides a comprehensive TeX system including all the major TeX-related programs, macro packages, and fonts that are free software. Windows sources.
  Fossies Dox: w32tex-src.tar.xz  ("unofficial" and yet experimental doxygen-generated source code documentation)  

pdfobj.c
Go to the documentation of this file.
1 /* This is dvipdfmx, an eXtended version of dvipdfm by Mark A. Wicks.
2 
3  Copyright (C) 2002-2020 by Jin-Hwan Cho and Shunsaku Hirata,
4  the dvipdfmx project team.
5 
6  Copyright (C) 1998, 1999 by Mark A. Wicks <mwicks@kettering.edu>
7 
8  This program is free software; you can redistribute it and/or modify
9  it under the terms of the GNU General Public License as published by
10  the Free Software Foundation; either version 2 of the License, or
11  (at your option) any later version.
12 
13  This program is distributed in the hope that it will be useful,
14  but WITHOUT ANY WARRANTY; without even the implied warranty of
15  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16  GNU General Public License for more details.
17 
18  You should have received a copy of the GNU General Public License
19  along with this program; if not, write to the Free Software
20  Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA.
21 */
22 
23 #ifdef HAVE_CONFIG_H
24 #include <config.h>
25 #endif
26 
27 #include <ctype.h>
28 #include <string.h>
29 /* floor and abs */
30 #include <math.h>
31 
32 #include "system.h"
33 #include "mem.h"
34 #include "error.h"
35 #include "mfileio.h"
36 #include "dpxconf.h"
37 #include "dpxutil.h"
38 
39 #include "pdflimits.h"
40 #include "pdfencrypt.h"
41 #include "pdfparse.h"
42 
43 #ifdef HAVE_ZLIB
44 #include <zlib.h>
45 #endif /* HAVE_ZLIB */
46 
47 #include "pdfobj.h"
48 #include "pdfdev.h"
49 
50 #define STREAM_ALLOC_SIZE 4096u
51 #define ARRAY_ALLOC_SIZE 256
52 #define IND_OBJECTS_ALLOC_SIZE 512
53 
54 #define OBJ_NO_OBJSTM (1 << 0)
55 /* Objects with this flag will not be put into an object stream.
56  For instance, all stream objects have this flag set. */
57 #define OBJ_NO_ENCRYPT (1 << 1)
58 /* Objects with this flag will not be encrypted.
59  This implies OBJ_NO_OBJSTM if encryption is turned on. */
60 
61 /* Any of these types can be represented as follows */
/*
 * Common header of every PDF object. "type" is the PDF_* tag checked by
 * TYPECHECK/INVALIDOBJ; "data" points to the type-specific payload
 * (pdf_boolean, pdf_number, pdf_string, ...), or is NULL when the type
 * carries no payload.
 */
62 struct pdf_obj
63 {
64  int type;
65 
66  uint32_t label; /* Only used for indirect objects; zero for all others */
67  uint16_t generation; /* Only used if "label" is used */
68  int refcount; /* Number of links to this object */
69  int32_t flags; /* OBJ_NO_OBJSTM / OBJ_NO_ENCRYPT bits */
70  void *data; /* type-specific payload */
71 
72 #if defined(PDFOBJ_DEBUG)
73  int obj_id; /* slot of this object in the debug bucket[] table */
74 #endif
75 };
76 
77 #if defined(PDFOBJ_DEBUG)
78 static pdf_obj *bucket[65535];
79 static int cur_obj_id = 0;
80 #endif
81 
/* Payload of a PDF_BOOLEAN object: non-zero is written as "true", zero as "false". */
82 struct pdf_boolean
83 {
84  char value;
85 };
86 
/* Payload of a PDF_NUMBER object; accessed through pdf_set_number() and the number getter. */
87 struct pdf_number
88 {
89  double value;
90 };
91 
/*
 * Payload of a PDF_STRING object. The setters in this file store a private
 * copy of length+1 bytes with a trailing '\0'; an empty string is stored
 * as string == NULL with length == 0.
 */
92 struct pdf_string
93 {
94  unsigned char *string; /* owned copy of the bytes, or NULL when empty */
95  size_t length; /* byte count, not including the trailing '\0' */
96 };
97 
/*
 * Payload of a PDF_NAME object: a NUL-terminated copy of the name without
 * the leading '/'. An empty name is stored as NULL.
 */
98 struct pdf_name
99 {
100  char *name;
101 };
102 
/*
 * Payload of an array object. A new array starts with values == NULL and
 * max == size == 0; write_array() emits values[0..size-1].
 */
103 struct pdf_array
104 {
105  size_t max; /* presumably the allocated capacity of values[] -- TODO confirm */
106  size_t size; /* number of slots in use */
107  struct pdf_obj **values; /* element pointers; a slot may be NULL */
108 };
109 
/*
 * One key/value entry of a dictionary object.
 * NOTE(review): entries appear to be chained through "next" as a singly
 * linked list -- confirm against the dictionary routines.
 */
110 struct pdf_dict
111 {
112  struct pdf_obj *key;
113  struct pdf_obj *value;
114  struct pdf_dict *next;
115 };
116 
117 /* DecodeParms for FlateDecode */
118  struct decode_parms {
119  int predictor;
120  int colors;
121  int bits_per_component;
123  };
124 
125 /* 2015/12/27 Added support for predictor functions
126  *
127  * There is as yet no way to specify the use of predictor functions.
128  * Using TIFF2 or PNG predictor usually gives better compression for images
129  * but there is a case that compression speed becomes significantly slower.
130  * Please use -C 0x20 option to disable the use of predictor functions.
131  *
132  * See, e.g., for a heuristic approach for selecting filters
133  * http://www.w3.org/TR/PNG-Encoders.html#E.Filter-selection
134  */
135 
/*
 * Payload of a stream object.
 * NOTE(review): presumably "dict" is the stream dictionary and
 * stream/stream_length/max_length are the data buffer, its used size and
 * its capacity -- confirm against the stream routines.
 */
136 struct pdf_stream
137 {
138  struct pdf_obj *dict;
139  unsigned char *stream;
140  int *objstm_data; /* used for object streams */
141  size_t stream_length;
142  size_t max_length;
143  int32_t _flags;
144  struct decode_parms decodeparms; /* DecodeParms for FlateDecode */
145 };
146 
147 struct pdf_indirect
148 {
149  pdf_file *pf;
150  pdf_obj *obj; /* used when PF == NULL */
151  uint32_t label;
153 };
154 
155 typedef void pdf_null;
156 typedef struct pdf_boolean pdf_boolean;
157 typedef struct pdf_number pdf_number;
158 typedef struct pdf_string pdf_string;
159 typedef struct pdf_name pdf_name;
160 typedef struct pdf_array pdf_array;
161 typedef struct pdf_dict pdf_dict;
162 typedef struct pdf_stream pdf_stream;
163 typedef struct pdf_indirect pdf_indirect;
164 
165 typedef struct xref_entry
166 {
167  uint8_t type; /* object storage type */
168  uint32_t field2; /* offset in file or object stream */
169  uint16_t field3; /* generation or index */
170  pdf_obj *direct; /* used for imported objects */
171  pdf_obj *indirect; /* used for imported objects */
173 
174 struct pdf_file
175 {
176  FILE *file;
177  pdf_obj *trailer;
179  pdf_obj *catalog;
180  int num_obj;
181  int file_size;
182  int version;
183 };
184 
185 static int error_out = 0;
186 
187 #define OBJSTM_MAX_OBJS 200
188 /* the limit is only 100 for linearized PDF */
189 
190 struct pdf_out {
191  struct {
192  int enc_mode; /* boolean */
193  } state;
194 
195  unsigned char id1[16];
196  unsigned char id2[16];
197 
198  struct {
199  int major;
200  int minor;
202 
203  struct {
204  struct {
205  int level;
206  int use_predictor;
208 
209  int enable_encrypt;
210  int use_objstm;
212 
213  struct {
214  FILE *file;
215  size_t file_position;
216  int line_position;
217  size_t compression_saved;
219 
220  struct {
223  } obj;
224 
225  pdf_sec *sec_data;
226 
227  pdf_obj *trailer;
230 
234  /* The following flag bits are (8,338,607+1)/8 bytes data
235  * each bit representing if the object is freed.
236  * Where the value 8,338,607 is taken from PDF ref. manual, v.1.7,
237  * Appendix C, "Implementation Limits".
238  */
239  char *free_list;
240 };
241 
242 #if defined(LIBDPX)
243 size_t output_file_size;
244 #endif /* LIBDPX */
245 
246 /* Underway to reform PDF related code... For a moment place pdf_out
247  * object as a static variable. */
248 static pdf_out pout;
249 
250 static pdf_out *
252 {
253  return &pout;
254 }
255 
256 static void
258 {
259  ASSERT(p);
260 
261  p->state.enc_mode = 0;
262 
263  memset(p->id1, 0, 16);
264  memset(p->id2, 0, 16);
265 
266  p->version.major = 1;
267  p->version.minor = PDF_VERSION_DEFAULT;
268 
269  p->options.compression.level = 9;
270  p->options.compression.use_predictor = 1;
271  p->options.enable_encrypt = 0;
272  p->options.use_objstm = 1;
273 
274  p->output.file = NULL;
275  p->output.file_position = 0;
276  p->output.line_position = 0;
277  p->output.compression_saved = 0;
278 #if defined(LIBDPX)
279  output_file_size = 0;
280 #endif /* LIBDPX */
281 
282  p->obj.next_label = 1;
283  p->obj.max_ind_objects = 0;
284 
285  p->sec_data = NULL;
286  p->trailer = NULL;
287  p->startxref = 0;
288  p->xref_table = NULL;
289 
290  p->xref_stream = NULL;
291  p->output_stream = NULL;
292  p->current_objstm = NULL;
293 
294  p->free_list = NEW((PDF_NUM_INDIRECT_MAX+1)/8, char);
295  memset(p->free_list, 0, (PDF_NUM_INDIRECT_MAX+1)/8);
296 }
297 
298 static void
300 {
301  if (p->free_list)
302  RELEASE(p->free_list);
303  memset(p, 0, sizeof(pdf_out));
304 }
305 
306 /* Internal static routines */
307 
308 static int check_for_pdf_version (FILE *file);
309 
310 static void pdf_flush_obj (pdf_out *p, pdf_obj *object);
311 static void pdf_label_obj (pdf_out *p, pdf_obj *object);
312 static void pdf_write_obj (pdf_out *p, pdf_obj *object);
313 
314 static void set_objstm_data (pdf_obj *objstm, int *data);
315 static int *get_objstm_data (pdf_obj *objstm);
316 static void release_objstm (pdf_obj *objstm);
317 
318 static void pdf_out_char (pdf_out *p, char c);
319 static void pdf_out_str (pdf_out *p, const void *buffer, size_t length);
320 
321 static pdf_obj *pdf_new_ref (pdf_out *p, pdf_obj *object);
322 static void release_indirect (pdf_indirect *data);
323 static void write_indirect (pdf_out *p, pdf_indirect *indirect);
324 
325 static void release_boolean (pdf_obj *data);
326 static void write_boolean (pdf_out *p, pdf_boolean *data);
327 
328 static void write_null (pdf_out *p);
329 
330 static void release_number (pdf_number *number);
331 static void write_number (pdf_out *p, pdf_number *number);
332 
333 static void write_string (pdf_out *p, pdf_string *str);
334 static void release_string (pdf_string *str);
335 
336 static void write_name (pdf_out *p, pdf_name *name);
337 static void release_name (pdf_name *name);
338 
339 static void write_array (pdf_out *p, pdf_array *array);
340 static void release_array (pdf_array *array);
341 
342 static void write_dict (pdf_out *p, pdf_dict *dict);
343 static void release_dict (pdf_dict *dict);
344 
345 static void write_stream (pdf_out *p, pdf_stream *stream);
346 static void release_stream (pdf_stream *stream);
347 
348 static void
350 {
351  ASSERT(p);
352 
353 #ifndef HAVE_ZLIB
354  ERROR("You don't have compression compiled in. Possibly libz wasn't found by configure.");
355 #else
356 #ifndef HAVE_ZLIB_COMPRESS2
357  if (level != 0)
358  WARN("Unable to set compression level -- your zlib doesn't have compress2().");
359 #endif
360  if (level >= 0 && level <= 9)
361  p->options.compression.level = level;
362  else {
363  ERROR("set_compression: invalid compression level: %d", level);
364  }
365 #endif /* !HAVE_ZLIB */
366 
367  return;
368 }
369 
370 FILE *
372 {
373  pdf_out *p = current_output();
374  return p->output.file;
375 }
376 
377 static void
378 pdf_out_set_version (pdf_out *p, int ver_major, int ver_minor)
379 {
380  int version;
381 
382  ASSERT(p);
383 
384  version = ver_major * 10 + ver_minor;
385  /* Don't forget to update CIDFont_stdcc_def[] in cid.c too! */
387  p->version.major = ver_major;
388  p->version.minor = ver_minor;
389  } else {
390  WARN("Unsupported PDF version %d.%d ... Ignoring.", ver_major, ver_minor);
391  }
392 }
393 
394 int
396 {
397  pdf_out *p = current_output();
398  return (p->version.major * 10 + p->version.minor);
399 }
400 
401 int
403 {
404  pdf_out *p = current_output();
405  return p->version.major;
406 }
407 
408 int
410 {
411  pdf_out *p = current_output();
412  return p->version.minor;
413 }
414 
415 int
417 {
418  pdf_out *p = current_output();
419  if (p->version.major > major)
420  return 0;
421  else if (p->version.major < major)
422  return -1;
423  else {
424  return (p->version.minor >= minor) ? 0 : -1;
425  }
426 
427  return -1;
428 }
429 
430 static void
432  uint32_t label, uint8_t type, uint32_t field2, uint16_t field3)
433 {
434  ASSERT(p);
435 
436  if (label >= p->obj.max_ind_objects) {
437  p->obj.max_ind_objects = (label/IND_OBJECTS_ALLOC_SIZE+1)*IND_OBJECTS_ALLOC_SIZE;
438  p->xref_table = RENEW(p->xref_table, p->obj.max_ind_objects, xref_entry);
439  }
440 
441  p->xref_table[label].type = type;
442  p->xref_table[label].field2 = field2;
443  p->xref_table[label].field3 = field3;
444  p->xref_table[label].direct = NULL;
445  p->xref_table[label].indirect = NULL;
446 }
447 
448 #define BINARY_MARKER "%\344\360\355\370\n"
449 pdf_out *
450 pdf_out_init (const char *filename,
451  const unsigned char *id1,
452  const unsigned char *id2,
453  int ver_major, int ver_minor, int compression_level,
454  int enable_encrypt,
455  int enable_objstm,
456  int enable_predictor)
457 {
458  pdf_out *p = current_output();
459  char v;
460 
462 
463  pdf_out_set_version(p, ver_major, ver_minor);
465 
466  add_xref_entry(p, 0, 0, 0, 0xffff);
467 
468  /* This must be set before pdf_set_root() is called */
469  p->options.enable_encrypt = enable_encrypt;
470  if (pdf_check_version(1, 5) == 0) {
471  if (enable_objstm) {
472  p->xref_stream = pdf_new_stream(STREAM_COMPRESS);
473  p->xref_stream->flags |= OBJ_NO_ENCRYPT;
474  p->trailer = pdf_stream_dict(p->xref_stream);
475  pdf_add_dict(p->trailer, pdf_new_name("Type"), pdf_new_name("XRef"));
476  p->options.use_objstm = 1;
477  } else {
478  p->xref_stream = NULL;
479  p->trailer = pdf_new_dict();
480  p->options.use_objstm = 0;
481  }
482  } else {
483  p->xref_stream = NULL;
484  p->trailer = pdf_new_dict();
485  p->options.use_objstm = 0;
486  }
487 
488  p->output_stream = NULL;
489 
490  if (filename == NULL) { /* no filename: writing to stdout */
491 #ifdef WIN32
493 #endif
494  p->output.file = stdout;
495  } else {
496  p->output.file = MFOPEN(filename, FOPEN_WBIN_MODE);
497  if (!p->output.file) {
498  if (strlen(filename) < 128)
499  ERROR("Unable to open \"%s\".", filename);
500  else
501  ERROR("Unable to open file.");
502  }
503  }
504  pdf_out_str(p, "%PDF-", strlen("%PDF-"));
505  v = '0' + p->version.major;
506  pdf_out_str(p, &v, 1);
507  pdf_out_str(p, ".", 1);
508  v = '0' + p->version.minor;
509  pdf_out_str(p, &v, 1);
510  pdf_out_str(p, "\n", 1);
512 
513  /* Set trailer ID and setup security handler */
514  {
515  pdf_obj *id_array;
516 
517  memcpy(p->id1, id1, 16);
518  memcpy(p->id2, id2, 16);
519  id_array = pdf_new_array();
520  pdf_add_array(id_array, pdf_new_string(p->id1, 16));
521  pdf_add_array(id_array, pdf_new_string(p->id2, 16));
522  pdf_add_dict(p->trailer, pdf_new_name("ID"), id_array);
523  }
524  p->state.enc_mode = 0;
525  p->options.compression.use_predictor = enable_predictor;
526 
527  return p;
528 }
529 
530 void
532  const char *opasswd, const char *upasswd,
533  int use_aes, int encrypt_metadata)
534 {
535  pdf_out *p = current_output();
536 
537  pdf_obj *encrypt, *extension, *catalog;
538 
539  p->sec_data = pdf_enc_init(p->id1, keybits, permission,
540  opasswd, upasswd, use_aes, encrypt_metadata);
541  if (!p->sec_data) {
542  p->options.enable_encrypt = 0;
543  return;
544  }
545 
546  encrypt = pdf_enc_get_encrypt_dict(p->sec_data);
547  pdf_add_dict(p->trailer,
548  pdf_new_name("Encrypt"), pdf_ref_obj(encrypt));
549  encrypt->flags |= OBJ_NO_ENCRYPT;
550  encrypt->flags |= OBJ_NO_OBJSTM;
551  pdf_release_obj(encrypt);
552 
554  if (extension) {
556  pdf_add_dict(catalog, pdf_new_name("Extensions"), extension);
557  }
558 }
559 
560 static void
562 {
563  int i, length;
564  char buf[32];
565 
566  ASSERT(p);
567 
568  pdf_out_str(p, "xref\n", 5);
569 
570  length = sprintf(buf, "%d %u\n", 0, p->obj.next_label);
571  pdf_out_str(p, buf, length);
572 
573  /*
574  * Every space counts. The space after the 'f' and 'n' is *essential*.
575  * The PDF spec says the lines must be 20 characters long including the
576  * end of line character.
577  */
578  for (i = 0; i < p->obj.next_label; i++) {
579  uint8_t type = p->xref_table[i].type;
580  if (type > 1)
581  ERROR("object type %hu not allowed in xref table", type);
582  length = sprintf(buf, "%010u %05hu %c \n",
583  p->xref_table[i].field2, p->xref_table[i].field3,
584  type ? 'n' : 'f');
585  pdf_out_str(p, buf, length);
586  }
587 }
588 
589 static void
591 {
592  ASSERT(p);
593 
594  pdf_out_str(p, "trailer\n", 8);
595  p->state.enc_mode = 0;
596  write_dict(p, p->trailer->data);
597  pdf_release_obj(p->trailer);
598  p->trailer = NULL;
599  pdf_out_char(p, '\n');
600 }
601 
602 /*
603  * output a PDF 1.5 cross-reference stream;
604  * contributed by Matthias Franz (March 21, 2007)
605  */
606 static void
608 {
609  uint32_t pos, i;
610  uint32_t poslen;
611  unsigned char buf[7] = {0, 0, 0, 0, 0};
612  pdf_obj *w;
613 
614  ASSERT(p);
615 
616  /* determine the necessary size of the offset field */
617  pos = p->startxref; /* maximal offset value */
618  poslen = 1;
619  while (pos >>= 8)
620  poslen++;
621 
622  w = pdf_new_array();
623  pdf_add_array(w, pdf_new_number(1)); /* type */
624  pdf_add_array(w, pdf_new_number(poslen)); /* offset (big-endian) */
625  pdf_add_array(w, pdf_new_number(2)); /* generation */
626  pdf_add_dict(p->trailer, pdf_new_name("W"), w);
627 
628  /* We need the xref entry for the xref stream right now */
629  add_xref_entry(p, p->obj.next_label - 1, 1, p->startxref, 0);
630 
631  for (i = 0; i < p->obj.next_label; i++) {
632  size_t j;
633  uint16_t f3;
634  buf[0] = p->xref_table[i].type;
635  pos = p->xref_table[i].field2;
636  for (j = poslen; j--; ) {
637  buf[1+j] = (unsigned char) pos;
638  pos >>= 8;
639  }
640  f3 = p->xref_table[i].field3;
641  buf[poslen+1] = (unsigned char) (f3 >> 8);
642  buf[poslen+2] = (unsigned char) (f3);
643  pdf_add_stream(p->xref_stream, &buf, poslen+3);
644  }
645 
646  pdf_release_obj(p->xref_stream);
647  p->xref_stream = NULL;
648 }
649 
650 #if defined(LIBDPX)
651 long
652 pdf_output_stats (void)
653 {
654  return (long) output_file_size;
655 }
656 #endif /* LIBDPX */
657 
658 void
660 {
661  pdf_out *p = current_output();
662  char buf[16];
663 
664  if (p->output.file) {
665  int length;
666 
667  /* Flush current object stream */
668  if (p->current_objstm) {
669  release_objstm(p->current_objstm);
670  p->current_objstm =NULL;
671  }
672 
673  /*
674  * Label xref stream - we need the number of correct objects
675  * for the xref stream dictionary (= trailer).
676  * Labelling it in pdf_out_init (with 1) does not work (why?).
677  */
678  if (p->xref_stream)
679  pdf_label_obj(p, p->xref_stream);
680 
681  /* Record where this xref is for trailer */
682  p->startxref = p->output.file_position;
683 
684  pdf_add_dict(p->trailer,
685  pdf_new_name("Size"), pdf_new_number(p->obj.next_label));
686 
687  if (p->xref_stream)
689  else {
691  dump_trailer(p);
692  }
693 
694  /* Done with xref table */
695  RELEASE(p->xref_table);
696  p->xref_table = NULL;
697 
698  pdf_out_str(p, "startxref\n", 10);
699  length = sprintf(buf, "%u\n", p->startxref);
700  pdf_out_str(p, buf, length);
701  pdf_out_str(p, "%%EOF\n", 6);
702 
703 #if !defined(LIBDPX)
704  MESG("\n");
705 #endif /* !LIBDPX */
706  if (dpx_conf.verbose_level > 0) {
707  if (p->options.compression.level > 0) {
708  MESG("Compression saved %ld bytes\n", p->output.compression_saved);
709  }
710  }
711 #if !defined(LIBDPX)
712  MESG("%ld bytes written", p->output.file_position);
713 #else
714  output_file_size = p->output.file_position;
715 #endif /* !LIBDPX */
716 
717  MFCLOSE(p->output.file);
718  p->output.file = NULL;
719  p->output.file_position = 0;
720  p->output.line_position = 0;
721  }
722  if (p->sec_data)
723  pdf_enc_close(&p->sec_data);
724 #if defined(PDFOBJ_DEBUG)
725  {
726  int i;
727  error_out = 1;
728  MESG("\ndebug>> %d PDF objects created.", cur_obj_id);
729  for (i = 0; i < cur_obj_id; i++) {
730  pdf_obj *obj = bucket[i];
731  if (obj) {
732  if (obj->label > 0) {
733  WARN("Object obj_id=<%lu, %u> unreleased...", obj->label, obj->generation);
734  WARN("Reference count=%d", obj->refcount);
735  pdf_write_obj(p, obj);
736  MESG("\n");
737  } else {
738  WARN("Unreleased object found: (unlabeled) id=%d", i);
739  pdf_write_obj(p, obj);
740  MESG("\n");
741  }
742  }
743  }
744  }
745 #endif
747 }
748 
749 void
751 {
752  pdf_out *p = current_output();
753  /*
754  * This routine is the cleanup required for an abnormal exit.
755  * For now, simply close the file.
756  */
757  if (p->output.file)
758  MFCLOSE(p->output.file);
759  p->output.file = NULL;
760 }
761 
762 
763 void
765 {
766  pdf_out *p = current_output();
767 
768  if (pdf_lookup_dict(p->trailer, "Root"))
769  ERROR("Root object already set!");
770  pdf_add_dict(p->trailer, pdf_new_name("Root"), pdf_ref_obj(object));
771  /* Adobe Readers don't like a document catalog inside an encrypted
772  * object stream, although the PDF v1.5 spec seems to allow this.
773  * Note that we don't set OBJ_NO_ENCRYPT since the name dictionary in
774  * a document catalog may contain strings, which should be encrypted.
775  */
776  if (p->options.enable_encrypt)
777  object->flags |= OBJ_NO_OBJSTM;
778 }
779 
780 void
782 {
783  pdf_out *p = current_output();
784 
785  if (pdf_lookup_dict(p->trailer, "Info"))
786  ERROR ("Info object already set!");
787  pdf_add_dict(p->trailer, pdf_new_name("Info"), pdf_ref_obj(object));
788 }
789 
790 
791 
792 static void
794 {
795  ASSERT(p);
796 
797  if (error_out) {
798  fputc(c, stderr);
799  } else {
800  if (p->output_stream)
801  pdf_add_stream(p->output_stream, &c, 1);
802  else {
803  fputc(c, p->output.file);
804  p->output.file_position += 1;
805  if (c == '\n')
806  p->output.line_position = 0;
807  else
808  p->output.line_position += 1;
809  }
810  }
811 }
812 
/* Lowercase hexadecimal digits indexed by nibble value (0..15); read-only. */
static const char xchar[] = "0123456789abcdef";
814 
815 static void
817 {
818  ASSERT(p);
819 
820  pdf_out_char(p, xchar[(c >> 4) & 0x0f]);
821  pdf_out_char(p, xchar[c & 0x0f]);
822 }
823 
824 static void
825 pdf_out_str (pdf_out *p, const void *buffer, size_t length)
826 {
827  ASSERT(p);
828 
829  if (error_out)
830  fwrite(buffer, 1, length, stderr);
831  else {
832  if (p->output_stream)
833  pdf_add_stream(p->output_stream, buffer, length);
834  else {
835  fwrite(buffer, 1, length, p->output.file);
836  p->output.file_position += length;
837  p->output.line_position += length;
838  /* "foo\nbar\n "... */
839  if (length > 0 &&
840  ((const char *)buffer)[length-1] == '\n')
841  p->output.line_position = 0;
842  }
843  }
844 }
845 
846 /* returns 1 if a white-space character is necessary to separate
847  an object of type1 followed by an object of type2 */
848 static
849 int pdf_need_white (int type1, int type2)
850 {
851  return !(type1 == PDF_STRING || type1 == PDF_ARRAY || type1 == PDF_DICT ||
852  type2 == PDF_STRING || type2 == PDF_NAME ||
853  type2 == PDF_ARRAY || type2 == PDF_DICT);
854 }
855 
856 static
858 {
859  ASSERT(p);
860 
861  if (p->output.line_position >= 80) {
862  pdf_out_char(p, '\n');
863  } else {
864  pdf_out_char(p, ' ');
865  }
866 }
867 
/*
 * Report through ERROR() when object pointer o is NULL or its type tag
 * differs from t. Use as a statement: TYPECHECK(obj, PDF_NUMBER);
 *
 * Wrapped in do { ... } while (0) so that the macro plus its trailing
 * semicolon forms exactly one statement and stays correct inside an
 * unbraced if/else.
 */
#define TYPECHECK(o,t) do {\
  if (!(o) || (o)->type != (t)) {\
    ERROR("typecheck: Invalid object type: %d %d (line %d)", (o) ? (o)->type : -1, (t), __LINE__);\
  }\
} while (0)
871 
/* Non-zero when o is NULL or its type tag lies outside 1..PDF_UNDEFINED. */
#define INVALIDOBJ(o) (!((o) != NULL && (o)->type > 0 && (o)->type <= PDF_UNDEFINED))
873 
874 static pdf_obj *
876 {
877  pdf_obj *result;
878 
879  if (type > PDF_UNDEFINED || type < 0)
880  ERROR("Invalid object type: %d", type);
881 
882  result = NEW(1, pdf_obj);
883  result->type = type;
884  result->data = NULL;
885  result->label = 0;
886  result->generation = 0;
887  result->refcount = 1;
888  result->flags = 0;
889 
890 #if defined(PDFOBJ_DEBUG)
891  result->obj_id = cur_obj_id;
892  bucket[cur_obj_id] = result;
893  cur_obj_id++;
894 #endif
895 
896  return result;
897 }
898 
899 int
901 {
902  if (INVALIDOBJ(object))
903  return PDF_OBJ_INVALID;
904 
905  return object->type;
906 }
907 
908 static void
910 {
911  ASSERT(p);
912 
913  if (INVALIDOBJ(object))
914  ERROR("pdf_label_obj(): passed invalid object.");
915 
916  /*
917  * Don't change label on an already labeled object. Ignore such calls.
918  */
919  if (object->label == 0) {
920  if (p->obj.next_label == PDF_NUM_INDIRECT_MAX) {
921  ERROR("Number of indirect object has reached its maximum value!");
922  }
923  object->label = p->obj.next_label++;
924  object->generation = 0;
925  }
926 }
927 
928 /*
929  * Transfer the label assigned to the object src to the object dst.
930  * The object dst must not yet have been labeled.
931  */
932 void
934 {
935  ASSERT(dst && !dst->label && src);
936 
937  dst->label = src->label;
938  dst->generation = src->generation;
939  src->label = 0;
940  src->generation = 0;
941 }
942 
943 /*
944  * This doesn't really copy the object, but allows it to be used without
945  * fear that somebody else will free it.
946  */
947 pdf_obj *
949 {
950  if (INVALIDOBJ(object))
951  ERROR("pdf_link_obj(): passed invalid object.");
952 
953  object->refcount += 1;
954 
955  return object;
956 }
957 
958 
959 pdf_obj *
961 {
962  pdf_out *p = current_output();
963 
964  if (INVALIDOBJ(object))
965  ERROR("pdf_ref_obj(): passed invalid object.");
966 
967  if (object->refcount == 0) {
968  MESG("\nTrying to refer already released object!!!\n");
969  error_out = 1;
970  pdf_write_obj(p, object);
971  ERROR("Cannot continue...");
972  error_out = 0;
973  }
974 
975  if (PDF_OBJ_INDIRECTTYPE(object)) {
976  return pdf_link_obj(object);
977  } else {
978  return pdf_new_ref(p, object);
979  }
980 }
981 
982 static void
984 {
985  RELEASE(data);
986 }
987 
988 static void
990 {
991  int length;
992  char buf[64];
993 
994  ASSERT(p);
995  ASSERT(!indirect->pf);
996 
997  length = sprintf(buf, "%u %hu R",
998  indirect->label, indirect->generation);
999  pdf_out_str(p, buf, length);
1000 }
1001 
1002 /* The undefined object is used as a placeholder in pdfnames.c
1003  * for objects which are referenced before they are defined.
1004  */
1005 pdf_obj *
1007 {
1008  pdf_obj *result;
1009 
1011  result->data = NULL;
1012 
1013  return result;
1014 }
1015 
1016 pdf_obj *
1018 {
1019  pdf_obj *result;
1020 
1022  result->data = NULL;
1023 
1024  return result;
1025 }
1026 
1027 static void
1029 {
1030  pdf_out_str(p, "null", 4);
1031 }
1032 
1033 pdf_obj *
1035 {
1036  pdf_obj *result;
1037  pdf_boolean *data;
1038 
1040  data = NEW(1, pdf_boolean);
1041  data->value = value;
1042  result->data = data;
1043 
1044  return result;
1045 }
1046 
1047 static void
1049 {
1050  RELEASE (data);
1051 }
1052 
1053 static void
1055 {
1056  if (data->value) {
1057  pdf_out_str(p, "true", 4);
1058  } else {
1059  pdf_out_str(p, "false", 5);
1060  }
1061 }
1062 
1063 char
1065 {
1066  pdf_boolean *data;
1067 
1068  TYPECHECK(object, PDF_BOOLEAN);
1069 
1070  data = object->data;
1071 
1072  return data->value;
1073 }
1074 
1075 pdf_obj *
1077 {
1078  pdf_obj *result;
1079  pdf_number *data;
1080 
1082  data = NEW(1, pdf_number);
1083  data->value = value;
1084  result->data = data;
1085 
1086  return result;
1087 }
1088 
1089 static void
1091 {
1092  RELEASE (data);
1093 }
1094 
1095 static void
1097 {
1098  int count;
1099  char buf[512];
1100 
1101  count = pdf_sprint_number(buf, number->value);
1102 
1103  pdf_out_str(p, buf, count);
1104 }
1105 
1106 
1107 void
1108 pdf_set_number (pdf_obj *object, double value)
1109 {
1110  pdf_number *data;
1111 
1112  TYPECHECK(object, PDF_NUMBER);
1113 
1114  data = object->data;
1115  data->value = value;
1116 }
1117 
1118 double
1120 {
1121  pdf_number *data;
1122 
1123  TYPECHECK(object, PDF_NUMBER);
1124 
1125  data = object->data;
1126 
1127  return data->value;
1128 }
1129 
1130 pdf_obj *
1131 pdf_new_string (const void *str, unsigned length)
1132 {
1133  pdf_obj *result;
1134  pdf_string *data;
1135 
1136  ASSERT(str);
1137 
1139  data = NEW(1, pdf_string);
1140  result->data = data;
1141  data->length = length;
1142 
1143  if (length) {
1144  data->string = NEW(length+1, unsigned char);
1145  memcpy(data->string, str, length);
1146  /* Shouldn't assume NULL terminated. */
1147  data->string[length] = '\0';
1148  } else
1149  data->string = NULL;
1150 
1151  return result;
1152 }
1153 
1154 void *
1156 {
1157  pdf_string *data;
1158 
1159  TYPECHECK(object, PDF_STRING);
1160 
1161  data = object->data;
1162 
1163  return data->string;
1164 }
1165 
1166 unsigned
1168 {
1169  pdf_string *data;
1170 
1171  TYPECHECK(object, PDF_STRING);
1172 
1173  data = object->data;
1174 
1175  return (unsigned) data->length;
1176 }
1177 
1178 /*
1179  * This routine escapes non printable characters and control
1180  * characters in an output string.
1181  */
/*
 * Escape a byte string for use inside a PDF literal string "( ... )".
 *
 *   buffer   destination; receives the escaped bytes (NOT NUL-terminated)
 *   bufsize  capacity of buffer in bytes
 *   s        source bytes
 *   len      number of source bytes; values <= 0 produce no output
 *
 * Bytes outside 32..126 become a backslash followed by exactly three
 * octal digits; '(', ')' and '\' are preceded by a backslash; every
 * other byte is copied unchanged.
 *
 * Returns the number of bytes stored in buffer. ERROR() is raised when
 * fewer than 4 bytes of room remain before a source byte is processed
 * (4 is the longest expansion of a single byte).
 */
int
pdfobj_escape_str (char *buffer, int bufsize, const unsigned char *s, int len)
{
  size_t result = 0;
  /* Clamp once so the room check below cannot wrap for tiny/negative sizes. */
  size_t room   = (bufsize > 0) ? (size_t) bufsize : 0;
  int    i;

  for (i = 0; i < len; i++) {
    unsigned char ch = s[i];

    if (room < 4 || result > room - 4)
      ERROR("pdfobj_escape_str: Buffer overflow");

    /*
     * We always write three octal digits. Optimization only gives few Kb
     * smaller size for most documents when zlib compressed.
     * The digits are stored directly: sprintf() would append a '\0' that
     * can land one byte past the end of the buffer.
     */
    if (ch < 32 || ch > 126) {
      buffer[result++] = '\\';
      buffer[result++] = (char) ('0' + ((ch >> 6) & 07));
      buffer[result++] = (char) ('0' + ((ch >> 3) & 07));
      buffer[result++] = (char) ('0' + (ch & 07));
    } else {
      switch (ch) {
      case '(':
      case ')':
      case '\\':
        buffer[result++] = '\\';
        buffer[result++] = (char) ch;
        break;
      default:
        buffer[result++] = (char) ch;
        break;
      }
    }
  }

  return (int) result;
}
1231 
1232 static void
1234 {
1235  unsigned char *s = NULL;
1236  size_t i, nescc = 0;
1237  size_t len = 0;
1238 
1239  ASSERT(p);
1240 
1241  if (p->state.enc_mode) {
1242  pdf_encrypt_data(p->sec_data, str->string, str->length, &s, &len);
1243  } else {
1244  s = str->string;
1245  len = str->length;
1246  }
1247 
1248  /*
1249  * Count all ASCII non-printable characters.
1250  */
1251  for (i = 0; i < len; i++) {
1252  if (!isprint(s[i]))
1253  nescc++;
1254  }
1255  /*
1256  * If the string contains much escaped chars, then we write it as
1257  * ASCII hex string.
1258  */
1259  if (nescc > len / 3) {
1260  pdf_out_char(p, '<');
1261  for (i = 0; i < len; i++) {
1262  pdf_out_xchar(p, s[i]);
1263  }
1264  pdf_out_char(p, '>');
1265  } else {
1266  char *buf;
1267  size_t size, count;
1268 
1269  /* At most len/3 is to be escaped here. (see above)
1270  * Thus 4*len/3 + 2*len/3 + 2 is enough for buffer size.
1271  */
1272  size = len * 2 + 3;
1273  buf = NEW(size, char);
1274 
1275  pdf_out_char(p, '(');
1276  /*
1277  * This section of code probably isn't speed critical. Escaping the
1278  * characters in the string one at a time may seem slow, but it's
1279  * safe if the formatted string length exceeds FORMAT_BUF_SIZE.
1280  * Occasionally you see some long strings in PDF. pdfobj_escape_str
1281  * is also used for strings of text with no kerning. These must be
1282  * handled as quickly as possible since there are so many of them.
1283  */
1284  for (i = 0; i < len; i++) {
1285  count = pdfobj_escape_str(buf, size, &(s[i]), 1);
1286  pdf_out_str(p, buf, count);
1287  }
1288  pdf_out_char(p, ')');
1289  RELEASE(buf);
1290  }
1291 
1292  if (p->state.enc_mode && s)
1293  RELEASE(s);
1294 }
1295 
1296 static void
1298 {
1299  if (data->string != NULL) {
1300  RELEASE(data->string);
1301  data->string = NULL;
1302  }
1303  RELEASE(data);
1304 }
1305 
1306 void
1307 pdf_set_string (pdf_obj *object, unsigned char *str, unsigned length)
1308 {
1309  pdf_string *data;
1310 
1311  TYPECHECK(object, PDF_STRING);
1312 
1313  data = object->data;
1314  if (data->string != 0) {
1315  RELEASE(data->string);
1316  }
1317  if (length != 0) {
1318  data->length = length;
1319  data->string = NEW(length + 1, unsigned char);
1320  memcpy(data->string, str, length);
1321  data->string[length] = '\0';
1322  } else {
1323  data->length = 0;
1324  data->string = NULL;
1325  }
1326 }
1327 
1328 /* Name does *not* include the /. */
1329 pdf_obj *
1330 pdf_new_name (const char *name)
1331 {
1332  pdf_obj *result;
1333  size_t length;
1334  pdf_name *data;
1335 
1337  data = NEW (1, pdf_name);
1338  result->data = data;
1339  length = strlen(name);
1340  if (length != 0) {
1341  data->name = NEW(length+1, char);
1342  memcpy(data->name, name, length);
1343  data->name[length] = '\0';
1344  } else {
1345  data->name = NULL;
1346  }
1347 
1348  return result;
1349 }
1350 
1351 static void
1353 {
1354  char *s;
1355  size_t i, length;
1356 
1357  ASSERT(p);
1358 
1359  s = name->name;
1360  length = name->name ? strlen(name->name) : 0;
1361  /*
1362  * From PDF Reference, 3rd ed., p.33:
1363  *
1364  * Beginning with PDF 1.2, any character except null (character code 0)
1365  * may be included in a name by writing its 2-digit hexadecimal code,
1366  * preceded by the number sign character (#); see implementation notes 3
1367  * and 4 in Appendix H. This syntax is required in order to represent
1368  * any of the delimiter or white-space characters or the number sign
1369  * character itself; it is recommended but not required for characters
1370  * whose codes are outside the range 33 (!) to 126 (~).
1371  */
1372 #ifndef is_delim
1373  /* Avoid '{' and '}' for PostScript compatibility? */
1374 #define is_delim(c) ((c) == '(' || (c) == ')' || \
1375  (c) == '/' || \
1376  (c) == '<' || (c) == '>' || \
1377  (c) == '[' || (c) == ']' || \
1378  (c) == '{' || (c) == '}' || \
1379  (c) == '%')
1380 #endif
1381  pdf_out_char(p, '/');
1382  for (i = 0; i < length; i++) {
1383  if (s[i] < '!' || s[i] > '~' || s[i] == '#' || is_delim(s[i])) {
1384  /* ^ "space" is here. */
1385  pdf_out_char (p, '#');
1386  pdf_out_xchar(p, s[i]);
1387  } else {
1388  pdf_out_char (p, s[i]);
1389  }
1390  }
1391 }
1392 
/*
 * Destructor for a name payload: frees the name string (if any) and then
 * the payload structure.
 * NOTE(review): the signature line is not present in this listing.
 */
1393 static void
1395 {
1396  if (data->name != NULL) {
1397  RELEASE(data->name);
1398  data->name = NULL;
1399  }
1400  RELEASE(data);
1401 }
1402 
/*
 * Return the string stored in a name object after checking that `object`
 * is of type PDF_NAME. The pointer is the payload's own buffer (no copy).
 * NOTE(review): the signature line is not present in this listing.
 */
1403 char *
1405 {
1406  pdf_name *data;
1407 
1408  TYPECHECK(object, PDF_NAME);
1409 
1410  data = object->data;
1411 
1412  return data->name;
1413 }
1414 
1415 /*
1416  * We do not have pdf_name_length() since '\0' is not allowed
1417  * in PDF name object.
1418  */
1419 
/*
 * Create an array payload with no storage allocated (values NULL,
 * max 0, size 0) and attach it to `result`.
 * NOTE(review): the signature line and the statement that assigns
 * `result` are not present in this listing.
 */
1420 pdf_obj *
1422 {
1423  pdf_obj *result;
1424  pdf_array *data;
1425 
1427  data = NEW(1, pdf_array);
1428  data->values = NULL;
1429  data->max = 0;
1430  data->size = 0;
1431  result->data = data;
1432 
1433  return result;
1434 }
1435 
/*
 * Emit an array as '[' elements ']'. White space is written between two
 * consecutive elements only when pdf_need_white() asks for it for their
 * types. NULL slots are skipped with a warning.
 * NOTE(review): the signature line is not present in this listing.
 */
1436 static void
1438 {
1439  ASSERT(p);
1440 
1441  pdf_out_char(p, '[');
1442  if (array->size > 0) {
1443  size_t i;
 /* type1 tracks the type of the previously written element. */
1444  int type1 = PDF_UNDEFINED, type2;
1445 
1446  for (i = 0; i < array->size; i++) {
1447  if (array->values[i]) {
1448  type2 = array->values[i]->type;
1449  if (type1 != PDF_UNDEFINED && pdf_need_white(type1, type2))
1450  pdf_out_white(p);
1451  type1 = type2;
1452  pdf_write_obj(p, array->values[i]);
1453  } else
 /* NOTE(review): `i` is size_t but is formatted with %ld. */
1454  WARN("PDF array element %ld undefined.", i);
1455  }
1456  }
1457  pdf_out_char(p, ']');
1458 }
1459 
1460 pdf_obj *
1462 {
1463  pdf_obj *result = NULL;
1464  pdf_array *data;
1465 
1467 
1468  data = array->data;
1469  if (idx < 0)
1470  result = data->values[idx + data->size];
1471  else if (idx < data->size) {
1472  result = data->values[idx];
1473  }
1474 
1475  return result;
1476 }
1477 
/*
 * Return the number of elements in the array payload, converted to
 * unsigned.
 * NOTE(review): the signature line and one statement before the payload
 * fetch are not present in this listing.
 */
1478 unsigned
1480 {
1481  pdf_array *data;
1482 
1484 
1485  data = (pdf_array *) array->data;
1486 
1487  return (unsigned) data->size;
1488 }
1489 
/*
 * Destructor for an array payload: releases every stored element, frees
 * the element table, then frees the payload itself.
 * NOTE(review): the signature line is not present in this listing.
 */
1490 static void
1492 {
1493  size_t i;
1494 
1495  if (data->values) {
1496  for (i = 0; i < data->size; i++) {
1497  pdf_release_obj(data->values[i]);
1498  data->values[i] = NULL;
1499  }
1500  RELEASE(data->values);
1501  data->values = NULL;
1502  }
1503  RELEASE(data);
1504 }
1505 
1506 /*
1507  * The name pdf_add_array is misleading. It behaves differently than
1508  * pdf_add_dict(). This should be pdf_push_array().
1509  */
/*
 * Append `object` at the end of the array, growing the element table by
 * ARRAY_ALLOC_SIZE slots whenever it is full. The pointer is stored as
 * given (no link is taken here).
 * NOTE(review): the signature line and one statement before the payload
 * fetch are not present in this listing.
 */
1510 void
1512 {
1513  pdf_array *data;
1514 
1516 
1517  data = array->data;
1518  if (data->size >= data->max) {
1519  data->max += ARRAY_ALLOC_SIZE;
1520  data->values = RENEW(data->values, data->max, pdf_obj *);
1521  }
1522  data->values[data->size] = object;
1523  data->size++;
1524 
1525  return;
1526 }
1527 
/* Disabled code: pdf_put_array() and pdf_shift_array() are compiled out. */
1528 #if 0
/*
 * Store `object` at index idx, padding any gap below idx with null
 * objects, or replacing (and releasing) the element already there.
 * NOTE(review): the table grows by a single ARRAY_ALLOC_SIZE step, which
 * is not enough when idx + 1 exceeds max by more than that.
 */
1529 void
1530 pdf_put_array (pdf_obj *array, int idx, pdf_obj *object)
1531 {
1532  pdf_array *data;
1533  int i;
1534 
1536 
1537  data = array->data;
1538  if (idx + 1 > data->max) {
1539  data->max += ARRAY_ALLOC_SIZE;
1540  data->values = RENEW(data->values, data->max, pdf_obj *);
1541  }
1542  /*
1543  * Rangecheck error in PostScript interpreters if
1544  * idx > data->size - 1. But pdf_new_array() doesn't set
1545  * array size, pdf_add_array() dynamically increases size
1546  * of array. This might confusing...
1547  */
1548  if (idx + 1 > data->size) {
1549  for (i = data->size; i < idx; i++)
1550  data->values[i] = pdf_new_null(); /* release_array() won't work without this */
1551  data->values[idx] = object;
1552  data->size = idx + 1;
1553  } else {
1554  if (data->values[idx])
1555  pdf_release_obj(data->values[idx]);
1556  data->values[idx] = object;
1557  }
1558 }
1559 
1560 /* Easily leaks memory... */
/*
 * Remove and return the first element, moving the remaining elements one
 * slot towards the front. Returns NULL for an empty array. The removed
 * element is handed to the caller without being released.
 */
1561 pdf_obj *
1562 pdf_shift_array (pdf_obj *array)
1563 {
1564  pdf_obj *result = NULL;
1565  pdf_array *data;
1566 
1568 
1569  data = array->data;
1570  if (data->size > 0) {
1571  size_t i;
1572 
1573  result = data->values[0];
1574  for (i = 1; i < data->size; i++)
1575  data->values[i-1] = data->values[i];
1576  data->size--;
1577  }
1578 
1579  return result;
1580 }
1581 #endif
1582 
1583 /* Prepend an object to an array */
/*
 * The element table is grown by ARRAY_ALLOC_SIZE when full, the existing
 * elements are moved up one slot with memmove() (the regions overlap),
 * and `object` is stored in slot 0.
 * NOTE(review): the signature line and one statement before the payload
 * fetch are not present in this listing.
 */
1584 static void
1586 {
1587  pdf_array *data;
1588 
1590 
1591  data = array->data;
1592  if (data->size >= data->max) {
1593  data->max += ARRAY_ALLOC_SIZE;
1594  data->values = RENEW(data->values, data->max, pdf_obj *);
1595  }
1596  memmove(&data->values[1], data->values, data->size * sizeof(pdf_obj *));
1597  data->values[0] = object;
1598  data->size++;
1599 }
1600 
/* Disabled code: pdf_pop_array() is compiled out. */
1601 #if 0
/*
 * Remove and return the last element, or NULL when the array is empty.
 * The element is handed to the caller without being released.
 */
1602 pdf_obj *
1603 pdf_pop_array (pdf_obj *array)
1604 {
1605  pdf_obj *result;
1606  pdf_array *data;
1607 
1609 
1610  data = array->data;
1611  if (data->size > 0) {
1612  result = data->values[data->size - 1];
1613  data->size--;
1614  } else {
1615  result = NULL;
1616  }
1617 
1618  return result;
1619 }
1620 #endif
1621 
/*
 * Emit a dictionary as "<<" key value ... ">>", walking the linked list
 * until the terminating node whose key is NULL. White space between a key
 * and its value is written only when pdf_need_white() requires it.
 * NOTE(review): the signature line is not present in this listing.
 * NOTE(review): `dict->value` is dereferenced unconditionally; a NULL
 * value stored in an entry would crash here — confirm whether that can
 * happen.
 */
1622 static void
1624 {
1625 #if 0
1626  pdf_out_str (p, "<<\n", 3); /* dropping \n saves few kb. */
1627 #else
1628  pdf_out_str(p, "<<", 2);
1629 #endif
1630  while (dict->key != NULL) {
1631  pdf_write_obj(p, dict->key);
1632  if (pdf_need_white(PDF_NAME, (dict->value)->type)) {
1633  pdf_out_white(p);
1634  }
1635  pdf_write_obj(p, dict->value);
1636 #if 0
1637  pdf_out_char (file, '\n'); /* removing this saves few kb. */
1638 #endif
1639  dict = dict->next;
1640  }
1641  pdf_out_str(p, ">>", 2);
1642 }
1643 
/*
 * Create a dictionary payload consisting of a single terminator node
 * (key, value and next all NULL) and attach it to `result`.
 * NOTE(review): the signature line and the statement that assigns
 * `result` are not present in this listing.
 */
1644 pdf_obj *
1646 {
1647  pdf_obj *result;
1648  pdf_dict *data;
1649 
1651  data = NEW(1, pdf_dict);
1652  data->key = NULL;
1653  data->value = NULL;
1654  data->next = NULL;
1655  result->data = data;
1656 
1657  return result;
1658 }
1659 
/*
 * Destructor for a dictionary list: releases key and value of every
 * entry and frees each node, then frees the terminator node (the one
 * whose key is NULL) after the loop.
 * NOTE(review): the signature line is not present in this listing.
 */
1660 static void
1662 {
1663  pdf_dict *next;
1664 
1665  while (data != NULL && data->key != NULL) {
1666  pdf_release_obj(data->key);
1667  pdf_release_obj(data->value);
1668  data->key = NULL;
1669  data->value = NULL;
 /* Save the successor before this node is freed. */
1670  next = data->next;
1671  RELEASE(data);
1672  data = next;
1673  }
1674  RELEASE(data);
1675 }
1676 
1677 /* Array is ended by a node with NULL this pointer */
1678 /* pdf_add_dict returns 0 if the key is new and non-zero otherwise */
/*
 * Insert or replace an entry. The list is terminated by a node whose key
 * is NULL; a new entry is written into that node and a fresh terminator
 * is linked after it. When the key already exists the old value is
 * released and replaced.
 * NOTE(review): the signature line and two statements are not present in
 * this listing (one after the TYPECHECK, one under the "Release the new
 * key" comment); the body uses `dict`, `key` and `value`.
 */
1679 int
1681 {
1682  pdf_dict *data, *new_node;
1683 
1684  TYPECHECK(dict, PDF_DICT);
1686 
1687  /* It seems that NULL is sometimes used for null object... */
1688  if (value != NULL && INVALIDOBJ(value))
1689  ERROR("pdf_add_dict(): Passed invalid value");
1690 
1691  /* If this key already exists, simply replace the value */
1692  for (data = dict->data; data->key != NULL; data = data->next) {
1693  if (!strcmp(pdf_name_value(key), pdf_name_value(data->key))) {
1694  /* Release the old value */
1695  pdf_release_obj(data->value);
1696  /* Release the new key (we don't need it) */
1698  data->value = value;
1699  return 1;
1700  }
1701  }
1702  /*
1703  * We didn't find the key. We build a new "end" node and add
1704  * the new key just before the end
1705  */
1706  new_node = NEW (1, pdf_dict);
1707  new_node->key = NULL;
1708  new_node->value = NULL;
1709  new_node->next = NULL;
 /* `data` is the old terminator here; it becomes the new entry. */
1710  data->next = new_node;
1711  data->key = key;
1712  data->value = value;
1713  return 0;
1714 }
1715 
#if 0
/*
 * Disabled variant of pdf_add_dict() that takes the key as a C string.
 *
 * If an entry named key exists, its value is released and replaced by
 * value. Otherwise the terminator node (key == NULL) is turned into the
 * new entry, with a freshly created name object as key, and a new
 * terminator is linked after it.
 */
void
pdf_put_dict (pdf_obj *dict, const char *key, pdf_obj *value)
{
  pdf_dict *data, *new_node;

  TYPECHECK(dict, PDF_DICT);

  if (!key) {
    ERROR("pdf_put_dict(): Passed invalid key.");
  }
  /* It seems that NULL is sometimes used for null object... */
  if (value != NULL && INVALIDOBJ(value)) {
    ERROR("pdf_put_dict(): Passed invalid value.");
  }

  for (data = dict->data; data->key != NULL; data = data->next) {
    if (!strcmp(key, pdf_name_value(data->key))) {
      pdf_release_obj(data->value);
      data->value = value;
      return;
    }
  }

  /*
   * We didn't find the key: build a new "end" node and store the new
   * entry in the old end node.
   */
  new_node = NEW (1, pdf_dict);
  new_node->key   = NULL;
  new_node->value = NULL;
  new_node->next  = NULL;
  data->next  = new_node;
  data->key   = pdf_new_name(key);
  data->value = value;
}
#endif
1760 
1761 /* pdf_merge_dict makes a link for each item in dict2 before stealing it */
/*
 * Every key/value pair of dict2 is passed through pdf_link_obj() and
 * added to dict1 with pdf_add_dict(), so entries already present in
 * dict1 under the same key get their value replaced.
 * NOTE(review): the signature line is not present in this listing.
 */
1762 void
1764 {
1765  pdf_dict *data;
1766 
1767  TYPECHECK(dict1, PDF_DICT);
1768  TYPECHECK(dict2, PDF_DICT);
1769 
1770  data = dict2->data;
1771  while (data->key != NULL) {
1772  pdf_add_dict(dict1, pdf_link_obj(data->key), pdf_link_obj(data->value));
1773  data = data->next;
1774  }
1775 }
1776 
/*
 * Call proc(key, value, pdata) for each dictionary entry in list order.
 * Iteration stops at the first non-zero return from proc, and that value
 * is returned; 0 is returned when every call returned 0.
 * NOTE(review): the first line of the signature (function name and the
 * `dict` parameter) is not present in this listing.
 */
1777 int
1779  int (*proc) (pdf_obj *, pdf_obj *, void *), void *pdata)
1780 {
1781  int error = 0;
1782  pdf_dict *data;
1783 
1784  ASSERT(proc);
1785 
1786  TYPECHECK(dict, PDF_DICT);
1787 
1788  data = dict->data;
1789  while (!error &&
1790  data->key != NULL) {
1791  error = proc(data->key, data->value, pdata);
1792  data = data->next;
1793  }
1794 
1795  return error;
1796 }
1797 
1798 #define pdf_match_name(o,s) ((o) && (s) && !strcmp(((pdf_name *)(o)->data)->name, (s)))
1799 pdf_obj *
1800 pdf_lookup_dict (pdf_obj *dict, const char *name)
1801 {
1802  pdf_dict *data;
1803 
1804  ASSERT(name);
1805 
1806  TYPECHECK(dict, PDF_DICT);
1807 
1808  data = dict->data;
1809  while (data->key != NULL) {
1810  if (!strcmp(name, pdf_name_value(data->key))) {
1811  return data->value;
1812  }
1813  data = data->next;
1814  }
1815 
1816  return NULL;
1817 }
1818 
1819 /* Returns array of dictionary keys */
/*
 * A new array is created and the dictionary entries are walked up to the
 * terminator node.
 * NOTE(review): the signature line and the loop body statement are not
 * present in this listing; per the comment inside the loop, keys are
 * meant to be duplicated rather than linked.
 */
1820 pdf_obj *
1822 {
1823  pdf_obj *keys;
1824  pdf_dict *data;
1825 
1826  TYPECHECK(dict, PDF_DICT);
1827 
1828  keys = pdf_new_array();
1829  for (data = dict->data; (data &&
1830  data->key != NULL); data = data->next) {
1831  /* We duplicate name object rather than linking keys.
1832  * If we forget to free keys, broken PDF is generated.
1833  */
1835  }
1836 
1837  return keys;
1838 }
1839 
1840 void
1841 pdf_remove_dict (pdf_obj *dict, const char *name)
1842 {
1843  pdf_dict *data, **data_p;
1844 
1845  TYPECHECK(dict, PDF_DICT);
1846 
1847  data = dict->data;
1848  data_p = (pdf_dict **) (void *) &(dict->data);
1849  while (data->key != NULL) {
1850  if (pdf_match_name(data->key, name)) {
1851  pdf_release_obj(data->key);
1852  pdf_release_obj(data->value);
1853  *data_p = data->next;
1854  RELEASE(data);
1855  break;
1856  }
1857  data_p = &(data->next);
1858  data = data->next;
1859  }
1860 }
1861 
/*
 * Create a stream payload with an empty dictionary, no data buffer, the
 * given flags, and default predictor parameters (predictor 2, other
 * fields 0). The resulting object is flagged OBJ_NO_OBJSTM.
 * NOTE(review): the signature line and the statement that assigns
 * `result` are not present in this listing; the body uses a `flags`
 * parameter.
 */
1862 pdf_obj *
1864 {
1865  pdf_obj *result;
1866  pdf_stream *data;
1867 
1869  data = NEW(1, pdf_stream);
1870  /*
1871  * Although we are using an arbitrary pdf_object here, it must have
1872  * type=PDF_DICT and cannot be an indirect reference. This will be
1873  * checked by the output routine.
1874  */
1875  data->dict = pdf_new_dict();
1876  data->_flags = flags;
1877  data->stream = NULL;
1878  data->stream_length = 0;
1879  data->max_length = 0;
1880  data->objstm_data = NULL;
1881 
1882  data->decodeparms.predictor = 2;
1883  data->decodeparms.columns = 0;
1884  data->decodeparms.bits_per_component = 0;
1885  data->decodeparms.colors = 0;
1886 
1887  result->data = data;
1888  result->flags |= OBJ_NO_OBJSTM;
1889 
1890  return result;
1891 }
1892 
/*
 * Record predictor parameters on a stream and set STREAM_USE_PREDICTOR.
 * Nothing is changed when columns, bpc or colors is negative.
 * NOTE(review): the first signature line and the condition guarding the
 * first `return` are not present in this listing.
 * NOTE(review): zero values for columns/bpc/colors are accepted here.
 */
1893 void
1895  int predictor, int32_t columns, int bpc, int colors)
1896 {
1897  struct pdf_stream *data;
1898 
1900  return;
1901  else if (columns < 0 || bpc < 0 || colors < 0)
1902  return;
1903 
1904  data = (struct pdf_stream *) stream->data;
1905  data->decodeparms.predictor = predictor;
1906  data->decodeparms.columns = columns;
1907  data->decodeparms.bits_per_component = bpc;
1908  data->decodeparms.colors = colors;
1909  data->_flags |= STREAM_USE_PREDICTOR;
1910 }
1911 
1912 /* Adaptive PNG filter
1913  * We use the "minimum sum of absolute differences" heuristic approach
1914  * for finding the most optimal filter to be used.
1915  *
1916  * From http://www.libpng.org/pub/png/book/chapter09.html
1917  *
1918  * For grayscale and truecolor images of 8 or more bits per sample, with or
1919  * without alpha channels, dynamic filtering is almost always beneficial. The
1920  * approach that has by now become standard is known as the minimum sum of
1921  * absolute differences heuristic and was first proposed by Lee Daniel
1922  * Crocker in February 1995.
1923  */
/*
 * Returns a newly allocated buffer of (rowbytes + 1) * rows bytes: each
 * output row is one filter-type byte followed by the filtered row. The
 * size is also stored through `length`. `raster` is only read.
 * NOTE(review): the signature lines are not present in this listing; the
 * body uses raster, columns, rows, bpc, colors and length.
 * NOTE(review): rowbytes is columns * bytes_per_pixel here, i.e. whole
 * bytes per pixel — confirm this is intended for bit depths below 8.
 */
1924 static unsigned char *
1928 {
1929  unsigned char *dst;
1930  int bits_per_pixel = colors * bpc;
1931  int bytes_per_pixel = (bits_per_pixel + 7) / 8;
1932  int32_t rowbytes = columns * bytes_per_pixel;
1933  int32_t i, j;
1934 
1935  ASSERT(raster && length);
1936 
1937  /* Result */
1938  dst = NEW((rowbytes+1)*rows, unsigned char);
1939  *length = (rowbytes + 1) * rows;
1940 
1941  for (j = 0; j < rows; j++) {
1942  int type = 0;
 /* pp: start of output row j (type byte first); p: start of input row j. */
1943  unsigned char *pp = dst + j * (rowbytes + 1);
1944  unsigned char *p = raster + j * rowbytes;
 /* sum[t] accumulates the cost of filter type t for this row. */
1945  uint32_t sum[5] = {0, 0, 0, 0, 0};
1946  /* First calculated sum of values to make a heuristic guess
1947  * of optimal predictor function.
1948  */
1949  for (i = 0; i < rowbytes; i++) {
 /* Neighbours outside the image (first pixel / first row) count as 0. */
1950  int left = (i - bytes_per_pixel >= 0) ? p[i - bytes_per_pixel] : 0;
1951  int up = (j > 0) ? *(p+i-rowbytes) : 0;
1952  int uplft = (j > 0) ?
1953  ((i - bytes_per_pixel >= 0) ?
1954  *(p+i-rowbytes-bytes_per_pixel) : 0) : 0;
1955  /* Type 0 -- None */
1956  sum[0] += p[i];
1957  /* Type 1 -- Sub */
1958  sum[1] += abs((int) p[i] - left);
1959  /* Type 2 -- Up */
1960  sum[2] += abs((int) p[i] - up);
1961  /* Type 3 -- Average */
1962  {
 /* (up + left) / 2 is integer division; floor() has no further effect. */
1963  int tmp = floor((up + left) / 2);
1964  sum[3] += abs((int) p[i] - tmp);
1965  }
1966  /* Type 4 -- Paeth */
1967  {
1968  int q = left + up - uplft;
1969  int qa = abs(q - left), qb = abs(q - up), qc = abs(q - uplft);
1970  if (qa <= qb && qa <= qc)
1971  sum[4] += abs((int) p[i] - left);
1972  else if (qb <= qc)
1973  sum[4] += abs((int) p[i] - up);
1974  else
1975  sum[4] += abs((int) p[i] - uplft);
1976  }
1977  }
1978  {
 /* Pick the type with the smallest sum; ties keep the lower type.
  * NOTE(review): sum[] is uint32_t while min is int, so the comparison
  * mixes signed and unsigned operands. */
1979  int min = sum[0], min_idx = 0;
1980  for (i = 0; i < 5; i++) {
1981  if (sum[i] < min) {
1982  min = sum[i]; min_idx = i;
1983  }
1984  }
1985  type = min_idx;
1986  }
1987  /* Now we actually apply filter. */
1988  pp[0] = type;
1989  switch (type) {
1990  case 0:
1991  memcpy(pp+1, p, rowbytes);
1992  break;
1993  case 1:
1994  for (i = 0; i < rowbytes; i++) {
1995  int left = (i - bytes_per_pixel >= 0) ? p[i - bytes_per_pixel] : 0;
1996  pp[i+1] = p[i] - left;
1997  }
1998  break;
1999  case 2:
2000  for (i = 0; i < rowbytes; i++) {
2001  int up = (j > 0) ? *(p+i - rowbytes) : 0;
2002  pp[i+1] = p[i] - up;
2003  }
2004  break;
2005  case 3:
2006  {
2007  for (i = 0; i < rowbytes; i++) {
2008  int up = (j > 0) ? *(p+i-rowbytes) : 0;
2009  int left = (i - bytes_per_pixel >= 0) ? p[i - bytes_per_pixel] : 0;
2010  int tmp = floor((up + left) / 2);
2011  pp[i+1] = p[i] - tmp;
2012  }
2013  }
2014  break;
2015  case 4: /* Paeth */
2016  {
2017  for (i = 0; i < rowbytes; i++) {
2018  int up = (j > 0) ? *(p+i-rowbytes) : 0;
2019  int left = (i - bytes_per_pixel >= 0) ? p[i - bytes_per_pixel] : 0;
2020  int uplft = (j > 0) ?
2021  ((i - bytes_per_pixel >= 0) ?
2022  *(p+i-rowbytes-bytes_per_pixel) : 0) : 0;
2023  int q = left + up - uplft;
2024  int qa = abs(q - left), qb = abs(q - up), qc = abs(q - uplft);
2025  if (qa <= qb && qa <= qc)
2026  pp[i+1] = p[i] - left;
2027  else if (qb <= qc)
2028  pp[i+1] = p[i] - up;
2029  else
2030  pp[i+1] = p[i] - uplft;
2031  }
2032  }
2033  break;
2034  }
2035  }
2036 
2037  return dst;
2038 }
2039 
2040 /* TIFF predictor filter support
2041  * This modifies "raster" itself!
2042  */
/*
 * Each row is processed independently: every bpc-bit component is
 * replaced by its difference from the same component of the previous
 * pixel (modulo 1 << bpc), with the predecessor of the first pixel taken
 * as 0. Rows are rowbytes apart and the last byte of a row is padded
 * with zero bits.
 * NOTE(review): the first signature lines and the declaration of
 * inbuf/outbuf are not present in this listing; the body uses raster,
 * width, height, bpc and num_comp.
 */
2043 static void
2046  int8_t bpc, int8_t num_comp)
2047 {
2048  int32_t rowbytes = (bpc * num_comp * width + 7) / 8;
2049  uint8_t mask = (1 << bpc) - 1;
2050  uint16_t *prev;
2051  int32_t i, j;
2052 
2053  ASSERT(raster);
2054  ASSERT( bpc > 0 && bpc <= 8 );
2055 
 /* prev[c] holds the value of component c in the previous pixel. */
2056  prev = NEW(num_comp, uint16_t);
2057 
2058  /* Generic routine for 1 to 16 bit.
2059  * It supports, e.g., 7 bpc images too.
2060  * Actually, it is not necessary to have 16 bit inbuf and outbuf
2061  * since we only need 1, 2, and 4 bit support here. 8 bit is enough.
2062  */
2063  for (j = 0; j < height; j++) {
2064  int32_t k, l, inbits, outbits;
2066  int c;
2067 
2068  memset(prev, 0, sizeof(uint16_t)*num_comp);
2069  inbuf = outbuf = 0; inbits = outbits = 0;
 /* l is the read index and k the write index; both start at the row. */
2070  l = k = j * rowbytes;
2071  for (i = 0; i < width; i++) {
2072  for (c = 0; c < num_comp; c++) {
2073  uint8_t cur;
2074  int8_t sub;
2075  if (inbits < bpc) { /* need more byte */
2076  inbuf = (inbuf << 8) | raster[l]; l++;
2077  inbits += 8;
2078  }
2079  cur = (inbuf >> (inbits - bpc)) & mask;
2080  inbits -= bpc; /* consumed bpc bits */
2081  sub = cur - prev[c];
2082  prev[c] = cur;
2083  if (sub < 0)
2084  sub += (1 << bpc);
2085  /* Append newly filtered component value */
2086  outbuf = (outbuf << bpc) | sub;
2087  outbits += bpc;
2088  /* flush */
2089  if (outbits >= 8) {
2090  raster[k] = (outbuf >> (outbits - 8)); k++;
2091  outbits -= 8;
2092  }
2093  }
2094  }
 /* Left-align the remaining bits in the last byte of the row. */
2095  if (outbits > 0) {
2096  raster[k] = (outbuf << (8 - outbits));
2097  k++;
2098  }
2099  }
2100  RELEASE(prev);
2101 }
2102 
/*
 * Returns a newly allocated copy of `raster` (rowbytes * rows bytes, size
 * also stored through `length`) with horizontal differencing applied for
 * 8 and 16 bits per component. Other bpc values not listed in the switch
 * leave the copy unchanged.
 * NOTE(review): the signature lines and the statement of the 1/2/4-bit
 * case are not present in this listing; the body uses raster, columns,
 * rows, bpc, colors and length.
 */
2103 static unsigned char *
2107 {
2108  unsigned char *dst;
2109  uint16_t *prev;
2110  int32_t rowbytes = (bpc * colors * columns + 7) / 8;
2111  int32_t i, j;
2112 
2113  ASSERT(raster && length);
2114 
2115  dst = NEW(rowbytes*rows, unsigned char);
2116  memcpy(dst, raster, rowbytes*rows);
2117  *length = rowbytes * rows;
2118 
2119  switch (bpc) {
2120  case 1: case 2: case 4:
2122  break;
2123 
2124  case 8:
 /* prev[c]: component c of the previous pixel, reset to 0 per row. */
2125  prev = NEW(colors, uint16_t);
2126  for (j = 0; j < rows; j++) {
2127  memset(prev, 0, sizeof(uint16_t)*colors);
2128  for (i = 0; i < columns; i++) {
2129  int c;
2130  int32_t pos = colors * (columns * j + i);
2131  for (c = 0; c < colors; c++) {
2132  uint8_t cur = raster[pos+c];
2133  int32_t sub = cur - prev[c];
2134  prev[c] = cur;
 /* Stored into an unsigned char, so the difference wraps modulo 256. */
2135  dst[pos+c] = sub;
2136  }
2137  }
2138  }
2139  RELEASE(prev);
2140  break;
2141 
2142  case 16:
2143  prev = NEW(colors, uint16_t);
2144  for (j = 0; j < rows; j++) {
2145  memset(prev, 0, sizeof(uint16_t)*colors);
2146  for (i = 0; i < columns; i++) {
2147  int c;
2148  int32_t pos = 2 * colors * (columns * j + i);
2149  for (c = 0; c < colors; c++) {
 /* Samples are read and written high byte first. */
2150  uint16_t cur = ((uint8_t)raster[pos+2*c])*256 +
2151  (uint8_t)raster[pos+2*c+1];
2152  uint16_t sub = cur - prev[c];
2153  prev[c] = cur;
2154  dst[pos+2*c ] = (sub >> 8) & 0xff;
2155  dst[pos+2*c+1] = sub & 0xff;
2156  }
2157  }
2158  }
2159  RELEASE(prev);
2160  break;
2161 
2162  }
2163 
2164  return dst;
2165 }
2166 
/*
 * Build a new dictionary with the entries BitsPerComponent, Colors,
 * Columns and Predictor set from the corresponding arguments, and return
 * it.
 * NOTE(review): the first signature line is not present in this listing;
 * the body uses predictor, columns, bpc and colors.
 */
2167 static pdf_obj *
2169  int bpc, int colors)
2170 {
2171  pdf_obj *parms;
2172 
2173  parms = pdf_new_dict();
2174  pdf_add_dict(parms, pdf_new_name("BitsPerComponent"), pdf_new_number(bpc));
2175  pdf_add_dict(parms, pdf_new_name("Colors"), pdf_new_number(colors));
2176  pdf_add_dict(parms, pdf_new_name("Columns"), pdf_new_number(columns));
2177  pdf_add_dict(parms, pdf_new_name("Predictor"), pdf_new_number(predictor));
2178 
2179  return parms;
2180 }
2181 
2182 static void
2184 {
2185  unsigned char *filtered;
2186  size_t filtered_length;
2187 #ifdef HAVE_ZLIB
2188  uLong buffer_length;
2189 #else
2190  size_t buffer_length;
2191 #endif
2192  unsigned char *buffer;
2193 
2194  ASSERT(p);
2195 
2196  /*
2197  * Always work from a copy of the stream. All filters read from
2198  * "filtered" and leave their result in "filtered".
2199  */
2200 #if 0
2201  filtered = NEW(stream->stream_length + 1, unsigned char);
2202 #endif
2203  filtered = NEW(stream->stream_length, unsigned char);
2204  memcpy(filtered, stream->stream, stream->stream_length);
2205  filtered_length = stream->stream_length;
2206 
2207  /* PDF/A requires Metadata to be not filtered. */
2208  {
2209  pdf_obj *type;
2210  type = pdf_lookup_dict(stream->dict, "Type");
2211  if (type && !strcmp("Metadata", pdf_name_value(type))) {
2212  stream->_flags &= ~~STREAM_COMPRESS;
2213  }
2214  }
2215 
2216 #ifdef HAVE_ZLIB
2217  /* Apply compression filter if requested */
2218  if (stream->stream_length > 0 &&
2219  (stream->_flags & STREAM_COMPRESS) &&
2220  p->options.compression.level > 0) {
2221  pdf_obj *filters;
2222 
2223  /* First apply predictor filter if requested. */
2224  if ( p->options.compression.use_predictor &&
2225  (stream->_flags & STREAM_USE_PREDICTOR) &&
2226  !pdf_lookup_dict(stream->dict, "DecodeParms")) {
2227  int bits_per_pixel = stream->decodeparms.colors *
2228  stream->decodeparms.bits_per_component;
2229  int32_t len = (stream->decodeparms.columns * bits_per_pixel + 7) / 8;
2230  int32_t rows = stream->stream_length / len;
2231  unsigned char *filtered2 = NULL;
2232  int32_t length2 = stream->stream_length;
2233  pdf_obj *parms;
2234 
2235  parms = filter_create_predictor_dict(stream->decodeparms.predictor,
2236  stream->decodeparms.columns,
2237  stream->decodeparms.bits_per_component,
2238  stream->decodeparms.colors);
2239 
2240  switch (stream->decodeparms.predictor) {
2241  case 2: /* TIFF2 */
2242  filtered2 = filter_TIFF2_apply_filter(filtered,
2243  stream->decodeparms.columns,
2244  rows,
2245  stream->decodeparms.bits_per_component,
2246  stream->decodeparms.colors, &length2);
2247  break;
2248  case 15: /* PNG optimun */
2249  filtered2 = filter_PNG15_apply_filter(filtered,
2250  stream->decodeparms.columns,
2251  rows,
2252  stream->decodeparms.bits_per_component,
2253  stream->decodeparms.colors, &length2);
2254  break;
2255  default:
2256  WARN("Unknown/unsupported Predictor function %d.",
2257  stream->decodeparms.predictor);
2258  break;
2259  }
2260  if (parms && filtered2) {
2261  RELEASE(filtered);
2262  filtered = filtered2;
2263  filtered_length = length2;
2264  pdf_add_dict(stream->dict, pdf_new_name("DecodeParms"), parms);
2265  }
2266  }
2267 
2268  filters = pdf_lookup_dict(stream->dict, "Filter");
2269 
2270  buffer_length = filtered_length + filtered_length/1000 + 14;
2271  buffer = NEW(buffer_length, unsigned char);
2272  {
2273  pdf_obj *filter_name = pdf_new_name("FlateDecode");
2274 
2275  if (filters)
2276  /*
2277  * FlateDecode is the first filter to be applied to the stream.
2278  */
2279  pdf_unshift_array(filters, filter_name);
2280  else
2281  /*
2282  * Adding the filter as a name instead of a one-element array
2283  * is crucial because otherwise Adobe Reader cannot read the
2284  * cross-reference stream any more, cf. the PDF v1.5 Errata.
2285  */
2286  pdf_add_dict(stream->dict, pdf_new_name("Filter"), filter_name);
2287  }
2288 #ifdef HAVE_ZLIB_COMPRESS2
2289  if (compress2(buffer, &buffer_length, filtered,
2290  filtered_length, p->options.compression.level)) {
2291  ERROR("Zlib error");
2292  }
2293 #else
2294  if (compress(buffer, &buffer_length, filtered,
2295  filtered_length)) {
2296  ERROR ("Zlib error");
2297  }
2298 #endif /* HAVE_ZLIB_COMPRESS2 */
2299  RELEASE(filtered);
2300  p->output.compression_saved +=
2301  filtered_length - buffer_length
2302  - (filters ? strlen("/FlateDecode "): strlen("/Filter/FlateDecode\n"));
2303 
2304  filtered = buffer;
2305  filtered_length = buffer_length;
2306  }
2307 #endif /* HAVE_ZLIB */
2308 
2309  /* AES will change the size of data! */
2310  if (p->state.enc_mode) {
2311  unsigned char *cipher = NULL;
2312  size_t cipher_len = 0;
2313  pdf_encrypt_data(p->sec_data, filtered, filtered_length, &cipher, &cipher_len);
2314  RELEASE(filtered);
2315  filtered = cipher;
2316  filtered_length = cipher_len;
2317  }
2318 
2319  #if 0
2320  /*
2321  * An optional end-of-line marker preceding the "endstream" is
2322  * not part of stream data. See, PDF Reference 4th ed., p. 38.
2323  */
2324  /* Add a '\n' if the last character wasn't one */
2325  if (filtered_length > 0 &&
2326  filtered[filtered_length-1] != '\n') {
2327  filtered[filtered_length] = '\n';
2328  filtered_length++;
2329  }
2330 #endif
2331 
2332  pdf_add_dict(stream->dict,
2333  pdf_new_name("Length"), pdf_new_number(filtered_length));
2334 
2335  pdf_write_obj(p, stream->dict);
2336 
2337  pdf_out_str(p, "\nstream\n", 8);
2338 
2339  if (filtered_length > 0)
2340  pdf_out_str(p, filtered, filtered_length);
2341  RELEASE(filtered);
2342 
2343  /*
2344  * This stream length "object" gets reset every time write_stream is
2345  * called for the stream object.
2346  * If this stream gets written more than once with different
2347  * filters, this could be a problem.
2348  */
2349 
2350  pdf_out_str(p, "\n", 1);
2351  pdf_out_str(p, "endstream", 9);
2352 }
2353 
/*
 * Destructor for a stream payload: releases the stream dictionary, frees
 * the data buffer and the objstm_data array when present, then frees the
 * payload itself.
 * NOTE(review): the signature line is not present in this listing.
 */
2354 static void
2356 {
2357  pdf_release_obj(stream->dict);
2358  stream->dict = NULL;
2359 
2360  if (stream->stream) {
2361  RELEASE(stream->stream);
2362  stream->stream = NULL;
2363  }
2364 
2365  if (stream->objstm_data) {
2366  RELEASE(stream->objstm_data);
2367  stream->objstm_data = NULL;
2368  }
2369 
2370  RELEASE(stream);
2371 }
2372 
/*
 * Return the dictionary object held by a stream payload (the stored
 * pointer itself).
 * NOTE(review): the signature line and one statement before the payload
 * fetch are not present in this listing.
 */
2373 pdf_obj *
2375 {
2376  pdf_stream *data;
2377 
2379 
2380  data = stream->data;
2381 
2382  return data->dict;
2383 }
2384 
/*
 * Return a read-only pointer to the stream's data buffer (the internal
 * buffer, not a copy).
 * NOTE(review): the signature line and one statement before the payload
 * fetch are not present in this listing.
 */
2385 const void *
2387 {
2388  pdf_stream *data;
2389 
2391 
2392  data = stream->data;
2393 
2394  return (const void *) data->stream;
2395 }
2396 
/*
 * Return the number of data bytes currently held by the stream,
 * converted to int.
 * NOTE(review): the signature line and one statement before the payload
 * fetch are not present in this listing.
 */
2397 int
2399 {
2400  pdf_stream *data;
2401 
2403 
2404  data = stream->data;
2405 
2406  return (int) data->stream_length;
2407 }
2408 
2409 static void
2410 set_objstm_data (pdf_obj *objstm, int *data) {
2411  TYPECHECK(objstm, PDF_STREAM);
2412 
2413  ((pdf_stream *) objstm->data)->objstm_data = data;
2414 }
2415 
/*
 * Return the objstm_data pointer stored on a stream object, after
 * checking that the object is of type PDF_STREAM.
 * NOTE(review): the signature line is not present in this listing.
 */
2416 static int *
2418  TYPECHECK(objstm, PDF_STREAM);
2419 
2420  return ((pdf_stream *) objstm->data)->objstm_data;
2421 }
2422 
/*
 * Append `length` bytes from stream_data to the stream's data buffer.
 * Calls with length < 1 are ignored. When the buffer is too small it is
 * enlarged by length + STREAM_ALLOC_SIZE bytes.
 * NOTE(review): one statement before the length check is not present in
 * this listing.
 */
2423 void
2424 pdf_add_stream (pdf_obj *stream, const void *stream_data, int length)
2425 {
2426  pdf_stream *data;
2427 
2429 
2430  if (length < 1)
2431  return;
2432  data = stream->data;
2433  if (data->stream_length + length > data->max_length) {
2434  data->max_length += length + STREAM_ALLOC_SIZE;
2435  data->stream = RENEW(data->stream, data->max_length, unsigned char);
2436  }
2437  memcpy(data->stream + data->stream_length, stream_data, length);
2438  data->stream_length += length;
2439 }
2440 
2441 #if HAVE_ZLIB
2442 #define WBUF_SIZE 4096
2443 
/*
 * Fill `parms` from a DecodeParms dictionary. Missing entries keep the
 * defaults set below (Predictor 1, Colors 1, BitsPerComponent 8,
 * Columns 1).
 * Returns 0 on success and -1 when BitsPerComponent is not one of
 * 1, 2, 4, 8, 16, or when Predictor, Colors or Columns is not positive.
 * NOTE(review): one statement after the first ASSERT is not present in
 * this listing.
 */
2444 static int
2445 filter_get_DecodeParms_FlateDecode (struct decode_parms *parms, pdf_obj *dict)
2446 {
2447  pdf_obj *tmp;
2448 
2449  ASSERT(dict && parms);
2451 
2452  /* Fill with default values */
2453  parms->predictor = 1;
2454  parms->colors = 1;
2455  parms->bits_per_component = 8;
2456  parms->columns = 1;
2457 
 /* Each pdf_deref_obj() result is released right after it is read. */
2458  tmp = pdf_deref_obj(pdf_lookup_dict(dict, "Predictor"));
2459  if (tmp) {
2460  parms->predictor = pdf_number_value(tmp);
2461  pdf_release_obj(tmp);
2462  }
2463  tmp = pdf_deref_obj(pdf_lookup_dict(dict, "Colors"));
2464  if (tmp) {
2465  parms->colors = pdf_number_value(tmp);
2466  pdf_release_obj(tmp);
2467  }
2468  tmp = pdf_deref_obj(pdf_lookup_dict(dict, "BitsPerComponent"));
2469  if (tmp) {
2470  parms->bits_per_component = pdf_number_value(tmp);
2471  pdf_release_obj(tmp);
2472  }
2473  tmp = pdf_deref_obj(pdf_lookup_dict(dict, "Columns"));
2474  if (tmp) {
2475  parms->columns = pdf_number_value(tmp);
2476  pdf_release_obj(tmp);
2477  }
2478  if (parms->bits_per_component != 1 &&
2479  parms->bits_per_component != 2 &&
2480  parms->bits_per_component != 4 &&
2481  parms->bits_per_component != 8 &&
2482  parms->bits_per_component != 16) {
2483  WARN("Invalid BPC value in DecodeParms: %d", parms->bits_per_component);
2484  return -1;
2485  } else if (parms->predictor <= 0 || parms->colors <= 0 ||
2486  parms->columns <= 0)
2487  return -1;
2488  return 0;
2489 }
2490 
2491 /* From Xpdf version 3.04
2492  * I'm not sure if I properly ported... Untested.
2493  */
2494 static int
2495 filter_row_TIFF2 (unsigned char *dst, const unsigned char *src,
2496  struct decode_parms *parms)
2497 {
2498  const unsigned char *p = src;
2499  unsigned char *col;
2500  /* bits_per_component < 8 here */
2501  int mask = (1 << parms->bits_per_component) - 1;
2502  int inbuf, outbuf; /* 2 bytes buffer */
2503  int i, ci, j, k, inbits, outbits;
2504 
2505  col = NEW(parms->colors, unsigned char);
2506  memset(col, 0, parms->colors);
2507  inbuf = outbuf = 0; inbits = outbits = 0;
2508  j = k = 0;
2509  for (i = 0; i < parms->columns; i++) {
2510  /* expanding each color component into an 8-bits bytes array */
2511  for (ci = 0; ci < parms->colors; ci++) {
2512  if (inbits < parms->bits_per_component) {
2513  /* need more byte */
2514  inbuf = (inbuf << 8) | p[j++];
2515  inbits += 8;
2516  }
2517  /* predict current color component */
2518  col[ci] = (unsigned char) ((col[ci] +
2519  (inbuf >> (inbits - parms->bits_per_component))) & mask);
2520  inbits -= parms->bits_per_component; /* consumed bpc bits */
2521  /* append newly predicted color component value */
2522  outbuf = (outbuf << parms->bits_per_component) | col[ci];
2523  outbits += parms->bits_per_component;
2524  if (outbits >= 8) { /* flush */
2525  dst[k++] = (unsigned char) (outbuf >> (outbits - 8));
2526  outbits -= 8;
2527  }
2528  }
2529  }
2530  if (outbits > 0) {
2531  dst[k] = (unsigned char) (outbuf << (8 - outbits));
2532  }
2533  RELEASE(col);
2534 
2535  return 0;
2536 }
2537 
2538 /* This routine is inefficient. Length is typically 4 for Xref streams.
2539  * Especially, calling pdf_add_stream() for each 4 bytes append is highly
2540  * inefficient.
2541  */
2542 static pdf_obj *
2543 filter_stream_decode_Predictor (const void *src, size_t srclen, struct decode_parms *parms)
2544 {
2545  pdf_obj *dst;
2546  const unsigned char *p = (const unsigned char *) src;
2547  const unsigned char *endptr = p + srclen;
2548  unsigned char *prev, *buf;
2549  int bits_per_pixel = parms->colors * parms->bits_per_component;
2550  int bytes_per_pixel = (bits_per_pixel + 7) / 8;
2551  int length = (parms->columns * bits_per_pixel + 7) / 8;
2552  int i, error = 0;
2553 
2554  dst = pdf_new_stream(0);
2555 
2556  prev = NEW(length, unsigned char);
2557  buf = NEW(length, unsigned char);
2558 
2559  memset(prev, 0, length);
2560  switch (parms->predictor) {
2561  case 1 : /* No prediction */
2562  pdf_add_stream(dst, src, srclen); /* Just copy */
2563  break;
2564  case 2: /* TIFF Predictor 2 */
2565  {
2566  if (parms->bits_per_component == 8) {
2567  while (p + length < endptr) {
2568  /* Same as PNG Sub */
2569  for (i = 0; i < length; i++) {
2570  int pv = i - bytes_per_pixel >= 0 ? buf[i - bytes_per_pixel] : 0;
2571  buf[i] = (unsigned char)(((int) p[i] + pv) & 0xff);
2572  }
2574  p += length;
2575  }
2576  } else if (parms->bits_per_component == 16) {
2577  while (p + length < endptr) {
2578  for (i = 0; i < length; i += 2) {
2579  int b = i - bytes_per_pixel;
2580  char hi = b >= 0 ? buf[b] : 0;
2581  char lo = b >= 0 ? buf[b + 1] : 0;
2582  int pv = (hi << 8) | lo;
2583  int cv = (p[i] << 8) | p[i + 1];
2584  int c = pv + cv;
2585  buf[i] = (unsigned char) (c >> 8);
2586  buf[i + 1] = (unsigned char) (c & 0xff);
2587  }
2589  p += length;
2590  }
2591  } else { /* bits per component 1, 2, 4 */
2592  while (!error && p + length < endptr) {
2593  error = filter_row_TIFF2(buf, p, parms);
2594  if (!error) {
2596  p += length;
2597  }
2598  }
2599  }
2600  }
2601  break;
2602  /* PNG predictors: first byte of each rows is predictor type */
2603  case 10: /* PNG None */
2604  case 11: /* PNG Sub on all rows */
2605  case 12: /* PNG UP on all rows */
2606  case 13: /* PNG Average on all rows */
2607  case 14: /* PNG Paeth on all rows */
2608  case 15: /* PNG optimun: prediction algorithm can change from line to line. */
2609  {
2610  int type = parms->predictor - 10;
2611 
2612  while (!error && p + length < endptr) {
2613  if (parms->predictor == 15)
2614  type = *p;
2615  else if (*p != type) {
2616  WARN("Mismatched Predictor type in data stream.");
2617  error = -1;
2618  }
2619  p++;
2620  switch (type) {
2621  case 0: /* Do nothing just skip first byte */
2622  memcpy(buf, p, length);
2623  break;
2624  case 1:
2625  for (i = 0; i < length; i++) {
2626  int pv = i - bytes_per_pixel >= 0 ? buf[i - bytes_per_pixel] : 0;
2627  buf[i] = (unsigned char)(((int) p[i] + pv) & 0xff);
2628  }
2629  break;
2630  case 2:
2631  for (i = 0; i < length; i++) {
2632  buf[i] = (unsigned char)(((int) p[i] + (int) prev[i]) & 0xff);
2633  }
2634  break;
2635  case 3:
2636  for (i = 0; i < length; i++) {
2637  int up = prev[i];
2638  int left = i - bytes_per_pixel >= 0 ? buf[i - bytes_per_pixel] : 0;
2639  int tmp = floor((up + left) / 2);
2640  buf[i] = (unsigned char)((p[i] + tmp) & 0xff);
2641  }
2642  break;
2643  case 4:
2644  for (i = 0; i < length; i++) {
2645  int a = i - bytes_per_pixel >= 0 ? buf[i - bytes_per_pixel] : 0; /* left */
2646  int b = prev[i]; /* above */
2647  int c = i - bytes_per_pixel >= 0 ? prev[i - bytes_per_pixel] : 0; /* upper left */
2648  int q = a + b - c;
2649  int qa = q - a, qb = q - b, qc = q - c;
2650  qa = qa < 0 ? -qa : qa;
2651  qb = qb < 0 ? -qb : qb;
2652  qc = qc < 0 ? -qc : qc;
2653  if (qa <= qb && qa <= qc)
2654  buf[i] = (unsigned char) (((int) p[i] + a) & 0xff);
2655  else if (qb <= qc)
2656  buf[i] = (unsigned char) (((int) p[i] + b) & 0xff);
2657  else
2658  buf[i] = (unsigned char) (((int) p[i] + c) & 0xff);
2659  }
2660  break;
2661  default:
2662  WARN("Unknown PNG predictor type: %d", type);
2663  error = -1;
2664  }
2665  if (!error) {
2666  pdf_add_stream(dst, buf, length); /* highly inefficient */
2667  memcpy(prev, buf, length);
2668  p += length;
2669  }
2670  }
2671  }
2672  break;
2673  default:
2674  WARN("Unknown Predictor type value :%d", parms->predictor);
2675  error = -1;
2676  }
2677 
2678  RELEASE(prev);
2679  RELEASE(buf);
2680 
2681  if (error) {
2683  dst = NULL;
2684  }
2685 
2686  return dst;
2687 }
2688 
2689 static pdf_obj *
2690 filter_stream_decode_FlateDecode (const void *data, size_t len, struct decode_parms *parms)
2691 {
2692  pdf_obj *dst;
2693  pdf_obj *tmp;
2694  z_stream z;
2695  Bytef wbuf[WBUF_SIZE];
2696 
2697  z.zalloc = Z_NULL; z.zfree = Z_NULL; z.opaque = Z_NULL;
2698 
2699  z.next_in = (z_const Bytef *) data; z.avail_in = len;
2700  z.next_out = (Bytef *) wbuf; z.avail_out = WBUF_SIZE;
2701 
2702  if (inflateInit(&z) != Z_OK) {
2703  WARN("inflateInit() failed.");
2704  return NULL;
2705  }
2706 
2707  tmp = pdf_new_stream(0);
2708  for (;;) {
2709  int status;
2710  status = inflate(&z, Z_NO_FLUSH);
2711  if (status == Z_STREAM_END) {
2712  break;
2713  } else if (status == Z_DATA_ERROR && z.avail_in == 0) {
2714  WARN("Ignoring zlib error: status=%d, message=\"%s\"", status, z.msg);
2715  break;
2716  } else if (status != Z_OK) {
2717  WARN("inflate() failed. Broken PDF file?");
2718  inflateEnd(&z);
2719  pdf_release_obj(tmp);
2720  return NULL;
2721  }
2722 
2723  if (z.avail_out == 0) {
2724  pdf_add_stream(tmp, wbuf, WBUF_SIZE);
2725  z.next_out = wbuf;
2726  z.avail_out = WBUF_SIZE;
2727  }
2728  }
2729 
2730  if (WBUF_SIZE - z.avail_out > 0)
2731  pdf_add_stream(tmp, wbuf, WBUF_SIZE - z.avail_out);
2732 
2733  if (inflateEnd(&z) == Z_OK) {
2734  if (parms) {
2735  dst = filter_stream_decode_Predictor(pdf_stream_dataptr(tmp), pdf_stream_length(tmp), parms);
2736  } else {
2737  dst = pdf_link_obj(tmp);
2738  }
2739  } else {
2740  dst = NULL;
2741  }
2742  pdf_release_obj(tmp);
2743 
2744  return dst;
2745 }
2746 #endif
2747 
/*
 * ASCIIHex decoder.  Converts pairs of hexadecimal digits found in
 * data[0..len) into bytes until the '>' end-of-data marker is met;
 * skip_white() is called before the first digit and after every digit.
 * Returns a new stream holding the decoded bytes, or NULL (after a warning)
 * on an invalid character or when no EOD marker was seen.
 *
 * NOTE(review): the function header line (name and parameter list, which
 * declares `data` and `len`) is not present in this listing.
 */
2748 static pdf_obj *
2750 {
2751  pdf_obj *dst;
2752  int eod, error;
2753  const char *p = (const char *) data;
2754  const char *endptr = p + len;
2755  unsigned char *buf, ch;
2756  size_t pos, n;
2757 
     /* Two input digits give one output byte, so (len+1)/2 bytes suffice. */
2758  buf = NEW((len+1)/2, unsigned char);
2759  skip_white(&p, endptr);
     /* ch: pending high nibble, n: bytes written, pos: digits consumed. */
2760  ch = 0; n = 0; pos = 0; eod = 0; error = 0;
2761  while (p < endptr && !error && !eod) {
2762  char c1, val;
2763  c1 = p[0];
2764  if (c1 >= 'A' && c1 <= 'F') {
2765  val = c1 - 'A' + 10;
2766  } else if (c1 >= 'a' && c1 <= 'f') {
2767  val = c1 - 'a' + 10;
2768  } else if (c1 >= '0' && c1 <= '9') {
2769  val = c1 - '0';
2770  } else if (c1 == '>') {
     /* EOD.  With an even digit count nothing is pending; with an odd
      * count fall through so the pending nibble is completed with 0. */
2771  val = 0;
2772  eod = 1;
2773  if ((pos % 2) == 0)
2774  break;
2775  } else {
2776  error = -1;
2777  break;
2778  }
2779  if (pos % 2) {
     /* Second digit of a pair: combine with the stored high nibble. */
2780  buf[n] = ch + val;
2781  n++;
2782  ch = 0;
2783  } else {
     /* First digit of a pair: remember it as the high nibble. */
2784  ch = val << 4;
2785  }
2786  pos++; p++;
2787  skip_white(&p, endptr);
2788  }
2789  if (error || !eod) {
2790  WARN("Invalid ASCIIHex data seen: %s", error ? "Invalid character" : "No EOD marker");
2791  dst = NULL;
2792  } else {
2793  dst = pdf_new_stream(0);
2794  pdf_add_stream(dst, buf, n);
2795  }
2796  RELEASE(buf);
2797 
2798  return dst;
2799 }
2800 
/* Whitespace test used while reading ASCII85 data.  Unlike ordinary PDF
 * syntax, a percent sign does not start a comment here, so it must not be
 * skipped.  NUL is counted as whitespace.
 */
#define is_space(c) ((c) == ' ' || (c) == '\t' || (c) == '\f' || \
                     (c) == '\r' || (c) == '\n' || (c) == '\0')

/* Advance *p past any run of is_space() characters, never beyond endptr. */
static void
skip_white_a85 (const char **p, const char *endptr)
{
  const char *cursor = *p;

  for (; cursor < endptr; cursor++) {
    if (!is_space(*cursor))
      break;
  }
  *p = cursor;
}
2813 
/*
 * ASCII85 decoder.  Reads data[0..len) group by group until the "~>"
 * end-of-data marker: 'z' stands for four zero bytes, any other group of up
 * to five characters is interpreted as a base-85 number and stored as
 * big-endian bytes.  Returns a new stream with the decoded bytes, or NULL
 * (after a warning) on malformed input or a missing EOD marker.
 *
 * NOTE(review): the function header line (name and parameter list, which
 * declares `data` and `len`) is not present in this listing.
 */
2814 static pdf_obj *
2816 {
2817  pdf_obj *dst;
2818  int eod, error;
2819  const char *p = (const char *) data;
2820  const char *endptr = p + len;
2821  unsigned char *buf;
2822  size_t n;
2823 
     /* Sized for the regular 5 characters -> 4 bytes ratio.
      * NOTE(review): a 'z' expands one input character to 4 bytes; confirm
      * that input consisting mostly of 'z' cannot write past this buffer. */
2824  buf = NEW(((len+4)/5)*4, unsigned char);
2825  skip_white_a85(&p, endptr);
2826  n = 0; eod = 0; error = 0;
2827  while (p < endptr && !error && !eod) {
     /* A short final group is padded with 'u', the highest digit. */
2828  char q[5] = {'u', 'u', 'u', 'u', 'u'};
2829  int m;
2830  char ch;
2831 
2832  ch = p[0];
2833  p++;
2834  skip_white_a85(&p, endptr);
2835  if (ch == 'z') {
2836  memset(buf+n, 0, 4);
2837  n += 4;
2838  continue;
2839  } else if (ch == '~') {
2840  if (p < endptr && p[0] == '>') {
2841  eod = 1;
2842  p++;
2843  } else {
2844  error = -1;
2845  }
2846  break;
2847  }
     /* NOTE(review): unlike the following characters, the first character
      * of a group is stored without a '!'..'u' range check. */
2848  q[0] = ch;
     /* Collect up to four more characters; m ends as the group length. */
2849  for (m = 1; m < 5 && p < endptr; m++) {
2850  ch = p[0];
2851  p++;
2852  skip_white_a85(&p, endptr);
2853  if (ch == '~') {
2854  if (p < endptr && p[0] == '>') {
2855  eod = 1;
2856  p++;
2857  } else {
2858  error = -1;
2859  }
2860  break;
2861  } else if (ch < '!' || ch > 'u') {
2862  error = -1;
2863  break;
2864  } else {
2865  q[m] = ch;
2866  }
2867  }
2868  if (!error) {
2869  uint32_t val = 0;
2870  int i;
     /* A group needs at least two characters to encode one byte. */
2871  if (m <= 1) {
2872  error = -1;
2873  break;
2874  }
2875  val = 85*85*85*(q[0] - '!') + 85*85*(q[1] - '!')
2876  + 85*(q[2] - '!') + (q[3] - '!');
2877  /* Check overflow */
2878  if (val > UINT32_MAX / 85) {
2879  error = -1;
2880  break;
2881  } else {
2882  val = 85 * val;
2883  if (val > UINT32_MAX - (q[4] - '!')) {
2884  error = -1;
2885  break;
2886  }
2887  val += (q[4] - '!');
2888  }
2889  if (!error) {
     /* Store all four bytes big-endian; only m-1 of them are counted,
      * so bytes produced by padding get overwritten or dropped. */
2890  for (i = 3; i >= 0; i--) {
2891  buf[n + i] = val & 0xff;
2892  val /= 256;
2893  }
2894  n += m - 1;
2895  }
2896  }
2897  }
2898 
2899  if (error) {
2900  WARN("Error in reading ASCII85 data.");
2901  dst = NULL;
2902  } else if (!eod) {
2903  WARN("Error in reading ASCII85 data: No EOD");
2904  dst = NULL;
2905  } else {
2906  dst = pdf_new_stream(0);
2907  pdf_add_stream(dst, buf, n);
2908  }
2909  RELEASE(buf);
2910 
2911  return dst;
2912 }
2913 
/*
 * Apply the decode filter named filter_name to the stream object src.
 *
 *   filter_name  filter to apply; NULL means "no filter", in which case a
 *                new link to src is returned
 *   src          stream whose data is to be decoded
 *   parm         decode parameters object, or NULL; only consulted on the
 *                FlateDecode path
 *
 * Returns the decoded stream, or NULL after a warning when the filter name
 * is not one of those tested below.
 *
 * NOTE(review): in this listing no statement assigns stream_length, and the
 * ASCIIHexDecode and ASCII85Decode branches contain no statement at all
 * (listing lines 2925, 2928 and 2930 are absent).  Restore them from the
 * upstream file before relying on this text.
 */
2914 static pdf_obj *
2915 filter_stream_decode (const char *filter_name, pdf_obj *src, pdf_obj *parm)
2916 {
2917  pdf_obj *dec;
2918  const char *stream_data;
2919  size_t stream_length;
2920 
2921  if (!filter_name)
2922  return pdf_link_obj(src);
2923 
2924  stream_data = pdf_stream_dataptr(src);
2926 
2927  if (!strcmp(filter_name, "ASCIIHexDecode")) {
2929  } else if (!strcmp(filter_name, "ASCII85Decode")) {
     /* NOTE(review): tested with #if here, while zlib.h is included under
      * #ifdef HAVE_ZLIB at the top of the file. */
2931 #if HAVE_ZLIB
2932  } else if (!strcmp(filter_name, "FlateDecode")) {
2933  struct decode_parms decode_parm;
     /* decode_parm is filled in, and passed on, only when parm is given. */
2934  if (parm)
2935  filter_get_DecodeParms_FlateDecode(&decode_parm, parm);
2936  dec = filter_stream_decode_FlateDecode(stream_data, stream_length, parm ? &decode_parm : NULL);
2937 #endif /* HAVE_ZLIB */
2938  } else {
2939  WARN("DecodeFilter \"%s\" not supported.", filter_name);
2940  dec = NULL;
2941  }
2942 
2943  return dec;
2944 }
2945 
/*
 * Decode the stream src according to the /Filter (and /DecodeParms) entries
 * of its dictionary and append the decoded data to dst.  /Filter may be a
 * single name or an array of names; with an array, DecodeParms must be an
 * array of the same length.  Returns 0 on success and -1 on failure.
 * The function name is taken from the warning message below.
 *
 * NOTE(review): the function header and listing lines 2954, 2963, 2984,
 * 3000, 3015 and 3017 are absent from this listing.  As printed, the guard
 * of the first warning, the action taken when there is no /Filter, the
 * assignments of `num` and `tmp1`, and the statements controlled by
 * `if (prev)` / `if (tmp1)` cannot be seen.
 */
2946 int
2948 {
2949  pdf_obj *filtered;
2950  pdf_obj *stream_dict;
2951  pdf_obj *filter, *parms;
2952  int error = 0;
2953 
2955  WARN("Passed invalid type in pdf_concat_stream().");
2956  return -1;
2957  }
2958 
2959  stream_dict = pdf_stream_dict(src);
2960 
2961  filter = pdf_lookup_dict(stream_dict, "Filter");
2962  if (!filter) {
2964  return 0;
2965  }
2966  if (pdf_lookup_dict(stream_dict, "DecodeParms")) {
2967  /* Dictionary or array */
     /* parms is a dereferenced link and is released on every path below. */
2968  parms = pdf_deref_obj(pdf_lookup_dict(stream_dict, "DecodeParms"));
2969  if (!parms) {
2970  WARN("Failed to deref DeocdeParms...");
2971  return -1;
2972  } else if (!PDF_OBJ_ARRAYTYPE(parms) && !PDF_OBJ_DICTTYPE(parms)) {
2973  WARN("PDF dict or array expected for DecodeParms...");
2974  pdf_release_obj(parms);
2975  return -1;
2976  }
2977  } else {
2978  parms = NULL;
2979  }
2980  if (PDF_OBJ_ARRAYTYPE(filter)) {
2981  int i, num;
2982  pdf_obj *prev = NULL;
2983 
2985  if (parms) {
2986  if (!PDF_OBJ_ARRAYTYPE(parms) || pdf_array_length(parms) != num) {
2987  WARN("Invalid DecodeParam object found.");
2988  pdf_release_obj(parms);
2989  return -1;
2990  }
2991  }
2992  if (num == 0) {
2993  filtered = pdf_link_obj(src);
2994  } else {
     /* Chain the filters: the output of each step is the input of the
      * next; the loop stops early once a step yields NULL. */
2995  filtered = NULL;
2996  prev = pdf_link_obj(src);
2997  for (i = 0; i < num && prev != NULL; i++) {
2998  pdf_obj *tmp1, *tmp2;
2999 
3001  if (parms) {
3002  tmp2 = pdf_deref_obj(pdf_get_array(parms, i));
3003  } else {
3004  tmp2 = NULL;
3005  }
3006  if (PDF_OBJ_NAMETYPE(tmp1)) {
3007  filtered = filter_stream_decode(pdf_name_value(tmp1), prev, tmp2);
3008  } else if (PDF_OBJ_NULLTYPE(tmp1)) {
     /* A null entry in the filter array leaves the data unchanged. */
3009  filtered = pdf_link_obj(prev);
3010  } else {
3011  WARN("Unexpected object found for /Filter...");
3012  filtered = NULL;
3013  }
3014  if (prev)
3016  if (tmp1)
3018  if (tmp2)
3019  pdf_release_obj(tmp2);
3020  prev = filtered;
3021  }
3022  }
3023  } else if (PDF_OBJ_NAMETYPE(filter)) {
3024  filtered = filter_stream_decode(pdf_name_value(filter), src, parms);
3025  } else {
3026  WARN("Invalid value for /Filter found.");
3027  filtered = NULL;
3028  }
3029  if (parms)
3030  pdf_release_obj(parms);
3031  if (filtered) {
3032  pdf_add_stream(dst, pdf_stream_dataptr(filtered), pdf_stream_length(filtered));
3033  pdf_release_obj(filtered);
3034  error = 0;
3035  } else {
3036  error = -1;
3037  }
3038 
3039  return error;
3040 }
3041 
/*
 * Builds a new stream object, removes the "Length" key from its dictionary
 * and returns it.
 *
 * NOTE(review): the function header and listing lines 3046, 3048 and 3050
 * are absent from this listing, so the parameter list and the steps between
 * the visible statements cannot be seen.  The block name is taken from the
 * call in read_objstm(), on the assumption that this is that function.
 */
3042 static pdf_obj *
3044  pdf_obj *dst = pdf_new_stream(0);
3045 
3047 
3049  pdf_remove_dict(pdf_stream_dict(dst), "Length");
3051 
3052  return dst;
3053 }
3054 
/* Disabled code (#if 0): accessors for the _flags member of a stream's
 * pdf_stream data.
 * NOTE(review): listing lines 3061 and 3072 are absent in both functions.
 */
3055 #if 0
/* Store flags in the _flags member of the stream's data. */
3056 void
3057 pdf_stream_set_flags (pdf_obj *stream, int flags)
3058 {
3059  pdf_stream *data;
3060 
3062 
3063  data = stream->data;
3064  data->_flags = flags;
3065 }
3066 
/* Return the _flags member of the stream's data. */
3067 int
3068 pdf_stream_get_flags (pdf_obj *stream)
3069 {
3070  pdf_stream *data;
3071 
3073 
3074  data = stream->data;
3075 
3076  return data->_flags;
3077 }
3078 #endif
3079 
/*
 * Write one object to the output p by dispatching on object->type to the
 * matching write_*() routine.  A NULL object is written as the null object.
 * An invalid or undefined object is reported through ERROR().
 * The function name is taken from the error message below.
 *
 * NOTE(review): the function header line (parameter list declaring `p` and
 * `object`) is not present in this listing.
 */
3080 static void
3082 {
3083  ASSERT(p);
3084 
3085  if (object == NULL) {
3086  write_null(p);
3087  return;
3088  }
3089 
3090  if (INVALIDOBJ(object) || PDF_OBJ_UNDEFINED(object))
3091  ERROR("pdf_write_obj: Invalid object, type = %d\n", object->type);
3092 
     /* error_out is set by pdf_release_obj() while it reports an invalid
      * object; the reference count is then printed before each object. */
3093  if (error_out)
3094  fprintf(stderr, "{%d}", object->refcount);
3095 
3096  switch (object->type) {
3097  case PDF_BOOLEAN:
3098  write_boolean(p, object->data);
3099  break;
3100  case PDF_NUMBER:
3101  write_number (p, object->data);
3102  break;
3103  case PDF_STRING:
3104  write_string (p, object->data);
3105  break;
3106  case PDF_NAME:
3107  write_name(p, object->data);
3108  break;
3109  case PDF_ARRAY:
3110  write_array(p, object->data);
3111  break;
3112  case PDF_DICT:
3113  write_dict (p, object->data);
3114  break;
3115  case PDF_STREAM:
3116  write_stream(p, object->data);
3117  break;
3118  case PDF_NULL:
3119  write_null(p);
3120  break;
3121  case PDF_INDIRECT:
3122  write_indirect(p, object->data);
3123  break;
3124  }
3125 }
3126 
/*
 * Write a labelled object to the output file as
 *     "<label> <generation> obj\n" <object> "\nendobj\n"
 * after recording the current file position in the xref table (entry
 * type 1).  Encryption is switched on for the object only when it is enabled
 * in the options and the object does not carry OBJ_NO_ENCRYPT.
 *
 * NOTE(review): the function header line and listing line 3144 (a second
 * statement inside the enc_mode branch) are not present in this listing.
 */
3128 static void
3130 {
3131  size_t length;
3132  char buf[64];
3133 
3134  /*
3135  * Record file position
3136  */
3137  add_xref_entry(p, object->label, 1,
3138  p->output.file_position, object->generation);
3139  length = sprintf(buf, "%u %hu obj\n", object->label, object->generation);
3140  p->state.enc_mode =
3141  (p->options.enable_encrypt && !(object->flags & OBJ_NO_ENCRYPT)) ? 1 : 0;
3142  if (p->state.enc_mode) {
3143  pdf_enc_set_label(p->sec_data, object->label);
3145  }
3146  pdf_out_str(p, buf, length);
3147  pdf_write_obj(p, object);
     /* 8 is the length of the "\nendobj\n" literal. */
3148  pdf_out_str(p, "\nendobj\n", 8);
3149 }
3150 
/*
 * Append `object` to the object stream `objstm` and return the number of
 * objects the stream holds afterwards.
 *
 * The int array attached to objstm keeps the object count in data[0]; for
 * the k-th object (k counted from 1) data[2k] is its label and data[2k+1]
 * the stream length at the time it was added, i.e. its offset within the
 * stream data.
 *
 * NOTE(review): the function header line is not present in this listing;
 * the call in pdf_release_obj() passes (p, objstm, object).
 */
3151 static int
3153 {
3154  int *data, pos;
3155 
3156  TYPECHECK(objstm, PDF_STREAM);
3157  ASSERT(p);
3158 
3159  data = get_objstm_data(objstm);
3160  pos = ++data[0];
3161 
3162  data[2*pos] = object->label;
3163  data[2*pos+1] = pdf_stream_length(objstm);
3164 
     /* xref entry type 2: containing stream's label and zero-based index. */
3165  add_xref_entry(p, object->label, 2, objstm->label, pos-1);
3166 
3167  /* redirect output into objstm */
3168  p->output_stream = objstm;
     /* Encryption is turned off while writing into the object stream. */
3169  p->state.enc_mode = 0;
3170  pdf_write_obj(p, object);
3171  pdf_out_char(p, '\n');
3172  p->output_stream = NULL;
3173 
3174  return pos;
3175 }
3176 
/*
 * Finish an object stream: put the table of (label, offset) pairs in front
 * of the object data collected so far, add the /Type and /First dictionary
 * entries, and release the stream object.
 *
 * NOTE(review): the function header line and listing line 3206 (between the
 * /Type and /First entries) are not present in this listing.
 */
3177 static void
3179 {
3180  int *data = get_objstm_data(objstm);
     /* data[0] is the number of objects stored in the stream. */
3181  int pos = data[0];
3182  pdf_obj *dict;
3183  pdf_stream *stream;
3184  unsigned char *old_buf;
3185  size_t old_length;
3186  stream = (pdf_stream *) objstm->data;
3187 
3188  /* Precede stream data by offset table */
3189  old_buf = stream->stream;
3190  old_length = stream->stream_length;
3191  /* Reserve 22 bytes for each entry (two 10 digit numbers plus two spaces) */
3192  stream->stream = NEW(old_length + 22*pos, unsigned char);
     /* Start the stream over; the old data is appended again below. */
3193  stream->stream_length = 0;
3194 
3195  {
     /* Emit the 2*pos integers that follow the two header ints. */
3196  int i = 2*pos, *val = data+2;
3197  while (i--) {
3198  char buf[32];
3199  size_t length = sprintf(buf, "%d ", *(val++));
3200  pdf_add_stream(objstm, buf, length);
3201  }
3202  }
3203 
3204  dict = pdf_stream_dict(objstm);
3205  pdf_add_dict(dict, pdf_new_name("Type"), pdf_new_name("ObjStm"));
     /* /First is the length of the table just written. */
3207  pdf_add_dict(dict, pdf_new_name("First"), pdf_new_number(stream->stream_length));
3208 
3209  pdf_add_stream(objstm, old_buf, old_length);
3210  RELEASE(old_buf);
3211  pdf_release_obj(objstm);
3212 }
3213 
/* Non-zero when bit number c is set in the bitmap b.  Bits are numbered
 * from the most significant bit of each byte: bit c lives in byte c/8 under
 * mask 1 << (7 - c%8). */
3214 #define is_free(b,c) (((b)[(c)/8]) & (1 << (7-((c)%8))))
3215 
/*
 * Drop one reference to `object` (a NULL pointer is ignored).  When the
 * count reaches zero the object is first written out if it carries a label
 * and an output file is open, then its type-specific data and the object
 * itself are freed.
 * The function name is taken from the messages below.
 *
 * NOTE(review): the function header line (parameter list declaring
 * `object`) is not present in this listing.
 */
3216 void
3218 {
3219  pdf_out *p = current_output();
3220 
3221  if (object == NULL)
3222  return;
3223  if (INVALIDOBJ(object) || object->refcount <= 0) {
3224  MESG("\npdf_release_obj: object=%p, type=%d, refcount=%d\n",
3225  object, object->type, object->refcount);
     /* error_out makes pdf_write_obj() also print reference counts. */
3226  error_out = 1;
3227  pdf_write_obj(p, object);
3228  ERROR("pdf_release_obj: Called with invalid object.");
3229  error_out = 0;
3230  }
3231  object->refcount -= 1;
3232  if (object->refcount == 0) {
3233 #if defined(PDFOBJ_DEBUG)
3234  bucket[object->obj_id] = NULL;
3235 #endif
3236  /*
3237  * Nothing is using this object so it's okay to remove it.
3238  * Nonzero "label" means object needs to be written before it's destroyed.
3239  */
3240  if (object->label) {
     /* Set this label's bit in free_list (MSB-first within each byte). */
3241  p->free_list[object->label/8] |= (1 << (7-(object->label % 8)));
3242  if (p->output.file != NULL) {
     /* Written as a plain indirect object when object streams are off,
      * the object is flagged OBJ_NO_OBJSTM, it must stay unencrypted
      * while encryption is enabled, or its generation is non-zero. */
3243  if (!p->options.use_objstm || object->flags & OBJ_NO_OBJSTM ||
3244  (p->options.enable_encrypt && (object->flags & OBJ_NO_ENCRYPT)) ||
3245  object->generation) {
3246  pdf_flush_obj(p, object);
3247  } else {
     /* Otherwise it goes into the current object stream, which is
      * created on demand with its (count, label/offset) int array. */
3248  if (!p->current_objstm) {
3249  int *data = NEW(2*OBJSTM_MAX_OBJS+2, int);
3250  data[0] = data[1] = 0;
3251  p->current_objstm = pdf_new_stream(STREAM_COMPRESS);
3252  set_objstm_data(p->current_objstm, data);
3253  pdf_label_obj(p, p->current_objstm);
3254  }
     /* Close the object stream once it holds OBJSTM_MAX_OBJS objects. */
3255  if (pdf_add_objstm(p, p->current_objstm, object) == OBJSTM_MAX_OBJS) {
3256  release_objstm(p->current_objstm);
3257  p->current_objstm = NULL;
3258  }
3259  }
3260  }
3261  }
3262  switch (object->type) {
3263  case PDF_BOOLEAN:
3264  release_boolean(object->data);
3265  break;
3266  case PDF_NULL:
3267  break;
3268  case PDF_NUMBER:
3269  release_number(object->data);
3270  break;
3271  case PDF_STRING:
3272  release_string(object->data);
3273  break;
3274  case PDF_NAME:
3275  release_name(object->data);
3276  break;
3277  case PDF_ARRAY:
3278  release_array(object->data);
3279  break;
3280  case PDF_DICT:
3281  release_dict(object->data);
3282  break;
3283  case PDF_STREAM:
3284  release_stream(object->data);
3285  break;
3286  case PDF_INDIRECT:
3287  release_indirect(object->data);
3288  break;
3289  }
3290  /* This might help detect freeing already freed objects */
3291  object->type = -1;
3292  object->data = NULL;
3293  RELEASE(object);
3294  }
3295 }
3296 
3297 /* Reading external PDF files
3298  *
3299  */
/*
 * Move the read position of pdf_input_file backwards until an end-of-line
 * character ('\n' or '\r') has been read or the start of the file is
 * reached.  Returns 1 on success and 0 when no character could be read;
 * that includes the case where the position is <= 1 on entry, because `ch`
 * then keeps its initial value of -1.
 *
 * NOTE(review): the function header line and listing line 3312 (the body of
 * the do-loop, presumably the backward seek) are not present in this
 * listing.  The block name is taken from the call in the xref locator below.
 */
3300 static int
3302 {
3303  int ch = -1;
3304 
3305  /*
3306  * Note: this code should work even if \r\n is eol. It could fail on a
3307  * machine where \n is eol and there is a \r in the stream --- Highly
3308  * unlikely in the last few bytes where this is likely to be used.
3309  */
3310  if (tell_position(pdf_input_file) > 1)
3311  do {
3313  } while (tell_position(pdf_input_file) > 0 &&
3314  (ch = fgetc(pdf_input_file)) >= 0 &&
3315  (ch != '\n' && ch != '\r' ));
3316  if (ch < 0) {
3317  return 0;
3318  }
3319 
3320  return 1;
3321 }
3322 
/*
 * Locate the "startxref" keyword by stepping backwards through
 * pdf_input_file one line at a time (at most 10 attempts), then read the
 * xref position that follows it.  Returns that position, or 0 when the
 * keyword is not found or the position cannot be read.
 *
 * NOTE(review): the function header line and listing lines 3346, 3348 and
 * 3358 are not present in this listing; they presumably skip the rest of
 * the keyword line, assign `len`, and assign `number`.  The block name is
 * assumed.  Also note that `len` is a size_t, so `len <= 0` can only be
 * true for 0, and that the position is narrowed through (int) atof().
 */
3323 static int
3325 {
3326  size_t len, xref_pos = 0;
3327  int tries = 10;
3328 
3329  do {
3330  size_t currentpos;
3331 
3332  if (!backup_line(pdf_input_file)) {
3333  tries = 0;
3334  break;
3335  }
     /* Peek at the start of this line, then return to where we were. */
3336  currentpos = tell_position(pdf_input_file);
3337  fread(work_buffer, sizeof(char), strlen("startxref"), pdf_input_file);
3338  seek_absolute(pdf_input_file, currentpos);
3339  tries--;
3340  } while (tries > 0 &&
3341  strncmp(work_buffer, "startxref", strlen("startxref")));
3342  if (tries <= 0)
3343  return 0;
3344 
3345  /* Skip rest of this line */
3347  /* Next line of input file should contain actual xref location */
3349  if (len <= 0)
3350  WARN("Reading xref location data failed... Not a PDF file?");
3351  else {
3352  const char *start, *end;
3353  char *number;
3354 
3355  start = work_buffer;
3356  end = start + len;
3357  skip_white(&start, end);
3359  xref_pos = (int) atof(number);
3360  RELEASE(number);
3361  }
3362 
3363  return xref_pos;
3364 }
3365 
3366 /*
3367  * This routine must be called with the file pointer located
3368  * at the start of the trailer.
3369  */
/*
 * Reads up to WORK_BUFFER_SIZE bytes from pf->file into work_buffer and
 * checks that they begin with the "trailer" keyword.  On failure two
 * warnings are issued and NULL is returned.
 *
 * NOTE(review): the function header line and listing lines 3386-3387 (which
 * must assign `result` on the success path) are not present in this
 * listing; the block name is assumed from the mention of parse_trailer in a
 * comment in parse_xref_table().  work_buffer is printed with "%s" without a
 * visible NUL terminator being written; confirm how it is terminated.
 */
3370 static pdf_obj *
3372 {
3373  pdf_obj *result;
3374  /*
3375  * Fill work_buffer and hope trailer fits. This should
3376  * be made a bit more robust sometime.
3377  */
3378  if (fread(work_buffer, sizeof(char),
3379  WORK_BUFFER_SIZE, pf->file) == 0 ||
3380  strncmp(work_buffer, "trailer", strlen("trailer"))) {
3381  WARN("No trailer. Are you sure this is a PDF file?");
3382  WARN("buffer:\n->%s<-\n", work_buffer);
3383  result = NULL;
3384  } else {
     /* p points just past the "trailer" keyword. */
3385  const char *p = work_buffer + strlen("trailer");
3388  }
3389 
3390  return result;
3391 }
3392 
3393 /*
3394  * This routine tries to estimate an upper bound for character position
3395  * of the end of the object, so it knows how big the buffer must be.
3396  * The parsing routines require that the entire object be read into
3397  * memory. It would be a major pain to rewrite them. The worst case
3398  * is that an object before an xref table will grab the whole table
3399  * :-(
3400  */
/*
 * Returns the smallest field2 value (file offset) among type-1 xref entries
 * that is greater than the field2 of entry obj_num, or pf->file_size when
 * there is none.
 *
 * NOTE(review): the function header line is not present in this listing;
 * callers pass (pf, obj_num).  The result is computed as uint32_t but the
 * declared return type is int.
 */
3401 static int
3403 {
3404  uint32_t next = pf->file_size; /* Worst case */
3405  size_t i;
3406  uint32_t curr;
3407 
3408  curr = pf->xref_table[obj_num].field2;
3409  /* Check all other type 1 objects to find next one */
3410  for (i = 0; i < pf->num_obj; i++) {
3411  if (pf->xref_table[i].type == 1 &&
3412  pf->xref_table[i].field2 > curr &&
3413  pf->xref_table[i].field2 < next)
3414  next = pf->xref_table[i].field2;
3415  }
3416 
3417  return next;
3418 }
3419 
/* True when object number n with generation g can be looked up in pf:
 * n must satisfy 0 < n < num_obj, and its xref entry must either be of
 * type 1 with field3 equal to g, or of type 2 with g equal to 0. */
3420 #define checklabel(pf, n, g) ((n) > 0 && (n) < (pf)->num_obj && ( \
3421  ((pf)->xref_table[(n)].type == 1 && (pf)->xref_table[(n)].field3 == (g)) || \
3422  ((pf)->xref_table[(n)].type == 2 && !(g))))
3423 
/*
 * Builds a pdf_indirect record that refers to object (obj_num, obj_gen) of
 * the file pf, with no resolved object attached (obj == NULL), and returns
 * the pdf_obj that carries it.
 *
 * NOTE(review): the function header line and listing line 3436 (which must
 * create `result`) are not present in this listing; the block name is
 * assumed.
 */
3424 pdf_obj *
3426 {
3427  pdf_obj *result;
3428  pdf_indirect *indirect;
3429 
3430  indirect = NEW(1, pdf_indirect);
3431  indirect->pf = pf;
3432  indirect->obj = NULL;
3433  indirect->label = obj_num;
3434  indirect->generation = obj_gen;
3435 
3437  result->data = indirect;
3438 
3439  return result;
3440 }
3441 
3442 static pdf_obj *
3443 pdf_read_object (uint32_t obj_num, uint16_t obj_gen, pdf_file *pf, size_t offset, size_t limit)
3444 {
3445  pdf_obj *result = NULL;
3446  size_t length;
3447  char *buffer;
3448  const char *p, *endptr;
3449 
3450  length = limit - offset;
3451 
3452  if (length <= 0)
3453  return NULL;
3454 
3455  buffer = NEW(length + 1, char);
3456 
3457  seek_absolute(pf->file, offset);
3458  fread(buffer, sizeof(char), length, pf->file);
3459 
3460  p = buffer;
3461  endptr = p + length;
3462 
3463  /* Check for obj_num and obj_gen */
3464  {
3465  const char *q = p; /* <== p */
3466  char *sp;
3467  uint32_t n, g;
3468 
3469  skip_white(&q, endptr);
3470  sp = parse_unsigned(&q, endptr);
3471  if (!sp) {
3472  RELEASE(buffer);
3473  return NULL;
3474  }
3475  n = strtoul(sp, NULL, 10);
3476  RELEASE(sp);
3477 
3478  skip_white(&q, endptr);
3479  sp = parse_unsigned(&q, endptr);
3480  if (!sp) {
3481  RELEASE(buffer);
3482  return NULL;
3483  }
3484  g = strtoul(sp, NULL, 10);
3485  RELEASE(sp);
3486 
3487  if (obj_num && (n != obj_num || g != obj_gen)) {
3488  RELEASE(buffer);
3489  return NULL;
3490  }
3491 
3492  p = q; /* ==> p */
3493  }
3494 
3495 
3496  skip_white(&p, endptr);
3497  if (memcmp(p, "obj", strlen("obj"))) {
3498  WARN("Didn't find \"obj\".");
3499  RELEASE(buffer);
3500  return NULL;
3501  }
3502  p += strlen("obj");
3503 
3504  result = parse_pdf_object(&p, endptr, pf);
3505 
3506  skip_white(&p, endptr);
3507  if (memcmp(p, "endobj", strlen("endobj"))) {
3508  WARN("Didn't find \"endobj\".");
3509  if (result)
3511  result = NULL;
3512  }
3513  RELEASE(buffer);
3514 
3515  return result;
3516 }
3517 
/*
 * Read object stream number `num` from pf, uncompress it, check its /Type,
 * /N and /First entries and parse the table of 2*N integers found in the
 * first /First bytes of the stream data.  The parsed table (preceded by N
 * and First) is attached to the stream with set_objstm_data().  On success
 * the stream is stored in pf->xref_table[num].direct and returned; on
 * failure a warning is issued and NULL is returned.
 *
 * NOTE(review): the function header line is not present in this listing;
 * the caller passes (pf, objstm_num).  `n` and `first` come from the file
 * and are not checked for negative values before being used in NEW() and
 * memcpy(); confirm whether that is guarded elsewhere.
 */
3518 static pdf_obj *
3520 {
3521  size_t offset = pf->xref_table[num].field2;
3522  uint16_t gen = pf->xref_table[num].field3;
3523  size_t limit = next_object_offset(pf, num);
3524  int n, first, *header = NULL;
3525  char *data = NULL, *q;
3526  const char *p, *endptr;
3527  int i;
3528  pdf_obj *objstm, *dict, *type, *n_obj, *first_obj;
3529 
3530  objstm = pdf_read_object(num, gen, pf, offset, limit);
3531 
3532  if (!PDF_OBJ_STREAMTYPE(objstm))
3533  goto error;
3534 
3535  {
     /* Continue with the uncompressed copy of the stream. */
3536  pdf_obj *tmp = pdf_stream_uncompress(objstm);
3537  if (!tmp)
3538  goto error;
3539  pdf_release_obj(objstm);
3540  objstm = tmp;
3541  }
3542 
3543  dict = pdf_stream_dict(objstm);
3544 
3545  type = pdf_lookup_dict(dict, "Type");
3546  if (!PDF_OBJ_NAMETYPE(type) ||
3547  strcmp(pdf_name_value(type), "ObjStm"))
3548  goto error;
3549 
3550  n_obj = pdf_lookup_dict(dict, "N");
3551  if (!PDF_OBJ_NUMBERTYPE(n_obj))
3552  goto error;
3553  n = (int) pdf_number_value(n_obj);
3554 
3555  first_obj = pdf_lookup_dict(dict, "First");
3556  if (!PDF_OBJ_NUMBERTYPE(first_obj))
3557  goto error;
3558  first = (int) pdf_number_value(first_obj);
3559  /* reject object streams without object data */
3560  if (first >= pdf_stream_length(objstm))
3561  goto error;
3562 
     /* Layout: header[0] = N, header[1] = First, then 2*N table integers. */
3563  header = NEW(2*(n+1), int);
3564  set_objstm_data(objstm, header);
3565  *(header++) = n;
3566  *(header++) = first;
3567 
3568  /* avoid parsing beyond offset table */
     /* A NUL-terminated copy keeps strtoul() inside the table. */
3569  data = NEW(first + 1, char);
3570  memcpy(data, pdf_stream_dataptr(objstm), first);
3571  data[first] = 0;
3572 
3573  p = data;
3574  endptr = p + first;
3575  i = 2*n;
3576  while (i--) {
3577  *(header++) = strtoul(p, &q, 10);
     /* No digits consumed: the table holds fewer numbers than 2*N. */
3578  if (q == p)
3579  goto error;
3580  p = q;
3581  }
3582 
3583  /* Any garbage after last entry? */
3584  skip_white(&p, endptr);
3585  if (p != endptr)
3586  goto error;
3587  RELEASE(data);
3588 
3589  return pf->xref_table[num].direct = objstm;
3590 
3591  error:
3592  WARN("Cannot parse object stream.");
3593  if (data)
3594  RELEASE(data);
3595  if (objstm)
3596  pdf_release_obj(objstm);
3597  return NULL;
3598 }
3599 
3600 /* Labels without a corresponding object definition are replaced by the
3601  * null object, as required by the PDF spec. This is important for parsing
3602  * several cross-reference sections.
3603  */
/*
 * Return object (obj_num, obj_gen) of the file pf.  An object already held
 * in the xref entry's `direct` slot is returned as a new link.  Otherwise it
 * is read from the file (xref type 1) or parsed out of its object stream
 * (xref type 2), and a link is stored in the `direct` slot.  A null object
 * is returned, after a warning, when the label is not valid or the object
 * cannot be read from an object stream.
 *
 * NOTE(review): the function header line is not present in this listing;
 * pdf_deref_obj() calls it as (pf, obj_num, obj_gen) with a uint32_t and a
 * uint16_t, so the "%lu" in the first warning may not match the argument
 * type -- confirm against the header.
 */
3604 static pdf_obj *
3606 {
3607  pdf_obj *result;
3608 
3609  if (!checklabel(pf, obj_num, obj_gen)) {
3610  WARN("Trying to read nonexistent or deleted object: %lu %u",
3611  obj_num, obj_gen);
3612  return pdf_new_null();
3613  }
3614 
3615  if ((result = pf->xref_table[obj_num].direct)) {
3616  return pdf_link_obj(result);
3617  }
3618 
3619  if (pf->xref_table[obj_num].type == 1) {
3620  /* type == 1 */
3621  uint32_t offset;
3622  size_t limit;
3623  offset = pf->xref_table[obj_num].field2;
3624  limit = next_object_offset(pf, obj_num);
3625  result = pdf_read_object(obj_num, obj_gen, pf, offset, limit);
3626  } else {
3627  /* type == 2 */
     /* field2 is the containing object stream, field3 the index in it. */
3628  uint32_t objstm_num = pf->xref_table[obj_num].field2;
3629  uint16_t index = pf->xref_table[obj_num].field3;
3630  pdf_obj *objstm;
3631  int *data, n, first, length;
3632  const char *p, *q;
3633 
     /* Use the cached object stream, or read and parse it now. */
3634  if (objstm_num >= pf->num_obj ||
3635  pf->xref_table[objstm_num].type != 1 ||
3636  !((objstm = pf->xref_table[objstm_num].direct) ||
3637  (objstm = read_objstm(pf, objstm_num))))
3638  goto error;
3639 
3640  data = get_objstm_data(objstm);
3641  n = *(data++);
3642  first = *(data++);
3643 
     /* The table entry at this index must name the requested object. */
3644  if (index >= n || data[2*index] != obj_num)
3645  goto error;
3646 
3647  length = pdf_stream_length(objstm);
3648  p = (const char *) pdf_stream_dataptr(objstm) + first + data[2*index+1];
     /* NOTE(review): the end pointer is formed by adding a stream-relative
      * position to p, which already includes first + offset; check that it
      * cannot point beyond the stream data. */
3649  q = p + (index == n-1 ? length : first+data[2*index+3]);
3650  result = parse_pdf_object(&p, q, pf);
3651  if (!result)
3652  goto error;
3653  }
3654 
3655  /* Make sure the caller doesn't free this object */
3656  pf->xref_table[obj_num].direct = pdf_link_obj(result);
3657 
3658  return result;
3659 
3660  error:
3661  WARN("Could not read object from object stream.");
3662  return pdf_new_null();
3663 }
3664 
/* Field accessors for an object whose data pointer refers to a
 * pdf_indirect record: owning file, referenced object, label (object
 * number) and generation. */
3665 #define OBJ_FILE(o) (((pdf_indirect *)((o)->data))->pf)
3666 #define OBJ_OBJ(o) (((pdf_indirect *)((o)->data))->obj)
3667 #define OBJ_NUM(o) (((pdf_indirect *)((o)->data))->label)
3668 #define OBJ_GEN(o) (((pdf_indirect *)((o)->data))->generation)
3669 
/*
 * Builds a reference to `object`: the object is given a label first if it
 * has none (label == 0), and the resulting reference records `object` as
 * its target.
 *
 * NOTE(review): the function header line and listing line 3680 (which must
 * create `result`) are not present in this listing; the block name is
 * assumed.
 */
3670 static pdf_obj *
3672 {
3673  pdf_obj *result;
3674 
3675  ASSERT(p);
3676 
3677  if (object->label == 0) {
3678  pdf_label_obj(p, object);
3679  }
3681  OBJ_OBJ(result) = object;
3682  return result;
3683 }
3684 
3685 /* pdf_deref_obj always returns a link instead of the original. */
3686 /* It never returns the null object, but the NULL pointer instead. */
/*
 * Follows a chain of indirect references, for at most PDF_OBJ_MAX_DEPTH
 * steps, and returns the object finally reached.  References into an input
 * file are resolved with pdf_get_object(); references created for the
 * output are resolved through the object stored in the reference, unless
 * the label is marked in the output's free_list, in which case NULL is
 * returned.
 *
 * NOTE(review): the function header line (declaring `obj`) is not present
 * in this listing.
 */
3687 pdf_obj *
3689 {
3690  int count = PDF_OBJ_MAX_DEPTH;
3691 
     /* Work on a link of our own, so it can be released at each step. */
3692  if (obj)
3693  obj = pdf_link_obj(obj);
3694 
3695  while (PDF_OBJ_INDIRECTTYPE(obj) && --count) {
3696  pdf_file *pf = OBJ_FILE(obj);
3697  if (pf) {
     /* Copy the numbers out before the reference is released. */
3698  uint32_t obj_num = OBJ_NUM(obj);
3699  uint16_t obj_gen = OBJ_GEN(obj);
3700  pdf_release_obj(obj);
3701  obj = pdf_get_object(pf, obj_num, obj_gen);
3702  } else {
3703  pdf_out *p = current_output();
3704  pdf_indirect *data = obj->data;
3705 
     /* Same bit layout as the free_list update in pdf_release_obj(). */
3706  if ((p->free_list[data->label/8] & (1 << (7-((data->label) % 8))))) {
3707  pdf_release_obj(obj);
3708  return NULL;
3709  } else {
3710  pdf_obj *next_obj = OBJ_OBJ(obj);
3711  if (!next_obj) {
3712  ERROR("Undefined object reference");
3713  }
3714  pdf_release_obj(obj);
3715  obj = pdf_link_obj(next_obj);
3716  }
3717  }
3718  }
3719 
     /* The step budget ran out while obj was still a reference. */
3720  if (!count)
3721  ERROR("Loop in object hierarchy detected. Broken PDF file?");
3722 
3723  if (PDF_OBJ_NULLTYPE(obj)) {
3724  pdf_release_obj(obj);
3725  return NULL;
3726  } else
3727  return obj;
3728 }
3729 
/*
 * Resize pf->xref_table to new_size entries, initialise the entries from
 * the old pf->num_obj onwards (no objects attached, type 0, fields zero) and
 * set pf->num_obj to new_size.
 *
 * NOTE(review): the function header line is not present in this listing;
 * parse_xref_table() calls it as (pf, first + size).
 */
3730 static void
3732 {
3733  size_t i;
3734 
3735  pf->xref_table = RENEW(pf->xref_table, new_size, xref_entry);
3736  for (i = pf->num_obj; i < new_size; i++) {
3737  pf->xref_table[i].direct = NULL;
3738  pf->xref_table[i].indirect = NULL;
3739  pf->xref_table[i].type = 0;
3740  pf->xref_table[i].field3 = 0;
3741  pf->xref_table[i].field2 = 0L;
3742  }
3743  pf->num_obj = new_size;
3744 }
3745 
3746 /* Returns < 0 for error, 1 for success, and 0 when xref stream found. */
3747 static int
3748 parse_xref_table (pdf_file *pf, size_t xref_pos)
3749 {
3750  FILE *pdf_input_file = pf->file;
3751  const char *p, *endptr;
3752  char buf[256]; /* See, PDF ref. v.1.7, p.91 for "255+1" here. */
3753  size_t len;
3754 
3755  /*
3756  * This routine reads one xref segment. It may be called multiple times
3757  * on the same file. xref tables sometimes come in pieces.
3758  */
3759  seek_absolute(pf->file, xref_pos);
3760  len = mfreadln(buf, 255, pdf_input_file);
3761  /* We should have already checked that "startxref" section exists.
3762  * So, EOF here (len = -1) is impossible. We don't treat too long line
3763  * case seriously.
3764  */
3765  if (len < 0) {
3766  WARN("Something went wrong while reading xref table...giving up.");
3767  return -1;
3768  }
3769  p = buf;
3770  endptr = buf + len;
3771  /* No skip_white() here. There should not be any white-spaces here. */
3772  if (memcmp(p, "xref", strlen("xref"))) {
3773  /* Might be an xref stream and not an xref table */
3774  return 0;
3775  }
3776  p += strlen("xref");
3777  skip_white(&p, endptr);
3778  if (p != endptr) {
3779  WARN("Garbage after \"xref\" keyword found.");
3780  return -1;
3781  }
3782 
3783  /* Next line in file has first item and size of table */
3784  for (;;) {
3785  char flag;
3786  size_t current_pos, size, offset;
3787  int i;
3788  uint32_t first, obj_gen;
3789 
3790  current_pos = tell_position(pdf_input_file);
3791  len = mfreadln(buf, 255, pdf_input_file);
3792  if (len == 0) /* empty line... just skip. */
3793  continue;
3794  else if (len < 0) {
3795  WARN("Reading a line failed in xref table.");
3796  return -1;
3797  }
3798 
3799  p = buf;
3800  endptr = buf + len;
3801  skip_white(&p, endptr);
3802  if (p == endptr) { /* Only white-spaces and/or comment found. */
3803  continue;
3804  } else if (!strncmp(p, "trailer", strlen ("trailer"))) {
3805  /*
3806  * Backup... This is ugly, but it seems like the safest thing to
3807  * do. It is possible the trailer dictionary starts on the same
3808  * logical line as the word trailer. In that case, the mfgets
3809  * call might have started to read the trailer dictionary and
3810  * parse_trailer would fail.
3811  */
3812  current_pos += p - buf; /* Jump to the beginning of "trailer" keyword. */
3813  seek_absolute(pdf_input_file, current_pos);
3814  break;
3815  }
3816  /* Line containing something other than white-space characters found.
3817  *
3818  * Start reading xref subsection
3819  *
3820  * This section just reads two nusigned integers, namely, the object number
3821  * of first object and the size of the xref subsection. PDF reference says
3822  * that only "a space" is allowed between those two numbers but we allow
3823  * more white-space characters.
3824  */
3825  {
3826  char *q;
3827 
3828  /* Object number of the first object whithin this xref subsection. */
3829  q = parse_unsigned(&p, endptr);
3830  if (!q) {
3831  WARN("An unsigned integer expected but could not find. (xref)");
3832  return -1;
3833  }
3834  first = atoi(q);
3835  RELEASE(q);
3836  skip_white(&p, endptr);
3837 
3838  /* Nnumber of objects in this xref subsection. */
3839  q = parse_unsigned(&p, endptr);
3840  if (!q) {
3841  WARN("An unsigned integer expected but could not find. (xref)");
3842  return -1;
3843  }
3844  size = atoi(q);
3845  RELEASE(q);
3846  skip_white(&p, endptr);
3847 
3848  /* Check for unrecognized tokens */
3849  if (p != endptr) {
3850  WARN("Unexpected token found in xref table.");
3851  return -1;
3852  }
3853  }
3854 
3855  /* The first line of a xref subsection OK. */
3856  if (pf->num_obj < first + size) {
3857  extend_xref(pf, first + size);
3858  }
3859 
3860  /* Start parsing xref subsection body... */
3861  for (i = first; i < first + size; ) {
3862  /* PDF spec. requires each xref subsection lines being exactly 20 bytes
3863  * long [including end-of-line marker(s)], offset 10 decimal digits,
3864  * generation number being 5 decimal digits, and each entry delimited
3865  * by "a single space". However, we don't strictly follow this rule:
3866  * More than one "white-spaces" allowed, can be ended with a comment,
3867  * and so on.
3868  */
3869  len = mfreadln(buf, 255, pdf_input_file);
3870  if (len == 0) /* empty line...just skip. */
3871  continue;
3872  else if (len < 0) {
3873  WARN("Something went wrong while reading xref subsection...");
3874  return -1;
3875  }
3876  p = buf;
3877  endptr = buf + len;
3878  skip_white(&p, endptr);
3879  if (p == endptr) /* Only white-spaces and/or comment. */
3880  continue;
3881 
3882  /*
3883  * Don't overwrite positions that have already been set by a
3884  * modified xref table. We are working our way backwards
3885  * through the reference table, so we only set "position"
3886  * if it hasn't been set yet.
3887  */
3888  offset = 0UL; obj_gen = 0; flag = 0;
3889  {
3890  char *q;
3891 
3892  /* Offset value -- 10 digits (0 padded) */
3893  q = parse_unsigned(&p, endptr);
3894  if (!q) {
3895  WARN("An unsigned integer expected but could not find. (xref)");
3896  return -1;
3897  } else if (strlen(q) != 10) { /* exactly 10 digits */
3898  WARN(("Offset must be a 10 digits number. (xref)"));
3899  RELEASE(q);
3900  return -1;
3901  }
3902  /* FIXME: Possible overflow here. Consider using strtoll(). */
3903  offset = atoi(q);
3904  RELEASE(q);
3905  skip_white(&p, endptr);
3906 
3907  /* Generation number -- 5 digits (0 padded) */
3908  q = parse_unsigned(&p, endptr);
3909  if (!q) {
3910  WARN("An unsigned integer expected but could not find. (xref)");
3911  return -1;
3912  } else if (strlen(q) != 5) { /* exactly 5 digits */
3913  WARN(("Expecting a 5 digits number. (xref)"));
3914  RELEASE(q);
3915  return -1;
3916  }
3917  obj_gen = atoi(q);
3918  RELEASE(q);
3919  skip_white(&p, endptr);
3920  }
3921  if (p == endptr) {
3922  WARN("Unexpected EOL reached while reading a xref subsection entry.");
3923  return -1;
3924  }
3925 
3926  /* Flag -- a char */
3927  flag = *p; p++;
3928  skip_white(&p, endptr);
3929  if (p < endptr) {
3930  WARN("Garbage in xref subsection entry found...");
3931  return -1;
3932  } else if (((flag != 'n' && flag != 'f') ||
3933  (flag == 'n' &&
3934  (offset >= pf->file_size || (offset > 0 && offset < 4))))) {
3935  WARN("Invalid xref table entry [%lu]. PDF file is corrupt...", i);
3936  return -1;
3937  }
3938 
3939  /* Everything seems to be OK. */
3940  if (!pf->xref_table[i].field2) {
3941  pf->xref_table[i].type = (flag == 'n');
3942  pf->xref_table[i].field2 = offset;
3943  pf->xref_table[i].field3 = obj_gen;
3944  }
3945  i++;
3946  }
3947  }
3948 
3949  return 1;
3950 }
3951 
/*
 * Decode one field of a cross-reference stream entry.
 *
 * Takes `length` bytes starting at *p, assembles them most significant
 * byte first, and leaves *p pointing just past the bytes consumed.
 * A field of width zero consumes nothing and evaluates to `def`.
 */
static uint32_t
parse_xrefstm_field (const char **p, size_t length, uint32_t def)
{
  const char *cursor;
  uint32_t    acc;
  size_t      k;

  if (length == 0)
    return def;

  cursor = *p;
  acc    = 0;
  for (k = 0; k < length; k++)
    acc = (acc << 8) | (unsigned char) cursor[k];
  *p = cursor + length;

  return acc;
}
3967 
3968 static int
3970  const char **p, size_t *length,
3971  int *W, int wsum, int first, int size)
3972 {
3973  xref_entry *e;
3974 
3975  if ((*length -= wsum*size) < 0)
3976  return -1;
3977 
3978  if (pf->num_obj < first+size)
3979  extend_xref(pf, first+size); /* TODO: change! why? */
3980 
3981  e = pf->xref_table + first;
3982  while (size--) {
3983  uint8_t type;
3984  uint32_t field2;
3985  uint16_t field3;
3986 
3987  type = (unsigned char) parse_xrefstm_field(p, W[0], 1);
3988  if (type > 2)
3989  WARN("Unknown cross-reference stream entry type.");
3990 #if 0
3991  /* Not sure */
3992  else if (!W[1] || (type != 1 && !W[2]))
3993  return -1;
3994 #endif
3995 
3996  field2 = (uint32_t) parse_xrefstm_field(p, W[1], 0);
3997  field3 = (uint16_t) parse_xrefstm_field(p, W[2], 0);
3998 
3999  if (!e->field2) {
4000  e->type = type;
4001  e->field2 = field2;
4002  e->field3 = field3;
4003  }
4004  e++;
4005  }
4006 
4007  return 0;
4008 }
4009 
4010 static int
4012 {
4013  pdf_obj *xrefstm, *size_obj, *W_obj, *index_obj;
4014  uint32_t size;
4015  size_t length;
4016  int W[3], i, wsum = 0;
4017  const char *p;
4018 
4019  xrefstm = pdf_read_object(0, 0, pf, xref_pos, pf->file_size);
4020  if (!PDF_OBJ_STREAMTYPE(xrefstm))
4021  goto error;
4022 
4023  {
4024  pdf_obj *tmp = pdf_stream_uncompress(xrefstm);
4025  if (!tmp)
4026  goto error;
4027  pdf_release_obj(xrefstm);
4028  xrefstm = tmp;
4029  }
4030 
4031  *trailer = pdf_link_obj(pdf_stream_dict(xrefstm));
4032 
4033  size_obj = pdf_lookup_dict(*trailer, "Size");
4034  if (!PDF_OBJ_NUMBERTYPE(size_obj))
4035  goto error;
4036  size = (uint32_t) pdf_number_value(size_obj);
4037 
4038  length = pdf_stream_length(xrefstm);
4039 
4040  W_obj = pdf_lookup_dict(*trailer, "W");
4041  if (!PDF_OBJ_ARRAYTYPE(W_obj) || pdf_array_length(W_obj) != 3)
4042  goto error;
4043 
4044  for (i = 0; i < 3; i++) {
4045  pdf_obj *tmp = pdf_get_array(W_obj, i);
4046  if (!PDF_OBJ_NUMBERTYPE(tmp))
4047  goto error;
4048  wsum += (W[i] = (int) pdf_number_value(tmp));
4049  }
4050 
4051  p = pdf_stream_dataptr(xrefstm);
4052 
4053  index_obj = pdf_lookup_dict(*trailer, "Index");
4054  if (index_obj) {
4055  size_t index_len;
4056  if (!PDF_OBJ_ARRAYTYPE(index_obj) ||
4057  ((index_len = pdf_array_length(index_obj)) % 2 ))
4058  goto error;
4059 
4060  i = 0;
4061  while (i < index_len) {
4062  pdf_obj *first = pdf_get_array(index_obj, i++);
4063  size_obj = pdf_get_array(index_obj, i++);
4064  if (!PDF_OBJ_NUMBERTYPE(first) || !PDF_OBJ_NUMBERTYPE(size_obj) ||
4065  parse_xrefstm_subsec(pf, &p, &length, W, wsum,
4066  (int) pdf_number_value(first), (int) pdf_number_value(size_obj)))
4067  goto error;
4068  }
4069  } else if (parse_xrefstm_subsec(pf, &p, &length, W, wsum, 0, size))
4070  goto error;
4071 
4072  if (length)
4073  WARN("Garbage in xref stream.");
4074 
4075  pdf_release_obj(xrefstm);
4076 
4077  return 1;
4078 
4079  error:
4080  WARN("Cannot parse cross-reference stream.");
4081  if (xrefstm)
4082  pdf_release_obj(xrefstm);
4083  if (*trailer) {
4085  *trailer = NULL;
4086  }
4087  return 0;
4088 }
4089 
4090 /* TODO: parse Version entry */
/*
 * Walk the chain of cross-reference sections of a PDF file and return
 * the trailer dictionary of the first section parsed (the one located
 * by find_xref()), or NULL after a warning when anything fails.
 * pdf_open() calls this as read_xref(pf).
 *
 * Each section is first tried as a classic table: parse_xref_table()
 * returning > 0 means a table was read and its trailer follows.  A
 * return of 0 makes the code try the same offset as a cross-reference
 * stream.  The /Prev entry of a section's trailer, when present, gives
 * the offset of the next section to read; the loop ends when there is
 * none.
 *
 * NOTE(review): the line holding the function name and parameter list,
 * and two statements in the body, are not present in this copy of the
 * file -- restore them from the upstream source before building.
 */
4091 static pdf_obj *
4093 {
4094  pdf_obj *trailer = NULL, *main_trailer = NULL;
4095  size_t xref_pos;
4096 
4097  if (!(xref_pos = find_xref(pf->file)))
4098  goto error;
4099 
 /* NOTE(review): nothing here stops a /Prev chain that points back to
  * an offset already visited -- confirm whether that is guarded elsewhere. */
4100  while (xref_pos) {
4101  pdf_obj *prev;
4102 
4103  int res = parse_xref_table(pf, xref_pos);
4104  if (res > 0) {
4105  /* cross-reference table */
4106  pdf_obj *xrefstm;
4107 
4108  if (!(trailer = parse_trailer(pf)))
4109  goto error;
4110 
 /* Only the first trailer encountered is kept for the caller. */
4111  if (!main_trailer)
4112  main_trailer = pdf_link_obj(trailer);
4113 
 /* Hybrid file: the table's trailer names an additional xref stream.
  * Its entries are merged in, but the stream's own dictionary
  * (new_trailer) is dropped again right away. */
4114  if ((xrefstm = pdf_lookup_dict(trailer, "XRefStm"))) {
4115  pdf_obj *new_trailer = NULL;
4116  if (PDF_OBJ_NUMBERTYPE(xrefstm) &&
4117  parse_xref_stream(pf, (int) pdf_number_value(xrefstm),
4118  &new_trailer))
4119  pdf_release_obj(new_trailer);
4120  else
4121  WARN("Skipping hybrid reference section.");
4122  /* Many PDF 1.5 xref streams use DecodeParms, which we cannot
4123  * parse. This way we can use at least xref tables in hybrid
4124  * documents. Or should we better stop parsing the file?
4125  */
4126  }
4127 
4128  } else if (!res && parse_xref_stream(pf, xref_pos, &trailer)) {
4129  /* cross-reference stream */
4130  if (!main_trailer)
4131  main_trailer = pdf_link_obj(trailer);
4132  } else
4133  goto error;
4134 
 /* Follow /Prev to the next section; a non-numeric /Prev is an error. */
4135  if ((prev = pdf_lookup_dict(trailer, "Prev"))) {
4136  if (PDF_OBJ_NUMBERTYPE(prev))
4137  xref_pos = (size_t) pdf_number_value(prev);
4138  else
4139  goto error;
4140  } else
4141  xref_pos = 0;
4142 
 /* NOTE(review): `trailer` is not reset to NULL in the visible code
  * before the next iteration; if it is released at this point, the
  * error path and parse_xref_stream() can see a stale pointer -- confirm. */
4144  }
4145 
4146 #if 0
4147  if (!pdf_lookup_dict(main_trailer, "Root")) {
4148  WARN("Trailer doesn't have catalog. Is this a correct PDF file?");
4149  goto error;
4150  }
4151 #endif
4152 
4153  return main_trailer;
4154 
4155  error:
4156  WARN("Error while parsing PDF file.");
4157  if (trailer)
4159  if (main_trailer)
4160  pdf_release_obj(main_trailer);
4161  return NULL;
4162 }
4163 
4164 static struct ht_table *pdf_files = NULL;
4165 
4166 static pdf_file *
4168 {
4169  pdf_file *pf;
4170  ASSERT(file);
4171  pf = NEW(1, pdf_file);
4172  pf->file = file;
4173  pf->trailer = NULL;
4174  pf->xref_table = NULL;
4175  pf->catalog = NULL;
4176  pf->num_obj = 0;
4177  pf->version = 0;
4178 
4179  seek_end(file);
4180  pf->file_size = tell_position(file);
4181 
4182  return pf;
4183 }
4184 
4185 static void
4187 {
4188  size_t i;
4189 
4190  if (!pf) {
4191  return;
4192  }
4193 
4194  for (i = 0; i < pf->num_obj; i++) {
4195  if (pf->xref_table[i].direct)
4196  pdf_release_obj(pf->xref_table[i].direct);
4197  if (pf->xref_table[i].indirect)
4198  pdf_release_obj(pf->xref_table[i].indirect);
4199  }
4200 
4201  RELEASE(pf->xref_table);
4202  if (pf->trailer)
4203  pdf_release_obj(pf->trailer);
4204  if (pf->catalog)
4205  pdf_release_obj(pf->catalog);
4206 
4207  RELEASE(pf);
4208 }
4209 
/*
 * Allocate the global pdf_files hash table and initialise it with
 * pdf_file_free as the function handed to ht_init_table() (presumably
 * the per-value destructor -- confirm against the ht_table API).
 * pdf_open() asserts that this table exists.
 */
4210 void
4212 {
4213  pdf_files = NEW(1, struct ht_table);
4214  ht_init_table(pdf_files, (void (*)(void *)) pdf_file_free);
4215 }
4216 
/*
 * Return the PDF version recorded for pf.  pdf_open() stores it as
 * major*10 + minor.  pf must not be NULL.
 */
4217 int
4219 {
4220  ASSERT(pf);
4221  return pf->version;
4222 }
4223 
/*
 * Return the trailer dictionary of pf, passed through pdf_link_obj(),
 * so the caller receives its own link (presumably to be dropped with
 * pdf_release_obj() -- confirm).  pf must not be NULL.
 */
4223 pdf_obj *
4225 {
4226  ASSERT(pf);
4227  return pdf_link_obj(pf->trailer);
4228 }
4230 
4231 /* FIXME:
4232  * pdf_file_get_trailer() does pdf_link_obj() but
4233  * pdf_file_get_catalog() does not. Why?
4234  */
/*
 * Return pf->catalog exactly as stored.  Unlike the trailer accessor,
 * no pdf_link_obj() is applied here, so the returned pointer is the
 * one still held by pf.  pf must not be NULL.
 */
4234 pdf_obj *
4236 {
4237  ASSERT(pf);
4238  return pf->catalog;
4239 }
4241 
/*
 * Open a PDF file for inclusion and return its pdf_file record.
 *
 *  ident  optional key for the file; may be NULL.  The statements
 *         guarded by `if (ident)` are not present in this copy of the
 *         file -- presumably a lookup in, and registration with, the
 *         pdf_files table; confirm against upstream.
 *  file   stream the PDF is read from.
 *
 * If a record already exists (pf non-NULL) only its FILE pointer is
 * refreshed.  Otherwise a new record is built: the cross-reference
 * data and trailer are read, encrypted documents are rejected, the
 * catalog is resolved through /Root, and a /Version name in the
 * catalog may raise pf->version.
 *
 * Returns the record, or NULL on failure after the partially built
 * record has been freed with pdf_file_free().
 */
4241 pdf_file *
4242 pdf_open (const char *ident, FILE *file)
4243 {
4244  pdf_out *p = current_output();
4245  pdf_file *pf = NULL;
4246 
4247  ASSERT(pdf_files);
4248 
4249  if (ident)
4251 
4252  if (pf) {
4253  pf->file = file;
4254  } else {
4255  pdf_obj *new_version;
 /* NOTE(review): the declaration of `version` used below is missing
  * from this copy of the file. */
4257 
 /* The version diagnostics are skipped when dpx_conf.is_xbb is set. */
4258  if (!dpx_conf.is_xbb) {
4259  int ver_major, ver_minor;
4260 
 /* version is encoded as major*10 + minor. */
4261  ver_major = version / 10;
4262  ver_minor = version % 10;
4263 
4264  if (version < 10)
4265  WARN("Unrecognized PDF version specified for input PDF file: %d.%d",
4266  ver_major, ver_minor);
4267  else if (pdf_check_version(ver_major, ver_minor) < 0) {
4268  WARN("Trying to include PDF file with version (%d.%d), which is " \
4269  "newer than current output PDF setting (%d.%d).",
4270  ver_major, ver_minor, p->version.major, p->version.minor);
4271  }
4272  }
4273  pf = pdf_file_new(file);
4274  pf->version = version;
4275 
4276  if (!(pf->trailer = read_xref(pf)))
4277  goto error;
4278 
 /* Encrypted input is not supported: give up on the whole file. */
4279  if (pdf_lookup_dict(pf->trailer, "Encrypt")) {
4280  WARN("PDF document is encrypted.");
4281  goto error;
4282  }
4283 
4284  pf->catalog = pdf_deref_obj(pdf_lookup_dict(pf->trailer, "Root"));
4285  if (!PDF_OBJ_DICTTYPE(pf->catalog)) {
4286  WARN("Cannot read PDF document catalog. Broken PDF file?");
4287  goto error;
4288  }
4289 
 /* A /Version name in the catalog only ever raises the version taken
  * from the file header, never lowers it. */
4290  new_version = pdf_deref_obj(pdf_lookup_dict(pf->catalog, "Version"));
4291  if (new_version) {
4292  unsigned int major, minor;
4293 
4294  if (!PDF_OBJ_NAMETYPE(new_version) ||
4295  sscanf(pdf_name_value(new_version), "%u.%u", &major, &minor) != 2) {
4296  pdf_release_obj(new_version);
4297  WARN("Illegal Version entry in document catalog. Broken PDF file?");
4298  goto error;
4299  }
4300 
4301  if (pf->version < major*10+minor)
4302  pf->version = major*10+minor;
4303 
4304  pdf_release_obj(new_version);
4305  }
4306 
4307  if (ident)
4309  }
4310 
4311  return pf;
4312 
4313  error:
4314  pdf_file_free(pf);
4315  return NULL;
4316 }
4318 
/*
 * Detach the FILE from a pdf_file record by clearing pf->file.  The
 * record itself is neither freed nor otherwise changed; a NULL pf is
 * ignored.
 */
4318 void
4320 {
4321  if (pf)
4322  pf->file = NULL;
4323 }
4325 
/*
 * Dispose of the global pdf_files table structure.  The table must
 * have been created beforehand (asserted).
 * NOTE(review): one statement of this body is missing from this copy
 * of the file -- presumably the call that empties the table before it
 * is released; confirm against upstream.
 */
4325 void
4327 {
4328  ASSERT(pdf_files);
4330  RELEASE(pdf_files);
4331 }
4333 
/*
 * Read the version from the "%PDF-<major>.<minor>" header of `file`.
 *
 * The stream is rewound first.  The header is accepted only when the
 * very first byte is '%' (peeked with fgetc/ungetc) and fscanf()
 * matches both numbers.  Returns major*10 + minor on success and -1
 * otherwise.  The stream position is left wherever fscanf() stopped.
 */
4333 static int
4335 {
4336  unsigned int major, minor;
4337 
4338  rewind(file);
4339 
4340  return (ungetc(fgetc(file), file) == '%' &&
4341  fscanf(file, "%%PDF-%u.%u", &major, &minor) == 2) ? (major*10+minor) : -1;
4342 }
4344 
/*
 * Tell whether a stream looks like a PDF file: returns 0 when no
 * version could be obtained (version < 0) and 1 otherwise.
 *
 * NOTE(review): the statement defining `version` is missing from this
 * copy of the file.
 * NOTE(review): both paths after the pdf_get_version() comparison
 * return 1, so that comparison currently has no effect on the result.
 */
4344 int
4346 {
4348 
4349  if (version < 0) /* not a PDF file */
4350  return 0;
4351 
4352  if (version <= pdf_get_version())
4353  return 1;
4354 
4355  return 1;
4356 }
4358 
/*
 * Per-entry callback that pdf_import_object() passes to
 * pdf_foreach_dict() when copying a dictionary.  `pdata` is the
 * dictionary being built; `value` is imported recursively.
 * Returns -1 when the import of `value` fails, 0 otherwise.
 *
 * NOTE(review): the statement that stores `tmp` into `copy` is missing
 * from this copy of the file.
 */
4358 static inline int
4360 {
4361  pdf_obj *copy;
4362  pdf_obj *tmp;
4363 
4364  copy = (pdf_obj *) pdata;
4365 
4366  tmp = pdf_import_object(value);
4367  if (!tmp) {
4368  return -1;
4369  }
4371 
4372  return 0;
4373 }
4375 
/*
 * Import the object behind an indirect reference into an input file
 * and return a link to the corresponding reference in the output.
 * pdf_import_object() calls this as pdf_import_indirect(object).
 *
 * The output reference is cached in pf->xref_table[obj_num].indirect,
 * so a second import of the same object returns a link to the same
 * reference instead of copying the object again.
 *
 * Returns a new null object when the label cannot be resolved, and
 * NULL when the object cannot be read.
 * NOTE(review): those two failure modes return different things --
 * confirm that callers expect this.
 * NOTE(review): the WARN() calls print obj_num (uint32_t) with %lu;
 * the specifier and argument type do not match where unsigned long is
 * wider than 32 bits.
 */
4375 static pdf_obj *
4377 {
4378  pdf_out *p = current_output();
4379  pdf_file *pf = OBJ_FILE(object);
4380  uint32_t obj_num = OBJ_NUM(object);
4381  uint16_t obj_gen = OBJ_GEN(object);
4382  pdf_obj *ref;
4383 
4384  ASSERT(pf);
4385 
4386  if (!checklabel(pf, obj_num, obj_gen)) {
4387  WARN("Can't resolve object: %lu %u", obj_num, obj_gen);
4388  return pdf_new_null();
4389  }
4390 
 /* Reuse the output reference if this object was imported before. */
4391  ref = pf->xref_table[obj_num].indirect;
4392  if (!ref) {
4393  pdf_obj *obj, *reserved, *imported;
4394 
4395  obj = pdf_get_object(pf, obj_num, obj_gen);
4396  if (!obj) {
4397  WARN("Could not read object: %lu %u", obj_num, obj_gen);
4398  return NULL;
4399  }
4400 
4401  /* Fix for circular reference issue
4402  *
4403  * Older versions of dvipdfmx disallowed the following case of
4404  * circular reference:
4405  * obj #1 --> << /Kids [2 0 R] >>
4406  * obj #2 --> << /Parents [1 0 R] >>
4407  * The problem is that dvipdfmx gives new labels to objects only
4408  * after they have been completely read.
4409  */
 /* The reference is created on a placeholder and recorded in the xref
  * table before the recursive import, so an import that comes back to
  * this object finds `ref` instead of recursing again. */
4410  reserved = pdf_new_null(); /* for reservation of label */
4411  pf->xref_table[obj_num].indirect = ref = pdf_new_ref(p, reserved);
4412  imported = pdf_import_object(obj);
4413  if (imported) {
4414  if (imported->label) {
4415  WARN("Imported object already has a label: obj_id=%lu", imported->label);
4416  }
 /* Hand the placeholder's label and generation over to the real
  * object and point the reference at it. */
4417  OBJ_OBJ(ref) = imported;
4418  imported->label = reserved->label;
4419  imported->generation = reserved->generation;
4420  reserved->label = 0;
4421  reserved->generation = 0;
4422  pdf_release_obj(imported);
4423  }
4424 
 /* NOTE(review): one statement is missing from this copy of the file
  * at this point. */
4426  pdf_release_obj(obj);
4427  }
4428 
4429  return pdf_link_obj(ref);
4430 }
4432 
4433 /*
4434  * pdf_import_object recursively copies the object and those
4435  * referenced by it and changes the indirect references so that
4436  * they refer to the current output file. New indirect references
4437  * are remembered, which avoids duplicating objects when they
4438  * are imported several times.
4439  */
4440 pdf_obj *
4442 {
4443  pdf_obj *imported;
4444  pdf_obj *tmp;
4445  size_t i;
4446 
4447  switch (pdf_obj_typeof(object)) {
4448 
4449  case PDF_INDIRECT:
4450  if (OBJ_FILE(object)) {
4451  imported = pdf_import_indirect(object);
4452  } else {
4453  imported = pdf_link_obj(object);
4454  }
4455  break;
4456 
4457  case PDF_STREAM:
4458  {
4459  pdf_obj *stream_dict;
4460 
4461  tmp = pdf_import_object(pdf_stream_dict(object));
4462  if (!tmp)
4463  return NULL;
4464 
4465  imported = pdf_new_stream(0);
4466  stream_dict = pdf_stream_dict(imported);
4467  pdf_merge_dict(stream_dict, tmp);
4468  pdf_release_obj(tmp);
4469  pdf_add_stream(imported,
4470  pdf_stream_dataptr(object),
4471  pdf_stream_length(object));
4472  }
4473  break;
4474 
4475  case PDF_DICT:
4476 
4477  imported = pdf_new_dict();
4478  if (pdf_foreach_dict(object, import_dict, imported) < 0) {
4479  pdf_release_obj(imported);
4480  return NULL;
4481  }
4482 
4483  break;
4484 
4485  case PDF_ARRAY:
4486 
4487  imported = pdf_new_array();
4488  for (i = 0; i < pdf_array_length(object); i++) {
4489  tmp = pdf_import_object(pdf_get_array(object, i));
4490  if (!tmp) {
4491  pdf_release_obj(imported);
4492  return NULL;
4493  }
4494  pdf_add_array(imported, tmp);
4495  }
4496  break;
4497 
4498  default:
4499  imported = pdf_link_obj(object);
4500  }
4501 
4502  return imported;
4503 }
4504 
4505 
4506 /* returns 0 if indirect references point to the same object */
/*
 * Compare two indirect references.  They are considered equal (result
 * 0) when file (pf), label and generation all match; any difference
 * yields a non-zero result.  The result carries no ordering
 * information.  pdf_compare_object() calls this as
 * pdf_compare_reference(obj1, obj2).
 *
 * NOTE(review): the data fields are cast to pdf_indirect without a
 * visible type check; one statement is missing from this copy of the
 * file just before the casts -- confirm it validates the object types.
 */
4507 int
4509 {
4510  pdf_indirect *data1, *data2;
4511 
4513 
4514  data1 = (pdf_indirect *) ref1->data;
4515  data2 = (pdf_indirect *) ref2->data;
4516 
4517  return data1->pf != data2->pf || data1->label != data2->label
4518  || data1->generation != data2->generation;
4519 }
4520 
4521 int
4523 {
4524  int r = 0;
4525 
4526  if (!obj1 && !obj2) {
4527  return 0;
4528  } else if (!obj1 && obj2) {
4529  return 1;
4530  } else if (obj1 && !obj2) {
4531  return 1;
4532  } else if (pdf_obj_typeof(obj1) != pdf_obj_typeof(obj2)) {
4533  return 1;
4534  }
4535 
4536  switch (pdf_obj_typeof(obj1)) {
4537  case PDF_BOOLEAN:
4538  r = pdf_boolean_value(obj1) - pdf_boolean_value(obj2);
4539  break;
4540  case PDF_NUMBER:
4541  if (pdf_number_value(obj1) < pdf_number_value(obj2)) {
4542  r = -1;
4543  } else if (pdf_number_value(obj1) > pdf_number_value(obj2)) {
4544  r = 1;
4545  } else {
4546  r = 0;
4547  }
4548  break;
4549  case PDF_STRING:
4550  if (pdf_string_length(obj1) < pdf_string_length(obj2)) {
4551  r = -1;
4552  } else if (pdf_string_length(obj1) > pdf_string_length(obj2)) {
4553  r = 1;
4554  } else {
4556  }
4557  break;
4558  case PDF_NAME:
4559  r = strcmp(pdf_name_value(obj1), pdf_name_value(obj2));
4560  break;
4561  case PDF_NULL:
4562  /* Always same */
4563  r = 0;
4564  break;
4565  case PDF_INDIRECT:
4566  r = pdf_compare_reference(obj1, obj2);
4567  break;
4568  case PDF_ARRAY:
4569  if (pdf_array_length(obj1) < pdf_array_length(obj2)) {
4570  r = -1;
4571  } else if (pdf_array_length(obj1) > pdf_array_length(obj2)) {
4572  r = 1;
4573  } else {
4574  int i;
4575  for (i = 0; r == 0 && i < pdf_array_length(obj1); i++) {
4576  pdf_obj *v1, *v2;
4577  v1 =