"Fossies" - the Fresh Open Source Software Archive 
Member "xpdf-4.04/xpdf/GfxFont.cc" (18 Apr 2022, 65646 Bytes) of package /linux/misc/xpdf-4.04.tar.gz:
As a special service "Fossies" has tried to format the requested source page into HTML format using (guessed) C and C++ source code syntax highlighting (style:
standard) with prefixed line numbers and
code folding option.
Alternatively, you can view or download the uninterpreted source code file here.
For more information about "GfxFont.cc" see the
Fossies "Dox" file reference documentation and the latest
Fossies "Diffs" side-by-side code changes report:
4.03_vs_4.04.
1 //========================================================================
2 //
3 // GfxFont.cc
4 //
5 // Copyright 1996-2003 Glyph & Cog, LLC
6 //
7 //========================================================================
8
9 #include <aconf.h>
10
11 #ifdef USE_GCC_PRAGMAS
12 #pragma implementation
13 #endif
14
15 #include <stdio.h>
16 #include <stdlib.h>
17 #include <string.h>
18 #include <ctype.h>
19 #include <math.h>
20 #include <limits.h>
21 #include "gmem.h"
22 #include "gmempp.h"
23 #include "GList.h"
24 #include "GHash.h"
25 #include "Error.h"
26 #include "Object.h"
27 #include "Dict.h"
28 #include "GlobalParams.h"
29 #include "CMap.h"
30 #include "CharCodeToUnicode.h"
31 #include "FontEncodingTables.h"
32 #include "BuiltinFontTables.h"
33 #include "FoFiIdentifier.h"
34 #include "FoFiType1.h"
35 #include "FoFiType1C.h"
36 #include "FoFiTrueType.h"
37 #include "GfxFont.h"
38
39 //------------------------------------------------------------------------
40
// One row of the Base-14 alias table: pairs a font name as it may be
// spelled in a PDF file with the Base-14 font name it stands for.
struct Base14FontMapEntry {
  // alternate spelling (looked up by the Gfx8BitFont constructor)
  const char *altName;
  // corresponding Base-14 font name
  const char *base14Name;
};
45
46 static Base14FontMapEntry base14FontMap[] = {
47 { "Arial", "Helvetica" },
48 { "Arial,Bold", "Helvetica-Bold" },
49 { "Arial,BoldItalic", "Helvetica-BoldOblique" },
50 { "Arial,Italic", "Helvetica-Oblique" },
51 { "Arial-Bold", "Helvetica-Bold" },
52 { "Arial-BoldItalic", "Helvetica-BoldOblique" },
53 { "Arial-BoldItalicMT", "Helvetica-BoldOblique" },
54 { "Arial-BoldMT", "Helvetica-Bold" },
55 { "Arial-Italic", "Helvetica-Oblique" },
56 { "Arial-ItalicMT", "Helvetica-Oblique" },
57 { "ArialMT", "Helvetica" },
58 { "Courier", "Courier" },
59 { "Courier,Bold", "Courier-Bold" },
60 { "Courier,BoldItalic", "Courier-BoldOblique" },
61 { "Courier,Italic", "Courier-Oblique" },
62 { "Courier-Bold", "Courier-Bold" },
63 { "Courier-BoldOblique", "Courier-BoldOblique" },
64 { "Courier-Oblique", "Courier-Oblique" },
65 { "CourierNew", "Courier" },
66 { "CourierNew,Bold", "Courier-Bold" },
67 { "CourierNew,BoldItalic", "Courier-BoldOblique" },
68 { "CourierNew,Italic", "Courier-Oblique" },
69 { "CourierNew-Bold", "Courier-Bold" },
70 { "CourierNew-BoldItalic", "Courier-BoldOblique" },
71 { "CourierNew-Italic", "Courier-Oblique" },
72 { "CourierNewPS-BoldItalicMT", "Courier-BoldOblique" },
73 { "CourierNewPS-BoldMT", "Courier-Bold" },
74 { "CourierNewPS-ItalicMT", "Courier-Oblique" },
75 { "CourierNewPSMT", "Courier" },
76 { "Helvetica", "Helvetica" },
77 { "Helvetica,Bold", "Helvetica-Bold" },
78 { "Helvetica,BoldItalic", "Helvetica-BoldOblique" },
79 { "Helvetica,Italic", "Helvetica-Oblique" },
80 { "Helvetica-Bold", "Helvetica-Bold" },
81 { "Helvetica-BoldItalic", "Helvetica-BoldOblique" },
82 { "Helvetica-BoldOblique", "Helvetica-BoldOblique" },
83 { "Helvetica-Italic", "Helvetica-Oblique" },
84 { "Helvetica-Oblique", "Helvetica-Oblique" },
85 { "Symbol", "Symbol" },
86 { "Symbol,Bold", "Symbol" },
87 { "Symbol,BoldItalic", "Symbol" },
88 { "Symbol,Italic", "Symbol" },
89 { "Times-Bold", "Times-Bold" },
90 { "Times-BoldItalic", "Times-BoldItalic" },
91 { "Times-Italic", "Times-Italic" },
92 { "Times-Roman", "Times-Roman" },
93 { "TimesNewRoman", "Times-Roman" },
94 { "TimesNewRoman,Bold", "Times-Bold" },
95 { "TimesNewRoman,BoldItalic", "Times-BoldItalic" },
96 { "TimesNewRoman,Italic", "Times-Italic" },
97 { "TimesNewRoman-Bold", "Times-Bold" },
98 { "TimesNewRoman-BoldItalic", "Times-BoldItalic" },
99 { "TimesNewRoman-Italic", "Times-Italic" },
100 { "TimesNewRomanPS", "Times-Roman" },
101 { "TimesNewRomanPS-Bold", "Times-Bold" },
102 { "TimesNewRomanPS-BoldItalic", "Times-BoldItalic" },
103 { "TimesNewRomanPS-BoldItalicMT", "Times-BoldItalic" },
104 { "TimesNewRomanPS-BoldMT", "Times-Bold" },
105 { "TimesNewRomanPS-Italic", "Times-Italic" },
106 { "TimesNewRomanPS-ItalicMT", "Times-Italic" },
107 { "TimesNewRomanPSMT", "Times-Roman" },
108 { "TimesNewRomanPSMT,Bold", "Times-Bold" },
109 { "TimesNewRomanPSMT,BoldItalic", "Times-BoldItalic" },
110 { "TimesNewRomanPSMT,Italic", "Times-Italic" },
111 { "ZapfDingbats", "ZapfDingbats" }
112 };
113
114 //------------------------------------------------------------------------
115
// Base-14 font names used for 8-bit font substitution.
// The index is computed as:
//   {fixed-width: 0, sans-serif: 4, serif: 8} + 2 * bold + italic
// NB: must be in same order as psSubstFonts in PSOutputDev.cc
static const char *base14SubstFonts[14] = {
  // 0..3: fixed-width
  "Courier",
  "Courier-Oblique",
  "Courier-Bold",
  "Courier-BoldOblique",

  // 4..7: sans-serif
  "Helvetica",
  "Helvetica-Oblique",
  "Helvetica-Bold",
  "Helvetica-BoldOblique",

  // 8..11: serif
  "Times-Roman",
  "Times-Italic",
  "Times-Bold",
  "Times-BoldItalic",

  // 12..13: the last two are never used for substitution
  "Symbol",
  "ZapfDingbats"
};
135
136 //------------------------------------------------------------------------
137
138 static int readFromStream(void *data) {
139 return ((Stream *)data)->getChar();
140 }
141
142 //------------------------------------------------------------------------
143 // GfxFontLoc
144 //------------------------------------------------------------------------
145
146 GfxFontLoc::GfxFontLoc() {
147 path = NULL;
148 fontNum = 0;
149 oblique = 0;
150 encoding = NULL;
151 substIdx = -1;
152 }
153
154 GfxFontLoc::~GfxFontLoc() {
155 if (path) {
156 delete path;
157 }
158 if (encoding) {
159 delete encoding;
160 }
161 }
162
163 //------------------------------------------------------------------------
164 // GfxFont
165 //------------------------------------------------------------------------
166
167 GfxFont *GfxFont::makeFont(XRef *xref, const char *tagA,
168 Ref idA, Dict *fontDict) {
169 GString *nameA;
170 Ref embFontIDA;
171 GfxFontType typeA;
172 GfxFont *font;
173 Object obj1;
174
175 // get base font name
176 nameA = NULL;
177 fontDict->lookup("BaseFont", &obj1);
178 if (obj1.isName()) {
179 nameA = new GString(obj1.getName());
180 } else if (obj1.isString()) {
181 nameA = obj1.getString()->copy();
182 }
183 obj1.free();
184
185 // get embedded font ID and font type
186 typeA = getFontType(xref, fontDict, &embFontIDA);
187
188 // create the font object
189 font = NULL;
190 if (typeA < fontCIDType0) {
191 font = new Gfx8BitFont(xref, tagA, idA, nameA, typeA, embFontIDA,
192 fontDict);
193 } else {
194 font = new GfxCIDFont(xref, tagA, idA, nameA, typeA, embFontIDA,
195 fontDict);
196 }
197
198 return font;
199 }
200
201 GfxFont *GfxFont::makeDefaultFont(XRef *xref) {
202 Object type, subtype, baseFont;
203 type.initName("Font");
204 subtype.initName("Type1");
205 baseFont.initName("Helvetica");
206 Object fontDict;
207 fontDict.initDict(xref);
208 fontDict.dictAdd(copyString("Type"), &type);
209 fontDict.dictAdd(copyString("Subtype"), &subtype);
210 fontDict.dictAdd(copyString("BaseFont"), &baseFont);
211
212 Ref r;
213 r.gen = 100000;
214 r.num = GfxFontDict::hashFontObject(&fontDict);
215
216 GfxFont *font = makeFont(xref, "undef", r, fontDict.getDict());
217 fontDict.free();
218
219 return font;
220 }
221
222 GfxFont::GfxFont(const char *tagA, Ref idA, GString *nameA,
223 GfxFontType typeA, Ref embFontIDA) {
224 ok = gFalse;
225 tag = new GString(tagA);
226 id = idA;
227 name = nameA;
228 type = typeA;
229 embFontID = embFontIDA;
230 embFontName = NULL;
231 hasToUnicode = gFalse;
232 }
233
234 GfxFont::~GfxFont() {
235 delete tag;
236 if (name) {
237 delete name;
238 }
239 if (embFontName) {
240 delete embFontName;
241 }
242 }
243
244 // This function extracts three pieces of information:
245 // 1. the "expected" font type, i.e., the font type implied by
246 // Font.Subtype, DescendantFont.Subtype, and
247 // FontDescriptor.FontFile3.Subtype
248 // 2. the embedded font object ID
249 // 3. the actual font type - determined by examining the embedded font
250 // if there is one, otherwise equal to the expected font type
251 // If the expected and actual font types don't match, a warning
252 // message is printed. The expected font type is not used for
253 // anything else.
254 GfxFontType GfxFont::getFontType(XRef *xref, Dict *fontDict, Ref *embID) {
255 GfxFontType t, expectedType;
256 FoFiIdentifierType fft;
257 Dict *fontDict2;
258 Object subtype, fontDesc, obj1, obj2, obj3, obj4;
259 GBool isType0, err;
260
261 t = fontUnknownType;
262 embID->num = embID->gen = -1;
263 err = gFalse;
264
265 fontDict->lookup("Subtype", &subtype);
266 expectedType = fontUnknownType;
267 isType0 = gFalse;
268 if (subtype.isName("Type1") || subtype.isName("MMType1")) {
269 expectedType = fontType1;
270 } else if (subtype.isName("Type1C")) {
271 expectedType = fontType1C;
272 } else if (subtype.isName("Type3")) {
273 expectedType = fontType3;
274 } else if (subtype.isName("TrueType")) {
275 expectedType = fontTrueType;
276 } else if (subtype.isName("Type0")) {
277 isType0 = gTrue;
278 } else {
279 error(errSyntaxWarning, -1, "Unknown font type: '{0:s}'",
280 subtype.isName() ? subtype.getName() : "???");
281 }
282 subtype.free();
283
284 fontDict2 = fontDict;
285 if (fontDict->lookup("DescendantFonts", &obj1)->isArray()) {
286 if (obj1.arrayGetLength() == 0) {
287 error(errSyntaxWarning, -1, "Empty DescendantFonts array in font");
288 obj2.initNull();
289 } else if (obj1.arrayGet(0, &obj2)->isDict()) {
290 if (!isType0) {
291 error(errSyntaxWarning, -1, "Non-CID font with DescendantFonts array");
292 }
293 fontDict2 = obj2.getDict();
294 fontDict2->lookup("Subtype", &subtype);
295 if (subtype.isName("CIDFontType0")) {
296 if (isType0) {
297 expectedType = fontCIDType0;
298 }
299 } else if (subtype.isName("CIDFontType2")) {
300 if (isType0) {
301 expectedType = fontCIDType2;
302 }
303 }
304 subtype.free();
305 }
306 } else {
307 obj2.initNull();
308 }
309
310 if (fontDict2->lookup("FontDescriptor", &fontDesc)->isDict()) {
311 if (fontDesc.dictLookupNF("FontFile", &obj3)->isRef()) {
312 *embID = obj3.getRef();
313 if (expectedType != fontType1) {
314 err = gTrue;
315 }
316 }
317 obj3.free();
318 if (embID->num == -1 &&
319 fontDesc.dictLookupNF("FontFile2", &obj3)->isRef()) {
320 *embID = obj3.getRef();
321 if (isType0) {
322 expectedType = fontCIDType2;
323 } else if (expectedType != fontTrueType) {
324 err = gTrue;
325 }
326 }
327 obj3.free();
328 if (embID->num == -1 &&
329 fontDesc.dictLookupNF("FontFile3", &obj3)->isRef()) {
330 *embID = obj3.getRef();
331 if (obj3.fetch(xref, &obj4)->isStream()) {
332 obj4.streamGetDict()->lookup("Subtype", &subtype);
333 if (subtype.isName("Type1")) {
334 if (expectedType != fontType1) {
335 err = gTrue;
336 expectedType = isType0 ? fontCIDType0 : fontType1;
337 }
338 } else if (subtype.isName("Type1C")) {
339 if (expectedType == fontType1) {
340 expectedType = fontType1C;
341 } else if (expectedType != fontType1C) {
342 err = gTrue;
343 expectedType = isType0 ? fontCIDType0C : fontType1C;
344 }
345 } else if (subtype.isName("TrueType")) {
346 if (expectedType != fontTrueType) {
347 err = gTrue;
348 expectedType = isType0 ? fontCIDType2 : fontTrueType;
349 }
350 } else if (subtype.isName("CIDFontType0C")) {
351 if (expectedType == fontCIDType0) {
352 expectedType = fontCIDType0C;
353 } else {
354 err = gTrue;
355 expectedType = isType0 ? fontCIDType0C : fontType1C;
356 }
357 } else if (subtype.isName("OpenType")) {
358 if (expectedType == fontTrueType) {
359 expectedType = fontTrueTypeOT;
360 } else if (expectedType == fontType1) {
361 expectedType = fontType1COT;
362 } else if (expectedType == fontCIDType0) {
363 expectedType = fontCIDType0COT;
364 } else if (expectedType == fontCIDType2) {
365 expectedType = fontCIDType2OT;
366 } else {
367 err = gTrue;
368 }
369 } else {
370 error(errSyntaxError, -1, "Unknown font type '{0:s}'",
371 subtype.isName() ? subtype.getName() : "???");
372 }
373 subtype.free();
374 }
375 obj4.free();
376 }
377 obj3.free();
378 }
379 fontDesc.free();
380
381 t = fontUnknownType;
382 if (embID->num >= 0) {
383 obj3.initRef(embID->num, embID->gen);
384 obj3.fetch(xref, &obj4);
385 if (obj4.isStream()) {
386 obj4.streamReset();
387 fft = FoFiIdentifier::identifyStream(&readFromStream, obj4.getStream());
388 obj4.streamClose();
389 switch (fft) {
390 case fofiIdType1PFA:
391 case fofiIdType1PFB:
392 t = fontType1;
393 break;
394 case fofiIdCFF8Bit:
395 t = isType0 ? fontCIDType0C : fontType1C;
396 break;
397 case fofiIdCFFCID:
398 t = fontCIDType0C;
399 break;
400 case fofiIdTrueType:
401 case fofiIdTrueTypeCollection:
402 t = isType0 ? fontCIDType2 : fontTrueType;
403 break;
404 case fofiIdOpenTypeCFF8Bit:
405 t = isType0 ? fontCIDType0COT : fontType1COT;
406 break;
407 case fofiIdOpenTypeCFFCID:
408 t = fontCIDType0COT;
409 break;
410 default:
411 error(errSyntaxError, -1, "Embedded font file may be invalid");
412 break;
413 }
414 }
415 obj4.free();
416 obj3.free();
417 }
418
419 if (t == fontUnknownType) {
420 t = expectedType;
421 }
422
423 if (t != expectedType) {
424 err = gTrue;
425 }
426
427 if (err) {
428 error(errSyntaxWarning, -1,
429 "Mismatch between font type and embedded font file");
430 }
431
432 obj2.free();
433 obj1.free();
434
435 return t;
436 }
437
438 void GfxFont::readFontDescriptor(XRef *xref, Dict *fontDict) {
439 Object obj1, obj2, obj3, obj4;
440 double t, t2;
441 int i;
442
443 // assume Times-Roman by default (for substitution purposes)
444 flags = fontSerif;
445
446 if (fontDict->lookup("FontDescriptor", &obj1)->isDict()) {
447
448 // get flags
449 if (obj1.dictLookup("Flags", &obj2)->isInt()) {
450 flags = obj2.getInt();
451 }
452 obj2.free();
453
454 // get name
455 obj1.dictLookup("FontName", &obj2);
456 if (obj2.isName()) {
457 embFontName = new GString(obj2.getName());
458 }
459 obj2.free();
460
461 // look for MissingWidth
462 obj1.dictLookup("MissingWidth", &obj2);
463 if (obj2.isNum()) {
464 missingWidth = obj2.getNum();
465 }
466 obj2.free();
467
468 // get Ascent
469 // (CapHeight is a little more reliable - so use it if present)
470 obj1.dictLookup("Ascent", &obj2);
471 obj1.dictLookup("CapHeight", &obj3);
472 if (obj2.isNum() || obj3.isNum()) {
473 if (obj2.isNum()) {
474 t = 0.001 * obj2.getNum();
475 // some broken font descriptors specify a negative ascent
476 if (t < 0) {
477 t = -t;
478 }
479 } else {
480 t = 0;
481 }
482 if (obj3.isNum()) {
483 t2 = 0.001 * obj3.getNum();
484 // some broken font descriptors specify a negative ascent
485 if (t2 < 0) {
486 t2 = -t2;
487 }
488 } else {
489 t2 = 0;
490 }
491 if (t != 0 && t < 1.9) {
492 declaredAscent = t;
493 }
494 // if both Ascent and CapHeight are set, use the smaller one
495 // (because the most common problem is that Ascent is too large)
496 if (t2 != 0 && (t == 0 || t2 < t)) {
497 t = t2;
498 }
499 // some broken font descriptors set ascent and descent to 0;
500 // others set it to ridiculous values (e.g., 32768)
501 if (t != 0 && t < 1.9) {
502 ascent = t;
503 }
504 }
505 obj2.free();
506 obj3.free();
507
508 // get Descent
509 obj1.dictLookup("Descent", &obj2);
510 if (obj2.isNum()) {
511 t = 0.001 * obj2.getNum();
512 // some broken font descriptors specify a positive descent
513 if (t > 0) {
514 t = -t;
515 }
516 // some broken font descriptors set ascent and descent to 0
517 if (t != 0 && t > -1.9) {
518 descent = t;
519 }
520 }
521 obj2.free();
522
523 // font FontBBox
524 if (obj1.dictLookup("FontBBox", &obj2)->isArray()) {
525 for (i = 0; i < 4 && i < obj2.arrayGetLength(); ++i) {
526 if (obj2.arrayGet(i, &obj3)->isNum()) {
527 fontBBox[i] = 0.001 * obj3.getNum();
528 }
529 obj3.free();
530 }
531 }
532 obj2.free();
533
534 }
535 obj1.free();
536 }
537
538 CharCodeToUnicode *GfxFont::readToUnicodeCMap(Dict *fontDict, int nBits,
539 CharCodeToUnicode *ctu) {
540 GString *buf;
541 Object obj1;
542 char buf2[4096];
543 int n;
544
545 if (!fontDict->lookup("ToUnicode", &obj1)->isStream()) {
546 obj1.free();
547 return NULL;
548 }
549 buf = new GString();
550 obj1.streamReset();
551 while ((n = obj1.streamGetBlock(buf2, sizeof(buf2))) > 0) {
552 buf->append(buf2, n);
553 }
554 obj1.streamClose();
555 obj1.free();
556 if (ctu) {
557 ctu->mergeCMap(buf, nBits);
558 } else {
559 ctu = CharCodeToUnicode::parseCMap(buf, nBits);
560 }
561 delete buf;
562 hasToUnicode = gTrue;
563 return ctu;
564 }
565
566 GfxFontLoc *GfxFont::locateFont(XRef *xref, GBool ps) {
567 GfxFontLoc *fontLoc;
568 SysFontType sysFontType;
569 FoFiIdentifierType fft;
570 GString *path, *base14Name, *substName;
571 PSFontParam16 *psFont16;
572 Object refObj, embFontObj;
573 int substIdx, fontNum;
574 double oblique;
575 GBool embed;
576
577 if (type == fontType3) {
578 return NULL;
579 }
580
581 //----- embedded font
582 if (embFontID.num >= 0) {
583 embed = gTrue;
584 refObj.initRef(embFontID.num, embFontID.gen);
585 refObj.fetch(xref, &embFontObj);
586 if (!embFontObj.isStream()) {
587 error(errSyntaxError, -1, "Embedded font object is wrong type");
588 embed = gFalse;
589 }
590 embFontObj.free();
591 refObj.free();
592 if (embed) {
593 if (ps) {
594 switch (type) {
595 case fontType1:
596 case fontType1C:
597 case fontType1COT:
598 embed = globalParams->getPSEmbedType1();
599 break;
600 case fontTrueType:
601 case fontTrueTypeOT:
602 embed = globalParams->getPSEmbedTrueType();
603 break;
604 case fontCIDType0C:
605 case fontCIDType0COT:
606 embed = globalParams->getPSEmbedCIDPostScript();
607 break;
608 case fontCIDType2:
609 case fontCIDType2OT:
610 embed = globalParams->getPSEmbedCIDTrueType();
611 break;
612 default:
613 break;
614 }
615 }
616 if (embed) {
617 fontLoc = new GfxFontLoc();
618 fontLoc->locType = gfxFontLocEmbedded;
619 fontLoc->fontType = type;
620 fontLoc->embFontID = embFontID;
621 return fontLoc;
622 }
623 }
624 }
625
626 //----- PS passthrough
627 if (ps && name && !isCIDFont() && globalParams->getPSFontPassthrough()) {
628 fontLoc = new GfxFontLoc();
629 fontLoc->locType = gfxFontLocResident;
630 fontLoc->fontType = fontType1;
631 fontLoc->path = name->copy();
632 return fontLoc;
633 }
634
635 //----- external font file (fontFile, fontDir)
636 if (name && (path = globalParams->findFontFile(name))) {
637 if ((fontLoc = getExternalFont(path, 0, 0, isCIDFont()))) {
638 return fontLoc;
639 }
640 }
641
642 //----- PS resident Base-14 font
643 if (ps && !isCIDFont() && ((Gfx8BitFont *)this)->base14) {
644 fontLoc = new GfxFontLoc();
645 fontLoc->locType = gfxFontLocResident;
646 fontLoc->fontType = fontType1;
647 fontLoc->path = new GString(((Gfx8BitFont *)this)->base14->base14Name);
648 return fontLoc;
649 }
650
651 //----- external font file for Base-14 font
652 if (!ps && !isCIDFont() && ((Gfx8BitFont *)this)->base14) {
653 base14Name = new GString(((Gfx8BitFont *)this)->base14->base14Name);
654 path = globalParams->findBase14FontFile(base14Name, &fontNum, &oblique);
655 delete base14Name;
656 if (path && (fontLoc = getExternalFont(path, fontNum, oblique, gFalse))) {
657 return fontLoc;
658 }
659 }
660
661 //----- system font
662 if (name && (path = globalParams->findSystemFontFile(name, &sysFontType,
663 &fontNum))) {
664 fontLoc = new GfxFontLoc();
665 fontLoc->locType = gfxFontLocExternal;
666 fontLoc->path = path;
667 fontLoc->fontNum = fontNum;
668 if (isCIDFont()) {
669 if (sysFontType == sysFontTTF || sysFontType == sysFontTTC) {
670 fontLoc->fontType = fontCIDType2;
671 return fontLoc;
672 } else if (sysFontType == sysFontOTF) {
673 fft = FoFiIdentifier::identifyFile(fontLoc->path->getCString());
674 if (fft == fofiIdOpenTypeCFFCID) {
675 fontLoc->fontType = fontCIDType0COT;
676 return fontLoc;
677 } else if (fft == fofiIdTrueType) {
678 fontLoc->fontType = fontCIDType2;
679 return fontLoc;
680 }
681 }
682 } else {
683 if (sysFontType == sysFontTTF || sysFontType == sysFontTTC) {
684 fontLoc->fontType = fontTrueType;
685 return fontLoc;
686 } else if (sysFontType == sysFontPFA || sysFontType == sysFontPFB) {
687 fontLoc->fontType = fontType1;
688 return fontLoc;
689 } else if (sysFontType == sysFontOTF) {
690 fft = FoFiIdentifier::identifyFile(fontLoc->path->getCString());
691 if (fft == fofiIdOpenTypeCFF8Bit) {
692 fontLoc->fontType = fontType1COT;
693 return fontLoc;
694 } else if (fft == fofiIdTrueType) {
695 fontLoc->fontType = fontTrueTypeOT;
696 return fontLoc;
697 }
698 }
699 }
700 delete fontLoc;
701 }
702
703 if (!isCIDFont()) {
704
705 //----- 8-bit PS resident font
706 if (ps) {
707 if (name && (path = globalParams->getPSResidentFont(name))) {
708 fontLoc = new GfxFontLoc();
709 fontLoc->locType = gfxFontLocResident;
710 fontLoc->fontType = fontType1;
711 fontLoc->path = path;
712 return fontLoc;
713 }
714 }
715
716 //----- 8-bit font substitution
717 if (flags & fontFixedWidth) {
718 substIdx = 0;
719 } else if (flags & fontSerif) {
720 substIdx = 8;
721 } else {
722 substIdx = 4;
723 }
724 if (isBold()) {
725 substIdx += 2;
726 }
727 if (isItalic()) {
728 substIdx += 1;
729 }
730 substName = new GString(base14SubstFonts[substIdx]);
731 if (ps) {
732 error(errSyntaxWarning, -1, "Substituting font '{0:s}' for '{1:t}'",
733 base14SubstFonts[substIdx], name);
734 fontLoc = new GfxFontLoc();
735 fontLoc->locType = gfxFontLocResident;
736 fontLoc->fontType = fontType1;
737 fontLoc->path = substName;
738 fontLoc->substIdx = substIdx;
739 return fontLoc;
740 } else {
741 path = globalParams->findBase14FontFile(substName, &fontNum, &oblique);
742 delete substName;
743 if (path) {
744 if ((fontLoc = getExternalFont(path, fontNum, oblique, gFalse))) {
745 error(errSyntaxWarning, -1, "Substituting font '{0:s}' for '{1:t}'",
746 base14SubstFonts[substIdx], name);
747 fontLoc->substIdx = substIdx;
748 return fontLoc;
749 }
750 }
751 }
752
753 // failed to find a substitute font
754 return NULL;
755 }
756
757 //----- 16-bit PS resident font
758 if (ps && name && ((psFont16 = globalParams->getPSResidentFont16(
759 name,
760 ((GfxCIDFont *)this)->getWMode())))) {
761 fontLoc = new GfxFontLoc();
762 fontLoc->locType = gfxFontLocResident;
763 fontLoc->fontType = fontCIDType0; // this is not used
764 fontLoc->path = psFont16->psFontName->copy();
765 fontLoc->encoding = psFont16->encoding->copy();
766 fontLoc->wMode = psFont16->wMode;
767 return fontLoc;
768 }
769 if (ps && ((psFont16 = globalParams->getPSResidentFontCC(
770 ((GfxCIDFont *)this)->getCollection(),
771 ((GfxCIDFont *)this)->getWMode())))) {
772 error(errSyntaxWarning, -1, "Substituting font '{0:t}' for '{1:t}'",
773 psFont16->psFontName, name);
774 fontLoc = new GfxFontLoc();
775 fontLoc->locType = gfxFontLocResident;
776 fontLoc->fontType = fontCIDType0; // this is not used
777 fontLoc->path = psFont16->psFontName->copy();
778 fontLoc->encoding = psFont16->encoding->copy();
779 fontLoc->wMode = psFont16->wMode;
780 return fontLoc;
781 }
782
783 //----- CID font substitution
784 if ((path = globalParams->findCCFontFile(
785 ((GfxCIDFont *)this)->getCollection()))) {
786 if ((fontLoc = getExternalFont(path, 0, 0, gTrue))) {
787 error(errSyntaxWarning, -1, "Substituting font '{0:t}' for '{1:t}'",
788 fontLoc->path, name);
789 return fontLoc;
790 }
791 }
792
793 // failed to find a substitute font
794 return NULL;
795 }
796
797 GfxFontLoc *GfxFont::locateBase14Font(GString *base14Name) {
798 GString *path;
799 int fontNum;
800 double oblique;
801
802 path = globalParams->findBase14FontFile(base14Name, &fontNum, &oblique);
803 if (!path) {
804 return NULL;
805 }
806 return getExternalFont(path, fontNum, oblique, gFalse);
807 }
808
809 GfxFontLoc *GfxFont::getExternalFont(GString *path, int fontNum,
810 double oblique, GBool cid) {
811 FoFiIdentifierType fft;
812 GfxFontType fontType;
813 GfxFontLoc *fontLoc;
814
815 fft = FoFiIdentifier::identifyFile(path->getCString());
816 switch (fft) {
817 case fofiIdType1PFA:
818 case fofiIdType1PFB:
819 fontType = fontType1;
820 break;
821 case fofiIdCFF8Bit:
822 fontType = fontType1C;
823 break;
824 case fofiIdCFFCID:
825 fontType = fontCIDType0C;
826 break;
827 case fofiIdTrueType:
828 case fofiIdTrueTypeCollection:
829 fontType = cid ? fontCIDType2 : fontTrueType;
830 break;
831 case fofiIdOpenTypeCFF8Bit:
832 fontType = fontType1COT;
833 break;
834 case fofiIdOpenTypeCFFCID:
835 fontType = fontCIDType0COT;
836 break;
837 case fofiIdDfont:
838 fontType = cid ? fontCIDType2 : fontTrueType;
839 break;
840 case fofiIdUnknown:
841 case fofiIdError:
842 default:
843 fontType = fontUnknownType;
844 break;
845 }
846 if (fontType == fontUnknownType ||
847 (cid ? (fontType < fontCIDType0)
848 : (fontType >= fontCIDType0))) {
849 delete path;
850 return NULL;
851 }
852 fontLoc = new GfxFontLoc();
853 fontLoc->locType = gfxFontLocExternal;
854 fontLoc->fontType = fontType;
855 fontLoc->path = path;
856 fontLoc->fontNum = fontNum;
857 fontLoc->oblique = oblique;
858 return fontLoc;
859 }
860
861 char *GfxFont::readEmbFontFile(XRef *xref, int *len) {
862 char *buf;
863 Object obj1, obj2;
864 Stream *str;
865 int size, n;
866
867 obj1.initRef(embFontID.num, embFontID.gen);
868 obj1.fetch(xref, &obj2);
869 if (!obj2.isStream()) {
870 error(errSyntaxError, -1, "Embedded font file is not a stream");
871 obj2.free();
872 obj1.free();
873 embFontID.num = -1;
874 return NULL;
875 }
876 str = obj2.getStream();
877
878 size = 4096;
879 buf = (char *)gmalloc(size);
880 *len = 0;
881 str->reset();
882 do {
883 if (*len > size - 4096) {
884 if (size > INT_MAX / 2) {
885 error(errSyntaxError, -1, "Embedded font file is too large");
886 break;
887 }
888 size *= 2;
889 buf = (char *)grealloc(buf, size);
890 }
891 n = str->getBlock(buf + *len, 4096);
892 *len += n;
893 } while (n == 4096);
894 str->close();
895
896 obj2.free();
897 obj1.free();
898
899 return buf;
900 }
901
902 //------------------------------------------------------------------------
903 // Gfx8BitFont
904 //------------------------------------------------------------------------
905
906 Gfx8BitFont::Gfx8BitFont(XRef *xref, const char *tagA, Ref idA, GString *nameA,
907 GfxFontType typeA, Ref embFontIDA, Dict *fontDict):
908 GfxFont(tagA, idA, nameA, typeA, embFontIDA)
909 {
910 GString *name2;
911 BuiltinFont *builtinFont;
912 const char **baseEnc;
913 char *buf;
914 int len;
915 FoFiType1 *ffT1;
916 FoFiType1C *ffT1C;
917 int code, code2;
918 char *charName;
919 GBool missing, hex;
920 Unicode toUnicode[256];
921 CharCodeToUnicode *utu, *ctu2;
922 Unicode uBuf[8];
923 double mul;
924 int firstChar, lastChar;
925 Gushort w;
926 Object obj1, obj2, obj3;
927 int n, i, a, b, m;
928
929 ctu = NULL;
930
931 // do font name substitution for various aliases of the Base 14 font
932 // names
933 base14 = NULL;
934 if (name) {
935 name2 = name->copy();
936 i = 0;
937 while (i < name2->getLength()) {
938 if (name2->getChar(i) == ' ') {
939 name2->del(i);
940 } else {
941 ++i;
942 }
943 }
944 a = 0;
945 b = sizeof(base14FontMap) / sizeof(Base14FontMapEntry);
946 // invariant: base14FontMap[a].altName <= name2 < base14FontMap[b].altName
947 while (b - a > 1) {
948 m = (a + b) / 2;
949 if (name2->cmp(base14FontMap[m].altName) >= 0) {
950 a = m;
951 } else {
952 b = m;
953 }
954 }
955 if (!name2->cmp(base14FontMap[a].altName)) {
956 base14 = &base14FontMap[a];
957 }
958 delete name2;
959 }
960
961 // is it a built-in font?
962 builtinFont = NULL;
963 if (base14) {
964 for (i = 0; i < nBuiltinFonts; ++i) {
965 if (!strcmp(base14->base14Name, builtinFonts[i].name)) {
966 builtinFont = &builtinFonts[i];
967 break;
968 }
969 }
970 }
971
972 // default ascent/descent values
973 if (builtinFont) {
974 missingWidth = builtinFont->missingWidth;
975 ascent = 0.001 * builtinFont->ascent;
976 descent = 0.001 * builtinFont->descent;
977 declaredAscent = ascent;
978 fontBBox[0] = 0.001 * builtinFont->bbox[0];
979 fontBBox[1] = 0.001 * builtinFont->bbox[1];
980 fontBBox[2] = 0.001 * builtinFont->bbox[2];
981 fontBBox[3] = 0.001 * builtinFont->bbox[3];
982 } else {
983 missingWidth = 0;
984 ascent = 0.75;
985 descent = -0.25;
986 declaredAscent = ascent;
987 fontBBox[0] = fontBBox[1] = fontBBox[2] = fontBBox[3] = 0;
988 }
989
990 // get info from font descriptor
991 readFontDescriptor(xref, fontDict);
992
993 // for Base-14 fonts (even if embedded), don't trust the
994 // ascent/descent/bbox values from the font descriptor
995 if (builtinFont) {
996 ascent = 0.001 * builtinFont->ascent;
997 descent = 0.001 * builtinFont->descent;
998 declaredAscent = ascent;
999 fontBBox[0] = 0.001 * builtinFont->bbox[0];
1000 fontBBox[1] = 0.001 * builtinFont->bbox[1];
1001 fontBBox[2] = 0.001 * builtinFont->bbox[2];
1002 fontBBox[3] = 0.001 * builtinFont->bbox[3];
1003 }
1004
1005 // get font matrix
1006 fontMat[0] = fontMat[3] = 1;
1007 fontMat[1] = fontMat[2] = fontMat[4] = fontMat[5] = 0;
1008 if (fontDict->lookup("FontMatrix", &obj1)->isArray()) {
1009 for (i = 0; i < 6 && i < obj1.arrayGetLength(); ++i) {
1010 if (obj1.arrayGet(i, &obj2)->isNum()) {
1011 fontMat[i] = obj2.getNum();
1012 }
1013 obj2.free();
1014 }
1015 }
1016 obj1.free();
1017
1018 // get Type 3 bounding box, font definition, and resources
1019 if (type == fontType3) {
1020 if (fontDict->lookup("FontBBox", &obj1)->isArray()) {
1021 for (i = 0; i < 4 && i < obj1.arrayGetLength(); ++i) {
1022 if (obj1.arrayGet(i, &obj2)->isNum()) {
1023 fontBBox[i] = obj2.getNum();
1024 }
1025 obj2.free();
1026 }
1027 }
1028 obj1.free();
1029 if (!fontDict->lookup("CharProcs", &charProcs)->isDict()) {
1030 error(errSyntaxError, -1,
1031 "Missing or invalid CharProcs dictionary in Type 3 font");
1032 charProcs.free();
1033 }
1034 if (!fontDict->lookup("Resources", &resources)->isDict()) {
1035 resources.free();
1036 }
1037 }
1038
1039 //----- build the font encoding -----
1040
1041 // Encodings start with a base encoding, which can come from
1042 // (in order of priority):
1043 // 1. FontDict.Encoding or FontDict.Encoding.BaseEncoding
1044 // - MacRoman / MacExpert / WinAnsi / Standard
1045 // 2. embedded or external font file
1046 // 3. default:
1047 // - builtin --> builtin encoding
1048 // - TrueType --> WinAnsiEncoding
1049 // - others --> StandardEncoding
1050 // and then add a list of differences (if any) from
1051 // FontDict.Encoding.Differences.
1052
1053 // check FontDict for base encoding
1054 hasEncoding = gFalse;
1055 usesMacRomanEnc = gFalse;
1056 baseEnc = NULL;
1057 baseEncFromFontFile = gFalse;
1058 fontDict->lookup("Encoding", &obj1);
1059 if (obj1.isDict()) {
1060 obj1.dictLookup("BaseEncoding", &obj2);
1061 if (obj2.isName("MacRomanEncoding")) {
1062 hasEncoding = gTrue;
1063 usesMacRomanEnc = gTrue;
1064 baseEnc = macRomanEncoding;
1065 } else if (obj2.isName("MacExpertEncoding")) {
1066 hasEncoding = gTrue;
1067 baseEnc = macExpertEncoding;
1068 } else if (obj2.isName("WinAnsiEncoding")) {
1069 hasEncoding = gTrue;
1070 baseEnc = winAnsiEncoding;
1071 }
1072 obj2.free();
1073 } else if (obj1.isName("MacRomanEncoding")) {
1074 hasEncoding = gTrue;
1075 usesMacRomanEnc = gTrue;
1076 baseEnc = macRomanEncoding;
1077 } else if (obj1.isName("MacExpertEncoding")) {
1078 hasEncoding = gTrue;
1079 baseEnc = macExpertEncoding;
1080 } else if (obj1.isName("WinAnsiEncoding")) {
1081 hasEncoding = gTrue;
1082 baseEnc = winAnsiEncoding;
1083 }
1084
1085 // check embedded font file for base encoding
1086 // (only for Type 1 fonts - trying to get an encoding out of a
1087 // TrueType font is a losing proposition)
1088 ffT1 = NULL;
1089 ffT1C = NULL;
1090 buf = NULL;
1091 if (type == fontType1 && embFontID.num >= 0) {
1092 if ((buf = readEmbFontFile(xref, &len))) {
1093 if ((ffT1 = FoFiType1::make(buf, len))) {
1094 if (ffT1->getName()) {
1095 if (embFontName) {
1096 delete embFontName;
1097 }
1098 embFontName = new GString(ffT1->getName());
1099 }
1100 if (!baseEnc) {
1101 baseEnc = (const char **)ffT1->getEncoding();
1102 baseEncFromFontFile = gTrue;
1103 }
1104 }
1105 gfree(buf);
1106 }
1107 } else if (type == fontType1C && embFontID.num >= 0) {
1108 if ((buf = readEmbFontFile(xref, &len))) {
1109 if ((ffT1C = FoFiType1C::make(buf, len))) {
1110 if (ffT1C->getName()) {
1111 if (embFontName) {
1112 delete embFontName;
1113 }
1114 embFontName = new GString(ffT1C->getName());
1115 }
1116 if (!baseEnc) {
1117 baseEnc = (const char **)ffT1C->getEncoding();
1118 baseEncFromFontFile = gTrue;
1119 }
1120 }
1121 gfree(buf);
1122 }
1123 }
1124
1125 // get default base encoding
1126 if (!baseEnc) {
1127 if (builtinFont && embFontID.num < 0) {
1128 baseEnc = builtinFont->defaultBaseEnc;
1129 hasEncoding = gTrue;
1130 } else if (type == fontTrueType) {
1131 baseEnc = winAnsiEncoding;
1132 } else {
1133 baseEnc = standardEncoding;
1134 }
1135 }
1136
1137 // copy the base encoding
1138 for (i = 0; i < 256; ++i) {
1139 enc[i] = (char *)baseEnc[i];
1140 if ((encFree[i] = (char)baseEncFromFontFile) && enc[i]) {
1141 enc[i] = copyString(baseEnc[i]);
1142 }
1143 }
1144
1145 // some Type 1C font files have empty encodings, which can break the
1146 // T1C->T1 conversion (since the 'seac' operator depends on having
1147 // the accents in the encoding), so we fill in any gaps from
1148 // StandardEncoding
1149 if (type == fontType1C && embFontID.num >= 0 && baseEncFromFontFile) {
1150 for (i = 0; i < 256; ++i) {
1151 if (!enc[i] && standardEncoding[i]) {
1152 enc[i] = (char *)standardEncoding[i];
1153 encFree[i] = gFalse;
1154 }
1155 }
1156 }
1157
1158 // merge differences into encoding
1159 if (obj1.isDict()) {
1160 obj1.dictLookup("Differences", &obj2);
1161 if (obj2.isArray()) {
1162 hasEncoding = gTrue;
1163 code = 0;
1164 for (i = 0; i < obj2.arrayGetLength(); ++i) {
1165 obj2.arrayGet(i, &obj3);
1166 if (obj3.isInt()) {
1167 code = obj3.getInt();
1168 } else if (obj3.isName()) {
1169 if (code >= 0 && code < 256) {
1170 if (encFree[code]) {
1171 gfree(enc[code]);
1172 }
1173 enc[code] = copyString(obj3.getName());
1174 encFree[code] = gTrue;
1175 }
1176 ++code;
1177 } else {
1178 error(errSyntaxError, -1,
1179 "Wrong type in font encoding resource differences ({0:s})",
1180 obj3.getTypeName());
1181 }
1182 obj3.free();
1183 }
1184 }
1185 obj2.free();
1186 }
1187 obj1.free();
1188 if (ffT1) {
1189 delete ffT1;
1190 }
1191 if (ffT1C) {
1192 delete ffT1C;
1193 }
1194
1195 //----- build the mapping to Unicode -----
1196
1197 // pass 1: use the name-to-Unicode mapping table
1198 missing = hex = gFalse;
1199 for (code = 0; code < 256; ++code) {
1200 if ((charName = enc[code])) {
1201 if (!(toUnicode[code] = globalParams->mapNameToUnicode(charName)) &&
1202 strcmp(charName, ".notdef")) {
1203 // if it wasn't in the name-to-Unicode table, check for a
1204 // name that looks like 'Axx' or 'xx', where 'A' is any letter
1205 // and 'xx' is two hex digits
1206 if ((strlen(charName) == 3 &&
1207 isalpha(charName[0] & 0xff) &&
1208 isxdigit(charName[1] & 0xff) && isxdigit(charName[2] & 0xff) &&
1209 ((charName[1] >= 'a' && charName[1] <= 'f') ||
1210 (charName[1] >= 'A' && charName[1] <= 'F') ||
1211 (charName[2] >= 'a' && charName[2] <= 'f') ||
1212 (charName[2] >= 'A' && charName[2] <= 'F'))) ||
1213 (strlen(charName) == 2 &&
1214 isxdigit(charName[0] & 0xff) && isxdigit(charName[1] & 0xff) &&
1215 ((charName[0] >= 'a' && charName[0] <= 'f') ||
1216 (charName[0] >= 'A' && charName[0] <= 'F') ||
1217 (charName[1] >= 'a' && charName[1] <= 'f') ||
1218 (charName[1] >= 'A' && charName[1] <= 'F')))) {
1219 hex = gTrue;
1220 }
1221 missing = gTrue;
1222 }
1223 } else {
1224 toUnicode[code] = 0;
1225 }
1226 }
1227
1228 // pass 2: try to fill in the missing chars, looking for names of
1229 // any of the following forms:
1230 // - 'xx'
1231 // - 'Axx'
1232 // - 'nn'
1233 // - 'Ann'
1234 // - 'ABnn'
1235 // - 'unixxxx' (possibly followed by garbage - some Arabic files
1236 // use 'uni0628.medi', etc.)
1237 // where 'A' and 'B' are any letters, 'xx' is two hex digits, 'xxxx'
1238 // is four hex digits, and 'nn' is 2-4 decimal digits
1239 usedNumericHeuristic = gFalse;
1240 if (missing && globalParams->getMapNumericCharNames()) {
1241 for (code = 0; code < 256; ++code) {
1242 if ((charName = enc[code]) && !toUnicode[code] &&
1243 strcmp(charName, ".notdef")) {
1244 n = (int)strlen(charName);
1245 code2 = -1;
1246 if (hex && n == 3 && isalpha(charName[0] & 0xff) &&
1247 isxdigit(charName[1] & 0xff) && isxdigit(charName[2] & 0xff)) {
1248 sscanf(charName+1, "%x", &code2);
1249 } else if (hex && n == 2 &&
1250 isxdigit(charName[0] & 0xff) &&
1251 isxdigit(charName[1] & 0xff)) {
1252 sscanf(charName, "%x", &code2);
1253 } else if (!hex && n >= 2 && n <= 4 &&
1254 isdigit(charName[0] & 0xff) && isdigit(charName[1] & 0xff)) {
1255 code2 = atoi(charName);
1256 } else if (n >= 3 && n <= 5 &&
1257 isdigit(charName[1] & 0xff) && isdigit(charName[2] & 0xff)) {
1258 code2 = atoi(charName+1);
1259 } else if (n >= 4 && n <= 6 &&
1260 isdigit(charName[2] & 0xff) && isdigit(charName[3] & 0xff)) {
1261 code2 = atoi(charName+2);
1262 } else if (n >= 7 && charName[0] == 'u' && charName[1] == 'n' &&
1263 charName[2] == 'i' &&
1264 isxdigit(charName[3] & 0xff) &&
1265 isxdigit(charName[4] & 0xff) &&
1266 isxdigit(charName[5] & 0xff) &&
1267 isxdigit(charName[6] & 0xff)) {
1268 sscanf(charName + 3, "%x", &code2);
1269 }
1270 if (code2 >= 0 && code2 <= 0xffff) {
1271 toUnicode[code] = (Unicode)code2;
1272 usedNumericHeuristic = gTrue;
1273 }
1274 }
1275 }
1276
1277 // if the 'mapUnknownCharNames' flag is set, do a simple pass-through
1278 // mapping for unknown character names
1279 } else if (missing && globalParams->getMapUnknownCharNames()) {
1280 for (code = 0; code < 256; ++code) {
1281 if (!toUnicode[code]) {
1282 toUnicode[code] = code;
1283 }
1284 }
1285 }
1286
1287 // construct the char code -> Unicode mapping object
1288 ctu = CharCodeToUnicode::make8BitToUnicode(toUnicode);
1289
1290 // merge in a ToUnicode CMap, if there is one -- this overwrites
1291 // existing entries in ctu, i.e., the ToUnicode CMap takes
1292 // precedence, but the other encoding info is allowed to fill in any
1293 // holes
1294 readToUnicodeCMap(fontDict, 8, ctu);
1295
1296 // look for a Unicode-to-Unicode mapping
1297 if (name && (utu = globalParams->getUnicodeToUnicode(name))) {
1298 for (i = 0; i < 256; ++i) {
1299 toUnicode[i] = 0;
1300 }
1301 ctu2 = CharCodeToUnicode::make8BitToUnicode(toUnicode);
1302 for (i = 0; i < 256; ++i) {
1303 n = ctu->mapToUnicode((CharCode)i, uBuf, 8);
1304 if (n >= 1) {
1305 n = utu->mapToUnicode((CharCode)uBuf[0], uBuf, 8);
1306 if (n >= 1) {
1307 ctu2->setMapping((CharCode)i, uBuf, n);
1308 }
1309 }
1310 }
1311 utu->decRefCnt();
1312 delete ctu;
1313 ctu = ctu2;
1314 }
1315
1316 //----- get the character widths -----
1317
1318 // initialize all widths
1319 for (code = 0; code < 256; ++code) {
1320 widths[code] = missingWidth * 0.001;
1321 }
1322
1323 // use widths from font dict, if present
1324 fontDict->lookup("FirstChar", &obj1);
1325 firstChar = obj1.isInt() ? obj1.getInt() : 0;
1326 obj1.free();
1327 if (firstChar < 0 || firstChar > 255) {
1328 firstChar = 0;
1329 }
1330 fontDict->lookup("LastChar", &obj1);
1331 lastChar = obj1.isInt() ? obj1.getInt() : 255;
1332 obj1.free();
1333 if (lastChar < 0 || lastChar > 255) {
1334 lastChar = 255;
1335 }
1336 mul = (type == fontType3) ? fontMat[0] : 0.001;
1337 fontDict->lookup("Widths", &obj1);
1338 if (obj1.isArray()) {
1339 flags |= fontFixedWidth;
1340 if (obj1.arrayGetLength() < lastChar - firstChar + 1) {
1341 lastChar = firstChar + obj1.arrayGetLength() - 1;
1342 }
1343 for (code = firstChar; code <= lastChar; ++code) {
1344 obj1.arrayGet(code - firstChar, &obj2);
1345 if (obj2.isNum()) {
1346 widths[code] = obj2.getNum() * mul;
1347 if (fabs(widths[code] - widths[firstChar]) > 0.00001) {
1348 flags &= ~fontFixedWidth;
1349 }
1350 }
1351 obj2.free();
1352 }
1353
1354 // use widths from built-in font
1355 } else if (builtinFont) {
1356 // this is a kludge for broken PDF files that encode char 32
1357 // as .notdef
1358 if (builtinFont->widths->getWidth("space", &w)) {
1359 widths[32] = 0.001 * w;
1360 }
1361 for (code = 0; code < 256; ++code) {
1362 if (enc[code] && builtinFont->widths->getWidth(enc[code], &w)) {
1363 widths[code] = 0.001 * w;
1364 }
1365 }
1366
1367 // couldn't find widths -- use defaults
1368 } else {
1369 // this is technically an error -- the Widths entry is required
1370 // for all but the Base-14 fonts -- but certain PDF generators
1371 // apparently don't include widths for Arial and TimesNewRoman
1372 if (isFixedWidth()) {
1373 i = 0;
1374 } else if (isSerif()) {
1375 i = 8;
1376 } else {
1377 i = 4;
1378 }
1379 if (isBold()) {
1380 i += 2;
1381 }
1382 if (isItalic()) {
1383 i += 1;
1384 }
1385 builtinFont = builtinFontSubst[i];
1386 // this is a kludge for broken PDF files that encode char 32
1387 // as .notdef
1388 if (builtinFont->widths->getWidth("space", &w)) {
1389 widths[32] = 0.001 * w;
1390 }
1391 for (code = 0; code < 256; ++code) {
1392 if (enc[code] && builtinFont->widths->getWidth(enc[code], &w)) {
1393 widths[code] = 0.001 * w;
1394 }
1395 }
1396 }
1397 obj1.free();
1398
1399 ok = gTrue;
1400 }
1401
1402 Gfx8BitFont::~Gfx8BitFont() {
1403 int i;
1404
1405 for (i = 0; i < 256; ++i) {
1406 if (encFree[i] && enc[i]) {
1407 gfree(enc[i]);
1408 }
1409 }
1410 ctu->decRefCnt();
1411 if (charProcs.isDict()) {
1412 charProcs.free();
1413 }
1414 if (resources.isDict()) {
1415 resources.free();
1416 }
1417 }
1418
1419 int Gfx8BitFont::getNextChar(char *s, int len, CharCode *code,
1420 Unicode *u, int uSize, int *uLen,
1421 double *dx, double *dy, double *ox, double *oy) {
1422 CharCode c;
1423
1424 *code = c = (CharCode)(*s & 0xff);
1425 *uLen = ctu->mapToUnicode(c, u, uSize);
1426 *dx = widths[c];
1427 *dy = *ox = *oy = 0;
1428 return 1;
1429 }
1430
1431 CharCodeToUnicode *Gfx8BitFont::getToUnicode() {
1432 ctu->incRefCnt();
1433 return ctu;
1434 }
1435
1436 int *Gfx8BitFont::getCodeToGIDMap(FoFiTrueType *ff) {
1437 int *map;
1438 int cmapPlatform, cmapEncoding;
1439 int unicodeCmap, macRomanCmap, msSymbolCmap, cmap;
1440 GBool nonsymbolic, useMacRoman, useUnicode;
1441 char *charName;
1442 Unicode u;
1443 int code, i, n;
1444
1445 map = (int *)gmallocn(256, sizeof(int));
1446 for (i = 0; i < 256; ++i) {
1447 map[i] = 0;
1448 }
1449
1450 // This is based on the cmap/encoding selection algorithm in the PDF
1451 // 2.0 spec, but with some differences to match up with Adobe's
1452 // behavior.
1453 unicodeCmap = macRomanCmap = msSymbolCmap = -1;
1454 for (i = 0; i < ff->getNumCmaps(); ++i) {
1455 cmapPlatform = ff->getCmapPlatform(i);
1456 cmapEncoding = ff->getCmapEncoding(i);
1457 if ((cmapPlatform == 3 && cmapEncoding == 1) ||
1458 (cmapPlatform == 0 && cmapEncoding <= 4)) {
1459 unicodeCmap = i;
1460 } else if (cmapPlatform == 1 && cmapEncoding == 0) {
1461 macRomanCmap = i;
1462 } else if (cmapPlatform == 3 && cmapEncoding == 0) {
1463 msSymbolCmap = i;
1464 }
1465 }
1466 useMacRoman = gFalse;
1467 useUnicode = gFalse;
1468 nonsymbolic = !(flags & fontSymbolic);
1469 if (usesMacRomanEnc && macRomanCmap >= 0) {
1470 cmap = macRomanCmap;
1471 useMacRoman = gTrue;
1472 } else if (embFontID.num < 0 && hasEncoding && unicodeCmap >= 0) {
1473 cmap = unicodeCmap;
1474 useUnicode = gTrue;
1475 } else if (nonsymbolic && unicodeCmap >= 0) {
1476 cmap = unicodeCmap;
1477 useUnicode = gTrue;
1478 } else if (nonsymbolic && macRomanCmap >= 0) {
1479 cmap = macRomanCmap;
1480 useMacRoman = gTrue;
1481 } else if (msSymbolCmap >= 0) {
1482 cmap = msSymbolCmap;
1483 } else if (unicodeCmap >= 0) {
1484 cmap = unicodeCmap;
1485 } else if (macRomanCmap >= 0) {
1486 cmap = macRomanCmap;
1487 } else {
1488 cmap = 0;
1489 }
1490
1491 // reverse map the char names through MacRomanEncoding, then map the
1492 // char codes through the cmap; fall back on Unicode if that doesn't
1493 // work
1494 if (useMacRoman) {
1495 for (i = 0; i < 256; ++i) {
1496 if ((charName = enc[i])) {
1497 if ((code = globalParams->getMacRomanCharCode(charName))) {
1498 map[i] = ff->mapCodeToGID(cmap, code);
1499 } else if (unicodeCmap >= 0 &&
1500 (u = globalParams->mapNameToUnicode(charName))) {
1501 map[i] = ff->mapCodeToGID(unicodeCmap, u);
1502 }
1503 } else if (unicodeCmap >= 0 &&
1504 (n = ctu->mapToUnicode((CharCode)i, &u, 1))) {
1505 map[i] = ff->mapCodeToGID(cmap, u);
1506 } else {
1507 map[i] = -1;
1508 }
1509 }
1510
1511 // map Unicode through the cmap
1512 } else if (useUnicode) {
1513 for (i = 0; i < 256; ++i) {
1514 if (((charName = enc[i]) &&
1515 (u = globalParams->mapNameToUnicode(charName))) ||
1516 (n = ctu->mapToUnicode((CharCode)i, &u, 1))) {
1517 map[i] = ff->mapCodeToGID(cmap, u);
1518 } else {
1519 map[i] = -1;
1520 }
1521 }
1522
1523 // map the char codes through the cmap, possibly with an offset of
1524 // 0xf000
1525 } else {
1526 for (i = 0; i < 256; ++i) {
1527 if (!(map[i] = ff->mapCodeToGID(cmap, i))) {
1528 map[i] = ff->mapCodeToGID(cmap, 0xf000 + i);
1529 }
1530 }
1531 }
1532
1533 // try the TrueType 'post' table to handle any unmapped characters
1534 for (i = 0; i < 256; ++i) {
1535 if (map[i] <= 0 && (charName = enc[i])) {
1536 map[i] = ff->mapNameToGID(charName);
1537 }
1538 }
1539
1540 return map;
1541 }
1542
1543 int *Gfx8BitFont::getCodeToGIDMap(FoFiType1C *ff) {
1544 int *map;
1545 GHash *nameToGID;
1546 int i, gid;
1547
1548 map = (int *)gmallocn(256, sizeof(int));
1549 for (i = 0; i < 256; ++i) {
1550 map[i] = 0;
1551 }
1552
1553 nameToGID = ff->getNameToGIDMap();
1554 for (i = 0; i < 256; ++i) {
1555 if (!enc[i]) {
1556 continue;
1557 }
1558 gid = nameToGID->lookupInt(enc[i]);
1559 if (gid < 0 || gid >= 65536) {
1560 continue;
1561 }
1562 map[i] = gid;
1563 }
1564
1565 delete nameToGID;
1566
1567 return map;
1568 }
1569
1570 Dict *Gfx8BitFont::getCharProcs() {
1571 return charProcs.isDict() ? charProcs.getDict() : (Dict *)NULL;
1572 }
1573
1574 Object *Gfx8BitFont::getCharProc(int code, Object *proc) {
1575 if (enc[code] && charProcs.isDict()) {
1576 charProcs.dictLookup(enc[code], proc);
1577 } else {
1578 proc->initNull();
1579 }
1580 return proc;
1581 }
1582
1583 Object *Gfx8BitFont::getCharProcNF(int code, Object *proc) {
1584 if (enc[code] && charProcs.isDict()) {
1585 charProcs.dictLookupNF(enc[code], proc);
1586 } else {
1587 proc->initNull();
1588 }
1589 return proc;
1590 }
1591
1592 Dict *Gfx8BitFont::getResources() {
1593 return resources.isDict() ? resources.getDict() : (Dict *)NULL;
1594 }
1595
1596 GBool Gfx8BitFont::problematicForUnicode() {
1597 GString *nameLC;
1598 GBool symbolic;
1599
1600 // potential inputs:
1601 // - font is embedded (GfxFont.embFontID.num >= 0)
1602 // - font name (GfxFont.name)
1603 // - font type (GfxFont.type)
1604 // - Base-14 font (Gfx8BitFont.base14 != NULL)
1605 // - symbolic (GfxFont.flags & fontSymbolic)
1606 // - has Encoding array (Gfx8BitFont.hasEncoding)
1607 // - extracted base encoding from embedded font file
1608 // (Gfx8BitFont.baseEncFromFontFile)
1609 // - has a ToUnicode map (GfxFont.hasToUnicode)
1610 // - used the numeric glyph name heuristic
1611 // (Gfx8BitFont.usedNumericHeuristic)
1612
1613 if (name) {
1614 nameLC = name->copy();
1615 nameLC->lowerCase();
1616 symbolic = strstr(nameLC->getCString(), "dingbat") ||
1617 strstr(nameLC->getCString(), "wingding") ||
1618 strstr(nameLC->getCString(), "commpi");
1619 delete nameLC;
1620 if (symbolic) {
1621 return gFalse;
1622 }
1623 }
1624
1625 if (embFontID.num >= 0) {
1626 switch (type) {
1627 case fontType1:
1628 case fontType1C:
1629 case fontType1COT:
1630 return !hasToUnicode && (!hasEncoding || usedNumericHeuristic);
1631
1632 case fontType3:
1633 return !hasToUnicode && !hasEncoding;
1634
1635 case fontTrueType:
1636 case fontTrueTypeOT:
1637 return !hasToUnicode && !hasEncoding;
1638
1639 default:
1640 return !hasToUnicode;
1641 }
1642
1643 } else {
1644 // NB: type will be fontTypeUnknown if the PDF specifies an
1645 // invalid font type -- which is ok, if we have a ToUnicode map or
1646 // an encoding
1647 return !hasToUnicode && !hasEncoding;
1648 }
1649 }
1650
1651 //------------------------------------------------------------------------
1652 // GfxCIDFont
1653 //------------------------------------------------------------------------
1654
1655 GfxCIDFont::GfxCIDFont(XRef *xref, const char *tagA, Ref idA, GString *nameA,
1656 GfxFontType typeA, Ref embFontIDA, Dict *fontDict):
1657 GfxFont(tagA, idA, nameA, typeA, embFontIDA)
1658 {
1659 Dict *desFontDict;
1660 Object desFontDictObj;
1661 Object obj1, obj2, obj3, obj4, obj5, obj6;
1662 CharCodeToUnicode *utu;
1663 CharCode c;
1664 Unicode uBuf[8];
1665 int c1, c2;
1666 int excepsSize, i, j, k, n;
1667
1668 missingWidth = 0;
1669 ascent = 0.95;
1670 descent = -0.35;
1671 declaredAscent = ascent;
1672 fontBBox[0] = fontBBox[1] = fontBBox[2] = fontBBox[3] = 0;
1673 collection = NULL;
1674 cMap = NULL;
1675 ctu = NULL;
1676 ctuUsesCharCode = gTrue;
1677 widths.defWidth = 1.0;
1678 widths.defHeight = -1.0;
1679 widths.defVY = 0.880;
1680 widths.exceps = NULL;
1681 widths.nExceps = 0;
1682 widths.excepsV = NULL;
1683 widths.nExcepsV = 0;
1684 cidToGID = NULL;
1685 cidToGIDLen = 0;
1686
1687 // get the descendant font
1688 if (!fontDict->lookup("DescendantFonts", &obj1)->isArray() ||
1689 obj1.arrayGetLength() == 0) {
1690 error(errSyntaxError, -1,
1691 "Missing or empty DescendantFonts entry in Type 0 font");
1692 obj1.free();
1693 goto err1;
1694 }
1695 if (!obj1.arrayGet(0, &desFontDictObj)->isDict()) {
1696 error(errSyntaxError, -1, "Bad descendant font in Type 0 font");
1697 goto err2;
1698 }
1699 obj1.free();
1700 desFontDict = desFontDictObj.getDict();
1701
1702 // get info from font descriptor
1703 readFontDescriptor(xref, desFontDict);
1704
1705 //----- encoding info -----
1706
1707 // char collection
1708 if (!desFontDict->lookup("CIDSystemInfo", &obj1)->isDict()) {
1709 error(errSyntaxError, -1,
1710 "Missing CIDSystemInfo dictionary in Type 0 descendant font");
1711 goto err2;
1712 }
1713 obj1.dictLookup("Registry", &obj2);
1714 obj1.dictLookup("Ordering", &obj3);
1715 if (!obj2.isString() || !obj3.isString()) {
1716 error(errSyntaxError, -1,
1717 "Invalid CIDSystemInfo dictionary in Type 0 descendant font");
1718 goto err3;
1719 }
1720 collection = obj2.getString()->copy()->append('-')->append(obj3.getString());
1721 obj3.free();
1722 obj2.free();
1723 obj1.free();
1724
1725 // encoding (i.e., CMap)
1726 if (fontDict->lookup("Encoding", &obj1)->isNull()) {
1727 error(errSyntaxError, -1, "Missing Encoding entry in Type 0 font");
1728 goto err2;
1729 }
1730 if (!(cMap = CMap::parse(NULL, collection, &obj1))) {
1731 goto err2;
1732 }
1733
1734 // check for fonts that use the Identity-H encoding (cmap), and the
1735 // Adobe-Identity character collection
1736 identityEnc = obj1.isName("Identity-H") &&
1737 !collection->cmp("Adobe-Identity");
1738
1739 obj1.free();
1740
1741 // CIDToGIDMap
1742 // (the PDF 1.7 spec only allows these for TrueType fonts, but
1743 // Acrobat apparently also allows them for OpenType CFF fonts -- and
1744 // the PDF 2.0 spec has removed the prohibition)
1745 hasIdentityCIDToGID = gFalse;
1746 desFontDict->lookup("CIDToGIDMap", &obj1);
1747 if (obj1.isStream()) {
1748 cidToGIDLen = 0;
1749 i = 64;
1750 cidToGID = (int *)gmallocn(i, sizeof(int));
1751 obj1.streamReset();
1752 while ((c1 = obj1.streamGetChar()) != EOF &&
1753 (c2 = obj1.streamGetChar()) != EOF) {
1754 if (cidToGIDLen == i) {
1755 i *= 2;
1756 cidToGID = (int *)greallocn(cidToGID, i, sizeof(int));
1757 }
1758 cidToGID[cidToGIDLen++] = (c1 << 8) + c2;
1759 }
1760 obj1.streamClose();
1761 identityEnc = gFalse;
1762 } else if (obj1.isName("Identity")) {
1763 hasIdentityCIDToGID = gTrue;
1764 } else if (!obj1.isNull()) {
1765 error(errSyntaxError, -1, "Invalid CIDToGIDMap entry in CID font");
1766 }
1767 obj1.free();
1768
1769 // look for a ToUnicode CMap
1770 hasKnownCollection = gFalse;
1771 if (globalParams->getUseTrueTypeUnicodeMapping()) {
1772 readTrueTypeUnicodeMapping(xref);
1773 }
1774 if (!ctu) {
1775 ctu = readToUnicodeCMap(fontDict, 16, NULL);
1776 }
1777 if (!ctu) {
1778 ctuUsesCharCode = gFalse;
1779
1780 // use an identity mapping for the "Adobe-Identity" and
1781 // "Adobe-UCS" collections
1782 if (!collection->cmp("Adobe-Identity") ||
1783 !collection->cmp("Adobe-UCS")) {
1784 ctu = CharCodeToUnicode::makeIdentityMapping();
1785
1786 // look for a user-supplied .cidToUnicode file
1787 } else if ((ctu = globalParams->getCIDToUnicode(collection))) {
1788 hasKnownCollection = gTrue;
1789
1790 } else {
1791 error(errSyntaxError, -1,
1792 "Unknown character collection '{0:t}'", collection);
1793
1794 // fall back to an identity mapping
1795 ctu = CharCodeToUnicode::makeIdentityMapping();
1796 }
1797 }
1798
1799 // look for a Unicode-to-Unicode mapping
1800 if (name && (utu = globalParams->getUnicodeToUnicode(name))) {
1801 if (ctu) {
1802 if (ctu->isIdentity()) {
1803 ctu->decRefCnt();
1804 ctu = utu;
1805 } else {
1806 for (c = 0; c < ctu->getLength(); ++c) {
1807 n = ctu->mapToUnicode(c, uBuf, 8);
1808 if (n >= 1) {
1809 n = utu->mapToUnicode((CharCode)uBuf[0], uBuf, 8);
1810 if (n >= 1) {
1811 ctu->setMapping(c, uBuf, n);
1812 }
1813 }
1814 }
1815 utu->decRefCnt();
1816 }
1817 } else {
1818 ctu = utu;
1819 }
1820 }
1821
1822 //----- character metrics -----
1823
1824 // default char width
1825 if (desFontDict->lookup("DW", &obj1)->isNum()) {
1826 widths.defWidth = obj1.getNum() * 0.001;
1827 }
1828 obj1.free();
1829
1830 // char width exceptions
1831 if (desFontDict->lookup("W", &obj1)->isArray()) {
1832 excepsSize = 0;
1833 i = 0;
1834 while (i + 1 < obj1.arrayGetLength()) {
1835 obj1.arrayGet(i, &obj2);
1836 obj1.arrayGet(i + 1, &obj3);
1837 if (obj2.isInt() && obj3.isInt() && i + 2 < obj1.arrayGetLength()) {
1838 if (obj1.arrayGet(i + 2, &obj4)->isNum()) {
1839 if (widths.nExceps == excepsSize) {
1840 excepsSize += 16;
1841 widths.exceps = (GfxFontCIDWidthExcep *)
1842 greallocn(widths.exceps,
1843 excepsSize, sizeof(GfxFontCIDWidthExcep));
1844 }
1845 widths.exceps[widths.nExceps].first = obj2.getInt();
1846 widths.exceps[widths.nExceps].last = obj3.getInt();
1847 widths.exceps[widths.nExceps].width = obj4.getNum() * 0.001;
1848 ++widths.nExceps;
1849 } else {
1850 error(errSyntaxError, -1, "Bad widths array in Type 0 font");
1851 }
1852 obj4.free();
1853 i += 3;
1854 } else if (obj2.isInt() && obj3.isArray()) {
1855 if (widths.nExceps + obj3.arrayGetLength() > excepsSize) {
1856 excepsSize = (widths.nExceps + obj3.arrayGetLength() + 15) & ~15;
1857 widths.exceps = (GfxFontCIDWidthExcep *)
1858 greallocn(widths.exceps,
1859 excepsSize, sizeof(GfxFontCIDWidthExcep));
1860 }
1861 j = obj2.getInt();
1862 for (k = 0; k < obj3.arrayGetLength(); ++k) {
1863 if (obj3.arrayGet(k, &obj4)->isNum()) {
1864 widths.exceps[widths.nExceps].first = j;
1865 widths.exceps[widths.nExceps].last = j;
1866 widths.exceps[widths.nExceps].width = obj4.getNum() * 0.001;
1867 ++j;
1868 ++widths.nExceps;
1869 } else {
1870 error(errSyntaxError, -1, "Bad widths array in Type 0 font");
1871 }
1872 obj4.free();
1873 }
1874 i += 2;
1875 } else {
1876 error(errSyntaxError, -1, "Bad widths array in Type 0 font");
1877 ++i;
1878 }
1879 obj3.free();
1880 obj2.free();
1881 }
1882 }
1883 obj1.free();
1884
1885 // default metrics for vertical font
1886 if (desFontDict->lookup("DW2", &obj1)->isArray() &&
1887 obj1.arrayGetLength() == 2) {
1888 if (obj1.arrayGet(0, &obj2)->isNum()) {
1889 widths.defVY = obj2.getNum() * 0.001;
1890 }
1891 obj2.free();
1892 if (obj1.arrayGet(1, &obj2)->isNum()) {
1893 widths.defHeight = obj2.getNum() * 0.001;
1894 }
1895 obj2.free();
1896 }
1897 obj1.free();
1898
1899 // char metric exceptions for vertical font
1900 if (desFontDict->lookup("W2", &obj1)->isArray()) {
1901 excepsSize = 0;
1902 i = 0;
1903 while (i + 1 < obj1.arrayGetLength()) {
1904 obj1.arrayGet(i, &obj2);
1905 obj1.arrayGet(i+ 1, &obj3);
1906 if (obj2.isInt() && obj3.isInt() && i + 4 < obj1.arrayGetLength()) {
1907 if (obj1.arrayGet(i + 2, &obj4)->isNum() &&
1908 obj1.arrayGet(i + 3, &obj5)->isNum() &&
1909 obj1.arrayGet(i + 4, &obj6)->isNum()) {
1910 if (widths.nExcepsV == excepsSize) {
1911 excepsSize += 16;
1912 widths.excepsV = (GfxFontCIDWidthExcepV *)
1913 greallocn(widths.excepsV,
1914 excepsSize, sizeof(GfxFontCIDWidthExcepV));
1915 }
1916 widths.excepsV[widths.nExcepsV].first = obj2.getInt();
1917 widths.excepsV[widths.nExcepsV].last = obj3.getInt();
1918 widths.excepsV[widths.nExcepsV].height = obj4.getNum() * 0.001;
1919 widths.excepsV[widths.nExcepsV].vx = obj5.getNum() * 0.001;
1920 widths.excepsV[widths.nExcepsV].vy = obj6.getNum() * 0.001;
1921 ++widths.nExcepsV;
1922 } else {
1923 error(errSyntaxError, -1, "Bad widths (W2) array in Type 0 font");
1924 }
1925 obj6.free();
1926 obj5.free();
1927 obj4.free();
1928 i += 5;
1929 } else if (obj2.isInt() && obj3.isArray()) {
1930 if (widths.nExcepsV + obj3.arrayGetLength() / 3 > excepsSize) {
1931 excepsSize =
1932 (widths.nExcepsV + obj3.arrayGetLength() / 3 + 15) & ~15;
1933 widths.excepsV = (GfxFontCIDWidthExcepV *)
1934 greallocn(widths.excepsV,
1935 excepsSize, sizeof(GfxFontCIDWidthExcepV));
1936 }
1937 j = obj2.getInt();
1938 for (k = 0; k + 2 < obj3.arrayGetLength(); k += 3) {
1939 if (obj3.arrayGet(k, &obj4)->isNum() &&
1940 obj3.arrayGet(k+1, &obj5)->isNum() &&
1941 obj3.arrayGet(k+2, &obj6)->isNum()) {
1942 widths.excepsV[widths.nExcepsV].first = j;
1943 widths.excepsV[widths.nExcepsV].last = j;
1944 widths.excepsV[widths.nExcepsV].height = obj4.getNum() * 0.001;
1945 widths.excepsV[widths.nExcepsV].vx = obj5.getNum() * 0.001;
1946 widths.excepsV[widths.nExcepsV].vy = obj6.getNum() * 0.001;
1947 ++j;
1948 ++widths.nExcepsV;
1949 } else {
1950 error(errSyntaxError, -1, "Bad widths (W2) array in Type 0 font");
1951 }
1952 obj6.free();
1953 obj5.free();
1954 obj4.free();
1955 }
1956 i += 2;
1957 } else {
1958 error(errSyntaxError, -1, "Bad widths (W2) array in Type 0 font");
1959 ++i;
1960 }
1961 obj3.free();
1962 obj2.free();
1963 }
1964 }
1965 obj1.free();
1966
1967 desFontDictObj.free();
1968 ok = gTrue;
1969 return;
1970
1971 err3:
1972 obj3.free();
1973 obj2.free();
1974 err2:
1975 obj1.free();
1976 desFontDictObj.free();
1977 err1:
1978 error(errSyntaxError, -1, "Failed to parse font object for '{0:t}'", name);
1979 }
1980
1981 GfxCIDFont::~GfxCIDFont() {
1982 if (collection) {
1983 delete collection;
1984 }
1985 if (cMap) {
1986 cMap->decRefCnt();
1987 }
1988 if (ctu) {
1989 ctu->decRefCnt();
1990 }
1991 gfree(widths.exceps);
1992 gfree(widths.excepsV);
1993 if (cidToGID) {
1994 gfree(cidToGID);
1995 }
1996 }
1997
1998 // Construct a code-to-Unicode mapping, based on the TrueType Unicode
1999 // cmap (if present). Constructs ctu if succesful; leaves ctu = null
2000 // otherwise. Always leaves ctu = null for non-TrueType fonts.
2001 void GfxCIDFont::readTrueTypeUnicodeMapping(XRef *xref) {
2002 char *buf;
2003 FoFiTrueType *ff;
2004 Unicode *gidToUnicode, *codeToUnicode;
2005 Unicode u;
2006 int bufLen, cmapPlatform, cmapEncoding, unicodeCmap;
2007 int nGlyphs, nMappings, gid, i;
2008
2009 // must be an embedded TrueType font, with an unknown char collection
2010 if ((type != fontCIDType2 && type == fontCIDType2OT) ||
2011 embFontID.num < 0 ||
2012 hasKnownCollection) {
2013 goto err0;
2014 }
2015
2016 // read the embedded font and construct a FoFiTrueType
2017 if (!(buf = readEmbFontFile(xref, &bufLen))) {
2018 goto err0;
2019 }
2020 if (!(ff = FoFiTrueType::make(buf, bufLen, 0))) {
2021 goto err1;
2022 }
2023
2024 // find the TrueType Unicode cmap
2025 unicodeCmap = -1;
2026 for (i = 0; i < ff->getNumCmaps(); ++i) {
2027 cmapPlatform = ff->getCmapPlatform(i);
2028 cmapEncoding = ff->getCmapEncoding(i);
2029 if ((cmapPlatform == 3 && cmapEncoding == 1) ||
2030 (cmapPlatform == 0 && cmapEncoding <= 4)) {
2031 unicodeCmap = i;
2032 break;
2033 }
2034 }
2035 if (unicodeCmap < 0) {
2036 goto err2;
2037 }
2038
2039 // construct reverse GID-to-Unicode map
2040 nGlyphs = ff->getNumGlyphs();
2041 gidToUnicode = (Unicode *)gmallocn(nGlyphs, sizeof(Unicode));
2042 memset(gidToUnicode, 0, nGlyphs * sizeof(Unicode));
2043 nMappings = 0;
2044 for (u = 1; u <= 0xffff; ++u) {
2045 gid = ff->mapCodeToGID(unicodeCmap, (int)u);
2046 if (gid > 0 && gid < nGlyphs) {
2047 gidToUnicode[gid] = u;
2048 ++nMappings;
2049 }
2050 }
2051 // bail out if the Unicode cmap was completely empty
2052 if (nMappings == 0) {
2053 goto err3;
2054 }
2055
2056 // construct code-to-Unicode map
2057 codeToUnicode = (Unicode *)gmallocn(65536, sizeof(Unicode));
2058 memset(codeToUnicode, 0, 65536 * sizeof(Unicode));
2059 for (i = 0; i <= 0xffff; ++i) {
2060 // we've already checked for an identity encoding, so CID = i
2061 if (cidToGID && i < cidToGIDLen) {
2062 gid = cidToGID[i];
2063 } else {
2064 gid = i;
2065 }
2066 if (gid < nGlyphs && gidToUnicode[gid] > 0) {
2067 codeToUnicode[i] = gidToUnicode[gid];
2068 }
2069 }
2070 ctu = CharCodeToUnicode::make16BitToUnicode(codeToUnicode);
2071
2072 gfree(codeToUnicode);
2073 err3:
2074 gfree(gidToUnicode);
2075 err2:
2076 delete ff;
2077 err1:
2078 gfree(buf);
2079 err0:
2080 return;
2081 }
2082
2083 int GfxCIDFont::getNextChar(char *s, int len, CharCode *code,
2084 Unicode *u, int uSize, int *uLen,
2085 double *dx, double *dy, double *ox, double *oy) {
2086 CID cid;
2087 CharCode c;
2088 int n;
2089
2090 if (!cMap) {
2091 *code = 0;
2092 *uLen = 0;
2093 *dx = *dy = 0;
2094 return 1;
2095 }
2096
2097 *code = (CharCode)(cid = cMap->getCID(s, len, &c, &n));
2098 if (ctu) {
2099 *uLen = ctu->mapToUnicode(ctuUsesCharCode ? c : cid, u, uSize);
2100 } else {
2101 *uLen = 0;
2102 }
2103 if (!*uLen && uSize >= 1 && globalParams->getMapUnknownCharNames()) {
2104 u[0] = *code;
2105 *uLen = 1;
2106 }
2107
2108 // horizontal
2109 if (cMap->getWMode() == 0) {
2110 getHorizontalMetrics(cid, dx);
2111 *dy = *ox = *oy = 0;
2112
2113 // vertical
2114 } else {
2115 getVerticalMetrics(cid, dy, ox, oy);
2116 *dx = 0;
2117 }
2118
2119 return n;
2120 }
2121
2122 // NB: Section 9.7.4.3 in the PDF 2.0 spec says that, in the case of
2123 // duplicate entries in the metrics, the first entry should be used.
2124 // This means we need to leave the metrics in the original order and
2125 // perform a linear search. (Or use a more complex data structure.)
2126 void GfxCIDFont::getHorizontalMetrics(CID cid, double *w) {
2127 int i;
2128 for (i = 0; i < widths.nExceps; ++i) {
2129 if (widths.exceps[i].first <= cid && cid <= widths.exceps[i].last) {
2130 *w = widths.exceps[i].width;
2131 return;
2132 }
2133 }
2134 *w = widths.defWidth;
2135 }
2136
2137 // NB: Section 9.7.4.3 in the PDF 2.0 spec says that, in the case of
2138 // duplicate entries in the metrics, the first entry should be used.
2139 // This means we need to leave the metrics in the original order and
2140 // perform a linear search. (Or use a more complex data structure.)
2141 void GfxCIDFont::getVerticalMetrics(CID cid, double *h,
2142 double *vx, double *vy) {
2143 int i;
2144 for (i = 0; i < widths.nExcepsV; ++i) {
2145 if (widths.excepsV[i].first <= cid && cid <= widths.excepsV[i].last) {
2146 *h = widths.excepsV[i].height;
2147 *vx = widths.excepsV[i].vx;
2148 *vy = widths.excepsV[i].vy;
2149 return;
2150 }
2151 }
2152 *h = widths.defHeight;
2153 getHorizontalMetrics(cid, vx);
2154 *vx /= 2;
2155 *vy = widths.defVY;
2156 }
2157
2158 int GfxCIDFont::getWMode() {
2159 return cMap ? cMap->getWMode() : 0;
2160 }
2161
2162 CharCodeToUnicode *GfxCIDFont::getToUnicode() {
2163 if (ctu) {
2164 ctu->incRefCnt();
2165 }
2166 return ctu;
2167 }
2168
2169 GString *GfxCIDFont::getCollection() {
2170 return cMap ? cMap->getCollection() : (GString *)NULL;
2171 }
2172
2173 double GfxCIDFont::getWidth(CID cid) {
2174 double w;
2175
2176 getHorizontalMetrics(cid, &w);
2177 return w;
2178 }
2179
2180 GBool GfxCIDFont::problematicForUnicode() {
2181 GString *nameLC;
2182 GBool symbolic;
2183
2184 // potential inputs:
2185 // - font is embedded (GfxFont.embFontID.num >= 0)
2186 // - font name (GfxFont.name)
2187 // - font type (GfxFont.type)
2188 // - symbolic (GfxFont.flags & fontSymbolic)
2189 // - has a ToUnicode map (GfxFont.hasToUnicode)
2190 // - collection is Adobe-Identity or Adobe-UCS
2191 // (GfxCIDFont.collection - compare string)
2192 // - collection is known AdobeCJK (GfxCIDFont.hasKnownCollection)
2193 // - has non-Identity CIDToGIDMap (GfxCIDFont.cidToGID != NULL)
2194 // - has Identity CIDToGIDMap (GfxCIDFont.hasIdentityCIDToGID)
2195
2196 if (name) {
2197 nameLC = name->copy();
2198 nameLC->lowerCase();
2199 symbolic = strstr(nameLC->getCString(), "dingbat") ||
2200 strstr(nameLC->getCString(), "wingding") ||
2201 strstr(nameLC->getCString(), "commpi");
2202 delete nameLC;
2203 if (symbolic) {
2204 return gFalse;
2205 }
2206 }
2207
2208 if (embFontID.num >= 0) {
2209 switch (type) {
2210 case fontCIDType0:
2211 case fontCIDType0C:
2212 case fontCIDType0COT:
2213 return !hasToUnicode && !hasKnownCollection;
2214
2215 case fontCIDType2:
2216 case fontCIDType2OT:
2217 return !hasToUnicode && !hasKnownCollection;
2218
2219 default:
2220 return !hasToUnicode;
2221 }
2222
2223 } else {
2224 return !hasToUnicode;
2225 }
2226 }
2227
2228 //------------------------------------------------------------------------
2229 // GfxFontDict
2230 //------------------------------------------------------------------------
2231
2232 GfxFontDict::GfxFontDict(XRef *xref, Ref *fontDictRef, Dict *fontDict) {
2233 GfxFont *font;
2234 char *tag;
2235 Object obj1, obj2;
2236 Ref r;
2237 int i;
2238
2239 fonts = new GHash(gTrue);
2240 uniqueFonts = new GList();
2241 for (i = 0; i < fontDict->getLength(); ++i) {
2242 tag = fontDict->getKey(i);
2243 fontDict->getValNF(i, &obj1);
2244 obj1.fetch(xref, &obj2);
2245 if (!obj2.isDict()) {
2246 error(errSyntaxError, -1, "font resource is not a dictionary");
2247 } else if (obj1.isRef() && (font = lookupByRef(obj1.getRef()))) {
2248 fonts->add(new GString(tag), font);
2249 } else {
2250 if (obj1.isRef()) {
2251 r = obj1.getRef();
2252 } else if (fontDictRef) {
2253 // legal generation numbers are five digits, so we use a
2254 // 6-digit number here
2255 r.gen = 100000 + fontDictRef->num;
2256 r.num = i;
2257 } else {
2258 // no indirect reference for this font, or for the containing
2259 // font dict, so hash the font and use that
2260 r.gen = 100000;
2261 r.num = hashFontObject(&obj2);
2262 }
2263 if ((font = GfxFont::makeFont(xref, tag, r, obj2.getDict()))) {
2264 if (!font->isOk()) {
2265 delete font;
2266 } else {
2267 uniqueFonts->append(font);
2268 fonts->add(new GString(tag), font);
2269 }
2270 }
2271 }
2272 obj1.free();
2273 obj2.free();
2274 }
2275 }
2276
2277 GfxFontDict::~GfxFontDict() {
2278 deleteGList(uniqueFonts, GfxFont);
2279 delete fonts;
2280 }
2281
2282 GfxFont *GfxFontDict::lookup(char *tag) {
2283 return (GfxFont *)fonts->lookup(tag);
2284 }
2285
2286 GfxFont *GfxFontDict::lookupByRef(Ref ref) {
2287 GfxFont *font;
2288 int i;
2289
2290 for (i = 0; i < uniqueFonts->getLength(); ++i) {
2291 font = (GfxFont *)uniqueFonts->get(i);
2292 if (font->getID()->num == ref.num &&
2293 font->getID()->gen == ref.gen) {
2294 return font;
2295 }
2296 }
2297 return NULL;
2298 }
2299
2300 int GfxFontDict::getNumFonts() {
2301 return uniqueFonts->getLength();
2302 }
2303
2304 GfxFont *GfxFontDict::getFont(int i) {
2305 return (GfxFont *)uniqueFonts->get(i);
2306 }
2307
2308 // FNV-1a hash
2309 class FNVHash {
2310 public:
2311
2312 FNVHash() {
2313 h = 2166136261U;
2314 }
2315
2316 void hash(char c) {
2317 h ^= c & 0xff;
2318 h *= 16777619;
2319 }
2320
2321 void hash(char *p, int n) {
2322 int i;
2323 for (i = 0; i < n; ++i) {
2324 hash(p[i]);
2325 }
2326 }
2327
2328 int get31() {
2329 return (h ^ (h >> 31)) & 0x7fffffff;
2330 }
2331
2332 private:
2333
2334 Guint h;
2335 };
2336
2337 int GfxFontDict::hashFontObject(Object *obj) {
2338 FNVHash h;
2339
2340 hashFontObject1(obj, &h);
2341 return h.get31();
2342 }
2343
2344 void GfxFontDict::hashFontObject1(Object *obj, FNVHash *h) {
2345 Object obj2;
2346 GString *s;
2347 char *p;
2348 double r;
2349 int n, i;
2350
2351 switch (obj->getType()) {
2352 case objBool:
2353 h->hash('b');
2354 h->hash(obj->getBool() ? 1 : 0);
2355 break;
2356 case objInt:
2357 h->hash('i');
2358 n = obj->getInt();
2359 h->hash((char *)&n, sizeof(int));
2360 break;
2361 case objReal:
2362 h->hash('r');
2363 r = obj->getReal();
2364 h->hash((char *)&r, sizeof(double));
2365 break;
2366 case objString:
2367 h->hash('s');
2368 s = obj->getString();
2369 h->hash(s->getCString(), s->getLength());
2370 break;
2371 case objName:
2372 h->hash('n');
2373 p = obj->getName();
2374 h->hash(p, (int)strlen(p));
2375 break;
2376 case objNull:
2377 h->hash('z');
2378 break;
2379 case objArray:
2380 h->hash('a');
2381 n = obj->arrayGetLength();
2382 h->hash((char *)&n, sizeof(int));
2383 for (i = 0; i < n; ++i) {
2384 obj->arrayGetNF(i, &obj2);
2385 hashFontObject1(&obj2, h);
2386 obj2.free();
2387 }
2388 break;
2389 case objDict:
2390 h->hash('d');
2391 n = obj->dictGetLength();
2392 h->hash((char *)&n, sizeof(int));
2393 for (i = 0; i < n; ++i) {
2394 p = obj->dictGetKey(i);
2395 h->hash(p, (int)strlen(p));
2396 obj->dictGetValNF(i, &obj2);
2397 hashFontObject1(&obj2, h);
2398 obj2.free();
2399 }
2400 break;
2401 case objStream:
2402 // this should never happen - streams must be indirect refs
2403 break;
2404 case objRef:
2405 h->hash('f');
2406 n = obj->getRefNum();
2407 h->hash((char *)&n, sizeof(int));
2408 n = obj->getRefGen();
2409 h->hash((char *)&n, sizeof(int));
2410 break;
2411 default:
2412 h->hash('u');
2413 break;
2414 }
2415 }