"Fossies" - the Fresh Open Source Software Archive 
Member "xpdf-4.04/xpdf/GfxFont.cc" (18 Apr 2022, 65646 Bytes) of package /linux/misc/xpdf-4.04.tar.gz:
As a special service "Fossies" has tried to format the requested source page into HTML format using (guessed) C and C++ source code syntax highlighting (style:
standard) with prefixed line numbers and
code folding option.
Alternatively, you can view or download the uninterpreted source code file here.
1 //========================================================================
2 //
3 // GfxFont.cc
4 //
5 // Copyright 1996-2003 Glyph & Cog, LLC
6 //
7 //========================================================================
8
9 #include <aconf.h>
10
11 #ifdef USE_GCC_PRAGMAS
12 #pragma implementation
13 #endif
14
15 #include <stdio.h>
16 #include <stdlib.h>
17 #include <string.h>
18 #include <ctype.h>
19 #include <math.h>
20 #include <limits.h>
21 #include "gmem.h"
22 #include "gmempp.h"
23 #include "GList.h"
24 #include "GHash.h"
25 #include "Error.h"
26 #include "Object.h"
27 #include "Dict.h"
28 #include "GlobalParams.h"
29 #include "CMap.h"
30 #include "CharCodeToUnicode.h"
31 #include "FontEncodingTables.h"
32 #include "BuiltinFontTables.h"
33 #include "FoFiIdentifier.h"
34 #include "FoFiType1.h"
35 #include "FoFiType1C.h"
36 #include "FoFiTrueType.h"
37 #include "GfxFont.h"
38
39 //------------------------------------------------------------------------
40
// One row of the Base-14 alias table: maps an alternate spelling of a
// font name to the canonical Base-14 font name it stands for.
struct Base14FontMapEntry {
  const char *altName;		// alias; this is the lookup key
  const char *base14Name;	// canonical Base-14 font name
};
45
// Alias table for the Base-14 fonts.
//
// NB: the Gfx8BitFont constructor binary-searches this table on
// altName (after removing spaces from the font name), so the entries
// must stay sorted in ascending altName order as seen by GString::cmp
// (presumably a byte-wise comparison, in which ',' sorts before '-' --
// confirm against GString before adding entries).
static Base14FontMapEntry base14FontMap[] = {
  { "Arial",                        "Helvetica" },
  { "Arial,Bold",                   "Helvetica-Bold" },
  { "Arial,BoldItalic",             "Helvetica-BoldOblique" },
  { "Arial,Italic",                 "Helvetica-Oblique" },
  { "Arial-Bold",                   "Helvetica-Bold" },
  { "Arial-BoldItalic",             "Helvetica-BoldOblique" },
  { "Arial-BoldItalicMT",           "Helvetica-BoldOblique" },
  { "Arial-BoldMT",                 "Helvetica-Bold" },
  { "Arial-Italic",                 "Helvetica-Oblique" },
  { "Arial-ItalicMT",               "Helvetica-Oblique" },
  { "ArialMT",                      "Helvetica" },
  { "Courier",                      "Courier" },
  { "Courier,Bold",                 "Courier-Bold" },
  { "Courier,BoldItalic",           "Courier-BoldOblique" },
  { "Courier,Italic",               "Courier-Oblique" },
  { "Courier-Bold",                 "Courier-Bold" },
  { "Courier-BoldOblique",          "Courier-BoldOblique" },
  { "Courier-Oblique",              "Courier-Oblique" },
  { "CourierNew",                   "Courier" },
  { "CourierNew,Bold",              "Courier-Bold" },
  { "CourierNew,BoldItalic",        "Courier-BoldOblique" },
  { "CourierNew,Italic",            "Courier-Oblique" },
  { "CourierNew-Bold",              "Courier-Bold" },
  { "CourierNew-BoldItalic",        "Courier-BoldOblique" },
  { "CourierNew-Italic",            "Courier-Oblique" },
  { "CourierNewPS-BoldItalicMT",    "Courier-BoldOblique" },
  { "CourierNewPS-BoldMT",          "Courier-Bold" },
  { "CourierNewPS-ItalicMT",        "Courier-Oblique" },
  { "CourierNewPSMT",               "Courier" },
  { "Helvetica",                    "Helvetica" },
  { "Helvetica,Bold",               "Helvetica-Bold" },
  { "Helvetica,BoldItalic",         "Helvetica-BoldOblique" },
  { "Helvetica,Italic",             "Helvetica-Oblique" },
  { "Helvetica-Bold",               "Helvetica-Bold" },
  { "Helvetica-BoldItalic",         "Helvetica-BoldOblique" },
  { "Helvetica-BoldOblique",        "Helvetica-BoldOblique" },
  { "Helvetica-Italic",             "Helvetica-Oblique" },
  { "Helvetica-Oblique",            "Helvetica-Oblique" },
  { "Symbol",                       "Symbol" },
  { "Symbol,Bold",                  "Symbol" },
  { "Symbol,BoldItalic",            "Symbol" },
  { "Symbol,Italic",                "Symbol" },
  { "Times-Bold",                   "Times-Bold" },
  { "Times-BoldItalic",             "Times-BoldItalic" },
  { "Times-Italic",                 "Times-Italic" },
  { "Times-Roman",                  "Times-Roman" },
  { "TimesNewRoman",                "Times-Roman" },
  { "TimesNewRoman,Bold",           "Times-Bold" },
  { "TimesNewRoman,BoldItalic",     "Times-BoldItalic" },
  { "TimesNewRoman,Italic",         "Times-Italic" },
  { "TimesNewRoman-Bold",           "Times-Bold" },
  { "TimesNewRoman-BoldItalic",     "Times-BoldItalic" },
  { "TimesNewRoman-Italic",         "Times-Italic" },
  { "TimesNewRomanPS",              "Times-Roman" },
  { "TimesNewRomanPS-Bold",         "Times-Bold" },
  { "TimesNewRomanPS-BoldItalic",   "Times-BoldItalic" },
  { "TimesNewRomanPS-BoldItalicMT", "Times-BoldItalic" },
  { "TimesNewRomanPS-BoldMT",       "Times-Bold" },
  { "TimesNewRomanPS-Italic",       "Times-Italic" },
  { "TimesNewRomanPS-ItalicMT",     "Times-Italic" },
  { "TimesNewRomanPSMT",            "Times-Roman" },
  { "TimesNewRomanPSMT,Bold",       "Times-Bold" },
  { "TimesNewRomanPSMT,BoldItalic", "Times-BoldItalic" },
  { "TimesNewRomanPSMT,Italic",     "Times-Italic" },
  { "ZapfDingbats",                 "ZapfDingbats" }
};
113
114 //------------------------------------------------------------------------
115
// Substitute font names, indexed by
//   {fixed:0, sans-serif:4, serif:8} + bold*2 + italic
// NB: must be in same order as psSubstFonts in PSOutputDev.cc
static const char *base14SubstFonts[14] = {
  // 0..3: fixed-width
  "Courier", "Courier-Oblique", "Courier-Bold", "Courier-BoldOblique",
  // 4..7: sans-serif
  "Helvetica", "Helvetica-Oblique", "Helvetica-Bold", "Helvetica-BoldOblique",
  // 8..11: serif
  "Times-Roman", "Times-Italic", "Times-Bold", "Times-BoldItalic",
  // 12..13: the last two are never used for substitution
  "Symbol", "ZapfDingbats"
};
135
136 //------------------------------------------------------------------------
137
138 static int readFromStream(void *data) {
139 return ((Stream *)data)->getChar();
140 }
141
142 //------------------------------------------------------------------------
143 // GfxFontLoc
144 //------------------------------------------------------------------------
145
146 GfxFontLoc::GfxFontLoc() {
147 path = NULL;
148 fontNum = 0;
149 oblique = 0;
150 encoding = NULL;
151 substIdx = -1;
152 }
153
154 GfxFontLoc::~GfxFontLoc() {
155 if (path) {
156 delete path;
157 }
158 if (encoding) {
159 delete encoding;
160 }
161 }
162
163 //------------------------------------------------------------------------
164 // GfxFont
165 //------------------------------------------------------------------------
166
167 GfxFont *GfxFont::makeFont(XRef *xref, const char *tagA,
168 Ref idA, Dict *fontDict) {
169 GString *nameA;
170 Ref embFontIDA;
171 GfxFontType typeA;
172 GfxFont *font;
173 Object obj1;
174
175 // get base font name
176 nameA = NULL;
177 fontDict->lookup("BaseFont", &obj1);
178 if (obj1.isName()) {
179 nameA = new GString(obj1.getName());
180 } else if (obj1.isString()) {
181 nameA = obj1.getString()->copy();
182 }
183 obj1.free();
184
185 // get embedded font ID and font type
186 typeA = getFontType(xref, fontDict, &embFontIDA);
187
188 // create the font object
189 font = NULL;
190 if (typeA < fontCIDType0) {
191 font = new Gfx8BitFont(xref, tagA, idA, nameA, typeA, embFontIDA,
192 fontDict);
193 } else {
194 font = new GfxCIDFont(xref, tagA, idA, nameA, typeA, embFontIDA,
195 fontDict);
196 }
197
198 return font;
199 }
200
201 GfxFont *GfxFont::makeDefaultFont(XRef *xref) {
202 Object type, subtype, baseFont;
203 type.initName("Font");
204 subtype.initName("Type1");
205 baseFont.initName("Helvetica");
206 Object fontDict;
207 fontDict.initDict(xref);
208 fontDict.dictAdd(copyString("Type"), &type);
209 fontDict.dictAdd(copyString("Subtype"), &subtype);
210 fontDict.dictAdd(copyString("BaseFont"), &baseFont);
211
212 Ref r;
213 r.gen = 100000;
214 r.num = GfxFontDict::hashFontObject(&fontDict);
215
216 GfxFont *font = makeFont(xref, "undef", r, fontDict.getDict());
217 fontDict.free();
218
219 return font;
220 }
221
222 GfxFont::GfxFont(const char *tagA, Ref idA, GString *nameA,
223 GfxFontType typeA, Ref embFontIDA) {
224 ok = gFalse;
225 tag = new GString(tagA);
226 id = idA;
227 name = nameA;
228 type = typeA;
229 embFontID = embFontIDA;
230 embFontName = NULL;
231 hasToUnicode = gFalse;
232 }
233
234 GfxFont::~GfxFont() {
235 delete tag;
236 if (name) {
237 delete name;
238 }
239 if (embFontName) {
240 delete embFontName;
241 }
242 }
243
// This function extracts three pieces of information:
// 1. the "expected" font type, i.e., the font type implied by
//    Font.Subtype, DescendantFont.Subtype, and
//    FontDescriptor.FontFile3.Subtype
// 2. the embedded font object ID
// 3. the actual font type - determined by examining the embedded font
//    if there is one, otherwise equal to the expected font type
// If the expected and actual font types don't match, a warning
// message is printed.  The expected font type is not used for
// anything else.
//
// Parameters:
//   xref     - used to fetch the embedded font file stream
//   fontDict - the font dictionary to inspect
//   embID    - [out] receives the FontFile / FontFile2 / FontFile3
//              reference, or {-1, -1} if none of them is a reference
// Returns the actual font type (item 3 above).
GfxFontType GfxFont::getFontType(XRef *xref, Dict *fontDict, Ref *embID) {
  GfxFontType t, expectedType;
  FoFiIdentifierType fft;
  Dict *fontDict2;
  Object subtype, fontDesc, obj1, obj2, obj3, obj4;
  GBool isType0, err;

  t = fontUnknownType;
  embID->num = embID->gen = -1;
  err = gFalse;

  // step 1a: expected type from Font.Subtype; Type0 only sets a flag
  // here, the real type comes from the descendant font below
  fontDict->lookup("Subtype", &subtype);
  expectedType = fontUnknownType;
  isType0 = gFalse;
  if (subtype.isName("Type1") || subtype.isName("MMType1")) {
    expectedType = fontType1;
  } else if (subtype.isName("Type1C")) {
    expectedType = fontType1C;
  } else if (subtype.isName("Type3")) {
    expectedType = fontType3;
  } else if (subtype.isName("TrueType")) {
    expectedType = fontTrueType;
  } else if (subtype.isName("Type0")) {
    isType0 = gTrue;
  } else {
    error(errSyntaxWarning, -1, "Unknown font type: '{0:s}'",
          subtype.isName() ? subtype.getName() : "???");
  }
  subtype.free();

  // step 1b: if there is a DescendantFonts array, its first entry
  // replaces fontDict as the place to look for the FontDescriptor.
  // obj1/obj2 stay alive until the end of the function because
  // fontDict2 may point into obj2.
  fontDict2 = fontDict;
  if (fontDict->lookup("DescendantFonts", &obj1)->isArray()) {
    if (obj1.arrayGetLength() == 0) {
      error(errSyntaxWarning, -1, "Empty DescendantFonts array in font");
      obj2.initNull();
    } else if (obj1.arrayGet(0, &obj2)->isDict()) {
      if (!isType0) {
        error(errSyntaxWarning, -1, "Non-CID font with DescendantFonts array");
      }
      fontDict2 = obj2.getDict();
      fontDict2->lookup("Subtype", &subtype);
      // the descendant subtype only counts for Type0 parent fonts
      if (subtype.isName("CIDFontType0")) {
        if (isType0) {
          expectedType = fontCIDType0;
        }
      } else if (subtype.isName("CIDFontType2")) {
        if (isType0) {
          expectedType = fontCIDType2;
        }
      }
      subtype.free();
    }
  } else {
    obj2.initNull();
  }

  // step 2: find the embedded font file reference; FontFile is tried
  // first, then FontFile2, then FontFile3 (the first reference wins)
  if (fontDict2->lookup("FontDescriptor", &fontDesc)->isDict()) {
    // FontFile: only consistent with Type 1
    if (fontDesc.dictLookupNF("FontFile", &obj3)->isRef()) {
      *embID = obj3.getRef();
      if (expectedType != fontType1) {
        err = gTrue;
      }
    }
    obj3.free();
    // FontFile2: TrueType, or CIDFontType2 under a Type0 font
    if (embID->num == -1 &&
        fontDesc.dictLookupNF("FontFile2", &obj3)->isRef()) {
      *embID = obj3.getRef();
      if (isType0) {
        expectedType = fontCIDType2;
      } else if (expectedType != fontTrueType) {
        err = gTrue;
      }
    }
    obj3.free();
    // FontFile3: the stream's own Subtype refines (or contradicts)
    // the expected type
    if (embID->num == -1 &&
        fontDesc.dictLookupNF("FontFile3", &obj3)->isRef()) {
      *embID = obj3.getRef();
      if (obj3.fetch(xref, &obj4)->isStream()) {
        obj4.streamGetDict()->lookup("Subtype", &subtype);
        if (subtype.isName("Type1")) {
          if (expectedType != fontType1) {
            err = gTrue;
            expectedType = isType0 ? fontCIDType0 : fontType1;
          }
        } else if (subtype.isName("Type1C")) {
          if (expectedType == fontType1) {
            expectedType = fontType1C;
          } else if (expectedType != fontType1C) {
            err = gTrue;
            expectedType = isType0 ? fontCIDType0C : fontType1C;
          }
        } else if (subtype.isName("TrueType")) {
          if (expectedType != fontTrueType) {
            err = gTrue;
            expectedType = isType0 ? fontCIDType2 : fontTrueType;
          }
        } else if (subtype.isName("CIDFontType0C")) {
          if (expectedType == fontCIDType0) {
            expectedType = fontCIDType0C;
          } else {
            err = gTrue;
            expectedType = isType0 ? fontCIDType0C : fontType1C;
          }
        } else if (subtype.isName("OpenType")) {
          // OpenType wrapper: map each base type to its OT variant
          if (expectedType == fontTrueType) {
            expectedType = fontTrueTypeOT;
          } else if (expectedType == fontType1) {
            expectedType = fontType1COT;
          } else if (expectedType == fontCIDType0) {
            expectedType = fontCIDType0COT;
          } else if (expectedType == fontCIDType2) {
            expectedType = fontCIDType2OT;
          } else {
            err = gTrue;
          }
        } else {
          error(errSyntaxError, -1, "Unknown font type '{0:s}'",
                subtype.isName() ? subtype.getName() : "???");
        }
        subtype.free();
      }
      obj4.free();
    }
    obj3.free();
  }
  fontDesc.free();

  // step 3: actual type, from sniffing the embedded font stream
  t = fontUnknownType;
  if (embID->num >= 0) {
    obj3.initRef(embID->num, embID->gen);
    obj3.fetch(xref, &obj4);
    if (obj4.isStream()) {
      obj4.streamReset();
      fft = FoFiIdentifier::identifyStream(&readFromStream, obj4.getStream());
      obj4.streamClose();
      switch (fft) {
      case fofiIdType1PFA:
      case fofiIdType1PFB:
        t = fontType1;
        break;
      case fofiIdCFF8Bit:
        t = isType0 ? fontCIDType0C : fontType1C;
        break;
      case fofiIdCFFCID:
        t = fontCIDType0C;
        break;
      case fofiIdTrueType:
      case fofiIdTrueTypeCollection:
        t = isType0 ? fontCIDType2 : fontTrueType;
        break;
      case fofiIdOpenTypeCFF8Bit:
        t = isType0 ? fontCIDType0COT : fontType1COT;
        break;
      case fofiIdOpenTypeCFFCID:
        t = fontCIDType0COT;
        break;
      default:
        // t stays fontUnknownType and falls back to expectedType below
        error(errSyntaxError, -1, "Embedded font file may be invalid");
        break;
      }
    }
    obj4.free();
    obj3.free();
  }

  // no (identifiable) embedded font: trust the dictionaries
  if (t == fontUnknownType) {
    t = expectedType;
  }

  if (t != expectedType) {
    err = gTrue;
  }

  if (err) {
    error(errSyntaxWarning, -1,
          "Mismatch between font type and embedded font file");
  }

  // release the DescendantFonts objects (fontDict2 may have pointed
  // into obj2 up to here)
  obj2.free();
  obj1.free();

  return t;
}
437
438 void GfxFont::readFontDescriptor(XRef *xref, Dict *fontDict) {
439 Object obj1, obj2, obj3, obj4;
440 double t, t2;
441 int i;
442
443 // assume Times-Roman by default (for substitution purposes)
444 flags = fontSerif;
445
446 if (fontDict->lookup("FontDescriptor", &obj1)->isDict()) {
447
448 // get flags
449 if (obj1.dictLookup("Flags", &obj2)->isInt()) {
450 flags = obj2.getInt();
451 }
452 obj2.free();
453
454 // get name
455 obj1.dictLookup("FontName", &obj2);
456 if (obj2.isName()) {
457 embFontName = new GString(obj2.getName());
458 }
459 obj2.free();
460
461 // look for MissingWidth
462 obj1.dictLookup("MissingWidth", &obj2);
463 if (obj2.isNum()) {
464 missingWidth = obj2.getNum();
465 }
466 obj2.free();
467
468 // get Ascent
469 // (CapHeight is a little more reliable - so use it if present)
470 obj1.dictLookup("Ascent", &obj2);
471 obj1.dictLookup("CapHeight", &obj3);
472 if (obj2.isNum() || obj3.isNum()) {
473 if (obj2.isNum()) {
474 t = 0.001 * obj2.getNum();
475 // some broken font descriptors specify a negative ascent
476 if (t < 0) {
477 t = -t;
478 }
479 } else {
480 t = 0;
481 }
482 if (obj3.isNum()) {
483 t2 = 0.001 * obj3.getNum();
484 // some broken font descriptors specify a negative ascent
485 if (t2 < 0) {
486 t2 = -t2;
487 }
488 } else {
489 t2 = 0;
490 }
491 if (t != 0 && t < 1.9) {
492 declaredAscent = t;
493 }
494 // if both Ascent and CapHeight are set, use the smaller one
495 // (because the most common problem is that Ascent is too large)
496 if (t2 != 0 && (t == 0 || t2 < t)) {
497 t = t2;
498 }
499 // some broken font descriptors set ascent and descent to 0;
500 // others set it to ridiculous values (e.g., 32768)
501 if (t != 0 && t < 1.9) {
502 ascent = t;
503 }
504 }
505 obj2.free();
506 obj3.free();
507
508 // get Descent
509 obj1.dictLookup("Descent", &obj2);
510 if (obj2.isNum()) {
511 t = 0.001 * obj2.getNum();
512 // some broken font descriptors specify a positive descent
513 if (t > 0) {
514 t = -t;
515 }
516 // some broken font descriptors set ascent and descent to 0
517 if (t != 0 && t > -1.9) {
518 descent = t;
519 }
520 }
521 obj2.free();
522
523 // font FontBBox
524 if (obj1.dictLookup("FontBBox", &obj2)->isArray()) {
525 for (i = 0; i < 4 && i < obj2.arrayGetLength(); ++i) {
526 if (obj2.arrayGet(i, &obj3)->isNum()) {
527 fontBBox[i] = 0.001 * obj3.getNum();
528 }
529 obj3.free();
530 }
531 }
532 obj2.free();
533
534 }
535 obj1.free();
536 }
537
538 CharCodeToUnicode *GfxFont::readToUnicodeCMap(Dict *fontDict, int nBits,
539 CharCodeToUnicode *ctu) {
540 GString *buf;
541 Object obj1;
542 char buf2[4096];
543 int n;
544
545 if (!fontDict->lookup("ToUnicode", &obj1)->isStream()) {
546 obj1.free();
547 return NULL;
548 }
549 buf = new GString();
550 obj1.streamReset();
551 while ((n = obj1.streamGetBlock(buf2, sizeof(buf2))) > 0) {
552 buf->append(buf2, n);
553 }
554 obj1.streamClose();
555 obj1.free();
556 if (ctu) {
557 ctu->mergeCMap(buf, nBits);
558 } else {
559 ctu = CharCodeToUnicode::parseCMap(buf, nBits);
560 }
561 delete buf;
562 hasToUnicode = gTrue;
563 return ctu;
564 }
565
// Decides where the font program for this font should come from and
// returns a newly allocated GfxFontLoc describing it, or NULL for
// Type 3 fonts and when nothing usable (not even a substitute) is
// found.  NOTE(review): presumably the caller owns the result.
//
// [ps] enables the PostScript-specific choices: the psEmbed* settings,
// font passthrough, and PS-resident fonts are only consulted when it
// is true.
//
// Search order, first hit wins:
//   1. embedded font (unless disabled by the psEmbed* settings)
//   2. PS passthrough (ps only, 8-bit fonts)
//   3. external font file registered under the font name
//   4. Base-14 font: PS resident (ps) or external file (!ps)
//   5. system font
//   6. 8-bit fonts: PS resident font (ps), then Base-14 substitution
//   7. CID fonts: 16-bit PS resident font by name, then by collection
//      (ps), then a font file for the collection
//
// Ownership: getExternalFont() takes over the path passed to it (it
// deletes the path when it fails), and ~GfxFontLoc deletes
// fontLoc->path, so no path is freed explicitly in this function
// except the temporary name strings.
GfxFontLoc *GfxFont::locateFont(XRef *xref, GBool ps) {
  GfxFontLoc *fontLoc;
  SysFontType sysFontType;
  FoFiIdentifierType fft;
  GString *path, *base14Name, *substName;
  PSFontParam16 *psFont16;
  Object refObj, embFontObj;
  int substIdx, fontNum;
  double oblique;
  GBool embed;

  if (type == fontType3) {
    return NULL;
  }

  //----- embedded font
  if (embFontID.num >= 0) {
    embed = gTrue;
    // make sure the reference really points at a stream
    refObj.initRef(embFontID.num, embFontID.gen);
    refObj.fetch(xref, &embFontObj);
    if (!embFontObj.isStream()) {
      error(errSyntaxError, -1, "Embedded font object is wrong type");
      embed = gFalse;
    }
    embFontObj.free();
    refObj.free();
    if (embed) {
      // for PS output, embedding can be switched off per font class
      if (ps) {
        switch (type) {
        case fontType1:
        case fontType1C:
        case fontType1COT:
          embed = globalParams->getPSEmbedType1();
          break;
        case fontTrueType:
        case fontTrueTypeOT:
          embed = globalParams->getPSEmbedTrueType();
          break;
        case fontCIDType0C:
        case fontCIDType0COT:
          embed = globalParams->getPSEmbedCIDPostScript();
          break;
        case fontCIDType2:
        case fontCIDType2OT:
          embed = globalParams->getPSEmbedCIDTrueType();
          break;
        default:
          break;
        }
      }
      if (embed) {
        fontLoc = new GfxFontLoc();
        fontLoc->locType = gfxFontLocEmbedded;
        fontLoc->fontType = type;
        fontLoc->embFontID = embFontID;
        return fontLoc;
      }
    }
  }

  //----- PS passthrough
  if (ps && name && !isCIDFont() && globalParams->getPSFontPassthrough()) {
    fontLoc = new GfxFontLoc();
    fontLoc->locType = gfxFontLocResident;
    fontLoc->fontType = fontType1;
    fontLoc->path = name->copy();
    return fontLoc;
  }

  //----- external font file (fontFile, fontDir)
  if (name && (path = globalParams->findFontFile(name))) {
    if ((fontLoc = getExternalFont(path, 0, 0, isCIDFont()))) {
      return fontLoc;
    }
  }

  //----- PS resident Base-14 font
  // (the cast is guarded by !isCIDFont())
  if (ps && !isCIDFont() && ((Gfx8BitFont *)this)->base14) {
    fontLoc = new GfxFontLoc();
    fontLoc->locType = gfxFontLocResident;
    fontLoc->fontType = fontType1;
    fontLoc->path = new GString(((Gfx8BitFont *)this)->base14->base14Name);
    return fontLoc;
  }

  //----- external font file for Base-14 font
  if (!ps && !isCIDFont() && ((Gfx8BitFont *)this)->base14) {
    base14Name = new GString(((Gfx8BitFont *)this)->base14->base14Name);
    path = globalParams->findBase14FontFile(base14Name, &fontNum, &oblique);
    delete base14Name;
    if (path && (fontLoc = getExternalFont(path, fontNum, oblique, gFalse))) {
      return fontLoc;
    }
  }

  //----- system font
  if (name && (path = globalParams->findSystemFontFile(name, &sysFontType,
                                                       &fontNum))) {
    fontLoc = new GfxFontLoc();
    fontLoc->locType = gfxFontLocExternal;
    fontLoc->path = path;
    fontLoc->fontNum = fontNum;
    if (isCIDFont()) {
      if (sysFontType == sysFontTTF || sysFontType == sysFontTTC) {
        fontLoc->fontType = fontCIDType2;
        return fontLoc;
      } else if (sysFontType == sysFontOTF) {
        // an .otf file can hold either CFF or TrueType outlines
        fft = FoFiIdentifier::identifyFile(fontLoc->path->getCString());
        if (fft == fofiIdOpenTypeCFFCID) {
          fontLoc->fontType = fontCIDType0COT;
          return fontLoc;
        } else if (fft == fofiIdTrueType) {
          fontLoc->fontType = fontCIDType2;
          return fontLoc;
        }
      }
    } else {
      if (sysFontType == sysFontTTF || sysFontType == sysFontTTC) {
        fontLoc->fontType = fontTrueType;
        return fontLoc;
      } else if (sysFontType == sysFontPFA || sysFontType == sysFontPFB) {
        fontLoc->fontType = fontType1;
        return fontLoc;
      } else if (sysFontType == sysFontOTF) {
        fft = FoFiIdentifier::identifyFile(fontLoc->path->getCString());
        if (fft == fofiIdOpenTypeCFF8Bit) {
          fontLoc->fontType = fontType1COT;
          return fontLoc;
        } else if (fft == fofiIdTrueType) {
          fontLoc->fontType = fontTrueTypeOT;
          return fontLoc;
        }
      }
    }
    // unusable system font; this also deletes path via ~GfxFontLoc
    delete fontLoc;
  }

  if (!isCIDFont()) {

    //----- 8-bit PS resident font
    if (ps) {
      if (name && (path = globalParams->getPSResidentFont(name))) {
        fontLoc = new GfxFontLoc();
        fontLoc->locType = gfxFontLocResident;
        fontLoc->fontType = fontType1;
        fontLoc->path = path;
        return fontLoc;
      }
    }

    //----- 8-bit font substitution
    // index into base14SubstFonts:
    //   {fixed:0, sans-serif:4, serif:8} + bold*2 + italic
    if (flags & fontFixedWidth) {
      substIdx = 0;
    } else if (flags & fontSerif) {
      substIdx = 8;
    } else {
      substIdx = 4;
    }
    if (isBold()) {
      substIdx += 2;
    }
    if (isItalic()) {
      substIdx += 1;
    }
    substName = new GString(base14SubstFonts[substIdx]);
    if (ps) {
      error(errSyntaxWarning, -1, "Substituting font '{0:s}' for '{1:t}'",
            base14SubstFonts[substIdx], name);
      fontLoc = new GfxFontLoc();
      fontLoc->locType = gfxFontLocResident;
      fontLoc->fontType = fontType1;
      // substName is handed over to fontLoc here
      fontLoc->path = substName;
      fontLoc->substIdx = substIdx;
      return fontLoc;
    } else {
      path = globalParams->findBase14FontFile(substName, &fontNum, &oblique);
      delete substName;
      if (path) {
        if ((fontLoc = getExternalFont(path, fontNum, oblique, gFalse))) {
          error(errSyntaxWarning, -1, "Substituting font '{0:s}' for '{1:t}'",
                base14SubstFonts[substIdx], name);
          fontLoc->substIdx = substIdx;
          return fontLoc;
        }
      }
    }

    // failed to find a substitute font
    return NULL;
  }

  // from here on this is a CID font, so the GfxCIDFont casts apply

  //----- 16-bit PS resident font
  if (ps && name && ((psFont16 = globalParams->getPSResidentFont16(
                                     name,
                                     ((GfxCIDFont *)this)->getWMode())))) {
    fontLoc = new GfxFontLoc();
    fontLoc->locType = gfxFontLocResident;
    fontLoc->fontType = fontCIDType0;	// this is not used
    fontLoc->path = psFont16->psFontName->copy();
    fontLoc->encoding = psFont16->encoding->copy();
    fontLoc->wMode = psFont16->wMode;
    return fontLoc;
  }
  // same, but looked up by character collection instead of font name
  if (ps && ((psFont16 = globalParams->getPSResidentFontCC(
                             ((GfxCIDFont *)this)->getCollection(),
                             ((GfxCIDFont *)this)->getWMode())))) {
    error(errSyntaxWarning, -1, "Substituting font '{0:t}' for '{1:t}'",
          psFont16->psFontName, name);
    fontLoc = new GfxFontLoc();
    fontLoc->locType = gfxFontLocResident;
    fontLoc->fontType = fontCIDType0;	// this is not used
    fontLoc->path = psFont16->psFontName->copy();
    fontLoc->encoding = psFont16->encoding->copy();
    fontLoc->wMode = psFont16->wMode;
    return fontLoc;
  }

  //----- CID font substitution
  if ((path = globalParams->findCCFontFile(
                  ((GfxCIDFont *)this)->getCollection()))) {
    if ((fontLoc = getExternalFont(path, 0, 0, gTrue))) {
      error(errSyntaxWarning, -1, "Substituting font '{0:t}' for '{1:t}'",
            fontLoc->path, name);
      return fontLoc;
    }
  }

  // failed to find a substitute font
  return NULL;
}
796
797 GfxFontLoc *GfxFont::locateBase14Font(GString *base14Name) {
798 GString *path;
799 int fontNum;
800 double oblique;
801
802 path = globalParams->findBase14FontFile(base14Name, &fontNum, &oblique);
803 if (!path) {
804 return NULL;
805 }
806 return getExternalFont(path, fontNum, oblique, gFalse);
807 }
808
809 GfxFontLoc *GfxFont::getExternalFont(GString *path, int fontNum,
810 double oblique, GBool cid) {
811 FoFiIdentifierType fft;
812 GfxFontType fontType;
813 GfxFontLoc *fontLoc;
814
815 fft = FoFiIdentifier::identifyFile(path->getCString());
816 switch (fft) {
817 case fofiIdType1PFA:
818 case fofiIdType1PFB:
819 fontType = fontType1;
820 break;
821 case fofiIdCFF8Bit:
822 fontType = fontType1C;
823 break;
824 case fofiIdCFFCID:
825 fontType = fontCIDType0C;
826 break;
827 case fofiIdTrueType:
828 case fofiIdTrueTypeCollection:
829 fontType = cid ? fontCIDType2 : fontTrueType;
830 break;
831 case fofiIdOpenTypeCFF8Bit:
832 fontType = fontType1COT;
833 break;
834 case fofiIdOpenTypeCFFCID:
835 fontType = fontCIDType0COT;
836 break;
837 case fofiIdDfont:
838 fontType = cid ? fontCIDType2 : fontTrueType;
839 break;
840 case fofiIdUnknown:
841 case fofiIdError:
842 default:
843 fontType = fontUnknownType;
844 break;
845 }
846 if (fontType == fontUnknownType ||
847 (cid ? (fontType < fontCIDType0)
848 : (fontType >= fontCIDType0))) {
849 delete path;
850 return NULL;
851 }
852 fontLoc = new GfxFontLoc();
853 fontLoc->locType = gfxFontLocExternal;
854 fontLoc->fontType = fontType;
855 fontLoc->path = path;
856 fontLoc->fontNum = fontNum;
857 fontLoc->oblique = oblique;
858 return fontLoc;
859 }
860
861 char *GfxFont::readEmbFontFile(XRef *xref, int *len) {
862 char *buf;
863 Object obj1, obj2;
864 Stream *str;
865 int size, n;
866
867 obj1.initRef(embFontID.num, embFontID.gen);
868 obj1.fetch(xref, &obj2);
869 if (!obj2.isStream()) {
870 error(errSyntaxError, -1, "Embedded font file is not a stream");
871 obj2.free();
872 obj1.free();
873 embFontID.num = -1;
874 return NULL;
875 }
876 str = obj2.getStream();
877
878 size = 4096;
879 buf = (char *)gmalloc(size);
880 *len = 0;
881 str->reset();
882 do {
883 if (*len > size - 4096) {
884 if (size > INT_MAX / 2) {
885 error(errSyntaxError, -1, "Embedded font file is too large");
886 break;
887 }
888 size *= 2;
889 buf = (char *)grealloc(buf, size);
890 }
891 n = str->getBlock(buf + *len, 4096);
892 *len += n;
893 } while (n == 4096);
894 str->close();
895
896 obj2.free();
897 obj1.free();
898
899 return buf;
900 }
901
902 //------------------------------------------------------------------------
903 // Gfx8BitFont
904 //------------------------------------------------------------------------
905
906 Gfx8BitFont::Gfx8BitFont(XRef *xref, const char *tagA, Ref idA, GString *nameA,
907 GfxFontType typeA, Ref embFontIDA, Dict *fontDict):
908 GfxFont(tagA, idA, nameA, typeA, embFontIDA)
909 {
910 GString *name2;
911 BuiltinFont *builtinFont;
912 const char **baseEnc;
913 char *buf;
914 int len;
915 FoFiType1 *ffT1;
916 FoFiType1C *ffT1C;
917 int code, code2;
918 char *charName;
919 GBool missing, hex;
920 Unicode toUnicode[256];
921 CharCodeToUnicode *utu, *ctu2;
922 Unicode uBuf[8];
923 double mul;
924 int firstChar, lastChar;
925 Gushort w;
926 Object obj1, obj2, obj3;
927 int n, i, a, b, m;
928
929 ctu = NULL;
930
931 // do font name substitution for various aliases of the Base 14 font
932 // names
933 base14 = NULL;
934 if (name) {
935 name2 = name->copy();
936 i = 0;
937 while (i < name2->getLength()) {
938 if (name2->getChar(i) == ' ') {
939 name2->del(i);
940 } else {
941 ++i;
942 }
943 }
944 a = 0;
945 b = sizeof(base14FontMap) / sizeof(Base14FontMapEntry);
946 // invariant: base14FontMap[a].altName <= name2 < base14FontMap[b].altName
947 while (b - a > 1) {
948 m = (a + b) / 2;
949 if (name2->cmp(base14FontMap[m].altName) >= 0) {
950 a = m;
951 } else {
952 b = m;
953 }
954 }
955 if (!name2->cmp(base14FontMap[a].altName)) {
956 base14 = &base14FontMap[a];
957 }
958 delete name2;
959 }
960
961 // is it a built-in font?
962 builtinFont = NULL;
963 if (base14) {
964 for (i = 0; i < nBuiltinFonts; ++i) {
965 if (!strcmp(base14->base14Name, builtinFonts[i].name)) {
966 builtinFont = &builtinFonts[i];
967 break;
968 }
969 }
970 }
971
972 // default ascent/descent values
973 if (builtinFont) {
974 missingWidth = builtinFont->missingWidth;
975 ascent = 0.001 * builtinFont->ascent;
976 descent = 0.001 * builtinFont->descent;
977 declaredAscent = ascent;
978 fontBBox[0] = 0.001 * builtinFont->bbox[0];
979 fontBBox[1] = 0.001 * builtinFont->bbox[1];
980 fontBBox[2] = 0.001 * builtinFont->bbox[2];
981 fontBBox[3] = 0.001 * builtinFont->bbox[3];
982 } else {
983 missingWidth = 0;
984 ascent = 0.75;
985 descent = -0.25;
986 declaredAscent = ascent;
987 fontBBox[0] = fontBBox[1] = fontBBox[2] = fontBBox[3] = 0;
988 }
989
990 // get info from font descriptor
991 readFontDescriptor(xref, fontDict);
992
993 // for Base-14 fonts (even if embedded), don't trust the
994 // ascent/descent/bbox values from the font descriptor
995 if (builtinFont) {
996 ascent = 0.001 * builtinFont->ascent;
997 descent = 0.001 * builtinFont->descent;
998 declaredAscent = ascent;
999 fontBBox[0] = 0.001 * builtinFont->bbox[0];
1000 fontBBox[1] = 0.001 * builtinFont->bbox[1];
1001 fontBBox[2] = 0.001 * builtinFont->bbox[2];
1002 fontBBox[3] = 0.001 * builtinFont->bbox[3];
1003 }
1004
1005 // get font matrix
1006 fontMat[0] = fontMat[3] = 1;
1007 fontMat[1] = fontMat[2] = fontMat[4] = fontMat[5] = 0;
1008 if (fontDict->lookup("FontMatrix", &obj1)->isArray()) {
1009 for (i = 0; i < 6 && i < obj1.arrayGetLength(); ++i) {
1010 if (obj1.arrayGet(i, &obj2)->isNum()) {
1011 fontMat[i] = obj2.getNum();
1012 }
1013 obj2.free();
1014 }
1015 }
1016 obj1.free();
1017
1018 // get Type 3 bounding box, font definition, and resources
1019 if (type == fontType3) {
1020 if (fontDict->lookup("FontBBox", &obj1)->isArray()) {
1021 for (i = 0; i < 4 && i < obj1.arrayGetLength(); ++i) {
1022 if (obj1.arrayGet(i, &obj2)->isNum()) {
1023 fontBBox[i] = obj2.getNum();
1024 }
1025 obj2.free();
1026 }
1027 }
1028 obj1.free();
1029 if (!fontDict->lookup("CharProcs", &charProcs)->isDict()) {
1030 error(errSyntaxError, -1,
1031 "Missing or invalid CharProcs dictionary in Type 3 font");
1032 charProcs.free();
1033 }
1034 if (!fontDict->lookup("Resources", &resources)->isDict()) {
1035 resources.free();
1036 }
1037 }
1038
1039 //----- build the font encoding -----
1040
1041 // Encodings start with a base encoding, which can come from
1042 // (in order of priority):
1043 // 1. FontDict.Encoding or FontDict.Encoding.BaseEncoding
1044 // - MacRoman / MacExpert / WinAnsi / Standard
1045 // 2. embedded or external font file
1046 // 3. default:
1047 // - builtin --> builtin encoding
1048 // - TrueType --> WinAnsiEncoding
1049 // - others --> StandardEncoding
1050 // and then add a list of differences (if any) from
1051 // FontDict.Encoding.Differences.
1052
1053 // check FontDict for base encoding
1054 hasEncoding = gFalse;
1055 usesMacRomanEnc = gFalse;
1056 baseEnc = NULL;
1057 baseEncFromFontFile = gFalse;
1058 fontDict->lookup("Encoding", &obj1);
1059 if (obj1.isDict()) {
1060 obj1.dictLookup("BaseEncoding", &obj2);
1061 if (obj2.isName("MacRomanEncoding")) {
1062 hasEncoding = gTrue;
1063 usesMacRomanEnc = gTrue;
1064 baseEnc = macRomanEncoding;
1065 } else if (obj2.isName("MacExpertEncoding")) {
1066 hasEncoding = gTrue;
1067 baseEnc = macExpertEncoding;
1068 } else if (obj2.isName("WinAnsiEncoding")) {
1069 hasEncoding = gTrue;
1070 baseEnc = winAnsiEncoding;
1071 }
1072 obj2.free();
1073 } else if (obj1.isName("MacRomanEncoding")) {
1074 hasEncoding = gTrue;
1075 usesMacRomanEnc = gTrue;
1076 baseEnc = macRomanEncoding;
1077 } else if (obj1.isName("MacExpertEncoding")) {
1078 hasEncoding = gTrue;
1079 baseEnc = macExpertEncoding;
1080 } else if (obj1.isName("WinAnsiEncoding")) {
1081 hasEncoding = gTrue;
1082 baseEnc = winAnsiEncoding;
1083 }
1084
1085 // check embedded font file for base encoding
1086 // (only for Type 1 fonts - trying to get an encoding out of a
1087 // TrueType font is a losing proposition)
1088 ffT1 = NULL;
1089 ffT1C = NULL;
1090 buf = NULL;
1091 if (type == fontType1 && embFontID.num >= 0) {
1092 if ((buf = readEmbFontFile(xref, &len))) {
1093 if ((ffT1 = FoFiType1::make(buf, len))) {
1094 if (ffT1->getName()) {
1095 if (embFontName) {
1096 delete embFontName;
1097 }
1098 embFontName = new GString(ffT1->getName());
1099 }
1100 if (!baseEnc) {
1101 baseEnc = (const char **)ffT1->getEncoding();
1102 baseEncFromFontFile = gTrue;
1103 }
1104 }
1105 gfree(buf);
1106 }
1107 } else if (type == fontType1C && embFontID.num >= 0) {
1108 if ((buf = readEmbFontFile(xref, &len))) {
1109 if ((ffT1C = FoFiType1C::make(buf, len))) {
1110 if (ffT1C->getName()) {
1111 if (embFontName) {
1112 delete embFontName;
1113 }
1114 embFontName = new GString(ffT1C->getName());
1115 }
1116 if (!baseEnc) {
1117 baseEnc = (const char **)ffT1C->getEncoding();
1118 baseEncFromFontFile = gTrue;
1119 }
1120 }
1121 gfree(buf);
1122 }
1123 }
1124
1125 // get default base encoding
1126 if (!baseEnc) {
1127 if (builtinFont && embFontID.num < 0) {
1128 baseEnc = builtinFont->defaultBaseEnc;
1129 hasEncoding = gTrue;
1130 } else if (type == fontTrueType) {
1131 baseEnc = winAnsiEncoding;
1132 } else {
1133 baseEnc = standardEncoding;
1134 }
1135 }
1136
1137 // copy the base encoding
1138 for (i = 0; i < 256; ++i) {
1139 enc[i] = (char *)baseEnc[i];
1140 if ((encFree[i] = (char)baseEncFromFontFile) && enc[i]) {
1141 enc[i] = copyString(baseEnc[i]);
1142 }
1143 }
1144
1145 // some Type 1C font files have empty encodings, which can break the
1146 // T1C->T1 conversion (since the 'seac' operator depends on having
1147 // the accents in the encoding), so we fill in any gaps from
1148 // StandardEncoding
1149 if (type == fontType1C && embFontID.num >= 0 && baseEncFromFontFile) {
1150 for (i = 0; i < 256; ++i) {
1151 if (!enc[i] && standardEncoding[i]) {
1152 enc[i] = (char *)standardEncoding[i];
1153 encFree[i] = gFalse;
1154 }
1155 }
1156 }
1157
1158 // merge differences into encoding
1159 if (obj1.isDict()) {
1160 obj1.dictLookup("Differences", &obj2);
1161 if (obj2.isArray()) {
1162 hasEncoding = gTrue;
1163 code = 0;
1164 for (i = 0; i < obj2.arrayGetLength(); ++i) {
1165 obj2.arrayGet(i, &obj3);
1166 if (obj3.isInt()) {
1167 code = obj3.getInt();
1168 } else if (obj3.isName()) {
1169 if (code >= 0 && code < 256) {
1170 if (encFree[code]) {
1171 gfree(enc[code]);
1172 }
1173 enc[code] = copyString(obj3.getName());
1174 encFree[code] = gTrue;
1175 }
1176 ++code;
1177 } else {
1178 error(errSyntaxError, -1,
1179 "Wrong type in font encoding resource differences ({0:s})",
1180 obj3.getTypeName());
1181 }
1182 obj3.free();
1183 }
1184 }
1185 obj2.free();
1186 }
1187 obj1.free();
1188 if (ffT1) {
1189 delete ffT1;
1190 }
1191 if (ffT1C) {
1192 delete ffT1C;
1193 }
1194
1195 //----- build the mapping to Unicode -----
1196
1197 // pass 1: use the name-to-Unicode mapping table
1198 missing = hex = gFalse;
1199 for (code = 0; code < 256; ++code) {
1200 if ((charName = enc[code])) {
1201 if (!(toUnicode[code] = globalParams->mapNameToUnicode(charName)) &&
1202 strcmp(charName, ".notdef")) {
1203 // if it wasn't in the name-to-Unicode table, check for a
1204 // name that looks like 'Axx' or 'xx', where 'A' is any letter
1205 // and 'xx' is two hex digits
1206 if ((strlen(charName) == 3 &&
1207 isalpha(charName[0] & 0xff) &&
1208 isxdigit(charName[1] & 0xff) && isxdigit(charName[2] & 0xff) &&
1209 ((charName[1] >= 'a' && charName[1] <= 'f') ||
1210 (charName[1] >= 'A' && charName[1] <= 'F') ||
1211 (charName[2] >= 'a' && charName[2] <= 'f') ||
1212 (charName[2] >= 'A' && charName[2] <= 'F'))) ||
1213 (strlen(charName) == 2 &&
1214 isxdigit(charName[0] & 0xff) && isxdigit(charName[1] & 0xff) &&
1215 ((charName[0] >= 'a' && charName[0] <= 'f') ||
1216 (charName[0] >= 'A' && charName[0] <= 'F') ||
1217 (charName[1] >= 'a' && charName[1] <= 'f') ||
1218 (charName[1] >= 'A' && charName[1] <= 'F')))) {
1219 hex = gTrue;
1220 }
1221 missing = gTrue;
1222 }
1223 } else {
1224 toUnicode[code] = 0;
1225 }
1226 }
1227
1228 // pass 2: try to fill in the missing chars, looking for names of
1229 // any of the following forms:
1230 // - 'xx'
1231 // - 'Axx'
1232 // - 'nn'
1233 // - 'Ann'
1234 // - 'ABnn'
1235 // - 'unixxxx' (possibly followed by garbage - some Arabic files
1236 // use 'uni0628.medi', etc.)
1237 // where 'A' and 'B' are any letters, 'xx' is two hex digits, 'xxxx'
1238 // is four hex digits, and 'nn' is 2-4 decimal digits
1239 usedNumericHeuristic = gFalse;
1240 if (missing && globalParams->getMapNumericCharNames()) {
1241 for (code = 0; code < 256; ++code) {
1242 if ((charName = enc[code]) && !toUnicode[code] &&
1243 strcmp(charName, ".notdef")) {
1244 n = (int)strlen(charName);
1245 code2 = -1;
1246 if (hex && n == 3 && isalpha(charName[0] & 0xff) &&
1247 isxdigit(charName[1] & 0xff) && isxdigit(charName[2] & 0xff)) {
1248 sscanf(charName+1, "%x", &code2);
1249 } else if (hex && n == 2 &&
1250 isxdigit(charName[0] & 0xff) &&
1251 isxdigit(charName[1] & 0xff)) {
1252 sscanf(charName, "%x", &code2);
1253 } else if (!hex && n >= 2 && n <= 4 &&
1254 isdigit(charName[0] & 0xff) && isdigit(charName[1] & 0xff)) {
1255 code2 = atoi(charName);
1256 } else if (n >= 3 && n <= 5 &&
1257 isdigit(charName[1] & 0xff) && isdigit(charName[2] & 0xff)) {
1258 code2 = atoi(charName+1);
1259 } else if (n >= 4 && n <= 6 &&
1260 isdigit(charName[2] & 0xff) && isdigit(charName[3] & 0xff)) {
1261 code2 = atoi(charName+2);
1262 } else if (n >= 7 && charName[0] == 'u' && charName[1] == 'n' &&
1263 charName[2] == 'i' &&
1264 isxdigit(charName[3] & 0xff) &&
1265 isxdigit(charName[4] & 0xff) &&
1266 isxdigit(charName[5] & 0xff) &&
1267 isxdigit(charName[6] & 0xff)) {
1268 sscanf(charName + 3, "%x", &code2);
1269 }
1270 if (code2 >= 0 && code2 <= 0xffff) {
1271 toUnicode[code] = (Unicode)code2;
1272 usedNumericHeuristic = gTrue;
1273 }
1274 }
1275 }
1276
1277 // if the 'mapUnknownCharNames' flag is set, do a simple pass-through
1278 // mapping for unknown character names
1279 } else if (missing && globalParams->getMapUnknownCharNames()) {
1280 for (code = 0; code < 256; ++code) {
1281 if (!toUnicode[code]) {
1282 toUnicode[code] = code;
1283 }
1284 }
1285 }
1286
1287 // construct the char code -> Unicode mapping object
1288 ctu = CharCodeToUnicode::make8BitToUnicode(toUnicode);
1289
1290 // merge in a ToUnicode CMap, if there is one -- this overwrites
1291 // existing entries in ctu, i.e., the ToUnicode CMap takes
1292 // precedence, but the other encoding info is allowed to fill in any
1293 // holes
1294 readToUnicodeCMap(fontDict, 8, ctu);
1295
1296 // look for a Unicode-to-Unicode mapping
1297 if (name && (utu = globalParams->getUnicodeToUnicode(name))) {
1298 for (i = 0; i < 256; ++i) {
1299 toUnicode[i] = 0;
1300 }
1301 ctu2 = CharCodeToUnicode::make8BitToUnicode(toUnicode);
1302 for (i = 0; i < 256; ++i) {
1303 n = ctu->mapToUnicode((CharCode)i, uBuf, 8);
1304 if (n >= 1) {
1305 n = utu->mapToUnicode((CharCode)uBuf[0], uBuf, 8);
1306 if (n >= 1) {
1307 ctu2->setMapping((CharCode)i, uBuf, n);
1308 }
1309 }
1310 }
1311 utu->decRefCnt();
1312 delete ctu;
1313 ctu = ctu2;
1314 }
1315
1316 //----- get the character widths -----
1317
1318 // initialize all widths
1319 for (code = 0; code < 256; ++code) {
1320 widths[code] = missingWidth * 0.001;
1321 }
1322
1323 // use widths from font dict, if present
1324 fontDict->lookup("FirstChar", &obj1);
1325 firstChar = obj1.isInt() ? obj1.getInt() : 0;
1326 obj1.free();
1327 if (firstChar < 0 || firstChar > 255) {
1328 firstChar = 0;
1329 }
1330 fontDict->lookup("LastChar", &obj1);
1331 lastChar = obj1.isInt() ? obj1.getInt() : 255;
1332 obj1.free();
1333 if (lastChar < 0 || lastChar > 255) {
1334 lastChar = 255;
1335 }
1336 mul = (type == fontType3) ? fontMat[0] : 0.001;
1337 fontDict->lookup("Widths", &obj1);
1338 if (obj1.isArray()) {
1339 flags |= fontFixedWidth;
1340 if (obj1.arrayGetLength() < lastChar - firstChar + 1) {
1341 lastChar = firstChar + obj1.arrayGetLength() - 1;
1342 }
1343 for (code = firstChar; code <= lastChar; ++code) {
1344 obj1.arrayGet(code - firstChar, &obj2);
1345 if (obj2.isNum()) {
1346 widths[code] = obj2.getNum() * mul;
1347 if (fabs(widths[code] - widths[firstChar]) > 0.00001) {
1348 flags &= ~fontFixedWidth;
1349 }
1350 }
1351 obj2.free();
1352 }
1353
1354 // use widths from built-in font
1355 } else if (builtinFont) {
1356 // this is a kludge for broken PDF files that encode char 32
1357 // as .notdef
1358 if (builtinFont->widths->getWidth("space", &w)) {
1359 widths[32] = 0.001 * w;
1360 }
1361 for (code = 0; code < 256; ++code) {
1362 if (enc[code] && builtinFont->widths->getWidth(enc[code], &w)) {
1363 widths[code] = 0.001 * w;
1364 }
1365 }
1366
1367 // couldn't find widths -- use defaults
1368 } else {
1369 // this is technically an error -- the Widths entry is required
1370 // for all but the Base-14 fonts -- but certain PDF generators
1371 // apparently don't include widths for Arial and TimesNewRoman
1372 if (isFixedWidth()) {
1373 i = 0;
1374 } else if (isSerif()) {
1375 i = 8;
1376 } else {
1377 i = 4;
1378 }
1379 if (isBold()) {
1380 i += 2;
1381 }
1382 if (isItalic()) {
1383 i += 1;
1384 }
1385 builtinFont = builtinFontSubst[i];
1386 // this is a kludge for broken PDF files that encode char 32
1387 // as .notdef
1388 if (builtinFont->widths->getWidth("space", &w)) {
1389 widths[32] = 0.001 * w;
1390 }
1391 for (code = 0; code < 256; ++code) {
1392 if (enc[code] && builtinFont->widths->getWidth(enc[code], &w)) {
1393 widths[code] = 0.001 * w;
1394 }
1395 }
1396 }
1397 obj1.free();
1398
1399 ok = gTrue;
1400 }
1401
1402 Gfx8BitFont::~Gfx8BitFont() {
1403 int i;
1404
1405 for (i = 0; i < 256; ++i) {
1406 if (encFree[i] && enc[i]) {
1407 gfree(enc[i]);
1408 }
1409 }
1410 ctu->decRefCnt();
1411 if (charProcs.isDict()) {
1412 charProcs.free();
1413 }
1414 if (resources.isDict()) {
1415 resources.free();
1416 }
1417 }
1418
1419 int Gfx8BitFont::getNextChar(char *s, int len, CharCode *code,
1420 Unicode *u, int uSize, int *uLen,
1421 double *dx, double *dy, double *ox, double *oy) {
1422 CharCode c;
1423
1424 *code = c = (CharCode)(*s & 0xff);
1425 *uLen = ctu->mapToUnicode(c, u, uSize);
1426 *dx = widths[c];
1427 *dy = *ox = *oy = 0;
1428 return 1;
1429 }
1430
1431 CharCodeToUnicode *Gfx8BitFont::getToUnicode() {
1432 ctu->incRefCnt();
1433 return ctu;
1434 }
1435
1436 int *Gfx8BitFont::getCodeToGIDMap(FoFiTrueType *ff) {
1437 int *map;
1438 int cmapPlatform, cmapEncoding;
1439 int unicodeCmap, macRomanCmap, msSymbolCmap, cmap;
1440 GBool nonsymbolic, useMacRoman, useUnicode;
1441 char *charName;
1442 Unicode u;
1443 int code, i, n;
1444
1445 map = (int *)gmallocn(256, sizeof(int));
1446 for (i = 0; i < 256; ++i) {
1447 map[i] = 0;
1448 }
1449
1450 // This is based on the cmap/encoding selection algorithm in the PDF
1451 // 2.0 spec, but with some differences to match up with Adobe's
1452 // behavior.
1453 unicodeCmap = macRomanCmap = msSymbolCmap = -1;
1454 for (i = 0; i < ff->getNumCmaps(); ++i) {
1455 cmapPlatform = ff->getCmapPlatform(i);
1456 cmapEncoding = ff->getCmapEncoding(i);
1457 if ((cmapPlatform == 3 && cmapEncoding == 1) ||
1458 (cmapPlatform == 0 && cmapEncoding <= 4)) {
1459 unicodeCmap = i;
1460 } else if (cmapPlatform == 1 && cmapEncoding == 0) {
1461 macRomanCmap = i;
1462 } else if (cmapPlatform == 3 && cmapEncoding == 0) {
1463 msSymbolCmap = i;
1464 }
1465 }
1466 useMacRoman = gFalse;
1467 useUnicode = gFalse;
1468 nonsymbolic = !(flags & fontSymbolic);
1469 if (usesMacRomanEnc && macRomanCmap >= 0) {
1470 cmap = macRomanCmap;
1471 useMacRoman = gTrue;
1472 } else if (embFontID.num < 0 && hasEncoding && unicodeCmap >= 0) {
1473 cmap = unicodeCmap;
1474 useUnicode = gTrue;
1475 } else if (nonsymbolic && unicodeCmap >= 0) {
1476 cmap = unicodeCmap;
1477 useUnicode = gTrue;
1478 } else if (nonsymbolic && macRomanCmap >= 0) {
1479 cmap = macRomanCmap;
1480 useMacRoman = gTrue;
1481 } else if (msSymbolCmap >= 0) {
1482 cmap = msSymbolCmap;
1483 } else if (unicodeCmap >= 0) {
1484 cmap = unicodeCmap;
1485 } else if (macRomanCmap >= 0) {
1486 cmap = macRomanCmap;
1487 } else {
1488 cmap = 0;
1489 }
1490
1491 // reverse map the char names through MacRomanEncoding, then map the
1492 // char codes through the cmap; fall back on Unicode if that doesn't
1493 // work
1494 if (useMacRoman) {
1495 for (i = 0; i < 256; ++i) {
1496 if ((charName = enc[i])) {
1497 if ((code = globalParams->getMacRomanCharCode(charName))) {
1498 map[i] = ff->mapCodeToGID(cmap, code);
1499 } else if (unicodeCmap >= 0 &&
1500 (u = globalParams->mapNameToUnicode(charName))) {
1501 map[i] = ff->mapCodeToGID(unicodeCmap, u);
1502 }
1503 } else if (unicodeCmap >= 0 &&
1504 (n = ctu->mapToUnicode((CharCode)i, &u, 1))) {
1505 map[i] = ff->mapCodeToGID(cmap, u);
1506 } else {
1507 map[i] = -1;
1508 }
1509 }
1510
1511 // map Unicode through the cmap
1512 } else if (useUnicode) {
1513 for (i = 0; i < 256; ++i) {
1514 if (((charName = enc[i]) &&
1515 (u = globalParams->mapNameToUnicode(charName))) ||
1516 (n = ctu->mapToUnicode((CharCode)i, &u, 1))) {
1517 map[i] = ff->mapCodeToGID(cmap, u);
1518 } else {
1519 map[i] = -1;
1520 }
1521 }
1522
1523 // map the char codes through the cmap, possibly with an offset of
1524 // 0xf000
1525 } else {
1526 for (i = 0; i < 256; ++i) {
1527 if (!(map[i] = ff->mapCodeToGID(cmap, i))) {
1528 map[i] = ff->mapCodeToGID(cmap, 0xf000 + i);
1529 }
1530 }
1531 }
1532
1533 // try the TrueType 'post' table to handle any unmapped characters
1534 for (i = 0; i < 256; ++i) {
1535 if (map[i] <= 0 && (charName = enc[i])) {
1536 map[i] = ff->mapNameToGID(charName);
1537 }
1538 }
1539
1540 return map;
1541 }
1542
1543 int *Gfx8BitFont::getCodeToGIDMap(FoFiType1C *ff) {
1544 int *map;
1545 GHash *nameToGID;
1546 int i, gid;
1547
1548 map = (int *)gmallocn(256, sizeof(int));
1549 for (i = 0; i < 256; ++i) {
1550 map[i] = 0;
1551 }
1552
1553 nameToGID = ff->getNameToGIDMap();
1554 for (i = 0; i < 256; ++i) {
1555 if (!enc[i]) {
1556 continue;
1557 }
1558 gid = nameToGID->lookupInt(enc[i]);
1559 if (gid < 0 || gid >= 65536) {
1560 continue;
1561 }
1562 map[i] = gid;
1563 }
1564
1565 delete nameToGID;
1566
1567 return map;
1568 }
1569
1570 Dict *Gfx8BitFont::getCharProcs() {
1571 return charProcs.isDict() ? charProcs.getDict() : (Dict *)NULL;
1572 }
1573
1574 Object *Gfx8BitFont::getCharProc(int code, Object *proc) {
1575 if (enc[code] && charProcs.isDict()) {
1576 charProcs.dictLookup(enc[code], proc);
1577 } else {
1578 proc->initNull();
1579 }
1580 return proc;
1581 }
1582
1583 Object *Gfx8BitFont::getCharProcNF(int code, Object *proc) {
1584 if (enc[code] && charProcs.isDict()) {
1585 charProcs.dictLookupNF(enc[code], proc);
1586 } else {
1587 proc->initNull();
1588 }
1589 return proc;
1590 }
1591
1592 Dict *Gfx8BitFont::getResources() {
1593 return resources.isDict() ? resources.getDict() : (Dict *)NULL;
1594 }
1595
1596 GBool Gfx8BitFont::problematicForUnicode() {
1597 GString *nameLC;
1598 GBool symbolic;
1599
1600 // potential inputs:
1601 // - font is embedded (GfxFont.embFontID.num >= 0)
1602 // - font name (GfxFont.name)
1603 // - font type (GfxFont.type)
1604 // - Base-14 font (Gfx8BitFont.base14 != NULL)
1605 // - symbolic (GfxFont.flags & fontSymbolic)
1606 // - has Encoding array (Gfx8BitFont.hasEncoding)
1607 // - extracted base encoding from embedded font file
1608 // (Gfx8BitFont.baseEncFromFontFile)
1609 // - has a ToUnicode map (GfxFont.hasToUnicode)
1610 // - used the numeric glyph name heuristic
1611 // (Gfx8BitFont.usedNumericHeuristic)
1612
1613 if (name) {
1614 nameLC = name->copy();
1615 nameLC->lowerCase();
1616 symbolic = strstr(nameLC->getCString(), "dingbat") ||
1617 strstr(nameLC->getCString(), "wingding") ||
1618 strstr(nameLC->getCString(), "commpi");
1619 delete nameLC;
1620 if (symbolic) {
1621 return gFalse;
1622 }
1623 }
1624
1625 if (embFontID.num >= 0) {
1626 switch (type) {
1627 case fontType1:
1628 case fontType1C:
1629 case fontType1COT:
1630 return !hasToUnicode && (!hasEncoding || usedNumericHeuristic);
1631
1632 case fontType3:
1633 return !hasToUnicode && !hasEncoding;
1634
1635 case fontTrueType:
1636 case fontTrueTypeOT:
1637 return !hasToUnicode && !hasEncoding;
1638
1639 default:
1640 return !hasToUnicode;
1641 }
1642
1643 } else {
1644 // NB: type will be fontTypeUnknown if the PDF specifies an
1645 // invalid font type -- which is ok, if we have a ToUnicode map or
1646 // an encoding
1647 return !hasToUnicode && !hasEncoding;
1648 }
1649 }
1650
1651 //------------------------------------------------------------------------
1652 // GfxCIDFont
1653 //------------------------------------------------------------------------
1654
// Constructs a CID-keyed (Type 0) font from its font dictionary.
//
// Reads, in order: the first entry of DescendantFonts, the font
// descriptor, CIDSystemInfo (collection = "<Registry>-<Ordering>"),
// the Encoding CMap, the optional CIDToGIDMap, the code-to-Unicode
// mapping, and the horizontal (DW / W) and vertical (DW2 / W2)
// metrics.
//
// On success, sets ok = gTrue.  If the descendant font, CIDSystemInfo
// or Encoding cannot be read, an error is reported and the constructor
// returns through the err labels without setting ok.
1655 GfxCIDFont::GfxCIDFont(XRef *xref, const char *tagA, Ref idA, GString *nameA,
1656 GfxFontType typeA, Ref embFontIDA, Dict *fontDict):
1657 GfxFont(tagA, idA, nameA, typeA, embFontIDA)
1658 {
1659 Dict *desFontDict;
1660 Object desFontDictObj;
1661 Object obj1, obj2, obj3, obj4, obj5, obj6;
1662 CharCodeToUnicode *utu;
1663 CharCode c;
1664 Unicode uBuf[8];
1665 int c1, c2;
1666 int excepsSize, i, j, k, n;
1667
// defaults, used for anything the font dictionary does not supply
1668 missingWidth = 0;
1669 ascent = 0.95;
1670 descent = -0.35;
1671 declaredAscent = ascent;
1672 fontBBox[0] = fontBBox[1] = fontBBox[2] = fontBBox[3] = 0;
1673 collection = NULL;
1674 cMap = NULL;
1675 ctu = NULL;
1676 ctuUsesCharCode = gTrue;
1677 widths.defWidth = 1.0;
1678 widths.defHeight = -1.0;
1679 widths.defVY = 0.880;
1680 widths.exceps = NULL;
1681 widths.nExceps = 0;
1682 widths.excepsV = NULL;
1683 widths.nExcepsV = 0;
1684 cidToGID = NULL;
1685 cidToGIDLen = 0;
1686
1687 // get the descendant font
// (only element 0 of the DescendantFonts array is used)
1688 if (!fontDict->lookup("DescendantFonts", &obj1)->isArray() ||
1689 obj1.arrayGetLength() == 0) {
1690 error(errSyntaxError, -1,
1691 "Missing or empty DescendantFonts entry in Type 0 font");
1692 obj1.free();
1693 goto err1;
1694 }
1695 if (!obj1.arrayGet(0, &desFontDictObj)->isDict()) {
1696 error(errSyntaxError, -1, "Bad descendant font in Type 0 font");
1697 goto err2;
1698 }
1699 obj1.free();
1700 desFontDict = desFontDictObj.getDict();
1701
1702 // get info from font descriptor
1703 readFontDescriptor(xref, desFontDict);
1704
1705 //----- encoding info -----
1706
1707 // char collection
1708 if (!desFontDict->lookup("CIDSystemInfo", &obj1)->isDict()) {
1709 error(errSyntaxError, -1,
1710 "Missing CIDSystemInfo dictionary in Type 0 descendant font");
1711 goto err2;
1712 }
1713 obj1.dictLookup("Registry", &obj2);
1714 obj1.dictLookup("Ordering", &obj3);
1715 if (!obj2.isString() || !obj3.isString()) {
1716 error(errSyntaxError, -1,
1717 "Invalid CIDSystemInfo dictionary in Type 0 descendant font");
1718 goto err3;
1719 }
// collection name is "<Registry>-<Ordering>"
1720 collection = obj2.getString()->copy()->append('-')->append(obj3.getString());
1721 obj3.free();
1722 obj2.free();
1723 obj1.free();
1724
1725 // encoding (i.e., CMap)
1726 if (fontDict->lookup("Encoding", &obj1)->isNull()) {
1727 error(errSyntaxError, -1, "Missing Encoding entry in Type 0 font");
1728 goto err2;
1729 }
1730 if (!(cMap = CMap::parse(NULL, collection, &obj1))) {
1731 goto err2;
1732 }
1733
1734 // check for fonts that use the Identity-H encoding (cmap), and the
1735 // Adobe-Identity character collection
1736 identityEnc = obj1.isName("Identity-H") &&
1737 !collection->cmp("Adobe-Identity");
1738
1739 obj1.free();
1740
1741 // CIDToGIDMap
1742 // (the PDF 1.7 spec only allows these for TrueType fonts, but
1743 // Acrobat apparently also allows them for OpenType CFF fonts -- and
1744 // the PDF 2.0 spec has removed the prohibition)
1745 hasIdentityCIDToGID = gFalse;
1746 desFontDict->lookup("CIDToGIDMap", &obj1);
1747 if (obj1.isStream()) {
// each pair of stream bytes forms one GID, high byte first; the
// array starts with room for 64 entries and doubles when full; a
// trailing odd byte is dropped
1748 cidToGIDLen = 0;
1749 i = 64;
1750 cidToGID = (int *)gmallocn(i, sizeof(int));
1751 obj1.streamReset();
1752 while ((c1 = obj1.streamGetChar()) != EOF &&
1753 (c2 = obj1.streamGetChar()) != EOF) {
1754 if (cidToGIDLen == i) {
1755 i *= 2;
1756 cidToGID = (int *)greallocn(cidToGID, i, sizeof(int));
1757 }
1758 cidToGID[cidToGIDLen++] = (c1 << 8) + c2;
1759 }
1760 obj1.streamClose();
1761 identityEnc = gFalse;
1762 } else if (obj1.isName("Identity")) {
1763 hasIdentityCIDToGID = gTrue;
1764 } else if (!obj1.isNull()) {
1765 error(errSyntaxError, -1, "Invalid CIDToGIDMap entry in CID font");
1766 }
1767 obj1.free();
1768
1769 // look for a ToUnicode CMap
// sources are tried in this order: the embedded TrueType Unicode
// cmap (only if enabled in globalParams), the font's ToUnicode CMap,
// and finally a mapping chosen from the character collection
1770 hasKnownCollection = gFalse;
1771 if (globalParams->getUseTrueTypeUnicodeMapping()) {
1772 readTrueTypeUnicodeMapping(xref);
1773 }
1774 if (!ctu) {
1775 ctu = readToUnicodeCMap(fontDict, 16, NULL);
1776 }
1777 if (!ctu) {
// the mappings below are indexed by CID rather than by char code
// (see the use of ctuUsesCharCode in getNextChar)
1778 ctuUsesCharCode = gFalse;
1779
1780 // use an identity mapping for the "Adobe-Identity" and
1781 // "Adobe-UCS" collections
1782 if (!collection->cmp("Adobe-Identity") ||
1783 !collection->cmp("Adobe-UCS")) {
1784 ctu = CharCodeToUnicode::makeIdentityMapping();
1785
1786 // look for a user-supplied .cidToUnicode file
1787 } else if ((ctu = globalParams->getCIDToUnicode(collection))) {
1788 hasKnownCollection = gTrue;
1789
1790 } else {
1791 error(errSyntaxError, -1,
1792 "Unknown character collection '{0:t}'", collection);
1793
1794 // fall back to an identity mapping
1795 ctu = CharCodeToUnicode::makeIdentityMapping();
1796 }
1797 }
1798
1799 // look for a Unicode-to-Unicode mapping
// if ctu is an identity mapping, utu simply replaces it (ctu takes
// over the reference returned by getUnicodeToUnicode); otherwise the
// first Unicode value of each ctu entry is remapped through utu in
// place and the utu reference is dropped
1800 if (name && (utu = globalParams->getUnicodeToUnicode(name))) {
1801 if (ctu) {
1802 if (ctu->isIdentity()) {
1803 ctu->decRefCnt();
1804 ctu = utu;
1805 } else {
1806 for (c = 0; c < ctu->getLength(); ++c) {
1807 n = ctu->mapToUnicode(c, uBuf, 8);
1808 if (n >= 1) {
1809 n = utu->mapToUnicode((CharCode)uBuf[0], uBuf, 8);
1810 if (n >= 1) {
1811 ctu->setMapping(c, uBuf, n);
1812 }
1813 }
1814 }
1815 utu->decRefCnt();
1816 }
1817 } else {
1818 ctu = utu;
1819 }
1820 }
1821
1822 //----- character metrics -----
1823
// all metric values read from the dictionary are scaled by 0.001
1824 // default char width
1825 if (desFontDict->lookup("DW", &obj1)->isNum()) {
1826 widths.defWidth = obj1.getNum() * 0.001;
1827 }
1828 obj1.free();
1829
1830 // char width exceptions
// two entry forms are accepted:
//   first last width      -- one width for the CID range first..last
//   first [w1 w2 ...]     -- consecutive CIDs starting at first
// widths.exceps grows in multiples of 16 entries
1831 if (desFontDict->lookup("W", &obj1)->isArray()) {
1832 excepsSize = 0;
1833 i = 0;
1834 while (i + 1 < obj1.arrayGetLength()) {
1835 obj1.arrayGet(i, &obj2);
1836 obj1.arrayGet(i + 1, &obj3);
1837 if (obj2.isInt() && obj3.isInt() && i + 2 < obj1.arrayGetLength()) {
1838 if (obj1.arrayGet(i + 2, &obj4)->isNum()) {
1839 if (widths.nExceps == excepsSize) {
1840 excepsSize += 16;
1841 widths.exceps = (GfxFontCIDWidthExcep *)
1842 greallocn(widths.exceps,
1843 excepsSize, sizeof(GfxFontCIDWidthExcep));
1844 }
1845 widths.exceps[widths.nExceps].first = obj2.getInt();
1846 widths.exceps[widths.nExceps].last = obj3.getInt();
1847 widths.exceps[widths.nExceps].width = obj4.getNum() * 0.001;
1848 ++widths.nExceps;
1849 } else {
1850 error(errSyntaxError, -1, "Bad widths array in Type 0 font");
1851 }
1852 obj4.free();
1853 i += 3;
1854 } else if (obj2.isInt() && obj3.isArray()) {
1855 if (widths.nExceps + obj3.arrayGetLength() > excepsSize) {
1856 excepsSize = (widths.nExceps + obj3.arrayGetLength() + 15) & ~15;
1857 widths.exceps = (GfxFontCIDWidthExcep *)
1858 greallocn(widths.exceps,
1859 excepsSize, sizeof(GfxFontCIDWidthExcep));
1860 }
1861 j = obj2.getInt();
// a non-numeric element is reported and skipped without advancing j
1862 for (k = 0; k < obj3.arrayGetLength(); ++k) {
1863 if (obj3.arrayGet(k, &obj4)->isNum()) {
1864 widths.exceps[widths.nExceps].first = j;
1865 widths.exceps[widths.nExceps].last = j;
1866 widths.exceps[widths.nExceps].width = obj4.getNum() * 0.001;
1867 ++j;
1868 ++widths.nExceps;
1869 } else {
1870 error(errSyntaxError, -1, "Bad widths array in Type 0 font");
1871 }
1872 obj4.free();
1873 }
1874 i += 2;
1875 } else {
1876 error(errSyntaxError, -1, "Bad widths array in Type 0 font");
1877 ++i;
1878 }
1879 obj3.free();
1880 obj2.free();
1881 }
1882 }
1883 obj1.free();
1884
1885 // default metrics for vertical font
// DW2 must be a two-element array: element 0 -> defVY,
// element 1 -> defHeight
1886 if (desFontDict->lookup("DW2", &obj1)->isArray() &&
1887 obj1.arrayGetLength() == 2) {
1888 if (obj1.arrayGet(0, &obj2)->isNum()) {
1889 widths.defVY = obj2.getNum() * 0.001;
1890 }
1891 obj2.free();
1892 if (obj1.arrayGet(1, &obj2)->isNum()) {
1893 widths.defHeight = obj2.getNum() * 0.001;
1894 }
1895 obj2.free();
1896 }
1897 obj1.free();
1898
1899 // char metric exceptions for vertical font
// two entry forms are accepted:
//   first last height vx vy          -- for the CID range first..last
//   first [h1 vx1 vy1 h2 vx2 vy2 ...] -- consecutive CIDs from first
1900 if (desFontDict->lookup("W2", &obj1)->isArray()) {
1901 excepsSize = 0;
1902 i = 0;
1903 while (i + 1 < obj1.arrayGetLength()) {
1904 obj1.arrayGet(i, &obj2);
1905 obj1.arrayGet(i+ 1, &obj3);
1906 if (obj2.isInt() && obj3.isInt() && i + 4 < obj1.arrayGetLength()) {
1907 if (obj1.arrayGet(i + 2, &obj4)->isNum() &&
1908 obj1.arrayGet(i + 3, &obj5)->isNum() &&
1909 obj1.arrayGet(i + 4, &obj6)->isNum()) {
1910 if (widths.nExcepsV == excepsSize) {
1911 excepsSize += 16;
1912 widths.excepsV = (GfxFontCIDWidthExcepV *)
1913 greallocn(widths.excepsV,
1914 excepsSize, sizeof(GfxFontCIDWidthExcepV));
1915 }
1916 widths.excepsV[widths.nExcepsV].first = obj2.getInt();
1917 widths.excepsV[widths.nExcepsV].last = obj3.getInt();
1918 widths.excepsV[widths.nExcepsV].height = obj4.getNum() * 0.001;
1919 widths.excepsV[widths.nExcepsV].vx = obj5.getNum() * 0.001;
1920 widths.excepsV[widths.nExcepsV].vy = obj6.getNum() * 0.001;
1921 ++widths.nExcepsV;
1922 } else {
1923 error(errSyntaxError, -1, "Bad widths (W2) array in Type 0 font");
1924 }
1925 obj6.free();
1926 obj5.free();
1927 obj4.free();
1928 i += 5;
1929 } else if (obj2.isInt() && obj3.isArray()) {
1930 if (widths.nExcepsV + obj3.arrayGetLength() / 3 > excepsSize) {
1931 excepsSize =
1932 (widths.nExcepsV + obj3.arrayGetLength() / 3 + 15) & ~15;
1933 widths.excepsV = (GfxFontCIDWidthExcepV *)
1934 greallocn(widths.excepsV,
1935 excepsSize, sizeof(GfxFontCIDWidthExcepV));
1936 }
1937 j = obj2.getInt();
// elements are consumed three at a time; an incomplete trailing
// group is ignored
1938 for (k = 0; k + 2 < obj3.arrayGetLength(); k += 3) {
1939 if (obj3.arrayGet(k, &obj4)->isNum() &&
1940 obj3.arrayGet(k+1, &obj5)->isNum() &&
1941 obj3.arrayGet(k+2, &obj6)->isNum()) {
1942 widths.excepsV[widths.nExcepsV].first = j;
1943 widths.excepsV[widths.nExcepsV].last = j;
1944 widths.excepsV[widths.nExcepsV].height = obj4.getNum() * 0.001;
1945 widths.excepsV[widths.nExcepsV].vx = obj5.getNum() * 0.001;
1946 widths.excepsV[widths.nExcepsV].vy = obj6.getNum() * 0.001;
1947 ++j;
1948 ++widths.nExcepsV;
1949 } else {
1950 error(errSyntaxError, -1, "Bad widths (W2) array in Type 0 font");
1951 }
1952 obj6.free();
1953 obj5.free();
1954 obj4.free();
1955 }
1956 i += 2;
1957 } else {
1958 error(errSyntaxError, -1, "Bad widths (W2) array in Type 0 font");
1959 ++i;
1960 }
1961 obj3.free();
1962 obj2.free();
1963 }
1964 }
1965 obj1.free();
1966
1967 desFontDictObj.free();
1968 ok = gTrue;
1969 return;
1970
// error exits: each label frees the objects still live at the
// corresponding goto and then falls through to the next label
1971 err3:
1972 obj3.free();
1973 obj2.free();
1974 err2:
1975 obj1.free();
1976 desFontDictObj.free();
1977 err1:
1978 error(errSyntaxError, -1, "Failed to parse font object for '{0:t}'", name);
1979 }
1980
1981 GfxCIDFont::~GfxCIDFont() {
1982 if (collection) {
1983 delete collection;
1984 }
1985 if (cMap) {
1986 cMap->decRefCnt();
1987 }
1988 if (ctu) {
1989 ctu->decRefCnt();
1990 }
1991 gfree(widths.exceps);
1992 gfree(widths.excepsV);
1993 if (cidToGID) {
1994 gfree(cidToGID);
1995 }
1996 }
1997
1998 // Construct a code-to-Unicode mapping, based on the TrueType Unicode
1999 // cmap (if present). Constructs ctu if succesful; leaves ctu = null
2000 // otherwise. Always leaves ctu = null for non-TrueType fonts.
2001 void GfxCIDFont::readTrueTypeUnicodeMapping(XRef *xref) {
2002 char *buf;
2003 FoFiTrueType *ff;
2004 Unicode *gidToUnicode, *codeToUnicode;
2005 Unicode u;
2006 int bufLen, cmapPlatform, cmapEncoding, unicodeCmap;
2007 int nGlyphs, nMappings, gid, i;
2008
2009 // must be an embedded TrueType font, with an unknown char collection
2010 if ((type != fontCIDType2 && type == fontCIDType2OT) ||
2011 embFontID.num < 0 ||
2012 hasKnownCollection) {
2013 goto err0;
2014 }
2015
2016 // read the embedded font and construct a FoFiTrueType
2017 if (!(buf = readEmbFontFile(xref, &bufLen))) {
2018 goto err0;
2019 }
2020 if (!(ff = FoFiTrueType::make(buf, bufLen, 0))) {
2021 goto err1;
2022 }
2023
2024 // find the TrueType Unicode cmap
2025 unicodeCmap = -1;
2026 for (i = 0; i < ff->getNumCmaps(); ++i) {
2027 cmapPlatform = ff->getCmapPlatform(i);
2028 cmapEncoding = ff->getCmapEncoding(i);
2029 if ((cmapPlatform == 3 && cmapEncoding == 1) ||
2030 (cmapPlatform == 0 && cmapEncoding <= 4)) {
2031 unicodeCmap = i;
2032 break;
2033 }
2034 }
2035 if (unicodeCmap < 0) {
2036 goto err2;
2037 }
2038
2039 // construct reverse GID-to-Unicode map
2040 nGlyphs = ff->getNumGlyphs();
2041 gidToUnicode = (Unicode *)gmallocn(nGlyphs, sizeof(Unicode));
2042 memset(gidToUnicode, 0, nGlyphs * sizeof(Unicode));
2043 nMappings = 0;
2044 for (u = 1; u <= 0xffff; ++u) {
2045 gid = ff->mapCodeToGID(unicodeCmap, (int)u);
2046 if (gid > 0 && gid < nGlyphs) {
2047 gidToUnicode[gid] = u;
2048 ++nMappings;
2049 }
2050 }
2051 // bail out if the Unicode cmap was completely empty
2052 if (nMappings == 0) {
2053 goto err3;
2054 }
2055
2056 // construct code-to-Unicode map
2057 codeToUnicode = (Unicode *)gmallocn(65536, sizeof(Unicode));
2058 memset(codeToUnicode, 0, 65536 * sizeof(Unicode));
2059 for (i = 0; i <= 0xffff; ++i) {
2060 // we've already checked for an identity encoding, so CID = i
2061 if (cidToGID && i < cidToGIDLen) {
2062 gid = cidToGID[i];
2063 } else {
2064 gid = i;
2065 }
2066 if (gid < nGlyphs && gidToUnicode[gid] > 0) {
2067 codeToUnicode[i] = gidToUnicode[gid];
2068 }
2069 }
2070 ctu = CharCodeToUnicode::make16BitToUnicode(codeToUnicode);
2071
2072 gfree(codeToUnicode);
2073 err3:
2074 gfree(gidToUnicode);
2075 err2:
2076 delete ff;
2077 err1:
2078 gfree(buf);
2079 err0:
2080 return;
2081 }
2082
2083 int GfxCIDFont::getNextChar(char *s, int len, CharCode *code,
2084 Unicode *u, int uSize, int *uLen,
2085 double *dx, double *dy, double *ox, double *oy) {
2086 CID cid;
2087 CharCode c;
2088 int n;
2089
2090 if (!cMap) {
2091 *code = 0;
2092 *uLen = 0;
2093 *dx = *dy = 0;
2094 return 1;
2095 }
2096
2097 *code = (CharCode)(cid = cMap->getCID(s, len, &c, &n));
2098 if (ctu) {
2099 *uLen = ctu->mapToUnicode(ctuUsesCharCode ? c : cid, u, uSize);
2100 } else {
2101 *uLen = 0;
2102 }
2103 if (!*uLen && uSize >= 1 && globalParams->getMapUnknownCharNames()) {
2104 u[0] = *code;
2105 *uLen = 1;
2106 }
2107
2108 // horizontal
2109 if (cMap->getWMode() == 0) {
2110 getHorizontalMetrics(cid, dx);
2111 *dy = *ox = *oy = 0;
2112
2113 // vertical
2114 } else {
2115 getVerticalMetrics(cid, dy, ox, oy);
2116 *dx = 0;
2117 }
2118
2119 return n;
2120 }
2121
2122 // NB: Section 9.7.4.3 in the PDF 2.0 spec says that, in the case of
2123 // duplicate entries in the metrics, the first entry should be used.
2124 // This means we need to leave the metrics in the original order and
2125 // perform a linear search. (Or use a more complex data structure.)
2126 void GfxCIDFont::getHorizontalMetrics(CID cid, double *w) {
2127 int i;
2128 for (i = 0; i < widths.nExceps; ++i) {
2129 if (widths.exceps[i].first <= cid && cid <= widths.exceps[i].last) {
2130 *w = widths.exceps[i].width;
2131 return;
2132 }
2133 }
2134 *w = widths.defWidth;
2135 }
2136
2137 // NB: Section 9.7.4.3 in the PDF 2.0 spec says that, in the case of
2138 // duplicate entries in the metrics, the first entry should be used.
2139 // This means we need to leave the metrics in the original order and
2140 // perform a linear search. (Or use a more complex data structure.)
2141 void GfxCIDFont::getVerticalMetrics(CID cid, double *h,
2142 double *vx, double *vy) {
2143 int i;
2144 for (i = 0; i < widths.nExcepsV; ++i) {
2145 if (widths.excepsV[i].first <= cid && cid <= widths.excepsV[i].last) {
2146 *h = widths.excepsV[i].height;
2147 *vx = widths.excepsV[i].vx;
2148 *vy = widths.excepsV[i].vy;
2149 return;
2150 }
2151 }
2152 *h = widths.defHeight;
2153 getHorizontalMetrics(cid, vx);
2154 *vx /= 2;
2155 *vy = widths.defVY;
2156 }
2157
2158 int GfxCIDFont::getWMode() {
2159 return cMap ? cMap->getWMode() : 0;
2160 }
2161
2162 CharCodeToUnicode *GfxCIDFont::getToUnicode() {
2163 if (ctu) {
2164 ctu->incRefCnt();
2165 }
2166 return ctu;
2167 }
2168
2169 GString *GfxCIDFont::getCollection() {
2170 return cMap ? cMap->getCollection() : (GString *)NULL;
2171 }
2172
2173 double GfxCIDFont::getWidth(CID cid) {
2174 double w;
2175
2176 getHorizontalMetrics(cid, &w);
2177 return w;
2178 }
2179
2180 GBool GfxCIDFont::problematicForUnicode() {
2181 GString *nameLC;
2182 GBool symbolic;
2183
2184 // potential inputs:
2185 // - font is embedded (GfxFont.embFontID.num >= 0)
2186 // - font name (GfxFont.name)
2187 // - font type (GfxFont.type)
2188 // - symbolic (GfxFont.flags & fontSymbolic)
2189 // - has a ToUnicode map (GfxFont.hasToUnicode)
2190 // - collection is Adobe-Identity or Adobe-UCS
2191 // (GfxCIDFont.collection - compare string)
2192 // - collection is known AdobeCJK (GfxCIDFont.hasKnownCollection)
2193 // - has non-Identity CIDToGIDMap (GfxCIDFont.cidToGID != NULL)
2194 // - has Identity CIDToGIDMap (GfxCIDFont.hasIdentityCIDToGID)
2195
2196 if (name) {
2197 nameLC = name->copy();
2198 nameLC->lowerCase();
2199 symbolic = strstr(nameLC->getCString(), "dingbat") ||
2200 strstr(nameLC->getCString(), "wingding") ||
2201 strstr(nameLC->getCString(), "commpi");
2202 delete nameLC;
2203 if (symbolic) {
2204 return gFalse;
2205 }
2206 }
2207
2208 if (embFontID.num >= 0) {
2209 switch (type) {
2210 case fontCIDType0:
2211 case fontCIDType0C:
2212 case fontCIDType0COT:
2213 return !hasToUnicode && !hasKnownCollection;
2214
2215 case fontCIDType2:
2216 case fontCIDType2OT:
2217 return !hasToUnicode && !hasKnownCollection;
2218
2219 default:
2220 return !hasToUnicode;
2221 }
2222
2223 } else {
2224 return !hasToUnicode;
2225 }
2226 }
2227
2228 //------------------------------------------------------------------------
2229 // GfxFontDict
2230 //------------------------------------------------------------------------
2231
2232 GfxFontDict::GfxFontDict(XRef *xref, Ref *fontDictRef, Dict *fontDict) {
2233 GfxFont *font;
2234 char *tag;
2235 Object obj1, obj2;
2236 Ref r;
2237 int i;
2238
2239 fonts = new GHash(gTrue);
2240 uniqueFonts = new GList();
2241 for (i = 0; i < fontDict->getLength(); ++i) {
2242 tag = fontDict->getKey(i);
2243 fontDict->getValNF(i, &obj1);
2244 obj1.fetch(xref, &obj2);
2245 if (!obj2.isDict()) {
2246 error(errSyntaxError, -1, "font resource is not a dictionary");
2247 } else if (obj1.isRef() && (font = lookupByRef(obj1.getRef()))) {
2248 fonts->add(new GString(tag), font);
2249 } else {
2250 if (obj1.isRef()) {
2251 r = obj1.getRef();
2252 } else if (fontDictRef) {
2253 // legal generation numbers are five digits, so we use a
2254 // 6-digit number here
2255 r.gen = 100000 + fontDictRef->num;
2256 r.num = i;
2257 } else {
2258 // no indirect reference for this font, or for the containing
2259 // font dict, so hash the font and use that
2260 r.gen = 100000;
2261 r.num = hashFontObject(&obj2);
2262 }
2263 if ((font = GfxFont::makeFont(xref, tag, r, obj2.getDict()))) {
2264 if (!font->isOk()) {
2265 delete font;
2266 } else {
2267 uniqueFonts->append(font);
2268 fonts->add(new GString(tag), font);
2269 }
2270 }
2271 }
2272 obj1.free();
2273 obj2.free();
2274 }
2275 }
2276
2277 GfxFontDict::~GfxFontDict() {
2278 deleteGList(uniqueFonts, GfxFont);
2279 delete fonts;
2280 }
2281
2282 GfxFont *GfxFontDict::lookup(char *tag) {
2283 return (GfxFont *)fonts->lookup(tag);
2284 }
2285
2286 GfxFont *GfxFontDict::lookupByRef(Ref ref) {
2287 GfxFont *font;
2288 int i;
2289
2290 for (i = 0; i < uniqueFonts->getLength(); ++i) {
2291 font = (GfxFont *)uniqueFonts->get(i);
2292 if (font->getID()->num == ref.num &&
2293 font->getID()->gen == ref.gen) {
2294 return font;
2295 }
2296 }
2297 return NULL;
2298 }
2299
2300 int GfxFontDict::getNumFonts() {
2301 return uniqueFonts->getLength();
2302 }
2303
2304 GfxFont *GfxFontDict::getFont(int i) {
2305 return (GfxFont *)uniqueFonts->get(i);
2306 }
2307
2308 // FNV-1a hash
2309 class FNVHash {
2310 public:
2311
2312 FNVHash() {
2313 h = 2166136261U;
2314 }
2315
2316 void hash(char c) {
2317 h ^= c & 0xff;
2318 h *= 16777619;
2319 }
2320
2321 void hash(char *p, int n) {
2322 int i;
2323 for (i = 0; i < n; ++i) {
2324 hash(p[i]);
2325 }
2326 }
2327
2328 int get31() {
2329 return (h ^ (h >> 31)) & 0x7fffffff;
2330 }
2331
2332 private:
2333
2334 Guint h;
2335 };
2336
2337 int GfxFontDict::hashFontObject(Object *obj) {
2338 FNVHash h;
2339
2340 hashFontObject1(obj, &h);
2341 return h.get31();
2342 }
2343
2344 void GfxFontDict::hashFontObject1(Object *obj, FNVHash *h) {
2345 Object obj2;
2346 GString *s;
2347 char *p;
2348 double r;
2349 int n, i;
2350
2351 switch (obj->getType()) {
2352 case objBool:
2353 h->hash('b');
2354 h->hash(obj->getBool() ? 1 : 0);
2355 break;
2356 case objInt:
2357 h->hash('i');
2358 n = obj->getInt();
2359 h->hash((char *)&n, sizeof(int));
2360 break;
2361 case objReal:
2362 h->hash('r');
2363 r = obj->getReal();
2364 h->hash((char *)&r, sizeof(double));
2365 break;
2366 case objString:
2367 h->hash('s');
2368 s = obj->getString();
2369 h->hash(s->getCString(), s->getLength());
2370 break;
2371 case objName:
2372 h->hash('n');
2373 p = obj->getName();
2374 h->hash(p, (int)strlen(p));
2375 break;
2376 case objNull:
2377 h->hash('z');
2378 break;
2379 case objArray:
2380 h->hash('a');
2381 n = obj->arrayGetLength();
2382 h->hash((char *)&n, sizeof(int));
2383 for (i = 0; i < n; ++i) {
2384 obj->arrayGetNF(i, &obj2);
2385 hashFontObject1(&obj2, h);
2386 obj2.free();
2387 }
2388 break;
2389 case objDict:
2390 h->hash('d');
2391 n = obj->dictGetLength();
2392 h->hash((char *)&n, sizeof(int));
2393 for (i = 0; i < n; ++i) {
2394 p = obj->dictGetKey(i);
2395 h->hash(p, (int)strlen(p));
2396 obj->dictGetValNF(i, &obj2);
2397 hashFontObject1(&obj2, h);
2398 obj2.free();
2399 }
2400 break;
2401 case objStream:
2402 // this should never happen - streams must be indirect refs
2403 break;
2404 case objRef:
2405 h->hash('f');
2406 n = obj->getRefNum();
2407 h->hash((char *)&n, sizeof(int));
2408 n = obj->getRefGen();
2409 h->hash((char *)&n, sizeof(int));
2410 break;
2411 default:
2412 h->hash('u');
2413 break;
2414 }
2415 }