"Fossies" - the Fresh Open Source Software Archive

Member "xpdf-4.04/xpdf/GfxFont.cc" (18 Apr 2022, 65646 Bytes) of package /linux/misc/xpdf-4.04.tar.gz:


As a special service, "Fossies" has tried to format the requested source page into HTML using (guessed) C and C++ source code syntax highlighting (style: standard) with prefixed line numbers and a code folding option. Alternatively, you can view or download the uninterpreted source code file here. For more information about "GfxFont.cc", see the Fossies "Dox" file reference documentation and the latest Fossies "Diffs" side-by-side code changes report: 4.03_vs_4.04.

    1 //========================================================================
    2 //
    3 // GfxFont.cc
    4 //
    5 // Copyright 1996-2003 Glyph & Cog, LLC
    6 //
    7 //========================================================================
    8 
    9 #include <aconf.h>
   10 
   11 #ifdef USE_GCC_PRAGMAS
   12 #pragma implementation
   13 #endif
   14 
   15 #include <stdio.h>
   16 #include <stdlib.h>
   17 #include <string.h>
   18 #include <ctype.h>
   19 #include <math.h>
   20 #include <limits.h>
   21 #include "gmem.h"
   22 #include "gmempp.h"
   23 #include "GList.h"
   24 #include "GHash.h"
   25 #include "Error.h"
   26 #include "Object.h"
   27 #include "Dict.h"
   28 #include "GlobalParams.h"
   29 #include "CMap.h"
   30 #include "CharCodeToUnicode.h"
   31 #include "FontEncodingTables.h"
   32 #include "BuiltinFontTables.h"
   33 #include "FoFiIdentifier.h"
   34 #include "FoFiType1.h"
   35 #include "FoFiType1C.h"
   36 #include "FoFiTrueType.h"
   37 #include "GfxFont.h"
   38 
   39 //------------------------------------------------------------------------
   40 
   // One row of the Base-14 alias table: pairs a font name that may be
   // found in a font dictionary with the Base-14 font it stands for.
   struct Base14FontMapEntry {
     // alias that the (space-stripped) font name is compared against
     const char *altName;
     // name of the corresponding Base-14 font
     const char *base14Name;
   };
   45 
   // Aliases for the Base-14 fonts, mapped to the canonical Base-14 name.
   //
   // NB: the entries must stay sorted by altName in byte order -- the
   // Gfx8BitFont constructor finds a font's entry with a binary search
   // that compares the font name (with spaces removed) against altName.
   // A new alias has to be inserted at its sorted position, not appended.
   46 static Base14FontMapEntry base14FontMap[] = {
   47   { "Arial",                        "Helvetica" },
   48   { "Arial,Bold",                   "Helvetica-Bold" },
   49   { "Arial,BoldItalic",             "Helvetica-BoldOblique" },
   50   { "Arial,Italic",                 "Helvetica-Oblique" },
   51   { "Arial-Bold",                   "Helvetica-Bold" },
   52   { "Arial-BoldItalic",             "Helvetica-BoldOblique" },
   53   { "Arial-BoldItalicMT",           "Helvetica-BoldOblique" },
   54   { "Arial-BoldMT",                 "Helvetica-Bold" },
   55   { "Arial-Italic",                 "Helvetica-Oblique" },
   56   { "Arial-ItalicMT",               "Helvetica-Oblique" },
   57   { "ArialMT",                      "Helvetica" },
   58   { "Courier",                      "Courier" },
   59   { "Courier,Bold",                 "Courier-Bold" },
   60   { "Courier,BoldItalic",           "Courier-BoldOblique" },
   61   { "Courier,Italic",               "Courier-Oblique" },
   62   { "Courier-Bold",                 "Courier-Bold" },
   63   { "Courier-BoldOblique",          "Courier-BoldOblique" },
   64   { "Courier-Oblique",              "Courier-Oblique" },
   65   { "CourierNew",                   "Courier" },
   66   { "CourierNew,Bold",              "Courier-Bold" },
   67   { "CourierNew,BoldItalic",        "Courier-BoldOblique" },
   68   { "CourierNew,Italic",            "Courier-Oblique" },
   69   { "CourierNew-Bold",              "Courier-Bold" },
   70   { "CourierNew-BoldItalic",        "Courier-BoldOblique" },
   71   { "CourierNew-Italic",            "Courier-Oblique" },
   72   { "CourierNewPS-BoldItalicMT",    "Courier-BoldOblique" },
   73   { "CourierNewPS-BoldMT",          "Courier-Bold" },
   74   { "CourierNewPS-ItalicMT",        "Courier-Oblique" },
   75   { "CourierNewPSMT",               "Courier" },
   76   { "Helvetica",                    "Helvetica" },
   77   { "Helvetica,Bold",               "Helvetica-Bold" },
   78   { "Helvetica,BoldItalic",         "Helvetica-BoldOblique" },
   79   { "Helvetica,Italic",             "Helvetica-Oblique" },
   80   { "Helvetica-Bold",               "Helvetica-Bold" },
   81   { "Helvetica-BoldItalic",         "Helvetica-BoldOblique" },
   82   { "Helvetica-BoldOblique",        "Helvetica-BoldOblique" },
   83   { "Helvetica-Italic",             "Helvetica-Oblique" },
   84   { "Helvetica-Oblique",            "Helvetica-Oblique" },
   85   { "Symbol",                       "Symbol" },
   86   { "Symbol,Bold",                  "Symbol" },
   87   { "Symbol,BoldItalic",            "Symbol" },
   88   { "Symbol,Italic",                "Symbol" },
   89   { "Times-Bold",                   "Times-Bold" },
   90   { "Times-BoldItalic",             "Times-BoldItalic" },
   91   { "Times-Italic",                 "Times-Italic" },
   92   { "Times-Roman",                  "Times-Roman" },
   93   { "TimesNewRoman",                "Times-Roman" },
   94   { "TimesNewRoman,Bold",           "Times-Bold" },
   95   { "TimesNewRoman,BoldItalic",     "Times-BoldItalic" },
   96   { "TimesNewRoman,Italic",         "Times-Italic" },
   97   { "TimesNewRoman-Bold",           "Times-Bold" },
   98   { "TimesNewRoman-BoldItalic",     "Times-BoldItalic" },
   99   { "TimesNewRoman-Italic",         "Times-Italic" },
  100   { "TimesNewRomanPS",              "Times-Roman" },
  101   { "TimesNewRomanPS-Bold",         "Times-Bold" },
  102   { "TimesNewRomanPS-BoldItalic",   "Times-BoldItalic" },
  103   { "TimesNewRomanPS-BoldItalicMT", "Times-BoldItalic" },
  104   { "TimesNewRomanPS-BoldMT",       "Times-Bold" },
  105   { "TimesNewRomanPS-Italic",       "Times-Italic" },
  106   { "TimesNewRomanPS-ItalicMT",     "Times-Italic" },
  107   { "TimesNewRomanPSMT",            "Times-Roman" },
  108   { "TimesNewRomanPSMT,Bold",       "Times-Bold" },
  109   { "TimesNewRomanPSMT,BoldItalic", "Times-BoldItalic" },
  110   { "TimesNewRomanPSMT,Italic",     "Times-Italic" },
  111   { "ZapfDingbats",                 "ZapfDingbats" }
  112 };
  113 
  114 //------------------------------------------------------------------------
  115 
  // Names of the Base-14 fonts, used by locateFont() to pick a substitute
  // for an 8-bit font that could not be found.
  // index: {fixed:0, sans-serif:4, serif:8} + bold*2 + italic
  // NB: must be in same order as psSubstFonts in PSOutputDev.cc
  // The table is read-only lookup data, so the pointer slots are const
  // as well as the strings they point to.
  static const char *const base14SubstFonts[14] = {
    // fixed-width
    "Courier",
    "Courier-Oblique",
    "Courier-Bold",
    "Courier-BoldOblique",
    // sans-serif
    "Helvetica",
    "Helvetica-Oblique",
    "Helvetica-Bold",
    "Helvetica-BoldOblique",
    // serif
    "Times-Roman",
    "Times-Italic",
    "Times-Bold",
    "Times-BoldItalic",
    // the last two are never used for substitution
    "Symbol",
    "ZapfDingbats"
  };
  135 
  136 //------------------------------------------------------------------------
  137 
  138 static int readFromStream(void *data) {
  139   return ((Stream *)data)->getChar();
  140 }
  141 
  142 //------------------------------------------------------------------------
  143 // GfxFontLoc
  144 //------------------------------------------------------------------------
  145 
  146 GfxFontLoc::GfxFontLoc() {
  147   path = NULL;
  148   fontNum = 0;
  149   oblique = 0;
  150   encoding = NULL;
  151   substIdx = -1;
  152 }
  153 
  154 GfxFontLoc::~GfxFontLoc() {
  155   if (path) {
  156     delete path;
  157   }
  158   if (encoding) {
  159     delete encoding;
  160   }
  161 }
  162 
  163 //------------------------------------------------------------------------
  164 // GfxFont
  165 //------------------------------------------------------------------------
  166 
  167 GfxFont *GfxFont::makeFont(XRef *xref, const char *tagA,
  168                Ref idA, Dict *fontDict) {
  169   GString *nameA;
  170   Ref embFontIDA;
  171   GfxFontType typeA;
  172   GfxFont *font;
  173   Object obj1;
  174 
  175   // get base font name
  176   nameA = NULL;
  177   fontDict->lookup("BaseFont", &obj1);
  178   if (obj1.isName()) {
  179     nameA = new GString(obj1.getName());
  180   } else if (obj1.isString()) {
  181     nameA = obj1.getString()->copy();
  182   }
  183   obj1.free();
  184 
  185   // get embedded font ID and font type
  186   typeA = getFontType(xref, fontDict, &embFontIDA);
  187 
  188   // create the font object
  189   font = NULL;
  190   if (typeA < fontCIDType0) {
  191     font = new Gfx8BitFont(xref, tagA, idA, nameA, typeA, embFontIDA,
  192                fontDict);
  193   } else {
  194     font = new GfxCIDFont(xref, tagA, idA, nameA, typeA, embFontIDA,
  195               fontDict);
  196   }
  197 
  198   return font;
  199 }
  200 
  201 GfxFont *GfxFont::makeDefaultFont(XRef *xref) {
  202   Object type, subtype, baseFont;
  203   type.initName("Font");
  204   subtype.initName("Type1");
  205   baseFont.initName("Helvetica");
  206   Object fontDict;
  207   fontDict.initDict(xref);
  208   fontDict.dictAdd(copyString("Type"), &type);
  209   fontDict.dictAdd(copyString("Subtype"), &subtype);
  210   fontDict.dictAdd(copyString("BaseFont"), &baseFont);
  211 
  212   Ref r;
  213   r.gen = 100000;
  214   r.num = GfxFontDict::hashFontObject(&fontDict);
  215 
  216   GfxFont *font = makeFont(xref, "undef", r, fontDict.getDict());
  217   fontDict.free();
  218 
  219   return font;
  220 }
  221 
  222 GfxFont::GfxFont(const char *tagA, Ref idA, GString *nameA,
  223          GfxFontType typeA, Ref embFontIDA) {
  224   ok = gFalse;
  225   tag = new GString(tagA);
  226   id = idA;
  227   name = nameA;
  228   type = typeA;
  229   embFontID = embFontIDA;
  230   embFontName = NULL;
  231   hasToUnicode = gFalse;
  232 }
  233 
  234 GfxFont::~GfxFont() {
  235   delete tag;
  236   if (name) {
  237     delete name;
  238   }
  239   if (embFontName) {
  240     delete embFontName;
  241   }
  242 }
  243 
  244 // This function extracts three pieces of information:
  245 // 1. the "expected" font type, i.e., the font type implied by
  246 //    Font.Subtype, DescendantFont.Subtype, and
  247 //    FontDescriptor.FontFile3.Subtype
  248 // 2. the embedded font object ID
  249 // 3. the actual font type - determined by examining the embedded font
  250 //    if there is one, otherwise equal to the expected font type
  251 // If the expected and actual font types don't match, a warning
  252 // message is printed.  The expected font type is not used for
  253 // anything else.
      //
      // xref is used to fetch the embedded font stream; fontDict is the
      // font dictionary.  *embID receives the reference found under
      // FontFile, FontFile2 or FontFile3 (tried in that order), or
      // num = gen = -1 if none of them is a reference.  The return value is
      // the actual font type (item 3 above).
  254 GfxFontType GfxFont::getFontType(XRef *xref, Dict *fontDict, Ref *embID) {
  255   GfxFontType t, expectedType;
  256   FoFiIdentifierType fft;
  257   Dict *fontDict2;
  258   Object subtype, fontDesc, obj1, obj2, obj3, obj4;
  259   GBool isType0, err;
  260 
  261   t = fontUnknownType;
  262   embID->num = embID->gen = -1;
  263   err = gFalse;
  264 
        // expected type from the top-level Subtype; for Type0 it is only
        // known once the descendant font has been examined
  265   fontDict->lookup("Subtype", &subtype);
  266   expectedType = fontUnknownType;
  267   isType0 = gFalse;
  268   if (subtype.isName("Type1") || subtype.isName("MMType1")) {
  269     expectedType = fontType1;
  270   } else if (subtype.isName("Type1C")) {
  271     expectedType = fontType1C;
  272   } else if (subtype.isName("Type3")) {
  273     expectedType = fontType3;
  274   } else if (subtype.isName("TrueType")) {
  275     expectedType = fontTrueType;
  276   } else if (subtype.isName("Type0")) {
  277     isType0 = gTrue;
  278   } else {
  279     error(errSyntaxWarning, -1, "Unknown font type: '{0:s}'",
  280       subtype.isName() ? subtype.getName() : "???");
  281   }
  282   subtype.free();
  283 
        // if there is a DescendantFonts array whose first element is a
        // dictionary, that dictionary (fontDict2) is the one searched for the
        // FontDescriptor below; obj2 keeps it alive, so obj2 and obj1 are
        // only freed at the end of the function
  284   fontDict2 = fontDict;
  285   if (fontDict->lookup("DescendantFonts", &obj1)->isArray()) {
  286     if (obj1.arrayGetLength() == 0) {
  287       error(errSyntaxWarning, -1, "Empty DescendantFonts array in font");
  288       obj2.initNull();
  289     } else if (obj1.arrayGet(0, &obj2)->isDict()) {
  290       if (!isType0) {
  291     error(errSyntaxWarning, -1, "Non-CID font with DescendantFonts array");
  292       }
  293       fontDict2 = obj2.getDict();
  294       fontDict2->lookup("Subtype", &subtype);
  295       if (subtype.isName("CIDFontType0")) {
  296     if (isType0) {
  297       expectedType = fontCIDType0;
  298     }
  299       } else if (subtype.isName("CIDFontType2")) {
  300     if (isType0) {
  301       expectedType = fontCIDType2;
  302     }
  303       }
  304       subtype.free();
  305     }
  306   } else {
  307     obj2.initNull();
  308   }
  309 
        // look for the embedded font reference; embID->num stays -1 until one
        // of FontFile / FontFile2 / FontFile3 turns out to be a reference, so
        // the first match wins.  Each case also checks the expected type
        // against what that entry implies and sets err on a mismatch.
  310   if (fontDict2->lookup("FontDescriptor", &fontDesc)->isDict()) {
  311     if (fontDesc.dictLookupNF("FontFile", &obj3)->isRef()) {
  312       *embID = obj3.getRef();
  313       if (expectedType != fontType1) {
  314     err = gTrue;
  315       }
  316     }
  317     obj3.free();
  318     if (embID->num == -1 &&
  319     fontDesc.dictLookupNF("FontFile2", &obj3)->isRef()) {
  320       *embID = obj3.getRef();
  321       if (isType0) {
  322     expectedType = fontCIDType2;
  323       } else if (expectedType != fontTrueType) {
  324     err = gTrue;
  325       }
  326     }
  327     obj3.free();
  328     if (embID->num == -1 &&
  329     fontDesc.dictLookupNF("FontFile3", &obj3)->isRef()) {
  330       *embID = obj3.getRef();
            // FontFile3 streams carry their own Subtype, which refines (or
            // overrides, with err set) the expected type
  331       if (obj3.fetch(xref, &obj4)->isStream()) {
  332     obj4.streamGetDict()->lookup("Subtype", &subtype);
  333     if (subtype.isName("Type1")) {
  334       if (expectedType != fontType1) {
  335         err = gTrue;
  336         expectedType = isType0 ? fontCIDType0 : fontType1;
  337       }
  338     } else if (subtype.isName("Type1C")) {
  339       if (expectedType == fontType1) {
  340         expectedType = fontType1C;
  341       } else if (expectedType != fontType1C) {
  342         err = gTrue;
  343         expectedType = isType0 ? fontCIDType0C : fontType1C;
  344       }
  345     } else if (subtype.isName("TrueType")) {
  346       if (expectedType != fontTrueType) {
  347         err = gTrue;
  348         expectedType = isType0 ? fontCIDType2 : fontTrueType;
  349       }
  350     } else if (subtype.isName("CIDFontType0C")) {
  351       if (expectedType == fontCIDType0) {
  352         expectedType = fontCIDType0C;
  353       } else {
  354         err = gTrue;
  355         expectedType = isType0 ? fontCIDType0C : fontType1C;
  356       }
  357     } else if (subtype.isName("OpenType")) {
  358       if (expectedType == fontTrueType) {
  359         expectedType = fontTrueTypeOT;
  360       } else if (expectedType == fontType1) {
  361         expectedType = fontType1COT;
  362       } else if (expectedType == fontCIDType0) {
  363         expectedType = fontCIDType0COT;
  364       } else if (expectedType == fontCIDType2) {
  365         expectedType = fontCIDType2OT;
  366       } else {
  367         err = gTrue;
  368       }
  369     } else {
  370       error(errSyntaxError, -1, "Unknown font type '{0:s}'",
  371         subtype.isName() ? subtype.getName() : "???");
  372     }
  373     subtype.free();
  374       }
  375       obj4.free();
  376     }
  377     obj3.free();
  378   }
  379   fontDesc.free();
  380 
        // actual type: identify the embedded font stream, if there is one, by
        // its contents; t stays fontUnknownType if the object is not a stream
        // or the identifier does not report one of the types handled below
  381   t = fontUnknownType;
  382   if (embID->num >= 0) {
  383     obj3.initRef(embID->num, embID->gen);
  384     obj3.fetch(xref, &obj4);
  385     if (obj4.isStream()) {
  386       obj4.streamReset();
  387       fft = FoFiIdentifier::identifyStream(&readFromStream, obj4.getStream());
  388       obj4.streamClose();
  389       switch (fft) {
  390       case fofiIdType1PFA:
  391       case fofiIdType1PFB:
  392     t = fontType1;
  393     break;
  394       case fofiIdCFF8Bit:
  395     t = isType0 ? fontCIDType0C : fontType1C;
  396     break;
  397       case fofiIdCFFCID:
  398     t = fontCIDType0C;
  399     break;
  400       case fofiIdTrueType:
  401       case fofiIdTrueTypeCollection:
  402     t = isType0 ? fontCIDType2 : fontTrueType;
  403     break;
  404       case fofiIdOpenTypeCFF8Bit:
  405     t = isType0 ? fontCIDType0COT : fontType1COT;
  406     break;
  407       case fofiIdOpenTypeCFFCID:
  408     t = fontCIDType0COT;
  409     break;
  410       default:
  411     error(errSyntaxError, -1, "Embedded font file may be invalid");
  412     break;
  413       }
  414     }
  415     obj4.free();
  416     obj3.free();
  417   }
  418 
        // no identifiable embedded font: fall back to the expected type
  419   if (t == fontUnknownType) {
  420     t = expectedType;
  421   }
  422 
  423   if (t != expectedType) {
  424     err = gTrue;
  425   }
  426 
  427   if (err) {
  428     error(errSyntaxWarning, -1,
  429       "Mismatch between font type and embedded font file");
  430   }
  431 
  432   obj2.free();
  433   obj1.free();
  434 
  435   return t;
  436 }
  437 
  438 void GfxFont::readFontDescriptor(XRef *xref, Dict *fontDict) {
  439   Object obj1, obj2, obj3, obj4;
  440   double t, t2;
  441   int i;
  442 
  443   // assume Times-Roman by default (for substitution purposes)
  444   flags = fontSerif;
  445 
  446   if (fontDict->lookup("FontDescriptor", &obj1)->isDict()) {
  447 
  448     // get flags
  449     if (obj1.dictLookup("Flags", &obj2)->isInt()) {
  450       flags = obj2.getInt();
  451     }
  452     obj2.free();
  453 
  454     // get name
  455     obj1.dictLookup("FontName", &obj2);
  456     if (obj2.isName()) {
  457       embFontName = new GString(obj2.getName());
  458     }
  459     obj2.free();
  460 
  461     // look for MissingWidth
  462     obj1.dictLookup("MissingWidth", &obj2);
  463     if (obj2.isNum()) {
  464       missingWidth = obj2.getNum();
  465     }
  466     obj2.free();
  467 
  468     // get Ascent
  469     // (CapHeight is a little more reliable - so use it if present)
  470     obj1.dictLookup("Ascent", &obj2);
  471     obj1.dictLookup("CapHeight", &obj3);
  472     if (obj2.isNum() || obj3.isNum()) {
  473       if (obj2.isNum()) {
  474     t = 0.001 * obj2.getNum();
  475     // some broken font descriptors specify a negative ascent
  476     if (t < 0) {
  477       t = -t;
  478     }
  479       } else {
  480     t = 0;
  481       }
  482       if (obj3.isNum()) {
  483     t2 = 0.001 * obj3.getNum();
  484     // some broken font descriptors specify a negative ascent
  485     if (t2 < 0) {
  486       t2 = -t2;
  487     }
  488       } else {
  489     t2 = 0;
  490       }
  491       if (t != 0 && t < 1.9) {
  492     declaredAscent = t;
  493       }
  494       // if both Ascent and CapHeight are set, use the smaller one
  495       // (because the most common problem is that Ascent is too large)
  496       if (t2 != 0 && (t == 0 || t2 < t)) {
  497     t = t2;
  498       }
  499       // some broken font descriptors set ascent and descent to 0;
  500       // others set it to ridiculous values (e.g., 32768)
  501       if (t != 0 && t < 1.9) {
  502     ascent = t;
  503       }
  504     }
  505     obj2.free();
  506     obj3.free();
  507 
  508     // get Descent
  509     obj1.dictLookup("Descent", &obj2);
  510     if (obj2.isNum()) {
  511       t = 0.001 * obj2.getNum();
  512       // some broken font descriptors specify a positive descent
  513       if (t > 0) {
  514     t = -t;
  515       }
  516       // some broken font descriptors set ascent and descent to 0
  517       if (t != 0 && t > -1.9) {
  518     descent = t;
  519       }
  520     }
  521     obj2.free();
  522 
  523     // font FontBBox
  524     if (obj1.dictLookup("FontBBox", &obj2)->isArray()) {
  525       for (i = 0; i < 4 && i < obj2.arrayGetLength(); ++i) {
  526     if (obj2.arrayGet(i, &obj3)->isNum()) {
  527       fontBBox[i] = 0.001 * obj3.getNum();
  528     }
  529     obj3.free();
  530       }
  531     }
  532     obj2.free();
  533 
  534   }
  535   obj1.free();
  536 }
  537 
  538 CharCodeToUnicode *GfxFont::readToUnicodeCMap(Dict *fontDict, int nBits,
  539                           CharCodeToUnicode *ctu) {
  540   GString *buf;
  541   Object obj1;
  542   char buf2[4096];
  543   int n;
  544 
  545   if (!fontDict->lookup("ToUnicode", &obj1)->isStream()) {
  546     obj1.free();
  547     return NULL;
  548   }
  549   buf = new GString();
  550   obj1.streamReset();
  551   while ((n = obj1.streamGetBlock(buf2, sizeof(buf2))) > 0) {
  552     buf->append(buf2, n);
  553   }
  554   obj1.streamClose();
  555   obj1.free();
  556   if (ctu) {
  557     ctu->mergeCMap(buf, nBits);
  558   } else {
  559     ctu = CharCodeToUnicode::parseCMap(buf, nBits);
  560   }
  561   delete buf;
  562   hasToUnicode = gTrue;
  563   return ctu;
  564 }
  565 
  // Works out where the font program for this font should come from and
  // returns a newly allocated GfxFontLoc describing it, or NULL for Type 3
  // fonts and when nothing suitable (not even a substitute) is found.
  //
  // The possibilities are tried in this order:
  //   1. the embedded font (for ps, only if globalParams allows embedding
  //      that font type)
  //   2. PS passthrough (ps only, 8-bit fonts)
  //   3. an external font file found by name
  //   4. the resident (ps) or external (!ps) Base-14 font
  //   5. a system font found by name
  //   6. 8-bit fonts: a PS resident font (ps only), then a Base-14
  //      substitute chosen from the fixed-width/serif/bold/italic flags
  //   7. CID fonts: a 16-bit PS resident font by name or by collection
  //      (ps only), then an external font file for the collection
  //
  // NOTE(review): ps presumably means "locating for PostScript output",
  // and the caller presumably owns the returned object -- confirm against
  // the callers.
  566 GfxFontLoc *GfxFont::locateFont(XRef *xref, GBool ps) {
  567   GfxFontLoc *fontLoc;
  568   SysFontType sysFontType;
  569   FoFiIdentifierType fft;
  570   GString *path, *base14Name, *substName;
  571   PSFontParam16 *psFont16;
  572   Object refObj, embFontObj;
  573   int substIdx, fontNum;
  574   double oblique;
  575   GBool embed;
  576 
  577   if (type == fontType3) {
  578     return NULL;
  579   }
  580 
  581   //----- embedded font
  582   if (embFontID.num >= 0) {
  583     embed = gTrue;
          // the embedded font object is only usable if it is a stream
  584     refObj.initRef(embFontID.num, embFontID.gen);
  585     refObj.fetch(xref, &embFontObj);
  586     if (!embFontObj.isStream()) {
  587       error(errSyntaxError, -1, "Embedded font object is wrong type");
  588       embed = gFalse;
  589     }
  590     embFontObj.free();
  591     refObj.free();
  592     if (embed) {
            // for ps, globalParams decides per font type whether to embed;
            // types not listed below keep embed == gTrue
  593       if (ps) {
  594     switch (type) {
  595     case fontType1:
  596     case fontType1C:
  597     case fontType1COT:
  598       embed = globalParams->getPSEmbedType1();
  599       break;
  600     case fontTrueType:
  601     case fontTrueTypeOT:
  602       embed = globalParams->getPSEmbedTrueType();
  603       break;
  604     case fontCIDType0C:
  605     case fontCIDType0COT:
  606       embed = globalParams->getPSEmbedCIDPostScript();
  607       break;
  608     case fontCIDType2:
  609     case fontCIDType2OT:
  610       embed = globalParams->getPSEmbedCIDTrueType();
  611       break;
  612     default:
  613       break;
  614     }
  615       }
  616       if (embed) {
  617     fontLoc = new GfxFontLoc();
  618     fontLoc->locType = gfxFontLocEmbedded;
  619     fontLoc->fontType = type;
  620     fontLoc->embFontID = embFontID;
  621     return fontLoc;
  622       }
  623     }
  624   }
  625 
  626   //----- PS passthrough
  627   if (ps && name && !isCIDFont() && globalParams->getPSFontPassthrough()) {
  628     fontLoc = new GfxFontLoc();
  629     fontLoc->locType = gfxFontLocResident;
  630     fontLoc->fontType = fontType1;
  631     fontLoc->path = name->copy();
  632     return fontLoc;
  633   }
  634 
  635   //----- external font file (fontFile, fontDir)
        // (getExternalFont either stores path in the returned location or
        // deletes it, so path needs no cleanup here)
  636   if (name && (path = globalParams->findFontFile(name))) {
  637     if ((fontLoc = getExternalFont(path, 0, 0, isCIDFont()))) {
  638       return fontLoc;
  639     }
  640   }
  641 
  642   //----- PS resident Base-14 font
        // NOTE(review): the Gfx8BitFont casts here and below presume that
        // every font for which isCIDFont() is false is a Gfx8BitFont -- confirm
  643   if (ps && !isCIDFont() && ((Gfx8BitFont *)this)->base14) {
  644     fontLoc = new GfxFontLoc();
  645     fontLoc->locType = gfxFontLocResident;
  646     fontLoc->fontType = fontType1;
  647     fontLoc->path = new GString(((Gfx8BitFont *)this)->base14->base14Name);
  648     return fontLoc;
  649   }
  650 
  651   //----- external font file for Base-14 font
  652   if (!ps && !isCIDFont() && ((Gfx8BitFont *)this)->base14) {
  653     base14Name = new GString(((Gfx8BitFont *)this)->base14->base14Name);
  654     path = globalParams->findBase14FontFile(base14Name, &fontNum, &oblique);
  655     delete base14Name;
  656     if (path && (fontLoc = getExternalFont(path, fontNum, oblique, gFalse))) {
  657       return fontLoc;
  658     }
  659   }
  660 
  661   //----- system font
  662   if (name && (path = globalParams->findSystemFontFile(name, &sysFontType,
  663                                &fontNum))) {
  664     fontLoc = new GfxFontLoc();
  665     fontLoc->locType = gfxFontLocExternal;
  666     fontLoc->path = path;
  667     fontLoc->fontNum = fontNum;
          // the location is only returned if the system font's file type is
          // compatible with this font (CID vs. 8-bit); OpenType files are
          // inspected to find out which kind of outlines they contain
  668     if (isCIDFont()) {
  669       if (sysFontType == sysFontTTF || sysFontType == sysFontTTC) {
  670     fontLoc->fontType = fontCIDType2;
  671     return fontLoc;
  672       } else if (sysFontType == sysFontOTF) {
  673     fft = FoFiIdentifier::identifyFile(fontLoc->path->getCString());
  674     if (fft == fofiIdOpenTypeCFFCID) {
  675       fontLoc->fontType = fontCIDType0COT;
  676       return fontLoc;
  677     } else if (fft == fofiIdTrueType) {
  678       fontLoc->fontType = fontCIDType2;
  679       return fontLoc;
  680     }
  681       }
  682     } else {
  683       if (sysFontType == sysFontTTF || sysFontType == sysFontTTC) {
  684     fontLoc->fontType = fontTrueType;
  685     return fontLoc;
  686       } else if (sysFontType == sysFontPFA || sysFontType == sysFontPFB) {
  687     fontLoc->fontType = fontType1;
  688     return fontLoc;
  689       } else if (sysFontType == sysFontOTF) {
  690     fft = FoFiIdentifier::identifyFile(fontLoc->path->getCString());
  691     if (fft == fofiIdOpenTypeCFF8Bit) {
  692       fontLoc->fontType = fontType1COT;
  693       return fontLoc;
  694     } else if (fft == fofiIdTrueType) {
  695       fontLoc->fontType = fontTrueTypeOT;
  696       return fontLoc;
  697     }
  698       }
  699     }
          // incompatible file type: discard the location; its destructor
          // also deletes path
  700     delete fontLoc;
  701   }
  702 
  703   if (!isCIDFont()) {
  704 
  705     //----- 8-bit PS resident font
  706     if (ps) {
  707       if (name && (path = globalParams->getPSResidentFont(name))) {
  708     fontLoc = new GfxFontLoc();
  709     fontLoc->locType = gfxFontLocResident;
  710     fontLoc->fontType = fontType1;
  711     fontLoc->path = path;
  712     return fontLoc;
  713       }
  714     }
  715 
  716     //----- 8-bit font substitution
          // substIdx indexes base14SubstFonts: family base (fixed 0,
          // sans-serif 4, serif 8), plus 2 if bold, plus 1 if italic
  717     if (flags & fontFixedWidth) {
  718       substIdx = 0;
  719     } else if (flags & fontSerif) {
  720       substIdx = 8;
  721     } else {
  722       substIdx = 4;
  723     }
  724     if (isBold()) {
  725       substIdx += 2;
  726     }
  727     if (isItalic()) {
  728       substIdx += 1;
  729     }
  730     substName = new GString(base14SubstFonts[substIdx]);
  731     if (ps) {
  732       error(errSyntaxWarning, -1, "Substituting font '{0:s}' for '{1:t}'",
  733         base14SubstFonts[substIdx], name);
  734       fontLoc = new GfxFontLoc();
  735       fontLoc->locType = gfxFontLocResident;
  736       fontLoc->fontType = fontType1;
            // substName is handed over to fontLoc here
  737       fontLoc->path = substName;
  738       fontLoc->substIdx = substIdx;
  739       return fontLoc;
  740     } else {
  741       path = globalParams->findBase14FontFile(substName, &fontNum, &oblique);
  742       delete substName;
  743       if (path) {
  744     if ((fontLoc = getExternalFont(path, fontNum, oblique, gFalse))) {
  745       error(errSyntaxWarning, -1, "Substituting font '{0:s}' for '{1:t}'",
  746         base14SubstFonts[substIdx], name);
  747       fontLoc->substIdx = substIdx;
  748       return fontLoc;
  749     }
  750       }
  751     }
  752 
  753     // failed to find a substitute font
  754     return NULL;
  755   }
  756 
        // everything below is only reached for CID fonts
  757   //----- 16-bit PS resident font
  758   if (ps && name && ((psFont16 = globalParams->getPSResidentFont16(
  759                      name,
  760                      ((GfxCIDFont *)this)->getWMode())))) {
  761     fontLoc = new GfxFontLoc();
  762     fontLoc->locType = gfxFontLocResident;
  763     fontLoc->fontType = fontCIDType0; // this is not used
  764     fontLoc->path = psFont16->psFontName->copy();
  765     fontLoc->encoding = psFont16->encoding->copy();
  766     fontLoc->wMode = psFont16->wMode;
  767     return fontLoc;
  768   }
  769   if (ps && ((psFont16 = globalParams->getPSResidentFontCC(
  770                  ((GfxCIDFont *)this)->getCollection(),
  771                  ((GfxCIDFont *)this)->getWMode())))) {
  772     error(errSyntaxWarning, -1, "Substituting font '{0:t}' for '{1:t}'",
  773       psFont16->psFontName, name);
  774     fontLoc = new GfxFontLoc();
  775     fontLoc->locType = gfxFontLocResident;
  776     fontLoc->fontType = fontCIDType0; // this is not used
  777     fontLoc->path = psFont16->psFontName->copy();
  778     fontLoc->encoding = psFont16->encoding->copy();
  779     fontLoc->wMode = psFont16->wMode;
  780     return fontLoc;
  781   }
  782 
  783   //----- CID font substitution
  784   if ((path = globalParams->findCCFontFile(
  785                 ((GfxCIDFont *)this)->getCollection()))) {
  786     if ((fontLoc = getExternalFont(path, 0, 0, gTrue))) {
  787       error(errSyntaxWarning, -1, "Substituting font '{0:t}' for '{1:t}'",
  788         fontLoc->path, name);
  789       return fontLoc;
  790     }
  791   }
  792 
  793   // failed to find a substitute font
  794   return NULL;
  795 }
  796 
  797 GfxFontLoc *GfxFont::locateBase14Font(GString *base14Name) {
  798   GString *path;
  799   int fontNum;
  800   double oblique;
  801 
  802   path = globalParams->findBase14FontFile(base14Name, &fontNum, &oblique);
  803   if (!path) {
  804     return NULL;
  805   }
  806   return getExternalFont(path, fontNum, oblique, gFalse);
  807 }
  808 
  809 GfxFontLoc *GfxFont::getExternalFont(GString *path, int fontNum,
  810                      double oblique, GBool cid) {
  811   FoFiIdentifierType fft;
  812   GfxFontType fontType;
  813   GfxFontLoc *fontLoc;
  814 
  815   fft = FoFiIdentifier::identifyFile(path->getCString());
  816   switch (fft) {
  817   case fofiIdType1PFA:
  818   case fofiIdType1PFB:
  819     fontType = fontType1;
  820     break;
  821   case fofiIdCFF8Bit:
  822     fontType = fontType1C;
  823     break;
  824   case fofiIdCFFCID:
  825     fontType = fontCIDType0C;
  826     break;
  827   case fofiIdTrueType:
  828   case fofiIdTrueTypeCollection:
  829     fontType = cid ? fontCIDType2 : fontTrueType;
  830     break;
  831   case fofiIdOpenTypeCFF8Bit:
  832     fontType = fontType1COT;
  833     break;
  834   case fofiIdOpenTypeCFFCID:
  835     fontType = fontCIDType0COT;
  836     break;
  837   case fofiIdDfont:
  838     fontType = cid ? fontCIDType2 : fontTrueType;
  839     break;
  840   case fofiIdUnknown:
  841   case fofiIdError:
  842   default:
  843     fontType = fontUnknownType;
  844     break;
  845   }
  846   if (fontType == fontUnknownType ||
  847       (cid ? (fontType < fontCIDType0)
  848            : (fontType >= fontCIDType0))) {
  849     delete path;
  850     return NULL;
  851   }
  852   fontLoc = new GfxFontLoc();
  853   fontLoc->locType = gfxFontLocExternal;
  854   fontLoc->fontType = fontType;
  855   fontLoc->path = path;
  856   fontLoc->fontNum = fontNum;
  857   fontLoc->oblique = oblique;
  858   return fontLoc;
  859 }
  860 
  861 char *GfxFont::readEmbFontFile(XRef *xref, int *len) {
  862   char *buf;
  863   Object obj1, obj2;
  864   Stream *str;
  865   int size, n;
  866 
  867   obj1.initRef(embFontID.num, embFontID.gen);
  868   obj1.fetch(xref, &obj2);
  869   if (!obj2.isStream()) {
  870     error(errSyntaxError, -1, "Embedded font file is not a stream");
  871     obj2.free();
  872     obj1.free();
  873     embFontID.num = -1;
  874     return NULL;
  875   }
  876   str = obj2.getStream();
  877 
  878   size = 4096;
  879   buf = (char *)gmalloc(size);
  880   *len = 0;
  881   str->reset();
  882   do {
  883     if (*len > size - 4096) {
  884       if (size > INT_MAX / 2) {
  885     error(errSyntaxError, -1, "Embedded font file is too large");
  886     break;
  887       }
  888       size *= 2;
  889       buf = (char *)grealloc(buf, size);
  890     }
  891     n = str->getBlock(buf + *len, 4096);
  892     *len += n;
  893   } while (n == 4096);
  894   str->close();
  895 
  896   obj2.free();
  897   obj1.free();
  898 
  899   return buf;
  900 }
  901 
  902 //------------------------------------------------------------------------
  903 // Gfx8BitFont
  904 //------------------------------------------------------------------------
  905 
  906 Gfx8BitFont::Gfx8BitFont(XRef *xref, const char *tagA, Ref idA, GString *nameA,
  907              GfxFontType typeA, Ref embFontIDA, Dict *fontDict):
  908   GfxFont(tagA, idA, nameA, typeA, embFontIDA)
  909 {
  910   GString *name2;
  911   BuiltinFont *builtinFont;
  912   const char **baseEnc;
  913   char *buf;
  914   int len;
  915   FoFiType1 *ffT1;
  916   FoFiType1C *ffT1C;
  917   int code, code2;
  918   char *charName;
  919   GBool missing, hex;
  920   Unicode toUnicode[256];
  921   CharCodeToUnicode *utu, *ctu2;
  922   Unicode uBuf[8];
  923   double mul;
  924   int firstChar, lastChar;
  925   Gushort w;
  926   Object obj1, obj2, obj3;
  927   int n, i, a, b, m;
  928 
  929   ctu = NULL;
  930 
  931   // do font name substitution for various aliases of the Base 14 font
  932   // names
  933   base14 = NULL;
  934   if (name) {
  935     name2 = name->copy();
  936     i = 0;
  937     while (i < name2->getLength()) {
  938       if (name2->getChar(i) == ' ') {
  939     name2->del(i);
  940       } else {
  941     ++i;
  942       }
  943     }
  944     a = 0;
  945     b = sizeof(base14FontMap) / sizeof(Base14FontMapEntry);
  946     // invariant: base14FontMap[a].altName <= name2 < base14FontMap[b].altName
  947     while (b - a > 1) {
  948       m = (a + b) / 2;
  949       if (name2->cmp(base14FontMap[m].altName) >= 0) {
  950     a = m;
  951       } else {
  952     b = m;
  953       }
  954     }
  955     if (!name2->cmp(base14FontMap[a].altName)) {
  956       base14 = &base14FontMap[a];
  957     }
  958     delete name2;
  959   }
  960 
  961   // is it a built-in font?
  962   builtinFont = NULL;
  963   if (base14) {
  964     for (i = 0; i < nBuiltinFonts; ++i) {
  965       if (!strcmp(base14->base14Name, builtinFonts[i].name)) {
  966     builtinFont = &builtinFonts[i];
  967     break;
  968       }
  969     }
  970   }
  971 
  972   // default ascent/descent values
  973   if (builtinFont) {
  974     missingWidth = builtinFont->missingWidth;
  975     ascent = 0.001 * builtinFont->ascent;
  976     descent = 0.001 * builtinFont->descent;
  977     declaredAscent = ascent;
  978     fontBBox[0] = 0.001 * builtinFont->bbox[0];
  979     fontBBox[1] = 0.001 * builtinFont->bbox[1];
  980     fontBBox[2] = 0.001 * builtinFont->bbox[2];
  981     fontBBox[3] = 0.001 * builtinFont->bbox[3];
  982   } else {
  983     missingWidth = 0;
  984     ascent = 0.75;
  985     descent = -0.25;
  986     declaredAscent = ascent;
  987     fontBBox[0] = fontBBox[1] = fontBBox[2] = fontBBox[3] = 0;
  988   }
  989 
  990   // get info from font descriptor
  991   readFontDescriptor(xref, fontDict);
  992 
  993   // for Base-14 fonts (even if embedded), don't trust the
  994   // ascent/descent/bbox values from the font descriptor
  995   if (builtinFont) {
  996     ascent = 0.001 * builtinFont->ascent;
  997     descent = 0.001 * builtinFont->descent;
  998     declaredAscent = ascent;
  999     fontBBox[0] = 0.001 * builtinFont->bbox[0];
 1000     fontBBox[1] = 0.001 * builtinFont->bbox[1];
 1001     fontBBox[2] = 0.001 * builtinFont->bbox[2];
 1002     fontBBox[3] = 0.001 * builtinFont->bbox[3];
 1003   }
 1004 
 1005   // get font matrix
 1006   fontMat[0] = fontMat[3] = 1;
 1007   fontMat[1] = fontMat[2] = fontMat[4] = fontMat[5] = 0;
 1008   if (fontDict->lookup("FontMatrix", &obj1)->isArray()) {
 1009     for (i = 0; i < 6 && i < obj1.arrayGetLength(); ++i) {
 1010       if (obj1.arrayGet(i, &obj2)->isNum()) {
 1011     fontMat[i] = obj2.getNum();
 1012       }
 1013       obj2.free();
 1014     }
 1015   }
 1016   obj1.free();
 1017 
 1018   // get Type 3 bounding box, font definition, and resources
 1019   if (type == fontType3) {
 1020     if (fontDict->lookup("FontBBox", &obj1)->isArray()) {
 1021       for (i = 0; i < 4 && i < obj1.arrayGetLength(); ++i) {
 1022     if (obj1.arrayGet(i, &obj2)->isNum()) {
 1023       fontBBox[i] = obj2.getNum();
 1024     }
 1025     obj2.free();
 1026       }
 1027     }
 1028     obj1.free();
 1029     if (!fontDict->lookup("CharProcs", &charProcs)->isDict()) {
 1030       error(errSyntaxError, -1,
 1031         "Missing or invalid CharProcs dictionary in Type 3 font");
 1032       charProcs.free();
 1033     }
 1034     if (!fontDict->lookup("Resources", &resources)->isDict()) {
 1035       resources.free();
 1036     }
 1037   }
 1038 
 1039   //----- build the font encoding -----
 1040 
 1041   // Encodings start with a base encoding, which can come from
 1042   // (in order of priority):
 1043   //   1. FontDict.Encoding or FontDict.Encoding.BaseEncoding
 1044   //        - MacRoman / MacExpert / WinAnsi / Standard
 1045   //   2. embedded or external font file
 1046   //   3. default:
 1047   //        - builtin --> builtin encoding
 1048   //        - TrueType --> WinAnsiEncoding
 1049   //        - others --> StandardEncoding
 1050   // and then add a list of differences (if any) from
 1051   // FontDict.Encoding.Differences.
 1052 
 1053   // check FontDict for base encoding
 1054   hasEncoding = gFalse;
 1055   usesMacRomanEnc = gFalse;
 1056   baseEnc = NULL;
 1057   baseEncFromFontFile = gFalse;
 1058   fontDict->lookup("Encoding", &obj1);
 1059   if (obj1.isDict()) {
 1060     obj1.dictLookup("BaseEncoding", &obj2);
 1061     if (obj2.isName("MacRomanEncoding")) {
 1062       hasEncoding = gTrue;
 1063       usesMacRomanEnc = gTrue;
 1064       baseEnc = macRomanEncoding;
 1065     } else if (obj2.isName("MacExpertEncoding")) {
 1066       hasEncoding = gTrue;
 1067       baseEnc = macExpertEncoding;
 1068     } else if (obj2.isName("WinAnsiEncoding")) {
 1069       hasEncoding = gTrue;
 1070       baseEnc = winAnsiEncoding;
 1071     }
 1072     obj2.free();
 1073   } else if (obj1.isName("MacRomanEncoding")) {
 1074     hasEncoding = gTrue;
 1075     usesMacRomanEnc = gTrue;
 1076     baseEnc = macRomanEncoding;
 1077   } else if (obj1.isName("MacExpertEncoding")) {
 1078     hasEncoding = gTrue;
 1079     baseEnc = macExpertEncoding;
 1080   } else if (obj1.isName("WinAnsiEncoding")) {
 1081     hasEncoding = gTrue;
 1082     baseEnc = winAnsiEncoding;
 1083   }
 1084 
 1085   // check embedded font file for base encoding
 1086   // (only for Type 1 fonts - trying to get an encoding out of a
 1087   // TrueType font is a losing proposition)
 1088   ffT1 = NULL;
 1089   ffT1C = NULL;
 1090   buf = NULL;
 1091   if (type == fontType1 && embFontID.num >= 0) {
 1092     if ((buf = readEmbFontFile(xref, &len))) {
 1093       if ((ffT1 = FoFiType1::make(buf, len))) {
 1094     if (ffT1->getName()) {
 1095       if (embFontName) {
 1096         delete embFontName;
 1097       }
 1098       embFontName = new GString(ffT1->getName());
 1099     }
 1100     if (!baseEnc) {
 1101       baseEnc = (const char **)ffT1->getEncoding();
 1102       baseEncFromFontFile = gTrue;
 1103     }
 1104       }
 1105       gfree(buf);
 1106     }
 1107   } else if (type == fontType1C && embFontID.num >= 0) {
 1108     if ((buf = readEmbFontFile(xref, &len))) {
 1109       if ((ffT1C = FoFiType1C::make(buf, len))) {
 1110     if (ffT1C->getName()) {
 1111       if (embFontName) {
 1112         delete embFontName;
 1113       }
 1114       embFontName = new GString(ffT1C->getName());
 1115     }
 1116     if (!baseEnc) {
 1117       baseEnc = (const char **)ffT1C->getEncoding();
 1118       baseEncFromFontFile = gTrue;
 1119     }
 1120       }
 1121       gfree(buf);
 1122     }
 1123   }
 1124 
 1125   // get default base encoding
 1126   if (!baseEnc) {
 1127     if (builtinFont && embFontID.num < 0) {
 1128       baseEnc = builtinFont->defaultBaseEnc;
 1129       hasEncoding = gTrue;
 1130     } else if (type == fontTrueType) {
 1131       baseEnc = winAnsiEncoding;
 1132     } else {
 1133       baseEnc = standardEncoding;
 1134     }
 1135   }
 1136 
 1137   // copy the base encoding
 1138   for (i = 0; i < 256; ++i) {
 1139     enc[i] = (char *)baseEnc[i];
 1140     if ((encFree[i] = (char)baseEncFromFontFile) && enc[i]) {
 1141       enc[i] = copyString(baseEnc[i]);
 1142     }
 1143   }
 1144 
 1145   // some Type 1C font files have empty encodings, which can break the
 1146   // T1C->T1 conversion (since the 'seac' operator depends on having
 1147   // the accents in the encoding), so we fill in any gaps from
 1148   // StandardEncoding
 1149   if (type == fontType1C && embFontID.num >= 0 && baseEncFromFontFile) {
 1150     for (i = 0; i < 256; ++i) {
 1151       if (!enc[i] && standardEncoding[i]) {
 1152     enc[i] = (char *)standardEncoding[i];
 1153     encFree[i] = gFalse;
 1154       }
 1155     }
 1156   }
 1157 
 1158   // merge differences into encoding
 1159   if (obj1.isDict()) {
 1160     obj1.dictLookup("Differences", &obj2);
 1161     if (obj2.isArray()) {
 1162       hasEncoding = gTrue;
 1163       code = 0;
 1164       for (i = 0; i < obj2.arrayGetLength(); ++i) {
 1165     obj2.arrayGet(i, &obj3);
 1166     if (obj3.isInt()) {
 1167       code = obj3.getInt();
 1168     } else if (obj3.isName()) {
 1169       if (code >= 0 && code < 256) {
 1170         if (encFree[code]) {
 1171           gfree(enc[code]);
 1172         }
 1173         enc[code] = copyString(obj3.getName());
 1174         encFree[code] = gTrue;
 1175       }
 1176       ++code;
 1177     } else {
 1178       error(errSyntaxError, -1,
 1179         "Wrong type in font encoding resource differences ({0:s})",
 1180         obj3.getTypeName());
 1181     }
 1182     obj3.free();
 1183       }
 1184     }
 1185     obj2.free();
 1186   }
 1187   obj1.free();
 1188   if (ffT1) {
 1189     delete ffT1;
 1190   }
 1191   if (ffT1C) {
 1192     delete ffT1C;
 1193   }
 1194 
 1195   //----- build the mapping to Unicode -----
 1196 
 1197   // pass 1: use the name-to-Unicode mapping table
 1198   missing = hex = gFalse;
 1199   for (code = 0; code < 256; ++code) {
 1200     if ((charName = enc[code])) {
 1201       if (!(toUnicode[code] = globalParams->mapNameToUnicode(charName)) &&
 1202       strcmp(charName, ".notdef")) {
 1203     // if it wasn't in the name-to-Unicode table, check for a
 1204     // name that looks like 'Axx' or 'xx', where 'A' is any letter
 1205     // and 'xx' is two hex digits
 1206     if ((strlen(charName) == 3 &&
 1207          isalpha(charName[0] & 0xff) &&
 1208          isxdigit(charName[1] & 0xff) && isxdigit(charName[2] & 0xff) &&
 1209          ((charName[1] >= 'a' && charName[1] <= 'f') ||
 1210           (charName[1] >= 'A' && charName[1] <= 'F') ||
 1211           (charName[2] >= 'a' && charName[2] <= 'f') ||
 1212           (charName[2] >= 'A' && charName[2] <= 'F'))) ||
 1213         (strlen(charName) == 2 &&
 1214          isxdigit(charName[0] & 0xff) && isxdigit(charName[1] & 0xff) &&
 1215          ((charName[0] >= 'a' && charName[0] <= 'f') ||
 1216           (charName[0] >= 'A' && charName[0] <= 'F') ||
 1217           (charName[1] >= 'a' && charName[1] <= 'f') ||
 1218           (charName[1] >= 'A' && charName[1] <= 'F')))) {
 1219       hex = gTrue;
 1220     }
 1221     missing = gTrue;
 1222       }
 1223     } else {
 1224       toUnicode[code] = 0;
 1225     }
 1226   }
 1227 
 1228   // pass 2: try to fill in the missing chars, looking for names of
 1229   // any of the following forms:
 1230   // - 'xx'
 1231   // - 'Axx'
 1232   // - 'nn'
 1233   // - 'Ann'
 1234   // - 'ABnn'
 1235   // - 'unixxxx' (possibly followed by garbage - some Arabic files
 1236   //             use 'uni0628.medi', etc.)
 1237   // where 'A' and 'B' are any letters, 'xx' is two hex digits, 'xxxx'
 1238   // is four hex digits, and 'nn' is 2-4 decimal digits
 1239   usedNumericHeuristic = gFalse;
 1240   if (missing && globalParams->getMapNumericCharNames()) {
 1241     for (code = 0; code < 256; ++code) {
 1242       if ((charName = enc[code]) && !toUnicode[code] &&
 1243       strcmp(charName, ".notdef")) {
 1244     n = (int)strlen(charName);
 1245     code2 = -1;
 1246     if (hex && n == 3 && isalpha(charName[0] & 0xff) &&
 1247         isxdigit(charName[1] & 0xff) && isxdigit(charName[2] & 0xff)) {
 1248       sscanf(charName+1, "%x", &code2);
 1249     } else if (hex && n == 2 &&
 1250            isxdigit(charName[0] & 0xff) &&
 1251            isxdigit(charName[1] & 0xff)) {
 1252       sscanf(charName, "%x", &code2);
 1253     } else if (!hex && n >= 2 && n <= 4 &&
 1254            isdigit(charName[0] & 0xff) && isdigit(charName[1] & 0xff)) {
 1255       code2 = atoi(charName);
 1256     } else if (n >= 3 && n <= 5 &&
 1257            isdigit(charName[1] & 0xff) && isdigit(charName[2] & 0xff)) {
 1258       code2 = atoi(charName+1);
 1259     } else if (n >= 4 && n <= 6 &&
 1260            isdigit(charName[2] & 0xff) && isdigit(charName[3] & 0xff)) {
 1261       code2 = atoi(charName+2);
 1262     } else if (n >= 7 && charName[0] == 'u' && charName[1] == 'n' &&
 1263            charName[2] == 'i' &&
 1264            isxdigit(charName[3] & 0xff) &&
 1265            isxdigit(charName[4] & 0xff) &&
 1266            isxdigit(charName[5] & 0xff) &&
 1267            isxdigit(charName[6] & 0xff)) {
 1268       sscanf(charName + 3, "%x", &code2);
 1269     }
 1270     if (code2 >= 0 && code2 <= 0xffff) {
 1271       toUnicode[code] = (Unicode)code2;
 1272       usedNumericHeuristic = gTrue;
 1273     }
 1274       }
 1275     }
 1276 
 1277   // if the 'mapUnknownCharNames' flag is set, do a simple pass-through
 1278   // mapping for unknown character names
 1279   } else if (missing && globalParams->getMapUnknownCharNames()) {
 1280     for (code = 0; code < 256; ++code) {
 1281       if (!toUnicode[code]) {
 1282     toUnicode[code] = code;
 1283       }
 1284     }
 1285   }
 1286 
 1287   // construct the char code -> Unicode mapping object
 1288   ctu = CharCodeToUnicode::make8BitToUnicode(toUnicode);
 1289 
 1290   // merge in a ToUnicode CMap, if there is one -- this overwrites
 1291   // existing entries in ctu, i.e., the ToUnicode CMap takes
 1292   // precedence, but the other encoding info is allowed to fill in any
 1293   // holes
 1294   readToUnicodeCMap(fontDict, 8, ctu);
 1295 
 1296   // look for a Unicode-to-Unicode mapping
 1297   if (name && (utu = globalParams->getUnicodeToUnicode(name))) {
 1298     for (i = 0; i < 256; ++i) {
 1299       toUnicode[i] = 0;
 1300     }
 1301     ctu2 = CharCodeToUnicode::make8BitToUnicode(toUnicode);
 1302     for (i = 0; i < 256; ++i) {
 1303       n = ctu->mapToUnicode((CharCode)i, uBuf, 8);
 1304       if (n >= 1) {
 1305     n = utu->mapToUnicode((CharCode)uBuf[0], uBuf, 8);
 1306     if (n >= 1) {
 1307       ctu2->setMapping((CharCode)i, uBuf, n);
 1308     }
 1309       }
 1310     }
 1311     utu->decRefCnt();
 1312     delete ctu;
 1313     ctu = ctu2;
 1314   }
 1315 
 1316   //----- get the character widths -----
 1317 
 1318   // initialize all widths
 1319   for (code = 0; code < 256; ++code) {
 1320     widths[code] = missingWidth * 0.001;
 1321   }
 1322 
 1323   // use widths from font dict, if present
 1324   fontDict->lookup("FirstChar", &obj1);
 1325   firstChar = obj1.isInt() ? obj1.getInt() : 0;
 1326   obj1.free();
 1327   if (firstChar < 0 || firstChar > 255) {
 1328     firstChar = 0;
 1329   }
 1330   fontDict->lookup("LastChar", &obj1);
 1331   lastChar = obj1.isInt() ? obj1.getInt() : 255;
 1332   obj1.free();
 1333   if (lastChar < 0 || lastChar > 255) {
 1334     lastChar = 255;
 1335   }
 1336   mul = (type == fontType3) ? fontMat[0] : 0.001;
 1337   fontDict->lookup("Widths", &obj1);
 1338   if (obj1.isArray()) {
 1339     flags |= fontFixedWidth;
 1340     if (obj1.arrayGetLength() < lastChar - firstChar + 1) {
 1341       lastChar = firstChar + obj1.arrayGetLength() - 1;
 1342     }
 1343     for (code = firstChar; code <= lastChar; ++code) {
 1344       obj1.arrayGet(code - firstChar, &obj2);
 1345       if (obj2.isNum()) {
 1346     widths[code] = obj2.getNum() * mul;
 1347     if (fabs(widths[code] - widths[firstChar]) > 0.00001) {
 1348       flags &= ~fontFixedWidth;
 1349     }
 1350       }
 1351       obj2.free();
 1352     }
 1353 
 1354   // use widths from built-in font
 1355   } else if (builtinFont) {
 1356     // this is a kludge for broken PDF files that encode char 32
 1357     // as .notdef
 1358     if (builtinFont->widths->getWidth("space", &w)) {
 1359       widths[32] = 0.001 * w;
 1360     }
 1361     for (code = 0; code < 256; ++code) {
 1362       if (enc[code] && builtinFont->widths->getWidth(enc[code], &w)) {
 1363     widths[code] = 0.001 * w;
 1364       }
 1365     }
 1366 
 1367   // couldn't find widths -- use defaults 
 1368   } else {
 1369     // this is technically an error -- the Widths entry is required
 1370     // for all but the Base-14 fonts -- but certain PDF generators
 1371     // apparently don't include widths for Arial and TimesNewRoman
 1372     if (isFixedWidth()) {
 1373       i = 0;
 1374     } else if (isSerif()) {
 1375       i = 8;
 1376     } else {
 1377       i = 4;
 1378     }
 1379     if (isBold()) {
 1380       i += 2;
 1381     }
 1382     if (isItalic()) {
 1383       i += 1;
 1384     }
 1385     builtinFont = builtinFontSubst[i];
 1386     // this is a kludge for broken PDF files that encode char 32
 1387     // as .notdef
 1388     if (builtinFont->widths->getWidth("space", &w)) {
 1389       widths[32] = 0.001 * w;
 1390     }
 1391     for (code = 0; code < 256; ++code) {
 1392       if (enc[code] && builtinFont->widths->getWidth(enc[code], &w)) {
 1393     widths[code] = 0.001 * w;
 1394       }
 1395     }
 1396   }
 1397   obj1.free();
 1398 
 1399   ok = gTrue;
 1400 }
 1401 
 1402 Gfx8BitFont::~Gfx8BitFont() {
 1403   int i;
 1404 
 1405   for (i = 0; i < 256; ++i) {
 1406     if (encFree[i] && enc[i]) {
 1407       gfree(enc[i]);
 1408     }
 1409   }
 1410   ctu->decRefCnt();
 1411   if (charProcs.isDict()) {
 1412     charProcs.free();
 1413   }
 1414   if (resources.isDict()) {
 1415     resources.free();
 1416   }
 1417 }
 1418 
 1419 int Gfx8BitFont::getNextChar(char *s, int len, CharCode *code,
 1420                  Unicode *u, int uSize, int *uLen,
 1421                  double *dx, double *dy, double *ox, double *oy) {
 1422   CharCode c;
 1423 
 1424   *code = c = (CharCode)(*s & 0xff);
 1425   *uLen = ctu->mapToUnicode(c, u, uSize);
 1426   *dx = widths[c];
 1427   *dy = *ox = *oy = 0;
 1428   return 1;
 1429 }
 1430 
 1431 CharCodeToUnicode *Gfx8BitFont::getToUnicode() {
 1432   ctu->incRefCnt();
 1433   return ctu;
 1434 }
 1435 
 1436 int *Gfx8BitFont::getCodeToGIDMap(FoFiTrueType *ff) {
 1437   int *map;
 1438   int cmapPlatform, cmapEncoding;
 1439   int unicodeCmap, macRomanCmap, msSymbolCmap, cmap;
 1440   GBool nonsymbolic, useMacRoman, useUnicode;
 1441   char *charName;
 1442   Unicode u;
 1443   int code, i, n;
 1444 
 1445   map = (int *)gmallocn(256, sizeof(int));
 1446   for (i = 0; i < 256; ++i) {
 1447     map[i] = 0;
 1448   }
 1449 
 1450   // This is based on the cmap/encoding selection algorithm in the PDF
 1451   // 2.0 spec, but with some differences to match up with Adobe's
 1452   // behavior.
 1453   unicodeCmap = macRomanCmap = msSymbolCmap = -1;
 1454   for (i = 0; i < ff->getNumCmaps(); ++i) {
 1455     cmapPlatform = ff->getCmapPlatform(i);
 1456     cmapEncoding = ff->getCmapEncoding(i);
 1457     if ((cmapPlatform == 3 && cmapEncoding == 1) ||
 1458     (cmapPlatform == 0 && cmapEncoding <= 4)) {
 1459       unicodeCmap = i;
 1460     } else if (cmapPlatform == 1 && cmapEncoding == 0) {
 1461       macRomanCmap = i;
 1462     } else if (cmapPlatform == 3 && cmapEncoding == 0) {
 1463       msSymbolCmap = i;
 1464     }
 1465   }
 1466   useMacRoman = gFalse;
 1467   useUnicode = gFalse;
 1468   nonsymbolic = !(flags & fontSymbolic);
 1469   if (usesMacRomanEnc && macRomanCmap >= 0) {
 1470     cmap = macRomanCmap;
 1471     useMacRoman = gTrue;
 1472   } else if (embFontID.num < 0 && hasEncoding && unicodeCmap >= 0) { 
 1473     cmap = unicodeCmap;
 1474     useUnicode = gTrue;
 1475   } else if (nonsymbolic && unicodeCmap >= 0) {
 1476     cmap = unicodeCmap;
 1477     useUnicode = gTrue;
 1478   } else if (nonsymbolic && macRomanCmap >= 0) {
 1479     cmap = macRomanCmap;
 1480     useMacRoman = gTrue;
 1481   } else if (msSymbolCmap >= 0) {
 1482     cmap = msSymbolCmap;
 1483   } else if (unicodeCmap >= 0) {
 1484     cmap = unicodeCmap;
 1485   } else if (macRomanCmap >= 0) {
 1486     cmap = macRomanCmap;
 1487   } else {
 1488     cmap = 0;
 1489   }
 1490 
 1491   // reverse map the char names through MacRomanEncoding, then map the
 1492   // char codes through the cmap; fall back on Unicode if that doesn't
 1493   // work
 1494   if (useMacRoman) {
 1495     for (i = 0; i < 256; ++i) {
 1496       if ((charName = enc[i])) {
 1497     if ((code = globalParams->getMacRomanCharCode(charName))) {
 1498       map[i] = ff->mapCodeToGID(cmap, code);
 1499     } else if (unicodeCmap >= 0 &&
 1500            (u = globalParams->mapNameToUnicode(charName))) {
 1501       map[i] = ff->mapCodeToGID(unicodeCmap, u);
 1502     }
 1503       } else if (unicodeCmap >= 0 &&
 1504          (n = ctu->mapToUnicode((CharCode)i, &u, 1))) {
 1505     map[i] = ff->mapCodeToGID(cmap, u);
 1506       } else {
 1507     map[i] = -1;
 1508       }
 1509     }
 1510 
 1511   // map Unicode through the cmap
 1512   } else if (useUnicode) {
 1513     for (i = 0; i < 256; ++i) {
 1514       if (((charName = enc[i]) &&
 1515        (u = globalParams->mapNameToUnicode(charName))) ||
 1516       (n = ctu->mapToUnicode((CharCode)i, &u, 1))) {
 1517     map[i] = ff->mapCodeToGID(cmap, u);
 1518       } else {
 1519     map[i] = -1;
 1520       }
 1521     }
 1522 
 1523   // map the char codes through the cmap, possibly with an offset of
 1524   // 0xf000
 1525   } else {
 1526     for (i = 0; i < 256; ++i) {
 1527       if (!(map[i] = ff->mapCodeToGID(cmap, i))) {
 1528     map[i] = ff->mapCodeToGID(cmap, 0xf000 + i);
 1529       }
 1530     }
 1531   }
 1532 
 1533   // try the TrueType 'post' table to handle any unmapped characters
 1534   for (i = 0; i < 256; ++i) {
 1535     if (map[i] <= 0 && (charName = enc[i])) {
 1536       map[i] = ff->mapNameToGID(charName);
 1537     }
 1538   }
 1539 
 1540   return map;
 1541 }
 1542 
 1543 int *Gfx8BitFont::getCodeToGIDMap(FoFiType1C *ff) {
 1544   int *map;
 1545   GHash *nameToGID;
 1546   int i, gid;
 1547 
 1548   map = (int *)gmallocn(256, sizeof(int));
 1549   for (i = 0; i < 256; ++i) {
 1550     map[i] = 0;
 1551   }
 1552 
 1553   nameToGID = ff->getNameToGIDMap();
 1554   for (i = 0; i < 256; ++i) {
 1555     if (!enc[i]) {
 1556       continue;
 1557     }
 1558     gid = nameToGID->lookupInt(enc[i]);
 1559     if (gid < 0 || gid >= 65536) {
 1560       continue;
 1561     }
 1562     map[i] = gid;
 1563   }
 1564 
 1565   delete nameToGID;
 1566 
 1567   return map;
 1568 }
 1569 
 1570 Dict *Gfx8BitFont::getCharProcs() {
 1571   return charProcs.isDict() ? charProcs.getDict() : (Dict *)NULL;
 1572 }
 1573 
 1574 Object *Gfx8BitFont::getCharProc(int code, Object *proc) {
 1575   if (enc[code] && charProcs.isDict()) {
 1576     charProcs.dictLookup(enc[code], proc);
 1577   } else {
 1578     proc->initNull();
 1579   }
 1580   return proc;
 1581 }
 1582 
 1583 Object *Gfx8BitFont::getCharProcNF(int code, Object *proc) {
 1584   if (enc[code] && charProcs.isDict()) {
 1585     charProcs.dictLookupNF(enc[code], proc);
 1586   } else {
 1587     proc->initNull();
 1588   }
 1589   return proc;
 1590 }
 1591 
 1592 Dict *Gfx8BitFont::getResources() {
 1593   return resources.isDict() ? resources.getDict() : (Dict *)NULL;
 1594 }
 1595 
 1596 GBool Gfx8BitFont::problematicForUnicode() {
 1597   GString *nameLC;
 1598   GBool symbolic;
 1599 
 1600   // potential inputs:
 1601   // - font is embedded (GfxFont.embFontID.num >= 0)
 1602   // - font name (GfxFont.name)
 1603   // - font type (GfxFont.type)
 1604   // - Base-14 font (Gfx8BitFont.base14 != NULL)
 1605   // - symbolic (GfxFont.flags & fontSymbolic)
 1606   // - has Encoding array (Gfx8BitFont.hasEncoding)
 1607   // - extracted base encoding from embedded font file
 1608   //   (Gfx8BitFont.baseEncFromFontFile)
 1609   // - has a ToUnicode map (GfxFont.hasToUnicode)
 1610   // - used the numeric glyph name heuristic
 1611   //   (Gfx8BitFont.usedNumericHeuristic)
 1612 
 1613   if (name) {
 1614     nameLC = name->copy();
 1615     nameLC->lowerCase();
 1616     symbolic = strstr(nameLC->getCString(), "dingbat") ||
 1617                strstr(nameLC->getCString(), "wingding") ||
 1618                strstr(nameLC->getCString(), "commpi");
 1619     delete nameLC;
 1620     if (symbolic) {
 1621       return gFalse;
 1622     }
 1623   }
 1624 
 1625   if (embFontID.num >= 0) {
 1626     switch (type) {
 1627     case fontType1:
 1628     case fontType1C:
 1629     case fontType1COT:
 1630       return !hasToUnicode && (!hasEncoding || usedNumericHeuristic);
 1631 
 1632     case fontType3:
 1633       return !hasToUnicode && !hasEncoding;
 1634 
 1635     case fontTrueType:
 1636     case fontTrueTypeOT:
 1637       return !hasToUnicode && !hasEncoding;
 1638 
 1639     default:
 1640       return !hasToUnicode;
 1641     }
 1642 
 1643   } else {
 1644     // NB: type will be fontTypeUnknown if the PDF specifies an
 1645     // invalid font type -- which is ok, if we have a ToUnicode map or
 1646     // an encoding
 1647     return !hasToUnicode && !hasEncoding;
 1648   }
 1649 }
 1650 
 1651 //------------------------------------------------------------------------
 1652 // GfxCIDFont
 1653 //------------------------------------------------------------------------
 1654 
 1655 GfxCIDFont::GfxCIDFont(XRef *xref, const char *tagA, Ref idA, GString *nameA,
 1656                GfxFontType typeA, Ref embFontIDA, Dict *fontDict):
 1657   GfxFont(tagA, idA, nameA, typeA, embFontIDA)
 1658 {
 1659   Dict *desFontDict;
 1660   Object desFontDictObj;
 1661   Object obj1, obj2, obj3, obj4, obj5, obj6;
 1662   CharCodeToUnicode *utu;
 1663   CharCode c;
 1664   Unicode uBuf[8];
 1665   int c1, c2;
 1666   int excepsSize, i, j, k, n;
 1667 
 1668   missingWidth = 0;
 1669   ascent = 0.95;
 1670   descent = -0.35;
 1671   declaredAscent = ascent;
 1672   fontBBox[0] = fontBBox[1] = fontBBox[2] = fontBBox[3] = 0;
 1673   collection = NULL;
 1674   cMap = NULL;
 1675   ctu = NULL;
 1676   ctuUsesCharCode = gTrue;
 1677   widths.defWidth = 1.0;
 1678   widths.defHeight = -1.0;
 1679   widths.defVY = 0.880;
 1680   widths.exceps = NULL;
 1681   widths.nExceps = 0;
 1682   widths.excepsV = NULL;
 1683   widths.nExcepsV = 0;
 1684   cidToGID = NULL;
 1685   cidToGIDLen = 0;
 1686 
 1687   // get the descendant font
 1688   if (!fontDict->lookup("DescendantFonts", &obj1)->isArray() ||
 1689       obj1.arrayGetLength() == 0) {
 1690     error(errSyntaxError, -1,
 1691       "Missing or empty DescendantFonts entry in Type 0 font");
 1692     obj1.free();
 1693     goto err1;
 1694   }
 1695   if (!obj1.arrayGet(0, &desFontDictObj)->isDict()) {
 1696     error(errSyntaxError, -1, "Bad descendant font in Type 0 font");
 1697     goto err2;
 1698   }
 1699   obj1.free();
 1700   desFontDict = desFontDictObj.getDict();
 1701 
 1702   // get info from font descriptor
 1703   readFontDescriptor(xref, desFontDict);
 1704 
 1705   //----- encoding info -----
 1706 
 1707   // char collection
 1708   if (!desFontDict->lookup("CIDSystemInfo", &obj1)->isDict()) {
 1709     error(errSyntaxError, -1,
 1710       "Missing CIDSystemInfo dictionary in Type 0 descendant font");
 1711     goto err2;
 1712   }
 1713   obj1.dictLookup("Registry", &obj2);
 1714   obj1.dictLookup("Ordering", &obj3);
 1715   if (!obj2.isString() || !obj3.isString()) {
 1716     error(errSyntaxError, -1,
 1717       "Invalid CIDSystemInfo dictionary in Type 0 descendant font");
 1718     goto err3;
 1719   }
 1720   collection = obj2.getString()->copy()->append('-')->append(obj3.getString());
 1721   obj3.free();
 1722   obj2.free();
 1723   obj1.free();
 1724 
 1725   // encoding (i.e., CMap)
 1726   if (fontDict->lookup("Encoding", &obj1)->isNull()) {
 1727     error(errSyntaxError, -1, "Missing Encoding entry in Type 0 font");
 1728     goto err2;
 1729   }
 1730   if (!(cMap = CMap::parse(NULL, collection, &obj1))) {
 1731     goto err2;
 1732   }
 1733 
 1734   // check for fonts that use the Identity-H encoding (cmap), and the
 1735   // Adobe-Identity character collection
 1736   identityEnc = obj1.isName("Identity-H") &&
 1737                 !collection->cmp("Adobe-Identity");
 1738 
 1739   obj1.free();
 1740 
 1741   // CIDToGIDMap
 1742   // (the PDF 1.7 spec only allows these for TrueType fonts, but
 1743   // Acrobat apparently also allows them for OpenType CFF fonts -- and
 1744   // the PDF 2.0 spec has removed the prohibition)
 1745   hasIdentityCIDToGID = gFalse;
 1746   desFontDict->lookup("CIDToGIDMap", &obj1);
 1747   if (obj1.isStream()) {
 1748     cidToGIDLen = 0;
 1749     i = 64;
 1750     cidToGID = (int *)gmallocn(i, sizeof(int));
 1751     obj1.streamReset();
 1752     while ((c1 = obj1.streamGetChar()) != EOF &&
 1753        (c2 = obj1.streamGetChar()) != EOF) {
 1754       if (cidToGIDLen == i) {
 1755     i *= 2;
 1756     cidToGID = (int *)greallocn(cidToGID, i, sizeof(int));
 1757       }
 1758       cidToGID[cidToGIDLen++] = (c1 << 8) + c2;
 1759     }
 1760     obj1.streamClose();
 1761     identityEnc = gFalse;
 1762   } else if (obj1.isName("Identity")) {
 1763     hasIdentityCIDToGID = gTrue;
 1764   } else if (!obj1.isNull()) {
 1765     error(errSyntaxError, -1, "Invalid CIDToGIDMap entry in CID font");
 1766   }
 1767   obj1.free();
 1768 
 1769   // look for a ToUnicode CMap
 1770   hasKnownCollection = gFalse;
 1771   if (globalParams->getUseTrueTypeUnicodeMapping()) {
 1772     readTrueTypeUnicodeMapping(xref);
 1773   }
 1774   if (!ctu) {
 1775     ctu = readToUnicodeCMap(fontDict, 16, NULL);
 1776   }
 1777   if (!ctu) {
 1778     ctuUsesCharCode = gFalse;
 1779 
 1780     // use an identity mapping for the "Adobe-Identity" and
 1781     // "Adobe-UCS" collections
 1782     if (!collection->cmp("Adobe-Identity") ||
 1783     !collection->cmp("Adobe-UCS")) {
 1784       ctu = CharCodeToUnicode::makeIdentityMapping();
 1785 
 1786     // look for a user-supplied .cidToUnicode file
 1787     } else if ((ctu = globalParams->getCIDToUnicode(collection))) {
 1788       hasKnownCollection = gTrue;
 1789 
 1790     } else {
 1791       error(errSyntaxError, -1,
 1792         "Unknown character collection '{0:t}'", collection);
 1793 
 1794       // fall back to an identity mapping
 1795       ctu = CharCodeToUnicode::makeIdentityMapping();
 1796     }
 1797   }
 1798 
 1799   // look for a Unicode-to-Unicode mapping
 1800   if (name && (utu = globalParams->getUnicodeToUnicode(name))) {
 1801     if (ctu) {
 1802       if (ctu->isIdentity()) {
 1803     ctu->decRefCnt();
 1804     ctu = utu;
 1805       } else {
 1806     for (c = 0; c < ctu->getLength(); ++c) {
 1807       n = ctu->mapToUnicode(c, uBuf, 8);
 1808       if (n >= 1) {
 1809         n = utu->mapToUnicode((CharCode)uBuf[0], uBuf, 8);
 1810         if (n >= 1) {
 1811           ctu->setMapping(c, uBuf, n);
 1812         }
 1813       }
 1814     }
 1815     utu->decRefCnt();
 1816       }
 1817     } else {
 1818       ctu = utu;
 1819     }
 1820   }
 1821 
 1822   //----- character metrics -----
 1823 
 1824   // default char width
 1825   if (desFontDict->lookup("DW", &obj1)->isNum()) {
 1826     widths.defWidth = obj1.getNum() * 0.001;
 1827   }
 1828   obj1.free();
 1829 
 1830   // char width exceptions
 1831   if (desFontDict->lookup("W", &obj1)->isArray()) {
 1832     excepsSize = 0;
 1833     i = 0;
 1834     while (i + 1 < obj1.arrayGetLength()) {
 1835       obj1.arrayGet(i, &obj2);
 1836       obj1.arrayGet(i + 1, &obj3);
 1837       if (obj2.isInt() && obj3.isInt() && i + 2 < obj1.arrayGetLength()) {
 1838     if (obj1.arrayGet(i + 2, &obj4)->isNum()) {
 1839       if (widths.nExceps == excepsSize) {
 1840         excepsSize += 16;
 1841         widths.exceps = (GfxFontCIDWidthExcep *)
 1842           greallocn(widths.exceps,
 1843             excepsSize, sizeof(GfxFontCIDWidthExcep));
 1844       }
 1845       widths.exceps[widths.nExceps].first = obj2.getInt();
 1846       widths.exceps[widths.nExceps].last = obj3.getInt();
 1847       widths.exceps[widths.nExceps].width = obj4.getNum() * 0.001;
 1848       ++widths.nExceps;
 1849     } else {
 1850       error(errSyntaxError, -1, "Bad widths array in Type 0 font");
 1851     }
 1852     obj4.free();
 1853     i += 3;
 1854       } else if (obj2.isInt() && obj3.isArray()) {
 1855     if (widths.nExceps + obj3.arrayGetLength() > excepsSize) {
 1856       excepsSize = (widths.nExceps + obj3.arrayGetLength() + 15) & ~15;
 1857       widths.exceps = (GfxFontCIDWidthExcep *)
 1858         greallocn(widths.exceps,
 1859               excepsSize, sizeof(GfxFontCIDWidthExcep));
 1860     }
 1861     j = obj2.getInt();
 1862     for (k = 0; k < obj3.arrayGetLength(); ++k) {
 1863       if (obj3.arrayGet(k, &obj4)->isNum()) {
 1864         widths.exceps[widths.nExceps].first = j;
 1865         widths.exceps[widths.nExceps].last = j;
 1866         widths.exceps[widths.nExceps].width = obj4.getNum() * 0.001;
 1867         ++j;
 1868         ++widths.nExceps;
 1869       } else {
 1870         error(errSyntaxError, -1, "Bad widths array in Type 0 font");
 1871       }
 1872       obj4.free();
 1873     }
 1874     i += 2;
 1875       } else {
 1876     error(errSyntaxError, -1, "Bad widths array in Type 0 font");
 1877     ++i;
 1878       }
 1879       obj3.free();
 1880       obj2.free();
 1881     }
 1882   }
 1883   obj1.free();
 1884 
 1885   // default metrics for vertical font
 1886   if (desFontDict->lookup("DW2", &obj1)->isArray() &&
 1887       obj1.arrayGetLength() == 2) {
 1888     if (obj1.arrayGet(0, &obj2)->isNum()) {
 1889       widths.defVY = obj2.getNum() * 0.001;
 1890     }
 1891     obj2.free();
 1892     if (obj1.arrayGet(1, &obj2)->isNum()) {
 1893       widths.defHeight = obj2.getNum() * 0.001;
 1894     }
 1895     obj2.free();
 1896   }
 1897   obj1.free();
 1898 
 1899   // char metric exceptions for vertical font
 1900   if (desFontDict->lookup("W2", &obj1)->isArray()) {
 1901     excepsSize = 0;
 1902     i = 0;
 1903     while (i + 1 < obj1.arrayGetLength()) {
 1904       obj1.arrayGet(i, &obj2);
 1905       obj1.arrayGet(i+ 1, &obj3);
 1906       if (obj2.isInt() && obj3.isInt() && i + 4 < obj1.arrayGetLength()) {
 1907     if (obj1.arrayGet(i + 2, &obj4)->isNum() &&
 1908         obj1.arrayGet(i + 3, &obj5)->isNum() &&
 1909         obj1.arrayGet(i + 4, &obj6)->isNum()) {
 1910       if (widths.nExcepsV == excepsSize) {
 1911         excepsSize += 16;
 1912         widths.excepsV = (GfxFontCIDWidthExcepV *)
 1913           greallocn(widths.excepsV,
 1914             excepsSize, sizeof(GfxFontCIDWidthExcepV));
 1915       }
 1916       widths.excepsV[widths.nExcepsV].first = obj2.getInt();
 1917       widths.excepsV[widths.nExcepsV].last = obj3.getInt();
 1918       widths.excepsV[widths.nExcepsV].height = obj4.getNum() * 0.001;
 1919       widths.excepsV[widths.nExcepsV].vx = obj5.getNum() * 0.001;
 1920       widths.excepsV[widths.nExcepsV].vy = obj6.getNum() * 0.001;
 1921       ++widths.nExcepsV;
 1922     } else {
 1923       error(errSyntaxError, -1, "Bad widths (W2) array in Type 0 font");
 1924     }
 1925     obj6.free();
 1926     obj5.free();
 1927     obj4.free();
 1928     i += 5;
 1929       } else if (obj2.isInt() && obj3.isArray()) {
 1930     if (widths.nExcepsV + obj3.arrayGetLength() / 3 > excepsSize) {
 1931       excepsSize =
 1932         (widths.nExcepsV + obj3.arrayGetLength() / 3 + 15) & ~15;
 1933       widths.excepsV = (GfxFontCIDWidthExcepV *)
 1934         greallocn(widths.excepsV,
 1935               excepsSize, sizeof(GfxFontCIDWidthExcepV));
 1936     }
 1937     j = obj2.getInt();
 1938     for (k = 0; k + 2 < obj3.arrayGetLength(); k += 3) {
 1939       if (obj3.arrayGet(k, &obj4)->isNum() &&
 1940           obj3.arrayGet(k+1, &obj5)->isNum() &&
 1941           obj3.arrayGet(k+2, &obj6)->isNum()) {
 1942         widths.excepsV[widths.nExcepsV].first = j;
 1943         widths.excepsV[widths.nExcepsV].last = j;
 1944         widths.excepsV[widths.nExcepsV].height = obj4.getNum() * 0.001;
 1945         widths.excepsV[widths.nExcepsV].vx = obj5.getNum() * 0.001;
 1946         widths.excepsV[widths.nExcepsV].vy = obj6.getNum() * 0.001;
 1947         ++j;
 1948         ++widths.nExcepsV;
 1949       } else {
 1950         error(errSyntaxError, -1, "Bad widths (W2) array in Type 0 font");
 1951       }
 1952       obj6.free();
 1953       obj5.free();
 1954       obj4.free();
 1955     }
 1956     i += 2;
 1957       } else {
 1958     error(errSyntaxError, -1, "Bad widths (W2) array in Type 0 font");
 1959     ++i;
 1960       }
 1961       obj3.free();
 1962       obj2.free();
 1963     }
 1964   }
 1965   obj1.free();
 1966 
 1967   desFontDictObj.free();
 1968   ok = gTrue;
 1969   return;
 1970 
 1971  err3:
 1972   obj3.free();
 1973   obj2.free();
 1974  err2:
 1975   obj1.free();
 1976   desFontDictObj.free();
 1977  err1:
 1978   error(errSyntaxError, -1, "Failed to parse font object for '{0:t}'", name);
 1979 }
 1980 
 1981 GfxCIDFont::~GfxCIDFont() {
 1982   if (collection) {
 1983     delete collection;
 1984   }
 1985   if (cMap) {
 1986     cMap->decRefCnt();
 1987   }
 1988   if (ctu) {
 1989     ctu->decRefCnt();
 1990   }
 1991   gfree(widths.exceps);
 1992   gfree(widths.excepsV);
 1993   if (cidToGID) {
 1994     gfree(cidToGID);
 1995   }
 1996 }
 1997 
 1998 // Construct a code-to-Unicode mapping, based on the TrueType Unicode
 1999 // cmap (if present).  Constructs ctu if succesful; leaves ctu = null
 2000 // otherwise.  Always leaves ctu = null for non-TrueType fonts.
 2001 void GfxCIDFont::readTrueTypeUnicodeMapping(XRef *xref) {
 2002   char *buf;
 2003   FoFiTrueType *ff;
 2004   Unicode *gidToUnicode, *codeToUnicode;
 2005   Unicode u;
 2006   int bufLen, cmapPlatform, cmapEncoding, unicodeCmap;
 2007   int nGlyphs, nMappings, gid, i;
 2008 
 2009   // must be an embedded TrueType font, with an unknown char collection
 2010   if ((type != fontCIDType2 && type == fontCIDType2OT) ||
 2011       embFontID.num < 0 ||
 2012       hasKnownCollection) {
 2013     goto err0;
 2014   }
 2015 
 2016   // read the embedded font and construct a FoFiTrueType
 2017   if (!(buf = readEmbFontFile(xref, &bufLen))) {
 2018     goto err0;
 2019   }
 2020   if (!(ff = FoFiTrueType::make(buf, bufLen, 0))) {
 2021     goto err1;
 2022   }
 2023 
 2024   // find the TrueType Unicode cmap
 2025   unicodeCmap = -1;
 2026   for (i = 0; i < ff->getNumCmaps(); ++i) {
 2027     cmapPlatform = ff->getCmapPlatform(i);
 2028     cmapEncoding = ff->getCmapEncoding(i);
 2029     if ((cmapPlatform == 3 && cmapEncoding == 1) ||
 2030     (cmapPlatform == 0 && cmapEncoding <= 4)) {
 2031       unicodeCmap = i;
 2032       break;
 2033     }
 2034   }
 2035   if (unicodeCmap < 0) {
 2036     goto err2;
 2037   }
 2038 
 2039   // construct reverse GID-to-Unicode map
 2040   nGlyphs = ff->getNumGlyphs();
 2041   gidToUnicode = (Unicode *)gmallocn(nGlyphs, sizeof(Unicode));
 2042   memset(gidToUnicode, 0, nGlyphs * sizeof(Unicode));
 2043   nMappings = 0;
 2044   for (u = 1; u <= 0xffff; ++u) {
 2045     gid = ff->mapCodeToGID(unicodeCmap, (int)u);
 2046     if (gid > 0 && gid < nGlyphs) {
 2047       gidToUnicode[gid] = u;
 2048       ++nMappings;
 2049     }
 2050   }
 2051   // bail out if the Unicode cmap was completely empty
 2052   if (nMappings == 0) {
 2053     goto err3;
 2054   }
 2055 
 2056   // construct code-to-Unicode map
 2057   codeToUnicode = (Unicode *)gmallocn(65536, sizeof(Unicode));
 2058   memset(codeToUnicode, 0, 65536 * sizeof(Unicode));
 2059   for (i = 0; i <= 0xffff; ++i) {
 2060     // we've already checked for an identity encoding, so CID = i
 2061     if (cidToGID && i < cidToGIDLen) {
 2062       gid = cidToGID[i];
 2063     } else {
 2064       gid = i;
 2065     }
 2066     if (gid < nGlyphs && gidToUnicode[gid] > 0) {
 2067       codeToUnicode[i] = gidToUnicode[gid];
 2068     }
 2069   }
 2070   ctu = CharCodeToUnicode::make16BitToUnicode(codeToUnicode);
 2071 
 2072   gfree(codeToUnicode);
 2073  err3:
 2074   gfree(gidToUnicode);
 2075  err2:
 2076   delete ff;
 2077  err1:
 2078   gfree(buf);
 2079  err0:
 2080   return;
 2081 }
 2082 
 2083 int GfxCIDFont::getNextChar(char *s, int len, CharCode *code,
 2084                 Unicode *u, int uSize, int *uLen,
 2085                 double *dx, double *dy, double *ox, double *oy) {
 2086   CID cid;
 2087   CharCode c;
 2088   int n;
 2089 
 2090   if (!cMap) {
 2091     *code = 0;
 2092     *uLen = 0;
 2093     *dx = *dy = 0;
 2094     return 1;
 2095   }
 2096 
 2097   *code = (CharCode)(cid = cMap->getCID(s, len, &c, &n));
 2098   if (ctu) {
 2099     *uLen = ctu->mapToUnicode(ctuUsesCharCode ? c : cid, u, uSize);
 2100   } else {
 2101     *uLen = 0;
 2102   }
 2103   if (!*uLen && uSize >= 1 && globalParams->getMapUnknownCharNames()) {
 2104     u[0] = *code;
 2105     *uLen = 1;
 2106   }
 2107 
 2108   // horizontal
 2109   if (cMap->getWMode() == 0) {
 2110     getHorizontalMetrics(cid, dx);
 2111     *dy = *ox = *oy = 0;
 2112 
 2113   // vertical
 2114   } else {
 2115     getVerticalMetrics(cid, dy, ox, oy);
 2116     *dx = 0;
 2117   }
 2118 
 2119   return n;
 2120 }
 2121 
 2122 // NB: Section 9.7.4.3 in the PDF 2.0 spec says that, in the case of
 2123 // duplicate entries in the metrics, the first entry should be used.
 2124 // This means we need to leave the metrics in the original order and
 2125 // perform a linear search.  (Or use a more complex data structure.)
 2126 void GfxCIDFont::getHorizontalMetrics(CID cid, double *w) {
 2127   int i;
 2128   for (i = 0; i < widths.nExceps; ++i) {
 2129     if (widths.exceps[i].first <= cid && cid <= widths.exceps[i].last) {
 2130       *w = widths.exceps[i].width;
 2131       return;
 2132     }
 2133   }
 2134   *w = widths.defWidth;
 2135 }
 2136 
 2137 // NB: Section 9.7.4.3 in the PDF 2.0 spec says that, in the case of
 2138 // duplicate entries in the metrics, the first entry should be used.
 2139 // This means we need to leave the metrics in the original order and
 2140 // perform a linear search.  (Or use a more complex data structure.)
 2141 void GfxCIDFont::getVerticalMetrics(CID cid, double *h,
 2142                     double *vx, double *vy) {
 2143   int i;
 2144   for (i = 0; i < widths.nExcepsV; ++i) {
 2145     if (widths.excepsV[i].first <= cid && cid <= widths.excepsV[i].last) {
 2146       *h = widths.excepsV[i].height;
 2147       *vx = widths.excepsV[i].vx;
 2148       *vy = widths.excepsV[i].vy;
 2149       return;
 2150     }
 2151   }
 2152   *h = widths.defHeight;
 2153   getHorizontalMetrics(cid, vx);
 2154   *vx /= 2;
 2155   *vy = widths.defVY;
 2156 }
 2157 
 2158 int GfxCIDFont::getWMode() {
 2159   return cMap ? cMap->getWMode() : 0;
 2160 }
 2161 
 2162 CharCodeToUnicode *GfxCIDFont::getToUnicode() {
 2163   if (ctu) {
 2164     ctu->incRefCnt();
 2165   }
 2166   return ctu;
 2167 }
 2168 
 2169 GString *GfxCIDFont::getCollection() {
 2170   return cMap ? cMap->getCollection() : (GString *)NULL;
 2171 }
 2172 
 2173 double GfxCIDFont::getWidth(CID cid) {
 2174   double w;
 2175 
 2176   getHorizontalMetrics(cid, &w);
 2177   return w;
 2178 }
 2179 
 2180 GBool GfxCIDFont::problematicForUnicode() {
 2181   GString *nameLC;
 2182   GBool symbolic;
 2183 
 2184   // potential inputs:
 2185   // - font is embedded (GfxFont.embFontID.num >= 0)
 2186   // - font name (GfxFont.name)
 2187   // - font type (GfxFont.type)
 2188   // - symbolic (GfxFont.flags & fontSymbolic)
 2189   // - has a ToUnicode map (GfxFont.hasToUnicode)
 2190   // - collection is Adobe-Identity or Adobe-UCS
 2191   //   (GfxCIDFont.collection - compare string)
 2192   // - collection is known AdobeCJK (GfxCIDFont.hasKnownCollection)
 2193   // - has non-Identity CIDToGIDMap (GfxCIDFont.cidToGID != NULL)
 2194   // - has Identity CIDToGIDMap (GfxCIDFont.hasIdentityCIDToGID)
 2195 
 2196   if (name) {
 2197     nameLC = name->copy();
 2198     nameLC->lowerCase();
 2199     symbolic = strstr(nameLC->getCString(), "dingbat") ||
 2200                strstr(nameLC->getCString(), "wingding") ||
 2201                strstr(nameLC->getCString(), "commpi");
 2202     delete nameLC;
 2203     if (symbolic) {
 2204       return gFalse;
 2205     }
 2206   }
 2207 
 2208   if (embFontID.num >= 0) {
 2209     switch (type) {
 2210     case fontCIDType0:
 2211     case fontCIDType0C:
 2212     case fontCIDType0COT:
 2213       return !hasToUnicode && !hasKnownCollection;
 2214 
 2215     case fontCIDType2:
 2216     case fontCIDType2OT:
 2217       return !hasToUnicode && !hasKnownCollection;
 2218 
 2219     default:
 2220       return !hasToUnicode;
 2221     }
 2222 
 2223   } else {
 2224     return !hasToUnicode;
 2225   }
 2226 }
 2227 
 2228 //------------------------------------------------------------------------
 2229 // GfxFontDict
 2230 //------------------------------------------------------------------------
 2231 
 2232 GfxFontDict::GfxFontDict(XRef *xref, Ref *fontDictRef, Dict *fontDict) {
 2233   GfxFont *font;
 2234   char *tag;
 2235   Object obj1, obj2;
 2236   Ref r;
 2237   int i;
 2238 
 2239   fonts = new GHash(gTrue);
 2240   uniqueFonts = new GList();
 2241   for (i = 0; i < fontDict->getLength(); ++i) {
 2242     tag = fontDict->getKey(i);
 2243     fontDict->getValNF(i, &obj1);
 2244     obj1.fetch(xref, &obj2);
 2245     if (!obj2.isDict()) {
 2246       error(errSyntaxError, -1, "font resource is not a dictionary");
 2247     } else if (obj1.isRef() && (font = lookupByRef(obj1.getRef()))) {
 2248       fonts->add(new GString(tag), font);
 2249     } else {
 2250       if (obj1.isRef()) {
 2251     r = obj1.getRef();
 2252       } else if (fontDictRef) {
 2253     // legal generation numbers are five digits, so we use a
 2254     // 6-digit number here
 2255     r.gen = 100000 + fontDictRef->num;
 2256     r.num = i;
 2257       } else {
 2258     // no indirect reference for this font, or for the containing
 2259     // font dict, so hash the font and use that
 2260     r.gen = 100000;
 2261     r.num = hashFontObject(&obj2);
 2262       }
 2263       if ((font = GfxFont::makeFont(xref, tag, r, obj2.getDict()))) {
 2264     if (!font->isOk()) {
 2265       delete font;
 2266     } else {
 2267       uniqueFonts->append(font);
 2268       fonts->add(new GString(tag), font);
 2269     }
 2270       }
 2271     }
 2272     obj1.free();
 2273     obj2.free();
 2274   }
 2275 }
 2276 
 2277 GfxFontDict::~GfxFontDict() {
 2278   deleteGList(uniqueFonts, GfxFont);
 2279   delete fonts;
 2280 }
 2281 
 2282 GfxFont *GfxFontDict::lookup(char *tag) {
 2283   return (GfxFont *)fonts->lookup(tag);
 2284 }
 2285 
 2286 GfxFont *GfxFontDict::lookupByRef(Ref ref) {
 2287   GfxFont *font;
 2288   int i;
 2289 
 2290   for (i = 0; i < uniqueFonts->getLength(); ++i) {
 2291     font = (GfxFont *)uniqueFonts->get(i);
 2292     if (font->getID()->num == ref.num &&
 2293     font->getID()->gen == ref.gen) {
 2294       return font;
 2295     }
 2296   }
 2297   return NULL;
 2298 }
 2299 
 2300 int GfxFontDict::getNumFonts() {
 2301   return uniqueFonts->getLength();
 2302 }
 2303 
 2304 GfxFont *GfxFontDict::getFont(int i) {
 2305   return (GfxFont *)uniqueFonts->get(i);
 2306 }
 2307 
 2308 // FNV-1a hash
 2309 class FNVHash {
 2310 public:
 2311 
 2312   FNVHash() {
 2313     h = 2166136261U;
 2314   }
 2315 
 2316   void hash(char c) {
 2317     h ^= c & 0xff;
 2318     h *= 16777619;
 2319   }
 2320 
 2321   void hash(char *p, int n) {
 2322     int i;
 2323     for (i = 0; i < n; ++i) {
 2324       hash(p[i]);
 2325     }
 2326   }
 2327 
 2328   int get31() {
 2329     return (h ^ (h >> 31)) & 0x7fffffff;
 2330   }
 2331 
 2332 private:
 2333 
 2334   Guint h;
 2335 };
 2336 
 2337 int GfxFontDict::hashFontObject(Object *obj) {
 2338   FNVHash h;
 2339 
 2340   hashFontObject1(obj, &h);
 2341   return h.get31();
 2342 }
 2343 
 2344 void GfxFontDict::hashFontObject1(Object *obj, FNVHash *h) {
 2345   Object obj2;
 2346   GString *s;
 2347   char *p;
 2348   double r;
 2349   int n, i;
 2350 
 2351   switch (obj->getType()) {
 2352   case objBool:
 2353     h->hash('b');
 2354     h->hash(obj->getBool() ? 1 : 0);
 2355     break;
 2356   case objInt:
 2357     h->hash('i');
 2358     n = obj->getInt();
 2359     h->hash((char *)&n, sizeof(int));
 2360     break;
 2361   case objReal:
 2362     h->hash('r');
 2363     r = obj->getReal();
 2364     h->hash((char *)&r, sizeof(double));
 2365     break;
 2366   case objString:
 2367     h->hash('s');
 2368     s = obj->getString();
 2369     h->hash(s->getCString(), s->getLength());
 2370     break;
 2371   case objName:
 2372     h->hash('n');
 2373     p = obj->getName();
 2374     h->hash(p, (int)strlen(p));
 2375     break;
 2376   case objNull:
 2377     h->hash('z');
 2378     break;
 2379   case objArray:
 2380     h->hash('a');
 2381     n = obj->arrayGetLength();
 2382     h->hash((char *)&n, sizeof(int));
 2383     for (i = 0; i < n; ++i) {
 2384       obj->arrayGetNF(i, &obj2);
 2385       hashFontObject1(&obj2, h);
 2386       obj2.free();
 2387     }
 2388     break;
 2389   case objDict:
 2390     h->hash('d');
 2391     n = obj->dictGetLength();
 2392     h->hash((char *)&n, sizeof(int));
 2393     for (i = 0; i < n; ++i) {
 2394       p = obj->dictGetKey(i);
 2395       h->hash(p, (int)strlen(p));
 2396       obj->dictGetValNF(i, &obj2);
 2397       hashFontObject1(&obj2, h);
 2398       obj2.free();
 2399     }
 2400     break;
 2401   case objStream:
 2402     // this should never happen - streams must be indirect refs
 2403     break;
 2404   case objRef:
 2405     h->hash('f');
 2406     n = obj->getRefNum();
 2407     h->hash((char *)&n, sizeof(int));
 2408     n = obj->getRefGen();
 2409     h->hash((char *)&n, sizeof(int));
 2410     break;
 2411   default:
 2412     h->hash('u');
 2413     break;
 2414   }
 2415 }