"Fossies" - the Fresh Open Source Software Archive

Member "xpdf-4.04/xpdf/GfxFont.cc" (18 Apr 2022, 65646 Bytes) of package /linux/misc/xpdf-4.04.tar.gz:


As a special service, "Fossies" has tried to format the requested source page into HTML using (guessed) C and C++ source code syntax highlighting (style: standard), with prefixed line numbers and a code folding option. Alternatively, you can view or download the uninterpreted source code file here.

    1 //========================================================================
    2 //
    3 // GfxFont.cc
    4 //
    5 // Copyright 1996-2003 Glyph & Cog, LLC
    6 //
    7 //========================================================================
    8 
    9 #include <aconf.h>
   10 
   11 #ifdef USE_GCC_PRAGMAS
   12 #pragma implementation
   13 #endif
   14 
   15 #include <stdio.h>
   16 #include <stdlib.h>
   17 #include <string.h>
   18 #include <ctype.h>
   19 #include <math.h>
   20 #include <limits.h>
   21 #include "gmem.h"
   22 #include "gmempp.h"
   23 #include "GList.h"
   24 #include "GHash.h"
   25 #include "Error.h"
   26 #include "Object.h"
   27 #include "Dict.h"
   28 #include "GlobalParams.h"
   29 #include "CMap.h"
   30 #include "CharCodeToUnicode.h"
   31 #include "FontEncodingTables.h"
   32 #include "BuiltinFontTables.h"
   33 #include "FoFiIdentifier.h"
   34 #include "FoFiType1.h"
   35 #include "FoFiType1C.h"
   36 #include "FoFiTrueType.h"
   37 #include "GfxFont.h"
   38 
   39 //------------------------------------------------------------------------
   40 
   41 struct Base14FontMapEntry {
        // One row of the base14FontMap alias table.
        //
        // altName: a font name to recognise; the Gfx8BitFont constructor
        // compares it against the font's name with all spaces removed.
   42   const char *altName;
        // base14Name: the canonical Base-14 font name that altName stands
        // for; it is matched against builtinFonts[].name and is the name
        // handed to findBase14FontFile / used as the resident PS font.
   43   const char *base14Name;
   44 };
   45 
   46 static Base14FontMapEntry base14FontMap[] = {
        // Known aliases (left) of the Base-14 fonts and the canonical name
        // each one maps to (right).
        //
        // NB: the Gfx8BitFont constructor searches this table with a binary
        // search on altName (using GString::cmp), so the rows must stay
        // sorted by altName.  The existing rows are in ASCII order, i.e.
        // ',' sorts before '-', which sorts before letters -- insert new
        // aliases at the matching position, not at the end.
   47   { "Arial",                        "Helvetica" },
   48   { "Arial,Bold",                   "Helvetica-Bold" },
   49   { "Arial,BoldItalic",             "Helvetica-BoldOblique" },
   50   { "Arial,Italic",                 "Helvetica-Oblique" },
   51   { "Arial-Bold",                   "Helvetica-Bold" },
   52   { "Arial-BoldItalic",             "Helvetica-BoldOblique" },
   53   { "Arial-BoldItalicMT",           "Helvetica-BoldOblique" },
   54   { "Arial-BoldMT",                 "Helvetica-Bold" },
   55   { "Arial-Italic",                 "Helvetica-Oblique" },
   56   { "Arial-ItalicMT",               "Helvetica-Oblique" },
   57   { "ArialMT",                      "Helvetica" },
   58   { "Courier",                      "Courier" },
   59   { "Courier,Bold",                 "Courier-Bold" },
   60   { "Courier,BoldItalic",           "Courier-BoldOblique" },
   61   { "Courier,Italic",               "Courier-Oblique" },
   62   { "Courier-Bold",                 "Courier-Bold" },
   63   { "Courier-BoldOblique",          "Courier-BoldOblique" },
   64   { "Courier-Oblique",              "Courier-Oblique" },
   65   { "CourierNew",                   "Courier" },
   66   { "CourierNew,Bold",              "Courier-Bold" },
   67   { "CourierNew,BoldItalic",        "Courier-BoldOblique" },
   68   { "CourierNew,Italic",            "Courier-Oblique" },
   69   { "CourierNew-Bold",              "Courier-Bold" },
   70   { "CourierNew-BoldItalic",        "Courier-BoldOblique" },
   71   { "CourierNew-Italic",            "Courier-Oblique" },
   72   { "CourierNewPS-BoldItalicMT",    "Courier-BoldOblique" },
   73   { "CourierNewPS-BoldMT",          "Courier-Bold" },
   74   { "CourierNewPS-ItalicMT",        "Courier-Oblique" },
   75   { "CourierNewPSMT",               "Courier" },
   76   { "Helvetica",                    "Helvetica" },
   77   { "Helvetica,Bold",               "Helvetica-Bold" },
   78   { "Helvetica,BoldItalic",         "Helvetica-BoldOblique" },
   79   { "Helvetica,Italic",             "Helvetica-Oblique" },
   80   { "Helvetica-Bold",               "Helvetica-Bold" },
   81   { "Helvetica-BoldItalic",         "Helvetica-BoldOblique" },
   82   { "Helvetica-BoldOblique",        "Helvetica-BoldOblique" },
   83   { "Helvetica-Italic",             "Helvetica-Oblique" },
   84   { "Helvetica-Oblique",            "Helvetica-Oblique" },
   85   { "Symbol",                       "Symbol" },
   86   { "Symbol,Bold",                  "Symbol" },
   87   { "Symbol,BoldItalic",            "Symbol" },
   88   { "Symbol,Italic",                "Symbol" },
   89   { "Times-Bold",                   "Times-Bold" },
   90   { "Times-BoldItalic",             "Times-BoldItalic" },
   91   { "Times-Italic",                 "Times-Italic" },
   92   { "Times-Roman",                  "Times-Roman" },
   93   { "TimesNewRoman",                "Times-Roman" },
   94   { "TimesNewRoman,Bold",           "Times-Bold" },
   95   { "TimesNewRoman,BoldItalic",     "Times-BoldItalic" },
   96   { "TimesNewRoman,Italic",         "Times-Italic" },
   97   { "TimesNewRoman-Bold",           "Times-Bold" },
   98   { "TimesNewRoman-BoldItalic",     "Times-BoldItalic" },
   99   { "TimesNewRoman-Italic",         "Times-Italic" },
  100   { "TimesNewRomanPS",              "Times-Roman" },
  101   { "TimesNewRomanPS-Bold",         "Times-Bold" },
  102   { "TimesNewRomanPS-BoldItalic",   "Times-BoldItalic" },
  103   { "TimesNewRomanPS-BoldItalicMT", "Times-BoldItalic" },
  104   { "TimesNewRomanPS-BoldMT",       "Times-Bold" },
  105   { "TimesNewRomanPS-Italic",       "Times-Italic" },
  106   { "TimesNewRomanPS-ItalicMT",     "Times-Italic" },
  107   { "TimesNewRomanPSMT",            "Times-Roman" },
  108   { "TimesNewRomanPSMT,Bold",       "Times-Bold" },
  109   { "TimesNewRomanPSMT,BoldItalic", "Times-BoldItalic" },
  110   { "TimesNewRomanPSMT,Italic",     "Times-Italic" },
  111   { "ZapfDingbats",                 "ZapfDingbats" }
  112 };
  113 
  114 //------------------------------------------------------------------------
  115 
  116 // index: {fixed:0, sans-serif:4, serif:8} + bold*2 + italic
  117 // NB: must be in same order as psSubstFonts in PSOutputDev.cc
      //
      // GfxFont::locateFont computes this index for 8-bit fonts that need a
      // substitute: the family offset comes from the fontFixedWidth /
      // fontSerif bits in flags, and isBold() / isItalic() add 2 and 1.
      // The largest index produced that way is 8 + 2 + 1 = 11, so entries
      // 12 and 13 are never selected by that computation.  The chosen
      // index is also stored in GfxFontLoc::substIdx.
  118 static const char *base14SubstFonts[14] = {
  119   "Courier",
  120   "Courier-Oblique",
  121   "Courier-Bold",
  122   "Courier-BoldOblique",
  123   "Helvetica",
  124   "Helvetica-Oblique",
  125   "Helvetica-Bold",
  126   "Helvetica-BoldOblique",
  127   "Times-Roman",
  128   "Times-Italic",
  129   "Times-Bold",
  130   "Times-BoldItalic",
  131   // the last two are never used for substitution
  132   "Symbol",
  133   "ZapfDingbats"
  134 };
  135 
  136 //------------------------------------------------------------------------
  137 
  138 static int readFromStream(void *data) {
  139   return ((Stream *)data)->getChar();
  140 }
  141 
  142 //------------------------------------------------------------------------
  143 // GfxFontLoc
  144 //------------------------------------------------------------------------
  145 
  146 GfxFontLoc::GfxFontLoc() {
  147   path = NULL;
  148   fontNum = 0;
  149   oblique = 0;
  150   encoding = NULL;
  151   substIdx = -1;
  152 }
  153 
  154 GfxFontLoc::~GfxFontLoc() {
  155   if (path) {
  156     delete path;
  157   }
  158   if (encoding) {
  159     delete encoding;
  160   }
  161 }
  162 
  163 //------------------------------------------------------------------------
  164 // GfxFont
  165 //------------------------------------------------------------------------
  166 
  167 GfxFont *GfxFont::makeFont(XRef *xref, const char *tagA,
  168                Ref idA, Dict *fontDict) {
  169   GString *nameA;
  170   Ref embFontIDA;
  171   GfxFontType typeA;
  172   GfxFont *font;
  173   Object obj1;
  174 
  175   // get base font name
  176   nameA = NULL;
  177   fontDict->lookup("BaseFont", &obj1);
  178   if (obj1.isName()) {
  179     nameA = new GString(obj1.getName());
  180   } else if (obj1.isString()) {
  181     nameA = obj1.getString()->copy();
  182   }
  183   obj1.free();
  184 
  185   // get embedded font ID and font type
  186   typeA = getFontType(xref, fontDict, &embFontIDA);
  187 
  188   // create the font object
  189   font = NULL;
  190   if (typeA < fontCIDType0) {
  191     font = new Gfx8BitFont(xref, tagA, idA, nameA, typeA, embFontIDA,
  192                fontDict);
  193   } else {
  194     font = new GfxCIDFont(xref, tagA, idA, nameA, typeA, embFontIDA,
  195               fontDict);
  196   }
  197 
  198   return font;
  199 }
  200 
  201 GfxFont *GfxFont::makeDefaultFont(XRef *xref) {
  202   Object type, subtype, baseFont;
  203   type.initName("Font");
  204   subtype.initName("Type1");
  205   baseFont.initName("Helvetica");
  206   Object fontDict;
  207   fontDict.initDict(xref);
  208   fontDict.dictAdd(copyString("Type"), &type);
  209   fontDict.dictAdd(copyString("Subtype"), &subtype);
  210   fontDict.dictAdd(copyString("BaseFont"), &baseFont);
  211 
  212   Ref r;
  213   r.gen = 100000;
  214   r.num = GfxFontDict::hashFontObject(&fontDict);
  215 
  216   GfxFont *font = makeFont(xref, "undef", r, fontDict.getDict());
  217   fontDict.free();
  218 
  219   return font;
  220 }
  221 
  222 GfxFont::GfxFont(const char *tagA, Ref idA, GString *nameA,
  223          GfxFontType typeA, Ref embFontIDA) {
  224   ok = gFalse;
  225   tag = new GString(tagA);
  226   id = idA;
  227   name = nameA;
  228   type = typeA;
  229   embFontID = embFontIDA;
  230   embFontName = NULL;
  231   hasToUnicode = gFalse;
  232 }
  233 
  234 GfxFont::~GfxFont() {
  235   delete tag;
  236   if (name) {
  237     delete name;
  238   }
  239   if (embFontName) {
  240     delete embFontName;
  241   }
  242 }
  243 
  244 // This function extracts three pieces of information:
  245 // 1. the "expected" font type, i.e., the font type implied by
  246 //    Font.Subtype, DescendantFont.Subtype, and
  247 //    FontDescriptor.FontFile3.Subtype
  248 // 2. the embedded font object ID
  249 // 3. the actual font type - determined by examining the embedded font
  250 //    if there is one, otherwise equal to the expected font type
  251 // If the expected and actual font types don't match, a warning
  252 // message is printed.  The expected font type is not used for
  253 // anything else.
      //
      // Parameters:
      //   xref     - used to fetch the FontFile3 stream and the embedded
      //              font stream
      //   fontDict - the font dictionary to examine
      //   embID    - output: the reference stored under FontFile, FontFile2
      //              or FontFile3 in the font descriptor (the first of the
      //              three that is a reference wins); num = gen = -1 if
      //              there is none
      // Returns the actual font type (item 3 above).
  254 GfxFontType GfxFont::getFontType(XRef *xref, Dict *fontDict, Ref *embID) {
  255   GfxFontType t, expectedType;
  256   FoFiIdentifierType fft;
  257   Dict *fontDict2;
  258   Object subtype, fontDesc, obj1, obj2, obj3, obj4;
  259   GBool isType0, err;
  260 
  261   t = fontUnknownType;
  262   embID->num = embID->gen = -1;
  263   err = gFalse;
  264 
        //----- expected type, from the top-level Subtype
        // (Type0 only sets isType0; the concrete CID type is decided below)
  265   fontDict->lookup("Subtype", &subtype);
  266   expectedType = fontUnknownType;
  267   isType0 = gFalse;
  268   if (subtype.isName("Type1") || subtype.isName("MMType1")) {
  269     expectedType = fontType1;
  270   } else if (subtype.isName("Type1C")) {
  271     expectedType = fontType1C;
  272   } else if (subtype.isName("Type3")) {
  273     expectedType = fontType3;
  274   } else if (subtype.isName("TrueType")) {
  275     expectedType = fontTrueType;
  276   } else if (subtype.isName("Type0")) {
  277     isType0 = gTrue;
  278   } else {
  279     error(errSyntaxWarning, -1, "Unknown font type: '{0:s}'",
  280       subtype.isName() ? subtype.getName() : "???");
  281   }
  282   subtype.free();
  283 
        //----- descendant font
        // When DescendantFonts is an array whose first element is a
        // dictionary, that dictionary (not fontDict) is searched for the
        // font descriptor.  fontDict2 then points into obj2, which is why
        // obj1/obj2 are only freed at the very end of the function.
  284   fontDict2 = fontDict;
  285   if (fontDict->lookup("DescendantFonts", &obj1)->isArray()) {
  286     if (obj1.arrayGetLength() == 0) {
  287       error(errSyntaxWarning, -1, "Empty DescendantFonts array in font");
  288       obj2.initNull();
  289     } else if (obj1.arrayGet(0, &obj2)->isDict()) {
  290       if (!isType0) {
  291     error(errSyntaxWarning, -1, "Non-CID font with DescendantFonts array");
  292       }
  293       fontDict2 = obj2.getDict();
  294       fontDict2->lookup("Subtype", &subtype);
  295       if (subtype.isName("CIDFontType0")) {
  296     if (isType0) {
  297       expectedType = fontCIDType0;
  298     }
  299       } else if (subtype.isName("CIDFontType2")) {
  300     if (isType0) {
  301       expectedType = fontCIDType2;
  302     }
  303       }
  304       subtype.free();
  305     }
  306   } else {
  307     obj2.initNull();
  308   }
  309 
        //----- embedded font reference
        // FontFile, FontFile2 and FontFile3 are tried in that order; the
        // "embID->num == -1" tests skip the later keys once a reference
        // has been found.  Each key also refines or contradicts the
        // expected type (a contradiction sets err).
  310   if (fontDict2->lookup("FontDescriptor", &fontDesc)->isDict()) {
  311     if (fontDesc.dictLookupNF("FontFile", &obj3)->isRef()) {
  312       *embID = obj3.getRef();
  313       if (expectedType != fontType1) {
  314     err = gTrue;
  315       }
  316     }
  317     obj3.free();
  318     if (embID->num == -1 &&
  319     fontDesc.dictLookupNF("FontFile2", &obj3)->isRef()) {
  320       *embID = obj3.getRef();
  321       if (isType0) {
  322     expectedType = fontCIDType2;
  323       } else if (expectedType != fontTrueType) {
  324     err = gTrue;
  325       }
  326     }
  327     obj3.free();
  328     if (embID->num == -1 &&
  329     fontDesc.dictLookupNF("FontFile3", &obj3)->isRef()) {
  330       *embID = obj3.getRef();
            // FontFile3 streams carry their own Subtype, which selects
            // the final expected type
  331       if (obj3.fetch(xref, &obj4)->isStream()) {
  332     obj4.streamGetDict()->lookup("Subtype", &subtype);
  333     if (subtype.isName("Type1")) {
  334       if (expectedType != fontType1) {
  335         err = gTrue;
  336         expectedType = isType0 ? fontCIDType0 : fontType1;
  337       }
  338     } else if (subtype.isName("Type1C")) {
  339       if (expectedType == fontType1) {
  340         expectedType = fontType1C;
  341       } else if (expectedType != fontType1C) {
  342         err = gTrue;
  343         expectedType = isType0 ? fontCIDType0C : fontType1C;
  344       }
  345     } else if (subtype.isName("TrueType")) {
  346       if (expectedType != fontTrueType) {
  347         err = gTrue;
  348         expectedType = isType0 ? fontCIDType2 : fontTrueType;
  349       }
  350     } else if (subtype.isName("CIDFontType0C")) {
  351       if (expectedType == fontCIDType0) {
  352         expectedType = fontCIDType0C;
  353       } else {
  354         err = gTrue;
  355         expectedType = isType0 ? fontCIDType0C : fontType1C;
  356       }
  357     } else if (subtype.isName("OpenType")) {
  358       if (expectedType == fontTrueType) {
  359         expectedType = fontTrueTypeOT;
  360       } else if (expectedType == fontType1) {
  361         expectedType = fontType1COT;
  362       } else if (expectedType == fontCIDType0) {
  363         expectedType = fontCIDType0COT;
  364       } else if (expectedType == fontCIDType2) {
  365         expectedType = fontCIDType2OT;
  366       } else {
  367         err = gTrue;
  368       }
  369     } else {
  370       error(errSyntaxError, -1, "Unknown font type '{0:s}'",
  371         subtype.isName() ? subtype.getName() : "???");
  372     }
  373     subtype.free();
  374       }
  375       obj4.free();
  376     }
  377     obj3.free();
  378   }
  379   fontDesc.free();
  380 
        //----- actual type
        // Identify the embedded font from its data; t stays
        // fontUnknownType if there is no embedded stream or the data is
        // not recognized.
  381   t = fontUnknownType;
  382   if (embID->num >= 0) {
  383     obj3.initRef(embID->num, embID->gen);
  384     obj3.fetch(xref, &obj4);
  385     if (obj4.isStream()) {
  386       obj4.streamReset();
  387       fft = FoFiIdentifier::identifyStream(&readFromStream, obj4.getStream());
  388       obj4.streamClose();
  389       switch (fft) {
  390       case fofiIdType1PFA:
  391       case fofiIdType1PFB:
  392     t = fontType1;
  393     break;
  394       case fofiIdCFF8Bit:
  395     t = isType0 ? fontCIDType0C : fontType1C;
  396     break;
  397       case fofiIdCFFCID:
  398     t = fontCIDType0C;
  399     break;
  400       case fofiIdTrueType:
  401       case fofiIdTrueTypeCollection:
  402     t = isType0 ? fontCIDType2 : fontTrueType;
  403     break;
  404       case fofiIdOpenTypeCFF8Bit:
  405     t = isType0 ? fontCIDType0COT : fontType1COT;
  406     break;
  407       case fofiIdOpenTypeCFFCID:
  408     t = fontCIDType0COT;
  409     break;
  410       default:
  411     error(errSyntaxError, -1, "Embedded font file may be invalid");
  412     break;
  413       }
  414     }
  415     obj4.free();
  416     obj3.free();
  417   }
  418 
        // nothing could be identified: fall back on the expected type
  419   if (t == fontUnknownType) {
  420     t = expectedType;
  421   }
  422 
  423   if (t != expectedType) {
  424     err = gTrue;
  425   }
  426 
  427   if (err) {
  428     error(errSyntaxWarning, -1,
  429       "Mismatch between font type and embedded font file");
  430   }
  431 
        // fontDict2 may have pointed into obj2 up to here
  432   obj2.free();
  433   obj1.free();
  434 
  435   return t;
  436 }
  437 
  438 void GfxFont::readFontDescriptor(XRef *xref, Dict *fontDict) {
  439   Object obj1, obj2, obj3, obj4;
  440   double t, t2;
  441   int i;
  442 
  443   // assume Times-Roman by default (for substitution purposes)
  444   flags = fontSerif;
  445 
  446   if (fontDict->lookup("FontDescriptor", &obj1)->isDict()) {
  447 
  448     // get flags
  449     if (obj1.dictLookup("Flags", &obj2)->isInt()) {
  450       flags = obj2.getInt();
  451     }
  452     obj2.free();
  453 
  454     // get name
  455     obj1.dictLookup("FontName", &obj2);
  456     if (obj2.isName()) {
  457       embFontName = new GString(obj2.getName());
  458     }
  459     obj2.free();
  460 
  461     // look for MissingWidth
  462     obj1.dictLookup("MissingWidth", &obj2);
  463     if (obj2.isNum()) {
  464       missingWidth = obj2.getNum();
  465     }
  466     obj2.free();
  467 
  468     // get Ascent
  469     // (CapHeight is a little more reliable - so use it if present)
  470     obj1.dictLookup("Ascent", &obj2);
  471     obj1.dictLookup("CapHeight", &obj3);
  472     if (obj2.isNum() || obj3.isNum()) {
  473       if (obj2.isNum()) {
  474     t = 0.001 * obj2.getNum();
  475     // some broken font descriptors specify a negative ascent
  476     if (t < 0) {
  477       t = -t;
  478     }
  479       } else {
  480     t = 0;
  481       }
  482       if (obj3.isNum()) {
  483     t2 = 0.001 * obj3.getNum();
  484     // some broken font descriptors specify a negative ascent
  485     if (t2 < 0) {
  486       t2 = -t2;
  487     }
  488       } else {
  489     t2 = 0;
  490       }
  491       if (t != 0 && t < 1.9) {
  492     declaredAscent = t;
  493       }
  494       // if both Ascent and CapHeight are set, use the smaller one
  495       // (because the most common problem is that Ascent is too large)
  496       if (t2 != 0 && (t == 0 || t2 < t)) {
  497     t = t2;
  498       }
  499       // some broken font descriptors set ascent and descent to 0;
  500       // others set it to ridiculous values (e.g., 32768)
  501       if (t != 0 && t < 1.9) {
  502     ascent = t;
  503       }
  504     }
  505     obj2.free();
  506     obj3.free();
  507 
  508     // get Descent
  509     obj1.dictLookup("Descent", &obj2);
  510     if (obj2.isNum()) {
  511       t = 0.001 * obj2.getNum();
  512       // some broken font descriptors specify a positive descent
  513       if (t > 0) {
  514     t = -t;
  515       }
  516       // some broken font descriptors set ascent and descent to 0
  517       if (t != 0 && t > -1.9) {
  518     descent = t;
  519       }
  520     }
  521     obj2.free();
  522 
  523     // font FontBBox
  524     if (obj1.dictLookup("FontBBox", &obj2)->isArray()) {
  525       for (i = 0; i < 4 && i < obj2.arrayGetLength(); ++i) {
  526     if (obj2.arrayGet(i, &obj3)->isNum()) {
  527       fontBBox[i] = 0.001 * obj3.getNum();
  528     }
  529     obj3.free();
  530       }
  531     }
  532     obj2.free();
  533 
  534   }
  535   obj1.free();
  536 }
  537 
  538 CharCodeToUnicode *GfxFont::readToUnicodeCMap(Dict *fontDict, int nBits,
  539                           CharCodeToUnicode *ctu) {
  540   GString *buf;
  541   Object obj1;
  542   char buf2[4096];
  543   int n;
  544 
  545   if (!fontDict->lookup("ToUnicode", &obj1)->isStream()) {
  546     obj1.free();
  547     return NULL;
  548   }
  549   buf = new GString();
  550   obj1.streamReset();
  551   while ((n = obj1.streamGetBlock(buf2, sizeof(buf2))) > 0) {
  552     buf->append(buf2, n);
  553   }
  554   obj1.streamClose();
  555   obj1.free();
  556   if (ctu) {
  557     ctu->mergeCMap(buf, nBits);
  558   } else {
  559     ctu = CharCodeToUnicode::parseCMap(buf, nBits);
  560   }
  561   delete buf;
  562   hasToUnicode = gTrue;
  563   return ctu;
  564 }
  565 
  566 GfxFontLoc *GfxFont::locateFont(XRef *xref, GBool ps) {
        // Decide where the data for this font comes from.  The "-----"
        // sections below are tried in order and the first one that
        // succeeds returns a newly allocated GfxFontLoc (nothing in this
        // function keeps a pointer to it).  Returns NULL for Type 3 fonts
        // and when neither the font nor a substitute can be found.
        //
        //   xref - used to check that the embedded font object is a stream
        //   ps   - when true, the PostScript-specific rules apply: the
        //          psEmbed* settings can veto embedding, and PS
        //          passthrough / PS resident fonts are considered; when
        //          false, Base-14 and substitute fonts are looked up as
        //          external files instead
  567   GfxFontLoc *fontLoc;
  568   SysFontType sysFontType;
  569   FoFiIdentifierType fft;
  570   GString *path, *base14Name, *substName;
  571   PSFontParam16 *psFont16;
  572   Object refObj, embFontObj;
  573   int substIdx, fontNum;
  574   double oblique;
  575   GBool embed;
  576 
  577   if (type == fontType3) {
  578     return NULL;
  579   }
  580 
  581   //----- embedded font
        // used only if the referenced object really is a stream and, for
        // PS output, the matching psEmbed* setting allows it
  582   if (embFontID.num >= 0) {
  583     embed = gTrue;
  584     refObj.initRef(embFontID.num, embFontID.gen);
  585     refObj.fetch(xref, &embFontObj);
  586     if (!embFontObj.isStream()) {
  587       error(errSyntaxError, -1, "Embedded font object is wrong type");
  588       embed = gFalse;
  589     }
  590     embFontObj.free();
  591     refObj.free();
  592     if (embed) {
  593       if (ps) {
  594     switch (type) {
  595     case fontType1:
  596     case fontType1C:
  597     case fontType1COT:
  598       embed = globalParams->getPSEmbedType1();
  599       break;
  600     case fontTrueType:
  601     case fontTrueTypeOT:
  602       embed = globalParams->getPSEmbedTrueType();
  603       break;
  604     case fontCIDType0C:
  605     case fontCIDType0COT:
  606       embed = globalParams->getPSEmbedCIDPostScript();
  607       break;
  608     case fontCIDType2:
  609     case fontCIDType2OT:
  610       embed = globalParams->getPSEmbedCIDTrueType();
  611       break;
  612     default:
  613       break;
  614     }
  615       }
  616       if (embed) {
  617     fontLoc = new GfxFontLoc();
  618     fontLoc->locType = gfxFontLocEmbedded;
  619     fontLoc->fontType = type;
  620     fontLoc->embFontID = embFontID;
  621     return fontLoc;
  622       }
  623     }
  624   }
  625 
  626   //----- PS passthrough
        // the font name itself is passed on as a resident font
  627   if (ps && name && !isCIDFont() && globalParams->getPSFontPassthrough()) {
  628     fontLoc = new GfxFontLoc();
  629     fontLoc->locType = gfxFontLocResident;
  630     fontLoc->fontType = fontType1;
  631     fontLoc->path = name->copy();
  632     return fontLoc;
  633   }
  634 
  635   //----- external font file (fontFile, fontDir)
        // getExternalFont takes over path: it stores it in the result or
        // deletes it on failure, so path is never freed here
  636   if (name && (path = globalParams->findFontFile(name))) {
  637     if ((fontLoc = getExternalFont(path, 0, 0, isCIDFont()))) {
  638       return fontLoc;
  639     }
  640   }
  641 
  642   //----- PS resident Base-14 font
        // (the cast is guarded by the !isCIDFont() test)
  643   if (ps && !isCIDFont() && ((Gfx8BitFont *)this)->base14) {
  644     fontLoc = new GfxFontLoc();
  645     fontLoc->locType = gfxFontLocResident;
  646     fontLoc->fontType = fontType1;
  647     fontLoc->path = new GString(((Gfx8BitFont *)this)->base14->base14Name);
  648     return fontLoc;
  649   }
  650 
  651   //----- external font file for Base-14 font
  652   if (!ps && !isCIDFont() && ((Gfx8BitFont *)this)->base14) {
  653     base14Name = new GString(((Gfx8BitFont *)this)->base14->base14Name);
  654     path = globalParams->findBase14FontFile(base14Name, &fontNum, &oblique);
  655     delete base14Name;
  656     if (path && (fontLoc = getExternalFont(path, fontNum, oblique, gFalse))) {
  657       return fontLoc;
  658     }
  659   }
  660 
  661   //----- system font
        // the GfxFontLoc is filled in first and only returned if the file
        // type suits this font (CID vs. 8-bit); otherwise it is deleted
        // again, which also deletes path (see ~GfxFontLoc)
  662   if (name && (path = globalParams->findSystemFontFile(name, &sysFontType,
  663                                &fontNum))) {
  664     fontLoc = new GfxFontLoc();
  665     fontLoc->locType = gfxFontLocExternal;
  666     fontLoc->path = path;
  667     fontLoc->fontNum = fontNum;
  668     if (isCIDFont()) {
  669       if (sysFontType == sysFontTTF || sysFontType == sysFontTTC) {
  670     fontLoc->fontType = fontCIDType2;
  671     return fontLoc;
  672       } else if (sysFontType == sysFontOTF) {
  673     fft = FoFiIdentifier::identifyFile(fontLoc->path->getCString());
  674     if (fft == fofiIdOpenTypeCFFCID) {
  675       fontLoc->fontType = fontCIDType0COT;
  676       return fontLoc;
  677     } else if (fft == fofiIdTrueType) {
  678       fontLoc->fontType = fontCIDType2;
  679       return fontLoc;
  680     }
  681       }
  682     } else {
  683       if (sysFontType == sysFontTTF || sysFontType == sysFontTTC) {
  684     fontLoc->fontType = fontTrueType;
  685     return fontLoc;
  686       } else if (sysFontType == sysFontPFA || sysFontType == sysFontPFB) {
  687     fontLoc->fontType = fontType1;
  688     return fontLoc;
  689       } else if (sysFontType == sysFontOTF) {
  690     fft = FoFiIdentifier::identifyFile(fontLoc->path->getCString());
  691     if (fft == fofiIdOpenTypeCFF8Bit) {
  692       fontLoc->fontType = fontType1COT;
  693       return fontLoc;
  694     } else if (fft == fofiIdTrueType) {
  695       fontLoc->fontType = fontTrueTypeOT;
  696       return fontLoc;
  697     }
  698       }
  699     }
  700     delete fontLoc;
  701   }
  702 
        // everything inside this block is for 8-bit fonts and always
        // returns; the code after the block is therefore CID-only
  703   if (!isCIDFont()) {
  704 
  705     //----- 8-bit PS resident font
  706     if (ps) {
  707       if (name && (path = globalParams->getPSResidentFont(name))) {
  708     fontLoc = new GfxFontLoc();
  709     fontLoc->locType = gfxFontLocResident;
  710     fontLoc->fontType = fontType1;
  711     fontLoc->path = path;
  712     return fontLoc;
  713       }
  714     }
  715 
  716     //----- 8-bit font substitution
          // substIdx indexes base14SubstFonts:
          // family offset (fixed 0, sans-serif 4, serif 8) + 2 if bold
          // + 1 if italic
  717     if (flags & fontFixedWidth) {
  718       substIdx = 0;
  719     } else if (flags & fontSerif) {
  720       substIdx = 8;
  721     } else {
  722       substIdx = 4;
  723     }
  724     if (isBold()) {
  725       substIdx += 2;
  726     }
  727     if (isItalic()) {
  728       substIdx += 1;
  729     }
  730     substName = new GString(base14SubstFonts[substIdx]);
  731     if (ps) {
            // PS output: refer to the substitute by name; substName is
            // handed over to fontLoc
  732       error(errSyntaxWarning, -1, "Substituting font '{0:s}' for '{1:t}'",
  733         base14SubstFonts[substIdx], name);
  734       fontLoc = new GfxFontLoc();
  735       fontLoc->locType = gfxFontLocResident;
  736       fontLoc->fontType = fontType1;
  737       fontLoc->path = substName;
  738       fontLoc->substIdx = substIdx;
  739       return fontLoc;
  740     } else {
            // otherwise the substitute must exist as a font file
  741       path = globalParams->findBase14FontFile(substName, &fontNum, &oblique);
  742       delete substName;
  743       if (path) {
  744     if ((fontLoc = getExternalFont(path, fontNum, oblique, gFalse))) {
  745       error(errSyntaxWarning, -1, "Substituting font '{0:s}' for '{1:t}'",
  746         base14SubstFonts[substIdx], name);
  747       fontLoc->substIdx = substIdx;
  748       return fontLoc;
  749     }
  750       }
  751     }
  752 
  753     // failed to find a substitute font
  754     return NULL;
  755   }
  756 
  757   //----- 16-bit PS resident font
        // looked up first by font name, then by character collection; the
        // strings are copied out of psFont16 rather than taken over
  758   if (ps && name && ((psFont16 = globalParams->getPSResidentFont16(
  759                      name,
  760                      ((GfxCIDFont *)this)->getWMode())))) {
  761     fontLoc = new GfxFontLoc();
  762     fontLoc->locType = gfxFontLocResident;
  763     fontLoc->fontType = fontCIDType0; // this is not used
  764     fontLoc->path = psFont16->psFontName->copy();
  765     fontLoc->encoding = psFont16->encoding->copy();
  766     fontLoc->wMode = psFont16->wMode;
  767     return fontLoc;
  768   }
  769   if (ps && ((psFont16 = globalParams->getPSResidentFontCC(
  770                  ((GfxCIDFont *)this)->getCollection(),
  771                  ((GfxCIDFont *)this)->getWMode())))) {
  772     error(errSyntaxWarning, -1, "Substituting font '{0:t}' for '{1:t}'",
  773       psFont16->psFontName, name);
  774     fontLoc = new GfxFontLoc();
  775     fontLoc->locType = gfxFontLocResident;
  776     fontLoc->fontType = fontCIDType0; // this is not used
  777     fontLoc->path = psFont16->psFontName->copy();
  778     fontLoc->encoding = psFont16->encoding->copy();
  779     fontLoc->wMode = psFont16->wMode;
  780     return fontLoc;
  781   }
  782 
  783   //----- CID font substitution
        // a font file registered for this font's character collection
  784   if ((path = globalParams->findCCFontFile(
  785                 ((GfxCIDFont *)this)->getCollection()))) {
  786     if ((fontLoc = getExternalFont(path, 0, 0, gTrue))) {
  787       error(errSyntaxWarning, -1, "Substituting font '{0:t}' for '{1:t}'",
  788         fontLoc->path, name);
  789       return fontLoc;
  790     }
  791   }
  792 
  793   // failed to find a substitute font
  794   return NULL;
  795 }
  796 
  797 GfxFontLoc *GfxFont::locateBase14Font(GString *base14Name) {
  798   GString *path;
  799   int fontNum;
  800   double oblique;
  801 
  802   path = globalParams->findBase14FontFile(base14Name, &fontNum, &oblique);
  803   if (!path) {
  804     return NULL;
  805   }
  806   return getExternalFont(path, fontNum, oblique, gFalse);
  807 }
  808 
  809 GfxFontLoc *GfxFont::getExternalFont(GString *path, int fontNum,
  810                      double oblique, GBool cid) {
  811   FoFiIdentifierType fft;
  812   GfxFontType fontType;
  813   GfxFontLoc *fontLoc;
  814 
  815   fft = FoFiIdentifier::identifyFile(path->getCString());
  816   switch (fft) {
  817   case fofiIdType1PFA:
  818   case fofiIdType1PFB:
  819     fontType = fontType1;
  820     break;
  821   case fofiIdCFF8Bit:
  822     fontType = fontType1C;
  823     break;
  824   case fofiIdCFFCID:
  825     fontType = fontCIDType0C;
  826     break;
  827   case fofiIdTrueType:
  828   case fofiIdTrueTypeCollection:
  829     fontType = cid ? fontCIDType2 : fontTrueType;
  830     break;
  831   case fofiIdOpenTypeCFF8Bit:
  832     fontType = fontType1COT;
  833     break;
  834   case fofiIdOpenTypeCFFCID:
  835     fontType = fontCIDType0COT;
  836     break;
  837   case fofiIdDfont:
  838     fontType = cid ? fontCIDType2 : fontTrueType;
  839     break;
  840   case fofiIdUnknown:
  841   case fofiIdError:
  842   default:
  843     fontType = fontUnknownType;
  844     break;
  845   }
  846   if (fontType == fontUnknownType ||
  847       (cid ? (fontType < fontCIDType0)
  848            : (fontType >= fontCIDType0))) {
  849     delete path;
  850     return NULL;
  851   }
  852   fontLoc = new GfxFontLoc();
  853   fontLoc->locType = gfxFontLocExternal;
  854   fontLoc->fontType = fontType;
  855   fontLoc->path = path;
  856   fontLoc->fontNum = fontNum;
  857   fontLoc->oblique = oblique;
  858   return fontLoc;
  859 }
  860 
  861 char *GfxFont::readEmbFontFile(XRef *xref, int *len) {
  862   char *buf;
  863   Object obj1, obj2;
  864   Stream *str;
  865   int size, n;
  866 
  867   obj1.initRef(embFontID.num, embFontID.gen);
  868   obj1.fetch(xref, &obj2);
  869   if (!obj2.isStream()) {
  870     error(errSyntaxError, -1, "Embedded font file is not a stream");
  871     obj2.free();
  872     obj1.free();
  873     embFontID.num = -1;
  874     return NULL;
  875   }
  876   str = obj2.getStream();
  877 
  878   size = 4096;
  879   buf = (char *)gmalloc(size);
  880   *len = 0;
  881   str->reset();
  882   do {
  883     if (*len > size - 4096) {
  884       if (size > INT_MAX / 2) {
  885     error(errSyntaxError, -1, "Embedded font file is too large");
  886     break;
  887       }
  888       size *= 2;
  889       buf = (char *)grealloc(buf, size);
  890     }
  891     n = str->getBlock(buf + *len, 4096);
  892     *len += n;
  893   } while (n == 4096);
  894   str->close();
  895 
  896   obj2.free();
  897   obj1.free();
  898 
  899   return buf;
  900 }
  901 
  902 //------------------------------------------------------------------------
  903 // Gfx8BitFont
  904 //------------------------------------------------------------------------
  905 
  906 Gfx8BitFont::Gfx8BitFont(XRef *xref, const char *tagA, Ref idA, GString *nameA,
  907              GfxFontType typeA, Ref embFontIDA, Dict *fontDict):
  908   GfxFont(tagA, idA, nameA, typeA, embFontIDA)
  909 {
        // Construct an 8-bit font (256 char codes) from its font
        // dictionary [fontDict].  In order, this:
        //   - resolves Base-14 name aliases and looks up the built-in font
        //   - sets default metrics, then reads the font descriptor, the
        //     FontMatrix, and (for Type 3) FontBBox/CharProcs/Resources
        //   - builds enc[] from a base encoding plus Encoding.Differences
        //   - builds the char code -> Unicode map (ctu)
        //   - fills widths[] from Widths, the built-in font, or a
        //     substitute built-in font
        // and finally sets ok to gTrue.
  910   GString *name2;
  911   BuiltinFont *builtinFont;
  912   const char **baseEnc;
  913   char *buf;
  914   int len;
  915   FoFiType1 *ffT1;
  916   FoFiType1C *ffT1C;
  917   int code, code2;
  918   char *charName;
  919   GBool missing, hex;
  920   Unicode toUnicode[256];
  921   CharCodeToUnicode *utu, *ctu2;
  922   Unicode uBuf[8];
  923   double mul;
  924   int firstChar, lastChar;
  925   Gushort w;
  926   Object obj1, obj2, obj3;
  927   int n, i, a, b, m;
  928 
  929   ctu = NULL;
  930 
  931   // do font name substitution for various aliases of the Base 14 font
  932   // names
  933   base14 = NULL;
  934   if (name) {
          // compare against a copy of the name with all spaces removed
  935     name2 = name->copy();
  936     i = 0;
  937     while (i < name2->getLength()) {
  938       if (name2->getChar(i) == ' ') {
  939     name2->del(i);
  940       } else {
  941     ++i;
  942       }
  943     }
          // binary search over base14FontMap (this assumes the table is
          // sorted by altName)
  944     a = 0;
  945     b = sizeof(base14FontMap) / sizeof(Base14FontMapEntry);
  946     // invariant: base14FontMap[a].altName <= name2 < base14FontMap[b].altName
  947     while (b - a > 1) {
  948       m = (a + b) / 2;
  949       if (name2->cmp(base14FontMap[m].altName) >= 0) {
  950     a = m;
  951       } else {
  952     b = m;
  953       }
  954     }
  955     if (!name2->cmp(base14FontMap[a].altName)) {
  956       base14 = &base14FontMap[a];
  957     }
  958     delete name2;
  959   }
  960 
  961   // is it a built-in font?
  962   builtinFont = NULL;
  963   if (base14) {
  964     for (i = 0; i < nBuiltinFonts; ++i) {
  965       if (!strcmp(base14->base14Name, builtinFonts[i].name)) {
  966     builtinFont = &builtinFonts[i];
  967     break;
  968       }
  969     }
  970   }
  971 
  972   // default ascent/descent values
        // (built-in font metrics are scaled by 0.001 here)
  973   if (builtinFont) {
  974     missingWidth = builtinFont->missingWidth;
  975     ascent = 0.001 * builtinFont->ascent;
  976     descent = 0.001 * builtinFont->descent;
  977     declaredAscent = ascent;
  978     fontBBox[0] = 0.001 * builtinFont->bbox[0];
  979     fontBBox[1] = 0.001 * builtinFont->bbox[1];
  980     fontBBox[2] = 0.001 * builtinFont->bbox[2];
  981     fontBBox[3] = 0.001 * builtinFont->bbox[3];
  982   } else {
  983     missingWidth = 0;
  984     ascent = 0.75;
  985     descent = -0.25;
  986     declaredAscent = ascent;
  987     fontBBox[0] = fontBBox[1] = fontBBox[2] = fontBBox[3] = 0;
  988   }
  989 
  990   // get info from font descriptor
  991   readFontDescriptor(xref, fontDict);
  992 
  993   // for Base-14 fonts (even if embedded), don't trust the
  994   // ascent/descent/bbox values from the font descriptor
        // (missingWidth is not reset here, unlike the other defaults)
  995   if (builtinFont) {
  996     ascent = 0.001 * builtinFont->ascent;
  997     descent = 0.001 * builtinFont->descent;
  998     declaredAscent = ascent;
  999     fontBBox[0] = 0.001 * builtinFont->bbox[0];
 1000     fontBBox[1] = 0.001 * builtinFont->bbox[1];
 1001     fontBBox[2] = 0.001 * builtinFont->bbox[2];
 1002     fontBBox[3] = 0.001 * builtinFont->bbox[3];
 1003   }
 1004 
 1005   // get font matrix
        // (starts as the identity; only numeric array entries override it)
 1006   fontMat[0] = fontMat[3] = 1;
 1007   fontMat[1] = fontMat[2] = fontMat[4] = fontMat[5] = 0;
 1008   if (fontDict->lookup("FontMatrix", &obj1)->isArray()) {
 1009     for (i = 0; i < 6 && i < obj1.arrayGetLength(); ++i) {
 1010       if (obj1.arrayGet(i, &obj2)->isNum()) {
 1011     fontMat[i] = obj2.getNum();
 1012       }
 1013       obj2.free();
 1014     }
 1015   }
 1016   obj1.free();
 1017 
 1018   // get Type 3 bounding box, font definition, and resources
 1019   if (type == fontType3) {
 1020     if (fontDict->lookup("FontBBox", &obj1)->isArray()) {
 1021       for (i = 0; i < 4 && i < obj1.arrayGetLength(); ++i) {
 1022     if (obj1.arrayGet(i, &obj2)->isNum()) {
 1023       fontBBox[i] = obj2.getNum();
 1024     }
 1025     obj2.free();
 1026       }
 1027     }
 1028     obj1.free();
          // charProcs and resources are kept only if they are
          // dictionaries; otherwise they are freed right away
 1029     if (!fontDict->lookup("CharProcs", &charProcs)->isDict()) {
 1030       error(errSyntaxError, -1,
 1031         "Missing or invalid CharProcs dictionary in Type 3 font");
 1032       charProcs.free();
 1033     }
 1034     if (!fontDict->lookup("Resources", &resources)->isDict()) {
 1035       resources.free();
 1036     }
 1037   }
 1038 
 1039   //----- build the font encoding -----
 1040 
 1041   // Encodings start with a base encoding, which can come from
 1042   // (in order of priority):
 1043   //   1. FontDict.Encoding or FontDict.Encoding.BaseEncoding
 1044   //        - MacRoman / MacExpert / WinAnsi / Standard
 1045   //   2. embedded or external font file
 1046   //   3. default:
 1047   //        - builtin --> builtin encoding
 1048   //        - TrueType --> WinAnsiEncoding
 1049   //        - others --> StandardEncoding
 1050   // and then add a list of differences (if any) from
 1051   // FontDict.Encoding.Differences.
 1052 
 1053   // check FontDict for base encoding
        // (obj1 holds the Encoding entry and stays live until the
        // Differences array has been merged further down)
 1054   hasEncoding = gFalse;
 1055   usesMacRomanEnc = gFalse;
 1056   baseEnc = NULL;
 1057   baseEncFromFontFile = gFalse;
 1058   fontDict->lookup("Encoding", &obj1);
 1059   if (obj1.isDict()) {
 1060     obj1.dictLookup("BaseEncoding", &obj2);
 1061     if (obj2.isName("MacRomanEncoding")) {
 1062       hasEncoding = gTrue;
 1063       usesMacRomanEnc = gTrue;
 1064       baseEnc = macRomanEncoding;
 1065     } else if (obj2.isName("MacExpertEncoding")) {
 1066       hasEncoding = gTrue;
 1067       baseEnc = macExpertEncoding;
 1068     } else if (obj2.isName("WinAnsiEncoding")) {
 1069       hasEncoding = gTrue;
 1070       baseEnc = winAnsiEncoding;
 1071     }
 1072     obj2.free();
 1073   } else if (obj1.isName("MacRomanEncoding")) {
 1074     hasEncoding = gTrue;
 1075     usesMacRomanEnc = gTrue;
 1076     baseEnc = macRomanEncoding;
 1077   } else if (obj1.isName("MacExpertEncoding")) {
 1078     hasEncoding = gTrue;
 1079     baseEnc = macExpertEncoding;
 1080   } else if (obj1.isName("WinAnsiEncoding")) {
 1081     hasEncoding = gTrue;
 1082     baseEnc = winAnsiEncoding;
 1083   }
 1084 
 1085   // check embedded font file for base encoding
 1086   // (only for Type 1 fonts - trying to get an encoding out of a
 1087   // TrueType font is a losing proposition)
        // The embedded font's own name, if it has one, replaces
        // embFontName even when the encoding comes from the font dict.
 1088   ffT1 = NULL;
 1089   ffT1C = NULL;
 1090   buf = NULL;
 1091   if (type == fontType1 && embFontID.num >= 0) {
 1092     if ((buf = readEmbFontFile(xref, &len))) {
 1093       if ((ffT1 = FoFiType1::make(buf, len))) {
 1094     if (ffT1->getName()) {
 1095       if (embFontName) {
 1096         delete embFontName;
 1097       }
 1098       embFontName = new GString(ffT1->getName());
 1099     }
 1100     if (!baseEnc) {
 1101       baseEnc = (const char **)ffT1->getEncoding();
 1102       baseEncFromFontFile = gTrue;
 1103     }
 1104       }
 1105       gfree(buf);
 1106     }
 1107   } else if (type == fontType1C && embFontID.num >= 0) {
 1108     if ((buf = readEmbFontFile(xref, &len))) {
 1109       if ((ffT1C = FoFiType1C::make(buf, len))) {
 1110     if (ffT1C->getName()) {
 1111       if (embFontName) {
 1112         delete embFontName;
 1113       }
 1114       embFontName = new GString(ffT1C->getName());
 1115     }
 1116     if (!baseEnc) {
 1117       baseEnc = (const char **)ffT1C->getEncoding();
 1118       baseEncFromFontFile = gTrue;
 1119     }
 1120       }
 1121       gfree(buf);
 1122     }
 1123   }
 1124 
 1125   // get default base encoding
 1126   if (!baseEnc) {
 1127     if (builtinFont && embFontID.num < 0) {
 1128       baseEnc = builtinFont->defaultBaseEnc;
 1129       hasEncoding = gTrue;
 1130     } else if (type == fontTrueType) {
 1131       baseEnc = winAnsiEncoding;
 1132     } else {
 1133       baseEnc = standardEncoding;
 1134     }
 1135   }
 1136 
 1137   // copy the base encoding
        // (names taken from the font file are duplicated with copyString
        // and flagged in encFree[]; all other names are stored as plain
        // pointers into the encoding table)
 1138   for (i = 0; i < 256; ++i) {
 1139     enc[i] = (char *)baseEnc[i];
 1140     if ((encFree[i] = (char)baseEncFromFontFile) && enc[i]) {
 1141       enc[i] = copyString(baseEnc[i]);
 1142     }
 1143   }
 1144 
 1145   // some Type 1C font files have empty encodings, which can break the
 1146   // T1C->T1 conversion (since the 'seac' operator depends on having
 1147   // the accents in the encoding), so we fill in any gaps from
 1148   // StandardEncoding
 1149   if (type == fontType1C && embFontID.num >= 0 && baseEncFromFontFile) {
 1150     for (i = 0; i < 256; ++i) {
 1151       if (!enc[i] && standardEncoding[i]) {
 1152     enc[i] = (char *)standardEncoding[i];
 1153     encFree[i] = gFalse;
 1154       }
 1155     }
 1156   }
 1157 
 1158   // merge differences into encoding
        // (an integer entry sets the current code; each name entry is
        // assigned to the current code, which then advances by one;
        // names for codes outside 0..255 are dropped)
 1159   if (obj1.isDict()) {
 1160     obj1.dictLookup("Differences", &obj2);
 1161     if (obj2.isArray()) {
 1162       hasEncoding = gTrue;
 1163       code = 0;
 1164       for (i = 0; i < obj2.arrayGetLength(); ++i) {
 1165     obj2.arrayGet(i, &obj3);
 1166     if (obj3.isInt()) {
 1167       code = obj3.getInt();
 1168     } else if (obj3.isName()) {
 1169       if (code >= 0 && code < 256) {
 1170         if (encFree[code]) {
 1171           gfree(enc[code]);
 1172         }
 1173         enc[code] = copyString(obj3.getName());
 1174         encFree[code] = gTrue;
 1175       }
 1176       ++code;
 1177     } else {
 1178       error(errSyntaxError, -1,
 1179         "Wrong type in font encoding resource differences ({0:s})",
 1180         obj3.getTypeName());
 1181     }
 1182     obj3.free();
 1183       }
 1184     }
 1185     obj2.free();
 1186   }
 1187   obj1.free();
        // the font file parsers are released only here, after enc[] has
        // been filled in: baseEnc may point at their encoding tables
 1188   if (ffT1) {
 1189     delete ffT1;
 1190   }
 1191   if (ffT1C) {
 1192     delete ffT1C;
 1193   }
 1194 
 1195   //----- build the mapping to Unicode -----
 1196 
 1197   // pass 1: use the name-to-Unicode mapping table
        // (missing: at least one name other than ".notdef" had no
        // mapping; hex: at least one such name looks like 'Axx' or 'xx'
        // and contains a hex letter a-f/A-F)
 1198   missing = hex = gFalse;
 1199   for (code = 0; code < 256; ++code) {
 1200     if ((charName = enc[code])) {
 1201       if (!(toUnicode[code] = globalParams->mapNameToUnicode(charName)) &&
 1202       strcmp(charName, ".notdef")) {
 1203     // if it wasn't in the name-to-Unicode table, check for a
 1204     // name that looks like 'Axx' or 'xx', where 'A' is any letter
 1205     // and 'xx' is two hex digits
 1206     if ((strlen(charName) == 3 &&
 1207          isalpha(charName[0] & 0xff) &&
 1208          isxdigit(charName[1] & 0xff) && isxdigit(charName[2] & 0xff) &&
 1209          ((charName[1] >= 'a' && charName[1] <= 'f') ||
 1210           (charName[1] >= 'A' && charName[1] <= 'F') ||
 1211           (charName[2] >= 'a' && charName[2] <= 'f') ||
 1212           (charName[2] >= 'A' && charName[2] <= 'F'))) ||
 1213         (strlen(charName) == 2 &&
 1214          isxdigit(charName[0] & 0xff) && isxdigit(charName[1] & 0xff) &&
 1215          ((charName[0] >= 'a' && charName[0] <= 'f') ||
 1216           (charName[0] >= 'A' && charName[0] <= 'F') ||
 1217           (charName[1] >= 'a' && charName[1] <= 'f') ||
 1218           (charName[1] >= 'A' && charName[1] <= 'F')))) {
 1219       hex = gTrue;
 1220     }
 1221     missing = gTrue;
 1222       }
 1223     } else {
 1224       toUnicode[code] = 0;
 1225     }
 1226   }
 1227 
 1228   // pass 2: try to fill in the missing chars, looking for names of
 1229   // any of the following forms:
 1230   // - 'xx'
 1231   // - 'Axx'
 1232   // - 'nn'
 1233   // - 'Ann'
 1234   // - 'ABnn'
 1235   // - 'unixxxx' (possibly followed by garbage - some Arabic files
 1236   //             use 'uni0628.medi', etc.)
 1237   // where 'A' and 'B' are any letters, 'xx' is two hex digits, 'xxxx'
 1238   // is four hex digits, and 'nn' is 2-4 decimal digits
 1239   usedNumericHeuristic = gFalse;
 1240   if (missing && globalParams->getMapNumericCharNames()) {
 1241     for (code = 0; code < 256; ++code) {
 1242       if ((charName = enc[code]) && !toUnicode[code] &&
 1243       strcmp(charName, ".notdef")) {
 1244     n = (int)strlen(charName);
          // code2 stays -1 unless one of the name patterns below matches
 1245     code2 = -1;
 1246     if (hex && n == 3 && isalpha(charName[0] & 0xff) &&
 1247         isxdigit(charName[1] & 0xff) && isxdigit(charName[2] & 0xff)) {
 1248       sscanf(charName+1, "%x", &code2);
 1249     } else if (hex && n == 2 &&
 1250            isxdigit(charName[0] & 0xff) &&
 1251            isxdigit(charName[1] & 0xff)) {
 1252       sscanf(charName, "%x", &code2);
 1253     } else if (!hex && n >= 2 && n <= 4 &&
 1254            isdigit(charName[0] & 0xff) && isdigit(charName[1] & 0xff)) {
 1255       code2 = atoi(charName);
 1256     } else if (n >= 3 && n <= 5 &&
 1257            isdigit(charName[1] & 0xff) && isdigit(charName[2] & 0xff)) {
 1258       code2 = atoi(charName+1);
 1259     } else if (n >= 4 && n <= 6 &&
 1260            isdigit(charName[2] & 0xff) && isdigit(charName[3] & 0xff)) {
 1261       code2 = atoi(charName+2);
 1262     } else if (n >= 7 && charName[0] == 'u' && charName[1] == 'n' &&
 1263            charName[2] == 'i' &&
 1264            isxdigit(charName[3] & 0xff) &&
 1265            isxdigit(charName[4] & 0xff) &&
 1266            isxdigit(charName[5] & 0xff) &&
 1267            isxdigit(charName[6] & 0xff)) {
 1268       sscanf(charName + 3, "%x", &code2);
 1269     }
          // only values in 0..0xffff are accepted
 1270     if (code2 >= 0 && code2 <= 0xffff) {
 1271       toUnicode[code] = (Unicode)code2;
 1272       usedNumericHeuristic = gTrue;
 1273     }
 1274       }
 1275     }
 1276 
 1277   // if the 'mapUnknownCharNames' flag is set, do a simple pass-through
 1278   // mapping for unknown character names
 1279   } else if (missing && globalParams->getMapUnknownCharNames()) {
 1280     for (code = 0; code < 256; ++code) {
 1281       if (!toUnicode[code]) {
 1282     toUnicode[code] = code;
 1283       }
 1284     }
 1285   }
 1286 
 1287   // construct the char code -> Unicode mapping object
 1288   ctu = CharCodeToUnicode::make8BitToUnicode(toUnicode);
 1289 
 1290   // merge in a ToUnicode CMap, if there is one -- this overwrites
 1291   // existing entries in ctu, i.e., the ToUnicode CMap takes
 1292   // precedence, but the other encoding info is allowed to fill in any
 1293   // holes
 1294   readToUnicodeCMap(fontDict, 8, ctu);
 1295 
 1296   // look for a Unicode-to-Unicode mapping
        // (ctu2 starts out all-zero and only receives codes that map
        // through both ctu and utu; only the first Unicode value from
        // ctu is fed into utu; ctu2 then replaces ctu)
 1297   if (name && (utu = globalParams->getUnicodeToUnicode(name))) {
 1298     for (i = 0; i < 256; ++i) {
 1299       toUnicode[i] = 0;
 1300     }
 1301     ctu2 = CharCodeToUnicode::make8BitToUnicode(toUnicode);
 1302     for (i = 0; i < 256; ++i) {
 1303       n = ctu->mapToUnicode((CharCode)i, uBuf, 8);
 1304       if (n >= 1) {
 1305     n = utu->mapToUnicode((CharCode)uBuf[0], uBuf, 8);
 1306     if (n >= 1) {
 1307       ctu2->setMapping((CharCode)i, uBuf, n);
 1308     }
 1309       }
 1310     }
 1311     utu->decRefCnt();
 1312     delete ctu;
 1313     ctu = ctu2;
 1314   }
 1315 
 1316   //----- get the character widths -----
 1317 
 1318   // initialize all widths
 1319   for (code = 0; code < 256; ++code) {
 1320     widths[code] = missingWidth * 0.001;
 1321   }
 1322 
 1323   // use widths from font dict, if present
        // (FirstChar/LastChar values outside 0..255, or of the wrong
        // type, fall back to 0 and 255 respectively)
 1324   fontDict->lookup("FirstChar", &obj1);
 1325   firstChar = obj1.isInt() ? obj1.getInt() : 0;
 1326   obj1.free();
 1327   if (firstChar < 0 || firstChar > 255) {
 1328     firstChar = 0;
 1329   }
 1330   fontDict->lookup("LastChar", &obj1);
 1331   lastChar = obj1.isInt() ? obj1.getInt() : 255;
 1332   obj1.free();
 1333   if (lastChar < 0 || lastChar > 255) {
 1334     lastChar = 255;
 1335   }
        // Type 3 widths are scaled by fontMat[0]; all others by 0.001
 1336   mul = (type == fontType3) ? fontMat[0] : 0.001;
 1337   fontDict->lookup("Widths", &obj1);
 1338   if (obj1.isArray()) {
          // assume fixed width until some width differs from the first
 1339     flags |= fontFixedWidth;
          // don't read past the end of a Widths array that is too short
 1340     if (obj1.arrayGetLength() < lastChar - firstChar + 1) {
 1341       lastChar = firstChar + obj1.arrayGetLength() - 1;
 1342     }
 1343     for (code = firstChar; code <= lastChar; ++code) {
 1344       obj1.arrayGet(code - firstChar, &obj2);
 1345       if (obj2.isNum()) {
 1346     widths[code] = obj2.getNum() * mul;
 1347     if (fabs(widths[code] - widths[firstChar]) > 0.00001) {
 1348       flags &= ~fontFixedWidth;
 1349     }
 1350       }
 1351       obj2.free();
 1352     }
 1353 
 1354   // use widths from built-in font
 1355   } else if (builtinFont) {
 1356     // this is a kludge for broken PDF files that encode char 32
 1357     // as .notdef
 1358     if (builtinFont->widths->getWidth("space", &w)) {
 1359       widths[32] = 0.001 * w;
 1360     }
 1361     for (code = 0; code < 256; ++code) {
 1362       if (enc[code] && builtinFont->widths->getWidth(enc[code], &w)) {
 1363     widths[code] = 0.001 * w;
 1364       }
 1365     }
 1366 
 1367   // couldn't find widths -- use defaults 
 1368   } else {
 1369     // this is technically an error -- the Widths entry is required
 1370     // for all but the Base-14 fonts -- but certain PDF generators
 1371     // apparently don't include widths for Arial and TimesNewRoman
          // pick an index into builtinFontSubst: base 0 (fixed width),
          // 8 (serif) or 4 (other), plus 2 for bold and 1 for italic
 1372     if (isFixedWidth()) {
 1373       i = 0;
 1374     } else if (isSerif()) {
 1375       i = 8;
 1376     } else {
 1377       i = 4;
 1378     }
 1379     if (isBold()) {
 1380       i += 2;
 1381     }
 1382     if (isItalic()) {
 1383       i += 1;
 1384     }
 1385     builtinFont = builtinFontSubst[i];
 1386     // this is a kludge for broken PDF files that encode char 32
 1387     // as .notdef
 1388     if (builtinFont->widths->getWidth("space", &w)) {
 1389       widths[32] = 0.001 * w;
 1390     }
 1391     for (code = 0; code < 256; ++code) {
 1392       if (enc[code] && builtinFont->widths->getWidth(enc[code], &w)) {
 1393     widths[code] = 0.001 * w;
 1394       }
 1395     }
 1396   }
 1397   obj1.free();
 1398 
 1399   ok = gTrue;
 1400 }
 1401 
 1402 Gfx8BitFont::~Gfx8BitFont() {
 1403   int i;
 1404 
 1405   for (i = 0; i < 256; ++i) {
 1406     if (encFree[i] && enc[i]) {
 1407       gfree(enc[i]);
 1408     }
 1409   }
 1410   ctu->decRefCnt();
 1411   if (charProcs.isDict()) {
 1412     charProcs.free();
 1413   }
 1414   if (resources.isDict()) {
 1415     resources.free();
 1416   }
 1417 }
 1418 
 1419 int Gfx8BitFont::getNextChar(char *s, int len, CharCode *code,
 1420                  Unicode *u, int uSize, int *uLen,
 1421                  double *dx, double *dy, double *ox, double *oy) {
 1422   CharCode c;
 1423 
 1424   *code = c = (CharCode)(*s & 0xff);
 1425   *uLen = ctu->mapToUnicode(c, u, uSize);
 1426   *dx = widths[c];
 1427   *dy = *ox = *oy = 0;
 1428   return 1;
 1429 }
 1430 
 1431 CharCodeToUnicode *Gfx8BitFont::getToUnicode() {
 1432   ctu->incRefCnt();
 1433   return ctu;
 1434 }
 1435 
 1436 int *Gfx8BitFont::getCodeToGIDMap(FoFiTrueType *ff) {
 1437   int *map;
 1438   int cmapPlatform, cmapEncoding;
 1439   int unicodeCmap, macRomanCmap, msSymbolCmap, cmap;
 1440   GBool nonsymbolic, useMacRoman, useUnicode;
 1441   char *charName;
 1442   Unicode u;
 1443   int code, i, n;
 1444 
 1445   map = (int *)gmallocn(256, sizeof(int));
 1446   for (i = 0; i < 256; ++i) {
 1447     map[i] = 0;
 1448   }
 1449 
 1450   // This is based on the cmap/encoding selection algorithm in the PDF
 1451   // 2.0 spec, but with some differences to match up with Adobe's
 1452   // behavior.
 1453   unicodeCmap = macRomanCmap = msSymbolCmap = -1;
 1454   for (i = 0; i < ff->getNumCmaps(); ++i) {
 1455     cmapPlatform = ff->getCmapPlatform(i);
 1456     cmapEncoding = ff->getCmapEncoding(i);
 1457     if ((cmapPlatform == 3 && cmapEncoding == 1) ||
 1458     (cmapPlatform == 0 && cmapEncoding <= 4)) {
 1459       unicodeCmap = i;
 1460     } else if (cmapPlatform == 1 && cmapEncoding == 0) {
 1461       macRomanCmap = i;
 1462     } else if (cmapPlatform == 3 && cmapEncoding == 0) {
 1463       msSymbolCmap = i;
 1464     }
 1465   }
 1466   useMacRoman = gFalse;
 1467   useUnicode = gFalse;
 1468   nonsymbolic = !(flags & fontSymbolic);
 1469   if (usesMacRomanEnc && macRomanCmap >= 0) {
 1470     cmap = macRomanCmap;
 1471     useMacRoman = gTrue;
 1472   } else if (embFontID.num < 0 && hasEncoding && unicodeCmap >= 0) { 
 1473     cmap = unicodeCmap;
 1474     useUnicode = gTrue;
 1475   } else if (nonsymbolic && unicodeCmap >= 0) {
 1476     cmap = unicodeCmap;
 1477     useUnicode = gTrue;
 1478   } else if (nonsymbolic && macRomanCmap >= 0) {
 1479     cmap = macRomanCmap;
 1480     useMacRoman = gTrue;
 1481   } else if (msSymbolCmap >= 0) {
 1482     cmap = msSymbolCmap;
 1483   } else if (unicodeCmap >= 0) {
 1484     cmap = unicodeCmap;
 1485   } else if (macRomanCmap >= 0) {
 1486     cmap = macRomanCmap;
 1487   } else {
 1488     cmap = 0;
 1489   }
 1490 
 1491   // reverse map the char names through MacRomanEncoding, then map the
 1492   // char codes through the cmap; fall back on Unicode if that doesn't
 1493   // work
 1494   if (useMacRoman) {
 1495     for (i = 0; i < 256; ++i) {
 1496       if ((charName = enc[i])) {
 1497     if ((code = globalParams->getMacRomanCharCode(charName))) {
 1498       map[i] = ff->mapCodeToGID(cmap, code);
 1499     } else if (unicodeCmap >= 0 &&
 1500            (u = globalParams->mapNameToUnicode(charName))) {
 1501       map[i] = ff->mapCodeToGID(unicodeCmap, u);
 1502     }
 1503       } else if (unicodeCmap >= 0 &&
 1504          (n = ctu->mapToUnicode((CharCode)i, &u, 1))) {
 1505     map[i] = ff->mapCodeToGID(cmap, u);
 1506       } else {
 1507     map[i] = -1;
 1508       }
 1509     }
 1510 
 1511   // map Unicode through the cmap
 1512   } else if (useUnicode) {
 1513     for (i = 0; i < 256; ++i) {
 1514       if (((charName = enc[i]) &&
 1515        (u = globalParams->mapNameToUnicode(charName))) ||
 1516       (n = ctu->mapToUnicode((CharCode)i, &u, 1))) {
 1517     map[i] = ff->mapCodeToGID(cmap, u);
 1518       } else {
 1519     map[i] = -1;
 1520       }
 1521     }
 1522 
 1523   // map the char codes through the cmap, possibly with an offset of
 1524   // 0xf000
 1525   } else {
 1526     for (i = 0; i < 256; ++i) {
 1527       if (!(map[i] = ff->mapCodeToGID(cmap, i))) {
 1528     map[i] = ff->mapCodeToGID(cmap, 0xf000 + i);
 1529       }
 1530     }
 1531   }
 1532 
 1533   // try the TrueType 'post' table to handle any unmapped characters
 1534   for (i = 0; i < 256; ++i) {
 1535     if (map[i] <= 0 && (charName = enc[i])) {
 1536       map[i] = ff->mapNameToGID(charName);
 1537     }
 1538   }
 1539 
 1540   return map;
 1541 }
 1542 
 1543 int *Gfx8BitFont::getCodeToGIDMap(FoFiType1C *ff) {
 1544   int *map;
 1545   GHash *nameToGID;
 1546   int i, gid;
 1547 
 1548   map = (int *)gmallocn(256, sizeof(int));
 1549   for (i = 0; i < 256; ++i) {
 1550     map[i] = 0;
 1551   }
 1552 
 1553   nameToGID = ff->getNameToGIDMap();
 1554   for (i = 0; i < 256; ++i) {
 1555     if (!enc[i]) {
 1556       continue;
 1557     }
 1558     gid = nameToGID->lookupInt(enc[i]);
 1559     if (gid < 0 || gid >= 65536) {
 1560       continue;
 1561     }
 1562     map[i] = gid;
 1563   }
 1564 
 1565   delete nameToGID;
 1566 
 1567   return map;
 1568 }
 1569 
 1570 Dict *Gfx8BitFont::getCharProcs() {
 1571   return charProcs.isDict() ? charProcs.getDict() : (Dict *)NULL;
 1572 }
 1573 
 1574 Object *Gfx8BitFont::getCharProc(int code, Object *proc) {
 1575   if (enc[code] && charProcs.isDict()) {
 1576     charProcs.dictLookup(enc[code], proc);
 1577   } else {
 1578     proc->initNull();
 1579   }
 1580   return proc;
 1581 }
 1582 
 1583 Object *Gfx8BitFont::getCharProcNF(int code, Object *proc) {
 1584   if (enc[code] && charProcs.isDict()) {
 1585     charProcs.dictLookupNF(enc[code], proc);
 1586   } else {
 1587     proc->initNull();
 1588   }
 1589   return proc;
 1590 }
 1591 
 1592 Dict *Gfx8BitFont::getResources() {
 1593   return resources.isDict() ? resources.getDict() : (Dict *)NULL;
 1594 }
 1595 
 1596 GBool Gfx8BitFont::problematicForUnicode() {
 1597   GString *nameLC;
 1598   GBool symbolic;
 1599 
 1600   // potential inputs:
 1601   // - font is embedded (GfxFont.embFontID.num >= 0)
 1602   // - font name (GfxFont.name)
 1603   // - font type (GfxFont.type)
 1604   // - Base-14 font (Gfx8BitFont.base14 != NULL)
 1605   // - symbolic (GfxFont.flags & fontSymbolic)
 1606   // - has Encoding array (Gfx8BitFont.hasEncoding)
 1607   // - extracted base encoding from embedded font file
 1608   //   (Gfx8BitFont.baseEncFromFontFile)
 1609   // - has a ToUnicode map (GfxFont.hasToUnicode)
 1610   // - used the numeric glyph name heuristic
 1611   //   (Gfx8BitFont.usedNumericHeuristic)
 1612 
 1613   if (name) {
 1614     nameLC = name->copy();
 1615     nameLC->lowerCase();
 1616     symbolic = strstr(nameLC->getCString(), "dingbat") ||
 1617                strstr(nameLC->getCString(), "wingding") ||
 1618                strstr(nameLC->getCString(), "commpi");
 1619     delete nameLC;
 1620     if (symbolic) {
 1621       return gFalse;
 1622     }
 1623   }
 1624 
 1625   if (embFontID.num >= 0) {
 1626     switch (type) {
 1627     case fontType1:
 1628     case fontType1C:
 1629     case fontType1COT:
 1630       return !hasToUnicode && (!hasEncoding || usedNumericHeuristic);
 1631 
 1632     case fontType3:
 1633       return !hasToUnicode && !hasEncoding;
 1634 
 1635     case fontTrueType:
 1636     case fontTrueTypeOT:
 1637       return !hasToUnicode && !hasEncoding;
 1638 
 1639     default:
 1640       return !hasToUnicode;
 1641     }
 1642 
 1643   } else {
 1644     // NB: type will be fontTypeUnknown if the PDF specifies an
 1645     // invalid font type -- which is ok, if we have a ToUnicode map or
 1646     // an encoding
 1647     return !hasToUnicode && !hasEncoding;
 1648   }
 1649 }
 1650 
 1651 //------------------------------------------------------------------------
 1652 // GfxCIDFont
 1653 //------------------------------------------------------------------------
 1654 
 1655 GfxCIDFont::GfxCIDFont(XRef *xref, const char *tagA, Ref idA, GString *nameA,
 1656                GfxFontType typeA, Ref embFontIDA, Dict *fontDict):
 1657   GfxFont(tagA, idA, nameA, typeA, embFontIDA)
 1658 {
 1659   Dict *desFontDict;
 1660   Object desFontDictObj;
 1661   Object obj1, obj2, obj3, obj4, obj5, obj6;
 1662   CharCodeToUnicode *utu;
 1663   CharCode c;
 1664   Unicode uBuf[8];
 1665   int c1, c2;
 1666   int excepsSize, i, j, k, n;
 1667 
 1668   missingWidth = 0;
 1669   ascent = 0.95;
 1670   descent = -0.35;
 1671   declaredAscent = ascent;
 1672   fontBBox[0] = fontBBox[1] = fontBBox[2] = fontBBox[3] = 0;
 1673   collection = NULL;
 1674   cMap = NULL;
 1675   ctu = NULL;
 1676   ctuUsesCharCode = gTrue;
 1677   widths.defWidth = 1.0;
 1678   widths.defHeight = -1.0;
 1679   widths.defVY = 0.880;
 1680   widths.exceps = NULL;
 1681   widths.nExceps = 0;
 1682   widths.excepsV = NULL;
 1683   widths.nExcepsV = 0;
 1684   cidToGID = NULL;
 1685   cidToGIDLen = 0;
 1686 
 1687   // get the descendant font
 1688   if (!fontDict->lookup("DescendantFonts", &obj1)->isArray() ||
 1689       obj1.arrayGetLength() == 0) {
 1690     error(errSyntaxError, -1,
 1691       "Missing or empty DescendantFonts entry in Type 0 font");
 1692     obj1.free();
 1693     goto err1;
 1694   }
 1695   if (!obj1.arrayGet(0, &desFontDictObj)->isDict()) {
 1696     error(errSyntaxError, -1, "Bad descendant font in Type 0 font");
 1697     goto err2;
 1698   }
 1699   obj1.free();
 1700   desFontDict = desFontDictObj.getDict();
 1701 
 1702   // get info from font descriptor
 1703   readFontDescriptor(xref, desFontDict);
 1704 
 1705   //----- encoding info -----
 1706 
 1707   // char collection
 1708   if (!desFontDict->lookup("CIDSystemInfo", &obj1)->isDict()) {
 1709     error(errSyntaxError, -1,
 1710       "Missing CIDSystemInfo dictionary in Type 0 descendant font");
 1711     goto err2;
 1712   }
 1713   obj1.dictLookup("Registry", &obj2);
 1714   obj1.dictLookup("Ordering", &obj3);
 1715   if (!obj2.isString() || !obj3.isString()) {
 1716     error(errSyntaxError, -1,
 1717       "Invalid CIDSystemInfo dictionary in Type 0 descendant font");
 1718     goto err3;
 1719   }
 1720   collection = obj2.getString()->copy()->append('-')->append(obj3.getString());
 1721   obj3.free();
 1722   obj2.free();
 1723   obj1.free();
 1724 
 1725   // encoding (i.e., CMap)
 1726   if (fontDict->lookup("Encoding", &obj1)->isNull()) {
 1727     error(errSyntaxError, -1, "Missing Encoding entry in Type 0 font");
 1728     goto err2;
 1729   }
 1730   if (!(cMap = CMap::parse(NULL, collection, &obj1))) {
 1731     goto err2;
 1732   }
 1733 
 1734   // check for fonts that use the Identity-H encoding (cmap), and the
 1735   // Adobe-Identity character collection
 1736   identityEnc = obj1.isName("Identity-H") &&
 1737                 !collection->cmp("Adobe-Identity");
 1738 
 1739   obj1.free();
 1740 
 1741   // CIDToGIDMap
 1742   // (the PDF 1.7 spec only allows these for TrueType fonts, but
 1743   // Acrobat apparently also allows them for OpenType CFF fonts -- and
 1744   // the PDF 2.0 spec has removed the prohibition)
 1745   hasIdentityCIDToGID = gFalse;
 1746   desFontDict->lookup("CIDToGIDMap", &obj1);
 1747   if (obj1.isStream()) {
 1748     cidToGIDLen = 0;
 1749     i = 64;
 1750     cidToGID = (int *)gmallocn(i, sizeof(int));
 1751     obj1.streamReset();
 1752     while ((c1 = obj1.streamGetChar()) != EOF &&
 1753        (c2 = obj1.streamGetChar()) != EOF) {
 1754       if (cidToGIDLen == i) {
 1755     i *= 2;
 1756     cidToGID = (int *)greallocn(cidToGID, i, sizeof(int));
 1757       }
 1758       cidToGID[cidToGIDLen++] = (c1 << 8) + c2;
 1759     }
 1760     obj1.streamClose();
 1761     identityEnc = gFalse;
 1762   } else if (obj1.isName("Identity")) {
 1763     hasIdentityCIDToGID = gTrue;
 1764   } else if (!obj1.isNull()) {
 1765     error(errSyntaxError, -1, "Invalid CIDToGIDMap entry in CID font");
 1766   }
 1767   obj1.free();
 1768 
 1769   // look for a ToUnicode CMap
 1770   hasKnownCollection = gFalse;
 1771   if (globalParams->getUseTrueTypeUnicodeMapping()) {
 1772     readTrueTypeUnicodeMapping(xref);
 1773   }
 1774   if (!ctu) {
 1775     ctu = readToUnicodeCMap(fontDict, 16, NULL);
 1776   }
 1777   if (!ctu) {
 1778     ctuUsesCharCode = gFalse;
 1779 
 1780     // use an identity mapping for the "Adobe-Identity" and
 1781     // "Adobe-UCS" collections
 1782     if (!collection->cmp("Adobe-Identity") ||
 1783     !collection->cmp("Adobe-UCS")) {
 1784       ctu = CharCodeToUnicode::makeIdentityMapping();
 1785 
 1786     // look for a user-supplied .cidToUnicode file
 1787     } else if ((ctu = globalParams->getCIDToUnicode(collection))) {
 1788       hasKnownCollection = gTrue;
 1789 
 1790     } else {
 1791       error(errSyntaxError, -1,
 1792         "Unknown character collection '{0:t}'", collection);
 1793 
 1794       // fall back to an identity mapping
 1795       ctu = CharCodeToUnicode::makeIdentityMapping();
 1796     }
 1797   }
 1798 
 1799   // look for a Unicode-to-Unicode mapping
 1800   if (name && (utu = globalParams->getUnicodeToUnicode(name))) {
 1801     if (ctu) {
 1802       if (ctu->isIdentity()) {
 1803     ctu->decRefCnt();
 1804     ctu = utu;
 1805       } else {
 1806     for (c = 0; c < ctu->getLength(); ++c) {
 1807       n = ctu->mapToUnicode(c, uBuf, 8);
 1808       if (n >= 1) {
 1809         n = utu->mapToUnicode((CharCode)uBuf[0], uBuf, 8);
 1810         if (n >= 1) {
 1811           ctu->setMapping(c, uBuf, n);
 1812         }
 1813       }
 1814     }
 1815     utu->decRefCnt();
 1816       }
 1817     } else {
 1818       ctu = utu;
 1819     }
 1820   }
 1821 
 1822   //----- character metrics -----
 1823 
 1824   // default char width
 1825   if (desFontDict->lookup("DW", &obj1)->isNum()) {
 1826     widths.defWidth = obj1.getNum() * 0.001;
 1827   }
 1828   obj1.free();
 1829 
 1830   // char width exceptions
 1831   if (desFontDict->lookup("W", &obj1)->isArray()) {
 1832     excepsSize = 0;
 1833     i = 0;
 1834     while (i + 1 < obj1.arrayGetLength()) {
 1835       obj1.arrayGet(i, &obj2);
 1836       obj1.arrayGet(i + 1, &obj3);
 1837       if (obj2.isInt() && obj3.isInt() && i + 2 < obj1.arrayGetLength()) {
 1838     if (obj1.arrayGet(i + 2, &obj4)->isNum()) {
 1839       if (widths.nExceps == excepsSize) {
 1840         excepsSize += 16;
 1841         widths.exceps = (GfxFontCIDWidthExcep *)
 1842           greallocn(widths.exceps,
 1843             excepsSize, sizeof(GfxFontCIDWidthExcep));
 1844       }
 1845       widths.exceps[widths.nExceps].first = obj2.getInt();
 1846       widths.exceps[widths.nExceps].last = obj3.getInt();
 1847       widths.exceps[widths.nExceps].width = obj4.getNum() * 0.001;
 1848       ++widths.nExceps;
 1849     } else {
 1850       error(errSyntaxError, -1, "Bad widths array in Type 0 font");
 1851     }
 1852     obj4.free();
 1853     i += 3;
 1854       } else if (obj2.isInt() && obj3.isArray()) {
 1855     if (widths.nExceps + obj3.arrayGetLength() > excepsSize) {
 1856       excepsSize = (widths.nExceps + obj3.arrayGetLength() + 15) & ~15;
 1857       widths.exceps = (GfxFontCIDWidthExcep *)
 1858         greallocn(widths.exceps,
 1859               excepsSize, sizeof(GfxFontCIDWidthExcep));
 1860     }
 1861     j = obj2.getInt();
 1862     for (k = 0; k < obj3.arrayGetLength(); ++k) {
 1863       if (obj3.arrayGet(k, &obj4)->isNum()) {
 1864         widths.exceps[widths.nExceps].first = j;
 1865         widths.exceps[widths.nExceps].last = j;
 1866         widths.exceps[widths.nExceps].width = obj4.getNum() * 0.001;
 1867         ++j;
 1868         ++widths.nExceps;
 1869       } else {
 1870         error(errSyntaxError, -1, "Bad widths array in Type 0 font");
 1871       }
 1872       obj4.free();
 1873     }
 1874     i += 2;
 1875       } else {
 1876     error(errSyntaxError, -1, "Bad widths array in Type 0 font");
 1877     ++i;
 1878       }
 1879       obj3.free();
 1880       obj2.free();
 1881     }
 1882   }
 1883   obj1.free();
 1884 
 1885   // default metrics for vertical font
 1886   if (desFontDict->lookup("DW2", &obj1)->isArray() &&
 1887       obj1.arrayGetLength() == 2) {
 1888     if (obj1.arrayGet(0, &obj2)->isNum()) {
 1889       widths.defVY = obj2.getNum() * 0.001;
 1890     }
 1891     obj2.free();
 1892     if (obj1.arrayGet(1, &obj2)->isNum()) {
 1893       widths.defHeight = obj2.getNum() * 0.001;
 1894     }
 1895     obj2.free();
 1896   }
 1897   obj1.free();
 1898 
 1899   // char metric exceptions for vertical font
 1900   if (desFontDict->lookup("W2", &obj1)->isArray()) {
 1901     excepsSize = 0;
 1902     i = 0;
 1903     while (i + 1 < obj1.arrayGetLength()) {
 1904       obj1.arrayGet(i, &obj2);
 1905       obj1.arrayGet(i+ 1, &obj3);
 1906       if (obj2.isInt() && obj3.isInt() && i + 4 < obj1.arrayGetLength()) {
 1907     if (obj1.arrayGet(i + 2, &obj4)->isNum() &&
 1908         obj1.arrayGet(i + 3, &obj5)->isNum() &&
 1909         obj1.arrayGet(i + 4, &obj6)->isNum()) {
 1910       if (widths.nExcepsV == excepsSize) {
 1911         excepsSize += 16;
 1912         widths.excepsV = (GfxFontCIDWidthExcepV *)
 1913           greallocn(widths.excepsV,
 1914             excepsSize, sizeof(GfxFontCIDWidthExcepV));
 1915       }
 1916       widths.excepsV[widths.nExcepsV].first = obj2.getInt();
 1917       widths.excepsV[widths.nExcepsV].last = obj3.getInt();
 1918       widths.excepsV[widths.nExcepsV].height = obj4.getNum() * 0.001;
 1919       widths.excepsV[widths.nExcepsV].vx = obj5.getNum() * 0.001;
 1920       widths.excepsV[widths.nExcepsV].vy = obj6.getNum() * 0.001;
 1921       ++widths.nExcepsV;
 1922     } else {
 1923       error(errSyntaxError, -1, "Bad widths (W2) array in Type 0 font");
 1924     }
 1925     obj6.free();
 1926     obj5.free();
 1927     obj4.free();
 1928     i += 5;
 1929       } else if (obj2.isInt() && obj3.isArray()) {
 1930     if (widths.nExcepsV + obj3.arrayGetLength() / 3 > excepsSize) {
 1931       excepsSize =
 1932         (widths.nExcepsV + obj3.arrayGetLength() / 3 + 15) & ~15;
 1933       widths.excepsV = (GfxFontCIDWidthExcepV *)
 1934         greallocn(widths.excepsV,
 1935               excepsSize, sizeof(GfxFontCIDWidthExcepV));
 1936     }
 1937     j = obj2.getInt();
 1938     for (k = 0; k + 2 < obj3.arrayGetLength(); k += 3) {
 1939       if (obj3.arrayGet(k, &obj4)->isNum() &&
 1940           obj3.arrayGet(k+1, &obj5)->isNum() &&
 1941           obj3.arrayGet(k+2, &obj6)->isNum()) {
 1942         widths.excepsV[widths.nExcepsV].first = j;
 1943         widths.excepsV[widths.nExcepsV].last = j;
 1944         widths.excepsV[widths.nExcepsV].height = obj4.getNum() * 0.001;
 1945         widths.excepsV[widths.nExcepsV].vx = obj5.getNum() * 0.001;
 1946         widths.excepsV[widths.nExcepsV].vy = obj6.getNum() * 0.001;
 1947         ++j;
 1948         ++widths.nExcepsV;
 1949       } else {
 1950         error(errSyntaxError, -1, "Bad widths (W2) array in Type 0 font");
 1951       }
 1952       obj6.free();
 1953       obj5.free();
 1954       obj4.free();
 1955     }
 1956     i += 2;
 1957       } else {
 1958     error(errSyntaxError, -1, "Bad widths (W2) array in Type 0 font");
 1959     ++i;
 1960       }
 1961       obj3.free();
 1962       obj2.free();
 1963     }
 1964   }
 1965   obj1.free();
 1966 
 1967   desFontDictObj.free();
 1968   ok = gTrue;
 1969   return;
 1970 
 1971  err3:
 1972   obj3.free();
 1973   obj2.free();
 1974  err2:
 1975   obj1.free();
 1976   desFontDictObj.free();
 1977  err1:
 1978   error(errSyntaxError, -1, "Failed to parse font object for '{0:t}'", name);
 1979 }
 1980 
 1981 GfxCIDFont::~GfxCIDFont() {
 1982   if (collection) {
 1983     delete collection;
 1984   }
 1985   if (cMap) {
 1986     cMap->decRefCnt();
 1987   }
 1988   if (ctu) {
 1989     ctu->decRefCnt();
 1990   }
 1991   gfree(widths.exceps);
 1992   gfree(widths.excepsV);
 1993   if (cidToGID) {
 1994     gfree(cidToGID);
 1995   }
 1996 }
 1997 
 1998 // Construct a code-to-Unicode mapping, based on the TrueType Unicode
 1999 // cmap (if present).  Constructs ctu if succesful; leaves ctu = null
 2000 // otherwise.  Always leaves ctu = null for non-TrueType fonts.
 2001 void GfxCIDFont::readTrueTypeUnicodeMapping(XRef *xref) {
 2002   char *buf;
 2003   FoFiTrueType *ff;
 2004   Unicode *gidToUnicode, *codeToUnicode;
 2005   Unicode u;
 2006   int bufLen, cmapPlatform, cmapEncoding, unicodeCmap;
 2007   int nGlyphs, nMappings, gid, i;
 2008 
 2009   // must be an embedded TrueType font, with an unknown char collection
 2010   if ((type != fontCIDType2 && type == fontCIDType2OT) ||
 2011       embFontID.num < 0 ||
 2012       hasKnownCollection) {
 2013     goto err0;
 2014   }
 2015 
 2016   // read the embedded font and construct a FoFiTrueType
 2017   if (!(buf = readEmbFontFile(xref, &bufLen))) {
 2018     goto err0;
 2019   }
 2020   if (!(ff = FoFiTrueType::make(buf, bufLen, 0))) {
 2021     goto err1;
 2022   }
 2023 
 2024   // find the TrueType Unicode cmap
 2025   unicodeCmap = -1;
 2026   for (i = 0; i < ff->getNumCmaps(); ++i) {
 2027     cmapPlatform = ff->getCmapPlatform(i);
 2028     cmapEncoding = ff->getCmapEncoding(i);
 2029     if ((cmapPlatform == 3 && cmapEncoding == 1) ||
 2030     (cmapPlatform == 0 && cmapEncoding <= 4)) {
 2031       unicodeCmap = i;
 2032       break;
 2033     }
 2034   }
 2035   if (unicodeCmap < 0) {
 2036     goto err2;
 2037   }
 2038 
 2039   // construct reverse GID-to-Unicode map
 2040   nGlyphs = ff->getNumGlyphs();
 2041   gidToUnicode = (Unicode *)gmallocn(nGlyphs, sizeof(Unicode));
 2042   memset(gidToUnicode, 0, nGlyphs * sizeof(Unicode));
 2043   nMappings = 0;
 2044   for (u = 1; u <= 0xffff; ++u) {
 2045     gid = ff->mapCodeToGID(unicodeCmap, (int)u);
 2046     if (gid > 0 && gid < nGlyphs) {
 2047       gidToUnicode[gid] = u;
 2048       ++nMappings;
 2049     }
 2050   }
 2051   // bail out if the Unicode cmap was completely empty
 2052   if (nMappings == 0) {
 2053     goto err3;
 2054   }
 2055 
 2056   // construct code-to-Unicode map
 2057   codeToUnicode = (Unicode *)gmallocn(65536, sizeof(Unicode));
 2058   memset(codeToUnicode, 0, 65536 * sizeof(Unicode));
 2059   for (i = 0; i <= 0xffff; ++i) {
 2060     // we've already checked for an identity encoding, so CID = i
 2061     if (cidToGID && i < cidToGIDLen) {
 2062       gid = cidToGID[i];
 2063     } else {
 2064       gid = i;
 2065     }
 2066     if (gid < nGlyphs && gidToUnicode[gid] > 0) {
 2067       codeToUnicode[i] = gidToUnicode[gid];
 2068     }
 2069   }
 2070   ctu = CharCodeToUnicode::make16BitToUnicode(codeToUnicode);
 2071 
 2072   gfree(codeToUnicode);
 2073  err3:
 2074   gfree(gidToUnicode);
 2075  err2:
 2076   delete ff;
 2077  err1:
 2078   gfree(buf);
 2079  err0:
 2080   return;
 2081 }
 2082 
 2083 int GfxCIDFont::getNextChar(char *s, int len, CharCode *code,
 2084                 Unicode *u, int uSize, int *uLen,
 2085                 double *dx, double *dy, double *ox, double *oy) {
 2086   CID cid;
 2087   CharCode c;
 2088   int n;
 2089 
 2090   if (!cMap) {
 2091     *code = 0;
 2092     *uLen = 0;
 2093     *dx = *dy = 0;
 2094     return 1;
 2095   }
 2096 
 2097   *code = (CharCode)(cid = cMap->getCID(s, len, &c, &n));
 2098   if (ctu) {
 2099     *uLen = ctu->mapToUnicode(ctuUsesCharCode ? c : cid, u, uSize);
 2100   } else {
 2101     *uLen = 0;
 2102   }
 2103   if (!*uLen && uSize >= 1 && globalParams->getMapUnknownCharNames()) {
 2104     u[0] = *code;
 2105     *uLen = 1;
 2106   }
 2107 
 2108   // horizontal
 2109   if (cMap->getWMode() == 0) {
 2110     getHorizontalMetrics(cid, dx);
 2111     *dy = *ox = *oy = 0;
 2112 
 2113   // vertical
 2114   } else {
 2115     getVerticalMetrics(cid, dy, ox, oy);
 2116     *dx = 0;
 2117   }
 2118 
 2119   return n;
 2120 }
 2121 
 2122 // NB: Section 9.7.4.3 in the PDF 2.0 spec says that, in the case of
 2123 // duplicate entries in the metrics, the first entry should be used.
 2124 // This means we need to leave the metrics in the original order and
 2125 // perform a linear search.  (Or use a more complex data structure.)
 2126 void GfxCIDFont::getHorizontalMetrics(CID cid, double *w) {
 2127   int i;
 2128   for (i = 0; i < widths.nExceps; ++i) {
 2129     if (widths.exceps[i].first <= cid && cid <= widths.exceps[i].last) {
 2130       *w = widths.exceps[i].width;
 2131       return;
 2132     }
 2133   }
 2134   *w = widths.defWidth;
 2135 }
 2136 
 2137 // NB: Section 9.7.4.3 in the PDF 2.0 spec says that, in the case of
 2138 // duplicate entries in the metrics, the first entry should be used.
 2139 // This means we need to leave the metrics in the original order and
 2140 // perform a linear search.  (Or use a more complex data structure.)
 2141 void GfxCIDFont::getVerticalMetrics(CID cid, double *h,
 2142                     double *vx, double *vy) {
 2143   int i;
 2144   for (i = 0; i < widths.nExcepsV; ++i) {
 2145     if (widths.excepsV[i].first <= cid && cid <= widths.excepsV[i].last) {
 2146       *h = widths.excepsV[i].height;
 2147       *vx = widths.excepsV[i].vx;
 2148       *vy = widths.excepsV[i].vy;
 2149       return;
 2150     }
 2151   }
 2152   *h = widths.defHeight;
 2153   getHorizontalMetrics(cid, vx);
 2154   *vx /= 2;
 2155   *vy = widths.defVY;
 2156 }
 2157 
 2158 int GfxCIDFont::getWMode() {
 2159   return cMap ? cMap->getWMode() : 0;
 2160 }
 2161 
 2162 CharCodeToUnicode *GfxCIDFont::getToUnicode() {
 2163   if (ctu) {
 2164     ctu->incRefCnt();
 2165   }
 2166   return ctu;
 2167 }
 2168 
 2169 GString *GfxCIDFont::getCollection() {
 2170   return cMap ? cMap->getCollection() : (GString *)NULL;
 2171 }
 2172 
 2173 double GfxCIDFont::getWidth(CID cid) {
 2174   double w;
 2175 
 2176   getHorizontalMetrics(cid, &w);
 2177   return w;
 2178 }
 2179 
 2180 GBool GfxCIDFont::problematicForUnicode() {
 2181   GString *nameLC;
 2182   GBool symbolic;
 2183 
 2184   // potential inputs:
 2185   // - font is embedded (GfxFont.embFontID.num >= 0)
 2186   // - font name (GfxFont.name)
 2187   // - font type (GfxFont.type)
 2188   // - symbolic (GfxFont.flags & fontSymbolic)
 2189   // - has a ToUnicode map (GfxFont.hasToUnicode)
 2190   // - collection is Adobe-Identity or Adobe-UCS
 2191   //   (GfxCIDFont.collection - compare string)
 2192   // - collection is known AdobeCJK (GfxCIDFont.hasKnownCollection)
 2193   // - has non-Identity CIDToGIDMap (GfxCIDFont.cidToGID != NULL)
 2194   // - has Identity CIDToGIDMap (GfxCIDFont.hasIdentityCIDToGID)
 2195 
 2196   if (name) {
 2197     nameLC = name->copy();
 2198     nameLC->lowerCase();
 2199     symbolic = strstr(nameLC->getCString(), "dingbat") ||
 2200                strstr(nameLC->getCString(), "wingding") ||
 2201                strstr(nameLC->getCString(), "commpi");
 2202     delete nameLC;
 2203     if (symbolic) {
 2204       return gFalse;
 2205     }
 2206   }
 2207 
 2208   if (embFontID.num >= 0) {
 2209     switch (type) {
 2210     case fontCIDType0:
 2211     case fontCIDType0C:
 2212     case fontCIDType0COT:
 2213       return !hasToUnicode && !hasKnownCollection;
 2214 
 2215     case fontCIDType2:
 2216     case fontCIDType2OT:
 2217       return !hasToUnicode && !hasKnownCollection;
 2218 
 2219     default:
 2220       return !hasToUnicode;
 2221     }
 2222 
 2223   } else {
 2224     return !hasToUnicode;
 2225   }
 2226 }
 2227 
 2228 //------------------------------------------------------------------------
 2229 // GfxFontDict
 2230 //------------------------------------------------------------------------
 2231 
 2232 GfxFontDict::GfxFontDict(XRef *xref, Ref *fontDictRef, Dict *fontDict) {
 2233   GfxFont *font;
 2234   char *tag;
 2235   Object obj1, obj2;
 2236   Ref r;
 2237   int i;
 2238 
 2239   fonts = new GHash(gTrue);
 2240   uniqueFonts = new GList();
 2241   for (i = 0; i < fontDict->getLength(); ++i) {
 2242     tag = fontDict->getKey(i);
 2243     fontDict->getValNF(i, &obj1);
 2244     obj1.fetch(xref, &obj2);
 2245     if (!obj2.isDict()) {
 2246       error(errSyntaxError, -1, "font resource is not a dictionary");
 2247     } else if (obj1.isRef() && (font = lookupByRef(obj1.getRef()))) {
 2248       fonts->add(new GString(tag), font);
 2249     } else {
 2250       if (obj1.isRef()) {
 2251     r = obj1.getRef();
 2252       } else if (fontDictRef) {
 2253     // legal generation numbers are five digits, so we use a
 2254     // 6-digit number here
 2255     r.gen = 100000 + fontDictRef->num;
 2256     r.num = i;
 2257       } else {
 2258     // no indirect reference for this font, or for the containing
 2259     // font dict, so hash the font and use that
 2260     r.gen = 100000;
 2261     r.num = hashFontObject(&obj2);
 2262       }
 2263       if ((font = GfxFont::makeFont(xref, tag, r, obj2.getDict()))) {
 2264     if (!font->isOk()) {
 2265       delete font;
 2266     } else {
 2267       uniqueFonts->append(font);
 2268       fonts->add(new GString(tag), font);
 2269     }
 2270       }
 2271     }
 2272     obj1.free();
 2273     obj2.free();
 2274   }
 2275 }
 2276 
 2277 GfxFontDict::~GfxFontDict() {
 2278   deleteGList(uniqueFonts, GfxFont);
 2279   delete fonts;
 2280 }
 2281 
 2282 GfxFont *GfxFontDict::lookup(char *tag) {
 2283   return (GfxFont *)fonts->lookup(tag);
 2284 }
 2285 
 2286 GfxFont *GfxFontDict::lookupByRef(Ref ref) {
 2287   GfxFont *font;
 2288   int i;
 2289 
 2290   for (i = 0; i < uniqueFonts->getLength(); ++i) {
 2291     font = (GfxFont *)uniqueFonts->get(i);
 2292     if (font->getID()->num == ref.num &&
 2293     font->getID()->gen == ref.gen) {
 2294       return font;
 2295     }
 2296   }
 2297   return NULL;
 2298 }
 2299 
 2300 int GfxFontDict::getNumFonts() {
 2301   return uniqueFonts->getLength();
 2302 }
 2303 
 2304 GfxFont *GfxFontDict::getFont(int i) {
 2305   return (GfxFont *)uniqueFonts->get(i);
 2306 }
 2307 
 2308 // FNV-1a hash
 2309 class FNVHash {
 2310 public:
 2311 
 2312   FNVHash() {
 2313     h = 2166136261U;
 2314   }
 2315 
 2316   void hash(char c) {
 2317     h ^= c & 0xff;
 2318     h *= 16777619;
 2319   }
 2320 
 2321   void hash(char *p, int n) {
 2322     int i;
 2323     for (i = 0; i < n; ++i) {
 2324       hash(p[i]);
 2325     }
 2326   }
 2327 
 2328   int get31() {
 2329     return (h ^ (h >> 31)) & 0x7fffffff;
 2330   }
 2331 
 2332 private:
 2333 
 2334   Guint h;
 2335 };
 2336 
 2337 int GfxFontDict::hashFontObject(Object *obj) {
 2338   FNVHash h;
 2339 
 2340   hashFontObject1(obj, &h);
 2341   return h.get31();
 2342 }
 2343 
 2344 void GfxFontDict::hashFontObject1(Object *obj, FNVHash *h) {
 2345   Object obj2;
 2346   GString *s;
 2347   char *p;
 2348   double r;
 2349   int n, i;
 2350 
 2351   switch (obj->getType()) {
 2352   case objBool:
 2353     h->hash('b');
 2354     h->hash(obj->getBool() ? 1 : 0);
 2355     break;
 2356   case objInt:
 2357     h->hash('i');
 2358     n = obj->getInt();
 2359     h->hash((char *)&n, sizeof(int));
 2360     break;
 2361   case objReal:
 2362     h->hash('r');
 2363     r = obj->getReal();
 2364     h->hash((char *)&r, sizeof(double));
 2365     break;
 2366   case objString:
 2367     h->hash('s');
 2368     s = obj->getString();
 2369     h->hash(s->getCString(), s->getLength());
 2370     break;
 2371   case objName:
 2372     h->hash('n');
 2373     p = obj->getName();
 2374     h->hash(p, (int)strlen(p));
 2375     break;
 2376   case objNull:
 2377     h->hash('z');
 2378     break;
 2379   case objArray:
 2380     h->hash('a');
 2381     n = obj->arrayGetLength();
 2382     h->hash((char *)&n, sizeof(int));
 2383     for (i = 0; i < n; ++i) {
 2384       obj->arrayGetNF(i, &obj2);
 2385       hashFontObject1(&obj2, h);
 2386       obj2.free();
 2387     }
 2388     break;
 2389   case objDict:
 2390     h->hash('d');
 2391     n = obj->dictGetLength();
 2392     h->hash((char *)&n, sizeof(int));
 2393     for (i = 0; i < n; ++i) {
 2394       p = obj->dictGetKey(i);
 2395       h->hash(p, (int)strlen(p));
 2396       obj->dictGetValNF(i, &obj2);
 2397       hashFontObject1(&obj2, h);
 2398       obj2.free();
 2399     }
 2400     break;
 2401   case objStream:
 2402     // this should never happen - streams must be indirect refs
 2403     break;
 2404   case objRef:
 2405     h->hash('f');
 2406     n = obj->getRefNum();
 2407     h->hash((char *)&n, sizeof(int));
 2408     n = obj->getRefGen();
 2409     h->hash((char *)&n, sizeof(int));
 2410     break;
 2411   default:
 2412     h->hash('u');
 2413     break;
 2414   }
 2415 }