"Fossies" - the Fresh Open Source Software Archive

Member "xpdf-4.04/xpdf/CharCodeToUnicode.cc" (18 Apr 2022, 19175 Bytes) of package /linux/misc/xpdf-4.04.tar.gz:


As a special service, "Fossies" has tried to format the requested source page into HTML format using (guessed) C and C++ source code syntax highlighting (style: standard) with prefixed line numbers and a code folding option. Alternatively, you can view or download the uninterpreted source code file here. For more information about "CharCodeToUnicode.cc", see the Fossies "Dox" file reference documentation and the latest Fossies "Diffs" side-by-side code changes report: 4.03_vs_4.04.

    1 //========================================================================
    2 //
    3 // CharCodeToUnicode.cc
    4 //
    5 // Copyright 2001-2003 Glyph & Cog, LLC
    6 //
    7 //========================================================================
    8 
    9 #include <aconf.h>
   10 
   11 #ifdef USE_GCC_PRAGMAS
   12 #pragma implementation
   13 #endif
   14 
   15 #include <stdio.h>
   16 #include <string.h>
   17 #include "gmem.h"
   18 #include "gmempp.h"
   19 #include "gfile.h"
   20 #include "GString.h"
   21 #include "Error.h"
   22 #include "GlobalParams.h"
   23 #include "PSTokenizer.h"
   24 #include "CharCodeToUnicode.h"
   25 
   26 //------------------------------------------------------------------------
   27 
   28 #define maxUnicodeString 8
   29 
   30 struct CharCodeToUnicodeString {
   31   CharCode c;
   32   Unicode u[maxUnicodeString];
   33   int len;
   34 };
   35 
   36 //------------------------------------------------------------------------
   37 
   38 struct GStringIndex {
   39   GString *s;
   40   int i;
   41 };
   42 
   43 static int getCharFromGString(void *data) {
   44   GStringIndex *idx = (GStringIndex *)data;
   45   if (idx->i >= idx->s->getLength()) {
   46     return EOF;
   47   }
   48   return idx->s->getChar(idx->i++) & 0xff;
   49 }
   50 
   // Character-source callback: <data> is a FILE *.  Returns whatever
   // fgetc() returns for that stream (the next byte, or EOF).
   static int getCharFromFile(void *data) {
     FILE *in = (FILE *)data;
     return fgetc(in);
   }
   54 
   55 //------------------------------------------------------------------------
   56 
   // Lookup table indexed by byte value: the numeric value (0..15) of the
   // hex digit characters '0'-'9', 'A'-'F' and 'a'-'f', and -1 for every
   // other byte.  The table is only ever read, so it is declared const.
   static const int hexCharVals[256] = {
     -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, // 0x
     -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, // 1x
     -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, // 2x
      0,  1,  2,  3,  4,  5,  6,  7,  8,  9, -1, -1, -1, -1, -1, -1, // 3x
     -1, 10, 11, 12, 13, 14, 15, -1, -1, -1, -1, -1, -1, -1, -1, -1, // 4x
     -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, // 5x
     -1, 10, 11, 12, 13, 14, 15, -1, -1, -1, -1, -1, -1, -1, -1, -1, // 6x
     -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, // 7x
     -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, // 8x
     -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, // 9x
     -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, // Ax
     -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, // Bx
     -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, // Cx
     -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, // Dx
     -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, // Ex
     -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1  // Fx
   };
   75 
   76 // Parse a <len>-byte hex string <s> into *<val>.  Returns false on
   77 // error.
   78 static GBool parseHex(char *s, int len, Guint *val) {
   79   int i, x;
   80 
   81   *val = 0;
   82   for (i = 0; i < len; ++i) {
   83     x = hexCharVals[s[i] & 0xff];
   84     if (x < 0) {
   85       return gFalse;
   86     }
   87     *val = (*val << 4) + x;
   88   }
   89   return gTrue;
   90 }
   91 
   92 //------------------------------------------------------------------------
   93 
   94 CharCodeToUnicode *CharCodeToUnicode::makeIdentityMapping() {
   95   return new CharCodeToUnicode();
   96 }
   97 
   98 CharCodeToUnicode *CharCodeToUnicode::parseCIDToUnicode(GString *fileName,
   99                             GString *collection) {
  100   FILE *f;
  101   Unicode *mapA;
  102   CharCode size, mapLenA;
  103   char buf[64];
  104   Unicode u;
  105   CharCodeToUnicode *ctu;
  106 
  107   if (!(f = openFile(fileName->getCString(), "r"))) {
  108     error(errSyntaxError, -1, "Couldn't open cidToUnicode file '{0:t}'",
  109       fileName);
  110     return NULL;
  111   }
  112 
  113   size = 32768;
  114   mapA = (Unicode *)gmallocn(size, sizeof(Unicode));
  115   mapLenA = 0;
  116 
  117   while (getLine(buf, sizeof(buf), f)) {
  118     if (mapLenA == size) {
  119       size *= 2;
  120       mapA = (Unicode *)greallocn(mapA, size, sizeof(Unicode));
  121     }
  122     if (sscanf(buf, "%x", &u) == 1) {
  123       mapA[mapLenA] = u;
  124     } else {
  125       error(errSyntaxWarning, -1,
  126         "Bad line ({0:d}) in cidToUnicode file '{1:t}'",
  127         (int)(mapLenA + 1), fileName);
  128       mapA[mapLenA] = 0;
  129     }
  130     ++mapLenA;
  131   }
  132   fclose(f);
  133 
  134   ctu = new CharCodeToUnicode(collection->copy(), mapA, mapLenA, gTrue,
  135                   NULL, 0, 0);
  136   gfree(mapA);
  137   return ctu;
  138 }
  139 
  140 CharCodeToUnicode *CharCodeToUnicode::parseUnicodeToUnicode(
  141                             GString *fileName) {
  142   FILE *f;
  143   Unicode *mapA;
  144   CharCodeToUnicodeString *sMapA;
  145   CharCode size, oldSize, len, sMapSizeA, sMapLenA;
  146   char buf[256];
  147   char *tok;
  148   Unicode u0;
  149   Unicode uBuf[maxUnicodeString];
  150   CharCodeToUnicode *ctu;
  151   int line, n, i;
  152 
  153   if (!(f = openFile(fileName->getCString(), "r"))) {
  154     error(errSyntaxError, -1, "Couldn't open unicodeToUnicode file '{0:t}'",
  155       fileName);
  156     return NULL;
  157   }
  158 
  159   size = 4096;
  160   mapA = (Unicode *)gmallocn(size, sizeof(Unicode));
  161   memset(mapA, 0, size * sizeof(Unicode));
  162   len = 0;
  163   sMapA = NULL;
  164   sMapSizeA = sMapLenA = 0;
  165 
  166   line = 0;
  167   while (getLine(buf, sizeof(buf), f)) {
  168     ++line;
  169     if (!(tok = strtok(buf, " \t\r\n")) ||
  170     !parseHex(tok, (int)strlen(tok), &u0)) {
  171       error(errSyntaxWarning, -1,
  172         "Bad line ({0:d}) in unicodeToUnicode file '{1:t}'",
  173         line, fileName);
  174       continue;
  175     }
  176     n = 0;
  177     while (n < maxUnicodeString) {
  178       if (!(tok = strtok(NULL, " \t\r\n"))) {
  179     break;
  180       }
  181       if (!parseHex(tok, (int)strlen(tok), &uBuf[n])) {
  182     error(errSyntaxWarning, -1,
  183           "Bad line ({0:d}) in unicodeToUnicode file '{1:t}'",
  184           line, fileName);
  185     break;
  186       }
  187       ++n;
  188     }
  189     if (n < 1) {
  190       error(errSyntaxWarning, -1,
  191         "Bad line ({0:d}) in unicodeToUnicode file '{1:t}'",
  192         line, fileName);
  193       continue;
  194     }
  195     if (u0 >= size) {
  196       oldSize = size;
  197       while (u0 >= size) {
  198     size *= 2;
  199       }
  200       mapA = (Unicode *)greallocn(mapA, size, sizeof(Unicode));
  201       memset(mapA + oldSize, 0, (size - oldSize) * sizeof(Unicode));
  202     }
  203     if (n == 1) {
  204       mapA[u0] = uBuf[0];
  205     } else {
  206       mapA[u0] = 0;
  207       if (sMapLenA == sMapSizeA) {
  208     sMapSizeA += 16;
  209     sMapA = (CharCodeToUnicodeString *)
  210               greallocn(sMapA, sMapSizeA, sizeof(CharCodeToUnicodeString));
  211       }
  212       sMapA[sMapLenA].c = u0;
  213       for (i = 0; i < n; ++i) {
  214     sMapA[sMapLenA].u[i] = uBuf[i];
  215       }
  216       sMapA[sMapLenA].len = n;
  217       ++sMapLenA;
  218     }
  219     if (u0 >= len) {
  220       len = u0 + 1;
  221     }
  222   }
  223   fclose(f);
  224 
  225   ctu = new CharCodeToUnicode(fileName->copy(), mapA, len, gTrue,
  226                   sMapA, sMapLenA, sMapSizeA);
  227   gfree(mapA);
  228   return ctu;
  229 }
  230 
  231 CharCodeToUnicode *CharCodeToUnicode::make8BitToUnicode(Unicode *toUnicode) {
  232   return new CharCodeToUnicode(NULL, toUnicode, 256, gTrue, NULL, 0, 0);
  233 }
  234 
  235 CharCodeToUnicode *CharCodeToUnicode::make16BitToUnicode(Unicode *toUnicode) {
  236   return new CharCodeToUnicode(NULL, toUnicode, 65536, gTrue, NULL, 0, 0);
  237 }
  238 
  239 CharCodeToUnicode *CharCodeToUnicode::parseCMap(GString *buf, int nBits) {
  240   CharCodeToUnicode *ctu;
  241   GStringIndex idx;
  242 
  243   ctu = new CharCodeToUnicode(NULL);
  244   idx.s = buf;
  245   idx.i = 0;
  246   if (!ctu->parseCMap1(&getCharFromGString, &idx, nBits)) {
  247     delete ctu;
  248     return NULL;
  249   }
  250   return ctu;
  251 }
  252 
  253 void CharCodeToUnicode::mergeCMap(GString *buf, int nBits) {
  254   GStringIndex idx;
  255 
  256   idx.s = buf;
  257   idx.i = 0;
  258   parseCMap1(&getCharFromGString, &idx, nBits);
  259 }
  260 
  261 GBool CharCodeToUnicode::parseCMap1(int (*getCharFunc)(void *), void *data,
  262                     int nBits) {
  263   PSTokenizer *pst;
  264   char tok1[256], tok2[256], tok3[256];
  265   int n1, n2, n3;
  266   CharCode i;
  267   CharCode maxCode, code1, code2;
  268   GString *name;
  269   FILE *f;
  270   GBool ok;
  271 
  272   ok = gFalse;
  273   maxCode = (nBits == 8) ? 0xff : (nBits == 16) ? 0xffff : 0xffffffff;
  274   pst = new PSTokenizer(getCharFunc, data);
  275   pst->getToken(tok1, sizeof(tok1), &n1);
  276   while (pst->getToken(tok2, sizeof(tok2), &n2)) {
  277     if (!strcmp(tok2, "usecmap")) {
  278       if (tok1[0] == '/') {
  279     name = new GString(tok1 + 1);
  280     if ((f = globalParams->findToUnicodeFile(name))) {
  281       if (parseCMap1(&getCharFromFile, f, nBits)) {
  282         ok = gTrue;
  283       }
  284       fclose(f);
  285     } else {
  286       error(errSyntaxError, -1,
  287         "Couldn't find ToUnicode CMap file for '{1:t}'",
  288         name);
  289     }
  290     delete name;
  291       }
  292       pst->getToken(tok1, sizeof(tok1), &n1);
  293     } else if (!strcmp(tok2, "beginbfchar")) {
  294       while (pst->getToken(tok1, sizeof(tok1), &n1)) {
  295     if (!strcmp(tok1, "endbfchar")) {
  296       break;
  297     }
  298     if (!pst->getToken(tok2, sizeof(tok2), &n2) ||
  299         !strcmp(tok2, "endbfchar")) {
  300       error(errSyntaxWarning, -1,
  301         "Illegal entry in bfchar block in ToUnicode CMap");
  302       break;
  303     }
  304     if (!(tok1[0] == '<' && tok1[n1 - 1] == '>' &&
  305           tok2[0] == '<' && tok2[n2 - 1] == '>')) {
  306       error(errSyntaxWarning, -1,
  307         "Illegal entry in bfchar block in ToUnicode CMap");
  308       continue;
  309     }
  310     tok1[n1 - 1] = tok2[n2 - 1] = '\0';
  311     if (!parseHex(tok1 + 1, n1 - 2, &code1)) {
  312       error(errSyntaxWarning, -1,
  313         "Illegal entry in bfchar block in ToUnicode CMap");
  314       continue;
  315     }
  316     if (code1 > maxCode) {
  317       error(errSyntaxWarning, -1,
  318         "Invalid entry in bfchar block in ToUnicode CMap");
  319     }
  320     addMapping(code1, tok2 + 1, n2 - 2, 0);
  321     ok = gTrue;
  322       }
  323       pst->getToken(tok1, sizeof(tok1), &n1);
  324     } else if (!strcmp(tok2, "beginbfrange")) {
  325       while (pst->getToken(tok1, sizeof(tok1), &n1)) {
  326     if (!strcmp(tok1, "endbfrange")) {
  327       break;
  328     }
  329     if (!pst->getToken(tok2, sizeof(tok2), &n2) ||
  330         !strcmp(tok2, "endbfrange") ||
  331         !pst->getToken(tok3, sizeof(tok3), &n3) ||
  332         !strcmp(tok3, "endbfrange")) {
  333       error(errSyntaxWarning, -1,
  334         "Illegal entry in bfrange block in ToUnicode CMap");
  335       break;
  336     }
  337     if (!(tok1[0] == '<' && tok1[n1 - 1] == '>' &&
  338           tok2[0] == '<' && tok2[n2 - 1] == '>')) {
  339       error(errSyntaxWarning,
  340         -1, "Illegal entry in bfrange block in ToUnicode CMap");
  341       continue;
  342     }
  343     tok1[n1 - 1] = tok2[n2 - 1] = '\0';
  344     if (!parseHex(tok1 + 1, n1 - 2, &code1) ||
  345         !parseHex(tok2 + 1, n2 - 2, &code2)) {
  346       error(errSyntaxWarning, -1,
  347         "Illegal entry in bfrange block in ToUnicode CMap");
  348       continue;
  349     }
  350     if (code1 > maxCode || code2 > maxCode) {
  351       error(errSyntaxWarning, -1,
  352         "Invalid entry in bfrange block in ToUnicode CMap");
  353       if (code2 > maxCode) {
  354         code2 = maxCode;
  355       }
  356     }
  357     if (!strcmp(tok3, "[")) {
  358       i = 0;
  359       while (pst->getToken(tok1, sizeof(tok1), &n1)) {
  360         if (!strcmp(tok1, "]")) {
  361           break;
  362         }
  363         if (tok1[0] == '<' && tok1[n1 - 1] == '>') {
  364           if (code1 + i <= code2) {
  365         tok1[n1 - 1] = '\0';
  366         addMapping(code1 + i, tok1 + 1, n1 - 2, 0);
  367         ok = gTrue;
  368           }
  369         } else {
  370           error(errSyntaxWarning, -1,
  371             "Illegal entry in bfrange block in ToUnicode CMap");
  372         }
  373         ++i;
  374       }
  375     } else if (tok3[0] == '<' && tok3[n3 - 1] == '>') {
  376       tok3[n3 - 1] = '\0';
  377       for (i = 0; code1 <= code2; ++code1, ++i) {
  378         addMapping(code1, tok3 + 1, n3 - 2, i);
  379         ok = gTrue;
  380       }
  381     } else {
  382       error(errSyntaxWarning, -1,
  383         "Illegal entry in bfrange block in ToUnicode CMap");
  384     }
  385       }
  386       pst->getToken(tok1, sizeof(tok1), &n1);
  387     } else if (!strcmp(tok2, "begincidchar")) {
  388       // the begincidchar operator is not allowed in ToUnicode CMaps,
  389       // but some buggy PDF generators incorrectly use
  390       // code-to-CID-type CMaps here
  391       error(errSyntaxWarning, -1,
  392         "Invalid 'begincidchar' operator in ToUnicode CMap");
  393       while (pst->getToken(tok1, sizeof(tok1), &n1)) {
  394     if (!strcmp(tok1, "endcidchar")) {
  395       break;
  396     }
  397     if (!pst->getToken(tok2, sizeof(tok2), &n2) ||
  398         !strcmp(tok2, "endcidchar")) {
  399       error(errSyntaxWarning, -1,
  400         "Illegal entry in cidchar block in ToUnicode CMap");
  401       break;
  402     }
  403     if (!(tok1[0] == '<' && tok1[n1 - 1] == '>')) {
  404       error(errSyntaxWarning, -1,
  405         "Illegal entry in cidchar block in ToUnicode CMap");
  406       continue;
  407     }
  408     tok1[n1 - 1] = '\0';
  409     if (!parseHex(tok1 + 1, n1 - 2, &code1)) {
  410       error(errSyntaxWarning, -1,
  411         "Illegal entry in cidchar block in ToUnicode CMap");
  412       continue;
  413     }
  414     if (code1 > maxCode) {
  415       error(errSyntaxWarning, -1,
  416         "Invalid entry in cidchar block in ToUnicode CMap");
  417     }
  418     addMappingInt(code1, atoi(tok2));
  419     ok = gTrue;
  420       }
  421       pst->getToken(tok1, sizeof(tok1), &n1);
  422     } else if (!strcmp(tok2, "begincidrange")) {
  423       // the begincidrange operator is not allowed in ToUnicode CMaps,
  424       // but some buggy PDF generators incorrectly use
  425       // code-to-CID-type CMaps here
  426       error(errSyntaxWarning, -1,
  427         "Invalid 'begincidrange' operator in ToUnicode CMap");
  428       while (pst->getToken(tok1, sizeof(tok1), &n1)) {
  429     if (!strcmp(tok1, "endcidrange")) {
  430       break;
  431     }
  432     if (!pst->getToken(tok2, sizeof(tok2), &n2) ||
  433         !strcmp(tok2, "endcidrange") ||
  434         !pst->getToken(tok3, sizeof(tok3), &n3) ||
  435         !strcmp(tok3, "endcidrange")) {
  436       error(errSyntaxWarning, -1,
  437         "Illegal entry in cidrange block in ToUnicode CMap");
  438       break;
  439     }
  440     if (!(tok1[0] == '<' && tok1[n1 - 1] == '>' &&
  441           tok2[0] == '<' && tok2[n2 - 1] == '>')) {
  442       error(errSyntaxWarning,
  443         -1, "Illegal entry in cidrange block in ToUnicode CMap");
  444       continue;
  445     }
  446     tok1[n1 - 1] = tok2[n2 - 1] = '\0';
  447     if (!parseHex(tok1 + 1, n1 - 2, &code1) ||
  448         !parseHex(tok2 + 1, n2 - 2, &code2)) {
  449       error(errSyntaxWarning, -1,
  450         "Illegal entry in cidrange block in ToUnicode CMap");
  451       continue;
  452     }
  453     if (code1 > maxCode || code2 > maxCode) {
  454       error(errSyntaxWarning, -1,
  455         "Invalid entry in cidrange block in ToUnicode CMap");
  456       if (code2 > maxCode) {
  457         code2 = maxCode;
  458       }
  459     }
  460     for (i = atoi(tok3); code1 <= code2; ++code1, ++i) {
  461       addMappingInt(code1, i);
  462       ok = gTrue;
  463     }
  464       }
  465       pst->getToken(tok1, sizeof(tok1), &n1);
  466     } else {
  467       strcpy(tok1, tok2);
  468     }
  469   }
  470   delete pst;
  471   return ok;
  472 }
  473 
  474 void CharCodeToUnicode::addMapping(CharCode code, char *uStr, int n,
  475                    int offset) {
  476   CharCode oldLen, i;
  477   Unicode u[maxUnicodeString];
  478   int uLen, j;
  479 
  480   if (code > 0xffffff) {
  481     // This is an arbitrary limit to avoid integer overflow issues.
  482     // (I've seen CMaps with mappings for <ffffffff>.)
  483     return;
  484   }
  485   if ((uLen = parseUTF16String(uStr, n, u)) == 0) {
  486     return;
  487   }
  488   if (code >= mapLen) {
  489     oldLen = mapLen;
  490     mapLen = mapLen ? 2 * mapLen : 256;
  491     if (code >= mapLen) {
  492       mapLen = (code + 256) & ~255;
  493     }
  494     map = (Unicode *)greallocn(map, mapLen, sizeof(Unicode));
  495     for (i = oldLen; i < mapLen; ++i) {
  496       map[i] = 0;
  497     }
  498   }
  499   if (uLen == 1) {
  500     map[code] = u[0] + offset;
  501   } else {
  502     if (sMapLen >= sMapSize) {
  503       sMapSize = sMapSize + 16;
  504       sMap = (CharCodeToUnicodeString *)
  505            greallocn(sMap, sMapSize, sizeof(CharCodeToUnicodeString));
  506     }
  507     map[code] = 0;
  508     sMap[sMapLen].c = code;
  509     for (j = 0; j < uLen; ++j) {
  510       sMap[sMapLen].u[j] = u[j];
  511     }
  512     sMap[sMapLen].u[uLen - 1] += offset;
  513     sMap[sMapLen].len = uLen;
  514     ++sMapLen;
  515   }
  516 }
  517 
  518 // Convert a UTF-16BE hex string into a sequence of up to
  519 // maxUnicodeString Unicode chars.
  520 int CharCodeToUnicode::parseUTF16String(char *uStr, int n, Unicode *uOut) {
  521   int i = 0;
  522   int uLen = 0;
  523   while (i < n) {
  524     Unicode u;
  525     int j = n;
  526     if (j - i > 4) {
  527       j = i + 4;
  528     }
  529     if (!parseHex(uStr + i, j - i, &u)) {
  530       error(errSyntaxWarning, -1, "Illegal entry in ToUnicode CMap");
  531       return 0;
  532     }
  533     // look for a UTF-16 pair
  534     if (uLen > 0 && uOut[uLen-1] >= 0xd800 && uOut[uLen-1] <= 0xdbff &&
  535     u >= 0xdc00 && u <= 0xdfff) {
  536       uOut[uLen-1] = 0x10000 + ((uOut[uLen-1] & 0x03ff) << 10) + (u & 0x03ff);
  537     } else {
  538       if (uLen < maxUnicodeString) {
  539     uOut[uLen++] = u;
  540       }
  541     }
  542     i = j;
  543   }
  544   return uLen;
  545 }
  546 
  547 void CharCodeToUnicode::addMappingInt(CharCode code, Unicode u) {
  548   CharCode oldLen, i;
  549 
  550   if (code > 0xffffff) {
  551     // This is an arbitrary limit to avoid integer overflow issues.
  552     // (I've seen CMaps with mappings for <ffffffff>.)
  553     return;
  554   }
  555   if (code >= mapLen) {
  556     oldLen = mapLen;
  557     mapLen = mapLen ? 2 * mapLen : 256;
  558     if (code >= mapLen) {
  559       mapLen = (code + 256) & ~255;
  560     }
  561     map = (Unicode *)greallocn(map, mapLen, sizeof(Unicode));
  562     for (i = oldLen; i < mapLen; ++i) {
  563       map[i] = 0;
  564     }
  565   }
  566   map[code] = u;
  567 }
  568 
  569 CharCodeToUnicode::CharCodeToUnicode() {
  570   tag = NULL;
  571   map = NULL;
  572   mapLen = 0;
  573   sMap = NULL;
  574   sMapLen = sMapSize = 0;
  575   refCnt = 1;
  576 }
  577 
  578 CharCodeToUnicode::CharCodeToUnicode(GString *tagA) {
  579   CharCode i;
  580 
  581   tag = tagA;
  582   mapLen = 256;
  583   map = (Unicode *)gmallocn(mapLen, sizeof(Unicode));
  584   for (i = 0; i < mapLen; ++i) {
  585     map[i] = 0;
  586   }
  587   sMap = NULL;
  588   sMapLen = sMapSize = 0;
  589   refCnt = 1;
  590 }
  591 
  592 CharCodeToUnicode::CharCodeToUnicode(GString *tagA, Unicode *mapA,
  593                      CharCode mapLenA, GBool copyMap,
  594                      CharCodeToUnicodeString *sMapA,
  595                      int sMapLenA, int sMapSizeA) {
  596   tag = tagA;
  597   mapLen = mapLenA;
  598   if (copyMap) {
  599     map = (Unicode *)gmallocn(mapLen, sizeof(Unicode));
  600     memcpy(map, mapA, mapLen * sizeof(Unicode));
  601   } else {
  602     map = mapA;
  603   }
  604   sMap = sMapA;
  605   sMapLen = sMapLenA;
  606   sMapSize = sMapSizeA;
  607   refCnt = 1;
  608 }
  609 
  610 CharCodeToUnicode::~CharCodeToUnicode() {
  611   if (tag) {
  612     delete tag;
  613   }
  614   gfree(map);
  615   gfree(sMap);
  616 }
  617 
  618 void CharCodeToUnicode::incRefCnt() {
  619 #if MULTITHREADED
  620   gAtomicIncrement(&refCnt);
  621 #else
  622   ++refCnt;
  623 #endif
  624 }
  625 
  626 void CharCodeToUnicode::decRefCnt() {
  627   GBool done;
  628 
  629 #if MULTITHREADED
  630   done = gAtomicDecrement(&refCnt) == 0;
  631 #else
  632   done = --refCnt == 0;
  633 #endif
  634   if (done) {
  635     delete this;
  636   }
  637 }
  638 
  639 GBool CharCodeToUnicode::match(GString *tagA) {
  640   return tag && !tag->cmp(tagA);
  641 }
  642 
  643 void CharCodeToUnicode::setMapping(CharCode c, Unicode *u, int len) {
  644   int i, j;
  645 
  646   if (!map) {
  647     return;
  648   }
  649   if (len == 1) {
  650     map[c] = u[0];
  651   } else {
  652     for (i = 0; i < sMapLen; ++i) {
  653       if (sMap[i].c == c) {
  654     break;
  655       }
  656     }
  657     if (i == sMapLen) {
  658       if (sMapLen == sMapSize) {
  659     sMapSize += 8;
  660     sMap = (CharCodeToUnicodeString *)
  661              greallocn(sMap, sMapSize, sizeof(CharCodeToUnicodeString));
  662       }
  663       ++sMapLen;
  664     }
  665     map[c] = 0;
  666     sMap[i].c = c;
  667     sMap[i].len = len;
  668     for (j = 0; j < len && j < maxUnicodeString; ++j) {
  669       sMap[i].u[j] = u[j];
  670     }
  671   }
  672 }
  673 
  674 int CharCodeToUnicode::mapToUnicode(CharCode c, Unicode *u, int size) {
  675   int i, j;
  676 
  677   if (!map) {
  678     u[0] = (Unicode)c;
  679     return 1;
  680   }
  681   if (c >= mapLen) {
  682     return 0;
  683   }
  684   if (map[c]) {
  685     u[0] = map[c];
  686     return 1;
  687   }
  688   for (i = 0; i < sMapLen; ++i) {
  689     if (sMap[i].c == c) {
  690       for (j = 0; j < sMap[i].len && j < size; ++j) {
  691     u[j] = sMap[i].u[j];
  692       }
  693       return j;
  694     }
  695   }
  696   return 0;
  697 }
  698 
  699 //------------------------------------------------------------------------
  700 
  701 CharCodeToUnicodeCache::CharCodeToUnicodeCache(int sizeA) {
  702   int i;
  703 
  704   size = sizeA;
  705   cache = (CharCodeToUnicode **)gmallocn(size, sizeof(CharCodeToUnicode *));
  706   for (i = 0; i < size; ++i) {
  707     cache[i] = NULL;
  708   }
  709 }
  710 
  711 CharCodeToUnicodeCache::~CharCodeToUnicodeCache() {
  712   int i;
  713 
  714   for (i = 0; i < size; ++i) {
  715     if (cache[i]) {
  716       cache[i]->decRefCnt();
  717     }
  718   }
  719   gfree(cache);
  720 }
  721 
  722 CharCodeToUnicode *CharCodeToUnicodeCache::getCharCodeToUnicode(GString *tag) {
  723   CharCodeToUnicode *ctu;
  724   int i, j;
  725 
  726   if (cache[0] && cache[0]->match(tag)) {
  727     cache[0]->incRefCnt();
  728     return cache[0];
  729   }
  730   for (i = 1; i < size; ++i) {
  731     if (cache[i] && cache[i]->match(tag)) {
  732       ctu = cache[i];
  733       for (j = i; j >= 1; --j) {
  734     cache[j] = cache[j - 1];
  735       }
  736       cache[0] = ctu;
  737       ctu->incRefCnt();
  738       return ctu;
  739     }
  740   }
  741   return NULL;
  742 }
  743 
  744 void CharCodeToUnicodeCache::add(CharCodeToUnicode *ctu) {
  745   int i;
  746 
  747   if (cache[size - 1]) {
  748     cache[size - 1]->decRefCnt();
  749   }
  750   for (i = size - 1; i >= 1; --i) {
  751     cache[i] = cache[i - 1];
  752   }
  753   cache[0] = ctu;
  754   ctu->incRefCnt();
  755 }