"Fossies" - the Fresh Open Source Software Archive

Member "xpdf-4.04/xpdf/CharCodeToUnicode.cc" (18 Apr 2022, 19175 Bytes) of package /linux/misc/xpdf-4.04.tar.gz:


As a special service, "Fossies" has tried to format the requested source page into HTML using (guessed) C and C++ source code syntax highlighting (style: standard), with prefixed line numbers and a code folding option. Alternatively, you can view or download the uninterpreted source code file here.

    1 //========================================================================
    2 //
    3 // CharCodeToUnicode.cc
    4 //
    5 // Copyright 2001-2003 Glyph & Cog, LLC
    6 //
    7 //========================================================================
    8 
    9 #include <aconf.h>
   10 
   11 #ifdef USE_GCC_PRAGMAS
   12 #pragma implementation
   13 #endif
   14 
   15 #include <stdio.h>
   16 #include <string.h>
   17 #include "gmem.h"
   18 #include "gmempp.h"
   19 #include "gfile.h"
   20 #include "GString.h"
   21 #include "Error.h"
   22 #include "GlobalParams.h"
   23 #include "PSTokenizer.h"
   24 #include "CharCodeToUnicode.h"
   25 
   26 //------------------------------------------------------------------------
   27 
   28 #define maxUnicodeString 8
   29 
   30 struct CharCodeToUnicodeString {
   31   CharCode c;
   32   Unicode u[maxUnicodeString];
   33   int len;
   34 };
   35 
   36 //------------------------------------------------------------------------
   37 
   38 struct GStringIndex {
   39   GString *s;
   40   int i;
   41 };
   42 
   43 static int getCharFromGString(void *data) {
   44   GStringIndex *idx = (GStringIndex *)data;
   45   if (idx->i >= idx->s->getLength()) {
   46     return EOF;
   47   }
   48   return idx->s->getChar(idx->i++) & 0xff;
   49 }
   50 
   // Character source over a stdio stream: <data> is the FILE* to read
   // from.  Returns whatever fgetc() returns for that stream.
   static int getCharFromFile(void *data) {
     FILE *stream = (FILE *)data;
     return fgetc(stream);
   }
   54 
   55 //------------------------------------------------------------------------
   56 
   // Lookup table indexed by an unsigned character value: the numeric
   // value of the hex digit ('0'-'9', 'A'-'F', 'a'-'f'), or -1 if the
   // character is not a hex digit.
   static int hexCharVals[256] = {
     // 0x00 - 0x0f
     -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
     // 0x10 - 0x1f
     -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
     // 0x20 - 0x2f
     -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
     // 0x30 - 0x3f: '0'..'9'
     0, 1, 2, 3, 4, 5, 6, 7, 8, 9, -1, -1, -1, -1, -1, -1,
     // 0x40 - 0x4f: 'A'..'F'
     -1, 10, 11, 12, 13, 14, 15, -1, -1, -1, -1, -1, -1, -1, -1, -1,
     // 0x50 - 0x5f
     -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
     // 0x60 - 0x6f: 'a'..'f'
     -1, 10, 11, 12, 13, 14, 15, -1, -1, -1, -1, -1, -1, -1, -1, -1,
     // 0x70 - 0x7f
     -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
     // 0x80 - 0x8f
     -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
     // 0x90 - 0x9f
     -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
     // 0xa0 - 0xaf
     -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
     // 0xb0 - 0xbf
     -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
     // 0xc0 - 0xcf
     -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
     // 0xd0 - 0xdf
     -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
     // 0xe0 - 0xef
     -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
     // 0xf0 - 0xff
     -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1
   };
   75 
   76 // Parse a <len>-byte hex string <s> into *<val>.  Returns false on
   77 // error.
   78 static GBool parseHex(char *s, int len, Guint *val) {
   79   int i, x;
   80 
   81   *val = 0;
   82   for (i = 0; i < len; ++i) {
   83     x = hexCharVals[s[i] & 0xff];
   84     if (x < 0) {
   85       return gFalse;
   86     }
   87     *val = (*val << 4) + x;
   88   }
   89   return gTrue;
   90 }
   91 
   92 //------------------------------------------------------------------------
   93 
   94 CharCodeToUnicode *CharCodeToUnicode::makeIdentityMapping() {
   95   return new CharCodeToUnicode();
   96 }
   97 
   98 CharCodeToUnicode *CharCodeToUnicode::parseCIDToUnicode(GString *fileName,
   99                             GString *collection) {
  100   FILE *f;
  101   Unicode *mapA;
  102   CharCode size, mapLenA;
  103   char buf[64];
  104   Unicode u;
  105   CharCodeToUnicode *ctu;
  106 
  107   if (!(f = openFile(fileName->getCString(), "r"))) {
  108     error(errSyntaxError, -1, "Couldn't open cidToUnicode file '{0:t}'",
  109       fileName);
  110     return NULL;
  111   }
  112 
  113   size = 32768;
  114   mapA = (Unicode *)gmallocn(size, sizeof(Unicode));
  115   mapLenA = 0;
  116 
  117   while (getLine(buf, sizeof(buf), f)) {
  118     if (mapLenA == size) {
  119       size *= 2;
  120       mapA = (Unicode *)greallocn(mapA, size, sizeof(Unicode));
  121     }
  122     if (sscanf(buf, "%x", &u) == 1) {
  123       mapA[mapLenA] = u;
  124     } else {
  125       error(errSyntaxWarning, -1,
  126         "Bad line ({0:d}) in cidToUnicode file '{1:t}'",
  127         (int)(mapLenA + 1), fileName);
  128       mapA[mapLenA] = 0;
  129     }
  130     ++mapLenA;
  131   }
  132   fclose(f);
  133 
  134   ctu = new CharCodeToUnicode(collection->copy(), mapA, mapLenA, gTrue,
  135                   NULL, 0, 0);
  136   gfree(mapA);
  137   return ctu;
  138 }
  139 
  140 CharCodeToUnicode *CharCodeToUnicode::parseUnicodeToUnicode(
  141                             GString *fileName) {
  142   FILE *f;
  143   Unicode *mapA;
  144   CharCodeToUnicodeString *sMapA;
  145   CharCode size, oldSize, len, sMapSizeA, sMapLenA;
  146   char buf[256];
  147   char *tok;
  148   Unicode u0;
  149   Unicode uBuf[maxUnicodeString];
  150   CharCodeToUnicode *ctu;
  151   int line, n, i;
  152 
  153   if (!(f = openFile(fileName->getCString(), "r"))) {
  154     error(errSyntaxError, -1, "Couldn't open unicodeToUnicode file '{0:t}'",
  155       fileName);
  156     return NULL;
  157   }
  158 
  159   size = 4096;
  160   mapA = (Unicode *)gmallocn(size, sizeof(Unicode));
  161   memset(mapA, 0, size * sizeof(Unicode));
  162   len = 0;
  163   sMapA = NULL;
  164   sMapSizeA = sMapLenA = 0;
  165 
  166   line = 0;
  167   while (getLine(buf, sizeof(buf), f)) {
  168     ++line;
  169     if (!(tok = strtok(buf, " \t\r\n")) ||
  170     !parseHex(tok, (int)strlen(tok), &u0)) {
  171       error(errSyntaxWarning, -1,
  172         "Bad line ({0:d}) in unicodeToUnicode file '{1:t}'",
  173         line, fileName);
  174       continue;
  175     }
  176     n = 0;
  177     while (n < maxUnicodeString) {
  178       if (!(tok = strtok(NULL, " \t\r\n"))) {
  179     break;
  180       }
  181       if (!parseHex(tok, (int)strlen(tok), &uBuf[n])) {
  182     error(errSyntaxWarning, -1,
  183           "Bad line ({0:d}) in unicodeToUnicode file '{1:t}'",
  184           line, fileName);
  185     break;
  186       }
  187       ++n;
  188     }
  189     if (n < 1) {
  190       error(errSyntaxWarning, -1,
  191         "Bad line ({0:d}) in unicodeToUnicode file '{1:t}'",
  192         line, fileName);
  193       continue;
  194     }
  195     if (u0 >= size) {
  196       oldSize = size;
  197       while (u0 >= size) {
  198     size *= 2;
  199       }
  200       mapA = (Unicode *)greallocn(mapA, size, sizeof(Unicode));
  201       memset(mapA + oldSize, 0, (size - oldSize) * sizeof(Unicode));
  202     }
  203     if (n == 1) {
  204       mapA[u0] = uBuf[0];
  205     } else {
  206       mapA[u0] = 0;
  207       if (sMapLenA == sMapSizeA) {
  208     sMapSizeA += 16;
  209     sMapA = (CharCodeToUnicodeString *)
  210               greallocn(sMapA, sMapSizeA, sizeof(CharCodeToUnicodeString));
  211       }
  212       sMapA[sMapLenA].c = u0;
  213       for (i = 0; i < n; ++i) {
  214     sMapA[sMapLenA].u[i] = uBuf[i];
  215       }
  216       sMapA[sMapLenA].len = n;
  217       ++sMapLenA;
  218     }
  219     if (u0 >= len) {
  220       len = u0 + 1;
  221     }
  222   }
  223   fclose(f);
  224 
  225   ctu = new CharCodeToUnicode(fileName->copy(), mapA, len, gTrue,
  226                   sMapA, sMapLenA, sMapSizeA);
  227   gfree(mapA);
  228   return ctu;
  229 }
  230 
  231 CharCodeToUnicode *CharCodeToUnicode::make8BitToUnicode(Unicode *toUnicode) {
  232   return new CharCodeToUnicode(NULL, toUnicode, 256, gTrue, NULL, 0, 0);
  233 }
  234 
  235 CharCodeToUnicode *CharCodeToUnicode::make16BitToUnicode(Unicode *toUnicode) {
  236   return new CharCodeToUnicode(NULL, toUnicode, 65536, gTrue, NULL, 0, 0);
  237 }
  238 
  239 CharCodeToUnicode *CharCodeToUnicode::parseCMap(GString *buf, int nBits) {
  240   CharCodeToUnicode *ctu;
  241   GStringIndex idx;
  242 
  243   ctu = new CharCodeToUnicode(NULL);
  244   idx.s = buf;
  245   idx.i = 0;
  246   if (!ctu->parseCMap1(&getCharFromGString, &idx, nBits)) {
  247     delete ctu;
  248     return NULL;
  249   }
  250   return ctu;
  251 }
  252 
  253 void CharCodeToUnicode::mergeCMap(GString *buf, int nBits) {
  254   GStringIndex idx;
  255 
  256   idx.s = buf;
  257   idx.i = 0;
  258   parseCMap1(&getCharFromGString, &idx, nBits);
  259 }
  260 
  261 GBool CharCodeToUnicode::parseCMap1(int (*getCharFunc)(void *), void *data,
  262                     int nBits) {
  263   PSTokenizer *pst;
  264   char tok1[256], tok2[256], tok3[256];
  265   int n1, n2, n3;
  266   CharCode i;
  267   CharCode maxCode, code1, code2;
  268   GString *name;
  269   FILE *f;
  270   GBool ok;
  271 
  272   ok = gFalse;
  273   maxCode = (nBits == 8) ? 0xff : (nBits == 16) ? 0xffff : 0xffffffff;
  274   pst = new PSTokenizer(getCharFunc, data);
  275   pst->getToken(tok1, sizeof(tok1), &n1);
  276   while (pst->getToken(tok2, sizeof(tok2), &n2)) {
  277     if (!strcmp(tok2, "usecmap")) {
  278       if (tok1[0] == '/') {
  279     name = new GString(tok1 + 1);
  280     if ((f = globalParams->findToUnicodeFile(name))) {
  281       if (parseCMap1(&getCharFromFile, f, nBits)) {
  282         ok = gTrue;
  283       }
  284       fclose(f);
  285     } else {
  286       error(errSyntaxError, -1,
  287         "Couldn't find ToUnicode CMap file for '{1:t}'",
  288         name);
  289     }
  290     delete name;
  291       }
  292       pst->getToken(tok1, sizeof(tok1), &n1);
  293     } else if (!strcmp(tok2, "beginbfchar")) {
  294       while (pst->getToken(tok1, sizeof(tok1), &n1)) {
  295     if (!strcmp(tok1, "endbfchar")) {
  296       break;
  297     }
  298     if (!pst->getToken(tok2, sizeof(tok2), &n2) ||
  299         !strcmp(tok2, "endbfchar")) {
  300       error(errSyntaxWarning, -1,
  301         "Illegal entry in bfchar block in ToUnicode CMap");
  302       break;
  303     }
  304     if (!(tok1[0] == '<' && tok1[n1 - 1] == '>' &&
  305           tok2[0] == '<' && tok2[n2 - 1] == '>')) {
  306       error(errSyntaxWarning, -1,
  307         "Illegal entry in bfchar block in ToUnicode CMap");
  308       continue;
  309     }
  310     tok1[n1 - 1] = tok2[n2 - 1] = '\0';
  311     if (!parseHex(tok1 + 1, n1 - 2, &code1)) {
  312       error(errSyntaxWarning, -1,
  313         "Illegal entry in bfchar block in ToUnicode CMap");
  314       continue;
  315     }
  316     if (code1 > maxCode) {
  317       error(errSyntaxWarning, -1,
  318         "Invalid entry in bfchar block in ToUnicode CMap");
  319     }
  320     addMapping(code1, tok2 + 1, n2 - 2, 0);
  321     ok = gTrue;
  322       }
  323       pst->getToken(tok1, sizeof(tok1), &n1);
  324     } else if (!strcmp(tok2, "beginbfrange")) {
  325       while (pst->getToken(tok1, sizeof(tok1), &n1)) {
  326     if (!strcmp(tok1, "endbfrange")) {
  327       break;
  328     }
  329     if (!pst->getToken(tok2, sizeof(tok2), &n2) ||
  330         !strcmp(tok2, "endbfrange") ||
  331         !pst->getToken(tok3, sizeof(tok3), &n3) ||
  332         !strcmp(tok3, "endbfrange")) {
  333       error(errSyntaxWarning, -1,
  334         "Illegal entry in bfrange block in ToUnicode CMap");
  335       break;
  336     }
  337     if (!(tok1[0] == '<' && tok1[n1 - 1] == '>' &&
  338           tok2[0] == '<' && tok2[n2 - 1] == '>')) {
  339       error(errSyntaxWarning,
  340         -1, "Illegal entry in bfrange block in ToUnicode CMap");
  341       continue;
  342     }
  343     tok1[n1 - 1] = tok2[n2 - 1] = '\0';
  344     if (!parseHex(tok1 + 1, n1 - 2, &code1) ||
  345         !parseHex(tok2 + 1, n2 - 2, &code2)) {
  346       error(errSyntaxWarning, -1,
  347         "Illegal entry in bfrange block in ToUnicode CMap");
  348       continue;
  349     }
  350     if (code1 > maxCode || code2 > maxCode) {
  351       error(errSyntaxWarning, -1,
  352         "Invalid entry in bfrange block in ToUnicode CMap");
  353       if (code2 > maxCode) {
  354         code2 = maxCode;
  355       }
  356     }
  357     if (!strcmp(tok3, "[")) {
  358       i = 0;
  359       while (pst->getToken(tok1, sizeof(tok1), &n1)) {
  360         if (!strcmp(tok1, "]")) {
  361           break;
  362         }
  363         if (tok1[0] == '<' && tok1[n1 - 1] == '>') {
  364           if (code1 + i <= code2) {
  365         tok1[n1 - 1] = '\0';
  366         addMapping(code1 + i, tok1 + 1, n1 - 2, 0);
  367         ok = gTrue;
  368           }
  369         } else {
  370           error(errSyntaxWarning, -1,
  371             "Illegal entry in bfrange block in ToUnicode CMap");
  372         }
  373         ++i;
  374       }
  375     } else if (tok3[0] == '<' && tok3[n3 - 1] == '>') {
  376       tok3[n3 - 1] = '\0';
  377       for (i = 0; code1 <= code2; ++code1, ++i) {
  378         addMapping(code1, tok3 + 1, n3 - 2, i);
  379         ok = gTrue;
  380       }
  381     } else {
  382       error(errSyntaxWarning, -1,
  383         "Illegal entry in bfrange block in ToUnicode CMap");
  384     }
  385       }
  386       pst->getToken(tok1, sizeof(tok1), &n1);
  387     } else if (!strcmp(tok2, "begincidchar")) {
  388       // the begincidchar operator is not allowed in ToUnicode CMaps,
  389       // but some buggy PDF generators incorrectly use
  390       // code-to-CID-type CMaps here
  391       error(errSyntaxWarning, -1,
  392         "Invalid 'begincidchar' operator in ToUnicode CMap");
  393       while (pst->getToken(tok1, sizeof(tok1), &n1)) {
  394     if (!strcmp(tok1, "endcidchar")) {
  395       break;
  396     }
  397     if (!pst->getToken(tok2, sizeof(tok2), &n2) ||
  398         !strcmp(tok2, "endcidchar")) {
  399       error(errSyntaxWarning, -1,
  400         "Illegal entry in cidchar block in ToUnicode CMap");
  401       break;
  402     }
  403     if (!(tok1[0] == '<' && tok1[n1 - 1] == '>')) {
  404       error(errSyntaxWarning, -1,
  405         "Illegal entry in cidchar block in ToUnicode CMap");
  406       continue;
  407     }
  408     tok1[n1 - 1] = '\0';
  409     if (!parseHex(tok1 + 1, n1 - 2, &code1)) {
  410       error(errSyntaxWarning, -1,
  411         "Illegal entry in cidchar block in ToUnicode CMap");
  412       continue;
  413     }
  414     if (code1 > maxCode) {
  415       error(errSyntaxWarning, -1,
  416         "Invalid entry in cidchar block in ToUnicode CMap");
  417     }
  418     addMappingInt(code1, atoi(tok2));
  419     ok = gTrue;
  420       }
  421       pst->getToken(tok1, sizeof(tok1), &n1);
  422     } else if (!strcmp(tok2, "begincidrange")) {
  423       // the begincidrange operator is not allowed in ToUnicode CMaps,
  424       // but some buggy PDF generators incorrectly use
  425       // code-to-CID-type CMaps here
  426       error(errSyntaxWarning, -1,
  427         "Invalid 'begincidrange' operator in ToUnicode CMap");
  428       while (pst->getToken(tok1, sizeof(tok1), &n1)) {
  429     if (!strcmp(tok1, "endcidrange")) {
  430       break;
  431     }
  432     if (!pst->getToken(tok2, sizeof(tok2), &n2) ||
  433         !strcmp(tok2, "endcidrange") ||
  434         !pst->getToken(tok3, sizeof(tok3), &n3) ||
  435         !strcmp(tok3, "endcidrange")) {
  436       error(errSyntaxWarning, -1,
  437         "Illegal entry in cidrange block in ToUnicode CMap");
  438       break;
  439     }
  440     if (!(tok1[0] == '<' && tok1[n1 - 1] == '>' &&
  441           tok2[0] == '<' && tok2[n2 - 1] == '>')) {
  442       error(errSyntaxWarning,
  443         -1, "Illegal entry in cidrange block in ToUnicode CMap");
  444       continue;
  445     }
  446     tok1[n1 - 1] = tok2[n2 - 1] = '\0';
  447     if (!parseHex(tok1 + 1, n1 - 2, &code1) ||
  448         !parseHex(tok2 + 1, n2 - 2, &code2)) {
  449       error(errSyntaxWarning, -1,
  450         "Illegal entry in cidrange block in ToUnicode CMap");
  451       continue;
  452     }
  453     if (code1 > maxCode || code2 > maxCode) {
  454       error(errSyntaxWarning, -1,
  455         "Invalid entry in cidrange block in ToUnicode CMap");
  456       if (code2 > maxCode) {
  457         code2 = maxCode;
  458       }
  459     }
  460     for (i = atoi(tok3); code1 <= code2; ++code1, ++i) {
  461       addMappingInt(code1, i);
  462       ok = gTrue;
  463     }
  464       }
  465       pst->getToken(tok1, sizeof(tok1), &n1);
  466     } else {
  467       strcpy(tok1, tok2);
  468     }
  469   }
  470   delete pst;
  471   return ok;
  472 }
  473 
  474 void CharCodeToUnicode::addMapping(CharCode code, char *uStr, int n,
  475                    int offset) {
  476   CharCode oldLen, i;
  477   Unicode u[maxUnicodeString];
  478   int uLen, j;
  479 
  480   if (code > 0xffffff) {
  481     // This is an arbitrary limit to avoid integer overflow issues.
  482     // (I've seen CMaps with mappings for <ffffffff>.)
  483     return;
  484   }
  485   if ((uLen = parseUTF16String(uStr, n, u)) == 0) {
  486     return;
  487   }
  488   if (code >= mapLen) {
  489     oldLen = mapLen;
  490     mapLen = mapLen ? 2 * mapLen : 256;
  491     if (code >= mapLen) {
  492       mapLen = (code + 256) & ~255;
  493     }
  494     map = (Unicode *)greallocn(map, mapLen, sizeof(Unicode));
  495     for (i = oldLen; i < mapLen; ++i) {
  496       map[i] = 0;
  497     }
  498   }
  499   if (uLen == 1) {
  500     map[code] = u[0] + offset;
  501   } else {
  502     if (sMapLen >= sMapSize) {
  503       sMapSize = sMapSize + 16;
  504       sMap = (CharCodeToUnicodeString *)
  505            greallocn(sMap, sMapSize, sizeof(CharCodeToUnicodeString));
  506     }
  507     map[code] = 0;
  508     sMap[sMapLen].c = code;
  509     for (j = 0; j < uLen; ++j) {
  510       sMap[sMapLen].u[j] = u[j];
  511     }
  512     sMap[sMapLen].u[uLen - 1] += offset;
  513     sMap[sMapLen].len = uLen;
  514     ++sMapLen;
  515   }
  516 }
  517 
  518 // Convert a UTF-16BE hex string into a sequence of up to
  519 // maxUnicodeString Unicode chars.
  520 int CharCodeToUnicode::parseUTF16String(char *uStr, int n, Unicode *uOut) {
  521   int i = 0;
  522   int uLen = 0;
  523   while (i < n) {
  524     Unicode u;
  525     int j = n;
  526     if (j - i > 4) {
  527       j = i + 4;
  528     }
  529     if (!parseHex(uStr + i, j - i, &u)) {
  530       error(errSyntaxWarning, -1, "Illegal entry in ToUnicode CMap");
  531       return 0;
  532     }
  533     // look for a UTF-16 pair
  534     if (uLen > 0 && uOut[uLen-1] >= 0xd800 && uOut[uLen-1] <= 0xdbff &&
  535     u >= 0xdc00 && u <= 0xdfff) {
  536       uOut[uLen-1] = 0x10000 + ((uOut[uLen-1] & 0x03ff) << 10) + (u & 0x03ff);
  537     } else {
  538       if (uLen < maxUnicodeString) {
  539     uOut[uLen++] = u;
  540       }
  541     }
  542     i = j;
  543   }
  544   return uLen;
  545 }
  546 
  547 void CharCodeToUnicode::addMappingInt(CharCode code, Unicode u) {
  548   CharCode oldLen, i;
  549 
  550   if (code > 0xffffff) {
  551     // This is an arbitrary limit to avoid integer overflow issues.
  552     // (I've seen CMaps with mappings for <ffffffff>.)
  553     return;
  554   }
  555   if (code >= mapLen) {
  556     oldLen = mapLen;
  557     mapLen = mapLen ? 2 * mapLen : 256;
  558     if (code >= mapLen) {
  559       mapLen = (code + 256) & ~255;
  560     }
  561     map = (Unicode *)greallocn(map, mapLen, sizeof(Unicode));
  562     for (i = oldLen; i < mapLen; ++i) {
  563       map[i] = 0;
  564     }
  565   }
  566   map[code] = u;
  567 }
  568 
  569 CharCodeToUnicode::CharCodeToUnicode() {
  570   tag = NULL;
  571   map = NULL;
  572   mapLen = 0;
  573   sMap = NULL;
  574   sMapLen = sMapSize = 0;
  575   refCnt = 1;
  576 }
  577 
  578 CharCodeToUnicode::CharCodeToUnicode(GString *tagA) {
  579   CharCode i;
  580 
  581   tag = tagA;
  582   mapLen = 256;
  583   map = (Unicode *)gmallocn(mapLen, sizeof(Unicode));
  584   for (i = 0; i < mapLen; ++i) {
  585     map[i] = 0;
  586   }
  587   sMap = NULL;
  588   sMapLen = sMapSize = 0;
  589   refCnt = 1;
  590 }
  591 
  592 CharCodeToUnicode::CharCodeToUnicode(GString *tagA, Unicode *mapA,
  593                      CharCode mapLenA, GBool copyMap,
  594                      CharCodeToUnicodeString *sMapA,
  595                      int sMapLenA, int sMapSizeA) {
  596   tag = tagA;
  597   mapLen = mapLenA;
  598   if (copyMap) {
  599     map = (Unicode *)gmallocn(mapLen, sizeof(Unicode));
  600     memcpy(map, mapA, mapLen * sizeof(Unicode));
  601   } else {
  602     map = mapA;
  603   }
  604   sMap = sMapA;
  605   sMapLen = sMapLenA;
  606   sMapSize = sMapSizeA;
  607   refCnt = 1;
  608 }
  609 
  610 CharCodeToUnicode::~CharCodeToUnicode() {
  611   if (tag) {
  612     delete tag;
  613   }
  614   gfree(map);
  615   gfree(sMap);
  616 }
  617 
  618 void CharCodeToUnicode::incRefCnt() {
  619 #if MULTITHREADED
  620   gAtomicIncrement(&refCnt);
  621 #else
  622   ++refCnt;
  623 #endif
  624 }
  625 
  626 void CharCodeToUnicode::decRefCnt() {
  627   GBool done;
  628 
  629 #if MULTITHREADED
  630   done = gAtomicDecrement(&refCnt) == 0;
  631 #else
  632   done = --refCnt == 0;
  633 #endif
  634   if (done) {
  635     delete this;
  636   }
  637 }
  638 
  639 GBool CharCodeToUnicode::match(GString *tagA) {
  640   return tag && !tag->cmp(tagA);
  641 }
  642 
  643 void CharCodeToUnicode::setMapping(CharCode c, Unicode *u, int len) {
  644   int i, j;
  645 
  646   if (!map) {
  647     return;
  648   }
  649   if (len == 1) {
  650     map[c] = u[0];
  651   } else {
  652     for (i = 0; i < sMapLen; ++i) {
  653       if (sMap[i].c == c) {
  654     break;
  655       }
  656     }
  657     if (i == sMapLen) {
  658       if (sMapLen == sMapSize) {
  659     sMapSize += 8;
  660     sMap = (CharCodeToUnicodeString *)
  661              greallocn(sMap, sMapSize, sizeof(CharCodeToUnicodeString));
  662       }
  663       ++sMapLen;
  664     }
  665     map[c] = 0;
  666     sMap[i].c = c;
  667     sMap[i].len = len;
  668     for (j = 0; j < len && j < maxUnicodeString; ++j) {
  669       sMap[i].u[j] = u[j];
  670     }
  671   }
  672 }
  673 
  674 int CharCodeToUnicode::mapToUnicode(CharCode c, Unicode *u, int size) {
  675   int i, j;
  676 
  677   if (!map) {
  678     u[0] = (Unicode)c;
  679     return 1;
  680   }
  681   if (c >= mapLen) {
  682     return 0;
  683   }
  684   if (map[c]) {
  685     u[0] = map[c];
  686     return 1;
  687   }
  688   for (i = 0; i < sMapLen; ++i) {
  689     if (sMap[i].c == c) {
  690       for (j = 0; j < sMap[i].len && j < size; ++j) {
  691     u[j] = sMap[i].u[j];
  692       }
  693       return j;
  694     }
  695   }
  696   return 0;
  697 }
  698 
  699 //------------------------------------------------------------------------
  700 
  701 CharCodeToUnicodeCache::CharCodeToUnicodeCache(int sizeA) {
  702   int i;
  703 
  704   size = sizeA;
  705   cache = (CharCodeToUnicode **)gmallocn(size, sizeof(CharCodeToUnicode *));
  706   for (i = 0; i < size; ++i) {
  707     cache[i] = NULL;
  708   }
  709 }
  710 
  711 CharCodeToUnicodeCache::~CharCodeToUnicodeCache() {
  712   int i;
  713 
  714   for (i = 0; i < size; ++i) {
  715     if (cache[i]) {
  716       cache[i]->decRefCnt();
  717     }
  718   }
  719   gfree(cache);
  720 }
  721 
  722 CharCodeToUnicode *CharCodeToUnicodeCache::getCharCodeToUnicode(GString *tag) {
  723   CharCodeToUnicode *ctu;
  724   int i, j;
  725 
  726   if (cache[0] && cache[0]->match(tag)) {
  727     cache[0]->incRefCnt();
  728     return cache[0];
  729   }
  730   for (i = 1; i < size; ++i) {
  731     if (cache[i] && cache[i]->match(tag)) {
  732       ctu = cache[i];
  733       for (j = i; j >= 1; --j) {
  734     cache[j] = cache[j - 1];
  735       }
  736       cache[0] = ctu;
  737       ctu->incRefCnt();
  738       return ctu;
  739     }
  740   }
  741   return NULL;
  742 }
  743 
  744 void CharCodeToUnicodeCache::add(CharCodeToUnicode *ctu) {
  745   int i;
  746 
  747   if (cache[size - 1]) {
  748     cache[size - 1]->decRefCnt();
  749   }
  750   for (i = size - 1; i >= 1; --i) {
  751     cache[i] = cache[i - 1];
  752   }
  753   cache[0] = ctu;
  754   ctu->incRefCnt();
  755 }