"Fossies" - the Fresh Open Source Software Archive

Member "poppler-0.82.0/poppler/UnicodeMap.cc" (25 Oct 2019, 8470 Bytes) of package /linux/misc/poppler-0.82.0.tar.xz:


As a special service, "Fossies" has tried to format the requested source page into HTML using (guessed) C and C++ source code syntax highlighting (style: standard), with prefixed line numbers and a code folding option. Alternatively, you can view or download the uninterpreted source code file here. For more information about "UnicodeMap.cc", see the Fossies "Dox" file reference documentation and the latest Fossies "Diffs" side-by-side code changes report: 0.81.0_vs_0.82.0.

    1 //========================================================================
    2 //
    3 // UnicodeMap.cc
    4 //
    5 // Copyright 2001-2003 Glyph & Cog, LLC
    6 //
    7 //========================================================================
    8 
    9 //========================================================================
   10 //
   11 // Modified under the Poppler project - http://poppler.freedesktop.org
   12 //
   13 // All changes made under the Poppler project to this file are licensed
   14 // under GPL version 2 or later
   15 //
   16 // Copyright (C) 2010 Jakub Wilk <jwilk@jwilk.net>
   17 // Copyright (C) 2017-2019 Albert Astals Cid <aacid@kde.org>
   18 // Copyright (C) 2017 Adrian Johnson <ajohnson@redneon.com>
   19 // Copyright (C) 2017 Jean Ghali <jghali@libertysurf.fr>
   20 // Copyright (C) 2018 Adam Reichold <adam.reichold@t-online.de>
   21 // Copyright (C) 2019 Oliver Sander <oliver.sander@tu-dresden.de>
   22 // Copyright (C) 2019 Volker Krause <vkrause@kde.org>
   23 //
   24 // To see a description of the changes please see the Changelog file that
   25 // came with your tarball or type make ChangeLog if you are building from git
   26 //
   27 //========================================================================
   28 
   29 #include <config.h>
   30 
   31 #include <stdio.h>
   32 #include <string.h>
   33 #include "goo/glibc.h"
   34 #include "goo/gmem.h"
   35 #include "goo/gfile.h"
   36 #include "goo/GooString.h"
   37 #include "Error.h"
   38 #include "GlobalParams.h"
   39 #include "UnicodeMap.h"
   40 
   41 //------------------------------------------------------------------------
   42 
   43 #define maxExtCode 16
   44 
   45 struct UnicodeMapExt {
   46   Unicode u;            // Unicode char
   47   char code[maxExtCode];
   48   unsigned int nBytes;
   49 };
   50 
   51 //------------------------------------------------------------------------
   52 
   53 UnicodeMap *UnicodeMap::parse(GooString *encodingNameA) {
   54   FILE *f;
   55   UnicodeMap *map;
   56   UnicodeMapRange *range;
   57   UnicodeMapExt *eMap;
   58   int size, eMapsSize;
   59   char buf[256];
   60   int line, nBytes, i;
   61   char *tok1, *tok2, *tok3;
   62   char *tokptr;
   63 
   64   if (!(f = globalParams->getUnicodeMapFile(encodingNameA))) {
   65     error(errSyntaxError, -1,
   66       "Couldn't find unicodeMap file for the '{0:t}' encoding",
   67       encodingNameA);
   68     return nullptr;
   69   }
   70 
   71   map = new UnicodeMap(encodingNameA->copy());
   72 
   73   size = 8;
   74   UnicodeMapRange *customRanges = (UnicodeMapRange *)gmallocn(size, sizeof(UnicodeMapRange));
   75   eMapsSize = 0;
   76 
   77   line = 1;
   78   while (getLine(buf, sizeof(buf), f)) {
   79     if ((tok1 = strtok_r(buf, " \t\r\n", &tokptr)) &&
   80     (tok2 = strtok_r(nullptr, " \t\r\n", &tokptr))) {
   81       if (!(tok3 = strtok_r(nullptr, " \t\r\n", &tokptr))) {
   82     tok3 = tok2;
   83     tok2 = tok1;
   84       }
   85       nBytes = strlen(tok3) / 2;
   86       if (nBytes <= 4) {
   87     if (map->len == size) {
   88       size *= 2;
   89       customRanges = (UnicodeMapRange *)
   90         greallocn(customRanges, size, sizeof(UnicodeMapRange));
   91     }
   92     range = &customRanges[map->len];
   93     sscanf(tok1, "%x", &range->start);
   94     sscanf(tok2, "%x", &range->end);
   95     sscanf(tok3, "%x", &range->code);
   96     range->nBytes = nBytes;
   97     ++map->len;
   98       } else if (tok2 == tok1) {
   99     if (map->eMapsLen == eMapsSize) {
  100       eMapsSize += 16;
  101       map->eMaps = (UnicodeMapExt *)
  102         greallocn(map->eMaps, eMapsSize, sizeof(UnicodeMapExt));
  103     }
  104     eMap = &map->eMaps[map->eMapsLen];
  105     sscanf(tok1, "%x", &eMap->u);
  106     for (i = 0; i < nBytes; ++i) {
  107       unsigned int x;
  108       sscanf(tok3 + i*2, "%2x", &x);
  109       eMap->code[i] = (char)x;
  110     }
  111     eMap->nBytes = nBytes;
  112     ++map->eMapsLen;
  113       } else {
  114     error(errSyntaxError, -1,
  115           "Bad line ({0:d}) in unicodeMap file for the '{1:t}' encoding",
  116           line, encodingNameA);
  117       }
  118     } else {
  119       error(errSyntaxError, -1,
  120         "Bad line ({0:d}) in unicodeMap file for the '{1:t}' encoding",
  121         line, encodingNameA);
  122     }
  123     ++line;
  124   }
  125 
  126   fclose(f);
  127 
  128   map->ranges = customRanges;
  129   return map;
  130 }
  131 
  132 UnicodeMap::UnicodeMap(GooString *encodingNameA) {
  133   encodingName = encodingNameA;
  134   unicodeOut = false;
  135   kind = unicodeMapUser;
  136   ranges = nullptr;
  137   len = 0;
  138   eMaps = nullptr;
  139   eMapsLen = 0;
  140   refCnt = 1;
  141 }
  142 
  143 UnicodeMap::UnicodeMap(const char *encodingNameA, bool unicodeOutA,
  144                const UnicodeMapRange *rangesA, int lenA) {
  145   encodingName = new GooString(encodingNameA);
  146   unicodeOut = unicodeOutA;
  147   kind = unicodeMapResident;
  148   ranges = rangesA;
  149   len = lenA;
  150   eMaps = nullptr;
  151   eMapsLen = 0;
  152   refCnt = 1;
  153 }
  154 
  155 UnicodeMap::UnicodeMap(const char *encodingNameA, bool unicodeOutA,
  156                UnicodeMapFunc funcA) {
  157   encodingName = new GooString(encodingNameA);
  158   unicodeOut = unicodeOutA;
  159   kind = unicodeMapFunc;
  160   func = funcA;
  161   eMaps = nullptr;
  162   eMapsLen = 0;
  163   refCnt = 1;
  164 }
  165 
  166 UnicodeMap::~UnicodeMap() {
  167   delete encodingName;
  168   if (kind == unicodeMapUser && ranges) {
  169     gfree(const_cast<UnicodeMapRange *>(ranges));
  170   }
  171   if (eMaps) {
  172     gfree(eMaps);
  173   }
  174 }
  175 
  176 UnicodeMap::UnicodeMap(UnicodeMap &&other) noexcept
  177   : encodingName{other.encodingName}
  178   , kind{other.kind}
  179   , unicodeOut{other.unicodeOut}
  180   , len{other.len}
  181   , eMaps{other.eMaps}
  182   , eMapsLen{other.eMapsLen}
  183   , refCnt{1}
  184 {
  185   switch (kind) {
  186   case unicodeMapUser:
  187   case unicodeMapResident:
  188     ranges = other.ranges;
  189     other.ranges = nullptr;
  190     break;
  191   case unicodeMapFunc:
  192     func = other.func;
  193     break;
  194   }
  195   other.encodingName = nullptr;
  196   other.eMaps = nullptr;
  197 }
  198 
  199 UnicodeMap& UnicodeMap::operator=(UnicodeMap &&other) noexcept
  200 {
  201   if (this != &other)
  202     swap(other);
  203   return *this;
  204 }
  205 
  206 void UnicodeMap::swap(UnicodeMap &other) noexcept
  207 {
  208   using std::swap;
  209   swap(encodingName, other.encodingName);
  210   swap(unicodeOut, other.unicodeOut);
  211   switch (kind) {
  212   case unicodeMapUser:
  213   case unicodeMapResident:
  214     switch (other.kind) {
  215     case unicodeMapUser:
  216     case unicodeMapResident:
  217       swap(ranges, other.ranges);
  218       break;
  219     case unicodeMapFunc:
  220     {
  221       const auto tmp = ranges;
  222       func = other.func;
  223       other.ranges = tmp;
  224       break;
  225     }
  226     }
  227     break;
  228   case unicodeMapFunc:
  229     switch (other.kind) {
  230     case unicodeMapUser:
  231     case unicodeMapResident:
  232     {
  233       const auto tmp = func;
  234       ranges = other.ranges;
  235       other.func = tmp;
  236       break;
  237     }
  238     case unicodeMapFunc:
  239       swap(func, other.func);
  240       break;
  241     }
  242     break;
  243   }
  244   swap(kind, other.kind);
  245   swap(len, other.len);
  246   swap(eMaps, other.eMaps);
  247   swap(eMapsLen, other.eMapsLen);
  248 }
  249 
  250 void UnicodeMap::incRefCnt() {
  251   refCnt.fetch_add(1);
  252 }
  253 
  254 void UnicodeMap::decRefCnt() {
  255   if (refCnt.fetch_sub(1) == 1) {
  256     delete this;
  257   }
  258 }
  259 
  260 bool UnicodeMap::match(const GooString *encodingNameA) const {
  261   return !encodingName->cmp(encodingNameA);
  262 }
  263 
  264 int UnicodeMap::mapUnicode(Unicode u, char *buf, int bufSize) {
  265   int a, b, m, n, i, j;
  266   unsigned int code;
  267 
  268   if (kind == unicodeMapFunc) {
  269     return (*func)(u, buf, bufSize);
  270   }
  271 
  272   a = 0;
  273   b = len;
  274   if (u >= ranges[a].start) {
  275     // invariant: ranges[a].start <= u < ranges[b].start
  276     while (b - a > 1) {
  277       m = (a + b) / 2;
  278       if (u >= ranges[m].start) {
  279     a = m;
  280       } else if (u < ranges[m].start) {
  281     b = m;
  282       }
  283     }
  284     if (u <= ranges[a].end) {
  285       n = ranges[a].nBytes;
  286       if (n > bufSize) {
  287     return 0;
  288       }
  289       code = ranges[a].code + (u - ranges[a].start);
  290       for (i = n - 1; i >= 0; --i) {
  291     buf[i] = (char)(code & 0xff);
  292     code >>= 8;
  293       }
  294       return n;
  295     }
  296   }
  297 
  298   for (i = 0; i < eMapsLen; ++i) {
  299     if (eMaps[i].u == u) {
  300       n = eMaps[i].nBytes;
  301       for (j = 0; j < n; ++j) {
  302     buf[j] = eMaps[i].code[j];
  303       }
  304       return n;
  305     }
  306   }
  307 
  308   return 0;
  309 }
  310 
  311 //------------------------------------------------------------------------
  312 
  313 UnicodeMapCache::UnicodeMapCache() {
  314   int i;
  315 
  316   for (i = 0; i < unicodeMapCacheSize; ++i) {
  317     cache[i] = nullptr;
  318   }
  319 }
  320 
  321 UnicodeMapCache::~UnicodeMapCache() {
  322   int i;
  323 
  324   for (i = 0; i < unicodeMapCacheSize; ++i) {
  325     if (cache[i]) {
  326       cache[i]->decRefCnt();
  327     }
  328   }
  329 }
  330 
  331 UnicodeMap *UnicodeMapCache::getUnicodeMap(GooString *encodingName) {
  332   UnicodeMap *map;
  333   int i, j;
  334 
  335   if (cache[0] && cache[0]->match(encodingName)) {
  336     cache[0]->incRefCnt();
  337     return cache[0];
  338   }
  339   for (i = 1; i < unicodeMapCacheSize; ++i) {
  340     if (cache[i] && cache[i]->match(encodingName)) {
  341       map = cache[i];
  342       for (j = i; j >= 1; --j) {
  343     cache[j] = cache[j - 1];
  344       }
  345       cache[0] = map;
  346       map->incRefCnt();
  347       return map;
  348     }
  349   }
  350   if ((map = UnicodeMap::parse(encodingName))) {
  351     if (cache[unicodeMapCacheSize - 1]) {
  352       cache[unicodeMapCacheSize - 1]->decRefCnt();
  353     }
  354     for (j = unicodeMapCacheSize - 1; j >= 1; --j) {
  355       cache[j] = cache[j - 1];
  356     }
  357     cache[0] = map;
  358     map->incRefCnt();
  359     return map;
  360   }
  361   return nullptr;
  362 }