w32tex
About: TeX Live provides a comprehensive TeX system including all the major TeX-related programs, macro packages, and fonts that are free software. Windows sources.
  Fossies Dox: w32tex-src.tar.xz  ("unofficial" and yet experimental doxygen-generated source code documentation)  

Engine.cpp
Go to the documentation of this file.
1 /*------------------------------------------------------------------------
2 Copyright (C) 2002-2016 SIL International. All rights reserved.
3 
4 Distributable under the terms of either the Common Public License or the
5 GNU Lesser General Public License, as specified in the LICENSING.txt file.
6 
7 File: Engine.cp
8 Responsibility: Jonathan Kew
9 Last reviewed: Not yet.
10 
11 Description:
12  Implements the TECkit conversion engine.
13 -------------------------------------------------------------------------*/
14 
15 /*
16  2008-01-23 jk revised endian-ness stuff to allow Universal build
17  2006-06-02 jk added support for extended string rules (>255 per initial char)
18  2006-06-02 jk fixed bug handling passes with no mapping rules
19  2006-01-12 jk remove multi-char constants, use kTableType_XXX from TECkit_Format.h
20  2005-07-19 jk revised to use WORDS_BIGENDIAN conditional, config.h
21  2005-05-06 jk patched match() to forget matches within groups if we backtrack out
22  2004-03-19 jk rewrote match() to fix group/repeat bugs and be more efficient
23  2004-03-12 jk finished updating for version 2.1 with ...Opt APIs
24 */
25 
26 //#define TRACING 1
27 
28 #ifdef HAVE_CONFIG_H
29 # include "config.h" /* a Unix-ish setup where we have config.h available */
30 #endif
31 
32 #if (defined(__dest_os) && (__dest_os == __win32_os)) || defined(WIN32) /* Windows target: little-endian */
33 # undef WORDS_BIGENDIAN
34 #endif
35 
36 #ifdef __APPLE__
37 #include <TargetConditionals.h>
38 #endif
39 
40 #if defined(TARGET_RT_BIG_ENDIAN) /* the CodeWarrior prefix files or Apple TargetConditionals.h sets this */
41 # if TARGET_RT_BIG_ENDIAN
42 # undef WORDS_BIGENDIAN
43 # define WORDS_BIGENDIAN 1
44 # else
45 # undef WORDS_BIGENDIAN
46 # endif
47 #endif
48 
49 #if (defined(__dest_os) && (__dest_os == __win32_os)) || defined(WIN32)
50 # define WIN32_LEAN_AND_MEAN
51 # define NOSERVICE
52 # define NOMCX
53 # include <windows.h>
54 #endif
55 
56 #include "Engine.h"
57 
58 #ifdef TRACING
59 #include <iostream>
60 
61 int traceLevel = 1;
62 #endif
63 
64 #include <cstdlib>
65 #include <cstring>
66 #include <algorithm>
67 
68 #include "zlib.h"
69 
70 using namespace std;
71 
72 /* we apply READ to values read from the compiled table, to provide byte-swapping where needed */
73 inline UInt8
74 READ(const UInt8 p)
75 {
76  return p;
77 }
78 
79 inline UInt16
80 READ(const UInt16 p)
81 {
82 #ifdef WORDS_BIGENDIAN
83  return p;
84 #else
85  return (p >> 8) + (p << 8);
86 #endif
87 }
88 
89 inline UInt32
90 READ(const UInt32 p)
91 {
92 #ifdef WORDS_BIGENDIAN
93  return p;
94 #else
95  return (p >> 24) + ((p >> 8) & 0x0000ff00) + ((p << 8) & 0x00ff0000) + (p << 24);
96 #endif
97 }
98 
// Member-initializer list and empty body of a constructor whose declarator line is
// not present in this listing. The output buffer starts out null, with zero size and
// zero end/read offsets, and there is no previous stage.
100  : oBuffer(0)
101  , oBufSize(0)
102  , oBufEnd(0)
103  , oBufPtr(0)
104  , prevStage(0)
105 {
106 }
107 
// Body of a function whose declarator line is not present in this listing
// (presumably the destructor that pairs with the constructor above it).
109 {
// prevStage is deleted only when it has a predecessor of its own; a prevStage with
// no predecessor is left alone.
// NOTE(review): presumably the first stage of a chain is owned elsewhere -- confirm.
110  if (prevStage && prevStage->prevStage)
111  delete prevStage;
112 }
113 
// Returns 0 unconditionally. The line carrying this function's name and parameters
// is not present in this listing.
114 UInt32
116 {
117  return 0;
118 }
119 
120 #include "NormalizationData.c"
121 
// Member-initializer list and body of a constructor whose declarator line is not
// present in this listing; that line supplies `compose`, which is stored in bCompose.
123  : prevCombClass(0)
124  , oBufSafe(0)
125  , bCompose(compose)
126 {
// initial capacity of 256 code points for the output buffer
127  oBufSize = 256;
128  oBuffer = new UInt32[oBufSize];
129 }
130 
// Body of a function whose declarator line is not present in this listing
// (presumably the destructor): frees the oBuffer array with delete[].
132 {
133  delete[] oBuffer;
134 }
135 
136 /* constants for algorithmic Hangul decomposition */
// The same constants are also used by the L+V and LV+T recomposition tests below.
// base code point of the syllable range that is decomposed arithmetically
137 #define SBase 0xAC00
// base code point for the first (L) element of a decomposed syllable
138 #define LBase 0x1100
// base code point for the second (V) element of a decomposed syllable
139 #define VBase 0x1161
// base for the optional third (T) element; T index 0 means "no third element"
// (the decomposition code only emits TBase + T when T != 0)
140 #define TBase 0x11A7
// number of distinct L, V and T index values
141 #define LCount 19
142 #define VCount 21
143 #define TCount 28
// syllables per L value, and total number of syllables in the range
144 #define NCount (VCount * TCount)
145 #define SCount (LCount * NCount)
146 
// Pulls one character from the previous stage and pushes its decomposition into the
// output buffer. kNeedMoreInput, kInvalidChar, kUnmappedChar and kEndOfText are
// returned to the caller; for an ordinary character the return value is 0.
// The line carrying this function's name is not present in this listing.
147 UInt32
149 {
150  UInt32 inChar = prevStage->getChar();
151  if (inChar == kNeedMoreInput || inChar == kInvalidChar || inChar == kUnmappedChar)
152  return inChar;
153  if (inChar == kEndOfText) {
// NOTE(review): source line 154 is absent from this listing at this point.
155  return inChar;
156  }
157 
// Characters inside the SBase..SBase+SCount-1 range are split arithmetically into
// L, V and (when non-zero) T parts; everything else goes through decompose().
// The subtraction is unsigned, so values below SBase wrap and fail the range test.
158  UInt32 SIndex = inChar - SBase;
159  if (SIndex >= SCount)
160  decompose(inChar);
161  else {
162  generateChar(LBase + SIndex / NCount);
163  generateChar(VBase + (SIndex % NCount) / TCount);
164  UInt32 T = SIndex % TCount;
165  if (T != 0)
166  generateChar(TBase + T);
167  }
168 
169  return 0;
170 }
171 
// Empties the output buffer (read offset, end offset and safe limit all back to 0)
// and forgets the combining class of the previous character.
// The line carrying this function's name is not present in this listing.
172 void
174 {
175  oBufPtr = oBufEnd = 0;
176  prevCombClass = 0;
177  oBufSafe = 0;
178 }
179 
// Recursive decomposition step: the `prefix` part is decomposed further by calling
// decompose() again, and the remaining character `c` is handed to generateChar().
// 0xffff is the "nothing here" marker for either part.
// NOTE(review): the declarator line and source line 183 (where `prefix` is
// introduced) are absent from this listing.
180 void
182 {
184  if (prefix != 0xffff)
185  decompose(prefix);
186  if (c != 0xffff)
187  generateChar(c);
188 }
189 
// Looks `c` up in the three-level decomposition tables (plane -> page -> character).
// When the tables hold no entry (index 0) the function returns 0xffff and leaves `c`
// alone; otherwise `c` is overwritten with element [1] of the entry and element [0]
// is returned.
// NOTE(review): the declarator line is absent from this listing; since `c` is
// assigned here, it is presumably passed by reference -- confirm.
// NOTE(review): `plane` is used to index dcPlaneMap without a range check.
190 UInt32
192 {
193  UInt32 plane = c >> 16;
194  UInt32 page = (c >> 8) & 0xff;
195  UInt32 ch = c & 0xff;
196 
197  UInt16 charIndex = dcCharIndex[dcPageMaps[dcPlaneMap[plane]][page]][ch];
198  if (charIndex == 0)
199  return 0xffff;
200  c = dcDecomposition[charIndex][1];
201  return dcDecomposition[charIndex][0];
202 }
203 
204 void
206 {
207  int combClass = 0;
208  if (c != kEndOfText) {
209  UInt32 plane = c >> 16;
210  UInt32 page = (c >> 8) & 0xff;
211  UInt32 ch = c & 0xff;
212  combClass = ccCharClass[ccPageMaps[ccPlaneMap[plane]][page]][ch];
213  }
214 
215  if (combClass != 0) {
216  // combiners are always buffered for sorting and possible composition
217  if (prevCombClass <= combClass) {
218  appendChar(c);
219  prevCombClass = combClass;
220  }
221  else
222  insertChar(c, combClass);
223  }
224  else {
225  if (bCompose) {
226  if (oBufEnd > 0) {
227  // check whether last buffered char and current char should form Hangul syllable
228  UInt32 last = oBuffer[oBufEnd - 1];
229 
230  // 1. check to see if two current characters are L and V
231  UInt32 LIndex = last - LBase;
232  if (LIndex < LCount) {
233  UInt32 VIndex = c - VBase;
234  if (VIndex < VCount) {
235  // make syllable of form LV
236  last = SBase + (LIndex * VCount + VIndex) * TCount;
237  oBuffer[oBufEnd - 1] = last; // reset last
238  return; // don't append c, and don't update oBufSafe as a following V would compose
239  }
240  }
241 
242  // 2. check to see if two current characters are LV and T
243  UInt32 SIndex = last - SBase;
244  if (SIndex < SCount && (SIndex % TCount) == 0) {
245  UInt32 TIndex = c - TBase;
246  if (TIndex <= TCount) {
247  // make syllable of form LVT
248  last += TIndex;
249  oBuffer[oBufEnd - 1] = last; // reset last
250  oBufSafe = oBufEnd; // no more composition will be possible now
251  return; // don't append c
252  }
253  }
254  }
255 
256  // search for canonical compositions in the buffered text, and update oBufSafe if possible
257  compose();
258  }
259  else
260  oBufSafe = oBufEnd;
261  appendChar(c);
262  if (c == kEndOfText)
263  oBufSafe = oBufEnd;
264  prevCombClass = 0;
265  }
266 }
267 
// Appends `c` at the end of the output buffer, enlarging the buffer first when it
// is full. The line carrying this function's name is not present in this listing.
268 void
270 {
271  /* unlikely that we'd ever need to do this--it would take a long string of non-spacing marks! */
272  if (oBufEnd == oBufSize)
273  growOutBuf();
274 
275  oBuffer[oBufEnd++] = c;
276 }
277 
278 void
279 Normalizer::insertChar(UInt32 insCh, int insCombClass)
280 {
281  if (oBufEnd == oBufSize)
282  growOutBuf();
283 
284  UInt32 i;
285  for (i = oBufEnd - 1; i > 0; --i) {
286  UInt32 c = oBuffer[i];
287  UInt32 plane = c >> 16;
288  UInt32 page = (c >> 8) & 0xff;
289  UInt32 ch = c & 0xff;
290  int combClass = ccCharClass[ccPageMaps[ccPlaneMap[plane]][page]][ch];
291  if (insCombClass >= combClass)
292  break;
293  }
294  ++i;
295 
296  for (UInt32 j = oBufEnd; j > i; --j)
297  oBuffer[j] = oBuffer[j - 1];
298 
299  oBuffer[i] = insCh;
300  oBufEnd++;
301 }
302 
// Enlarges the output buffer by 256 entries: allocates a new array, copies the
// existing oBufSize entries across, frees the old array and records the new size.
// The line carrying this function's name is not present in this listing.
303 void
305 {
306  UInt32 newSize = oBufSize + 256;
307  UInt32* newBuf = new UInt32[newSize];
308  for (long i = 0; i < oBufSize; ++i)
309  newBuf[i] = oBuffer[i];
310  delete[] oBuffer;
311  oBuffer = newBuf;
312  oBufSize = newSize;
313 }
314 
315 void
317 {
318  // search for compositions in oBuffer up to oBufEnd
319  UInt32 starterPos = 0;
320 
321  UInt32 c = oBuffer[0];
322  UInt32 plane = c >> 16;
323  UInt32 page = (c >> 8) & 0xff;
324  UInt32 ch = c & 0xff;
325  int lastClass = ccCharClass[ccPageMaps[ccPlaneMap[plane]][page]][ch];
326  if (lastClass != 0)
327  lastClass = 256;
328 
329  if (oBufEnd > 1) {
330  UInt32 compPos = 1;
331  UInt16 li = cLCharIndex[cLPageMaps[cLPlaneMap[plane]][page]][ch];
332 
333  for (long decompPos = 1; decompPos < oBufEnd; ++decompPos) {
334  c = oBuffer[decompPos];
335  plane = c >> 16;
336  page = (c >> 8) & 0xff;
337  ch = c & 0xff;
338  int chClass = ccCharClass[ccPageMaps[ccPlaneMap[plane]][page]][ch];
339  UInt16 ri = cRCharIndex[cRPageMaps[cRPlaneMap[plane]][page]][ch];
340  UInt32 cmp = cComposites[li][ri];
341  if (cmp != 0 && (lastClass < chClass || lastClass == 0)) {
342  oBuffer[starterPos] = cmp;
343  plane = cmp >> 16;
344  page = (cmp >> 8) & 0xff;
345  ch = cmp & 0xff;
346  li = cLCharIndex[cLPageMaps[cLPlaneMap[plane]][page]][ch];
347  }
348  else {
349  if (chClass == 0) {
350  starterPos = compPos;
351  plane = c >> 16;
352  page = (c >> 8) & 0xff;
353  ch = c & 0xff;
354  li = cLCharIndex[cLPageMaps[cLPlaneMap[plane]][page]][ch];
355  }
356  lastClass = chClass;
357  oBuffer[compPos++] = c;
358  }
359  }
360  oBufEnd = compPos;
361  }
362 
363  // update oBufSafe to pass any chars that definitely can't compose
364  if (lastClass != 0)
365  oBufSafe = oBufEnd;
366  else
367  oBufSafe = starterPos;
368 }
369 
// Delivers the next character from the output buffer. While nothing is marked safe
// (oBufSafe == 0) it keeps calling process(); kNeedMoreInput, kInvalidChar and
// kUnmappedChar from process() are returned immediately.
// The line carrying this function's name is not present in this listing.
370 UInt32
372 {
373  UInt32 c;
374  while (oBufSafe == 0) {
375  c = process();
376  if (c == kNeedMoreInput || c == kInvalidChar || c == kUnmappedChar)
377  return c;
378  }
379  c = oBuffer[oBufPtr++];
// once every safe character has been handed out, slide the remaining characters
// down to the start of the buffer and reset the read offset and safe limit
380  if (oBufPtr == oBufSafe) {
381  for (long i = oBufPtr; i < oBufEnd; ++i)
382  oBuffer[i - oBufPtr] = oBuffer[i];
383  oBufEnd -= oBufPtr;
384  oBufSafe = oBufPtr = 0;
385  }
386  return c;
387 }
388 
389 Pass::Pass(const TableHeader* inTable, Converter* cnv)
390  : converter(cnv)
391  , tableHeader(inTable)
392  , iBuffer(0)
393  , iBufSize(0)
394  , iBufStart(0)
395  , iBufEnd(0)
396  , iBufPtr(0)
397 {
398  bInputIsUnicode = ((READ(tableHeader->type) & 0xFF000000) >> 24) == 'U';
399  bOutputIsUnicode = (READ(tableHeader->type) & 0x000000FF) == 'U';
400  bSupplementaryChars = (READ(tableHeader->flags) & kTableFlags_Supplementary) != 0;
401 
402  numPageMaps = 1;
403  pageBase = reinterpret_cast<const Byte*>(tableHeader) + READ(tableHeader->pageBase);
404  lookupBase = reinterpret_cast<const Lookup*>(reinterpret_cast<const Byte*>(tableHeader) + READ(tableHeader->lookupBase));
405  matchClassBase = reinterpret_cast<const Byte*>(tableHeader) + READ(tableHeader->matchClassBase);
406  repClassBase = reinterpret_cast<const Byte*>(tableHeader) + READ(tableHeader->repClassBase);
407  stringListBase = reinterpret_cast<const Byte*>(tableHeader) + READ(tableHeader->stringListBase);
408  stringRuleData = reinterpret_cast<const Byte*>(tableHeader) + READ(tableHeader->stringRuleData);
409 
410  if (bInputIsUnicode && bSupplementaryChars) {
411  // support supplementary plane chars
412  planeMap = pageBase;
413  pageBase += 20;
414  numPageMaps = READ(*(planeMap + 17));
415  }
416 
417  iBufSize = (READ(inTable->maxMatch) + READ(inTable->maxPre) + READ(inTable->maxPost) + 7) & ~0x0003;
418  iBuffer = new UInt32[iBufSize];
419 
420  oBufSize = (READ(inTable->maxOutput) + 7) & ~0x0003;
421  oBuffer = new UInt32[oBufSize];
422 }
423 
// Body of a function whose declarator line is not present in this listing
// (presumably the destructor): frees the output and input buffer arrays.
425 {
426  delete[] oBuffer;
427  delete[] iBuffer;
428 }
429 
// Empties both buffers by setting every input and output buffer offset back to 0.
// The line carrying this function's name is not present in this listing.
430 void
432 {
433  iBufStart = iBufEnd = iBufPtr = 0;
434  oBufPtr = oBufEnd = 0;
435 }
436 
// Returns the next output character of this pass. Whenever the output buffer has
// been fully read it is reset and DoMapping() is called to refill it;
// kNeedMoreInput, kInvalidChar and kUnmappedChar from DoMapping() are returned
// unchanged. The line carrying this function's name is not present in this listing.
437 UInt32
439  // called by next Pass when it wants the next character from us
440 {
441  while (oBufPtr == oBufEnd) {
442  oBufPtr = oBufEnd = 0;
443  UInt32 c = DoMapping();
444  if (c == kNeedMoreInput || c == kInvalidChar || c == kUnmappedChar)
445  return c;
446  }
447  return oBuffer[oBufPtr++];
448 }
449 
// Stores `c` at the end of the output buffer. When the buffer is already full the
// character is dropped without any error being reported.
// The line carrying this function's name is not present in this listing.
450 void
452  // Called by DoMapping to generate a character in the output stream
453 {
454  if (oBufEnd < oBufSize)
455  oBuffer[oBufEnd++] = c;
456  // Cannot overflow provided the table correctly declares maxOutput
457  // (so the compiler had better get it right!)
458 }
459 
// Number of characters held in the circular input buffer between the current
// position (iBufPtr) and the end of the buffered data (iBufEnd).
// The line carrying this function's name is not present in this listing.
460 UInt32
462  // return how many characters of lookahead this pass has in its input buffer
463 {
464  return iBufEnd < iBufPtr
465  ? // iBufEnd has wrapped but iBufPtr hasn't
466  iBufEnd + (iBufSize - iBufPtr)
467  : // pointers are in the "normal" order
468  iBufEnd - iBufPtr;
469 }
470 
471 UInt32
472 Pass::inputChar(long inIndex)
473  // Called by DoMapping or match to read the character at a given location
474  // relative to the current input stream location
475 {
476  long target = iBufPtr + inIndex;
477  if (inIndex < 0) {
478  // look back
479  if (target < 0)
480  target += iBufSize;
481  if (iBufPtr < iBufStart) {
482  // iBufPtr has wrapped back to beginning of buffer, leaving iBufStart beyond it
483  // so the valid pre-context is from iBufStart to iBufSize-1 and 0 to iBufPtr-1
484  if (target >= iBufStart || target < iBufPtr)
485  return iBuffer[target];
486  }
487  else {
488  // iBufPtr points beyond iBufStart
489  // so the valid pre-context is from iBufStart to iBufPtr-1
490  if (target >= iBufStart && target < iBufPtr)
491  return iBuffer[target];
492  }
493  return kEndOfText;
494  }
495  else {
496  // look ahead
497  if (target >= iBufSize)
498  target -= iBufSize;
499  if (iBufPtr == iBufEnd) {
500  // ensure that current character is actually available
501  UInt32 ch = prevStage->getChar();
502  if (ch == kNeedMoreInput || ch == kInvalidChar || ch == kUnmappedChar)
503  return ch; // don't put this into iBuffer!
504  iBuffer[iBufEnd++] = ch;
505  if (iBufEnd == iBufSize)
506  iBufEnd = 0;
507  if (iBufEnd == iBufStart) {
508  ++iBufStart;
509  if (iBufStart == iBufSize)
510  iBufStart = 0;
511  }
512  }
513  long index = iBufPtr;
514  while (index != target) {
515  // scan forward as far as necessary, reading in required chars
516  if (index == iBufSize - 1)
517  index = 0;
518  else
519  ++index;
520  if (index == iBufEnd) {
521  UInt32 ch = prevStage->getChar();
522  if (ch == kNeedMoreInput || ch == kInvalidChar || ch == kUnmappedChar)
523  return ch;
524  iBuffer[iBufEnd++] = ch;
525  if (iBufEnd == iBufSize)
526  iBufEnd = 0;
527  if (iBufEnd == iBufStart) {
528  ++iBufStart;
529  if (iBufStart == iBufSize)
530  iBufStart = 0;
531  }
532  }
533  }
534  return iBuffer[index];
535  }
536  return kEndOfText;
537 }
538 
// Moves the current input position forward by `numChars`, wrapping iBufPtr at the
// end of the circular buffer.
539 void
540 Pass::advanceInput(unsigned int numChars)
541  // Called by DoMapping to move forward in the input stream
542  // Will only move forward over chars already examined by a rule;
543  // therefore, getChar() can't return kEndOfText, kNeedMoreInput, etc.
544 {
545  for (unsigned int i = 0; i < numChars; ++i) {
546  if (iBufPtr == iBufEnd) {
// NOTE(review): source line 547 is absent from this listing at this point; given
// the comment above, it presumably stores prevStage->getChar() at iBufEnd -- confirm.
548  if (iBufEnd == iBufStart) {
549  ++iBufStart;
550  if (iBufStart == iBufSize)
551  iBufStart = 0;
552  }
553  if (iBufEnd == iBufSize)
554  iBufEnd = 0;
555  }
556  iBufPtr++;
557  if (iBufPtr == iBufSize)
558  iBufPtr = 0;
559  }
560 }
561 
// Lower-bound search over `count` elements starting at `array`; elements are
// compared after byte-swapping through READ. Returns a pointer to the first element
// that is not less than `value`. When every element is less than `value`, or
// `count` is 0, the result points one past the last element.
// The line carrying this function's name and parameters is not present in this listing.
562 template<class T>
563 static const T*
565 {
566  while (count > 0) {
567  const T* i = array;
568  UInt32 count2 = count / 2;
569  i += count2;
570  if (READ(*i) < value) {
571  array = i + 1;
572  count -= count2 + 1;
573  }
574  else
575  count = count2;
576  }
577  return array;
578 }
579 
580 long
581 Pass::classMatch(UInt32 classNumber, UInt32 inChar) const
582 {
583  const UInt32* classPtr = reinterpret_cast<const UInt32*>(matchClassBase + READ(*(reinterpret_cast<const UInt32*>(matchClassBase) + classNumber)));
584  UInt32 memberCount = READ(*classPtr++);
585  if (bInputIsUnicode) {
586  if (bSupplementaryChars) {
587  // classes are 32-bit
588  const UInt32* p = binary_search(classPtr, memberCount, inChar);
589  if (READ(*p) == inChar)
590  return p - classPtr;
591  }
592  else {
593  // classes are 16-bit
594  const UInt16* p = binary_search(reinterpret_cast<const UInt16*>(classPtr), memberCount, inChar);
595  if (READ(*p) == inChar)
596  return p - reinterpret_cast<const UInt16*>(classPtr);
597  }
598  }
599  else {
600  // classes are 8-bit
601  const UInt8* p = binary_search(reinterpret_cast<const UInt8*>(classPtr), memberCount, inChar);
602  if (READ(*p) == inChar)
603  return p - reinterpret_cast<const UInt8*>(classPtr);
604  }
605  return -1;
606 }
607 
// Returns member number `index` of replacement class `classNumber`, reading it as a
// 32-, 16- or 8-bit value; returns 0 when `index` is not below the member count.
// NOTE(review): the declarator line and source line 615 are absent from this
// listing, which is why the if/else nesting below looks unbalanced; line 615
// presumably selects between the 32-bit and 16-bit reads -- confirm.
608 UInt32
610 {
611  const UInt32* classPtr = reinterpret_cast<const UInt32*>(repClassBase + READ(*(reinterpret_cast<const UInt32*>(repClassBase) + classNumber)));
612  UInt32 memberCount = READ(*classPtr++);
613  if (index < memberCount)
614  if (bOutputIsUnicode)
616  return READ(classPtr[index]);
617  else
618  return READ(reinterpret_cast<const UInt16*>(classPtr)[index]);
619  else {
620  return READ(reinterpret_cast<const UInt8*>(classPtr)[index]);
621  }
622  else
623  return 0; // this can't happen if the compiler is right!
624 }
625 
626 #ifdef TRACING
627 static int _depth = 0;
628 #endif
629 
630 #define RETURN(x) do { _rval = (x); goto _return_label; } while (0)
631 
632 #define matchYes 1
633 #define matchNo 0
// Backtracking pattern matcher for one string rule.
//
// index    position in `pattern` of the element to match next
// repeats  how many times that element has already matched
// textLoc  offset in the input (relative to the current position) to examine next;
//          it moves by `direction` after each matched character
//
// Non-group elements are handled with loops and `goto RESTART`; groups and optional
// repeats recurse so that a failed continuation can be backtracked. Matched spans
// and class indices are recorded in info[] for elements below infoLimit.
// All exits go through the RETURN macro so the code at _return_label runs.
// NOTE(review): source lines 691, 744, 763 and 810 are absent from this listing;
// the remaining lines are reproduced as found.
634 UInt32
635 Pass::match(int index, int repeats, int textLoc)
636 {
637 /*
638  attempt to match pattern starting at /index/
639  initial repeat count is /repeats/
640  text offset is /textLoc/
641 
642  recurses whenever we might need to backtrack
643 
644  returns
645  matchYes - succeeded
646  matchNo - can't match at this position
647  other values, eg:
648  kNeedMoreInput
649  kInvalidChar
650  kUnmappedChar
651  - aborted without a definite decision
652 */
653 
654 #ifdef TRACING
655 cerr << "match(" << index << ", " << repeats << ", " << textLoc << ")\n";
656 #endif
657 
658  UInt32 _rval = matchNo;
659 
660  // we come back here to loop rather than recurse, with new values for the arguments
661 RESTART:
662 
663  // if this is the first attempt to match at this index, record where we are
664  if (repeats == 0) {
665  if (index == matchElems)
666  matchedLength = textLoc;
667  if (index < infoLimit) {
668  info[index].matchedSpan.start = textLoc;
669 #ifdef TRACING
670 cerr << "info[" << index << "].matchedSpan.start = " << textLoc << "\n";
671 #endif
672  }
673  }
674 
675  // if we're at the end of the pattern, we have a match
676  if (index >= patternLength)
677  RETURN(matchYes);
678 
679  if (index == 0 && repeats == 0)
680  sgrStack = 0; // ensure this is cleared at start of pattern (shouldn't be necessary?)
681 
682  { // gcc complains about jumping past initializers (from RETURN above) without this
683  UInt32 mr;
684  const MatchElem& m = pattern[index];
// the repeat byte packs the minimum count in its high nibble and the maximum in its low nibble
685  int repeatMin = READ(m.flags.repeat) >> 4;
686  int repeatMax = READ(m.flags.repeat) & 0x0f;
687  UInt8 type = READ(m.flags.type);
688  bool negate = ((type & kMatchElem_Negate) != 0);
689 
// literals end up with type 0 (source line 691, the non-literal arm, is absent here)
690  type = ((type & kMatchElem_NonLit) != 0)
692  : 0;
693 
694  int classIndex;
695  bool matches;
696  UInt32 inChar;
697 
698  // start of group: try each alternative in turn
699  if (type == kMatchElem_Type_BGroup) {
700  // try matching one of the alternatives in the group (again)
701  info[index].groupRepeats = repeats;
702  if (repeats < repeatMax) {
703  int altIndex = index;
704  while (true) {
705  mr = match(altIndex + 1, 0, textLoc);
706  if (mr != matchNo)
707  RETURN(mr);
708  // failed, so step ahead to next alternative or end of group
709  altIndex += READ(pattern[altIndex].value.bgroup.dNext);
710  if ((READ(pattern[altIndex].flags.type) & kMatchElem_TypeMask) != kMatchElem_Type_OR)
711  break;
712  }
713  }
714  // if the group has matched enough times...
715  if (repeats >= repeatMin) {
716  // try to match following stuff
717 #ifdef TRACING
718 cerr << "repeats >= repeatMin\n";
719 #endif
720  mr = match(index + READ(m.value.bgroup.dAfter), 0, textLoc);
721  if (mr == matchYes) {
722  if (index < infoLimit) {
723  info[index].matchedSpan.limit = textLoc;
724 #ifdef TRACING
725 cerr << "group returning matchYes; info[" << index << "].matchedSpan.limit = " << textLoc << "\n";
726 #endif
727  // don't allow elements within the group to indicate matches beyond the span of the group itself
728  for (int i = index + READ(m.value.bgroup.dAfter) - 1; i > index; --i)
729  if (i < infoLimit) {
730  if (info[i].matchedSpan.start > textLoc)
731  info[i].matchedSpan.start = textLoc;
732  if (info[i].matchedSpan.limit > textLoc)
733  info[i].matchedSpan.limit = textLoc;
734  }
735  }
736  }
737  RETURN(mr);
738  }
739  // otherwise just backtrack
740  RETURN(matchNo);
741  }
742 
743  // reached end of an alternative
// (source line 744, which opens this branch, is absent here)
// the group start is re-entered with its repeat count increased by one
745  int startIndex = index - READ(m.value.egroup.dStart);
746  mr = match(startIndex, info[startIndex].groupRepeats + 1, textLoc);
747  RETURN(mr);
748  }
749 
750  // not a group, so we loop rather than recurse until optionality strikes
751  else {
752  // ensure that item matches at least repeatMin times
753  while (repeats < repeatMin) {
754  inChar = inputChar(textLoc);
755  if (inChar == kInvalidChar || inChar == kNeedMoreInput || inChar == kUnmappedChar)
756  RETURN(inChar);
757  matches = false;
758  switch (type) {
759  case 0: // literal
760  matches = (READ(m.value.usv.data) & kUSVMask) == inChar;
761  break;
762 
// (source line 763, the case label for the class test below, is absent here)
764  classIndex = classMatch(READ(m.value.cls.index), inChar);
765  matches = (classIndex != -1);
766  if (matches && repeats == 0 && index < infoLimit)
767  info[index].classIndex = classIndex;
768  break;
769 
770  case kMatchElem_Type_ANY:
771  matches = (inChar != kEndOfText);
772  break;
773 
774  case kMatchElem_Type_EOS:
775  matches = (inChar == kEndOfText);
776  break;
777  }
// a negated element succeeds exactly when the plain test fails
778  matches = (matches != negate);
779  if (!matches)
780  RETURN(matchNo);
781  ++repeats;
782  textLoc += direction;
783  }
784 
785  if (index < infoLimit) {
786  info[index].matchedSpan.limit = textLoc;
787 #ifdef TRACING
788 cerr << "info[" << index << "].matchedSpan.limit = " << textLoc << "\n";
789 #endif
790  }
791 
792  if (repeatMin == repeatMax) {
793  // no need to recurse, as no optionality
794  ++index;
795  repeats = 0;
796  goto RESTART;
797  }
798 
799  // try for another repeat if allowed
800  if (repeats < repeatMax) {
801  inChar = inputChar(textLoc);
802  if (inChar == kInvalidChar || inChar == kNeedMoreInput || inChar == kUnmappedChar)
803  RETURN(inChar);
804  matches = false;
805  switch (type) {
806  case 0: // literal
807  matches = (READ(m.value.usv.data) & kUSVMask) == inChar;
808  break;
809 
// (source line 810, the case label for the class test below, is absent here)
811  classIndex = classMatch(READ(m.value.cls.index), inChar);
812  matches = (classIndex != -1);
813  if (matches && repeats == 0 && index < infoLimit)
814  info[index].classIndex = classIndex;
815  break;
816 
817  case kMatchElem_Type_ANY:
818  matches = (inChar != kEndOfText);
819  break;
820 
821  case kMatchElem_Type_EOS:
822  matches = (inChar == kEndOfText);
823  break;
824  }
825  matches = (matches != negate);
826  if (matches) {
827  mr = match(index, repeats + 1, textLoc + direction);
828  if (mr != matchNo)
829  RETURN(mr);
830  }
831  }
832 
833  // otherwise try to match the remainder of the pattern
834  mr = match(index + 1, 0, textLoc);
835  RETURN(mr);
836  }
837  }
838 
839 _return_label:
840 
// on failure the span limit is pulled back to the current text position
841  if (_rval == matchNo)
842  if (index < infoLimit) {
843  info[index].matchedSpan.limit = textLoc;
844 #ifdef TRACING
845 cerr << "rval == matchNo; setting info[" << index << "].matchedSpan.limit = " << textLoc << "\n";
846 #endif
847  }
848 
849 #ifdef TRACING
850 cerr << "RETURN(" << (_rval == matchYes ? "matchYes" : "matchNo") << ")\n";
851 #endif
852  return _rval;
853 }
854 
855 #undef RETURN
856 
857 #ifdef TRACING
// TRACING helper: writes a compact text form of one match element to cerr --
// "!" for negation, a symbol or literal for the element itself, then a repeat
// suffix ("?", "*", "+", nothing for exactly-once, or "{min,max}").
// NOTE(review): source lines 867, 871, 874 and 886 (case labels) are absent from
// this listing.
// NOTE(review): the element's fields are read here without READ, unlike the
// matching code; whether that matters depends on the field widths -- confirm.
858 static void
859 printMatchElem(const MatchElem& m)
860 {
861  string rval;
862  char buf[20];
863  if (m.flags.type & kMatchElem_Negate)
864  rval += "!";
865  if (m.flags.type & kMatchElem_NonLit) {
866  switch (m.flags.type & kMatchElem_TypeMask) {
868  sprintf(buf, "[%d]", m.value.cls.index);
869  rval += buf;
870  break;
872  rval += "(";
873  break;
875  rval += ")";
876  break;
877  case kMatchElem_Type_OR:
878  rval += "|";
879  break;
880  case kMatchElem_Type_ANY:
881  rval += ".";
882  break;
883  case kMatchElem_Type_EOS:
884  rval += "#";
885  break;
887  rval += "@";
888  break;
889  }
890  }
891  else {
// literal: shown as a quoted character when it is in the range ' ' .. 0x7d, otherwise as hex
892  UInt32 v = m.value.usv.data & kUSVMask;
893  if (v >= ' ' && v < 0x7e) {
894  sprintf(buf, "'%c'", (char)v);
895  rval += buf;
896  }
897  else {
898  sprintf(buf, "0x%04X", (UInt32)v);
899  rval += buf;
900  }
901  }
// the repeat suffix is skipped for group-start elements
902  if (!(m.flags.type & kMatchElem_NonLit) || (m.flags.type & kMatchElem_TypeMask) != kMatchElem_Type_BGroup)
903  switch (m.flags.repeat) {
904  case 0x01:
905  rval += "?";
906  break;
907  case 0x11:
908  break;
909  case 0x0F:
910  rval += "*";
911  break;
912  case 0x1F:
913  rval += "+";
914  break;
915  default:
916  sprintf(buf, "{%d,%d}", m.flags.repeat >> 4, m.flags.repeat & 0x0F);
917  rval += buf;
918  break;
919  }
920  cerr << rval;
921 }
922 
923 static void
924 printMatch(const StringRule* rule)
925 {
926  for (int i = 0; i < READ(rule->matchLength); ++i) {
927  cerr << " ";
928  printMatchElem(((MatchElem*)(rule + 1))[i]);
929 // cerr << "<" << i << ">";
930  }
931  if (READ(rule->preLength) > 0 || READ(rule->postLength) > 0) {
932  cerr << " /";
933  for (int i = READ(rule->preLength) - 1; i >= 0; --i) {
934  cerr << " ";
935  printMatchElem(((MatchElem*)(rule + 1))[READ(rule->matchLength) + READ(rule->postLength) + i]);
936  }
937  cerr << " _";
938  for (int i = 0; i < READ(rule->postLength); ++i) {
939  cerr << " ";
940  printMatchElem(((MatchElem*)(rule + 1))[READ(rule->matchLength) + i]);
941  }
942  }
943 }
944 
945 static void
946 printRep(const StringRule* rule)
947 {
948  const RepElem* r = (const RepElem*)((const MatchElem*)(rule + 1) + rule->matchLength + rule->preLength + rule->postLength);
949  for (int i = 0; i < READ(rule->repLength); ++i, ++r) {
950  cerr << " ";
951  switch (READ(r->flags.type)) {
952  case kRepElem_Literal:
953  {
954  UInt32 v;
955  char buf[20];
956  v = READ(r->value);
957  if (v >= ' ' && v <= 0x7e) {
958  sprintf(buf, "'%c'", v);
959  cerr << buf;
960  }
961  else {
962  sprintf(buf, "0x%04X", v);
963  cerr << buf;
964  }
965  }
966  break;
967 
968  case kRepElem_Class:
969  cerr << "[" << (int)READ(r->flags.repClass) << "," << (int)READ(r->flags.matchIndex) << "]";
970  break;
971 
972  case kRepElem_Copy:
973  cerr << "@" << (int)READ(r->flags.matchIndex);
974  break;
975 
976  case kRepElem_Unmapped:
977  cerr << "?";
978  break;
979  }
980  }
981 }
982 #endif
983 
// Maps the text at the current input position and writes the result to the output
// buffer through outputChar().
//
// The current character selects a Lookup entry (through the plane/page maps for
// Unicode input, or directly / through the DBCS page table for byte input). The
// entry's type then selects one of three actions: run its list of string rules,
// apply the unmapped-character handling, or emit the directly mapped output.
//
// Returns kNeedMoreInput / kInvalidChar / kUnmappedChar when inputChar() or match()
// reports one, returns kUnmappedChar from the unmapped-behavior switches, returns
// kEndOfText when the input is exhausted, and otherwise returns 0.
// NOTE(review): the declarator line and a number of interior source lines (some
// conditions, case labels and statements) are absent from this listing; the
// remaining lines are reproduced as found.
984 UInt32
986 {
987  UInt32 inChar = inputChar(0);
988  if (inChar == kNeedMoreInput || inChar == kInvalidChar || inChar == kUnmappedChar)
989  return inChar;
990  if (inChar == kEndOfText) {
992  return inChar;
993  }
994  matchedLength = 1;
995 
996  const Lookup* lookup;
997  if (bInputIsUnicode) {
998  // Unicode lookup
// charIndex stays 0 unless the plane and page maps lead to a character map entry
999  UInt16 charIndex = 0;
1000  if (reinterpret_cast<const UInt8*>(lookupBase) == pageBase) {
1001  // leave charIndex == 0 : pass with no rules
1002  }
1003  else {
1004  UInt8 plane = inChar >> 16;
1005  const UInt8* pageMap = 0;
1006  if (bSupplementaryChars) {
// 0xff in the plane map (and in a page map below) marks "no entry"
1007  if ((plane < 17) && (READ(planeMap[plane]) != 0xff)) {
1008  pageMap = reinterpret_cast<const UInt8*>(pageBase + 256 * READ(planeMap[plane]));
1009  goto GOT_PAGE_MAP;
1010  }
1011  }
1012  else if (plane == 0) {
1013  pageMap = pageBase;
1014  GOT_PAGE_MAP:
1015  UInt8 page = (inChar >> 8) & 0xff;
1016  if (READ(pageMap[page]) != 0xff) {
// the character maps follow the numPageMaps page maps, 256 entries each
1017  const UInt16* charMapBase = reinterpret_cast<const UInt16*>(pageBase + 256 * numPageMaps);
1018  const UInt16* charMap = charMapBase + 256 * READ(pageMap[page]);
1019  charIndex = READ(charMap[inChar & 0xff]);
1020  }
1021  }
1022  }
1023  lookup = lookupBase + charIndex;
1024  }
1025  else {
1026  // byte-oriented lookup
1027  if (pageBase != reinterpret_cast<const Byte*>(tableHeader)) {
1028  // dbcsPage present
1029  long pageNumber = READ(pageBase[inChar]);
1030  if (pageNumber == 0)
1031  // not a valid DBCS lead byte
1032  lookup = lookupBase + inChar;
1033  else {
1034  UInt32 nextChar = inputChar(1);
1035  if (nextChar == kNeedMoreInput || nextChar == kInvalidChar || nextChar == kUnmappedChar)
1036  return nextChar;
1037  if (nextChar == kEndOfText)
1038  lookup = lookupBase + inChar;
1039  else {
1040  lookup = lookupBase + pageNumber * 256 + nextChar;
1041  if (READ(lookup->rules.type) == kLookupType_IllegalDBCS)
1042  // illegal DBCS sequence; map lead byte alone
1043  lookup = lookupBase + inChar;
1044  else
1045  matchedLength = 2;
1046  }
1047  }
1048  }
1049  else
1050  // single-byte only
1051  lookup = lookupBase + inChar;
1052  }
1053 
1054  UInt8 ruleType = READ(lookup->rules.type);
1056  // process string rule list
1057  const UInt32* ruleList = reinterpret_cast<const UInt32*>(stringListBase) + READ(lookup->rules.ruleIndex);
1058  bool matched = false;
1059  bool allowInsertion = true;
1060  int ruleCount = READ(lookup->rules.ruleCount);
// extended rule lists carry the high part of the count in the type byte
1062  ruleCount += 256 * (ruleType & kLookupType_ExtRuleCountMask);
1063  for ( ; ruleCount > 0; --ruleCount) {
1064  const StringRule* rule = reinterpret_cast<const StringRule*>(stringRuleData + READ(*ruleList));
1065 #ifdef TRACING
1066 if (traceLevel > 0) {
1067  cerr << "** trying match: ";
1068  printMatch(rule);
1069  cerr << "\n";
1070 }
1071 #endif
1072  ruleList++;
1073 
1074  matchElems = READ(rule->matchLength);
1075  if (matchElems == 0 && allowInsertion == false)
1076  continue;
// first attempt: match + post-context, scanning forwards from offset 0
1077  patternLength = matchElems + READ(rule->postLength);
1078  pattern = reinterpret_cast<const MatchElem*>(rule + 1); // point past the defined struct for the rule header
1079  direction = 1;
1081 
1082  // clear junk...
1083  for (int i = 0; i < infoLimit; ++i)
1084  info[i].matchedSpan.start = info[i].matchedSpan.limit = 0;
1085 
1086  UInt32 mr = match(0, 0, 0);
1087  if (mr == matchYes) {
1088  if (matchedLength == 0 && allowInsertion == false)
1089  continue;
// second attempt: pre-context, scanning backwards from offset -1, recording nothing in info[]
1091  patternLength = READ(rule->preLength);
1092  if (patternLength > 0) {
1093  direction = -1;
1094  infoLimit = 0;
1095  matchElems = -1;
1096  mr = match(0, 0, -1);
1097  }
1098  if (mr == matchYes) {
1099  // RULE MATCHED! execute it
1100 #ifdef TRACING
1101 if (traceLevel > 0) {
1102  cerr << "** MATCHED:";
1103  printMatch(rule);
1104  cerr << "\n";
1105 
1106  cerr << "** RANGES:";
1107  for (int i = 0; i < READ(rule->matchLength); ++i) {
1108  cerr << " <" << info[i].matchedSpan.start << ":" << info[i].matchedSpan.limit << ">";
1109  }
1110  cerr << "\n";
1111 
1112  cerr << "** REPLACEMENT:";
1113  printRep(rule);
1114  cerr << "\n";
1115 
1116  cerr << "** GENERATES:";
1117 }
1118 #endif
1119  const RepElem* r = reinterpret_cast<const RepElem*>(pattern + patternLength);
1120  for (int i = 0; i < READ(rule->repLength); ++i, ++r) {
1121 #ifdef TRACING
1122 if (traceLevel > 0)
1123  cerr << " <";
1124 #endif
1125  switch (READ(r->flags.type)) {
1126  case kRepElem_Literal:
1127  outputChar(READ(r->value));
1128 #ifdef TRACING
1129 if (traceLevel > 0)
1130  cerr << (int)READ(r->value);
1131 #endif
1132  break;
1133 
1134  case kRepElem_Class:
1135  {
// output is produced only when the referenced match element covered a non-empty span
1136  const MatchInfo& myInfo = info[READ(r->flags.matchIndex)];
1137  if (myInfo.matchedSpan.start < myInfo.matchedSpan.limit) {
1138  outputChar(repClassMember(READ(r->flags.repClass), myInfo.classIndex));
1139 #ifdef TRACING
1140 if (traceLevel > 0)
1141  cerr << (int)repClassMember(READ(r->flags.repClass), myInfo.classIndex);
1142 #endif
1143  }
1144  }
1145  break;
1146 
1147  case kRepElem_Copy:
1148  {
1149  const MatchInfo* myInfo = &info[READ(r->flags.matchIndex)];
1150  for (int i = myInfo->matchedSpan.start; i < myInfo->matchedSpan.limit; ++i) {
1152 #ifdef TRACING
1153 if (traceLevel > 0)
1154  cerr << (i > myInfo->matchedSpan.start ? "," : "") << (int)inputChar(i);
1155 #endif
1156  }
1157  }
1158  break;
1159 
1160  case kRepElem_Unmapped:
1162  outputChar(inChar);
1163 #ifdef TRACING
1164 if (traceLevel > 0)
1165  cerr << (int)inChar;
1166 #endif
1167  }
1168  else {
1169  switch (converter->unmappedBehavior) {
1171  return kUnmappedChar;
1172 
1175  // fall through
1176 
1177  default: // case kOptionsUnmapped_UseReplacementCharSilently:
1179  break;
1180  }
1181 #ifdef TRACING
1182 if (traceLevel > 0)
1183  cerr << (int)READ(tableHeader->replacementChar);
1184 #endif
1185  }
1186  break;
1187  }
1188 #ifdef TRACING
1189 if (traceLevel > 0)
1190  cerr << ">";
1191 #endif
1192  }
1193 #ifdef TRACING
1194 if (traceLevel > 0)
1195  cerr << endl;
1196 #endif
1197  if (matchedLength > 0) {
1198  // we've matched the current input character, so break the loop
1199  matched = true;
1200  break;
1201  }
1202  else {
1203  // must have been an insertion (or null!) rule, so skip any further insertion rules
1204  allowInsertion = false;
1205  }
1206  }
1207  else if (mr != matchNo) {
1208  return mr;
1209  }
1210  }
1211  else if (mr != matchNo) {
1212  return mr;
1213  }
1214  }
1215  if (!matched) {
1216  // no rule matched the current input char, so we simulate a default "Unmapped" lookup
1218  // B->B or U->U simply copies the input to the output
1219  outputChar(inChar);
1220  else {
1221  // B->U or U->B uses the replacement char or fails, depending on options
1222  switch (converter->unmappedBehavior) {
1224  return kUnmappedChar;
1225 
1228  // fall through
1229 
1230  default: // case kOptionsUnmapped_UseReplacementCharSilently:
1232  break;
1233  }
1234  }
1235  matchedLength = 1;
1236  }
1237  }
1238  else if (ruleType == kLookupType_Unmapped) {
1240  outputChar(inChar);
1241  else {
1242  switch (converter->unmappedBehavior) {
1244  return kUnmappedChar;
1245 
1248  // fall through
1249 
1250  default: // case kOptionsUnmapped_UseReplacementCharSilently:
1252  break;
1253  }
1254  }
1255  }
1256  else {
1257  // direct character output
1258  if (bOutputIsUnicode) {
// values above 0x10ffff produce no output
1259  UInt32 usv = READ(lookup->usv);
1260  if (usv <= 0x0010ffff)
1261  outputChar(usv);
1262  }
1263  else {
1264  for (int i = 0; i < READ(lookup->bytes.count); ++i)
1265  outputChar(READ(lookup->bytes.data[i]));
1266  }
1267  }
1268 
1270  return 0;
1271 }
1272 
// Construct a converter: take a private copy of the mapping table (if any) and build
// the chain of processing stages that ends at finalStage.
//
// inTable / inTableSize: mapping data, or 0 for a table-less Unicode-to-Unicode converter.
// inForward: use the forward tables (numFwdTables); when false, the reverse tables are used.
// inForm / outForm: the kForm_EncodingFormMask bits are kept in inputForm / outputForm;
// the kForm_NormalizationMask bits of outForm may add a trailing Normalizer.
//
// NOTE(review): many early exits below show only "return;" because the statement that
// preceded each one is absent from this listing (gaps in the embedded line numbers);
// presumably each records an error status first - confirm against the original source.
1273 Converter::Converter(const Byte* inTable, UInt32 inTableSize, bool inForward,
1274  UInt16 inForm, UInt16 outForm)
1275  : table(0)
1276  , finalStage(0)
1277  , forward(inForward)
1278  , inputForm(inForm & kForm_EncodingFormMask)
1279  , outputForm(outForm & kForm_EncodingFormMask)
1280  , savedCount(0)
1281  , pendingOutputChar(kInvalidChar)
1283  , warningStatus(0)
1284 {
// the converter itself is the first element of the stage chain; later stages link back via prevStage
1285  finalStage = this;
// normForm tracks the normalization form produced by the most recently added stage (0 = none)
1286  UInt16 normForm = 0;
1287  if (inTable != 0) {
1288  const FileHeader* fh = reinterpret_cast<const FileHeader*>(inTable);
1289  if (READ(fh->type) == kMagicNumberCmp) {
1290  // the table is compressed; allocate a new buffer and decompress
// for a compressed table, the field read here as "version" is used as the uncompressed length
1291  unsigned long uncompressedLen = READ(fh->version);
1292  table = static_cast<Byte*>(malloc(uncompressedLen));
1293  if (table == 0) {
1295  return;
1296  }
// the compressed stream starts after the two leading UInt32 fields
// NOTE(review): inTableSize smaller than 2 * sizeof(UInt32) would wrap the length argument - confirm callers guarantee a minimum size
1297  int result = uncompress(table, &uncompressedLen, inTable + 2 * sizeof(UInt32), inTableSize - 2 * sizeof(UInt32));
1298  if (result != Z_OK) {
1300  return;
1301  }
1302  fh = reinterpret_cast<const FileHeader*>(table);
1303  }
1304 
// at this point fh refers to an uncompressed header (either the caller's data or our decompressed copy)
1305  if (READ(fh->type) != kMagicNumber) {
1307  return;
1308  }
// only the upper 16 bits of the version are compared against kCurrentFileVersion
1309  if ((READ(fh->version) & 0xFFFF0000) > (kCurrentFileVersion & 0xFFFF0000)) {
1311  return;
1312  }
1313 
// an uncompressed table has not been copied yet; take a private copy so the caller's buffer is not retained
1314  if (table == 0) {
1315  table = static_cast<Byte*>(malloc(inTableSize));
1316  if (table == 0) {
1318  return;
1319  }
1320  memcpy(table, inTable, inTableSize);
1321  }
1322 
1323  fh = reinterpret_cast<const FileHeader*>(table);
// layout after the FileHeader: numNames name offsets, then the forward table offsets, then the reverse table offsets
1324  const UInt32* nameOffsets = reinterpret_cast<const UInt32*>(table + sizeof(FileHeader));
1325  const UInt32* tableBase = nameOffsets + READ(fh->numNames);
1326  UInt32 numTables = READ(fh->numFwdTables);
1327  if (!forward) {
1328  tableBase += numTables;
1329  numTables = READ(fh->numRevTables);
1330  }
1331 
1332  // check that the outputForm matches the output of the mapping
1333  UInt32 targetFlags = forward ? READ(fh->formFlagsRHS) : READ(fh->formFlagsLHS);
1334  if ((targetFlags & kFlags_Unicode) != 0) {
1335  if (outputForm < kForm_UTF8 || outputForm > kForm_UTF32LE) {
1337  return;
1338  }
1339  }
1340  else {
1341  if (outputForm != kForm_Bytes) {
1343  return;
1344  }
1345  }
1346 
1347  // if converting from Unicode, prefix a Normalizer if the mapping wants it
1348  UInt32 sourceFlags = forward ? READ(fh->formFlagsLHS) : READ(fh->formFlagsRHS);
1349  if ((sourceFlags & kFlags_Unicode) != 0) {
1350  // check that the inputForm is a Unicode form
1351  if (inputForm < kForm_UTF8 || inputForm > kForm_UTF32LE) {
1353  return;
1354  }
1355  Stage* n = 0;
// Normalizer(false) is paired with kForm_NFD and Normalizer(true) with kForm_NFC
1356  if ((sourceFlags & kFlags_ExpectsNFD) != 0) {
1357  n = new Normalizer(false);
1358  normForm = kForm_NFD;
1359  }
1360  else if ((sourceFlags & kFlags_ExpectsNFC) != 0) {
1361  n = new Normalizer(true);
1362  normForm = kForm_NFC;
1363  }
1364  if (n != 0) {
1365  n->prevStage = finalStage;
1366  finalStage = n;
1367  }
1368  }
1369  else {
1370  // check that the inputForm is bytes
1371  if (inputForm != kForm_Bytes) {
1373  return;
1374  }
1375  }
1376 
1377  // create the processing pipeline
// each table offset yields one stage; every new stage reads from the previous finalStage
1378  for (UInt32 i = 0; i < numTables; ++i) {
1379  const TableHeader* t = reinterpret_cast<const TableHeader*>(table + READ(tableBase[i]));
1380  Stage* p = 0;
1381  switch (READ(t->type)) {
1382  case kTableType_BB:
1383  case kTableType_BU:
1384  case kTableType_UU:
1385  case kTableType_UB:
1386  p = new Pass(t, this);
// a mapping pass gives no guarantee about normalization of its output
1387  normForm = 0;
1388  break;
1389  case kTableType_NFC:
1390  p = new Normalizer(true);
1391  normForm = kForm_NFC;
1392  break;
1393  case kTableType_NFD:
1394  p = new Normalizer(false);
1395  normForm = kForm_NFD;
1396  break;
1397  }
// p stays 0 when the table type matched none of the cases above
1398  if (p == 0) {
1400  return;
1401  }
1402  p->prevStage = finalStage;
1403  finalStage = p;
1404  }
1405  }
1406  else {
1407  // No mapping table provided, so we're mapping Unicode->Unicode,
1408  // possibly doing normalization and/or encoding form change.
1409  // Just check here that the input and output encoding forms are valid.
1410  if (inputForm < kForm_UTF8 || inputForm > kForm_UTF32LE || outputForm < kForm_UTF8 || outputForm > kForm_UTF32LE) {
1412  return;
1413  }
1414  }
1415 
1416  // if converting to Unicode, add a Normalizer pass at the end if requested
// NOTE(review): the line that opens the block closed at listing line 1427 is missing here
// a Normalizer is appended only when the pipeline does not already end in the requested form
1418  Stage* n = 0;
1419  if ((outForm & kForm_NormalizationMask) == kForm_NFD && normForm != kForm_NFD)
1420  n = new Normalizer(false);
1421  else if ((outForm & kForm_NormalizationMask) == kForm_NFC && normForm != kForm_NFC)
1422  n = new Normalizer(true);
1423  if (n != 0) {
1424  n->prevStage = finalStage;
1425  finalStage = n;
1426  }
1427  }
1428 }
1429 
// Cleanup body; the signature line is missing from this listing (presumably the
// Converter destructor - confirm against the original source).
// Deletes the last stage of the chain unless the chain consists only of this object,
// then releases the malloc'ed table copy.
1431 {
// finalStage == this means no extra stage was ever linked in, so there is nothing to delete
1432  if (finalStage != this)
1433  delete finalStage;
1434 
1435  if (table != 0)
1436  free(table);
1437 
1438  table = 0;
1439 }
1440 
// Table indexed by the number of trailing UTF-8 bytes (0..5); the selected value is
// subtracted from the accumulated byte sum when decoding a UTF-8 sequence
// (see "rval -= offsetsFromUTF8[extraBytes]" in the decoders below).
// The declarator line (listed in the index as offsetsFromUTF8[6]) is missing from this listing.
1441 static UInt32
1443  0x00000000UL,
1444  0x00003080UL,
1445  0x000E2080UL,
1446  0x03C82080UL,
1447  0xFA082080UL,
1448  0x82082080UL
1449 };
1450 
// Table indexed by the first byte of a UTF-8 sequence, giving the number of additional
// bytes that follow it: 0 for 0x00-0xBF, 1 for 0xC0-0xDF, 2 for 0xE0-0xEF,
// 3 for 0xF0-0xF7, 4 for 0xF8-0xFB and 5 for 0xFC-0xFF.
// The declarator line (listed in the index as bytesFromUTF8[256]) is missing from this listing.
1451 static UInt8
1453  0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
1454  0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
1455  0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
1456  0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
1457  0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
1458  0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
1459  1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
1460  2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2, 3,3,3,3,3,3,3,3,4,4,4,4,5,5,5,5
1461 };
1462 
// Table indexed by the total length of a UTF-8 sequence; the value is OR-ed into the
// leading byte when encoding (see "c | firstByteMark[bytesToWrite]" in the UTF-8 output code).
// The declarator line (listed in the index as firstByteMark[7]) is missing from this listing.
1463 static UInt8
1465  0x00, 0x00, 0xC0, 0xE0, 0xF0, 0xF8, 0xFC
1466 };
1467 
// Constants used by the UTF-16 surrogate arithmetic and the UTF-8 encoder in this file.
// halfShift / halfMask split a supplementary code point (after subtracting halfBase)
// into the 10-bit payloads of the high and low surrogate.
1468 const int halfShift = 10;
1469 const UInt32 halfBase = 0x0010000UL;
1470 const UInt32 halfMask = 0x3FFUL;
// first and last high-surrogate code units, and the first low-surrogate code unit
1471 const UInt32 kSurrogateHighStart = 0xD800UL;
1472 const UInt32 kSurrogateHighEnd = 0xDBFFUL;
1473 const UInt32 kSurrogateLowStart = 0xDC00UL;
// (c | byteMark) & byteMask yields a UTF-8 continuation byte of the form 10xxxxxx
1474 const UInt32 byteMask = 0x000000BFUL;
1475 const UInt32 byteMark = 0x00000080UL;
1476 
// Fetch the next input character for the pipeline (the comment inside _getCharFn
// refers to this routine as the public getChar()).
// NOTE(review): the signature line and the statement at listing line 1481 are missing
// from this dump; per the comment in _getCharFn, the missing statement presumably
// returns kEndOfText or kNeedMoreInput depending on inputComplete - confirm.
1477 UInt32
1479 {
// no unread bytes remain in either the saved bytes or the current buffer
1480  if (dataPtr >= savedCount + dataLen)
// byte-oriented input needs no decoding: each byte is one character
1482  if (inputForm == kForm_Bytes)
1483  return data[dataPtr++];
1484  return _getCharFn();
1485 }
1486 
// Decode one character from data[] according to inputForm (UTF-8, UTF-16BE/LE or UTF-32BE/LE).
// Returns the decoded value, kInvalidChar when a sequence is truncated and inputComplete is set,
// or kNeedMoreInput (after calling _savePendingBytes()) when a sequence is truncated and
// more input may follow. Delegates to _getCharWithSavedBytes() while savedCount > 0.
// The signature line is missing from this listing.
1487 UInt32
1489 {
1490 // This is ONLY called from the public getChar() function, which has already done these tests:
1491 //
1492 // if (dataPtr >= dataLen)
1493 // return inputComplete ? kEndOfText : kNeedMoreInput;
1494 //
1495 // if (inputForm == kForm_Bytes)
1496 // return data[dataPtr++];
1497 
1498  UInt32 rval = 0;
1499 
1500  if (savedCount > 0) { // the less efficient version is only called if really needed
1501  rval = _getCharWithSavedBytes();
1502  return rval;
1503  }
1504 
// CHECK_AVAIL(x): leave the function early unless x bytes are available starting at dataPtr
1505 #define CHECK_AVAIL(x) \
1506  if (dataPtr + (x) > dataLen) { \
1507  if (inputComplete) \
1508  return kInvalidChar; \
1509  else { \
1510  _savePendingBytes(); \
1511  return kNeedMoreInput; \
1512  } \
1513  }
1514 
1515  switch (inputForm) {
1516  case kForm_UTF8:
1517  {
// the lead byte determines how many further bytes belong to this sequence
1518  UInt16 extraBytes = bytesFromUTF8[data[dataPtr]];
1519  CHECK_AVAIL(extraBytes + 1);
// accumulate the raw bytes, shifting by 6 bits per byte; the lead/continuation marker bits
// are removed afterwards in one step by subtracting offsetsFromUTF8[extraBytes]
// NOTE(review): continuation bytes are not checked for the 10xxxxxx form here
1520  switch (extraBytes) { // note: code falls through cases!
1521  case 5: rval += data[dataPtr++]; rval <<= 6;
1522  case 4: rval += data[dataPtr++]; rval <<= 6;
1523  case 3: rval += data[dataPtr++]; rval <<= 6;
1524  case 2: rval += data[dataPtr++]; rval <<= 6;
1525  case 1: rval += data[dataPtr++]; rval <<= 6;
1526  case 0: rval += data[dataPtr++];
1527  };
1528  rval -= offsetsFromUTF8[extraBytes];
1529  }
1530  break;
1531 
1532  case kForm_UTF16BE:
1533  CHECK_AVAIL(2);
1534  rval = data[dataPtr++] << 8;
1535  rval += data[dataPtr++];
1536  if (rval >= kSurrogateHighStart && rval <= kSurrogateHighEnd) {
1537  // check that 2 more bytes are available
1538  dataPtr -= 2;
1539  CHECK_AVAIL(4); // if we don't have 4 bytes available, this will return with kNeedMoreInput,
1540  // and we'll retry from the beginning of the high surrogate once more is available
1541  dataPtr += 2;
// NOTE(review): the second code unit is not checked to be a low surrogate before combining
1542  UInt32 low = data[dataPtr++] << 8;
1543  low += data[dataPtr++];
1544  rval = ((rval - kSurrogateHighStart) << halfShift) + (low - kSurrogateLowStart) + halfBase;
1545  }
1546  break;
1547 
1548  case kForm_UTF16LE:
// same as the big-endian case with the byte order of each code unit swapped
1549  CHECK_AVAIL(2);
1550  rval = data[dataPtr++];
1551  rval += data[dataPtr++] << 8;
1552  if (rval >= kSurrogateHighStart && rval <= kSurrogateHighEnd) {
1553  dataPtr -= 2;
1554  CHECK_AVAIL(4);
1555  dataPtr += 2;
1556  UInt32 low = data[dataPtr++];
1557  low += data[dataPtr++] << 8;
1558  rval = ((rval - kSurrogateHighStart) << halfShift) + (low - kSurrogateLowStart) + halfBase;
1559  }
1560  break;
1561 
1562  case kForm_UTF32BE:
1563  CHECK_AVAIL(4);
// NOTE(review): if Byte is an 8-bit unsigned type, the operand is promoted to int before
// "<< 24", so a byte >= 0x80 shifts into the sign bit - confirm and consider casting to UInt32
1564  rval = data[dataPtr++] << 24;
1565  rval += data[dataPtr++] << 16;
1566  rval += data[dataPtr++] << 8;
1567  rval += data[dataPtr++];
1568  break;
1569 
1570  case kForm_UTF32LE:
1571  CHECK_AVAIL(4);
1572  rval = data[dataPtr++];
1573  rval += data[dataPtr++] << 8;
1574  rval += data[dataPtr++] << 16;
1575  rval += data[dataPtr++] << 24;
1576  break;
1577  }
1578 
1579  return rval;
1580 }
1581 
// Decode one character when some bytes of it were saved from an earlier buffer.
// Here dataPtr indexes the concatenation of savedBytes[0..savedCount) followed by data[];
// the DATA(x) macro performs that lookup. Once dataPtr has moved past the saved bytes,
// dataPtr is rebased onto data[] and savedCount is cleared.
// The signature line is missing from this listing.
1582 UInt32
1584  // This is a version of _getCharFn() that respects "saved bytes";
1585  // only call this if (savedCount > 0) because it has additional overhead for every byte read
1586 {
1587  UInt32 rval = 0;
1588 
// redefine CHECK_AVAIL so the available length includes the saved bytes
1589 #undef CHECK_AVAIL
1590 #define CHECK_AVAIL(x) \
1591  if (dataPtr + (x) > savedCount + dataLen) { \
1592  if (inputComplete) \
1593  return kInvalidChar; \
1594  else { \
1595  _savePendingBytes(); \
1596  return kNeedMoreInput; \
1597  } \
1598  }
1599 
// NOTE(review): the macro argument is evaluated more than once and is not parenthesized,
// which is why every use below increments dataPtr in a separate statement
1600 #define DATA(x) (x < savedCount ? savedBytes[x] : data[x - savedCount])
1601 
1602  switch (inputForm) {
1603  case kForm_UTF8:
1604  {
1605  UInt16 extraBytes = bytesFromUTF8[DATA(dataPtr)];
1606  CHECK_AVAIL(extraBytes + 1);
1607  switch (extraBytes) { // note: code falls through cases!
1608  case 5: rval += DATA(dataPtr); dataPtr++; rval <<= 6;
1609  case 4: rval += DATA(dataPtr); dataPtr++; rval <<= 6;
1610  case 3: rval += DATA(dataPtr); dataPtr++; rval <<= 6;
1611  case 2: rval += DATA(dataPtr); dataPtr++; rval <<= 6;
1612  case 1: rval += DATA(dataPtr); dataPtr++; rval <<= 6;
1613  case 0: rval += DATA(dataPtr); dataPtr++;
1614  };
1615  rval -= offsetsFromUTF8[extraBytes];
1616  }
1617  break;
1618 
1619  case kForm_UTF16BE:
1620  CHECK_AVAIL(2);
1621  rval = DATA(dataPtr) << 8; dataPtr++;
1622  rval += DATA(dataPtr); dataPtr++;
1623  if (rval >= kSurrogateHighStart && rval <= kSurrogateHighEnd) {
// step back so that, if the low surrogate is not available yet, the whole pair is retried later
1624  dataPtr -= 2;
1625  CHECK_AVAIL(4);
1626  dataPtr += 2;
1627  UInt32 low = DATA(dataPtr) << 8; dataPtr++;
1628  low += DATA(dataPtr); dataPtr++;
1629  rval = ((rval - kSurrogateHighStart) << halfShift) + (low - kSurrogateLowStart) + halfBase;
1630  }
1631  break;
1632 
1633  case kForm_UTF16LE:
1634  CHECK_AVAIL(2);
1635  rval = DATA(dataPtr); dataPtr++;
1636  rval += DATA(dataPtr) << 8; dataPtr++;
1637  if (rval >= kSurrogateHighStart && rval <= kSurrogateHighEnd) {
1638  dataPtr -= 2;
1639  CHECK_AVAIL(4);
1640  dataPtr += 2;
1641  UInt32 low = DATA(dataPtr); dataPtr++;
1642  low += DATA(dataPtr) << 8; dataPtr++;
1643  rval = ((rval - kSurrogateHighStart) << halfShift) + (low - kSurrogateLowStart) + halfBase;
1644  }
1645  break;
1646 
1647  case kForm_UTF32BE:
1648  CHECK_AVAIL(4);
1649  rval = DATA(dataPtr) << 24; dataPtr++;
1650  rval += DATA(dataPtr) << 16; dataPtr++;
1651  rval += DATA(dataPtr) << 8; dataPtr++;
1652  rval += DATA(dataPtr); dataPtr++;
1653  break;
1654 
1655  case kForm_UTF32LE:
1656  CHECK_AVAIL(4);
1657  rval = DATA(dataPtr); dataPtr++;
1658  rval += DATA(dataPtr) << 8; dataPtr++;
1659  rval += DATA(dataPtr) << 16; dataPtr++;
1660  rval += DATA(dataPtr) << 24; dataPtr++;
1661  break;
1662  }
1663 
// all saved bytes have been consumed: switch back to indexing data[] directly
1664  if (dataPtr >= savedCount) {
1665  dataPtr -= savedCount;
1666  savedCount = 0;
1667  }
1668 
1669  return rval;
1670 }
1671 
// Called from CHECK_AVAIL when an incomplete character sits at the end of the input.
// NOTE(review): the signature line and the loop body (listing line 1677) are missing from
// this dump; presumably the loop appends the unread bytes of data[] to savedBytes - confirm.
1672 void
1674 {
// convert dataPtr from an index over (saved bytes + data) back to an index into data[]
1675  dataPtr -= savedCount;
1676  while (dataPtr < dataLen)
1678 }
1679 
// Accessor that reports the mapping direction chosen at construction (the forward member).
// The signature line is missing from this listing, so the function name is not visible here.
1680 bool
1682 {
1683  return forward;
1684 }
1685 
1686 void
1687 Converter::GetFlags(UInt32& sourceFlags, UInt32& targetFlags) const
1688 {
1689  const FileHeader* fh = reinterpret_cast<const FileHeader*>(table);
1690  if (forward) {
1691  sourceFlags = READ(fh->formFlagsLHS);
1692  targetFlags = READ(fh->formFlagsRHS);
1693  }
1694  else {
1695  sourceFlags = READ(fh->formFlagsRHS);
1696  targetFlags = READ(fh->formFlagsLHS);
1697  }
1698 }
1699 
1700 static bool
1701 getNamePtrFromTable(const Byte* table, UInt16 nameID, const Byte*& outNamePtr, UInt32& outNameLen)
1702 {
1703  const FileHeader* fh = reinterpret_cast<const FileHeader*>(table);
1704  const UInt32* nameOffsets = reinterpret_cast<const UInt32*>(table + sizeof(FileHeader));
1705  for (UInt32 i = 0; i < READ(fh->numNames); ++i) {
1706  const NameRec* n = reinterpret_cast<const NameRec*>(table + READ(nameOffsets[i]));
1707  if (READ(n->nameID) == nameID) {
1708  outNameLen = READ(n->nameLength);
1709  outNamePtr = reinterpret_cast<const Byte*>(n) + sizeof(NameRec);
1710  return true;
1711  }
1712  }
1713  return false;
1714 }
1715 
1716 bool
1717 Converter::GetNamePtr(UInt16 nameID, const Byte*& outNamePtr, UInt32& outNameLen) const
1718 {
1719  return getNamePtrFromTable(table, nameID, outNamePtr, outNameLen);
1720 }
1721 
// Parameter list and body of the conversion routine invoked as cnv->ConvertBufferOpt(...)
// by the C API wrappers; the return-type and name lines are missing from this listing.
//
// Pulls characters from finalStage->getChar() and encodes each into outBuffer in outputForm.
// inBuffer / inLength: input for this call, stored in data / dataLen with dataPtr reset to 0.
// inUsed / outUsed / lookaheadCount: optional out-parameters (each is null-checked) that
// are filled in at RETURN_LABEL.
// Returns rval OR-ed with warningStatus; when the kStatusMask_Basic bits of the result equal
// kStatus_NoError, Reset() is called before returning.
//
// NOTE(review): the RETURN(...) statements for the status cases and for the "output buffer
// full" paths, plus a few setup lines, are absent from this dump (gaps in the embedded numbers).
1724  const Byte* inBuffer, UInt32 inLength, UInt32* inUsed,
1725  Byte* outBuffer, UInt32 outLength, UInt32* outUsed,
1726  UInt32 inOptions, UInt32* lookaheadCount)
1727 {
1728  TECkit_Status rval;
// RETURN(x) records the status and jumps to the common exit code at RETURN_LABEL
1729 #undef RETURN
1730 #define RETURN(returnStatus) rval = returnStatus; goto RETURN_LABEL
1731 
1732  UInt32 outPtr = 0;
1733 
1734  data = inBuffer;
1735  dataLen = inLength;
1736  dataPtr = 0;
1739 
1740  UInt32 c;
// a character that did not fit into the output buffer on a previous call is emitted first
// (the condition guarding this block is on a line missing from the listing)
1742  c = pendingOutputChar;
1744  goto GOT_CHAR;
1745  }
1746  while (1) {
1747  c = finalStage->getChar();
1748  GOT_CHAR:
1749  switch (c) {
// the four special values below are status signals rather than characters
1750  case kEndOfText:
1752 
1753  case kNeedMoreInput:
1755 
1756  case kInvalidChar:
1758 
1759  case kUnmappedChar:
1761 
1762  default:
// in every output form: if the encoded character does not fit, remember it in pendingOutputChar
1763  switch (outputForm) {
1764  case kForm_Bytes:
1765  if (outPtr == outLength) {
1766  pendingOutputChar = c;
1768  }
1769  outBuffer[outPtr++] = c;
1770  break;
1771 
1772  case kForm_UTF8:
1773  {
1774  int bytesToWrite;
1775  if (c < 0x80) { bytesToWrite = 1;
1776  } else if (c < 0x800) { bytesToWrite = 2;
1777  } else if (c < 0x10000) { bytesToWrite = 3;
1778  } else if (c < 0x200000) { bytesToWrite = 4;
// NOTE(review): U+FFFD needs three UTF-8 bytes (EF BF BD); with bytesToWrite = 2 the code
// below emits 0xFF 0xBD, which is not valid UTF-8 - confirm and consider bytesToWrite = 3
1779  } else { bytesToWrite = 2;
1780  c = 0x0000fffd;
1781  };
1782  if (outPtr + bytesToWrite > outLength) {
1783  pendingOutputChar = c;
1785  }
// bytes are written back-to-front: advance to the end, fill backwards, then advance again
1786  outPtr += bytesToWrite;
1787  switch (bytesToWrite) { /* note: code falls through cases! */
1788  case 4: outBuffer[--outPtr] = (c | byteMark) & byteMask; c >>= 6;
1789  case 3: outBuffer[--outPtr] = (c | byteMark) & byteMask; c >>= 6;
1790  case 2: outBuffer[--outPtr] = (c | byteMark) & byteMask; c >>= 6;
1791  case 1: outBuffer[--outPtr] = c | firstByteMark[bytesToWrite];
1792  };
1793  outPtr += bytesToWrite;
1794  }
1795  break;
1796 
1797  case kForm_UTF16BE:
1798  if (c < 0x00010000) {
1799  if (outPtr + 2 > outLength) {
1800  pendingOutputChar = c;
1802  }
1803  outBuffer[outPtr++] = c >> 8;
1804  outBuffer[outPtr++] = c;
1805  }
1806  else {
1807  if (outPtr + 4 > outLength) {
1808  pendingOutputChar = c;
1810  }
// split the supplementary code point into a high/low surrogate pair
1811  c -= halfBase;
1812  UInt32 hi = (c >> halfShift) + kSurrogateHighStart;
1813  UInt32 lo = (c & halfMask) + kSurrogateLowStart;
1814  outBuffer[outPtr++] = hi >> 8;
1815  outBuffer[outPtr++] = hi;
1816  outBuffer[outPtr++] = lo >> 8;
1817  outBuffer[outPtr++] = lo;
1818  }
1819  break;
1820 
1821  case kForm_UTF16LE:
1822  if (c < 0x00010000) {
1823  if (outPtr + 2 > outLength) {
1824  pendingOutputChar = c;
1826  }
1827  outBuffer[outPtr++] = c;
1828  outBuffer[outPtr++] = c >> 8;
1829  }
1830  else {
1831  if (outPtr + 4 > outLength) {
1832  pendingOutputChar = c;
1834  }
1835  c -= halfBase;
1836  UInt32 hi = (c >> halfShift) + kSurrogateHighStart;
1837  UInt32 lo = (c & halfMask) + kSurrogateLowStart;
1838  outBuffer[outPtr++] = hi;
1839  outBuffer[outPtr++] = hi >> 8;
1840  outBuffer[outPtr++] = lo;
1841  outBuffer[outPtr++] = lo >> 8;
1842  }
1843  break;
1844 
1845  case kForm_UTF32BE:
1846  if (outPtr + 4 > outLength) {
1847  pendingOutputChar = c;
1849  }
1850  outBuffer[outPtr++] = c >> 24;
1851  outBuffer[outPtr++] = c >> 16;
1852  outBuffer[outPtr++] = c >> 8;
1853  outBuffer[outPtr++] = c;
1854  break;
1855 
1856  case kForm_UTF32LE:
1857  if (outPtr + 4 > outLength) {
1858  pendingOutputChar = c;
1860  }
1861  outBuffer[outPtr++] = c;
1862  outBuffer[outPtr++] = c >> 8;
1863  outBuffer[outPtr++] = c >> 16;
1864  outBuffer[outPtr++] = c >> 24;
1865  break;
1866  }
1867  break;
1868  }
1869  }
1871 
// common exit: report consumption counts and accumulate the lookahead of every stage in the chain
1872 RETURN_LABEL:
1873  if (inUsed)
1874  *inUsed = dataPtr;
1875  if (outUsed)
1876  *outUsed = outPtr;
1877  if (lookaheadCount) {
1878  *lookaheadCount = 0;
1879  Stage* s = finalStage;
1880  while (s != this) {
1881  *lookaheadCount += s->lookaheadCount();
1882  s = s->prevStage;
1883  }
1884  }
1885 
1886  rval |= warningStatus;
1887  if ((rval & kStatusMask_Basic) == kStatus_NoError)
1888  Reset();
1889 
1890  return rval;
1891 }
1892 
// Return the converter to its initial state: clear the saved-byte and input bookkeeping and
// the accumulated warning status, then reset every stage in the chain from finalStage back
// to (but not including) this object.
// The signature line and the statement at listing line 1896 are missing from this dump.
1893 void
1895 {
1897  savedCount = 0;
1898  dataPtr = 0;
1899  dataLen = 0;
1900  warningStatus = 0;
1901  Stage* s = finalStage;
1902  while (s != this) {
1903  s->Reset();
1904  s = s->prevStage;
1905  }
1906 }
1907 
// Sanity-check a converter pointer received through the C API (called as
// Converter::Validate(cnv) by the wrappers below). The signature line is missing from
// this listing.
// Returns false for a null pointer, for a converter whose status is not kStatus_NoError,
// or for one whose table does not start with kMagicNumber; a converter without a table
// (table == 0) is accepted.
1908 bool
1910 {
1911  if (!cnv)
1912  return false;
1913  if (cnv->status != kStatus_NoError)
1914  return false;
1915  if (cnv->table != 0) {
1916  const FileHeader* fh = reinterpret_cast<const FileHeader*>(cnv->table);
1917  if (READ(fh->type) != kMagicNumber)
1918  return false;
1919  }
1920  return true;
1921 }
1922 
// TECkit_CreateConverter (per the cross-reference index; the return-type and name lines
// are missing from this listing).
// Creates a Converter for the given mapping and encoding forms. *converter receives the
// new handle when creationStatus() is kStatus_NoError; otherwise the object is deleted and
// *converter stays 0. Exceptions are caught here and turned into a status value.
// NOTE(review): converter itself is dereferenced without a null check.
1924 WINAPI
1926  Byte* mapping,
1927  UInt32 mappingSize,
1928  Byte mapForward,
1929  UInt16 inputForm,
1930  UInt16 outputForm,
1931  TECkit_Converter* converter)
1932 {
1934  Converter* cnv = 0;
1935  *converter = 0;
1936  try {
1937  cnv = new Converter(mapping, mappingSize, mapForward, inputForm, outputForm);
1938  status = cnv->creationStatus();
1939  if (status == kStatus_NoError)
1940  *converter = reinterpret_cast<TECkit_Converter>(cnv);
1941  else
1942  delete cnv;
1943  }
// NOTE(review): the exception object is caught by value; catching by const reference avoids a copy
1944  catch (Converter::Exception e) {
1945  status = e.errorCode;
1946  }
// the status assigned for any other exception is on a line missing from this listing
1947  catch (...) {
1949  }
1950  return status;
1951 }
1952 
// TECkit_DisposeConverter (per the cross-reference index; declaration lines are missing here).
// Deletes the converter if it passes Converter::Validate(); the status assigned when
// validation fails is on a line missing from this listing.
1954 WINAPI
1956  TECkit_Converter converter)
1957 {
1959  Converter* cnv = reinterpret_cast<Converter*>(converter);
1960  if (!Converter::Validate(cnv))
1962  else
1963  delete cnv;
1964  return status;
1965 }
1966 
// TECkit_GetConverterName (per the cross-reference index; declaration lines are missing here).
// Looks up the name record nameID in the converter's table, stores its full length in
// *nameLength and copies at most bufferSize bytes of it into nameBuffer.
// The status values for an invalid converter and for a missing name are on lines absent
// from this listing.
1968 WINAPI
1970  TECkit_Converter converter,
1971  UInt16 nameID,
1972  Byte* nameBuffer,
1973  UInt32 bufferSize,
1974  UInt32* nameLength)
1975 {
1977  Converter* cnv = reinterpret_cast<Converter*>(converter);
1978  if (!Converter::Validate(cnv))
1980  else {
1981  const Byte* namePtr;
1982  if (cnv->GetNamePtr(nameID, namePtr, *nameLength)) {
// NOTE(review): copyBytes is UInt16 while both operands are UInt32, so a length of
// 65536 or more is truncated before the copy - confirm whether UInt32 was intended
1983  UInt16 copyBytes = *nameLength < bufferSize ? *nameLength : bufferSize;
1984  if (copyBytes > 0)
1985  memcpy(nameBuffer, namePtr, copyBytes);
1986  }
1987  else
1989  }
1990  return status;
1991 }
1992 
// TECkit_GetConverterFlags (per the cross-reference index; declaration lines are missing here).
// Forwards to Converter::GetFlags() for a converter that passes Validate().
// NOTE(review): sourceFlags and targetFlags are dereferenced without null checks.
1994 WINAPI
1996  TECkit_Converter converter,
1997  UInt32* sourceFlags,
1998  UInt32* targetFlags)
1999 {
2001  Converter* cnv = reinterpret_cast<Converter*>(converter);
2002  if (!Converter::Validate(cnv))
2004  else
2005  cnv->GetFlags(*sourceFlags, *targetFlags);
2006  return status;
2007 }
2008 
// TECkit_ResetConverter (per the cross-reference index; declaration lines are missing here).
// Calls Converter::Reset() on a converter that passes Validate().
2010 WINAPI
2012  TECkit_Converter converter)
2013 {
2015  Converter* cnv = reinterpret_cast<Converter*>(converter);
2016  if (!Converter::Validate(cnv))
2018  else
2019  cnv->Reset();
2020  return status;
2021 }
2022 
// TECkit_ConvertBufferOpt (per the cross-reference index; declaration lines are missing here).
// Validates the handle and forwards all arguments unchanged to Converter::ConvertBufferOpt(),
// returning its status.
2024 WINAPI
2026  TECkit_Converter converter,
2027  const Byte* inBuffer,
2028  UInt32 inLength,
2029  UInt32* inUsed,
2030  Byte* outBuffer,
2031  UInt32 outLength,
2032  UInt32* outUsed,
2033  UInt32 inOptions,
2034  UInt32* lookaheadCount)
2035 {
2037  Converter* cnv = reinterpret_cast<Converter*>(converter);
2038  if (!Converter::Validate(cnv))
2040  else
2041  status = cnv->ConvertBufferOpt(inBuffer, inLength, inUsed, outBuffer, outLength, outUsed, inOptions, lookaheadCount);
2042  return status;
2043 }
2044 
// TECkit_ConvertBuffer (per the cross-reference index; declaration lines are missing here).
// Convenience form of TECkit_ConvertBufferOpt() that takes an inputIsComplete flag instead
// of an options word. The trailing arguments of the forwarded call (listing line 2058)
// are missing from this dump, so how the flag is translated is not visible here.
2046 WINAPI
2048  TECkit_Converter converter,
2049  const Byte* inBuffer,
2050  UInt32 inLength,
2051  UInt32* inUsed,
2052  Byte* outBuffer,
2053  UInt32 outLength,
2054  UInt32* outUsed,
2055  Byte inputIsComplete)
2056 {
2057  return TECkit_ConvertBufferOpt(converter, inBuffer, inLength, inUsed, outBuffer, outLength, outUsed,
2059 }
2060 
// TECkit_FlushOpt (per the cross-reference index; declaration lines are missing here).
// Runs a conversion with no new input and kOptionsComplete_InputIsComplete added to the
// caller's options, so that whatever the pipeline still holds is written to outBuffer.
2062 WINAPI
2064  TECkit_Converter converter,
2065  Byte* outBuffer,
2066  UInt32 outLength,
2067  UInt32* outUsed,
2068  UInt32 inOptions,
2069  UInt32* lookaheadCount)
2070 {
2072  Converter* cnv = reinterpret_cast<Converter*>(converter);
2073  if (!Converter::Validate(cnv))
2075  else
// empty input buffer and no inUsed out-parameter: only pending data is processed
2076  status = cnv->ConvertBufferOpt(0, 0, 0, outBuffer, outLength, outUsed,
2077  inOptions | kOptionsComplete_InputIsComplete, lookaheadCount);
2078  return status;
2079 }
2080 
// TECkit_Flush (per the cross-reference index; declaration lines are missing here).
// Calls TECkit_FlushOpt() with kOptionsUnmapped_UseReplacementCharSilently as the options
// and no lookahead-count out-parameter.
2082 WINAPI
2084  TECkit_Converter converter,
2085  Byte* outBuffer,
2086  UInt32 outLength,
2087  UInt32* outUsed)
2088 {
2089  return TECkit_FlushOpt(converter, outBuffer, outLength, outUsed, kOptionsUnmapped_UseReplacementCharSilently, 0);
2090 }
2091 
// TECkit_GetMappingFlags (per the cross-reference index; declaration lines are missing here).
// Reads formFlagsLHS / formFlagsRHS from a mapping without creating a converter,
// decompressing just the fixed header first when the mapping is compressed.
// Several status assignments and the declaration of "header" are on lines absent from
// this listing.
// NOTE(review): lhsFlags and rhsFlags are dereferenced without null checks, and
// mappingSize is not checked against the header size before the header is read.
2093 WINAPI
2095  Byte* mapping,
2096  UInt32 mappingSize,
2097  UInt32* lhsFlags,
2098  UInt32* rhsFlags)
2099 {
2101  if (mapping == 0)
2103  else {
2104  const FileHeader* fh = reinterpret_cast<const FileHeader*>(mapping);
2106  if (READ(fh->type) == kMagicNumberCmp) {
2107  // compressed mapping, so we need to decompress enough of it to read the flags
2108  unsigned long uncompressedLen = sizeof(FileHeader);
2109  int result = uncompress(reinterpret_cast<Byte*>(&header), &uncompressedLen, mapping + 2 * sizeof(UInt32), mappingSize - 2 * sizeof(UInt32));
// the output buffer holds only the header, so Z_BUF_ERROR (output full) is presumably the
// expected result here and anything else is treated as a failure - confirm against zlib docs
2110  if (result != Z_BUF_ERROR)
2112  fh = &header;
2113  }
2114  if (status == kStatus_NoError && READ(fh->type) == kMagicNumber) {
2115  if ((READ(fh->version) & 0xFFFF0000) > (kCurrentFileVersion & 0xFFFF0000))
2117  else {
2118  *lhsFlags = READ(fh->formFlagsLHS);
2119  *rhsFlags = READ(fh->formFlagsRHS);
2120  }
2121  }
2122  else
2124  }
2125  return status;
2126 }
2127 
// TECkit_GetMappingName (per the cross-reference index; declaration lines are missing here).
// Looks up the name record nameID directly in a mapping (compressed or not), stores its
// full length in *nameLength and copies at most bufferSize bytes into nameBuffer.
// For a compressed mapping the header portion is decompressed into a temporary malloc'ed
// buffer, which is freed before returning.
// Several status assignments and the declaration of "header" are on lines absent from
// this listing.
2129 WINAPI
2131  Byte* mapping,
2132  UInt32 mappingSize,
2133  UInt16 nameID,
2134  Byte* nameBuffer,
2135  UInt32 bufferSize,
2136  UInt32* nameLength)
2137 {
2138  void* buf = 0;
2140  if (mapping == 0)
2142  else {
2143  const FileHeader* fh = reinterpret_cast<const FileHeader*>(mapping);
2145  if (READ(fh->type) == kMagicNumberCmp) {
2146  // compressed mapping, so we need to decompress the fixed header to read the headerLength field,
2147  // and then decompress the complete header to get the names
2148  unsigned long uncompressedLen = sizeof(FileHeader);
2149  int result = uncompress(reinterpret_cast<Byte*>(&header), &uncompressedLen, mapping + 2 * sizeof(UInt32), mappingSize - 2 * sizeof(UInt32));
// as in TECkit_GetMappingFlags: a deliberately short output buffer, so Z_BUF_ERROR is
// presumably the expected result - confirm
2150  if (result != Z_BUF_ERROR)
2152  else {
2153  fh = &header;
// second pass: decompress headerLength bytes, which per the comment above include the names
2154  uncompressedLen = READ(fh->headerLength);
2155  buf = malloc(uncompressedLen);
2156  if (buf == 0)
2158  else {
2159  result = uncompress(static_cast<Byte*>(buf), &uncompressedLen, mapping + 2 * sizeof(UInt32), mappingSize - 2 * sizeof(UInt32));
2160  if (result != Z_BUF_ERROR)
2162  fh = static_cast<const FileHeader*>(buf);
2163  }
2164  }
2165  }
2166  if (status == kStatus_NoError && READ(fh->type) == kMagicNumber) {
2167  if ((READ(fh->version) & 0xFFFF0000) > (kCurrentFileVersion & 0xFFFF0000))
2169  else {
2170  const Byte* namePtr;
2171  if (getNamePtrFromTable(reinterpret_cast<const Byte*>(fh), nameID, namePtr, *nameLength)) {
// NOTE(review): copyBytes is UInt16 while both operands are UInt32, so a length of
// 65536 or more is truncated before the copy - confirm whether UInt32 was intended
2172  UInt16 copyBytes = *nameLength < bufferSize ? *nameLength : bufferSize;
2173  if (copyBytes > 0)
2174  memcpy(nameBuffer, namePtr, copyBytes);
2175  }
2176  else
2178  }
2179  }
2180  else
// release the temporary decompression buffer (only allocated for compressed mappings)
2182  if (buf != 0)
2183  free(buf);
2184  }
2185  return status;
2186 }
2187 
// TECkit_GetVersion (per the cross-reference index; the name line is missing from this listing).
// Returns the kCurrentTECkitVersion constant.
2188 UInt32
2189 WINAPI
2191 {
2192  return kCurrentTECkitVersion;
2193 }
UInt32 kInvalidChar
Definition: Compiler.cpp:41
int halfShift
Definition: Engine.cpp:1468
#define SCount
Definition: Engine.cpp:145
UInt32 byteMask
Definition: Engine.cpp:1474
UInt32 kSurrogateHighStart
Definition: Engine.cpp:1471
#define DATA(x)
TECkit_Status WINAPI TECkit_CreateConverter(Byte *mapping, UInt32 mappingSize, Byte mapForward, UInt16 inputForm, UInt16 outputForm, TECkit_Converter *converter)
Definition: Engine.cpp:1925
TECkit_Status WINAPI TECkit_ConvertBufferOpt(TECkit_Converter converter, Byte *inBuffer, UInt32 inLength, UInt32 *inUsed, Byte *outBuffer, UInt32 outLength, UInt32 *outUsed, UInt32 inOptions, UInt32 *lookaheadCount)
Definition: Engine.cpp:2025
TECkit_Status WINAPI TECkit_GetMappingFlags(Byte *mapping, UInt32 mappingSize, UInt32 *lhsFlags, UInt32 *rhsFlags)
Definition: Engine.cpp:2094
TECkit_Status WINAPI TECkit_GetConverterName(TECkit_Converter converter, UInt16 nameID, Byte *nameBuffer, UInt32 bufferSize, UInt32 *nameLength)
Definition: Engine.cpp:1969
#define LCount
Definition: Engine.cpp:141
#define matchNo
Definition: Engine.cpp:633
TECkit_Status WINAPI TECkit_GetConverterFlags(TECkit_Converter converter, UInt32 *sourceFlags, UInt32 *targetFlags)
Definition: Engine.cpp:1995
#define matchYes
Definition: Engine.cpp:632
UInt32 kSurrogateHighEnd
Definition: Engine.cpp:1472
static UInt8 bytesFromUTF8[256]
Definition: Engine.cpp:1452
UInt32 kSurrogateLowStart
Definition: Engine.cpp:1473
TECkit_Status WINAPI TECkit_GetMappingName(Byte *mapping, UInt32 mappingSize, UInt16 nameID, Byte *nameBuffer, UInt32 bufferSize, UInt32 *nameLength)
Definition: Engine.cpp:2130
TECkit_Status WINAPI TECkit_ConvertBuffer(TECkit_Converter converter, Byte *inBuffer, UInt32 inLength, UInt32 *inUsed, Byte *outBuffer, UInt32 outLength, UInt32 *outUsed, Byte inputIsComplete)
Definition: Engine.cpp:2047
TECkit_Status WINAPI TECkit_ResetConverter(TECkit_Converter converter)
Definition: Engine.cpp:2011
UInt32 halfBase
Definition: Engine.cpp:1469
#define CHECK_AVAIL(x)
#define LBase
Definition: Engine.cpp:138
static bool getNamePtrFromTable(Byte *table, UInt16 nameID, Byte *&outNamePtr, UInt32 &outNameLen)
Definition: Engine.cpp:1701
#define RETURN(x)
Definition: Engine.cpp:630
TECkit_Status WINAPI TECkit_Flush(TECkit_Converter converter, Byte *outBuffer, UInt32 outLength, UInt32 *outUsed)
Definition: Engine.cpp:2083
#define TBase
Definition: Engine.cpp:140
UInt32 halfMask
Definition: Engine.cpp:1470
#define VBase
Definition: Engine.cpp:139
TECkit_Status WINAPI TECkit_FlushOpt(TECkit_Converter converter, Byte *outBuffer, UInt32 outLength, UInt32 *outUsed, UInt32 inOptions, UInt32 *lookaheadCount)
Definition: Engine.cpp:2063
#define NCount
Definition: Engine.cpp:144
UInt8 READ(UInt8 p)
Definition: Engine.cpp:74
static T * binary_search(T *array, UInt32 count, UInt32 value)
Definition: Engine.cpp:564
#define SBase
Definition: Engine.cpp:137
#define TCount
Definition: Engine.cpp:143
TECkit_Status WINAPI TECkit_DisposeConverter(TECkit_Converter converter)
Definition: Engine.cpp:1955
static UInt8 firstByteMark[7]
Definition: Engine.cpp:1464
UInt32 WINAPI TECkit_GetVersion()
Definition: Engine.cpp:2190
UInt32 byteMark
Definition: Engine.cpp:1475
#define VCount
Definition: Engine.cpp:142
static UInt32 offsetsFromUTF8[6]
Definition: Engine.cpp:1442
const UInt32 kNeedMoreInput
Definition: Engine.h:25
const UInt32 kUnmappedChar
Definition: Engine.h:27
const UInt8 ccPlaneMap[]
const UInt32 cComposites[380][67]
const UInt8 ccPageMaps[][256]
const UInt8 dcPlaneMap[]
const UInt32 dcDecomposition[][2]
const UInt8 cLPlaneMap[]
const UInt16 cLCharIndex[][256]
const UInt16 dcCharIndex[][256]
const UInt8 cRCharIndex[][256]
const UInt8 cRPlaneMap[]
const UInt8 cRPageMaps[][256]
const UInt8 dcPageMaps[][256]
const UInt8 ccCharClass[][256]
const UInt8 cLPageMaps[][256]
#define WINAPI
#define kMatchElem_NonLit
#define kLookupType_StringRules
#define kLookupType_IllegalDBCS
#define kMatchElem_Negate
#define kLookupType_RuleTypeMask
#define kTableType_NFD
Definition: TECkit_Format.h:88
#define kMatchElem_Type_EGroup
#define kRepElem_Unmapped
#define kMatchElem_Type_ANY
#define kTableType_BU
Definition: TECkit_Format.h:83
#define kMatchElem_Type_Class
#define kTableType_BB
Definition: TECkit_Format.h:82
#define kTableFlags_Supplementary
Definition: TECkit_Format.h:90
#define kMatchElem_Type_OR
#define kMatchElem_Type_Copy
#define kMagicNumberCmp
Definition: TECkit_Format.h:22
#define kMatchElem_Type_EOS
#define kLookupType_ExtStringRules
#define kUSVMask
#define kCurrentFileVersion
Definition: TECkit_Format.h:25
#define kTableType_UU
Definition: TECkit_Format.h:85
#define kMagicNumber
Definition: TECkit_Format.h:21
#define kRepElem_Class
#define kRepElem_Literal
#define kTableType_NFC
Definition: TECkit_Format.h:87
#define kTableType_UB
Definition: TECkit_Format.h:84
#define kMatchElem_Type_BGroup
#define kRepElem_Copy
#define kLookupType_Unmapped
#define kLookupType_ExtRuleCountMask
#define kMatchElem_TypeMask
struct @88 table[500]
int lookup(const char *)
#define negate(x)
Definition: aptex-macros.h:51
#define type(a)
Definition: aptex-macros.h:171
#define count(a)
Definition: aptex-macros.h:781
int cmp(const void *p, const void *q)
Definition: bkmk2uni.c:1611
void appendChar(UInt32 c)
Definition: Engine.cpp:269
void insertChar(UInt32 insCh, int insCombClass)
Definition: Engine.cpp:279
int prevCombClass
Definition: Engine.h:78
void growOutBuf()
Definition: Engine.cpp:304
void compose()
Definition: normlzr.cpp:163
void decompose(UInt32 c)
Definition: normlzr.cpp:171
void generateChar(UInt32 c)
Definition: Engine.cpp:205
UInt32 process()
Definition: Engine.cpp:148
long oBufSafe
Definition: Engine.h:79
virtual ~Normalizer()
Definition: normlzr.cpp:91
Normalizer(bool compose)
Definition: normlzr.cpp:40
UInt32 decomposeOne(UInt32 &c)
Definition: Engine.cpp:191
bool bCompose
Definition: Engine.h:81
virtual UInt32 getChar()
Definition: Engine.cpp:371
virtual void Reset()
Definition: Engine.cpp:173
void outputChar(UInt32 c)
Definition: Engine.cpp:451
MatchInfo info[256]
Definition: Engine.h:122
const Byte * stringListBase
Definition: Engine.h:141
virtual void Reset()
Definition: Engine.cpp:431
const Byte * stringRuleData
Definition: Engine.h:142
long iBufEnd
Definition: Engine.h:148
bool bInputIsUnicode
Definition: Engine.h:151
int patternLength
Definition: Engine.h:120
UInt32 repClassMember(UInt32 classNumber, UInt32 index) const
Definition: Engine.cpp:609
UInt32 match(int index, int repeats, int textLoc)
Definition: Engine.cpp:635
const Byte * planeMap
Definition: Engine.h:143
void advanceInput(unsigned int numChars)
Definition: Engine.cpp:540
const Byte * pageBase
Definition: Engine.h:137
UInt32 DoMapping()
Definition: Engine.cpp:985
UInt32 inputChar(long inIndex)
Definition: Engine.cpp:472
bool bSupplementaryChars
Definition: Engine.h:153
int groupRepeats
Definition: Engine.h:127
int direction
Definition: Engine.h:121
Pass(const TableHeader *inTable, Converter *cnv)
virtual UInt32 getChar()
Definition: Engine.cpp:438
long iBufPtr
Definition: Engine.h:149
int infoLimit
Definition: Engine.h:123
const TableHeader * tableHeader
Definition: Engine.h:135
Converter * converter
Definition: Engine.h:134
int matchedLength
Definition: Engine.h:125
const Byte * repClassBase
Definition: Engine.h:140
bool bOutputIsUnicode
Definition: Engine.h:152
long classMatch(UInt32 classNumber, UInt32 inChar) const
Definition: Engine.cpp:581
virtual ~Pass()
Definition: Engine.cpp:424
const Byte * matchClassBase
Definition: Engine.h:139
long iBufSize
Definition: Engine.h:146
UInt32 * iBuffer
Definition: Engine.h:145
virtual UInt32 lookaheadCount() const
Definition: Engine.cpp:461
int matchElems
Definition: Engine.h:124
UInt8 numPageMaps
Definition: Engine.h:154
long iBufStart
Definition: Engine.h:147
sgrStackItem * sgrStack
Definition: Engine.h:132
const Lookup * lookupBase
Definition: Engine.h:138
Definition: Engine.h:33
virtual UInt32 lookaheadCount() const
Definition: Engine.cpp:115
long oBufEnd
Definition: Engine.h:49
long oBufSize
Definition: Engine.h:48
Stage * prevStage
Definition: Engine.h:52
long oBufPtr
Definition: Engine.h:50
virtual UInt32 getChar()=0
Stage()
Definition: Engine.cpp:99
friend class Converter
Definition: Engine.h:45
UInt32 * oBuffer
Definition: Engine.h:47
virtual ~Stage()
Definition: Engine.cpp:108
#define n
Definition: t4ht.c:1290
UConverter * cnv
Definition: date.c:55
#define free(a)
Definition: decNumber.cpp:310
int v
Definition: dviconv.c:10
bool compose
Definition: dvistuff.c:161
#define T
Definition: fmt.h:20
#define s
Definition: afcover.h:80
#define c(n)
Definition: gpos-common.c:150
#define memcpy(d, s, n)
Definition: gsftopk.c:64
unsigned int UInt32
Definition: bzip2.c:165
static void uncompress(Char *name)
Definition: bzip2.c:1314
unsigned short UInt16
Definition: bzip2.c:167
small capitals from c petite p
Definition: afcover.h:72
small capitals from c petite p scientific i
Definition: afcover.h:80
#define const
Definition: ftzconf.h:91
unsigned char Byte
Definition: ftzconf.h:219
#define Z_BUF_ERROR
Definition: zlib.h:139
#define Z_OK
Definition: zlib.h:132
#define buf
union hdr header
Definition: pbmtomacp.c:291
#define malloc
Definition: alloca.c:91
#define sprintf
Definition: snprintf.c:44
int low
Definition: combiners.h:904
#define target(code, i)
Definition: lpeg.c:1165
constexpr T && forward(remove_reference_t< T > &t) noexcept
Definition: variant.hpp:390
STL namespace.
#define index(s, c)
Definition: plain2.h:351
static int32_t last
Definition: ppagelist.c:29
int r
Definition: ppmqvga.c:68
#define status
#define flags
ShellFileEnvironment e
Definition: sh6.c:388
static unsigned char pageMap[]
Definition: slnudata.c:70
#define kStatus_InvalidConverter
Definition: TECkit_Common.h:73
#define kStatus_NeedMoreInput
Definition: TECkit_Common.h:60
#define kForm_EncodingFormMask
Definition: TECkit_Common.h:85
#define kStatus_Exception
Definition: TECkit_Common.h:76
#define kStatus_NameNotFound
Definition: TECkit_Common.h:77
#define kStatus_InvalidForm
Definition: TECkit_Common.h:71
#define kStatus_UnmappedChar
Definition: TECkit_Common.h:63
#define kForm_UTF32BE
Definition: TECkit_Common.h:91
long TECkit_Status
Definition: TECkit_Common.h:49
#define kStatus_IncompleteChar
Definition: TECkit_Common.h:78
#define kStatus_InvalidMapping
Definition: TECkit_Common.h:74
#define kForm_Bytes
Definition: TECkit_Common.h:87
unsigned char UInt8
Definition: TECkit_Common.h:32
#define kStatus_OutOfMemory
Definition: TECkit_Common.h:80
#define kStatusMask_Basic
Definition: TECkit_Common.h:58
#define kStatus_OutputBufferFull
Definition: TECkit_Common.h:59
#define kStatus_BadMappingVersion
Definition: TECkit_Common.h:75
#define kForm_UTF16BE
Definition: TECkit_Common.h:89
#define kCurrentTECkitVersion
Definition: TECkit_Common.h:28
#define kForm_UTF32LE
Definition: TECkit_Common.h:92
#define kStatus_NoError
Definition: TECkit_Common.h:54
#define kForm_UTF16LE
Definition: TECkit_Common.h:90
#define kStatus_UsedReplacement
Definition: TECkit_Common.h:68
#define kForm_UTF8
Definition: TECkit_Common.h:88
#define kOptionsUnmapped_DontUseReplacementChar
#define kEndOfText
Definition: TECkit_Engine.h:68
#define kOptionsMask_InputComplete
#define kOptionsMask_UnmappedBehavior
#define kOptionsComplete_InputIsComplete
struct Opaque_TECkit_Converter * TECkit_Converter
Definition: TECkit_Engine.h:73
#define kFlags_ExpectsNFD
Definition: TECkit_Engine.h:35
#define kForm_NFD
Definition: TECkit_Engine.h:63
#define kOptionsUnmapped_UseReplacementCharSilently
#define kFlags_Unicode
Definition: TECkit_Engine.h:43
#define kOptionsUnmapped_UseReplacementCharWithWarning
#define kForm_NFC
Definition: TECkit_Engine.h:62
#define kForm_NormalizationMask
Definition: TECkit_Engine.h:61
#define kFlags_ExpectsNFC
Definition: TECkit_Engine.h:34
UInt32 dataPtr
Definition: Engine.h:204
bool forward
Definition: Engine.h:209
virtual void Reset()
Definition: Engine.cpp:1894
Stage * finalStage
Definition: Engine.h:201
TECkit_Status ConvertBufferOpt(const Byte *inBuffer, UInt32 inLength, UInt32 *inUsed, Byte *outBuffer, UInt32 outLength, UInt32 *outUsed, UInt32 inOptions, UInt32 *lookaheadCount)
Definition: Engine.cpp:1723
void GetFlags(UInt32 &sourceFlags, UInt32 &targetFlags) const
Definition: Engine.cpp:1687
UInt32 warningStatus
Definition: Engine.h:220
friend class Pass
Definition: Engine.h:192
UInt32 pendingOutputChar
Definition: Engine.h:217
UInt32 savedCount
Definition: Engine.h:215
friend class Normalizer
Definition: Engine.h:193
bool inputComplete
Definition: Engine.h:206
UInt32 dataLen
Definition: Engine.h:205
const Byte * data
Definition: Engine.h:203
bool IsForward() const
Definition: Engine.cpp:1681
void _savePendingBytes()
Definition: Engine.cpp:1673
long status
Definition: Engine.h:218
bool GetNamePtr(UInt16 inNameID, const Byte *&outNamePtr, UInt32 &outNameLen) const
Definition: Engine.cpp:1717
static bool Validate(const Converter *cnv)
Definition: Engine.cpp:1909
~Converter()
Definition: Engine.cpp:1430
UInt32 _getCharFn()
Definition: Engine.cpp:1488
Byte unmappedBehavior
Definition: Engine.h:207
Byte outputForm
Definition: Engine.h:212
UInt32 _getCharWithSavedBytes()
Definition: Engine.cpp:1583
Byte inputForm
Definition: Engine.h:211
virtual UInt32 getChar()
Definition: Engine.cpp:1478
Byte savedBytes[8]
Definition: Engine.h:214
Byte * table
Definition: Engine.h:199
UInt32 numNames
Definition: TECkit_Format.h:46
UInt32 numRevTables
Definition: TECkit_Format.h:48
UInt32 formFlagsLHS
Definition: TECkit_Format.h:44
UInt32 formFlagsRHS
Definition: TECkit_Format.h:45
UInt32 type
Definition: TECkit_Format.h:41
UInt32 numFwdTables
Definition: TECkit_Format.h:47
UInt32 headerLength
Definition: TECkit_Format.h:43
UInt32 version
Definition: TECkit_Format.h:42
UInt16 start
Definition: Engine.h:112
int groupRepeats
Definition: Engine.h:110
UInt16 limit
Definition: Engine.h:113
struct Pass::MatchInfo::@1414 matchedSpan
UInt32 classIndex
Definition: Engine.h:109
UInt8 maxOutput
Definition: TECkit_Format.h:75
UInt8 maxMatch
Definition: TECkit_Format.h:72
UInt32 replacementChar
Definition: TECkit_Format.h:76
Definition: mendex.h:20
Definition: zic.c:306
uint16 type
Definition: parsettfatt.c:198
Definition: mendex.h:14
Definition: rule.h:21
Definition: dvips.h:235
Definition: table.h:30
ch
Definition: t4ht.c:1443
int j
Definition: t4ht.c:1589
m
Definition: tex4ht.c:3990
page
Definition: tex4ht.c:3916
return() int(((double) *(font_tbl[cur_fnt].wtbl+(int)(*(font_tbl[cur_fnt].char_wi+(int)(ch - font_tbl[cur_fnt].char_f)% 256)))/(double)(1L<< 20)) *(double) font_tbl[cur_fnt].scale)
int mapping
Definition: ttf2pfb.c:116
void printMatch()
Definition: ugrep.cpp:423
Definition: obx.h:51