w32tex
About: TeX Live provides a comprehensive TeX system including all the major TeX-related programs, macro packages, and fonts that are free software. Windows sources.
  Fossies Dox: w32tex-src.tar.xz  ("unofficial" and yet experimental doxygen-generated source code documentation)  

unames.cpp
Go to the documentation of this file.
1 // © 2016 and later: Unicode, Inc. and others.
2 // License & terms of use: http://www.unicode.org/copyright.html
3 /*
4 ******************************************************************************
5 *
6 * Copyright (C) 1999-2014, International Business Machines
7 * Corporation and others. All Rights Reserved.
8 *
9 ******************************************************************************
10 * file name: unames.c
11 * encoding: UTF-8
12 * tab size: 8 (not used)
13 * indentation:4
14 *
15 * created on: 1999oct04
16 * created by: Markus W. Scherer
17 */
18 
19 #include "unicode/utypes.h"
20 #include "unicode/putil.h"
21 #include "unicode/uchar.h"
22 #include "unicode/udata.h"
23 #include "unicode/utf.h"
24 #include "unicode/utf16.h"
25 #include "uassert.h"
26 #include "ustr_imp.h"
27 #include "umutex.h"
28 #include "cmemory.h"
29 #include "cstring.h"
30 #include "ucln_cmn.h"
31 #include "udataswp.h"
32 #include "uprops.h"
33 
35 
36 /* prototypes ------------------------------------------------------------- */
37 
38 static const char DATA_NAME[] = "unames";
39 static const char DATA_TYPE[] = "icu";
40 
41 #define GROUP_SHIFT 5
42 #define LINES_PER_GROUP (1L<<GROUP_SHIFT)
43 #define GROUP_MASK (LINES_PER_GROUP-1)
44 
45 /*
46  * This struct was replaced by explicitly accessing equivalent
47  * fields from triples of uint16_t.
48  * The Group struct was padded to 8 bytes on compilers for early ARM CPUs,
49  * which broke the assumption that sizeof(Group)==6 and that the ++ operator
50  * would advance by 6 bytes (3 uint16_t).
51  *
52  * We can't just change the data structure because it's loaded from a data file,
53  * and we don't want to make it less compact, so we changed the access code.
54  *
55  * For details see ICU tickets 6331 and 6008.
56 typedef struct {
57  uint16_t groupMSB,
58  offsetHigh, offsetLow; / * avoid padding * /
59 } Group;
60  */
61 enum {
66 };
67 
/*
 * Get the 32-bit group offset.
 * The offset is stored as two uint16_t halves (high, then low) inside a
 * Group triple; this macro reassembles them.
 *
 * The high half is widened to uint32_t before it is shifted so that a high
 * half >= 0x8000 does not overflow a signed left shift; only the finished
 * value is converted to int32_t.
 *
 * @param group (const uint16_t *) pointer to a Group triple of uint16_t
 * @return group offset (int32_t)
 */
#define GET_GROUP_OFFSET(group) ((int32_t)((uint32_t)(group)[GROUP_OFFSET_HIGH]<<16|(group)[GROUP_OFFSET_LOW]))

/* Step a group pointer forward/backward by one Group triple (GROUP_LENGTH uint16_t units). */
#define NEXT_GROUP(group) ((group)+GROUP_LENGTH)
#define PREV_GROUP(group) ((group)-GROUP_LENGTH)
77 
78 typedef struct {
80  uint8_t type, variant;
83 
/*
 * Index header at the start of the loaded names data.
 * Each field is a byte offset that is added to the address of this
 * struct itself to reach the corresponding table.
 */
typedef struct {
    uint32_t tokenStringOffset;  /* start of the token strings */
    uint32_t groupsOffset;       /* start of the groups table (see GET_GROUPS) */
    uint32_t groupStringOffset;  /* base for group strings; the per-group offset is added to it */
    uint32_t algNamesOffset;     /* presumably the algorithmic-name ranges - TODO confirm against the loader */
} UCharNames;
87 
/*
 * Get the groups table from a UCharNames struct.
 * The groups table consists of one uint16_t groupCount followed by
 * groupCount groups. Each group is a triple of uint16_t, see GROUP_LENGTH
 * and the comment for the old struct Group above.
 *
 * The argument and the whole expansion are parenthesized so that the macro
 * can be used with any pointer expression and can be indexed directly.
 *
 * @param names (const UCharNames *) pointer to the UCharNames indexes
 * @return (const uint16_t *) pointer to the groups table
 */
#define GET_GROUPS(names) ((const uint16_t *)((const char *)(names)+(names)->groupsOffset))
98 
99 typedef struct {
100  const char *otherName;
102 } FindName;
103 
104 #define DO_FIND_NAME NULL
105 
109 
110 /*
111  * Maximum length of character names (regular & 1.0).
112  */
114 
115 /*
116  * Set of chars used in character names (regular & 1.0).
117  * Chars are platform-dependent (can be EBCDIC).
118  */
119 static uint32_t gNameSet[8]={ 0 };
120 
121 #define U_NONCHARACTER_CODE_POINT U_CHAR_CATEGORY_COUNT
122 #define U_LEAD_SURROGATE U_CHAR_CATEGORY_COUNT + 1
123 #define U_TRAIL_SURROGATE U_CHAR_CATEGORY_COUNT + 2
124 
125 #define U_CHAR_EXTENDED_CATEGORY_COUNT (U_CHAR_CATEGORY_COUNT + 3)
126 
/*
 * Lower-case display names for character categories, indexed by the
 * category value that getCharCatName() looks up.
 * The last three entries belong to the extended categories defined just
 * above (U_NONCHARACTER_CODE_POINT, U_LEAD_SURROGATE, U_TRAIL_SURROGATE),
 * which are U_CHAR_CATEGORY_COUNT + 0, + 1 and + 2.
 * NOTE(review): the preceding entries presumably follow the order of the
 * general-category enum returned by u_charType() - confirm against uchar.h
 * whenever a category is added.
 */
127 static const char * const charCatNames[U_CHAR_EXTENDED_CATEGORY_COUNT] = {
128  "unassigned",
129  "uppercase letter",
130  "lowercase letter",
131  "titlecase letter",
132  "modifier letter",
133  "other letter",
134  "non spacing mark",
135  "enclosing mark",
136  "combining spacing mark",
137  "decimal digit number",
138  "letter number",
139  "other number",
140  "space separator",
141  "line separator",
142  "paragraph separator",
143  "control",
144  "format",
145  "private use area",
146  "surrogate",
147  "dash punctuation",
148  "start punctuation",
149  "end punctuation",
150  "connector punctuation",
151  "other punctuation",
152  "math symbol",
153  "currency symbol",
154  "modifier symbol",
155  "other symbol",
156  "initial punctuation",
157  "final punctuation",
158  "noncharacter",
159  "lead surrogate",
160  "trail surrogate"
161 };
162 
163 /* implementation ----------------------------------------------------------- */
164 
166 {
167  if(uCharNamesData) {
170  }
171  if(uCharNames) {
172  uCharNames = NULL;
173  }
174  gCharNamesInitOnce.reset();
175  gMaxNameLength=0;
176  return TRUE;
177 }
178 
179 static UBool U_CALLCONV
180 isAcceptable(void * /*context*/,
181  const char * /*type*/, const char * /*name*/,
182  const UDataInfo *pInfo) {
183  return (UBool)(
184  pInfo->size>=20 &&
185  pInfo->isBigEndian==U_IS_BIG_ENDIAN &&
187  pInfo->dataFormat[0]==0x75 && /* dataFormat="unam" */
188  pInfo->dataFormat[1]==0x6e &&
189  pInfo->dataFormat[2]==0x61 &&
190  pInfo->dataFormat[3]==0x6d &&
191  pInfo->formatVersion[0]==1);
192 }
193 
194 static void U_CALLCONV
198 
200  if(U_FAILURE(status)) {
202  } else {
204  }
206 }
207 
208 
209 static UBool
210 isDataLoaded(UErrorCode *pErrorCode) {
212  return U_SUCCESS(*pErrorCode);
213 }
214 
/*
 * Append one character to a bounded output buffer.
 * The character is stored, buffer advanced and bufferLength decremented
 * only while bufferLength is still > 0. bufferPos is incremented in every
 * case, so after a sequence of calls it holds the full, untruncated length
 * of the output even when the buffer was too small.
 * buffer, bufferLength and bufferPos are modified in place and must be
 * lvalues.
 */
215 #define WRITE_CHAR(buffer, bufferLength, bufferPos, c) UPRV_BLOCK_MACRO_BEGIN { \
216  if((bufferLength)>0) { \
217  *(buffer)++=c; \
218  --(bufferLength); \
219  } \
220  ++(bufferPos); \
221 } UPRV_BLOCK_MACRO_END
222 
223 #define U_ISO_COMMENT U_CHAR_NAME_CHOICE_COUNT
224 
225 /*
226  * Important: expandName() and compareName() are almost the same -
227  * apply fixes to both.
228  *
229  * UnicodeData.txt uses ';' as a field separator, so no
230  * field can contain ';' as part of its contents.
231  * In unames.dat, it is marked as token[';']==-1 only if the
232  * semicolon is used in the data file - which is iff we
233  * have Unicode 1.0 names or ISO comments or aliases.
234  * So, it will be token[';']==-1 if we store U1.0 names/ISO comments/aliases
235  * although we know that it will never be part of a name.
236  */
237 static uint16_t
239  const uint8_t *name, uint16_t nameLength, UCharNameChoice nameChoice,
240  char *buffer, uint16_t bufferLength) {
241  uint16_t *tokens=(uint16_t *)names+8;
242  uint16_t token, tokenCount=*tokens++, bufferPos=0;
243  uint8_t *tokenStrings=(uint8_t *)names+names->tokenStringOffset;
244  uint8_t c;
245 
246  if(nameChoice!=U_UNICODE_CHAR_NAME && nameChoice!=U_EXTENDED_CHAR_NAME) {
247  /*
248  * skip the modern name if it is not requested _and_
249  * if the semicolon byte value is a character, not a token number
250  */
251  if((uint8_t)';'>=tokenCount || tokens[(uint8_t)';']==(uint16_t)(-1)) {
252  int fieldIndex= nameChoice==U_ISO_COMMENT ? 2 : nameChoice;
253  do {
254  while(nameLength>0) {
255  --nameLength;
256  if(*name++==';') {
257  break;
258  }
259  }
260  } while(--fieldIndex>0);
261  } else {
262  /*
263  * the semicolon byte value is a token number, therefore
264  * only modern names are stored in unames.dat and there is no
265  * such requested alternate name here
266  */
267  nameLength=0;
268  }
269  }
270 
271  /* write each letter directly, and write a token word per token */
272  while(nameLength>0) {
273  --nameLength;
274  c=*name++;
275 
276  if(c>=tokenCount) {
277  if(c!=';') {
278  /* implicit letter */
279  WRITE_CHAR(buffer, bufferLength, bufferPos, c);
280  } else {
281  /* finished */
282  break;
283  }
284  } else {
285  token=tokens[c];
286  if(token==(uint16_t)(-2)) {
287  /* this is a lead byte for a double-byte token */
288  token=tokens[c<<8|*name++];
289  --nameLength;
290  }
291  if(token==(uint16_t)(-1)) {
292  if(c!=';') {
293  /* explicit letter */
294  WRITE_CHAR(buffer, bufferLength, bufferPos, c);
295  } else {
296  /* stop, but skip the semicolon if we are seeking
297  extended names and there was no 2.0 name but there
298  is a 1.0 name. */
299  if(!bufferPos && nameChoice == U_EXTENDED_CHAR_NAME) {
300  if ((uint8_t)';'>=tokenCount || tokens[(uint8_t)';']==(uint16_t)(-1)) {
301  continue;
302  }
303  }
304  /* finished */
305  break;
306  }
307  } else {
308  /* write token word */
309  uint8_t *tokenString=tokenStrings+token;
310  while((c=*tokenString++)!=0) {
311  WRITE_CHAR(buffer, bufferLength, bufferPos, c);
312  }
313  }
314  }
315  }
316 
317  /* zero-terminate */
318  if(bufferLength>0) {
319  *buffer=0;
320  }
321 
322  return bufferPos;
323 }
324 
325 /*
326  * compareName() is almost the same as expandName() except that it compares
327  * the currently expanded name to an input name.
328  * It returns the match/no match result as soon as possible.
329  */
330 static UBool
332  const uint8_t *name, uint16_t nameLength, UCharNameChoice nameChoice,
333  const char *otherName) {
334  uint16_t *tokens=(uint16_t *)names+8;
335  uint16_t token, tokenCount=*tokens++;
336  uint8_t *tokenStrings=(uint8_t *)names+names->tokenStringOffset;
337  uint8_t c;
338  const char *origOtherName = otherName;
339 
340  if(nameChoice!=U_UNICODE_CHAR_NAME && nameChoice!=U_EXTENDED_CHAR_NAME) {
341  /*
342  * skip the modern name if it is not requested _and_
343  * if the semicolon byte value is a character, not a token number
344  */
345  if((uint8_t)';'>=tokenCount || tokens[(uint8_t)';']==(uint16_t)(-1)) {
346  int fieldIndex= nameChoice==U_ISO_COMMENT ? 2 : nameChoice;
347  do {
348  while(nameLength>0) {
349  --nameLength;
350  if(*name++==';') {
351  break;
352  }
353  }
354  } while(--fieldIndex>0);
355  } else {
356  /*
357  * the semicolon byte value is a token number, therefore
358  * only modern names are stored in unames.dat and there is no
359  * such requested alternate name here
360  */
361  nameLength=0;
362  }
363  }
364 
365  /* compare each letter directly, and compare a token word per token */
366  while(nameLength>0) {
367  --nameLength;
368  c=*name++;
369 
370  if(c>=tokenCount) {
371  if(c!=';') {
372  /* implicit letter */
373  if((char)c!=*otherName++) {
374  return FALSE;
375  }
376  } else {
377  /* finished */
378  break;
379  }
380  } else {
381  token=tokens[c];
382  if(token==(uint16_t)(-2)) {
383  /* this is a lead byte for a double-byte token */
384  token=tokens[c<<8|*name++];
385  --nameLength;
386  }
387  if(token==(uint16_t)(-1)) {
388  if(c!=';') {
389  /* explicit letter */
390  if((char)c!=*otherName++) {
391  return FALSE;
392  }
393  } else {
394  /* stop, but skip the semicolon if we are seeking
395  extended names and there was no 2.0 name but there
396  is a 1.0 name. */
397  if(otherName == origOtherName && nameChoice == U_EXTENDED_CHAR_NAME) {
398  if ((uint8_t)';'>=tokenCount || tokens[(uint8_t)';']==(uint16_t)(-1)) {
399  continue;
400  }
401  }
402  /* finished */
403  break;
404  }
405  } else {
406  /* write token word */
407  uint8_t *tokenString=tokenStrings+token;
408  while((c=*tokenString++)!=0) {
409  if((char)c!=*otherName++) {
410  return FALSE;
411  }
412  }
413  }
414  }
415  }
416 
417  /* complete match? */
418  return (UBool)(*otherName==0);
419 }
420 
422  uint8_t cat;
423 
424  if (U_IS_UNICODE_NONCHAR(cp)) {
426  }
427 
428  if ((cat = u_charType(cp)) == U_SURROGATE) {
430  }
431 
432  return cat;
433 }
434 
435 static const char *getCharCatName(UChar32 cp) {
437 
438  /* Return unknown if the table of names above is not up to
439  date. */
440 
441  if (cat >= UPRV_LENGTHOF(charCatNames)) {
442  return "unknown";
443  } else {
444  return charCatNames[cat];
445  }
446 }
447 
448 static uint16_t getExtName(uint32_t code, char *buffer, uint16_t bufferLength) {
449  const char *catname = getCharCatName(code);
450  uint16_t length = 0;
451 
452  UChar32 cp;
453  int ndigits, i;
454 
455  WRITE_CHAR(buffer, bufferLength, length, '<');
456  while (catname[length - 1]) {
457  WRITE_CHAR(buffer, bufferLength, length, catname[length - 1]);
458  }
459  WRITE_CHAR(buffer, bufferLength, length, '-');
460  for (cp = code, ndigits = 0; cp; ++ndigits, cp >>= 4)
461  ;
462  if (ndigits < 4)
463  ndigits = 4;
464  for (cp = code, i = ndigits; (cp || i > 0) && bufferLength; cp >>= 4, bufferLength--) {
465  uint8_t v = (uint8_t)(cp & 0xf);
466  buffer[--i] = (v < 10 ? '0' + v : 'A' + v - 10);
467  }
468  buffer += ndigits;
469  length += static_cast<uint16_t>(ndigits);
470  WRITE_CHAR(buffer, bufferLength, length, '>');
471 
472  return length;
473 }
474 
475 /*
476  * getGroup() does a binary search for the group that contains the
477  * Unicode code point "code".
478  * The return value is always a valid Group* that may contain "code"
479  * or else is the highest group before "code".
480  * If the lowest group is after "code", then that one is returned.
481  */
482 static const uint16_t *
485  uint16_t groupMSB=(uint16_t)(code>>GROUP_SHIFT),
486  start=0,
487  limit=*groups++,
488  number;
489 
490  /* binary search for the group of names that contains the one for code */
491  while(start<limit-1) {
492  number=(uint16_t)((start+limit)/2);
493  if(groupMSB<groups[number*GROUP_LENGTH+GROUP_MSB]) {
494  limit=number;
495  } else {
496  start=number;
497  }
498  }
499 
500  /* return this regardless of whether it is an exact match */
501  return groups+start*GROUP_LENGTH;
502 }
503 
504 /*
505  * expandGroupLengths() reads a block of compressed lengths of 32 strings and
506  * expands them into offsets and lengths for each string.
507  * Lengths are stored with a variable-width encoding in consecutive nibbles:
508  * If a nibble<0xc, then it is the length itself (0=empty string).
509  * If a nibble>=0xc, then it forms a length value with the following nibble.
510  * Calculation see below.
511  * The offsets and lengths arrays must be at least 33 (one more) long because
512  * there is no check here at the end if the last nibble is still used.
513  */
514 static const uint8_t *
517  /* read the lengths of the 32 strings in this group and get each string's offset */
518  uint16_t i=0, offset=0, length=0;
519  uint8_t lengthByte;
520 
521  /* all 32 lengths must be read to get the offset of the first group string */
522  while(i<LINES_PER_GROUP) {
523  lengthByte=*s++;
524 
525  /* read even nibble - MSBs of lengthByte */
526  if(length>=12) {
527  /* double-nibble length spread across two bytes */
528  length=(uint16_t)(((length&0x3)<<4|lengthByte>>4)+12);
529  lengthByte&=0xf;
530  } else if((lengthByte /* &0xf0 */)>=0xc0) {
531  /* double-nibble length spread across this one byte */
532  length=(uint16_t)((lengthByte&0x3f)+12);
533  } else {
534  /* single-nibble length in MSBs */
535  length=(uint16_t)(lengthByte>>4);
536  lengthByte&=0xf;
537  }
538 
539  *offsets++=offset;
540  *lengths++=length;
541 
542  offset+=length;
543  ++i;
544 
545  /* read odd nibble - LSBs of lengthByte */
546  if((lengthByte&0xf0)==0) {
547  /* this nibble was not consumed for a double-nibble length above */
548  length=lengthByte;
549  if(length<12) {
550  /* single-nibble length in LSBs */
551  *offsets++=offset;
552  *lengths++=length;
553 
554  offset+=length;
555  ++i;
556  }
557  } else {
558  length=0; /* prevent double-nibble detection in the next iteration */
559  }
560  }
561 
562  /* now, s is at the first group string */
563  return s;
564 }
565 
566 static uint16_t
568  uint16_t lineNumber, UCharNameChoice nameChoice,
569  char *buffer, uint16_t bufferLength) {
571  const uint8_t *s=(uint8_t *)names+names->groupStringOffset+GET_GROUP_OFFSET(group);
572  s=expandGroupLengths(s, offsets, lengths);
573  return expandName(names, s+offsets[lineNumber], lengths[lineNumber], nameChoice,
574  buffer, bufferLength);
575 }
576 
577 static uint16_t
579  char *buffer, uint16_t bufferLength) {
580  const uint16_t *group=getGroup(names, code);
582  return expandGroupName(names, group, (uint16_t)(code&GROUP_MASK), nameChoice,
583  buffer, bufferLength);
584  } else {
585  /* group not found */
586  /* zero-terminate */
587  if(bufferLength>0) {
588  *buffer=0;
589  }
590  return 0;
591  }
592 }
593 
594 /*
595  * enumGroupNames() enumerates all the names in a 32-group
596  * and either calls the enumerator function or finds a given input name.
597  */
598 static UBool
601  UEnumCharNamesFn *fn, void *context,
602  UCharNameChoice nameChoice) {
604  const uint8_t *s=(uint8_t *)names+names->groupStringOffset+GET_GROUP_OFFSET(group);
605 
606  s=expandGroupLengths(s, offsets, lengths);
607  if(fn!=DO_FIND_NAME) {
608  char buffer[200];
610 
611  while(start<=end) {
612  length=expandName(names, s+offsets[start&GROUP_MASK], lengths[start&GROUP_MASK], nameChoice, buffer, sizeof(buffer));
613  if (!length && nameChoice == U_EXTENDED_CHAR_NAME) {
614  buffer[length = getExtName(start, buffer, sizeof(buffer))] = 0;
615  }
616  /* here, we assume that the buffer is large enough */
617  if(length>0) {
618  if(!fn(context, start, nameChoice, buffer, length)) {
619  return FALSE;
620  }
621  }
622  ++start;
623  }
624  } else {
625  const char *otherName=((FindName *)context)->otherName;
626  while(start<=end) {
627  if(compareName(names, s+offsets[start&GROUP_MASK], lengths[start&GROUP_MASK], nameChoice, otherName)) {
628  ((FindName *)context)->code=start;
629  return FALSE;
630  }
631  ++start;
632  }
633  }
634  return TRUE;
635 }
636 
637 /*
638  * enumExtNames() enumerates extended names.
639  * It only needs to do it if it is called with a real function and not
640  * with the dummy DO_FIND_NAME, because u_charFromName() does a check
641  * for extended names by itself.
642  */
643 static UBool
645  UEnumCharNamesFn *fn, void *context)
646 {
647  if(fn!=DO_FIND_NAME) {
648  char buffer[200];
650 
651  while(start<=end) {
652  buffer[length = getExtName(start, buffer, sizeof(buffer))] = 0;
653  /* here, we assume that the buffer is large enough */
654  if(length>0) {
656  return FALSE;
657  }
658  }
659  ++start;
660  }
661  }
662 
663  return TRUE;
664 }
665 
666 static UBool
669  UEnumCharNamesFn *fn, void *context,
670  UCharNameChoice nameChoice) {
671  uint16_t startGroupMSB, endGroupMSB, groupCount;
672  const uint16_t *group, *groupLimit;
673 
674  startGroupMSB=(uint16_t)(start>>GROUP_SHIFT);
675  endGroupMSB=(uint16_t)((limit-1)>>GROUP_SHIFT);
676 
677  /* find the group that contains start, or the highest before it */
679 
680  if(startGroupMSB<group[GROUP_MSB] && nameChoice==U_EXTENDED_CHAR_NAME) {
681  /* enumerate synthetic names between start and the group start */
682  UChar32 extLimit=((UChar32)group[GROUP_MSB]<<GROUP_SHIFT);
683  if(extLimit>limit) {
684  extLimit=limit;
685  }
686  if(!enumExtNames(start, extLimit-1, fn, context)) {
687  return FALSE;
688  }
689  start=extLimit;
690  }
691 
692  if(startGroupMSB==endGroupMSB) {
693  if(startGroupMSB==group[GROUP_MSB]) {
694  /* if start and limit-1 are in the same group, then enumerate only in that one */
695  return enumGroupNames(names, group, start, limit-1, fn, context, nameChoice);
696  }
697  } else {
699  groupCount=*groups++;
700  groupLimit=groups+groupCount*GROUP_LENGTH;
701 
702  if(startGroupMSB==group[GROUP_MSB]) {
703  /* enumerate characters in the partial start group */
704  if((start&GROUP_MASK)!=0) {
706  start, ((UChar32)startGroupMSB<<GROUP_SHIFT)+LINES_PER_GROUP-1,
707  fn, context, nameChoice)) {
708  return FALSE;
709  }
710  group=NEXT_GROUP(group); /* continue with the next group */
711  }
712  } else if(startGroupMSB>group[GROUP_MSB]) {
713  /* make sure that we start enumerating with the first group after start */
714  const uint16_t *nextGroup=NEXT_GROUP(group);
715  if (nextGroup < groupLimit && nextGroup[GROUP_MSB] > startGroupMSB && nameChoice == U_EXTENDED_CHAR_NAME) {
716  UChar32 end = nextGroup[GROUP_MSB] << GROUP_SHIFT;
717  if (end > limit) {
718  end = limit;
719  }
720  if (!enumExtNames(start, end - 1, fn, context)) {
721  return FALSE;
722  }
723  }
724  group=nextGroup;
725  }
726 
727  /* enumerate entire groups between the start- and end-groups */
728  while(group<groupLimit && group[GROUP_MSB]<endGroupMSB) {
729  const uint16_t *nextGroup;
731  if(!enumGroupNames(names, group, start, start+LINES_PER_GROUP-1, fn, context, nameChoice)) {
732  return FALSE;
733  }
734  nextGroup=NEXT_GROUP(group);
735  if (nextGroup < groupLimit && nextGroup[GROUP_MSB] > group[GROUP_MSB] + 1 && nameChoice == U_EXTENDED_CHAR_NAME) {
736  UChar32 end = nextGroup[GROUP_MSB] << GROUP_SHIFT;
737  if (end > limit) {
738  end = limit;
739  }
740  if (!enumExtNames((group[GROUP_MSB] + 1) << GROUP_SHIFT, end - 1, fn, context)) {
741  return FALSE;
742  }
743  }
744  group=nextGroup;
745  }
746 
747  /* enumerate within the end group (group[GROUP_MSB]==endGroupMSB) */
748  if(group<groupLimit && group[GROUP_MSB]==endGroupMSB) {
749  return enumGroupNames(names, group, (limit-1)&~GROUP_MASK, limit-1, fn, context, nameChoice);
750  } else if (nameChoice == U_EXTENDED_CHAR_NAME && group == groupLimit) {
752  if (next > start) {
753  start = next;
754  }
755  } else {
756  return TRUE;
757  }
758  }
759 
760  /* we have not found a group, which means everything is made of
761  extended names. */
762  if (nameChoice == U_EXTENDED_CHAR_NAME) {
763  if (limit > UCHAR_MAX_VALUE + 1) {
764  limit = UCHAR_MAX_VALUE + 1;
765  }
766  return enumExtNames(start, limit - 1, fn, context);
767  }
768 
769  return TRUE;
770 }
771 
772 static uint16_t
774  const char *s, /* suffix elements */
775  uint32_t code,
776  uint16_t indexes[8], /* output fields from here */
777  const char *elementBases[8], const char *elements[8],
778  char *buffer, uint16_t bufferLength) {
779  uint16_t i, factor, bufferPos=0;
780  char c;
781 
782  /* write elements according to the factors */
783 
784  /*
785  * the factorized elements are determined by modulo arithmetic
786  * with the factors of this algorithm
787  *
788  * note that for fewer operations, count is decremented here
789  */
790  --count;
791  for(i=count; i>0; --i) {
792  factor=factors[i];
794  code/=factor;
795  }
796  /*
797  * we don't need to calculate the last modulus because start<=code<=end
798  * guarantees here that code<=factors[0]
799  */
800  indexes[0]=(uint16_t)code;
801 
802  /* write each element */
803  for(;;) {
804  if(elementBases!=NULL) {
805  *elementBases++=s;
806  }
807 
808  /* skip indexes[i] strings */
809  factor=indexes[i];
810  while(factor>0) {
811  while(*s++!=0) {}
812  --factor;
813  }
814  if(elements!=NULL) {
815  *elements++=s;
816  }
817 
818  /* write element */
819  while((c=*s++)!=0) {
820  WRITE_CHAR(buffer, bufferLength, bufferPos, c);
821  }
822 
823  /* we do not need to perform the rest of this loop for i==count - break here */
824  if(i>=count) {
825  break;
826  }
827 
828  /* skip the rest of the strings for this factors[i] */
829  factor=(uint16_t)(factors[i]-indexes[i]-1);
830  while(factor>0) {
831  while(*s++!=0) {}
832  --factor;
833  }
834 
835  ++i;
836  }
837 
838  /* zero-terminate */
839  if(bufferLength>0) {
840  *buffer=0;
841  }
842 
843  return bufferPos;
844 }
845 
846 /*
847  * Important:
848  * Parts of findAlgName() are almost the same as some of getAlgName().
849  * Fixes must be applied to both.
850  */
851 static uint16_t
853  char *buffer, uint16_t bufferLength) {
854  uint16_t bufferPos=0;
855 
856  /* Only the normative character name can be algorithmic. */
857  if(nameChoice!=U_UNICODE_CHAR_NAME && nameChoice!=U_EXTENDED_CHAR_NAME) {
858  /* zero-terminate */
859  if(bufferLength>0) {
860  *buffer=0;
861  }
862  return 0;
863  }
864 
865  switch(range->type) {
866  case 0: {
867  /* name = prefix hex-digits */
868  const char *s=(const char *)(range+1);
869  char c;
870 
871  uint16_t i, count;
872 
873  /* copy prefix */
874  while((c=*s++)!=0) {
875  WRITE_CHAR(buffer, bufferLength, bufferPos, c);
876  }
877 
878  /* write hexadecimal code point value */
879  count=range->variant;
880 
881  /* zero-terminate */
882  if(count<bufferLength) {
883  buffer[count]=0;
884  }
885 
886  for(i=count; i>0;) {
887  if(--i<bufferLength) {
888  c=(char)(code&0xf);
889  if(c<10) {
890  c+='0';
891  } else {
892  c+='A'-10;
893  }
894  buffer[i]=c;
895  }
896  code>>=4;
897  }
898 
899  bufferPos+=count;
900  break;
901  }
902  case 1: {
903  /* name = prefix factorized-elements */
904  uint16_t indexes[8];
905  const uint16_t *factors=(const uint16_t *)(range+1);
906  uint16_t count=range->variant;
907  const char *s=(const char *)(factors+count);
908  char c;
909 
910  /* copy prefix */
911  while((c=*s++)!=0) {
912  WRITE_CHAR(buffer, bufferLength, bufferPos, c);
913  }
914 
915  bufferPos+=writeFactorSuffix(factors, count,
916  s, code-range->start, indexes, NULL, NULL, buffer, bufferLength);
917  break;
918  }
919  default:
920  /* undefined type */
921  /* zero-terminate */
922  if(bufferLength>0) {
923  *buffer=0;
924  }
925  break;
926  }
927 
928  return bufferPos;
929 }
930 
931 /*
932  * Important: enumAlgNames() and findAlgName() are almost the same.
933  * Any fix must be applied to both.
934  */
935 static UBool
938  UEnumCharNamesFn *fn, void *context,
939  UCharNameChoice nameChoice) {
940  char buffer[200];
942 
943  if(nameChoice!=U_UNICODE_CHAR_NAME && nameChoice!=U_EXTENDED_CHAR_NAME) {
944  return TRUE;
945  }
946 
947  switch(range->type) {
948  case 0: {
949  char *s, *end;
950  char c;
951 
952  /* get the full name of the start character */
953  length=getAlgName(range, (uint32_t)start, nameChoice, buffer, sizeof(buffer));
954  if(length<=0) {
955  return TRUE;
956  }
957 
958  /* call the enumerator function with this first character */
959  if(!fn(context, start, nameChoice, buffer, length)) {
960  return FALSE;
961  }
962 
963  /* go to the end of the name; all these names have the same length */
964  end=buffer;
965  while(*end!=0) {
966  ++end;
967  }
968 
969  /* enumerate the rest of the names */
970  while(++start<limit) {
971  /* increment the hexadecimal number on a character-basis */
972  s=end;
973  for (;;) {
974  c=*--s;
975  if(('0'<=c && c<'9') || ('A'<=c && c<'F')) {
976  *s=(char)(c+1);
977  break;
978  } else if(c=='9') {
979  *s='A';
980  break;
981  } else if(c=='F') {
982  *s='0';
983  }
984  }
985 
986  if(!fn(context, start, nameChoice, buffer, length)) {
987  return FALSE;
988  }
989  }
990  break;
991  }
992  case 1: {
993  uint16_t indexes[8];
994  const char *elementBases[8], *elements[8];
995  const uint16_t *factors=(const uint16_t *)(range+1);
996  uint16_t count=range->variant;
997  const char *s=(const char *)(factors+count);
998  char *suffix, *t;
999  uint16_t prefixLength, i, idx;
1000 
1001  char c;
1002 
1003  /* name = prefix factorized-elements */
1004 
1005  /* copy prefix */
1006  suffix=buffer;
1007  prefixLength=0;
1008  while((c=*s++)!=0) {
1009  *suffix++=c;
1010  ++prefixLength;
1011  }
1012 
1013  /* append the suffix of the start character */
1014  length=(uint16_t)(prefixLength+writeFactorSuffix(factors, count,
1015  s, (uint32_t)start-range->start,
1016  indexes, elementBases, elements,
1017  suffix, (uint16_t)(sizeof(buffer)-prefixLength)));
1018 
1019  /* call the enumerator function with this first character */
1020  if(!fn(context, start, nameChoice, buffer, length)) {
1021  return FALSE;
1022  }
1023 
1024  /* enumerate the rest of the names */
1025  while(++start<limit) {
1026  /* increment the indexes in lexical order bound by the factors */
1027  i=count;
1028  for (;;) {
1029  idx=(uint16_t)(indexes[--i]+1);
1030  if(idx<factors[i]) {
1031  /* skip one index and its element string */
1032  indexes[i]=idx;
1033  s=elements[i];
1034  while(*s++!=0) {
1035  }
1036  elements[i]=s;
1037  break;
1038  } else {
1039  /* reset this index to 0 and its element string to the first one */
1040  indexes[i]=0;
1041  elements[i]=elementBases[i];
1042  }
1043  }
1044 
1045  /* to make matters a little easier, just append all elements to the suffix */
1046  t=suffix;
1047  length=prefixLength;
1048  for(i=0; i<count; ++i) {
1049  s=elements[i];
1050  while((c=*s++)!=0) {
1051  *t++=c;
1052  ++length;
1053  }
1054  }
1055  /* zero-terminate */
1056  *t=0;
1057 
1058  if(!fn(context, start, nameChoice, buffer, length)) {
1059  return FALSE;
1060  }
1061  }
1062  break;
1063  }
1064  default:
1065  /* undefined type */
1066  break;
1067  }
1068 
1069  return TRUE;
1070 }
1071 
1072 /*
1073  * findAlgName() is almost the same as enumAlgNames() except that it
1074  * returns the code point for a name if it fits into the range.
1075  * It returns 0xffff otherwise.
1076  */
1077 static UChar32
1078 findAlgName(AlgorithmicRange *range, UCharNameChoice nameChoice, const char *otherName) {
1079  UChar32 code;
1080 
1081  if(nameChoice!=U_UNICODE_CHAR_NAME && nameChoice!=U_EXTENDED_CHAR_NAME) {
1082  return 0xffff;
1083  }
1084 
1085  switch(range->type) {
1086  case 0: {
1087  /* name = prefix hex-digits */
1088  const char *s=(const char *)(range+1);
1089  char c;
1090 
1091  uint16_t i, count;
1092 
1093  /* compare prefix */
1094  while((c=*s++)!=0) {
1095  if((char)c!=*otherName++) {
1096  return 0xffff;
1097  }
1098  }
1099 
1100  /* read hexadecimal code point value */
1101  count=range->variant;
1102  code=0;
1103  for(i=0; i<count; ++i) {
1104  c=*otherName++;
1105  if('0'<=c && c<='9') {
1106  code=(code<<4)|(c-'0');
1107  } else if('A'<=c && c<='F') {
1108  code=(code<<4)|(c-'A'+10);
1109  } else {
1110  return 0xffff;
1111  }
1112  }
1113 
1114  /* does it fit into the range? */
1115  if(*otherName==0 && range->start<=(uint32_t)code && (uint32_t)code<=range->end) {
1116  return code;
1117  }
1118  break;
1119  }
1120  case 1: {
1121  char buffer[64];
1122  uint16_t indexes[8];
1123  const char *elementBases[8], *elements[8];
1124  const uint16_t *factors=(const uint16_t *)(range+1);
1125  uint16_t count=range->variant;
1126  const char *s=(const char *)(factors+count), *t;
1127  UChar32 start, limit;
1128  uint16_t i, idx;
1129 
1130  char c;
1131 
1132  /* name = prefix factorized-elements */
1133 
1134  /* compare prefix */
1135  while((c=*s++)!=0) {
1136  if((char)c!=*otherName++) {
1137  return 0xffff;
1138  }
1139  }
1140 
1141  start=(UChar32)range->start;
1142  limit=(UChar32)(range->end+1);
1143 
1144  /* initialize the suffix elements for enumeration; indexes should all be set to 0 */
1145  writeFactorSuffix(factors, count, s, 0,
1146  indexes, elementBases, elements, buffer, sizeof(buffer));
1147 
1148  /* compare the first suffix */
1149  if(0==uprv_strcmp(otherName, buffer)) {
1150  return start;
1151  }
1152 
1153  /* enumerate and compare the rest of the suffixes */
1154  while(++start<limit) {
1155  /* increment the indexes in lexical order bound by the factors */
1156  i=count;
1157  for (;;) {
1158  idx=(uint16_t)(indexes[--i]+1);
1159  if(idx<factors[i]) {
1160  /* skip one index and its element string */
1161  indexes[i]=idx;
1162  s=elements[i];
1163  while(*s++!=0) {}
1164  elements[i]=s;
1165  break;
1166  } else {
1167  /* reset this index to 0 and its element string to the first one */
1168  indexes[i]=0;
1169  elements[i]=elementBases[i];
1170  }
1171  }
1172 
1173  /* to make matters a little easier, just compare all elements of the suffix */
1174  t=otherName;
1175  for(i=0; i<count; ++i) {
1176  s=elements[i];
1177  while((c=*s++)!=0) {
1178  if(c!=*t++) {
1179  s=""; /* does not match */
1180  i=99;
1181  }
1182  }
1183  }
1184  if(i<99 && *t==0) {
1185  return start;
1186  }
1187  }
1188  break;
1189  }
1190  default:
1191  /* undefined type */
1192  break;
1193  }
1194 
1195  return 0xffff;
1196 }
1197 
1198 /* sets of name characters, maximum name lengths ---------------------------- */
1199 
/*
 * Helpers for a 256-bit character set stored as uint32_t[8].
 * The value c is reduced to a byte; bits 7..5 of that byte select the array
 * word and bits 4..0 select the bit inside the word.
 *
 * The parameter c is parenthesized so that the uint8_t conversion applies to
 * the whole argument expression (e.g. SET_ADD(set, a|b)) and not just to its
 * first operand. Both set and c are expanded more than once, so arguments
 * must not have side effects.
 */
#define SET_ADD(set, c) ((set)[(uint8_t)(c)>>5]|=((uint32_t)1<<((uint8_t)(c)&0x1f)))
#define SET_CONTAINS(set, c) (((set)[(uint8_t)(c)>>5]&((uint32_t)1<<((uint8_t)(c)&0x1f)))!=0)
1202 
/*
 * Adds every character of the NUL-terminated string s to the 256-bit
 * character set and returns the number of characters that precede the
 * terminator (0 for an empty string).
 */
static int32_t
calcStringSetLength(uint32_t set[8], const char *s) {
    const char *cursor;

    for(cursor=s; *cursor!=0; ++cursor) {
        SET_ADD(set, *cursor);
    }
    return (int32_t)(cursor-s);
}
1214 
/*
 * Walks the algorithmic name ranges, adds the characters of their prefix and
 * factor-suffix strings to gNameSet, and raises maxNameLength to the longest
 * algorithmic name length found. Returns the (possibly increased)
 * maxNameLength.
 *
 * NOTE(review): this listing omits original lines 1216-1217, 1223 and 1244 -
 * the line with the function name and parameter, presumably the declaration
 * of range, the initialization of p, and the statement that sets length for
 * the type-1 prefix. Confirm against the real file.
 */
1215 static int32_t
1218  uint32_t *p;
1219  uint32_t rangeCount;
1220  int32_t length;
1221 
1222  /* enumerate algorithmic ranges */
1224  rangeCount=*p;
1225  range=(AlgorithmicRange *)(p+1);
1226  while(rangeCount>0) {
1227  switch(range->type) {
1228  case 0:
1229  /* name = prefix + (range->variant times) hex-digits */
1230  /* prefix */
1231  length=calcStringSetLength(gNameSet, (const char *)(range+1))+range->variant;
1232  if(length>maxNameLength) {
1233  maxNameLength=length;
1234  }
1235  break;
1236  case 1: {
1237  /* name = prefix factorized-elements */
1238  const uint16_t *factors=(const uint16_t *)(range+1);
1239  const char *s;
1240  int32_t i, count=range->variant, factor, factorLength, maxFactorLength;
1241 
1242  /* prefix length */
/* the prefix string starts right after the count factor values */
1243  s=(const char *)(factors+count);
1245  s+=length+1; /* start of factor suffixes */
1246 
1247  /* get the set and maximum factor suffix length for each factor */
1248  for(i=0; i<count; ++i) {
1249  maxFactorLength=0;
/* factors[i] is the number of consecutive NUL-terminated suffix strings for factor i */
1250  for(factor=factors[i]; factor>0; --factor) {
1251  factorLength=calcStringSetLength(gNameSet, s);
1252  s+=factorLength+1;
1253  if(factorLength>maxFactorLength) {
1254  maxFactorLength=factorLength;
1255  }
1256  }
/* the longest possible name uses the longest suffix of every factor */
1257  length+=maxFactorLength;
1258  }
1259 
1260  if(length>maxNameLength) {
1261  maxNameLength=length;
1262  }
1263  break;
1264  }
1265  default:
1266  /* unknown type */
1267  break;
1268  }
1269 
/* ranges are variable-sized: range->size is the byte distance to the next one */
1270  range=(AlgorithmicRange *)((uint8_t *)range+range->size);
1271  --rangeCount;
1272  }
1273  return maxNameLength;
1274 }
1275 
/*
 * Raises maxNameLength so that it also covers extended names derived from
 * the entries of charCatNames, and returns the resulting maximum.
 *
 * NOTE(review): this listing omits original line 1277 (the line with the
 * function name and parameter) and line 1288 (the statement that computes
 * length for each category, presumably as described by the comment inside
 * the loop). Confirm against the real file.
 */
1276 static int32_t
1278  int32_t i, length;
1279 
1280  for(i=0; i<UPRV_LENGTHOF(charCatNames); ++i) {
1281  /*
1282  * for each category, count the length of the category name
1283  * plus 9=
1284  * 2 for <>
1285  * 1 for -
1286  * 6 for most hex digits per code point
1287  */
1289  if(length>maxNameLength) {
1290  maxNameLength=length;
1291  }
1292  }
1293  return maxNameLength;
1294 }
1295 
/*
 * Measures one name field of a compressed name line and records the
 * characters it uses.
 *
 * Reading starts at *pLine and stops at lineLimit or after a ';' byte.
 * Characters of the expanded field are added to set.
 *
 * tokens/tokenCount  token table; bytes >= tokenCount stand for themselves
 * tokenStrings       base of the token word strings; a token value that is
 *                    neither -1 nor -2 is used as an offset into it
 * tokenLengths       optional cache of token word lengths indexed like
 *                    tokens (0 means "not computed yet"); may be NULL
 * set                256-bit character set to add to
 * pLine              in: start of the field; out: position after the
 *                    consumed bytes
 * lineLimit          end of the line
 *
 * Returns length.
 *
 * NOTE(review): this listing omits original lines 1301 and 1332 -
 * presumably the declaration of length/tokenLength and the statement that
 * adds tokenLength to length. Confirm against the real file.
 */
1296 static int32_t
1297 calcNameSetLength(const uint16_t *tokens, uint16_t tokenCount, const uint8_t *tokenStrings, int8_t *tokenLengths,
1298  uint32_t set[8],
1299  const uint8_t **pLine, const uint8_t *lineLimit) {
1300  const uint8_t *line=*pLine;
1302  uint16_t c, token;
1303 
1304  while(line!=lineLimit && (c=*line++)!=(uint8_t)';') {
1305  if(c>=tokenCount) {
1306  /* implicit letter */
1307  SET_ADD(set, c);
1308  ++length;
1309  } else {
1310  token=tokens[c];
1311  if(token==(uint16_t)(-2)) {
1312  /* this is a lead byte for a double-byte token */
/* NOTE(review): the trail byte is read without comparing line to lineLimit */
1313  c=c<<8|*line++;
1314  token=tokens[c];
1315  }
1316  if(token==(uint16_t)(-1)) {
1317  /* explicit letter */
1318  SET_ADD(set, c);
1319  ++length;
1320  } else {
1321  /* count token word */
1322  if(tokenLengths!=NULL) {
1323  /* use cached token length */
1324  tokenLength=tokenLengths[c];
1325  if(tokenLength==0) {
/* first use of this token: measure its word and remember the result */
1326  tokenLength=calcStringSetLength(set, (const char *)tokenStrings+token);
1327  tokenLengths[c]=(int8_t)tokenLength;
1328  }
1329  } else {
/* no cache available: measure the token word every time */
1330  tokenLength=calcStringSetLength(set, (const char *)tokenStrings+token);
1331  }
1333  }
1334  }
1335  }
1336 
/* report how far the caller's line pointer advanced */
1337  *pLine=line;
1338  return length;
1339 }
1340 
/*
 * Walks all name groups and, for every non-empty line, measures the first
 * field (regular name) and, if bytes remain, the second field (Unicode 1.0
 * name) with calcNameSetLength(), adding their characters to gNameSet.
 * The largest length seen (starting from the incoming maxNameLength) is
 * stored in gMaxNameLength at the end.
 *
 * A temporary per-token length cache is allocated; if the allocation fails,
 * NULL is passed on and calcNameSetLength() measures tokens without a cache.
 *
 * NOTE(review): this listing omits original lines 1342-1343, 1347, 1361,
 * 1366 and 1401 - the line with the function name and parameter, and
 * presumably the declarations of offsets/lengths and tokenStrings, the
 * initialization of group and s, and the step to the next group. Confirm
 * against the real file.
 */
1341 static void
1344 
1345  uint16_t *tokens=(uint16_t *)uCharNames+8;
/* the first 16-bit unit is the token count; tokens then points at the table itself */
1346  uint16_t tokenCount=*tokens++;
1348 
1349  int8_t *tokenLengths;
1350 
1351  const uint16_t *group;
1352  const uint8_t *s, *line, *lineLimit;
1353 
1354  int32_t groupCount, lineNumber, length;
1355 
1356  tokenLengths=(int8_t *)uprv_malloc(tokenCount);
1357  if(tokenLengths!=NULL) {
/* 0 marks a token length as not yet computed */
1358  uprv_memset(tokenLengths, 0, tokenCount);
1359  }
1360 
1362  groupCount=*group++;
1363 
1364  /* enumerate all groups */
1365  while(groupCount>0) {
1367  s=expandGroupLengths(s, offsets, lengths);
1368 
1369  /* enumerate all lines in each group */
1370  for(lineNumber=0; lineNumber<LINES_PER_GROUP; ++lineNumber) {
1371  line=s+offsets[lineNumber];
1372  length=lengths[lineNumber];
1373  if(length==0) {
1374  continue;
1375  }
1376 
1377  lineLimit=line+length;
1378 
1379  /* read regular name */
1380  length=calcNameSetLength(tokens, tokenCount, tokenStrings, tokenLengths, gNameSet, &line, lineLimit);
1381  if(length>maxNameLength) {
1382  maxNameLength=length;
1383  }
1384  if(line==lineLimit) {
1385  continue;
1386  }
1387 
1388  /* read Unicode 1.0 name */
1389  length=calcNameSetLength(tokens, tokenCount, tokenStrings, tokenLengths, gNameSet, &line, lineLimit);
1390  if(length>maxNameLength) {
1391  maxNameLength=length;
1392  }
1393  if(line==lineLimit) {
1394  continue;
1395  }
1396 
1397  /* read ISO comment */
1398  /*length=calcNameSetLength(tokens, tokenCount, tokenStrings, tokenLengths, gISOCommentSet, &line, lineLimit);*/
1399  }
1400 
1402  --groupCount;
1403  }
1404 
1405  if(tokenLengths!=NULL) {
1406  uprv_free(tokenLengths);
1407  }
1408 
1409  /* set gMax... - name length last for threading */
1410  gMaxNameLength=maxNameLength;
1411 }
1412 
/*
 * Makes sure gNameSet and gMaxNameLength have been computed.
 *
 * Returns TRUE immediately if gMaxNameLength is already non-zero, FALSE if
 * isDataLoaded(pErrorCode) fails, and TRUE after running the three
 * calc...NameSetsLengths() passes (algorithmic, extended, group names).
 *
 * NOTE(review): this listing omits original line 1414, the line with the
 * function name and parameter list; pErrorCode is used by the body.
 */
1413 static UBool
1415  static const char extChars[]="0123456789ABCDEF<>-";
1416  int32_t i, maxNameLength;
1417 
/* a non-zero maximum means the sets and lengths were already calculated */
1418  if(gMaxNameLength!=0) {
1419  return TRUE;
1420  }
1421 
1422  if(!isDataLoaded(pErrorCode)) {
1423  return FALSE;
1424  }
1425 
1426  /* set hex digits, used in various names, and <>-, used in extended names */
/* sizeof(extChars)-1 excludes the terminating NUL of the literal */
1427  for(i=0; i<(int32_t)sizeof(extChars)-1; ++i) {
1428  SET_ADD(gNameSet, extChars[i]);
1429  }
1430 
1431  /* set sets and lengths from algorithmic names */
1432  maxNameLength=calcAlgNameSetsLengths(0);
1433 
1434  /* set sets and lengths from extended names */
1435  maxNameLength=calcExtNameSetsLengths(maxNameLength);
1436 
1437  /* set sets and lengths from group names, set global maximum values */
1438  calcGroupNameSetsLengths(maxNameLength);
1439 
1440  return TRUE;
1441 }
1442 
1444 
1445 /* public API --------------------------------------------------------------- */
1446 
1448 
/*
 * Public API: writes the name of the code point "code" into buffer and
 * returns the value produced by u_terminateChars() for the name length.
 *
 * - Returns 0 if pErrorCode is NULL or already indicates a failure.
 * - Sets U_ILLEGAL_ARGUMENT_ERROR and returns 0 if nameChoice is out of
 *   range, bufferLength is negative, or buffer is NULL with a positive
 *   bufferLength.
 * - Yields an empty name if code is above UCHAR_MAX_VALUE or the names data
 *   cannot be loaded.
 * - Otherwise the algorithmic ranges are searched first; only if none
 *   contains code is the name looked up via getName()/getExtName().
 *
 * NOTE(review): this listing omits the first signature lines (original
 * 1449-1450; the function name and the code/nameChoice parameters are
 * presumably declared there), line 1475 (initialization of p) and line 1489
 * (the statement that sets length before the "if (!length)" fallback).
 * Confirm against the real file.
 */
1451  char *buffer, int32_t bufferLength,
1452  UErrorCode *pErrorCode) {
1453  AlgorithmicRange *algRange;
1454  uint32_t *p;
1455  uint32_t i;
1456  int32_t length;
1457 
1458  /* check the argument values */
1459  if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) {
1460  return 0;
1461  } else if(nameChoice>=U_CHAR_NAME_CHOICE_COUNT ||
1462  bufferLength<0 || (bufferLength>0 && buffer==NULL)
1463  ) {
1464  *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
1465  return 0;
1466  }
1467 
/* the unsigned comparison also rejects negative code values */
1468  if((uint32_t)code>UCHAR_MAX_VALUE || !isDataLoaded(pErrorCode)) {
1469  return u_terminateChars(buffer, bufferLength, 0, pErrorCode);
1470  }
1471 
1472  length=0;
1473 
1474  /* try algorithmic names first */
1476  i=*p;
1477  algRange=(AlgorithmicRange *)(p+1);
1478  while(i>0) {
1479  if(algRange->start<=(uint32_t)code && (uint32_t)code<=algRange->end) {
1480  length=getAlgName(algRange, (uint32_t)code, nameChoice, buffer, (uint16_t)bufferLength);
1481  break;
1482  }
1483  algRange=(AlgorithmicRange *)((uint8_t *)algRange+algRange->size);
1484  --i;
1485  }
1486 
/* i reaches 0 only when no algorithmic range contained code (a match breaks out earlier) */
1487  if(i==0) {
1488  if (nameChoice == U_EXTENDED_CHAR_NAME) {
1490  if (!length) {
1491  /* extended character name */
1492  length = getExtName((uint32_t) code, buffer, (uint16_t) bufferLength);
1493  }
1494  } else {
1495  /* normal character name */
1496  length=getName(uCharNames, (uint32_t)code, nameChoice, buffer, (uint16_t)bufferLength);
1497  }
1498  }
1499 
1500  return u_terminateChars(buffer, bufferLength, length, pErrorCode);
1501 }
1502 
/*
 * Validates its arguments and then always produces an empty string: dest is
 * terminated via u_terminateChars() with a length of 0.
 *
 * Returns 0 without touching dest if pErrorCode is NULL or already a
 * failure; sets U_ILLEGAL_ARGUMENT_ERROR and returns 0 if destCapacity is
 * negative or dest is NULL with a positive destCapacity.
 *
 * NOTE(review): this listing omits the first signature lines (original
 * 1503-1504), including the function name; confirm it against the real file.
 */
1505  char *dest, int32_t destCapacity,
1506  UErrorCode *pErrorCode) {
1507  /* check the argument values */
1508  if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) {
1509  return 0;
1510  } else if(destCapacity<0 || (destCapacity>0 && dest==NULL)) {
1511  *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
1512  return 0;
1513  }
1514 
1515  return u_terminateChars(dest, destCapacity, 0, pErrorCode);
1516 }
1517 
/*
 * Public API: finds the code point whose name equals "name" (compared via
 * an uppercased copy for regular/algorithmic names and a lowercased copy
 * for extended "<category-HHHH>" names).
 *
 * Returns the code point on success. On failure returns 0xffff ("error")
 * and, except when pErrorCode was NULL or already a failure on entry, sets:
 * - U_ILLEGAL_ARGUMENT_ERROR if nameChoice is out of range or name is NULL
 *   or empty;
 * - U_ILLEGAL_CHAR_FOUND if the name is 120 characters or longer, is a
 *   malformed or mismatching extended name, or is not found.
 * If isDataLoaded() fails, error is returned with whatever code
 * isDataLoaded() left in *pErrorCode.
 *
 * NOTE(review): this listing omits the first signature lines (original
 * 1518-1519; the function name and the nameChoice parameter are presumably
 * declared there) and line 1613 (initialization of p). Confirm against the
 * real file.
 */
1520  const char *name,
1521  UErrorCode *pErrorCode) {
1522  char upper[120] = {0};
1523  char lower[120] = {0};
1524  FindName findName;
1525  AlgorithmicRange *algRange;
1526  uint32_t *p;
1527  uint32_t i;
1528  UChar32 cp = 0;
1529  char c0;
1530  static constexpr UChar32 error = 0xffff; /* Undefined, but use this for backwards compatibility. */
1531 
1532  if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) {
1533  return error;
1534  }
1535 
1536  if(nameChoice>=U_CHAR_NAME_CHOICE_COUNT || name==NULL || *name==0) {
1537  *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
1538  return error;
1539  }
1540 
1541  if(!isDataLoaded(pErrorCode)) {
1542  return error;
1543  }
1544 
1545  /* construct the uppercase and lowercase of the name first */
1546  for(i=0; i<sizeof(upper); ++i) {
1547  if((c0=*name++)!=0) {
1548  upper[i]=uprv_toupper(c0);
1549  lower[i]=uprv_tolower(c0);
1550  } else {
1551  upper[i]=lower[i]=0;
1552  break;
1553  }
1554  }
/* the loop ran to the end without seeing the NUL, so the name does not fit */
1555  if(i==sizeof(upper)) {
1556  /* name too long, there is no such character */
1557  *pErrorCode = U_ILLEGAL_CHAR_FOUND;
1558  return error;
1559  }
1560  // i==strlen(name)==strlen(lower)==strlen(upper)
1561 
1562  /* try extended names first */
1563  if (lower[0] == '<') {
/* --i moves i from the string length to the index of the last character */
1564  if (nameChoice == U_EXTENDED_CHAR_NAME && lower[--i] == '>') {
1565  // Parse a string like "<category-HHHH>" where HHHH is a hex code point.
1566  uint32_t limit = i;
/* scan backwards for the '-' that separates the category from the hex digits */
1567  while (i >= 3 && lower[--i] != '-') {}
1568 
1569  // There should be 1 to 8 hex digits.
1570  int32_t hexLength = limit - (i + 1);
1571  if (i >= 2 && lower[i] == '-' && 1 <= hexLength && hexLength <= 8) {
1572  uint32_t cIdx;
1573 
/* cut the string at the '-' so that lower+1 is just the category name */
1574  lower[i] = 0;
1575 
1576  for (++i; i < limit; ++i) {
1577  if (lower[i] >= '0' && lower[i] <= '9') {
1578  cp = (cp << 4) + lower[i] - '0';
1579  } else if (lower[i] >= 'a' && lower[i] <= 'f') {
1580  cp = (cp << 4) + lower[i] - 'a' + 10;
1581  } else {
1582  *pErrorCode = U_ILLEGAL_CHAR_FOUND;
1583  return error;
1584  }
1585  // Prevent signed-integer overflow and out-of-range code points.
1586  if (cp > UCHAR_MAX_VALUE) {
1587  *pErrorCode = U_ILLEGAL_CHAR_FOUND;
1588  return error;
1589  }
1590  }
1591 
1592  /* Now validate the category name.
1593  We could use a binary search, or a trie, if
1594  we really wanted to. */
1595  uint8_t cat = getCharCat(cp);
1596  for (lower[i] = 0, cIdx = 0; cIdx < UPRV_LENGTHOF(charCatNames); ++cIdx) {
1597 
1598  if (!uprv_strcmp(lower + 1, charCatNames[cIdx])) {
/* the name matches only if the parsed code point really has this category */
1599  if (cat == cIdx) {
1600  return cp;
1601  }
1602  break;
1603  }
1604  }
1605  }
1606  }
1607 
/* every name starting with '<' that did not return above is rejected here */
1608  *pErrorCode = U_ILLEGAL_CHAR_FOUND;
1609  return error;
1610  }
1611 
1612  /* try algorithmic names now */
1614  i=*p;
1615  algRange=(AlgorithmicRange *)(p+1);
1616  while(i>0) {
1617  if((cp=findAlgName(algRange, nameChoice, upper))!=0xffff) {
1618  return cp;
1619  }
1620  algRange=(AlgorithmicRange *)((uint8_t *)algRange+algRange->size);
1621  --i;
1622  }
1623 
1624  /* normal character name */
1625  findName.otherName=upper;
1626  findName.code=error;
1627  enumNames(uCharNames, 0, UCHAR_MAX_VALUE + 1, DO_FIND_NAME, &findName, nameChoice);
/* findName.code still holding the initial sentinel means that no name matched */
1628  if (findName.code == error) {
1629  *pErrorCode = U_ILLEGAL_CHAR_FOUND;
1630  }
1631  return findName.code;
1632 }
1633 
/*
 * Public API: enumerates character names for code points from start up to
 * (not including) limit, calling fn with context for each.
 *
 * Data-driven names (enumNames) are interleaved with the algorithmic ranges
 * (enumAlgNames); the ranges are assumed to be stored in ascending order.
 * limit is clamped to UCHAR_MAX_VALUE+1 and nothing is enumerated when
 * start>=limit (compared as unsigned). The enumeration ends early as soon
 * as enumNames()/enumAlgNames() returns a false value.
 *
 * Does nothing if pErrorCode is NULL or already a failure; sets
 * U_ILLEGAL_ARGUMENT_ERROR if nameChoice is out of range or fn is NULL.
 *
 * NOTE(review): this listing omits original lines 1635-1636 (the function
 * name and, presumably, the start/limit/fn parameters) and line 1666
 * (initialization of p). Confirm against the real file.
 */
1634 U_CAPI void U_EXPORT2
1637  void *context,
1638  UCharNameChoice nameChoice,
1639  UErrorCode *pErrorCode) {
1640  AlgorithmicRange *algRange;
1641  uint32_t *p;
1642  uint32_t i;
1643 
1644  if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) {
1645  return;
1646  }
1647 
1648  if(nameChoice>=U_CHAR_NAME_CHOICE_COUNT || fn==NULL) {
1649  *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
1650  return;
1651  }
1652 
/* clamp limit; the unsigned comparison also catches negative limit values */
1653  if((uint32_t) limit > UCHAR_MAX_VALUE + 1) {
1654  limit = UCHAR_MAX_VALUE + 1;
1655  }
1656  if((uint32_t)start>=(uint32_t)limit) {
1657  return;
1658  }
1659 
1660  if(!isDataLoaded(pErrorCode)) {
1661  return;
1662  }
1663 
1664  /* interleave the data-driven ones with the algorithmic ones */
1665  /* iterate over all algorithmic ranges; assume that they are in ascending order */
1667  i=*p;
1668  algRange=(AlgorithmicRange *)(p+1);
1669  while(i>0) {
1670  /* enumerate the character names before the current algorithmic range */
1671  /* here: start<limit */
1672  if((uint32_t)start<algRange->start) {
1673  if((uint32_t)limit<=algRange->start) {
/* the requested interval ends before this range: finish with data-driven names */
1674  enumNames(uCharNames, start, limit, fn, context, nameChoice);
1675  return;
1676  }
1677  if(!enumNames(uCharNames, start, (UChar32)algRange->start, fn, context, nameChoice)) {
1678  return;
1679  }
1680  start=(UChar32)algRange->start;
1681  }
1682  /* enumerate the character names in the current algorithmic range */
1683  /* here: algRange->start<=start<limit */
1684  if((uint32_t)start<=algRange->end) {
1685  if((uint32_t)limit<=(algRange->end+1)) {
/* the requested interval ends inside this range: finish with algorithmic names */
1686  enumAlgNames(algRange, start, limit, fn, context, nameChoice);
1687  return;
1688  }
1689  if(!enumAlgNames(algRange, start, (UChar32)algRange->end+1, fn, context, nameChoice)) {
1690  return;
1691  }
1692  start=(UChar32)algRange->end+1;
1693  }
1694  /* continue to the next algorithmic range (here: start<limit) */
1695  algRange=(AlgorithmicRange *)((uint8_t *)algRange+algRange->size);
1696  --i;
1697  }
1698  /* enumerate the character names after the last algorithmic range */
1699  enumNames(uCharNames, start, limit, fn, context, nameChoice);
1700 }
1701 
1706  return gMaxNameLength;
1707  } else {
1708  return 0;
1709  }
1710 }
1711 
/**
 * Passes the members of the char set cset to a USetAdder.
 *
 * The byte values contained in cset are collected into a char string,
 * converted with u_charsToUChars(), and each resulting UChar is handed to
 * sa->add(sa->set, ...). A converted value of 0 is only added when the
 * source char itself was 0.
 *
 * @param cset Set of 256 bit flags corresponding to a set of chars.
 * @param sa   Adder whose add callback and set member receive the
 *             characters. The visible code only adds; it does not clear
 *             the target first.
 *
 * NOTE(review): this listing omits original lines 1723, 1725 and 1727,
 * which presumably declare and initialize an error code and hold the
 * condition guarding the early return below. Confirm against the real file.
 */
1717 static void
1718 charSetToUSet(uint32_t cset[8], const USetAdder *sa) {
1719  UChar us[256];
1720  char cs[256];
1721 
1722  int32_t i, length;
1724 
1726 
1728  return;
1729  }
1730 
1731  /* build a char string with all chars that are used in character names */
1732  length=0;
1733  for(i=0; i<256; ++i) {
1734  if(SET_CONTAINS(cset, i)) {
1735  cs[length++]=(char)i;
1736  }
1737  }
1738 
1739  /* convert the char string to a UChar string */
1740  u_charsToUChars(cs, us, length);
1741 
1742  /* add each UChar to the USet */
1743  for(i=0; i<length; ++i) {
1744  if(us[i]!=0 || cs[i]==0) { /* non-invariant chars become (UChar)0 */
1745  sa->add(sa->set, us[i]);
1746  }
1747  }
1748 }
1749 
/**
 * Reports the characters that are used in Unicode character names by
 * forwarding gNameSet to charSetToUSet().
 * @param sa Adder that receives the characters (passed through unchanged).
 *
 * NOTE(review): this listing omits original line 1755, the line with the
 * function name and parameter list; confirm them against the real file.
 */
1754 U_CAPI void U_EXPORT2
1756  charSetToUSet(gNameSet, sa);
1757 }
1758 
1759 /* data swapping ------------------------------------------------------------ */
1760 
/*
 * The token table contains non-negative entries for token bytes,
 * and -1 for bytes that represent themselves in the data file's charset.
 * -2 entries are used for lead bytes.
 *
 * Direct bytes (-1 entries) must be translated from the input charset family
 * to the output charset family.
 * makeTokenMap() writes a permutation mapping for this.
 * Use it once for single-/lead-byte tokens and once more for all trail byte
 * tokens. (';' is an unused trail byte marked with -1.)
 *
 * tokens/tokenCount  token table entries to examine; at most the first 256
 *                    are used when the charset families differ
 * map                receives the mapping from input byte to output byte
 * pErrorCode         nothing is done if it already indicates a failure; a
 *                    failure from ds->swapInvChars() is reported via
 *                    udata_printError() and ends the function
 *
 * If the input and output charset families are equal, map becomes the
 * identity. Otherwise direct bytes are converted with ds->swapInvChars(),
 * and the remaining indexes below tokenCount get output byte values that no
 * direct byte mapped to. map[0] stays 0.
 *
 * NOTE(review): this listing omits original line 1773, the line with the
 * function name and its first parameter (the body uses ds).
 */
1772 static void
1774  int16_t tokens[], uint16_t tokenCount,
1775  uint8_t map[256],
1776  UErrorCode *pErrorCode) {
1777  UBool usedOutChar[256];
1778  uint16_t i, j;
1779  uint8_t c1, c2;
1780 
1781  if(U_FAILURE(*pErrorCode)) {
1782  return;
1783  }
1784 
1785  if(ds->inCharset==ds->outCharset) {
1786  /* Same charset family: identity permutation */
1787  for(i=0; i<256; ++i) {
1788  map[i]=(uint8_t)i;
1789  }
1790  } else {
/* a 0 in map[] doubles as the "not assigned yet" marker used further down */
1791  uprv_memset(map, 0, 256);
1792  uprv_memset(usedOutChar, 0, 256);
1793 
1794  if(tokenCount>256) {
1795  tokenCount=256;
1796  }
1797 
1798  /* set the direct bytes (byte 0 always maps to itself) */
1799  for(i=1; i<tokenCount; ++i) {
1800  if(tokens[i]==-1) {
1801  /* convert the direct byte character */
1802  c1=(uint8_t)i;
1803  ds->swapInvChars(ds, &c1, 1, &c2, pErrorCode);
1804  if(U_FAILURE(*pErrorCode)) {
1805  udata_printError(ds, "unames/makeTokenMap() finds variant character 0x%02x used (input charset family %d)\n",
1806  i, ds->inCharset);
1807  return;
1808  }
1809 
1810  /* enter the converted character into the map and mark it used */
1811  map[c1]=c2;
1812  usedOutChar[c2]=TRUE;
1813  }
1814  }
1815 
1816  /* set the mappings for the rest of the permutation */
/* j only moves forward, so each free output byte value is handed out at most once */
1817  for(i=j=1; i<tokenCount; ++i) {
1818  /* set mappings that were not set for direct bytes */
1819  if(map[i]==0) {
1820  /* set an output byte value that was not used as an output byte above */
1821  while(usedOutChar[j]) {
1822  ++j;
1823  }
1824  map[i]=(uint8_t)j++;
1825  }
1826  }
1827 
1828  /*
1829  * leave mappings at tokenCount and above unset if tokenCount<256
1830  * because they won't be used
1831  */
1832  }
1833 }
1834 
/*
 * Swaps the contents of a unames.icu data item between platform types
 * (byte order and charset family), as described by ds.
 *
 * With length<0 the function only preflights: it reads the algorithmic
 * names offset and walks the algorithmic ranges to determine the data size,
 * without writing to outData. Otherwise it copies the input to the output
 * (if they differ) and then swaps, in order: the four initial offsets, the
 * token count and token table (permuted through map/trailMap), the token
 * strings, the group table, the group strings (only when the charset
 * families differ), and the algorithmic ranges.
 *
 * Returns headerSize plus the offset just past the last algorithmic range.
 * Returns 0 when an error is detected (unrecognized data format or range
 * type, too few bytes, allocation failure, or a failure checked after one
 * of the swap helpers) or when pErrorCode is NULL or indicates a failure
 * after the header swap.
 *
 * NOTE(review): this listing omits the first signature lines (original
 * 1835-1836; the function name appears in the error messages below, and ds
 * is used by the body) and line 1999 (presumably the declaration of the
 * offsets/lengths arrays used with expandGroupLengths()). Confirm against
 * the real file.
 */
1837  const void *inData, int32_t length, void *outData,
1838  UErrorCode *pErrorCode) {
1839  const UDataInfo *pInfo;
1840  int32_t headerSize;
1841 
1842  const uint8_t *inBytes;
1843  uint8_t *outBytes;
1844 
1845  uint32_t tokenStringOffset, groupsOffset, groupStringOffset, algNamesOffset,
1846  offset, i, count, stringsCount;
1847 
1848  const AlgorithmicRange *inRange;
1849  AlgorithmicRange *outRange;
1850 
1851  /* udata_swapDataHeader checks the arguments */
1852  headerSize=udata_swapDataHeader(ds, inData, length, outData, pErrorCode);
1853  if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) {
1854  return 0;
1855  }
1856 
1857  /* check data format and format version */
1858  pInfo=(const UDataInfo *)((const char *)inData+4);
1859  if(!(
1860  pInfo->dataFormat[0]==0x75 && /* dataFormat="unam" */
1861  pInfo->dataFormat[1]==0x6e &&
1862  pInfo->dataFormat[2]==0x61 &&
1863  pInfo->dataFormat[3]==0x6d &&
1864  pInfo->formatVersion[0]==1
1865  )) {
1866  udata_printError(ds, "uchar_swapNames(): data format %02x.%02x.%02x.%02x (format version %02x) is not recognized as unames.icu\n",
1867  pInfo->dataFormat[0], pInfo->dataFormat[1],
1868  pInfo->dataFormat[2], pInfo->dataFormat[3],
1869  pInfo->formatVersion[0]);
1870  *pErrorCode=U_UNSUPPORTED_ERROR;
1871  return 0;
1872  }
1873 
1874  inBytes=(const uint8_t *)inData+headerSize;
1875  outBytes=(uint8_t *)outData+headerSize;
1876  if(length<0) {
/* preflighting: only the offset of the algorithmic names is needed */
1877  algNamesOffset=ds->readUInt32(((const uint32_t *)inBytes)[3]);
1878  } else {
1879  length-=headerSize;
/* need at least 20 bytes here, and the data must reach the algorithmic names offset */
1880  if( length<20 ||
1881  (uint32_t)length<(algNamesOffset=ds->readUInt32(((const uint32_t *)inBytes)[3]))
1882  ) {
1883  udata_printError(ds, "uchar_swapNames(): too few bytes (%d after header) for unames.icu\n",
1884  length);
1885  *pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR;
1886  return 0;
1887  }
1888  }
1889 
1890  if(length<0) {
1891  /* preflighting: iterate through algorithmic ranges */
1892  offset=algNamesOffset;
1893  count=ds->readUInt32(*((const uint32_t *)(inBytes+offset)));
1894  offset+=4;
1895 
1896  for(i=0; i<count; ++i) {
1897  inRange=(const AlgorithmicRange *)(inBytes+offset);
1898  offset+=ds->readUInt16(inRange->size);
1899  }
1900  } else {
1901  /* swap data */
1902  const uint16_t *p;
1903  uint16_t *q, *temp;
1904 
1905  int16_t tokens[512];
1906  uint16_t tokenCount;
1907 
1908  uint8_t map[256], trailMap[256];
1909 
1910  /* copy the data for inaccessible bytes */
1911  if(inBytes!=outBytes) {
1912  uprv_memcpy(outBytes, inBytes, length);
1913  }
1914 
1915  /* the initial 4 offsets first */
1916  tokenStringOffset=ds->readUInt32(((const uint32_t *)inBytes)[0]);
1917  groupsOffset=ds->readUInt32(((const uint32_t *)inBytes)[1]);
1918  groupStringOffset=ds->readUInt32(((const uint32_t *)inBytes)[2]);
1919  ds->swapArray32(ds, inBytes, 16, outBytes, pErrorCode);
1920 
1921  /*
1922  * now the tokens table
1923  * it needs to be permutated along with the compressed name strings
1924  */
1925  p=(const uint16_t *)(inBytes+16);
1926  q=(uint16_t *)(outBytes+16);
1927 
1928  /* read and swap the tokenCount */
1929  tokenCount=ds->readUInt16(*p);
1930  ds->swapArray16(ds, p, 2, q, pErrorCode);
1931  ++p;
1932  ++q;
1933 
1934  /* read the first 512 tokens and make the token maps */
1935  if(tokenCount<=512) {
1936  count=tokenCount;
1937  } else {
1938  count=512;
1939  }
1940  for(i=0; i<count; ++i) {
1941  tokens[i]=udata_readInt16(ds, p[i]);
1942  }
1943  for(; i<512; ++i) {
1944  tokens[i]=0; /* fill the rest of the tokens array if tokenCount<512 */
1945  }
/* map covers tokens[0..255] (single/lead bytes), trailMap covers tokens[256..511] */
1946  makeTokenMap(ds, tokens, tokenCount, map, pErrorCode);
1947  makeTokenMap(ds, tokens+256, (uint16_t)(tokenCount>256 ? tokenCount-256 : 0), trailMap, pErrorCode);
1948  if(U_FAILURE(*pErrorCode)) {
1949  return 0;
1950  }
1951 
1952  /*
1953  * swap and permutate the tokens
1954  * go through a temporary array to support in-place swapping
1955  */
1956  temp=(uint16_t *)uprv_malloc(tokenCount*2);
1957  if(temp==NULL) {
1958  udata_printError(ds, "out of memory swapping %u unames.icu tokens\n",
1959  tokenCount);
1960  *pErrorCode=U_MEMORY_ALLOCATION_ERROR;
1961  return 0;
1962  }
1963 
1964  /* swap and permutate single-/lead-byte tokens */
1965  for(i=0; i<tokenCount && i<256; ++i) {
1966  ds->swapArray16(ds, p+i, 2, temp+map[i], pErrorCode);
1967  }
1968 
1969  /* swap and permutate trail-byte tokens */
1970  for(; i<tokenCount; ++i) {
/* keep the upper bits of the index and permute only its low byte */
1971  ds->swapArray16(ds, p+i, 2, temp+(i&0xffffff00)+trailMap[i&0xff], pErrorCode);
1972  }
1973 
1974  /* copy the result into the output and free the temporary array */
1975  uprv_memcpy(q, temp, tokenCount*2);
1976  uprv_free(temp);
1977 
1978  /*
1979  * swap the token strings but not a possible padding byte after
1980  * the terminating NUL of the last string
1981  */
1982  udata_swapInvStringBlock(ds, inBytes+tokenStringOffset, (int32_t)(groupsOffset-tokenStringOffset),
1983  outBytes+tokenStringOffset, pErrorCode);
1984  if(U_FAILURE(*pErrorCode)) {
1985  udata_printError(ds, "uchar_swapNames(token strings) failed\n");
1986  return 0;
1987  }
1988 
1989  /* swap the group table */
1990  count=ds->readUInt16(*((const uint16_t *)(inBytes+groupsOffset)));
/* one 16-bit count followed by three 16-bit units per group */
1991  ds->swapArray16(ds, inBytes+groupsOffset, (int32_t)((1+count*3)*2),
1992  outBytes+groupsOffset, pErrorCode);
1993 
1994  /*
1995  * swap the group strings
1996  * swap the string bytes but not the nibble-encoded string lengths
1997  */
1998  if(ds->inCharset!=ds->outCharset) {
2000 
2001  const uint8_t *inStrings, *nextInStrings;
2002  uint8_t *outStrings;
2003 
2004  uint8_t c;
2005 
2006  inStrings=inBytes+groupStringOffset;
2007  outStrings=outBytes+groupStringOffset;
2008 
2009  stringsCount=algNamesOffset-groupStringOffset;
2010 
2011  /* iterate through string groups until only a few padding bytes are left */
2012  while(stringsCount>32) {
2013  nextInStrings=expandGroupLengths(inStrings, offsets, lengths);
2014 
2015  /* move past the length bytes */
2016  stringsCount-=(uint32_t)(nextInStrings-inStrings);
2017  outStrings+=nextInStrings-inStrings;
2018  inStrings=nextInStrings;
2019 
2020  count=offsets[31]+lengths[31]; /* total number of string bytes in this group */
2021  stringsCount-=count;
2022 
2023  /* swap the string bytes using map[] and trailMap[] */
2024  while(count>0) {
2025  c=*inStrings++;
2026  *outStrings++=map[c];
2027  if(tokens[c]!=-2) {
2028  --count;
2029  } else {
2030  /* token lead byte: swap the trail byte, too */
2031  *outStrings++=trailMap[*inStrings++];
2032  count-=2;
2033  }
2034  }
2035  }
2036  }
2037 
2038  /* swap the algorithmic ranges */
2039  offset=algNamesOffset;
2040  count=ds->readUInt32(*((const uint32_t *)(inBytes+offset)));
2041  ds->swapArray32(ds, inBytes+offset, 4, outBytes+offset, pErrorCode);
2042  offset+=4;
2043 
2044  for(i=0; i<count; ++i) {
2045  if(offset>(uint32_t)length) {
2046  udata_printError(ds, "uchar_swapNames(): too few bytes (%d after header) for unames.icu algorithmic range %u\n",
2047  length, i);
2048  *pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR;
2049  return 0;
2050  }
2051 
2052  inRange=(const AlgorithmicRange *)(inBytes+offset);
2053  outRange=(AlgorithmicRange *)(outBytes+offset);
/* advance using the unswapped input size before the range is swapped (possibly in place) */
2054  offset+=ds->readUInt16(inRange->size);
2055 
/* the first 8 bytes of a range are swapped as 32-bit units, then the 16-bit size field */
2056  ds->swapArray32(ds, inRange, 8, outRange, pErrorCode);
2057  ds->swapArray16(ds, &inRange->size, 2, &outRange->size, pErrorCode);
2058  switch(inRange->type) {
2059  case 0:
2060  /* swap prefix string */
2061  ds->swapInvChars(ds, inRange+1, (int32_t)uprv_strlen((const char *)(inRange+1)),
2062  outRange+1, pErrorCode);
2063  if(U_FAILURE(*pErrorCode)) {
2064  udata_printError(ds, "uchar_swapNames(prefix string of algorithmic range %u) failed\n",
2065  i);
2066  return 0;
2067  }
2068  break;
2069  case 1:
2070  {
2071  /* swap factors and the prefix and factor strings */
2072  uint32_t factorsCount;
2073 
2074  factorsCount=inRange->variant;
2075  p=(const uint16_t *)(inRange+1);
2076  q=(uint16_t *)(outRange+1);
2077  ds->swapArray16(ds, p, (int32_t)(factorsCount*2), q, pErrorCode);
2078 
2079  /* swap the strings, up to the last terminating NUL */
2080  p+=factorsCount;
2081  q+=factorsCount;
2082  stringsCount=(uint32_t)((inBytes+offset)-(const uint8_t *)p);
/* trim trailing non-NUL bytes so that the swapped block ends on a NUL */
2083  while(stringsCount>0 && ((const uint8_t *)p)[stringsCount-1]!=0) {
2084  --stringsCount;
2085  }
2086  ds->swapInvChars(ds, p, (int32_t)stringsCount, q, pErrorCode);
2087  }
2088  break;
2089  default:
2090  udata_printError(ds, "uchar_swapNames(): unknown type %u of algorithmic range %u\n",
2091  inRange->type, i);
2092  *pErrorCode=U_UNSUPPORTED_ERROR;
2093  return 0;
2094  }
2095  }
2096  }
2097 
2098  return headerSize+(int32_t)offset;
2099 }
2100 
2101 /*
2102  * Hey, Emacs, please set the following:
2103  *
2104  * Local Variables:
2105  * indent-tabs-mode: nil
2106  * End:
2107  *
2108  */
cp
Definition: action.c:1035
q
Definition: afm2pl.c:2287
int code
Definition: aftopl.c:52
#define count(a)
Definition: aptex-macros.h:781
#define name
#define next(a)
Definition: aptex-macros.h:924
#define UPRV_LENGTHOF(array)
Definition: cmemory.h:50
#define uprv_memcpy(dst, src, size)
Definition: cmemory.h:40
#define uprv_memset(buffer, mark, size)
Definition: cmemory.h:51
#define uprv_strcmp(s1, s2)
Definition: cstring.h:38
#define uprv_tolower
Definition: cstring.h:68
#define uprv_strlen(str)
Definition: cstring.h:37
@ FALSE
Definition: dd.h:101
@ TRUE
Definition: dd.h:102
int group
Definition: dtl.h:84
int v
Definition: dviconv.c:10
char * temp
Definition: dvidvi.c:137
#define error(a)
Definition: dviinfo.c:48
static gregorio_element ** elements
#define s
Definition: afcover.h:80
#define t
Definition: afcover.h:96
#define c(n)
Definition: gpos-common.c:150
static int lengths[256]
Definition: iframe.c:174
unsigned char UChar
Definition: bzip2.c:163
#define NULL
Definition: ftobjs.h:61
small capitals from c petite p
Definition: afcover.h:72
small capitals from c petite p scientific i
Definition: afcover.h:80
FT_UInt idx
Definition: cffcmap.c:135
signed short int16_t
Definition: stdint.h:76
unsigned short uint16_t
Definition: stdint.h:79
unsigned int uint32_t
Definition: stdint.h:80
signed int int32_t
Definition: stdint.h:77
unsigned char uint8_t
Definition: stdint.h:78
signed char int8_t
Definition: stdint.h:75
float factor
Definition: dimen.c:22
#define length(c)
Definition: ctangleboot.c:65
#define dest
#define U_CALLCONV
Definition: platform.h:877
#define U_IS_BIG_ENDIAN
Definition: platform.h:403
#define U_EXPORT2
Definition: platform.h:844
#define U_CHARSET_FAMILY
Definition: platform.h:632
boolean upper(voice_index voice)
Definition: mtxline.c:760
string fn
Definition: fc-lang.py:335
list names
Definition: fc-lang.py:151
const char * suffix
Definition: pkg_icu.cpp:27
static UnicodeString tokenString(tokenType tok)
Definition: plurrule.cpp:1080
logical lower
Definition: pmxab.c:697
static int offset
Definition: ppmtogif.c:642
char line[1024]
Definition: process_score.c:29
#define map
#define status
C API: Platform Utilities.
static int groups[]
Definition: slnudata.c:869
#define uint32_t
Definition: stdint.in.h:168
#define int32_t
Definition: stdint.in.h:167
#define uint16_t
Definition: stdint.in.h:161
#define int8_t
Definition: stdint.in.h:153
#define uint8_t
Definition: stdint.in.h:154
static int32_t indexes[_SPREP_INDEX_TOP]
Definition: store.c:154
token
Definition: strexpr.c:17
uint8_t type
Definition: unames.cpp:80
uint8_t variant
Definition: unames.cpp:80
uint16_t size
Definition: unames.cpp:81
uint32_t start
Definition: unames.cpp:79
uint32_t end
Definition: unames.cpp:79
UChar32 code
Definition: unames.cpp:101
const char * otherName
Definition: unames.cpp:100
uint32_t groupStringOffset
Definition: unames.cpp:85
uint32_t algNamesOffset
Definition: unames.cpp:85
uint32_t tokenStringOffset
Definition: unames.cpp:85
uint8_t formatVersion[4]
Definition: udata.h:148
uint8_t dataFormat[4]
Definition: udata.h:144
uint8_t charsetFamily
Definition: udata.h:132
uint8_t isBigEndian
Definition: udata.h:128
uint16_t size
Definition: udata.h:119
UDataReadUInt16 * readUInt16
Definition: udataswp.h:164
UDataSwapFn * swapInvChars
Definition: udataswp.h:186
UDataSwapFn * swapArray16
Definition: udataswp.h:180
uint8_t inCharset
Definition: udataswp.h:155
uint8_t outCharset
Definition: udataswp.h:159
UDataReadUInt32 * readUInt32
Definition: udataswp.h:166
UDataSwapFn * swapArray32
Definition: udataswp.h:182
USetAdd * add
Definition: uset_imp.h:51
USet * set
Definition: uset_imp.h:50
Definition: utils.c:300
Definition: tokst.h:327
Definition: inftrees.h:24
Definition: pst1form.c:310
Definition: sd.h:82
Definition: bdf.c:133
Definition: dvips.h:235
Definition: parser.c:43
#define c2
Definition: t1io.c:53
#define c1
Definition: t1io.c:52
int number
Definition: t1part.c:207
int j
Definition: t4ht.c:1589
int tokenLength
Definition: token.c:49
#define U_ASSERT(exp)
Definition: uassert.h:37
C API: Unicode Properties.
UCharNameChoice
Definition: uchar.h:1869
@ U_CHAR_NAME_CHOICE_COUNT
Definition: uchar.h:1889
@ U_EXTENDED_CHAR_NAME
Definition: uchar.h:1881
@ U_UNICODE_CHAR_NAME
Definition: uchar.h:1871
#define UCHAR_MAX_VALUE
Definition: uchar.h:167
UBool UEnumCharNamesFn(void *context, UChar32 code, UCharNameChoice nameChoice, const char *name, int32_t length)
Definition: uchar.h:3537
@ U_SURROGATE
Definition: uchar.h:772
@ UCLN_COMMON_UNAMES
Definition: ucln_cmn.h:52
C API: Data loading interface.
int32_t UChar32
Definition: umachine.h:467
int8_t UBool
Definition: umachine.h:269
#define U_CAPI
Definition: umachine.h:110
void umtx_initOnce(UInitOnce &uio, T *obj, void(T::*fp)())
Definition: umutex.h:114
#define U_INITONCE_INITIALIZER
Definition: umutex.h:108
#define U_LEAD_SURROGATE
Definition: unames.cpp:122
static void charSetToUSet(uint32_t cset[8], const USetAdder *sa)
Definition: unames.cpp:1718
static int32_t calcNameSetLength(const uint16_t *tokens, uint16_t tokenCount, const uint8_t *tokenStrings, int8_t *tokenLengths, uint32_t set[8], const uint8_t **pLine, const uint8_t *lineLimit)
Definition: unames.cpp:1297
static const uint8_t * expandGroupLengths(const uint8_t *s, uint16_t offsets[(1L<< 5)+1], uint16_t lengths[(1L<< 5)+1])
Definition: unames.cpp:515
#define U_ISO_COMMENT
Definition: unames.cpp:223
#define NEXT_GROUP(group)
Definition: unames.cpp:75
#define SET_ADD(set, c)
Definition: unames.cpp:1200
static UBool enumGroupNames(UCharNames *names, const uint16_t *group, UChar32 start, UChar32 end, UEnumCharNamesFn *fn, void *context, UCharNameChoice nameChoice)
Definition: unames.cpp:599
static uint8_t getCharCat(UChar32 cp)
Definition: unames.cpp:421
#define GROUP_MASK
Definition: unames.cpp:43
static uint16_t writeFactorSuffix(const uint16_t *factors, uint16_t count, const char *s, uint32_t code, uint16_t indexes[8], const char *elementBases[8], const char *elements[8], char *buffer, uint16_t bufferLength)
Definition: unames.cpp:773
#define GET_GROUPS(names)
Definition: unames.cpp:97
static UDataMemory * uCharNamesData
Definition: unames.cpp:106
static UBool compareName(UCharNames *names, const uint8_t *name, uint16_t nameLength, UCharNameChoice nameChoice, const char *otherName)
Definition: unames.cpp:331
static int32_t calcStringSetLength(uint32_t set[8], const char *s)
Definition: unames.cpp:1204
static icu::UInitOnce gCharNamesInitOnce
Definition: unames.cpp:108
static void calcGroupNameSetsLengths(int32_t maxNameLength)
Definition: unames.cpp:1342
static int32_t calcExtNameSetsLengths(int32_t maxNameLength)
Definition: unames.cpp:1277
static int32_t calcAlgNameSetsLengths(int32_t maxNameLength)
Definition: unames.cpp:1216
static UChar32 findAlgName(AlgorithmicRange *range, UCharNameChoice nameChoice, const char *otherName)
Definition: unames.cpp:1078
static UBool unames_cleanup(void)
Definition: unames.cpp:165
static uint16_t getExtName(uint32_t code, char *buffer, uint16_t bufferLength)
Definition: unames.cpp:448
static uint16_t expandName(UCharNames *names, const uint8_t *name, uint16_t nameLength, UCharNameChoice nameChoice, char *buffer, uint16_t bufferLength)
Definition: unames.cpp:238
#define WRITE_CHAR(buffer, bufferLength, bufferPos, c)
Definition: unames.cpp:215
static void loadCharNames(UErrorCode &status)
Definition: unames.cpp:195
static UBool isAcceptable(void *, const char *, const char *, const UDataInfo *pInfo)
Definition: unames.cpp:180
static uint16_t expandGroupName(UCharNames *names, const uint16_t *group, uint16_t lineNumber, UCharNameChoice nameChoice, char *buffer, uint16_t bufferLength)
Definition: unames.cpp:567
#define GET_GROUP_OFFSET(group)
Definition: unames.cpp:73
static const char * getCharCatName(UChar32 cp)
Definition: unames.cpp:435
#define U_TRAIL_SURROGATE
Definition: unames.cpp:123
static uint16_t getName(UCharNames *names, uint32_t code, UCharNameChoice nameChoice, char *buffer, uint16_t bufferLength)
Definition: unames.cpp:578
static UBool enumAlgNames(AlgorithmicRange *range, UChar32 start, UChar32 limit, UEnumCharNamesFn *fn, void *context, UCharNameChoice nameChoice)
Definition: unames.cpp:936
#define SET_CONTAINS(set, c)
Definition: unames.cpp:1201
#define PREV_GROUP(group)
Definition: unames.cpp:76
static const char *const charCatNames[(U_CHAR_CATEGORY_COUNT+3)]
Definition: unames.cpp:127
#define GROUP_SHIFT
Definition: unames.cpp:41
static UBool enumExtNames(UChar32 start, UChar32 end, UEnumCharNamesFn *fn, void *context)
Definition: unames.cpp:644
static const uint16_t * getGroup(UCharNames *names, uint32_t code)
Definition: unames.cpp:483
#define U_CHAR_EXTENDED_CATEGORY_COUNT
Definition: unames.cpp:125
static UBool isDataLoaded(UErrorCode *pErrorCode)
Definition: unames.cpp:210
static uint32_t gNameSet[8]
Definition: unames.cpp:119
#define DO_FIND_NAME
Definition: unames.cpp:104
static const char DATA_TYPE[]
Definition: unames.cpp:39
#define U_NONCHARACTER_CODE_POINT
Definition: unames.cpp:121
#define LINES_PER_GROUP
Definition: unames.cpp:42
static void makeTokenMap(const UDataSwapper *ds, int16_t tokens[], uint16_t tokenCount, uint8_t map[256], UErrorCode *pErrorCode)
Definition: unames.cpp:1773
static UBool enumNames(UCharNames *names, UChar32 start, UChar32 limit, UEnumCharNamesFn *fn, void *context, UCharNameChoice nameChoice)
Definition: unames.cpp:667
static const char DATA_NAME[]
Definition: unames.cpp:38
static UCharNames * uCharNames
Definition: unames.cpp:107
static UBool calcNameSetsLengths(UErrorCode *pErrorCode)
Definition: unames.cpp:1414
@ GROUP_LENGTH
Definition: unames.cpp:65
@ GROUP_OFFSET_LOW
Definition: unames.cpp:64
@ GROUP_MSB
Definition: unames.cpp:62
@ GROUP_OFFSET_HIGH
Definition: unames.cpp:63
static int32_t gMaxNameLength
Definition: unames.cpp:113
static uint16_t getAlgName(AlgorithmicRange *range, uint32_t code, UCharNameChoice nameChoice, char *buffer, uint16_t bufferLength)
Definition: unames.cpp:852
#define uprv_toupper
Definition: urename.h:1458
#define udata_getMemory
Definition: urename.h:876
#define uprv_getMaxCharNameLength
Definition: urename.h:1418
#define udata_readInt16
Definition: urename.h:883
#define u_charsToUChars
Definition: urename.h:220
#define u_enumCharNames
Definition: urename.h:224
#define u_getISOComment
Definition: urename.h:269
#define udata_printError
Definition: urename.h:882
#define uprv_malloc
Definition: urename.h:1435
#define u_terminateChars
Definition: urename.h:397
#define u_charFromName
Definition: urename.h:216
#define uprv_free
Definition: urename.h:1414
#define udata_close
Definition: urename.h:870
#define ucln_common_registerCleanup
Definition: urename.h:604
#define uchar_swapNames
Definition: urename.h:602
#define u_charType
Definition: urename.h:219
#define udata_swapInvStringBlock
Definition: urename.h:889
#define udata_openChoice
Definition: urename.h:879
#define u_charName
Definition: urename.h:218
#define udata_swapDataHeader
Definition: urename.h:888
#define uprv_getCharNameCharacters
Definition: urename.h:1415
C API: 16-bit Unicode handling macros.
C API: Code point macros.
#define U_IS_UNICODE_NONCHAR(c)
Definition: utf.h:130
#define U_IS_LEAD(c)
Definition: utf.h:177
@ start
Definition: preamble.c:52
@ range
Definition: preamble.c:52
Basic definitions for ICU, for both C and C++ APIs.
UErrorCode
Definition: utypes.h:431
@ U_MEMORY_ALLOCATION_ERROR
Definition: utypes.h:473
@ U_ILLEGAL_ARGUMENT_ERROR
Definition: utypes.h:467
@ U_ILLEGAL_CHAR_FOUND
Definition: utypes.h:478
@ U_UNSUPPORTED_ERROR
Definition: utypes.h:482
@ U_INDEX_OUTOFBOUNDS_ERROR
Definition: utypes.h:474
@ U_ZERO_ERROR
Definition: utypes.h:465
#define U_FAILURE(x)
Definition: utypes.h:735
#define U_SUCCESS(x)
Definition: utypes.h:730
#define buffer
Definition: xmlparse.c:611
#define errorCode
Definition: xmlparse.c:601
#define limit(x)
Definition: yuvsplittoppm.c:26
#define end(cp)
Definition: zic.c:71