"Fossies" - the Fresh Open Source Software Archive

Member "poppler-0.82.0/poppler/gen-unicode-tables.py" (25 Oct 2019, 1484 Bytes) of package /linux/misc/poppler-0.82.0.tar.xz:


As a special service, "Fossies" has tried to format the requested source page into HTML format using (guessed) Python source code syntax highlighting (style: standard) with prefixed line numbers. Alternatively, you can view or download the uninterpreted source code file here. For more information about "gen-unicode-tables.py", see the Fossies "Dox" file reference documentation.

    1 from __future__ import absolute_import, division, print_function
    2 
    3 import sys
    4 import unicodedata
    5 
    6 
    7 if sys.version_info[0] == 2:
    8     chr = unichr
    9 
   10 UNICODE_LAST_CHAR_PART1 = 0x2FAFF
   11 HANGUL_S_BASE = 0xAC00
   12 HANGUL_S_COUNT = 19 * 21 * 28
   13 
   14 
   15 print("""// Generated by gen-unicode-tables.py
   16 
   17 typedef struct {
   18   Unicode character;
   19   int length;
   20   int offset;
   21 } decomposition;
   22 """)
   23 
   24 decomp_table = []
   25 max_index = 0
   26 decomp_expansion_index = {}
   27 decomp_expansion = []
   28 for u in range(0, UNICODE_LAST_CHAR_PART1):
   29     if HANGUL_S_BASE <= u < HANGUL_S_BASE + HANGUL_S_COUNT:
   30         continue
   31     norm = tuple(map(ord, unicodedata.normalize("NFKD", chr(u))))
   32     if norm != (u, ):
   33         try:
   34             i = decomp_expansion_index[norm]
   35             decomp_table.append((u, len(norm), i))
   36         except KeyError:
   37             decomp_table.append((u, len(norm), max_index))
   38             decomp_expansion_index[norm] = max_index
   39             decomp_expansion.append((norm, max_index))
   40             max_index += len(norm)
   41 print("#define DECOMP_TABLE_LENGTH %d" % (len(decomp_table), ))
   42 print()
   43 print("static const decomposition decomp_table[] = {")
   44 print(*("  { 0x%x, %d, %d }" % (character, length, offset)
   45         for character, length, offset in decomp_table),
   46       sep=",\n")
   47 print("};")
   48 print()
   49 print("static const Unicode decomp_expansion[] = {")
   50 print(*("  %s /* offset %d */" % (", ".join("0x%x" % u for u in norm), index)
   51         for norm, index in decomp_expansion),
   52       sep=" ,\n")
   53 print("};")