"Fossies" - the Fresh Open Source Software Archive

Member "unifont-12.1.02/src/unigenwidth.c" (9 Jul 2017, 14486 Bytes) of package /linux/misc/unifont-12.1.02.tar.gz:


As a special service, "Fossies" has tried to format the requested source page into HTML using (guessed) C and C++ source code syntax highlighting (style: standard), with prefixed line numbers and a code folding option. Alternatively, you can view or download the uninterpreted source code file here. For more information about "unigenwidth.c", see the Fossies "Dox" file reference documentation.

    1 /*
    2    unigenwidth - IEEE 1003.1-2008 setup to calculate wchar_t string widths.
    3                  All glyphs are treated as 16 pixels high, and can be
    4                  8, 16, 24, or 32 pixels wide (resulting in widths of
    5                  1, 2, 3, or 4, respectively).
    6 
    7    Author: Paul Hardy, 2013, 2017
    8 
    9    LICENSE:
   10 
   11       This program is free software: you can redistribute it and/or modify
   12       it under the terms of the GNU General Public License as published by
   13       the Free Software Foundation, either version 2 of the License, or
   14       (at your option) any later version.
   15 
   16       This program is distributed in the hope that it will be useful,
   17       but WITHOUT ANY WARRANTY; without even the implied warranty of
   18       MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
   19       GNU General Public License for more details.
   20 
   21       You should have received a copy of the GNU General Public License
   22       along with this program.  If not, see <http://www.gnu.org/licenses/>.
   23 */
   24 
   25 /*
   26    20 June 2017 [Paul Hardy]:
   27       - Now handles glyphs that are 24 or 32 pixels wide.
   28 
   29    8 July 2017 [Paul Hardy]:
   30       - Modifies sscanf format strings to ignore second field after
   31         the ":" field separator, newly added to "*combining.txt" files
   32         and already present in "*.hex" files.
   33 */
   34 
   35 #include <stdio.h>
   36 #include <stdlib.h>
   37 #include <string.h>
   38 
   39 #define MAXSTRING   256
   40 
   41 /* Definitions for Pikto in Plane 15 */
   42 #define PIKTO_START 0x0F0E70
   43 #define PIKTO_END   0x0F11EF
   44 #define PIKTO_SIZE  (PIKTO_END - PIKTO_START + 1)
   45 
   46 
   47 int
   48 main (int argc, char **argv)
   49 {
   50 
   51    int i; /* loop variable */
   52 
   53    char teststring[MAXSTRING];
   54    int  loc;
   55    char *gstart;
   56 
   57    char glyph_width[0x20000];
   58    char pikto_width[PIKTO_SIZE];
   59 
   60    FILE *infilefp;
   61 
   62    if (argc != 3) {
   63       fprintf (stderr, "\n\nUsage: %s <unifont.hex> <combining.txt>\n\n", argv[0]);
   64       exit (EXIT_FAILURE);
   65    }
   66 
   67    /*
   68       Read the collection of hex glyphs.
   69    */
   70    if ((infilefp = fopen (argv[1],"r")) == NULL) {
   71       fprintf (stderr,"ERROR - hex input file %s not found.\n\n", argv[1]);
   72       exit (EXIT_FAILURE);
   73    }
   74 
   75    /* Flag glyph as non-existent until found. */
   76    memset (glyph_width, -1, 0x20000 * sizeof (char));
   77    memset (pikto_width, -1, (PIKTO_SIZE) * sizeof (char));
   78 
   79    teststring[MAXSTRING-1] = '\0';
   80    while (fgets (teststring, MAXSTRING-1, infilefp) != NULL) {
   81       sscanf (teststring, "%X:%*s", &loc);
   82       if (loc < 0x20000) {
   83          gstart = strchr (teststring,':') + 1;
   84          /*
   85             16 rows per glyph, 2 ASCII hexadecimal digits per byte,
   86             so divide number of digits by 32 (shift right 5 bits).
   87          */
   88          glyph_width[loc] = (strlen (gstart) - 1) >> 5;
   89       }
   90       else if ((loc >= PIKTO_START) && (loc <= PIKTO_END)) {
   91          gstart = strchr (teststring,':') + 1;
   92          pikto_width[loc - PIKTO_START] = strlen (gstart) <= 34 ? 1 : 2;
   93       }
   94    }
   95 
   96    fclose (infilefp);
   97 
   98    /*
   99       Now read the combining character code points.  These have width of 0.
  100    */
  101    if ((infilefp = fopen (argv[2],"r")) == NULL) {
  102       fprintf (stderr,"ERROR - combining characters file %s not found.\n\n", argv[2]);
  103       exit (EXIT_FAILURE);
  104    }
  105 
  106    while (fgets (teststring, MAXSTRING-1, infilefp) != NULL) {
  107       sscanf (teststring, "%X:%*s", &loc);
  108       if (loc < 0x20000) glyph_width[loc] = 0;
  109    }
  110 
  111    fclose (infilefp);
  112 
  113    /*
  114       Code Points with Unusual Properties (Unicode Standard, Chapter 4).
  115 
  116       As of Unifont 10.0.04, use the widths in the "*-nonprinting.hex"
  117       files.  If an application is smart enough to know how to handle
  118       these special cases, it will not render the "nonprinting" glyph
  119       and will treat the code point as being zero-width.
  120    */
  121 // glyph_width[0]=0; /* NULL character */
  122 // for (i = 0x0001; i <= 0x001F; i++) glyph_width[i]=-1; /* Control Characters */
  123 // for (i = 0x007F; i <= 0x009F; i++) glyph_width[i]=-1; /* Control Characters */
  124 
  125 // glyph_width[0x034F]=0; /* combining grapheme joiner               */
  126 // glyph_width[0x180B]=0; /* Mongolian free variation selector one   */
  127 // glyph_width[0x180C]=0; /* Mongolian free variation selector two   */
  128 // glyph_width[0x180D]=0; /* Mongolian free variation selector three */
  129 // glyph_width[0x180E]=0; /* Mongolian vowel separator               */
  130 // glyph_width[0x200B]=0; /* zero width space                        */
  131 // glyph_width[0x200C]=0; /* zero width non-joiner                   */
  132 // glyph_width[0x200D]=0; /* zero width joiner                       */
  133 // glyph_width[0x200E]=0; /* left-to-right mark                      */
  134 // glyph_width[0x200F]=0; /* right-to-left mark                      */
  135 // glyph_width[0x202A]=0; /* left-to-right embedding                 */
  136 // glyph_width[0x202B]=0; /* right-to-left embedding                 */
  137 // glyph_width[0x202C]=0; /* pop directional formatting              */
  138 // glyph_width[0x202D]=0; /* left-to-right override                  */
  139 // glyph_width[0x202E]=0; /* right-to-left override                  */
  140 // glyph_width[0x2060]=0; /* word joiner                             */
  141 // glyph_width[0x2061]=0; /* function application                    */
  142 // glyph_width[0x2062]=0; /* invisible times                         */
  143 // glyph_width[0x2063]=0; /* invisible separator                     */
  144 // glyph_width[0x2064]=0; /* invisible plus                          */
  145 // glyph_width[0x206A]=0; /* inhibit symmetric swapping              */
  146 // glyph_width[0x206B]=0; /* activate symmetric swapping             */
  147 // glyph_width[0x206C]=0; /* inhibit arabic form shaping             */
  148 // glyph_width[0x206D]=0; /* activate arabic form shaping            */
  149 // glyph_width[0x206E]=0; /* national digit shapes                   */
  150 // glyph_width[0x206F]=0; /* nominal digit shapes                    */
  151 
  152 // /* Variation Selector-1 to Variation Selector-16 */
  153 // for (i = 0xFE00; i <= 0xFE0F; i++) glyph_width[i] = 0;
  154 
  155 // glyph_width[0xFEFF]=0; /* zero width no-break space         */
  156 // glyph_width[0xFFF9]=0; /* interlinear annotation anchor     */
  157 // glyph_width[0xFFFA]=0; /* interlinear annotation separator  */
  158 // glyph_width[0xFFFB]=0; /* interlinear annotation terminator */
  159    /*
  160       Let glyph widths represent 0xFFFC (object replacement character)
  161       and 0xFFFD (replacement character).
  162    */
  163 
  164    /*
  165       Hangul Jamo:
  166 
  167          Leading Consonant (Choseong): leave spacing as is.
  168 
  169          Hangul Choseong Filler (U+115F): set width to 2.
  170 
  171          Hangul Jungseong Filler, Hangul Vowel (Jungseong), and
  172          Final Consonant (Jongseong): set width to 0, because these
  173          combine with the leading consonant as one composite syllabic
  174          glyph.  As of Unicode 5.2, the Hangul Jamo block (U+1100..U+11FF)
  175          is completely filled.
  176    */
  177    // for (i = 0x1160; i <= 0x11FF; i++) glyph_width[i]=0; /* Vowels & Final Consonants */
  178 
  179    /*
  180       Private Use Area -- the width is undefined, but likely
  181       to be 2 charcells wide either from a graphic glyph or
  182       from a four-digit hexadecimal glyph representing the
  183       code point.  Therefore if any PUA glyph does not have
  184       a non-zero width yet, assign it a default width of 2.
  185       The Unicode Standard allows giving PUA characters
  186       default property values; see for example The Unicode
  187       Standard Version 5.0, p. 91.  This same default is
  188       used for higher plane PUA code points below.
  189    */
  190    // for (i = 0xE000; i <= 0xF8FF; i++) {
  191    //    if (glyph_width[i] == 0) glyph_width[i]=2;
  192    // }
  193 
  194    /*
  195       <not a character>
  196    */
  197    for (i = 0xFDD0; i <= 0xFDEF; i++) glyph_width[i] = -1;
  198    glyph_width[0xFFFE] = -1; /* Byte Order Mark */
  199    glyph_width[0xFFFF] = -1; /* Byte Order Mark */
  200 
  201    /* Surrogate Code Points */
  202    for (i = 0xD800; i <= 0xDFFF; i++) glyph_width[i]=-1;
  203 
  204    /* CJK Code Points */
  205    for (i = 0x4E00; i <= 0x9FFF; i++) if (glyph_width[i] < 0) glyph_width[i] = 2;
  206    for (i = 0x3400; i <= 0x4DBF; i++) if (glyph_width[i] < 0) glyph_width[i] = 2;
  207    for (i = 0xF900; i <= 0xFAFF; i++) if (glyph_width[i] < 0) glyph_width[i] = 2;
  208 
  209    /*
  210       Now generate the output file.
  211    */
  212    printf ("/*\n");
  213    printf ("   wcwidth and wcswidth functions, as per IEEE 1003.1-2008\n");
  214    printf ("   System Interfaces, pp. 2241 and 2251.\n\n");
  215    printf ("   Author: Paul Hardy, 2013\n\n");
  216    printf ("   Copyright (c) 2013 Paul Hardy\n\n");
  217    printf ("   LICENSE:\n");
  218    printf ("\n");
  219    printf ("      This program is free software: you can redistribute it and/or modify\n");
  220    printf ("      it under the terms of the GNU General Public License as published by\n");
  221    printf ("      the Free Software Foundation, either version 2 of the License, or\n");
  222    printf ("      (at your option) any later version.\n");
  223    printf ("\n");
  224    printf ("      This program is distributed in the hope that it will be useful,\n");
  225    printf ("      but WITHOUT ANY WARRANTY; without even the implied warranty of\n");
  226    printf ("      MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the\n");
  227    printf ("      GNU General Public License for more details.\n");
  228    printf ("\n");
  229    printf ("      You should have received a copy of the GNU General Public License\n");
  230    printf ("      along with this program.  If not, see <http://www.gnu.org/licenses/>.\n");
  231    printf ("*/\n\n");
  232 
  233    printf ("#include <wchar.h>\n\n");
  234    printf ("/* Definitions for Pikto CSUR Private Use Area glyphs */\n");
  235    printf ("#define PIKTO_START\t0x%06X\n", PIKTO_START);
  236    printf ("#define PIKTO_END\t0x%06X\n", PIKTO_END);
  237    printf ("#define PIKTO_SIZE\t(PIKTO_END - PIKTO_START + 1)\n");
  238    printf ("\n\n");
  239    printf ("/* wcwidth -- return charcell positions of one code point */\n");
  240    printf ("inline int\nwcwidth (wchar_t wc)\n{\n");
  241    printf ("   return (wcswidth (&wc, 1));\n");
  242    printf ("}\n");
  243    printf ("\n\n");
  244    printf ("int\nwcswidth (const wchar_t *pwcs, size_t n)\n{\n\n");
  245    printf ("   int i;                    /* loop variable                                      */\n");
  246    printf ("   unsigned codept;          /* Unicode code point of current character            */\n");
  247    printf ("   unsigned plane;           /* Unicode plane, 0x00..0x10                          */\n");
  248    printf ("   unsigned lower17;         /* lower 17 bits of Unicode code point                */\n");
  249    printf ("   unsigned lower16;         /* lower 16 bits of Unicode code point                */\n");
  250    printf ("   int lowpt, midpt, highpt; /* for binary searching in plane1zeroes[]             */\n");
  251    printf ("   int found;                /* for binary searching in plane1zeroes[]             */\n");
  252    printf ("   int totalwidth;           /* total width of string, in charcells (1 or 2/glyph) */\n");
  253    printf ("   int illegalchar;          /* Whether or not this code point is illegal          */\n");
  254    putchar ('\n');
  255 
  256    /*
  257       Print the glyph_width[] array for glyphs widths in the
  258       Basic Multilingual Plane (Plane 0).
  259    */
  260    printf ("   char glyph_width[0x20000] = {");
  261    for (i = 0; i < 0x10000; i++) {
  262       if ((i & 0x1F) == 0)
  263          printf ("\n      /* U+%04X */ ", i);
  264       printf ("%d,", glyph_width[i]);
  265    }
  266    for (i = 0x10000; i < 0x20000; i++) {
  267       if ((i & 0x1F) == 0)
  268          printf ("\n      /* U+%06X */ ", i);
  269       printf ("%d", glyph_width[i]);
  270       if (i < 0x1FFFF) putchar (',');
  271    }
  272    printf ("\n   };\n\n");
  273 
  274    /*
  275       Print the pikto_width[] array for Pikto glyph widths.
  276    */
  277    printf ("   char pikto_width[PIKTO_SIZE] = {");
  278    for (i = 0; i < PIKTO_SIZE; i++) {
  279       if ((i & 0x1F) == 0)
  280          printf ("\n      /* U+%06X */ ", PIKTO_START + i);
  281       printf ("%d", pikto_width[i]);
  282       if ((PIKTO_START + i) < PIKTO_END) putchar (',');
  283    }
  284    printf ("\n   };\n\n");
  285 
  286    /*
  287       Execution part of wcswidth.
  288    */
  289    printf ("\n");
  290    printf ("   illegalchar = totalwidth = 0;\n");
  291    printf ("   for (i = 0; !illegalchar && i < n; i++) {\n");
  292    printf ("      codept  = pwcs[i];\n");
  293    printf ("      plane   = codept >> 16;\n");
  294    printf ("      lower17 = codept & 0x1FFFF;\n");
  295    printf ("      lower16 = codept & 0xFFFF;\n");
  296    printf ("      if (plane < 2) { /* the most common case */\n");
  297    printf ("         if (glyph_width[lower17] < 0) illegalchar = 1;\n");
  298    printf ("         else totalwidth += glyph_width[lower17];\n");
  299    printf ("      }\n");
  300    printf ("      else { /* a higher plane or beyond Unicode range */\n");
  301    printf ("         if  ((lower16 == 0xFFFE) || (lower16 == 0xFFFF)) {\n");
  302    printf ("            illegalchar = 1;\n");
  303    printf ("         }\n");
  304    printf ("         else if (plane < 4) {  /* Ideographic Plane */\n");
  305    printf ("            totalwidth += 2; /* Default ideographic width */\n");
  306    printf ("         }\n");
  307    printf ("         else if (plane == 0x0F) {  /* CSUR Private Use Area */\n");
  308    printf ("            if (lower16 <= 0x0E6F) { /* Kinya */\n");
  309    printf ("               totalwidth++; /* all Kinya syllables have width 1 */\n");
  310    printf ("            }\n");
  311    printf ("            else if (lower16 <= (PIKTO_END & 0xFFFF)) { /* Pikto */\n");
  312    printf ("               if (pikto_width[lower16 - (PIKTO_START & 0xFFFF)] < 0) illegalchar = 1;\n");
  313    printf ("               else totalwidth += pikto_width[lower16 - (PIKTO_START & 0xFFFF)];\n");
  314    printf ("            }\n");
  315    printf ("         }\n");
  316    printf ("         else if (plane > 0x10) {\n");
  317    printf ("            illegalchar = 1;\n");
  318    printf ("         }\n");
  319    printf ("         /* Other non-printing in higher planes; return -1 as per IEEE 1003.1-2008. */\n");
  320    printf ("         else if (/* language tags */\n");
  321    printf ("                  codept == 0x0E0001 || (codept >= 0x0E0020 && codept <= 0x0E007F) ||\n");
  322    printf ("                  /* variation selectors, 0x0E0100..0x0E01EF */\n");
  323    printf ("                  (codept >= 0x0E0100 && codept <= 0x0E01EF)) {\n");
  324    printf ("            illegalchar = 1;\n");
  325    printf ("         }\n");
  326    printf ("         /*\n");
  327    printf ("            Unicode plane 0x02..0x10 printing character\n");
  328    printf ("         */\n");
  329    printf ("         else {\n");
  330    printf ("            illegalchar = 1; /* code is not in font */\n");
  331    printf ("         }\n");
  332    printf ("\n");
  333    printf ("      }\n");
  334    printf ("   }\n");
  335    printf ("   if (illegalchar) totalwidth = -1;\n");
  336    printf ("\n");
  337    printf ("   return (totalwidth);\n");
  338    printf ("\n");
  339    printf ("}\n");
  340 
  341    exit (EXIT_SUCCESS);
  342 }