"Fossies" - the Fresh Open Source Software Archive

Member "gawk-5.1.0/support/localeinfo.c" (6 Feb 2020, 5223 Bytes) of package /linux/misc/gawk-5.1.0.tar.xz:


As a special service, "Fossies" has tried to format the requested source page into HTML using (guessed) C and C++ source code syntax highlighting (style: standard), with prefixed line numbers and a code folding option. Alternatively, you can view or download the uninterpreted source code file here. For more information about "localeinfo.c", see the Fossies "Dox" file reference documentation and the latest Fossies "Diffs" side-by-side code changes report: 5.0.1_vs_5.1.0.

    1 /* locale information
    2 
    3    Copyright 2016-2019 Free Software Foundation, Inc.
    4 
    5    This program is free software; you can redistribute it and/or modify
    6    it under the terms of the GNU General Public License as published by
    7    the Free Software Foundation; either version 3, or (at your option)
    8    any later version.
    9 
   10    This program is distributed in the hope that it will be useful,
   11    but WITHOUT ANY WARRANTY; without even the implied warranty of
   12    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
   13    GNU General Public License for more details.
   14 
   15    You should have received a copy of the GNU General Public License
   16    along with this program; if not, write to the Free Software
   17    Foundation, Inc., 51 Franklin Street - Fifth Floor, Boston, MA
   18    02110-1301, USA.  */
   19 
   20 /* Written by Paul Eggert.  */
   21 
   22 #include <config.h>
   23 
   24 #include <localeinfo.h>
   25 
   26 #include <verify.h>
   27 
   28 #include <limits.h>
   29 #include <locale.h>
   30 #include <stdlib.h>
   31 #include <string.h>
   32 #include <wctype.h>
   33 
   34 #if defined(__DJGPP__)
   35 #include "mbsupport.h"
   36 #endif
   37 
   38 /* The sbclen implementation relies on this.  */
   39 verify (MB_LEN_MAX <= SCHAR_MAX);
   40 
   /* Report whether the current locale's multibyte encoding is UTF-8.

      The probe is the two-byte sequence C4 80.  The locale is taken to
      be UTF-8 exactly when mbrtowc consumes both bytes and yields the
      wide character 0x100.  */

   static bool
   is_using_utf8 (void)
   {
     static char const probe[] = "\xc4\x80";
     mbstate_t state = {0};
     wchar_t decoded;

     /* Anything other than "consumed two bytes" means the sequence was
        rejected, incomplete, or decoded as a single byte.  */
     if (mbrtowc (&decoded, probe, 2, &state) != 2)
       return false;

     return decoded == 0x100;
   }
   50 
   /* Return true if the current locale is compatible enough with the C
      locale that it is single-byte, bytes are in collating-sequence
      order, and there are no multi-character collating elements.

      MULTIBYTE tells whether the locale is multibyte; a multibyte
      locale is never considered simple.  */

   static bool
   using_simple_locale (bool multibyte)
   {
     /* True if the native character set is known to be compatible with
        the C locale.  The following test isn't perfect, but it's good
        enough in practice, as only ASCII and EBCDIC are in common use
        and this test correctly accepts ASCII and rejects EBCDIC.  */
     enum { native_c_charset =
       ('\b' == 8 && '\t' == 9 && '\n' == 10 && '\v' == 11 && '\f' == 12
        && '\r' == 13 && ' ' == 32 && '!' == 33 && '"' == 34 && '#' == 35
        && '%' == 37 && '&' == 38 && '\'' == 39 && '(' == 40 && ')' == 41
        && '*' == 42 && '+' == 43 && ',' == 44 && '-' == 45 && '.' == 46
        && '/' == 47 && '0' == 48 && '9' == 57 && ':' == 58 && ';' == 59
        && '<' == 60 && '=' == 61 && '>' == 62 && '?' == 63 && 'A' == 65
        && 'Z' == 90 && '[' == 91 && '\\' == 92 && ']' == 93 && '^' == 94
        && '_' == 95 && 'a' == 97 && 'z' == 122 && '{' == 123 && '|' == 124
        && '}' == 125 && '~' == 126)
     };

     if (!native_c_charset || multibyte)
       return false;

     /* As a heuristic, use strcoll to compare native character order.
        If this agrees with byte order the locale should be simple.
        This heuristic should work for all known practical locales,
        although it would be invalid for artificially-constructed locales
        where the native order is the collating-sequence order but there
        are multi-character collating elements.

        Byte I must collate strictly before byte I + 1; a result that is
        zero or positive means collation disagrees with byte order, so
        the locale is not simple.  */
     for (int i = 0; i < UCHAR_MAX; i++)
       if (0 <= strcoll (((char []) {i, 0}), ((char []) {i + 1, 0})))
         return false;

     return true;
   }
   89 
   90 /* Initialize *LOCALEINFO from the current locale.  */
   91 
   92 void
   93 init_localeinfo (struct localeinfo *localeinfo)
   94 {
   95   localeinfo->multibyte = MB_CUR_MAX > 1;
   96   localeinfo->simple = using_simple_locale (localeinfo->multibyte);
   97   localeinfo->using_utf8 = is_using_utf8 ();
   98 
   99   for (int i = CHAR_MIN; i <= CHAR_MAX; i++)
  100     {
  101       char c = i;
  102       unsigned char uc = i;
  103       mbstate_t s = {0};
  104       wchar_t wc;
  105       size_t len = mbrtowc (&wc, &c, 1, &s);
  106       localeinfo->sbclen[uc] = len <= 1 ? 1 : - (int) - len;
  107       localeinfo->sbctowc[uc] = len <= 1 ? wc : WEOF;
  108     }
  109 }
  110 
  /* Wide characters C for which some useful locale has both
     C != towupper (C) and C != towlower (towupper (C)); that is,
     converting to upper case and back does not return to C.
     For example, 0x00B5 (U+00B5 MICRO SIGN) is listed because
     towupper (0x00B5) == 0x039C (U+039C GREEK CAPITAL LETTER MU), and
     towlower (0x039C) == 0x03BC (U+03BC GREEK SMALL LETTER MU).  */
  static const short lonesome_lower[] =
    {
      0x00B5, 0x0131, 0x017F,
      0x01C5, 0x01C8, 0x01CB, 0x01F2,
      0x0345,
      0x03C2, 0x03D0, 0x03D1, 0x03D5, 0x03D6,
      0x03F0, 0x03F1,

      /* U+03F2 GREEK LUNATE SIGMA SYMBOL lacks a specific uppercase
         counterpart in locales predating Unicode 4.0.0 (April 2003).  */
      0x03F2,

      0x03F5,
      0x1E9B, 0x1FBE,
    };
  127 
  128 /* Verify that the worst case fits.  This is 1 for towupper, 1 for
  129    towlower, and 1 for each entry in LONESOME_LOWER.  */
  130 verify (1 + 1 + sizeof lonesome_lower / sizeof *lonesome_lower
  131         <= CASE_FOLDED_BUFSIZE);
  132 
  133 /* Find the characters equal to C after case-folding, other than C
  134    itself, and store them into FOLDED.  Return the number of characters
  135    stored; this is zero if C is WEOF.  */
  136 
  137 int
  138 case_folded_counterparts (wint_t c, wchar_t folded[CASE_FOLDED_BUFSIZE])
  139 {
  140   int i;
  141   int n = 0;
  142   wint_t uc = towupper (c);
  143   wint_t lc = towlower (uc);
  144   if (uc != c)
  145     folded[n++] = uc;
  146   if (lc != uc && lc != c && towupper (lc) == uc)
  147     folded[n++] = lc;
  148   for (i = 0; i < sizeof lonesome_lower / sizeof *lonesome_lower; i++)
  149     {
  150       wint_t li = lonesome_lower[i];
  151       if (li != lc && li != uc && li != c && towupper (li) == uc)
  152         folded[n++] = li;
  153     }
  154   return n;
  155 }