"Fossies" - the Fresh Open Source Software Archive

Member "nano-4.5/lib/localcharset.c" (4 Oct 2019, 38857 Bytes) of package /linux/misc/nano-4.5.tar.xz:


As a special service, "Fossies" has tried to render the requested source page as HTML using (guessed) C and C++ source code syntax highlighting (style: standard), with prefixed line numbers and a code folding option. Alternatively, you can view or download the uninterpreted source code file here. For more information about "localcharset.c", see the Fossies "Dox" file reference documentation and the latest Fossies "Diffs" side-by-side code changes report: 4.4_vs_4.5.

    1 /* Determine a canonical name for the current locale's character encoding.
    2 
    3    Copyright (C) 2000-2006, 2008-2019 Free Software Foundation, Inc.
    4 
    5    This program is free software; you can redistribute it and/or modify
    6    it under the terms of the GNU General Public License as published by
    7    the Free Software Foundation; either version 3, or (at your option)
    8    any later version.
    9 
   10    This program is distributed in the hope that it will be useful,
   11    but WITHOUT ANY WARRANTY; without even the implied warranty of
   12    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
   13    GNU General Public License for more details.
   14 
   15    You should have received a copy of the GNU General Public License along
   16    with this program; if not, see <https://www.gnu.org/licenses/>.  */
   17 
   18 /* Written by Bruno Haible <bruno@clisp.org>.  */
   19 
   20 #include <config.h>
   21 
   22 /* Specification.  */
   23 #include "localcharset.h"
   24 
   25 #include <stddef.h>
   26 #include <stdio.h>
   27 #include <string.h>
   28 #include <stdlib.h>
   29 
   30 #if defined __APPLE__ && defined __MACH__ && HAVE_LANGINFO_CODESET
   31 # define DARWIN7 /* Darwin 7 or newer, i.e. Mac OS X 10.3 or newer */
   32 #endif
   33 
   34 #if defined _WIN32 && !defined __CYGWIN__
   35 # define WINDOWS_NATIVE
   36 # include <locale.h>
   37 #endif
   38 
   39 #if defined __EMX__
   40 /* Assume EMX program runs on OS/2, even if compiled under DOS.  */
   41 # ifndef OS2
   42 #  define OS2
   43 # endif
   44 #endif
   45 
   46 #if !defined WINDOWS_NATIVE
   47 # if HAVE_LANGINFO_CODESET
   48 #  include <langinfo.h>
   49 # else
   50 #  if 0 /* see comment regarding use of setlocale(), below */
   51 #   include <locale.h>
   52 #  endif
   53 # endif
   54 # ifdef __CYGWIN__
   55 #  define WIN32_LEAN_AND_MEAN
   56 #  include <windows.h>
   57 # endif
   58 #elif defined WINDOWS_NATIVE
   59 # define WIN32_LEAN_AND_MEAN
   60 # include <windows.h>
   61 #endif
   62 #if defined OS2
   63 # define INCL_DOS
   64 # include <os2.h>
   65 #endif
   66 
   67 /* For MB_CUR_MAX_L */
   68 #if defined DARWIN7
   69 # include <xlocale.h>
   70 #endif
   71 
   72 
   73 #if HAVE_LANGINFO_CODESET || defined WINDOWS_NATIVE || defined OS2
   74 
   75 /* On these platforms, we use a mapping from non-canonical encoding name
   76    to GNU canonical encoding name.  */
   77 
   78 /* With glibc-2.1 or newer, we don't need any canonicalization,
   79    because glibc has iconv and both glibc and libiconv support all
   80    GNU canonical names directly.  */
   81 # if !((defined __GNU_LIBRARY__ && __GLIBC__ >= 2) || defined __UCLIBC__)
   82 
   83 struct table_entry  /* One row of alias_table: maps a non-canonical encoding name to its GNU canonical name.  Both fields are fixed-size arrays, so a row contains no pointers.  */
   84 {
   85   const char alias[11+1];      /* Non-canonical encoding name; the longest active alias below has 11 characters (e.g. "TIS620.2533"), plus the terminating NUL.  */
   86   const char canonical[11+1];  /* GNU canonical encoding name; the longest one below has 11 characters (e.g. "ISO-8859-13"), plus the terminating NUL.  */
   87 };
   88 
   89 /* Table of platform-dependent mappings from a non-canonical encoding name (alias) to the GNU canonical encoding name.  Each platform section below ends without a trailing comma, so at most one section can be compiled in at a time.  Within each section the entries are sorted by alias in ascending byte order (digits, then uppercase, then lowercase letters); presumably the lookup code, which is not visible in this part of the file, relies on that order -- confirm before adding or reordering entries.  Entries that are commented out map a name to itself or have no known canonical name ("?").  */
   90 static const struct table_entry alias_table[] =
   91   {
   92 #  if defined __FreeBSD__                                   /* FreeBSD */
   93   /*{ "ARMSCII-8",  "ARMSCII-8" },*/
   94     { "Big5",       "BIG5" },
   95     { "C",          "ASCII" },
   96   /*{ "CP1131",     "CP1131" },*/
   97   /*{ "CP1251",     "CP1251" },*/
   98   /*{ "CP866",      "CP866" },*/
   99   /*{ "GB18030",    "GB18030" },*/
  100   /*{ "GB2312",     "GB2312" },*/
  101   /*{ "GBK",        "GBK" },*/
  102   /*{ "ISCII-DEV",  "?" },*/
  103     { "ISO8859-1",  "ISO-8859-1" },
  104     { "ISO8859-13", "ISO-8859-13" },
  105     { "ISO8859-15", "ISO-8859-15" },
  106     { "ISO8859-2",  "ISO-8859-2" },
  107     { "ISO8859-5",  "ISO-8859-5" },
  108     { "ISO8859-7",  "ISO-8859-7" },
  109     { "ISO8859-9",  "ISO-8859-9" },
  110   /*{ "KOI8-R",     "KOI8-R" },*/
  111   /*{ "KOI8-U",     "KOI8-U" },*/
  112     { "SJIS",       "SHIFT_JIS" },
  113     { "US-ASCII",   "ASCII" },
  114     { "eucCN",      "GB2312" },
  115     { "eucJP",      "EUC-JP" },
  116     { "eucKR",      "EUC-KR" }
  117 #   define alias_table_defined
  118 #  endif
  119 #  if defined __NetBSD__                                    /* NetBSD */
  120     { "646",        "ASCII" },
  121   /*{ "ARMSCII-8",  "ARMSCII-8" },*/
  122   /*{ "BIG5",       "BIG5" },*/
  123     { "Big5-HKSCS", "BIG5-HKSCS" },
  124   /*{ "CP1251",     "CP1251" },*/
  125   /*{ "CP866",      "CP866" },*/
  126   /*{ "GB18030",    "GB18030" },*/
  127   /*{ "GB2312",     "GB2312" },*/
  128     { "ISO8859-1",  "ISO-8859-1" },
  129     { "ISO8859-13", "ISO-8859-13" },
  130     { "ISO8859-15", "ISO-8859-15" },
  131     { "ISO8859-2",  "ISO-8859-2" },
  132     { "ISO8859-4",  "ISO-8859-4" },
  133     { "ISO8859-5",  "ISO-8859-5" },
  134     { "ISO8859-7",  "ISO-8859-7" },
  135   /*{ "KOI8-R",     "KOI8-R" },*/
  136   /*{ "KOI8-U",     "KOI8-U" },*/
  137   /*{ "PT154",      "PT154" },*/
  138     { "SJIS",       "SHIFT_JIS" },
  139     { "eucCN",      "GB2312" },
  140     { "eucJP",      "EUC-JP" },
  141     { "eucKR",      "EUC-KR" },
  142     { "eucTW",      "EUC-TW" }
  143 #   define alias_table_defined
  144 #  endif
  145 #  if defined __OpenBSD__                                   /* OpenBSD */
  146     { "646",        "ASCII" },
  147     { "ISO8859-1",  "ISO-8859-1" },
  148     { "ISO8859-13", "ISO-8859-13" },
  149     { "ISO8859-15", "ISO-8859-15" },
  150     { "ISO8859-2",  "ISO-8859-2" },
  151     { "ISO8859-4",  "ISO-8859-4" },
  152     { "ISO8859-5",  "ISO-8859-5" },
  153     { "ISO8859-7",  "ISO-8859-7" }
  154 #   define alias_table_defined
  155 #  endif
  156 #  if defined __APPLE__ && defined __MACH__                 /* Mac OS X */
  157     /* Darwin 7.5 has nl_langinfo(CODESET), but sometimes its value is
  158        useless:
  159        - It returns the empty string when LANG is set to a locale of the
  160          form ll_CC, although ll_CC/LC_CTYPE is a symlink to an UTF-8
  161          LC_CTYPE file.
  162        - The environment variables LANG, LC_CTYPE, LC_ALL are not set by
  163          the system; nl_langinfo(CODESET) returns "US-ASCII" in this case.
  164        - The documentation says:
  165            "... all code that calls BSD system routines should ensure
  166             that the const *char parameters of these routines are in UTF-8
  167             encoding. All BSD system functions expect their string
  168             parameters to be in UTF-8 encoding and nothing else."
  169          It also says
  170            "An additional caveat is that string parameters for files,
  171             paths, and other file-system entities must be in canonical
  172             UTF-8. In a canonical UTF-8 Unicode string, all decomposable
  173             characters are decomposed ..."
  174          but this is not true: You can pass non-decomposed UTF-8 strings
  175          to file system functions, and it is the OS which will convert
  176          them to decomposed UTF-8 before accessing the file system.
  177        - The Apple Terminal application displays UTF-8 by default.
  178        - However, other applications are free to use different encodings:
  179          - xterm uses ISO-8859-1 by default.
  180          - TextEdit uses MacRoman by default.
  181        We prefer UTF-8 over decomposed UTF-8-MAC because one should
  182        minimize the use of decomposed Unicode. Unfortunately, through the
  183        Darwin file system, decomposed UTF-8 strings are leaked into user
  184        space nevertheless.
  185        Then there are also the locales with encodings other than US-ASCII
  186        and UTF-8. These locales can be occasionally useful to users (e.g.
  187        when grepping through ISO-8859-1 encoded text files), when all their
  188        file names are in US-ASCII.
  189      */
  190     { "ARMSCII-8",  "ARMSCII-8" },
  191     { "Big5",       "BIG5" },
  192     { "Big5HKSCS",  "BIG5-HKSCS" },
  193     { "CP1131",     "CP1131" },
  194     { "CP1251",     "CP1251" },
  195     { "CP866",      "CP866" },
  196     { "CP949",      "CP949" },
  197     { "GB18030",    "GB18030" },
  198     { "GB2312",     "GB2312" },
  199     { "GBK",        "GBK" },
  200   /*{ "ISCII-DEV",  "?" },*/
  201     { "ISO8859-1",  "ISO-8859-1" },
  202     { "ISO8859-13", "ISO-8859-13" },
  203     { "ISO8859-15", "ISO-8859-15" },
  204     { "ISO8859-2",  "ISO-8859-2" },
  205     { "ISO8859-4",  "ISO-8859-4" },
  206     { "ISO8859-5",  "ISO-8859-5" },
  207     { "ISO8859-7",  "ISO-8859-7" },
  208     { "ISO8859-9",  "ISO-8859-9" },
  209     { "KOI8-R",     "KOI8-R" },
  210     { "KOI8-U",     "KOI8-U" },
  211     { "PT154",      "PT154" },
  212     { "SJIS",       "SHIFT_JIS" },
  213     { "eucCN",      "GB2312" },
  214     { "eucJP",      "EUC-JP" },
  215     { "eucKR",      "EUC-KR" }
  216 #   define alias_table_defined
  217 #  endif
  218 #  if defined _AIX                                          /* AIX */
  219   /*{ "GBK",        "GBK" },*/
  220     { "IBM-1046",   "CP1046" },
  221     { "IBM-1124",   "CP1124" },
  222     { "IBM-1129",   "CP1129" },
  223     { "IBM-1252",   "CP1252" },
  224     { "IBM-850",    "CP850" },
  225     { "IBM-856",    "CP856" },
  226     { "IBM-921",    "ISO-8859-13" },
  227     { "IBM-922",    "CP922" },
  228     { "IBM-932",    "CP932" },
  229     { "IBM-943",    "CP943" },
  230     { "IBM-eucCN",  "GB2312" },
  231     { "IBM-eucJP",  "EUC-JP" },
  232     { "IBM-eucKR",  "EUC-KR" },
  233     { "IBM-eucTW",  "EUC-TW" },
  234     { "ISO8859-1",  "ISO-8859-1" },
  235     { "ISO8859-15", "ISO-8859-15" },
  236     { "ISO8859-2",  "ISO-8859-2" },
  237     { "ISO8859-5",  "ISO-8859-5" },
  238     { "ISO8859-6",  "ISO-8859-6" },
  239     { "ISO8859-7",  "ISO-8859-7" },
  240     { "ISO8859-8",  "ISO-8859-8" },
  241     { "ISO8859-9",  "ISO-8859-9" },
  242     { "TIS-620",    "TIS-620" },
  243   /*{ "UTF-8",      "UTF-8" },*/
  244     { "big5",       "BIG5" }
  245 #   define alias_table_defined
  246 #  endif
  247 #  if defined __hpux                                        /* HP-UX */
  248     { "SJIS",      "SHIFT_JIS" },
  249     { "arabic8",   "HP-ARABIC8" },
  250     { "big5",      "BIG5" },
  251     { "cp1251",    "CP1251" },
  252     { "eucJP",     "EUC-JP" },
  253     { "eucKR",     "EUC-KR" },
  254     { "eucTW",     "EUC-TW" },
  255     { "gb18030",   "GB18030" },
  256     { "greek8",    "HP-GREEK8" },
  257     { "hebrew8",   "HP-HEBREW8" },
  258     { "hkbig5",    "BIG5-HKSCS" },
  259     { "hp15CN",    "GB2312" },
  260     { "iso88591",  "ISO-8859-1" },
  261     { "iso885913", "ISO-8859-13" },
  262     { "iso885915", "ISO-8859-15" },
  263     { "iso88592",  "ISO-8859-2" },
  264     { "iso88594",  "ISO-8859-4" },
  265     { "iso88595",  "ISO-8859-5" },
  266     { "iso88596",  "ISO-8859-6" },
  267     { "iso88597",  "ISO-8859-7" },
  268     { "iso88598",  "ISO-8859-8" },
  269     { "iso88599",  "ISO-8859-9" },
  270     { "kana8",     "HP-KANA8" },
  271     { "koi8r",     "KOI8-R" },
  272     { "roman8",    "HP-ROMAN8" },
  273     { "tis620",    "TIS-620" },
  274     { "turkish8",  "HP-TURKISH8" },
  275     { "utf8",      "UTF-8" }
  276 #   define alias_table_defined
  277 #  endif
  278 #  if defined __sgi                                         /* IRIX */
  279     { "ISO8859-1",  "ISO-8859-1" },
  280     { "ISO8859-15", "ISO-8859-15" },
  281     { "ISO8859-2",  "ISO-8859-2" },
  282     { "ISO8859-5",  "ISO-8859-5" },
  283     { "ISO8859-7",  "ISO-8859-7" },
  284     { "ISO8859-9",  "ISO-8859-9" },
  285     { "eucCN",      "GB2312" },
  286     { "eucJP",      "EUC-JP" },
  287     { "eucKR",      "EUC-KR" },
  288     { "eucTW",      "EUC-TW" }
  289 #   define alias_table_defined
  290 #  endif
  291 #  if defined __osf__                                       /* OSF/1 */
  292   /*{ "GBK",        "GBK" },*/
  293     { "ISO8859-1",  "ISO-8859-1" },
  294     { "ISO8859-15", "ISO-8859-15" },
  295     { "ISO8859-2",  "ISO-8859-2" },
  296     { "ISO8859-4",  "ISO-8859-4" },
  297     { "ISO8859-5",  "ISO-8859-5" },
  298     { "ISO8859-7",  "ISO-8859-7" },
  299     { "ISO8859-8",  "ISO-8859-8" },
  300     { "ISO8859-9",  "ISO-8859-9" },
  301     { "KSC5601",    "CP949" },
  302     { "SJIS",       "SHIFT_JIS" },
  303     { "TACTIS",     "TIS-620" },
  304   /*{ "UTF-8",      "UTF-8" },*/
  305     { "big5",       "BIG5" },
  306     { "cp850",      "CP850" },
  307     { "dechanyu",   "DEC-HANYU" },
  308     { "dechanzi",   "GB2312" },
  309     { "deckanji",   "DEC-KANJI" },
  310     { "deckorean",  "EUC-KR" },
  311     { "eucJP",      "EUC-JP" },
  312     { "eucKR",      "EUC-KR" },
  313     { "eucTW",      "EUC-TW" },
  314     { "sdeckanji",  "EUC-JP" }
  315 #   define alias_table_defined
  316 #  endif
  317 #  if defined __sun                                         /* Solaris */
  318     { "5601",        "EUC-KR" },
  319     { "646",         "ASCII" },
  320   /*{ "BIG5",        "BIG5" },*/
  321     { "Big5-HKSCS",  "BIG5-HKSCS" },
  322     { "GB18030",     "GB18030" },
  323   /*{ "GBK",         "GBK" },*/
  324     { "ISO8859-1",   "ISO-8859-1" },
  325     { "ISO8859-11",  "TIS-620" },
  326     { "ISO8859-13",  "ISO-8859-13" },
  327     { "ISO8859-15",  "ISO-8859-15" },
  328     { "ISO8859-2",   "ISO-8859-2" },
  329     { "ISO8859-3",   "ISO-8859-3" },
  330     { "ISO8859-4",   "ISO-8859-4" },
  331     { "ISO8859-5",   "ISO-8859-5" },
  332     { "ISO8859-6",   "ISO-8859-6" },
  333     { "ISO8859-7",   "ISO-8859-7" },
  334     { "ISO8859-8",   "ISO-8859-8" },
  335     { "ISO8859-9",   "ISO-8859-9" },
  336     { "PCK",         "SHIFT_JIS" },
  337     { "TIS620.2533", "TIS-620" },
  338   /*{ "UTF-8",       "UTF-8" },*/
  339     { "ansi-1251",   "CP1251" },
  340     { "cns11643",    "EUC-TW" },
  341     { "eucJP",       "EUC-JP" },
  342     { "gb2312",      "GB2312" },
  343     { "koi8-r",      "KOI8-R" }
  344 #   define alias_table_defined
  345 #  endif
  346 #  if defined __minix                                       /* Minix */
  347     { "646", "ASCII" }
  348 #   define alias_table_defined
  349 #  endif
  350 #  if defined WINDOWS_NATIVE || defined __CYGWIN__          /* Windows */
  351     { "CP1361",  "JOHAB" },
  352     { "CP20127", "ASCII" },
  353     { "CP20866", "KOI8-R" },
  354     { "CP20936", "GB2312" },
  355     { "CP21866", "KOI8-RU" },
  356     { "CP28591", "ISO-8859-1" },
  357     { "CP28592", "ISO-8859-2" },
  358     { "CP28593", "ISO-8859-3" },
  359     { "CP28594", "ISO-8859-4" },
  360     { "CP28595", "ISO-8859-5" },
  361     { "CP28596", "ISO-8859-6" },
  362     { "CP28597", "ISO-8859-7" },
  363     { "CP28598", "ISO-8859-8" },
  364     { "CP28599", "ISO-8859-9" },
  365     { "CP28605", "ISO-8859-15" },
  366     { "CP38598", "ISO-8859-8" },
  367     { "CP51932", "EUC-JP" },
  368     { "CP51936", "GB2312" },
  369     { "CP51949", "EUC-KR" },
  370     { "CP51950", "EUC-TW" },
  371     { "CP54936", "GB18030" },
  372     { "CP65001", "UTF-8" },
  373     { "CP936",   "GBK" }
  374 #   define alias_table_defined
  375 #  endif
  376 #  if defined OS2                                           /* OS/2 */
  377     /* The list of encodings is taken from "List of OS/2 Codepages"
  378        by Alex Taylor:
  379        <http://altsan.org/os2/toolkits/uls/index.html#codepages>.
  380        See also "__convcp() of kLIBC":
  381        <https://github.com/bitwiseworks/libc/blob/master/src/emx/src/lib/locale/__convcp.c>.  */
  382     { "CP1004",        "CP1252" },
  383   /*{ "CP1041",        "CP943" },*/
  384   /*{ "CP1088",        "CP949" },*/
  385     { "CP1089",        "ISO-8859-6" },
  386   /*{ "CP1114",        "CP950" },*/
  387   /*{ "CP1115",        "GB2312" },*/
  388     { "CP1208",        "UTF-8" },
  389   /*{ "CP1380",        "GB2312" },*/
  390     { "CP1381",        "GB2312" },
  391     { "CP1383",        "GB2312" },
  392     { "CP1386",        "GBK" },
  393   /*{ "CP301",         "CP943" },*/
  394     { "CP3372",        "EUC-JP" },
  395     { "CP4946",        "CP850" },
  396   /*{ "CP5048",        "JIS_X0208-1990" },*/
  397   /*{ "CP5049",        "JIS_X0212-1990" },*/
  398   /*{ "CP5067",        "KS_C_5601-1987" },*/
  399     { "CP813",         "ISO-8859-7" },
  400     { "CP819",         "ISO-8859-1" },
  401     { "CP878",         "KOI8-R" },
  402   /*{ "CP897",         "CP943" },*/
  403     { "CP912",         "ISO-8859-2" },
  404     { "CP913",         "ISO-8859-3" },
  405     { "CP914",         "ISO-8859-4" },
  406     { "CP915",         "ISO-8859-5" },
  407     { "CP916",         "ISO-8859-8" },
  408     { "CP920",         "ISO-8859-9" },
  409     { "CP921",         "ISO-8859-13" },
  410     { "CP923",         "ISO-8859-15" },
  411   /*{ "CP941",         "CP943" },*/
  412   /*{ "CP947",         "CP950" },*/
  413   /*{ "CP951",         "CP949" },*/
  414   /*{ "CP952",         "JIS_X0208-1990" },*/
  415   /*{ "CP953",         "JIS_X0212-1990" },*/
  416     { "CP954",         "EUC-JP" },
  417     { "CP964",         "EUC-TW" },
  418     { "CP970",         "EUC-KR" },
  419   /*{ "CP971",         "KS_C_5601-1987" },*/
  420     { "IBM-1004",      "CP1252" },
  421   /*{ "IBM-1006",      "?" },*/
  422   /*{ "IBM-1008",      "?" },*/
  423   /*{ "IBM-1041",      "CP943" },*/
  424   /*{ "IBM-1051",      "?" },*/
  425   /*{ "IBM-1088",      "CP949" },*/
  426     { "IBM-1089",      "ISO-8859-6" },
  427   /*{ "IBM-1098",      "?" },*/
  428   /*{ "IBM-1114",      "CP950" },*/
  429   /*{ "IBM-1115",      "GB2312" },*/
  430   /*{ "IBM-1116",      "?" },*/
  431   /*{ "IBM-1117",      "?" },*/
  432   /*{ "IBM-1118",      "?" },*/
  433   /*{ "IBM-1119",      "?" },*/
  434     { "IBM-1124",      "CP1124" },
  435     { "IBM-1125",      "CP1125" },
  436     { "IBM-1131",      "CP1131" },
  437     { "IBM-1208",      "UTF-8" },
  438     { "IBM-1250",      "CP1250" },
  439     { "IBM-1251",      "CP1251" },
  440     { "IBM-1252",      "CP1252" },
  441     { "IBM-1253",      "CP1253" },
  442     { "IBM-1254",      "CP1254" },
  443     { "IBM-1255",      "CP1255" },
  444     { "IBM-1256",      "CP1256" },
  445     { "IBM-1257",      "CP1257" },
  446   /*{ "IBM-1275",      "?" },*/
  447   /*{ "IBM-1276",      "?" },*/
  448   /*{ "IBM-1277",      "?" },*/
  449   /*{ "IBM-1280",      "?" },*/
  450   /*{ "IBM-1281",      "?" },*/
  451   /*{ "IBM-1282",      "?" },*/
  452   /*{ "IBM-1283",      "?" },*/
  453   /*{ "IBM-1380",      "GB2312" },*/
  454     { "IBM-1381",      "GB2312" },
  455     { "IBM-1383",      "GB2312" },
  456     { "IBM-1386",      "GBK" },
  457   /*{ "IBM-301",       "CP943" },*/
  458     { "IBM-3372",      "EUC-JP" },
  459     { "IBM-367",       "ASCII" },
  460     { "IBM-437",       "CP437" },
  461     { "IBM-4946",      "CP850" },
  462   /*{ "IBM-5048",      "JIS_X0208-1990" },*/
  463   /*{ "IBM-5049",      "JIS_X0212-1990" },*/
  464   /*{ "IBM-5067",      "KS_C_5601-1987" },*/
  465     { "IBM-813",       "ISO-8859-7" },
  466     { "IBM-819",       "ISO-8859-1" },
  467     { "IBM-850",       "CP850" },
  468   /*{ "IBM-851",       "?" },*/
  469     { "IBM-852",       "CP852" },
  470     { "IBM-855",       "CP855" },
  471     { "IBM-856",       "CP856" },
  472     { "IBM-857",       "CP857" },
  473   /*{ "IBM-859",       "?" },*/
  474     { "IBM-860",       "CP860" },
  475     { "IBM-861",       "CP861" },
  476     { "IBM-862",       "CP862" },
  477     { "IBM-863",       "CP863" },
  478     { "IBM-864",       "CP864" },
  479     { "IBM-865",       "CP865" },
  480     { "IBM-866",       "CP866" },
  481   /*{ "IBM-868",       "?" },*/
  482     { "IBM-869",       "CP869" },
  483     { "IBM-874",       "CP874" },
  484     { "IBM-878",       "KOI8-R" },
  485   /*{ "IBM-895",       "?" },*/
  486   /*{ "IBM-897",       "CP943" },*/
  487   /*{ "IBM-907",       "?" },*/
  488   /*{ "IBM-909",       "?" },*/
  489     { "IBM-912",       "ISO-8859-2" },
  490     { "IBM-913",       "ISO-8859-3" },
  491     { "IBM-914",       "ISO-8859-4" },
  492     { "IBM-915",       "ISO-8859-5" },
  493     { "IBM-916",       "ISO-8859-8" },
  494     { "IBM-920",       "ISO-8859-9" },
  495     { "IBM-921",       "ISO-8859-13" },
  496     { "IBM-922",       "CP922" },
  497     { "IBM-923",       "ISO-8859-15" },
  498     { "IBM-932",       "CP932" },
  499   /*{ "IBM-941",       "CP943" },*/
  500   /*{ "IBM-942",       "?" },*/
  501     { "IBM-943",       "CP943" },
  502   /*{ "IBM-947",       "CP950" },*/
  503     { "IBM-949",       "CP949" },
  504     { "IBM-950",       "CP950" },
  505   /*{ "IBM-951",       "CP949" },*/
  506   /*{ "IBM-952",       "JIS_X0208-1990" },*/
  507   /*{ "IBM-953",       "JIS_X0212-1990" },*/
  508     { "IBM-954",       "EUC-JP" },
  509   /*{ "IBM-955",       "?" },*/
  510     { "IBM-964",       "EUC-TW" },
  511     { "IBM-970",       "EUC-KR" },
  512   /*{ "IBM-971",       "KS_C_5601-1987" },*/
  513     { "IBM-eucCN",     "GB2312" },
  514     { "IBM-eucJP",     "EUC-JP" },
  515     { "IBM-eucKR",     "EUC-KR" },
  516     { "IBM-eucTW",     "EUC-TW" },
  517     { "IBM33722",      "EUC-JP" },
  518     { "ISO8859-1",     "ISO-8859-1" },
  519     { "ISO8859-2",     "ISO-8859-2" },
  520     { "ISO8859-3",     "ISO-8859-3" },
  521     { "ISO8859-4",     "ISO-8859-4" },
  522     { "ISO8859-5",     "ISO-8859-5" },
  523     { "ISO8859-6",     "ISO-8859-6" },
  524     { "ISO8859-7",     "ISO-8859-7" },
  525     { "ISO8859-8",     "ISO-8859-8" },
  526     { "ISO8859-9",     "ISO-8859-9" },
  527   /*{ "JISX0201-1976", "JISX0201-1976" },*/
  528   /*{ "JISX0208-1978", "?" },*/
  529   /*{ "JISX0208-1983", "JIS_X0208-1983" },*/
  530   /*{ "JISX0208-1990", "JIS_X0208-1990" },*/
  531   /*{ "JISX0212-1990", "JIS_X0212-1990" },*/
  532   /*{ "KSC5601-1987",  "KS_C_5601-1987" },*/
  533     { "SJIS-1",        "CP943" },
  534     { "SJIS-2",        "CP943" },
  535     { "eucJP",         "EUC-JP" },
  536     { "eucKR",         "EUC-KR" },
  537     { "eucTW-1993",    "EUC-TW" }
  538 #   define alias_table_defined
  539 #  endif
  540 #  if defined VMS                                           /* OpenVMS */
  541     /* The list of encodings is taken from the OpenVMS 7.3-1 documentation
  542        "Compaq C Run-Time Library Reference Manual for OpenVMS systems"
  543        section 10.7 "Handling Different Character Sets".  */
  544     { "DECHANYU",  "DEC-HANYU" },
  545     { "DECHANZI",  "GB2312" },
  546     { "DECKANJI",  "DEC-KANJI" },
  547     { "DECKOREAN", "EUC-KR" },
  548     { "ISO8859-1", "ISO-8859-1" },
  549     { "ISO8859-2", "ISO-8859-2" },
  550     { "ISO8859-5", "ISO-8859-5" },
  551     { "ISO8859-7", "ISO-8859-7" },
  552     { "ISO8859-8", "ISO-8859-8" },
  553     { "ISO8859-9", "ISO-8859-9" },
  554     { "SDECKANJI", "EUC-JP" },
  555     { "SJIS",      "SHIFT_JIS" },
  556     { "eucJP",     "EUC-JP" },
  557     { "eucTW",     "EUC-TW" }
  558 #   define alias_table_defined
  559 #  endif
  560 #  ifndef alias_table_defined
  561     /* No platform section above defined alias_table_defined, so supply a dummy entry: an empty initializer list would be a C syntax error.  */
  562     { "", "" }
  563 #  endif
  564   };
  565 
  566 # endif
  567 
  568 #else
  569 
  570 /* On these platforms, we use a mapping from locale name to GNU canonical
  571    encoding name.  */
  572 
  573 struct table_entry  /* One row of locale_table: maps a locale name to its GNU canonical encoding name.  Both fields are fixed-size arrays, so a row contains no pointers.  */
  574 {
  575   const char locale[17+1];     /* Locale name; the longest one below has 17 characters (e.g. "da_DK.DIS_8859-15"), plus the terminating NUL.  */
  576   const char canonical[11+1];  /* GNU canonical encoding name; the longest one below has 11 characters ("ISO-8859-15"), plus the terminating NUL.  */
  577 };
  578 
  579 /* Table of platform-dependent mappings from a locale name to the GNU canonical encoding name.  Each platform section below ends without a trailing comma, so at most one section can be compiled in at a time.  Within each section the entries are sorted by locale name in ascending byte order (uppercase before lowercase letters); presumably the lookup code, which is not visible in this part of the file, relies on that order -- confirm before adding or reordering entries.  */
  580 static const struct table_entry locale_table[] =
  581   {
  582 # if defined __FreeBSD__                                    /* FreeBSD 4.2 */
  583     { "cs_CZ.ISO_8859-2",  "ISO-8859-2" },
  584     { "da_DK.DIS_8859-15", "ISO-8859-15" },
  585     { "da_DK.ISO_8859-1",  "ISO-8859-1" },
  586     { "de_AT.DIS_8859-15", "ISO-8859-15" },
  587     { "de_AT.ISO_8859-1",  "ISO-8859-1" },
  588     { "de_CH.DIS_8859-15", "ISO-8859-15" },
  589     { "de_CH.ISO_8859-1",  "ISO-8859-1" },
  590     { "de_DE.DIS_8859-15", "ISO-8859-15" },
  591     { "de_DE.ISO_8859-1",  "ISO-8859-1" },
  592     { "en_AU.DIS_8859-15", "ISO-8859-15" },
  593     { "en_AU.ISO_8859-1",  "ISO-8859-1" },
  594     { "en_CA.DIS_8859-15", "ISO-8859-15" },
  595     { "en_CA.ISO_8859-1",  "ISO-8859-1" },
  596     { "en_GB.DIS_8859-15", "ISO-8859-15" },
  597     { "en_GB.ISO_8859-1",  "ISO-8859-1" },
  598     { "en_US.DIS_8859-15", "ISO-8859-15" },
  599     { "en_US.ISO_8859-1",  "ISO-8859-1" },
  600     { "es_ES.DIS_8859-15", "ISO-8859-15" },
  601     { "es_ES.ISO_8859-1",  "ISO-8859-1" },
  602     { "fi_FI.DIS_8859-15", "ISO-8859-15" },
  603     { "fi_FI.ISO_8859-1",  "ISO-8859-1" },
  604     { "fr_BE.DIS_8859-15", "ISO-8859-15" },
  605     { "fr_BE.ISO_8859-1",  "ISO-8859-1" },
  606     { "fr_CA.DIS_8859-15", "ISO-8859-15" },
  607     { "fr_CA.ISO_8859-1",  "ISO-8859-1" },
  608     { "fr_CH.DIS_8859-15", "ISO-8859-15" },
  609     { "fr_CH.ISO_8859-1",  "ISO-8859-1" },
  610     { "fr_FR.DIS_8859-15", "ISO-8859-15" },
  611     { "fr_FR.ISO_8859-1",  "ISO-8859-1" },
  612     { "hr_HR.ISO_8859-2",  "ISO-8859-2" },
  613     { "hu_HU.ISO_8859-2",  "ISO-8859-2" },
  614     { "is_IS.DIS_8859-15", "ISO-8859-15" },
  615     { "is_IS.ISO_8859-1",  "ISO-8859-1" },
  616     { "it_CH.DIS_8859-15", "ISO-8859-15" },
  617     { "it_CH.ISO_8859-1",  "ISO-8859-1" },
  618     { "it_IT.DIS_8859-15", "ISO-8859-15" },
  619     { "it_IT.ISO_8859-1",  "ISO-8859-1" },
  620     { "ja_JP.EUC",         "EUC-JP" },
  621     { "ja_JP.SJIS",        "SHIFT_JIS" },
  622     { "ja_JP.Shift_JIS",   "SHIFT_JIS" },
  623     { "ko_KR.EUC",         "EUC-KR" },
  624     { "la_LN.ASCII",       "ASCII" },
  625     { "la_LN.DIS_8859-15", "ISO-8859-15" },
  626     { "la_LN.ISO_8859-1",  "ISO-8859-1" },
  627     { "la_LN.ISO_8859-2",  "ISO-8859-2" },
  628     { "la_LN.ISO_8859-4",  "ISO-8859-4" },
  629     { "lt_LN.ASCII",       "ASCII" },
  630     { "lt_LN.DIS_8859-15", "ISO-8859-15" },
  631     { "lt_LN.ISO_8859-1",  "ISO-8859-1" },
  632     { "lt_LN.ISO_8859-2",  "ISO-8859-2" },
  633     { "lt_LT.ISO_8859-4",  "ISO-8859-4" },
  634     { "nl_BE.DIS_8859-15", "ISO-8859-15" },
  635     { "nl_BE.ISO_8859-1",  "ISO-8859-1" },
  636     { "nl_NL.DIS_8859-15", "ISO-8859-15" },
  637     { "nl_NL.ISO_8859-1",  "ISO-8859-1" },
  638     { "no_NO.DIS_8859-15", "ISO-8859-15" },
  639     { "no_NO.ISO_8859-1",  "ISO-8859-1" },
  640     { "pl_PL.ISO_8859-2",  "ISO-8859-2" },
  641     { "pt_PT.DIS_8859-15", "ISO-8859-15" },
  642     { "pt_PT.ISO_8859-1",  "ISO-8859-1" },
  643     { "ru_RU.CP866",       "CP866" },
  644     { "ru_RU.ISO_8859-5",  "ISO-8859-5" },
  645     { "ru_RU.KOI8-R",      "KOI8-R" },
  646     { "ru_SU.CP866",       "CP866" },
  647     { "ru_SU.ISO_8859-5",  "ISO-8859-5" },
  648     { "ru_SU.KOI8-R",      "KOI8-R" },
  649     { "sl_SI.ISO_8859-2",  "ISO-8859-2" },
  650     { "sv_SE.DIS_8859-15", "ISO-8859-15" },
  651     { "sv_SE.ISO_8859-1",  "ISO-8859-1" },
  652     { "uk_UA.KOI8-U",      "KOI8-U" },
  653     { "zh_CN.EUC",         "GB2312" },
  654     { "zh_TW.BIG5",        "BIG5" },
  655     { "zh_TW.Big5",        "BIG5" }
  656 #  define locale_table_defined
  657 # endif
  658 # if defined __DJGPP__                                      /* DOS / DJGPP 2.03 */
  659     /* The encodings given here may not all be correct.
  660        If you find that the encoding given for your language and
  661        country is not the one your DOS machine actually uses, just
  662        correct it in this file, and send a mail to
  663        Juan Manuel Guerrero <juan.guerrero@gmx.de>
  664        and <bug-gnulib@gnu.org>.  */
  665     { "C",     "ASCII" },
  666     { "ar",    "CP864" },
  667     { "ar_AE", "CP864" },
  668     { "ar_DZ", "CP864" },
  669     { "ar_EG", "CP864" },
  670     { "ar_IQ", "CP864" },
  671     { "ar_IR", "CP864" },
  672     { "ar_JO", "CP864" },
  673     { "ar_KW", "CP864" },
  674     { "ar_MA", "CP864" },
  675     { "ar_OM", "CP864" },
  676     { "ar_QA", "CP864" },
  677     { "ar_SA", "CP864" },
  678     { "ar_SY", "CP864" },
  679     { "be",    "CP866" },
  680     { "be_BE", "CP866" },
  681     { "bg",    "CP866" }, /* not CP855 ?? */
  682     { "bg_BG", "CP866" }, /* not CP855 ?? */
  683     { "ca",    "CP850" },
  684     { "ca_ES", "CP850" },
  685     { "cs",    "CP852" },
  686     { "cs_CZ", "CP852" },
  687     { "da",    "CP865" }, /* not CP850 ?? */
  688     { "da_DK", "CP865" }, /* not CP850 ?? */
  689     { "de",    "CP850" },
  690     { "de_AT", "CP850" },
  691     { "de_CH", "CP850" },
  692     { "de_DE", "CP850" },
  693     { "el",    "CP869" },
  694     { "el_GR", "CP869" },
  695     { "en",    "CP850" },
  696     { "en_AU", "CP850" }, /* not CP437 ?? */
  697     { "en_CA", "CP850" },
  698     { "en_GB", "CP850" },
  699     { "en_NZ", "CP437" },
  700     { "en_US", "CP437" },
  701     { "en_ZA", "CP850" }, /* not CP437 ?? */
  702     { "eo",    "CP850" },
  703     { "eo_EO", "CP850" },
  704     { "es",    "CP850" },
  705     { "es_AR", "CP850" },
  706     { "es_BO", "CP850" },
  707     { "es_CL", "CP850" },
  708     { "es_CO", "CP850" },
  709     { "es_CR", "CP850" },
  710     { "es_CU", "CP850" },
  711     { "es_DO", "CP850" },
  712     { "es_EC", "CP850" },
  713     { "es_ES", "CP850" },
  714     { "es_GT", "CP850" },
  715     { "es_HN", "CP850" },
  716     { "es_MX", "CP850" },
  717     { "es_NI", "CP850" },
  718     { "es_PA", "CP850" },
  719     { "es_PE", "CP850" },
  720     { "es_PY", "CP850" },
  721     { "es_SV", "CP850" },
  722     { "es_UY", "CP850" },
  723     { "es_VE", "CP850" },
  724     { "et",    "CP850" },
  725     { "et_EE", "CP850" },
  726     { "eu",    "CP850" },
  727     { "eu_ES", "CP850" },
  728     { "fi",    "CP850" },
  729     { "fi_FI", "CP850" },
  730     { "fr",    "CP850" },
  731     { "fr_BE", "CP850" },
  732     { "fr_CA", "CP850" },
  733     { "fr_CH", "CP850" },
  734     { "fr_FR", "CP850" },
  735     { "ga",    "CP850" },
  736     { "ga_IE", "CP850" },
  737     { "gd",    "CP850" },
  738     { "gd_GB", "CP850" },
  739     { "gl",    "CP850" },
  740     { "gl_ES", "CP850" },
  741     { "he",    "CP862" },
  742     { "he_IL", "CP862" },
  743     { "hr",    "CP852" },
  744     { "hr_HR", "CP852" },
  745     { "hu",    "CP852" },
  746     { "hu_HU", "CP852" },
  747     { "id",    "CP850" }, /* not CP437 ?? */
  748     { "id_ID", "CP850" }, /* not CP437 ?? */
  749     { "is",    "CP861" }, /* not CP850 ?? */
  750     { "is_IS", "CP861" }, /* not CP850 ?? */
  751     { "it",    "CP850" },
  752     { "it_CH", "CP850" },
  753     { "it_IT", "CP850" },
  754     { "ja",    "CP932" },
  755     { "ja_JP", "CP932" },
  756     { "kr",    "CP949" }, /* not CP934 ?? */
  757     { "kr_KR", "CP949" }, /* not CP934 ?? */
  758     { "lt",    "CP775" },
  759     { "lt_LT", "CP775" },
  760     { "lv",    "CP775" },
  761     { "lv_LV", "CP775" },
  762     { "mk",    "CP866" }, /* not CP855 ?? */
  763     { "mk_MK", "CP866" }, /* not CP855 ?? */
  764     { "mt",    "CP850" },
  765     { "mt_MT", "CP850" },
  766     { "nb",    "CP865" }, /* not CP850 ?? */
  767     { "nb_NO", "CP865" }, /* not CP850 ?? */
  768     { "nl",    "CP850" },
  769     { "nl_BE", "CP850" },
  770     { "nl_NL", "CP850" },
  771     { "nn",    "CP865" }, /* not CP850 ?? */
  772     { "nn_NO", "CP865" }, /* not CP850 ?? */
  773     { "no",    "CP865" }, /* not CP850 ?? */
  774     { "no_NO", "CP865" }, /* not CP850 ?? */
  775     { "pl",    "CP852" },
  776     { "pl_PL", "CP852" },
  777     { "pt",    "CP850" },
  778     { "pt_BR", "CP850" },
  779     { "pt_PT", "CP850" },
  780     { "ro",    "CP852" },
  781     { "ro_RO", "CP852" },
  782     { "ru",    "CP866" },
  783     { "ru_RU", "CP866" },
  784     { "sk",    "CP852" },
  785     { "sk_SK", "CP852" },
  786     { "sl",    "CP852" },
  787     { "sl_SI", "CP852" },
  788     { "sq",    "CP852" },
  789     { "sq_AL", "CP852" },
  790     { "sr",    "CP852" }, /* CP852 or CP866 or CP855 ?? */
  791     { "sr_CS", "CP852" }, /* CP852 or CP866 or CP855 ?? */
  792     { "sr_YU", "CP852" }, /* CP852 or CP866 or CP855 ?? */
  793     { "sv",    "CP850" },
  794     { "sv_SE", "CP850" },
  795     { "th",    "CP874" },
  796     { "th_TH", "CP874" },
  797     { "tr",    "CP857" },
  798     { "tr_TR", "CP857" },
  799     { "uk",    "CP1125" },
  800     { "uk_UA", "CP1125" },
  801     { "zh_CN", "GBK" },
  802     { "zh_TW", "CP950" } /* not CP938 ?? */
  803 #  define locale_table_defined
  804 # endif
  805 # ifndef locale_table_defined
  806     /* No platform section above defined locale_table_defined, so supply a dummy entry: an empty initializer list would be a C syntax error.  */
  807     { "", "" }
  808 # endif
  809   };
  810 
  811 #endif
  812 
  813 
  814 /* Determine the current locale's character encoding, and canonicalize it
  815    into one of the canonical names listed in localcharset.h.
  816    The result must not be freed; it is statically allocated.
  817    If the canonical name cannot be determined, the result is a non-canonical
  818    name.  */
  819 
  820 #ifdef STATIC
  821 STATIC
  822 #endif
  823 const char *
  824 locale_charset (void)
  825 {
  826   const char *codeset;
  827 
  828 #if HAVE_LANGINFO_CODESET || defined WINDOWS_NATIVE || defined OS2
  829 
  830 # if HAVE_LANGINFO_CODESET
  831 
  832   /* Most systems support nl_langinfo (CODESET) nowadays.  */
  833   codeset = nl_langinfo (CODESET);
  834 
  835 #  ifdef __CYGWIN__
  836   /* Cygwin < 1.7 does not have locales.  nl_langinfo (CODESET) always
  837      returns "US-ASCII".  Return the suffix of the locale name from the
  838      environment variables (if present) or the codepage as a number.  */
  839   if (codeset != NULL && strcmp (codeset, "US-ASCII") == 0)
  840     {
  841       const char *locale;
  842       static char buf[2 + 10 + 1];
  843 
  844       locale = getenv ("LC_ALL");
  845       if (locale == NULL || locale[0] == '\0')
  846         {
  847           locale = getenv ("LC_CTYPE");
  848           if (locale == NULL || locale[0] == '\0')
  849             locale = getenv ("LANG");
  850         }
  851       if (locale != NULL && locale[0] != '\0')
  852         {
  853           /* If the locale name contains an encoding after the dot, return
  854              it.  */
  855           const char *dot = strchr (locale, '.');
  856 
  857           if (dot != NULL)
  858             {
  859               const char *modifier;
  860 
  861               dot++;
  862               /* Look for the possible @... trailer and remove it, if any.  */
  863               modifier = strchr (dot, '@');
  864               if (modifier == NULL)
  865                 return dot;
  866               if (modifier - dot < sizeof (buf))
  867                 {
  868                   memcpy (buf, dot, modifier - dot);
  869                   buf [modifier - dot] = '\0';
  870                   return buf;
  871                 }
  872             }
  873         }
  874 
  875       /* The Windows API has a function returning the locale's codepage as a
  876          number: GetACP().  This encoding is used by Cygwin, unless the user
  877          has set the environment variable CYGWIN=codepage:oem (which very few
  878          people do).
  879          Output directed to console windows needs to be converted (to
  880          GetOEMCP() if the console is using a raster font, or to
  881          GetConsoleOutputCP() if it is using a TrueType font).  Cygwin does
  882          this conversion transparently (see winsup/cygwin/fhandler_console.cc),
  883          converting to GetConsoleOutputCP().  This leads to correct results,
  884          except when SetConsoleOutputCP has been called and a raster font is
  885          in use.  */
  886       sprintf (buf, "CP%u", GetACP ());
  887       codeset = buf;
  888     }
  889 #  endif
  890 
  891   if (codeset == NULL)
  892     /* The canonical name cannot be determined.  */
  893     codeset = "";
  894 
  895 # elif defined WINDOWS_NATIVE
  896 
  897   static char buf[2 + 10 + 1];
  898 
  899   /* The Windows API has a function returning the locale's codepage as
  900      a number, but the value doesn't change according to what the
  901      'setlocale' call specified.  So we use it as a last resort, in
  902      case the string returned by 'setlocale' doesn't specify the
  903      codepage.  */
  904   char *current_locale = setlocale (LC_ALL, NULL);
  905   char *pdot;
  906 
  907   /* If they set different locales for different categories,
  908      'setlocale' will return a semi-colon separated list of locale
  909      values.  To make sure we use the correct one, we choose LC_CTYPE.  */
  910   if (strchr (current_locale, ';'))
  911     current_locale = setlocale (LC_CTYPE, NULL);
  912 
  913   pdot = strrchr (current_locale, '.');
  914   if (pdot && 2 + strlen (pdot + 1) + 1 <= sizeof (buf))
  915     sprintf (buf, "CP%s", pdot + 1);
  916   else
  917     {
  918       /* The Windows API has a function returning the locale's codepage as a
  919         number: GetACP().
  920         When the output goes to a console window, it needs to be provided in
  921         GetOEMCP() encoding if the console is using a raster font, or in
  922         GetConsoleOutputCP() encoding if it is using a TrueType font.
  923         But in GUI programs and for output sent to files and pipes, GetACP()
  924         encoding is the best bet.  */
  925       sprintf (buf, "CP%u", GetACP ());
  926     }
  927   /* For a locale name such as "French_France.65001", in Windows 10,
  928      setlocale now returns "French_France.utf8" instead.  */
  929   if (strcmp (buf + 2, "65001") == 0 || strcmp (buf + 2, "utf8") == 0)
  930     codeset = "UTF-8";
  931   else
  932     codeset = buf;
  933 
  934 # elif defined OS2
  935 
  936   const char *locale;
  937   static char buf[2 + 10 + 1];
  938   ULONG cp[3];
  939   ULONG cplen;
  940 
  941   codeset = NULL;
  942 
  943   /* Allow user to override the codeset, as set in the operating system,
  944      with standard language environment variables.  */
  945   locale = getenv ("LC_ALL");
  946   if (locale == NULL || locale[0] == '\0')
  947     {
  948       locale = getenv ("LC_CTYPE");
  949       if (locale == NULL || locale[0] == '\0')
  950         locale = getenv ("LANG");
  951     }
  952   if (locale != NULL && locale[0] != '\0')
  953     {
  954       /* If the locale name contains an encoding after the dot, return it.  */
  955       const char *dot = strchr (locale, '.');
  956 
  957       if (dot != NULL)
  958         {
  959           const char *modifier;
  960 
  961           dot++;
  962           /* Look for the possible @... trailer and remove it, if any.  */
  963           modifier = strchr (dot, '@');
  964           if (modifier == NULL)
  965             return dot;
  966           if (modifier - dot < sizeof (buf))
  967             {
  968               memcpy (buf, dot, modifier - dot);
  969               buf [modifier - dot] = '\0';
  970               return buf;
  971             }
  972         }
  973 
  974       /* For the POSIX locale, don't use the system's codepage.  */
  975       if (strcmp (locale, "C") == 0 || strcmp (locale, "POSIX") == 0)
  976         codeset = "";
  977     }
  978 
  979   if (codeset == NULL)
  980     {
  981       /* OS/2 has a function returning the locale's codepage as a number.  */
  982       if (DosQueryCp (sizeof (cp), cp, &cplen))
  983         codeset = "";
  984       else
  985         {
  986           sprintf (buf, "CP%u", cp[0]);
  987           codeset = buf;
  988         }
  989     }
  990 
  991 # else
  992 
  993 #  error "Add code for other platforms here."
  994 
  995 # endif
  996 
  997   /* Resolve alias.  */
  998   {
  999 # ifdef alias_table_defined
 1000     /* On some platforms, UTF-8 locales are the most frequently used ones.
 1001        Speed up the common case and slow down the less common cases by
 1002        testing for this case first.  */
 1003 #  if defined __OpenBSD__ || (defined __APPLE__ && defined __MACH__) || defined __sun || defined __CYGWIN__
 1004     if (strcmp (codeset, "UTF-8") == 0)
 1005       goto done_table_lookup;
 1006     else
 1007 #  endif
 1008       {
 1009         const struct table_entry * const table = alias_table;
 1010         size_t const table_size =
 1011           sizeof (alias_table) / sizeof (struct table_entry);
 1012         /* The table is sorted.  Perform a binary search.  */
 1013         size_t hi = table_size;
 1014         size_t lo = 0;
 1015         while (lo < hi)
 1016           {
 1017             /* Invariant:
 1018                for i < lo, strcmp (table[i].alias, codeset) < 0,
 1019                for i >= hi, strcmp (table[i].alias, codeset) > 0.  */
 1020             size_t mid = (hi + lo) >> 1; /* >= lo, < hi */
 1021             int cmp = strcmp (table[mid].alias, codeset);
 1022             if (cmp < 0)
 1023               lo = mid + 1;
 1024             else if (cmp > 0)
 1025               hi = mid;
 1026             else
 1027               {
 1028                 /* Found an i with
 1029                      strcmp (table[i].alias, codeset) == 0.  */
 1030                 codeset = table[mid].canonical;
 1031                 goto done_table_lookup;
 1032               }
 1033           }
 1034       }
 1035     if (0)
 1036       done_table_lookup: ;
 1037     else
 1038 # endif
 1039       {
 1040         /* Did not find it in the table.  */
 1041         /* On Mac OS X, all modern locales use the UTF-8 encoding.
 1042            BeOS and Haiku have a single locale, and it has UTF-8 encoding.  */
 1043 # if (defined __APPLE__ && defined __MACH__) || defined __BEOS__ || defined __HAIKU__
 1044         codeset = "UTF-8";
 1045 # else
 1046         /* Don't return an empty string.  GNU libc and GNU libiconv interpret
 1047            the empty string as denoting "the locale's character encoding",
 1048            thus GNU libiconv would call this function a second time.  */
 1049         if (codeset[0] == '\0')
 1050           codeset = "ASCII";
 1051 # endif
 1052       }
 1053   }
 1054 
 1055 #else
 1056 
 1057   /* On old systems which lack it, use setlocale or getenv.  */
 1058   const char *locale = NULL;
 1059 
 1060   /* But most old systems don't have a complete set of locales.  Some
 1061      (like DJGPP) have only the C locale.  Therefore we don't use setlocale
 1062      here; it would return "C" when it doesn't support the locale name the
 1063      user has set.  */
 1064 # if 0
 1065   locale = setlocale (LC_CTYPE, NULL);
 1066 # endif
 1067   if (locale == NULL || locale[0] == '\0')
 1068     {
 1069       locale = getenv ("LC_ALL");
 1070       if (locale == NULL || locale[0] == '\0')
 1071         {
 1072           locale = getenv ("LC_CTYPE");
 1073           if (locale == NULL || locale[0] == '\0')
 1074             locale = getenv ("LANG");
 1075             if (locale == NULL)
 1076               locale = "";
 1077         }
 1078     }
 1079 
 1080   /* Map locale name to canonical encoding name.  */
 1081   {
 1082 # ifdef locale_table_defined
 1083     const struct table_entry * const table = locale_table;
 1084     size_t const table_size =
 1085       sizeof (locale_table) / sizeof (struct table_entry);
 1086     /* The table is sorted.  Perform a binary search.  */
 1087     size_t hi = table_size;
 1088     size_t lo = 0;
 1089     while (lo < hi)
 1090       {
 1091         /* Invariant:
 1092            for i < lo, strcmp (table[i].locale, locale) < 0,
 1093            for i >= hi, strcmp (table[i].locale, locale) > 0.  */
 1094         size_t mid = (hi + lo) >> 1; /* >= lo, < hi */
 1095         int cmp = strcmp (table[mid].locale, locale);
 1096         if (cmp < 0)
 1097           lo = mid + 1;
 1098         else if (cmp > 0)
 1099           hi = mid;
 1100         else
 1101           {
 1102             /* Found an i with
 1103                  strcmp (table[i].locale, locale) == 0.  */
 1104             codeset = table[mid].canonical;
 1105             goto done_table_lookup;
 1106           }
 1107       }
 1108     if (0)
 1109       done_table_lookup: ;
 1110     else
 1111 # endif
 1112       {
 1113         /* Did not find it in the table.  */
 1114         /* On Mac OS X, all modern locales use the UTF-8 encoding.
 1115            BeOS and Haiku have a single locale, and it has UTF-8 encoding.  */
 1116 # if (defined __APPLE__ && defined __MACH__) || defined __BEOS__ || defined __HAIKU__
 1117         codeset = "UTF-8";
 1118 # else
 1119         /* The canonical name cannot be determined.  */
 1120         /* Don't return an empty string.  GNU libc and GNU libiconv interpret
 1121            the empty string as denoting "the locale's character encoding",
 1122            thus GNU libiconv would call this function a second time.  */
 1123         codeset = "ASCII";
 1124 # endif
 1125       }
 1126   }
 1127 
 1128 #endif
 1129 
 1130 #ifdef DARWIN7
 1131   /* Mac OS X sets MB_CUR_MAX to 1 when LC_ALL=C, and "UTF-8"
 1132      (the default codeset) does not work when MB_CUR_MAX is 1.  */
 1133   if (strcmp (codeset, "UTF-8") == 0 && MB_CUR_MAX_L (uselocale (NULL)) <= 1)
 1134     codeset = "ASCII";
 1135 #endif
 1136 
 1137   return codeset;
 1138 }