gsasl  1.10.0
About: GNU SASL is an implementation of the Simple Authentication and Security Layer (SASL). Development version.
  Fossies Dox: gsasl-1.10.0.tar.gz  ("unofficial" and yet experimental doxygen-generated source code documentation)  

localcharset.c
Go to the documentation of this file.
1 /* Determine a canonical name for the current locale's character encoding.
2 
3  Copyright (C) 2000-2006, 2008-2021 Free Software Foundation, Inc.
4 
5  This program is free software; you can redistribute it and/or modify
6  it under the terms of the GNU General Public License as published by
7  the Free Software Foundation; either version 3, or (at your option)
8  any later version.
9 
10  This program is distributed in the hope that it will be useful,
11  but WITHOUT ANY WARRANTY; without even the implied warranty of
12  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13  GNU General Public License for more details.
14 
15  You should have received a copy of the GNU General Public License along
16  with this program; if not, see <https://www.gnu.org/licenses/>. */
17 
18 /* Written by Bruno Haible <bruno@clisp.org>. */
19 
20 #include <config.h>
21 
22 /* Specification. */
23 #include "localcharset.h"
24 
25 #include <stddef.h>
26 #include <stdio.h>
27 #include <string.h>
28 #include <stdlib.h>
29 
30 #if defined __APPLE__ && defined __MACH__ && HAVE_LANGINFO_CODESET
31 # define DARWIN7 /* Darwin 7 or newer, i.e. Mac OS X 10.3 or newer */
32 #endif
33 
34 #if defined _WIN32 && !defined __CYGWIN__
35 # define WINDOWS_NATIVE
36 # include <locale.h>
37 #endif
38 
39 #if defined __EMX__
40 /* Assume EMX program runs on OS/2, even if compiled under DOS. */
41 # ifndef OS2
42 # define OS2
43 # endif
44 #endif
45 
46 #if !defined WINDOWS_NATIVE
47 # if HAVE_LANGINFO_CODESET
48 # include <langinfo.h>
49 # else
50 # if 0 /* see comment regarding use of setlocale(), below */
51 # include <locale.h>
52 # endif
53 # endif
54 # ifdef __CYGWIN__
55 # define WIN32_LEAN_AND_MEAN
56 # include <windows.h>
57 # endif
58 #elif defined WINDOWS_NATIVE
59 # define WIN32_LEAN_AND_MEAN
60 # include <windows.h>
61  /* For the use of setlocale() below, the Gnulib override in setlocale.c is
62  not needed; see the platform lists in setlocale_null.m4. */
63 # undef setlocale
64 #endif
65 #if defined OS2
66 # define INCL_DOS
67 # include <os2.h>
68 #endif
69 
70 /* For MB_CUR_MAX_L */
71 #if defined DARWIN7
72 # include <xlocale.h>
73 #endif
74 
75 
76 #if HAVE_LANGINFO_CODESET || defined WINDOWS_NATIVE || defined OS2
77 
78 /* On these platforms, we use a mapping from non-canonical encoding name
79  to GNU canonical encoding name. */
80 
81 /* With glibc-2.1 or newer, we don't need any canonicalization,
82  because glibc has iconv and both glibc and libiconv support all
83  GNU canonical names directly. */
84 # if !((defined __GNU_LIBRARY__ && __GLIBC__ >= 2) || defined __UCLIBC__)
85 
/* One alias mapping: ALIAS is a non-canonical encoding name, CANONICAL is
   the GNU canonical encoding name it maps to.  Both names are stored inline
   in fixed-size arrays, so each is limited to 11 characters plus the
   terminating NUL.  */
struct table_entry
{
  const char alias[11+1];
  const char canonical[11+1];
};

/* Table of platform-dependent mappings, sorted in ascending order of the
   alias field (plain byte order, so upper-case names precede lower-case
   ones).
   Each platform section below ends without a trailing comma, so at most one
   section may be active in a given build; when none is active, a single
   dummy entry keeps the initializer non-empty.  Entries that appear inside
   comments are not part of the table.  */
static const struct table_entry alias_table[] =
  {
# if defined __FreeBSD__ /* FreeBSD */
    /*{ "ARMSCII-8", "ARMSCII-8" },*/
    { "Big5", "BIG5" },
    { "C", "ASCII" },
    /*{ "CP1131", "CP1131" },*/
    /*{ "CP1251", "CP1251" },*/
    /*{ "CP866", "CP866" },*/
    /*{ "GB18030", "GB18030" },*/
    /*{ "GB2312", "GB2312" },*/
    /*{ "GBK", "GBK" },*/
    /*{ "ISCII-DEV", "?" },*/
    { "ISO8859-1", "ISO-8859-1" },
    { "ISO8859-13", "ISO-8859-13" },
    { "ISO8859-15", "ISO-8859-15" },
    { "ISO8859-2", "ISO-8859-2" },
    { "ISO8859-5", "ISO-8859-5" },
    { "ISO8859-7", "ISO-8859-7" },
    { "ISO8859-9", "ISO-8859-9" },
    /*{ "KOI8-R", "KOI8-R" },*/
    /*{ "KOI8-U", "KOI8-U" },*/
    { "SJIS", "SHIFT_JIS" },
    { "US-ASCII", "ASCII" },
    { "eucCN", "GB2312" },
    { "eucJP", "EUC-JP" },
    { "eucKR", "EUC-KR" }
# define alias_table_defined
# endif
# if defined __NetBSD__ /* NetBSD */
    { "646", "ASCII" },
    /*{ "ARMSCII-8", "ARMSCII-8" },*/
    /*{ "BIG5", "BIG5" },*/
    { "Big5-HKSCS", "BIG5-HKSCS" },
    /*{ "CP1251", "CP1251" },*/
    /*{ "CP866", "CP866" },*/
    /*{ "GB18030", "GB18030" },*/
    /*{ "GB2312", "GB2312" },*/
    { "ISO8859-1", "ISO-8859-1" },
    { "ISO8859-13", "ISO-8859-13" },
    { "ISO8859-15", "ISO-8859-15" },
    { "ISO8859-2", "ISO-8859-2" },
    { "ISO8859-4", "ISO-8859-4" },
    { "ISO8859-5", "ISO-8859-5" },
    { "ISO8859-7", "ISO-8859-7" },
    /*{ "KOI8-R", "KOI8-R" },*/
    /*{ "KOI8-U", "KOI8-U" },*/
    /*{ "PT154", "PT154" },*/
    { "SJIS", "SHIFT_JIS" },
    { "eucCN", "GB2312" },
    { "eucJP", "EUC-JP" },
    { "eucKR", "EUC-KR" },
    { "eucTW", "EUC-TW" }
# define alias_table_defined
# endif
# if defined __OpenBSD__ /* OpenBSD */
    { "646", "ASCII" },
    { "ISO8859-1", "ISO-8859-1" },
    { "ISO8859-13", "ISO-8859-13" },
    { "ISO8859-15", "ISO-8859-15" },
    { "ISO8859-2", "ISO-8859-2" },
    { "ISO8859-4", "ISO-8859-4" },
    { "ISO8859-5", "ISO-8859-5" },
    { "ISO8859-7", "ISO-8859-7" },
    { "US-ASCII", "ASCII" }
# define alias_table_defined
# endif
# if defined __APPLE__ && defined __MACH__ /* Mac OS X */
    /* Darwin 7.5 has nl_langinfo(CODESET), but sometimes its value is
       useless:
       - It returns the empty string when LANG is set to a locale of the
         form ll_CC, although ll_CC/LC_CTYPE is a symlink to an UTF-8
         LC_CTYPE file.
       - The environment variables LANG, LC_CTYPE, LC_ALL are not set by
         the system; nl_langinfo(CODESET) returns "US-ASCII" in this case.
       - The documentation says:
           "... all code that calls BSD system routines should ensure
            that the const *char parameters of these routines are in UTF-8
            encoding. All BSD system functions expect their string
            parameters to be in UTF-8 encoding and nothing else."
         It also says
           "An additional caveat is that string parameters for files,
            paths, and other file-system entities must be in canonical
            UTF-8. In a canonical UTF-8 Unicode string, all decomposable
            characters are decomposed ..."
         but this is not true: You can pass non-decomposed UTF-8 strings
         to file system functions, and it is the OS which will convert
         them to decomposed UTF-8 before accessing the file system.
       - The Apple Terminal application displays UTF-8 by default.
       - However, other applications are free to use different encodings:
         - xterm uses ISO-8859-1 by default.
         - TextEdit uses MacRoman by default.
       We prefer UTF-8 over decomposed UTF-8-MAC because one should
       minimize the use of decomposed Unicode. Unfortunately, through the
       Darwin file system, decomposed UTF-8 strings are leaked into user
       space nevertheless.
       Then there are also the locales with encodings other than US-ASCII
       and UTF-8. These locales can be occasionally useful to users (e.g.
       when grepping through ISO-8859-1 encoded text files), when all their
       file names are in US-ASCII.
     */
    { "ARMSCII-8", "ARMSCII-8" },
    { "Big5", "BIG5" },
    { "Big5HKSCS", "BIG5-HKSCS" },
    { "CP1131", "CP1131" },
    { "CP1251", "CP1251" },
    { "CP866", "CP866" },
    { "CP949", "CP949" },
    { "GB18030", "GB18030" },
    { "GB2312", "GB2312" },
    { "GBK", "GBK" },
    /*{ "ISCII-DEV", "?" },*/
    { "ISO8859-1", "ISO-8859-1" },
    { "ISO8859-13", "ISO-8859-13" },
    { "ISO8859-15", "ISO-8859-15" },
    { "ISO8859-2", "ISO-8859-2" },
    { "ISO8859-4", "ISO-8859-4" },
    { "ISO8859-5", "ISO-8859-5" },
    { "ISO8859-7", "ISO-8859-7" },
    { "ISO8859-9", "ISO-8859-9" },
    { "KOI8-R", "KOI8-R" },
    { "KOI8-U", "KOI8-U" },
    { "PT154", "PT154" },
    { "SJIS", "SHIFT_JIS" },
    { "eucCN", "GB2312" },
    { "eucJP", "EUC-JP" },
    { "eucKR", "EUC-KR" }
# define alias_table_defined
# endif
# if defined _AIX /* AIX */
    /*{ "GBK", "GBK" },*/
    { "IBM-1046", "CP1046" },
    { "IBM-1124", "CP1124" },
    { "IBM-1129", "CP1129" },
    { "IBM-1252", "CP1252" },
    { "IBM-850", "CP850" },
    { "IBM-856", "CP856" },
    { "IBM-921", "ISO-8859-13" },
    { "IBM-922", "CP922" },
    { "IBM-932", "CP932" },
    { "IBM-943", "CP943" },
    { "IBM-eucCN", "GB2312" },
    { "IBM-eucJP", "EUC-JP" },
    { "IBM-eucKR", "EUC-KR" },
    { "IBM-eucTW", "EUC-TW" },
    { "ISO8859-1", "ISO-8859-1" },
    { "ISO8859-15", "ISO-8859-15" },
    { "ISO8859-2", "ISO-8859-2" },
    { "ISO8859-5", "ISO-8859-5" },
    { "ISO8859-6", "ISO-8859-6" },
    { "ISO8859-7", "ISO-8859-7" },
    { "ISO8859-8", "ISO-8859-8" },
    { "ISO8859-9", "ISO-8859-9" },
    { "TIS-620", "TIS-620" },
    /*{ "UTF-8", "UTF-8" },*/
    { "big5", "BIG5" }
# define alias_table_defined
# endif
# if defined __hpux /* HP-UX */
    { "SJIS", "SHIFT_JIS" },
    { "arabic8", "HP-ARABIC8" },
    { "big5", "BIG5" },
    { "cp1251", "CP1251" },
    { "eucJP", "EUC-JP" },
    { "eucKR", "EUC-KR" },
    { "eucTW", "EUC-TW" },
    { "gb18030", "GB18030" },
    { "greek8", "HP-GREEK8" },
    { "hebrew8", "HP-HEBREW8" },
    { "hkbig5", "BIG5-HKSCS" },
    { "hp15CN", "GB2312" },
    { "iso88591", "ISO-8859-1" },
    { "iso885913", "ISO-8859-13" },
    { "iso885915", "ISO-8859-15" },
    { "iso88592", "ISO-8859-2" },
    { "iso88594", "ISO-8859-4" },
    { "iso88595", "ISO-8859-5" },
    { "iso88596", "ISO-8859-6" },
    { "iso88597", "ISO-8859-7" },
    { "iso88598", "ISO-8859-8" },
    { "iso88599", "ISO-8859-9" },
    { "kana8", "HP-KANA8" },
    { "koi8r", "KOI8-R" },
    { "roman8", "HP-ROMAN8" },
    { "tis620", "TIS-620" },
    { "turkish8", "HP-TURKISH8" },
    { "utf8", "UTF-8" }
# define alias_table_defined
# endif
# if defined __sgi /* IRIX */
    { "ISO8859-1", "ISO-8859-1" },
    { "ISO8859-15", "ISO-8859-15" },
    { "ISO8859-2", "ISO-8859-2" },
    { "ISO8859-5", "ISO-8859-5" },
    { "ISO8859-7", "ISO-8859-7" },
    { "ISO8859-9", "ISO-8859-9" },
    { "eucCN", "GB2312" },
    { "eucJP", "EUC-JP" },
    { "eucKR", "EUC-KR" },
    { "eucTW", "EUC-TW" }
# define alias_table_defined
# endif
# if defined __osf__ /* OSF/1 */
    /*{ "GBK", "GBK" },*/
    { "ISO8859-1", "ISO-8859-1" },
    { "ISO8859-15", "ISO-8859-15" },
    { "ISO8859-2", "ISO-8859-2" },
    { "ISO8859-4", "ISO-8859-4" },
    { "ISO8859-5", "ISO-8859-5" },
    { "ISO8859-7", "ISO-8859-7" },
    { "ISO8859-8", "ISO-8859-8" },
    { "ISO8859-9", "ISO-8859-9" },
    { "KSC5601", "CP949" },
    { "SJIS", "SHIFT_JIS" },
    { "TACTIS", "TIS-620" },
    /*{ "UTF-8", "UTF-8" },*/
    { "big5", "BIG5" },
    { "cp850", "CP850" },
    { "dechanyu", "DEC-HANYU" },
    { "dechanzi", "GB2312" },
    { "deckanji", "DEC-KANJI" },
    { "deckorean", "EUC-KR" },
    { "eucJP", "EUC-JP" },
    { "eucKR", "EUC-KR" },
    { "eucTW", "EUC-TW" },
    { "sdeckanji", "EUC-JP" }
# define alias_table_defined
# endif
# if defined __sun /* Solaris */
    { "5601", "EUC-KR" },
    { "646", "ASCII" },
    /*{ "BIG5", "BIG5" },*/
    { "Big5-HKSCS", "BIG5-HKSCS" },
    { "GB18030", "GB18030" },
    /*{ "GBK", "GBK" },*/
    { "ISO8859-1", "ISO-8859-1" },
    { "ISO8859-11", "TIS-620" },
    { "ISO8859-13", "ISO-8859-13" },
    { "ISO8859-15", "ISO-8859-15" },
    { "ISO8859-2", "ISO-8859-2" },
    { "ISO8859-3", "ISO-8859-3" },
    { "ISO8859-4", "ISO-8859-4" },
    { "ISO8859-5", "ISO-8859-5" },
    { "ISO8859-6", "ISO-8859-6" },
    { "ISO8859-7", "ISO-8859-7" },
    { "ISO8859-8", "ISO-8859-8" },
    { "ISO8859-9", "ISO-8859-9" },
    { "PCK", "SHIFT_JIS" },
    { "TIS620.2533", "TIS-620" },
    /*{ "UTF-8", "UTF-8" },*/
    { "ansi-1251", "CP1251" },
    { "cns11643", "EUC-TW" },
    { "eucJP", "EUC-JP" },
    { "gb2312", "GB2312" },
    { "koi8-r", "KOI8-R" }
# define alias_table_defined
# endif
# if defined __minix /* Minix */
    { "646", "ASCII" }
# define alias_table_defined
# endif
# if defined WINDOWS_NATIVE || defined __CYGWIN__ /* Windows */
    { "CP1361", "JOHAB" },
    { "CP20127", "ASCII" },
    { "CP20866", "KOI8-R" },
    { "CP20936", "GB2312" },
    { "CP21866", "KOI8-RU" },
    { "CP28591", "ISO-8859-1" },
    { "CP28592", "ISO-8859-2" },
    { "CP28593", "ISO-8859-3" },
    { "CP28594", "ISO-8859-4" },
    { "CP28595", "ISO-8859-5" },
    { "CP28596", "ISO-8859-6" },
    { "CP28597", "ISO-8859-7" },
    { "CP28598", "ISO-8859-8" },
    { "CP28599", "ISO-8859-9" },
    { "CP28605", "ISO-8859-15" },
    { "CP38598", "ISO-8859-8" },
    { "CP51932", "EUC-JP" },
    { "CP51936", "GB2312" },
    { "CP51949", "EUC-KR" },
    { "CP51950", "EUC-TW" },
    { "CP54936", "GB18030" },
    { "CP65001", "UTF-8" },
    { "CP936", "GBK" }
# define alias_table_defined
# endif
# if defined OS2 /* OS/2 */
    /* The list of encodings is taken from "List of OS/2 Codepages"
       by Alex Taylor:
       <http://altsan.org/os2/toolkits/uls/index.html#codepages>.
       See also "__convcp() of kLIBC":
       <https://github.com/bitwiseworks/libc/blob/master/src/emx/src/lib/locale/__convcp.c>. */
    { "CP1004", "CP1252" },
    /*{ "CP1041", "CP943" },*/
    /*{ "CP1088", "CP949" },*/
    { "CP1089", "ISO-8859-6" },
    /*{ "CP1114", "CP950" },*/
    /*{ "CP1115", "GB2312" },*/
    { "CP1208", "UTF-8" },
    /*{ "CP1380", "GB2312" },*/
    { "CP1381", "GB2312" },
    { "CP1383", "GB2312" },
    { "CP1386", "GBK" },
    /*{ "CP301", "CP943" },*/
    { "CP3372", "EUC-JP" },
    { "CP4946", "CP850" },
    /*{ "CP5048", "JIS_X0208-1990" },*/
    /*{ "CP5049", "JIS_X0212-1990" },*/
    /*{ "CP5067", "KS_C_5601-1987" },*/
    { "CP813", "ISO-8859-7" },
    { "CP819", "ISO-8859-1" },
    { "CP878", "KOI8-R" },
    /*{ "CP897", "CP943" },*/
    { "CP912", "ISO-8859-2" },
    { "CP913", "ISO-8859-3" },
    { "CP914", "ISO-8859-4" },
    { "CP915", "ISO-8859-5" },
    { "CP916", "ISO-8859-8" },
    { "CP920", "ISO-8859-9" },
    { "CP921", "ISO-8859-13" },
    { "CP923", "ISO-8859-15" },
    /*{ "CP941", "CP943" },*/
    /*{ "CP947", "CP950" },*/
    /*{ "CP951", "CP949" },*/
    /*{ "CP952", "JIS_X0208-1990" },*/
    /*{ "CP953", "JIS_X0212-1990" },*/
    { "CP954", "EUC-JP" },
    { "CP964", "EUC-TW" },
    { "CP970", "EUC-KR" },
    /*{ "CP971", "KS_C_5601-1987" },*/
    { "IBM-1004", "CP1252" },
    /*{ "IBM-1006", "?" },*/
    /*{ "IBM-1008", "?" },*/
    /*{ "IBM-1041", "CP943" },*/
    /*{ "IBM-1051", "?" },*/
    /*{ "IBM-1088", "CP949" },*/
    { "IBM-1089", "ISO-8859-6" },
    /*{ "IBM-1098", "?" },*/
    /*{ "IBM-1114", "CP950" },*/
    /*{ "IBM-1115", "GB2312" },*/
    /*{ "IBM-1116", "?" },*/
    /*{ "IBM-1117", "?" },*/
    /*{ "IBM-1118", "?" },*/
    /*{ "IBM-1119", "?" },*/
    { "IBM-1124", "CP1124" },
    { "IBM-1125", "CP1125" },
    { "IBM-1131", "CP1131" },
    { "IBM-1208", "UTF-8" },
    { "IBM-1250", "CP1250" },
    { "IBM-1251", "CP1251" },
    { "IBM-1252", "CP1252" },
    { "IBM-1253", "CP1253" },
    { "IBM-1254", "CP1254" },
    { "IBM-1255", "CP1255" },
    { "IBM-1256", "CP1256" },
    { "IBM-1257", "CP1257" },
    /*{ "IBM-1275", "?" },*/
    /*{ "IBM-1276", "?" },*/
    /*{ "IBM-1277", "?" },*/
    /*{ "IBM-1280", "?" },*/
    /*{ "IBM-1281", "?" },*/
    /*{ "IBM-1282", "?" },*/
    /*{ "IBM-1283", "?" },*/
    /*{ "IBM-1380", "GB2312" },*/
    { "IBM-1381", "GB2312" },
    { "IBM-1383", "GB2312" },
    { "IBM-1386", "GBK" },
    /*{ "IBM-301", "CP943" },*/
    { "IBM-3372", "EUC-JP" },
    { "IBM-367", "ASCII" },
    { "IBM-437", "CP437" },
    { "IBM-4946", "CP850" },
    /*{ "IBM-5048", "JIS_X0208-1990" },*/
    /*{ "IBM-5049", "JIS_X0212-1990" },*/
    /*{ "IBM-5067", "KS_C_5601-1987" },*/
    { "IBM-813", "ISO-8859-7" },
    { "IBM-819", "ISO-8859-1" },
    { "IBM-850", "CP850" },
    /*{ "IBM-851", "?" },*/
    { "IBM-852", "CP852" },
    { "IBM-855", "CP855" },
    { "IBM-856", "CP856" },
    { "IBM-857", "CP857" },
    /*{ "IBM-859", "?" },*/
    { "IBM-860", "CP860" },
    { "IBM-861", "CP861" },
    { "IBM-862", "CP862" },
    { "IBM-863", "CP863" },
    { "IBM-864", "CP864" },
    { "IBM-865", "CP865" },
    { "IBM-866", "CP866" },
    /*{ "IBM-868", "?" },*/
    { "IBM-869", "CP869" },
    { "IBM-874", "CP874" },
    { "IBM-878", "KOI8-R" },
    /*{ "IBM-895", "?" },*/
    /*{ "IBM-897", "CP943" },*/
    /*{ "IBM-907", "?" },*/
    /*{ "IBM-909", "?" },*/
    { "IBM-912", "ISO-8859-2" },
    { "IBM-913", "ISO-8859-3" },
    { "IBM-914", "ISO-8859-4" },
    { "IBM-915", "ISO-8859-5" },
    { "IBM-916", "ISO-8859-8" },
    { "IBM-920", "ISO-8859-9" },
    { "IBM-921", "ISO-8859-13" },
    { "IBM-922", "CP922" },
    { "IBM-923", "ISO-8859-15" },
    { "IBM-932", "CP932" },
    /*{ "IBM-941", "CP943" },*/
    /*{ "IBM-942", "?" },*/
    { "IBM-943", "CP943" },
    /*{ "IBM-947", "CP950" },*/
    { "IBM-949", "CP949" },
    { "IBM-950", "CP950" },
    /*{ "IBM-951", "CP949" },*/
    /*{ "IBM-952", "JIS_X0208-1990" },*/
    /*{ "IBM-953", "JIS_X0212-1990" },*/
    { "IBM-954", "EUC-JP" },
    /*{ "IBM-955", "?" },*/
    { "IBM-964", "EUC-TW" },
    { "IBM-970", "EUC-KR" },
    /*{ "IBM-971", "KS_C_5601-1987" },*/
    { "IBM-eucCN", "GB2312" },
    { "IBM-eucJP", "EUC-JP" },
    { "IBM-eucKR", "EUC-KR" },
    { "IBM-eucTW", "EUC-TW" },
    { "IBM33722", "EUC-JP" },
    { "ISO8859-1", "ISO-8859-1" },
    { "ISO8859-2", "ISO-8859-2" },
    { "ISO8859-3", "ISO-8859-3" },
    { "ISO8859-4", "ISO-8859-4" },
    { "ISO8859-5", "ISO-8859-5" },
    { "ISO8859-6", "ISO-8859-6" },
    { "ISO8859-7", "ISO-8859-7" },
    { "ISO8859-8", "ISO-8859-8" },
    { "ISO8859-9", "ISO-8859-9" },
    /*{ "JISX0201-1976", "JISX0201-1976" },*/
    /*{ "JISX0208-1978", "?" },*/
    /*{ "JISX0208-1983", "JIS_X0208-1983" },*/
    /*{ "JISX0208-1990", "JIS_X0208-1990" },*/
    /*{ "JISX0212-1990", "JIS_X0212-1990" },*/
    /*{ "KSC5601-1987", "KS_C_5601-1987" },*/
    { "SJIS-1", "CP943" },
    { "SJIS-2", "CP943" },
    { "eucJP", "EUC-JP" },
    { "eucKR", "EUC-KR" },
    { "eucTW-1993", "EUC-TW" }
# define alias_table_defined
# endif
# if defined VMS /* OpenVMS */
    /* The list of encodings is taken from the OpenVMS 7.3-1 documentation
       "Compaq C Run-Time Library Reference Manual for OpenVMS systems"
       section 10.7 "Handling Different Character Sets". */
    { "DECHANYU", "DEC-HANYU" },
    { "DECHANZI", "GB2312" },
    { "DECKANJI", "DEC-KANJI" },
    { "DECKOREAN", "EUC-KR" },
    { "ISO8859-1", "ISO-8859-1" },
    { "ISO8859-2", "ISO-8859-2" },
    { "ISO8859-5", "ISO-8859-5" },
    { "ISO8859-7", "ISO-8859-7" },
    { "ISO8859-8", "ISO-8859-8" },
    { "ISO8859-9", "ISO-8859-9" },
    { "SDECKANJI", "EUC-JP" },
    { "SJIS", "SHIFT_JIS" },
    { "eucJP", "EUC-JP" },
    { "eucTW", "EUC-TW" }
# define alias_table_defined
# endif
# ifndef alias_table_defined
    /* Just a dummy entry, to avoid a C syntax error. */
    { "", "" }
# endif
  };
569 
570 # endif
571 
572 #else
573 
574 /* On these platforms, we use a mapping from locale name to GNU canonical
575  encoding name. */
576 
/* One locale mapping: LOCALE is a locale name, CANONICAL is the GNU
   canonical encoding name used for that locale.  Both names are stored
   inline in fixed-size arrays: a locale name is limited to 17 characters and
   a canonical name to 11 characters, each plus the terminating NUL.  */
struct table_entry
{
  const char locale[17+1];
  const char canonical[11+1];
};

/* Table of platform-dependent mappings, sorted in ascending order of the
   locale field (plain byte order, so upper-case names precede lower-case
   ones).
   Each platform section below ends without a trailing comma, so at most one
   section may be active in a given build; when none is active, a single
   dummy entry keeps the initializer non-empty.  */
static const struct table_entry locale_table[] =
  {
# if defined __FreeBSD__ /* FreeBSD 4.2 */
    { "cs_CZ.ISO_8859-2", "ISO-8859-2" },
    { "da_DK.DIS_8859-15", "ISO-8859-15" },
    { "da_DK.ISO_8859-1", "ISO-8859-1" },
    { "de_AT.DIS_8859-15", "ISO-8859-15" },
    { "de_AT.ISO_8859-1", "ISO-8859-1" },
    { "de_CH.DIS_8859-15", "ISO-8859-15" },
    { "de_CH.ISO_8859-1", "ISO-8859-1" },
    { "de_DE.DIS_8859-15", "ISO-8859-15" },
    { "de_DE.ISO_8859-1", "ISO-8859-1" },
    { "en_AU.DIS_8859-15", "ISO-8859-15" },
    { "en_AU.ISO_8859-1", "ISO-8859-1" },
    { "en_CA.DIS_8859-15", "ISO-8859-15" },
    { "en_CA.ISO_8859-1", "ISO-8859-1" },
    { "en_GB.DIS_8859-15", "ISO-8859-15" },
    { "en_GB.ISO_8859-1", "ISO-8859-1" },
    { "en_US.DIS_8859-15", "ISO-8859-15" },
    { "en_US.ISO_8859-1", "ISO-8859-1" },
    { "es_ES.DIS_8859-15", "ISO-8859-15" },
    { "es_ES.ISO_8859-1", "ISO-8859-1" },
    { "fi_FI.DIS_8859-15", "ISO-8859-15" },
    { "fi_FI.ISO_8859-1", "ISO-8859-1" },
    { "fr_BE.DIS_8859-15", "ISO-8859-15" },
    { "fr_BE.ISO_8859-1", "ISO-8859-1" },
    { "fr_CA.DIS_8859-15", "ISO-8859-15" },
    { "fr_CA.ISO_8859-1", "ISO-8859-1" },
    { "fr_CH.DIS_8859-15", "ISO-8859-15" },
    { "fr_CH.ISO_8859-1", "ISO-8859-1" },
    { "fr_FR.DIS_8859-15", "ISO-8859-15" },
    { "fr_FR.ISO_8859-1", "ISO-8859-1" },
    { "hr_HR.ISO_8859-2", "ISO-8859-2" },
    { "hu_HU.ISO_8859-2", "ISO-8859-2" },
    { "is_IS.DIS_8859-15", "ISO-8859-15" },
    { "is_IS.ISO_8859-1", "ISO-8859-1" },
    { "it_CH.DIS_8859-15", "ISO-8859-15" },
    { "it_CH.ISO_8859-1", "ISO-8859-1" },
    { "it_IT.DIS_8859-15", "ISO-8859-15" },
    { "it_IT.ISO_8859-1", "ISO-8859-1" },
    { "ja_JP.EUC", "EUC-JP" },
    { "ja_JP.SJIS", "SHIFT_JIS" },
    { "ja_JP.Shift_JIS", "SHIFT_JIS" },
    { "ko_KR.EUC", "EUC-KR" },
    { "la_LN.ASCII", "ASCII" },
    { "la_LN.DIS_8859-15", "ISO-8859-15" },
    { "la_LN.ISO_8859-1", "ISO-8859-1" },
    { "la_LN.ISO_8859-2", "ISO-8859-2" },
    { "la_LN.ISO_8859-4", "ISO-8859-4" },
    { "lt_LN.ASCII", "ASCII" },
    { "lt_LN.DIS_8859-15", "ISO-8859-15" },
    { "lt_LN.ISO_8859-1", "ISO-8859-1" },
    { "lt_LN.ISO_8859-2", "ISO-8859-2" },
    { "lt_LT.ISO_8859-4", "ISO-8859-4" },
    { "nl_BE.DIS_8859-15", "ISO-8859-15" },
    { "nl_BE.ISO_8859-1", "ISO-8859-1" },
    { "nl_NL.DIS_8859-15", "ISO-8859-15" },
    { "nl_NL.ISO_8859-1", "ISO-8859-1" },
    { "no_NO.DIS_8859-15", "ISO-8859-15" },
    { "no_NO.ISO_8859-1", "ISO-8859-1" },
    { "pl_PL.ISO_8859-2", "ISO-8859-2" },
    { "pt_PT.DIS_8859-15", "ISO-8859-15" },
    { "pt_PT.ISO_8859-1", "ISO-8859-1" },
    { "ru_RU.CP866", "CP866" },
    { "ru_RU.ISO_8859-5", "ISO-8859-5" },
    { "ru_RU.KOI8-R", "KOI8-R" },
    { "ru_SU.CP866", "CP866" },
    { "ru_SU.ISO_8859-5", "ISO-8859-5" },
    { "ru_SU.KOI8-R", "KOI8-R" },
    { "sl_SI.ISO_8859-2", "ISO-8859-2" },
    { "sv_SE.DIS_8859-15", "ISO-8859-15" },
    { "sv_SE.ISO_8859-1", "ISO-8859-1" },
    { "uk_UA.KOI8-U", "KOI8-U" },
    { "zh_CN.EUC", "GB2312" },
    { "zh_TW.BIG5", "BIG5" },
    { "zh_TW.Big5", "BIG5" }
# define locale_table_defined
# endif
# if defined __DJGPP__ /* DOS / DJGPP 2.03 */
    /* The encodings given here may not all be correct.
       If you find that the encoding given for your language and
       country is not the one your DOS machine actually uses, just
       correct it in this file, and send a mail to
       Juan Manuel Guerrero <juan.guerrero@gmx.de>
       and <bug-gnulib@gnu.org>. */
    { "C", "ASCII" },
    { "ar", "CP864" },
    { "ar_AE", "CP864" },
    { "ar_DZ", "CP864" },
    { "ar_EG", "CP864" },
    { "ar_IQ", "CP864" },
    { "ar_IR", "CP864" },
    { "ar_JO", "CP864" },
    { "ar_KW", "CP864" },
    { "ar_MA", "CP864" },
    { "ar_OM", "CP864" },
    { "ar_QA", "CP864" },
    { "ar_SA", "CP864" },
    { "ar_SY", "CP864" },
    { "be", "CP866" },
    { "be_BE", "CP866" },
    { "bg", "CP866" }, /* not CP855 ?? */
    { "bg_BG", "CP866" }, /* not CP855 ?? */
    { "ca", "CP850" },
    { "ca_ES", "CP850" },
    { "cs", "CP852" },
    { "cs_CZ", "CP852" },
    { "da", "CP865" }, /* not CP850 ?? */
    { "da_DK", "CP865" }, /* not CP850 ?? */
    { "de", "CP850" },
    { "de_AT", "CP850" },
    { "de_CH", "CP850" },
    { "de_DE", "CP850" },
    { "el", "CP869" },
    { "el_GR", "CP869" },
    { "en", "CP850" },
    { "en_AU", "CP850" }, /* not CP437 ?? */
    { "en_CA", "CP850" },
    { "en_GB", "CP850" },
    { "en_NZ", "CP437" },
    { "en_US", "CP437" },
    { "en_ZA", "CP850" }, /* not CP437 ?? */
    { "eo", "CP850" },
    { "eo_EO", "CP850" },
    { "es", "CP850" },
    { "es_AR", "CP850" },
    { "es_BO", "CP850" },
    { "es_CL", "CP850" },
    { "es_CO", "CP850" },
    { "es_CR", "CP850" },
    { "es_CU", "CP850" },
    { "es_DO", "CP850" },
    { "es_EC", "CP850" },
    { "es_ES", "CP850" },
    { "es_GT", "CP850" },
    { "es_HN", "CP850" },
    { "es_MX", "CP850" },
    { "es_NI", "CP850" },
    { "es_PA", "CP850" },
    { "es_PE", "CP850" },
    { "es_PY", "CP850" },
    { "es_SV", "CP850" },
    { "es_UY", "CP850" },
    { "es_VE", "CP850" },
    { "et", "CP850" },
    { "et_EE", "CP850" },
    { "eu", "CP850" },
    { "eu_ES", "CP850" },
    { "fi", "CP850" },
    { "fi_FI", "CP850" },
    { "fr", "CP850" },
    { "fr_BE", "CP850" },
    { "fr_CA", "CP850" },
    { "fr_CH", "CP850" },
    { "fr_FR", "CP850" },
    { "ga", "CP850" },
    { "ga_IE", "CP850" },
    { "gd", "CP850" },
    { "gd_GB", "CP850" },
    { "gl", "CP850" },
    { "gl_ES", "CP850" },
    { "he", "CP862" },
    { "he_IL", "CP862" },
    { "hr", "CP852" },
    { "hr_HR", "CP852" },
    { "hu", "CP852" },
    { "hu_HU", "CP852" },
    { "id", "CP850" }, /* not CP437 ?? */
    { "id_ID", "CP850" }, /* not CP437 ?? */
    { "is", "CP861" }, /* not CP850 ?? */
    { "is_IS", "CP861" }, /* not CP850 ?? */
    { "it", "CP850" },
    { "it_CH", "CP850" },
    { "it_IT", "CP850" },
    { "ja", "CP932" },
    { "ja_JP", "CP932" },
    { "kr", "CP949" }, /* not CP934 ?? */
    { "kr_KR", "CP949" }, /* not CP934 ?? */
    { "lt", "CP775" },
    { "lt_LT", "CP775" },
    { "lv", "CP775" },
    { "lv_LV", "CP775" },
    { "mk", "CP866" }, /* not CP855 ?? */
    { "mk_MK", "CP866" }, /* not CP855 ?? */
    { "mt", "CP850" },
    { "mt_MT", "CP850" },
    { "nb", "CP865" }, /* not CP850 ?? */
    { "nb_NO", "CP865" }, /* not CP850 ?? */
    { "nl", "CP850" },
    { "nl_BE", "CP850" },
    { "nl_NL", "CP850" },
    { "nn", "CP865" }, /* not CP850 ?? */
    { "nn_NO", "CP865" }, /* not CP850 ?? */
    { "no", "CP865" }, /* not CP850 ?? */
    { "no_NO", "CP865" }, /* not CP850 ?? */
    { "pl", "CP852" },
    { "pl_PL", "CP852" },
    { "pt", "CP850" },
    { "pt_BR", "CP850" },
    { "pt_PT", "CP850" },
    { "ro", "CP852" },
    { "ro_RO", "CP852" },
    { "ru", "CP866" },
    { "ru_RU", "CP866" },
    { "sk", "CP852" },
    { "sk_SK", "CP852" },
    { "sl", "CP852" },
    { "sl_SI", "CP852" },
    { "sq", "CP852" },
    { "sq_AL", "CP852" },
    { "sr", "CP852" }, /* CP852 or CP866 or CP855 ?? */
    { "sr_CS", "CP852" }, /* CP852 or CP866 or CP855 ?? */
    { "sr_YU", "CP852" }, /* CP852 or CP866 or CP855 ?? */
    { "sv", "CP850" },
    { "sv_SE", "CP850" },
    { "th", "CP874" },
    { "th_TH", "CP874" },
    { "tr", "CP857" },
    { "tr_TR", "CP857" },
    { "uk", "CP1125" },
    { "uk_UA", "CP1125" },
    { "zh_CN", "GBK" },
    { "zh_TW", "CP950" } /* not CP938 ?? */
# define locale_table_defined
# endif
# ifndef locale_table_defined
    /* Just a dummy entry, to avoid a C syntax error. */
    { "", "" }
# endif
  };
814 
815 #endif
816 
817 
818 /* Determine the current locale's character encoding, and canonicalize it
819  into one of the canonical names listed below.
820  The result must not be freed; it is statically allocated. The result
821  becomes invalid when setlocale() is used to change the global locale, or
822  when the value of one of the environment variables LC_ALL, LC_CTYPE, LANG
823  is changed; threads in multithreaded programs should not do this.
824  If the canonical name cannot be determined, the result is a non-canonical
825  name. */
826 
827 #ifdef STATIC
828 STATIC
829 #endif
830 const char *
832 {
833  const char *codeset;
834 
835  /* This function must be multithread-safe. To achieve this without using
836  thread-local storage, we use a simple strcpy or memcpy to fill this static
837  buffer. Filling it through, for example, strcpy + strcat would not be
838  guaranteed to leave the buffer's contents intact if another thread is
839  currently accessing it. If necessary, the contents is first assembled in
840  a stack-allocated buffer. */
841 
842 #if HAVE_LANGINFO_CODESET || defined WINDOWS_NATIVE || defined OS2
843 
844 # if HAVE_LANGINFO_CODESET
845 
846  /* Most systems support nl_langinfo (CODESET) nowadays. */
847  codeset = nl_langinfo (CODESET);
848 
849 # ifdef __CYGWIN__
850  /* Cygwin < 1.7 does not have locales. nl_langinfo (CODESET) always
851  returns "US-ASCII". Return the suffix of the locale name from the
852  environment variables (if present) or the codepage as a number. */
853  if (codeset != NULL && strcmp (codeset, "US-ASCII") == 0)
854  {
855  const char *locale;
856  static char resultbuf[2 + 10 + 1];
857 
858  locale = getenv ("LC_ALL");
859  if (locale == NULL || locale[0] == '\0')
860  {
861  locale = getenv ("LC_CTYPE");
862  if (locale == NULL || locale[0] == '\0')
863  locale = getenv ("LANG");
864  }
865  if (locale != NULL && locale[0] != '\0')
866  {
867  /* If the locale name contains an encoding after the dot, return
868  it. */
869  const char *dot = strchr (locale, '.');
870 
871  if (dot != NULL)
872  {
873  const char *modifier;
874 
875  dot++;
876  /* Look for the possible @... trailer and remove it, if any. */
877  modifier = strchr (dot, '@');
878  if (modifier == NULL)
879  return dot;
880  if (modifier - dot < sizeof (resultbuf))
881  {
882  /* This way of filling resultbuf is multithread-safe. */
883  memcpy (resultbuf, dot, modifier - dot);
884  resultbuf [modifier - dot] = '\0';
885  return resultbuf;
886  }
887  }
888  }
889 
890  /* The Windows API has a function returning the locale's codepage as a
891  number: GetACP(). This encoding is used by Cygwin, unless the user
892  has set the environment variable CYGWIN=codepage:oem (which very few
893  people do).
894  Output directed to console windows needs to be converted (to
895  GetOEMCP() if the console is using a raster font, or to
896  GetConsoleOutputCP() if it is using a TrueType font). Cygwin does
897  this conversion transparently (see winsup/cygwin/fhandler_console.cc),
898  converting to GetConsoleOutputCP(). This leads to correct results,
899  except when SetConsoleOutputCP has been called and a raster font is
900  in use. */
901  {
902  char buf[2 + 10 + 1];
903 
904  sprintf (buf, "CP%u", GetACP ());
905  strcpy (resultbuf, buf);
906  codeset = resultbuf;
907  }
908  }
909 # endif
910 
911  if (codeset == NULL)
912  /* The canonical name cannot be determined. */
913  codeset = "";
914 
915 # elif defined WINDOWS_NATIVE
916 
917  char buf[2 + 10 + 1];
918  static char resultbuf[2 + 10 + 1];
919 
920  /* The Windows API has a function returning the locale's codepage as
921  a number, but the value doesn't change according to what the
922  'setlocale' call specified. So we use it as a last resort, in
923  case the string returned by 'setlocale' doesn't specify the
924  codepage. */
925  char *current_locale = setlocale (LC_CTYPE, NULL);
926  char *pdot = strrchr (current_locale, '.');
927 
928  if (pdot && 2 + strlen (pdot + 1) + 1 <= sizeof (buf))
929  sprintf (buf, "CP%s", pdot + 1);
930  else
931  {
932  /* The Windows API has a function returning the locale's codepage as a
933  number: GetACP().
934  When the output goes to a console window, it needs to be provided in
935  GetOEMCP() encoding if the console is using a raster font, or in
936  GetConsoleOutputCP() encoding if it is using a TrueType font.
937  But in GUI programs and for output sent to files and pipes, GetACP()
938  encoding is the best bet. */
939  sprintf (buf, "CP%u", GetACP ());
940  }
941  /* For a locale name such as "French_France.65001", in Windows 10,
942  setlocale now returns "French_France.utf8" instead. */
943  if (strcmp (buf + 2, "65001") == 0 || strcmp (buf + 2, "utf8") == 0)
944  codeset = "UTF-8";
945  else
946  {
947  strcpy (resultbuf, buf);
948  codeset = resultbuf;
949  }
950 
951 # elif defined OS2
952 
953  const char *locale;
954  static char resultbuf[2 + 10 + 1];
955  ULONG cp[3];
956  ULONG cplen;
957 
958  codeset = NULL;
959 
960  /* Allow user to override the codeset, as set in the operating system,
961  with standard language environment variables. */
962  locale = getenv ("LC_ALL");
963  if (locale == NULL || locale[0] == '\0')
964  {
965  locale = getenv ("LC_CTYPE");
966  if (locale == NULL || locale[0] == '\0')
967  locale = getenv ("LANG");
968  }
969  if (locale != NULL && locale[0] != '\0')
970  {
971  /* If the locale name contains an encoding after the dot, return it. */
972  const char *dot = strchr (locale, '.');
973 
974  if (dot != NULL)
975  {
976  const char *modifier;
977 
978  dot++;
979  /* Look for the possible @... trailer and remove it, if any. */
980  modifier = strchr (dot, '@');
981  if (modifier == NULL)
982  return dot;
983  if (modifier - dot < sizeof (resultbuf))
984  {
985  /* This way of filling resultbuf is multithread-safe. */
986  memcpy (resultbuf, dot, modifier - dot);
987  resultbuf [modifier - dot] = '\0';
988  return resultbuf;
989  }
990  }
991 
992  /* For the POSIX locale, don't use the system's codepage. */
993  if (strcmp (locale, "C") == 0 || strcmp (locale, "POSIX") == 0)
994  codeset = "";
995  }
996 
997  if (codeset == NULL)
998  {
999  /* OS/2 has a function returning the locale's codepage as a number. */
1000  if (DosQueryCp (sizeof (cp), cp, &cplen))
1001  codeset = "";
1002  else
1003  {
1004  char buf[2 + 10 + 1];
1005 
1006  sprintf (buf, "CP%u", cp[0]);
1007  strcpy (resultbuf, buf);
1008  codeset = resultbuf;
1009  }
1010  }
1011 
1012 # else
1013 
1014 # error "Add code for other platforms here."
1015 
1016 # endif
1017 
1018  /* Resolve alias. */
1019  {
1020 # ifdef alias_table_defined
1021  /* On some platforms, UTF-8 locales are the most frequently used ones.
1022  Speed up the common case and slow down the less common cases by
1023  testing for this case first. */
1024 # if defined __OpenBSD__ || (defined __APPLE__ && defined __MACH__) || defined __sun || defined __CYGWIN__
1025  if (strcmp (codeset, "UTF-8") == 0)
1026  goto done_table_lookup;
1027  else
1028 # endif
1029  {
1030  const struct table_entry * const table = alias_table;
1031  size_t const table_size =
1032  sizeof (alias_table) / sizeof (struct table_entry);
1033  /* The table is sorted. Perform a binary search. */
1034  size_t hi = table_size;
1035  size_t lo = 0;
1036  while (lo < hi)
1037  {
1038  /* Invariant:
1039  for i < lo, strcmp (table[i].alias, codeset) < 0,
1040  for i >= hi, strcmp (table[i].alias, codeset) > 0. */
1041  size_t mid = (hi + lo) >> 1; /* >= lo, < hi */
1042  int cmp = strcmp (table[mid].alias, codeset);
1043  if (cmp < 0)
1044  lo = mid + 1;
1045  else if (cmp > 0)
1046  hi = mid;
1047  else
1048  {
1049  /* Found an i with
1050  strcmp (table[i].alias, codeset) == 0. */
1051  codeset = table[mid].canonical;
1052  goto done_table_lookup;
1053  }
1054  }
1055  }
1056  if (0)
1057  done_table_lookup: ;
1058  else
1059 # endif
1060  {
1061  /* Did not find it in the table. */
1062  /* On Mac OS X, all modern locales use the UTF-8 encoding.
1063  BeOS and Haiku have a single locale, and it has UTF-8 encoding. */
1064 # if (defined __APPLE__ && defined __MACH__) || defined __BEOS__ || defined __HAIKU__
1065  codeset = "UTF-8";
1066 # else
1067  /* Don't return an empty string. GNU libc and GNU libiconv interpret
1068  the empty string as denoting "the locale's character encoding",
1069  thus GNU libiconv would call this function a second time. */
1070  if (codeset[0] == '\0')
1071  codeset = "ASCII";
1072 # endif
1073  }
1074  }
1075 
1076 #else
1077 
1078  /* On old systems which lack it, use setlocale or getenv. */
1079  const char *locale = NULL;
1080 
1081  /* But most old systems don't have a complete set of locales. Some
1082  (like DJGPP) have only the C locale. Therefore we don't use setlocale
1083  here; it would return "C" when it doesn't support the locale name the
1084  user has set. */
1085 # if 0
1086  locale = setlocale (LC_CTYPE, NULL);
1087 # endif
1088  if (locale == NULL || locale[0] == '\0')
1089  {
1090  locale = getenv ("LC_ALL");
1091  if (locale == NULL || locale[0] == '\0')
1092  {
1093  locale = getenv ("LC_CTYPE");
1094  if (locale == NULL || locale[0] == '\0')
1095  locale = getenv ("LANG");
1096  if (locale == NULL)
1097  locale = "";
1098  }
1099  }
1100 
1101  /* Map locale name to canonical encoding name. */
1102  {
1103 # ifdef locale_table_defined
1104  const struct table_entry * const table = locale_table;
1105  size_t const table_size =
1106  sizeof (locale_table) / sizeof (struct table_entry);
1107  /* The table is sorted. Perform a binary search. */
1108  size_t hi = table_size;
1109  size_t lo = 0;
1110  while (lo < hi)
1111  {
1112  /* Invariant:
1113  for i < lo, strcmp (table[i].locale, locale) < 0,
1114  for i >= hi, strcmp (table[i].locale, locale) > 0. */
1115  size_t mid = (hi + lo) >> 1; /* >= lo, < hi */
1116  int cmp = strcmp (table[mid].locale, locale);
1117  if (cmp < 0)
1118  lo = mid + 1;
1119  else if (cmp > 0)
1120  hi = mid;
1121  else
1122  {
1123  /* Found an i with
1124  strcmp (table[i].locale, locale) == 0. */
1125  codeset = table[mid].canonical;
1126  goto done_table_lookup;
1127  }
1128  }
1129  if (0)
1130  done_table_lookup: ;
1131  else
1132 # endif
1133  {
1134  /* Did not find it in the table. */
1135  /* On Mac OS X, all modern locales use the UTF-8 encoding.
1136  BeOS and Haiku have a single locale, and it has UTF-8 encoding. */
1137 # if (defined __APPLE__ && defined __MACH__) || defined __BEOS__ || defined __HAIKU__
1138  codeset = "UTF-8";
1139 # else
1140  /* The canonical name cannot be determined. */
1141  /* Don't return an empty string. GNU libc and GNU libiconv interpret
1142  the empty string as denoting "the locale's character encoding",
1143  thus GNU libiconv would call this function a second time. */
1144  codeset = "ASCII";
1145 # endif
1146  }
1147  }
1148 
1149 #endif
1150 
1151 #ifdef DARWIN7
1152  /* Mac OS X sets MB_CUR_MAX to 1 when LC_ALL=C, and "UTF-8"
1153  (the default codeset) does not work when MB_CUR_MAX is 1. */
1154  if (strcmp (codeset, "UTF-8") == 0 && MB_CUR_MAX_L (uselocale (NULL)) <= 1)
1155  codeset = "ASCII";
1156 #endif
1157 
1158  return codeset;
1159 }
#define NULL
Definition: stddef.in.h:72
#define CODESET
Definition: langinfo.in.h:49
static const struct table_entry locale_table[]
Definition: localcharset.c:584
const char * locale_charset(void)
Definition: localcharset.c:831
char buf[4]
Definition: mbrtowc-impl.h:39
struct table_entry
Definition: localcharset.c:578
const char locale[17+1]
Definition: localcharset.c:579
const char canonical[11+1]
Definition: localcharset.c:580