"Fossies" - the Fresh Open Source Software Archive 
Member "dosfstools-4.2/src/charconv.c" (31 Jan 2021, 13207 Bytes) of package /linux/misc/dosfstools-4.2.tar.gz:
As a special service, "Fossies" has tried to format the requested source page into HTML using (guessed) C and C++ source code syntax highlighting (style: standard), with prefixed line numbers and a code folding option.
Alternatively, you can view or download the uninterpreted source code file here.
For more information about "charconv.c" see the
Fossies "Dox" file reference documentation and the latest
Fossies "Diffs" side-by-side code changes report:
4.1_vs_4.2.
1 /* charconv.c
2
3 Copyright (C) 2010 Alexander Korolkov <alexander.korolkov@gmail.com>
4 Copyright (C) 2018-2020 Pali Rohár <pali.rohar@gmail.com>
5
6 This program is free software: you can redistribute it and/or modify
7 it under the terms of the GNU General Public License as published by
8 the Free Software Foundation, either version 3 of the License, or
9 (at your option) any later version.
10
11 This program is distributed in the hope that it will be useful,
12 but WITHOUT ANY WARRANTY; without even the implied warranty of
13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 GNU General Public License for more details.
15
16 You should have received a copy of the GNU General Public License
17 along with this program. If not, see <http://www.gnu.org/licenses/>.
18
19 The complete text of the GNU General Public License
20 can be found in /usr/share/common-licenses/GPL-3 file.
21 */
22
23 #include "charconv.h"
24 #include <langinfo.h>
25 #include <locale.h>
26 #include <stdio.h>
27 #include <stdlib.h>
28 #include <string.h>
29 #include <errno.h>
30 #include <wchar.h>
31
32 #ifdef HAVE_ICONV
33 #include <iconv.h>
34 #endif
35
/*
 * Unicode code points for the CP850 bytes 0x80-0xFF, indexed by (byte & 0x7F).
 * Taken from:
 * http://www.unicode.org/Public/MAPPINGS/VENDORS/MICSFT/PC/CP850.TXT
 */
static const wchar_t cp850_table[128] = {
    /* 0x80 */ 0x00c7, 0x00fc, 0x00e9, 0x00e2, 0x00e4, 0x00e0, 0x00e5, 0x00e7,
    /* 0x88 */ 0x00ea, 0x00eb, 0x00e8, 0x00ef, 0x00ee, 0x00ec, 0x00c4, 0x00c5,
    /* 0x90 */ 0x00c9, 0x00e6, 0x00c6, 0x00f4, 0x00f6, 0x00f2, 0x00fb, 0x00f9,
    /* 0x98 */ 0x00ff, 0x00d6, 0x00dc, 0x00f8, 0x00a3, 0x00d8, 0x00d7, 0x0192,
    /* 0xA0 */ 0x00e1, 0x00ed, 0x00f3, 0x00fa, 0x00f1, 0x00d1, 0x00aa, 0x00ba,
    /* 0xA8 */ 0x00bf, 0x00ae, 0x00ac, 0x00bd, 0x00bc, 0x00a1, 0x00ab, 0x00bb,
    /* 0xB0 */ 0x2591, 0x2592, 0x2593, 0x2502, 0x2524, 0x00c1, 0x00c2, 0x00c0,
    /* 0xB8 */ 0x00a9, 0x2563, 0x2551, 0x2557, 0x255d, 0x00a2, 0x00a5, 0x2510,
    /* 0xC0 */ 0x2514, 0x2534, 0x252c, 0x251c, 0x2500, 0x253c, 0x00e3, 0x00c3,
    /* 0xC8 */ 0x255a, 0x2554, 0x2569, 0x2566, 0x2560, 0x2550, 0x256c, 0x00a4,
    /* 0xD0 */ 0x00f0, 0x00d0, 0x00ca, 0x00cb, 0x00c8, 0x0131, 0x00cd, 0x00ce,
    /* 0xD8 */ 0x00cf, 0x2518, 0x250c, 0x2588, 0x2584, 0x00a6, 0x00cc, 0x2580,
    /* 0xE0 */ 0x00d3, 0x00df, 0x00d4, 0x00d2, 0x00f5, 0x00d5, 0x00b5, 0x00fe,
    /* 0xE8 */ 0x00de, 0x00da, 0x00db, 0x00d9, 0x00fd, 0x00dd, 0x00af, 0x00b4,
    /* 0xF0 */ 0x00ad, 0x00b1, 0x2017, 0x00be, 0x00b6, 0x00a7, 0x00f7, 0x00b8,
    /* 0xF8 */ 0x00b0, 0x00a8, 0x00b7, 0x00b9, 0x00b3, 0x00b2, 0x25a0, 0x00a0,
};
57
/*
 * 7-bit ASCII transliterations for the CP850 bytes 0x80-0xFF,
 * indexed by (byte & 0x7F).
 */
static const char *const cp850_translit_table[128] = {
    /* 0x80 */ "C", "u", "e", "a", "a", "a", "a", "c",
    /* 0x88 */ "e", "e", "e", "i", "i", "i", "A", "A",
    /* 0x90 */ "E", "ae", "AE", "o", "o", "o", "u", "u",
    /* 0x98 */ "y", "O", "U", "o", "GBP", "O", "x", "f",
    /* 0xA0 */ "a", "i", "o", "u", "n", "N", "a", "o",
    /* 0xA8 */ "?", "(R)", "!", " 1/2 ", " 1/4 ", "!", "<<", ">>",
    /* 0xB0 */ "?", "?", "?", "|", "+", "A", "A", "A",
    /* 0xB8 */ "(C)", "?", "?", "?", "?", "c", "JPY", "+",
    /* 0xC0 */ "+", "+", "+", "+", "-", "+", "a", "A",
    /* 0xC8 */ "?", "?", "?", "?", "?", "?", "?", "?",
    /* 0xD0 */ "d", "D", "E", "E", "E", "i", "I", "I",
    /* 0xD8 */ "I", "+", "+", "?", "?", "|", "I", "?",
    /* 0xE0 */ "O", "ss", "O", "O", "o", "O", "u", "th",
    /* 0xE8 */ "TH", "U", "U", "U", "y", "Y", "?", "'",
    /* 0xF0 */ "-", "+-", "?", " 3/4 ", "?", "?", "/", ",",
    /* 0xF8 */ "?", "?", ".", "1", "3", "2", "?", " ",
};
77
78 static int wchar_string_to_cp850_string(char *out, const wchar_t *in, unsigned int out_size)
79 {
80 unsigned i, j;
81 for (i = 0; i < out_size-1 && in[i]; ++i) {
82 if (in[i] > 0 && in[i] < 0x80) {
83 out[i] = in[i];
84 continue;
85 }
86 for (j = 0; j < 0x80; ++j) {
87 if (in[i] == cp850_table[j]) {
88 out[i] = (0x80 | j);
89 break;
90 }
91 }
92 if (j == 0x80) {
93 fprintf(stderr, "Cannot convert input character 0x%04x to 'CP850': %s\n", (unsigned int)in[i], strerror(EILSEQ));
94 return 0;
95 }
96 }
97 if (in[i]) {
98 fprintf(stderr, "Cannot convert input string to 'CP850': String is too long\n");
99 return 0;
100 }
101 out[i] = 0;
102 return 1;
103 }
104
105 static int cp850_string_to_wchar_string(wchar_t *out, const char *in, unsigned int out_size)
106 {
107 unsigned i;
108 for (i = 0; i < out_size-1 && i < 11 && in[i]; ++i) {
109 out[i] = (in[i] & 0x80) ? cp850_table[in[i] & 0x7F] : in[i];
110 }
111 if (i < 11 && in[i]) {
112 fprintf(stderr, "Cannot convert input string to 'CP850': String is too long\n");
113 return 0;
114 }
115 out[i] = L'\0';
116 return 1;
117 }
118
119 static int cp850_char_to_printable(char **p, unsigned char c, unsigned int out_size)
120 {
121 size_t ret;
122 wchar_t wcs[2];
123 wcs[0] = (c & 0x80) ? cp850_table[c & 0x7F] : c;
124 wcs[1] = 0;
125 ret = wcstombs(*p, wcs, out_size);
126 if (ret == 0)
127 return 0;
128 if (ret != (size_t)-1)
129 *p += ret;
130 else if (!(c & 0x80))
131 *(*p++) = c;
132 else {
133 ret = strlen(cp850_translit_table[c & 0x7F]);
134 if (ret > out_size)
135 return 0;
136 memcpy(*p, cp850_translit_table[c & 0x7F], ret);
137 *p += ret;
138 }
139 return 1;
140 }
141
/*
 * Convert a string in the current locale's multibyte encoding to CP850.
 *
 * out      - destination buffer
 * in       - NUL-terminated multibyte input string
 * out_size - capacity of out in bytes, including the terminating NUL
 *
 * The input is first widened with mbstowcs() and then passed to
 * wchar_string_to_cp850_string(). The length check is done on the number of
 * characters (each becomes one CP850 byte), not on the number of input bytes,
 * so a multibyte input is not rejected merely because its locale encoding is
 * longer than its CP850 form.
 *
 * Returns 1 on success, 0 on failure (a message is printed to stderr).
 */
static int local_string_to_cp850_string(char *out, const char *in, unsigned int out_size)
{
    int ret;
    wchar_t *wcs;
    /* A multibyte string never yields more wide characters than it has bytes,
     * so strlen(in)+1 elements always hold the result plus terminator. */
    size_t wcs_size = strlen(in) + 1;

    wcs = calloc(wcs_size, sizeof(wchar_t));
    if (!wcs) {
        fprintf(stderr, "Cannot convert input string '%s' to 'CP850': %s\n", in, strerror(ENOMEM));
        return 0;
    }
    if (mbstowcs(wcs, in, wcs_size) == (size_t)-1) {
        fprintf(stderr, "Cannot convert input string '%s' to 'CP850': %s\n", in, strerror(errno));
        free(wcs);
        return 0;
    }
    if (wcslen(wcs) >= out_size) {
        fprintf(stderr, "Cannot convert input string '%s' to 'CP850': String is too long\n", in);
        free(wcs);
        return 0;
    }
    ret = wchar_string_to_cp850_string(out, wcs, out_size);
    free(wcs);
    return ret;
}
164
165 #ifdef HAVE_ICONV
166
/*
 * Open the pair of iconv descriptors that convert between a DOS codepage and
 * another encoding.
 *
 * codepage   - DOS codepage number; the iconv name "CP<codepage>" is used
 * local      - iconv name of the other encoding
 * to_local   - receives the descriptor converting codepage -> local; it is
 *              opened with the "//TRANSLIT" suffix first and without it if
 *              that fails
 * from_local - receives the descriptor converting local -> codepage
 *
 * Returns 1 when both descriptors were opened. Returns 0 otherwise, after
 * printing the reason to stderr; in that case no descriptor is left open and
 * both outputs are set to (iconv_t)-1.
 */
static int iconv_init_codepage(int codepage, const char *local, iconv_t *to_local, iconv_t *from_local)
{
    char codepage_name[32];

    snprintf(codepage_name, sizeof(codepage_name), "CP%d//TRANSLIT", codepage);
    *to_local = iconv_open(local, codepage_name);
    if (*to_local == (iconv_t)-1) {
        snprintf(codepage_name, sizeof(codepage_name), "CP%d", codepage);
        *to_local = iconv_open(local, codepage_name);
    }
    if (*to_local == (iconv_t)-1)
        fprintf(stderr, "Cannot initialize conversion from codepage %d to %s: %s\n", codepage, local, strerror(errno));

    snprintf(codepage_name, sizeof(codepage_name), "CP%d", codepage);
    *from_local = iconv_open(codepage_name, local);
    if (*from_local == (iconv_t)-1)
        fprintf(stderr, "Cannot initialize conversion from %s to codepage %d: %s\n", local, codepage, strerror(errno));

    if (*to_local != (iconv_t)-1 && *from_local != (iconv_t)-1)
        return 1;

    /* Only one direction may have opened; release it instead of leaking it. */
    if (*to_local != (iconv_t)-1) {
        iconv_close(*to_local);
        *to_local = (iconv_t)-1;
    }
    if (*from_local != (iconv_t)-1) {
        iconv_close(*from_local);
        *from_local = (iconv_t)-1;
    }
    return 0;
}
184
/* iconv descriptors opened by init_conversion() between the DOS codepage and
 * the locale's codeset (nl_langinfo(CODESET)) */
static iconv_t dos_to_local;
static iconv_t local_to_dos;
/* iconv descriptors opened by init_conversion() between the DOS codepage and
 * the "WCHAR_T" encoding */
static iconv_t dos_to_wchar;
static iconv_t wchar_to_dos;
/* codepage stored by init_conversion() once initialization succeeded */
static int used_codepage;
/* set to 1 when init_conversion() falls back to the internal CP850 tables */
static int internal_cp850;
191
192 /*
193 * Initialize conversion from codepage.
194 * codepage = -1 means default codepage.
195 * Returns non-zero on success, 0 on failure
196 */
197 static int init_conversion(int codepage)
198 {
199 static int initialized = -1;
200 if (initialized < 0) {
201 initialized = 1;
202 if (codepage < 0)
203 codepage = DEFAULT_DOS_CODEPAGE;
204 setlocale(LC_CTYPE, ""); /* initialize locale for CODESET */
205 if (!iconv_init_codepage(codepage, nl_langinfo(CODESET), &dos_to_local, &local_to_dos))
206 initialized = 0;
207 if (initialized && !iconv_init_codepage(codepage, "WCHAR_T", &dos_to_wchar, &wchar_to_dos))
208 initialized = 0;
209 if (!initialized && codepage == 850) {
210 fprintf(stderr, "Using internal CP850 conversion table\n");
211 internal_cp850 = 1; /* use internal CP850 conversion table */
212 initialized = 1;
213 }
214 if (initialized)
215 used_codepage = codepage;
216 }
217 return initialized;
218 }
219
/*
 * Public entry point for choosing the DOS codepage; forwards to
 * init_conversion() and returns its result (non-zero on success, 0 on failure).
 */
int set_dos_codepage(int codepage)
{
    const int status = init_conversion(codepage);

    return status;
}
224
225 int dos_char_to_printable(char **p, unsigned char c, unsigned int out_size)
226 {
227 char in[1] = { c };
228 ICONV_CONST char *pin = in;
229 size_t bytes_in = 1;
230 size_t bytes_out = out_size;
231 if (!init_conversion(-1))
232 return 0;
233 if (internal_cp850)
234 return cp850_char_to_printable(p, c, out_size);
235 return iconv(dos_to_local, &pin, &bytes_in, p, &bytes_out) != (size_t)-1;
236 }
237
238 int local_string_to_dos_string(char *out, char *in, unsigned int out_size)
239 {
240 ICONV_CONST char *pin = in;
241 char *pout = out;
242 size_t bytes_in = strlen(in);
243 size_t bytes_out = out_size-1;
244 size_t ret;
245 if (!init_conversion(-1))
246 return 0;
247 if (internal_cp850)
248 return local_string_to_cp850_string(out, in, out_size);
249 ret = iconv(local_to_dos, &pin, &bytes_in, &pout, &bytes_out);
250 if (ret == (size_t)-1) {
251 if (errno == E2BIG)
252 fprintf(stderr, "Cannot convert input string '%s' to 'CP%d': String is too long\n",
253 in, used_codepage);
254 else
255 fprintf(stderr, "Cannot convert input sequence '\\x%.02hhX' from codeset '%s' to 'CP%d': %s\n",
256 *pin, nl_langinfo(CODESET), used_codepage, strerror(errno));
257 iconv(local_to_dos, NULL, NULL, &pout, &bytes_out);
258 return 0;
259 } else {
260 ret = iconv(local_to_dos, NULL, NULL, &pout, &bytes_out);
261 if (ret == (size_t)-1) {
262 fprintf(stderr, "Cannot convert input string '%s' to 'CP%d': String is too long\n",
263 in, used_codepage);
264 return 0;
265 }
266 }
267 out[out_size-1-bytes_out] = 0;
268 return 1;
269 }
270
271 int dos_string_to_wchar_string(wchar_t *out, char *in, unsigned int out_size)
272 {
273 ICONV_CONST char *pin = in;
274 char *pout = (char *)out;
275 size_t bytes_in = strnlen(in, 11);
276 size_t bytes_out = out_size-sizeof(wchar_t);
277 size_t ret;
278 if (!init_conversion(-1))
279 return 0;
280 if (internal_cp850)
281 return cp850_string_to_wchar_string(out, in, out_size);
282 ret = iconv(dos_to_wchar, &pin, &bytes_in, &pout, &bytes_out);
283 if (ret == (size_t)-1) {
284 if (errno == E2BIG)
285 fprintf(stderr, "Cannot convert input string from 'CP%d': String is too long\n",
286 used_codepage);
287 else
288 fprintf(stderr, "Cannot convert input sequence '\\x%.02hhX' from 'CP%d': %s\n",
289 *pin, used_codepage, strerror(errno));
290 iconv(dos_to_wchar, NULL, NULL, &pout, &bytes_out);
291 return 0;
292 } else {
293 ret = iconv(dos_to_wchar, NULL, NULL, &pout, &bytes_out);
294 if (ret == (size_t)-1) {
295 fprintf(stderr, "Cannot convert input string from 'CP%d': String is too long\n",
296 used_codepage);
297 return 0;
298 }
299 }
300 out[(out_size-sizeof(wchar_t)-bytes_out)/sizeof(wchar_t)] = L'\0';
301 return 1;
302 }
303
304 int wchar_string_to_dos_string(char *out, wchar_t *in, unsigned int out_size)
305 {
306 ICONV_CONST char *pin = (char *)in;
307 char *pout = out;
308 size_t bytes_in = wcslen(in)*sizeof(wchar_t);
309 size_t bytes_out = out_size-1;
310 size_t ret;
311 if (!init_conversion(-1))
312 return 0;
313 if (internal_cp850)
314 return wchar_string_to_cp850_string(out, in, out_size);
315 ret = iconv(wchar_to_dos, &pin, &bytes_in, &pout, &bytes_out);
316 if (ret == (size_t)-1) {
317 if (errno == E2BIG)
318 fprintf(stderr, "Cannot convert input string '%ls' to 'CP%d': String is too long\n",
319 in, used_codepage);
320 else
321 fprintf(stderr, "Cannot convert input character '%lc' to 'CP%d': %s\n",
322 (wint_t)*(wchar_t *)pin, used_codepage, strerror(errno));
323 iconv(wchar_to_dos, NULL, NULL, &pout, &bytes_out);
324 return 0;
325 } else {
326 ret = iconv(wchar_to_dos, NULL, NULL, &pout, &bytes_out);
327 if (ret == (size_t)-1) {
328 fprintf(stderr, "Cannot convert input string '%ls' to 'CP%d': String is too long\n",
329 in, used_codepage);
330 return 0;
331 }
332 }
333 out[out_size-1-bytes_out] = 0;
334 return 1;
335 }
336
337 #else
338
339 int set_dos_codepage(int codepage)
340 {
341 static int initialized = -1;
342 if (initialized < 0) {
343 setlocale(LC_CTYPE, ""); /* initialize locale for wide character functions */
344 if (codepage < 0)
345 codepage = DEFAULT_DOS_CODEPAGE;
346 initialized = (codepage == 850) ? 1 : 0;
347 if (!initialized)
348 fprintf(stderr, "Cannot initialize unsupported codepage %d, only codepage 850 is supported\n", codepage);
349 }
350 return initialized;
351 }
352
/*
 * Build without iconv: print one DOS byte using the internal CP850 tables.
 * Returns the result of cp850_char_to_printable().
 */
int dos_char_to_printable(char **p, unsigned char c, unsigned int out_size)
{
    const int status = cp850_char_to_printable(p, c, out_size);

    return status;
}
357
/*
 * Build without iconv: convert a locale string to CP850 using the internal
 * tables. Returns the result of local_string_to_cp850_string().
 */
int local_string_to_dos_string(char *out, char *in, unsigned int out_size)
{
    const int status = local_string_to_cp850_string(out, in, out_size);

    return status;
}
362
/*
 * Build without iconv: convert a CP850 string to wide characters using the
 * internal table. Returns the result of cp850_string_to_wchar_string().
 */
int dos_string_to_wchar_string(wchar_t *out, char *in, unsigned int out_size)
{
    const int status = cp850_string_to_wchar_string(out, in, out_size);

    return status;
}
367
/*
 * Build without iconv: convert a wide string to CP850 using the internal
 * table. Returns the result of wchar_string_to_cp850_string().
 */
int wchar_string_to_dos_string(char *out, wchar_t *in, unsigned int out_size)
{
    const int status = wchar_string_to_cp850_string(out, in, out_size);

    return status;
}
372
373 #endif