dosfstools  4.2
About: dosfstools are utilities to create, check and label (MS-DOS) FAT filesystems.
  Fossies Dox: dosfstools-4.2.tar.gz  ("unofficial" and yet experimental doxygen-generated source code documentation)  

charconv.c
Go to the documentation of this file.
1 /* charconv.c
2 
3  Copyright (C) 2010 Alexander Korolkov <alexander.korolkov@gmail.com>
4  Copyright (C) 2018-2020 Pali Rohár <pali.rohar@gmail.com>
5 
6  This program is free software: you can redistribute it and/or modify
7  it under the terms of the GNU General Public License as published by
8  the Free Software Foundation, either version 3 of the License, or
9  (at your option) any later version.
10 
11  This program is distributed in the hope that it will be useful,
12  but WITHOUT ANY WARRANTY; without even the implied warranty of
13  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14  GNU General Public License for more details.
15 
16  You should have received a copy of the GNU General Public License
17  along with this program. If not, see <http://www.gnu.org/licenses/>.
18 
19  The complete text of the GNU General Public License
20  can be found in /usr/share/common-licenses/GPL-3 file.
21 */
22 
23 #include "charconv.h"
24 #include <langinfo.h>
25 #include <locale.h>
26 #include <stdio.h>
27 #include <stdlib.h>
28 #include <string.h>
29 #include <errno.h>
30 #include <wchar.h>
31 
32 #ifdef HAVE_ICONV
33 #include <iconv.h>
34 #endif
35 
/*
 * Unicode code points for the upper half (0x80-0xFF) of code page 850.
 * Entry [b - 0x80] is the code point of CP850 byte b. Values come from:
 * http://www.unicode.org/Public/MAPPINGS/VENDORS/MICSFT/PC/CP850.TXT
 */
static const wchar_t cp850_table[128] = {
    /* 0x80 */ 0x00c7, 0x00fc, 0x00e9, 0x00e2, 0x00e4, 0x00e0, 0x00e5, 0x00e7,
    /* 0x88 */ 0x00ea, 0x00eb, 0x00e8, 0x00ef, 0x00ee, 0x00ec, 0x00c4, 0x00c5,
    /* 0x90 */ 0x00c9, 0x00e6, 0x00c6, 0x00f4, 0x00f6, 0x00f2, 0x00fb, 0x00f9,
    /* 0x98 */ 0x00ff, 0x00d6, 0x00dc, 0x00f8, 0x00a3, 0x00d8, 0x00d7, 0x0192,
    /* 0xA0 */ 0x00e1, 0x00ed, 0x00f3, 0x00fa, 0x00f1, 0x00d1, 0x00aa, 0x00ba,
    /* 0xA8 */ 0x00bf, 0x00ae, 0x00ac, 0x00bd, 0x00bc, 0x00a1, 0x00ab, 0x00bb,
    /* 0xB0 */ 0x2591, 0x2592, 0x2593, 0x2502, 0x2524, 0x00c1, 0x00c2, 0x00c0,
    /* 0xB8 */ 0x00a9, 0x2563, 0x2551, 0x2557, 0x255d, 0x00a2, 0x00a5, 0x2510,
    /* 0xC0 */ 0x2514, 0x2534, 0x252c, 0x251c, 0x2500, 0x253c, 0x00e3, 0x00c3,
    /* 0xC8 */ 0x255a, 0x2554, 0x2569, 0x2566, 0x2560, 0x2550, 0x256c, 0x00a4,
    /* 0xD0 */ 0x00f0, 0x00d0, 0x00ca, 0x00cb, 0x00c8, 0x0131, 0x00cd, 0x00ce,
    /* 0xD8 */ 0x00cf, 0x2518, 0x250c, 0x2588, 0x2584, 0x00a6, 0x00cc, 0x2580,
    /* 0xE0 */ 0x00d3, 0x00df, 0x00d4, 0x00d2, 0x00f5, 0x00d5, 0x00b5, 0x00fe,
    /* 0xE8 */ 0x00de, 0x00da, 0x00db, 0x00d9, 0x00fd, 0x00dd, 0x00af, 0x00b4,
    /* 0xF0 */ 0x00ad, 0x00b1, 0x2017, 0x00be, 0x00b6, 0x00a7, 0x00f7, 0x00b8,
    /* 0xF8 */ 0x00b0, 0x00a8, 0x00b7, 0x00b9, 0x00b3, 0x00b2, 0x25a0, 0x00a0,
};
57 
/*
 * 7-bit ASCII stand-ins for the upper half (0x80-0xFF) of code page 850.
 * Entry [b - 0x80] is the replacement text for CP850 byte b; "?" marks
 * characters that have no reasonable ASCII approximation.
 */
static const char *const cp850_translit_table[128] = {
    /* 0x80 */ "C", "u", "e", "a", "a", "a", "a", "c",
    /* 0x88 */ "e", "e", "e", "i", "i", "i", "A", "A",
    /* 0x90 */ "E", "ae", "AE", "o", "o", "o", "u", "u",
    /* 0x98 */ "y", "O", "U", "o", "GBP", "O", "x", "f",
    /* 0xA0 */ "a", "i", "o", "u", "n", "N", "a", "o",
    /* 0xA8 */ "?", "(R)", "!", " 1/2 ", " 1/4 ", "!", "<<", ">>",
    /* 0xB0 */ "?", "?", "?", "|", "+", "A", "A", "A",
    /* 0xB8 */ "(C)", "?", "?", "?", "?", "c", "JPY", "+",
    /* 0xC0 */ "+", "+", "+", "+", "-", "+", "a", "A",
    /* 0xC8 */ "?", "?", "?", "?", "?", "?", "?", "?",
    /* 0xD0 */ "d", "D", "E", "E", "E", "i", "I", "I",
    /* 0xD8 */ "I", "+", "+", "?", "?", "|", "I", "?",
    /* 0xE0 */ "O", "ss", "O", "O", "o", "O", "u", "th",
    /* 0xE8 */ "TH", "U", "U", "U", "y", "Y", "?", "'",
    /* 0xF0 */ "-", "+-", "?", " 3/4 ", "?", "?", "/", ",",
    /* 0xF8 */ "?", "?", ".", "1", "3", "2", "?", " ",
};
77 
78 static int wchar_string_to_cp850_string(char *out, const wchar_t *in, unsigned int out_size)
79 {
80  unsigned i, j;
81  for (i = 0; i < out_size-1 && in[i]; ++i) {
82  if (in[i] > 0 && in[i] < 0x80) {
83  out[i] = in[i];
84  continue;
85  }
86  for (j = 0; j < 0x80; ++j) {
87  if (in[i] == cp850_table[j]) {
88  out[i] = (0x80 | j);
89  break;
90  }
91  }
92  if (j == 0x80) {
93  fprintf(stderr, "Cannot convert input character 0x%04x to 'CP850': %s\n", (unsigned int)in[i], strerror(EILSEQ));
94  return 0;
95  }
96  }
97  if (in[i]) {
98  fprintf(stderr, "Cannot convert input string to 'CP850': String is too long\n");
99  return 0;
100  }
101  out[i] = 0;
102  return 1;
103 }
104 
105 static int cp850_string_to_wchar_string(wchar_t *out, const char *in, unsigned int out_size)
106 {
107  unsigned i;
108  for (i = 0; i < out_size-1 && i < 11 && in[i]; ++i) {
109  out[i] = (in[i] & 0x80) ? cp850_table[in[i] & 0x7F] : in[i];
110  }
111  if (i < 11 && in[i]) {
112  fprintf(stderr, "Cannot convert input string to 'CP850': String is too long\n");
113  return 0;
114  }
115  out[i] = L'\0';
116  return 1;
117 }
118 
119 static int cp850_char_to_printable(char **p, unsigned char c, unsigned int out_size)
120 {
121  size_t ret;
122  wchar_t wcs[2];
123  wcs[0] = (c & 0x80) ? cp850_table[c & 0x7F] : c;
124  wcs[1] = 0;
125  ret = wcstombs(*p, wcs, out_size);
126  if (ret == 0)
127  return 0;
128  if (ret != (size_t)-1)
129  *p += ret;
130  else if (!(c & 0x80))
131  *(*p++) = c;
132  else {
133  ret = strlen(cp850_translit_table[c & 0x7F]);
134  if (ret > out_size)
135  return 0;
136  memcpy(*p, cp850_translit_table[c & 0x7F], ret);
137  *p += ret;
138  }
139  return 1;
140 }
141 
/*
 * Convert a string in the current locale's multibyte encoding to CP850.
 *
 * out      - destination buffer
 * in       - NUL-terminated multibyte input
 * out_size - size passed on to wchar_string_to_cp850_string(); also the
 *            number of wide characters allocated for the intermediate copy
 *
 * The input is widened with mbstowcs() into a temporary heap buffer and then
 * handed to wchar_string_to_cp850_string(), whose result is returned.
 * Returns 0 after printing a diagnostic if the input is too long, the
 * allocation fails, or mbstowcs() rejects the input.
 */
static int local_string_to_cp850_string(char *out, const char *in, unsigned int out_size)
{
    wchar_t *wide;
    int ok;

    if (strlen(in) >= out_size) {
        fprintf(stderr, "Cannot convert input string '%s' to 'CP850': String is too long\n", in);
        return 0;
    }

    wide = calloc(out_size, sizeof(wchar_t));
    if (wide == NULL) {
        fprintf(stderr, "Cannot convert input string '%s' to 'CP850': %s\n", in, strerror(ENOMEM));
        return 0;
    }

    if (mbstowcs(wide, in, out_size) != (size_t)-1) {
        ok = wchar_string_to_cp850_string(out, wide, out_size);
    } else {
        fprintf(stderr, "Cannot convert input string '%s' to 'CP850': %s\n", in, strerror(errno));
        ok = 0;
    }

    free(wide);
    return ok;
}
164 
165 #ifdef HAVE_ICONV
166 
/*
 * Open the pair of iconv descriptors that convert between DOS codepage
 * "CP<codepage>" and the encoding named by local.
 *
 * codepage   - numeric DOS codepage
 * local      - iconv name of the other encoding
 * to_local   - out: descriptor converting codepage -> local. Opened with
 *              "//TRANSLIT" first, retried without it if that fails.
 * from_local - out: descriptor converting local -> codepage
 *
 * Returns 1 when both descriptors were opened. Returns 0 otherwise, after
 * printing a diagnostic for each failed direction; in that case any
 * descriptor that did open is closed again and both outputs are set to
 * (iconv_t)-1, so a partial failure does not leak a descriptor.
 */
static int iconv_init_codepage(int codepage, const char *local, iconv_t *to_local, iconv_t *from_local)
{
    char codepage_name[32];

    snprintf(codepage_name, sizeof(codepage_name), "CP%d//TRANSLIT", codepage);
    *to_local = iconv_open(local, codepage_name);
    if (*to_local == (iconv_t)-1) {
        /* Not every iconv implementation understands //TRANSLIT. */
        snprintf(codepage_name, sizeof(codepage_name), "CP%d", codepage);
        *to_local = iconv_open(local, codepage_name);
    }
    if (*to_local == (iconv_t)-1)
        fprintf(stderr, "Cannot initialize conversion from codepage %d to %s: %s\n", codepage, local, strerror(errno));

    snprintf(codepage_name, sizeof(codepage_name), "CP%d", codepage);
    *from_local = iconv_open(codepage_name, local);
    if (*from_local == (iconv_t)-1)
        fprintf(stderr, "Cannot initialize conversion from %s to codepage %d: %s\n", local, codepage, strerror(errno));

    if (*to_local != (iconv_t)-1 && *from_local != (iconv_t)-1)
        return 1;

    /* Only one direction opened: release it rather than leaking it. */
    if (*to_local != (iconv_t)-1) {
        iconv_close(*to_local);
        *to_local = (iconv_t)-1;
    }
    if (*from_local != (iconv_t)-1) {
        iconv_close(*from_local);
        *from_local = (iconv_t)-1;
    }
    return 0;
}
184 
185 static iconv_t dos_to_local;
186 static iconv_t local_to_dos;
187 static iconv_t dos_to_wchar;
188 static iconv_t wchar_to_dos;
189 static int used_codepage;
190 static int internal_cp850;
191 
192 /*
193  * Initialize conversion from codepage.
194  * codepage = -1 means default codepage.
195  * Returns non-zero on success, 0 on failure
196  */
197 static int init_conversion(int codepage)
198 {
199  static int initialized = -1;
200  if (initialized < 0) {
201  initialized = 1;
202  if (codepage < 0)
203  codepage = DEFAULT_DOS_CODEPAGE;
204  setlocale(LC_CTYPE, ""); /* initialize locale for CODESET */
205  if (!iconv_init_codepage(codepage, nl_langinfo(CODESET), &dos_to_local, &local_to_dos))
206  initialized = 0;
207  if (initialized && !iconv_init_codepage(codepage, "WCHAR_T", &dos_to_wchar, &wchar_to_dos))
208  initialized = 0;
209  if (!initialized && codepage == 850) {
210  fprintf(stderr, "Using internal CP850 conversion table\n");
211  internal_cp850 = 1; /* use internal CP850 conversion table */
212  initialized = 1;
213  }
214  if (initialized)
215  used_codepage = codepage;
216  }
217  return initialized;
218 }
219 
/*
 * Select the DOS codepage used by the conversion functions in this file.
 * Thin public wrapper around init_conversion(); returns its result
 * (non-zero on success, 0 on failure).
 */
int set_dos_codepage(int codepage)
{
    const int ok = init_conversion(codepage);

    return ok;
}
224 
225 int dos_char_to_printable(char **p, unsigned char c, unsigned int out_size)
226 {
227  char in[1] = { c };
228  ICONV_CONST char *pin = in;
229  size_t bytes_in = 1;
230  size_t bytes_out = out_size;
231  if (!init_conversion(-1))
232  return 0;
233  if (internal_cp850)
234  return cp850_char_to_printable(p, c, out_size);
235  return iconv(dos_to_local, &pin, &bytes_in, p, &bytes_out) != (size_t)-1;
236 }
237 
238 int local_string_to_dos_string(char *out, char *in, unsigned int out_size)
239 {
240  ICONV_CONST char *pin = in;
241  char *pout = out;
242  size_t bytes_in = strlen(in);
243  size_t bytes_out = out_size-1;
244  size_t ret;
245  if (!init_conversion(-1))
246  return 0;
247  if (internal_cp850)
248  return local_string_to_cp850_string(out, in, out_size);
249  ret = iconv(local_to_dos, &pin, &bytes_in, &pout, &bytes_out);
250  if (ret == (size_t)-1) {
251  if (errno == E2BIG)
252  fprintf(stderr, "Cannot convert input string '%s' to 'CP%d': String is too long\n",
253  in, used_codepage);
254  else
255  fprintf(stderr, "Cannot convert input sequence '\\x%.02hhX' from codeset '%s' to 'CP%d': %s\n",
256  *pin, nl_langinfo(CODESET), used_codepage, strerror(errno));
257  iconv(local_to_dos, NULL, NULL, &pout, &bytes_out);
258  return 0;
259  } else {
260  ret = iconv(local_to_dos, NULL, NULL, &pout, &bytes_out);
261  if (ret == (size_t)-1) {
262  fprintf(stderr, "Cannot convert input string '%s' to 'CP%d': String is too long\n",
263  in, used_codepage);
264  return 0;
265  }
266  }
267  out[out_size-1-bytes_out] = 0;
268  return 1;
269 }
270 
271 int dos_string_to_wchar_string(wchar_t *out, char *in, unsigned int out_size)
272 {
273  ICONV_CONST char *pin = in;
274  char *pout = (char *)out;
275  size_t bytes_in = strnlen(in, 11);
276  size_t bytes_out = out_size-sizeof(wchar_t);
277  size_t ret;
278  if (!init_conversion(-1))
279  return 0;
280  if (internal_cp850)
281  return cp850_string_to_wchar_string(out, in, out_size);
282  ret = iconv(dos_to_wchar, &pin, &bytes_in, &pout, &bytes_out);
283  if (ret == (size_t)-1) {
284  if (errno == E2BIG)
285  fprintf(stderr, "Cannot convert input string from 'CP%d': String is too long\n",
286  used_codepage);
287  else
288  fprintf(stderr, "Cannot convert input sequence '\\x%.02hhX' from 'CP%d': %s\n",
289  *pin, used_codepage, strerror(errno));
290  iconv(dos_to_wchar, NULL, NULL, &pout, &bytes_out);
291  return 0;
292  } else {
293  ret = iconv(dos_to_wchar, NULL, NULL, &pout, &bytes_out);
294  if (ret == (size_t)-1) {
295  fprintf(stderr, "Cannot convert input string from 'CP%d': String is too long\n",
296  used_codepage);
297  return 0;
298  }
299  }
300  out[(out_size-sizeof(wchar_t)-bytes_out)/sizeof(wchar_t)] = L'\0';
301  return 1;
302 }
303 
304 int wchar_string_to_dos_string(char *out, wchar_t *in, unsigned int out_size)
305 {
306  ICONV_CONST char *pin = (char *)in;
307  char *pout = out;
308  size_t bytes_in = wcslen(in)*sizeof(wchar_t);
309  size_t bytes_out = out_size-1;
310  size_t ret;
311  if (!init_conversion(-1))
312  return 0;
313  if (internal_cp850)
314  return wchar_string_to_cp850_string(out, in, out_size);
315  ret = iconv(wchar_to_dos, &pin, &bytes_in, &pout, &bytes_out);
316  if (ret == (size_t)-1) {
317  if (errno == E2BIG)
318  fprintf(stderr, "Cannot convert input string '%ls' to 'CP%d': String is too long\n",
319  in, used_codepage);
320  else
321  fprintf(stderr, "Cannot convert input character '%lc' to 'CP%d': %s\n",
322  (wint_t)*(wchar_t *)pin, used_codepage, strerror(errno));
323  iconv(wchar_to_dos, NULL, NULL, &pout, &bytes_out);
324  return 0;
325  } else {
326  ret = iconv(wchar_to_dos, NULL, NULL, &pout, &bytes_out);
327  if (ret == (size_t)-1) {
328  fprintf(stderr, "Cannot convert input string '%ls' to 'CP%d': String is too long\n",
329  in, used_codepage);
330  return 0;
331  }
332  }
333  out[out_size-1-bytes_out] = 0;
334  return 1;
335 }
336 
337 #else
338 
339 int set_dos_codepage(int codepage)
340 {
341  static int initialized = -1;
342  if (initialized < 0) {
343  setlocale(LC_CTYPE, ""); /* initialize locale for wide character functions */
344  if (codepage < 0)
345  codepage = DEFAULT_DOS_CODEPAGE;
346  initialized = (codepage == 850) ? 1 : 0;
347  if (!initialized)
348  fprintf(stderr, "Cannot initialize unsupported codepage %d, only codepage 850 is supported\n", codepage);
349  }
350  return initialized;
351 }
352 
/*
 * Append a printable form of one DOS byte to the buffer at *p (build
 * without iconv). Forwards all arguments unchanged to
 * cp850_char_to_printable() and returns its result.
 */
int dos_char_to_printable(char **p, unsigned char c, unsigned int out_size)
{
    const int ok = cp850_char_to_printable(p, c, out_size);

    return ok;
}
357 
/*
 * Convert a locale-encoded string to the DOS codepage (build without
 * iconv). Forwards all arguments unchanged to
 * local_string_to_cp850_string() and returns its result.
 */
int local_string_to_dos_string(char *out, char *in, unsigned int out_size)
{
    const int ok = local_string_to_cp850_string(out, in, out_size);

    return ok;
}
362 
/*
 * Convert a DOS codepage string to a wide string (build without iconv).
 * Forwards all arguments unchanged to cp850_string_to_wchar_string() and
 * returns its result.
 */
int dos_string_to_wchar_string(wchar_t *out, char *in, unsigned int out_size)
{
    const int ok = cp850_string_to_wchar_string(out, in, out_size);

    return ok;
}
367 
/*
 * Convert a wide string to the DOS codepage (build without iconv).
 * Forwards all arguments unchanged to wchar_string_to_cp850_string() and
 * returns its result.
 */
int wchar_string_to_dos_string(char *out, wchar_t *in, unsigned int out_size)
{
    const int ok = wchar_string_to_cp850_string(out, in, out_size);

    return ok;
}
372 
373 #endif
static int local_string_to_cp850_string(char *out, const char *in, unsigned int out_size)
Definition: charconv.c:142
static int cp850_string_to_wchar_string(wchar_t *out, const char *in, unsigned int out_size)
Definition: charconv.c:105
int dos_string_to_wchar_string(wchar_t *out, char *in, unsigned int out_size)
Definition: charconv.c:363
int wchar_string_to_dos_string(char *out, wchar_t *in, unsigned int out_size)
Definition: charconv.c:368
static int wchar_string_to_cp850_string(char *out, const wchar_t *in, unsigned int out_size)
Definition: charconv.c:78
static const char *const cp850_translit_table[128]
Definition: charconv.c:59
int local_string_to_dos_string(char *out, char *in, unsigned int out_size)
Definition: charconv.c:358
static int cp850_char_to_printable(char **p, unsigned char c, unsigned int out_size)
Definition: charconv.c:119
int dos_char_to_printable(char **p, unsigned char c, unsigned int out_size)
Definition: charconv.c:353
static const wchar_t cp850_table[128]
Definition: charconv.c:39
int set_dos_codepage(int codepage)
Definition: charconv.c:339
#define DEFAULT_DOS_CODEPAGE
Definition: charconv.h:28