"Fossies" - the Fresh Open Source Software Archive

Member "dosfstools-4.2/src/charconv.c" (31 Jan 2021, 13207 Bytes) of package /linux/misc/dosfstools-4.2.tar.gz:


As a special service, "Fossies" has tried to format the requested source page into HTML using (guessed) C and C++ source code syntax highlighting (style: standard), with prefixed line numbers and a code folding option. Alternatively, you can view or download the uninterpreted source code file here. For more information about "charconv.c", see the Fossies "Dox" file reference documentation and the latest Fossies "Diffs" side-by-side code changes report: 4.1_vs_4.2.

    1 /* charconv.c
    2 
    3    Copyright (C) 2010 Alexander Korolkov <alexander.korolkov@gmail.com>
    4    Copyright (C) 2018-2020 Pali Rohár <pali.rohar@gmail.com>
    5 
    6    This program is free software: you can redistribute it and/or modify
    7    it under the terms of the GNU General Public License as published by
    8    the Free Software Foundation, either version 3 of the License, or
    9    (at your option) any later version.
   10 
   11    This program is distributed in the hope that it will be useful,
   12    but WITHOUT ANY WARRANTY; without even the implied warranty of
   13    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
   14    GNU General Public License for more details.
   15 
   16    You should have received a copy of the GNU General Public License
   17    along with this program. If not, see <http://www.gnu.org/licenses/>.
   18 
   19    The complete text of the GNU General Public License
   20    can be found in /usr/share/common-licenses/GPL-3 file.
   21 */
   22 
   23 #include "charconv.h"
   24 #include <langinfo.h>
   25 #include <locale.h>
   26 #include <stdio.h>
   27 #include <stdlib.h>
   28 #include <string.h>
   29 #include <errno.h>
   30 #include <wchar.h>
   31 
   32 #ifdef HAVE_ICONV
   33 #include <iconv.h>
   34 #endif
   35 
   36 /* CP850 table for 0x80-0xFF range from:
   37  * http://www.unicode.org/Public/MAPPINGS/VENDORS/MICSFT/PC/CP850.TXT
   38  */
   39 static const wchar_t cp850_table[128] = {
   40     0x00c7, 0x00fc, 0x00e9, 0x00e2, 0x00e4, 0x00e0, 0x00e5, 0x00e7,
   41     0x00ea, 0x00eb, 0x00e8, 0x00ef, 0x00ee, 0x00ec, 0x00c4, 0x00c5,
   42     0x00c9, 0x00e6, 0x00c6, 0x00f4, 0x00f6, 0x00f2, 0x00fb, 0x00f9,
   43     0x00ff, 0x00d6, 0x00dc, 0x00f8, 0x00a3, 0x00d8, 0x00d7, 0x0192,
   44     0x00e1, 0x00ed, 0x00f3, 0x00fa, 0x00f1, 0x00d1, 0x00aa, 0x00ba,
   45     0x00bf, 0x00ae, 0x00ac, 0x00bd, 0x00bc, 0x00a1, 0x00ab, 0x00bb,
   46     0x2591, 0x2592, 0x2593, 0x2502, 0x2524, 0x00c1, 0x00c2, 0x00c0,
   47     0x00a9, 0x2563, 0x2551, 0x2557, 0x255d, 0x00a2, 0x00a5, 0x2510,
   48     0x2514, 0x2534, 0x252c, 0x251c, 0x2500, 0x253c, 0x00e3, 0x00c3,
   49     0x255a, 0x2554, 0x2569, 0x2566, 0x2560, 0x2550, 0x256c, 0x00a4,
   50     0x00f0, 0x00d0, 0x00ca, 0x00cb, 0x00c8, 0x0131, 0x00cd, 0x00ce,
   51     0x00cf, 0x2518, 0x250c, 0x2588, 0x2584, 0x00a6, 0x00cc, 0x2580,
   52     0x00d3, 0x00df, 0x00d4, 0x00d2, 0x00f5, 0x00d5, 0x00b5, 0x00fe,
   53     0x00de, 0x00da, 0x00db, 0x00d9, 0x00fd, 0x00dd, 0x00af, 0x00b4,
   54     0x00ad, 0x00b1, 0x2017, 0x00be, 0x00b6, 0x00a7, 0x00f7, 0x00b8,
   55     0x00b0, 0x00a8, 0x00b7, 0x00b9, 0x00b3, 0x00b2, 0x25a0, 0x00a0,
   56 };
   57 
   58 /* CP850 translit table to 7bit ASCII for 0x80-0xFF range */
   59 static const char *const cp850_translit_table[128] = {
   60     "C",   "u",   "e",  "a",     "a",     "a", "a",   "c",
   61     "e",   "e",   "e",  "i",     "i",     "i", "A",   "A",
   62     "E",   "ae",  "AE", "o",     "o",     "o", "u",   "u",
   63     "y",   "O",   "U",  "o",     "GBP",   "O", "x",   "f",
   64     "a",   "i",   "o",  "u",     "n",     "N", "a",   "o",
   65     "?",   "(R)", "!",  " 1/2 ", " 1/4 ", "!", "<<",  ">>",
   66     "?",   "?",   "?",  "|",     "+",     "A", "A",   "A",
   67     "(C)", "?",   "?",  "?",     "?",     "c", "JPY", "+",
   68     "+",   "+",   "+",  "+",     "-",     "+", "a",   "A",
   69     "?",   "?",   "?",  "?",     "?",     "?", "?",   "?",
   70     "d",   "D",   "E",  "E",     "E",     "i", "I",   "I",
   71     "I",   "+",   "+",  "?",     "?",     "|", "I",   "?",
   72     "O",   "ss",  "O",  "O",     "o",     "O", "u",   "th",
   73     "TH",  "U",   "U",  "U",     "y",     "Y", "?",   "'",
   74     "-",   "+-",  "?",  " 3/4 ", "?",     "?", "/",   ",",
   75     "?",   "?",   ".",  "1",     "3",     "2", "?",   " ",
   76 };
   77 
   78 static int wchar_string_to_cp850_string(char *out, const wchar_t *in, unsigned int out_size)
   79 {
   80     unsigned i, j;
   81     for (i = 0; i < out_size-1 && in[i]; ++i) {
   82         if (in[i] > 0 && in[i] < 0x80) {
   83             out[i] = in[i];
   84             continue;
   85         }
   86         for (j = 0; j < 0x80; ++j) {
   87             if (in[i] == cp850_table[j]) {
   88                 out[i] = (0x80 | j);
   89                 break;
   90             }
   91         }
   92         if (j == 0x80) {
   93             fprintf(stderr, "Cannot convert input character 0x%04x to 'CP850': %s\n", (unsigned int)in[i], strerror(EILSEQ));
   94             return 0;
   95         }
   96     }
   97     if (in[i]) {
   98         fprintf(stderr, "Cannot convert input string to 'CP850': String is too long\n");
   99         return 0;
  100     }
  101     out[i] = 0;
  102     return 1;
  103 }
  104 
  105 static int cp850_string_to_wchar_string(wchar_t *out, const char *in, unsigned int out_size)
  106 {
  107     unsigned i;
  108     for (i = 0; i < out_size-1 && i < 11 && in[i]; ++i) {
  109         out[i] = (in[i] & 0x80) ? cp850_table[in[i] & 0x7F] : in[i];
  110     }
  111     if (i < 11 && in[i]) {
  112         fprintf(stderr, "Cannot convert input string to 'CP850': String is too long\n");
  113         return 0;
  114     }
  115     out[i] = L'\0';
  116     return 1;
  117 }
  118 
  119 static int cp850_char_to_printable(char **p, unsigned char c, unsigned int out_size)
  120 {
  121     size_t ret;
  122     wchar_t wcs[2];
  123     wcs[0] = (c & 0x80) ? cp850_table[c & 0x7F] : c;
  124     wcs[1] = 0;
  125     ret = wcstombs(*p, wcs, out_size);
  126     if (ret == 0)
  127         return 0;
  128     if (ret != (size_t)-1)
  129         *p += ret;
  130     else if (!(c & 0x80))
  131         *(*p++) = c;
  132     else {
  133         ret = strlen(cp850_translit_table[c & 0x7F]);
  134         if (ret > out_size)
  135             return 0;
  136         memcpy(*p, cp850_translit_table[c & 0x7F], ret);
  137         *p += ret;
  138     }
  139     return 1;
  140 }
  141 
  142 static int local_string_to_cp850_string(char *out, const char *in, unsigned int out_size)
  143 {
  144     int ret;
  145     wchar_t *wcs;
  146     if (strlen(in) >= out_size) {
  147         fprintf(stderr, "Cannot convert input string '%s' to 'CP850': String is too long\n", in);
  148         return 0;
  149     }
  150     wcs = calloc(out_size, sizeof(wchar_t));
  151     if (!wcs) {
  152         fprintf(stderr, "Cannot convert input string '%s' to 'CP850': %s\n", in, strerror(ENOMEM));
  153         return 0;
  154     }
  155     if (mbstowcs(wcs, in, out_size) == (size_t)-1) {
  156         fprintf(stderr, "Cannot convert input string '%s' to 'CP850': %s\n", in, strerror(errno));
  157         free(wcs);
  158         return 0;
  159     }
  160     ret = wchar_string_to_cp850_string(out, wcs, out_size);
  161     free(wcs);
  162     return ret;
  163 }
  164 
  165 #ifdef HAVE_ICONV
  166 
  167 static int iconv_init_codepage(int codepage, const char *local, iconv_t *to_local, iconv_t *from_local)
  168 {
  169     char codepage_name[32];
  170     snprintf(codepage_name, sizeof(codepage_name), "CP%d//TRANSLIT", codepage);
  171     *to_local = iconv_open(local, codepage_name);
  172     if (*to_local == (iconv_t) - 1) {
  173         snprintf(codepage_name, sizeof(codepage_name), "CP%d", codepage);
  174         *to_local = iconv_open(local, codepage_name);
  175     }
  176     if (*to_local == (iconv_t) - 1)
  177         fprintf(stderr, "Cannot initialize conversion from codepage %d to %s: %s\n", codepage, local, strerror(errno));
  178     snprintf(codepage_name, sizeof(codepage_name), "CP%d", codepage);
  179     *from_local = iconv_open(codepage_name, local);
  180     if (*from_local == (iconv_t) - 1)
  181         fprintf(stderr, "Cannot initialize conversion from %s to codepage %d: %s\n", local, codepage, strerror(errno));
  182     return (*to_local != (iconv_t)-1 && *from_local != (iconv_t)-1) ? 1 : 0;
  183 }
  184 
  185 static iconv_t dos_to_local;
  186 static iconv_t local_to_dos;
  187 static iconv_t dos_to_wchar;
  188 static iconv_t wchar_to_dos;
  189 static int used_codepage;
  190 static int internal_cp850;
  191 
  192 /*
  193  * Initialize conversion from codepage.
  194  * codepage = -1 means default codepage.
  195  * Returns non-zero on success, 0 on failure
  196  */
  197 static int init_conversion(int codepage)
  198 {
  199     static int initialized = -1;
  200     if (initialized < 0) {
  201     initialized = 1;
  202     if (codepage < 0)
  203         codepage = DEFAULT_DOS_CODEPAGE;
  204     setlocale(LC_CTYPE, "");    /* initialize locale for CODESET */
  205     if (!iconv_init_codepage(codepage, nl_langinfo(CODESET), &dos_to_local, &local_to_dos))
  206         initialized = 0;
  207     if (initialized && !iconv_init_codepage(codepage, "WCHAR_T", &dos_to_wchar, &wchar_to_dos))
  208         initialized = 0;
  209     if (!initialized && codepage == 850) {
  210         fprintf(stderr, "Using internal CP850 conversion table\n");
  211         internal_cp850 = 1; /* use internal CP850 conversion table */
  212         initialized = 1;
  213     }
  214     if (initialized)
  215         used_codepage = codepage;
  216     }
  217     return initialized;
  218 }
  219 
  220 int set_dos_codepage(int codepage)
  221 {
  222     return init_conversion(codepage);
  223 }
  224 
  225 int dos_char_to_printable(char **p, unsigned char c, unsigned int out_size)
  226 {
  227     char in[1] = { c };
  228     ICONV_CONST char *pin = in;
  229     size_t bytes_in = 1;
  230     size_t bytes_out = out_size;
  231     if (!init_conversion(-1))
  232     return 0;
  233     if (internal_cp850)
  234         return cp850_char_to_printable(p, c, out_size);
  235     return iconv(dos_to_local, &pin, &bytes_in, p, &bytes_out) != (size_t)-1;
  236 }
  237 
  238 int local_string_to_dos_string(char *out, char *in, unsigned int out_size)
  239 {
  240     ICONV_CONST char *pin = in;
  241     char *pout = out;
  242     size_t bytes_in = strlen(in);
  243     size_t bytes_out = out_size-1;
  244     size_t ret;
  245     if (!init_conversion(-1))
  246         return 0;
  247     if (internal_cp850)
  248         return local_string_to_cp850_string(out, in, out_size);
  249     ret = iconv(local_to_dos, &pin, &bytes_in, &pout, &bytes_out);
  250     if (ret == (size_t)-1) {
  251         if (errno == E2BIG)
  252             fprintf(stderr, "Cannot convert input string '%s' to 'CP%d': String is too long\n",
  253                     in, used_codepage);
  254         else
  255             fprintf(stderr, "Cannot convert input sequence '\\x%.02hhX' from codeset '%s' to 'CP%d': %s\n",
  256                     *pin, nl_langinfo(CODESET), used_codepage, strerror(errno));
  257         iconv(local_to_dos, NULL, NULL, &pout, &bytes_out);
  258         return 0;
  259     } else {
  260         ret = iconv(local_to_dos, NULL, NULL, &pout, &bytes_out);
  261         if (ret == (size_t)-1) {
  262             fprintf(stderr, "Cannot convert input string '%s' to 'CP%d': String is too long\n",
  263                     in, used_codepage);
  264             return 0;
  265         }
  266     }
  267     out[out_size-1-bytes_out] = 0;
  268     return 1;
  269 }
  270 
  271 int dos_string_to_wchar_string(wchar_t *out, char *in, unsigned int out_size)
  272 {
  273     ICONV_CONST char *pin = in;
  274     char *pout = (char *)out;
  275     size_t bytes_in = strnlen(in, 11);
  276     size_t bytes_out = out_size-sizeof(wchar_t);
  277     size_t ret;
  278     if (!init_conversion(-1))
  279         return 0;
  280     if (internal_cp850)
  281         return cp850_string_to_wchar_string(out, in, out_size);
  282     ret = iconv(dos_to_wchar, &pin, &bytes_in, &pout, &bytes_out);
  283     if (ret == (size_t)-1) {
  284         if (errno == E2BIG)
  285             fprintf(stderr, "Cannot convert input string from 'CP%d': String is too long\n",
  286                     used_codepage);
  287         else
  288             fprintf(stderr, "Cannot convert input sequence '\\x%.02hhX' from 'CP%d': %s\n",
  289                     *pin, used_codepage, strerror(errno));
  290         iconv(dos_to_wchar, NULL, NULL, &pout, &bytes_out);
  291         return 0;
  292     } else {
  293         ret = iconv(dos_to_wchar, NULL, NULL, &pout, &bytes_out);
  294         if (ret == (size_t)-1) {
  295             fprintf(stderr, "Cannot convert input string from 'CP%d': String is too long\n",
  296                     used_codepage);
  297             return 0;
  298         }
  299     }
  300     out[(out_size-sizeof(wchar_t)-bytes_out)/sizeof(wchar_t)] = L'\0';
  301     return 1;
  302 }
  303 
  304 int wchar_string_to_dos_string(char *out, wchar_t *in, unsigned int out_size)
  305 {
  306     ICONV_CONST char *pin = (char *)in;
  307     char *pout = out;
  308     size_t bytes_in = wcslen(in)*sizeof(wchar_t);
  309     size_t bytes_out = out_size-1;
  310     size_t ret;
  311     if (!init_conversion(-1))
  312         return 0;
  313     if (internal_cp850)
  314         return wchar_string_to_cp850_string(out, in, out_size);
  315     ret = iconv(wchar_to_dos, &pin, &bytes_in, &pout, &bytes_out);
  316     if (ret == (size_t)-1) {
  317         if (errno == E2BIG)
  318             fprintf(stderr, "Cannot convert input string '%ls' to 'CP%d': String is too long\n",
  319                     in, used_codepage);
  320         else
  321             fprintf(stderr, "Cannot convert input character '%lc' to 'CP%d': %s\n",
  322                     (wint_t)*(wchar_t *)pin, used_codepage, strerror(errno));
  323         iconv(wchar_to_dos, NULL, NULL, &pout, &bytes_out);
  324         return 0;
  325     } else {
  326         ret = iconv(wchar_to_dos, NULL, NULL, &pout, &bytes_out);
  327         if (ret == (size_t)-1) {
  328             fprintf(stderr, "Cannot convert input string '%ls' to 'CP%d': String is too long\n",
  329                     in, used_codepage);
  330             return 0;
  331         }
  332     }
  333     out[out_size-1-bytes_out] = 0;
  334     return 1;
  335 }
  336 
  337 #else
  338 
  339 int set_dos_codepage(int codepage)
  340 {
  341     static int initialized = -1;
  342     if (initialized < 0) {
  343         setlocale(LC_CTYPE, ""); /* initialize locale for wide character functions */
  344         if (codepage < 0)
  345             codepage = DEFAULT_DOS_CODEPAGE;
  346         initialized = (codepage == 850) ? 1 : 0;
  347         if (!initialized)
  348             fprintf(stderr, "Cannot initialize unsupported codepage %d, only codepage 850 is supported\n", codepage);
  349     }
  350     return initialized;
  351 }
  352 
  353 int dos_char_to_printable(char **p, unsigned char c, unsigned int out_size)
  354 {
  355     return cp850_char_to_printable(p, c, out_size);
  356 }
  357 
  358 int local_string_to_dos_string(char *out, char *in, unsigned int out_size)
  359 {
  360     return local_string_to_cp850_string(out, in, out_size);
  361 }
  362 
  363 int dos_string_to_wchar_string(wchar_t *out, char *in, unsigned int out_size)
  364 {
  365     return cp850_string_to_wchar_string(out, in, out_size);
  366 }
  367 
  368 int wchar_string_to_dos_string(char *out, wchar_t *in, unsigned int out_size)
  369 {
  370     return wchar_string_to_cp850_string(out, in, out_size);
  371 }
  372 
  373 #endif