"Fossies" - the Fresh Open Source Software Archive

Member "recode-3.7.12/src/iconv.c" (17 Feb 2022, 8745 Bytes) of package /linux/misc/recode-3.7.12.tar.gz:


As a special service, "Fossies" has tried to format the requested source page into HTML using (guessed) C and C++ source code syntax highlighting (style: standard), with prefixed line numbers and a code folding option. Alternatively, you can view or download the uninterpreted source code file here. For more information about "iconv.c", see the Fossies "Dox" file reference documentation and the latest Fossies "Diffs" side-by-side code changes report: 3.7.11_vs_3.7.12.

    1 /* Conversion of files between different charsets and surfaces.
    2    Copyright © 1999-2022 Free Software Foundation, Inc.
    3    Contributed by François Pinard <pinard@iro.umontreal.ca>, 1999,
    4    and Bruno Haible <haible@clisp.cons.org>, 2000.
    5 
    6    This library is free software; you can redistribute it and/or
    7    modify it under the terms of the GNU Lesser General Public License
    8    as published by the Free Software Foundation; either version 3 of the
    9    License, or (at your option) any later version.
   10 
   11    This library is distributed in the hope that it will be
   12    useful, but WITHOUT ANY WARRANTY; without even the implied warranty
   13    of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
   14    Lesser General Public License for more details.
   15 
   16    You should have received a copy of the GNU Lesser General Public
   17    License along with the Recode Library; see the file `COPYING.LIB'.
   18    If not, see <https://www.gnu.org/licenses/>.
   19 */
   20 
   21 #include "config.h"
   22 #include "common.h"
   23 #include "decsteps.h"
   24 #include <iconv.h>
   25 #include "iconvdecl.h"
   26 
   27 /*--------------------------------------.
   28 | Use `iconv' to handle a double step.  |
   29 `--------------------------------------*/
   30 
   31 static void
   32 do_iconv (RECODE_OUTER outer,
   33           iconv_t conversion,
   34           char **input, size_t *input_left,
   35           char **output, size_t *output_left,
   36           int *saved_errno)
   37 {
   38   size_t converted = iconv (conversion, input, input_left, output, output_left);
   39   if (converted == (size_t) -1 && !(errno == EILSEQ && outer->force))
   40     *saved_errno = errno;
   41 }
   42 
   43 #define BUFFER_SIZE 2048
   44 
   45 static bool
   46 wrapped_transform (iconv_t conversion, RECODE_SUBTASK subtask)
   47 {
   48   char output_buffer[BUFFER_SIZE];
   49   char input_buffer[BUFFER_SIZE];
   50   int input_char = get_byte (subtask);
   51   char *cursor = input_buffer;
   52   bool drain_first = false;
   53 
   54   while (true)
   55     {
   56       /* The output buffer is fully available at this point.  */
   57 
   58       char *input = input_buffer;
   59       char *output = output_buffer;
   60       size_t input_left = 0;
   61       size_t output_left = BUFFER_SIZE;
   62       int saved_errno = 0;
   63 
   64       if (drain_first)
   65         {
   66           /* Drain all accumulated partial state and emit output
   67              to return to the initial shift state.  */
   68           do_iconv (subtask->task->request->outer,
   69                     conversion,
   70                     NULL, NULL,
   71                     &output, &output_left,
   72                     &saved_errno);
   73         }
   74 
   75       if (saved_errno == 0)
   76         {
   77           /* Continue filling the input buffer.  */
   78           while (input_char != EOF && cursor < input_buffer + BUFFER_SIZE)
   79             {
   80               *cursor++ = input_char;
   81               input_char = get_byte (subtask);
   82             }
   83 
   84           if (cursor == input_buffer)
   85             {
   86               if (output == output_buffer)
   87                 {
   88                   /* All work has been done, just make sure we drained.  */
   89                   if (drain_first)
   90                     break;
   91                   drain_first = true;
   92                   continue;
   93                 }
   94             }
   95           else
   96             {
   97               /* Convert accumulated input and add it to the output buffer.  */
   98               input = input_buffer;
   99               input_left = cursor - input_buffer;
  100               do_iconv (subtask->task->request->outer,
  101                         conversion,
  102                         &input, &input_left,
  103                         &output, &output_left,
  104                         &saved_errno);
  105             }
  106         }
  107 
  108       /* Send the converted result, so freeing the output buffer.  */
  109       for (cursor = output_buffer; cursor < output; cursor++)
  110         put_byte (*cursor, subtask);
  111 
  112       /* Act according to the outcome of the iconv call.  */
  113 
  114       drain_first = false;
  115       if (saved_errno != 0 && saved_errno != E2BIG)
  116     {
  117       if (saved_errno == EILSEQ)
  118         {
  119           /* Check whether the input was really just untranslatable.  */
  120               enum recode_error recode_error = RECODE_INVALID_INPUT;
  121           RECODE_CONST_STEP step = subtask->step;
  122           iconv_t check_conversion = iconv_open (step->before->iconv_name,
  123                              step->before->iconv_name);
  124 
  125           /* On error, give up and assume input is invalid.  */
  126           if (input_left > 0 && check_conversion != (iconv_t) -1)
  127         {
  128                   /* Assume iconv does not modify its input.  */
  129           char *check_input = input;
  130           size_t check_input_left = input_left;
  131                   size_t check_output_left = input_left;
  132           char *check_output_buffer, *check_output;
  133                   RECODE_OUTER outer = subtask->task->request->outer;
  134 
  135                   if ((check_output = ALLOC (check_output_buffer, input_left, char)) != NULL)
  136                     {
  137                       size_t check_converted = iconv (check_conversion,
  138                                                       &check_input, &check_input_left,
  139                                                       &check_output, &check_output_left);
  140 
  141                       if (check_converted != (size_t) -1)
  142                         recode_error = RECODE_UNTRANSLATABLE;
  143 
  144                       free (check_output_buffer);
  145                     }
  146 
  147                   iconv_close (check_conversion);
  148         }
  149 
  150           /* Invalid or untranslatable input.  */
  151           RETURN_IF_NOGO (recode_error, subtask);
  152         }
  153       else if (saved_errno == EINVAL)
  154         {
  155           if (input + input_left < input_buffer + BUFFER_SIZE
  156           && input_char == EOF)
  157         /* Incomplete multibyte sequence at end of input.  */
  158         RETURN_IF_NOGO (RECODE_INVALID_INPUT, subtask);
  159         }
  160       else
  161         {
  162           recode_perror (subtask->task->request->outer, "iconv ()");
  163           RETURN_IF_NOGO (RECODE_SYSTEM_ERROR, subtask);
  164         }
  165     }
  166 
  167       /* Move back any unprocessed part of the input buffer.  */
  168       for (cursor = input_buffer; input_left != 0; input_left--)
  169         *cursor++ = *input++;
  170     }
  171 
  172   SUBTASK_RETURN (subtask);
  173 }
  174 
  175 static bool
  176 ends_with (const char *s, size_t s_len, const char *suff, size_t suff_len)
  177 {
  178   return suff_len <= s_len && !memcmp (s + s_len - suff_len, suff, suff_len);
  179 }
  180 
  181 static char *
  182 iconv_fix_options (RECODE_OUTER outer, const char *charset)
  183 {
  184   size_t charset_len = strlen (charset);
  185   bool translit = false;
  186 
  187   if (ends_with (charset, charset_len, "-translit", strlen ("-translit")))
  188     {
  189       translit = true;
  190       charset_len -= strlen ("-translit");
  191     }
  192 
  193   char *result;
  194   if (asprintf (&result, "%.*s%s%s", (int) charset_len, charset,
  195                 translit ? "//TRANSLIT" : "",
  196                 outer->strict_mapping ? "//IGNORE": "")
  197       == -1)
  198     return NULL;
  199   return result;
  200 }
  201 
  202 bool
  203 transform_with_iconv (RECODE_SUBTASK subtask)
  204 {
  205   RECODE_OUTER outer = subtask->task->request->outer;
  206   RECODE_CONST_STEP step = subtask->step;
  207   char *tocode = iconv_fix_options (outer, step->after->iconv_name);
  208   const char *fromcode = step->before->iconv_name;
  209   iconv_t conversion = (iconv_t) -1;
  210 
  211   if (tocode)
  212     conversion = iconv_open (tocode, fromcode);
  213   if (conversion == (iconv_t) -1)
  214     {
  215       recode_if_nogo (RECODE_SYSTEM_ERROR, subtask);
  216       free (tocode);
  217       SUBTASK_RETURN (subtask);
  218     }
  219 
  220   bool status = wrapped_transform (conversion, subtask);
  221   iconv_close (conversion);
  222   free (tocode);
  223   return status;
  224 }
  225 
  226 /*------------------------------------------------------.
  227 | Declare all character sets which `iconv' may handle.  |
  228 `------------------------------------------------------*/
  229 
  230 bool
  231 module_iconv (RECODE_OUTER outer)
  232 {
  233   const char **cursor;
  234 
  235   for (cursor = iconv_name_list; *cursor; cursor++)
  236     {
  237       const char **aliases = cursor;
  238       const char *charset_name = *cursor;
  239 
  240       /* Scan aliases for some charset which would already be known.  If any,
  241      use its official name as a charset.  Else, use the first alias.  */
  242 
  243       while (*cursor)
  244     {
  245           RECODE_ALIAS alias
  246             = find_alias (outer, *cursor, ALIAS_FIND_AS_CHARSET);
  247 
  248       if (alias)
  249         {
  250           charset_name = alias->symbol->name;
  251           break;
  252         }
  253       cursor++;
  254     }
  255 
  256       if (!declare_iconv (outer, charset_name, *aliases))
  257     return false;
  258 
  259       /* Declare all aliases, given they bring something we do not already
  260      know.  Even then, we still declare too many useless aliases, as the
  261      disambiguating tables are not recomputed as we go.  FIXME!  */
  262 
  263       for (cursor = aliases; *cursor; cursor++)
  264     {
  265       RECODE_ALIAS alias
  266         = find_alias (outer, *cursor, ALIAS_FIND_AS_CHARSET);
  267 
  268       /* If there is a charset contradiction, call declare_alias
  269          nevertheless, as the error processing will occur there.  */
  270       if (!alias || alias->symbol->name != charset_name)
  271         if (!declare_alias (outer, *cursor, charset_name))
  272           return false;
  273     }
  274     }
  275 
  276   return true;
  277 }
  278 
  279 void
  280 delmodule_iconv (_GL_UNUSED RECODE_OUTER outer)
  281 {
  282 }