"Fossies" - the Fresh Open Source Software Archive

Member "recode-3.7.12/src/outer.c" (15 Feb 2022, 16438 Bytes) of package /linux/misc/recode-3.7.12.tar.gz:


As a special service, "Fossies" has tried to format the requested source page as HTML, using (guessed) C and C++ source code syntax highlighting (style: standard) with prefixed line numbers and a code-folding option. Alternatively, you can view or download the uninterpreted source code file here. For more information about "outer.c", see the Fossies "Dox" file reference documentation and the latest Fossies "Diffs" side-by-side code changes report: 3.7.11_vs_3.7.12.

    1 /* Conversion of files between different charsets and surfaces.
    2    Copyright © 1990,92,93,94,96,97,98,99,00 Free Software Foundation, Inc.
    3    Contributed by François Pinard <pinard@iro.umontreal.ca>, 1990.
    4 
    5    This library is free software; you can redistribute it and/or
    6    modify it under the terms of the GNU Lesser General Public License
    7    as published by the Free Software Foundation; either version 3 of the
    8    License, or (at your option) any later version.
    9 
   10    This library is distributed in the hope that it will be
   11    useful, but WITHOUT ANY WARRANTY; without even the implied warranty
   12    of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
   13    Lesser General Public License for more details.
   14 
   15    You should have received a copy of the GNU Lesser General Public
   16    License along with the Recode Library; see the file `COPYING.LIB'.
   17    If not, see <https://www.gnu.org/licenses/>.
   18 */
   19 
   20 #include "config.h"
   21 #include "common.h"
   22 #include "hash.h"
   23 
   24 /*-----------------------------------------------------------------------.
   25 | This dummy fallback routine is used to flag the intent of a reversible |
   26 | coding as a fallback, which is the traditional Recode behaviour.       |
   27 `-----------------------------------------------------------------------*/
   28 
   29 _GL_ATTRIBUTE_CONST bool
   30 reversibility (_GL_UNUSED RECODE_SUBTASK subtask, _GL_UNUSED unsigned code)
   31 {
   32   return false;
   33 }
   34 
   35 /*-------------------------------------------------------------------------.
   36 | Allocate and initialize a new single step, save for the before and after |
   37 | charsets and quality.                            |
   38 `-------------------------------------------------------------------------*/
   39 
   40 static RECODE_SINGLE
   41 new_single_step (RECODE_OUTER outer)
   42 {
   43   RECODE_SINGLE single;
   44 
   45   if (!ALLOC (single, 1, struct recode_single))
   46     return NULL;
   47   single->next = outer->single_list;
   48   outer->single_list = single;
   49   outer->number_of_singles++;
   50 
   51   single->initial_step_table = NULL;
   52   single->init_routine = NULL;
   53   single->transform_routine = NULL;
   54   single->fallback_routine = reversibility;
   55 
   56   return single;
   57 }
   58 
   59 /*-------------------------------------------------------------------------.
   60 | Create and initialize a new single step for recoding between BEFORE_NAME |
   61 | and AFTER_NAME.  Give it a recoding QUALITY, also saving an INIT_ROUTINE |
   62 | and a TRANSFORM_ROUTINE functions.                                       |
   63 `-------------------------------------------------------------------------*/
   64 
   65 RECODE_SINGLE
   66 declare_single (RECODE_OUTER outer,
   67         const char *before_name, const char *after_name,
   68         struct recode_quality quality,
   69         Recode_init init_routine, Recode_transform transform_routine)
   70 {
   71   RECODE_SINGLE single = new_single_step (outer);
   72   RECODE_ALIAS before = NULL, after = NULL;
   73 
   74   if (!single)
   75     return NULL;
   76 
   77   if (strcmp (before_name, "data") == 0)
   78     {
   79       single->before = outer->data_symbol;
   80       after = find_alias (outer, after_name, SYMBOL_CREATE_DATA_SURFACE);
   81       single->after = after->symbol;
   82     }
   83   else if (strcmp(after_name, "data") == 0)
   84     {
   85       before = find_alias (outer, before_name, SYMBOL_CREATE_DATA_SURFACE);
   86       single->before = before->symbol;
   87       single->after = outer->data_symbol;
   88     }
   89   else
   90     {
   91       before = find_alias (outer, before_name, SYMBOL_CREATE_CHARSET);
   92       single->before = before->symbol;
   93       after = find_alias (outer, after_name, SYMBOL_CREATE_CHARSET);
   94       single->after = after->symbol;
   95     }
   96 
   97   if (!single->before || !single->after)
   98     {
   99       if (before)
  100         delete_alias (before);
  101       if (after)
  102         delete_alias (after);
  103       outer->single_list = single->next;
  104       free (single);
  105       return NULL;
  106     }
  107 
  108   single->quality = quality;
  109   single->init_routine = init_routine;
  110   single->transform_routine = transform_routine;
  111 
  112   if (single->before == outer->data_symbol)
  113     {
  114       if (single->after->resurfacer)
  115     recode_error (outer, _("Resurfacer set more than once for `%s'"),
  116               after_name);
  117       single->after->resurfacer = single;
  118     }
  119   else if (single->after == outer->data_symbol)
  120     {
  121       if (single->before->unsurfacer)
  122     recode_error (outer, _("Unsurfacer set more than once for `%s'"),
  123               before_name);
  124       single->before->unsurfacer = single;
  125     }
  126 
  127   return single;
  128 }
  129 
  130 /*---------------------------------------------------------------.
  131 | Declare a charset available through `iconv', given the NAME of |
  132 | this charset (which might already exist as an alias), and the  |
  133 | ICONV_NAME to use when calling `iconv'.  Make two single steps |
  134 | in and out of it.                                              |
  135 `---------------------------------------------------------------*/
  136 
  137 static bool
  138 internal_iconv (RECODE_SUBTASK subtask)
  139 {
  140   recode_if_nogo (RECODE_USER_ERROR, subtask);
  141   SUBTASK_RETURN (subtask);
  142 }
  143 
  144 bool
  145 declare_iconv (RECODE_OUTER outer, const char *name, const char *iconv_name)
  146 {
  147   RECODE_ALIAS alias;
  148   RECODE_SINGLE single;
  149 
  150   if (alias = find_alias (outer, name, ALIAS_FIND_AS_EITHER),
  151       !alias)
  152     if (alias = find_alias (outer, name, SYMBOL_CREATE_CHARSET),
  153     !alias)
  154       return false;
  155   assert(alias->symbol->type == RECODE_CHARSET);
  156 
  157   if (!alias->symbol->iconv_name)
  158     alias->symbol->iconv_name = iconv_name;
  159 
  160   if (single = new_single_step (outer), !single)
  161     return false;
  162   single->before = alias->symbol;
  163   single->after = outer->iconv_pivot;
  164   single->quality = outer->quality_variable_to_variable;
  165   single->init_routine = NULL;
  166   single->transform_routine = internal_iconv;
  167 
  168   if (single = new_single_step (outer), !single)
  169     return false;
  170   single->before = outer->iconv_pivot;
  171   single->after = alias->symbol;
  172   single->quality = outer->quality_variable_to_variable;
  173   single->init_routine = NULL;
  174   single->transform_routine = internal_iconv;
  175 
  176   return true;
  177 }
  178 
  179 /*--------------------------------------------------------------------------.
  180 | Associate an explode format DATA structure with charset NAME_COMBINED, an |
  181 | 8-bit charset.  A NULL value for NAME_EXPLODED implies UCS-2.  Otherwise, |
  182 | NAME_EXPLODED should be the name of a 8-bit based charset.                |
  183 `--------------------------------------------------------------------------*/
  184 
  185 bool
  186 declare_explode_data (RECODE_OUTER outer, const unsigned short *data,
  187               const char *name_combined, const char *name_exploded)
  188 {
  189   RECODE_ALIAS alias;
  190   RECODE_SYMBOL charset_combined;
  191   RECODE_SYMBOL charset_exploded;
  192   RECODE_SINGLE single;
  193 
  194   if (alias = find_alias (outer, name_combined, SYMBOL_CREATE_CHARSET),
  195       !alias)
  196     return false;
  197 
  198   charset_combined = alias->symbol;
  199   assert(charset_combined->type == RECODE_CHARSET);
  200 
  201   if (name_exploded)
  202     {
  203       if (alias = find_alias (outer, name_exploded, SYMBOL_CREATE_CHARSET),
  204       !alias)
  205     return false;
  206 
  207       charset_exploded = alias->symbol;
  208       assert(charset_exploded->type == RECODE_CHARSET);
  209     }
  210   else
  211     {
  212       charset_combined->data_type = RECODE_EXPLODE_DATA;
  213       charset_combined->data = (void *) data;
  214       charset_exploded = outer->ucs2_charset;
  215     }
  216 
  217   single = new_single_step (outer);
  218   if (!single)
  219     return false;
  220 
  221   single->before = charset_combined;
  222   single->after = charset_exploded;
  223   single->quality = outer->quality_byte_to_variable;
  224   single->initial_step_table = (void *) data;
  225   single->init_routine = init_explode;
  226   single->transform_routine
  227     = name_exploded ? explode_byte_byte : explode_byte_ucs2;
  228 
  229   single = new_single_step (outer);
  230   if (!single)
  231     return false;
  232 
  233   single->before = charset_exploded;
  234   single->after = charset_combined;
  235   single->quality = outer->quality_variable_to_byte;
  236   single->initial_step_table = (void *) data;
  237   single->init_routine = init_combine;
  238   single->transform_routine
  239     = name_exploded ? combine_byte_byte : combine_ucs2_byte;
  240 
  241   return true;
  242 }
  243 
  244 /*-------------------------------------------------------------------.
  245 | Associate an UCS-2 strip format DATA structure with charset NAME.  |
  246 `-------------------------------------------------------------------*/
  247 
  248 bool
  249 declare_strip_data (RECODE_OUTER outer, struct strip_data *data,
  250             const char *name)
  251 {
  252   RECODE_ALIAS alias;
  253   RECODE_SYMBOL charset;
  254   RECODE_SINGLE single;
  255 
  256   if (alias = find_alias (outer, name, SYMBOL_CREATE_CHARSET), !alias)
  257     return false;
  258 
  259   charset = alias->symbol;
  260   assert(charset->type == RECODE_CHARSET);
  261   charset->data_type = RECODE_STRIP_DATA;
  262   charset->data = data;
  263 
  264   single = new_single_step (outer);
  265   if (!single)
  266     return false;
  267 
  268   single->before = charset;
  269   single->after = outer->ucs2_charset;
  270   single->quality = outer->quality_byte_to_ucs2;
  271   single->transform_routine = transform_byte_to_ucs2;
  272 
  273   single = new_single_step (outer);
  274   if (!single)
  275     return false;
  276 
  277   single->before = outer->ucs2_charset;
  278   single->after = charset;
  279   single->quality = outer->quality_ucs2_to_byte;
  280   single->init_routine = init_ucs2_to_byte;
  281   single->transform_routine = transform_ucs2_to_byte;
  282 
  283   return true;
  284 }
  285 
  286 /*---------------------------------------------------------------.
  287 | For a given SINGLE step, roughly establish a conversion cost.  |
  288 `---------------------------------------------------------------*/
  289 
  290 static void
  291 estimate_single_cost (_GL_UNUSED RECODE_OUTER outer, RECODE_SINGLE single)
  292 {
  293   int cost;
  294 
  295   /* Ensure a small average cost for each single step, yet much trying to
  296      avoid single steps prone to loosing information.  */
  297 
  298   cost = single->quality.reversible ? 10 : 200;
  299 
  300   /* Use a few heuristics based on the byte size of both charsets.  */
  301 
  302   switch (single->quality.in_size)
  303     {
  304     case RECODE_1:
  305       /* The fastest is to get one character per read byte.  */
  306       cost += 15;
  307       break;
  308 
  309     case RECODE_2:
  310       /* Reading two requires a routine call and swapping considerations.  */
  311       cost += 25;
  312       break;
  313 
  314     case RECODE_4:
  315       /* Reading four is more work than reading two.  */
  316       cost += 30;
  317       break;
  318 
  319     case RECODE_N:
  320       /* Analysing varysizes is surely much harder than producing them.  */
  321       cost += 60;
  322 
  323     default:
  324       break;
  325     }
  326 
  327   switch (single->quality.out_size)
  328     {
  329     case RECODE_1:
  330       /* Information might be more often lost when not going through UCS.  */
  331       cost += 20;
  332       break;
  333 
  334     case RECODE_2:
  335       /* This is our best bet while writing.  */
  336       cost += 10;
  337       break;
  338 
  339     case RECODE_4:
  340       /* Writing four is more work than writing two.  */
  341       cost += 15;
  342       break;
  343 
  344     case RECODE_N:
  345       /* Writing varysizes requires loops and such.  */
  346       cost += 35;
  347       break;
  348 
  349     default:
  350       break;
  351     }
  352 
  353   /* Consider speed for fine tuning the cost.  */
  354 
  355   if (single->quality.slower)
  356     cost += 3;
  357   else if (single->quality.faster)
  358     cost -= 2;
  359 
  360   /* Write the price on the ticket.  */
  361 
  362   single->conversion_cost = cost;
  363   return;
  364 }
  365 
  366 /*----------------------------------------.
  367 | Initialize all collected single steps.  |
  368 `----------------------------------------*/
  369 
  370 #include "decsteps.h"
  371 bool module_iconv (struct recode_outer *);
  372 void delmodule_iconv (struct recode_outer *);
  373 
  374 static bool
  375 register_all_modules (RECODE_OUTER outer)
  376 {
  377   RECODE_ALIAS alias;
  378   RECODE_SINGLE single;
  379   unsigned counter;
  380   unsigned char *table;
  381 
  382   if (!ALLOC (table, 256, unsigned char))
  383     return false;
  384   for (counter = 0; counter < 256; counter++)
  385     table[counter] = counter;
  386   outer->one_to_same = table;
  387 
  388   prepare_for_aliases (outer);
  389   outer->single_list = NULL;
  390   outer->number_of_singles = 0;
  391 
  392   if (alias = find_alias (outer, "data", SYMBOL_CREATE_CHARSET), !alias)
  393     return false;
  394   outer->data_symbol = alias->symbol;
  395 
  396   if (alias = find_alias (outer, "ISO-10646-UCS-2", SYMBOL_CREATE_CHARSET),
  397       !alias)
  398     return false;
  399   assert(alias->symbol->type == RECODE_CHARSET);
  400   outer->ucs2_charset = alias->symbol;
  401 
  402   if (alias = find_alias (outer, ":iconv:", SYMBOL_CREATE_CHARSET),
  403       !alias)
  404     return false;
  405   assert(alias->symbol->type == RECODE_CHARSET);
  406   outer->iconv_pivot = alias->symbol;
  407   if (!declare_alias (outer, ":", ":iconv:"))
  408     return false;
  409   /* Needed for compatibility with Recode 3.6.  */
  410   if (!declare_alias (outer, ":libiconv:", ":iconv:"))
  411     return false;
  412 
  413   if (alias = find_alias (outer, "CR-LF", SYMBOL_CREATE_CHARSET), !alias)
  414     return false;
  415   alias->symbol->type = RECODE_DATA_SURFACE;
  416   outer->crlf_surface = alias->symbol;
  417 
  418   if (alias = find_alias (outer, "CR", SYMBOL_CREATE_CHARSET), !alias)
  419     return false;
  420   alias->symbol->type = RECODE_DATA_SURFACE;
  421   outer->cr_surface = alias->symbol;
  422 
  423   if (!declare_alias (outer, "ASCII", "ANSI_X3.4-1968"))
  424     return false;
  425   if (!declare_alias (outer, "BS", "ASCII-BS"))
  426     return false;
  427   if (!declare_alias (outer, "Latin-1", "ISO-8859-1"))
  428     return false;
  429 
  430 #include "inisteps.h"
  431 
  432   /* Force this one last: it does not segregate between charsets and aliases,
  433      confusing some other initialisations that would come after it.  */
  434   if (!make_argmatch_arrays (outer))
  435     return false;
  436   if (outer->use_iconv)
  437     if (!module_iconv (outer))
  438       return false;
  439 
  440   for (single = outer->single_list; single; single = single->next)
  441     estimate_single_cost (outer, single);
  442 
  443   return true;
  444 }
  445 
  446 static void
  447 unregister_all_modules (RECODE_OUTER outer)
  448 {
  449 #include "tersteps.h"
  450   if (outer->use_iconv)
  451     delmodule_iconv(outer);
  452 }
  453 
  454 /* Library interface.  */
  455 
  456 /* See the recode manual for a more detailed description of the library
  457    interface.  */
  458 
  459 /*-------------------------.
  460 | GLOBAL level functions.  |
  461 `-------------------------*/
  462 
  463 RECODE_OUTER
  464 recode_new_outer (unsigned flags)
  465 {
  466   RECODE_OUTER outer = (RECODE_OUTER) calloc (1, sizeof (struct recode_outer));
  467 
  468   if (!outer)
  469     {
  470       recode_error (NULL, _("Virtual memory exhausted"));
  471       if (flags & RECODE_AUTO_ABORT_FLAG)
  472     exit (1);
  473       return NULL;
  474     }
  475 
  476   outer->auto_abort = (flags & RECODE_AUTO_ABORT_FLAG) != 0;
  477   outer->use_iconv = (flags & RECODE_NO_ICONV_FLAG) == 0;
  478   outer->strict_mapping = (flags & RECODE_STRICT_MAPPING_FLAG) != 0;
  479   outer->force = (flags & RECODE_FORCE_FLAG) != 0;
  480 
  481   if (!register_all_modules (outer) || !make_argmatch_arrays (outer))
  482     {
  483       recode_delete_outer (outer);
  484       return NULL;
  485     }
  486 
  487   outer->quality_byte_reversible.in_size = RECODE_1;
  488   outer->quality_byte_reversible.out_size = RECODE_1;
  489   outer->quality_byte_reversible.reversible = true;
  490   outer->quality_byte_reversible.faster = true;
  491 
  492   outer->quality_byte_to_byte.in_size = RECODE_1;
  493   outer->quality_byte_to_byte.out_size = RECODE_1;
  494   outer->quality_byte_to_byte.faster = true;
  495 
  496   outer->quality_byte_to_ucs2.in_size = RECODE_1;
  497   outer->quality_byte_to_ucs2.out_size = RECODE_2;
  498 
  499   outer->quality_byte_to_variable.in_size = RECODE_1;
  500   outer->quality_byte_to_variable.out_size = RECODE_N;
  501 
  502   outer->quality_ucs2_to_byte.in_size = RECODE_2;
  503   outer->quality_ucs2_to_byte.out_size = RECODE_1;
  504 
  505   outer->quality_ucs2_to_variable.in_size = RECODE_2;
  506   outer->quality_ucs2_to_variable.out_size = RECODE_N;
  507 
  508   outer->quality_variable_to_byte.in_size = RECODE_N;
  509   outer->quality_variable_to_byte.out_size = RECODE_1;
  510   outer->quality_variable_to_byte.slower = true;
  511 
  512   outer->quality_variable_to_ucs2.in_size = RECODE_N;
  513   outer->quality_variable_to_ucs2.out_size = RECODE_2;
  514   outer->quality_variable_to_ucs2.slower = true;
  515 
  516   outer->quality_variable_to_variable.in_size = RECODE_N;
  517   outer->quality_variable_to_variable.out_size = RECODE_N;
  518   outer->quality_variable_to_variable.slower = true;
  519 
  520   return outer;
  521 }
  522 
  523 bool
  524 recode_delete_outer (RECODE_OUTER outer)
  525 {
  526   unregister_all_modules (outer);
  527   while (outer->number_of_symbols > 0)
  528     {
  529       RECODE_SYMBOL symbol = outer->symbol_list;
  530 
  531       outer->symbol_list = symbol->next;
  532       outer->number_of_symbols--;
  533       free (symbol);
  534     }
  535   while (outer->number_of_singles > 0)
  536     {
  537       RECODE_SINGLE single = outer->single_list;
  538 
  539       outer->single_list = single->next;
  540       outer->number_of_singles--;
  541       free (single);
  542     }
  543   free (outer->pair_restriction);
  544   if (outer->alias_table)
  545     hash_free ((Hash_table *) outer->alias_table);
  546   if (outer->argmatch_charset_array)
  547     {
  548       const char **cursor;
  549 
  550       for (cursor = outer->argmatch_charset_array; *cursor; cursor++)
  551         free ((char *) *cursor);
  552       for (cursor = outer->argmatch_surface_array; *cursor; cursor++)
  553         free ((char *) *cursor);
  554       free (outer->argmatch_charset_array);
  555     }
  556   free ((void *) outer->one_to_same);
  557   free (outer);
  558   return true;
  559 }