"Fossies" - the Fresh Open Source Software Archive

Member "utrac-0.3.2/src/ut_charset.c" (4 Jan 2009, 21110 Bytes) of package /linux/privat/old/utrac-0.3.2.tgz:


As a special service, "Fossies" has tried to format the requested source page into HTML using (guessed) C and C++ syntax highlighting (style: standard), with prefixed line numbers and a code-folding option. Alternatively, you can view or download the uninterpreted source code file here. For more information about "ut_charset.c", see the Fossies "Dox" file reference documentation.

    1 /***************************************************************************
    2  *            ut_charset.c
    3  *
    4  *  Fri Apr 23 15:24:30 2004
    5  *  Copyright  2004  Alliance MCA
    6  *  Written by : Antoine Calando (antoine@alliancemca.net)
    7  ****************************************************************************/
    8 /*
    9  *  This program is free software; you can redistribute it and/or modify
   10  *  it under the terms of the GNU General Public License as published by
   11  *  the Free Software Foundation; either version 2 of the License, or
   12  *  (at your option) any later version.
   13  *
   14  *  This program is distributed in the hope that it will be useful,
   15  *  but WITHOUT ANY WARRANTY; without even the implied warranty of
   16  *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
   17  *  GNU Library General Public License for more details.
   18  *
   19  *  You should have received a copy of the GNU General Public License
   20  *  along with this program; if not, write to the Free Software
   21  *  Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
   22  */
   23  
   24  
   25 /*!\file
   26  * \brief Functions which parse the charset.dat file.
   27  *
   28  * \author Antoine Calando (antoine@alliancemca.net)
   29  *
   30  * \todo EC les fonction inline be fonctionne qu'avec gcc !! il faudrait mettre
   31  *       une macro UT_INLINE dans un header.
   32  *       AC ???
   33  */
   34 
   35 #define _UT_CHARSET_C_
   36 
   37 #include <stdlib.h>
   38 #include <stdio.h>
   39 #include <endian.h>
   40 #include <byteswap.h>
   41 #define __USE_GNU   //for strndup
   42 #include <string.h>
   43 
   44 #include "utrac.h"
   45 #include "ut_charset.h"
   46 
   47 //#undef UT_DEBUG
   48 //#define UT_DEBUG 3
   49 #include "debug.h"
   50 
   51 // ***************************************************************************************
   52 // const char * charmaps_filename = "/home/antoine/dev/libimport/charmaps_categ.txt";
   53 
   54 //! \brief  Keywords used in file charset.dat. 
   55 const static char * charmap_keyword = "Charmap:";
   56 const static char * alias_keyword = "Alias:";
   57 const static char * common_name_keyword = "CommonName:";
   58 const static char * comment_keyword = "Comment:";
   59 const static char * language_keyword = "Language:";
   60 const static char * system_keyword = "System:";
   61 const static char * language_def_keyword = "DefineLanguage:";
   62 const static char * system_def_keyword = "DefineSystem:";
   63 
   64 //! \brief Alphabet names that can be recognized in file charset.dat
   65 const static char * SCRIPT_NAME[] = { "LATIN", "CYRILLIC", "ARABIC", "GREEK", "HEBREW", "THAI", NULL};
   66 
   67 // ***************************************************************************************
   68 /*!
   69  * \brief test si le caractère ASCII (sur un octet) est un espace ou une tabulation
   70  * \note EC pourquoi ne pas utiliser isblank à la place de is_blank ? (a cause du char ?)
   71  * \note AC parce que isblank est spécifique à la glibc! (réponse trouvé à posteriori :)
   72  */
   73 static inline bool is_blank (char c) { return (c==' ' || c=='\t'); }
   74 
   75 /*!
   76  * \brief test si le caractère ASCII (sur un octet) est une fin de ligne
   77  * \note EC On a réservé des octets de fin lors du malloc pour charger le fichier texte, ces
   78  *       octets devaient contenir la dernière fin de ligne si elle n'est pas présente et
   79  *       le caractère 0 final (une ligne devrait toujours ce finir par la combinaison de fin
   80  *       de ligne. De plus, un \r n'est pas forcemment une fin de ligne, cela peut être
   81  *       un saut de ligne dans un champ... Dans quels cas est utilsée cette fonction ? Ne
   82  *       faudrait-il pas plusieurs fonctions ?
   83  * \note AC Ok pour la première remarque (un '\n' a été rajouté à la fin du fichier dans
   84  *       ut_load_file() et le test sur 0 a été retiré ici). Sinon le '\r' est forcement
   85  *       une fin de ligne car il s'agit ici du fichier contenant les infos charsets
   86  *       qui peut au pire être au format CR/LF/CRLF (selon système) mais rien d'autre.
   87  * 
   88  */
   89 static inline bool is_eol (char c)   { return (c=='\n' || c=='\r' /*|| c=='\0'*/); }
   90 
   91 /*!
   92  * \brief test si le caractère ASCII (sur un octet) est une fin de ligne ou un début de commentaire
   93  * \note EC pourquoi ne pas utiliser is_eol ?
   94  * \note AC pour le test sur '#' (un appel, même inline, embrouillerait le source) 
   95  */
   96 static inline bool is_eol_c (char c) { return (c=='#' || c=='\n' || c=='\r' /*|| c=='\0'*/); }
   97 
   98 /*!
   99  * \brief Link used to store temporarily UtCharset structures in a linked list
  100  * \note EC Une définition de structure doit se trouver dans un header
  101  * \note AC Elle n'est nécessaire qu'ici, ça permet d'alléger les .h
  102  */
  103 typedef struct UtCharmapLink {
  104     UtCharset * charset;
  105     struct UtCharmapLink * next;
  106 } UtCharmapLink;
  107 
  108 /**************************************************************************/
  109 /*!
  110  * \brief Expend size of an UtLangSys dynamic array
  111  *
  112  * \bug EC Il n'y a pas de test du retour de realloc !
  113  *       La fonction pouvant planter (realloc), elle doit renvoyer un code d'erreur.
  114  *      AC corrigé.
  115  */
  116 static UtCode expend_lang_sys (UtLangSys *lang_sys) {
  117 
  118     lang_sys->n_max += UT_LANG_SYS_ALLOC_STEP;  
  119     lang_sys->name = (char**)  realloc (lang_sys->name, lang_sys->n_max*sizeof(char*));
  120     //lang_sys->code = (ushort*) realloc (lang_sys->code, lang_sys->n_max*sizeof(ushort));
  121     lang_sys->code = (char*) realloc (lang_sys->code, lang_sys->n_max*2);
  122     //lang_sys->code[0] = 0; lang_sys->code[1] = 0;
  123     
  124     if (!lang_sys->name || !lang_sys->code) return UT_MALLOC_ERROR;
  125     //if (!lang_sys->name   ) return UT_MALLOC_ERROR;
  126     else return UT_OK;
  127         
  128     DBG3 ("Lang/sys dynamic array (at %p) expended to %d elements", lang_sys, lang_sys->n_max)
  129 }
  130 
  131 /**************************************************************************/
  132 /*!
  133  * \brief Copy a string from file buffer.
  134  *
  135  * \bug  EC Dans le cas ou la premiere ligne d'un buffer lu en mémoire ne contienne pas de données
  136  *       pertinantes (ligne vide, ligne d'espace, etc.), la fin de ligne serait touvée, puis
  137  *       lors de la boucle de recherche du dernier caractère espace ou tabulation, on remonterait
  138  *       à *(buffer-1) ce qui entrainerait une segmentation fault (lors de la commande ou lors
  139  *       du free).
  140  *       AC La fonction n'est appelé que sur une ligne débutant par une commande, donc de la mémoire
  141  *       lisible.
  142  * \bug  EC le retour de strndup() n'est pas testé ! 
  143  *       AC pfff...  pour un dizaine d'octet max... c'est de la diptèrophilie...
  144  */
  145 static UtCode parse_string_line (char** scan_in, char ** dst) {
  146 
  147     char *scan = *scan_in;
  148 
  149     while (is_blank(*scan)) scan++; //trim space before language name
  150 
  151     char * name_beg = scan;
  152     while (!is_eol_c(*scan)) scan++; //find eol or comment
  153     do scan--; while (is_blank(*scan)); //go back until first nonblank char
  154     
  155     if (scan-name_beg<0) return UT_STRING_MISSING_ERROR;
  156     
  157     *dst = strndup (name_beg, scan-name_beg+1);
  158 
  159     *scan_in = scan;
  160     return UT_OK;
  161 }
  162 
  163 /**************************************************************************/
  164 /*!
  165  * \brief Parse parameter of a "DefineLanguage" or "DefineSystem" line.
  166  */
  167 static UtCode parse_lang_sys_def_line (char** scan_in, UtLangSys * lang_sys) {
  168 
  169     char *scan = *scan_in;
  170     
  171     if (ut_session->nb_charsets) return UT_LANG_SYS_DEF_AFTER_CHARSET_ERROR;
  172     if (lang_sys->n == lang_sys->n_max) {
  173         UT_TRY( expend_lang_sys (lang_sys) )
  174     }
  175 
  176     //printf (scan);
  177     while (is_blank(*scan)) 
  178         scan++; //trim space before language id
  179     
  180     if (is_eol_c(*scan)) return     UT_LANG_SYS_CODE_MISSING_ERROR;
  181         
  182     if (is_blank(*(scan+1)) || is_eol_c(*(scan+1))) return UT_PARTIAL_LANG_SYS_CODE_ERROR;
  183     
  184     //lang_sys->code [lang_sys->n] = *(((ushort*)(scan)))++;
  185     //#if BYTE_ORDER == LITTLE_ENDIAN
  186     //bswap_16(lang_sys->code [lang_sys->n]);
  187     //#endif
  188     lang_sys->code [lang_sys->n*2+0] = *scan++;
  189     lang_sys->code [lang_sys->n*2+1] = *scan++;
  190     
  191     //check if language exists
  192     int i; for (i=0; i<lang_sys->n; i++)
  193         if (lang_sys->code [i*2+0] == lang_sys->code [lang_sys->n*2+0] && 
  194             lang_sys->code [i*2+1] == lang_sys->code [lang_sys->n*2+1]) 
  195             return UT_LANG_SYS_ALREADY_DEFINED_ERROR;
  196 
  197     UtCode rcode = parse_string_line (&scan, &lang_sys->name[lang_sys->n]);
  198     if (rcode!=UT_OK) return rcode;
  199     
  200     lang_sys->n++;
  201     
  202     DBG("Lang/sys (%p) added : %s (%c%c) at pos %d", 
  203             lang_sys, lang_sys->name [lang_sys->n],
  204             lang_sys->code [lang_sys->n*2+0],
  205             lang_sys->code [lang_sys->n*2+1], lang_sys->n-1)
  206     
  207     *scan_in = scan;
  208     return UT_OK;
  209 }
  210 
  211 
  212 /**************************************************************************/
  213 /*!
  214  * \brief Parse parameter of a "Charmap"line.
  215  */
  216 static UtCode parse_charmap_line (char** scan_in, UtCharmapLink ** current_link) {
  217     
  218     char* scan = *scan_in;
  219     UtCharmapLink * old_link = *current_link;
  220     
  221     UtCharset * new_charset = (UtCharset*) malloc (sizeof(UtCharset));
  222     if (!new_charset) return UT_MALLOC_ERROR;
  223     new_charset->name = NULL;
  224     new_charset->alias = NULL;
  225     new_charset->common_name = NULL;
  226     new_charset->comment = NULL;
  227     new_charset->type = UT_CST_UNSET;
  228     new_charset->language = (u_char*) malloc (ut_session->language.n*(sizeof(u_char)));
  229     new_charset->system = (u_char*) malloc (ut_session->system.n*(sizeof(u_char)));
  230     new_charset->unicode = NULL;
  231     new_charset->char_type = NULL;
  232     
  233     int i; 
  234     for (i=0; i<ut_session->language.n; i++) new_charset->language[i] = 0;
  235     for (i=0; i<ut_session->system.n; i++) new_charset->system[i] = 0;
  236 
  237     UtCode rcode = parse_string_line (&scan, &new_charset->name);
  238     if (rcode!=UT_OK) return rcode;
  239 
  240     i = 0; while (UT_CHARSET_NAME[i]) {
  241         if (strcmp (UT_CHARSET_NAME[i], new_charset->name)==0) break;
  242         i++;
  243     }
  244     new_charset->type = (UtCharsetType) i;
  245     
  246     UtCharmapLink * new_link;
  247     if (old_link->charset ) {
  248         new_link = (UtCharmapLink*) calloc (1, sizeof(UtCharmapLink));
  249         old_link->next = new_link; 
  250     } else {
  251         new_link = old_link;
  252     }
  253     new_link->charset = new_charset;
  254     new_link->next = NULL;
  255     ut_session->nb_charsets++;
  256 
  257     DBG3 (" - Charset %s added! - ", new_charset->name)
  258     *current_link = new_link;
  259     *scan_in = scan;
  260     return UT_OK;
  261 }
  262 
  263 /**************************************************************************/
  264 /*!
  265  * \brief Parse parameter of a "Language" or "System" line.
  266  */
  267 static UtCode parse_lang_sys_line (char** scan_in, UtLangSys * lang_sys, char * lang_sys_coef) {
  268     char *scan = *scan_in;
  269     
  270     u_char language_id, coef_id;
  271 
  272     for(;;) {
  273         while (is_blank(*scan)) scan++;
  274         if (is_eol_c(*scan)) break;
  275         //ushort lang_sys_code = *(ushort*)scan;
  276         #if BYTE_ORDER == LITTLE_ENDIAN
  277         bswap_16 (*(ushort*)scan);
  278         #endif
  279         
  280         for (language_id=0; language_id<lang_sys->n; language_id++) {
  281             //if ( *(ushort*)scan == lang_sys->code[language_id]) break;
  282             if (    *scan == lang_sys->code[language_id*2+0] &&
  283                 *(scan+1) == lang_sys->code[language_id*2+1]) break;
  284         }
  285         
  286         if (language_id==lang_sys->n) return UT_LANG_SYS_UNDEFINED_ERROR;
  287             
  288         scan+=2;
  289         if (*scan==':') {
  290             char * beg = ++scan;
  291             coef_id = strtoul (beg, &scan, 0);
  292             if (beg==scan) return UT_LANG_SYS_COEF_MISSING_ERROR;
  293             if (!is_blank(*scan) && !is_eol_c(*scan)) return UT_LANG_SYS_INCORRECT_COEF_ERROR;
  294             if (coef_id>UT_COEF_MAX) return UT_LANG_SYS_COEF_TOO_BIG_ERROR;
  295         } else coef_id = 1;
  296 
  297         lang_sys_coef[language_id] = coef_id;
  298     } // for(;;)
  299 
  300     *scan_in = scan-1;
  301     return UT_OK;
  302 }
  303 
  304 /**************************************************************************/
  305 /*!
  306  * \brief Parse charmap entry.
  307  */
  308 static UtCode parse_charmap_entry (char** scan_in, UtCharset * charset) {
  309     
  310     if (charset->type!=UT_CST_ASCII && charset->type!=UT_CST_ASCII_EXTENSION)
  311         return UT_CHARMAP_ENTRY_ILLEGAL_ERROR;
  312     
  313     char* scan = *scan_in;
  314     char * hex_beg = scan;
  315     
  316     ulong character = strtoul (hex_beg, &scan, 16);
  317     if (hex_beg==scan) return UT_INCORRECT_CHARMAP_ENTRY_ERROR;    //useless?
  318 
  319     if (character >= 0x80 && charset->type!=UT_CST_ASCII_EXTENSION)
  320         return UT_CHARMAP_ENTRY_ILLEGAL_ERROR;
  321 
  322     hex_beg = scan;
  323     ulong unicode = strtoul (hex_beg, &scan, 16);
  324     if (hex_beg==scan) unicode = UT_UNICODE_NONCHAR;   //some unicode entries are empty!
  325     if (character>0xFF)     return UT_CHAR_TOO_BIG_ERROR;
  326     if (unicode > 0xFFFF) return UT_UNICODE_CHAR_TOO_BIG_ERROR;
  327         
  328     if (!charset->unicode && !charset->char_type) {
  329         charset->unicode = (ushort*) malloc (sizeof( ushort[0x100]));
  330         charset->char_type = (UtCharType*) malloc (sizeof( UtCharType[0x100]));
  331         if (!charset->unicode || !charset->char_type) 
  332             return UT_MALLOC_ERROR;
  333         int i; for (i=0; i<0x100; i++) {
  334             charset->unicode[i] = UT_UNICODE_NONCHAR;
  335             charset->char_type[i].categorie = UT_CTG_UNSET;
  336             charset->char_type[i].script = 0;
  337         }
  338     }
  339     
  340     charset->unicode[(u_char)character] = (ushort) unicode;
  341     
  342     while (is_blank(*scan)) scan++;
  343     
  344     if ('A'<=*scan && *scan <= 'Z') {
  345         
  346         if (character==0||character==0x9||character==0xA||character==0xD||character==0x20)
  347             charset->char_type[(u_char) character].categorie = UT_CTG_DELIMITER;
  348         else 
  349           #if BYTE_ORDER==BIG_ENDIAN
  350           switch (* (ushort*) scan ) {
  351           #else
  352           switch (bswap_16(* (ushort*) scan )) { //}
  353           #endif
  354             case 'Lu': charset->char_type[(u_char) character].categorie = UT_CTG_UPPERCASE; break;
  355             case 'Ll': charset->char_type[(u_char) character].categorie = UT_CTG_LOWERCASE; break;
  356             case 'Lt': 
  357             case 'Lm': 
  358             case 'Lo': charset->char_type[(u_char) character].categorie = UT_CTG_OTHER_LETTER; break;
  359 
  360             case 'Mn': charset->char_type[(u_char) character].categorie = UT_CTG_MARK; break;
  361             case 'Mc': 
  362             case 'Me': charset->char_type[(u_char) character].categorie = UT_CTG_OTHER; break;
  363 
  364             case 'Nd': 
  365             case 'Nl': 
  366             case 'No': charset->char_type[(u_char) character].categorie = UT_CTG_NUMBER; break;
  367 
  368             case 'Pc': 
  369             case 'Pd': 
  370             case 'Po': charset->char_type[(u_char) character].categorie = UT_CTG_PONCTUATION; break;
  371             case 'Ps': charset->char_type[(u_char) character].categorie = UT_CTG_PONCT_INIT_OTHER ; break;
  372             case 'Pe': charset->char_type[(u_char) character].categorie = UT_CTG_PONCT_FINAL_OTHER ; break;
  373             case 'Pi':
  374                 switch (unicode) {
  375                     case 0x00AB: charset->char_type[(u_char) character].categorie = UT_CTG_PONCT_INIT_0 ; break;
  376                     case 0x2018: charset->char_type[(u_char) character].categorie = UT_CTG_PONCT_INIT_1 ; break;
  377                     case 0x201C: charset->char_type[(u_char) character].categorie = UT_CTG_PONCT_INIT_2 ; break;
  378                     case 0x2039: charset->char_type[(u_char) character].categorie = UT_CTG_PONCT_INIT_3 ; break;
  379                     default: charset->char_type[(u_char) character].categorie = UT_CTG_PONCT_INIT_OTHER ; break;
  380                 } break;
  381         
  382             case 'Pf': 
  383                 switch (unicode) {
  384                     case 0x00BB: charset->char_type[(u_char) character].categorie = UT_CTG_PONCT_FINAL_0 ; break;
  385                     case 0x2019: charset->char_type[(u_char) character].categorie = UT_CTG_PONCT_FINAL_1 ; break;
  386                     case 0x201D: charset->char_type[(u_char) character].categorie = UT_CTG_PONCT_FINAL_2 ; break;
  387                     case 0x203A: charset->char_type[(u_char) character].categorie = UT_CTG_PONCT_FINAL_3 ; break;
  388                     default: charset->char_type[(u_char) character].categorie = UT_CTG_PONCT_FINAL_OTHER ; break;
  389                 } break;
  390 
  391             case 'Sc': charset->char_type[(u_char) character].categorie = UT_CTG_CURRENCY; break;
  392             case 'Sm': 
  393             case 'Sk': 
  394             case 'So': charset->char_type[(u_char) character].categorie = UT_CTG_SYMBOL; break;
  395 
  396             case 'Zs': charset->char_type[(u_char) character].categorie = UT_CTG_DELIMITER; break;
  397             case 'Zl': 
  398             case 'Zp': charset->char_type[(u_char) character].categorie = UT_CTG_OTHER; break;
  399 
  400             case 'Cc': charset->char_type[(u_char) character].categorie = UT_CTG_CONTROL; break;
  401             case 'Cf': 
  402             case 'Cs': 
  403             case 'Co': 
  404             case 'Cn': charset->char_type[(u_char) character].categorie = UT_CTG_OTHER; break;
  405             default: return UT_UNDEFINED_CATEGORY_ERROR;
  406         }
  407         scan +=2;
  408         while (is_blank(*scan)) scan++;
  409     }
  410     
  411     //look for an script type in the comment (latin, arabic, hebrew...)
  412     if (*scan == '#') {                         //is there a comment?
  413         const char ** script = SCRIPT_NAME;
  414         int index_script = 0;
  415         char * first_eol, *first_script;
  416         first_eol = strchr (scan, '\n');        //find the eol and replace it by \0
  417         if (first_eol) *first_eol=0;            //in order to use strstr
  418         while (*script) {
  419             index_script++;
  420             first_script = strstr (scan, *script);      //locate substring
  421             if (first_script && first_script < first_eol) {
  422                 charset->char_type [(u_char) character].script = (char) index_script;//found
  423                 if (first_eol) scan = first_eol; //speed up the parsing
  424                 break;
  425             } 
  426             script++;
  427         }
  428         if (first_eol) *first_eol='\n';         //replace the 0 by the initial eol
  429     }
  430     //while (*scan!='\n') scan++;
  431     *scan_in = scan-1;
  432     return UT_OK;
  433 }
  434 
  435 //! \brief Compare a null-ended string with a non-null-ended string.
  436 static bool streq (const char * src, char **cmp) {
  437     char *cmp_scan = *cmp;
  438     while (*src) {
  439         if (*src!=*cmp_scan || !*cmp_scan) return false;
  440         src++; cmp_scan++;
  441     }
  442     *cmp = cmp_scan;
  443     return true;
  444 }
  445 
  446 
  447 UtCode ut_print_charsets () {
  448     
  449     int i; for (i=0; i < ut_session->nb_charsets; i++) {
  450         printf ("%2d: %20s %2d [", i, ut_session->charset[i].name, ut_session->charset[i].type);
  451         int j; for (j=0; j<ut_session->language.n_max;j++) printf("%d ",(int)ut_session->charset[i].language[j]);
  452         printf("] [");  
  453         for (j=0; j<ut_session->system.n_max;j++) printf("%d ",(int)ut_session->charset[i].system[j]);
  454         printf("]\n");
  455     }
  456     
  457     
  458 }
  459 
  460 
  461 /*****************************************************************************/
  462 /*!
  463  * \brief Loads and parses file charset.dat.
  464  *
  465  * This function loads and parses file charset.dat containing all informations about
  466  * charset in a UtCharset array in UtSession::charset.
  467  *
  468  * \return UT_CODE on success, error code otherwise
  469  *
  470  * \todo documentation of the charset.dat file
  471  */
  472 UtCode ut_load_charsets () {
  473     
  474     DBG3 ("Loading charsets...")
  475 
  476     int i;
  477     char * file_buffer;
  478     int rcode;
  479     const char * filename;
  480     {
  481         #ifdef UT_CHARMAPS_FILENAME
  482         filename = UT_CHARMAPS_FILENAME;
  483         rcode = ut_load_charset_file (filename, &file_buffer);
  484     }
  485     if (rcode!=UT_OK) {
  486         #endif
  487         filename = UT_CHARMAPS_FILENAME2;
  488         rcode = ut_load_charset_file (filename, &file_buffer);
  489     }
  490 
  491     if (rcode!=UT_OK) return rcode;
  492     
  493     char * scan = file_buffer;
  494     int line = 1;
  495     
  496     //each new charmap is added to a linked list
  497     UtCharmapLink * charmap_list = (UtCharmapLink*) calloc (1, sizeof(UtCharmapLink));
  498     UtCharmapLink * current_link = charmap_list;
  499     
  500     //parse file 
  501     while (*scan) {
  502         if (*scan=='\r') {
  503             if (*(scan+1)=='\n') scan++;
  504             line++;
  505         } else if (*scan=='\n') {
  506             line++;
  507         } else if (!is_blank(*scan)) {
  508             if (*scan=='#') {
  509                 while (!is_eol(*++scan));
  510                 scan--;
  511             } else if (*scan=='0' && *(scan+1)=='x') {
  512                 rcode = parse_charmap_entry(&scan, current_link->charset);
  513 
  514             } else if ( streq (charmap_keyword, &scan) ) {
  515                 rcode = parse_charmap_line(&scan, &current_link);
  516             } else if ( streq (alias_keyword, &scan) ) {
  517                 rcode = parse_string_line(&scan, &current_link->charset->alias);
  518             } else if ( streq (common_name_keyword, &scan) ) {
  519                 rcode = parse_string_line(&scan, &current_link->charset->common_name);
  520             } else if ( streq (comment_keyword, &scan) ) {
  521                 rcode = parse_string_line(&scan, &current_link->charset->comment);
  522 
  523             } else if ( streq (language_keyword, &scan) ) {
  524                 rcode = parse_lang_sys_line(&scan, &ut_session->language, current_link->charset->language);
  525             } else if ( streq (system_keyword, &scan) ) {
  526                 rcode = parse_lang_sys_line(&scan, &ut_session->system, current_link->charset->system);
  527 
  528             } else if ( streq (language_def_keyword, &scan) ) {
  529                 rcode = parse_lang_sys_def_line(&scan, &ut_session->language);
  530             } else if ( streq (system_def_keyword, &scan) ) {
  531                 rcode = parse_lang_sys_def_line(&scan, &ut_session->system);
  532             } else {
  533                 //error
  534                 //rcode = utSYNTAX_ERROR;
  535                 if (!ut_session->error_string) ut_session->error_string = (char*) malloc (UT_ERROR_STRING_SIZE);
  536                 snprintf (ut_session->error_string, UT_ERROR_STRING_SIZE,
  537                         "syntax error in %s at line %d:\n%s", filename, line, scan);
  538                 return UT_SYNTAX_ERROR;
  539             }
  540             if (rcode!=UT_OK) {
  541                 if (!ut_session->error_string) ut_session->error_string = (char*) malloc (UT_ERROR_STRING_SIZE);
  542                 snprintf (ut_session->error_string, UT_ERROR_STRING_SIZE,
  543                         "error %d in %s at line %d", rcode, filename, line);
  544                 //malloc'ed blocs (file_buffer & links) not free'ed
  545                 return UT_CHARSET_FILE_ERROR;
  546             }
  547         } //else
  548         scan++;
  549     } //while
  550     
  551     //put pointers from charmap linked list in an array
  552     ut_session->charset = (UtCharset*) calloc (ut_session->nb_charsets, sizeof (UtCharset));
  553     i=0;
  554     current_link = charmap_list;
  555     while (current_link) {
  556         ut_session->charset[i].name         = current_link->charset->name;
  557         ut_session->charset[i].alias        = current_link->charset->alias;
  558         ut_session->charset[i].common_name  = current_link->charset->common_name;
  559         ut_session->charset[i].comment      = current_link->charset->comment;
  560         ut_session->charset[i].type         = current_link->charset->type;
  561         ut_session->charset[i].unicode      = current_link->charset->unicode;
  562         ut_session->charset[i].char_type    = current_link->charset->char_type;
  563         ut_session->charset[i].language     = current_link->charset->language;
  564         ut_session->charset[i].system       = current_link->charset->system;
  565         charmap_list = current_link->next;
  566         free(current_link->charset);
  567         free(current_link);
  568         current_link = charmap_list;
  569         i++;
  570     }
  571     free (file_buffer);
  572     
  573     DBG2 ("Charset file %s processed!", filename)
  574     //ut_print_charsets () ;
  575     return UT_OK;
  576     
  577 }