"Fossies" - the Fresh Open Source Software Archive 
Member "utrac-0.3.2/src/ut_charset.c" (4 Jan 2009, 21110 Bytes) of package /linux/privat/old/utrac-0.3.2.tgz:
As a special service "Fossies" has tried to format the requested source page into HTML format using (guessed) C and C++ source code syntax highlighting (style:
standard) with prefixed line numbers and
code folding option.
Alternatively you can here
view or
download the uninterpreted source code file.
For more information about "ut_charset.c" see the
Fossies "Dox" file reference documentation.
1 /***************************************************************************
2 * ut_charset.c
3 *
4 * Fri Apr 23 15:24:30 2004
5 * Copyright 2004 Alliance MCA
6 * Written by : Antoine Calando (antoine@alliancemca.net)
7 ****************************************************************************/
8 /*
9 * This program is free software; you can redistribute it and/or modify
10 * it under the terms of the GNU General Public License as published by
11 * the Free Software Foundation; either version 2 of the License, or
12 * (at your option) any later version.
13 *
14 * This program is distributed in the hope that it will be useful,
15 * but WITHOUT ANY WARRANTY; without even the implied warranty of
16 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17 * GNU Library General Public License for more details.
18 *
19 * You should have received a copy of the GNU General Public License
20 * along with this program; if not, write to the Free Software
21 * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
22 */
23
24
25 /*!\file
26 * \brief Functions which parse the charset.dat file.
27 *
28 * \author Antoine Calando (antoine@alliancemca.net)
29 *
30 * \todo EC les fonctions inline ne fonctionnent qu'avec gcc !! Il faudrait mettre
31 * une macro UT_INLINE dans un header.
32 * AC ???
33 */
34
35 #define _UT_CHARSET_C_
36
37 #include <stdlib.h>
38 #include <stdio.h>
39 #include <endian.h>
40 #include <byteswap.h>
41 #define __USE_GNU //for strndup
42 #include <string.h>
43
44 #include "utrac.h"
45 #include "ut_charset.h"
46
47 //#undef UT_DEBUG
48 //#define UT_DEBUG 3
49 #include "debug.h"
50
51 // ***************************************************************************************
52 // const char * charmaps_filename = "/home/antoine/dev/libimport/charmaps_categ.txt";
53
//! \brief Keywords recognized at the beginning of a line of file charset.dat.
static const char *charmap_keyword      = "Charmap:";
static const char *alias_keyword        = "Alias:";
static const char *common_name_keyword  = "CommonName:";
static const char *comment_keyword      = "Comment:";
static const char *language_keyword     = "Language:";
static const char *system_keyword       = "System:";
static const char *language_def_keyword = "DefineLanguage:";
static const char *system_def_keyword   = "DefineSystem:";

//! \brief Script (alphabet) names looked up in charmap entry comments; NULL-terminated.
static const char *SCRIPT_NAME[] = {
	"LATIN",
	"CYRILLIC",
	"ARABIC",
	"GREEK",
	"HEBREW",
	"THAI",
	NULL
};
66
67 // ***************************************************************************************
/*!
 * \brief Tell whether a one-byte ASCII character is a space or a tabulation.
 * \note A local helper is used instead of isblank(): the original author's note
 *       says isblank() was avoided for portability reasons.
 */
static inline bool is_blank (char c) {
	switch (c) {
	case ' ':
	case '\t':
		return true;
	default:
		return false;
	}
}
74
/*!
 * \brief Tell whether a one-byte ASCII character is an end-of-line character.
 *
 * Both '\n' and '\r' count as end of line, which covers LF, CR and CRLF files.
 * The NUL byte is deliberately NOT treated as an end of line here.
 *
 * \note According to the original author's note, the loader appends a '\n' at
 *       the end of the buffer, which is why no test on 0 is needed.
 */
static inline bool is_eol (char c) {
	if (c == '\r') return true;
	return c == '\n';
}
90
/*!
 * \brief Tell whether a one-byte ASCII character ends the useful part of a line,
 *        i.e. is an end-of-line character or the start of a comment ('#').
 * \note The NUL byte is not treated as a terminator.
 */
static inline bool is_eol_c (char c) {
	switch (c) {
	case '#':
	case '\n':
	case '\r':
		return true;
	default:
		return false;
	}
}
97
98 /*!
99 * \brief Link used to store temporarily UtCharset structures in a linked list
100 * \note EC Une définition de structure doit se trouver dans un header
101 * \note AC Elle n'est nécessaire qu'ici, ça permet d'alléger les .h
102 */
103 typedef struct UtCharmapLink {
104 UtCharset * charset;
105 struct UtCharmapLink * next;
106 } UtCharmapLink;
107
108 /**************************************************************************/
109 /*!
110 * \brief Expend size of an UtLangSys dynamic array
111 *
112 * \bug EC Il n'y a pas de test du retour de realloc !
113 * La fonction pouvant planter (realloc), elle doit renvoyer un code d'erreur.
114 * AC corrigé.
115 */
116 static UtCode expend_lang_sys (UtLangSys *lang_sys) {
117
118 lang_sys->n_max += UT_LANG_SYS_ALLOC_STEP;
119 lang_sys->name = (char**) realloc (lang_sys->name, lang_sys->n_max*sizeof(char*));
120 //lang_sys->code = (ushort*) realloc (lang_sys->code, lang_sys->n_max*sizeof(ushort));
121 lang_sys->code = (char*) realloc (lang_sys->code, lang_sys->n_max*2);
122 //lang_sys->code[0] = 0; lang_sys->code[1] = 0;
123
124 if (!lang_sys->name || !lang_sys->code) return UT_MALLOC_ERROR;
125 //if (!lang_sys->name ) return UT_MALLOC_ERROR;
126 else return UT_OK;
127
128 DBG3 ("Lang/sys dynamic array (at %p) expended to %d elements", lang_sys, lang_sys->n_max)
129 }
130
131 /**************************************************************************/
132 /*!
133 * \brief Copy a string from file buffer.
134 *
135 * \bug EC Dans le cas ou la premiere ligne d'un buffer lu en mémoire ne contienne pas de données
136 * pertinantes (ligne vide, ligne d'espace, etc.), la fin de ligne serait touvée, puis
137 * lors de la boucle de recherche du dernier caractère espace ou tabulation, on remonterait
138 * à *(buffer-1) ce qui entrainerait une segmentation fault (lors de la commande ou lors
139 * du free).
140 * AC La fonction n'est appelé que sur une ligne débutant par une commande, donc de la mémoire
141 * lisible.
142 * \bug EC le retour de strndup() n'est pas testé !
143 * AC pfff... pour un dizaine d'octet max... c'est de la diptèrophilie...
144 */
145 static UtCode parse_string_line (char** scan_in, char ** dst) {
146
147 char *scan = *scan_in;
148
149 while (is_blank(*scan)) scan++; //trim space before language name
150
151 char * name_beg = scan;
152 while (!is_eol_c(*scan)) scan++; //find eol or comment
153 do scan--; while (is_blank(*scan)); //go back until first nonblank char
154
155 if (scan-name_beg<0) return UT_STRING_MISSING_ERROR;
156
157 *dst = strndup (name_beg, scan-name_beg+1);
158
159 *scan_in = scan;
160 return UT_OK;
161 }
162
163 /**************************************************************************/
164 /*!
165 * \brief Parse parameter of a "DefineLanguage" or "DefineSystem" line.
166 */
167 static UtCode parse_lang_sys_def_line (char** scan_in, UtLangSys * lang_sys) {
168
169 char *scan = *scan_in;
170
171 if (ut_session->nb_charsets) return UT_LANG_SYS_DEF_AFTER_CHARSET_ERROR;
172 if (lang_sys->n == lang_sys->n_max) {
173 UT_TRY( expend_lang_sys (lang_sys) )
174 }
175
176 //printf (scan);
177 while (is_blank(*scan))
178 scan++; //trim space before language id
179
180 if (is_eol_c(*scan)) return UT_LANG_SYS_CODE_MISSING_ERROR;
181
182 if (is_blank(*(scan+1)) || is_eol_c(*(scan+1))) return UT_PARTIAL_LANG_SYS_CODE_ERROR;
183
184 //lang_sys->code [lang_sys->n] = *(((ushort*)(scan)))++;
185 //#if BYTE_ORDER == LITTLE_ENDIAN
186 //bswap_16(lang_sys->code [lang_sys->n]);
187 //#endif
188 lang_sys->code [lang_sys->n*2+0] = *scan++;
189 lang_sys->code [lang_sys->n*2+1] = *scan++;
190
191 //check if language exists
192 int i; for (i=0; i<lang_sys->n; i++)
193 if (lang_sys->code [i*2+0] == lang_sys->code [lang_sys->n*2+0] &&
194 lang_sys->code [i*2+1] == lang_sys->code [lang_sys->n*2+1])
195 return UT_LANG_SYS_ALREADY_DEFINED_ERROR;
196
197 UtCode rcode = parse_string_line (&scan, &lang_sys->name[lang_sys->n]);
198 if (rcode!=UT_OK) return rcode;
199
200 lang_sys->n++;
201
202 DBG("Lang/sys (%p) added : %s (%c%c) at pos %d",
203 lang_sys, lang_sys->name [lang_sys->n],
204 lang_sys->code [lang_sys->n*2+0],
205 lang_sys->code [lang_sys->n*2+1], lang_sys->n-1)
206
207 *scan_in = scan;
208 return UT_OK;
209 }
210
211
212 /**************************************************************************/
213 /*!
214 * \brief Parse parameter of a "Charmap"line.
215 */
216 static UtCode parse_charmap_line (char** scan_in, UtCharmapLink ** current_link) {
217
218 char* scan = *scan_in;
219 UtCharmapLink * old_link = *current_link;
220
221 UtCharset * new_charset = (UtCharset*) malloc (sizeof(UtCharset));
222 if (!new_charset) return UT_MALLOC_ERROR;
223 new_charset->name = NULL;
224 new_charset->alias = NULL;
225 new_charset->common_name = NULL;
226 new_charset->comment = NULL;
227 new_charset->type = UT_CST_UNSET;
228 new_charset->language = (u_char*) malloc (ut_session->language.n*(sizeof(u_char)));
229 new_charset->system = (u_char*) malloc (ut_session->system.n*(sizeof(u_char)));
230 new_charset->unicode = NULL;
231 new_charset->char_type = NULL;
232
233 int i;
234 for (i=0; i<ut_session->language.n; i++) new_charset->language[i] = 0;
235 for (i=0; i<ut_session->system.n; i++) new_charset->system[i] = 0;
236
237 UtCode rcode = parse_string_line (&scan, &new_charset->name);
238 if (rcode!=UT_OK) return rcode;
239
240 i = 0; while (UT_CHARSET_NAME[i]) {
241 if (strcmp (UT_CHARSET_NAME[i], new_charset->name)==0) break;
242 i++;
243 }
244 new_charset->type = (UtCharsetType) i;
245
246 UtCharmapLink * new_link;
247 if (old_link->charset ) {
248 new_link = (UtCharmapLink*) calloc (1, sizeof(UtCharmapLink));
249 old_link->next = new_link;
250 } else {
251 new_link = old_link;
252 }
253 new_link->charset = new_charset;
254 new_link->next = NULL;
255 ut_session->nb_charsets++;
256
257 DBG3 (" - Charset %s added! - ", new_charset->name)
258 *current_link = new_link;
259 *scan_in = scan;
260 return UT_OK;
261 }
262
263 /**************************************************************************/
264 /*!
265 * \brief Parse parameter of a "Language" or "System" line.
266 */
267 static UtCode parse_lang_sys_line (char** scan_in, UtLangSys * lang_sys, char * lang_sys_coef) {
268 char *scan = *scan_in;
269
270 u_char language_id, coef_id;
271
272 for(;;) {
273 while (is_blank(*scan)) scan++;
274 if (is_eol_c(*scan)) break;
275 //ushort lang_sys_code = *(ushort*)scan;
276 #if BYTE_ORDER == LITTLE_ENDIAN
277 bswap_16 (*(ushort*)scan);
278 #endif
279
280 for (language_id=0; language_id<lang_sys->n; language_id++) {
281 //if ( *(ushort*)scan == lang_sys->code[language_id]) break;
282 if ( *scan == lang_sys->code[language_id*2+0] &&
283 *(scan+1) == lang_sys->code[language_id*2+1]) break;
284 }
285
286 if (language_id==lang_sys->n) return UT_LANG_SYS_UNDEFINED_ERROR;
287
288 scan+=2;
289 if (*scan==':') {
290 char * beg = ++scan;
291 coef_id = strtoul (beg, &scan, 0);
292 if (beg==scan) return UT_LANG_SYS_COEF_MISSING_ERROR;
293 if (!is_blank(*scan) && !is_eol_c(*scan)) return UT_LANG_SYS_INCORRECT_COEF_ERROR;
294 if (coef_id>UT_COEF_MAX) return UT_LANG_SYS_COEF_TOO_BIG_ERROR;
295 } else coef_id = 1;
296
297 lang_sys_coef[language_id] = coef_id;
298 } // for(;;)
299
300 *scan_in = scan-1;
301 return UT_OK;
302 }
303
304 /**************************************************************************/
305 /*!
306 * \brief Parse charmap entry.
307 */
308 static UtCode parse_charmap_entry (char** scan_in, UtCharset * charset) {
309
310 if (charset->type!=UT_CST_ASCII && charset->type!=UT_CST_ASCII_EXTENSION)
311 return UT_CHARMAP_ENTRY_ILLEGAL_ERROR;
312
313 char* scan = *scan_in;
314 char * hex_beg = scan;
315
316 ulong character = strtoul (hex_beg, &scan, 16);
317 if (hex_beg==scan) return UT_INCORRECT_CHARMAP_ENTRY_ERROR; //useless?
318
319 if (character >= 0x80 && charset->type!=UT_CST_ASCII_EXTENSION)
320 return UT_CHARMAP_ENTRY_ILLEGAL_ERROR;
321
322 hex_beg = scan;
323 ulong unicode = strtoul (hex_beg, &scan, 16);
324 if (hex_beg==scan) unicode = UT_UNICODE_NONCHAR; //some unicode entries are empty!
325 if (character>0xFF) return UT_CHAR_TOO_BIG_ERROR;
326 if (unicode > 0xFFFF) return UT_UNICODE_CHAR_TOO_BIG_ERROR;
327
328 if (!charset->unicode && !charset->char_type) {
329 charset->unicode = (ushort*) malloc (sizeof( ushort[0x100]));
330 charset->char_type = (UtCharType*) malloc (sizeof( UtCharType[0x100]));
331 if (!charset->unicode || !charset->char_type)
332 return UT_MALLOC_ERROR;
333 int i; for (i=0; i<0x100; i++) {
334 charset->unicode[i] = UT_UNICODE_NONCHAR;
335 charset->char_type[i].categorie = UT_CTG_UNSET;
336 charset->char_type[i].script = 0;
337 }
338 }
339
340 charset->unicode[(u_char)character] = (ushort) unicode;
341
342 while (is_blank(*scan)) scan++;
343
344 if ('A'<=*scan && *scan <= 'Z') {
345
346 if (character==0||character==0x9||character==0xA||character==0xD||character==0x20)
347 charset->char_type[(u_char) character].categorie = UT_CTG_DELIMITER;
348 else
349 #if BYTE_ORDER==BIG_ENDIAN
350 switch (* (ushort*) scan ) {
351 #else
352 switch (bswap_16(* (ushort*) scan )) { //}
353 #endif
354 case 'Lu': charset->char_type[(u_char) character].categorie = UT_CTG_UPPERCASE; break;
355 case 'Ll': charset->char_type[(u_char) character].categorie = UT_CTG_LOWERCASE; break;
356 case 'Lt':
357 case 'Lm':
358 case 'Lo': charset->char_type[(u_char) character].categorie = UT_CTG_OTHER_LETTER; break;
359
360 case 'Mn': charset->char_type[(u_char) character].categorie = UT_CTG_MARK; break;
361 case 'Mc':
362 case 'Me': charset->char_type[(u_char) character].categorie = UT_CTG_OTHER; break;
363
364 case 'Nd':
365 case 'Nl':
366 case 'No': charset->char_type[(u_char) character].categorie = UT_CTG_NUMBER; break;
367
368 case 'Pc':
369 case 'Pd':
370 case 'Po': charset->char_type[(u_char) character].categorie = UT_CTG_PONCTUATION; break;
371 case 'Ps': charset->char_type[(u_char) character].categorie = UT_CTG_PONCT_INIT_OTHER ; break;
372 case 'Pe': charset->char_type[(u_char) character].categorie = UT_CTG_PONCT_FINAL_OTHER ; break;
373 case 'Pi':
374 switch (unicode) {
375 case 0x00AB: charset->char_type[(u_char) character].categorie = UT_CTG_PONCT_INIT_0 ; break;
376 case 0x2018: charset->char_type[(u_char) character].categorie = UT_CTG_PONCT_INIT_1 ; break;
377 case 0x201C: charset->char_type[(u_char) character].categorie = UT_CTG_PONCT_INIT_2 ; break;
378 case 0x2039: charset->char_type[(u_char) character].categorie = UT_CTG_PONCT_INIT_3 ; break;
379 default: charset->char_type[(u_char) character].categorie = UT_CTG_PONCT_INIT_OTHER ; break;
380 } break;
381
382 case 'Pf':
383 switch (unicode) {
384 case 0x00BB: charset->char_type[(u_char) character].categorie = UT_CTG_PONCT_FINAL_0 ; break;
385 case 0x2019: charset->char_type[(u_char) character].categorie = UT_CTG_PONCT_FINAL_1 ; break;
386 case 0x201D: charset->char_type[(u_char) character].categorie = UT_CTG_PONCT_FINAL_2 ; break;
387 case 0x203A: charset->char_type[(u_char) character].categorie = UT_CTG_PONCT_FINAL_3 ; break;
388 default: charset->char_type[(u_char) character].categorie = UT_CTG_PONCT_FINAL_OTHER ; break;
389 } break;
390
391 case 'Sc': charset->char_type[(u_char) character].categorie = UT_CTG_CURRENCY; break;
392 case 'Sm':
393 case 'Sk':
394 case 'So': charset->char_type[(u_char) character].categorie = UT_CTG_SYMBOL; break;
395
396 case 'Zs': charset->char_type[(u_char) character].categorie = UT_CTG_DELIMITER; break;
397 case 'Zl':
398 case 'Zp': charset->char_type[(u_char) character].categorie = UT_CTG_OTHER; break;
399
400 case 'Cc': charset->char_type[(u_char) character].categorie = UT_CTG_CONTROL; break;
401 case 'Cf':
402 case 'Cs':
403 case 'Co':
404 case 'Cn': charset->char_type[(u_char) character].categorie = UT_CTG_OTHER; break;
405 default: return UT_UNDEFINED_CATEGORY_ERROR;
406 }
407 scan +=2;
408 while (is_blank(*scan)) scan++;
409 }
410
411 //look for an script type in the comment (latin, arabic, hebrew...)
412 if (*scan == '#') { //is there a comment?
413 const char ** script = SCRIPT_NAME;
414 int index_script = 0;
415 char * first_eol, *first_script;
416 first_eol = strchr (scan, '\n'); //find the eol and replace it by \0
417 if (first_eol) *first_eol=0; //in order to use strstr
418 while (*script) {
419 index_script++;
420 first_script = strstr (scan, *script); //locate substring
421 if (first_script && first_script < first_eol) {
422 charset->char_type [(u_char) character].script = (char) index_script;//found
423 if (first_eol) scan = first_eol; //speed up the parsing
424 break;
425 }
426 script++;
427 }
428 if (first_eol) *first_eol='\n'; //replace the 0 by the initial eol
429 }
430 //while (*scan!='\n') scan++;
431 *scan_in = scan-1;
432 return UT_OK;
433 }
434
/*!
 * \brief Test whether a buffer position starts with a given null-terminated keyword.
 *
 * \param src null-terminated keyword.
 * \param cmp in: position to test in a buffer that is not terminated right
 *            after the keyword; out: moved just past the keyword when it
 *            matches, left untouched otherwise.
 * \return true if the text at \c *cmp begins with \c src, false otherwise.
 */
static bool streq (const char * src, char **cmp) {
	char *cursor = *cmp;

	for (; *src != '\0'; src++, cursor++) {
		if (*cursor == '\0' || *cursor != *src)
			return false;
	}

	*cmp = cursor;
	return true;
}
445
446
447 UtCode ut_print_charsets () {
448
449 int i; for (i=0; i < ut_session->nb_charsets; i++) {
450 printf ("%2d: %20s %2d [", i, ut_session->charset[i].name, ut_session->charset[i].type);
451 int j; for (j=0; j<ut_session->language.n_max;j++) printf("%d ",(int)ut_session->charset[i].language[j]);
452 printf("] [");
453 for (j=0; j<ut_session->system.n_max;j++) printf("%d ",(int)ut_session->charset[i].system[j]);
454 printf("]\n");
455 }
456
457
458 }
459
460
461 /*****************************************************************************/
462 /*!
463 * \brief Loads and parses file charset.dat.
464 *
465 * This function loads and parses file charset.dat containing all informations about
466 * charset in a UtCharset array in UtSession::charset.
467 *
468 * \return UT_CODE on success, error code otherwise
469 *
470 * \todo documentation of the charset.dat file
471 */
472 UtCode ut_load_charsets () {
473
474 DBG3 ("Loading charsets...")
475
476 int i;
477 char * file_buffer;
478 int rcode;
479 const char * filename;
480 {
481 #ifdef UT_CHARMAPS_FILENAME
482 filename = UT_CHARMAPS_FILENAME;
483 rcode = ut_load_charset_file (filename, &file_buffer);
484 }
485 if (rcode!=UT_OK) {
486 #endif
487 filename = UT_CHARMAPS_FILENAME2;
488 rcode = ut_load_charset_file (filename, &file_buffer);
489 }
490
491 if (rcode!=UT_OK) return rcode;
492
493 char * scan = file_buffer;
494 int line = 1;
495
496 //each new charmap is added to a linked list
497 UtCharmapLink * charmap_list = (UtCharmapLink*) calloc (1, sizeof(UtCharmapLink));
498 UtCharmapLink * current_link = charmap_list;
499
500 //parse file
501 while (*scan) {
502 if (*scan=='\r') {
503 if (*(scan+1)=='\n') scan++;
504 line++;
505 } else if (*scan=='\n') {
506 line++;
507 } else if (!is_blank(*scan)) {
508 if (*scan=='#') {
509 while (!is_eol(*++scan));
510 scan--;
511 } else if (*scan=='0' && *(scan+1)=='x') {
512 rcode = parse_charmap_entry(&scan, current_link->charset);
513
514 } else if ( streq (charmap_keyword, &scan) ) {
515 rcode = parse_charmap_line(&scan, ¤t_link);
516 } else if ( streq (alias_keyword, &scan) ) {
517 rcode = parse_string_line(&scan, ¤t_link->charset->alias);
518 } else if ( streq (common_name_keyword, &scan) ) {
519 rcode = parse_string_line(&scan, ¤t_link->charset->common_name);
520 } else if ( streq (comment_keyword, &scan) ) {
521 rcode = parse_string_line(&scan, ¤t_link->charset->comment);
522
523 } else if ( streq (language_keyword, &scan) ) {
524 rcode = parse_lang_sys_line(&scan, &ut_session->language, current_link->charset->language);
525 } else if ( streq (system_keyword, &scan) ) {
526 rcode = parse_lang_sys_line(&scan, &ut_session->system, current_link->charset->system);
527
528 } else if ( streq (language_def_keyword, &scan) ) {
529 rcode = parse_lang_sys_def_line(&scan, &ut_session->language);
530 } else if ( streq (system_def_keyword, &scan) ) {
531 rcode = parse_lang_sys_def_line(&scan, &ut_session->system);
532 } else {
533 //error
534 //rcode = utSYNTAX_ERROR;
535 if (!ut_session->error_string) ut_session->error_string = (char*) malloc (UT_ERROR_STRING_SIZE);
536 snprintf (ut_session->error_string, UT_ERROR_STRING_SIZE,
537 "syntax error in %s at line %d:\n%s", filename, line, scan);
538 return UT_SYNTAX_ERROR;
539 }
540 if (rcode!=UT_OK) {
541 if (!ut_session->error_string) ut_session->error_string = (char*) malloc (UT_ERROR_STRING_SIZE);
542 snprintf (ut_session->error_string, UT_ERROR_STRING_SIZE,
543 "error %d in %s at line %d", rcode, filename, line);
544 //malloc'ed blocs (file_buffer & links) not free'ed
545 return UT_CHARSET_FILE_ERROR;
546 }
547 } //else
548 scan++;
549 } //while
550
551 //put pointers from charmap linked list in an array
552 ut_session->charset = (UtCharset*) calloc (ut_session->nb_charsets, sizeof (UtCharset));
553 i=0;
554 current_link = charmap_list;
555 while (current_link) {
556 ut_session->charset[i].name = current_link->charset->name;
557 ut_session->charset[i].alias = current_link->charset->alias;
558 ut_session->charset[i].common_name = current_link->charset->common_name;
559 ut_session->charset[i].comment = current_link->charset->comment;
560 ut_session->charset[i].type = current_link->charset->type;
561 ut_session->charset[i].unicode = current_link->charset->unicode;
562 ut_session->charset[i].char_type = current_link->charset->char_type;
563 ut_session->charset[i].language = current_link->charset->language;
564 ut_session->charset[i].system = current_link->charset->system;
565 charmap_list = current_link->next;
566 free(current_link->charset);
567 free(current_link);
568 current_link = charmap_list;
569 i++;
570 }
571 free (file_buffer);
572
573 DBG2 ("Charset file %s processed!", filename)
574 //ut_print_charsets () ;
575 return UT_OK;
576
577 }