"Fossies" - the Fresh Open Source Software Archive

Member "utrac-0.3.2/src/utrac.c" (4 Jan 2009, 19053 Bytes) of package /linux/privat/old/utrac-0.3.2.tgz:


As a special service, "Fossies" has tried to format the requested source page into HTML format using (guessed) C and C++ source code syntax highlighting (style: standard) with prefixed line numbers and a code folding option. Alternatively, you can view or download the uninterpreted source code file here. For more information about "utrac.c", see the Fossies "Dox" file reference documentation.

    1 /***************************************************************************
    2  *            utrac.c
    3  *
    4  *  Tue Oct  5 11:29:59 2004
    5  *  Copyright  2004  Alliance MCA
    6  *  Written by : Antoine Calando (antoine@alliancemca.net)
    7  ****************************************************************************/
    8 
    9 /*
   10  *  This program is free software; you can redistribute it and/or modify
   11  *  it under the terms of the GNU General Public License as published by
   12  *  the Free Software Foundation; either version 2 of the License, or
   13  *  (at your option) any later version.
   14  *
   15  *  This program is distributed in the hope that it will be useful,
   16  *  but WITHOUT ANY WARRANTY; without even the implied warranty of
   17  *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
   18  *  GNU Library General Public License for more details.
   19  *
   20  *  You should have received a copy of the GNU General Public License
   21  *  along with this program; if not, write to the Free Software
   22  *  Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
   23  */
   24  
   25 /*!
   26  * \file utrac.c
   27  * \author Antoine Calando (antoine@alliancemca.net)
   28  * \brief Public API for using Utrac.
   29  */
   30  
   31 #define _UTRAC_C_
   32 
   33 #include <stdlib.h>
   34 #include <stdio.h>
   35 #include "utrac.h"
   36 
   37 #undef UT_DEBUG
   38 #define UT_DEBUG 3
   39 #include "debug.h"
   40 
   41 
   42 /***************************************************************************/
   43 /*!
   44  * \brief Initialize the Utrac library.
   45  *
   46  * This function must be called before any other Utrac function. It allocates an UtSession struture
   47  * that is accessible by the ut_session pointer, initalizes it, loads charsets data, and sets
   48  * default  language, charset and end of line type. The memory used is about 630kb for 47 charsets
   49  * loaded.
   50  *  
   51  * \note On Unix systems, LANG, LC_ALL and LC_TYPE are parsed to find default language and
   52  * charsets (ISO-8859-1 is used else), and  eol type is set to LF.
   53  *
   54  * \return UT_OK on success, an error code on failure.
   55  */
   56 UtCode ut_init () {
   57     
   58     if (ut_session) return UT_ALREADY_INITIALISED_ERROR;
   59         
   60     ut_session = (UtSession*) malloc (sizeof(UtSession));
   61     if (!ut_session) return UT_MALLOC_ERROR;
   62         
   63     return ut_init_noalloc();
   64 }
   65 
   66 /*!
   67  * \brief Initialize the Utrac library, without allocating memory for UtSession. Used internally
   68  */
   69 UtCode ut_init_noalloc () {
   70     //ut_session->flags = UT_F_UNSET; //flags_in
   71     ut_session->charset = NULL;
   72     ut_session->nb_charsets = 0;
   73     ut_session->language.name = NULL;
   74     ut_session->language.code = NULL;
   75     ut_session->language.n = 0;
   76     ut_session->language.n_max = 0;
   77     ut_session->system.name = NULL;
   78     ut_session->system.code = NULL;
   79     ut_session->system.n = 0;
   80     ut_session->system.n_max = 0;
   81     //ut_session->charset_default = UT_UNSET;
   82     ut_session->eol_default = UT_EOL_UNSET;
   83     ut_session->eol_alt_default = UT_EOL_UNSET;
   84     
   85     ut_session->nomapping_char = '_';
   86     ut_session->progress_function = NULL;
   87     ut_session->error_string = NULL;
   88     //load charsets data
   89     UT_TRY (ut_load_charsets ())
   90 
   91     //find default language, charset, eol type on the system
   92     #ifdef linux
   93     //should we use nl_langinfo()? (discovered later...) ->yes!
   94     int i;
   95     ut_session->language_default = 0; //language_default_in
   96     ut_session->system_default = 3;         //3 (to check in file charsets.dat)
   97     ut_session->eol_default = UT_EOL_LF;
   98     ut_session->eol_alt_default = UT_EOL_LF;
   99     ut_session->charset_default = ut_find_charset("ISO-8859-1");
  100     
  101     char * def_enc = getenv ("LC_CTYPE");
  102     if (!def_enc) def_enc = getenv ("LC_ALL");
  103     if (!def_enc) def_enc = getenv ("LANG");
  104     if (def_enc) {
  105         if (def_enc[2]=='_' || def_enc[2]=='.' || def_enc[2]==0) {
  106             for (i=0; i<ut_session->language.n; i++) {
  107                 if (def_enc[0]-'a'+'A'== ut_session->language.code[i*2+0]
  108                     && def_enc[1]-'a'+'A'== ut_session->language.code[i*2+1] ) {
  109                     ut_session->language_default = i;
  110                     break;
  111                 }
  112             } //for
  113         }
  114         if (def_enc[2]=='.') def_enc +=3;
  115         if (def_enc[2]=='_' && def_enc[5]=='.') def_enc +=6;
  116         for (i=0; i<ut_session->nb_charsets; i++) 
  117             if (ut_str_fuzzy_cmp (def_enc, ut_session->charset[i].name,'@')) break;
  118         if (i!=ut_session->nb_charsets) ut_session->charset_default = i;
  119     }
  120     
  121     if (ut_session->charset_default == UT_UNSET) {
  122         for (i=0; i<ut_session->nb_charsets; i++) 
  123             if (ut_str_fuzzy_cmp (UT_DEFAULT_ENCODING_UNIX, ut_session->charset[i].name,0)) break;
  124         if (i==ut_session->nb_charsets) {
  125             DBG1 ("*** No default charset ***")
  126         }
  127         else ut_session->charset_default = i;
  128     }
  129     #else
  130     ERROR ("pas unix!")
  131     #endif
  132 
  133     #if UT_DEBUG == 2
  134     if (ut_session->language_default != UT_UNSET)
  135         DBG2 ("lang: %s" , ut_session->language.name[ut_session->language_default])
  136     if (ut_session->charset_default != UT_UNSET)
  137         DBG2 ("charset: %s", ut_session->charset[ut_session->charset_default].name)
  138     if (ut_session->eol_default != UT_EOL_UNSET)
  139         DBG2 ("eol: %s", UT_EOL_NAME [ut_session->eol_default])
  140     #endif
  141     
  142     return UT_OK;
  143 }
  144 
  145 /*!
  146  * \brief Free ressources allocated during initialization of Utrac.
  147  *
  148  * This function frees the structure allocated in ut_session
  149  * by ut_init(). It must be the last Utrac function called.
  150  *
  151  * \return UT_OK on success, an error code on failure.
  152  */
  153 
  154 void ut_finish () {
  155     
  156     ut_finish_nofree ();
  157     free(ut_session);
  158     ut_session = NULL;
  159     
  160     return;
  161 }
  162 
  163 /*!
  164  * \brief Free ressources allocated during initialization of Utrac, without freeing UtSession. Used internally.
  165  */
  166 void ut_finish_nofree () {
  167     
  168     if (!ut_session) return;
  169     
  170     int i; for(i=0; i<ut_session->nb_charsets; i++) {
  171         free(ut_session->charset[i].name);
  172         free(ut_session->charset[i].alias);
  173         free(ut_session->charset[i].common_name);
  174         free(ut_session->charset[i].comment);
  175         free(ut_session->charset[i].unicode);
  176         free(ut_session->charset[i].char_type);
  177         free(ut_session->charset[i].language);
  178         free(ut_session->charset[i].system);
  179     }
  180     free (ut_session->charset);
  181     
  182     for (i=0; i<ut_session->language.n; i++) 
  183         free (ut_session->language.name[i]);
  184     free (ut_session->language.name);
  185     free (ut_session->language.code);
  186 
  187     for (i=0; i<ut_session->system.n; i++) 
  188         free (ut_session->system.name[i]);
  189     free (ut_session->system.name);
  190     free (ut_session->system.code);
  191     
  192     free (ut_session->error_string);
  193     return;
  194 };
  195 
  196 
  197 
  198 
  199 /***************************************************************************/
  200 /*!
  201  * \brief Allocates and initalizes an UtText structure.
  202  *
  203  * \return A pointer to the allocated structure, or NULL if the allocation failed.
  204  */
  205 
  206 UtText * ut_init_text_heap () {
  207     ASSERT (ut_session)
  208     UtText* new_text = (UtText*) malloc (sizeof(UtText));
  209     if (!new_text) return NULL;
  210     
  211     ut_init_text (new_text);
  212     
  213     return new_text;
  214 }
  215 
  216 /*!
  217  * \brief Initalizes an UtText structure.
  218  * \param new_text A pointer on the structure to initialize
  219  */
  220 
  221 void ut_init_text (UtText * new_text) {
  222         
  223     new_text->data = NULL;
  224     new_text->size = 0;
  225 
  226     new_text->eol = UT_EOL_UNSET;
  227     new_text->eol_alt = UT_EOL_UNSET;
  228     new_text->charset = UT_UNSET;
  229 
  230     new_text->nb_lines = UT_UNSET;
  231     new_text->nb_lines_alt = UT_UNSET;
  232     new_text->distribution = NULL;
  233     //int i; for (i=0; i<0x100; i++) new_text->distribution [i] = 0;
  234     new_text->ext_char = NULL;
  235     new_text->evaluation = NULL;
  236 
  237     new_text->flags = UT_F_DEFAULT;
  238     new_text->pass_flags = UT_PF_UNSET;
  239     new_text->skip_char = UT_SKIP_CHAR;
  240     
  241     new_text->progress_done = 0.0;
  242     new_text->progress_todo = 0;
  243     new_text->current_pass = UT_PF_UNSET;
  244 
  245     new_text->user = NULL;
  246 }
  247 
  248 /*!
  249  * \brief Free an UtText structure.
  250  * \param text pointer to the structure to free.
  251  */
  252 
  253 void ut_free_text_heap (UtText *text) {
  254     
  255     ut_free_text (text);
  256     free(text); 
  257     
  258 }
  259 
  260 /*!
  261  * \brief Free the contents of an UtText structure, without freeing the structure itself.
  262  * \param text pointer to the structure to free.
  263  */
  264 
  265 void ut_free_text (UtText *text) {
  266     //free(text->filename);
  267     //filename is not freed because it is set by user. 
  268     free(text->data); text->data = NULL;
  269     free(text->distribution); text->distribution = NULL;
  270     while (text->ext_char) {
  271         UtExtCharLine * tmp = text->ext_char;
  272         text->ext_char = text->ext_char->next;
  273         free (tmp);
  274     } text->ext_char = NULL;
  275 
  276     free(text->evaluation); text->evaluation = NULL;
  277     //text->user should be free by the user.
  278 }
  279 
  280 
  281 /*!
  282  * \brief Initialize an UtText structure before using the 'progress bar' callback feature
  283  *
  284  * Can be used internaly or by the user. The UtText must have member UtText::pass_flag set, or
  285  * at least UtText::flags (if UtText::pass_flags is unset, it will be set for just a recognition
  286  * pass and subpasses will be selected upon the value of UtText::flags).
  287 */
  288 
  289 UtCode ut_init_progress (UtText *text) {
  290     
  291     ASSERT (text);
  292     
  293     text->progress_done = 0.0;
  294     text->progress_todo = 0;
  295     if (text->pass_flags == UT_PF_UNSET) text->pass_flags = UT_PF_RECOGNIZE;
  296 
  297     if (text->pass_flags & UT_PF_LOAD ) text->progress_todo++;
  298         
  299     if (text->pass_flags & UT_PF_RECOGNIZE ) {
  300         if ((text->flags & UT_F_IDENTIFY_CHARSET) || (text->pass_flags & UT_PF_CONVERT ) )
  301             text->pass_flags |= UT_PF_DISTRIB_PASS;
  302         else text->pass_flags &= ~UT_PF_DISTRIB_PASS;
  303         if (text->flags & (UT_F_TRANSFORM_EOL | UT_F_REMOVE_ILLEGAL_CHAR | UT_F_ADD_FINAL_EOL | UT_F_IDENTIFY_EOL ) )
  304             text->pass_flags |= UT_PF_EOL_PASS;
  305         else text->pass_flags &= ~UT_PF_EOL_PASS;
  306 
  307         if (text->flags & (UT_F_IDENTIFY_CHARSET | UT_F_REFERENCE_EXT_CHAR ) )
  308             text->pass_flags |= UT_PF_XASCII_PASS;
  309         else text->pass_flags &= ~UT_PF_XASCII_PASS;
  310         
  311         if (text->pass_flags & UT_PF_DISTRIB_PASS) text->progress_todo++;
  312         if (text->pass_flags & UT_PF_EOL_PASS) text->progress_todo++;
  313         if (text->pass_flags & UT_PF_XASCII_PASS) text->progress_todo++;
  314     } else {
  315         text->pass_flags &= ~(UT_PF_DISTRIB_PASS | UT_PF_EOL_PASS | UT_PF_XASCII_PASS);
  316     }
  317         
  318     if (text->pass_flags & UT_PF_CONVERT ) text->progress_todo++;
  319     
  320     return UT_OK;
  321 }
  322 
  323 /*! \brief Load a file in an UtText structure
  324  *
  325  * If filename is null, it will read stdin. text->data and text->size will be set.
  326  * If ut_session->progress_function is set, it will be called during loading and members of
  327  * text dealing with this feature will be updated.
  328  */
  329 
  330 UtCode ut_load (UtText *text, const char * filename) {
  331 
  332     ASSERT (text);
  333 
  334     if (text->pass_flags==UT_PF_UNSET) {
  335         text->pass_flags |= UT_PF_LOAD | UT_PF_RECOGNIZE;
  336         ut_init_progress(text);
  337     }
  338     
  339     if (ut_session->progress_function && text->progress_done == 0.0) ut_update_progress (text, 0, true);
  340 
  341     text->current_pass = UT_PF_LOAD;
  342 
  343     if (filename) {
  344         UT_TRY ( ut_load_file_pass (text, filename) )
  345     } else {
  346         UT_TRY ( ut_load_stdin_pass (text) )
  347     }
  348 
  349     text->current_pass = UT_PF_NONE;
  350     
  351     if (ut_session->progress_function) {
  352         text->progress_done+= (1-text->progress_done)/text->progress_todo;
  353         text->progress_todo--;
  354     }
  355     
  356     //if (ut_session->progress_function && text->progress_done == 0.0) ut_update_progress (text, 0, true);
  357     if (ut_session->progress_function && text->progress_todo == 0) ut_update_progress (text, 0, true);
  358         
  359     return UT_OK;   
  360 }
  361 
  362 
  363 /*! \brief Recognize charset and EOL of a text.
  364  *
  365  * text->data must be set. If text->size is null, recognition will stop at the first
  366  * null character. text->flags must also be set to select processes to do (see UtTextFlags).
  367  * 
  368  * If ut_session->progress_function is set, it will be called during loading and members of
  369  * text dealing with this feature will be updated.
  370  *
  371  * If UT_F_FORCE_BINARY is set, texts with caracters between 0 and 0x19 (space is 0x20, and TAB, CR, 
  372  * LF are excluded of this range) won't produce error.
  373   *
  374  *
  375  * If UT_F_IDENTIFY_EOL is set, text->eol, text->eol_alt, text->nb_lines, text->nb_lines_alt will be
  376  * updated. If convertion of EOL is planned, UT_F_TRANSFORM_EOL must be set.
  377  *
  378  * If UT_F_IDENTIFY_CHARSET is set, text->charset will be updated. text->evaluation also if charset
  379  * is 8bits and ASCII -erivated.
  380 
  381  * text->distribution will always be set, text->ext_char also (but this is a bug!)
  382  *
  383  * If ut_session->progress_function is set, it will be called during loading and members of
  384  * text dealing with this feature will be updated. 
  385 */
  386 
  387 UtCode ut_recognize (UtText *text) {
  388     
  389     if (!text || !text->data) return UT_BAD_PARAMETER_ERROR;
  390 
  391     if (text->pass_flags==UT_PF_UNSET) ut_init_progress(text);
  392 
  393     if (ut_session->progress_function && text->progress_done == 0.0) ut_update_progress (text, 0, true);
  394     
  395     //FIRST PASS
  396     if (text->pass_flags & UT_PF_DISTRIB_PASS) {
  397         text->current_pass = UT_PF_DISTRIB_PASS | UT_PF_RECOGNIZE;
  398         int rcode = ut_distrib_utf_pass (text);
  399         text->current_pass = UT_PF_NONE;
  400 
  401         if (rcode == UT_BINARY_DATA_ERROR) {
  402             if ( !(text->flags & UT_F_FORCE_BINARY)) return rcode;
  403         } else if ( rcode != UT_OK) return rcode;
  404         
  405         if (text->charset != UT_UNSET && text->pass_flags & UT_PF_XASCII_PASS) {
  406             text->pass_flags &= ~UT_PF_XASCII_PASS | UT_PF_RECOGNIZE;
  407             text->progress_todo--;          
  408         }
  409     
  410         if (ut_session->progress_function) {
  411             text->progress_done+= (1-text->progress_done)/text->progress_todo;
  412             text->progress_todo--;
  413         }
  414     }
  415 
  416     // set text->skip_char
  417     if (text->flags & UT_F_REMOVE_ILLEGAL_CHAR ) {
  418         text->skip_char = UT_SKIP_CHAR;
  419     } else {
  420         //if control code accepted in file, try to find one not used
  421         int i; for (i=1; i<0x20; i++) {
  422             if (i==UT_EOL_ALT_CHAR || i== 0x9|| i==0xA || i==0xD) continue; //UT_EOL_CHAR and UT_EOF_CHAR = 0
  423             if (!text->distribution[i]) break;
  424         }
  425         if (i!=0x20) text->skip_char = i; 
  426         else text->skip_char = UT_SKIP_CHAR; //all control code used, nevermind, we use UT_SKIP_CHAR
  427     }
  428 
  429     //ASSERT (text->flags & UT_F_TRANSFORM_EOL)
  430     
  431     //SECOND PASS
  432     if (text->pass_flags & UT_PF_EOL_PASS) {
  433         text->current_pass = UT_PF_EOL_PASS | UT_PF_RECOGNIZE;
  434         UT_TRY ( ut_eol_pass (text) )
  435         text->current_pass = UT_PF_NONE;
  436         if (ut_session->progress_function) {
  437             text->progress_done+= (1-text->progress_done)/text->progress_todo;
  438             text->progress_todo--;
  439         }
  440     }
  441 
  442     //THIRD PASS
  443     if ( text->pass_flags & UT_PF_XASCII_PASS ) {
  444         text->current_pass = UT_PF_XASCII_PASS | UT_PF_RECOGNIZE;
  445         UT_TRY ( ut_xascii_pass (text) )
  446         text->current_pass = UT_PF_NONE;
  447         if (ut_session->progress_function) {
  448             text->progress_done+= (1-text->progress_done)/text->progress_todo;
  449             text->progress_todo--;
  450         }
  451     }
  452 
  453     if (ut_session->progress_function && text->progress_todo == 0) ut_update_progress (text, 0, true);
  454     
  455     return UT_OK;   
  456 }
  457 
  458 
  459 /*!
  460  * \brief Convert a text.
  461  *
  462  * \param src_text source text, with input eol and charset set.
  463  * \param dst_text destination text, with output eol and charset set. If it is null, src_text will be replaced
  464  * by the destination text, and output eol and charset will be selectionned from  ut_session.
  465  *
  466  * If ut_session->progress_function is set, it will be called during loading and members of
  467  * text dealing with this feature will be updated. 
  468  */
  469 
  470 UtCode ut_convert (UtText *src_text, UtText *dst_text) {
  471     
  472     if (!src_text || !src_text->data) return UT_BAD_PARAMETER_ERROR;
  473 
  474     ASSERT (src_text->eol != UT_EOL_UNSET)
  475     ASSERT (src_text->charset != UT_UNSET)
  476     ASSERT (src_text->distribution)
  477 
  478     bool same_text = false;
  479     if (!dst_text) {
  480         same_text = true;
  481         dst_text = ut_init_text_heap ();
  482         if (!dst_text) return UT_MALLOC_ERROR;
  483     }
  484     
  485     ASSERT (dst_text)
  486     
  487     if (src_text->pass_flags==UT_PF_UNSET) {
  488         src_text->pass_flags |= UT_PF_CONVERT;
  489         ut_init_progress(src_text);
  490     }
  491 
  492     
  493     if (ut_session->progress_function && src_text->progress_done == 0.0) ut_update_progress (src_text, 0, true);
  494     
  495     if (dst_text->eol     == UT_EOL_UNSET)  dst_text->eol       = ut_session->eol_default;
  496     if (dst_text->eol_alt == UT_EOL_UNSET)  dst_text->eol_alt   = ut_session->eol_alt_default;
  497     if (dst_text->charset == UT_UNSET)      dst_text->charset   = ut_session->charset_default;
  498 
  499     src_text->current_pass = UT_PF_CONVERT;
  500     UT_TRY  ( ut_conversion_pass (src_text, dst_text) )
  501     src_text->current_pass = UT_PF_NONE;
  502 
  503     if (ut_session->progress_function) {
  504         src_text->progress_done+= (1-src_text->progress_done)/src_text->progress_todo;
  505         src_text->progress_todo--;
  506     }
  507     
  508     if (ut_session->progress_function && src_text->progress_todo == 0) ut_update_progress (src_text, 0, true);
  509         
  510     if (same_text) {
  511         free (src_text->data);
  512         src_text->data = dst_text->data;
  513         dst_text->data = NULL;
  514         src_text->size = dst_text->size ;
  515         src_text->eol = dst_text->eol ;
  516         src_text->eol_alt = dst_text->eol_alt ;
  517         src_text->charset = dst_text->charset ;
  518         free (src_text->distribution);
  519         src_text->distribution = NULL;
  520         while (src_text->ext_char) {
  521             UtExtCharLine * tmp = src_text->ext_char;
  522             src_text->ext_char = src_text->ext_char->next;
  523             free (tmp);
  524         } src_text->ext_char = NULL;
  525         free(src_text->evaluation); 
  526         src_text->evaluation = NULL;
  527         ut_free_text_heap (dst_text);
  528     }
  529 
  530     return UT_OK;   
  531 }
  532 
  533 
  534 
  535 
  536 /***************************************************************************/
  537 /* OLD DOC!!!!
  538  * \brief Recognize charset and EOL type of a text, and optionally convert it.
  539  * 
  540  * This function takes an UtText structure as a parameter and does several tasks:
  541  * -# it loads the file (or reads the standard input),
  542  * -# it calculates the frequency distribution of each byte in the file
  543  *    (UtText::distribution), checks if the file is binary data or text,
  544  *    checks if it is ASCII or UTF-8,
  545  * -# it recognizes the EOL type, and replaces each EOL by a null character to make
  546  *    further processing of the file easier (this feature can be disabled).
  547  * -# if the charset has not been determined earlier as ASCII or UTF-8, it tries
  548  *    to detect which known charset fits the text best.
  549  * -# it optionally converts the text, replacing EOL and extended characters by
  550  *    those corresponding to the selection of the user and/or the result of the recognition.
  551  *
  552  * \param text Text to recognize and optionally convert. Some members must be set
  553  *        before calling this function, but some others are optional. Members that
  554  *        select the input text are:
  555  *        - UtText::data: Pointer to the text to process (which must be null terminated).
  556  *          If NULL, UtText::filename is used.
  557  *        - UtText::filename: Path to the file containing the text to process, which will
  558  *          be loaded if UtText::data is NULL. If NULL, standard input is read.
  559  *        - UtText::size: If UtText::data is set, this member can also be set to indicate
  560  *          the size of the text; if null, the first null character will determine the
  561  *          end of the text.
  562  * 
  563  * Members that modify the recognition or the conversion are:
  564  *  - UtText::flags: Flags to customize the processing and the modification of the text.
  565  *    Set initially to UT_F_DEFAULT.
  566  *  - UtText::src_eol and UtText::src_charset: EOL type and charset of the text used as
  567  *    source for the conversion. If unset, the values taken are those recognized automatically.
  568  *  - UtText::dst_eol and UtText::dst_charset: EOL type and charset of the text resulting from
  569  *    the conversion. If unset, the values taken are the defaults found by ut_init().
  570  *  - UtText::nomapping_char: Character inserted during the conversion each time an error occurs.
  571  * 
  572  * Misc member:
  573  *  - UtText::progress_function: Custom function provided by the user to refresh a progress bar.
  574  *
  575  * \param convert If true, conversion is performed after recognition.
  576  *  
  577  * \return UT_OK on success, error code on failure (see UtCode).
  578  */