"Fossies" - the Fresh Open Source Software Archive

Member "utrac-0.3.2/src/utrac.h" (4 Jan 2009, 8438 Bytes) of package /linux/privat/old/utrac-0.3.2.tgz:


As a special service, "Fossies" has tried to format the requested source page into HTML using (guessed) C and C++ source code syntax highlighting (style: standard) with prefixed line numbers and a code folding option. Alternatively, you can view or download the uninterpreted source code file here. For more information about "utrac.h", see the Fossies "Dox" file reference documentation.

    1 /***************************************************************************
    2  *            utrac.h
    3  *
    4  *  Tue Oct  5 11:28:44 2004
    5  *  Copyright  2004  Alliance MCA
    6  *  Written by : Antoine Calando (antoine@alliancemca.net)
    7  ****************************************************************************/
    8 
    9 /*
   10  *  This program is free software; you can redistribute it and/or modify
   11  *  it under the terms of the GNU General Public License as published by
   12  *  the Free Software Foundation; either version 2 of the License, or
   13  *  (at your option) any later version.
   14  *
   15  *  This program is distributed in the hope that it will be useful,
   16  *  but WITHOUT ANY WARRANTY; without even the implied warranty of
   17  *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
   18  *  GNU Library General Public License for more details.
   19  *
   20  *  You should have received a copy of the GNU General Public License
   21  *  along with this program; if not, write to the Free Software
   22  *  Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
   23  */
   24  
   25 /*!
   26  * \file utrac.h
   27  * \author Antoine Calando (antoine@alliancemca.net)
   28  */
   29 
   30 #ifndef _UTRAC_H_
   31 #define _UTRAC_H_
   32 
   33 #ifndef __cplusplus
   34 typedef unsigned short int bool;    //!< Boolean type for C builds only (C++ builds use the built-in bool). NOTE(review): the two may differ in size - confirm no bool crosses a C/C++ boundary.
   35 #define true 1      //!< Truth value for the C-only bool.
   36 #define false 0     //!< False value for the C-only bool.
   37 #else
   38 extern "C" {        // C++ builds: give the declarations below C linkage; the brace is closed at the end of this header.
   39 #endif
   40 
   41 //#include "debug.h"
   42 #include <sys/types.h>
   43 #include "ut_error.h"
   44 #include "ut_text.h"
   45 #include "ut_charset.h"
   46 
   47 #define UT_VERSION      "0.3.0" //!< Utrac version string. NOTE(review): the archive is labelled 0.3.2 - confirm this value is not stale.
   48 
   49 #define UT_EOL_CHAR     0x0     //!< Character code for end of line.
   50 #define UT_EOL_ALT_CHAR 0xD     //!< Character code for end of line 2 (see UtEolType).
   51 #define UT_SKIP_CHAR    0x1     //!< Character code for character to skip during conversion.
   52 #define UT_EOF_CHAR     0x0     //!< Character code for end of file (same value as UT_EOL_CHAR).
   53 #define UT_UNICODE_NONCHAR 0xFFFF   //!< Illegal character, also used to indicate "no character".
   54 //#define BUFFER_OFFSET 4
   55 
   56 #define UT_UNSET (-1) //!< Unset variable (often used for indexes). Parenthesized so it expands as a single operand.
   57 //#define UT_NO_CHANGE -2U
   58 
   59 #define UT_THRESHOLD_CONTROL_CHAR 0.05  //!< Maximum percentage of illegal control chars accepted in a file.
   60 #define UT_THRESHOLD_UTF8 0.01          //!< Maximum percentage of utf-8 errors in an UTF-8 file.
   61 
   62 #define UT_LOAD_STEP (1*1024*1024)    //!< Step in bytes between two calls of the "progress bar" function during loading. Parenthesized so that x / UT_LOAD_STEP and x % UT_LOAD_STEP use the whole product.
   63 #define UT_PROCESS_STEP (1*1024*1024) //!< Step in bytes between two calls of the "progress bar" function during processing. Parenthesized for the same reason as UT_LOAD_STEP.
   64 
   65 
   66 #define UT_COEF_MAX 5   //!< Maximum number of coefficients for languages and systems.
   67 
   68 #ifdef _UT_CHARSET_C_ // Defining branch: taken only where _UT_CHARSET_C_ is defined before this header is included.
   69 const float UT_LANG_SYS_COEF [UT_COEF_MAX] = { 1.0, 1.02, 1.04, 1.06, 1.10 };
   70 //const char * UT_CHARMAPS_FILENAME2 = "/home/antoine/dev/utrac/charsets.dat";
   71 // UT_CHARMAPS_FILENAME is not defined in this branch; it should be set with "gcc -D ..."
   72 const char * UT_CHARMAPS_FILENAME2 = "charsets.dat";
   73 const char * UT_DEFAULT_ENCODING_UNIX = "ISO-8859-1";
   74 #else // Declaring branch: every other includer only sees the extern declarations below.
   75 //! Language and system coefficients applied to charset rating, depending on the language or system selected.
   76 extern const float UT_LANG_SYS_COEF [];
   77 //! Path 1 to the file containing charset information.
   78 extern const char * UT_CHARMAPS_FILENAME;
   79 //! Path 2 to the file containing charset information.
   80 extern const char * UT_CHARMAPS_FILENAME2;
   81 //! Default encoding on Unix systems.
   82 extern const char * UT_DEFAULT_ENCODING_UNIX;
   83 #endif
   84 
   85 #define UT_LANG_SYS_ALLOC_STEP  8       //!< Initial size for dynamic array UtLangSys.
   86 #define UT_ERROR_STRING_SIZE    128     //!< Size of UtSession::error_string.
   87 #define UT_STDIN_BUFFER_SIZE    65536   //!< Initial size for dynamic buffer in ut_load_text_stdin().
   88 
   89 
   90 /***************************************************************************/
   91 /*!
   92  * \brief Structure listing the languages or systems in use.
   93  *
   94  * This structure is a dynamic array containing the list of languages
   95  * or systems defined in the charset data file.
   96  */
   97 typedef struct UtLangSys {
   98     char ** name;       //!< Array of language or system names.
   99     char * code;        //!< Array of codes made of two uppercase characters.
  100     ushort n;           //!< Number of languages or systems listed in UtLangSys::name and UtLangSys::code.
  101     ushort n_max;       //!< Number of languages or systems that UtLangSys::name or UtLangSys::code can contain.
  102 } UtLangSys ;
  103 
  104 
  105 /***************************************************************************/
  106 /*!
  107  * \brief Structure containing all required information for Utrac.
  108  *
  109  * This structure contains all the required information
  110  * for a Utrac session (charset data, language, system
  111  * and default charset...). Its unique instance can be accessed with the
  112  * ut_session pointer, which is defined as a global variable.
  113  * It is created with ut_init() and destroyed with ut_finish().
  114  * \sa ut_session.
  115  */
  116 
  117 typedef struct UtSession {
  118     struct UtCharset * charset;     //!< Charset array loaded from charset data file.
  119     int nb_charsets;                    //!< Number of charsets in UtSession::charset.
  120 
  121     UtLangSys language;             //!< List of languages used.
  122     UtLangSys system;               //!< List of systems used.
  123 
  124     int language_default;           //!< Index of default language (relative to UtSession::language).
  125     int system_default;             //!< Index of default system (relative to UtSession::system).
  126     UtEolType eol_default;          //!< Default type of end of line.
  127     UtEolType eol_alt_default;      //!< Default type 2 of end of line.
  128     UtCharsetIndex charset_default; //!< Default charset of the system.
  129 
  130     ulong nomapping_char;           //!< Character used if a character conversion error occurs. No character inserted if zero. Set by user.
  131     
  132     int (*progress_function)
  133     (UtText*,float);                //!< User function called during processing to update a progress bar. Can be NULL.
  134                                     //!< The function is called regularly (see UT_LOAD_STEP and UT_PROCESS_STEP),
  135                                     //!< with the float argument between 0.0 and 1.0, indicating the part of the job done.
  136                                     //!< The function is called only once with 0.0 (the first time) and once with 1.0 (the
  137                                     //!< last time), so these values can be checked to do initialisation and cleanup.
  138                                     //!< If this function returns 0, the processing is interrupted. Set by user.
  139 
  140     char * error_string;            //!< Error message (seldom used).
  141 } UtSession;
  142 
  143 #ifdef _UTRAC_C_
  144     UtSession * ut_session = NULL; //!< Points to the UtSession structure instantiated by ut_init().
  145 #else
  146     extern UtSession  * ut_session; //!< Declaration only; the definition is emitted where _UTRAC_C_ is defined.
  147 #endif
  148 //! Evaluates \a func once and, unless the result is UT_OK, returns it from the enclosing function. Expands to a bare { } block, so "if (x) UT_TRY(f()); else ..." does not compile.
  149 #define UT_TRY(func) \
  150 {\
  151     UtCode rcode = func;\
  152     if (rcode != UT_OK) return rcode;\
  153 }
  154 
  155 // Function prototypes, grouped under banners that each name a source file. Parameterless functions are declared (void) so that C compilers reject calls with stray arguments.
  156 // ********** utrac.c ********** 
  157 UtCode ut_init (void);
  158 UtCode ut_init_noalloc (void);
  159 void ut_finish (void);
  160 void ut_finish_nofree (void);
  161 UtText * ut_init_text_heap (void);
  162 void ut_init_text (UtText * new_text);
  163 void ut_free_text_heap (UtText *text);
  164 void ut_free_text (UtText * text);
  165 
  166 
  167 
  168 UtCode ut_init_progress (UtText *text);
  169 UtCode ut_load (UtText *text, const char * filename);
  170 UtCode ut_recognize (UtText *text);
  171 UtCode ut_convert (UtText *src_text, UtText *dst_text);
  172 
  173 //UtCode ut_process_text (UtText * text, bool convert);
  174 
  175 // ********** utils.c ********** 
  176 UtCode ut_load_charsets (void);
  177 UtCode ut_load_charset_file (const char * filename, char ** buffer);
  178 UtCharsetIndex ut_find_charset (char * charset_name);
  179 UtEolType ut_find_eol (char * eol_name);
  180 int ut_find_lang_sys (char * language_name, UtLangSys * lang_sys);
  181 
  182 double ut_get_charset_coef (UtCharsetIndex i);
  183 bool ut_str_fuzzy_cmp (const char *str1, const char *str2, char stop_char);
  184 
  185 bool ut_update_progress (struct UtText *, ulong, bool);
  186 
  187 ulong ut_crc32 (ushort , ulong);
  188 
  189 void ut_print_binary (ulong src);
  190 UtCode ut_debug_text (struct UtText *);
  191 UtCode ut_debug_text_rating (struct UtText *);
  192 
  193 // ************** ut_messages.c *************
  194 const char * ut_error_message (UtCode code);
  195 
  196 // ************** ut_load.c *************
  197 UtCode ut_load_file_pass (UtText *text, const char * filename);
  198 UtCode ut_load_stdin_pass (UtText *text);
  199 
  200 // ********** ut_recognition1.c ********** 
  201 UtCode ut_distrib_utf_pass (struct UtText *);
  202 UtCode ut_eol_pass (struct UtText *);
  203 
  204 // ********** ut_recognition2.c ********** 
  205 UtCode ut_xascii_pass  (struct UtText *);
  206 
  207 // ********** ut_conversion.c ********** 
  208 int ut_size_char (char **src_p, UtCharsetIndex src_charset, UtCharsetIndex dst_charset);
  209 void ut_conv_char (char ** src_p, char ** dst_p, UtCharsetIndex src_charset, UtCharsetIndex dst_charset);
  210 void ut_insert_eol (char ** dst_p, UtEolType dst_eol);
  211 
  212 uint ut_count_ext_char (UtText * text);
  213 int ut_size_difference (UtText * src_text, UtText * dst_text);
  214     
  215 UtCode ut_conversion_pass (UtText * src_text, UtText * dst_text);
  216 
  217 
  218 #ifdef __cplusplus
  219 } // extern "C"
  220 #endif
  221 
  222 #endif // _UTRAC_H_