"Fossies" - the Fresh Open Source Software Archive 
Member "utrac-0.3.2/src/utrac.h" (4 Jan 2009, 8438 Bytes) of package /linux/privat/old/utrac-0.3.2.tgz:
As a special service "Fossies" has tried to format the requested source page into HTML format using (guessed) C and C++ source code syntax highlighting (style:
standard) with prefixed line numbers and
code folding option.
Alternatively, you can view or download the uninterpreted source code file here.
For more information about "utrac.h" see the
Fossies "Dox" file reference documentation.
1 /***************************************************************************
2 * utrac.h
3 *
4 * Tue Oct 5 11:28:44 2004
5 * Copyright 2004 Alliance MCA
6 * Written by : Antoine Calando (antoine@alliancemca.net)
7 ****************************************************************************/
8
9 /*
10 * This program is free software; you can redistribute it and/or modify
11 * it under the terms of the GNU General Public License as published by
12 * the Free Software Foundation; either version 2 of the License, or
13 * (at your option) any later version.
14 *
15 * This program is distributed in the hope that it will be useful,
16 * but WITHOUT ANY WARRANTY; without even the implied warranty of
17 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18 * GNU Library General Public License for more details.
19 *
20 * You should have received a copy of the GNU General Public License
21 * along with this program; if not, write to the Free Software
22 * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
23 */
24
25 /*!
26 * \file utrac.h
27 * \author Antoine Calando (antoine@alliancemca.net)
28 */
29
30 #ifndef _UTRAC_H_
31 #define _UTRAC_H_
32
/*
 * Boolean type and C++ linkage.
 *
 * C++ translation units keep their built-in bool and open an extern "C"
 * block (closed at the end of this header).  C translation units get a
 * project-local `bool` -- an unsigned short, not C99 _Bool -- together
 * with the `true` / `false` macros.
 *
 * NOTE(review): the built-in C++ bool may not have the same size as
 * unsigned short; confirm that bool-typed parameters and return values
 * are safe across the C / C++ boundary.
 */
#ifdef __cplusplus
extern "C" {
#else
typedef unsigned short int bool;
#define true 1
#define false 0
#endif
40
41 //#include "debug.h"
42 #include <sys/types.h>
43 #include "ut_error.h"
44 #include "ut_text.h"
45 #include "ut_charset.h"
46
// NOTE(review): the distribution archive is named utrac-0.3.2 while this
// string says 0.3.0 -- confirm which one is authoritative.
#define UT_VERSION "0.3.0"

#define UT_EOL_CHAR	0x0	//!< Character code for end of line.
#define UT_EOL_ALT_CHAR	0xD	//!< Character code for end of line 2 (see UtEolType).
#define UT_SKIP_CHAR	0x1	//!< Character code for character to skip during conversion.
#define UT_EOF_CHAR	0x0	//!< Character code for end of file (same value as UT_EOL_CHAR).
#define UT_UNICODE_NONCHAR	0xFFFF	//!< Illegal character, also used to indicate "no character".
//#define BUFFER_OFFSET	4

// Parenthesized so that the sign cannot bind to a neighbouring operator.
#define UT_UNSET	(-1)	//!< Unset variable (often used for indexes).
//#define UT_NO_CHANGE	-2U

// NOTE(review): documented as percentages, but the values look like
// fractions (0.05 = 5 %) -- confirm against the code that compares them.
#define UT_THRESHOLD_CONTROL_CHAR	0.05	//!< Maximum percentage of illegal control chars accepted in a file.
#define UT_THRESHOLD_UTF8	0.01	//!< Maximum percentage of utf-8 errors in an UTF-8 file.

// The products are parenthesized: without the parentheses an expression such
// as `pos / UT_LOAD_STEP` would expand to `pos / 1*1024*1024`, i.e.
// `((pos / 1) * 1024) * 1024`, and `pos % UT_LOAD_STEP` would always yield 0.
#define UT_LOAD_STEP	(1*1024*1024)	//!< Step in bytes between two calls of the "progress bar" function during loading.
#define UT_PROCESS_STEP	(1*1024*1024)	//!< Step in bytes between two calls of the "progress bar" function during processing.


#define UT_COEF_MAX	5	//!< Maximum number of coefficients for languages and systems.
67
/*
 * Global constants shared by the library.
 *
 * The translation unit that defines _UT_CHARSET_C_ before including this
 * header provides the definitions; every other translation unit only sees
 * the extern declarations.
 *
 * NOTE(review): UT_CHARMAPS_FILENAME is declared below but not defined in
 * the defining branch; the original comment says it "should be set with
 * gcc -D ...".  Confirm where its definition actually lives.
 */
#if !defined(_UT_CHARSET_C_)

//! Language and system coefficients applied to charset rating, depending on language or system selected.
extern const float UT_LANG_SYS_COEF [];
//! Path 1 to file containing charset information.
extern const char * UT_CHARMAPS_FILENAME;
//! Path 2 to file containing charset information.
extern const char * UT_CHARMAPS_FILENAME2;
//! Default encoding on Unix systems.
extern const char * UT_DEFAULT_ENCODING_UNIX;

#else /* _UT_CHARSET_C_ */

const float UT_LANG_SYS_COEF [UT_COEF_MAX] = { 1.0, 1.02, 1.04, 1.06, 1.10 };
//const char * UT_CHARMAPS_FILENAME2 = "/home/antoine/dev/utrac/charsets.dat";
//UT_CHARMAPS_FILENAME should be set with "gcc -D ..."
const char * UT_CHARMAPS_FILENAME2 = "charsets.dat";
const char * UT_DEFAULT_ENCODING_UNIX = "ISO-8859-1";

#endif /* _UT_CHARSET_C_ */
84
//! Initial size for dynamic array UtLangSys.
#define UT_LANG_SYS_ALLOC_STEP	8
//! Size of UtSession::error_string.
#define UT_ERROR_STRING_SIZE	128
//! Initial size for dynamic buffer in ut_load_text_stdin().
#define UT_STDIN_BUFFER_SIZE	65536
88
89
/***************************************************************************/
/*!
 * \brief Structure containing different languages or systems used
 *
 * This structure is a dynamic array containing the list of languages
 * or systems defined in the charset data file.
 *
 * The counters use the standard `unsigned short` spelling rather than the
 * `ushort` typedef, which <sys/types.h> only provides as a non-standard
 * extension.
 */
typedef struct UtLangSys {
	char ** name;		//!< Array of names of language or system.
	char * code;		//!< Array of codes, each on two uppercase characters.
				//!< NOTE(review): presumably two chars per entry, stored back to back -- confirm against the loader.
	unsigned short n;	//!< Number of languages or systems listed in UtLangSys::name and UtLangSys::code.
	unsigned short n_max;	//!< Number of languages or systems that UtLangSys::name or UtLangSys::code can contain.
} UtLangSys ;
103
104
105 /***************************************************************************/
106 /*!
107 * \brief Structure containing all required information for Utrac.
108 *
109 * This structure contains all the required information
110 * for an utrac session (charsets data, language, system
111 * and charset default...). Its unique instance can be accessed with the
112 * ut_session pointer which is defined as a global variable.
113 * It is created with ut_init() and destroyed with ut_finish().
114 * \sa ut_session.
115 */
116
117 typedef struct UtSession {
118 struct UtCharset * charset; //!< Charset array loaded from charset data file.
119 int nb_charsets; //!< Number of charsets in UtSession::charset.
120
121 UtLangSys language; //!< List of languages used.
122 UtLangSys system; //!< List of languages used.
123
124 int language_default; //!< Index of default language (relative to UtSession:: #language).
125 int system_default; //!< Index of default system (relative to UtSession::system).
126 UtEolType eol_default; //!< Default type of end of line.
127 UtEolType eol_alt_default; //!< Default type 2 of end of line.
128 UtCharsetIndex charset_default; //!< Default charset of the system.
129
130 ulong nomapping_char; //!< Character used if a character conversion error occurs. No character inserted if zero. Set by user.
131
132 int (*progress_function)
133 (UtText*,float); //!< User function called during processing to update a progress bar. Can be NULL.
134 //!< The function is called regularly (see UT_LOAD_STEP and UT_PROCESS_STEP),
135 //!< with the float argument betwwen 0.0 and 1.0, indicating the part of the job done.
136 //!< The function is called only once with 0.0 (the first time) and once with 1.0 (the
137 //!< last time), so these values can be checked to do initialisation and cleanup.
138 //!< If this function return 0, the processing is interrupted. Set by user.
139
140 char * error_string; //!< error message (seldom used).
141 } UtSession;
142
143 #ifdef _UTRAC_C_
144 UtSession * ut_session = NULL; //!< Point to the UtSession structure instanciated by ut_init().
145 #else
146 extern UtSession * ut_session;
147 #endif
148
/*
 * Evaluate `func` once and, if the result is not UT_OK, return it from the
 * enclosing function (which must therefore return UtCode or a compatible
 * type).
 *
 * - The argument is parenthesized so that any expression can be passed.
 * - The temporary has a reserved-looking name so that it cannot shadow a
 *   variable used inside `func` (with a temporary called `rcode`,
 *   UT_TRY(rcode) would initialise the temporary from itself).
 * - The expansion stays a plain compound statement, so existing call sites
 *   compile with or without a trailing semicolon.  As a consequence,
 *   `if (c) UT_TRY(f()); else ...` does not compile: put braces around the
 *   macro call in that situation.
 */
#define UT_TRY(func) \
	{\
		UtCode ut_try_rcode_ = (func);\
		if (ut_try_rcode_ != UT_OK) return ut_try_rcode_;\
	}
154
155
156 // ********** utrac.c **********
157 UtCode ut_init ();
158 UtCode ut_init_noalloc ();
159 void ut_finish ();
160 void ut_finish_nofree ();
161 UtText * ut_init_text_heap ();
162 void ut_init_text (UtText * new_text);
163 void ut_free_text_heap (UtText *text);
164 void ut_free_text (UtText * text);
165
166
167
168 UtCode ut_init_progress (UtText *text);
169 UtCode ut_load (UtText *text, const char * filename);
170 UtCode ut_recognize (UtText *text);
171 UtCode ut_convert (UtText *src_text, UtText *dst_text);
172
173 //UtCode ut_process_text (UtText * text, bool convert);
174
175 // ********** utils.c **********
176 UtCode ut_load_charsets ();
177 UtCode ut_load_charset_file (const char * filename, char ** buffer);
178 UtCharsetIndex ut_find_charset (char * charset_name);
179 UtEolType ut_find_eol (char * eol_name);
180 int ut_find_lang_sys (char * language_name, UtLangSys * lang_sys);
181
182 double ut_get_charset_coef (UtCharsetIndex i);
183 bool ut_str_fuzzy_cmp (const char *str1, const char *str2, char stop_char);
184
185 bool ut_update_progress (struct UtText *, ulong, bool);
186
187 ulong ut_crc32 (ushort , ulong);
188
189 void ut_print_binary (ulong src);
190 UtCode ut_debug_text (struct UtText *);
191 UtCode ut_debug_text_rating (struct UtText *);
192
193 // ************** ut_messages.c *************
194 const char * ut_error_message (UtCode code);
195
196 // ************** ut_load.c *************
197 UtCode ut_load_file_pass (UtText *text, const char * filename);
198 UtCode ut_load_stdin_pass (UtText *text);
199
200 // ********** ut_recognition1.c **********
201 UtCode ut_distrib_utf_pass (struct UtText *);
202 UtCode ut_eol_pass (struct UtText *);
203
204 // ********** ut_recognition2.c **********
205 UtCode ut_xascii_pass (struct UtText *);
206
207 // ********** ut_conversion.c **********
208 int ut_size_char (char **src_p, UtCharsetIndex src_charset, UtCharsetIndex dst_charset);
209 void ut_conv_char (char ** src_p, char ** dst_p, UtCharsetIndex src_charset, UtCharsetIndex dst_charset);
210 void ut_insert_eol (char ** dst_p, UtEolType dst_eol);
211
212 uint ut_count_ext_char (UtText * text);
213 int ut_size_difference (UtText * src_text, UtText * dst_text);
214
215 UtCode ut_conversion_pass (UtText * src_text, UtText * dst_text);
216
217
218 #ifdef __cplusplus
219 } // extern "C"
220 #endif
221
222 #endif // _UTRAC_H_