"Fossies" - the Fresh Open Source Software Archive 
Member "utrac-0.3.2/src/ut_utils.c" (4 Jan 2009, 11006 Bytes) of package /linux/privat/old/utrac-0.3.2.tgz:
As a special service "Fossies" has tried to format the requested source page into HTML format using (guessed) C and C++ source code syntax highlighting (style:
standard) with prefixed line numbers and
code folding option.
Alternatively you can here
view or
download the uninterpreted source code file.
For more information about "ut_utils.c" see the
Fossies "Dox" file reference documentation.
1 /***************************************************************************
2 * ut_utils.c
3 *
4 * Tue Oct 5 11:29:53 2004
5 * Copyright 2004 Alliance MCA
6 * Written by : Antoine Calando (antoine@alliancemca.net)
7 ****************************************************************************/
8
9 /*
10 * This program is free software; you can redistribute it and/or modify
11 * it under the terms of the GNU General Public License as published by
12 * the Free Software Foundation; either version 2 of the License, or
13 * (at your option) any later version.
14 *
15 * This program is distributed in the hope that it will be useful,
16 * but WITHOUT ANY WARRANTY; without even the implied warranty of
17 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18 * GNU Library General Public License for more details.
19 *
20 * You should have received a copy of the GNU General Public License
21 * along with this program; if not, write to the Free Software
22 * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
23 */
24
25 /*!
26 * \file ut_utils.c
27 * \brief Various internal functions
28 * \author Antoine Calando (antoine@alliancemca.net)
29 */
30
31 #include <sys/stat.h>
32 #include <unistd.h>
33 #include <float.h>
34 #include <fcntl.h>
35 #include <stdio.h>
36 #include <stdlib.h>
37 #include <limits.h> //for SSIZE_MAX
38
39 #include <endian.h>
40 #include <byteswap.h>
41
42 #include "utrac.h"
43 #include "ut_text.h"
44 #include "ut_charset.h"
45
46 //#undef UT_DEBUG
47 //#define UT_DEBUG 3
48 #include "debug.h"
49
50 /***************************************************************************/
51 /*!
52 * \brief Load a file in a buffer
53 *
54 * \param filename Path to the file.
55 * \param buffer Pointer used to return buffer address. Buffer must be free after used by user.
56 *
57 * \return UT_OK on succes, error code otherwise.
58 *
59 * \bug EC il n'y a qu'un appel à read or lorsque read!=size ce n'est pas une erreur si errno
60 * vaut EAGAIN.
61 * AC read() ne peut pas renvoyer EAGAIN si le fichier n'a pas été open() en mode EAGAIN
62 * (norme POSIX)
63 */
64 UtCode ut_load_charset_file (const char * filename, char ** buffer) {
65
66 DBG3 ("Loading file %s...", filename)
67
68 int fd = open (filename, O_RDONLY);
69 if (fd==-1) return UT_OPEN_FILE_ERROR;
70
71 struct stat f_stat;
72 if (fstat (fd, &f_stat)) return UT_FSTAT_FILE_ERROR;
73
74 //some space is needed to add an EOL and an EOF
75 *buffer = (char*) malloc (f_stat.st_size + 2);
76 if (!*buffer) return UT_MALLOC_ERROR;
77
78 int code=read (fd, *buffer, f_stat.st_size);
79 if (code<=0) return UT_READ_FILE_ERROR;
80 if (code!=f_stat.st_size) return UT_READ_FILE_ERROR2;
81
82 DBG3 ("File %s (%lu b) loaded!", filename, f_stat.st_size)
83
84 *(*buffer+f_stat.st_size) = '\n';
85 *(*buffer+f_stat.st_size+1) = UT_EOF_CHAR;
86
87 if (close(fd)) return UT_CLOSE_FILE_ERROR;
88
89 return UT_OK;
90 }
91
92
93
94 /***************************************************************************/
95 /*!
96 * \brief Print a number in binary form on stdout (debug).
97 *
98 * \param src Number to print.
99 *
100 * \bug pas de gestion big/little endian
101 */
102 void ut_print_binary (ulong src) {
103
104 int i; for (i=0; i<16; i++) {
105 if (src&1<<15) putchar('x');
106 else putchar ('-');
107 src<<=1;
108 if (!((i+1)%4)) putchar(' ');
109 }
110
111 }
112
113 /***************************************************************************/
114 /*!
115 * \brief Print content of a UtText structure (debug)
116 */
117 UtCode ut_debug_text (UtText * text) {
118
119 ASSERT (text);
120
121 printf ("=====> Structure UtText :\n");
122 //data
123 printf ("- size : %lu - %luk - %lum\n", text->size, text->size/1024, text->size/1024/1024);
124 printf ("- lines1 : %lu - %luk\n", text->nb_lines, text->nb_lines/1024);
125 printf ("- lines2 : %lu - %luk\n", text->nb_lines_alt, text->nb_lines_alt/1024);
126 printf ("- skip char : <%c>\n", text->skip_char);
127 printf ("- flags : "); ut_print_binary (text->flags); putchar('\n');
128 //distrib
129 //ext_char
130 //charmap
131 printf ("- eol1 : <%d>\n", text->eol);
132 printf ("- eol2 : <%d>\n", text->eol_alt);
133 printf ("- charset : <%hu>", text->charset);
134 if (text->charset != UT_UNSET) printf (" (%s)",
135 ut_session->charset[text->charset].name);
136 putchar('\n');
137 //convert eol
138 //convert charset
139 return UT_OK;
140 }
141
142 /***************************************************************************/
143 /*!
144 * \brief Print content of a UtText::evaluation array (debug)
145 */
146 UtCode ut_debug_text_rating (UtText * text) {
147
148 ASSERT (text);
149 if (!text->evaluation) return UT_OK;
150
151 int i; for (i=0; i<ut_session->nb_charsets; i++) {
152 printf ("=> %2i: chk:%11lx rtg:%6ld %s\n", i, text->evaluation[i].checksum,
153 text->evaluation[i].rating, ut_session->charset[i].name);
154 }
155
156 return UT_OK;
157 }
158
//@{
/** \brief Utility functions for ut_str_fuzzy_cmp().
 *
 * These functions test if a character is an uppercase letter (is_maj), a
 * lowercase letter (is_min), a letter of either case (is_letter) or a
 * decimal digit (is_num). Only the ranges 'A'-'Z', 'a'-'z' and '0'-'9' are
 * recognised.
 */

static inline bool is_maj (char c) { return !(c<'A' || c>'Z'); }
static inline bool is_min (char c) { return !(c<'a' || c>'z'); }
/* Clearing bit 0x20 maps 'a'-'z' onto 'A'-'Z', so a single range test is enough. */
static inline bool is_letter (char c) { return is_maj ((char) (c & ~0x20)); }
static inline bool is_num (char c) { return !(c<'0' || c>'9'); }
// @}
167
168
169
170 /*! \brief get charset index from a string
171
172 */
173
174 UtCharsetIndex ut_find_charset (char * charset_name) {
175
176 ASSERT (charset_name)
177
178 UtCharsetIndex i;
179 for (i=0; i<ut_session->nb_charsets; i++) {
180 if ( ut_session->charset[i].name &&
181 ut_str_fuzzy_cmp (charset_name, ut_session->charset[i].name, 0)) break;
182 if ( ut_session->charset[i].alias &&
183 ut_str_fuzzy_cmp (charset_name, ut_session->charset[i].alias, 0)) break;
184 }
185
186 if (i==ut_session->nb_charsets) return UT_UNSET;
187 else return i;
188 }
189
190 UtEolType ut_find_eol (char * eol_name) {
191
192 ASSERT (eol_name)
193
194 UtEolType j;
195 for (j= UT_EOL_CR; j<UT_EOL_NONE; j++)
196 if ( UT_EOL_NAME[j] && ut_str_fuzzy_cmp (eol_name, UT_EOL_NAME[j], 0) ) break;
197
198 if (j==UT_EOL_NONE) return UT_EOL_UNSET;
199 else return j;
200 }
201
202 int ut_find_lang_sys (char * language_name, UtLangSys * lang_sys) {
203
204 int language_id;
205 char ln[2];
206
207 ln[0] = language_name[0];
208 ln[1] = language_name[1];
209 if ('a'<= ln[0] && ln[0] <= 'z' ) ln[0] += 'A'-'a';
210 if ('a'<= ln[1] && ln[1] <= 'z' ) ln[1] += 'A'-'a';
211
212 for (language_id=0; language_id < lang_sys->n; language_id++) {
213 if ( ln[0] == lang_sys->code[language_id*2+0] &&
214 ln[1] == lang_sys->code[language_id*2+1]) break;
215 }
216
217 if (language_id == lang_sys->n) return UT_UNSET;
218
219 return language_id;
220 }
221
222
223
224 /***************************************************************************/
225 /*!
226 * \brief Approximative comparaison between two strings.
227 *
228 * The comparaison focuses only on substrings composed of number or letter
229 * (case is not significant). For instance "iso8859 1"=="ISO-8859-1",
230 * but "Mac Roman"!="MacRoman".
231 */
232 bool ut_str_fuzzy_cmp (const char *str1, const char *str2, char stop_char) {
233
234 ASSERT(str1)
235 ASSERT(str2)
236 //DBG3 (" <%s> =? <%s> ", str1, str2);
237
238 const char SEP = '*';
239 const char END = 0;
240 char prec1, c1=0;
241 char prec2, c2=0;
242
243 for (;;) {
244 prec1 = c1;
245 if (is_letter(*str1)) {
246 if (is_maj(prec1) || prec1==SEP) c1 = *str1++ & ~0x20;
247 else c1 = SEP;
248 } else if (is_num (*str1)) {
249 if (is_num (prec1) || prec1==SEP) c1 = *str1++;
250 else c1 = SEP;
251 } else if (!*str1 || *str1==stop_char) {
252 if (prec1==SEP) c1 = END;
253 else c1=SEP;
254 } else {
255 c1 = SEP;
256 while (!is_letter(*str1) && !is_num(*str1) && *str1 && *str1!=stop_char) str1++;
257 }
258 prec2 = c2;
259 if (is_letter(*str2)) {
260 if (is_maj(prec2) || prec2==SEP) c2 = *str2++ & ~0x20;
261 else c2 = SEP;
262 } else if (is_num (*str2)) {
263 if (is_num (prec2) || prec2==SEP) c2 = *str2++;
264 else c2 = SEP;
265 } else if (!*str2 || *str2==stop_char) {
266 if (prec2==SEP) c2 = END;
267 else c2=SEP;
268 } else {
269 c2 = SEP;
270 while (!is_letter(*str2) && !is_num(*str2) && *str2 && *str2!=stop_char) str2++;
271 }
272 if (c1!=c2) {
273 //DBG3 ("false");
274 return false; }
275 if (c1==END) {
276 //DBG3 ("true");
277 return true;
278 }
279 }
280 }
281
282
283
284
285 double ut_get_charset_coef (UtCharsetIndex i) {
286
287 float coef;
288
289 if (ut_session->language_default>=0)
290 coef = UT_LANG_SYS_COEF [ut_session->charset[i].language[ut_session->language_default]];
291 else
292 coef = 1.0;
293
294 if (ut_session->system_default>=0)
295 coef *= UT_LANG_SYS_COEF [ut_session->charset[i].system[ut_session->system_default]];
296
297 return coef;
298 }
299
300
301
302
303
304 /***************************************************************************/
305 /*!
306 * \brief Function which call the user-defined function UtText::progress_function.
307 *
308 * \param text Related UtText structure.
309 * \param processed Size in byte processed, compared to UtText::size.
310 * \param start_stop If true, the user-defined function will be call for initialisation or cleanup.
311 *
312 * \return This function returns the same return code than the user-defined function, i.e. 0
313 * if the processing must be interrupted, 1 otherwise.
314 */
315
316 bool ut_update_progress (UtText * text, ulong processed, bool start_stop) {
317
318 ASSERT (ut_session->progress_function)
319
320 float rate;
321
322 if (start_stop) {
323 if (!text->progress_done) rate = 0;
324 else if (!text->progress_todo) rate = 1.0;
325 else {
326 rate = 0;
327 DBG1 ("ut_update_progress: done!=0 && todo!=0 !?!?")
328 }
329 } else {
330 rate = text->progress_done + (1-text->progress_done)*( (float) processed/text->size)/text->progress_todo;
331 if (rate==0.0) rate = FLT_MIN;
332 else if (rate==1.0) rate = 1.0 - FLT_MIN;
333 if (rate>1.0) {
334 DBG1 ("ut_update_progress: rate = %f !!", rate)
335 }
336 }
337
338 return (*(ut_session->progress_function)) (text, rate);
339 }
340
341 /***************************************************************************/
/*! \brief Lookup table of ut_crc32(), one entry per byte value. It is filled
 * during the first call to ut_crc32(). */
ulong ut_crc32_table[256];
/*! \brief Generator polynomial used to build ut_crc32_table. */
const ulong UT_CRC32_POLY=0x04c11db7;

/***************************************************************************/
/*!
 * \brief Update a 32-bit CRC with 16 bits of data.
 *
 * The most significant byte of \a data is processed first, then the least
 * significant one. The checksum is kept on 32 bits even when the ulong type
 * is wider, so the result is the same whatever the size of ulong.
 *
 * \param data The data to "checksum"
 * \param crc_in The previous returned checksum, 0 if none
 *
 * \return The resulting checksum (always fits in 32 bits).
 *
 * \note Compute the CRC of a data. Code was modified and the following may not be exact :
 * The following C code (by Rob Warnock <rpw3@sgi.com>) does CRC-32 in
 * BigEndian/BigEndian byte/bit order. That is, the data is sent most
 * significant byte first, and each of the bits within a byte is sent most
 * significant bit first, as in FDDI. You will need to twiddle with it to do
 * Ethernet CRC, i.e., BigEndian/LittleEndian byte/bit order.
 * The CRCs this code generates agree with the vendor-supplied Verilog models
 * of several of the popular FDDI "MAC" chips.
 *
 * \note NOTE(review): the table is built on first use without any
 * synchronisation - confirm that the first call cannot happen concurrently.
 */

ulong ut_crc32(ushort data, ulong crc_in) {
    const ulong MASK32 = 0xFFFFFFFFUL;
    ulong crc;

    // entry 1 is the polynomial itself once the table is built, never 0
    if (!ut_crc32_table[1]) {
        int i, j;
        for (i = 0; i < 256; ++i) {
            // shift in unsigned arithmetic: i << 24 overflows int for i >= 128
            ulong c = (ulong) i << 24;
            for (j = 8; j > 0; --j) {
                c = ((c & 0x80000000UL) ? (c << 1) ^ UT_CRC32_POLY : (c << 1)) & MASK32;
            }
            ut_crc32_table[i] = c;
        }
    }

    // every intermediate value is truncated to 32 bits so that the table
    // index stays in 0..255 even when ulong is 64 bits wide
    crc = ~crc_in & MASK32;
    crc = ((crc << 8) ^ ut_crc32_table[((crc >> 24) ^ (data >> 8)) & 0xFF]) & MASK32;   //crc for 8 MSB of data
    crc = ((crc << 8) ^ ut_crc32_table[((crc >> 24) ^ (data & 0xFF)) & 0xFF]) & MASK32; //crc for 8 LSB of data
    return ~crc & MASK32;
}