"Fossies" - the Fresh Open Source Software Archive

Member "utrac-0.3.2/src/ut_utils.c" (4 Jan 2009, 11006 Bytes) of package /linux/privat/old/utrac-0.3.2.tgz:


As a special service, "Fossies" has tried to format the requested source page into HTML using (guessed) C and C++ source code syntax highlighting (style: standard), with prefixed line numbers and a code-folding option. Alternatively, you can view or download the uninterpreted source code file here. For more information about "ut_utils.c", see the Fossies "Dox" file reference documentation.

    1 /***************************************************************************
    2  *            ut_utils.c
    3  *
    4  *  Tue Oct  5 11:29:53 2004
    5  *  Copyright  2004  Alliance MCA
    6  *  Written by : Antoine Calando (antoine@alliancemca.net)
    7  ****************************************************************************/
    8 
    9 /*
   10  *  This program is free software; you can redistribute it and/or modify
   11  *  it under the terms of the GNU General Public License as published by
   12  *  the Free Software Foundation; either version 2 of the License, or
   13  *  (at your option) any later version.
   14  *
   15  *  This program is distributed in the hope that it will be useful,
   16  *  but WITHOUT ANY WARRANTY; without even the implied warranty of
   17  *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
   18  *  GNU Library General Public License for more details.
   19  *
   20  *  You should have received a copy of the GNU General Public License
   21  *  along with this program; if not, write to the Free Software
   22  *  Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
   23  */
   24  
   25 /*!
   26  * \file ut_utils.c 
   27  * \brief Various internal functions
   28  * \author Antoine Calando (antoine@alliancemca.net)
   29  */
   30 
   31 #include <sys/stat.h>
   32 #include <unistd.h>
   33 #include <float.h>
   34 #include <fcntl.h>
   35 #include <stdio.h>
   36 #include <stdlib.h>
   37 #include <limits.h> //for SSIZE_MAX
   38 
   39 #include <endian.h>
   40 #include <byteswap.h>
   41 
   42 #include "utrac.h"
   43 #include "ut_text.h"
   44 #include "ut_charset.h"
   45 
   46 //#undef UT_DEBUG
   47 //#define UT_DEBUG 3
   48 #include "debug.h"
   49 
   50 /***************************************************************************/
   51 /*!
   52  * \brief Load a file in a buffer
   53  *   
   54  * \param filename Path to the file.
   55  * \param buffer Pointer used to return buffer address. Buffer must be free after used by user.
   56  *
   57  * \return UT_OK on succes, error code otherwise.
   58  *
   59  * \bug EC il n'y a qu'un appel à read or lorsque read!=size ce n'est pas une erreur si errno
   60  *      vaut EAGAIN.
   61  *      AC read() ne peut pas renvoyer EAGAIN si le fichier n'a pas été open() en mode EAGAIN
   62  *      (norme POSIX)
   63  */
   64 UtCode ut_load_charset_file (const char * filename, char ** buffer) {
   65     
   66     DBG3 ("Loading file %s...", filename)
   67     
   68     int fd = open (filename, O_RDONLY);
   69     if (fd==-1) return UT_OPEN_FILE_ERROR;
   70 
   71     struct stat f_stat;
   72     if (fstat (fd, &f_stat)) return UT_FSTAT_FILE_ERROR;
   73     
   74     //some space is needed to add an EOL and an EOF
   75     *buffer = (char*) malloc (f_stat.st_size + 2);
   76     if (!*buffer) return UT_MALLOC_ERROR;
   77     
   78     int code=read (fd, *buffer, f_stat.st_size);
   79     if (code<=0) return UT_READ_FILE_ERROR;
   80     if (code!=f_stat.st_size) return UT_READ_FILE_ERROR2;
   81 
   82     DBG3 ("File %s (%lu b) loaded!", filename, f_stat.st_size)
   83     
   84     *(*buffer+f_stat.st_size) = '\n';
   85     *(*buffer+f_stat.st_size+1) = UT_EOF_CHAR;
   86 
   87     if (close(fd)) return UT_CLOSE_FILE_ERROR;
   88 
   89     return UT_OK;
   90 }
   91 
   92 
   93 
   94 /***************************************************************************/
   95 /*!
   96  * \brief Print a number in binary form on stdout (debug).
   97  *
   98  * \param src Number to print.
   99  *
  100  * \bug pas de gestion big/little endian
  101  */
  102 void ut_print_binary (ulong src) {
  103 
  104     int i; for (i=0; i<16; i++) {
  105         if (src&1<<15) putchar('x');
  106         else putchar ('-');
  107         src<<=1;
  108         if (!((i+1)%4)) putchar(' ');
  109     }
  110     
  111 }
  112 
  113 /***************************************************************************/
  114 /*!
  115  * \brief Print content of a UtText structure (debug)
  116  */
  117 UtCode ut_debug_text (UtText * text) {
  118     
  119     ASSERT (text);
  120     
  121     printf ("=====> Structure UtText :\n");
  122     //data  
  123     printf ("- size : %lu - %luk - %lum\n", text->size, text->size/1024, text->size/1024/1024);
  124     printf ("- lines1 : %lu - %luk\n", text->nb_lines, text->nb_lines/1024);
  125     printf ("- lines2 : %lu - %luk\n", text->nb_lines_alt, text->nb_lines_alt/1024);
  126     printf ("- skip char : <%c>\n", text->skip_char);
  127     printf ("- flags : "); ut_print_binary (text->flags); putchar('\n');        
  128     //distrib
  129     //ext_char
  130     //charmap
  131     printf ("- eol1 : <%d>\n", text->eol);
  132     printf ("- eol2 : <%d>\n", text->eol_alt);
  133     printf ("- charset : <%hu>", text->charset);
  134     if (text->charset != UT_UNSET) printf (" (%s)", 
  135         ut_session->charset[text->charset].name);
  136     putchar('\n');
  137     //convert eol
  138     //convert charset
  139     return UT_OK;
  140 }
  141 
  142 /***************************************************************************/
  143 /*!
  144  * \brief Print content of a UtText::evaluation array (debug)
  145  */
  146 UtCode ut_debug_text_rating (UtText * text) {
  147     
  148     ASSERT (text);
  149     if (!text->evaluation) return UT_OK;
  150     
  151     int i; for (i=0; i<ut_session->nb_charsets; i++) {
  152         printf ("=> %2i:   chk:%11lx   rtg:%6ld     %s\n", i, text->evaluation[i].checksum, 
  153             text->evaluation[i].rating, ut_session->charset[i].name);   
  154     }
  155         
  156     return UT_OK;   
  157 }
  158 
  159 //@{
  160 /** brief Utility functions for ut_str_fuzzy_cmp()  These functions test if a character is uppercase, lowercase, letter or number.*/
  161 
  162 static inline bool is_maj (char c) { return ('A'<=c && c<='Z'); }
  163 static inline bool is_min (char c) { return ('a'<=c && c<='z'); }
  164 static inline bool is_letter (char c) { c &= ~0x20; return is_maj(c); }
  165 static inline bool is_num (char c) { return ('0'<=c && c<='9'); }
  166 // @}
  167 
  168 
  169 
  170 /*! \brief get charset index from a string
  171 
  172 */
  173 
  174 UtCharsetIndex ut_find_charset (char * charset_name) {
  175     
  176     ASSERT (charset_name)
  177     
  178     UtCharsetIndex i;
  179     for (i=0; i<ut_session->nb_charsets; i++) {
  180         if ( ut_session->charset[i].name &&
  181             ut_str_fuzzy_cmp (charset_name, ut_session->charset[i].name, 0)) break;
  182         if ( ut_session->charset[i].alias &&
  183             ut_str_fuzzy_cmp (charset_name, ut_session->charset[i].alias, 0)) break;    
  184     }
  185 
  186     if (i==ut_session->nb_charsets) return UT_UNSET;
  187     else return i;
  188 }
  189 
  190 UtEolType ut_find_eol (char * eol_name) {
  191     
  192     ASSERT (eol_name)
  193     
  194     UtEolType j;
  195     for (j= UT_EOL_CR; j<UT_EOL_NONE; j++) 
  196         if ( UT_EOL_NAME[j] && ut_str_fuzzy_cmp (eol_name, UT_EOL_NAME[j], 0) ) break;
  197 
  198     if (j==UT_EOL_NONE) return UT_EOL_UNSET;
  199     else return j;
  200 }
  201 
  202 int ut_find_lang_sys (char * language_name, UtLangSys * lang_sys) {
  203     
  204     int language_id;
  205     char ln[2];
  206     
  207     ln[0] = language_name[0];
  208     ln[1] = language_name[1];
  209     if ('a'<= ln[0] && ln[0] <= 'z' ) ln[0] += 'A'-'a';
  210     if ('a'<= ln[1] && ln[1] <= 'z' ) ln[1] += 'A'-'a';
  211     
  212     for (language_id=0; language_id < lang_sys->n; language_id++) {
  213         if ( ln[0] == lang_sys->code[language_id*2+0] &&
  214              ln[1] == lang_sys->code[language_id*2+1]) break;
  215     }
  216     
  217     if (language_id == lang_sys->n) return UT_UNSET;
  218     
  219     return language_id;
  220 }
  221 
  222 
  223 
  224 /***************************************************************************/
  225 /*!
  226  * \brief Approximative comparaison between two strings.
  227  *
  228  * The comparaison focuses only on substrings composed of number or letter
  229  * (case is not significant). For instance "iso8859 1"=="ISO-8859-1",
  230  * but "Mac Roman"!="MacRoman".
  231  */
  232 bool ut_str_fuzzy_cmp (const char *str1, const char *str2, char stop_char) {
  233     
  234     ASSERT(str1)
  235     ASSERT(str2)
  236     //DBG3 (" <%s> =? <%s> ", str1, str2);
  237 
  238     const char SEP = '*';
  239     const char END = 0;
  240     char prec1, c1=0;
  241     char prec2, c2=0;
  242     
  243     for (;;) {
  244         prec1 = c1;
  245         if (is_letter(*str1)) {
  246             if (is_maj(prec1) || prec1==SEP) c1 = *str1++ & ~0x20;
  247             else c1 = SEP;
  248         } else if (is_num (*str1)) {
  249             if (is_num (prec1) || prec1==SEP) c1 = *str1++;
  250             else c1 = SEP;
  251         } else if (!*str1 || *str1==stop_char) { 
  252             if (prec1==SEP) c1 = END;
  253             else c1=SEP;
  254         } else {
  255             c1 = SEP;
  256             while (!is_letter(*str1) && !is_num(*str1) && *str1 && *str1!=stop_char) str1++;
  257         }
  258         prec2 = c2;
  259         if (is_letter(*str2)) {
  260             if (is_maj(prec2) || prec2==SEP) c2 = *str2++ & ~0x20;
  261             else c2 = SEP;
  262         } else if (is_num (*str2)) {
  263             if (is_num (prec2) || prec2==SEP) c2 = *str2++;
  264             else c2 = SEP;
  265         } else if (!*str2 || *str2==stop_char) { 
  266             if (prec2==SEP) c2 = END;
  267             else c2=SEP;
  268         } else {
  269             c2 = SEP;
  270             while (!is_letter(*str2) && !is_num(*str2) && *str2 && *str2!=stop_char) str2++;
  271         }
  272         if (c1!=c2) {
  273             //DBG3 ("false");
  274             return false; }
  275         if (c1==END) {
  276             //DBG3 ("true");
  277             return true;
  278         }
  279     }
  280 }
  281 
  282 
  283 
  284 
  285 double ut_get_charset_coef (UtCharsetIndex i) {
  286     
  287     float coef;
  288     
  289     if (ut_session->language_default>=0)
  290         coef = UT_LANG_SYS_COEF [ut_session->charset[i].language[ut_session->language_default]];
  291     else
  292         coef = 1.0;
  293 
  294     if (ut_session->system_default>=0)
  295          coef *= UT_LANG_SYS_COEF [ut_session->charset[i].system[ut_session->system_default]];
  296     
  297     return coef;
  298 }
  299 
  300 
  301 
  302 
  303 
  304 /***************************************************************************/
  305 /*!
  306  * \brief Function which call the user-defined function UtText::progress_function.
  307  *
  308  * \param text Related UtText structure.
  309  * \param processed Size in byte processed, compared to UtText::size.
  310  * \param start_stop If true, the user-defined function will be call for initialisation or cleanup.
  311  *
  312  *  \return This function returns the same return code than the user-defined function, i.e. 0
  313  *          if the processing must be interrupted, 1 otherwise.
  314  */
  315 
  316 bool ut_update_progress (UtText * text, ulong processed, bool start_stop) {
  317     
  318     ASSERT (ut_session->progress_function)
  319     
  320     float rate;
  321     
  322     if (start_stop) {
  323         if (!text->progress_done) rate = 0;
  324         else if (!text->progress_todo) rate = 1.0;
  325         else {
  326             rate = 0;
  327             DBG1 ("ut_update_progress: done!=0 && todo!=0 !?!?")
  328         }
  329     } else {
  330         rate = text->progress_done + (1-text->progress_done)*( (float) processed/text->size)/text->progress_todo;
  331         if (rate==0.0) rate = FLT_MIN;
  332         else if (rate==1.0) rate = 1.0 - FLT_MIN;
  333         if (rate>1.0) {
  334             DBG1 ("ut_update_progress: rate = %f !!", rate)
  335         }
  336     }
  337     
  338     return (*(ut_session->progress_function)) (text, rate); 
  339 }
  340 
  341 /***************************************************************************/
  342 /*! \brief table CRC32 ? */
  343 ulong ut_crc32_table[256];
  344 /*! \biref MAGIC NUMBER ? */
  345 const ulong UT_CRC32_POLY=0x04c11db7;
  346 
  347 /***************************************************************************/
  348 /*!
  349  * \brief Function which call the user-defined function UtText::progress_function.
  350  *
  351  * \param data The data to "checksum"
  352  * \param crc_in The previous returned checksum, 0 if none
  353  *
  354  * \return The resulting checksum.
  355  *
  356  * \note Compute the CRC of a data. Code was modified and the following may not be exact :
  357  *       The following C code (by Rob Warnock <rpw3@sgi.com>) does CRC-32 in
  358  *       BigEndian/BigEndian byte/bit order.  That is, the data is sent most
  359  *       significant byte first, and each of the bits within a byte is sent most
  360  *       significant bit first, as in FDDI. You will need to twiddle with it to do
  361  *       Ethernet CRC, i.e., BigEndian/LittleEndian byte/bit order.
  362  *       The CRCs this code generates agree with the vendor-supplied Verilog models
  363  *       of several of the popular FDDI "MAC" chips.
  364  */
  365 
  366 ulong ut_crc32(ushort data, ulong crc_in) {
  367     ulong  crc;
  368 
  369     if (!ut_crc32_table[1]) {
  370         int i, j; ulong c;
  371         for (i = 0; i < 256; ++i) {
  372             for (c = i << 24, j = 8; j > 0; --j) c = c & 0x80000000 ? (c << 1) ^ UT_CRC32_POLY : (c << 1);
  373             ut_crc32_table[i] = c;
  374         }
  375     }
  376     crc_in = ~crc_in;
  377     crc = (crc_in << 8) ^ ut_crc32_table[((crc_in >> 16) ^ data )>>8];  //crc for 8 MSB of data
  378     crc = (crc << 8) ^ ut_crc32_table[(crc >> 24) ^ (data&0xFF)];       //crc for 8 LSB of data
  379     return ~crc;
  380 }