"Fossies" - the Fresh Open Source Software Archive

Member "utrac-0.3.2/src/ut_charset.h" (4 Jan 2009, 4035 Bytes) of package /linux/privat/old/utrac-0.3.2.tgz:


As a special service, "Fossies" has tried to format the requested source page into HTML using (guessed) C and C++ syntax highlighting (style: standard), with prefixed line numbers and a code-folding option. Alternatively, you can view or download the uninterpreted source code file here. For more information about "ut_charset.h", see the Fossies "Dox" file reference documentation.

    1 /***************************************************************************
    2  *            ut_charset.h
    3  *
    4  *  Tue Oct  5 11:27:31 2004
    5  *  Copyright  2004  Alliance MCA
    6  *  Written by : Antoine Calando (antoine@alliancemca.net)
    7  ****************************************************************************/
    8 
    9 /*
   10  *  This program is free software; you can redistribute it and/or modify
   11  *  it under the terms of the GNU General Public License as published by
   12  *  the Free Software Foundation; either version 2 of the License, or
   13  *  (at your option) any later version.
   14  *
   15  *  This program is distributed in the hope that it will be useful,
   16  *  but WITHOUT ANY WARRANTY; without even the implied warranty of
   17  *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
   18  *  GNU Library General Public License for more details.
   19  *
   20  *  You should have received a copy of the GNU General Public License
   21  *  along with this program; if not, write to the Free Software
   22  *  Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
   23  */
   24  
   25 /*!
   26  * \file ut_charset.h
   27  * \author Antoine Calando (antoine@alliancemca.net)
   28  */
   29 
   30 #ifndef _UT_CHARSET_H_
   31 #define _UT_CHARSET_H_
   32 
   33 #include <sys/types.h>
   34 
   35 //#include "debug.h"
   36 
   37 /*!
   38  * \brief Categories to classify characters.
   39  *
   40  * They are inspirated from ftp://ftp.unicode.org/Public/UNIDATA/UCD.html#General_Category_Values
   41  *
   42  * \todo EC explication de tous les enums
   43  *       AC voir lien!!!
   44  */
   45 typedef enum UtCateg {
   46     UT_CTG_UNSET=0 /*A*/,
   47     UT_CTG_UPPERCASE/*B*/,
   48     UT_CTG_LOWERCASE/*C*/,
   49     UT_CTG_OTHER_LETTER/*D*/,
   50     UT_CTG_NUMBER/*E*/,
   51     UT_CTG_PONCTUATION,
   52         UT_CTG_PONCT_INIT_0/*G*/,
   53         UT_CTG_PONCT_INIT_1,
   54         UT_CTG_PONCT_INIT_2,
   55         UT_CTG_PONCT_INIT_3,
   56         UT_CTG_PONCT_INIT_OTHER,
   57         UT_CTG_PONCT_FINAL_0/*L*/,
   58         UT_CTG_PONCT_FINAL_1,
   59         UT_CTG_PONCT_FINAL_2,
   60         UT_CTG_PONCT_FINAL_3,
   61         UT_CTG_PONCT_FINAL_OTHER, 
   62     UT_CTG_CURRENCY/*Q*/,
   63     UT_CTG_SYMBOL,
   64     UT_CTG_CONTROL,
   65     UT_CTG_DELIMITER/*T*/,
   66     UT_CTG_MARK,
   67     UT_CTG_OTHER/*V*/
   68 } UtCateg;
   69 
   70 
   71 #define UT_CTG_PONCT_IF_N UT_CTG_PONCT_INIT_OTHER-UT_CTG_PONCT_INIT_0+1
   72 
   73 /*! Type of alphabet (latin, cyrillic, arabic, greek, hebrew, thai...)
   74     The index is the same than for array SCRIPT_NAME in ut_charset.c
   75 */
   76 typedef u_char UtScript;
   77 
   78 /*!
   79  * \brief Description of a character.
   80  */
   81 typedef struct UtCharType {
   82     UtCateg categorie;  //!< Categorie of the character.
   83     UtScript script;    //!< Alphabet of the character.
   84 } UtCharType;
   85 
   86 
   87 /*! \brief Charset Name */
   88 
   89 #ifdef _UT_CHARSET_C_
   90 const char * UT_CHARSET_NAME[]  = {
   91     "ASCII",
   92     "UTF-8",
   93     "UTF-16BE",
   94     "UTF-16LE",
   95     "UTF-32BE",
   96     "UTF-32LE",
   97     NULL
   98 };
   99 #else
  100 extern const char * UT_CHARSET_NAME[];
  101 #endif
  102 
  103 /*!
  104  * \biref Charset type.
  105  *
  106  * ASCII extension means charsets monobytes whose 128 firsts characters are the same
  107  * than ASCII's ones.
  108  */
  109 
  110 typedef enum UtCharsetType {
  111     UT_CST_UNSET = -1,
  112     UT_CST_ASCII = 0,
  113     UT_CST_UTF_8,
  114     UT_CST_UTF_16BE,    //!< unimplemented.
  115     UT_CST_UTF_16LE,    //!< unimplemented.
  116     UT_CST_UTF_32BE,    //!< unimplemented.
  117     UT_CST_UTF_32LE,    //!< unimplemented.
  118     UT_CST_ASCII_EXTENSION
  119 } UtCharsetType;
  120 
  121 
  122 /*!
  123  * \brief Contains informations about a charset.
  124  *
  125  * This structure is used to describe a charset.  It is stocked as an array in UtSession::charset.
  126  * This array is created from file charsets.dat by ut_load_charsets() (called in ut_init()); it is
  127  * destroyed in ut_finish().
  128  */
  129 typedef struct UtCharset {
  130     char * name;            //!< Standard name of the charset.
  131     char * alias;           //!< Other name (TODO: make an array.)
  132     char * common_name;     //!< Friendly name for non-geek users.
  133     char * comment;         //!< Friendly comment for non-geek users.
  134     UtCharsetType type;     //!< Type of the charset.
  135     ushort * unicode;       //!< Unicode charmap array (on 16 bits!).
  136     UtCharType * char_type; //!< Character description arra
  137     u_char * language;      //!< Coefficients array of size \link UtSession::language ut_session->language.n \endlink
  138     u_char * system;            //!< Coefficients array of size \link UtSession::system ut_session->system.n \endlink
  139 } UtCharset;
  140 
  141 #endif // _UT_CHARSET_H_