"Fossies" - the Fresh Open Source Software Archive 
Member "utrac-0.3.2/src/ut_charset.h" (4 Jan 2009, 4035 Bytes) of package /linux/privat/old/utrac-0.3.2.tgz:
As a special service "Fossies" has tried to format the requested source page into HTML format using (guessed) C and C++ source code syntax highlighting (style:
standard) with prefixed line numbers and
code folding option.
Alternatively, you can view or download the uninterpreted source code file here.
For more information about "ut_charset.h" see the
Fossies "Dox" file reference documentation.
1 /***************************************************************************
2 * ut_charset.h
3 *
4 * Tue Oct 5 11:27:31 2004
5 * Copyright 2004 Alliance MCA
6 * Written by : Antoine Calando (antoine@alliancemca.net)
7 ****************************************************************************/
8
9 /*
10 * This program is free software; you can redistribute it and/or modify
11 * it under the terms of the GNU General Public License as published by
12 * the Free Software Foundation; either version 2 of the License, or
13 * (at your option) any later version.
14 *
15 * This program is distributed in the hope that it will be useful,
16 * but WITHOUT ANY WARRANTY; without even the implied warranty of
17 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18 * GNU Library General Public License for more details.
19 *
20 * You should have received a copy of the GNU General Public License
21 * along with this program; if not, write to the Free Software
22 * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
23 */
24
25 /*!
26 * \file ut_charset.h
27 * \author Antoine Calando (antoine@alliancemca.net)
28 */
29
30 #ifndef _UT_CHARSET_H_
31 #define _UT_CHARSET_H_
32
#include <stddef.h>
#include <sys/types.h>
34
35 //#include "debug.h"
36
/*!
 * \brief General categories used to classify a character.
 *
 * The set is modelled on the Unicode general category values, see
 * ftp://ftp.unicode.org/Public/UNIDATA/UCD.html#General_Category_Values
 *
 * The letter in the trailing comment of each enumerator is simply the
 * letter of the alphabet of the same rank as the value (A = 0 ... V = 21).
 * NOTE(review): presumably a one-letter code used outside this header;
 * confirm before relying on it.
 *
 * \todo EC: explain every enumerator.
 *       AC: see the link above!
 */
enum UtCateg {
	UT_CTG_UNSET             =  0,	/* A */
	UT_CTG_UPPERCASE         =  1,	/* B */
	UT_CTG_LOWERCASE         =  2,	/* C */
	UT_CTG_OTHER_LETTER      =  3,	/* D */
	UT_CTG_NUMBER            =  4,	/* E */
	UT_CTG_PONCTUATION       =  5,	/* F */

	/* Initial (opening) punctuation, one contiguous range. */
	UT_CTG_PONCT_INIT_0      =  6,	/* G */
	UT_CTG_PONCT_INIT_1      =  7,	/* H */
	UT_CTG_PONCT_INIT_2      =  8,	/* I */
	UT_CTG_PONCT_INIT_3      =  9,	/* J */
	UT_CTG_PONCT_INIT_OTHER  = 10,	/* K */

	/* Final (closing) punctuation, same size as the initial range. */
	UT_CTG_PONCT_FINAL_0     = 11,	/* L */
	UT_CTG_PONCT_FINAL_1     = 12,	/* M */
	UT_CTG_PONCT_FINAL_2     = 13,	/* N */
	UT_CTG_PONCT_FINAL_3     = 14,	/* O */
	UT_CTG_PONCT_FINAL_OTHER = 15,	/* P */

	UT_CTG_CURRENCY          = 16,	/* Q */
	UT_CTG_SYMBOL            = 17,	/* R */
	UT_CTG_CONTROL           = 18,	/* S */
	UT_CTG_DELIMITER         = 19,	/* T */
	UT_CTG_MARK              = 20,	/* U */
	UT_CTG_OTHER             = 21	/* V */
};
typedef enum UtCateg UtCateg;
69
70
/*!
 * \brief Number of categories in the initial-punctuation range, i.e. from
 *        UT_CTG_PONCT_INIT_0 to UT_CTG_PONCT_INIT_OTHER inclusive.
 *
 * The expansion is fully parenthesised so that the macro behaves as a single
 * operand in any expression (e.g. `2 * UT_CTG_PONCT_IF_N` or
 * `x % UT_CTG_PONCT_IF_N`).
 */
#define UT_CTG_PONCT_IF_N (UT_CTG_PONCT_INIT_OTHER - UT_CTG_PONCT_INIT_0 + 1)
72
/*!
 * \brief Type of alphabet (latin, cyrillic, arabic, greek, hebrew, thai...).
 *
 * The value is the same index as the one used for the SCRIPT_NAME array in
 * ut_charset.c.
 *
 * Spelled as plain `unsigned char` rather than the BSD typedef `u_char`, so
 * the header does not depend on non-standard names from <sys/types.h>.
 */
typedef unsigned char UtScript;
77
78 /*!
79 * \brief Description of a character.
80 */
81 typedef struct UtCharType {
82 UtCateg categorie; //!< Categorie of the character.
83 UtScript script; //!< Alphabet of the character.
84 } UtCharType;
85
86
/*!
 * \brief NULL-terminated table of charset names.
 *
 * The six names appear in the same order as the UtCharsetType values
 * UT_CST_ASCII (0) to UT_CST_UTF_32LE (5); index 6 holds the NULL
 * terminator.
 * NOTE(review): presumably indexed by UtCharsetType; if so,
 * UT_CST_ASCII_EXTENSION (6) lands on the terminator -- confirm with callers.
 *
 * The declaration is always visible. The definition is emitted only in the
 * translation unit that defines _UT_CHARSET_C_ before including this header,
 * so the compiler checks the definition against the declaration there.
 */
extern const char *UT_CHARSET_NAME[];

#ifdef _UT_CHARSET_C_
const char *UT_CHARSET_NAME[] = {
	"ASCII",
	"UTF-8",
	"UTF-16BE",
	"UTF-16LE",
	"UTF-32BE",
	"UTF-32LE",
	NULL
};
#endif /* _UT_CHARSET_C_ */
102
/*!
 * \brief Charset type.
 *
 * An "ASCII extension" is a single-byte charset whose first 128 characters
 * are identical to those of ASCII.
 */
enum UtCharsetType {
	UT_CST_UNSET           = -1,
	UT_CST_ASCII           =  0,
	UT_CST_UTF_8           =  1,
	UT_CST_UTF_16BE        =  2,	/*!< unimplemented. */
	UT_CST_UTF_16LE        =  3,	/*!< unimplemented. */
	UT_CST_UTF_32BE        =  4,	/*!< unimplemented. */
	UT_CST_UTF_32LE        =  5,	/*!< unimplemented. */
	UT_CST_ASCII_EXTENSION =  6
};
typedef enum UtCharsetType UtCharsetType;
120
121
122 /*!
123 * \brief Contains informations about a charset.
124 *
125 * This structure is used to describe a charset. It is stocked as an array in UtSession::charset.
126 * This array is created from file charsets.dat by ut_load_charsets() (called in ut_init()); it is
127 * destroyed in ut_finish().
128 */
129 typedef struct UtCharset {
130 char * name; //!< Standard name of the charset.
131 char * alias; //!< Other name (TODO: make an array.)
132 char * common_name; //!< Friendly name for non-geek users.
133 char * comment; //!< Friendly comment for non-geek users.
134 UtCharsetType type; //!< Type of the charset.
135 ushort * unicode; //!< Unicode charmap array (on 16 bits!).
136 UtCharType * char_type; //!< Character description arra
137 u_char * language; //!< Coefficients array of size \link UtSession::language ut_session->language.n \endlink
138 u_char * system; //!< Coefficients array of size \link UtSession::system ut_session->system.n \endlink
139 } UtCharset;
140
141 #endif // _UT_CHARSET_H_