ucs.h (ocrad-0.24) | : | ucs.h (ocrad-0.25) | ||
---|---|---|---|---|
/* GNU Ocrad - Optical Character Recognition program | /* GNU Ocrad - Optical Character Recognition program | |||
Copyright (C) 2003-2014 Antonio Diaz Diaz. | Copyright (C) 2003-2015 Antonio Diaz Diaz. | |||
This program is free software: you can redistribute it and/or modify | This program is free software: you can redistribute it and/or modify | |||
it under the terms of the GNU General Public License as published by | it under the terms of the GNU General Public License as published by | |||
the Free Software Foundation, either version 2 of the License, or | the Free Software Foundation, either version 2 of the License, or | |||
(at your option) any later version. | (at your option) any later version. | |||
This program is distributed in the hope that it will be useful, | This program is distributed in the hope that it will be useful, | |||
but WITHOUT ANY WARRANTY; without even the implied warranty of | but WITHOUT ANY WARRANTY; without even the implied warranty of | |||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | |||
GNU General Public License for more details. | GNU General Public License for more details. | |||
You should have received a copy of the GNU General Public License | You should have received a copy of the GNU General Public License | |||
along with this program. If not, see <http://www.gnu.org/licenses/>. | along with this program. If not, see <http://www.gnu.org/licenses/>. | |||
*/ | */ | |||
namespace UCS { | namespace UCS { | |||
enum { IEXCLAM = 0x00A1, // inverted exclamation mark | enum { | |||
COPY = 0x00A9, // copyright sign | IEXCLAM = 0x00A1, // inverted exclamation mark | |||
FEMIORD = 0x00AA, // feminine ordinal indicator | CENT = 0x00A2, // cent sign | |||
LDANGLE = 0x00AB, // left-pointing double angle quotation mark | POUND = 0x00A3, // pound sign | |||
NOT = 0x00AC, // not sign | YEN = 0x00A5, // yen sign | |||
REG = 0x00AE, // registered sign | SECTION = 0x00A7, // section sign | |||
DEG = 0x00B0, // degree sign | COPY = 0x00A9, // copyright sign | |||
PLUSMIN = 0x00B1, // plus-minus sign | FEMIORD = 0x00AA, // feminine ordinal indicator | |||
POW2 = 0x00B2, // superscript two | LDANGLE = 0x00AB, // left-pointing double angle quotation mark | |||
POW3 = 0x00B3, // superscript three | NOT = 0x00AC, // not sign | |||
MICRO = 0x00B5, // micro sign | REG = 0x00AE, // registered sign | |||
PILCROW = 0x00B6, // pilcrow sign | MACRON = 0x00AF, // macron | |||
MIDDOT = 0x00B7, // middle dot | DEG = 0x00B0, // degree sign | |||
POW1 = 0x00B9, // superscript one | PLUSMIN = 0x00B1, // plus-minus sign | |||
MASCORD = 0x00BA, // masculine ordinal indicator | POW2 = 0x00B2, // superscript two | |||
RDANGLE = 0x00BB, // right-pointing double angle quotation mark | POW3 = 0x00B3, // superscript three | |||
IQUEST = 0x00BF, // inverted question mark | MICRO = 0x00B5, // micro sign | |||
CAGRAVE = 0x00C0, // latin capital letter a with grave | PILCROW = 0x00B6, // pilcrow sign | |||
CAACUTE = 0x00C1, // latin capital letter a with acute | MIDDOT = 0x00B7, // middle dot | |||
CACIRCU = 0x00C2, // latin capital letter a with circumflex | POW1 = 0x00B9, // superscript one | |||
CATILDE = 0x00C3, // latin capital letter a with tilde | MASCORD = 0x00BA, // masculine ordinal indicator | |||
CADIAER = 0x00C4, // latin capital letter a with diaeresis | RDANGLE = 0x00BB, // right-pointing double angle quotation mark | |||
CARING = 0x00C5, // latin capital letter a with ring above | IQUEST = 0x00BF, // inverted question mark | |||
CCCEDI = 0x00C7, // latin capital letter c with cedilla | CAGRAVE = 0x00C0, // latin capital letter a with grave | |||
CEGRAVE = 0x00C8, // latin capital letter e with grave | CAACUTE = 0x00C1, // latin capital letter a with acute | |||
CEACUTE = 0x00C9, // latin capital letter e with acute | CACIRCU = 0x00C2, // latin capital letter a with circumflex | |||
CECIRCU = 0x00CA, // latin capital letter e with circumflex | CATILDE = 0x00C3, // latin capital letter a with tilde | |||
CEDIAER = 0x00CB, // latin capital letter e with diaeresis | CADIAER = 0x00C4, // latin capital letter a with diaeresis | |||
CIGRAVE = 0x00CC, // latin capital letter i with grave | CARING = 0x00C5, // latin capital letter a with ring above | |||
CIACUTE = 0x00CD, // latin capital letter i with acute | CCCEDI = 0x00C7, // latin capital letter c with cedilla | |||
CICIRCU = 0x00CE, // latin capital letter i with circumflex | CEGRAVE = 0x00C8, // latin capital letter e with grave | |||
CIDIAER = 0x00CF, // latin capital letter i with diaeresis | CEACUTE = 0x00C9, // latin capital letter e with acute | |||
CNTILDE = 0x00D1, // latin capital letter n with tilde | CECIRCU = 0x00CA, // latin capital letter e with circumflex | |||
COGRAVE = 0x00D2, // latin capital letter o with grave | CEDIAER = 0x00CB, // latin capital letter e with diaeresis | |||
COACUTE = 0x00D3, // latin capital letter o with acute | CIGRAVE = 0x00CC, // latin capital letter i with grave | |||
COCIRCU = 0x00D4, // latin capital letter o with circumflex | CIACUTE = 0x00CD, // latin capital letter i with acute | |||
COTILDE = 0x00D5, // latin capital letter o with tilde | CICIRCU = 0x00CE, // latin capital letter i with circumflex | |||
CODIAER = 0x00D6, // latin capital letter o with diaeresis | CIDIAER = 0x00CF, // latin capital letter i with diaeresis | |||
CUGRAVE = 0x00D9, // latin capital letter u with grave | CNTILDE = 0x00D1, // latin capital letter n with tilde | |||
CUACUTE = 0x00DA, // latin capital letter u with acute | COGRAVE = 0x00D2, // latin capital letter o with grave | |||
CUCIRCU = 0x00DB, // latin capital letter u with circumflex | COACUTE = 0x00D3, // latin capital letter o with acute | |||
CUDIAER = 0x00DC, // latin capital letter u with diaeresis | COCIRCU = 0x00D4, // latin capital letter o with circumflex | |||
CYACUTE = 0x00DD, // latin capital letter y with acute | COTILDE = 0x00D5, // latin capital letter o with tilde | |||
SSSHARP = 0x00DF, // latin small letter sharp s (german) | CODIAER = 0x00D6, // latin capital letter o with diaeresis | |||
SAGRAVE = 0x00E0, // latin small letter a with grave | CUGRAVE = 0x00D9, // latin capital letter u with grave | |||
SAACUTE = 0x00E1, // latin small letter a with acute | CUACUTE = 0x00DA, // latin capital letter u with acute | |||
SACIRCU = 0x00E2, // latin small letter a with circumflex | CUCIRCU = 0x00DB, // latin capital letter u with circumflex | |||
SATILDE = 0x00E3, // latin small letter a with tilde | CUDIAER = 0x00DC, // latin capital letter u with diaeresis | |||
SADIAER = 0x00E4, // latin small letter a with diaeresis | CYACUTE = 0x00DD, // latin capital letter y with acute | |||
SARING = 0x00E5, // latin small letter a with ring above | SSSHARP = 0x00DF, // latin small letter sharp s (german) | |||
SCCEDI = 0x00E7, // latin small letter c with cedilla | SAGRAVE = 0x00E0, // latin small letter a with grave | |||
SEGRAVE = 0x00E8, // latin small letter e with grave | SAACUTE = 0x00E1, // latin small letter a with acute | |||
SEACUTE = 0x00E9, // latin small letter e with acute | SACIRCU = 0x00E2, // latin small letter a with circumflex | |||
SECIRCU = 0x00EA, // latin small letter e with circumflex | SATILDE = 0x00E3, // latin small letter a with tilde | |||
SEDIAER = 0x00EB, // latin small letter e with diaeresis | SADIAER = 0x00E4, // latin small letter a with diaeresis | |||
SIGRAVE = 0x00EC, // latin small letter i with grave | SARING = 0x00E5, // latin small letter a with ring above | |||
SIACUTE = 0x00ED, // latin small letter i with acute | SCCEDI = 0x00E7, // latin small letter c with cedilla | |||
SICIRCU = 0x00EE, // latin small letter i with circumflex | SEGRAVE = 0x00E8, // latin small letter e with grave | |||
SIDIAER = 0x00EF, // latin small letter i with diaeresis | SEACUTE = 0x00E9, // latin small letter e with acute | |||
SNTILDE = 0x00F1, // latin small letter n with tilde | SECIRCU = 0x00EA, // latin small letter e with circumflex | |||
SOGRAVE = 0x00F2, // latin small letter o with grave | SEDIAER = 0x00EB, // latin small letter e with diaeresis | |||
SOACUTE = 0x00F3, // latin small letter o with acute | SIGRAVE = 0x00EC, // latin small letter i with grave | |||
SOCIRCU = 0x00F4, // latin small letter o with circumflex | SIACUTE = 0x00ED, // latin small letter i with acute | |||
SOTILDE = 0x00F5, // latin small letter o with tilde | SICIRCU = 0x00EE, // latin small letter i with circumflex | |||
SODIAER = 0x00F6, // latin small letter o with diaeresis | SIDIAER = 0x00EF, // latin small letter i with diaeresis | |||
DIV = 0x00F7, // division sign | SNTILDE = 0x00F1, // latin small letter n with tilde | |||
SUGRAVE = 0x00F9, // latin small letter u with grave | SOGRAVE = 0x00F2, // latin small letter o with grave | |||
SUACUTE = 0x00FA, // latin small letter u with acute | SOACUTE = 0x00F3, // latin small letter o with acute | |||
SUCIRCU = 0x00FB, // latin small letter u with circumflex | SOCIRCU = 0x00F4, // latin small letter o with circumflex | |||
SUDIAER = 0x00FC, // latin small letter u with diaeresis | SOTILDE = 0x00F5, // latin small letter o with tilde | |||
SYACUTE = 0x00FD, // latin small letter y with acute | SODIAER = 0x00F6, // latin small letter o with diaeresis | |||
SYDIAER = 0x00FF, // latin small letter y with diaeresis | DIV = 0x00F7, // division sign | |||
CGBREVE = 0X011E, // latin capital letter g with breve | SUGRAVE = 0x00F9, // latin small letter u with grave | |||
SGBREVE = 0x011F, // latin small letter g with breve | SUACUTE = 0x00FA, // latin small letter u with acute | |||
CIDOT = 0x0130, // latin capital letter i with dot above | SUCIRCU = 0x00FB, // latin small letter u with circumflex | |||
SINODOT = 0x0131, // latin small letter i dotless | SUDIAER = 0x00FC, // latin small letter u with diaeresis | |||
CSCEDI = 0x015E, // latin capital letter s with cedilla | SYACUTE = 0x00FD, // latin small letter y with acute | |||
SSCEDI = 0x015F, // latin small letter s with cedilla | SYDIAER = 0x00FF, // latin small letter y with diaeresis | |||
CSCARON = 0x0160, // latin capital letter s with caron | CGBREVE = 0X011E, // latin capital letter g with breve | |||
SSCARON = 0x0161, // latin small letter s with caron | SGBREVE = 0x011F, // latin small letter g with breve | |||
CZCARON = 0x017D, // latin capital letter z with caron | CIDOT = 0x0130, // latin capital letter i with dot above | |||
SZCARON = 0x017E, // latin small letter z with caron | SINODOT = 0x0131, // latin small letter i dotless | |||
EURO = 0x20AC // euro sign | CLIGOE = 0x0152, // latin capital ligature oe | |||
}; | SLIGOE = 0x0153, // latin small ligature oe | |||
CSCEDI = 0x015E, // latin capital letter s with cedilla | ||||
SSCEDI = 0x015F, // latin small letter s with cedilla | ||||
CSCARON = 0x0160, // latin capital letter s with caron | ||||
SSCARON = 0x0161, // latin small letter s with caron | ||||
CYDIAER = 0x0178, // latin capital letter y with diaeresis | ||||
CZCARON = 0x017D, // latin capital letter z with caron | ||||
SZCARON = 0x017E, // latin small letter z with caron | ||||
EURO = 0x20AC // euro sign | ||||
}; | ||||
int base_letter( const int code ); | int base_letter( const int code ); | |||
int compose( const int letter, const int accent ); | int compose( const int letter, const int accent ); | |||
bool isalnum( const int code ); | bool isalnum( const int code ); | |||
bool isalpha( const int code ); | bool isalpha( const int code ); | |||
bool isdigit( const int code ); | inline bool isdigit( const int code ) | |||
{ return ( code <= '9' && code >= '0' ); } | ||||
bool ishigh( const int code ); // high chars like "A1bp|" | bool ishigh( const int code ); // high chars like "A1bp|" | |||
bool islower( const int code ); | bool islower( const int code ); | |||
bool islower_ambiguous( const int code ); | bool islower_ambiguous( const int code ); | |||
bool islower_small( const int code ); | bool islower_small( const int code ); | |||
bool islower_small_ambiguous( const int code ); | bool islower_small_ambiguous( const int code ); | |||
bool isspace( const int code ); | bool isspace( const int code ); | |||
bool isupper( const int code ); | bool isupper( const int code ); | |||
bool isupper_normal_width( const int code ); | ||||
bool isvowel( int code ); | bool isvowel( int code ); | |||
unsigned char map_to_byte( const int code ); | unsigned char map_to_byte( const int code ); | |||
int map_to_ucs( const unsigned char ch ); // ISO-8859-15 to UCS | ||||
const char * ucs_to_utf8( const int code ); | const char * ucs_to_utf8( const int code ); | |||
int to_nearest_digit( const int code ); | int to_nearest_digit( const int code ); | |||
int to_nearest_letter( const int code ); | int to_nearest_letter( const int code ); | |||
int to_nearest_upper_num( const int code ); | int to_nearest_upper_num( const int code ); | |||
int toupper( const int code ); | int toupper( const int code ); | |||
} // end namespace UCS | } // end namespace UCS | |||
End of changes. 5 change blocks. | ||||
86 lines changed or deleted | 98 lines changed or added |