"Fossies" - the Fresh Open Source Software Archive  

Source code changes of the file "ucs.cc" between
ocrad-0.24.tar.gz and ocrad-0.25.tar.gz

About: GNU Ocrad is an OCR (Optical Character Recognition) program.

ucs.cc  (ocrad-0.24):ucs.cc  (ocrad-0.25)
/* GNU Ocrad - Optical Character Recognition program /* GNU Ocrad - Optical Character Recognition program
Copyright (C) 2003-2014 Antonio Diaz Diaz. Copyright (C) 2003-2015 Antonio Diaz Diaz.
This program is free software: you can redistribute it and/or modify This program is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 2 of the License, or the Free Software Foundation, either version 2 of the License, or
(at your option) any later version. (at your option) any later version.
This program is distributed in the hope that it will be useful, This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details. GNU General Public License for more details.
skipping to change at line 49 skipping to change at line 49
case CIACUTE: case CIACUTE:
case CICIRCU: case CICIRCU:
case CIDIAER: case CIDIAER:
case CIDOT : return 'I'; case CIDOT : return 'I';
case CNTILDE: return 'N'; case CNTILDE: return 'N';
case COGRAVE: case COGRAVE:
case COACUTE: case COACUTE:
case COCIRCU: case COCIRCU:
case COTILDE: case COTILDE:
case CODIAER: return 'O'; case CODIAER: return 'O';
case CSCEDI : return 'S'; case CSCEDI :
case CSCARON: return 'S';
case CUGRAVE: case CUGRAVE:
case CUACUTE: case CUACUTE:
case CUCIRCU: case CUCIRCU:
case CUDIAER: return 'U'; case CUDIAER: return 'U';
case CYACUTE: return 'Y'; case CYACUTE:
case CYDIAER: return 'Y';
case CZCARON: return 'Z';
case SAGRAVE: case SAGRAVE:
case SAACUTE: case SAACUTE:
case SACIRCU: case SACIRCU:
case SATILDE: case SATILDE:
case SADIAER: case SADIAER:
case SARING : return 'a'; case SARING : return 'a';
case SCCEDI : return 'c'; case SCCEDI : return 'c';
case SEGRAVE: case SEGRAVE:
case SEACUTE: case SEACUTE:
case SECIRCU: case SECIRCU:
skipping to change at line 78 skipping to change at line 81
case SIACUTE: case SIACUTE:
case SICIRCU: case SICIRCU:
case SIDIAER: case SIDIAER:
case SINODOT: return 'i'; case SINODOT: return 'i';
case SNTILDE: return 'n'; case SNTILDE: return 'n';
case SOGRAVE: case SOGRAVE:
case SOACUTE: case SOACUTE:
case SOCIRCU: case SOCIRCU:
case SOTILDE: case SOTILDE:
case SODIAER: return 'o'; case SODIAER: return 'o';
case SSCEDI : return 's'; case SSCEDI :
case SSCARON: return 's';
case SUGRAVE: case SUGRAVE:
case SUACUTE: case SUACUTE:
case SUCIRCU: case SUCIRCU:
case SUDIAER: return 'u'; case SUDIAER: return 'u';
case SYACUTE: case SYACUTE:
case SYDIAER: return 'y'; case SYDIAER: return 'y';
case SZCARON: return 'z';
default: return 0; default: return 0;
} }
} }
int UCS::compose( const int letter, const int accent ) int UCS::compose( const int letter, const int accent )
{ {
switch( letter ) switch( letter )
{ {
case 'A': if( accent == '\'') return CAACUTE; case 'A': if( accent == '\'') return CAACUTE;
if( accent == '`' ) return CAGRAVE; if( accent == '`' ) return CAGRAVE;
skipping to change at line 118 skipping to change at line 123
case 'O': if( accent == '\'') return COACUTE; case 'O': if( accent == '\'') return COACUTE;
if( accent == '`' ) return COGRAVE; if( accent == '`' ) return COGRAVE;
if( accent == '^' ) return COCIRCU; if( accent == '^' ) return COCIRCU;
if( accent == ':' ) return CODIAER; break; if( accent == ':' ) return CODIAER; break;
case 'S': return CSCARON; case 'S': return CSCARON;
case 'U': case 'U':
case 'V': if( accent == '\'') return CUACUTE; case 'V': if( accent == '\'') return CUACUTE;
if( accent == '`' ) return CUGRAVE; if( accent == '`' ) return CUGRAVE;
if( accent == '^' ) return CUCIRCU; if( accent == '^' ) return CUCIRCU;
if( accent == ':' ) return CUDIAER; break; if( accent == ':' ) return CUDIAER; break;
case 'Y': if( accent == '\'') return CYACUTE;
if( accent == ':' ) return CYDIAER; break;
case 'Z': return CZCARON; case 'Z': return CZCARON;
case 'a': if( accent == '\'') return SAACUTE; case 'a': if( accent == '\'') return SAACUTE;
if( accent == '`' ) return SAGRAVE; if( accent == '`' ) return SAGRAVE;
if( accent == '^' ) return SACIRCU; if( accent == '^' ) return SACIRCU;
if( accent == ':' ) return SADIAER; break; if( accent == ':' ) return SADIAER; break;
case 'e': if( accent == '\'') return SEACUTE; case 'e': if( accent == '\'') return SEACUTE;
if( accent == '`' ) return SEGRAVE; if( accent == '`' ) return SEGRAVE;
if( accent == '^' ) return SECIRCU; if( accent == '^' ) return SECIRCU;
if( accent == ':' ) return SEDIAER; break; if( accent == ':' ) return SEDIAER; break;
case '9': case '9':
skipping to change at line 164 skipping to change at line 171
bool UCS::isalnum( const int code ) bool UCS::isalnum( const int code )
{ {
return ( UCS::isalpha( code ) || UCS::isdigit( code ) ); return ( UCS::isalpha( code ) || UCS::isdigit( code ) );
} }
bool UCS::isalpha( const int code ) bool UCS::isalpha( const int code )
{ {
return ( ( code < 128 && std::isalpha( code ) ) || base_letter( code ) ); return ( ( code < 128 && std::isalpha( code ) ) || base_letter( code ) );
} }
// Returns true if 'code' is one of the ASCII decimal digits '0' to '9'.
bool UCS::isdigit( const int code )
  {
  if( code < '0' ) return false;	// below the digit range
  return code <= '9';
  }
bool UCS::ishigh( const int code ) bool UCS::ishigh( const int code )
{ {
if( isupper( code ) || isdigit( code ) ) return true; if( isupper( code ) || isdigit( code ) ) return true;
switch( code ) switch( code )
{ {
case 'b': case 'd': case 'f': case 'g': case 'h': case 'i': case 'j': case 'b': case 'd': case 'f': case 'g': case 'h': case 'i': case 'j':
case 'k': case 'l': case 'p': case 'q': case 't': case 'y': case '|': case 'k': case 'l': case 'p': case 'q': case 't': case 'y': case '|':
return true; return true;
default : return false; default : return false;
} }
skipping to change at line 228 skipping to change at line 230
switch( code ) switch( code )
{ {
case 'c': case 'o': case 's': case 'u': case 'v': case 'w': case 'c': case 'o': case 's': case 'u': case 'v': case 'w':
case 'x': case 'z': return true; case 'x': case 'z': return true;
default : return false; default : return false;
} }
} }
bool UCS::isspace( const int code ) bool UCS::isspace( const int code )
{ {
return ( code < 128 && std::isspace( code ) ); return ( code < 128 && std::isspace( code ) ) || code == 0xA0;
} }
bool UCS::isupper( const int code ) bool UCS::isupper( const int code )
{ {
if( code < 128 && std::isupper( code ) ) return true; if( code < 128 && std::isupper( code ) ) return true;
const int base = base_letter( code ); const int base = base_letter( code );
return ( base && std::isupper( base ) ); return ( base && std::isupper( base ) );
} }
// Returns true if 'code' is an ASCII uppercase letter other than
// 'I', 'J', 'L', 'M', 'Q' or 'W'. (Judging by the function name, the
// excluded letters are presumably those whose width is not "normal";
// confirm against the callers.)
// Codes outside the range 0 to 127 are rejected before std::isupper is
// called, because std::isupper has undefined behavior for arguments
// that are neither EOF nor representable as unsigned char.
bool UCS::isupper_normal_width( const int code )
  {
  if( code < 0 || code >= 128 || !std::isupper( code ) ) return false;
  switch( code )
    {
    case 'I': case 'J': case 'L': case 'M': case 'Q': case 'W': return false;
    default : return true;
    }
  }
bool UCS::isvowel( int code ) bool UCS::isvowel( int code )
{ {
if( code >= 128 ) code = base_letter( code ); if( code >= 128 ) code = base_letter( code );
if( !code || !std::isalpha( code ) ) return false; if( !code || !std::isalpha( code ) ) return false;
code = std::tolower( code ); code = std::tolower( code );
return ( code == 'a' || code == 'e' || code == 'i' || return ( code == 'a' || code == 'e' || code == 'i' ||
code == 'o' || code == 'u' ); code == 'o' || code == 'u' );
} }
unsigned char UCS::map_to_byte( const int code ) unsigned char UCS::map_to_byte( const int code )
skipping to change at line 261 skipping to change at line 273
switch( code ) switch( code )
{ {
case CGBREVE: return 0xD0; case CGBREVE: return 0xD0;
case SGBREVE: return 0xF0; case SGBREVE: return 0xF0;
case CIDOT : return 0xDD; case CIDOT : return 0xDD;
case SINODOT: return 0xFD; case SINODOT: return 0xFD;
case CSCEDI : return 0xDE; case CSCEDI : return 0xDE;
case SSCEDI : return 0xFE; case SSCEDI : return 0xFE;
case CSCARON: return 0xA6; case CSCARON: return 0xA6;
case SSCARON: return 0xA8; case SSCARON: return 0xA8;
case CYDIAER: return 0xBE;
case CZCARON: return 0xB4; case CZCARON: return 0xB4;
case SZCARON: return 0xB8; case SZCARON: return 0xB8;
case EURO : return 0xA4; case EURO : return 0xA4;
default : return 0; default : return 0;
} }
} }
// Converts one input byte to a character code.
// The eight byte values listed below are translated to named constants;
// every other byte value is returned unchanged.
// NOTE(review): the listed bytes are the positions where ISO-8859-15
// differs from ISO-8859-1, so the input is presumably ISO-8859-15 text;
// confirm against the caller.
int UCS::map_to_ucs( const unsigned char ch )
  {
  int code;
  switch( ch )
    {
    case 0xA4: code = EURO;    break;
    case 0xA6: code = CSCARON; break;
    case 0xA8: code = SSCARON; break;
    case 0xB4: code = CZCARON; break;
    case 0xB8: code = SZCARON; break;
    case 0xBC: code = CLIGOE;  break;
    case 0xBD: code = SLIGOE;  break;
    case 0xBE: code = CYDIAER; break;
    default  : code = ch;      break;	// no special mapping
    }
  return code;
  }
// does not work for 'code' == 0
const char * UCS::ucs_to_utf8( const int code ) const char * UCS::ucs_to_utf8( const int code )
{ {
static char s[7]; static char s[7];
if( code < 0 || code > 0x7FFFFFFF ) { s[0] = 0; return s; } // invalid code if( code < 0 || code > 0x7FFFFFFF ) { s[0] = 0; return s; } // invalid code
if( code < 128 ) { s[0] = code; s[1] = 0; return s; } // plain ascii if( code < 128 ) { s[0] = code; s[1] = 0; return s; } // plain ascii
int i, mask; int i, mask;
if( code < 0x800 ) { i = 2; mask = 0xC0; } // 110X XXXX if( code < 0x800 ) { i = 2; mask = 0xC0; } // 110X XXXX
else if( code < 0x10000 ) { i = 3; mask = 0xE0; } // 1110 XXXX else if( code < 0x10000 ) { i = 3; mask = 0xE0; } // 1110 XXXX
else if( code < 0x200000 ) { i = 4; mask = 0xF0; } // 1111 0XXX else if( code < 0x200000 ) { i = 4; mask = 0xF0; } // 1111 0XXX
else if( code < 0x4000000 ) { i = 5; mask = 0xF8; } // 1111 10XX else if( code < 0x4000000 ) { i = 5; mask = 0xF8; } // 1111 10XX
else { i = 6; mask = 0xFC; } // 1111 110X else { i = 6; mask = 0xFC; } // 1111 110X
s[i] = 0; --i; s[i] = 0; --i;
int d = 0; int d = 0;
for( ; i > 0; --i, d+=6 ) for( ; i > 0; --i, d += 6 )
s[i] = 0x80 | ( ( code >> d ) & 0x3F ); // 10XX XXXX s[i] = 0x80 | ( ( code >> d ) & 0x3F ); // 10XX XXXX
s[0] = mask | ( code >> d ); s[0] = mask | ( code >> d );
return s; return s;
} }
int UCS::to_nearest_digit( const int code ) int UCS::to_nearest_digit( const int code )
{ {
switch( code ) switch( code )
{ {
case 'D':
case 'O': case 'O':
case 'Q': case 'Q':
case 'o': return '0'; case 'o': return '0';
case '|':
case 'I': case 'I':
case 'L': case 'L':
case 'l': case 'l':
case '|':
case SINODOT: return '1'; case SINODOT: return '1';
case 'Z': case 'Z':
case 'z': return '2'; case 'z': return '2';
case 'A': case 'A':
case 'q': return '4'; case 'q': return '4';
case 'S': case 'S':
case 's': return '5'; case 's': return '5';
case 'G': case 'G':
case 'b': case 'b':
case SOACUTE: return '6'; case SOACUTE: return '6';
skipping to change at line 341 skipping to change at line 372
case '8': return 'B'; case '8': return 'B';
case '9': return 'g'; case '9': return 'g';
default: return code; default: return code;
} }
} }
int UCS::to_nearest_upper_num( const int code ) int UCS::to_nearest_upper_num( const int code )
{ {
switch( code ) switch( code )
{ {
case '(':
case '[': return 'C';
case 'l': case 'l':
case '|': return 'I'; case '|': return 'I';
case DEG: return 'O'; case DEG: return 'O';
case MICRO: return 'U'; case MICRO: return 'U';
case POW1: case POW1:
case SINODOT: return '1'; case SINODOT: return '1';
case POW2: return '2'; case POW2: return '2';
case POW3: return '3'; case POW3: return '3';
case 'q': return '4'; case 'q': return '4';
case 'b': case 'b':
skipping to change at line 388 skipping to change at line 421
case SIACUTE: return CIACUTE; case SIACUTE: return CIACUTE;
case SICIRCU: return CICIRCU; case SICIRCU: return CICIRCU;
case SIDIAER: return CIDIAER; case SIDIAER: return CIDIAER;
case SNTILDE: return CNTILDE; case SNTILDE: return CNTILDE;
case SOGRAVE: return COGRAVE; case SOGRAVE: return COGRAVE;
case SOACUTE: return COACUTE; case SOACUTE: return COACUTE;
case SOCIRCU: return COCIRCU; case SOCIRCU: return COCIRCU;
case SOTILDE: return COTILDE; case SOTILDE: return COTILDE;
case SODIAER: return CODIAER; case SODIAER: return CODIAER;
case SSCEDI : return CSCEDI; case SSCEDI : return CSCEDI;
case SSCARON: return CSCARON;
case SUGRAVE: return CUGRAVE; case SUGRAVE: return CUGRAVE;
case SUACUTE: return CUACUTE; case SUACUTE: return CUACUTE;
case SUCIRCU: return CUCIRCU; case SUCIRCU: return CUCIRCU;
case SUDIAER: return CUDIAER; case SUDIAER: return CUDIAER;
case SYACUTE: return CYACUTE; case SYACUTE: return CYACUTE;
case SYDIAER: return CYDIAER;
case SZCARON: return CZCARON;
default: return code; default: return code;
} }
} }
 End of changes. 18 change blocks. 
12 lines changed or deleted 48 lines changed or added

Home  |  About  |  All  |  Newest  |  Fossies Dox  |  Screenshots  |  Comments  |  Imprint  |  Privacy  |  HTTPS