textline_r2.cc (ocrad-0.24) | : | textline_r2.cc (ocrad-0.25) | ||
---|---|---|---|---|
/* GNU Ocrad - Optical Character Recognition program | /* GNU Ocrad - Optical Character Recognition program | |||
Copyright (C) 2003-2014 Antonio Diaz Diaz. | Copyright (C) 2003-2015 Antonio Diaz Diaz. | |||
This program is free software: you can redistribute it and/or modify | This program is free software: you can redistribute it and/or modify | |||
it under the terms of the GNU General Public License as published by | it under the terms of the GNU General Public License as published by | |||
the Free Software Foundation, either version 2 of the License, or | the Free Software Foundation, either version 2 of the License, or | |||
(at your option) any later version. | (at your option) any later version. | |||
This program is distributed in the hope that it will be useful, | This program is distributed in the hope that it will be useful, | |||
but WITHOUT ANY WARRANTY; without even the implied warranty of | but WITHOUT ANY WARRANTY; without even the implied warranty of | |||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | |||
GNU General Public License for more details. | GNU General Public License for more details. | |||
skipping to change at line 239 | skipping to change at line 239 | |||
Character c1( new Blob( b1 ) ); | Character c1( new Blob( b1 ) ); | |||
Character c2( new Blob( b2 ) ); | Character c2( new Blob( b2 ) ); | |||
for( int j = 0; j < c.blobs(); ++j ) if( j != ib ) | for( int j = 0; j < c.blobs(); ++j ) if( j != ib ) | |||
{ | { | |||
const Blob & bj = c.blob( j ); | const Blob & bj = c.blob( j ); | |||
if( c1.includes_hcenter( bj ) ) c1.shift_blobp( new Blob( bj ) ); | if( c1.includes_hcenter( bj ) ) c1.shift_blobp( new Blob( bj ) ); | |||
else if( c2.includes_hcenter( bj ) ) c2.shift_blobp( new Blob( bj ) ); | else if( c2.includes_hcenter( bj ) ) c2.shift_blobp( new Blob( bj ) ); | |||
} | } | |||
c1.recognize1( charset, charbox( c1 ) ); | c1.recognize1( charset, charbox( c1 ) ); | |||
c2.recognize1( charset, charbox( c2 ) ); | c2.recognize1( charset, charbox( c2 ) ); | |||
if( ( c1.guesses() && c2.guesses() ) || | const bool good_c2 = ( c2.guesses() && c2.guess( 0 ).code != '\'' ); | |||
( ( c1.guesses() || c2.guesses() ) && c.width() > c.height() ) ) | if( ( c1.guesses() && good_c2 ) || | |||
( ( c1.guesses() || good_c2 ) && c.width() > c.height() ) ) | ||||
{ | { | |||
c = c1; shift_characterp( new Character( c2 ) ); | c = c1; shift_characterp( new Character( c2 ) ); | |||
if( !c1.guesses() ) --i; else if( c2.guesses() ) ++i; | if( !c1.guesses() ) --i; else if( c2.guesses() ) ++i; | |||
} | } | |||
} | } | |||
} | } | |||
// try to recognize 1 blob unrecognized characters with holes by | // try to recognize 1 blob unrecognized characters with holes by | |||
// removing small holes (noise) | // removing small holes (noise) | |||
for( int i = big_initials(); i < characters(); ++i ) | for( int i = big_initials(); i < characters(); ++i ) | |||
skipping to change at line 380 | skipping to change at line 381 | |||
if( j >= characters() || !character( j ).guesses() ) | if( j >= characters() || !character( j ).guesses() ) | |||
{ j = i - 1; if( j < big_initials() || !character( j ).guesses() ) continue; } | { j = i - 1; if( j < big_initials() || !character( j ).guesses() ) continue; } | |||
Character & c2 = character( j ); | Character & c2 = character( j ); | |||
if( UCS::isvowel( c2.guess( 0 ).code ) && | if( UCS::isvowel( c2.guess( 0 ).code ) && | |||
c1.bottom() >= c2.bottom() + ( c2.height() / 4 ) ) | c1.bottom() >= c2.bottom() + ( c2.height() / 4 ) ) | |||
c1.insert_guess( 0, 'j', 1 ); | c1.insert_guess( 0, 'j', 1 ); | |||
} | } | |||
} | } | |||
// transform small o or u with accent or diaeresis to capital | // transform small o or u with accent or diaeresis to capital | |||
// transform small s or z with caron to capital | ||||
{ | { | |||
int begin = big_initials(); | int begin = big_initials(); | |||
bool isolated = false; // isolated letters compare with all line | bool isolated = false; // isolated letters compare with all line | |||
for( int i = big_initials(); i < characters(); ++i ) | for( int i = big_initials(); i < characters(); ++i ) | |||
{ | { | |||
Character & c1 = character( i ); | Character & c1 = character( i ); | |||
if( c1.guesses() >= 1 ) | if( c1.guesses() >= 1 ) | |||
{ | { | |||
if( c1.maybe(' ') ) | if( c1.maybe(' ') ) | |||
{ | { | |||
if( i + 2 < characters() && character( i + 2 ).maybe(' ') ) | if( i + 2 < characters() && character( i + 2 ).maybe(' ') ) | |||
{ begin = big_initials(); isolated = true; } | { begin = big_initials(); isolated = true; } | |||
else { begin = i + 1; isolated = false; } | else { begin = i + 1; isolated = false; } | |||
continue; | continue; | |||
} | } | |||
int code = c1.guess( 0 ).code; | int code = c1.guess( 0 ).code; | |||
if( code < 128 || c1.blobs() < 2 ) continue; | if( code < 128 || c1.blobs() < 2 ) continue; | |||
int codeb = UCS::base_letter( code ); | int codeb = UCS::base_letter( code ); | |||
if( codeb != 'o' && codeb != 'u' ) continue; | if( codeb != 'o' && codeb != 'u' && codeb != 's' && codeb != 'z' ) | |||
continue; | ||||
const Blob & b1 = c1.blob( c1.blobs() - 1 ); // lower blob | const Blob & b1 = c1.blob( c1.blobs() - 1 ); // lower blob | |||
for( int j = begin; j < characters(); ++j ) if( j != i ) | for( int j = begin; j < characters(); ++j ) if( j != i ) | |||
{ | { | |||
Character & c2 = character( j ); | Character & c2 = character( j ); | |||
if( c2.guesses() >= 1 ) | if( c2.guesses() >= 1 ) | |||
{ | { | |||
if( c2.maybe(' ') ) { if( isolated ) continue; else break; } | if( c2.maybe(' ') ) { if( isolated ) continue; else break; } | |||
int code2 = c2.guess( 0 ).code; | int code2 = c2.guess( 0 ).code; | |||
int code2b = UCS::base_letter( code2 ); | int code2b = UCS::base_letter( code2 ); | |||
if( !code2b && code2 >= 128 ) continue; | if( !code2b && code2 >= 128 ) continue; | |||
skipping to change at line 769 | skipping to change at line 772 | |||
{ | { | |||
int code1 = c1.guess( 0 ).code; | int code1 = c1.guess( 0 ).code; | |||
int code2 = c2.guess( 0 ).code; | int code2 = c2.guess( 0 ).code; | |||
if( code1 == 'n' && ( code2 == 'I' || code2 == 'l' ) && | if( code1 == 'n' && ( code2 == 'I' || code2 == 'l' ) && | |||
Ocrad::similar( c1.height(), c2.height(), 10 ) && | Ocrad::similar( c1.height(), c2.height(), 10 ) && | |||
c2.left() - c1.right() < c2.width() ) | c2.left() - c1.right() < c2.width() ) | |||
{ c1.join( c2 ); c1.only_guess( 'm', 0 ); delete_character( i + 1 ); } | { c1.join( c2 ); c1.only_guess( 'm', 0 ); delete_character( i + 1 ); } | |||
} | } | |||
} | } | |||
// separate merged 'VV' | ||||
{ | ||||
int mean_upper_width = 0; | ||||
for( int i = big_initials(); i < characters(); ++i ) | ||||
{ | ||||
Character & c = character( i ); | ||||
if( !c.guesses() || c.guess( 0 ).code != 'W' || c.width() <= c.height() || | ||||
c.blobs() != 1 || c.blob( 0 ).holes() ) continue; | ||||
if( mean_upper_width == 0 ) | ||||
{ | ||||
int count = 0; | ||||
for( int j = big_initials(); j < characters(); ++j ) | ||||
{ | ||||
const Character & cj = character( j ); | ||||
if( cj.guesses() && UCS::isupper_normal_width( cj.guess( 0 ).code ) ) | ||||
{ mean_upper_width += cj.width(); ++count; } | ||||
} | ||||
if( count <= 0 ) break; // no characters to compare | ||||
mean_upper_width /= count; | ||||
} | ||||
if( c.width() < 2 * mean_upper_width ) continue; | ||||
const Blob & b = c.blob( 0 ); | ||||
int row = b.bottom(); | ||||
while( row >= b.top() && b.id( row, b.hcenter() ) == 0 ) --row; | ||||
if( row >= b.vpos( 20 ) ) continue; | ||||
Rectangle r1( b.left(), b.top(), b.hcenter() - 1, b.bottom() ); | ||||
Rectangle r2( b.hcenter() + 1, b.top(), b.right(), b.bottom() ); | ||||
Blob b1( b, r1 ); | ||||
Blob b2( b, r2 ); | ||||
b1.adjust_height(); | ||||
b2.adjust_height(); | ||||
if( 2 * b1.height() < b.height() || 2 * b2.height() < b.height() || | ||||
!Ocrad::similar( b1.height(), b2.height(), 10, 2 ) ) continue; | ||||
Character c1( new Blob( b1 ) ); | ||||
Character c2( new Blob( b2 ) ); | ||||
c1.only_guess( 'V', 0 ); | ||||
c2.only_guess( 'V', 0 ); | ||||
c = c1; | ||||
++i; cpv.insert( cpv.begin() + i, new Character( c2 ) ); | ||||
} | ||||
} | ||||
// join the sequence '°', '/', 'o', ' ' into a '%' | // join the sequence '°', '/', 'o', ' ' into a '%' | |||
for( int i = big_initials(); i + 2 < characters(); ++i ) | for( int i = big_initials(); i + 2 < characters(); ++i ) | |||
{ | { | |||
Character & c1 = character( i ); | Character & c1 = character( i ); | |||
if( c1.guesses() == 1 && c1.guess( 0 ).code == UCS::DEG ) | if( c1.guesses() == 1 && c1.guess( 0 ).code == UCS::DEG ) | |||
{ | { | |||
if( character( i + 1 ).maybe('/') && | if( character( i + 1 ).maybe('/') && | |||
character( i + 2 ).maybe('o') && | character( i + 2 ).maybe('o') && | |||
( i + 3 >= characters() || character( i + 3 ).maybe(' ') ) ) | ( i + 3 >= characters() || character( i + 3 ).maybe(' ') ) ) | |||
{ | { | |||
End of changes. 5 change blocks. | ||||
4 lines changed or deleted | 49 lines changed or added |