"Fossies" - the Fresh Open Source Software Archive  

Source code changes of the file "textline_r2.cc" between
ocrad-0.24.tar.gz and ocrad-0.25.tar.gz

About: GNU Ocrad is an OCR (Optical Character Recognition) program.

textline_r2.cc  (ocrad-0.24):textline_r2.cc  (ocrad-0.25)
/* GNU Ocrad - Optical Character Recognition program /* GNU Ocrad - Optical Character Recognition program
Copyright (C) 2003-2014 Antonio Diaz Diaz. Copyright (C) 2003-2015 Antonio Diaz Diaz.
This program is free software: you can redistribute it and/or modify This program is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 2 of the License, or the Free Software Foundation, either version 2 of the License, or
(at your option) any later version. (at your option) any later version.
This program is distributed in the hope that it will be useful, This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details. GNU General Public License for more details.
skipping to change at line 239 skipping to change at line 239
Character c1( new Blob( b1 ) ); Character c1( new Blob( b1 ) );
Character c2( new Blob( b2 ) ); Character c2( new Blob( b2 ) );
for( int j = 0; j < c.blobs(); ++j ) if( j != ib ) for( int j = 0; j < c.blobs(); ++j ) if( j != ib )
{ {
const Blob & bj = c.blob( j ); const Blob & bj = c.blob( j );
if( c1.includes_hcenter( bj ) ) c1.shift_blobp( new Blob( bj ) ); if( c1.includes_hcenter( bj ) ) c1.shift_blobp( new Blob( bj ) );
else if( c2.includes_hcenter( bj ) ) c2.shift_blobp( new Blob( bj ) ); else if( c2.includes_hcenter( bj ) ) c2.shift_blobp( new Blob( bj ) );
} }
c1.recognize1( charset, charbox( c1 ) ); c1.recognize1( charset, charbox( c1 ) );
c2.recognize1( charset, charbox( c2 ) ); c2.recognize1( charset, charbox( c2 ) );
if( ( c1.guesses() && c2.guesses() ) || const bool good_c2 = ( c2.guesses() && c2.guess( 0 ).code != '\'' );
( ( c1.guesses() || c2.guesses() ) && c.width() > c.height() ) ) if( ( c1.guesses() && good_c2 ) ||
( ( c1.guesses() || good_c2 ) && c.width() > c.height() ) )
{ {
c = c1; shift_characterp( new Character( c2 ) ); c = c1; shift_characterp( new Character( c2 ) );
if( !c1.guesses() ) --i; else if( c2.guesses() ) ++i; if( !c1.guesses() ) --i; else if( c2.guesses() ) ++i;
} }
} }
} }
// try to recognize 1 blob unrecognized characters with holes by // try to recognize 1 blob unrecognized characters with holes by
// removing small holes (noise) // removing small holes (noise)
for( int i = big_initials(); i < characters(); ++i ) for( int i = big_initials(); i < characters(); ++i )
skipping to change at line 380 skipping to change at line 381
if( j >= characters() || !character( j ).guesses() ) if( j >= characters() || !character( j ).guesses() )
{ j = i - 1; if( j < big_initials() || !character( j ).guesses() ) continue; } { j = i - 1; if( j < big_initials() || !character( j ).guesses() ) continue; }
Character & c2 = character( j ); Character & c2 = character( j );
if( UCS::isvowel( c2.guess( 0 ).code ) && if( UCS::isvowel( c2.guess( 0 ).code ) &&
c1.bottom() >= c2.bottom() + ( c2.height() / 4 ) ) c1.bottom() >= c2.bottom() + ( c2.height() / 4 ) )
c1.insert_guess( 0, 'j', 1 ); c1.insert_guess( 0, 'j', 1 );
} }
} }
// transform small o or u with accent or diaeresis to capital // transform small o or u with accent or diaeresis to capital
// transform small s or z with caron to capital
{ {
int begin = big_initials(); int begin = big_initials();
bool isolated = false; // isolated letters compare with all line bool isolated = false; // isolated letters compare with all line
for( int i = big_initials(); i < characters(); ++i ) for( int i = big_initials(); i < characters(); ++i )
{ {
Character & c1 = character( i ); Character & c1 = character( i );
if( c1.guesses() >= 1 ) if( c1.guesses() >= 1 )
{ {
if( c1.maybe(' ') ) if( c1.maybe(' ') )
{ {
if( i + 2 < characters() && character( i + 2 ).maybe(' ') ) if( i + 2 < characters() && character( i + 2 ).maybe(' ') )
{ begin = big_initials(); isolated = true; } { begin = big_initials(); isolated = true; }
else { begin = i + 1; isolated = false; } else { begin = i + 1; isolated = false; }
continue; continue;
} }
int code = c1.guess( 0 ).code; int code = c1.guess( 0 ).code;
if( code < 128 || c1.blobs() < 2 ) continue; if( code < 128 || c1.blobs() < 2 ) continue;
int codeb = UCS::base_letter( code ); int codeb = UCS::base_letter( code );
if( codeb != 'o' && codeb != 'u' ) continue; if( codeb != 'o' && codeb != 'u' && codeb != 's' && codeb != 'z' )
continue;
const Blob & b1 = c1.blob( c1.blobs() - 1 ); // lower blob const Blob & b1 = c1.blob( c1.blobs() - 1 ); // lower blob
for( int j = begin; j < characters(); ++j ) if( j != i ) for( int j = begin; j < characters(); ++j ) if( j != i )
{ {
Character & c2 = character( j ); Character & c2 = character( j );
if( c2.guesses() >= 1 ) if( c2.guesses() >= 1 )
{ {
if( c2.maybe(' ') ) { if( isolated ) continue; else break; } if( c2.maybe(' ') ) { if( isolated ) continue; else break; }
int code2 = c2.guess( 0 ).code; int code2 = c2.guess( 0 ).code;
int code2b = UCS::base_letter( code2 ); int code2b = UCS::base_letter( code2 );
if( !code2b && code2 >= 128 ) continue; if( !code2b && code2 >= 128 ) continue;
skipping to change at line 769 skipping to change at line 772
{ {
int code1 = c1.guess( 0 ).code; int code1 = c1.guess( 0 ).code;
int code2 = c2.guess( 0 ).code; int code2 = c2.guess( 0 ).code;
if( code1 == 'n' && ( code2 == 'I' || code2 == 'l' ) && if( code1 == 'n' && ( code2 == 'I' || code2 == 'l' ) &&
Ocrad::similar( c1.height(), c2.height(), 10 ) && Ocrad::similar( c1.height(), c2.height(), 10 ) &&
c2.left() - c1.right() < c2.width() ) c2.left() - c1.right() < c2.width() )
{ c1.join( c2 ); c1.only_guess( 'm', 0 ); delete_character( i + 1 ); } { c1.join( c2 ); c1.only_guess( 'm', 0 ); delete_character( i + 1 ); }
} }
} }
// separate merged 'VV'
// Scans the line for characters whose best guess is 'W' but that look like
// two touching 'V's: a single blob without holes, wider than tall, and at
// least twice as wide as the mean width of the line's normal-width
// uppercase letters. Each such character is replaced by two characters
// whose only guess is 'V'.
{
// 0 doubles as the "not yet computed" marker; the mean is only computed
// once, the first time a candidate 'W' is found
int mean_upper_width = 0;
for( int i = big_initials(); i < characters(); ++i )
{
Character & c = character( i );
// candidate filter: best guess 'W', wider than tall, exactly one blob,
// and that blob has no holes
if( !c.guesses() || c.guess( 0 ).code != 'W' || c.width() <= c.height() ||
c.blobs() != 1 || c.blob( 0 ).holes() ) continue;
if( mean_upper_width == 0 )
{
// average the widths of all recognized characters in the line whose
// best guess satisfies UCS::isupper_normal_width
int count = 0;
for( int j = big_initials(); j < characters(); ++j )
{
const Character & cj = character( j );
if( cj.guesses() && UCS::isupper_normal_width( cj.guess( 0 ).code ) )
{ mean_upper_width += cj.width(); ++count; }
}
if( count <= 0 ) break; // no characters to compare
mean_upper_width /= count;
}
// a merged pair is expected to be at least two mean letter widths wide
if( c.width() < 2 * mean_upper_width ) continue;
const Blob & b = c.blob( 0 );
// walk up the blob's horizontal-center column from the bottom while
// b.id() returns 0, stopping at the first row where it does not
int row = b.bottom();
while( row >= b.top() && b.id( row, b.hcenter() ) == 0 ) --row;
// skip if that first non-zero row is at or below b.vpos( 20 )
// NOTE(review): presumably vpos( 20 ) is the row 20% of the way down the
// blob, i.e. the two halves may only touch near the top -- confirm
if( row >= b.vpos( 20 ) ) continue;
// left and right halves of the blob, both excluding the center column
Rectangle r1( b.left(), b.top(), b.hcenter() - 1, b.bottom() );
Rectangle r2( b.hcenter() + 1, b.top(), b.right(), b.bottom() );
Blob b1( b, r1 );
Blob b2( b, r2 );
// NOTE(review): presumably trims each half to the rows it actually
// occupies -- confirm against Blob::adjust_height
b1.adjust_height();
b2.adjust_height();
// reject the split if either half is less than half as tall as the
// original blob, or if Ocrad::similar does not accept the two heights
if( 2 * b1.height() < b.height() || 2 * b2.height() < b.height() ||
!Ocrad::similar( b1.height(), b2.height(), 10, 2 ) ) continue;
Character c1( new Blob( b1 ) );
Character c2( new Blob( b2 ) );
c1.only_guess( 'V', 0 );
c2.only_guess( 'V', 0 );
// overwrite the merged character with the left 'V', then insert the
// right 'V' just after it; the ++i here together with the loop's own ++i
// steps past the newly inserted character. 'c' is not used after the
// insert.
c = c1;
++i; cpv.insert( cpv.begin() + i, new Character( c2 ) );
}
}
// join the sequence '°', '/', 'o', ' ' into a '%' // join the sequence '°', '/', 'o', ' ' into a '%'
for( int i = big_initials(); i + 2 < characters(); ++i ) for( int i = big_initials(); i + 2 < characters(); ++i )
{ {
Character & c1 = character( i ); Character & c1 = character( i );
if( c1.guesses() == 1 && c1.guess( 0 ).code == UCS::DEG ) if( c1.guesses() == 1 && c1.guess( 0 ).code == UCS::DEG )
{ {
if( character( i + 1 ).maybe('/') && if( character( i + 1 ).maybe('/') &&
character( i + 2 ).maybe('o') && character( i + 2 ).maybe('o') &&
( i + 3 >= characters() || character( i + 3 ).maybe(' ') ) ) ( i + 3 >= characters() || character( i + 3 ).maybe(' ') ) )
{ {
 End of changes. 5 change blocks. 
4 lines changed or deleted 49 lines changed or added

Home  |  About  |  All  |  Newest  |  Fossies Dox  |  Screenshots  |  Comments  |  Imprint  |  Privacy  |  HTTPS