"Fossies" - the Fresh Open Source Software Archive  

Source code changes of the file "textline.cc" between
ocrad-0.24.tar.gz and ocrad-0.25.tar.gz

About: GNU Ocrad is an OCR (Optical Character Recognition) program.

textline.cc (ocrad-0.24) | textline.cc (ocrad-0.25)
/* GNU Ocrad - Optical Character Recognition program /* GNU Ocrad - Optical Character Recognition program
Copyright (C) 2003-2014 Antonio Diaz Diaz. Copyright (C) 2003-2015 Antonio Diaz Diaz.
This program is free software: you can redistribute it and/or modify This program is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 2 of the License, or the Free Software Foundation, either version 2 of the License, or
(at your option) any later version. (at your option) any later version.
This program is distributed in the hope that it will be useful, This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details. GNU General Public License for more details.
skipping to change at line 32 skipping to change at line 32
#include <string> #include <string>
#include <vector> #include <vector>
#include <stdint.h> #include <stdint.h>
#include "common.h" #include "common.h"
#include "histogram.h" #include "histogram.h"
#include "rational.h" #include "rational.h"
#include "rectangle.h" #include "rectangle.h"
#include "track.h" #include "track.h"
#include "ucs.h" #include "ucs.h"
#include "user_filter.h"
#include "bitmap.h" #include "bitmap.h"
#include "blob.h" #include "blob.h"
#include "character.h" #include "character.h"
#include "page_image.h" #include "page_image.h"
#include "textline.h" #include "textline.h"
namespace { namespace {
// Return the character position >= first preceding a big gap or eol. // Returns the character position >= first preceding a big gap or eol.
// //
int find_big_gap( const Textline & line, const int first, int find_big_gap( const Textline & line, const int first,
const int space_width_limit ) const int space_width_limit )
{ {
int i = first; int i = first;
while( i + 1 < line.characters() ) while( i + 1 < line.characters() )
{ {
const Character & c1 = line.character( i ); const Character & c1 = line.character( i );
const Character & c2 = line.character( i + 1 ); const Character & c2 = line.character( i + 1 );
const int gap = c2.left() - c1.right() - 1; const int gap = c2.left() - c1.right() - 1;
skipping to change at line 120 skipping to change at line 121
for( int i = 0; i < characters(); ++i ) for( int i = 0; i < characters(); ++i )
if( cpv[i]->h_includes( col ) ) return cpv[i]; if( cpv[i]->h_includes( col ) ) return cpv[i];
return 0; return 0;
} }
Rectangle Textline::charbox( const Character & c ) const Rectangle Textline::charbox( const Character & c ) const
{ {
return Rectangle( c.left(), top( c.hcenter() ), c.right(), bottom( c.hcenter() ) ); return Rectangle( c.left(), top( c.hcenter() ), c.right(), bottom( c.hcenter() ) );
} }
bool Textline::is_key_character( const int i ) const
{
if( i < big_initials_ || i >= characters() )
Ocrad::internal_error( "is_key_character, index out of bounds." );
return ( cpv[i]->isalnum() && cpv[i]->guess( 0 ).code != 'J' &&
cpv[i]->height() < 2 * height() && 2 * cpv[i]->height() > height() );
}
void Textline::delete_character( const int i ) void Textline::delete_character( const int i )
{ {
if( i < 0 || i >= characters() ) if( i < 0 || i >= characters() )
Ocrad::internal_error( "delete_character, index out of bounds." ); Ocrad::internal_error( "delete_character, index out of bounds." );
if( i < big_initials_ ) --big_initials_; if( i < big_initials_ ) --big_initials_;
delete cpv[i]; cpv.erase( cpv.begin() + i ); delete cpv[i]; cpv.erase( cpv.begin() + i );
} }
int Textline::shift_characterp( Character * const p, const bool big ) int Textline::shift_characterp( Character * const p, const bool big )
{ {
skipping to change at line 279 skipping to change at line 288
void Textline::dprint( const Control & control, const bool graph, void Textline::dprint( const Control & control, const bool graph,
const bool recursive ) const const bool recursive ) const
{ {
if( graph || recursive ) if( graph || recursive )
{ {
Histogram hist; Histogram hist;
for( int i = 0; i < characters(); ++i ) for( int i = 0; i < characters(); ++i )
if( !character(i).maybe(' ') ) if( !character(i).maybe(' ') )
hist.add_sample( character(i).height() ); hist.add_sample( character(i).height() );
std::fprintf( control.outfile, "mean height = %d, median height = %d, track std::fprintf( control.outfile, "mean height = %d, median height = %d, track
segments = %d\n", segments = %d, big initials = %d\n",
mean_height(), hist.median(), segments() ); mean_height(), hist.median(), segments(), big_initials_ );
} }
for( int i = 0; i < characters(); ++i ) for( int i = 0; i < characters(); ++i )
{ {
const Character & c = character( i ); const Character & c = character( i );
if( i < big_initials_ ) c.dprint( control, c, graph, recursive ); if( i < big_initials_ ) c.dprint( control, c, graph, recursive );
else c.dprint( control, charbox( c ), graph, recursive ); else c.dprint( control, charbox( c ), graph, recursive );
} }
std::fputs( "\n", control.outfile ); std::fputs( "\n", control.outfile );
} }
skipping to change at line 326 skipping to change at line 335
c.only_guess( UCS::toupper( code ), 0 ); c.only_guess( UCS::toupper( code ), 0 );
} }
} }
else c.recognize1( charset, charbox( c ) ); else c.recognize1( charset, charbox( c ) );
} }
} }
void Textline::apply_filter( const Filter::Type filter ) void Textline::apply_filter( const Filter::Type filter )
{ {
bool modified = false; bool modified = false;
for( int i = characters() - 1; i >= 0; --i )
{
Character & c = character( i );
if( !c.guesses() ) continue;
c.apply_filter( filter );
if( !c.guesses() ) { delete_character( i ); modified = true; }
}
if( filter == Filter::same_height ) if( filter == Filter::same_height )
{ {
Histogram hist; Histogram hist;
for( int i = 0; i < characters(); ++i ) for( int i = 0; i < characters(); ++i )
if( !character(i).maybe(' ') ) if( !character(i).maybe(' ') )
hist.add_sample( character(i).height() ); hist.add_sample( character(i).height() );
const int median_height = hist.median(); const int median_height = hist.median();
for( int i = characters() - 1; i >= 0; --i ) for( int i = characters() - 1; i >= 0; --i )
if( !character(i).maybe(' ') && if( !character(i).maybe(' ') &&
!Ocrad::similar( character(i).height(), median_height, 13, 2 ) ) !Ocrad::similar( character(i).height(), median_height, 10, 2 ) )
{ delete_character( i ); modified = true; } { delete_character( i ); modified = true; }
} }
if( modified ) // remove leadind/trailing/duplicate spaces else
{
for( int i = characters() - 1; i >= 0; --i ) for( int i = characters() - 1; i >= 0; --i )
if( character(i).maybe(' ') && {
( i == 0 || i == characters() - 1 || character(i-1).maybe(' ') ) ) Character & c = character( i );
delete_character( i ); if( !c.guesses() ) continue;
c.apply_filter( filter );
if( !c.guesses() && filter != Filter::upper_num_mark )
{ delete_character( i ); modified = true; }
}
if( filter == Filter::upper_num_mark )
join_broken_unrecognized_characters();
}
if( modified ) remove_leadind_trailing_duplicate_spaces();
}
// Applies 'user_filter' to every character that has at least one guess,
// scanning from the last character to the first. A character left
// without guesses by the filter is deleted only if user_filter.discard()
// is true. Afterwards join_broken_unrecognized_characters is called if
// user_filter.mark() is true, and remove_leadind_trailing_duplicate_spaces
// is called if any character was deleted.
//
void Textline::apply_user_filter( const User_filter & user_filter )
  {
  bool deleted_any = false;
  int i = characters();
  while( --i >= 0 )	// backwards, so a deletion never shifts pending indexes
    {
    Character & c = character( i );
    if( c.guesses() )
      {
      c.apply_user_filter( user_filter );
      if( !c.guesses() && user_filter.discard() )
        { delete_character( i ); deleted_any = true; }
      }
    }
  if( user_filter.mark() ) join_broken_unrecognized_characters();
  if( deleted_any ) remove_leadind_trailing_duplicate_spaces();
  }
// Walks the line from the last character down to the second one and
// deletes every character that has no guesses and for which h_overlaps
// reports an overlap with the character just before it.
// The first character (index 0) is never deleted here.
//
void Textline::join_broken_unrecognized_characters()
  {
  int i = characters() - 1;
  while( i > 0 )
    {
    const bool unrecognized = !character( i ).guesses();
    if( unrecognized && character( i ).h_overlaps( character( i - 1 ) ) )
      delete_character( i );
    --i;
    }
  }
void Textline::remove_leadind_trailing_duplicate_spaces()
{
for( int i = characters() - 1; i >= 0; --i )
if( character(i).maybe(' ') &&
( i == 0 || i == characters() - 1 || character(i-1).maybe(' ') ) )
delete_character( i );
} }
 End of changes. 9 change blocks. 
17 lines changed or deleted 59 lines changed or added

Home  |  About  |  All  |  Newest  |  Fossies Dox  |  Screenshots  |  Comments  |  Imprint  |  Privacy  |  HTTPS