textblock.cc (ocrad-0.24) | : | textblock.cc (ocrad-0.25) | ||
---|---|---|---|---|
/* GNU Ocrad - Optical Character Recognition program | /* GNU Ocrad - Optical Character Recognition program | |||
Copyright (C) 2003-2014 Antonio Diaz Diaz. | Copyright (C) 2003-2015 Antonio Diaz Diaz. | |||
This program is free software: you can redistribute it and/or modify | This program is free software: you can redistribute it and/or modify | |||
it under the terms of the GNU General Public License as published by | it under the terms of the GNU General Public License as published by | |||
the Free Software Foundation, either version 2 of the License, or | the Free Software Foundation, either version 2 of the License, or | |||
(at your option) any later version. | (at your option) any later version. | |||
This program is distributed in the hope that it will be useful, | This program is distributed in the hope that it will be useful, | |||
but WITHOUT ANY WARRANTY; without even the implied warranty of | but WITHOUT ANY WARRANTY; without even the implied warranty of | |||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | |||
GNU General Public License for more details. | GNU General Public License for more details. | |||
skipping to change at line 29 | skipping to change at line 29 | |||
#include <climits> | #include <climits> | |||
#include <cstdio> | #include <cstdio> | |||
#include <string> | #include <string> | |||
#include <vector> | #include <vector> | |||
#include <stdint.h> | #include <stdint.h> | |||
#include "common.h" | #include "common.h" | |||
#include "rational.h" | #include "rational.h" | |||
#include "rectangle.h" | #include "rectangle.h" | |||
#include "track.h" | #include "track.h" | |||
#include "ucs.h" | ||||
#include "user_filter.h" | ||||
#include "bitmap.h" | #include "bitmap.h" | |||
#include "blob.h" | #include "blob.h" | |||
#include "character.h" | #include "character.h" | |||
#include "page_image.h" | #include "page_image.h" | |||
#include "textline.h" | #include "textline.h" | |||
#include "textblock.h" | #include "textblock.h" | |||
namespace { | namespace { | |||
void insert_line( std::vector< Textline * > & textlinep_vector, int i ) | void insert_line( std::vector< Textline * > & textlinep_vector, int i ) | |||
skipping to change at line 87 | skipping to change at line 89 | |||
joined = true; break; | joined = true; break; | |||
} | } | |||
} | } | |||
if( !joined ) ++i; | if( !joined ) ++i; | |||
} | } | |||
} | } | |||
} | } | |||
} // end namespace | } // end namespace | |||
void Textblock::apply_filters( const Control & control ) | ||||
{ | ||||
if( textlines() <= 0 ) return; | ||||
for( unsigned f = 0; f < control.filters.size(); ++f ) | ||||
{ | ||||
if( control.filters[f].user_filterp ) | ||||
{ | ||||
for( int i = 0; i < textlines(); ++i ) | ||||
tlpv[i]->apply_user_filter( *control.filters[f].user_filterp ); | ||||
continue; | ||||
} | ||||
const Filter::Type filter = control.filters[f].type; | ||||
if( filter != Filter::text_block ) | ||||
{ | ||||
for( int i = 0; i < textlines(); ++i ) | ||||
tlpv[i]->apply_filter( filter ); | ||||
continue; | ||||
} | ||||
int l = right(), t = bottom(), r = left(), b = top(); | ||||
for( int i = 0; i < textlines(); ++i ) | ||||
{ | ||||
const Textline & line = *tlpv[i]; | ||||
int first = line.characters(), last = -1; | ||||
for( int j = line.big_initials(); j < line.characters(); ++j ) | ||||
if( line.is_key_character( j ) ) | ||||
{ l = std::min( l, line.character(j).left() ); first = j; break; } | ||||
for( int j = line.characters() - 1; j >= first; --j ) | ||||
if( line.is_key_character( j ) ) | ||||
{ r = std::max( r, line.character(j).right() ); last = j; break; } | ||||
if( i == 0 ) | ||||
{ for( int j = first; j <= last; ++j ) | ||||
if( line.is_key_character( j ) ) | ||||
t = std::min( t, line.character(j).top() ); } | ||||
else if( i == textlines() - 1 ) | ||||
{ for( int j = first; j <= last; ++j ) | ||||
if( line.is_key_character( j ) ) | ||||
b = std::max( b, line.character(j).bottom() ); } | ||||
} | ||||
if( r < l || b < t ) continue; // can't apply filter; no text | ||||
Rectangle re( l, t, r, b ); | ||||
for( int i = 0; i < textlines(); ++i ) | ||||
{ | ||||
Textline & line = *tlpv[i]; | ||||
bool modified = false; | ||||
for( int j = line.characters() - 1; j >= 0; --j ) | ||||
if( line.character(j).height() >= 2 * line.height() || | ||||
!re.includes( line.character(j).vcenter(), line.character(j).hcenter | ||||
() ) ) | ||||
{ line.delete_character( j ); modified = true; } | ||||
if( modified ) line.remove_leadind_trailing_duplicate_spaces(); | ||||
} | ||||
} | ||||
} | ||||
Textblock::Textblock( const Rectangle & page, const Rectangle & block, | Textblock::Textblock( const Rectangle & page, const Rectangle & block, | |||
std::vector< Blob * > & blobp_vector ) | std::vector< Blob * > & blobp_vector ) | |||
: Rectangle( block ) | : Rectangle( block ) | |||
{ | { | |||
std::vector< Blob * > pending; | std::vector< Blob * > pending; | |||
std::vector< Blob * > pending_tall; | std::vector< Blob * > pending_tall; | |||
std::vector< Blob * > pending_short; | std::vector< Blob * > pending_short; | |||
for( unsigned begin = 0, end = 0; end < blobp_vector.size(); begin = end ) | for( unsigned begin = 0, end = 0; end < blobp_vector.size(); begin = end ) | |||
{ | { | |||
skipping to change at line 368 | skipping to change at line 423 | |||
{ | { | |||
// Recognize characters. | // Recognize characters. | |||
for( int i = 0; i < textlines(); ++i ) | for( int i = 0; i < textlines(); ++i ) | |||
{ | { | |||
// First pass. Recognize the easy characters. | // First pass. Recognize the easy characters. | |||
tlpv[i]->recognize1( control.charset ); | tlpv[i]->recognize1( control.charset ); | |||
// Second pass. Use context to clear up ambiguities. | // Second pass. Use context to clear up ambiguities. | |||
tlpv[i]->recognize2( control.charset ); | tlpv[i]->recognize2( control.charset ); | |||
} | } | |||
for( unsigned j = 0; j < control.filters.size(); ++j ) | apply_filters( control ); | |||
for( int i = 0; i < textlines(); ++i ) | ||||
tlpv[i]->apply_filter( control.filters[j] ); | ||||
// Remove unrecognized lines. | // Remove unrecognized lines. | |||
for( int i = textlines() - 1; i >= 0; --i ) | for( int i = textlines() - 1; i >= 0; --i ) | |||
{ | { | |||
Textline & line1 = *tlpv[i]; | Textline & line1 = *tlpv[i]; | |||
bool recognized = false; | bool recognized = false; | |||
for( int j = 0 ; j < line1.characters(); ++j ) | for( int j = 0 ; j < line1.characters(); ++j ) | |||
{ if( line1.character( j ).guesses() ) { recognized = true; break; } } | { if( line1.character( j ).guesses() ) { recognized = true; break; } } | |||
if( !recognized ) delete_line( tlpv, i ); | if( !recognized ) delete_line( tlpv, i ); | |||
} | } | |||
End of changes. 4 change blocks. | ||||
4 lines changed or deleted | 58 lines changed or added |