"Fossies" - the Fresh Open Source Software Archive

Member "tesseract-ocr/doc/html/reject_8h.html" (26 Oct 2012, 27482 Bytes) of package /linux/misc/old/tesseract-ocr-3.02.02-doc-html.tar.gz:


Caution: In this restricted "Fossies" environment the current HTML page may not be presented correctly and may have some non-functional links. Alternatively, you can try to browse the pure source code or just view or download the uninterpreted raw source code. If the rendering is insufficient, you may try to find and view the page on the tesseract-ocr-3.02.02-doc-html.tar.gz project site itself.

Tesseract  3.02
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Groups Pages
reject.h File Reference
#include "params.h"
#include "pageres.h"
#include "notdll.h"

Go to the source code of this file.

Functions

void reject_blanks (WERD_RES *word)
void reject_poor_matches (WERD_RES *word, BLOB_CHOICE_LIST_CLIST *blob_choices)
float compute_reject_threshold (BLOB_CHOICE_LIST_CLIST *blob_choices)
BOOL8 word_contains_non_1_digit (const char *word, const char *word_lengths)
void dont_allow_1Il (WERD_RES *word)
void flip_hyphens (WERD_RES *word)
void flip_0O (WERD_RES *word)
BOOL8 non_0_digit (const char *str, int length)

Function Documentation

float compute_reject_threshold ( BLOB_CHOICE_LIST_CLIST *  blob_choices)

Definition at line 370 of file reject.cpp.

{
  // Computes a certainty threshold for one word from the certainties of its
  // blobs.
  //
  //   blob_choices - one BLOB_CHOICE_LIST per blob of the word; blobs whose
  //                  list is empty are ignored.
  //
  // The certainty of the choice each blob's iterator starts on is collected
  // and sorted with sort_floats. With three or more values the threshold is
  // placed in the middle of the largest gap between neighbouring values;
  // with one or two values it is ratings[0] - 1. When no blob has any choice
  // at all, 0 is returned (see the guard below).
  inT16 index;                   // position in blob_choices
  inT16 blob_count;              // no of blobs in word
  inT16 ok_blob_count = 0;       // non TESS rej blobs in word
  float *ratings;                // array of confidences
  float threshold;               // rejection threshold
  float bestgap;                 // biggest gap
  float gapstart;                // bottom of gap
  // super iterator
  BLOB_CHOICE_LIST_C_IT list_it = blob_choices;
  BLOB_CHOICE_IT choice_it;      // real iterator

  blob_count = blob_choices->length ();
  ratings = (float *) alloc_mem (blob_count * sizeof (float));
  for (list_it.mark_cycle_pt (), index = 0;
       !list_it.cycled_list (); list_it.forward (), index++) {
    choice_it.set_to_list (list_it.data ());
    if (choice_it.length () > 0) {
      // get in an array
      ratings[ok_blob_count] = choice_it.data ()->certainty ();
      ok_blob_count++;
    }
  }
  ASSERT_HOST (index == blob_count);

  if (ok_blob_count == 0) {
    // Nothing was stored in ratings[], so ratings[0] must not be read.
    // reject_poor_matches only compares the threshold against blobs that
    // have at least one choice, so the value returned here is not compared
    // against any blob there; it just has to be well defined.
    free_mem (ratings);
    return 0.0f;
  }

  // sort them
  qsort (ratings, ok_blob_count, sizeof (float), sort_floats);
  bestgap = 0;
  gapstart = ratings[0] - 1;     // all reject if none better
  if (ok_blob_count >= 3) {
    for (index = 0; index < ok_blob_count - 1; index++) {
      if (ratings[index + 1] - ratings[index] > bestgap) {
        // find biggest
        bestgap = ratings[index + 1] - ratings[index];
        gapstart = ratings[index];
      }
    }
  }
  threshold = gapstart + bestgap / 2;
  free_mem (ratings);
  return threshold;
}
void dont_allow_1Il ( WERD_RES *  word)
void flip_0O ( WERD_RES *  word)
void flip_hyphens ( WERD_RES *  word)
BOOL8 non_0_digit ( const char *  str,
int  length 
)
void reject_blanks ( WERD_RES *  word)

Definition at line 290 of file reject.cpp.

{
  // Walks the best choice of the word one unichar at a time and marks, in
  // word->reject_map, every position whose unichar starts with a space as a
  // Tesseract failure.
  inT16 blob_index = 0;          // index into unichar_lengths / reject_map
  inT16 str_pos = 0;             // start of the current unichar in the string

  while (word->best_choice->unichar_string()[str_pos] != '\0') {
    if (word->best_choice->unichar_string()[str_pos] == ' ') {
      // rej unrecognised blobs
      word->reject_map[blob_index].setrej_tess_failure ();
    }
    // Step over the unichar just examined before moving to the next index.
    str_pos += word->best_choice->unichar_lengths()[blob_index];
    blob_index += 1;
  }
}
void reject_poor_matches ( WERD_RES *  word,
BLOB_CHOICE_LIST_CLIST *  blob_choices 
)

Definition at line 319 of file reject.cpp.

{
  // Marks, in word->reject_map, every blob of the word that was either not
  // recognised or whose choice scores below the word's rejection threshold.
  //
  //   word         - word whose best_choice, box_word and reject_map are used
  //   blob_choices - one BLOB_CHOICE_LIST per blob; asserted below to have as
  //                  many entries as best_choice has unichars and as box_word
  //                  has blobs.
  float threshold;
  inT16 i = 0;                   // blob / unichar index
  inT16 offset = 0;              // start of unichar i in unichar_string()
  // super iterator
  BLOB_CHOICE_LIST_C_IT list_it = blob_choices;
  BLOB_CHOICE_IT choice_it;      // real iterator

  #ifndef SECURE_NAMES
  if (strlen(word->best_choice->unichar_lengths().string()) !=
      list_it.length()) {
    // Print the details of the mismatch that the ASSERT_HOST below checks.
    tprintf("ASSERT FAIL string:\"%s\"; strlen=%d; choices len=%d; blob len=%d\n",
            word->best_choice->unichar_string().string(),
            (int) strlen (word->best_choice->unichar_lengths().string()),
            list_it.length(), word->box_word->length());
  }
  #endif
  ASSERT_HOST (strlen (word->best_choice->unichar_lengths().string ()) ==
               list_it.length ());
  ASSERT_HOST(word->box_word->length() == list_it.length());
  threshold = compute_reject_threshold (blob_choices);

  // offset must advance by the length of the unichar just processed, so it
  // is updated BEFORE i is incremented (same order as in reject_blanks).
  for (list_it.mark_cycle_pt ();
       !list_it.cycled_list ();
       list_it.forward (),
       offset += word->best_choice->unichar_lengths()[i], i++) {
    // NB - only compares the threshold against the TOP choice char in the
    // choices list for a blob !! - the selected one may be below the
    // threshold
    choice_it.set_to_list (list_it.data ());
    if ((word->best_choice->unichar_string()[offset] == ' ') ||
        (choice_it.length () == 0)) {
      // rej unrecognised blobs
      word->reject_map[i].setrej_tess_failure ();
    } else if (choice_it.data ()->certainty () < threshold) {
      // rej poor score blob
      word->reject_map[i].setrej_poor_match ();
    }
  }
}
BOOL8 word_contains_non_1_digit ( const char *  word,
const char *  word_lengths 
)