"Fossies" - the Fresh Open Source Software Archive

Member "tesseract-ocr/doc/html/class_script_detector.html" (26 Oct 2012, 24758 Bytes) of package /linux/misc/old/tesseract-ocr-3.02.02-doc-html.tar.gz:


Caution: In this restricted "Fossies" environment the current HTML page may not be presented correctly and may have some non-functional links. Alternatively, you can try to browse the pure source code or just view or download the uninterpreted raw source code. If the rendering is insufficient, you may try to find and view the page on the tesseract-ocr-3.02.02-doc-html.tar.gz project site itself.

Tesseract  3.02
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Groups Pages
ScriptDetector Class Reference

#include <osdetect.h>

List of all members.

Public Member Functions

 ScriptDetector (OSResults *, tesseract::Tesseract *tess)
void detect_blob (BLOB_CHOICE_LIST *scores)
void get_script ()
bool must_stop (int orientation)

Detailed Description

Definition at line 91 of file osdetect.h.


Constructor & Destructor Documentation

ScriptDetector::ScriptDetector ( OSResults osr,
tesseract::Tesseract tess 
)

Definition at line 419 of file osdetect.cpp.

{
// Keep the caller-supplied results accumulator and Tesseract instance;
// detect_blob() and must_stop() read and update them through osr_ / tess_.
osr_ = osr;
tess_ = tess;
// Pass each script name used by detect_blob() to the unicharset's
// add_script() and cache the value it returns in the matching *_id_ member.
// NOTE(review): presumably add_script() returns the id of an existing entry
// or registers a new one, in which case the order of these calls may affect
// the ids handed out -- confirm against UNICHARSET before reordering.
katakana_id_ = tess_->unicharset.add_script(katakana_script);
hiragana_id_ = tess_->unicharset.add_script(hiragana_script);
han_id_ = tess_->unicharset.add_script(han_script);
hangul_id_ = tess_->unicharset.add_script(hangul_script);
// "Japanese" and "Korean" are the pseudo-scripts that detect_blob() credits
// on top of the Katakana / Hiragana / Hangul / Han scores.
japanese_id_ = tess_->unicharset.add_script(japanese_script_);
korean_id_ = tess_->unicharset.add_script(korean_script_);
latin_id_ = tess_->unicharset.add_script(latin_script);
// Fraktur is scored separately from Latin by the workaround in detect_blob().
fraktur_id_ = tess_->unicharset.add_script(fraktur_script_);
}

Member Function Documentation

void ScriptDetector::detect_blob ( BLOB_CHOICE_LIST *  scores)

Definition at line 435 of file osdetect.cpp.

{
  // Scores one blob for script detection.
  //
  // `scores` is indexed as an array of 4 BLOB_CHOICE_LISTs, one per
  // orientation (scores + i). For each orientation the choices are walked in
  // list order, looking only at the first choice of every script. If exactly
  // one script remains a plausible winner, that script is credited with one
  // vote in osr_->scripts_na[orientation][script]; ambiguous blobs credit
  // nothing.
  bool done[kMaxNumberOfScripts];
  for (int i = 0; i < 4; ++i) {
    // done[s] is set once script s has been seen for this orientation.
    for (int j = 0; j < kMaxNumberOfScripts; ++j)
      done[j] = false;

    BLOB_CHOICE_IT choice_it;
    choice_it.set_to_list(scores + i);

    // Data remembered from the first match. prev_score holds its negated
    // certainty; a negative value means "no match recorded yet".
    float prev_score = -1;
    int script_count = 0;
    int prev_id = -1;
    int prev_fontinfo_id = -1;
    const char* prev_unichar = "";
    const char* unichar = "";

    for (choice_it.mark_cycle_pt(); !choice_it.cycled_list();
         choice_it.forward()) {
      BLOB_CHOICE* choice = choice_it.data();
      int id = choice->script_id();
      // Script already processed before.
      // NOTE(review): id is used unchecked as an index into done[] and
      // scripts_na[i][]; this assumes 0 <= id < kMaxNumberOfScripts -- confirm.
      if (done[id]) continue;
      done[id] = true;

      unichar = tess_->unicharset.id_to_unichar(choice->unichar_id());
      if (prev_score < 0) {
        // Save data from the first match.
        prev_score = -choice->certainty();
        script_count = 1;
        prev_id = id;
        prev_unichar = unichar;
        prev_fontinfo_id = choice->fontinfo_id();
      } else if (-choice->certainty() < prev_score + kNonAmbiguousMargin) {
        // Another script scores within the margin of the first match.
        ++script_count;
      }

      // Stop scanning when the first match is a one-byte unichar and the
      // current unichar starts with an ASCII digit.
      if (strlen(prev_unichar) == 1 && unichar[0] >= '0' && unichar[0] <= '9')
        break;

      // if script_count is >= 2, character is ambiguous, skip other matches
      // since they are useless.
      if (script_count >= 2)
        break;
    }

    // Character is non ambiguous
    if (script_count == 1) {
      // Update the score of the winning script
      osr_->scripts_na[i][prev_id] += 1.0;

      // Workaround for Fraktur: a Latin winner whose font is flagged as
      // Fraktur has its vote moved from Latin to Fraktur.
      if (prev_id == latin_id_ && prev_fontinfo_id >= 0) {
        const tesseract::FontInfo &fi =
            tess_->get_fontinfo_table().get(prev_fontinfo_id);
        //printf("Font: %s i:%i b:%i f:%i s:%i k:%i (%s)\n", fi.name,
        //       fi.is_italic(), fi.is_bold(), fi.is_fixed_pitch(),
        //       fi.is_serif(), fi.is_fraktur(),
        //       prev_unichar);
        if (fi.is_fraktur()) {
          osr_->scripts_na[i][prev_id] -= 1.0;
          osr_->scripts_na[i][fraktur_id_] += 1.0;
        }
      }

      // Update Japanese / Korean pseudo-scripts
      if (prev_id == katakana_id_)
        osr_->scripts_na[i][japanese_id_] += 1.0;
      if (prev_id == hiragana_id_)
        osr_->scripts_na[i][japanese_id_] += 1.0;
      if (prev_id == hangul_id_)
        osr_->scripts_na[i][korean_id_] += 1.0;
      // Han gives a fractional vote to both pseudo-scripts.
      if (prev_id == han_id_) {
        osr_->scripts_na[i][korean_id_] += kHanRatioInKorean;
        osr_->scripts_na[i][japanese_id_] += kHanRatioInJapanese;
      }
    }
  }  // iterate over each orientation
}
void ScriptDetector::get_script ( )
bool ScriptDetector::must_stop ( int  orientation)

Definition at line 526 of file osdetect.cpp.

{
  // Have the results object update its best script for this orientation
  // before the confidence is read.
  osr_->update_best_script(orientation);
  // Detection may stop once the best result's script confidence exceeds 1.
  const bool confident_enough = osr_->best_result.sconfidence > 1;
  return confident_enough;
}

The documentation for this class was generated from the following files: