"Fossies" - the Fresh Open Source Software Archive

Member "tesseract-ocr/doc/html/classtesseract_1_1_tess_lang_model.html" (26 Oct 2012, 40650 Bytes) of package /linux/misc/old/tesseract-ocr-3.02.02-doc-html.tar.gz:


Caution: In this restricted "Fossies" environment the current HTML page may not be presented correctly and may have some non-functional links. Alternatively, you can try to browse the pure source code or just view or download the uninterpreted raw source code. If the rendering is insufficient you may try to find and view the page on the tesseract-ocr-3.02.02-doc-html.tar.gz project site itself.

Tesseract  3.02
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Groups Pages
tesseract::TessLangModel Class Reference

#include <tess_lang_model.h>

Inheritance diagram for tesseract::TessLangModel:
tesseract::LangModel

List of all members.

Public Member Functions

 TessLangModel (const string &lm_params, const string &data_file_path, bool load_system_dawg, TessdataManager *tessdata_manager, CubeRecoContext *cntxt)
 ~TessLangModel ()
TessLangModEdge * Root ()
LangModEdge ** GetEdges (CharAltList *alt_list, LangModEdge *edge, int *edge_cnt)
bool IsValidSequence (const char_32 *sequence, bool eow_flag, LangModEdge **final_edge=NULL)
bool IsLeadingPunc (char_32 ch)
bool IsTrailingPunc (char_32 ch)
bool IsDigit (char_32 ch)
void RemoveInvalidCharacters (string *lm_str)
- Public Member Functions inherited from tesseract::LangModel
 LangModel ()
virtual ~LangModel ()
bool OOD ()
bool Numeric ()
bool WordList ()
bool Punc ()
void SetOOD (bool ood)
void SetNumeric (bool numeric)
void SetWordList (bool word_list)
void SetPunc (bool punc_enabled)

Additional Inherited Members

- Protected Attributes inherited from tesseract::LangModel
bool ood_enabled_
bool numeric_enabled_
bool word_list_enabled_
bool punc_enabled_

Detailed Description

Definition at line 38 of file tess_lang_model.h.


Constructor & Destructor Documentation

tesseract::TessLangModel::TessLangModel ( const string &  lm_params,
const string &  data_file_path,
bool  load_system_dawg,
TessdataManager *  tessdata_manager,
CubeRecoContext *  cntxt 
)

Definition at line 60 of file tess_lang_model.cpp.

{
  // Remember the recognition context and cache the result of its HasCase().
  cntxt_ = cntxt;
  has_case_ = cntxt_->HasCase();

  // Load the rest of the language model elements from the parameter string.
  LoadLangModelElements(lm_params);

  // word_dawgs_ is only created when the tessdata manager can seek to the
  // TESSDATA_CUBE_UNICHARSET entry; otherwise it stays NULL.
  if (!tessdata_manager->SeekToStart(TESSDATA_CUBE_UNICHARSET)) {
    word_dawgs_ = NULL;
    return;
  }
  word_dawgs_ = new DawgVector();

  // The system dawg is optional: the caller must request it, and the manager
  // must be able to seek to TESSDATA_CUBE_SYSTEM_DAWG.
  if (!load_system_dawg) {
    return;
  }
  if (tessdata_manager->SeekToStart(TESSDATA_CUBE_SYSTEM_DAWG)) {
    // The last parameter to the Dawg constructor (the debug level) is set to
    // false, until Cube has a way to express its preferred debug level.
    *word_dawgs_ += new SquishedDawg(tessdata_manager->GetDataFilePtr(),
                                     cntxt_->Lang().c_str(),
                                     SYSTEM_DAWG_PERM,
                                     false);
  }
}
tesseract::TessLangModel::~TessLangModel ( )
inline

Definition at line 45 of file tess_lang_model.h.

{
  // No word-dawg list was created, so there is nothing to release.
  if (word_dawgs_ == NULL) {
    return;
  }
  // delete_data_pointers() runs before the vector object itself is deleted.
  word_dawgs_->delete_data_pointers();
  delete word_dawgs_;
}

Member Function Documentation

LangModEdge ** tesseract::TessLangModel::GetEdges ( CharAltList *  alt_list,
LangModEdge *  edge,
int *  edge_cnt 
)
virtual

Implements tesseract::LangModel.

Definition at line 169 of file tess_lang_model.cpp.

{
// Builds and returns a newly allocated array of LangModEdge pointers that
// follow the given edge, and stores the number of edges produced in
// (*edge_cnt). A NULL incoming edge means "start from the root".
// NOTE(review): the body names the incoming edge `lang_mod_edge`, while the
// documented signature calls the same parameter `edge`.
// NOTE(review): the array comes from new[]; presumably the caller is expected
// to release it -- confirm against callers.
TessLangModEdge *tess_lm_edge =
reinterpret_cast<TessLangModEdge *>(lang_mod_edge);
LangModEdge **edge_array = NULL;
(*edge_cnt) = 0;
// if we are starting from the root, we'll visit every DAWG
// and get all the edges that emerge from the root
if (tess_lm_edge == NULL) {
// get DAWG count from Tesseract
int dawg_cnt = NumDawgs();
// preallocate the edge buffer; (*edge_cnt) temporarily holds its capacity
(*edge_cnt) = dawg_cnt * max_edge_;
edge_array = new LangModEdge *[(*edge_cnt)];
// NOTE(review): a plain new-expression normally throws on failure rather
// than returning NULL, so this branch is likely unreachable unless the build
// uses a non-throwing allocator. If it is taken, (*edge_cnt) still holds the
// capacity computed above even though NULL is returned.
if (edge_array == NULL) {
return NULL;
}
// The loop initialiser also resets (*edge_cnt) to 0, so from here on it
// counts the edges actually written into edge_array.
for (int dawg_idx = (*edge_cnt) = 0; dawg_idx < dawg_cnt; dawg_idx++) {
const Dawg *curr_dawg = GetDawg(dawg_idx);
// Only look through word Dawgs (since there is a special way of
// handling numbers and punctuation).
if (curr_dawg->type() == DAWG_TYPE_WORD) {
// Each FanOut call appends at the current end of the buffer and returns
// how many edges it added.
(*edge_cnt) += FanOut(alt_list, curr_dawg, 0, 0, NULL, true,
edge_array + (*edge_cnt));
}
} // dawg
// Append the edges of the number dawg after the word dawgs.
// NOTE(review): the buffer was sized as dawg_cnt * max_edge_; confirm that
// the number and OOD fan-outs below cannot push the total past that capacity.
(*edge_cnt) += FanOut(alt_list, number_dawg_, 0, 0, NULL, true,
edge_array + (*edge_cnt));
// OOD: it is intentionally not added to the list to make sure it comes
// at the end
(*edge_cnt) += FanOut(alt_list, ood_dawg_, 0, 0, NULL, true,
edge_array + (*edge_cnt));
// set the root flag for all root edges
for (int edge_idx = 0; edge_idx < (*edge_cnt); edge_idx++) {
edge_array[edge_idx]->SetRoot(true);
}
} else { // not starting at the root
// preallocate the edge buffer
(*edge_cnt) = max_edge_;
// allocate memory for edges
edge_array = new LangModEdge *[(*edge_cnt)];
// NOTE(review): same caveat as above -- likely unreachable with a throwing
// new, and (*edge_cnt) is left at max_edge_ if NULL is returned here.
if (edge_array == NULL) {
return NULL;
}
// fan out from the incoming edge, using its own dawg, end edge, edge mask
// and edge string; the result replaces the capacity stored in (*edge_cnt)
(*edge_cnt) = FanOut(alt_list,
tess_lm_edge->GetDawg(),
tess_lm_edge->EndEdge(), tess_lm_edge->EdgeMask(),
tess_lm_edge->EdgeString(), false, edge_array);
}
return edge_array;
}
bool tesseract::TessLangModel::IsDigit ( char_32  ch)
virtual

Implements tesseract::LangModel.

Definition at line 162 of file tess_lang_model.cpp.

{
  // ch counts as a digit when it occurs anywhere in digits_.
  const bool in_digit_set = (string::npos != digits_.find(ch));
  return in_digit_set;
}
bool tesseract::TessLangModel::IsLeadingPunc ( char_32  ch)
virtual

Implements tesseract::LangModel.

Definition at line 154 of file tess_lang_model.cpp.

{
  // ch counts as leading punctuation when it occurs anywhere in lead_punc_.
  const bool in_lead_punc_set = (string::npos != lead_punc_.find(ch));
  return in_lead_punc_set;
}
bool tesseract::TessLangModel::IsTrailingPunc ( char_32  ch)
virtual

Implements tesseract::LangModel.

Definition at line 158 of file tess_lang_model.cpp.

{
  // ch counts as trailing punctuation when it occurs anywhere in trail_punc_.
  const bool in_trail_punc_set = (string::npos != trail_punc_.find(ch));
  return in_trail_punc_set;
}
bool tesseract::TessLangModel::IsValidSequence ( const char_32 *  sequence,
bool  eow_flag,
LangModEdge **  final_edge = NULL 
)
virtual

Implements tesseract::LangModel.

Definition at line 145 of file tess_lang_model.cpp.

{
  // When the caller supplied an output slot, clear it before delegating.
  if (final_edge) {
    *final_edge = NULL;
  }
  // Forward to the four-argument overload, passing NULL as its first argument.
  const bool sequence_ok =
      IsValidSequence(NULL, sequence, eow_flag, final_edge);
  return sequence_ok;
}
void tesseract::TessLangModel::RemoveInvalidCharacters ( string *  lm_str)

Definition at line 482 of file tess_lang_model.cpp.

{
  // Removes from *lm_str every character whose class ID in the context's
  // character set is INVALID_UNICHAR_ID. *lm_str is rewritten only when at
  // least one character was dropped; otherwise it is left untouched.
  CharSet *char_set = cntxt_->CharacterSet();

  // UTF-32 copy of the input. This declaration was missing from the listing
  // even though lm_str32 is used throughout the body.
  string_32 lm_str32;
  CubeUtils::UTF8ToUTF32(lm_str->c_str(), &lm_str32);
  int len = CubeUtils::StrLen(lm_str32.c_str());

  // Scratch buffer large enough for every character plus the terminator.
  char_32 *clean_str32 = new char_32[len + 1];
  // NOTE(review): a plain new-expression normally throws on failure instead
  // of returning NULL, so this guard is likely unreachable; kept as-is.
  if (!clean_str32)
    return;

  // Copy across only the characters the character set recognises.
  int clean_len = 0;
  for (int i = 0; i < len; ++i) {
    int class_id = char_set->ClassID(static_cast<char_32>(lm_str32[i]));
    if (class_id != INVALID_UNICHAR_ID) {
      clean_str32[clean_len] = lm_str32[i];
      ++clean_len;
    }
  }
  clean_str32[clean_len] = 0;

  // Re-encode to UTF-8 only if something was actually removed.
  if (clean_len < len) {
    lm_str->clear();
    CubeUtils::UTF32ToUTF8(clean_str32, lm_str);
  }
  delete [] clean_str32;
}
TessLangModEdge* tesseract::TessLangModel::Root ( )
inline virtual

Implements tesseract::LangModel.

Definition at line 53 of file tess_lang_model.h.

{
  // This implementation has no root edge object to hand out: it always
  // returns NULL.
  TessLangModEdge *const root_edge = NULL;
  return root_edge;
}

The documentation for this class was generated from the following files: