"Fossies" - the Fresh Open Source Software Archive

Member "tesseract-ocr/doc/html/dawg2wordlist_8cpp.html" (26 Oct 2012, 20140 Bytes) of package /linux/misc/old/tesseract-ocr-3.02.02-doc-html.tar.gz:


Caution: In this restricted "Fossies" environment the current HTML page may not be presented correctly and may contain some non-functional links. Alternatively, you can try to browse the pure source code, or just view or download the uninterpreted raw source code. If the rendering is insufficient, you may try to find and view the page on the tesseract-ocr-3.02.02-doc-html.tar.gz project site itself.

Tesseract  3.02
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Groups Pages
dawg2wordlist.cpp File Reference
#include "dawg.h"
#include "host.h"
#include "tesscallback.h"
#include "trie.h"
#include "unicharset.h"

Go to the source code of this file.

Classes

class  WordOutputter

Functions

tesseract::Dawg * LoadSquishedDawg (const UNICHARSET &unicharset, const char *filename)
int WriteDawgAsWordlist (const UNICHARSET &unicharset, const tesseract::Dawg *dawg, const char *outfile_name)
int main (int argc, char *argv[])

Variables

const int kDictDebugLevel = 1

Function Documentation

tesseract::Dawg* LoadSquishedDawg ( const UNICHARSET &  unicharset,
const char *  filename 
)

Definition at line 28 of file dawg2wordlist.cpp.

{
  // Opens `filename` in binary mode and builds a dawg from its contents.
  // Returns NULL (after logging) if the file cannot be opened; otherwise
  // returns a heap-allocated object that the caller must delete.

  // Local constant; it shadows the file-scope kDictDebugLevel, which has the
  // same value.
  const int kDictDebugLevel = 1;

  FILE *dawg_file = fopen(filename, "rb");
  if (dawg_file == NULL) {
    tprintf("Could not open %s for reading.\n", filename);
    return NULL;
  }

  tprintf("Loading word list from %s\n", filename);
  // NOTE(review): the first line of this statement was missing from the
  // listing (unbalanced ')' and undeclared `retval`); it is restored here
  // from the upstream file. Confirm the constructor signature in dawg.h.
  // The language tag is fixed to "eng".
  tesseract::Dawg *retval = new tesseract::SquishedDawg(
      dawg_file, tesseract::DAWG_TYPE_WORD, "eng", SYSTEM_DAWG_PERM,
      kDictDebugLevel);
  tprintf("Word list loaded.\n");

  // The stream is only needed while the dawg is being constructed.
  fclose(dawg_file);
  return retval;
}
int main ( int  argc,
char *  argv[] 
)

Definition at line 70 of file dawg2wordlist.cpp.

{
  // Command line: <unicharset> <dawgfile> <wordlistfile>.
  // Any other argument count prints the usage text and exits with 1.
  if (argc != 4) {
    tprintf("Print all the words in a given dawg.\n");
    tprintf("Usage: %s <unicharset> <dawgfile> <wordlistfile>\n",
            argv[0]);
    return 1;
  }

  const char *unicharset_path = argv[1];
  const char *dawg_path = argv[2];
  const char *wordlist_path = argv[3];

  // Step 1: load the character set passed on to the dawg reader and writer.
  UNICHARSET unicharset;
  const bool charset_loaded = unicharset.load_from_file(unicharset_path);
  if (!charset_loaded) {
    tprintf("Error loading unicharset from %s.\n", unicharset_path);
    return 1;
  }

  // Step 2: read the dawg; a NULL result means loading failed.
  tesseract::Dawg *loaded_dawg = LoadSquishedDawg(unicharset, dawg_path);
  if (!loaded_dawg) {
    tprintf("Error loading dictionary from %s.\n", dawg_path);
    return 1;
  }

  // Step 3: write the word list; the writer's result becomes the exit status.
  const int status =
      WriteDawgAsWordlist(unicharset, loaded_dawg, wordlist_path);
  delete loaded_dawg;
  return status;
}
int WriteDawgAsWordlist ( const UNICHARSET &  unicharset,
const tesseract::Dawg *  dawg,
const char *  outfile_name 
)

Definition at line 54 of file dawg2wordlist.cpp.

{
  // Opens `outfile_name` for binary writing and asks `dawg` to iterate over
  // its words, handing each one to a callback bound to a WordOutputter that
  // wraps the output stream.
  // Returns 1 if the file cannot be opened; otherwise returns the result of
  // fclose() on the output stream.
  FILE *out = fopen(outfile_name, "wb");
  if (out == NULL) {
    tprintf("Could not open %s for writing.\n", outfile_name);
    return 1;
  }

  WordOutputter outputter(out);
  // NOTE(review): the right-hand side of this initialisation was missing
  // from the listing, leaving `print_word_cb` initialised from an expression
  // that reads itself. It is restored here from the upstream file; confirm
  // the member name against the WordOutputter class.
  TessCallback1<const char *> *print_word_cb =
      NewPermanentTessCallback(&outputter, &WordOutputter::output_word);
  dawg->iterate_words(unicharset, print_word_cb);

  // The callback is deleted here, after the iteration has finished.
  delete print_word_cb;
  return fclose(out);
}

Variable Documentation

const int kDictDebugLevel = 1

Definition at line 26 of file dawg2wordlist.cpp.