Tesseract  3.02
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Groups Pages
dawg2wordlist.cpp File Reference
#include "dawg.h"
#include "host.h"
#include "tesscallback.h"
#include "trie.h"
#include "unicharset.h"

Go to the source code of this file.

Classes

class  WordOutputter

Functions

tesseract::Dawg * LoadSquishedDawg (const UNICHARSET &unicharset, const char *filename)
int WriteDawgAsWordlist (const UNICHARSET &unicharset, const tesseract::Dawg *dawg, const char *outfile_name)
int main (int argc, char *argv[])

Variables

const int kDictDebugLevel = 1

Function Documentation

tesseract::Dawg* LoadSquishedDawg ( const UNICHARSET & unicharset,
const char *  filename 
)

Definition at line 28 of file dawg2wordlist.cpp.

{
  // Opens `filename` in binary mode and constructs a SquishedDawg from the
  // stream. Returns NULL when the file cannot be opened; otherwise returns a
  // heap-allocated Dawg that the caller is responsible for deleting.
  // The `unicharset` parameter is not referenced in this body.
  const int kDictDebugLevel = 1;
  FILE *dawg_file = fopen(filename, "rb");
  if (dawg_file == NULL) {
    tprintf("Could not open %s for reading.\n", filename);
    return NULL;
  }
  tprintf("Loading word list from %s\n", filename);
  // NOTE(review): the stream is closed immediately after construction, so the
  // SquishedDawg constructor presumably consumes the whole file -- confirm
  // against dawg.h.
  tesseract::Dawg *retval = new tesseract::SquishedDawg(
      dawg_file, tesseract::DAWG_TYPE_WORD, "eng", SYSTEM_DAWG_PERM,
      kDictDebugLevel);
  tprintf("Word list loaded.\n");
  fclose(dawg_file);
  return retval;
}
int main ( int  argc,
char *  argv[] 
)

Definition at line 70 of file dawg2wordlist.cpp.

{
  // The tool takes exactly three arguments after the program name:
  // the unicharset file, the dawg file, and the word-list output file.
  if (argc != 4) {
    tprintf("Print all the words in a given dawg.\n");
    tprintf("Usage: %s <unicharset> <dawgfile> <wordlistfile>\n",
            argv[0]);
    return 1;
  }

  const char *const unicharset_path = argv[1];
  const char *const dawg_path = argv[2];
  const char *const wordlist_path = argv[3];

  // Load the character set first; nothing else can proceed without it.
  UNICHARSET charset;
  if (!charset.load_from_file(unicharset_path)) {
    tprintf("Error loading unicharset from %s.\n", unicharset_path);
    return 1;
  }

  // Load the dawg; a null result means loading failed.
  tesseract::Dawg *const loaded_dawg = LoadSquishedDawg(charset, dawg_path);
  if (!loaded_dawg) {
    tprintf("Error loading dictionary from %s.\n", dawg_path);
    return 1;
  }

  // Write the words out, release the dawg, and propagate the writer's status
  // as the process exit code.
  const int exit_status =
      WriteDawgAsWordlist(charset, loaded_dawg, wordlist_path);
  delete loaded_dawg;
  return exit_status;
}
int WriteDawgAsWordlist ( const UNICHARSET & unicharset,
const tesseract::Dawg * dawg,
const char *  outfile_name 
)

Definition at line 54 of file dawg2wordlist.cpp.

{
  // Writes the words of `dawg` to the file `outfile_name`.
  // Returns 1 if the output file cannot be opened; otherwise returns the
  // result of fclose() on the output stream (0 on success, EOF on failure).
  FILE *out = fopen(outfile_name, "wb");
  if (out == NULL) {
    tprintf("Could not open %s for writing.\n", outfile_name);
    return 1;
  }
  WordOutputter outputter(out);
  // Bind the outputter's output_word member as the callback handed to
  // iterate_words. The callback is "permanent", so it is not freed by being
  // invoked and must be deleted explicitly below.
  TessCallback1<const char *> *print_word_cb =
      NewPermanentTessCallback(&outputter, &WordOutputter::output_word);
  dawg->iterate_words(unicharset, print_word_cb);
  delete print_word_cb;
  return fclose(out);
}

Variable Documentation

const int kDictDebugLevel = 1

Definition at line 26 of file dawg2wordlist.cpp.