"Fossies" - the Fresh Open Source Software Archive

Member "tesseract-ocr/doc/html/tess__lang__mod__edge_8cpp_source.html" (26 Oct 2012, 28109 Bytes) of package /linux/misc/old/tesseract-ocr-3.02.02-doc-html.tar.gz:


Caution: In this restricted "Fossies" environment the current HTML page may not be presented correctly and may have some non-functional links. Alternatively, you can try to browse the pure source code, or just view or download the uninterpreted raw source code. If the rendering is insufficient, you may try to find and view the page on the tesseract-ocr-3.02.02-doc-html.tar.gz project site itself.

Tesseract  3.02
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Groups Pages
tess_lang_mod_edge.cpp
Go to the documentation of this file.
1 /**********************************************************************
2  * File: tess_lang_mod_edge.cpp
3  * Description: Implementation of the Tesseract Language Model Edge Class
4  * Author: Ahmad Abdulkader
5  * Created: 2008
6  *
7  * (C) Copyright 2008, Google Inc.
8  ** Licensed under the Apache License, Version 2.0 (the "License");
9  ** you may not use this file except in compliance with the License.
10  ** You may obtain a copy of the License at
11  ** http://www.apache.org/licenses/LICENSE-2.0
12  ** Unless required by applicable law or agreed to in writing, software
13  ** distributed under the License is distributed on an "AS IS" BASIS,
14  ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15  ** See the License for the specific language governing permissions and
16  ** limitations under the License.
17  *
18  **********************************************************************/
19 
20 #include "tess_lang_mod_edge.h"
21 #include "const.h"
22 #include "unichar.h"
23 
24 
25 
26 namespace tesseract {
27 // OOD constructor
// NOTE(review): the signature line (listing line 28) is missing from this
// extracted listing. The body reads `cntxt` and `class_id`, so those are
// presumably the parameters -- confirm against the original file.
//
// Builds an edge that is not attached to any dawg: dawg_ is NULL and both
// edge references as well as the edge mask are zero.
29  root_ = false;
30  cntxt_ = cntxt;
31  dawg_ = NULL;
32  start_edge_ = 0;
33  end_edge_ = 0;
34  edge_mask_ = 0;
35  class_id_ = class_id;
// Look up the string for this class id in the context's character set.
36  str_ = cntxt_->CharacterSet()->ClassString(class_id);
// Cost() is defined elsewhere; it is called last, after every other member
// has been assigned (presumably because it reads them -- TODO confirm).
37  path_cost_ = Cost();
38 }
39 
40 // leading, trailing punc constructor and single byte UTF char
// NOTE(review): the first signature line (listing line 41) is missing from
// this extracted listing. The body also reads `cntxt`, which is presumably
// declared on that missing line -- confirm against the original file.
//
// Builds an edge that refers to a single dawg edge: start_edge_ and end_edge_
// are both set to the same `edge_idx`.
42  const Dawg *dawg, EDGE_REF edge_idx, int class_id) {
43  root_ = false;
44  cntxt_ = cntxt;
45  dawg_ = dawg;
46  start_edge_ = edge_idx;
47  end_edge_ = edge_idx;
48  edge_mask_ = 0;
49  class_id_ = class_id;
// Look up the string for this class id in the context's character set.
50  str_ = cntxt_->CharacterSet()->ClassString(class_id);
// Cost() is defined elsewhere; it is called last, after every other member
// has been assigned (presumably because it reads them -- TODO confirm).
51  path_cost_ = Cost();
52 }
53 
54 // dict constructor: multi byte UTF char
// NOTE(review): the first signature line (listing line 55) is missing from
// this extracted listing. The body also reads `cntxt` and `dawg`, which are
// presumably declared on that missing line -- confirm against the original.
//
// Builds an edge that spans a range of dawg edges: start_edge_ and end_edge_
// are set independently from `start_edge_idx` and `end_edge_idx`.
56  EDGE_REF start_edge_idx, EDGE_REF end_edge_idx,
57  int class_id) {
58  root_ = false;
59  cntxt_ = cntxt;
60  dawg_ = dawg;
61  start_edge_ = start_edge_idx;
62  end_edge_ = end_edge_idx;
63  edge_mask_ = 0;
64  class_id_ = class_id;
// Look up the string for this class id in the context's character set.
65  str_ = cntxt_->CharacterSet()->ClassString(class_id);
// Cost() is defined elsewhere; it is called last, after every other member
// has been assigned (presumably because it reads them -- TODO confirm).
66  path_cost_ = Cost();
67 }
68 
// NOTE(review): the signature line (listing line 69) is missing from this
// extracted listing, so the function's name and qualifiers are not visible.
//
// Builds a textual description of this edge in a freshly allocated 256-byte
// buffer and returns it. The buffer comes from new[] and is not freed here,
// so the caller presumably has to delete[] it -- confirm with the callers.
70  char *char_ptr = new char[256];
// NOTE(review): a plain new-expression reports failure by throwing rather
// than by returning a null pointer, so this check is likely dead unless the
// build replaces or disables that behaviour.
71  if (!char_ptr) {
72  return NULL;
73  }
74 

75  char dawg_str[256];
76  char edge_str[32];
// Pick a short tag for the dawg. The two sentinel comparisons come first, so
// dawg_->permuter() is only reached when dawg_ matches neither sentinel.
77  if (dawg_ == (Dawg *)DAWG_OOD) {
78  strcpy(dawg_str, "OOD");
79  } else if (dawg_ == (Dawg *)DAWG_NUMBER) {
80  strcpy(dawg_str, "NUM");
81  } else if (dawg_->permuter() == SYSTEM_DAWG_PERM) {
82  strcpy(dawg_str, "Main");
83  } else if (dawg_->permuter() == USER_DAWG_PERM) {
84  strcpy(dawg_str, "User");
85  } else if (dawg_->permuter() == DOC_DAWG_PERM) {
86  strcpy(dawg_str, "Doc");
87  } else {
88  strcpy(dawg_str, "N/A");
89  }
90 

// Edge tag: start_edge_ (narrowed to int) in decimal, followed by optional
// "-LP" / "-TP" suffixes. Assuming a 32-bit int, that is at most 11 + 3 + 3
// characters plus the terminator, which fits in the 32-byte edge_str.
91  sprintf(edge_str, "%d", static_cast<int>(start_edge_));
92  if (IsLeadingPuncEdge(edge_mask_)) {
93  strcat(edge_str, "-LP");
94  }
95  if (IsTrailingPuncEdge(edge_mask_)) {
96  strcat(edge_str, "-TP");
97  }
// Final layout: <dawg tag>(<edge tag>)[-EOW-], Wtd Dawg Cost=<path cost>.
// NOTE(review): path_cost_ is printed with %d, so it is presumably an int --
// confirm against the member's declaration.
98  sprintf(char_ptr, "%s(%s)%s, Wtd Dawg Cost=%d",
99  dawg_str, edge_str, IsEOW() ? "-EOW-" : "", path_cost_);
100 

101  return char_ptr;
102 }
103 
// NOTE(review): the first signature line (listing line 104) is missing from
// this extracted listing. The body also reads `cntxt`, which is presumably
// declared on that missing line -- confirm against the original file.
//
// Creates one TessLangModEdge per valid child of `parent_node` in `dawg`,
// stores the new edges consecutively in `edge_array`, and returns how many
// were stored.
// NOTE(review): no capacity for `edge_array` is passed in or checked here;
// the caller must supply an array large enough for every child.
105  const Dawg *dawg,
106  NODE_REF parent_node,
107  LangModEdge **edge_array) {
108  int edge_cnt = 0;
109  NodeChildVector vec;
110  dawg->unichar_ids_of(parent_node, &vec); // find all children of the parent
111  for (int i = 0; i < vec.size(); ++i) {
112  const NodeChild &child = vec[i];
// Children without a valid unichar id produce no edge.
113  if (child.unichar_id == INVALID_UNICHAR_ID) continue;
// The child's edge reference and unichar id are passed as the edge index and
// class id of the new edge.
114  edge_array[edge_cnt] =
115  new TessLangModEdge(cntxt, dawg, child.edge_ref, child.unichar_id);
// NOTE(review): a plain new-expression throws on failure rather than
// returning null, so this guard is likely always true.
116  if (edge_array[edge_cnt] != NULL) edge_cnt++;
117  }
118  return edge_cnt;
119 }
120 }