"Fossies" - the Fresh Open Source Software Archive

Member "tesseract-ocr/doc/html/cubeclassifier_8cpp_source.html" (26 Oct 2012, 33111 Bytes) of package /linux/misc/old/tesseract-ocr-3.02.02-doc-html.tar.gz:


Caution: In this restricted "Fossies" environment the current HTML page may not be presented correctly and may have some non-functional links. Alternatively, you can try to browse the pure source code or just view or download the uninterpreted raw source code. If the rendering is insufficient, you may try to find and view the page on the tesseract-ocr-3.02.02-doc-html.tar.gz project site itself.

Tesseract  3.02
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Groups Pages
cubeclassifier.cpp
Go to the documentation of this file.
1 // Copyright 2011 Google Inc. All Rights Reserved.
2 // Author: rays@google.com (Ray Smith)
4 // File: cubeclassifier.cpp
5 // Description: Cube implementation of a ShapeClassifier.
6 // Author: Ray Smith
7 // Created: Wed Nov 23 10:39:45 PST 2011
8 //
9 // (C) Copyright 2011, Google Inc.
10 // Licensed under the Apache License, Version 2.0 (the "License");
11 // you may not use this file except in compliance with the License.
12 // You may obtain a copy of the License at
13 // http://www.apache.org/licenses/LICENSE-2.0
14 // Unless required by applicable law or agreed to in writing, software
15 // distributed under the License is distributed on an "AS IS" BASIS,
16 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
17 // See the License for the specific language governing permissions and
18 // limitations under the License.
19 //
21 
22 #include "cubeclassifier.h"
23 
24 #include "char_altlist.h"
25 #include "char_set.h"
26 #include "cube_object.h"
27 #include "cube_reco_context.h"
28 #include "tessclassifier.h"
29 #include "tesseractclass.h"
30 #include "trainingsample.h"
31 #include "unicharset.h"
32 
33 namespace tesseract {
34 
36  : cube_cntxt_(tesseract->GetCubeRecoContext()),
37  shape_table_(*tesseract->shape_table()) {
38 }
40 }
41 
42 // Classifies the given [training] sample, writing to results.
43 // See ShapeClassifier for a full description.
45  Pix* page_pix, int debug, int keep_this,
46  GenericVector<ShapeRating>* results) {
47  results->clear();
48  if (page_pix == NULL) return 0;  // No image to run cube on: no results.
49 
50  ASSERT_HOST(cube_cntxt_ != NULL);
51  const TBOX& char_box = sample.bounding_box();
52  CubeObject* cube_obj = new tesseract::CubeObject(
53  cube_cntxt_, page_pix, char_box.left(),
54  pixGetHeight(page_pix) - char_box.top(),
55  char_box.width(), char_box.height());
56  CharAltList* alt_list = cube_obj->RecognizeChar();
57  CharSet* char_set = cube_cntxt_->CharacterSet();
58  if (alt_list != NULL) {
59  alt_list->Sort();  // Sort only once alt_list is known to be non-NULL.
60  for (int i = 0; i < alt_list->AltCount(); ++i) {
61  // Convert cube representation to a shape_id.
62  int alt_id = alt_list->Alt(i);
63  int unichar_id = char_set->UnicharID(char_set->ClassString(alt_id));
64  int shape_id = shape_table_.FindShape(unichar_id, -1);
65  if (shape_id >= 0)  // Skip alternatives with no matching shape.
66  results->push_back(ShapeRating(shape_id, alt_list->AltProb(i)));
67  }
68  delete alt_list;
69  }
70  delete cube_obj;
71  return results->size();
72 }
73 
74 // Provides access to the ShapeTable that this classifier works with.
76  return &shape_table_;
77 }
78 
80  : cube_cntxt_(tesseract->GetCubeRecoContext()),
81  shape_table_(*tesseract->shape_table()),
82  pruner_(new TessClassifier(true, tesseract)) {
83 }
85  delete pruner_;
86 }
87 
88 // Classifies the given [training] sample, writing to results.
89 // See ShapeClassifier for a full description.
91  Pix* page_pix, int debug, int keep_this,
92  GenericVector<ShapeRating>* results) {
93  int num_results = pruner_->ClassifySample(sample, page_pix, debug, keep_this,
94  results);
95  if (page_pix == NULL) return num_results;
96 
97  ASSERT_HOST(cube_cntxt_ != NULL);
98  const TBOX& char_box = sample.bounding_box();
99  CubeObject* cube_obj = new tesseract::CubeObject(
100  cube_cntxt_, page_pix, char_box.left(),
101  pixGetHeight(page_pix) - char_box.top(),
102  char_box.width(), char_box.height());
103  CharAltList* alt_list = cube_obj->RecognizeChar();
104  CharSet* char_set = cube_cntxt_->CharacterSet();
105  if (alt_list != NULL) {
106  for (int r = 0; r < num_results; ++r) {
107  const Shape& shape = shape_table_.GetShape((*results)[r].shape_id);
108  // Get the best cube probability of all unichars in the shape.
109  double best_prob = 0.0;
110  for (int i = 0; i < alt_list->AltCount(); ++i) {
111  int alt_id = alt_list->Alt(i);
112  int unichar_id = char_set->UnicharID(char_set->ClassString(alt_id));
113  if (shape.ContainsUnichar(unichar_id) &&
114  alt_list->AltProb(i) > best_prob) {
115  best_prob = alt_list->AltProb(i);
116  }
117  }
118  (*results)[r].rating = best_prob;
119  }
120  delete alt_list;
121  // Re-sort by rating.
123  }
124  delete cube_obj;
125  return results->size();
126 }
127 
128 // Provides access to the ShapeTable that this classifier works with.
130  return &shape_table_;
131 }
132 
133 } // namespace tesseract
134 
135 
136