"Fossies" - the Fresh Open Source Software Archive

Member "tesseract-ocr/doc/html/intfeaturemap_8h_source.html" (26 Oct 2012, 37550 Bytes) of package /linux/misc/old/tesseract-ocr-3.02.02-doc-html.tar.gz:


Caution: In this restricted "Fossies" environment the current HTML page may not be presented correctly and may have some non-functional links. Alternatively, you can browse the pure source code or just view or download the uninterpreted raw source code. If the rendering is insufficient, you may try to find and view the page on the tesseract-ocr-3.02.02-doc-html.tar.gz project site itself.

Tesseract  3.02
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Groups Pages
intfeaturemap.h
Go to the documentation of this file.
1 // Copyright 2010 Google Inc. All Rights Reserved.
2 // Author: rays@google.com (Ray Smith)
4 // File: intfeaturemap.h
5 // Description: Encapsulation of IntFeatureSpace with IndexMapBiDi
6 // to provide a subspace mapping and fast feature lookup.
7 // Created: Tue Oct 26 08:58:30 PDT 2010
8 //
9 // Licensed under the Apache License, Version 2.0 (the "License");
10 // you may not use this file except in compliance with the License.
11 // You may obtain a copy of the License at
12 // http://www.apache.org/licenses/LICENSE-2.0
13 // Unless required by applicable law or agreed to in writing, software
14 // distributed under the License is distributed on an "AS IS" BASIS,
15 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
16 // See the License for the specific language governing permissions and
17 // limitations under the License.
18 //
20 
21 #ifndef TESSERACT_CLASSIFY_INTFEATUREMAP_H__
22 #define TESSERACT_CLASSIFY_INTFEATUREMAP_H__
23 
24 #include "intfeaturespace.h"
25 #include "indexmapbidi.h"
26 #include "intproto.h"
27 
28 namespace tesseract {
29 
30 class SampleIterator;
31 
32 // Number of positive and negative offset maps.
33 static const int kNumOffsetMaps = 2;
34 
35 // Class to map a feature space defined by INT_FEATURE_STRUCT to a compact
36 // down-sampled subspace of actually used features.
37 // The IntFeatureMap copes with 2 stages of transformation:
38 // The first step is down-sampling (re-quantization) and converting to a
39 // single index value from the 3-D input:
40 // INT_FEATURE_STRUCT <-> index feature (via IntFeatureSpace) and
41 // the second is a feature-space compaction to map only the feature indices
42 // that are actually used. This saves space in classifiers that are built
43 // using the mapped feature space.
44 // index (sparse) feature <-> map (compact) feature via IndexMapBiDi.
45 // Although the transformations are reversible, the inverses are lossy and do
46 // not return the exact input INT_FEATURE_STRUCT, due to the many->one nature
47 // of both transformations.
49  public:
50  IntFeatureMap();
52 
53  // Accessors.
54  int sparse_size() const {  // Size of the index (sparse) feature space.
55  return feature_space_.Size();  // Delegates to the owned IntFeatureSpace.
56  }
57  int compact_size() const {  // Size of the compacted feature space.
58  return compact_size_;  // Stored member; not recomputed by this accessor.
59  }
60  const IntFeatureSpace& feature_space() const {  // Read-only access.
61  return feature_space_;  // Reference to the member, not a copy.
62  }
63  const IndexMapBiDi& feature_map() const {  // Read-only access.
64  return feature_map_;  // Reference to the index <-> compact mapping member.
65  }
66 
67  // Pseudo-accessors.
68  int IndexFeature(const INT_FEATURE_STRUCT& f) const;  // NOTE(review): presumably f -> index (sparse) feature; defined elsewhere.
69  int MapFeature(const INT_FEATURE_STRUCT& f) const;  // NOTE(review): presumably f -> map (compact) feature; defined elsewhere.
70  int MapIndexFeature(int index_feature) const;  // NOTE(review): presumably index (sparse) -> map (compact); confirm.
71  INT_FEATURE_STRUCT InverseIndexFeature(int index_feature) const;  // NOTE(review): presumably the lossy inverse of IndexFeature; confirm.
72  INT_FEATURE_STRUCT InverseMapFeature(int map_feature) const;  // NOTE(review): presumably the lossy inverse of MapFeature; confirm.
73  void DeleteMapFeature(int map_feature);  // Non-const: the only mutator in this group.
74  bool IsMapFeatureDeleted(int map_feature) const;  // NOTE(review): presumably the query paired with DeleteMapFeature; confirm.
75 
76  // Copies the given feature_space and uses it as the index feature map
77  // from INT_FEATURE_STRUCT.
78  void Init(const IntFeatureSpace& feature_space);
79 
80  // Helper to return an offset index feature. In this context an offset
81  // feature with a dir of +/-1 is a feature of a similar direction,
82  // but shifted perpendicular to the direction of the feature. An offset
83  // feature with a dir of +/-2 is a feature at the same position, but rotated
84  // by +/- one [compact] quantum. Returns the index of the generated offset
85  // feature, or -1 if it doesn't exist. Dir should be in
86  // [-kNumOffsetMaps, kNumOffsetMaps] to indicate the relative direction.
87  // A dir of 0 is an identity transformation.
88  // Both input and output are from the index(sparse) feature space, not
89  // the mapped/compact feature space, but the offset feature is the minimum
90  // distance moved from the input to guarantee that it maps to the next
91  // available quantum in the mapped/compact space.
92  int OffsetFeature(int index_feature, int dir) const;
93 
94  // Computes the features used by the subset of samples defined by
95  // the iterator and sets up the feature mapping.
96  // Returns the size of the compacted feature space.
98 
99  // After deleting some features, finish setting up the mapping, and map
100  // all the samples. Returns the size of the compacted feature space.
102 
103  // Indexes the given array of features to a vector of sorted indices.
105  int num_features,
106  GenericVector<int>* sorted_features) const {
107  feature_space_.IndexAndSortFeatures(features, num_features,
108  sorted_features);
109  }
110  // Maps the given array of index/sparse features to an array of map/compact
111  // features.
112  // Assumes the input is sorted. The output indices are sorted and uniqued.
113  // Returns the number of "missed" features, being features that
114  // don't map to the compact feature space.
115  int MapIndexedFeatures(const GenericVector<int>& index_features,  // Input: index/sparse features.
116  GenericVector<int>* map_features) const {  // Output: map/compact features.
117  return feature_map_.MapFeatures(index_features, map_features);  // Pure forwarding; the result is returned unchanged.
118  }
119 
120  // Prints the map features from the set in human-readable form.
121  void DebugMapFeatures(const GenericVector<int>& map_features) const;
122 
123  private:
124  void Clear();
125 
126  // Helper to compute an offset index feature. In this context an offset
127  // feature with a dir of +/-1 is a feature of a similar direction,
128  // but shifted perpendicular to the direction of the feature. An offset
129  // feature with a dir of +/-2 is a feature at the same position, but rotated
130  // by +/- one [compact] quantum. Returns the index of the generated offset
131  // feature, or -1 if it doesn't exist. Dir should be in
132  // [-kNumOffsetMaps, kNumOffsetMaps] to indicate the relative direction.
133  // A dir of 0 is an identity transformation.
134  // Both input and output are from the index(sparse) feature space, not
135  // the mapped/compact feature space, but the offset feature is the minimum
136  // distance moved from the input to guarantee that it maps to the next
137  // available quantum in the mapped/compact space.
138  int ComputeOffsetFeature(int index_feature, int dir) const;
139 
140  // True if the mapping has changed since it was last finalized.
141  bool mapping_changed_;
142  // Size of the compacted feature space, after unused features are removed.
143  int compact_size_;
144  // Feature space quantization definition and indexing from INT_FEATURE_STRUCT.
145  IntFeatureSpace feature_space_;
146  // Mapping from indexed feature space to the compacted space with unused
147  // features mapping to -1.
148  IndexMapBiDi feature_map_;
149  // Index tables to map a feature index to the corresponding feature after a
150  // shift perpendicular to the feature direction, or a rotation in place.
151  // An entry of -1 indicates that there is no corresponding feature.
152  // Array of arrays of size feature_space_.Size() owned by this class.
153  int* offset_plus_[kNumOffsetMaps];
154  int* offset_minus_[kNumOffsetMaps];
155 
156  // Don't use default copy and assign!
158  void operator=(const IntFeatureMap&);
159 };
160 
161 } // namespace tesseract.
162 
163 #endif // TESSERACT_CLASSIFY_INTFEATUREMAP_H__