Tesseract  3.02
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Groups Pages
strokewidth.h
Go to the documentation of this file.
1 
2 // File: strokewidth.h
3 // Description: Subclass of BBGrid to find uniformity of strokewidth.
4 // Author: Ray Smith
5 // Created: Mon Mar 31 16:17:01 PST 2008
6 //
7 // (C) Copyright 2008, Google Inc.
8 // Licensed under the Apache License, Version 2.0 (the "License");
9 // you may not use this file except in compliance with the License.
10 // You may obtain a copy of the License at
11 // http://www.apache.org/licenses/LICENSE-2.0
12 // Unless required by applicable law or agreed to in writing, software
13 // distributed under the License is distributed on an "AS IS" BASIS,
14 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15 // See the License for the specific language governing permissions and
16 // limitations under the License.
17 //
19 
20 #ifndef TESSERACT_TEXTORD_STROKEWIDTH_H__
21 #define TESSERACT_TEXTORD_STROKEWIDTH_H__
22 
23 #include "blobbox.h" // BlobNeighbourDir.
24 #include "blobgrid.h" // Base class.
25 #include "colpartitiongrid.h"
26 #include "textlineprojection.h"
27 
28 class DENORM;
29 class ScrollView;
30 class TO_BLOCK;
31 
32 namespace tesseract {
33 
34 class ColPartition_LIST;
35 class TabFind;
36 class TextlineProjection;
37 
38 // Misc enums to clarify bool arguments for direction-controlling args.
42 };
43 
// StrokeWidth is a BlobGrid subclass; per the file header it is used to find
// uniformity of strokewidth among the blobs held in the grid.
49 class StrokeWidth : public BlobGrid {
50  public:
51  StrokeWidth(int gridsize, const ICOORD& bleft, const ICOORD& tright);
52  virtual ~StrokeWidth();
53 
54  // Sets the neighbours member of the medium-sized blobs in the block.
55  // Searches on 4 sides of each blob for similar-sized, similar-strokewidth
56  // blobs and sets pointers to the good neighbours.
    // NOTE(review): the declaration documented above (listing line 57) is
    // missing from this listing.
58 
59  // Sets the neighbour/textline writing direction members of the medium
60  // and large blobs with optional repair of broken CJK characters first.
61  // Repair of broken CJK is needed here because broken CJK characters
62  // can fool the textline direction detection algorithm.
63  void FindTextlineDirectionAndFixBrokenCJK(bool cjk_merge,
64  TO_BLOCK* input_block);
65 
66  // To save computation, the process of generating partitions is broken
67  // into the following 4 steps:
68  // TestVerticalTextDirection
69  // CorrectForRotation (used only if a rotation is to be applied)
70  // FindLeaderPartitions
71  // GradeBlobsIntoPartitions.
72  // These functions are all required, in sequence, except for
73  // CorrectForRotation, which is not needed if no rotation is applied.
74 
75  // Types all the blobs as vertical or horizontal text or unknown and
76  // returns true if the majority are vertical.
77  // If the blobs are rotated, it is necessary to call CorrectForRotation
78  // after rotating everything, otherwise the work done here will be enough.
79  // If osd_blobs is not null, a list of blobs from the dominant textline
80  // direction are returned for use in orientation and script detection.
    // NOTE(review): the first line of this declaration (listing line 81) is
    // missing from this listing; only its final parameter is visible below.
82  BLOBNBOX_CLIST* osd_blobs);
83 
84  // Corrects the data structures for the given rotation.
85  void CorrectForRotation(const FCOORD& rerotation,
86  ColPartitionGrid* part_grid);
87 
88  // Finds leader partitions and inserts them into the given grid.
89  void FindLeaderPartitions(TO_BLOCK* block,
90  ColPartitionGrid* part_grid);
91 
92  // Finds and marks as noise those blobs that look like bits of vertical lines
93  // that would otherwise screw up layout analysis.
94  void RemoveLineResidue(ColPartition_LIST* big_part_list);
95 
96  // Types all the blobs as vertical text or horizontal text or unknown and
97  // puts them into initial ColPartitions in the supplied part_grid.
98  // rerotation determines how to get back to the image coordinates from the
99  // blob coordinates (since they may have been rotated for vertical text).
100  // block is the single block for the whole page or rectangle to be OCRed.
101  // nontext_pix (full-size), is a binary mask used to prevent merges across
102  // photo/text boundaries. It is not kept beyond this function.
103  // denorm provides a mapping back to the image from the current blob
104  // coordinate space.
105  // projection provides a measure of textline density over the image and
106  // provides functions to assist with diacritic detection. It should be a
107  // pointer to a new TextlineProjection, and will be setup here.
108  // part_grid is the output grid of textline partitions.
109  // Large blobs that cause overlap are put in separate partitions and added
110  // to the big_parts list.
111  void GradeBlobsIntoPartitions(const FCOORD& rerotation,
112  TO_BLOCK* block,
113  Pix* nontext_pix,
114  const DENORM* denorm,
115  TextlineProjection* projection,
116  ColPartitionGrid* part_grid,
117  ColPartition_LIST* big_parts);
118 
119  // Handles a click event in a display window.
120  virtual void HandleClick(int x, int y);
121 
122  private:
123  // Computes the noise_density_ by summing the number of elements in a
124  // neighbourhood of each grid cell.
125  void ComputeNoiseDensity(TO_BLOCK* block, TabFind* line_grid);
126 
127  // Detects and marks leader dots/dashes.
128  // Leaders are horizontal chains of small or noise blobs that look
129  // monospace according to ColPartition::MarkAsLeaderIfMonospaced().
130  // Detected leaders become the only occupants of the block->small_blobs list.
131  // Non-leader small blobs get moved to the blobs list.
132  // Non-leader noise blobs remain singletons in the noise list.
133  // All small and noise blobs in high density regions are marked BTFT_NONTEXT.
134  // block is the single block for the whole page or rectangle to be OCRed.
135  // leader_parts is the output.
136  void FindLeadersAndMarkNoise(TO_BLOCK* block,
137  ColPartition_LIST* leader_parts);
138 
    // NOTE(review): listing lines 139-140 (presumably the comment describing
    // InsertBlobs) are missing from this listing.
141  void InsertBlobs(TO_BLOCK* block);
142 
143  // Fix broken CJK characters, using the fake joined blobs mechanism.
144  // Blobs are really merged, ie the master takes all the outlines and the
145  // others are deleted.
146  // Returns true if sufficient blobs are merged that it may be worth running
147  // again, due to a better estimate of character size.
148  bool FixBrokenCJK(TO_BLOCK* block);
149 
150  // Collect blobs that overlap or are within max_dist of the input bbox.
151  // Return them in the list of blobs and expand the bbox to be the union
152  // of all the boxes. not_this is excluded from the search, as are blobs
153  // that cause the merged box to exceed max_size in either dimension.
154  void AccumulateOverlaps(const BLOBNBOX* not_this, bool debug,
155  int max_size, int max_dist,
156  TBOX* bbox, BLOBNBOX_CLIST* blobs);
157 
158  // For each blob in this grid, finds the textline direction to be horizontal
159  // or vertical according to distance to neighbours and 1st and 2nd order
160  // neighbours. Non-text tends to end up without a definite direction.
161  // Result is setting of the neighbours and vert_possible/horz_possible
162  // flags in the BLOBNBOXes currently in this grid.
163  // This function is called more than once if page orientation is uncertain,
164  // so display_if_debugging is true on the final call to display the results.
165  void FindTextlineFlowDirection(bool display_if_debugging);
166 
167  // Sets the neighbours and good_stroke_neighbours members of the blob by
168  // searching close on all 4 sides.
169  // When finding leader dots/dashes, there is a slightly different rule for
170  // what makes a good neighbour.
171  // If activate_line_trap, then line-like objects are found and isolated.
172  void SetNeighbours(bool leaders, bool activate_line_trap, BLOBNBOX* blob);
173 
174  // Sets the good_stroke_neighbours member of the blob if it has a
175  // GoodNeighbour on the given side.
176  // Also sets the neighbour in the blob, whether or not a good one is found.
177  // Return value is the number of neighbours in the line trap size range.
178  // Leaders get extra special lenient treatment.
179  int FindGoodNeighbour(BlobNeighbourDir dir, bool leaders, BLOBNBOX* blob);
180 
181  // Makes the blob to be only horizontal or vertical where evidence
182  // is clear based on gaps of 2nd order neighbours.
183  void SetNeighbourFlows(BLOBNBOX* blob);
184 
185  // Nullify the neighbours in the wrong directions where the direction
186  // is clear-cut based on a distance margin. Good for isolating vertical
187  // text from neighbouring horizontal text.
188  void SimplifyObviousNeighbours(BLOBNBOX* blob);
189 
190  // Smoothes the vertical/horizontal type of the blob based on the
191  // 2nd-order neighbours. If reset_all is true, then all blobs are
192  // changed. Otherwise, only ambiguous blobs are processed.
    // NOTE(review): no parameter is named reset_all; presumably it refers to
    // the desperate argument below - confirm against the implementation.
193  void SmoothNeighbourTypes(BLOBNBOX* blob, bool desperate);
194 
195  // Checks the left or right side of the given leader partition and sets the
196  // (opposite) leader_on_right or leader_on_left flags for blobs
197  // that are next to the given side of the given leader partition.
198  void MarkLeaderNeighbours(const ColPartition* part, LeftOrRight side);
199 
200  // Partition creation. Accumulates vertical and horizontal text chains,
201  // puts the remaining blobs in as unknowns, and then merges/splits to
202  // minimize overlap and smoothes the types with neighbours and the color
203  // image if provided. rerotation is used to rotate the coordinate space
204  // back to the nontext_map_ image.
205  void FindInitialPartitions(const FCOORD& rerotation,
206  TO_BLOCK* block,
207  ColPartitionGrid* part_grid,
208  ColPartition_LIST* big_parts);
209  // Finds vertical chains of text-like blobs and puts them in ColPartitions.
210  void FindVerticalTextChains(ColPartitionGrid* part_grid);
211  // Finds horizontal chains of text-like blobs and puts them in ColPartitions.
212  void FindHorizontalTextChains(ColPartitionGrid* part_grid);
213  // Finds diacritics and saves their base character in the blob.
214  void TestDiacritics(ColPartitionGrid* part_grid, TO_BLOCK* block);
215  // Searches this grid for an appropriately close and sized neighbour of the
216  // given [small] blob. If such a blob is found, the diacritic base is saved
217  // in the blob and true is returned.
218  // The small_grid is a secondary grid that contains the small/noise objects
219  // that are not in this grid, but may be useful for determining a connection
220  // between blob and its potential base character. (See DiacriticXGapFilled.)
221  bool DiacriticBlob(BlobGrid* small_grid, BLOBNBOX* blob);
222  // Returns true if there is no gap between the base char and the diacritic
223  // bigger than a fraction of the height of the base char:
224  // Eg: line end.....'
225  // The quote is a long way from the end of the line, yet it needs to be a
226  // diacritic. To determine that the quote is not part of an image, or
227  // a different text block, we check for other marks in the gap between
228  // the base char and the diacritic.
229  // '<--Diacritic
230  // |---------|
231  // | |<-toobig-gap->
232  // | Base |<ok gap>
233  // |---------| x<-----Dot occupying gap
234  // The grid is const really.
235  bool DiacriticXGapFilled(BlobGrid* grid, const TBOX& diacritic_box,
236  const TBOX& base_box);
237  // Merges diacritics with the ColPartition of the base character blob.
238  void MergeDiacritics(TO_BLOCK* block, ColPartitionGrid* part_grid);
239  // Any blobs on the large_blobs list of block that are still unowned by a
240  // ColPartition, are probably drop-cap or vertically touching so the blobs
241  // are removed to the big_parts list and treated separately.
242  void RemoveLargeUnusedBlobs(TO_BLOCK* block,
243  ColPartitionGrid* part_grid,
244  ColPartition_LIST* big_parts);
245 
246  // All remaining unused blobs are put in individual ColPartitions.
247  void PartitionRemainingBlobs(ColPartitionGrid* part_grid);
248 
249  // If combine, put all blobs in the cell_list into a single partition,
250  // otherwise put each one into its own partition.
251  void MakePartitionsFromCellList(bool combine,
252  ColPartitionGrid* part_grid,
253  BLOBNBOX_CLIST* cell_list);
254 
255  // Helper function to finish setting up a ColPartition and insert into
256  // part_grid.
257  void CompletePartition(ColPartition* part, ColPartitionGrid* part_grid);
258 
259  // Merge partitions where the merge appears harmless.
260  void EasyMerges(ColPartitionGrid* part_grid);
261 
262  // Compute a search box based on the orientation of the partition.
263  // Returns true if a suitable box can be calculated.
264  // Callback for EasyMerges.
265  bool OrientationSearchBox(ColPartition* part, TBOX* box);
266 
267  // Merge confirmation callback for EasyMerges.
268  bool ConfirmEasyMerge(const ColPartition* p1, const ColPartition* p2);
269 
270  // Returns true if there is no significant noise in between the boxes.
271  bool NoNoiseInBetween(const TBOX& box1, const TBOX& box2) const;
272 
273  // Displays the blobs colored according to the number of good neighbours
274  // and the vertical/horizontal flow.
275  ScrollView* DisplayGoodBlobs(const char* window_name, int x, int y);
276 
277  // Displays blobs colored according to whether or not they are diacritics.
278  ScrollView* DisplayDiacritics(const char* window_name,
279  int x, int y, TO_BLOCK* block);
280 
281  private:
282  // Image map of photo/noise areas on the page. Borrowed pointer (not owned.)
283  Pix* nontext_map_;
284  // Textline projection map. Borrowed pointer.
285  TextlineProjection* projection_;
286  // DENORM used by projection_ to get back to image coords. Borrowed pointer.
287  const DENORM* denorm_;
288  // Bounding box of the grid.
289  TBOX grid_box_;
290  // Rerotation to get back to the original image.
291  FCOORD rerotation_;
292  // Windows for debug display.
293  ScrollView* leaders_win_;
294  ScrollView* initial_widths_win_;
295  ScrollView* widths_win_;
296  ScrollView* chains_win_;
297  ScrollView* diacritics_win_;
298  ScrollView* textlines_win_;
299  ScrollView* smoothed_win_;
300 };
301 
302 } // namespace tesseract.
303 
304 #endif // TESSERACT_TEXTORD_STROKEWIDTH_H__