"Fossies" - the Fresh Open Source Software Archive

Member "tesseract-ocr/doc/html/blobbox_8cpp_source.html" (26 Oct 2012, 196375 Bytes) of package /linux/misc/old/tesseract-ocr-3.02.02-doc-html.tar.gz:


Caution: In this restricted "Fossies" environment the current HTML page may not be presented correctly and may contain some non-functional links. Alternatively, you can browse the pure source code, or view or download the uninterpreted raw source code. If the rendering is insufficient, you may try to find and view the page on the tesseract-ocr-3.02.02-doc-html.tar.gz project site itself.

Tesseract  3.02
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Groups Pages
blobbox.cpp
Go to the documentation of this file.
1 /**********************************************************************
2  * File: blobbox.cpp (Formerly blobnbox.c)
3  * Description: Code for the textord blob class.
4  * Author: Ray Smith
5  * Created: Thu Jul 30 09:08:51 BST 1992
6  *
7  * (C) Copyright 1992, Hewlett-Packard Ltd.
8  ** Licensed under the Apache License, Version 2.0 (the "License");
9  ** you may not use this file except in compliance with the License.
10  ** You may obtain a copy of the License at
11  ** http://www.apache.org/licenses/LICENSE-2.0
12  ** Unless required by applicable law or agreed to in writing, software
13  ** distributed under the License is distributed on an "AS IS" BASIS,
14  ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15  ** See the License for the specific language governing permissions and
16  ** limitations under the License.
17  *
18  **********************************************************************/
19 
20 // Include automatically generated configuration file if running autoconf.
21 #ifdef HAVE_CONFIG_H
22 #include "config_auto.h"
23 #endif
24 
25 #include "mfcpch.h"
26 #include "blobbox.h"
27 #include "helpers.h"
28 
29 #define PROJECTION_MARGIN 10 //arbitrary
30 #define EXTERN
31 
33 
34 // Up to 30 degrees is allowed for rotations of diacritic blobs.
35 const double kCosSmallAngle = 0.866;
36 // Min aspect ratio for a joined word to indicate an obvious flow direction.
37 const double kDefiniteAspectRatio = 2.0;
38 // Multiple of short length in perimeter to make a joined word.
39 const double kComplexShapePerimeterRatio = 1.5;
40 // Min multiple of linesize for medium-sized blobs in ReFilterBlobs.
41 const double kMinMediumSizeRatio = 0.25;
42 // Max multiple of linesize for medium-sized blobs in ReFilterBlobs.
43 const double kMaxMediumSizeRatio = 4.0;
44 
45 // Rotates the box and the underlying blob.
46 void BLOBNBOX::rotate(FCOORD rotation) {
47  cblob_ptr->rotate(rotation);
48  rotate_box(rotation);
49  compute_bounding_box();
50 }
51 
52 // Reflect the box in the y-axis, leaving the underlying blob untouched.
54  int left = -box.right();
55  box.set_right(-box.left());
56  box.set_left(left);
57 }
58 
59 // Rotates the box by the angle given by rotation.
60 // If the blob is a diacritic, then only small rotations for skew
61 // correction can be applied.
62 void BLOBNBOX::rotate_box(FCOORD rotation) {
63  if (IsDiacritic()) {
64  ASSERT_HOST(rotation.x() >= kCosSmallAngle)
65  ICOORD top_pt((box.left() + box.right()) / 2, base_char_top_);
66  ICOORD bottom_pt(top_pt.x(), base_char_bottom_);
67  top_pt.rotate(rotation);
68  base_char_top_ = top_pt.y();
69  bottom_pt.rotate(rotation);
70  base_char_bottom_ = bottom_pt.y();
71  box.rotate(rotation);
72  } else {
73  box.rotate(rotation);
74  set_diacritic_box(box);
75  }
76 }
77 
78 /**********************************************************************
79  * BLOBNBOX::merge
80  *
81  * Merge this blob with the given blob, which should be after this.
82  **********************************************************************/
83 void BLOBNBOX::merge( //merge blobs
84  BLOBNBOX *nextblob //blob to join with
85  ) {
86  box += nextblob->box; //merge boxes
87  set_diacritic_box(box);
88  nextblob->joined = TRUE;
89 }
90 
91 
92 // Merge this with other, taking the outlines from other.
93 // Other is not deleted, but left for the caller to handle.
95  if (cblob_ptr != NULL && other->cblob_ptr != NULL) {
96  C_OUTLINE_IT ol_it(cblob_ptr->out_list());
97  ol_it.add_list_after(other->cblob_ptr->out_list());
98  }
100 }
101 
102 
103 /**********************************************************************
104  * BLOBNBOX::chop
105  *
106  * Chop this blob into equal sized pieces using the x height as a guide.
107  * The blob is not actually chopped. Instead, fake blobs are inserted
108  * with the relevant bounding boxes.
109  **********************************************************************/
110 
111 void BLOBNBOX::chop( //chop blobs
112  BLOBNBOX_IT *start_it, //location of this
113  BLOBNBOX_IT *end_it, //iterator
114  FCOORD rotation, //for landscape
115  float xheight //of line
116  ) {
117  inT16 blobcount; //no of blobs
118  BLOBNBOX *newblob; //fake blob
119  BLOBNBOX *blob; //current blob
120  inT16 blobindex; //number of chop
121  inT16 leftx; //left edge of blob
122  float blobwidth; //width of each
123  float rightx; //right edge to scan
124  float ymin, ymax; //limits of new blob
125  float test_ymin, test_ymax; //limits of part blob
126  ICOORD bl, tr; //corners of box
127  BLOBNBOX_IT blob_it; //blob iterator
128 
129  //get no of chops
130  blobcount = (inT16) floor (box.width () / xheight);
131  if (blobcount > 1 && cblob_ptr != NULL) {
132  //width of each
133  blobwidth = (float) (box.width () + 1) / blobcount;
134  for (blobindex = blobcount - 1, rightx = box.right ();
135  blobindex >= 0; blobindex--, rightx -= blobwidth) {
136  ymin = (float) MAX_INT32;
137  ymax = (float) -MAX_INT32;
138  blob_it = *start_it;
139  do {
140  blob = blob_it.data ();
141  find_cblob_vlimits(blob->cblob_ptr, rightx - blobwidth,
142  rightx,
143  /*rotation, */ test_ymin, test_ymax);
144  blob_it.forward ();
145  UpdateRange(test_ymin, test_ymax, &ymin, &ymax);
146  }
147  while (blob != end_it->data ());
148  if (ymin < ymax) {
149  leftx = (inT16) floor (rightx - blobwidth);
150  if (leftx < box.left ())
151  leftx = box.left (); //clip to real box
152  bl = ICOORD (leftx, (inT16) floor (ymin));
153  tr = ICOORD ((inT16) ceil (rightx), (inT16) ceil (ymax));
154  if (blobindex == 0)
155  box = TBOX (bl, tr); //change box
156  else {
157  newblob = new BLOBNBOX;
158  //box is all it has
159  newblob->box = TBOX (bl, tr);
160  //stay on current
161  newblob->base_char_top_ = tr.y();
162  newblob->base_char_bottom_ = bl.y();
163  end_it->add_after_stay_put (newblob);
164  }
165  }
166  }
167  }
168 }
169 
170 // Returns the box gaps between this and its neighbours_ in an array
171 // indexed by BlobNeighbourDir.
172 void BLOBNBOX::NeighbourGaps(int gaps[BND_COUNT]) const {
173  for (int dir = 0; dir < BND_COUNT; ++dir) {
174  gaps[dir] = MAX_INT16;
175  BLOBNBOX* neighbour = neighbours_[dir];
176  if (neighbour != NULL) {
177  TBOX n_box = neighbour->bounding_box();
178  if (dir == BND_LEFT || dir == BND_RIGHT) {
179  gaps[dir] = box.x_gap(n_box);
180  } else {
181  gaps[dir] = box.y_gap(n_box);
182  }
183  }
184  }
185 }
186 // Returns the min and max horizontal and vertical gaps (from NeighbourGaps)
187 // modified so that if the max exceeds the max dimension of the blob, and
188 // the min is less, the max is replaced with the min.
189 // The objective is to catch cases where there is only a single neighbour
190 // and avoid reporting the other gap as a ridiculously large number
191 void BLOBNBOX::MinMaxGapsClipped(int* h_min, int* h_max,
192  int* v_min, int* v_max) const {
193  int max_dimension = MAX(box.width(), box.height());
194  int gaps[BND_COUNT];
195  NeighbourGaps(gaps);
196  *h_min = MIN(gaps[BND_LEFT], gaps[BND_RIGHT]);
197  *h_max = MAX(gaps[BND_LEFT], gaps[BND_RIGHT]);
198  if (*h_max > max_dimension && *h_min < max_dimension) *h_max = *h_min;
199  *v_min = MIN(gaps[BND_ABOVE], gaps[BND_BELOW]);
200  *v_max = MAX(gaps[BND_ABOVE], gaps[BND_BELOW]);
201  if (*v_max > max_dimension && *v_min < max_dimension) *v_max = *v_min;
202 }
203 
204 // NULLs out any neighbours that are DeletableNoise to remove references.
206  for (int dir = 0; dir < BND_COUNT; ++dir) {
207  BLOBNBOX* neighbour = neighbours_[dir];
208  if (neighbour != NULL && neighbour->DeletableNoise()) {
209  neighbours_[dir] = NULL;
210  good_stroke_neighbours_[dir] = false;
211  }
212  }
213 }
214 
215 // Returns positive if there is at least one side neighbour that has a similar
216 // stroke width and is not on the other side of a rule line.
218  int score = 0;
219  for (int dir = 0; dir < BND_COUNT; ++dir) {
220  BlobNeighbourDir bnd = static_cast<BlobNeighbourDir>(dir);
221  if (good_stroke_neighbour(bnd))
222  ++score;
223  }
224  return score;
225 }
226 
227 // Returns the number of side neighbours that are of type BRT_NOISE.
229  int count = 0;
230  for (int dir = 0; dir < BND_COUNT; ++dir) {
231  BlobNeighbourDir bnd = static_cast<BlobNeighbourDir>(dir);
232  BLOBNBOX* blob = neighbour(bnd);
233  if (blob != NULL && blob->region_type() == BRT_NOISE)
234  ++count;
235  }
236  return count;
237 }
238 
239 // Returns true, and sets vert_possible/horz_possible if the blob has some
240 // feature that makes it individually appear to flow one way.
241 // eg if it has a high aspect ratio, yet has a complex shape, such as a
242 // joined word in Latin, Arabic, or Hindi, rather than being a -, I, l, 1 etc.
244  int box_perimeter = 2 * (box.height() + box.width());
245  if (box.width() > box.height() * kDefiniteAspectRatio) {
246  // Attempt to distinguish a wide joined word from a dash.
247  // If it is a dash, then its perimeter is approximately
248  // 2 * (box width + stroke width), but more if the outline is noisy,
249  // so perimeter - 2*(box width + stroke width) should be close to zero.
250  // A complex shape such as a joined word should have a much larger value.
251  int perimeter = cblob()->perimeter();
252  if (vert_stroke_width() > 0)
253  perimeter -= 2 * vert_stroke_width();
254  else
255  perimeter -= 4 * cblob()->area() / perimeter;
256  perimeter -= 2 * box.width();
257  // Use a multiple of the box perimeter as a threshold.
258  if (perimeter > kComplexShapePerimeterRatio * box_perimeter) {
259  set_vert_possible(false);
260  set_horz_possible(true);
261  return true;
262  }
263  }
264  if (box.height() > box.width() * kDefiniteAspectRatio) {
265  // As above, but for a putative vertical word vs a I/1/l.
266  int perimeter = cblob()->perimeter();
267  if (horz_stroke_width() > 0)
268  perimeter -= 2 * horz_stroke_width();
269  else
270  perimeter -= 4 * cblob()->area() / perimeter;
271  perimeter -= 2 * box.height();
272  if (perimeter > kComplexShapePerimeterRatio * box_perimeter) {
273  set_vert_possible(true);
274  set_horz_possible(false);
275  return true;
276  }
277  }
278  return false;
279 }
280 
281 // Returns true if there is no tabstop violation in merging this and other.
282 bool BLOBNBOX::ConfirmNoTabViolation(const BLOBNBOX& other) const {
283  if (box.left() < other.box.left() && box.left() < other.left_rule_)
284  return false;
285  if (other.box.left() < box.left() && other.box.left() < left_rule_)
286  return false;
287  if (box.right() > other.box.right() && box.right() > other.right_rule_)
288  return false;
289  if (other.box.right() > box.right() && other.box.right() > right_rule_)
290  return false;
291  return true;
292 }
293 
294 // Returns true if other has a similar stroke width to this.
296  double fractional_tolerance,
297  double constant_tolerance) const {
298  // The perimeter-based width is used as a backup in case there is
299  // no information in the blob.
300  double p_width = area_stroke_width();
301  double n_p_width = other.area_stroke_width();
302  float h_tolerance = horz_stroke_width_ * fractional_tolerance
303  + constant_tolerance;
304  float v_tolerance = vert_stroke_width_ * fractional_tolerance
305  + constant_tolerance;
306  double p_tolerance = p_width * fractional_tolerance
307  + constant_tolerance;
308  bool h_zero = horz_stroke_width_ == 0.0f || other.horz_stroke_width_ == 0.0f;
309  bool v_zero = vert_stroke_width_ == 0.0f || other.vert_stroke_width_ == 0.0f;
310  bool h_ok = !h_zero && NearlyEqual(horz_stroke_width_,
311  other.horz_stroke_width_, h_tolerance);
312  bool v_ok = !v_zero && NearlyEqual(vert_stroke_width_,
313  other.vert_stroke_width_, v_tolerance);
314  bool p_ok = h_zero && v_zero && NearlyEqual(p_width, n_p_width, p_tolerance);
315  // For a match, at least one of the horizontal and vertical widths
316  // must match, and the other one must either match or be zero.
317  // Only if both are zero will we look at the perimeter metric.
318  return p_ok || ((v_ok || h_ok) && (h_ok || h_zero) && (v_ok || v_zero));
319 }
320 
321 // Returns a bounding box of the outline contained within the
322 // given horizontal range.
323 TBOX BLOBNBOX::BoundsWithinLimits(int left, int right) {
324  FCOORD no_rotation(1.0f, 0.0f);
325  float top = box.top();
326  float bottom = box.bottom();
327  if (cblob_ptr != NULL) {
328  find_cblob_limits(cblob_ptr, static_cast<float>(left),
329  static_cast<float>(right), no_rotation,
330  bottom, top);
331  }
332 
333  if (top < bottom) {
334  top = box.top();
335  bottom = box.bottom();
336  }
337  FCOORD bot_left(left, bottom);
338  FCOORD top_right(right, top);
339  TBOX shrunken_box(bot_left);
340  TBOX shrunken_box2(top_right);
341  shrunken_box += shrunken_box2;
342  return shrunken_box;
343 }
344 
345 // Helper to call CleanNeighbours on all blobs on the list.
346 void BLOBNBOX::CleanNeighbours(BLOBNBOX_LIST* blobs) {
347  BLOBNBOX_IT blob_it(blobs);
348  for (blob_it.mark_cycle_pt(); !blob_it.cycled_list(); blob_it.forward()) {
349  blob_it.data()->CleanNeighbours();
350  }
351 }
352 
353 // Helper to delete all the deletable blobs on the list.
354 void BLOBNBOX::DeleteNoiseBlobs(BLOBNBOX_LIST* blobs) {
355  BLOBNBOX_IT blob_it(blobs);
356  for (blob_it.mark_cycle_pt(); !blob_it.cycled_list(); blob_it.forward()) {
357  BLOBNBOX* blob = blob_it.data();
358  if (blob->DeletableNoise()) {
359  delete blob->cblob();
360  delete blob_it.extract();
361  }
362  }
363 }
364 
365 #ifndef GRAPHICS_DISABLED
366 // Helper to draw all the blobs on the list in the given body_colour,
367 // with child outlines in the child_colour.
368 void BLOBNBOX::PlotBlobs(BLOBNBOX_LIST* list,
369  ScrollView::Color body_colour,
370  ScrollView::Color child_colour,
371  ScrollView* win) {
372  BLOBNBOX_IT it(list);
373  for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) {
374  it.data()->plot(win, body_colour, child_colour);
375  }
376 }
377 
378 // Helper to draw only DeletableNoise blobs (unowned, BRT_NOISE) on the
379 // given list in the given body_colour, with child outlines in the
380 // child_colour.
381 void BLOBNBOX::PlotNoiseBlobs(BLOBNBOX_LIST* list,
382  ScrollView::Color body_colour,
383  ScrollView::Color child_colour,
384  ScrollView* win) {
385  BLOBNBOX_IT it(list);
386  for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) {
387  BLOBNBOX* blob = it.data();
388  if (blob->DeletableNoise())
389  blob->plot(win, body_colour, child_colour);
390  }
391 }
392 
394  BlobTextFlowType flow_type) {
395  switch (region_type) {
396  case BRT_HLINE:
397  return ScrollView::BROWN;
398  case BRT_VLINE:
399  return ScrollView::DARK_GREEN;
400  case BRT_RECTIMAGE:
401  return ScrollView::RED;
402  case BRT_POLYIMAGE:
403  return ScrollView::ORANGE;
404  case BRT_UNKNOWN:
405  return flow_type == BTFT_NONTEXT ? ScrollView::CYAN : ScrollView::WHITE;
406  case BRT_VERT_TEXT:
407  if (flow_type == BTFT_STRONG_CHAIN || flow_type == BTFT_TEXT_ON_IMAGE)
408  return ScrollView::GREEN;
409  if (flow_type == BTFT_CHAIN)
410  return ScrollView::LIME_GREEN;
411  return ScrollView::YELLOW;
412  case BRT_TEXT:
413  if (flow_type == BTFT_STRONG_CHAIN)
414  return ScrollView::BLUE;
415  if (flow_type == BTFT_TEXT_ON_IMAGE)
416  return ScrollView::LIGHT_BLUE;
417  if (flow_type == BTFT_CHAIN)
419  if (flow_type == BTFT_LEADER)
420  return ScrollView::WHEAT;
421  if (flow_type == BTFT_NONTEXT)
422  return ScrollView::PINK;
423  return ScrollView::MAGENTA;
424  default:
425  return ScrollView::GREY;
426  }
427 }
428 
429 // Keep in sync with BlobRegionType.
431  return TextlineColor(region_type_, flow_);
432 }
433 
434 void BLOBNBOX::plot(ScrollView* window, // window to draw in
435  ScrollView::Color blob_colour, // for outer bits
436  ScrollView::Color child_colour) { // for holes
437  if (cblob_ptr != NULL)
438  cblob_ptr->plot(window, blob_colour, child_colour);
439 }
440 #endif
441 /**********************************************************************
442  * find_cblob_limits
443  *
444  * Scan the outlines of the cblob to locate the y min and max
445  * between the given x limits.
446  **********************************************************************/
447 
448 void find_cblob_limits( //get y limits
449  C_BLOB *blob, //blob to search
450  float leftx, //x limits
451  float rightx,
452  FCOORD rotation, //for landscape
453  float &ymin, //output y limits
454  float &ymax) {
455  inT16 stepindex; //current point
456  ICOORD pos; //current coords
457  ICOORD vec; //rotated step
458  C_OUTLINE *outline; //current outline
459  //outlines
460  C_OUTLINE_IT out_it = blob->out_list ();
461 
462  ymin = (float) MAX_INT32;
463  ymax = (float) -MAX_INT32;
464  for (out_it.mark_cycle_pt (); !out_it.cycled_list (); out_it.forward ()) {
465  outline = out_it.data ();
466  pos = outline->start_pos (); //get coords
467  pos.rotate (rotation);
468  for (stepindex = 0; stepindex < outline->pathlength (); stepindex++) {
469  //inside
470  if (pos.x () >= leftx && pos.x () <= rightx) {
471  UpdateRange(pos.y(), &ymin, &ymax);
472  }
473  vec = outline->step (stepindex);
474  vec.rotate (rotation);
475  pos += vec; //move to next
476  }
477  }
478 }
479 
480 
481 /**********************************************************************
482  * find_cblob_vlimits
483  *
484  * Scan the outlines of the cblob to locate the y min and max
485  * between the given x limits.
486  **********************************************************************/
487 
488 void find_cblob_vlimits( //get y limits
489  C_BLOB *blob, //blob to search
490  float leftx, //x limits
491  float rightx,
492  float &ymin, //output y limits
493  float &ymax) {
494  inT16 stepindex; //current point
495  ICOORD pos; //current coords
496  ICOORD vec; //rotated step
497  C_OUTLINE *outline; //current outline
498  //outlines
499  C_OUTLINE_IT out_it = blob->out_list ();
500 
501  ymin = (float) MAX_INT32;
502  ymax = (float) -MAX_INT32;
503  for (out_it.mark_cycle_pt (); !out_it.cycled_list (); out_it.forward ()) {
504  outline = out_it.data ();
505  pos = outline->start_pos (); //get coords
506  for (stepindex = 0; stepindex < outline->pathlength (); stepindex++) {
507  //inside
508  if (pos.x () >= leftx && pos.x () <= rightx) {
509  UpdateRange(pos.y(), &ymin, &ymax);
510  }
511  vec = outline->step (stepindex);
512  pos += vec; //move to next
513  }
514  }
515 }
516 
517 
518 /**********************************************************************
519  * find_cblob_hlimits
520  *
521  * Scan the outlines of the cblob to locate the x min and max
522  * between the given y limits.
523  **********************************************************************/
524 
525 void find_cblob_hlimits( //get x limits
526  C_BLOB *blob, //blob to search
527  float bottomy, //y limits
528  float topy,
529  float &xmin, //output x limits
530  float &xmax) {
531  inT16 stepindex; //current point
532  ICOORD pos; //current coords
533  ICOORD vec; //rotated step
534  C_OUTLINE *outline; //current outline
535  //outlines
536  C_OUTLINE_IT out_it = blob->out_list ();
537 
538  xmin = (float) MAX_INT32;
539  xmax = (float) -MAX_INT32;
540  for (out_it.mark_cycle_pt (); !out_it.cycled_list (); out_it.forward ()) {
541  outline = out_it.data ();
542  pos = outline->start_pos (); //get coords
543  for (stepindex = 0; stepindex < outline->pathlength (); stepindex++) {
544  //inside
545  if (pos.y () >= bottomy && pos.y () <= topy) {
546  UpdateRange(pos.x(), &xmin, &xmax);
547  }
548  vec = outline->step (stepindex);
549  pos += vec; //move to next
550  }
551  }
552 }
553 
554 /**********************************************************************
555  * crotate_cblob
556  *
557  * Rotate the copy by the given vector and return a C_BLOB.
558  **********************************************************************/
559 
560 C_BLOB *crotate_cblob( //rotate it
561  C_BLOB *blob, //blob to search
562  FCOORD rotation //for landscape
563  ) {
564  C_OUTLINE_LIST out_list; //output outlines
565  //input outlines
566  C_OUTLINE_IT in_it = blob->out_list ();
567  //output outlines
568  C_OUTLINE_IT out_it = &out_list;
569 
570  for (in_it.mark_cycle_pt (); !in_it.cycled_list (); in_it.forward ()) {
571  out_it.add_after_then_move (new C_OUTLINE (in_it.data (), rotation));
572  }
573  return new C_BLOB (&out_list);
574 }
575 
576 
577 /**********************************************************************
578  * box_next
579  *
580  * Compute the bounding box of this blob with merging of x overlaps
581  * but no pre-chopping.
582  * Then move the iterator on to the start of the next blob.
583  **********************************************************************/
584 
585 TBOX box_next( //get bounding box
586  BLOBNBOX_IT *it //iterator to blobds
587  ) {
588  BLOBNBOX *blob; //current blob
589  TBOX result; //total box
590 
591  blob = it->data ();
592  result = blob->bounding_box ();
593  do {
594  it->forward ();
595  blob = it->data ();
596  if (blob->cblob() == NULL)
597  //was pre-chopped
598  result += blob->bounding_box ();
599  }
600  //until next real blob
601  while ((blob->cblob() == NULL) || blob->joined_to_prev());
602  return result;
603 }
604 
605 
606 /**********************************************************************
607  * box_next_pre_chopped
608  *
609  * Compute the bounding box of this blob with merging of x overlaps
610  * but WITH pre-chopping.
611  * Then move the iterator on to the start of the next pre-chopped blob.
612  **********************************************************************/
613 
614 TBOX box_next_pre_chopped( //get bounding box
615  BLOBNBOX_IT *it //iterator to blobds
616  ) {
617  BLOBNBOX *blob; //current blob
618  TBOX result; //total box
619 
620  blob = it->data ();
621  result = blob->bounding_box ();
622  do {
623  it->forward ();
624  blob = it->data ();
625  }
626  //until next real blob
627  while (blob->joined_to_prev ());
628  return result;
629 }
630 
631 
632 /**********************************************************************
633  * TO_ROW::TO_ROW
634  *
635  * Constructor to make a row from a blob.
636  **********************************************************************/
637 
638 TO_ROW::TO_ROW ( //constructor
639 BLOBNBOX * blob, //first blob
640 float top, //corrected top
641 float bottom, //of row
642 float row_size //ideal
643 ) {
644  clear();
645  y_min = bottom;
646  y_max = top;
647  initial_y_min = bottom;
648 
649  float diff; //in size
650  BLOBNBOX_IT it = &blobs; //list of blobs
651 
652  it.add_to_end (blob);
653  diff = top - bottom - row_size;
654  if (diff > 0) {
655  y_max -= diff / 2;
656  y_min += diff / 2;
657  }
658  //very small object
659  else if ((top - bottom) * 3 < row_size) {
660  diff = row_size / 3 + bottom - top;
661  y_max += diff / 2;
662  y_min -= diff / 2;
663  }
664 }
665 
666 
667 /**********************************************************************
668  * TO_ROW:add_blob
669  *
670  * Add the blob to the end of the row.
671  **********************************************************************/
672 
673 void TO_ROW::add_blob( //constructor
674  BLOBNBOX *blob, //first blob
675  float top, //corrected top
676  float bottom, //of row
677  float row_size //ideal
678  ) {
679  float allowed; //allowed expansion
680  float available; //expansion
681  BLOBNBOX_IT it = &blobs; //list of blobs
682 
683  it.add_to_end (blob);
684  allowed = row_size + y_min - y_max;
685  if (allowed > 0) {
686  available = top > y_max ? top - y_max : 0;
687  if (bottom < y_min)
688  //total available
689  available += y_min - bottom;
690  if (available > 0) {
691  available += available; //do it gradually
692  if (available < allowed)
693  available = allowed;
694  if (bottom < y_min)
695  y_min -= (y_min - bottom) * allowed / available;
696  if (top > y_max)
697  y_max += (top - y_max) * allowed / available;
698  }
699  }
700 }
701 
702 
703 /**********************************************************************
704  * TO_ROW:insert_blob
705  *
706  * Add the blob to the row in the correct position.
707  **********************************************************************/
708 
709 void TO_ROW::insert_blob( //constructor
710  BLOBNBOX *blob //first blob
711  ) {
712  BLOBNBOX_IT it = &blobs; //list of blobs
713 
714  if (it.empty ())
715  it.add_before_then_move (blob);
716  else {
717  it.mark_cycle_pt ();
718  while (!it.cycled_list ()
719  && it.data ()->bounding_box ().left () <=
720  blob->bounding_box ().left ())
721  it.forward ();
722  if (it.cycled_list ())
723  it.add_to_end (blob);
724  else
725  it.add_before_stay_put (blob);
726  }
727 }
728 
729 
730 /**********************************************************************
731  * TO_ROW::compute_vertical_projection
732  *
733  * Compute the vertical projection of a TO_ROW from its blobs.
734  **********************************************************************/
735 
736 void TO_ROW::compute_vertical_projection() { //project whole row
737  TBOX row_box; //bound of row
738  BLOBNBOX *blob; //current blob
739  TBOX blob_box; //bounding box
740  BLOBNBOX_IT blob_it = blob_list ();
741 
742  if (blob_it.empty ())
743  return;
744  row_box = blob_it.data ()->bounding_box ();
745  for (blob_it.mark_cycle_pt (); !blob_it.cycled_list (); blob_it.forward ())
746  row_box += blob_it.data ()->bounding_box ();
747 
749  row_box.right () + PROJECTION_MARGIN);
750  projection_left = row_box.left () - PROJECTION_MARGIN;
752  for (blob_it.mark_cycle_pt (); !blob_it.cycled_list (); blob_it.forward ()) {
753  blob = blob_it.data();
754  if (blob->cblob() != NULL)
756  }
757 }
758 
759 
760 /**********************************************************************
761  * TO_ROW::clear
762  *
763  * Zero out all scalar members.
764  **********************************************************************/
765 void TO_ROW::clear() {
766  all_caps = 0;
767  used_dm_model = 0;
768  projection_left = 0;
769  projection_right = 0;
771  fixed_pitch = 0.0;
772  fp_space = 0.0;
773  fp_nonsp = 0.0;
774  pr_space = 0.0;
775  pr_nonsp = 0.0;
776  spacing = 0.0;
777  xheight = 0.0;
778  xheight_evidence = 0;
779  body_size = 0.0;
780  ascrise = 0.0;
781  descdrop = 0.0;
782  min_space = 0;
783  max_nonspace = 0;
784  space_threshold = 0;
785  kern_size = 0.0;
786  space_size = 0.0;
787  y_min = 0.0;
788  y_max = 0.0;
789  initial_y_min = 0.0;
790  m = 0.0;
791  c = 0.0;
792  error = 0.0;
793  para_c = 0.0;
794  para_error = 0.0;
795  y_origin = 0.0;
796  credibility = 0.0;
797  num_repeated_sets_ = -1;
798 }
799 
800 
801 /**********************************************************************
802  * vertical_cblob_projection
803  *
804  * Compute the vertical projection of a cblob from its outlines
805  * and add to the given STATS.
806  **********************************************************************/
807 
808 void vertical_cblob_projection( //project outlines
809  C_BLOB *blob, //blob to project
810  STATS *stats //output
811  ) {
812  //outlines of blob
813  C_OUTLINE_IT out_it = blob->out_list ();
814 
815  for (out_it.mark_cycle_pt (); !out_it.cycled_list (); out_it.forward ()) {
816  vertical_coutline_projection (out_it.data (), stats);
817  }
818 }
819 
820 
821 /**********************************************************************
822  * vertical_coutline_projection
823  *
824  * Compute the vertical projection of a outline from its outlines
825  * and add to the given STATS.
826  **********************************************************************/
827 
828 void vertical_coutline_projection( //project outlines
829  C_OUTLINE *outline, //outline to project
830  STATS *stats //output
831  ) {
832  ICOORD pos; //current point
833  ICOORD step; //edge step
834  inT32 length; //of outline
835  inT16 stepindex; //current step
836  C_OUTLINE_IT out_it = outline->child ();
837 
838  pos = outline->start_pos ();
839  length = outline->pathlength ();
840  for (stepindex = 0; stepindex < length; stepindex++) {
841  step = outline->step (stepindex);
842  if (step.x () > 0) {
843  stats->add (pos.x (), -pos.y ());
844  } else if (step.x () < 0) {
845  stats->add (pos.x () - 1, pos.y ());
846  }
847  pos += step;
848  }
849 
850  for (out_it.mark_cycle_pt (); !out_it.cycled_list (); out_it.forward ()) {
851  vertical_coutline_projection (out_it.data (), stats);
852  }
853 }
854 
855 
856 /**********************************************************************
857  * TO_BLOCK::TO_BLOCK
858  *
859  * Constructor to make a TO_BLOCK from a real block.
860  **********************************************************************/
861 
862 TO_BLOCK::TO_BLOCK( //make a block
863  BLOCK *src_block //real block
864  ) {
865  clear();
866  block = src_block;
867 }
868 
869 static void clear_blobnboxes(BLOBNBOX_LIST* boxes) {
870  BLOBNBOX_IT it = boxes;
871  // A BLOBNBOX generally doesn't own its blobs, so if they do, you
872  // have to delete them explicitly.
873  for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) {
874  BLOBNBOX* box = it.data();
875  if (box->cblob() != NULL)
876  delete box->cblob();
877  }
878 }
879 
880 /**********************************************************************
881  * TO_BLOCK::clear
882  *
883  * Zero out all scalar members.
884  **********************************************************************/
886  block = NULL;
888  line_spacing = 0.0;
889  line_size = 0.0;
890  max_blob_size = 0.0;
891  baseline_offset = 0.0;
892  xheight = 0.0;
893  fixed_pitch = 0.0;
894  kern_size = 0.0;
895  space_size = 0.0;
896  min_space = 0;
897  max_nonspace = 0;
898  fp_space = 0.0;
899  fp_nonsp = 0.0;
900  pr_space = 0.0;
901  pr_nonsp = 0.0;
902  key_row = NULL;
903 }
904 
905 
907  // Any residual BLOBNBOXes at this stage own their blobs, so delete them.
908  clear_blobnboxes(&blobs);
909  clear_blobnboxes(&underlines);
910  clear_blobnboxes(&noise_blobs);
911  clear_blobnboxes(&small_blobs);
912  clear_blobnboxes(&large_blobs);
913 }
914 
915 // Helper function to divide the input blobs over noise, small, medium
916 // and large lists. Blobs small in height and (small in width or large in width)
917 // go in the noise list. Dash (-) candidates go in the small list, and
918 // medium and large are by height.
919 // SIDE-EFFECT: reset all blobs to initial state by calling Init().
920 static void SizeFilterBlobs(int min_height, int max_height,
921  BLOBNBOX_LIST* src_list,
922  BLOBNBOX_LIST* noise_list,
923  BLOBNBOX_LIST* small_list,
924  BLOBNBOX_LIST* medium_list,
925  BLOBNBOX_LIST* large_list) {
926  BLOBNBOX_IT noise_it(noise_list);
927  BLOBNBOX_IT small_it(small_list);
928  BLOBNBOX_IT medium_it(medium_list);
929  BLOBNBOX_IT large_it(large_list);
930  for (BLOBNBOX_IT src_it(src_list); !src_it.empty(); src_it.forward()) {
931  BLOBNBOX* blob = src_it.extract();
932  blob->ReInit();
933  int width = blob->bounding_box().width();
934  int height = blob->bounding_box().height();
935  if (height < min_height &&
936  (width < min_height || width > max_height))
937  noise_it.add_after_then_move(blob);
938  else if (height > max_height)
939  large_it.add_after_then_move(blob);
940  else if (height < min_height)
941  small_it.add_after_then_move(blob);
942  else
943  medium_it.add_after_then_move(blob);
944  }
945 }
946 
947 // Reorganize the blob lists with a different definition of small, medium
948 // and large, compared to the original definition.
949 // Height is still the primary filter key, but medium width blobs of small
950 // height become small, and very wide blobs of small height stay noise, along
951 // with small dot-shaped blobs.
953  int min_height = IntCastRounded(kMinMediumSizeRatio * line_size);
954  int max_height = IntCastRounded(kMaxMediumSizeRatio * line_size);
955  BLOBNBOX_LIST noise_list;
956  BLOBNBOX_LIST small_list;
957  BLOBNBOX_LIST medium_list;
958  BLOBNBOX_LIST large_list;
959  SizeFilterBlobs(min_height, max_height, &blobs,
960  &noise_list, &small_list, &medium_list, &large_list);
961  SizeFilterBlobs(min_height, max_height, &large_blobs,
962  &noise_list, &small_list, &medium_list, &large_list);
963  SizeFilterBlobs(min_height, max_height, &small_blobs,
964  &noise_list, &small_list, &medium_list, &large_list);
965  SizeFilterBlobs(min_height, max_height, &noise_blobs,
966  &noise_list, &small_list, &medium_list, &large_list);
967  BLOBNBOX_IT blob_it(&blobs);
968  blob_it.add_list_after(&medium_list);
969  blob_it.set_to_list(&large_blobs);
970  blob_it.add_list_after(&large_list);
971  blob_it.set_to_list(&small_blobs);
972  blob_it.add_list_after(&small_list);
973  blob_it.set_to_list(&noise_blobs);
974  blob_it.add_list_after(&noise_list);
975 }
976 
977 // Deletes noise blobs from all lists where not owned by a ColPartition.
987 }
988 
989 #ifndef GRAPHICS_DISABLED
990 // Draw the noise blobs from all lists in red.
996 }
997 
998 // Draw the blobs on the various lists in the block in different colors.
1002  win);
1004  win);
1006 }
1007 
1008 /**********************************************************************
1009  * plot_blob_list
1010  *
1011  * Draw a list of blobs.
1012  **********************************************************************/
1013 
1014 void plot_blob_list(ScrollView* win, // window to draw in
1015  BLOBNBOX_LIST *list, // blob list
1016  ScrollView::Color body_colour, // colour to draw
1017  ScrollView::Color child_colour) { // colour of child
1018  BLOBNBOX_IT it = list;
1019  for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) {
1020  it.data()->plot(win, body_colour, child_colour);
1021  }
1022 }
1023 #endif // GRAPHICS_DISABLED