"Fossies" - the Fresh Open Source Software Archive

Member "tesseract-5.2.0/src/textord/tabvector.h" (6 Jul 2022, 15193 Bytes) of package /linux/misc/tesseract-5.2.0.tar.gz:


As a special service "Fossies" has tried to format the requested source page into HTML format using (guessed) C and C++ source code syntax highlighting (style: standard) with prefixed line numbers and code folding option. Alternatively you can here view or download the uninterpreted source code file. For more information about "tabvector.h" see the Fossies "Dox" file reference documentation and the last Fossies "Diffs" side-by-side code changes report: 4.1.3_vs_5.0.0.

    1 ///////////////////////////////////////////////////////////////////////
    2 // File:        tabvector.h
    3 // Description: Class to hold a near-vertical vector representing a tab-stop.
    4 // Author:      Ray Smith
    5 //
    6 // (C) Copyright 2008, Google Inc.
    7 // Licensed under the Apache License, Version 2.0 (the "License");
    8 // you may not use this file except in compliance with the License.
    9 // You may obtain a copy of the License at
   10 // http://www.apache.org/licenses/LICENSE-2.0
   11 // Unless required by applicable law or agreed to in writing, software
   12 // distributed under the License is distributed on an "AS IS" BASIS,
   13 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
   14 // See the License for the specific language governing permissions and
   15 // limitations under the License.
   16 //
   17 ///////////////////////////////////////////////////////////////////////
   18 
   19 #ifndef TESSERACT_TEXTORD_TABVECTOR_H_
   20 #define TESSERACT_TEXTORD_TABVECTOR_H_
   21 
   22 #include "bbgrid.h"
   23 #include "blobgrid.h"
   24 #include "clst.h"
   25 #include "elst.h"
   26 #include "elst2.h"
   27 #include "rect.h"
   28 
   29 #include <algorithm>
   30 
   31 class BLOBNBOX;
   32 class ScrollView;
   33 
   34 namespace tesseract {
   35 
   36 extern double_VAR_H(textord_tabvector_vertical_gap_fraction);
   37 extern double_VAR_H(textord_tabvector_vertical_box_ratio);
   38 
   39 // The alignment type that a tab vector represents.
   40 // Keep this enum synced with kAlignmentNames in tabvector.cpp.
   41 enum TabAlignment {
   42   TA_LEFT_ALIGNED,
   43   TA_LEFT_RAGGED,
   44   TA_CENTER_JUSTIFIED,
   45   TA_RIGHT_ALIGNED,
   46   TA_RIGHT_RAGGED,
   47   TA_SEPARATOR,
   48   TA_COUNT
   49 };
   50 
   51 // Forward declarations. The classes use their own list types, so we
   52 // need to make the list types first.
   53 class TabFind;
   54 class TabVector;
   55 class TabConstraint;
   56 
   57 ELIST2IZEH(TabVector)
   58 CLISTIZEH(TabVector)
   59 ELISTIZEH(TabConstraint)
   60 
   61 // TabConstraint is a totally self-contained class to maintain
   62 // a list of [min,max] constraints, each referring to a TabVector.
   63 // The constraints are manipulated through static methods that act
   64 // on a list of constraints. The list itself is cooperatively owned
   65 // by the TabVectors of the constraints on the list and managed
   66 // by implicit reference counting via the elements of the list.
   67 class TabConstraint : public ELIST_LINK {
   68 public:
   69   // This empty constructor is here only so that the class can be ELISTIZED.
   70   // TODO(rays) change deep_copy in elst.h line 955 to take a callback copier
   71   // and eliminate CLASSNAME##_copier.
   72   TabConstraint() = default;
   73 
   74   // Create a constraint for the top or bottom of this TabVector.
   75   static void CreateConstraint(TabVector *vector, bool is_top);
   76 
   77   // Test to see if the constraints are compatible enough to merge.
   78   static bool CompatibleConstraints(TabConstraint_LIST *list1, TabConstraint_LIST *list2);
   79 
   80   // Merge the lists of constraints and update the TabVector pointers.
   81   // The second list is deleted.
   82   static void MergeConstraints(TabConstraint_LIST *list1, TabConstraint_LIST *list2);
   83 
   84   // Set all the tops and bottoms as appropriate to a mean of the
   85   // constrained range. Delete all the constraints and list.
   86   static void ApplyConstraints(TabConstraint_LIST *constraints);
   87 
   88 private:
   89   TabConstraint(TabVector *vector, bool is_top);
   90 
   91   // Get the max of the mins and the min of the maxes.
   92   static void GetConstraints(TabConstraint_LIST *constraints, int *y_min, int *y_max);
   93 
   94   // The TabVector this constraint applies to.
   95   TabVector *vector_;
   96   // If true then we refer to the top of the vector_.
   97   bool is_top_;
   98   // The allowed range of this vector_.
   99   int y_min_;
  100   int y_max_;
  101 };
  102 
  103 // Class to hold information about a single vector
  104 // that represents a tab stop or a rule line.
  105 class TabVector : public ELIST2_LINK {
  106 public:
  107   // TODO(rays) fix this in elst.h line 1076, where it should use the
  108   // copy constructor instead of operator=.
  109   TabVector() = default;
  110   ~TabVector() = default;
  111 
  112   // Public factory to build a TabVector from a list of boxes.
  113   // The TabVector will be of the given alignment type.
  114   // The input vertical vector is used in fitting, and the output
  115   // vertical_x, vertical_y have the resulting line vector added to them
  116   // if the alignment is not ragged.
  117   // The extended_start_y and extended_end_y are the maximum possible
  118   // extension to the line segment that can be used to align with others.
  119   // The input CLIST of BLOBNBOX good_points is consumed and taken over.
  120   static TabVector *FitVector(TabAlignment alignment, ICOORD vertical, int extended_start_y,
  121                               int extended_end_y, BLOBNBOX_CLIST *good_points, int *vertical_x,
  122                               int *vertical_y);
  123 
  124   // Build a ragged TabVector by copying another's direction, shifting it
  125   // to match the given blob, and making its initial extent the height
  126   // of the blob, but its extended bounds from the bounds of the original.
  127   TabVector(const TabVector &src, TabAlignment alignment, const ICOORD &vertical_skew,
  128             BLOBNBOX *blob);
  129 
  130   // Copies basic attributes of a tab vector for simple operations.
  131   // Copies things such startpt, endpt, range, width.
  132   // Does not copy things such as partners, boxes, or constraints.
  133   // This is useful if you only need vector information for processing, such
  134   // as in the table detection code.
  135   TabVector *ShallowCopy() const;
  136 
  137   // Simple accessors.
  138   const ICOORD &startpt() const {
  139     return startpt_;
  140   }
  141   const ICOORD &endpt() const {
  142     return endpt_;
  143   }
  144   int extended_ymax() const {
  145     return extended_ymax_;
  146   }
  147   int extended_ymin() const {
  148     return extended_ymin_;
  149   }
  150   int sort_key() const {
  151     return sort_key_;
  152   }
  153   int mean_width() const {
  154     return mean_width_;
  155   }
  156   void set_top_constraints(TabConstraint_LIST *constraints) {
  157     top_constraints_ = constraints;
  158   }
  159   void set_bottom_constraints(TabConstraint_LIST *constraints) {
  160     bottom_constraints_ = constraints;
  161   }
  162   TabVector_CLIST *partners() {
  163     return &partners_;
  164   }
  165   void set_startpt(const ICOORD &start) {
  166     startpt_ = start;
  167   }
  168   void set_endpt(const ICOORD &end) {
  169     endpt_ = end;
  170   }
  171   bool intersects_other_lines() const {
  172     return intersects_other_lines_;
  173   }
  174   void set_intersects_other_lines(bool value) {
  175     intersects_other_lines_ = value;
  176   }
  177 
  178   // Inline quasi-accessors that require some computation.
  179 
  180   // Compute the x coordinate at the given y coordinate.
  181   int XAtY(int y) const {
  182     int height = endpt_.y() - startpt_.y();
  183     if (height != 0) {
  184       return (y - startpt_.y()) * (endpt_.x() - startpt_.x()) / height + startpt_.x();
  185     } else {
  186       return startpt_.x();
  187     }
  188   }
  189 
  190   // Compute the vertical overlap with the other TabVector.
  191   int VOverlap(const TabVector &other) const {
  192     return std::min(other.endpt_.y(), endpt_.y()) - std::max(other.startpt_.y(), startpt_.y());
  193   }
  194   // Compute the vertical overlap with the given y bounds.
  195   int VOverlap(int top_y, int bottom_y) const {
  196     return std::min(top_y, static_cast<int>(endpt_.y())) -
  197            std::max(bottom_y, static_cast<int>(startpt_.y()));
  198   }
  199   // Compute the extended vertical overlap with the given y bounds.
  200   int ExtendedOverlap(int top_y, int bottom_y) const {
  201     return std::min(top_y, extended_ymax_) - std::max(bottom_y, extended_ymin_);
  202   }
  203 
  204   // Return true if this is a left tab stop, either aligned, or ragged.
  205   bool IsLeftTab() const {
  206     return alignment_ == TA_LEFT_ALIGNED || alignment_ == TA_LEFT_RAGGED;
  207   }
  208   // Return true if this is a right tab stop, either aligned, or ragged.
  209   bool IsRightTab() const {
  210     return alignment_ == TA_RIGHT_ALIGNED || alignment_ == TA_RIGHT_RAGGED;
  211   }
  212   // Return true if this is a separator.
  213   bool IsSeparator() const {
  214     return alignment_ == TA_SEPARATOR;
  215   }
  216   // Return true if this is a center aligned tab stop.
  217   bool IsCenterTab() const {
  218     return alignment_ == TA_CENTER_JUSTIFIED;
  219   }
  220   // Return true if this is a ragged tab top, either left or right.
  221   bool IsRagged() const {
  222     return alignment_ == TA_LEFT_RAGGED || alignment_ == TA_RIGHT_RAGGED;
  223   }
  224 
  225   // Return true if this vector is to the left of the other in terms
  226   // of sort_key_.
  227   bool IsLeftOf(const TabVector &other) const {
  228     return sort_key_ < other.sort_key_;
  229   }
  230 
  231   // Return true if the vector has no partners.
  232   bool Partnerless() {
  233     return partners_.empty();
  234   }
  235 
  236   // Return the number of tab boxes in this vector.
  237   int BoxCount() {
  238     return boxes_.length();
  239   }
  240 
  241   // Lock the vector from refits by clearing the boxes_ list.
  242   void Freeze() {
  243     boxes_.shallow_clear();
  244   }
  245 
  246   // Flip x and y on the ends so a vector can be created from flipped input.
  247   void XYFlip() {
  248     int x = startpt_.y();
  249     startpt_.set_y(startpt_.x());
  250     startpt_.set_x(x);
  251     x = endpt_.y();
  252     endpt_.set_y(endpt_.x());
  253     endpt_.set_x(x);
  254   }
  255 
  256   // Reflect the tab vector in the y-axis.
  257   void ReflectInYAxis() {
  258     startpt_.set_x(-startpt_.x());
  259     endpt_.set_x(-endpt_.x());
  260     sort_key_ = -sort_key_;
  261     if (alignment_ == TA_LEFT_ALIGNED) {
  262       alignment_ = TA_RIGHT_ALIGNED;
  263     } else if (alignment_ == TA_RIGHT_ALIGNED) {
  264       alignment_ = TA_LEFT_ALIGNED;
  265     }
  266     if (alignment_ == TA_LEFT_RAGGED) {
  267       alignment_ = TA_RIGHT_RAGGED;
  268     } else if (alignment_ == TA_RIGHT_RAGGED) {
  269       alignment_ = TA_LEFT_RAGGED;
  270     }
  271   }
  272 
  273   // Separate function to compute the sort key for a given coordinate pair.
  274   static int SortKey(const ICOORD &vertical, int x, int y) {
  275     ICOORD pt(x, y);
  276     return pt * vertical;
  277   }
  278 
  279   // Return the x at the given y for the given sort key.
  280   static int XAtY(const ICOORD &vertical, int sort_key, int y) {
  281     if (vertical.y() != 0) {
  282       return (vertical.x() * y + sort_key) / vertical.y();
  283     } else {
  284       return sort_key;
  285     }
  286   }
  287 
  288   // Sort function for E2LIST::sort to sort by sort_key_.
  289   static int SortVectorsByKey(const void *v1, const void *v2) {
  290     const TabVector *tv1 = *static_cast<const TabVector *const *>(v1);
  291     const TabVector *tv2 = *static_cast<const TabVector *const *>(v2);
  292     return tv1->sort_key_ - tv2->sort_key_;
  293   }
  294 
  295   // More complex members.
  296 
  297   // Extend this vector to include the supplied blob if it doesn't
  298   // already have it.
  299   void ExtendToBox(BLOBNBOX *blob);
  300 
  301   // Set the ycoord of the start and move the xcoord to match.
  302   void SetYStart(int start_y);
  303   // Set the ycoord of the end and move the xcoord to match.
  304   void SetYEnd(int end_y);
  305 
  306   // Rotate the ends by the given vector.
  307   void Rotate(const FCOORD &rotation);
  308 
  309   // Setup the initial constraints, being the limits of
  310   // the vector and the extended ends.
  311   void SetupConstraints();
  312 
  313   // Setup the constraints between the partners of this TabVector.
  314   void SetupPartnerConstraints();
  315 
  316   // Setup the constraints between this and its partner.
  317   void SetupPartnerConstraints(TabVector *partner);
  318 
  319   // Use the constraints to modify the top and bottom.
  320   void ApplyConstraints();
  321 
  322   // Merge close tab vectors of the same side that overlap.
  323   static void MergeSimilarTabVectors(const ICOORD &vertical, TabVector_LIST *vectors,
  324                                      BlobGrid *grid);
  325 
  326   // Return true if this vector is the same side, overlaps, and close
  327   // enough to the other to be merged.
  328   bool SimilarTo(const ICOORD &vertical, const TabVector &other, BlobGrid *grid) const;
  329 
  330   // Eat the other TabVector into this and delete it.
  331   void MergeWith(const ICOORD &vertical, TabVector *other);
  332 
  333   // Add a new element to the list of partner TabVectors.
  334   // Partners must be added in order of increasing y coordinate of the text line
  335   // that makes them partners.
  336   // Groups of identical partners are merged into one.
  337   void AddPartner(TabVector *partner);
  338 
  339   // Return true if other is a partner of this.
  340   bool IsAPartner(const TabVector *other);
  341 
  342   // Print basic information about this tab vector.
  343   void Print(const char *prefix);
  344 
  345   // Print basic information about this tab vector and every box in it.
  346   void Debug(const char *prefix);
  347 
  348   // Draw this tabvector in place in the given window.
  349   void Display(ScrollView *tab_win);
  350 
  351   // Refit the line and/or re-evaluate the vector if the dirty flags are set.
  352   void FitAndEvaluateIfNeeded(const ICOORD &vertical, TabFind *finder);
  353 
  354   // Evaluate the vector in terms of coverage of its length by good-looking
  355   // box edges. A good looking box is one where its nearest neighbour on the
  356   // inside is nearer than half the distance its nearest neighbour on the
  357   // outside of the putative column. Bad boxes are removed from the line.
  358   // A second pass then further filters boxes by requiring that the gutter
  359   // width be a minimum fraction of the mean gutter along the line.
  360   void Evaluate(const ICOORD &vertical, TabFind *finder);
  361 
  362   // (Re)Fit a line to the stored points. Returns false if the line
  363   // is degenerate. Although the TabVector code mostly doesn't care about the
  364   // direction of lines, XAtY would give silly results for a horizontal line.
  365   // The class is mostly aimed at use for vertical lines representing
  366   // horizontal tab stops.
  367   bool Fit(ICOORD vertical, bool force_parallel);
  368 
  369   // Return the partner of this TabVector if the vector qualifies as
  370   // being a vertical text line, otherwise nullptr.
  371   TabVector *VerticalTextlinePartner();
  372 
  373   // Return the matching tabvector if there is exactly one partner, or
  374   // nullptr otherwise.  This can be used after matching is done, eg. by
  375   // VerticalTextlinePartner(), without checking if the line is vertical.
  376   TabVector *GetSinglePartner();
  377 
  378 private:
  379   // Constructor is private as the static factory is the external way
  380   // to build a TabVector.
  381   TabVector(int extended_ymin, int extended_ymax, TabAlignment alignment, BLOBNBOX_CLIST *boxes);
  382 
  383   // Delete this, but first, repoint all the partners to point to
  384   // replacement. If replacement is nullptr, then partner relationships
  385   // are removed.
  386   void Delete(TabVector *replacement);
  387 
  388 private:
  389   // The bottom of the tab line.
  390   ICOORD startpt_;
  391   // The top of the tab line.
  392   ICOORD endpt_;
  393   // The lowest y that the vector might extend to.
  394   int extended_ymin_ = 0;
  395   // The highest y that the vector might extend to.
  396   int extended_ymax_ = 0;
  397   // Perpendicular distance of vector from a given vertical for sorting.
  398   int sort_key_ = 0;
  399   // Result of Evaluate 0-100. Coverage of line with good boxes.
  400   int percent_score_ = 0;
  401   // The mean width of the blobs. Meaningful only for separator lines.
  402   int mean_width_ = 0;
  403   // True if the boxes_ list has been modified, so a refit is needed.
  404   bool needs_refit_ = false;
  405   // True if a fit has been done, so re-evaluation is needed.
  406   bool needs_evaluation_ = false;
  407   // True if a separator line intersects at least 2 other lines.
  408   bool intersects_other_lines_ = false;
  409   // The type of this TabVector.
  410   TabAlignment alignment_ = TA_LEFT_ALIGNED;
  411   // The list of boxes whose edges are aligned at this TabVector.
  412   BLOBNBOX_CLIST boxes_;
  413   // List of TabVectors that have a connection with this via a text line.
  414   TabVector_CLIST partners_;
  415   // Constraints used to resolve the exact location of the top and bottom
  416   // of the tab line.
  417   TabConstraint_LIST *top_constraints_ = nullptr;
  418   TabConstraint_LIST *bottom_constraints_ = nullptr;
  419 };
  420 
  421 } // namespace tesseract.
  422 
  423 #endif // TESSERACT_TEXTORD_TABVECTOR_H_