"Fossies" - the Fresh Open Source Software Archive

Member "tesseract-5.2.0/src/classify/kdtree.cpp" (6 Jul 2022, 16377 Bytes) of package /linux/misc/tesseract-5.2.0.tar.gz:


As a special service "Fossies" has tried to format the requested source page into HTML format using (guessed) C and C++ source code syntax highlighting (style: standard) with prefixed line numbers and code folding option. Alternatively you can here view or download the uninterpreted source code file. For more information about "kdtree.cpp" see the Fossies "Dox" file reference documentation and the last Fossies "Diffs" side-by-side code changes report: 4.1.3_vs_5.0.0.

    1 /******************************************************************************
    2  **  Filename:  kdtree.cpp
    3  **  Purpose:   Routines for managing K-D search trees
    4  **  Author:    Dan Johnson
    5  **
    6  **  (c) Copyright Hewlett-Packard Company, 1988.
    7  ** Licensed under the Apache License, Version 2.0 (the "License");
    8  ** you may not use this file except in compliance with the License.
    9  ** You may obtain a copy of the License at
   10  ** http://www.apache.org/licenses/LICENSE-2.0
   11  ** Unless required by applicable law or agreed to in writing, software
   12  ** distributed under the License is distributed on an "AS IS" BASIS,
   13  ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
   14  ** See the License for the specific language governing permissions and
   15  ** limitations under the License.
   16  ******************************************************************************/
   17 
   18 /*-----------------------------------------------------------------------------
   19           Include Files and Type Defines
   20 -----------------------------------------------------------------------------*/
   21 #include "kdtree.h"
   22 
   23 #include <algorithm>
   24 #include <cfloat> // for FLT_MAX
   25 #include <cmath>
   26 #include <cstdio>
   27 
   28 namespace tesseract {
   29 
   30 #define Magnitude(X) ((X) < 0 ? -(X) : (X))
   31 #define NodeFound(N, K, D) (((N)->Key == (K)) && ((N)->Data == (D)))
   32 
   33 /*-----------------------------------------------------------------------------
   34         Global Data Definitions and Declarations
   35 -----------------------------------------------------------------------------*/
   36 #define MINSEARCH (-FLT_MAX)
   37 #define MAXSEARCH FLT_MAX
   38 
   39 // Helper function to find the next essential dimension in a cycle.
   40 static int NextLevel(KDTREE *tree, int level) {
   41   do {
   42     ++level;
   43     if (level >= tree->KeySize) {
   44       level = 0;
   45     }
   46   } while (tree->KeyDesc[level].NonEssential);
   47   return level;
   48 }
   49 
   50 //-----------------------------------------------------------------------------
   51 /**  Store the k smallest-keyed key-value pairs. */
   52 template <typename Key, typename Value>
   53 class MinK {
   54 public:
   55   MinK(Key max_key, int k);
   56   ~MinK();
   57 
   58   struct Element {
   59     Element() = default;
   60     Element(const Key &k, const Value &v) : key(k), value(v) {}
   61 
   62     Key key;
   63     Value value;
   64   };
   65 
   66   bool insert(Key k, Value v);
   67   const Key &max_insertable_key();
   68 
   69   int elements_count() {
   70     return elements_count_;
   71   }
   72   const Element *elements() {
   73     return elements_;
   74   }
   75 
   76 private:
   77   const Key max_key_;  ///< the maximum possible Key
   78   Element *elements_;  ///< unsorted array of elements
   79   int elements_count_; ///< the number of results collected so far
   80   int k_;              ///< the number of results we want from the search
   81   int max_index_;      ///< the index of the result with the largest key
   82 };
   83 
   84 template <typename Key, typename Value>
   85 MinK<Key, Value>::MinK(Key max_key, int k)
   86     : max_key_(max_key), elements_count_(0), k_(k < 1 ? 1 : k), max_index_(0) {
   87   elements_ = new Element[k_];
   88 }
   89 
   90 template <typename Key, typename Value>
   91 MinK<Key, Value>::~MinK() {
   92   delete[] elements_;
   93 }
   94 
   95 template <typename Key, typename Value>
   96 const Key &MinK<Key, Value>::max_insertable_key() {
   97   if (elements_count_ < k_) {
   98     return max_key_;
   99   }
  100   return elements_[max_index_].key;
  101 }
  102 
  103 template <typename Key, typename Value>
  104 bool MinK<Key, Value>::insert(Key key, Value value) {
  105   if (elements_count_ < k_) {
  106     elements_[elements_count_++] = Element(key, value);
  107     if (key > elements_[max_index_].key) {
  108       max_index_ = elements_count_ - 1;
  109     }
  110     return true;
  111   } else if (key < elements_[max_index_].key) {
  112     // evict the largest element.
  113     elements_[max_index_] = Element(key, value);
  114     // recompute max_index_
  115     for (int i = 0; i < elements_count_; i++) {
  116       if (elements_[i].key > elements_[max_index_].key) {
  117         max_index_ = i;
  118       }
  119     }
  120     return true;
  121   }
  122   return false;
  123 }
  124 
  125 //-----------------------------------------------------------------------------
  126 /** Helper class for searching for the k closest points to query_point in tree.
  127  */
  128 class KDTreeSearch {
  129 public:
  130   KDTreeSearch(KDTREE *tree, float *query_point, int k_closest);
  131   ~KDTreeSearch();
  132 
  133   /** Return the k nearest points' data. */
  134   void Search(int *result_count, float *distances, void **results);
  135 
  136 private:
  137   void SearchRec(int Level, KDNODE *SubTree);
  138   bool BoxIntersectsSearch(float *lower, float *upper);
  139 
  140   KDTREE *tree_;
  141   float *query_point_;
  142   float *sb_min_; ///< search box minimum
  143   float *sb_max_; ///< search box maximum
  144   MinK<float, void *> results_;
  145 };
  146 
  147 KDTreeSearch::KDTreeSearch(KDTREE *tree, float *query_point, int k_closest)
  148     : tree_(tree), query_point_(query_point), results_(MAXSEARCH, k_closest) {
  149   sb_min_ = new float[tree->KeySize];
  150   sb_max_ = new float[tree->KeySize];
  151 }
  152 
  153 KDTreeSearch::~KDTreeSearch() {
  154   delete[] sb_min_;
  155   delete[] sb_max_;
  156 }
  157 
  158 /// Locate the k_closest points to query_point_, and return their distances and
  159 /// data into the given buffers.
  160 void KDTreeSearch::Search(int *result_count, float *distances, void **results) {
  161   if (tree_->Root.Left == nullptr) {
  162     *result_count = 0;
  163   } else {
  164     for (int i = 0; i < tree_->KeySize; i++) {
  165       sb_min_[i] = tree_->KeyDesc[i].Min;
  166       sb_max_[i] = tree_->KeyDesc[i].Max;
  167     }
  168     SearchRec(0, tree_->Root.Left);
  169     int count = results_.elements_count();
  170     *result_count = count;
  171     for (int j = 0; j < count; j++) {
  172       // Pre-cast to float64 as key is a template type and we have no control
  173       // over its actual type.
  174       distances[j] = static_cast<float>(sqrt(static_cast<double>(results_.elements()[j].key)));
  175       results[j] = results_.elements()[j].value;
  176     }
  177   }
  178 }
  179 
  180 /*-----------------------------------------------------------------------------
  181               Public Code
  182 -----------------------------------------------------------------------------*/
  183 /// @return a new KDTREE based on the specified parameters.
  184 /// @param KeySize  # of dimensions in the K-D tree
  185 /// @param KeyDesc  array of params to describe key dimensions
  186 KDTREE *MakeKDTree(int16_t KeySize, const PARAM_DESC KeyDesc[]) {
  187   auto *KDTree = new KDTREE(KeySize);
  188   for (int i = 0; i < KeySize; i++) {
  189     KDTree->KeyDesc[i].NonEssential = KeyDesc[i].NonEssential;
  190     KDTree->KeyDesc[i].Circular = KeyDesc[i].Circular;
  191     if (KeyDesc[i].Circular) {
  192       KDTree->KeyDesc[i].Min = KeyDesc[i].Min;
  193       KDTree->KeyDesc[i].Max = KeyDesc[i].Max;
  194       KDTree->KeyDesc[i].Range = KeyDesc[i].Max - KeyDesc[i].Min;
  195       KDTree->KeyDesc[i].HalfRange = KDTree->KeyDesc[i].Range / 2;
  196       KDTree->KeyDesc[i].MidRange = (KeyDesc[i].Max + KeyDesc[i].Min) / 2;
  197     } else {
  198       KDTree->KeyDesc[i].Min = MINSEARCH;
  199       KDTree->KeyDesc[i].Max = MAXSEARCH;
  200     }
  201   }
  202   KDTree->Root.Left = nullptr;
  203   KDTree->Root.Right = nullptr;
  204   return KDTree;
  205 }
  206 
  207 /**
  208  * This routine stores Data in the K-D tree specified by Tree
  209  * using Key as an access key.
  210  *
  211  * @param Tree    K-D tree in which data is to be stored
  212  * @param Key    ptr to key by which data can be retrieved
  213  * @param Data    ptr to data to be stored in the tree
  214  */
  215 void KDStore(KDTREE *Tree, float *Key, void *Data) {
  216   auto PtrToNode = &(Tree->Root.Left);
  217   auto Node = *PtrToNode;
  218   auto Level = NextLevel(Tree, -1);
  219   while (Node != nullptr) {
  220     if (Key[Level] < Node->BranchPoint) {
  221       PtrToNode = &(Node->Left);
  222       if (Key[Level] > Node->LeftBranch) {
  223         Node->LeftBranch = Key[Level];
  224       }
  225     } else {
  226       PtrToNode = &(Node->Right);
  227       if (Key[Level] < Node->RightBranch) {
  228         Node->RightBranch = Key[Level];
  229       }
  230     }
  231     Level = NextLevel(Tree, Level);
  232     Node = *PtrToNode;
  233   }
  234 
  235   *PtrToNode = new KDNODE(Tree, Key, Data, Level);
  236 } /* KDStore */
  237 
  238 /**
  239  * This routine deletes a node from Tree.  The node to be
  240  * deleted is specified by the Key for the node and the Data
  241  * contents of the node.  These two pointers must be identical
  242  * to the pointers that were used for the node when it was
  243  * originally stored in the tree.  A node will be deleted from
  244  * the tree only if its key and data pointers are identical
  245  * to Key and Data respectively.  The tree is re-formed by removing
  246  * the affected subtree and inserting all elements but the root.
  247  *
  248  * @param Tree K-D tree to delete node from
  249  * @param Key key of node to be deleted
  250  * @param Data data contents of node to be deleted
  251  */
  252 void KDDelete(KDTREE *Tree, float Key[], void *Data) {
  253   int Level;
  254   KDNODE *Current;
  255   KDNODE *Father;
  256 
  257   /* initialize search at root of tree */
  258   Father = &(Tree->Root);
  259   Current = Father->Left;
  260   Level = NextLevel(Tree, -1);
  261 
  262   /* search tree for node to be deleted */
  263   while ((Current != nullptr) && (!NodeFound(Current, Key, Data))) {
  264     Father = Current;
  265     if (Key[Level] < Current->BranchPoint) {
  266       Current = Current->Left;
  267     } else {
  268       Current = Current->Right;
  269     }
  270 
  271     Level = NextLevel(Tree, Level);
  272   }
  273 
  274   if (Current != nullptr) { /* if node to be deleted was found */
  275     if (Current == Father->Left) {
  276       Father->Left = nullptr;
  277       Father->LeftBranch = Tree->KeyDesc[Level].Min;
  278     } else {
  279       Father->Right = nullptr;
  280       Father->RightBranch = Tree->KeyDesc[Level].Max;
  281     }
  282 
  283     InsertNodes(Tree, Current->Left);
  284     InsertNodes(Tree, Current->Right);
  285     delete Current;
  286   }
  287 } /* KDDelete */
  288 
  289 /**
  290  * This routine searches the K-D tree specified by Tree and
  291  * finds the QuerySize nearest neighbors of Query.  All neighbors
  292  * must be within MaxDistance of Query.  The data contents of
  293  * the nearest neighbors
  294  * are placed in NBuffer and their distances from Query are
  295  * placed in DBuffer.
  296  * @param Tree    ptr to K-D tree to be searched
  297  * @param Query    ptr to query key (point in D-space)
  298  * @param QuerySize  number of nearest neighbors to be found
  299  * @param MaxDistance  all neighbors must be within this distance
  300  * @param NBuffer ptr to QuerySize buffer to hold nearest neighbors
  301  * @param DBuffer ptr to QuerySize buffer to hold distances
  302  *          from nearest neighbor to query point
  303  * @param NumberOfResults [out] Number of nearest neighbors actually found
  304  */
  305 void KDNearestNeighborSearch(KDTREE *Tree, float Query[], int QuerySize, float MaxDistance,
  306                              int *NumberOfResults, void **NBuffer, float DBuffer[]) {
  307   KDTreeSearch search(Tree, Query, QuerySize);
  308   search.Search(NumberOfResults, DBuffer, NBuffer);
  309 }
  310 
  311 /*---------------------------------------------------------------------------*/
  312 /** Walk a given Tree with action. */
  313 void KDWalk(KDTREE *Tree, void_proc action, void *context) {
  314   if (Tree->Root.Left != nullptr) {
  315     Walk(Tree, action, context, Tree->Root.Left, NextLevel(Tree, -1));
  316   }
  317 }
  318 
  319 /*-----------------------------------------------------------------------------
  320               Private Code
  321 -----------------------------------------------------------------------------*/
  322 
  323 /*---------------------------------------------------------------------------*/
  324 /**
  325  * Recursively accumulate the k_closest points to query_point_ into results_.
  326  * @param Level  level in tree of sub-tree to be searched
  327  * @param SubTree  sub-tree to be searched
  328  */
  329 void KDTreeSearch::SearchRec(int level, KDNODE *sub_tree) {
  330   if (level >= tree_->KeySize) {
  331     level = 0;
  332   }
  333 
  334   if (!BoxIntersectsSearch(sb_min_, sb_max_)) {
  335     return;
  336   }
  337 
  338   results_.insert(DistanceSquared(tree_->KeySize, &tree_->KeyDesc[0], query_point_, sub_tree->Key),
  339                   sub_tree->Data);
  340 
  341   if (query_point_[level] < sub_tree->BranchPoint) {
  342     if (sub_tree->Left != nullptr) {
  343       float tmp = sb_max_[level];
  344       sb_max_[level] = sub_tree->LeftBranch;
  345       SearchRec(NextLevel(tree_, level), sub_tree->Left);
  346       sb_max_[level] = tmp;
  347     }
  348     if (sub_tree->Right != nullptr) {
  349       float tmp = sb_min_[level];
  350       sb_min_[level] = sub_tree->RightBranch;
  351       SearchRec(NextLevel(tree_, level), sub_tree->Right);
  352       sb_min_[level] = tmp;
  353     }
  354   } else {
  355     if (sub_tree->Right != nullptr) {
  356       float tmp = sb_min_[level];
  357       sb_min_[level] = sub_tree->RightBranch;
  358       SearchRec(NextLevel(tree_, level), sub_tree->Right);
  359       sb_min_[level] = tmp;
  360     }
  361     if (sub_tree->Left != nullptr) {
  362       float tmp = sb_max_[level];
  363       sb_max_[level] = sub_tree->LeftBranch;
  364       SearchRec(NextLevel(tree_, level), sub_tree->Left);
  365       sb_max_[level] = tmp;
  366     }
  367   }
  368 }
  369 
  370 /*---------------------------------------------------------------------------*/
  371 /**
  372  *Returns the Euclidean distance squared between p1 and p2 for all essential
  373  * dimensions.
  374  * @param k      keys are in k-space
  375  * @param dim    dimension descriptions (essential, circular, etc)
  376  * @param p1,p2  two different points in K-D space
  377  */
  378 float DistanceSquared(int k, PARAM_DESC *dim, float p1[], float p2[]) {
  379   float total_distance = 0;
  380 
  381   for (; k > 0; k--, p1++, p2++, dim++) {
  382     if (dim->NonEssential) {
  383       continue;
  384     }
  385 
  386     float dimension_distance = *p1 - *p2;
  387 
  388     /* if this dimension is circular - check wraparound distance */
  389     if (dim->Circular) {
  390       dimension_distance = Magnitude(dimension_distance);
  391       float wrap_distance = dim->Max - dim->Min - dimension_distance;
  392       dimension_distance = std::min(dimension_distance, wrap_distance);
  393     }
  394 
  395     total_distance += dimension_distance * dimension_distance;
  396   }
  397   return total_distance;
  398 }
  399 
  400 float ComputeDistance(int k, PARAM_DESC *dim, float p1[], float p2[]) {
  401   return std::sqrt(DistanceSquared(k, dim, p1, p2));
  402 }
  403 
  404 /*---------------------------------------------------------------------------*/
  405 /// Return whether the query region (the smallest known circle about
  406 /// query_point_ containing results->k_ points) intersects the box specified
  407 /// between lower and upper.  For circular dimensions, we also check the point
  408 /// one wrap distance away from the query.
  409 bool KDTreeSearch::BoxIntersectsSearch(float *lower, float *upper) {
  410   float *query = query_point_;
  411   // Compute the sum in higher precision.
  412   double total_distance = 0.0;
  413   double radius_squared =
  414       static_cast<double>(results_.max_insertable_key()) * results_.max_insertable_key();
  415   PARAM_DESC *dim = &tree_->KeyDesc[0];
  416 
  417   for (int i = tree_->KeySize; i > 0; i--, dim++, query++, lower++, upper++) {
  418     if (dim->NonEssential) {
  419       continue;
  420     }
  421 
  422     float dimension_distance;
  423     if (*query < *lower) {
  424       dimension_distance = *lower - *query;
  425     } else if (*query > *upper) {
  426       dimension_distance = *query - *upper;
  427     } else {
  428       dimension_distance = 0;
  429     }
  430 
  431     /* if this dimension is circular - check wraparound distance */
  432     if (dim->Circular) {
  433       float wrap_distance = FLT_MAX;
  434       if (*query < *lower) {
  435         wrap_distance = *query + dim->Max - dim->Min - *upper;
  436       } else if (*query > *upper) {
  437         wrap_distance = *lower - (*query - (dim->Max - dim->Min));
  438       }
  439       dimension_distance = std::min(dimension_distance, wrap_distance);
  440     }
  441 
  442     total_distance += static_cast<double>(dimension_distance) * dimension_distance;
  443     if (total_distance >= radius_squared) {
  444       return false;
  445     }
  446   }
  447   return true;
  448 }
  449 
  450 /*---------------------------------------------------------------------------*/
  451 /**
  452  * Walk a tree, calling action once on each node.
  453  *
  454  * Operation:
  455  *   This routine walks through the specified sub_tree and invokes action
  456  *   action at each node as follows:
  457  *       action(context, data, level)
  458  *   data  the data contents of the node being visited,
  459  *   level is the level of the node in the tree with the root being level 0.
  460  * @param tree  root of the tree being walked.
  461  * @param action  action to be performed at every node
  462  * @param context  action's context
  463  * @param sub_tree  ptr to root of subtree to be walked
  464  * @param level  current level in the tree for this node
  465  */
  466 void Walk(KDTREE *tree, void_proc action, void *context, KDNODE *sub_tree, int32_t level) {
  467   (*action)(context, sub_tree->Data, level);
  468   if (sub_tree->Left != nullptr) {
  469     Walk(tree, action, context, sub_tree->Left, NextLevel(tree, level));
  470   }
  471   if (sub_tree->Right != nullptr) {
  472     Walk(tree, action, context, sub_tree->Right, NextLevel(tree, level));
  473   }
  474 }
  475 
  476 /** Given a subtree nodes, insert all of its elements into tree. */
  477 void InsertNodes(KDTREE *tree, KDNODE *nodes) {
  478   if (nodes == nullptr) {
  479     return;
  480   }
  481 
  482   KDStore(tree, nodes->Key, nodes->Data);
  483   InsertNodes(tree, nodes->Left);
  484   InsertNodes(tree, nodes->Right);
  485 }
  486 
  487 } // namespace tesseract