"Fossies" - the Fresh Open Source Software Archive

Member "tesseract-5.2.0/src/classify/adaptive.cpp" (6 Jul 2022, 13307 Bytes) of package /linux/misc/tesseract-5.2.0.tar.gz:


As a special service, "Fossies" has tried to format the requested source page into HTML using (guessed) C and C++ source code syntax highlighting (style: standard) with prefixed line numbers and a code folding option. Alternatively, you can view or download the uninterpreted source code file here. For more information about "adaptive.cpp", see the Fossies "Dox" file reference documentation and the latest Fossies "Diffs" side-by-side code changes report: 4.1.3_vs_5.0.0.

    1 /******************************************************************************
    2  ** Filename:    adaptive.cpp
    3  ** Purpose:     Adaptive matcher.
    4  ** Author:      Dan Johnson
    5  **
    6  ** (c) Copyright Hewlett-Packard Company, 1988.
    7  ** Licensed under the Apache License, Version 2.0 (the "License");
    8  ** you may not use this file except in compliance with the License.
    9  ** You may obtain a copy of the License at
   10  ** http://www.apache.org/licenses/LICENSE-2.0
   11  ** Unless required by applicable law or agreed to in writing, software
   12  ** distributed under the License is distributed on an "AS IS" BASIS,
   13  ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
   14  ** See the License for the specific language governing permissions and
   15  ** limitations under the License.
   16  ******************************************************************************/
   17 
   18 #include "adaptive.h"
   19 
   20 #include "classify.h"
   21 
   22 #include <cassert>
   23 #include <cstdio>
   24 
   25 namespace tesseract {
   26 
   27 /*----------------------------------------------------------------------------
   28               Public Code
   29 ----------------------------------------------------------------------------*/
   30 /*---------------------------------------------------------------------------*/
   31 /**
   32  * This routine adds a new adapted class to an existing
   33  * set of adapted templates.
   34  *
   35  * @param Templates set of templates to add new class to
   36  * @param Class new class to add to templates
   37  * @param ClassId class id to associate with new class
   38  *
   39  * @note Globals: none
   40  */
   41 void AddAdaptedClass(ADAPT_TEMPLATES_STRUCT *Templates, ADAPT_CLASS_STRUCT *Class, CLASS_ID ClassId) {
   42   assert(Templates != nullptr);
   43   assert(Class != nullptr);
   44   assert(LegalClassId(ClassId));
   45   assert(UnusedClassIdIn(Templates->Templates, ClassId));
   46   assert(Class->NumPermConfigs == 0);
   47 
   48   auto IntClass = new INT_CLASS_STRUCT(1, 1);
   49   AddIntClass(Templates->Templates, ClassId, IntClass);
   50 
   51   assert(Templates->Class[ClassId] == nullptr);
   52   Templates->Class[ClassId] = Class;
   53 
   54 } /* AddAdaptedClass */
   55 
   56 /*---------------------------------------------------------------------------*/
   57 
   58 PERM_CONFIG_STRUCT::~PERM_CONFIG_STRUCT() {
   59   delete[] Ambigs;
   60 }
   61 
   62 ADAPT_CLASS_STRUCT::ADAPT_CLASS_STRUCT() {
   63   NumPermConfigs = 0;
   64   MaxNumTimesSeen = 0;
   65   TempProtos = NIL_LIST;
   66 
   67   PermProtos = NewBitVector(MAX_NUM_PROTOS);
   68   PermConfigs = NewBitVector(MAX_NUM_CONFIGS);
   69   zero_all_bits(PermProtos, WordsInVectorOfSize(MAX_NUM_PROTOS));
   70   zero_all_bits(PermConfigs, WordsInVectorOfSize(MAX_NUM_CONFIGS));
   71 
   72   for (int i = 0; i < MAX_NUM_CONFIGS; i++) {
   73     TempConfigFor(this, i) = nullptr;
   74   }
   75 }
   76 
   77 ADAPT_CLASS_STRUCT::~ADAPT_CLASS_STRUCT() {
   78   for (int i = 0; i < MAX_NUM_CONFIGS; i++) {
   79     if (ConfigIsPermanent(this, i) && PermConfigFor(this, i) != nullptr) {
   80       delete PermConfigFor(this, i);
   81     } else if (!ConfigIsPermanent(this, i) && TempConfigFor(this, i) != nullptr) {
   82       delete TempConfigFor(this, i);
   83     }
   84   }
   85   FreeBitVector(PermProtos);
   86   FreeBitVector(PermConfigs);
   87   auto list = TempProtos;
   88   while (list != nullptr) {
   89     delete reinterpret_cast<TEMP_PROTO_STRUCT *>(list->node);
   90     list = pop(list);
   91   }
   92 }
   93 
   94 /// Constructor for adapted templates.
   95 /// Add an empty class for each char in unicharset to the newly created templates.
   96 ADAPT_TEMPLATES_STRUCT::ADAPT_TEMPLATES_STRUCT(UNICHARSET &unicharset) {
   97   Templates = new INT_TEMPLATES_STRUCT;
   98   NumPermClasses = 0;
   99   NumNonEmptyClasses = 0;
  100 
  101   /* Insert an empty class for each unichar id in unicharset */
  102   for (unsigned i = 0; i < MAX_NUM_CLASSES; i++) {
  103     Class[i] = nullptr;
  104     if (i < unicharset.size()) {
  105       AddAdaptedClass(this, new ADAPT_CLASS_STRUCT, i);
  106     }
  107   }
  108 }
  109 
  110 ADAPT_TEMPLATES_STRUCT::~ADAPT_TEMPLATES_STRUCT() {
  111   for (unsigned i = 0; i < (Templates)->NumClasses; i++) {
  112     delete Class[i];
  113   }
  114   delete Templates;
  115 }
  116 
  117 // Returns FontinfoId of the given config of the given adapted class.
  118 int Classify::GetFontinfoId(ADAPT_CLASS_STRUCT *Class, uint8_t ConfigId) {
  119   return (ConfigIsPermanent(Class, ConfigId) ? PermConfigFor(Class, ConfigId)->FontinfoId
  120                                              : TempConfigFor(Class, ConfigId)->FontinfoId);
  121 }
  122 
  123 /// This constructor allocates and returns a new temporary config.
  124 ///
  125 /// @param MaxProtoId  max id of any proto in new config
  126 /// @param FontinfoId font information from pre-trained templates
  127 TEMP_CONFIG_STRUCT::TEMP_CONFIG_STRUCT(int maxProtoId, int fontinfoId) {
  128   int NumProtos = maxProtoId + 1;
  129 
  130   Protos = NewBitVector(NumProtos);
  131 
  132   NumTimesSeen = 1;
  133   MaxProtoId = maxProtoId;
  134   ProtoVectorSize = WordsInVectorOfSize(NumProtos);
  135   zero_all_bits(Protos, ProtoVectorSize);
  136   FontinfoId = fontinfoId;
  137 }
  138 
  139 TEMP_CONFIG_STRUCT::~TEMP_CONFIG_STRUCT() {
  140   FreeBitVector(Protos);
  141 }
  142 
  143 /*---------------------------------------------------------------------------*/
  144 /**
  145  * This routine prints a summary of the adapted templates
  146  *  in Templates to File.
  147  *
  148  * @param File    open text file to print Templates to
  149  * @param Templates adapted templates to print to File
  150  *
  151  * @note Globals: none
  152  */
  153 void Classify::PrintAdaptedTemplates(FILE *File, ADAPT_TEMPLATES_STRUCT *Templates) {
  154   INT_CLASS_STRUCT *IClass;
  155   ADAPT_CLASS_STRUCT *AClass;
  156 
  157   fprintf(File, "\n\nSUMMARY OF ADAPTED TEMPLATES:\n\n");
  158   fprintf(File, "Num classes = %d;  Num permanent classes = %d\n\n", Templates->NumNonEmptyClasses,
  159           Templates->NumPermClasses);
  160   fprintf(File, "   Id  NC NPC  NP NPP\n");
  161   fprintf(File, "------------------------\n");
  162 
  163   for (unsigned i = 0; i < (Templates->Templates)->NumClasses; i++) {
  164     IClass = Templates->Templates->Class[i];
  165     AClass = Templates->Class[i];
  166     if (!IsEmptyAdaptedClass(AClass)) {
  167       fprintf(File, "%5u  %s %3d %3d %3d %3zd\n", i, unicharset.id_to_unichar(i), IClass->NumConfigs,
  168               AClass->NumPermConfigs, IClass->NumProtos,
  169               IClass->NumProtos - AClass->TempProtos->size());
  170     }
  171   }
  172   fprintf(File, "\n");
  173 
  174 } /* PrintAdaptedTemplates */
  175 
  176 /*---------------------------------------------------------------------------*/
  177 /**
  178  * Read an adapted class description from file and return
  179  * a ptr to the adapted class.
  180  *
  181  * @param fp open file to read adapted class from
  182  * @return Ptr to new adapted class.
  183  *
  184  * @note Globals: none
  185  */
  186 ADAPT_CLASS_STRUCT *ReadAdaptedClass(TFile *fp) {
  187   int NumTempProtos;
  188   int NumConfigs;
  189   int i;
  190   ADAPT_CLASS_STRUCT *Class;
  191 
  192   /* first read high level adapted class structure */
  193   Class = new ADAPT_CLASS_STRUCT;
  194   fp->FRead(Class, sizeof(ADAPT_CLASS_STRUCT), 1);
  195 
  196   /* then read in the definitions of the permanent protos and configs */
  197   Class->PermProtos = NewBitVector(MAX_NUM_PROTOS);
  198   Class->PermConfigs = NewBitVector(MAX_NUM_CONFIGS);
  199   fp->FRead(Class->PermProtos, sizeof(uint32_t), WordsInVectorOfSize(MAX_NUM_PROTOS));
  200   fp->FRead(Class->PermConfigs, sizeof(uint32_t), WordsInVectorOfSize(MAX_NUM_CONFIGS));
  201 
  202   /* then read in the list of temporary protos */
  203   fp->FRead(&NumTempProtos, sizeof(int), 1);
  204   Class->TempProtos = NIL_LIST;
  205   for (i = 0; i < NumTempProtos; i++) {
  206     auto TempProto = new TEMP_PROTO_STRUCT;
  207     fp->FRead(TempProto, sizeof(TEMP_PROTO_STRUCT), 1);
  208     Class->TempProtos = push_last(Class->TempProtos, TempProto);
  209   }
  210 
  211   /* then read in the adapted configs */
  212   fp->FRead(&NumConfigs, sizeof(int), 1);
  213   for (i = 0; i < NumConfigs; i++) {
  214     if (test_bit(Class->PermConfigs, i)) {
  215       Class->Config[i].Perm = ReadPermConfig(fp);
  216     } else {
  217       Class->Config[i].Temp = ReadTempConfig(fp);
  218     }
  219   }
  220 
  221   return (Class);
  222 
  223 } /* ReadAdaptedClass */
  224 
  225 /*---------------------------------------------------------------------------*/
  226 /**
  227  * Read a set of adapted templates from file and return
  228  * a ptr to the templates.
  229  *
  230  * @param fp open text file to read adapted templates from
  231  * @return Ptr to adapted templates read from file.
  232  *
  233  * @note Globals: none
  234  */
  235 ADAPT_TEMPLATES_STRUCT *Classify::ReadAdaptedTemplates(TFile *fp) {
  236   auto Templates = new ADAPT_TEMPLATES_STRUCT;
  237 
  238   /* first read the high level adaptive template struct */
  239   fp->FRead(Templates, sizeof(ADAPT_TEMPLATES_STRUCT), 1);
  240 
  241   /* then read in the basic integer templates */
  242   Templates->Templates = ReadIntTemplates(fp);
  243 
  244   /* then read in the adaptive info for each class */
  245   for (unsigned i = 0; i < (Templates->Templates)->NumClasses; i++) {
  246     Templates->Class[i] = ReadAdaptedClass(fp);
  247   }
  248   return (Templates);
  249 
  250 } /* ReadAdaptedTemplates */
  251 
  252 /*---------------------------------------------------------------------------*/
  253 /**
  254  * Read a permanent configuration description from file
  255  * and return a ptr to it.
  256  *
  257  * @param fp open file to read permanent config from
  258  * @return Ptr to new permanent configuration description.
  259  *
  260  * @note Globals: none
  261  */
  262 PERM_CONFIG_STRUCT *ReadPermConfig(TFile *fp) {
  263   auto Config = new PERM_CONFIG_STRUCT;
  264   uint8_t NumAmbigs;
  265   fp->FRead(&NumAmbigs, sizeof(NumAmbigs), 1);
  266   Config->Ambigs = new UNICHAR_ID[NumAmbigs + 1];
  267   fp->FRead(Config->Ambigs, sizeof(UNICHAR_ID), NumAmbigs);
  268   Config->Ambigs[NumAmbigs] = -1;
  269   fp->FRead(&(Config->FontinfoId), sizeof(int), 1);
  270 
  271   return (Config);
  272 
  273 } /* ReadPermConfig */
  274 
  275 /*---------------------------------------------------------------------------*/
  276 /**
  277  * Read a temporary configuration description from file
  278  * and return a ptr to it.
  279  *
  280  * @param fp open file to read temporary config from
  281  * @return Ptr to new temporary configuration description.
  282  *
  283  * @note Globals: none
  284  */
  285 TEMP_CONFIG_STRUCT *ReadTempConfig(TFile *fp) {
  286   auto Config = new TEMP_CONFIG_STRUCT;
  287   fp->FRead(Config, sizeof(TEMP_CONFIG_STRUCT), 1);
  288 
  289   Config->Protos = NewBitVector(Config->ProtoVectorSize * BITSINLONG);
  290   fp->FRead(Config->Protos, sizeof(uint32_t), Config->ProtoVectorSize);
  291 
  292   return (Config);
  293 
  294 } /* ReadTempConfig */
  295 
  296 /*---------------------------------------------------------------------------*/
  297 /**
  298  * This routine writes a binary representation of Class
  299  * to File.
  300  *
  301  * @param File    open file to write Class to
  302  * @param Class   adapted class to write to File
  303  * @param NumConfigs  number of configs in Class
  304  *
  305  * @note Globals: none
  306  */
  307 void WriteAdaptedClass(FILE *File, ADAPT_CLASS_STRUCT *Class, int NumConfigs) {
  308   /* first write high level adapted class structure */
  309   fwrite(Class, sizeof(ADAPT_CLASS_STRUCT), 1, File);
  310 
  311   /* then write out the definitions of the permanent protos and configs */
  312   fwrite(Class->PermProtos, sizeof(uint32_t), WordsInVectorOfSize(MAX_NUM_PROTOS), File);
  313   fwrite(Class->PermConfigs, sizeof(uint32_t), WordsInVectorOfSize(MAX_NUM_CONFIGS), File);
  314 
  315   /* then write out the list of temporary protos */
  316   uint32_t NumTempProtos = Class->TempProtos->size();
  317   fwrite(&NumTempProtos, sizeof(NumTempProtos), 1, File);
  318   auto TempProtos = Class->TempProtos;
  319   iterate(TempProtos) {
  320     void *proto = TempProtos->node;
  321     fwrite(proto, sizeof(TEMP_PROTO_STRUCT), 1, File);
  322   }
  323 
  324   /* then write out the adapted configs */
  325   fwrite(&NumConfigs, sizeof(int), 1, File);
  326   for (int i = 0; i < NumConfigs; i++) {
  327     if (test_bit(Class->PermConfigs, i)) {
  328       WritePermConfig(File, Class->Config[i].Perm);
  329     } else {
  330       WriteTempConfig(File, Class->Config[i].Temp);
  331     }
  332   }
  333 
  334 } /* WriteAdaptedClass */
  335 
  336 /*---------------------------------------------------------------------------*/
  337 /**
  338  * This routine saves Templates to File in a binary format.
  339  *
  340  * @param File    open text file to write Templates to
  341  * @param Templates set of adapted templates to write to File
  342  *
  343  * @note Globals: none
  344  */
  345 void Classify::WriteAdaptedTemplates(FILE *File, ADAPT_TEMPLATES_STRUCT *Templates) {
  346   /* first write the high level adaptive template struct */
  347   fwrite(Templates, sizeof(ADAPT_TEMPLATES_STRUCT), 1, File);
  348 
  349   /* then write out the basic integer templates */
  350   WriteIntTemplates(File, Templates->Templates, unicharset);
  351 
  352   /* then write out the adaptive info for each class */
  353   for (unsigned i = 0; i < (Templates->Templates)->NumClasses; i++) {
  354     WriteAdaptedClass(File, Templates->Class[i], Templates->Templates->Class[i]->NumConfigs);
  355   }
  356 } /* WriteAdaptedTemplates */
  357 
  358 /*---------------------------------------------------------------------------*/
  359 /**
  360  * This routine writes a binary representation of a
  361  * permanent configuration to File.
  362  *
  363  * @param File  open file to write Config to
  364  * @param Config  permanent config to write to File
  365  *
  366  * @note Globals: none
  367  */
  368 void WritePermConfig(FILE *File, PERM_CONFIG_STRUCT *Config) {
  369   uint8_t NumAmbigs = 0;
  370 
  371   assert(Config != nullptr);
  372   while (Config->Ambigs[NumAmbigs] > 0) {
  373     ++NumAmbigs;
  374   }
  375 
  376   fwrite(&NumAmbigs, sizeof(uint8_t), 1, File);
  377   fwrite(Config->Ambigs, sizeof(UNICHAR_ID), NumAmbigs, File);
  378   fwrite(&(Config->FontinfoId), sizeof(int), 1, File);
  379 } /* WritePermConfig */
  380 
  381 /*---------------------------------------------------------------------------*/
  382 /**
  383  * This routine writes a binary representation of a
  384  * temporary configuration to File.
  385  *
  386  * @param File  open file to write Config to
  387  * @param Config  temporary config to write to File
  388  *
  389  * @note Globals: none
  390  */
  391 void WriteTempConfig(FILE *File, TEMP_CONFIG_STRUCT *Config) {
  392   assert(Config != nullptr);
  393 
  394   fwrite(Config, sizeof(TEMP_CONFIG_STRUCT), 1, File);
  395   fwrite(Config->Protos, sizeof(uint32_t), Config->ProtoVectorSize, File);
  396 
  397 } /* WriteTempConfig */
  398 
  399 } // namespace tesseract