"Fossies" - the Fresh Open Source Software Archive

Member "cb2bib-2.0.1/src/c2b/wordMatcher.cpp" (12 Feb 2021, 2639 Bytes) of package /linux/privat/cb2bib-2.0.1.tar.gz:


As a special service, "Fossies" has tried to format the requested source page into HTML using (guessed) C and C++ source code syntax highlighting (style: standard), with prefixed line numbers and a code folding option. Alternatively, you can view or download the uninterpreted source code file here. For more information about "wordMatcher.cpp", see the Fossies "Dox" file reference documentation and the latest Fossies "Diffs" side-by-side code changes report: 2.0.0_vs_2.0.1.

    1 /***************************************************************************
    2  *   Copyright (C) 2004-2021 by Pere Constans
    3  *   constans@molspaces.com
    4  *   cb2Bib version 2.0.1. Licensed under the GNU GPL version 3.
    5  *   See the LICENSE file that comes with this distribution.
    6  ***************************************************************************/
    7 #include "wordMatcher.h"
    8 
    9 #include "cb2bib_utilities.h"
   10 #include "triads.h"
   11 
   12 #include <limits>
   13 
   14 
   15 wordMatcher::wordMatcher() : _hook(-1), _stretch(0), _substring_count(0), _matched_length(-1), _p0(0), _pn(0) {}
   16 
   17 wordMatcher::wordMatcher(const QString& pattern, const Qt::CaseSensitivity cs) : _p0(0), _pn(0)
   18 {
   19     setPattern(pattern, cs);
   20 }
   21 
   22 
   23 void wordMatcher::setPattern(const QString& pattern, const Qt::CaseSensitivity cs)
   24 {
   25     _hook = -1;
   26     _matched_length = -1;
   27     _stretch = 0;
   28 
   29     _subpatterns = pattern.split(c2bUtils::nonLetter, QString::SkipEmptyParts);
   30     if (_subpatterns.count() == 0)
   31         return;
   32     _substring_count = _subpatterns.count();
   33     for (int i = 0; i < _substring_count; ++i)
   34         _subpatterns[i].replace('_', ' ');
   35     _substrings.resize(_substring_count);
   36     _lengths.resize(_substring_count);
   37     _sp0.resize(_substring_count);
   38     for (int i = 0; i < _substring_count; ++i)
   39     {
   40         _substrings[i] = txtmatcher(_subpatterns.at(i), cs);
   41         _lengths[i] = _subpatterns.at(i).length();
   42         if (_lengths.at(i) > 4)
   43             _stretch += 50;
   44         else
   45             _stretch += 10;
   46     }
   47     for (int i = 0, lf = std::numeric_limits<int>::max(); i < _substring_count; ++i)
   48     {
   49         const int f(_substrings.at(i).frequency() * triads::textFrequency(_substrings.at(i).pattern()));
   50         if (f < lf)
   51         {
   52             lf = f;
   53             _hook = i;
   54         }
   55     }
   56 #ifdef C2B_DEBUG_SEARCHING
   57     for (int i = 0; i < _substring_count; ++i)
   58         qDebug() << "subpattern         " << _substrings.at(i).pattern();
   59     qDebug() << "hook               " << _substrings.at(_hook).pattern() << _hook;
   60     qDebug() << "_stretch           " << _stretch;
   61     qDebug() << "subpatternstrings  " << subpatternStrings();
   62 #endif
   63 }
   64 
   65 int wordMatcher::indexIn(const QString& text, const int from) const
   66 {
   67     _matched_length = -1;
   68     if (_hook == -1) // Uninitialized
   69         return -1;
   70     _p0 = from;
   71     if (_p0 < 0)
   72         _p0 = 0;
   73     _pn = text.length();
   74     if (_pn == 0)
   75         return -1;
   76 
   77     int hp(_p0);
   78     int p(-1);
   79     while (p == -1)
   80     {
   81         hp = _index_in(_hook, text, hp);
   82         if (hp == -1)
   83             return -1;
   84         p = _index_around(text, hp);
   85         hp += _lengths.at(_hook);
   86     }
   87     return p;
   88 }