"Fossies" - the Fresh Open Source Software Archive

Member "cb2bib-2.0.1/src/c2b/stemMatcher.h" (12 Feb 2021, 4271 Bytes) of package /linux/privat/cb2bib-2.0.1.tar.gz:


As a special service, "Fossies" has tried to format the requested source page into HTML using (guessed) C and C++ source code syntax highlighting (style: standard), with prefixed line numbers and a code folding option. Alternatively, you can view or download the uninterpreted source code file here. For more information about "stemMatcher.h", see the Fossies "Dox" file reference documentation.

    1 /***************************************************************************
    2  *   Copyright (C) 2004-2021 by Pere Constans
    3  *   constans@molspaces.com
    4  *   cb2Bib version 2.0.1. Licensed under the GNU GPL version 3.
    5  *   See the LICENSE file that comes with this distribution.
    6  ***************************************************************************/
    7 #ifndef STEMMATCHER_H
    8 #define STEMMATCHER_H
    9 
   10 #include "txtmatcher.h"
   11 
   12 #include <QRegularExpression>
   13 #include <QVector>
   14 
   15 
   16 class stemMatcher
   17 {
   18 
   19 public:
   20     stemMatcher();
   21     explicit stemMatcher(const QString& pattern, const Qt::CaseSensitivity cs = Qt::CaseSensitive);
   22     inline ~stemMatcher() {}
   23 
   24 
   25     int indexIn(const QString& text, const int from = 0) const;
   26     void setPattern(const QString& pattern, const Qt::CaseSensitivity cs);
   27     inline int matchedLength() const
   28     {
   29         return _matched_length;
   30     }
   31     inline const QStringList& subpatternStrings() const
   32     {
   33         return _subpatterns;
   34     }
   35     inline QString signatureString() const
   36     {
   37         return _signature_string;
   38     }
   39 
   40 
   41 private:
   42     inline int _index_in(const int s, const QString& text, const int p0) const
   43     {
   44         if (s < _substring_txt_count)
   45         {
   46             const int i(_substrings_txt.at(s).indexIn(text, p0));
   47             _matched_lengths[s] = i == -1 ? 0 : _substrings_txt.at(s).pattern().length();
   48             return i;
   49         }
   50         else
   51         {
   52             _match = _substrings_rex.at(s - _substring_txt_count).match(text, p0);
   53             _matched_lengths[s] = _match.capturedLength();
   54             return _match.capturedStart();
   55         }
   56     }
   57     inline int _index_in(const int s, const QString& text, const int p0, const int pn) const
   58     {
   59         if (s < _substring_txt_count)
   60         {
   61             const int i(_substrings_txt.at(s).indexIn(text.unicode(), std::min(_pn, pn), p0));
   62             _matched_lengths[s] = i == -1 ? 0 : _substrings_txt.at(s).pattern().length();
   63             return i;
   64         }
   65         else
   66         {
   67             const int npn(std::min(_pn, pn));
   68             if (p0 >= npn)
   69             {
   70                 _matched_lengths[s] = 0;
   71                 return -1;
   72             }
   73             const QStringRef context(&text, p0, npn - p0);
   74             _match = _substrings_rex.at(s - _substring_txt_count).match(context);
   75             _matched_lengths[s] = _match.capturedLength();
   76 #if QT_VERSION >= QT_VERSION_CHECK(5, 6, 0)
   77             return _match.hasMatch() ? _match.capturedStart() + p0 : -1;
   78 #else
   79 #warning[cb2Bib] Linking against older than Qt 5.6 might effect some Context searches
   80             return _match.hasMatch() ? _match.capturedStart() : -1;
   81 #endif
   82         }
   83     }
   84     inline int _index_around(const QString& text, const int phook) const
   85     {
   86         for (int i = 0; i < _substring_count; ++i)
   87             _sp0[i] = -1;
   88         _sp0[_hook] = phook;
   89 
   90         const int bp0 = std::max(_p0, phook - _stretch);
   91         const int bpn = std::min(_pn, phook + _stretch);
   92         for (int i = 0; i < _substring_count; ++i)
   93             if (_sp0.at(i) == -1)
   94             {
   95                 const int p0(_index_in(i, text, bp0, bpn));
   96                 if (p0 == -1)
   97                     return -1;
   98                 _sp0[i] = p0;
   99             }
  100 
  101         int bp(_pn);
  102         int fp(_p0);
  103         for (int i = 0; i < _substring_count; ++i)
  104         {
  105             const int p0(_sp0.at(i));
  106             if (bp > p0)
  107                 bp = p0;
  108             const int pn(p0 + _matched_lengths.at(i));
  109             if (fp < pn)
  110                 fp = pn;
  111         }
  112         // Beautify match by including word endings
  113         for (int l = fp; l < _pn; ++l)
  114             if (text.at(l).isLetter())
  115                 ++fp;
  116             else
  117                 break;
  118         _matched_length = fp - bp;
  119         return bp;
  120     }
  121 
  122     QString _signature_string;
  123     QStringList _subpatterns;
  124     QVector<QRegularExpression> _substrings_rex;
  125     QVector<txtmatcher> _substrings_txt;
  126     int _hook;
  127     int _stretch;
  128     int _substring_count;
  129     int _substring_rex_count;
  130     int _substring_txt_count;
  131     mutable QRegularExpressionMatch _match;
  132     mutable QVector<int> _matched_lengths;
  133     mutable QVector<int> _sp0;
  134     mutable int _matched_length;
  135     mutable int _p0;
  136     mutable int _pn;
  137 };
  138 
  139 #endif