"Fossies" - the Fresh Open Source Software Archive

Member "cb2bib-2.0.1/src/c2b/searchPattern.cpp" (12 Feb 2021, 10601 Bytes) of package /linux/privat/cb2bib-2.0.1.tar.gz:


As a special service, "Fossies" has tried to format the requested source page into HTML format using (guessed) C and C++ source code syntax highlighting (style: standard) with prefixed line numbers and a code folding option. Alternatively, you can view or download the uninterpreted source code file here. For more information about "searchPattern.cpp", see the Fossies "Dox" file reference documentation and the latest Fossies "Diffs" side-by-side code changes report: 2.0.0_vs_2.0.1.

    1 /***************************************************************************
    2  *   Copyright (C) 2004-2021 by Pere Constans
    3  *   constans@molspaces.com
    4  *   cb2Bib version 2.0.1. Licensed under the GNU GPL version 3.
    5  *   See the LICENSE file that comes with this distribution.
    6  ***************************************************************************/
    7 #include "searchPattern.h"
    8 
    9 #include "triads.h"
   10 
   11 #include <QMap>
   12 
   13 
   // FALLTHROUGH marks an intentional fall-through between switch labels.
   // It expands to the [[fallthrough]] attribute when the compiler reports
   // support for it, and to nothing otherwise.
   // The feature-test macro is only probed, never defined here: names that
   // contain a double underscore are reserved for the implementation. The
   // probe is nested because a compiler lacking __has_cpp_attribute could not
   // parse "__has_cpp_attribute(fallthrough)" in a combined condition.
   #ifdef __has_cpp_attribute
   #if __has_cpp_attribute(fallthrough)
   #define FALLTHROUGH [[fallthrough]]
   #endif
   #endif
   #ifndef FALLTHROUGH
   #define FALLTHROUGH
   #endif
   22 
   23 
   24 /**
   25     Top level driver for calling cb2Bib search types
   26 */
   27 searchPattern::searchPattern(const QString& pattern, const QString& patternType)
   28     : _matcher(pattern, typemap().value(patternType), Qt::CaseInsensitive)
   29 {
   30     _modifier.NOT = false;
   31     _modifier.string = _matcher.typedpattern;
   32     _rank = 0;
   33 }
   34 
   35 searchPattern::searchPattern(const bool NOT, const bool caseSensitive, const QString& patternType, const QString& scope,
   36                              const QChar& yearScope, const QString& pattern)
   37     : _matcher(pattern, typemap().value(patternType), qtcase(caseSensitive))
   38 {
   39     _modifier.NOT = NOT;
   40     _modifier.string = _matcher.typedpattern;
   41     _modifier.scope = scope;
   42     _modifier.yearScope = yearScope;
   43 
   44     if (_modifier.NOT)
   45         _formatted_string += "NOT.";
   46     _formatted_string += QString(" [%1][%2|case=%3]").arg(_modifier.string, patternType).arg(caseSensitive);
   47     if (_modifier.scope == "year")
   48         _formatted_string += QString(" IN [%1(%2)]").arg(_modifier.scope).arg(_modifier.yearScope);
   49     else
   50         _formatted_string += QString(" IN [%1]").arg(_modifier.scope);
   51 
   52     // Set an approximate ranking to speed up composite searches
   53     _rank = 0;
   54     if (_modifier.scope == "file")
   55         _rank += 200;
   56     else if (_modifier.scope == "all")
   57         _rank += 100;
   58     else if (_modifier.scope == "year" || _modifier.scope == "volume" || _modifier.scope == "pages")
   59         _rank += 50;
   60     else
   61         _rank += 1;
   62     if (!caseSensitive)
   63         _rank *= 2;
   64     if (_matcher.type == FixedStringAllWords || _matcher.type == FixedStringAnyWord)
   65         _rank *= 5;
   66     else if (_matcher.type == ApproximateString)
   67         _rank *= 10;
   68     else if (_matcher.type == RegularExpression)
   69         _rank *= 20;
   70 }
   71 
   72 const QStringList searchPattern::types()
   73 {
   74     return typemap().keys();
   75 }
   76 
   77 const QString searchPattern::type(const Type t)
   78 {
   79     return typemap().key(t);
   80 }
   81 
   82 const QMap<QString, searchPattern::Type> searchPattern::typemap()
   83 {
   84     QMap<QString, Type> tm;
   85     tm.insert(QObject::tr("Approximate string"), ApproximateString);
   86     tm.insert(QObject::tr("Context"), Context);
   87     tm.insert(QObject::tr("Fixed string: All Words"), FixedStringAllWords);
   88     tm.insert(QObject::tr("Fixed string: Any Word"), FixedStringAnyWord);
   89     tm.insert(QObject::tr("Fixed string: Context"), FixedStringContext);
   90     tm.insert(QObject::tr("Fixed string"), FixedString);
   91     tm.insert(QObject::tr("Regular expression"), RegularExpression);
   92     tm.insert(QObject::tr("Wildcard"), Wildcard);
   93     return tm;
   94 }
   95 
   96 
   97 searchPattern::matcher::matcher(const QString& pattern, const Type t, const Qt::CaseSensitivity cs)
   98     : csensitivity(cs), type(t), length(-1)
   99 {
  100     _set_typed_pattern(pattern);
  101 
  102     switch (t)
  103     {
  104     default:
  105         c2bUtils::warn(QObject::tr("Internal Error: Invalid search pattern type. Set to 'Approximate string'"));
  106         FALLTHROUGH;
  107     case ApproximateString:
  108     {
  109         appexp.setPattern(typedpattern, cs);
  110         if (appexp.isMultipattern())
  111         {
  112             signature = triads::textSignature(typedpattern);
  113             subsignatures = triads::textSignature(appexp.substrings());
  114         }
  115         else // Skip signatures for regular expression case
  116         {
  117             regexp = appexp.regexp();
  118             type = RegularExpression;
  119         }
  120     }
  121     break;
  122     case Context:
  123     {
  124         cstemexp.setPattern(typedpattern, cs);
  125         signature = triads::textSignature(cstemexp.signatureString());
  126     }
  127     break;
  128     case FixedStringAnyWord:
  129     {
  130         wordexp.setPattern(typedpattern, wordPattern::AnyWord, cs);
  131         signature = triads::textSignature(typedpattern);
  132         subsignatures = triads::textSignature(wordexp.substrings());
  133     }
  134     break;
  135     case FixedStringAllWords:
  136     {
  137         wordexp.setPattern(typedpattern, wordPattern::AllWords, cs);
  138         signature = triads::textSignature(typedpattern);
  139         subsignatures.fill(signature, wordexp.subpatternCount());
  140     }
  141     break;
  142     case FixedStringContext:
  143     {
  144         cwordexp.setPattern(typedpattern, cs);
  145         signature = triads::textSignature(typedpattern);
  146     }
  147     break;
  148     case FixedString:
  149     {
  150         strexp.setPattern(typedpattern);
  151         strexp.setCaseSensitivity(cs);
  152         signature = triads::textSignature(typedpattern);
  153     }
  154     break;
  155     case RegularExpression:
  156     {
  157 #if (QT_VERSION >= QT_VERSION_CHECK(5, 4, 0)) && (QT_VERSION < QT_VERSION_CHECK(5, 12, 0))
  158         const QRegularExpression::PatternOptions po(QRegularExpression::DontCaptureOption |
  159                 QRegularExpression::UseUnicodePropertiesOption |
  160                 QRegularExpression::OptimizeOnFirstUsageOption);
  161 #else
  162         const QRegularExpression::PatternOptions po(QRegularExpression::DontCaptureOption |
  163                 QRegularExpression::UseUnicodePropertiesOption);
  164 #endif
  165         regexp.setPattern(typedpattern);
  166         regexp.setPatternOptions(cs == Qt::CaseSensitive ? po : po | QRegularExpression::CaseInsensitiveOption);
  167     }
  168     break;
  169     case Wildcard:
  170     {
  171 #if (QT_VERSION >= QT_VERSION_CHECK(5, 4, 0)) && (QT_VERSION < QT_VERSION_CHECK(5, 12, 0))
  172         const QRegularExpression::PatternOptions po(
  173             QRegularExpression::InvertedGreedinessOption | QRegularExpression::DontCaptureOption |
  174             QRegularExpression::UseUnicodePropertiesOption | QRegularExpression::OptimizeOnFirstUsageOption);
  175 #else
  176         const QRegularExpression::PatternOptions po(QRegularExpression::InvertedGreedinessOption |
  177                 QRegularExpression::DontCaptureOption |
  178                 QRegularExpression::UseUnicodePropertiesOption);
  179 #endif
  180         regexp.setPattern(_wildcard_escaped(typedpattern));
  181         regexp.setPatternOptions(cs == Qt::CaseSensitive ? po : po | QRegularExpression::CaseInsensitiveOption);
  182         signature = triads::textSignature(typedpattern);
  183     }
  184     break;
  185     }
  186 }
  187 
  188 bool searchPattern::matcher::match(const QString& contents) const
  189 {
  190     length = -1;
  191     switch (type)
  192     {
  193     case ApproximateString:
  194         return appexp.matches(contents);
  195     case Context:
  196         return cstemexp.indexIn(contents) != -1;
  197     case FixedStringAnyWord:
  198     case FixedStringAllWords:
  199         return wordexp.matches(contents);
  200     case FixedStringContext:
  201         return cwordexp.indexIn(contents) != -1;
  202     case FixedString:
  203         return strexp.indexIn(contents) != -1;
  204     default:
  205         return regexp.match(contents).hasMatch();
  206     }
  207 }
  208 
  209 bool searchPattern::matcher::match(const documentContents& contents) const
  210 {
  211     length = -1;
  212     switch (type)
  213     {
  214     case ApproximateString:
  215         return _match_any(appexp.subpatternCount(), appexp.submatchers(), contents);
  216     case Context:
  217         return _match(cstemexp, signature, contents);
  218     case FixedStringAnyWord:
  219         return _match_any(wordexp.subpatternCount(), wordexp.submatchers(), contents);
  220     case FixedStringAllWords:
  221         return _match_all(wordexp.subpatternCount(), wordexp.submatchers(), contents);
  222     case FixedStringContext:
  223         return _match(cwordexp, signature, contents);
  224     case FixedString:
  225         return _match(strexp, signature, contents);
  226     default:
  227         return _match(regexp, signature, contents);
  228     }
  229 }
  230 
  231 int searchPattern::matcher::index(const QString& contents, const int from) const
  232 {
  233     int i;
  234     switch (type)
  235     {
  236     case ApproximateString:
  237         i = appexp.indexIn(contents, from);
  238         length = appexp.matchedLength();
  239         return i;
  240     case Context:
  241         i = cstemexp.indexIn(contents, from);
  242         length = cstemexp.matchedLength();
  243         return i;
  244     case FixedStringAnyWord:
  245     case FixedStringAllWords:
  246         i = wordexp.indexIn(contents, from);
  247         length = wordexp.matchedLength();
  248         return i;
  249     case FixedStringContext:
  250         i = cwordexp.indexIn(contents, from);
  251         length = cwordexp.matchedLength();
  252         return i;
  253     case FixedString:
  254         i = strexp.indexIn(contents, from);
  255         length = strexp.pattern().length();
  256         return i;
  257     default:
  258         regexp_match = regexp.match(contents, from);
  259         i = regexp_match.capturedStart();
  260         length = regexp_match.capturedLength();
  261         return i;
  262     }
  263 }
  264 
  265 void searchPattern::matcher::_set_typed_pattern(const QString& pattern)
  266 {
  267     const QRegExp rmors("\\|[^\\s]*");
  268     const QRegExp rmnws("[\\W_]");
  269     const QRegExp rmnls("[\\W\\d]");
  270 
  271     QString stripped(pattern);
  272     switch (type)
  273     {
  274     case ApproximateString:
  275     case Context:
  276     case FixedStringAllWords:
  277         stripped.replace(rmors, " ");
  278         stripped.replace(rmnws, " ");
  279         break;
  280     case FixedStringAnyWord:
  281         stripped.replace(rmnws, " ");
  282         break;
  283     case FixedStringContext:
  284         stripped.replace(rmors, " ");
  285         break;
  286     case RegularExpression:
  287         stripped.replace(rmnls, " ");
  288         break;
  289     default:
  290         break;
  291     }
  292     stripped = stripped.simplified();
  293     plength = type == FixedStringAllWords || type == FixedStringAnyWord ? 0 : stripped.length();
  294 
  295     switch (type)
  296     {
  297     case Context:
  298     case FixedString:
  299     case RegularExpression:
  300     case Wildcard:
  301         typedpattern = pattern.simplified();
  302         break;
  303     default:
  304         typedpattern = stripped;
  305         break;
  306     }
  307 #ifdef C2B_DEBUG_SEARCHING
  308     qDebug() << "searchPattern::matcher::_set_typed_pattern pattern      " << pattern << pattern.length();
  309     qDebug() << "searchPattern::matcher::_set_typed_pattern stripped     " << stripped << stripped.length() << plength;
  310     qDebug() << "searchPattern::matcher::_set_typed_pattern typedpattern " << typedpattern << typedpattern.length();
  311 #endif
  312 }
  313 
  314 QString searchPattern::matcher::_wildcard_escaped(const QString& pattern)
  315 {
  316     QString scaped;
  317     for (int i = 0; i < pattern.length(); ++i)
  318         if (pattern.at(i) == '*')
  319             scaped += ".*";
  320         else if (pattern.at(i) == '?')
  321             scaped += '.';
  322         else
  323             scaped += QRegularExpression::escape(pattern.at(i));
  324     return scaped;
  325 }