"Fossies" - the Fresh Open Source Software Archive

Member "cb2bib-2.0.1/src/c2b/triads.cpp" (12 Feb 2021, 3042 Bytes) of package /linux/privat/cb2bib-2.0.1.tar.gz:


As a special service, "Fossies" has tried to format the requested source page into HTML using (guessed) C and C++ source code syntax highlighting (style: standard), with prefixed line numbers and a code folding option. Alternatively, you can view or download the uninterpreted source code file here. For more information about "triads.cpp", see the Fossies "Dox" file reference documentation and the latest Fossies "Diffs" side-by-side code changes report: 2.0.0_vs_2.0.1.

    1 /***************************************************************************
    2  *   Copyright (C) 2004-2021 by Pere Constans
    3  *   constans@molspaces.com
    4  *   cb2Bib version 2.0.1. Licensed under the GNU GPL version 3.
    5  *   See the LICENSE file that comes with this distribution.
    6  ***************************************************************************/
    7 #include "triads.h"
    8 
    9 #include "cb2bib_utilities.h"
   10 
   11 #include <QHash>
   12 
   13 
   14 const QString triads::textSignature(const QString& str)
   15 {
   16     if (str.isEmpty())
   17         return QString();
   18     else
   19         return asciiTextSignature(c2bUtils::toAscii(str, c2bUtils::KeepWords));
   20 }
   21 
   22 const QVector<QString> triads::textSignature(const QStringList& strs)
   23 {
   24     QVector<QString> s(strs.count());
   25     for (int i = 0; i < strs.count(); ++i)
   26         s[i] = textSignature(strs.at(i));
   27     return s;
   28 }
   29 
   30 const QString triads::asciiTextSignature(const QString& str)
   31 {
   32     QString s(str);
   33     s.replace(c2bUtils::nonAsciiLetter, " ");
   34     s = s.toLower();
   35 
   36     const int nletters(26);
   37     const int ngram(5);
   38     const int slength(s.length());
   39     const int code0(QChar('a').unicode());
   40     const QChar space(' ');
   41 
   42     QSet<int> triads;
   43     for (int i = 0; i <= slength - ngram; ++i)
   44     {
   45         if (s.midRef(i, ngram).contains(space))
   46             continue;
   47         const int t1(s.at(i).unicode() - code0);
   48         const int t2(s.at(i + 2).unicode() - code0);
   49         const int t3(s.at(i + 4).unicode() - code0);
   50         const int code(code0 + t1 * nletters * nletters + t2 * nletters + t3);
   51         if (!triads.contains(code))
   52             triads.insert(code);
   53     }
   54     QList<int> codes(triads.toList());
   55     std::sort(codes.begin(), codes.end());
   56     s.resize(codes.count());
   57     for (int i = 0; i < codes.count(); ++i)
   58         s[i] = QChar(codes.at(i));
   59     return s;
   60 }
   61 
   62 int triads::textFrequency(const QString& str)
   63 {
   64     const QString sign(textSignature(str));
   65     if (sign.length() == 0)
   66         return 1000 - str.length();
   67     else
   68         return _rank(str, sign, c2bUtils::fileToString(":txt/txt/triads.txt"));
   69 }
   70 
   71 const QVector<int> triads::textFrequency(const QStringList& strs)
   72 {
   73     if (strs.count() == 0)
   74         return QVector<int>();
   75     const QString ranks(c2bUtils::fileToString(":txt/txt/triads.txt"));
   76     QVector<int> fr(strs.count());
   77     for (int i = 0; i < strs.count(); ++i)
   78     {
   79         const QString sign(textSignature(strs.at(i)));
   80         if (sign.length() == 0)
   81             fr[i] = 1000 - strs.at(i).length();
   82         else
   83             fr[i] = _rank(strs.at(i), sign, ranks);
   84     }
   85     return fr;
   86 }
   87 
   88 int triads::_rank(const QString& str, const QString& sign, const QString& ranks)
   89 {
   90     const int m(sign.length());
   91     const int n(ranks.length());
   92     int counter(0);
   93     int j0(0);
   94     for (int i = 0; i < m; ++i)
   95         for (int j = j0; j < n; ++j)
   96             if (sign.at(i) == ranks.at(j))
   97             {
   98                 ++counter;
   99                 j0 = j + 1;
  100                 break;
  101             }
  102     counter = 100 * (m - counter) + str.length();
  103     return 1000 - counter;
  104 }