"Fossies" - the Fresh Open Source Software Archive

Member "cb2bib-2.0.1/src/c2bTests.cpp" (12 Feb 2021, 54109 Bytes) of package /linux/privat/cb2bib-2.0.1.tar.gz:


As a special service "Fossies" has tried to format the requested source page into HTML format using (guessed) C and C++ source code syntax highlighting (style: standard) with prefixed line numbers and code folding option. Alternatively, you can view or download the uninterpreted source code file here. For more information about "c2bTests.cpp" see the Fossies "Dox" file reference documentation and the latest Fossies "Diffs" side-by-side code changes report: 2.0.0_vs_2.0.1.

    1 /***************************************************************************
    2  *   Copyright (C) 2004-2021 by Pere Constans
    3  *   constans@molspaces.com
    4  *   cb2Bib version 2.0.1. Licensed under the GNU GPL version 3.
    5  *   See the LICENSE file that comes with this distribution.
    6  ***************************************************************************/
    7 #define C2B_CHECK_CAPITALIZATION 0
    8 #define C2B_CHECK_CODE_DECOMPOSITION 0
    9 #define C2B_CHECK_JOURNALS 0
   10 #define C2B_CHECK_PATTERNS 0
   11 #define C2B_CHECK_PDF_AUTHORS 0
   12 #define C2B_CHECK_PDF_CODES 0
   13 #include "c2bTests.h"
   14 
   15 #include <bibParser.h>
   16 #include <document.h>
   17 #include <settings.h>
   18 #include <txtmatcher.h>
   19 
   20 #include <QCoreApplication>
   21 #include <QDataStream>
   22 #include <QSysInfo>
   23 #include <QTextStream>
   24 
   25 
   26 c2bTests::c2bTests() : cout(*(new QTextStream(stdout))), _failed_test_count(0), _test_count(0)
   27 {
   28     cout.setCodec("UTF-8");
   29 
   30     cout << "============" << endl;
   31     cout << "cb2Bib tests" << endl;
   32     cout << "============" << endl;
   33     _bibliographic_dir = QFileInfo(settings::instance()->fileName("cb2Bib/BibTeXFile")).absolutePath();
   34 }
   35 
   36 c2bTests::~c2bTests()
   37 {
   38     cout << endl;
   39     cout << "=================" << endl;
   40     cout << "cb2Bib tests done" << endl;
   41     cout << "=================" << endl << endl;
   42     delete &cout;
   43 }
   44 
   45 
   46 int c2bTests::allTests()
   47 {
   48     _test_count = 0;
   49     _failed_test_count = 0;
   50 #if C2B_CHECK_PATTERNS
   51     heuristic_check_patterns();
   52     return 0;
   53 #endif
   54 #if C2B_CHECK_JOURNALS
   55     check_abbreviations_txt();
   56     heuristic_journal();
   57 #else
   58     author_parser();
   59     heuristic_pvny();
   60     heuristic_misc();
   61     heuristic_author();
   62     heuristic_check_patterns();
   63     compression_check();
   64     write_information();
   65 #endif
   66     cout << endl << endl;
   67     cout << "Tests done. Tests: " << _test_count << " Failed: " << _failed_test_count << endl;
   68     return 0;
   69 }
   70 
   71 int c2bTests::writeInformation()
   72 {
   73     write_information();
   74     return 0;
   75 }
   76 
   77 void c2bTests::compression_check()
   78 {
   79     QString test;
   80     test += QChar(100);
   81     test += QChar(200);
   82     test += QChar(8700);
   83     test += QChar(56200);
   84     test += QChar(57000);
   85     test += QChar(10);
   86     test += QChar(100);
   87     test += QChar(200);
   88     test += QChar(8700);
   89     test += QChar(56200);
   90     test += QChar(57000);
   91     test += QChar(10);
   92     QByteArray ba = test.toUtf8();
   93     bool passed(c2bUtils::fromUtf8(ba) == QString::fromUtf8(ba));
   94     cout << endl;
   95     if (passed)
   96     {
   97         cout << "-----------------------------------------------------------------" << endl;
   98         cout << "Check for c2b utf8: passed" << endl;
   99         cout << "-----------------------------------------------------------------" << endl;
  100     }
  101     else
  102     {
  103         cout << "-----------------------------------------------------------------" << endl;
  104         cout << "WARNING: Check for c2b utf8 FAILED! Please report it as a bug" << endl;
  105         cout << "-----------------------------------------------------------------" << endl;
  106         ++_failed_test_count;
  107     }
  108     ++_test_count;
  109 
  110     test += "-----------------------------------------------------------------";
  111     test += "=================================================================";
  112     QByteArray cba(c2bUtils::compressString(test));
  113     QString dcs(c2bUtils::decompressString(cba));
  114     passed = (dcs == test) && (test.toUtf8().length() > cba.length()) && (dcs.utf16()[dcs.size()] == 0);
  115     if (passed)
  116     {
  117         cout << "-----------------------------------------------------------------" << endl;
  118         cout << "Check for compression: passed" << endl;
  119         cout << "-----------------------------------------------------------------" << endl;
  120     }
  121     else
  122     {
  123         cout << "-----------------------------------------------------------------" << endl;
  124         cout << "WARNING: Check for compression FAILED! Please report it as a bug" << endl;
  125         cout << "-----------------------------------------------------------------" << endl;
  126         ++_failed_test_count;
  127     }
  128     ++_test_count;
  129 }
  130 
  131 void c2bTests::write_information()
  132 {
  133     cout << endl;
  134     cout << "-------------------------" << endl;
  135     cout << "Miscellaneous information" << endl;
  136     cout << "-------------------------" << endl;
  137     cout << endl;
  138     cout << "Version: cb2bib " << C2B_VERSION << endl;
  139 #if defined(C2B_USE_LZSSE)
  140     cout << "Compression: lzsse" << endl;
  141 #elif defined(C2B_USE_LZ4)
  142     cout << "Compression: lz4" << endl;
  143 #elif defined(C2B_USE_LZO)
  144     cout << "Compression: lzo2" << endl;
  145 #else
  146     cout << "Compression: zlib" << endl;
  147 #endif
  148 #if QT_VERSION >= QT_VERSION_CHECK(5, 4, 0)
  149     cout << "Optimized PCRE: yes" << endl;
  150 #else
  151     cout << "Optimized PCRE: no" << endl;
  152 #endif
  153 #ifdef C2B_USE_TXTMATCHER_AVX2
  154     cout << "Optimized txtmatcher: AVX2" << endl;
  155 #endif
  156 #ifdef C2B_USE_TXTMATCHER_SSE2
  157     cout << "Optimized txtmatcher: SSE2" << endl;
  158 #endif
  159 #ifdef C2B_USE_TXTMATCHER_SCALAR
  160     cout << "Optimized txtmatcher: no" << endl;
  161 #endif
  162 #ifdef C2B_USE_QWEBKIT
  163     cout << "Compiled against QtWebKit: yes" << endl;
  164 #else
  165     cout << "Compiled against QtWebKit: no" << endl;
  166 #endif
  167 #ifdef C2B_USE_QWEBENGINE
  168     cout << "Compiled against QtWebEngine: yes" << endl;
  169 #else
  170     cout << "Compiled against QtWebEngine: no" << endl;
  171 #endif
  172 #ifdef C2B_USE_CBPOLL
  173     cout << "Clipboard polling: yes" << endl;
  174 #else
  175     cout << "Clipboard polling: no" << endl;
  176 #endif
  177     cout << "Application binary path: " << QCoreApplication::instance()->applicationDirPath() << endl;
  178     cout << "Application hardcoded data path: " << C2B_DATA_DIR << endl;
  179 #ifdef Q_OS_MACOS
  180     QString C2B_DATA_DIR_MAC(C2B_DATA_DIR);
  181     if (QDir(C2B_DATA_DIR + "/data/").exists())
  182         C2B_DATA_DIR_MAC = QDir::cleanPath(C2B_DATA_DIR + '/');
  183     else if (QDir(QCoreApplication::instance()->applicationDirPath() + "/../Resources/data/").exists())
  184         C2B_DATA_DIR_MAC = QDir::cleanPath(QCoreApplication::instance()->applicationDirPath() + "/../Resources/");
  185     cout << "Application macosx data path: " << C2B_DATA_DIR_MAC << endl;
  186 #endif
  187     cout << "\n-------------------------" << endl;
  188     cout << "OS:" << endl;
  189     cout << "Build CPU:       " << QSysInfo::buildCpuArchitecture() << endl;
  190     cout << "CPU:             " << QSysInfo::currentCpuArchitecture() << endl;
  191     cout << "Kernel Type:     " << QSysInfo::kernelType() << endl;
  192     cout << "Kernel Version:  " << QSysInfo::kernelVersion() << endl;
  193     cout << "Product Type:    " << QSysInfo::productType() << endl;
  194     cout << "Product Version: " << QSysInfo::productVersion() << '|' << int(QSysInfo::productVersion().toDouble()) << endl;
  195     cout << "Name:            " << QSysInfo::prettyProductName() << endl;
  196     cout << "-------------------------" << endl;
  197     cout << endl;
  198 }
  199 
  200 void c2bTests::heuristic_author()
  201 {
  202     cout << endl;
  203     cout << "---------------------------------" << endl;
  204     cout << "Test heuristic author recognition" << endl;
  205     cout << "---------------------------------" << endl;
  206 
  207     QFileInfoList flist = QDir(_bibliographic_dir).entryInfoList(QStringList() << "*.bib");
  208     bibParser bp;
  209     bibReference reference;
  210     QStringList fields;
  211     fields.append("author");
  212     fields.append("editor");
  213     bp.initReferenceParsing(_bibliographic_dir, fields, &reference);
  214 
  215 #if C2B_CHECK_CAPITALIZATION
  216     for (int f = 0; f < flist.count(); ++f)
  217     {
  218         const QString bib_file(flist.at(f).absoluteFilePath());
  219         bp.initReferenceParsing(_bibliographic_dir, fields, &reference);
  220         cout << "File: " << bib_file << endl;
  221         while (bp.referencesIn(c2bUtils::fileToString(bib_file), &reference))
  222         {
  223             const QString author(reference.value("author"));
  224             if (author.isEmpty())
  225                 continue;
  226             bp.clearCurrentReference();
  227             bp.guessFields(author);
  228             const QString guess(bp.currentReference().value("author"));
  229             const bool passed(guess == author);
  230             ++_test_count;
  231             if (!passed)
  232                 ++_failed_test_count;
  233             cout << QString("[%1]\t'%2'\n").arg(boolToStr(passed)).arg(author);
  234             if (!passed)
  235                 cout << QString("\t'%1'\n").arg(guess);
  236         }
  237     }
  238     return;
  239 #endif
  240 #if C2B_CHECK_CODE_DECOMPOSITION
  241     for (int u = 1; u < 78790; ++u)
  242     {
  243         const QChar c(u);
  244         if (!c.isLetter())
  245             continue;
  246         if (c.decompositionTag() == QChar::NoDecomposition)
  247             continue;
  248         const QString dc(c.decomposition());
  249         if (dc.length() != 2)
  250             continue;
  251         if (dc.at(0).isLetter() && dc.at(1).isLetter())
  252             cout << '[' << c << ']' << " u: " << u << " decompose: " << decomposition(c)
  253                  << " category: " << c.category() << endl;
  254     }
  255     return;
  256 #endif
  257 #if C2B_CHECK_PDF_CODES
  258     QMap<QChar, long long> charfreq;
  259     long long char_count = 0;
  260     for (int f = 0; f < flist.count(); ++f)
  261     {
  262         const QString bib_file(flist.at(f).absoluteFilePath());
  263         fields.append("file");
  264         bp.initReferenceParsing(_bibliographic_dir, fields, &reference);
  265         cout << "File: " << bib_file << endl;
  266         while (bp.referencesIn(c2bUtils::fileToString(bib_file), &reference))
  267         {
  268             const QString file(reference.value("file"));
  269             document d(file, document::Raw);
  270             const QString doc(d.toString());
  271             for (int i = 0; i < doc.length(); ++i)
  272             {
  273                 if (doc.at(i).unicode() > 800 && doc.at(i).unicode() < 900)
  274                 {
  275                     cout << "At file: " << file << endl;
  276                     cout << '[' << doc.at(i) << "] " << doc.mid(i - 10, 20) << endl;
  277                 }
  278                 charfreq.insert(doc.at(i), 1 + charfreq.value(doc.at(i), 0));
  279             }
  280             char_count += doc.length();
  281         }
  282     }
  283     QList<QChar> cl = charfreq.keys();
  284     for (int i = 0; i < cl.count(); ++i)
  285         cout << '[' << cl.at(i) << ']' << " u: " << cl.at(i).unicode() << " letter: " << cl.at(i).isLetter()
  286              << " decompose: " << decomposition(cl.at(i)) << " category: " << cl.at(i).category()
  287              << " f: " << charfreq.value(cl.at(i)) << endl;
  288     cout << "\nTotal characters: " << char_count << endl;
  289     return;
  290 #endif
  291 #if C2B_CHECK_PDF_AUTHORS
  292     cout << endl;
  293     cout << "----------------------------------------" << endl;
  294     cout << "Test heuristic author recognition on PDF" << endl;
  295     cout << "----------------------------------------" << endl;
  296     cout << endl;
  297     cout << "Checking files at: " << _bibliographic_dir << endl;
  298 
  299     const QStringList excluded(c2bUtils::fileToString(_bibliographic_dir + "/c2b_test_exclude.txt")
  300                                .split(QRegExp("[\\n\\r]"), QString::SkipEmptyParts));
  301     QHash<QString, QString> doc_cache;
  302     const QString doc_cache_fn(_bibliographic_dir + "/c2b_test.cache");
  303     if (QFileInfo::exists(doc_cache_fn))
  304     {
  305         cout << "Reading from cache" << endl;
  306         QFile file(doc_cache_fn);
  307         file.open(QIODevice::ReadOnly);
  308         QDataStream ds(&file);
  309         ds >> doc_cache;
  310     }
  311     else
  312     {
  313         for (int f = 0; f < flist.count(); ++f)
  314         {
  315             const QString bib_file(flist.at(f).absoluteFilePath());
  316             fields.append("file");
  317             bp.initReferenceParsing(_bibliographic_dir, fields, &reference);
  318             cout << "File: " << bib_file << endl;
  319             while (bp.referencesIn(c2bUtils::fileToString(bib_file), &reference))
  320             {
  321                 const QString file(reference.value("file"));
  322                 if (excluded.contains(file) || file.contains("/stacks/"))
  323                     continue;
  324                 document d(file, document::FirstPage);
  325                 const QString doc(d.toString());
  326                 doc_cache.insert(reference.value("file"), doc);
  327             }
  328         }
  329         if (doc_cache.size() > 0)
  330         {
  331             QFile file(doc_cache_fn);
  332             file.open(QIODevice::WriteOnly);
  333             QDataStream ds(&file);
  334             ds << doc_cache;
  335             cout << "Writing cache " << doc_cache_fn << endl;
  336             cout << "Delete cache when done" << endl;
  337         }
  338     }
  339     cout << "Check authors" << endl;
  340     int n_pdf(0);
  341     int n_failed_pdf(0);
  342     for (int f = 0; f < flist.count(); ++f)
  343     {
  344         const QString bib_file(flist.at(f).absoluteFilePath());
  345         fields.append("file");
  346         bp.initReferenceParsing(_bibliographic_dir, fields, &reference);
  347         cout << "File: " << bib_file << endl;
  348         while (bp.referencesIn(c2bUtils::fileToString(bib_file), &reference))
  349         {
  350             const QString file(reference.value("file"));
  351             if (file.isEmpty())
  352                 continue;
  353             if (excluded.contains(file) || file.contains("/stacks/"))
  354                 continue;
  355             const QString doc(doc_cache.value(reference.value("file")));
  356             QString check_doc(doc);
  357             check_doc.remove(c2bUtils::nonAsciiLetter);
  358             if (check_doc.isEmpty())
  359                 continue;
  360 
  361             const QString author(reference.value("author"));
  362             bp.clearCurrentReference();
  363             bp.guessFields(doc);
  364             const QString guess(bp.currentReference().value("author"));
  365             bool passed(guess == author);
  366             if (!passed)
  367             {
  368                 QString author_ascii(c2bUtils::toAscii(author, c2bUtils::Cleanup));
  369                 QString guess_ascii(c2bUtils::toAscii(guess, c2bUtils::Cleanup));
  370                 passed = guess_ascii == author_ascii;
  371                 if (passed)
  372                     cout << QString("[%1*] '%2'\n").arg(boolToStr(passed), author);
  373                 else
  374                     cout << QString("[%1]  '%2'\n").arg(boolToStr(passed), author);
  375                 if (!passed)
  376                 {
  377                     qDebug() << author;
  378                     qDebug() << guess;
  379                     qDebug() << reference.value("file");
  380                     qDebug() << "DOCUMENT: \n" << doc << endl << endl;
  381                     //                    for (int i = 0; i < doc.count(); ++i)
  382                     //                        cout << '[' << doc.at(i) << ']' << " u: " << doc.at(i).unicode() << "
  383                     //                        letter: " <<
  384                     //                             doc.at(i).isLetter() << endl;
  385                 }
  386                 cout << QString("          '%1'\n").arg(guess);
  387             }
  388             else
  389                 cout << QString("[%1]  '%2'\n").arg(boolToStr(passed), author);
  390             ++n_pdf;
  391             if (!passed)
  392                 ++n_failed_pdf;
  393         }
  394     }
  395     cout << "PDF Tests: " << n_pdf << endl;
  396     cout << "Succeded: " << n_pdf - n_failed_pdf << endl;
  397     _test_count += n_pdf;
  398     _failed_test_count += n_failed_pdf;
  399 #endif
  400 }
  401 
  402 void c2bTests::check_abbreviations_txt()
  403 {
  404 #if C2B_CHECK_JOURNALS
  405     cout << endl;
  406     cout << "--------------------------------" << endl;
  407     cout << "Check journal abbreviations file" << endl;
  408     cout << "--------------------------------" << endl;
  409     cout << endl;
  410 
  411     const QString dbfile(settings::instance()->fileName("cb2Bib/JournalFile"));
  412     cout << dbfile << endl;
  413     journalDB jdb(dbfile);
  414 
  415     QStringList abbr1, abbr2, full1, full2, code;
  416     {
  417         QFile file(dbfile);
  418         if (!file.open(QIODevice::ReadOnly | QIODevice::Text))
  419             return;
  420         QTextStream stream(&file);
  421         stream.setCodec("UTF-8");
  422         stream.setAutoDetectUnicode(true);
  423         QString line;
  424         int line_number(0);
  425         while (!stream.atEnd())
  426         {
  427             line = stream.readLine();
  428             ++line_number;
  429             if (line.isEmpty() || line.startsWith('#'))
  430                 continue;
  431             const QStringList spLine(line.split('|', QString::SkipEmptyParts));
  432             if (spLine.count() != 3)
  433             {
  434                 cout << "Syntax error in journal file at line " << line_number << endl;
  435                 continue;
  436             }
  437             const QStringList spAbbreviated(spLine.at(1).split('=', QString::SkipEmptyParts));
  438             const int na(spAbbreviated.count());
  439             if (na < 1 || na > 2)
  440             {
  441                 cout << "Syntax error in journal file at line " << line_number << endl;
  442                 continue;
  443             }
  444             const QStringList spExtended(spLine.at(2).split('=', QString::SkipEmptyParts));
  445             const int ne(spExtended.count());
  446             if (ne < 1 || ne > 2)
  447             {
  448                 cout << "Syntax error in journal file at line " << line_number << endl;
  449                 continue;
  450             }
  451             code += spLine.at(0);
  452             abbr1 += spAbbreviated.at(0);
  453             full1 += spExtended.at(0);
  454             abbr2 += (na == 2) ? spAbbreviated.at(1) : spAbbreviated.at(0);
  455             full2 += (ne == 2) ? spExtended.at(1) : spExtended.at(0);
  456         }
  457         file.close();
  458         const int njournals(code.count());
  459         QStringList counter;
  460         for (int i = 0; i < njournals; ++i)
  461             counter += code.at(i).toLower().remove(c2bUtils::nonLetter);
  462         counter.removeDuplicates();
  463         cout << "code total:  " << njournals << " unique: " << counter.count() << endl;
  464         counter.clear();
  465         for (int i = 0; i < njournals; ++i)
  466             counter += abbr1.at(i).toLower().remove(c2bUtils::nonLetter);
  467         counter.removeDuplicates();
  468         cout << "abbr1 total: " << njournals << " unique: " << counter.count() << endl;
  469         counter.clear();
  470         for (int i = 0; i < njournals; ++i)
  471             counter += full1.at(i).toLower().remove(c2bUtils::nonLetter);
  472         counter.removeDuplicates();
  473         cout << "full1 total: " << njournals << " unique: " << counter.count() << endl;
  474         counter.clear();
  475         for (int i = 0; i < njournals; ++i)
  476             counter += abbr2.at(i).toLower().remove(c2bUtils::nonLetter);
  477         counter.removeDuplicates();
  478         cout << "abbr2 total: " << njournals << " unique: " << counter.count() << endl;
  479         counter.clear();
  480         for (int i = 0; i < njournals; ++i)
  481             counter += full2.at(i).toLower().remove(c2bUtils::nonLetter);
  482         counter.removeDuplicates();
  483         cout << "full2 total: " << njournals << " unique: " << counter.count() << endl;
  484     }
  485     const int njournals(code.count());
  486 
  487     cout << endl;
  488     for (int i = 0; i < njournals; ++i)
  489         if (jdb.retrieve(code.at(i)) != abbr1.at(i))
  490             cout << "failed code to abbr1   " << code.at(i) << " | " << abbr1.at(i) << " | " << jdb.retrieve(code.at(i))
  491                  << endl;
  492     for (int i = 0; i < njournals; ++i)
  493         if (jdb.retrieveAlternate(code.at(i)) != abbr2.at(i))
  494             cout << "failed code to abbr2   " << code.at(i) << " | " << abbr2.at(i) << " | "
  495                  << jdb.retrieveAlternate(code.at(i)) << endl;
  496     for (int i = 0; i < njournals; ++i)
  497         if (jdb.retrieveFull(code.at(i)) != full1.at(i))
  498             cout << "failed code to full1   " << code.at(i) << " | " << full1.at(i) << " | "
  499                  << jdb.retrieveFull(code.at(i)) << endl;
  500     for (int i = 0; i < njournals; ++i)
  501         if (jdb.retrieveAlternateFull(code.at(i)) != full2.at(i))
  502             cout << "failed code to full2   " << code.at(i) << " | " << full2.at(i) << " | "
  503                  << jdb.retrieveAlternateFull(code.at(i)) << endl;
  504 
  505     cout << endl;
  506     for (int i = 0; i < njournals; ++i)
  507         if (jdb.retrieve(abbr1.at(i)) != abbr1.at(i))
  508             cout << "failed abbr1 to abbr1  " << abbr1.at(i) << " | " << abbr1.at(i) << " | "
  509                  << jdb.retrieve(abbr1.at(i)) << endl;
  510     for (int i = 0; i < njournals; ++i)
  511         if (jdb.retrieveAlternate(abbr1.at(i)) != abbr2.at(i))
  512             cout << "failed abbr1 to abbr2  " << abbr1.at(i) << " | " << abbr2.at(i) << " | "
  513                  << jdb.retrieveAlternate(abbr1.at(i)) << endl;
  514     for (int i = 0; i < njournals; ++i)
  515         if (jdb.retrieveFull(abbr1.at(i)) != full1.at(i))
  516             cout << "failed abbr1 to full1  " << abbr1.at(i) << " | " << full1.at(i) << " | "
  517                  << jdb.retrieveFull(abbr1.at(i)) << endl;
  518     for (int i = 0; i < njournals; ++i)
  519         if (jdb.retrieveAlternateFull(abbr1.at(i)) != full2.at(i))
  520             cout << "failed abbr1 to full2  " << abbr1.at(i) << " | " << full2.at(i) << " | "
  521                  << jdb.retrieveAlternateFull(abbr1.at(i)) << endl;
  522 
  523     cout << endl;
  524     for (int i = 0; i < njournals; ++i)
  525         if (jdb.retrieve(abbr2.at(i)) != abbr1.at(i))
  526             cout << "failed abbr2 to abbr1  " << abbr2.at(i) << " | " << abbr1.at(i) << " | "
  527                  << jdb.retrieve(abbr2.at(i)) << endl;
  528     for (int i = 0; i < njournals; ++i)
  529         if (jdb.retrieveAlternate(abbr2.at(i)) != abbr2.at(i))
  530             cout << "failed abbr2 to abbr2  " << abbr2.at(i) << " | " << abbr2.at(i) << " | "
  531                  << jdb.retrieveAlternate(abbr2.at(i)) << endl;
  532     for (int i = 0; i < njournals; ++i)
  533         if (jdb.retrieveFull(abbr2.at(i)) != full1.at(i))
  534             cout << "failed abbr2 to full1  " << abbr2.at(i) << " | " << full1.at(i) << " | "
  535                  << jdb.retrieveFull(abbr2.at(i)) << endl;
  536     for (int i = 0; i < njournals; ++i)
  537         if (jdb.retrieveAlternateFull(abbr2.at(i)) != full2.at(i))
  538             cout << "failed abbr2 to full2  " << abbr2.at(i) << " | " << full2.at(i) << " | "
  539                  << jdb.retrieveAlternateFull(abbr2.at(i)) << endl;
  540 
  541     cout << endl;
  542     for (int i = 0; i < njournals; ++i)
  543         if (jdb.retrieve(full1.at(i)) != abbr1.at(i))
  544             cout << "failed full1 to abbr1  " << full1.at(i) << " | " << abbr1.at(i) << " | "
  545                  << jdb.retrieve(full1.at(i)) << endl;
  546     for (int i = 0; i < njournals; ++i)
  547         if (jdb.retrieveAlternate(full1.at(i)) != abbr2.at(i))
  548             cout << "failed full1 to abbr2  " << full1.at(i) << " | " << abbr2.at(i) << " | "
  549                  << jdb.retrieveAlternate(full1.at(i)) << endl;
  550     for (int i = 0; i < njournals; ++i)
  551         if (jdb.retrieveFull(full1.at(i)) != full1.at(i))
  552             cout << "failed full1 to full1  " << full1.at(i) << " | " << full1.at(i) << " | "
  553                  << jdb.retrieveFull(full1.at(i)) << endl;
  554     for (int i = 0; i < njournals; ++i)
  555         if (jdb.retrieveAlternateFull(full1.at(i)) != full2.at(i))
  556             cout << "failed full1 to full2  " << full1.at(i) << " | " << full2.at(i) << " | "
  557                  << jdb.retrieveAlternateFull(full1.at(i)) << endl;
  558 
  559     cout << endl;
  560     for (int i = 0; i < njournals; ++i)
  561         if (jdb.retrieve(full2.at(i)) != abbr1.at(i))
  562             cout << "failed full2 to abbr1  " << full2.at(i) << " | " << abbr1.at(i) << " | "
  563                  << jdb.retrieve(full2.at(i)) << endl;
  564     for (int i = 0; i < njournals; ++i)
  565         if (jdb.retrieveAlternate(full2.at(i)) != abbr2.at(i))
  566             cout << "failed full2 to abbr2  " << full2.at(i) << " | " << abbr2.at(i) << " | "
  567                  << jdb.retrieveAlternate(full2.at(i)) << endl;
  568     for (int i = 0; i < njournals; ++i)
  569         if (jdb.retrieveFull(full2.at(i)) != full1.at(i))
  570             cout << "failed full2 to full1  " << full2.at(i) << " | " << full1.at(i) << " | "
  571                  << jdb.retrieveFull(full2.at(i)) << endl;
  572     for (int i = 0; i < njournals; ++i)
  573         if (jdb.retrieveAlternateFull(full2.at(i)) != full2.at(i))
  574             cout << "failed full2 to full2  " << full2.at(i) << " | " << full2.at(i) << " | "
  575                  << jdb.retrieveAlternateFull(full2.at(i)) << endl;
  576 #endif
  577 }
  578 
  579 void c2bTests::heuristic_journal()
  580 {
  581 #if C2B_CHECK_JOURNALS
  582     cout << endl;
  583     cout << "-----------------------------------------" << endl;
  584     cout << "Test heuristic journal recognition on PDF" << endl;
  585     cout << "-----------------------------------------" << endl;
  586     cout << endl;
  587     cout << "Checking files at: " << _bibliographic_dir << endl;
  588 
  589     QFileInfoList flist = QDir(_bibliographic_dir).entryInfoList(QStringList() << "*.bib");
  590     bibParser bp;
  591     bibReference reference;
  592     QStringList fields;
  593     fields.append("journal");
  594     fields.append("file");
  595 
  596     const QStringList excluded(c2bUtils::fileToString(_bibliographic_dir + "/c2b_test_exclude.txt")
  597                                .split(QRegExp("[\\n\\r]"), QString::SkipEmptyParts));
  598     QHash<QString, QString> doc_cache;
  599     const QString doc_cache_fn(_bibliographic_dir + "/c2b_test.cache");
  600     if (QFileInfo(doc_cache_fn).exists())
  601     {
  602         cout << "Reading from cache" << endl;
  603         QFile file(doc_cache_fn);
  604         file.open(QIODevice::ReadOnly);
  605         QDataStream ds(&file);
  606         ds >> doc_cache;
  607     }
  608     else
  609     {
  610         cout << "No cache. Return" << doc_cache_fn << endl;
  611         return;
  612     }
  613     cout << "Check journals" << endl;
  614     int n_pdf(0);
  615     int n_failed_pdf(0);
  616     QList<int> doclengths;
  617     for (int f = 0; f < flist.count(); ++f)
  618     {
  619         const QString bib_file(flist.at(f).absoluteFilePath());
  620         bp.initReferenceParsing(_bibliographic_dir, fields, &reference);
  621         cout << "File: " << bib_file << endl;
  622         while (bp.referencesIn(c2bUtils::fileToString(bib_file), &reference))
  623         {
  624             const QString file(reference.value("file"));
  625             if (file.isEmpty())
  626                 continue;
  627             if (excluded.contains(file) || file.contains("/stacks/"))
  628                 continue;
  629             const QString doc(doc_cache.value(file));
  630             QString check_doc(doc);
  631             check_doc.remove(c2bUtils::nonAsciiLetter);
  632             if (check_doc.isEmpty())
  633                 continue;
  634             doclengths.append(check_doc.length());
  635             const QString ajournal(bp.abbreviatedJournal(reference.value("journal")));
  636             const QString fjournal(bp.fullJournal(reference.value("journal")));
  637             if (fjournal.isEmpty())
  638                 continue;
  639             if (ajournal == fjournal && (ajournal.count('.') > 0 || ajournal.count(' ') > 0))
  640             {
  641                 qDebug() << "not in db:" << fjournal;
  642                 continue;
  643             }
  644             bp.clearCurrentReference();
  645             bp.guessFields(doc);
  646             // const QString guess(bp.abbreviatedJournal(bp.currentReference().value("journal")));
  647             const QString guess(bp.fullJournal(bp.currentReference().value("journal")));
  648             if (guess.isEmpty())
  649                 continue;
  650 
  651             bool passed(guess == fjournal);
  652             if (!passed)
  653             {
  654                 cout << QString("[%1]  '%2'\n").arg(boolToStr(passed)).arg(fjournal);
  655                 cout << QString("          '%1'\n").arg(guess);
  656 
  657                 qDebug() << "jn:" << fjournal;
  658                 qDebug() << "gn:" << guess;
  659                 qDebug() << reference.value("file");
  660                 qDebug() << "DOCUMENT: \n" << doc << endl << endl;
  661             }
  662             else
  663                 cout << QString("[%1]  '%2'\n").arg(boolToStr(passed)).arg(fjournal);
  664             ++n_pdf;
  665             if (!passed)
  666                 ++n_failed_pdf;
  667         }
  668     }
  669     cout << "PDF Tests: " << n_pdf << endl;
  670     cout << "Succeded:  " << n_pdf - n_failed_pdf << endl;
  671     cout << "Ratio:     " << double(n_pdf) / double(n_failed_pdf) << endl;
  672     _test_count += n_pdf;
  673     _failed_test_count += n_failed_pdf;
  674 
  675     std::sort(doclengths.begin(), doclengths.end());
  676     cout << "doclengths min max median: " << doclengths.first() << ' ' << doclengths.last() << ' '
  677          << doclengths.at(doclengths.count() / 2) << endl;
  678 #endif
  679 }
  680 
  681 void c2bTests::author_parser()
  682 {
  683     cout << endl;
  684     cout << "------------------" << endl;
  685     cout << "Test author parser" << endl;
  686     cout << "------------------" << endl;
  687 
  688     bibParser bp;
  689     QString author;
  690     QString input;
  691 
  692     author = "J.-L. Smith and R. Jones and K. McGibbons";
  693     input = "SMITH, J.-L., R. JONES, AND K. MCGIBBONS";
  694     check_test(author == bp.parse("author", input), input, author);
  695     input = "Smith, J.-L., Jones, R., and McGibbons, K.";
  696     check_test(author == bp.parse("author", input), input, author);
  697     input = "Smith, J.-L., Jones, R., McGibbons, K.";
  698     check_test(author == bp.parse("author", input), input, author);
  699 
  700     author = "J. L. Brooks Jr";
  701     input = "Brooks Jr, John L";
  702     check_test(author == bp.parse("author", input), input, author);
  703 
  704     author = "E. B. Melissa and W. F. Tohnson Jr and C. Z. Ortiz and S. J. van der Burgh";
  705     input = "Melissa, Elizabeth Baines; Tohnson, Walter F., Jr.; Zapa Ortiz, Carlos; van der Burgh, S. J.";
  706     check_test(author == bp.parse("author", input), input, author);
  707     input = "Melissa, E.B., Tohnson Jr, Walter F., Zapa Ortiz, C., van der Burgh, S. J.";
  708     check_test(author == bp.parse("author", input), input, author);
  709     input = "Melissa, E.B., Tohnson Jr, Walter F., Zapa Ortiz, C., and van der Burgh, S. J.";
  710     check_test(author == bp.parse("author", input), input, author);
  711 
  712     author = "B. B. Aaaaaaa";
  713     input = "Aaaaaaa, BB";
  714     check_test(author == bp.parse("author", input), input, author);
  715     // author = "J. R. Aaaaaaa";
  716     // input = "Aaaaaaa, JR"; // Gives wrong processing of the initials JR (set to Jr)
  717     // check_test(author == bp.parse("author", input), input, author);
  718 
  719     author = "J. Pi";
  720     input = "Joan Pi";
  721     check_test(author == bp.parse("author", input), input, author);
  722     input = "JOAN PI";
  723     check_test(author == bp.parse("author", input), input, author);
  724     author = "P. I. Joan";
  725     input = "Joan PI";
  726     check_test(author == bp.parse("author", input), input, author);
  727 
  728     author = "C. V. Pi";
  729     input = "Carles Vidal Pi";
  730     check_test(author == bp.parse("author", input), input, author);
  731     input = "Carles VIDAL PI";
  732     check_test(author == bp.parse("author", input), input, author);
  733     input = "Vidal Pi, Carles";
  734     check_test(author == bp.parse("author", input), input, author);
  735     input = "Pi, Carles Vidal";
  736     check_test(author == bp.parse("author", input), input, author);
  737     input = "PI, CARLES VIDAL";
  738     check_test(author == bp.parse("author", input), input, author);
  739 
  740     author = "C. V. Pi and M. R. Catala";
  741     input = "Carles Vidal Pi, Maria Rosa Catala";
  742     check_test(author == bp.parse("author", input), input, author);
  743     input = "Vidal Pi, Carles and Catala, Maria Rosa";
  744     check_test(author == bp.parse("author", input), input, author);
  745     input = "Vidal Pi, Carles and Catala, MR";
  746     check_test(author == bp.parse("author", input), input, author);
  747     input = "VIDAL PI, Carles and CATALA, MR";
  748     check_test(author == bp.parse("author", input), input, author);
  749     input = "Vidal Pi, Carles and Catala, M.R.";
  750     check_test(author == bp.parse("author", input), input, author);
  751 
  752     author = "C. V. Pi and M. R. Catala";
  753     input = "Carles Vidal Pi, Maria Ros Catala";
  754     check_test(author == bp.parse("author", input), input, author);
  755     input = "Carles VIDAL PI, Maria ROS CATALA";
  756     check_test(author == bp.parse("author", input), input, author);
  757     input = "Vidal Pi, Carles and Ros Catala, Maria";
  758     check_test(author == bp.parse("author", input), input, author);
  759 
  760     author = "M. R. R. Catala";
  761     input = "Maria Rosa Ros Catala";
  762     check_test(author == bp.parse("author", input), input, author);
  763     input = "Ros Catala, MR";
  764     check_test(author == bp.parse("author", input), input, author);
  765     input = "Ros Catala, M R";
  766     check_test(author == bp.parse("author", input), input, author);
  767     input = "Ros Catala, M. R.";
  768     check_test(author == bp.parse("author", input), input, author);
  769 
  770     author = "M. d'Errico";
  771     input = "Monica d'Errico";
  772     check_test(author == bp.parse("author", input), input, author);
  773 
  774     author = "J. V. Mael Jr";
  775     input = "JAMES V. MAEL, Jr.";
  776     check_test(author == bp.parse("author", input), input, author);
  777     author = "J. V. Mael III";
  778     input = "JAMES V. MAEL, III";
  779     check_test(author == bp.parse("author", input), input, author);
  780 
  781     author = "V. Yu. Dmitri and A. Karpoff";
  782     input = "VICTOR Yu. DMITRI,3 ANDRIY KARPOFF";
  783     check_test(author == bp.parse("author", input), input, author);
  784 
  785     author = "Yu. Dmitri and A. Karpoff";
  786     input = "Yu. Dmitri,3 Andriy Karpoff";
  787     check_test(author == bp.parse("author", input), input, author);
  788     input = "Yu. DMITRI,3 ANDRIY KARPOFF";
  789     check_test(author == bp.parse("author", input), input, author);
  790     input = "YU. DMITRI,3 ANDRIY KARPOFF";
  791     check_test(author == bp.parse("author", input), input, author);
  792 
  793     cout << "From medline:" << endl;
  794     author = "C. L. Maggi III";
  795     input = "Maggi, Carla L., III";
  796     check_test(author == bp.parse("author", input), input, author);
  797     input = "Maggi III CL";
  798     check_test(author == bp.parse("author", input), input, author);
  799     check_test(author == bp.parse("author", bp.authorFromMedline(input)), input, author);
  800     input = "Maggi, Carla L 3rd";
  801     check_test(author == bp.parse("author", bp.authorFromMedline(input)), input, author);
  802     input = "Maggi CL 3rd";
  803     check_test(author == bp.parse("author", bp.authorFromMedline(input)), input, author);
  804     author = "A. M. C. Lourtau";
  805     input = "Carr Lourtau, A M";
  806     check_test(author == bp.parse("author", input), input, author);
  807     check_test(author == bp.parse("author", bp.authorFromMedline(input)), input, author);
  808     input = "Carr Lourtau, AM";
  809     check_test(author == bp.parse("author", input), input, author);
  810     // input = "Carr Lourtau AM"; // Gives 'L. A. Carr' (rare AU formatting)
  811     // check_test(author == bp.parse("author", author::fromMedline(input)), input, author);
  812 
  813     author = "B. de Rivas";
  814     input = "de Rivas, Beatriz";
  815     check_test(author == bp.parse("author", bp.authorFromMedline(input)), input, author);
  816     input = "de Rivas B";
  817     check_test(author == bp.parse("author", bp.authorFromMedline(input)), input, author);
  818     author = "B. De Baets";
  819     input = "  De Baets, B  ";
  820     check_test(author == bp.parse("author", bp.authorFromMedline(input)), input, author);
  821 }
  822 
  823 void c2bTests::heuristic_pvny()
  824 {
  825     cout << endl;
  826     cout << "---------------------------------------------------------" << endl;
  827     cout << "Test heuristic pages - volume - number - year recognition" << endl;
  828     cout << "---------------------------------------------------------" << endl;
  829 
  830     bibParser bp;
  831     bibReference reference;
  832 
  833     /****************************************************************
  834     journal-pages-volume
  835     ****************************************************************/
  836     reference.clearReference();
  837     reference.insert("journal", bp.parse("journal", "Science"));
  838     reference.insert("pages", bp.parse("pages", "927"));
  839     reference.insert("volume", bp.parse("volume", "120"));
  840     write(bp, reference);
  841     heuristic(bp, reference, "Science. 120: 927");
  842     heuristic(bp, reference, "Science. 120, 927");
  843     heuristic(bp, reference, "Science 120, 927");
  844     heuristic(bp, reference, "Science 120, pp. 927");
  845 
  846     reference.clearReference();
  847     reference.insert("journal", bp.parse("journal", "Science"));
  848     reference.insert("pages", bp.parse("pages", "927 - 993"));
  849     reference.insert("volume", bp.parse("volume", "120"));
  850     write(bp, reference);
  851     heuristic(bp, reference, "Science. 120: 927 - 993");
  852     heuristic(bp, reference, "Science. 120, 927 - 93");
  853     heuristic(bp, reference, "Science 120, 927 - 993");
  854     heuristic(bp, reference, "Science 120, pp. 927 - 993");
  855 
  856     /****************************************************************
  857     journal-pages-volume-number
  858     ****************************************************************/
  859     reference.clearReference();
  860     reference.insert("journal", bp.parse("journal", "Science"));
  861     reference.insert("pages", bp.parse("pages", "927"));
  862     reference.insert("volume", bp.parse("volume", "120"));
  863     reference.insert("number", bp.parse("number", "1 - 3"));
  864     write(bp, reference);
  865     heuristic(bp, reference, "Science 120(1 - 3), 927");
  866     heuristic(bp, reference, "Science 120(1 - 3), pp. 927");
  867 
  868     reference.clearReference();
  869     reference.insert("journal", bp.parse("journal", "Science"));
  870     reference.insert("pages", bp.parse("pages", "927 - 993"));
  871     reference.insert("volume", bp.parse("volume", "120"));
  872     reference.insert("number", bp.parse("number", "1 - 3"));
  873     write(bp, reference);
  874     heuristic(bp, reference, "Science 120(1 - 3), 927 - 993");
  875     heuristic(bp, reference, "Science 120(1 - 3), pp. 927 - 993");
  876 
  877     reference.clearReference();
  878     reference.insert("journal", bp.parse("journal", "Science"));
  879     reference.insert("pages", bp.parse("pages", "927 - 993"));
  880     reference.insert("volume", bp.parse("volume", "120"));
  881     reference.insert("number", bp.parse("number", "1"));
  882     write(bp, reference);
  883     heuristic(bp, reference, "Science, Volume 120, Number 1, 927 - 993");
  884 
  885     /****************************************************************
  886     journal-pages-volume-year
  887     ****************************************************************/
  888     reference.clearReference();
  889     reference.insert("journal", bp.parse("journal", "Science"));
  890     reference.insert("pages", bp.parse("pages", "1922"));
  891     reference.insert("volume", bp.parse("volume", "120"));
  892     reference.insert("year", bp.parse("year", "2007"));
  893     write(bp, reference);
  894     heuristic(bp, reference, "Science 2007, 120, 1922");
  895     heuristic(bp, reference, "Science 2007 120: 1922");
  896     heuristic(bp, reference, "Science. 120: 1922, 2007");
  897     heuristic(bp, reference, "(2007) Science 120: 1922");
  898     heuristic(bp, reference, "Science 120, 1922 2007");
  899     heuristic(bp, reference, "Science 120, 1922(2007)");
  900     heuristic(bp, reference, "Science 120(2007) 1922");
  901     heuristic(bp, reference, "Science 2007;120:1922.");
  902     heuristic(bp, reference, "Science 2007 May 2;120:1922");
  903 
  904     reference.clearReference();
  905     reference.insert("journal", bp.parse("journal", "Science"));
  906     reference.insert("pages", bp.parse("pages", "22"));
  907     reference.insert("volume", bp.parse("volume", "120"));
  908     reference.insert("year", bp.parse("year", "2007"));
  909     write(bp, reference);
  910     heuristic(bp, reference, "Science 2007, 120, 22");
  911     heuristic(bp, reference, "Science 2007 120: 22");
  912     heuristic(bp, reference, "Science 2007 May 2, 120: 22");
  913     heuristic(bp, reference, "Science. 120: 22, 2007");
  914     heuristic(bp, reference, "(2007) Science 120: 22");
  915     heuristic(bp, reference, "Science 120, 22 2007");
  916     heuristic(bp, reference, "Science 120, 22(2007)");
  917     heuristic(bp, reference, "Science 120(2007) 22");
  918 
  919     // Usual pages
  920     reference.clearReference();
  921     reference.insert("journal", bp.parse("journal", "Science"));
  922     reference.insert("pages", bp.parse("pages", "3 - 7"));
  923     reference.insert("volume", bp.parse("volume", "120"));
  924     reference.insert("year", bp.parse("year", "2007"));
  925     write(bp, reference);
  926     heuristic(bp, reference, "Science. 120, 3 - 7(2007)");
  927     heuristic(bp, reference, "Science. 120: 3 - 7, 2007");
  928     heuristic(bp, reference, "Science, 120(2007), pp. 3 - 7");
  929     heuristic(bp, reference, "Science 120(2007) 3 - 7");
  930     heuristic(bp, reference, "2007 Science 120 3 - 7");
  931     heuristic(bp, reference, "Science. 2007, 120, 3 - 7");
  932     heuristic(bp, reference, "Science. 2007 120: 3 - 7");
  933     heuristic(bp, reference, "Science. 2007 May 2, 120: 3 - 7");
  934     heuristic(bp, reference, "Science. 2007, 120, 3 - 5pp");
  935     heuristic(bp, reference, "Science 2007;120: 3 - 7");
  936 
  937     // Pages susceptible to be confused by years
  938     reference.clearReference();
  939     reference.insert("journal", bp.parse("journal", "Science"));
  940     reference.insert("pages", bp.parse("pages", "1997 - 2001"));
  941     reference.insert("volume", bp.parse("volume", "120"));
  942     reference.insert("year", bp.parse("year", "2007"));
  943     write(bp, reference);
  944     heuristic(bp, reference, "Science. 120, 1997 - 2001(2007)");
  945     heuristic(bp, reference, "Science. 120, 1997 - 2001, 2007");
  946     heuristic(bp, reference, "Science. 120: 1997 - 2001, 2007");
  947     heuristic(bp, reference, "Science, 120(2007), pp. 1997 - 2001");
  948     heuristic(bp, reference, "Science 120(2007) 1997 - 2001");
  949     heuristic(bp, reference, "2007 Science 120 1997 - 2001");
  950     heuristic(bp, reference, "Science. 2007, 120, 1997 - 2001");
  951     heuristic(bp, reference, "Science. 2007 May 2, 120, 1997 - 2001");
  952     heuristic(bp, reference, "Science. 2007 120: 1997 - 2001");
  953     heuristic(bp, reference, "Science. 2007, 120, 1997 - 5pp");
  954     heuristic(bp, reference, "Science 2007;120: 1997 - 2001");
  955 
  956     // Pages starting by zero
  957     reference.clearReference();
  958     reference.insert("journal", bp.parse("journal", "Science"));
  959     reference.insert("pages", bp.parse("pages", "044103 - 044110"));
  960     reference.insert("volume", bp.parse("volume", "120"));
  961     reference.insert("year", bp.parse("year", "2007"));
  962     write(bp, reference);
  963     heuristic(bp, reference, "Science. 120, 044103 - 044110(2007)");
  964     heuristic(bp, reference, "Science. 120: 044103 - 044110, 2007");
  965     heuristic(bp, reference, "Science, 120(2007), pp. 044103 - 044110");
  966     heuristic(bp, reference, "Science 120(2007) 044103 - 044110");
  967     heuristic(bp, reference, "2007 Science 120 044103 - 044110");
  968     heuristic(bp, reference, "Science. 2007, 120, 044103 - 044110");
  969     heuristic(bp, reference, "Science. 2007 May 2; 120, 044103 - 044110");
  970     heuristic(bp, reference, "Science. 2007 120: 044103 - 044110");
  971     heuristic(bp, reference, "Science 2007;120: 044103 - 044110");
  972 
  973     /****************************************************************
  974     journal-pages-volume-number-year
  975     ****************************************************************/
  976     // Usual pages
  977     reference.clearReference();
  978     reference.insert("journal", bp.parse("journal", "Science"));
  979     reference.insert("pages", bp.parse("pages", "117"));
  980     reference.insert("volume", bp.parse("volume", "120"));
  981     reference.insert("number", bp.parse("number", "1"));
  982     reference.insert("year", bp.parse("year", "2007"));
  983     write(bp, reference);
  984     heuristic(bp, reference, "Science 120(1), 117(2007)");
  985     heuristic(bp, reference, "Science 120(1), 117(2007)");
  986     heuristic(bp, reference, "Science 120(1): 117, 2007");
  987     heuristic(bp, reference, "Science 2007, 120(1): 117");
  988     heuristic(bp, reference, "Science 2007 120(1): 117");
  989     heuristic(bp, reference, "Science 2007, 120(1), 117");
  990     heuristic(bp, reference, "Science, 2007, 120 (1), p 117");
  991     heuristic(bp, reference, "Science 2007 January 25; 120(1), 117");
  992     heuristic(bp, reference, "Science 2007 January 25 120(1), 117");
  993     heuristic(bp, reference, "Science 2007 May 25 120(1), 117");
  994     heuristic(bp, reference, "Science (2007), 120, 1, 117");
  995     heuristic(bp, reference, "Science, Vol. 120, No. 1 (2007) 117");
  996 
  997     reference.clearReference();
  998     reference.insert("journal", bp.parse("journal", "Science"));
  999     reference.insert("pages", bp.parse("pages", "10 - 17"));
 1000     reference.insert("volume", bp.parse("volume", "120"));
 1001     reference.insert("number", bp.parse("number", "1"));
 1002     reference.insert("year", bp.parse("year", "2007"));
 1003     write(bp, reference);
 1004     heuristic(bp, reference, "Science 120(1), 10 - 17(2007)");
 1005     heuristic(bp, reference, "Science 120(1), 10 - 17(2007)");
 1006     heuristic(bp, reference, "Science 120(1): 10 - 17, 2007");
 1007     heuristic(bp, reference, "Science 2007, 120(1): 10 - 17");
 1008     heuristic(bp, reference, "Science. 2007, 120(1): 10 - 7");
 1009     heuristic(bp, reference, "Science. 2007 120(1): 10 - 7");
 1010     heuristic(bp, reference, "Science. 2007, 120(1): 10 8pp");
 1011     heuristic(bp, reference, "Science. 2007, 120(1), 10 - 17");
 1012     heuristic(bp, reference, "Science. 2007, 120(1), pp 10 - 17");
 1013     heuristic(bp, reference, "Science. 2007 January 25, 120(1), pp 10 - 17");
 1014     heuristic(bp, reference, "Science (2007), 120, 1, pp. 10-17");
 1015 
 1016     reference.clearReference();
 1017     reference.insert("journal", bp.parse("journal", "Science"));
 1018     reference.insert("pages", bp.parse("pages", "117"));
 1019     reference.insert("volume", bp.parse("volume", "120"));
 1020     reference.insert("number", bp.parse("number", "1 - 6"));
 1021     reference.insert("year", bp.parse("year", "2007"));
 1022     write(bp, reference);
 1023     heuristic(bp, reference, "Science 120(1 - 6), 117(2007)");
 1024     heuristic(bp, reference, "Science 120(1 - 6), 117(2007)");
 1025     heuristic(bp, reference, "Science 120(1 - 6): 117, 2007");
 1026     heuristic(bp, reference, "Science 2007, 120(1 - 6): 117");
 1027     heuristic(bp, reference, "Science 2007 120(1 - 6): 117");
 1028     heuristic(bp, reference, "Science 2007 May 25 120(1 - 6): 117");
 1029 
 1030     reference.clearReference();
 1031     reference.insert("journal", bp.parse("journal", "Science"));
 1032     reference.insert("pages", bp.parse("pages", "10 - 17"));
 1033     reference.insert("volume", bp.parse("volume", "120"));
 1034     reference.insert("number", bp.parse("number", "1 - 6"));
 1035     reference.insert("year", bp.parse("year", "2007"));
 1036     write(bp, reference);
 1037     heuristic(bp, reference, "Science 120(1 - 6), 10 - 17(2007)");
 1038     heuristic(bp, reference, "Science 120(1 - 6), 10 - 17(2007)");
 1039     heuristic(bp, reference, "Science 120(1 - 6): 10 - 17, 2007");
 1040     heuristic(bp, reference, "Science 2007, 120(1 - 6): 10 - 17");
 1041     heuristic(bp, reference, "Science. 2007, 120(1 - 6): 10 - 7");
 1042     heuristic(bp, reference, "Science. 2007, 120(1 - 6): 10 8pp");
 1043     heuristic(bp, reference, "Science 2007 120(1 - 6): 10 8pp");
 1044     heuristic(bp, reference, "Science 2007 May 25 120(1 - 6): 10 8pp");
 1045 
 1046     // Pages susceptible to be confused by years
 1047     reference.clearReference();
 1048     reference.insert("journal", bp.parse("journal", "Science"));
 1049     reference.insert("pages", bp.parse("pages", "1922"));
 1050     reference.insert("volume", bp.parse("volume", "120"));
 1051     reference.insert("number", bp.parse("number", "1"));
 1052     reference.insert("year", bp.parse("year", "2007"));
 1053     write(bp, reference);
 1054     heuristic(bp, reference, "Science 120(1), 1922(2007)");
 1055     heuristic(bp, reference, "Science 120(1), 1922(2007)");
 1056     heuristic(bp, reference, "Science 120(1): 1922, 2007");
 1057     heuristic(bp, reference, "Science 2007, 120(1): 1922");
 1058     heuristic(bp, reference, "Science 2007, 120(1), 1922");
 1059     heuristic(bp, reference, "Science 2007 120(1): 1922");
 1060     heuristic(bp, reference, "Science 2007 May 25 120(1): 1922");
 1061     heuristic(bp, reference, "Science, Vol. 120, No. 1 (2007) 1922");
 1062 
 1063     reference.clearReference();
 1064     reference.insert("journal", bp.parse("journal", "Science"));
 1065     reference.insert("pages", bp.parse("pages", "1922 - 1927"));
 1066     reference.insert("volume", bp.parse("volume", "120"));
 1067     reference.insert("number", bp.parse("number", "1"));
 1068     reference.insert("year", bp.parse("year", "2007"));
 1069     write(bp, reference);
 1070     heuristic(bp, reference, "Science 120(1), 1922 - 1927(2007)");
 1071     heuristic(bp, reference, "Science 120(1), 1922 - 1927(2007)");
 1072     heuristic(bp, reference, "Science 120(1): 1922 - 1927, 2007");
 1073     heuristic(bp, reference, "Science 2007, 120(1): 1922 - 1927");
 1074     heuristic(bp, reference, "Science. 2007, 120(1): 1922 - 1927");
 1075     heuristic(bp, reference, "Science. 2007, 120(1): 1922 6pp");
 1076     heuristic(bp, reference, "Science. 2007 120(1): 1922 6pp");
 1077     heuristic(bp, reference, "Science. 2007, 120(1), 1922 - 1927");
 1078     heuristic(bp, reference, "Science. 2007, 120(1), pp 1922 - 1927");
 1079     heuristic(bp, reference, "Science. 2007 May 25, 120(1), pp 1922 - 1927");
 1080     heuristic(bp, reference, "Science (2007), 120, 1, pp. 1922-1927");
 1081     heuristic(bp, reference, "Science, Vol. 120, No. 1 (2007) 1922 - 1927");
 1082 
 1083     reference.clearReference();
 1084     reference.insert("journal", bp.parse("journal", "Science"));
 1085     reference.insert("pages", bp.parse("pages", "1922"));
 1086     reference.insert("volume", bp.parse("volume", "120"));
 1087     reference.insert("number", bp.parse("number", "1 - 6"));
 1088     reference.insert("year", bp.parse("year", "2007"));
 1089     write(bp, reference);
 1090     heuristic(bp, reference, "Science 120(1 - 6), 1922(2007)");
 1091     heuristic(bp, reference, "Science 120(1 - 6), 1922(2007)");
 1092     heuristic(bp, reference, "Science 120(1 - 6): 1922, 2007");
 1093     heuristic(bp, reference, "Science 2007, 120(1 - 6): 1922");
 1094     heuristic(bp, reference, "Science 2007 120(1 - 6): 1922");
 1095     heuristic(bp, reference, "Science 2007 May 25 120(1 - 6): 1922");
 1096 
 1097     reference.clearReference();
 1098     reference.insert("journal", bp.parse("journal", "Science"));
 1099     reference.insert("pages", bp.parse("pages", "1922 - 1927"));
 1100     reference.insert("volume", bp.parse("volume", "120"));
 1101     reference.insert("number", bp.parse("number", "1 - 6"));
 1102     reference.insert("year", bp.parse("year", "2007"));
 1103     write(bp, reference);
 1104     heuristic(bp, reference, "Science 120(1 - 6), 1922 - 1927(2007)");
 1105     heuristic(bp, reference, "Science 120(1 - 6), 1922 - 1927(2007)");
 1106     heuristic(bp, reference, "Science 120(1 - 6): 1922 - 1927, 2007");
 1107     heuristic(bp, reference, "Science 2007, 120(1 - 6): 1922 - 1927");
 1108     heuristic(bp, reference, "Science 2007 120(1 - 6): 1922 - 1927");
 1109     heuristic(bp, reference, "Science. 2007, 120(1 - 6): 1922 - 7");
 1110     heuristic(bp, reference, "Science. 2007, 120(1 - 6): 1922 6pp");
 1111     heuristic(bp, reference, "Science. 2007 May 25, 120(1 - 6): 1922 6pp");
 1112 }
 1113 
 1114 void c2bTests::heuristic_misc()
 1115 {
 1116     cout << endl;
 1117     cout << "-----------------------------" << endl;
 1118     cout << "Test miscellaneous heuristics" << endl;
 1119     cout << "-----------------------------" << endl;
 1120 
 1121     bibParser bp;
 1122     bibReference reference;
 1123 
 1124     // abstract
 1125     reference.clearReference();
 1126     reference.insert("abstract", bp.parse("abstract", "Some text."));
 1127     write(bp, reference);
 1128     heuristic(bp, reference, "...\n\nAbstract. Some text.\n");
 1129     heuristic(bp, reference, "...\n\nAbstract.\n Some text.\n");
 1130     heuristic(bp, reference, "...\n\nAbstract:\n Some text.\n");
 1131     heuristic(bp, reference, "...\n\nAbstract\n Some text.\n");
 1132     heuristic(bp, reference, "...\n\nSummary. Some text.\n");
 1133     heuristic(bp, reference, "...\n\nSummary:\n Some text.\n");
 1134     heuristic(bp, reference, "...\n\nSummary\n Some text.\n");
 1135 
 1136     // keywords
 1137     reference.clearReference();
 1138     reference.insert("keywords", bp.parse("keywords", "Some text."));
 1139     write(bp, reference);
 1140     heuristic(bp, reference, "...\n\nKeywords:\n Some text.\n");
 1141     heuristic(bp, reference, "...\n\nKeywords. Some text.\n");
 1142 
 1143     // title
 1144     reference.clearReference();
 1145     reference.insert("title", bp.parse("title", "Some text."));
 1146     write(bp, reference);
 1147     heuristic(bp, reference, "...\n\nTitle: Some text.\n");
 1148     heuristic(bp, reference, "...\n\nTitle:\n Some text.\n");
 1149 }
 1150 
 1151 void c2bTests::heuristic(bibParser& bp, const bibReference& reference, const QString& text)
 1152 {
 1153     bp.clearCurrentReference();
 1154     bp.guessFields(text);
 1155     const bool passed(bp.toBibTeX() == bp.toBibTeX(reference));
 1156     ++_test_count;
 1157     if (!passed)
 1158         ++_failed_test_count;
 1159     cout << QString("[%1]\t'%2'\n").arg(boolToStr(passed), text);
 1160     if (!passed)
 1161         qDebug() << "Guessed:\n" << bp.toBibTeX();
 1162 }
 1163 
 1164 void c2bTests::check_test(const bool passed, const QString& input, const QString& output)
 1165 {
 1166     ++_test_count;
 1167     if (!passed)
 1168         ++_failed_test_count;
 1169     cout << QString("[%1]\t'%2'\n").arg(boolToStr(passed), input);
 1170     if (!passed)
 1171         qDebug() << output;
 1172 }
 1173 
 1174 void c2bTests::write(const bibParser& bp, const bibReference& reference)
 1175 {
 1176     cout << endl;
 1177     const QStringList& bibliographicFields = bp.bibliographicFields();
 1178     for (int i = 0; i < bibliographicFields.count(); ++i)
 1179     {
 1180         const QString fvalue(reference.value(bibliographicFields.at(i)));
 1181         if (!fvalue.isEmpty())
 1182         {
 1183             const QString& fd = bibliographicFields.at(i);
 1184             const QString padding(QString().fill(' ', 12 - fd.length()));
 1185             cout << (fd + padding + " = {" + fvalue + '}') << endl;
 1186         }
 1187     }
 1188 }
 1189 
 1190 #if C2B_CHECK_PATTERNS
 1191 #include "frequentPatterns.h"
 1192 #endif
 1193 void c2bTests::heuristic_check_patterns()
 1194 {
 1195 #if C2B_CHECK_PATTERNS
 1196     frequentPatterns fp;
 1197     fp.process();
 1198     return;
 1199 #endif
 1200     cout << endl;
 1201     cout << "-----------------------------------------------------" << endl;
 1202     cout << "Check pattern clashes in heuristic author recognition" << endl;
 1203     cout << "-----------------------------------------------------" << endl;
 1204     cout << endl;
 1205     QFileInfoList flist = QDir(_bibliographic_dir).entryInfoList(QStringList() << "*.bib");
 1206     bibParser bp;
 1207     bibReference reference;
 1208     QStringList fields;
 1209     fields.append("author");
 1210     QHash<QString, int> adictionary;
 1211 
 1212     for (int f = 0; f < flist.count(); ++f)
 1213     {
 1214         const QString bib_file(flist.at(f).absoluteFilePath());
 1215         bp.initReferenceParsing(_bibliographic_dir, fields, &reference);
 1216         while (bp.referencesIn(c2bUtils::fileToString(bib_file), &reference))
 1217         {
 1218             const QString a(c2bUtils::toAscii(reference.value("author"), c2bUtils::Collation));
 1219             const QStringList as(a.split(c2bUtils::nonLetter));
 1220             for (int i = 0; i < as.count(); ++i)
 1221                 adictionary.insert(as.at(i), 1 + adictionary.value(as.at(i), 0));
 1222         }
 1223     }
 1224     QStringList word_prefix_lexicon(
 1225         c2bUtils::fileToString(":/txt/txt/word_prefix_lexicon.txt").split(c2bUtils::nonLetter, QString::SkipEmptyParts));
 1226     for (int i = 0; i < word_prefix_lexicon.count(); ++i)
 1227         word_prefix_lexicon[i].replace('_', "\\b");
 1228 
 1229     QStringList anames(adictionary.keys());
 1230     QRegExp wre;
 1231     for (int i = 0; i < word_prefix_lexicon.count(); ++i)
 1232         for (int j = 0; j < anames.count(); ++j)
 1233         {
 1234             const QString& wp = word_prefix_lexicon.at(i);
 1235             const QString& a = anames.at(j);
 1236             wre.setPattern(wp);
 1237             if (a.contains(wre))
 1238                 cout << "LEXICON CLASH: " << wp << ' ' << a << endl;
 1239         }
 1240 }
 1241 
 1242 QString c2bTests::decomposition(const QChar& c)
 1243 {
 1244     // Write char decomposition
 1245     const QString d(c.decomposition());
 1246     QString wd;
 1247     for (int i = 0; i < d.length(); ++i)
 1248         wd += '[' + d.at(i) + ']';
 1249     return wd;
 1250 }