"Fossies" - the Fresh Open Source Software Archive

Member "cb2bib-2.0.1/src/c2b/coreBibParser.cpp" (12 Feb 2021, 16152 Bytes) of package /linux/privat/cb2bib-2.0.1.tar.gz:


As a special service, "Fossies" has tried to format the requested source page into HTML format using (guessed) C and C++ source code syntax highlighting (style: standard) with prefixed line numbers and a code folding option. Alternatively, you can view or download the uninterpreted source code file here. For more information about "coreBibParser.cpp", see the Fossies "Dox" file reference documentation and the latest Fossies "Diffs" side-by-side code changes report: 2.0.0_vs_2.0.1.

    1 /***************************************************************************
    2  *   Copyright (C) 2004-2021 by Pere Constans
    3  *   constans@molspaces.com
    4  *   cb2Bib version 2.0.1. Licensed under the GNU GPL version 3.
    5  *   See the LICENSE file that comes with this distribution.
    6  ***************************************************************************/
    7 #include "coreBibParser.h"
    8 
    9 
   10 coreBibParser::coreBibParser(QObject* parento)
   11     : QObject(parento),
   12       _at(QLatin1Char('@')),
   13       _close(QLatin1Char('}')),
   14       _colon(QLatin1Char(':')),
   15       _comma(QLatin1Char(',')),
   16       _cr(13),
   17       _hyphen(QLatin1Char('-')),
   18       _lca(QLatin1Char('a')),
   19       _lcz(QLatin1Char('z')),
   20       _nl(10),
   21       _open(QLatin1Char('{')),
   22       _period(QLatin1Char('.')),
   23       _pound(QLatin1Char('#')),
   24       _quote(QLatin1Char('"')),
   25       _space(QLatin1Char(' ')),
   26       _tilde(QLatin1Char('~')),
   27       _uca(QLatin1Char('A')),
   28       _ucz(QLatin1Char('Z'))
   29 {
   30     _settingsP = settings::instance();
   31 
   32     // Setting bibliographic types
   33     setTypes();
   34     // Setting bibliographic fields
   35     setFields();
   36     // Setting regular expressions
   37     setRegularExpressions();
   38 }
   39 
   40 
   41 bool coreBibParser::referencesIn(const QString& str, bibReference* ref)
   42 {
   43     // File parsing for given fields in ref
   44     ref->clearReference();
   45     const int pos(referenceStarts(str, ref->pos));
   46     if (pos < 0)
   47         return false;
   48     referenceContents(str, ref, pos);
   49     return true;
   50 }
   51 
   52 bool coreBibParser::referenceAtKey(const QString& key, const QString& str, bibReference* ref)
   53 {
   54     // File parsing for given fields in ref
   55     ref->clearReference();
   56     const int pos(referenceStarts(key, str));
   57     if (pos < 0)
   58         return false;
   59     referenceContents(str, ref, pos);
   60     return true;
   61 }
   62 
   63 QString coreBibParser::referenceToFomattedBibTeX(const bibReference& ref) const
   64 {
   65     // Writes a BibTeX string from struct bibReference
   66     QString BibString(QLatin1Char('@') + ref.typeName + QLatin1Char('{') + ref.citeidName);
   67     const bool ConvertReferenceToLaTeX(_settingsP->value("cb2Bib/ConvertReferenceToLaTeX").toBool());
   68     const bool PostprocessMonth(_settingsP->value("cb2Bib/PostprocessMonth").toBool());
   69     const bool UseDoubleBraces(_settingsP->value("cb2Bib/UseDoubleBraces").toBool());
   70     QStringList::const_iterator it = _bibliographic_fields.begin();
   71     while (it != _bibliographic_fields.end())
   72     {
   73         QString fvalue(ref.value(*it));
   74         if (!fvalue.isEmpty())
   75         {
   76             if (ConvertReferenceToLaTeX)
   77                 c2bUtils::c2bToBib(fvalue);
   78             const QString fd(*it);
   79             const QString padding(QString().fill(' ', 12 - fd.length()));
   80             if (fd == QLatin1String("title") || fd == QLatin1String("booktitle"))
   81             {
   82                 if (UseDoubleBraces)
   83                     if (!fvalue.contains(QRegExp("^\\{.+\\}$")))
   84                         fvalue = '{' + fvalue + '}';
   85                 fvalue = QLatin1Char('{') + fvalue + QLatin1Char('}');
   86             }
   87             else if (fd == QLatin1String("month"))
   88             {
   89                 if (!PostprocessMonth)
   90                     fvalue = QLatin1Char('{') + fvalue + QLatin1Char('}');
   91             }
   92             else
   93                 fvalue = QLatin1Char('{') + fvalue + QLatin1Char('}');
   94             BibString += QLatin1String(",\n") + fd + padding + QLatin1String(" = ") + fvalue;
   95         }
   96         ++it;
   97     }
   98     BibString += QLatin1String("\n}\n");
   99     return BibString;
  100 }
  101 
  102 QString coreBibParser::referenceToBibTeX(const bibReference& ref) const
  103 {
  104     // Writes a BibTeX string from struct bibReference
  105     // No special formatting is done here
  106     QString BibString(QLatin1Char('@') + ref.typeName + QLatin1Char('{'));
  107     QStringList::const_iterator it(_bibliographic_fields.begin());
  108     while (it != _bibliographic_fields.end())
  109     {
  110         const QString fvalue(ref.value(*it));
  111         if (!fvalue.isEmpty())
  112         {
  113             const QString fd(*it);
  114             const QString padding(QString().fill(QLatin1Char(' '), 12 - fd.length()));
  115             BibString += QLatin1String(",\n") + fd + padding + QLatin1String(" = {") + fvalue + QLatin1Char('}');
  116         }
  117         ++it;
  118     }
  119     BibString += QLatin1String("\n}\n");
  120     return BibString;
  121 }
  122 
  123 QString coreBibParser::adjacentNumbers(const QString& numbers) const
  124 {
  125     // Originally for pages, used also for multiple volume, number and year
  126     QString anums(numbers);
  127     anums.replace(c2bUtils::nonLetter, QLatin1String(" "));
  128     anums = c2bUtils::simplifyString(anums);
  129     if (anums.isEmpty() || anums == QLatin1String("0"))
  130         return QString();
  131     anums.replace(' ', '-');
  132     QRegExp rx1("^(\\d+)-(\\d+)-*pp$");
  133     QRegExp rx2("^(\\d+)-(\\d+)$");
  134     if (rx1.indexIn(anums) > -1)
  135     {
  136         anums = rx1.cap(1);
  137         int ilp(rx1.cap(2).toInt() - 1);
  138         if (ilp > 0)
  139             anums += QLatin1Char('-') + QString().setNum(rx1.cap(1).toInt() + ilp); // eg, 123-7pp  ->  123 - 129
  140     }
  141     else if (rx2.indexIn(anums) > -1)
  142     {
  143         QString fp(rx2.cap(1));
  144         anums = fp + '-';
  145         QString lp(rx2.cap(2));
  146         uint lfp(fp.length());
  147         uint llp(lp.length());
  148         if (lfp > llp)
  149             lp = fp.remove(lfp - llp, llp) + lp; // eg, 123-7  ->  123 - 127
  150         anums += lp;
  151     }
  152     const QString separator(_settingsP->value("cb2Bib/PageNumberSeparator").toString());
  153     if (separator.isEmpty())
  154         anums.replace('-', " - ");
  155     else
  156         anums.replace('-', separator);
  157     return anums;
  158 }
  159 
  160 void coreBibParser::setFields()
  161 {
  162     _bibliographic_fields << QLatin1String("title") << QLatin1String("author") << QLatin1String("journal")
  163                           << QLatin1String("booktitle") << QLatin1String("series") << QLatin1String("chapter")
  164                           << QLatin1String("pages") << QLatin1String("volume") << QLatin1String("number")
  165                           << QLatin1String("edition") << QLatin1String("institution") << QLatin1String("organization")
  166                           << QLatin1String("school") << QLatin1String("address") << QLatin1String("month")
  167                           << QLatin1String("year") << QLatin1String("editor") << QLatin1String("publisher")
  168                           << QLatin1String("abstract") << QLatin1String("keywords") << QLatin1String("isbn")
  169                           << QLatin1String("issn") << QLatin1String("doi") << QLatin1String("eprint")
  170                           << QLatin1String("file") << QLatin1String("url") << QLatin1String("note")
  171                           << QLatin1String("annote");
  172     _sorted_bibliographic_fields = _bibliographic_fields;
  173     std::sort(_sorted_bibliographic_fields.begin(), _sorted_bibliographic_fields.end());
  174 }
  175 
  176 void coreBibParser::setTypes()
  177 {
  178     _bibliographic_types << QLatin1String("") << QLatin1String("article") << QLatin1String("book")
  179                          << QLatin1String("booklet") << QLatin1String("conference") << QLatin1String("inbook")
  180                          << QLatin1String("incollection") << QLatin1String("inproceedings") << QLatin1String("manual")
  181                          << QLatin1String("mastersthesis") << QLatin1String("misc") << QLatin1String("periodical")
  182                          << QLatin1String("phdthesis") << QLatin1String("proceedings") << QLatin1String("techreport")
  183                          << QLatin1String("unpublished");
  184 }
  185 
  186 void coreBibParser::setRegularExpressions()
  187 {
  188     _bib_begin0_re = QRegExp("^\\s*@\\w+\\s*\\{");
  189     _bib_begin1_re = QRegExp("[\\r\\n]\\s*@\\w+\\s*\\{");
  190     _bib_begin_at = txtmatcher("@", Qt::CaseSensitive, 0);
  191     _bib_begin_re = QRegExp("^@\\w+\\s*\\{");
  192     _bib_field_delimiter = txtmatcher("=", Qt::CaseSensitive, 0);
  193     _field_re = QRegExp("\\b(" + _bibliographic_fields.join("|") + ")\\b");
  194 }
  195 
  196 void coreBibParser::initReferenceParsing(const QString& dir, const QStringList& fields, bibReference* ref)
  197 {
  198     setReferenceParsingDir(dir);
  199     // Init file parsing for given fields
  200     ref->clearFields();
  201     ref->clearReference();
  202     _current_bibliographic_fields = fields;
  203 }
  204 
  205 bibReference coreBibParser::wholeReference(const QString& str) const
  206 {
  207     // Skip positionValue as it is not needed here
  208     bibReference ref;
  209     int pos(referenceStarts(str));
  210     if (pos < 0)
  211         return ref;
  212     QString str_ref(referenceAt(str, &pos));
  213     c2bUtils::fullBibToC2b(str_ref);
  214     c2bUtils::simplifyString(str_ref);
  215     setReferenceEnd(&str_ref);
  216     _parse_reference_fields(str_ref, _bibliographic_fields, &ref);
  217     return ref;
  218 }
  219 
  220 void coreBibParser::referenceContents(const QString& str, bibReference* ref, int pos) const
  221 {
  222     // File parsing for_current_bibliographic_fields in ref
  223     ref->positionValue = pos;
  224     QString str_ref(referenceAt(str, &pos));
  225     ref->pos = pos;
  226     ref->rawReference = str_ref;
  227     c2bUtils::bibToC2b(str_ref);
  228     c2bUtils::simplifyString(str_ref);
  229     setReferenceEnd(&str_ref);
  230     ref->unicodeReference = str_ref;
  231     _parse_reference_fields(str_ref, _current_bibliographic_fields, ref);
  232     if (ref->contains(QLatin1String("file")))
  233         if (!QDir::isAbsolutePath(ref->value(QLatin1String("file"))))
  234             (*ref)[QLatin1String("file")] = _bib_file_dir + ref->value(QLatin1String("file"));
  235 }
  236 
  237 QString coreBibParser::singleReferenceField(const QString& field, const bibReference& ref) const
  238 {
  239     // Simplified _parse_reference_fields function, excludes month and keywords peculiarities
  240     const QChar* const b(ref.unicodeReference.constData());
  241     const int length(ref.unicodeReference.length());
  242     if (length == 0 || b[0] != _at || field.isEmpty())
  243         return QString();
  244     const QChar lclc(field.at(field.length() - 1).toLower()), lcuc(field.at(field.length() - 1).toUpper());
  245     int cursor(1);
  246     while (true)
  247     {
  248         const int cd(_bib_field_delimiter.indexIn(b, length, cursor));
  249         if (cd < 1)
  250             break;
  251         const int fends(b[cd - 1] == _space ? cd - 2 : cd - 1);
  252         if (b[fends] != lclc && b[fends] != lcuc)
  253         {
  254             cursor = cd + 2;
  255             continue;
  256         }
  257         for (cursor = fends; cursor > 1; --cursor)
  258             if ((b[cursor] >= _lca && b[cursor] <= _lcz) || (b[cursor] >= _uca && b[cursor] <= _ucz))
  259                 continue;
  260             else
  261                 break;
  262         if (field.compare(ref.unicodeReference.midRef(cursor + 1, fends - cursor), Qt::CaseInsensitive) != 0)
  263         {
  264             cursor = cd + 2;
  265             continue;
  266         }
  267         const int vstarts(b[cd + 1] == _space ? cd + 2 : cd + 1);
  268         const int vends(_in_braces_ends(b, vstarts, length));
  269         if (vends > 0)
  270         {
  271             const int vs(b[vstarts + 1] == _space ? vstarts + 2 : vstarts + 1);
  272             return ref.unicodeReference.mid(vs, std::max(0, vends - vs + 1));
  273         }
  274         for (cursor = vstarts; cursor < length; ++cursor)
  275             if (!b[cursor].isLetterOrNumber())
  276                 break;
  277         const int vnbends(cursor);
  278         if (b[cursor] == _space)
  279             ++cursor;
  280         if (b[cursor] == _comma)
  281             return ref.unicodeReference.mid(vstarts, std::max(0, vnbends - vstarts));
  282         cursor = cd + 2;
  283     }
  284     return QString();
  285 }
  286 
  287 static inline const QString* _ci_fields_index_of(const QStringList& fields, const ushort* const b, const int length)
  288 {
  289     for (QStringList::const_iterator i = fields.constBegin(); i != fields.constEnd(); ++i)
  290     {
  291         if (length != i->length() || (b[0] | 0x20) != i->at(0).unicode())
  292             continue;
  293         for (int l = 1; l < length; ++l)
  294             if ((b[l] | 0x20) != i->at(l).unicode())
  295                 goto next;
  296         return &(*i);
  297 next:
  298         continue;
  299     }
  300     return nullptr;
  301 }
  302 
  303 void coreBibParser::_parse_reference_fields(const QString& bibstr, const QStringList& fields, bibReference* ref) const
  304 {
  305     const QChar* const b(bibstr.constData());
  306     const int length(bibstr.length());
  307 
  308     if (length == 0 || b[0] != _at)
  309         return;
  310     int cursor;
  311     for (cursor = 1; cursor < length; ++cursor)
  312         if (!b[cursor].isLetterOrNumber())
  313             break;
  314     const int tends(cursor);
  315     if (b[cursor] == _space)
  316         ++cursor;
  317     if (b[cursor] == _open)
  318     {
  319         ref->typeName = bibstr.mid(1, tends - 1).toLower();
  320         ++cursor;
  321         if (b[cursor] == _space)
  322             ++cursor;
  323         const int kstarts(cursor);
  324         for (; cursor < length; ++cursor)
  325             if (b[cursor].isLetterOrNumber() || b[cursor] == _colon || b[cursor] == _hyphen || b[cursor] == _period)
  326                 continue;
  327             else
  328                 break;
  329         const int kends(cursor);
  330         if (b[cursor] == _space)
  331             ++cursor;
  332         if (b[cursor] == _comma)
  333             ref->citeidName = bibstr.mid(kstarts, kends - kstarts);
  334     }
  335     if (fields.isEmpty())
  336         return;
  337     while (true)
  338     {
  339         const int cd(_bib_field_delimiter.indexIn(b, length, cursor));
  340         if (cd < 1)
  341             break;
  342 
  343         const int fends(b[cd - 1] == _space ? cd - 2 : cd - 1);
  344         for (cursor = fends; cursor > 1; --cursor)
  345             if ((b[cursor] >= _lca && b[cursor] <= _lcz) || (b[cursor] >= _uca && b[cursor] <= _ucz))
  346                 continue;
  347             else
  348                 break;
  349         const QString* const fp(
  350             _ci_fields_index_of(fields, reinterpret_cast<const ushort*>(b + cursor + 1), fends - cursor));
  351         if (fp == nullptr)
  352         {
  353             cursor = cd + 2;
  354             continue;
  355         }
  356         const QString& field(*fp);
  357 
  358         const int vstarts(b[cd + 1] == _space ? cd + 2 : cd + 1);
  359         if (b[vstarts] != _open && field == QLatin1String("month"))
  360         {
  361             // Month macro
  362             for (cursor = vstarts; cursor < length; ++cursor)
  363                 if (b[cursor].isLetterOrNumber() || b[cursor] == _space || b[cursor] == _tilde || b[cursor] == _pound ||
  364                     b[cursor] == _quote)
  365                     continue;
  366                 else
  367                     break;
  368             if (b[cursor] == _comma && !ref->contains(field))
  369                 ref->insert(field, bibstr.mid(vstarts, cursor - vstarts).trimmed());
  370             cursor = cd + 2;
  371             continue;
  372         }
  373         const int vends(_in_braces_ends(b, vstarts, length));
  374         if (vends > 0)
  375         {
  376             const int vs(b[vstarts + 1] == _space ? vstarts + 2 : vstarts + 1);
  377             const int vl(vends - vs + 1);
  378             if (vl > 0)
  379             {
  380                 if (!ref->contains(field))
  381                     ref->insert(field, bibstr.mid(vs, vl));
  382                 else if (field == QLatin1String("keywords"))
  383                     ref->insert(field, ref->value(field) + QLatin1Char(';') + bibstr.mid(vs, vl));
  384             }
  385             cursor = cd + 2;
  386             continue;
  387         }
  388 
  389         for (cursor = vstarts; cursor < length; ++cursor)
  390             if (!b[cursor].isLetterOrNumber())
  391                 break;
  392         const int vnbends(cursor);
  393         if (b[cursor] == _space)
  394             ++cursor;
  395         if (b[cursor] == _comma)
  396         {
  397             const int vl(vnbends - vstarts);
  398             if (vl > 0)
  399             {
  400                 if (!ref->contains(field))
  401                     ref->insert(field, bibstr.mid(vstarts, vl));
  402                 else if (field == QLatin1String("keywords"))
  403                     ref->insert(field, ref->value(field) + QLatin1Char(';') + bibstr.mid(vstarts, vl));
  404             }
  405         }
  406         cursor = cd + 2;
  407     }
  408 }
  409 
  410 int coreBibParser::_in_braces_ends(const QChar* const b, const int p, const int length) const
  411 {
  412     if (b[p] == _open)
  413         for (int i = p + 1, open_braces = 1; i < length; ++i)
  414         {
  415             if (b[i] < _open)
  416                 continue;
  417             if (b[i] == _open)
  418                 ++open_braces;
  419             else if (b[i] == _close)
  420                 --open_braces;
  421             if (open_braces == 0)
  422                 return (b[i - 1] == _space) ? i - 2 : i - 1;
  423         }
  424     else if (b[p] == _quote)
  425         for (int i = p + 1; i < length; ++i)
  426             if (b[i] == _quote && b[i + 1] == _comma)
  427                 return (b[i - 1] == _space) ? i - 2 : i - 1;
  428     return 0;
  429 }