"Fossies" - the Fresh Open Source Software Archive

Member "cb2bib-2.0.1/src/c2b/authorString.h" (12 Feb 2021, 4302 Bytes) of package /linux/privat/cb2bib-2.0.1.tar.gz:


As a special service, "Fossies" has tried to format the requested source page into HTML using (guessed) C and C++ source code syntax highlighting (style: standard), with prefixed line numbers and a code folding option. Alternatively, you can view or download the uninterpreted source code file here. For more information about "authorString.h", see the Fossies "Dox" file reference documentation and the latest Fossies "Diffs" side-by-side code changes report: 2.0.0_vs_2.0.1.

    1 /***************************************************************************
    2  *   Copyright (C) 2004-2021 by Pere Constans
    3  *   constans@molspaces.com
    4  *   cb2Bib version 2.0.1. Licensed under the GNU GPL version 3.
    5  *   See the LICENSE file that comes with this distribution.
    6  ***************************************************************************/
    7 #ifndef AUTHORSTRING_H
    8 #define AUTHORSTRING_H
    9 
   10 #include "cb2bib_utilities.h"
   11 
   12 #include <QRegExp>
   13 #include <QString>
   14 #include <QStringList>
   15 
   16 
   17 namespace author
   18 {
   19 
   20 struct unifier
   21 {
   22     unifier();
   23 
   24     QString& unifyNames(QString& author) const;
   25 
   26     QString& simplifyString(QString& author, const bool full = false) const;
   27     QString fromMedline(const QString& author) const;
   28 
   29     const QString name;
   30     const QString initials;
   31 
   32     const QString double_initials;
   33     const QString prefixes;
   34 
   35     const QString reversed_romance_name;
   36 
   37     const QRegExp reversed_name_rx;
   38     const QRegExp reversed_romance_name_rx;
   39 
   40     const QRegExp unifier_rx1;
   41     const QRegExp unifier_rx2;
   42     const QRegExp unifier_rx3;
   43     const QRegExp unifier_rx4;
   44     const QRegExp unifier_rx5;
   45     const QRegExp unifier_rx6;
   46     const QRegExp unifier_rx7;
   47 
   48     const QRegExp simplify_string_rx1;
   49     const QRegExp simplify_string_rx2;
   50     const QRegExp simplify_string_rx3;
   51     const QRegExp simplify_string_rx4;
   52     const QRegExp simplify_string_rx5;
   53     const QRegExp simplify_string_rx6;
   54     const QRegExp simplify_string_rx7;
   55 };
   56 
   57 /**
   58     Implementation of author field extraction
   59     P. Constans. A Simple Extraction Procedure for Bibliographical Author Field.
   60     arXiv:0902.0755, 2009.
   61 */
   62 struct encoder
   63 {
   64     encoder() {}
   65     explicit encoder(const QString& str)
   66     {
   67         encode(str);
   68     }
   69     QString code;
   70     QString decoded(const int position, const int length) const;
   71     QStringList fragments;
   72     unifier aunifier;
   73     void clear()
   74     {
   75         code.clear();
   76         fragments.clear();
   77     }
   78     void encode(const QString& raw);
   79     void scape(const int pos, const int length, const QChar c = QChar(' '))
   80     {
   81         const int pos0(std::max(pos, 0));
   82         const int posn(std::min(pos0 + length, code.length()));
   83         for (int i = pos0; i < posn; ++i)
   84             if (code.at(i) != 'L')
   85                 code[i] = c;
   86     }
   87     void scapePattern(const QString& pattern, const QChar c = QChar(' '))
   88     {
   89         QRegExp rx(pattern, Qt::CaseSensitive);
   90         rx.setMinimal(true);
   91         rx.setPatternSyntax(QRegExp::RegExp2);
   92         if (!rx.isValid())
   93             exit(6);
   94         int pos(0);
   95         while (pos > -1)
   96         {
   97             pos = rx.indexIn(code, pos);
   98             if (pos > -1)
   99             {
  100                 scape(pos, rx.matchedLength(), c);
  101                 pos += rx.matchedLength();
  102             }
  103         }
  104     }
  105     static inline bool hasUpper(const QString& str)
  106     {
  107         for (int i = 0; i < str.length(); i++)
  108             if (str.at(i).isLetter())
  109                 if (str.at(i).category() == QChar::Letter_Uppercase)
  110                     return true;
  111         return false;
  112     }
  113     static inline bool isCapitalName(const QString& w)
  114     {
  115         return c2bUtils::isUpperCaseString(w, std::max(0, w.lastIndexOf('_', -5))); // Skip prefixes and suffixes
  116     }
  117     static inline bool isSeparator(const QString& w)
  118     {
  119         return (w == "and") || (w == "&");
  120     }
  121     static bool isAdparticle(const QString& w);
  122     static bool isInitial(const QString& w);
  123     static bool isName(const QString& w);
  124     static bool isPlainWord(const QString& w);
  125 };
  126 
  127 } // namespace author
  128 
  129 
  130 class authorString
  131 {
  132 
  133 public:
  134     authorString();
  135     inline ~authorString() {}
  136 
  137     QString toBibTeX(const QString& author, bool full_form = false);
  138     inline QString fromMedline(const QString& author) const
  139     {
  140         return au.fromMedline(author);
  141     }
  142     inline const QString& prefixes() const
  143     {
  144         return au.prefixes;
  145     }
  146 
  147 
  148 private:
  149     QString capitalize(const QString& name) const;
  150     QString processFirstMiddle(const QString& first_middle) const;
  151     bool containLowerCaseLetter(const QString& author) const;
  152     bool isReverseOrder(const QString& author) const;
  153     static bool containUpperCaseLetter(const QString& author);
  154 
  155     QString _author_string;
  156     author::unifier au;
  157     bool _full_form;
  158 };
  159 
  160 #endif