"Fossies" - the Fresh Open Source Software Archive

Member "cb2bib-2.0.1/src/c2b/authorString.cpp" (12 Feb 2021, 26152 Bytes) of package /linux/privat/cb2bib-2.0.1.tar.gz:


As a special service, "Fossies" has tried to format the requested source page into HTML using (guessed) C and C++ source code syntax highlighting (style: standard), with prefixed line numbers and a code folding option. Alternatively, you can view or download the uninterpreted source code file here. For more information about "authorString.cpp", see the Fossies "Dox" file reference documentation and the latest Fossies "Diffs" side-by-side code changes report: 2.0.0_vs_2.0.1.

    1 /***************************************************************************
    2  *   Copyright (C) 2004-2021 by Pere Constans
    3  *   constans@molspaces.com
    4  *   cb2Bib version 2.0.1. Licensed under the GNU GPL version 3.
    5  *   See the LICENSE file that comes with this distribution.
    6  ***************************************************************************/
    7 #include "authorString.h"
    8 
    9 
   10 namespace author
   11 {
   12 
   13 unifier::unifier() // Builds every pattern used by unifyNames(), simplifyString() and fromMedline()
   14     : name("(?:\\w[-'\\w]{1,})"), // A name: one word character followed by at least one of [-'\w]
   15       initials("(?:\\s*-{0,1}\\b\\w\\b\\.{0,1}){1,3}"), // One to three single-character initials; hyphen before and period after are optional
   16       double_initials("(Al|Ch|Kh|Md|Th|Xh|Ya|Yu|Zs)"), // Two-letter initials, captured so that replacements can reuse the match
   17       prefixes("(da|de|dal|del|der|di|do|du|dos|el|la|le|lo|van|vande|von|zur)"), // Name prefixes, captured likewise
   18       reversed_romance_name("(?:\\w[-'\\w]{1,})\\s+(?:\\w[-'\\w]{1,}),\\s*(?:\\w[-'\\w]{1,}|" + initials + ')'), // 'n1 n2, n3' where n3 is a name or initials
   19 
   20       reversed_name_rx('^' + name + ','), // String begins with a name immediately followed by a comma
   21       // Cases 'n1 n2, n3', 'n1 n2, n3 and n4 n5, n6', 'n1 n2, n3 and n4, n5 n6' are necessarily reverse order
   22       reversed_romance_name_rx("^(?:" + reversed_romance_name + '|' + reversed_romance_name + " and " +
   23                                reversed_romance_name + '|' + reversed_romance_name +
   24                                " and (?:\\w[-'\\w]{1,}),\\s*(?:\\w[-'\\w]{1,}|\\w[-'\\w]{1,} \\w[-'\\w]{1,}|" + initials +
   25                                "))$"), // Anchored at both ends: the whole string must be one of the three cases
   26 
   27       unifier_rx1("(\\w\\w)\\si\\s(\\w\\w)(?!d\\b)", Qt::CaseSensitive), // Lone 'i' between two words; the lookahead skips a following three-letter word ending in 'd' (e.g. 'and')
   28       unifier_rx2("\\b" + prefixes + "\\s(?!(?:,|and\\b))", Qt::CaseInsensitive), // Prefix plus one whitespace, unless a comma or 'and' comes next
   29       unifier_rx3("\\b" + double_initials + "\\.", Qt::CaseInsensitive), // Two-letter initial followed by a period
   30       unifier_rx4("\\b(\\w[-'\\w]{2,})\\W+Jr\\.", Qt::CaseInsensitive), // Word of three or more characters, non-word characters, then 'Jr.'
   31       unifier_rx5("\\b(\\w[-'\\w]{2,})\\W+Jr\\b", Qt::CaseInsensitive), // Same as above for 'Jr' written without the period
   32       unifier_rx6("(\\w),{0,1}\\s(II|III|IV)\\b", Qt::CaseSensitive), // Word character, optional comma, whitespace, then a roman-numeral suffix
   33       unifier_rx7("([^\\w-])[a-z](?=[^\\w'])"), // Isolated a-z letter: preceded by a non-word, non-hyphen character and followed by a non-word, non-apostrophe one
   34 
   35       simplify_string_rx1(QString("%1(?=\\w)").arg(QChar(8217))), // Character U+2019 (decimal 8217) directly followed by a word character
   36       simplify_string_rx2("\'(?!\\w)"), // Apostrophe that is not followed by a word character
   37       simplify_string_rx3("\\d\\d+"), // Run of two or more digits
   38       simplify_string_rx4("\\d(?=\\s\\w\\w)"), // Single digit followed by whitespace and two word characters
   39       simplify_string_rx5("\\d[\\*,;][a-z]\\b"), // Digit, one of '*', ',' or ';', then an a-z letter ending at a word boundary
   40       simplify_string_rx6("\\d"), // Any remaining digit
   41       simplify_string_rx7("[^-',;:\\|/&\\.\\s\\w]") {} // Any character outside the set - ' , ; : | / & . whitespace and word characters
   42 
   43 
   44 QString& unifier::unifyNames(QString& author) const
   45 {
   46     // Composite Names temporary unified
   47     author.replace(unifier_rx1, "\\1+i+\\2");
   48     author.replace(unifier_rx2, "\\1+");
   49     author.replace("Da+", "Da ", Qt::CaseSensitive);
   50     author.replace(unifier_rx3, "\\1+ ");
   51     if (author.contains("Jr", Qt::CaseInsensitive))
   52     {
   53         // Remove period and first comma if there
   54         author.replace(unifier_rx4, "\\1+JR");
   55         author.replace(unifier_rx5, "\\1+JR");
   56     }
   57     if (author.contains('I', Qt::CaseSensitive))
   58         author.replace(unifier_rx6, "\\1+\\2");
   59     author.replace('+', '_');
   60     author.replace(unifier_rx7, "\\1 "); // Cleaning affiliation 'superscripts'. Avoid cleaning 'M.-m. Lin'
   61     return author;
   62 }
   63 
   64 QString& unifier::simplifyString(QString& author, const bool full) const
   65 {
   66     if (full) // Characters | and : are used for the encoder
   67     {
   68         author.replace('|', ' ');
   69         author.replace(':', ' ');
   70     }
   71     author.replace(simplify_string_rx1, "\'"); // Normalize apostrophe
   72     author.remove(simplify_string_rx2);        // Remove spurious apostrophes
   73     author.replace(simplify_string_rx3, "/");  // Break dates, addresses, etc, but remove from author's foot notes.
   74     author.replace(simplify_string_rx4, ",");  // Help no-separator designs, and also break zip codes.
   75     author.replace(simplify_string_rx5, " ");
   76     author.remove(
   77         simplify_string_rx6); // Better remove if no conflict. It will help to not confuse with chemical formula.
   78     author.replace(simplify_string_rx7, " ");
   79     author = c2bUtils::simplifyString(author);
   80     return author;
   81 }
   82 
   83 QString unifier::fromMedline(const QString& author) const
   84 {
   85     // Preprocess Author from Medline 'AAAAAAA BB' to Aaaaaaa, BB'
   86     // which can be unambiguously translated to 'B. B. Aaaaaaa'
   87     // Takes care of cb2Bib included prefixes and suffixes
   88     // FAU  -  Foa, Edna B
   89     // AU   -  Foa EB
   90     // FAU  -  Steketee, Gail S
   91     // AU   -  Steketee GS
   92 
   93     QString FullN(author.simplified());
   94     FullN.replace(QRegExp("\\b" + prefixes + "\\s", Qt::CaseInsensitive), "\\1+");
   95     FullN.replace('+', '_');
   96     QStringList parts;
   97     QString LastN;
   98     if (FullN.contains(',')) // Some FAU are 'Last1 Last2, First'
   99     {
  100         parts = FullN.split(',', QString::SkipEmptyParts);
  101         if (parts.count() > 1)
  102             LastN = parts.takeFirst();
  103     }
  104     else
  105     {
  106         parts = FullN.split(' ', QString::SkipEmptyParts);
  107         if (parts.count() > 1)
  108             LastN = parts.takeFirst();
  109     }
  110     FullN = parts.join(" ");
  111     parts = FullN.split(' ', QString::SkipEmptyParts);
  112     if (!LastN.isEmpty())
  113         if (c2bUtils::isUpperCaseString(LastN))
  114         {
  115             LastN = LastN.toLower();
  116             LastN[0] = LastN.at(0).toUpper();
  117             int ii(LastN.indexOf(QRegExp("[-']")));
  118             if (ii++ > 0)
  119                 LastN[ii] = LastN.at(ii).toUpper();
  120         }
  121     QString FirstN;
  122     for (int i = 0; i < parts.count(); ++i)
  123         FirstN += ' ' + parts.at(i);
  124     QString isSuffix;
  125     if (parts.count() > 0)
  126         isSuffix = parts.last();
  127     if (isSuffix.contains(QRegExp("\\b(?:2nd|3rd|Jr|II|III)\\b")))
  128     {
  129         isSuffix.replace(QRegExp("\\b2nd\\b"), "II");
  130         isSuffix.replace(QRegExp("\\b3rd\\b"), "III");
  131         LastN += ' ' + isSuffix;
  132         FirstN.remove(QRegExp("\\b(?:2nd|3rd|Jr|II|III)\\b"));
  133     }
  134     LastN.replace(QRegExp(prefixes + '_', Qt::CaseInsensitive), "\\1 ");
  135     FullN = LastN + ',' + FirstN;
  136     return FullN;
  137 }
  138 
  139 
  140 /**
  141     Implementation of author field extraction
  142     P. Constans. A Simple Extraction Procedure for Bibliographical Author Field.
  143     arXiv:0902.0755, 2009.
  144 */
  145 void encoder::encode(const QString& raw)
  146 {
  147     clear();
  148     QString str(raw);
  149     aunifier.unifyNames(str);
  150     int position(0);
  151     int length(0);
  152     for (int i = 0; i < str.length(); ++i)
  153     {
  154         const QChar& si = str[i];
  155         if (si.isLetter())
  156             ++length;
  157         else if (si == '_')
  158             ++length;
  159         else if (si == '-')
  160             ++length;
  161         else if (si == '\'')
  162             ++length;
  163         else
  164         {
  165             if (length > 0)
  166                 fragments.append(str.mid(position, length));
  167             position = i + 1;
  168             length = 0;
  169             if (si != ' ')
  170                 fragments.append(str.at(i));
  171         }
  172     }
  173     if (length > 0)
  174         fragments.append(str.mid(position, length));
  175     for (int i = 0; i < fragments.count(); ++i)
  176     {
  177         const QString& w = fragments.at(i);
  178         if (isSeparator(w))
  179             code += '&';
  180         else if (isAdparticle(w))
  181             code += 'a';
  182         else if (isInitial(w))
  183             code += 'I';
  184         else if (isPlainWord(w))
  185             code += 'w';
  186         else if (isName(w))
  187         {
  188             if (isCapitalName(w))
  189                 code += 'N';
  190             else
  191                 code += 'n';
  192         }
  193         else if (w.at(0) == '.')
  194             code += 'p';
  195         else if (w.at(0) == ',')
  196             code += ',';
  197         else if (w.at(0) == ';')
  198             code += ';';
  199         else if (w.at(0) == ':')
  200             code += ':';
  201         else if (w.at(0) == '|')
  202             code += 'L';
  203         else
  204             code += 'o';
  205     }
  206     scapePattern("aL+[nN]{1,2}");
  207     scapePattern("a[nNw]&L+[nN]{1,2}"); // in Linear and / Sublinear Time
  208     scapePattern(":L+[InN]{1,2}");      // ... Structure Classification: / A Survey
  209     scapePattern("[nN]*&L[nN]L");       // Not an & for author
  210 }
  211 
  212 QString encoder::decoded(const int position, const int length) const
  213 {
  214     if (position < 0)
  215         return QString();
  216     if (length < 1 || position + length > fragments.count())
  217         return QString();
  218     QString d(fragments.at(position));
  219     for (int i = 1; i < length; ++i)
  220         d += ' ' + fragments.at(position + i);
  221     // Above extra spaces are fine, except in these cases
  222     d.replace(" . -", ".-");
  223     d.replace(" ,", ",");
  224     return d;
  225 }
  226 
  227 bool encoder::isPlainWord(const QString& w)
  228 {
  229     if (w.length() > 1)
  230     {
  231         if (w.contains('_') || w.contains('-'))
  232             return c2bUtils::isLowerCaseString(w);
  233         if (w.at(0).isLetter())
  234             if (w.at(0).category() == QChar::Letter_Lowercase)
  235                 return true;
  236     }
  237     return false;
  238 }
  239 
  240 bool encoder::isInitial(const QString& w)
  241 {
  242     if (w.length() == 1)
  243         if (w.at(0).isLetter())
  244             return w.at(0).isUpper();
  245     if (w.length() == 2)
  246         if (w.at(0) == '-')
  247             if (w.at(1).isLetter())
  248                 return true; // Chinese composite might(?) be lower
  249     return false;
  250 }
  251 
  252 bool encoder::isName(const QString& w)
  253 {
  254     if (w.length() < 2)
  255         return false;
  256     if (w.at(0).isUpper())
  257         return true;
  258     if (w.contains('_'))
  259         return hasUpper(w);
  260     return false;
  261 }
  262 
  263 bool encoder::isAdparticle(const QString& w)
  264 {
  265     const int ws(w.size());
  266     if (ws < 2)
  267         return false;
  268     if (ws > 6)
  269         return false;
  270     const QByteArray ba(w.toLatin1());
  271     const char* s = ba.data();
  272     const int ss(ws * int(sizeof(char)));
  273     if (ws == 2)
  274     {
  275         if (memcmp("of", s, ss) == 0)
  276             return true;
  277         if (memcmp("on", s, ss) == 0)
  278             return true;
  279         if (memcmp("to", s, ss) == 0)
  280             return true;
  281         if (memcmp("in", s, ss) == 0)
  282             return true;
  283         if (memcmp("as", s, ss) == 0)
  284             return true;
  285         if (memcmp("vs", s, ss) == 0)
  286             return true;
  287         if (memcmp("at", s, ss) == 0)
  288             return true;
  289         if (memcmp("is", s, ss) == 0)
  290             return true;
  291         if (memcmp("an", s, ss) == 0)
  292             return true;
  293     }
  294     if (ws == 3)
  295     {
  296         if (memcmp("for", s, ss) == 0)
  297             return true;
  298         if (memcmp("but", s, ss) == 0)
  299             return true;
  300         if (memcmp("are", s, ss) == 0)
  301             return true;
  302         if (memcmp("its", s, ss) == 0)
  303             return true;
  304         if (memcmp("the", s, ss) == 0)
  305             return true;
  306     }
  307     if (ws == 4)
  308     {
  309         if (memcmp("from", s, ss) == 0)
  310             return true;
  311         if (memcmp("with", s, ss) == 0)
  312             return true;
  313         if (memcmp("into", s, ss) == 0)
  314             return true;
  315     }
  316     if (ws == 6)
  317     {
  318         if (memcmp("within", s, ss) == 0)
  319             return true;
  320     }
  321     return false;
  322 }
  323 
  324 } // namespace author
  325 
  326 
  327 /** \page authorproc Processing of Author Names
  328 
  329     cb2Bib automatically processes the author names string using a set of
  330     heuristic rules. First, the author separator is identified. Second, it
  331     is decided whether author names are in natural order, in reverse order, or
  332     in the 'Abcd, E., F. Ghij, ...' mixed order.
  333 
  334 */
  335 authorString::authorString() : _full_form(false) {} // Full-form output starts disabled; toBibTeX() overwrites the flag on every call
  336 
  337 
  338 /** \page authorproc
  339 
  340    Cleanup author string:
  341 
  342    - Escape BibTeX to Unicode
  343 
  344    - Remove digits from authors string
  345 
  346    - Remove any character except <tt>-',;&\\.\\s\\w</tt>
  347 
  348    - Simplify white spaces
  349 
  350    - Consider composing prefixes <tt>(da|de|dal|del|der|di|do|du|dos|el|la|le|lo|van|vande|von|zur)</tt>
  351 
  352    - Consider composing suffixes <tt>(II|III|IV|Jr)</tt>
  353 
  354    - Some publishers use superscripts to refer to multiple author affiliations.
  355    Text clipboard copying loses superscript formatting. Author strings are
  356    cleaned of 'orphan' lowercase, single letters in a preprocessing step.
  357    Everything matching the pattern <b>[a-z]</b> is removed. Fortunately,
  358    abbreviated initials are most normally input as uppercase letters, thus
  359    permitting a correct superscript clean up. \n <em>Caution:</em> Lowercase,
  360    single, a to z letters are removed from the author's string.\n <em>Caution:</em>
  361    Superscripts <b>will be added to the author's Last Name</b> if no separation is
  362    provided. Users should watch for this and correct these cases.
  363 
  364 
  365    Rules to identify separators:
  366    - Contains comma and semicolon -> ';'
  367    - Contains pattern <tt>'^Abcd, E.-F.,'</tt> -> ','
  368    - Contains pattern <tt>'^Abcd,'</tt> -> 'and'
  369    - Contains comma -> ','
  370    - Contains semicolon -> ';'
  371    - Any other -> 'and'
  372 
  373 */
  374 QString authorString::toBibTeX(const QString& author, bool full_form)
  375 {
  376     _full_form = full_form;
  377     _author_string = author;
  378     // BibTeX braces interfere with authorString, remove them even though some BibTeX meaning might be lost
  379     _author_string.remove('{');
  380     _author_string.remove('}');
  381     au.simplifyString(_author_string, true);
  382     au.unifyNames(_author_string);
  383     const bool has_comma(_author_string.contains(','));
  384     const bool has_semicolon(_author_string.contains(';'));
  385     const bool has_ands(_author_string.count(" and ") > 1);
  386     const bool is_first_reversed(_author_string.contains(au.reversed_name_rx));
  387     const bool is_special_case(_author_string.contains(au.reversed_romance_name_rx));
  388     bool is_string_reversed((has_comma && has_semicolon) || (has_comma && has_ands) || is_special_case);
  389 
  390     QString separator;
  391     if (is_special_case)
  392         separator = " and ";
  393     else if (has_comma && has_semicolon)
  394         separator = ';'; // Multiple Authors, separated by semicolon, reversed naming
  395     else if (has_comma)
  396     {
  397         if (is_first_reversed)
  398         {
  399             if (_author_string.contains(QRegExp('^' + au.name + ",(?:\\s*-{0,1}\\b\\w\\b\\.){1,3},\\s*" + au.name)))
  400             {
  401                 _author_string.replace(QRegExp("\\bJr.", Qt::CaseSensitive), "Jr");
  402                 _author_string.replace(".,", ".;");
  403                 // Reversed, comma separated 'Abrahamsson, A.-L., Springett, J., Karlsson, L., Ottosson, T.'
  404                 separator = ';';
  405                 is_string_reversed = true;
  406             }
  407             else if (_author_string.contains(QRegExp('^' + au.name + ',' + au.initials + ',')))
  408             {
  409                 _author_string.replace(QRegExp("^([-'\\w]+),"), "\\1 ");
  410                 separator = ','; // Mixed naming 'Smith, J.-L., R. Jones, and K. Gibbons'
  411             }
  412             else
  413                 separator = " and "; // Reversed naming
  414         }
  415         else if (has_ands)
  416             separator = " and ";
  417         else // Natural naming
  418             separator = ',';
  419     }
  420     else if (has_semicolon)
  421         separator = ';'; // Multiple Authors, separated by semicolon
  422     else
  423         separator = " and ";
  424     c2bUtils::debug(QObject::tr("Separator: |%1|").arg(separator));
  425     c2bUtils::debug("1--|" + _author_string + '|');
  426     _author_string.replace(QRegExp("\\band\\b", Qt::CaseInsensitive), separator);
  427     _author_string.replace(QRegExp("\\s&\\s", Qt::CaseInsensitive), separator);
  428     c2bUtils::debug("2--|" + _author_string + '|');
  429     _author_string.remove(QRegExp("[^\\w\\.]+$")); // Removing of duplicate commas and semicolons
  430     _author_string.replace(QRegExp(",\\s*"), ",");
  431     c2bUtils::debug("3--|" + _author_string + '|');
  432     _author_string.replace(QRegExp(",+"), ",");
  433     _author_string.replace(QRegExp(";\\s*"), ";");
  434     _author_string.replace(QRegExp(";+"), ";");
  435     c2bUtils::debug("4--|" + _author_string + '|');
  436     const bool are_authors_in_uppercase(containUpperCaseLetter(_author_string) &&
  437                                         !containLowerCaseLetter(_author_string));
  438     if (are_authors_in_uppercase)
  439         c2bUtils::debug("Input Authors in Uppercase");
  440     QStringList authors;
  441     if (separator == " and ")
  442         authors = _author_string.split(QRegExp("\\band\\b"));
  443     else
  444         authors = _author_string.split(separator);
  445 
  446     // Setting author ordering
  447     const QString first_author(authors.first().trimmed());
  448     bool is_current_reversed(is_string_reversed || is_first_reversed || isReverseOrder(first_author));
  449     const QString last_author(authors.last().trimmed());
  450     const bool is_last_reversed(is_string_reversed || last_author.contains(au.reversed_name_rx) ||
  451                                 isReverseOrder(last_author));
  452     const bool is_string_mixed(is_current_reversed && !is_last_reversed);
  453     if (is_string_mixed) // Mixed naming 'Smith, J., R. Jones'
  454         c2bUtils::debug("Mixed order");
  455 
  456     // Process each author name
  457     for (int ai = 0; ai < authors.count(); ++ai)
  458     {
  459         QString author_i(authors.at(ai));
  460         c2bUtils::debug(author_i);
  461         author_i.replace(QRegExp("\\.{0,1}\\s{0,1}-"), "-"); // Abbreviated cases, eg M.-H. Something
  462         author_i.replace(QRegExp("[^-'\\w,]"), " ");         // Only these characters compose a name; keep commas
  463         author_i = c2bUtils::simplifyString(author_i);
  464 
  465         // Split author name
  466         QStringList fore_name_parts;
  467         QString last_name;
  468         if (is_current_reversed)
  469         {
  470             const QStringList parts(author_i.split(',', QString::SkipEmptyParts));
  471             const int nparts(parts.count());
  472             if (nparts == 2)
  473             {
  474                 QStringList p(parts.first().split(' ', QString::SkipEmptyParts));
  475                 if (p.count() > 0)
  476                     last_name = p.takeLast();
  477                 fore_name_parts = parts.last().split(' ', QString::SkipEmptyParts) + p;
  478             }
  479             else if (nparts == 3)
  480             {
  481                 QStringList p(parts.first().split(' ', QString::SkipEmptyParts));
  482                 if (p.count() > 0)
  483                     last_name = p.takeLast();
  484                 fore_name_parts = parts.at(1).split(' ', QString::SkipEmptyParts) + p;
  485                 if (parts.last().contains(QRegExp("^(?:Jr|II|III|IV)$"))) // If otherwise, ignore it
  486                     last_name += '_' + parts.last();
  487             }
  488             else
  489             {
  490                 fore_name_parts = author_i.split(' ', QString::SkipEmptyParts);
  491                 if (fore_name_parts.count() > 0)
  492                     last_name = fore_name_parts.takeFirst();
  493             }
  494             c2bUtils::debug("Reversed order");
  495         }
  496         else
  497         {
  498             fore_name_parts = author_i.split(' ', QString::SkipEmptyParts);
  499             if (fore_name_parts.count() > 0)
  500                 last_name = fore_name_parts.takeLast();
  501             c2bUtils::debug("Natural order");
  502         }
  503 
  504         // Process first and middle names
  505         QString author_name;
  506         for (int i = 0; i < fore_name_parts.count(); ++i)
  507         {
  508             c2bUtils::debug("First and Midle: " + fore_name_parts.at(i));
  509             if (fore_name_parts.at(i).contains('-')) // Composite names
  510             {
  511                 const QStringList fnpi(fore_name_parts.at(i).split('-'));
  512                 if (fnpi.count() > 1)
  513                 {
  514                     author_name += processFirstMiddle(fnpi.at(0)) + '-';
  515                     author_name += processFirstMiddle(fnpi.at(1)) + ' '; // Shouldn't be more than 2 parts...
  516                 }
  517             }
  518             else // Regular names
  519             {
  520                 QString fore_name(fore_name_parts.at(i));
  521                 const int fore_length(fore_name.length());
  522                 const bool is_uppercase(!containLowerCaseLetter(fore_name));
  523                 if (fore_name_parts.count() == 1 && fore_length > 1 &&
  524                     !fore_name.contains(QRegExp("\\b" + au.double_initials + '_', Qt::CaseInsensitive)) &&
  525                     !are_authors_in_uppercase && is_uppercase)
  526                 {
  527                     // Cases 'Last, FST': Always abbreviated, no call to processFirstMiddle
  528                     for (int l = 0; l < fore_length; ++l)
  529                         author_name += fore_name[l] + ". ";
  530                 }
  531                 else if (fore_name_parts.count() == 2 && fore_length > 1 && fore_length < 3 && is_current_reversed &&
  532                          !are_authors_in_uppercase && is_uppercase)
  533                 {
  534                     // Cases 'Last1 Last2, FST': Always abbreviated, no call to processFirstMiddle
  535                     for (int l = 0; l < fore_length; ++l)
  536                         author_name += fore_name[l] + ". ";
  537                 }
  538                 else if (i == 1 && fore_name_parts.count() == 2 && fore_length > 1 && fore_length < 3 &&
  539                          !is_current_reversed && !are_authors_in_uppercase && is_uppercase)
  540                 {
  541                     // Cases 'Fore IJ Last': Process initials
  542                     for (int l = 0; l < fore_length; ++l)
  543                         author_name += fore_name[l] + ". ";
  544                 }
  545                 else
  546                     author_name += processFirstMiddle(fore_name) + ' ';
  547             }
  548         }
  549         // Add last name
  550         author_name += capitalize(last_name);
  551         authors[ai] = author_name;
  552         c2bUtils::debug(author_name);
  553         if (is_string_mixed) // Mixed naming 'Smith, J., R. Jones'
  554             is_current_reversed = false;
  555     }
  556 
  557     authors.removeAll(QString());
  558     _author_string = authors.join(" and ");
  559     // Restore Composite Names white spaces
  560     _author_string.replace("_i_", " i ");
  561     _author_string.replace(QRegExp("_II\\b", Qt::CaseInsensitive), " II"); // Suffix can be lower case here
  562     _author_string.replace(QRegExp("_III\\b", Qt::CaseInsensitive), " III");
  563     _author_string.replace(QRegExp("_IV\\b", Qt::CaseInsensitive), " IV");
  564     _author_string.replace(QRegExp("_JR\\b", Qt::CaseInsensitive), " Jr");
  565     _author_string.replace(QRegExp(au.prefixes + '_', Qt::CaseInsensitive), "\\1 ");
  566     _author_string.replace(QRegExp("\\b" + au.double_initials + '_', Qt::CaseInsensitive), "\\1.");
  567     _author_string = c2bUtils::simplifyString(_author_string);
  568 
  569     return _author_string;
  570 }
  571 
  572 QString authorString::processFirstMiddle(const QString& first_middle) const
  573 {
  574     // Process First and Middle parts
  575     // Abbreviates if required
  576     // Takes care of abbreviation periods
  577     QString proc_fm;
  578     if (_full_form)
  579     {
  580         if (first_middle.length() > 1)
  581             proc_fm = capitalize(first_middle);
  582         else
  583             proc_fm = first_middle + '.';
  584     }
  585     else
  586     {
  587         if (first_middle.contains('_')) // Composite names should not be abbreviated
  588         {
  589             proc_fm = capitalize(first_middle);
  590             if (first_middle.length() - first_middle.indexOf('_') == 2)
  591                 proc_fm += '.';
  592         }
  593         else if (first_middle.length() > 0)
  594             proc_fm = first_middle.at(0) + '.';
  595     }
  596     return proc_fm;
  597 }
  598 
  599 QString authorString::capitalize(const QString& name) const
  600 {
  601     // Capitalizes author's name
  602     if (name.isEmpty())
  603         return QString();
  604     QString proc_name(name);
  605     int ii(0);
  606     const int prefixes(proc_name.count(QRegExp(au.prefixes + "_(?!(?:Jr|II|III|IV)\\b)", Qt::CaseInsensitive)));
  607     for (int p = 0; p < prefixes; ++p)
  608     {
  609         const int iin(proc_name.indexOf('_', ii));
  610         if (c2bUtils::isUpperCaseString(proc_name, ii, iin))
  611             for (int i = 0; i < iin; ++i)
  612                 proc_name[i] = proc_name.at(i).toLower();
  613         ii = std::min(iin + 1, proc_name.length() - 1);
  614     }
  615     if (c2bUtils::isUpperCaseString(proc_name, ii))
  616     {
  617         proc_name[ii] = proc_name.at(ii).toUpper();
  618         for (int i = ++ii; i < proc_name.length(); ++i)
  619             proc_name[i] = proc_name.at(i).toLower();
  620         ii = proc_name.indexOf(QRegExp("[\\s-']")); // As before, assume just one part
  621         if (ii++ > 0)
  622             if (ii < proc_name.length())
  623                 proc_name[ii] = proc_name.at(ii).toUpper();
  624         if (proc_name.startsWith("Mc"))
  625             if (proc_name.length() > 4)
  626                 proc_name[2] = proc_name.at(2).toUpper();
  627     }
  628     return proc_name;
  629 }
  630 
  631 /** \page authorproc
  632 
  633     Rules to identify ordering:
  634     - Contains comma and semicolon -> Reverse
  635     - Pattern <tt>'^Abcd,'</tt> -> Reverse
  636     - Pattern <tt>'^Abcd EF Ghi'</tt> -> Natural
  637     - Pattern <tt>'^Abcd EF'</tt> -> Reverse
  638     - Pattern <tt>'^Abcd E.F.'</tt> -> Reverse
  639     - Any other pattern -> Natural
  640 
  641 */
  642 bool authorString::isReverseOrder(const QString& author) const
  643 {
  644     // Returns true if Author Name is in reversed order as "Him DF, Her SR, "
  645     // ISI doesn't contain point - return for safety
  646     // Consider "Him DF Last"
  647     const QString author_line(author.simplified());
  648     QRegExp rRevNISI("^([-'\\w]+) ((\\w\\.\\s*)+)$");
  649     rRevNISI.setMinimal(false);
  650     if (rRevNISI.indexIn(author_line) > -1)
  651     {
  652         const QString Last(rRevNISI.cap(3));
  653         if (Last != "and")
  654             return true;
  655     }
  656     if (author_line.contains('.'))
  657         return false;
  658     rRevNISI = QRegExp("^([-'\\w]+) ([-'\\w]+) ([-'\\w]+)");
  659     rRevNISI.setMinimal(false);
  660     if (rRevNISI.indexIn(author_line) > -1)
  661     {
  662         const QString Last(rRevNISI.cap(3));
  663         if (Last != "and")
  664             return false;
  665     }
  666     rRevNISI = QRegExp("^([-'\\w]+) ([-\\w]{1,3})$"); // Consider only 1 to 3 initials
  667     rRevNISI.setMinimal(false);
  668     if (rRevNISI.indexIn(author_line) > -1)
  669     {
  670         const QString Last(rRevNISI.cap(1));
  671         const QString First(rRevNISI.cap(2));
  672         c2bUtils::debug(QObject::tr("ISI:  |%1| |%2|").arg(Last, First));
  673         if (containLowerCaseLetter(First))
  674             return false;
  675         if (!containLowerCaseLetter(Last))
  676             return false;
  677         return true;
  678     }
  679     return false;
  680 }
  681 
  682 bool authorString::containLowerCaseLetter(const QString& author) const
  683 {
  684     QString author_line(author);
  685     author_line.remove(QRegExp("\\band\\b"));                                 // Remove possible 'and' separator
  686     author_line.remove(QRegExp(au.prefixes + '_', Qt::CaseInsensitive));      // Remove possible prefixes
  687     author_line.remove(QRegExp(au.double_initials + '_', Qt::CaseSensitive)); // Remove possible two-letter initials
  688     for (int i = 0; i < author_line.length(); i++)
  689     {
  690         if (author_line.at(i).isLetter())
  691             if (author_line.at(i).category() == QChar::Letter_Lowercase)
  692                 return true;
  693     }
  694     return false;
  695 }
  696 
  697 bool authorString::containUpperCaseLetter(const QString& author)
  698 {
  699     for (int i = 0; i < author.length(); i++)
  700     {
  701         if (author.at(i).isLetter())
  702             if (author.at(i).category() == QChar::Letter_Uppercase)
  703                 return true;
  704     }
  705     return false;
  706 }