1 /*************************************************************************** 2 * Copyright (C) 2004-2021 by Pere Constans 3 * constans@molspaces.com 4 * cb2Bib version 2.0.1. Licensed under the GNU GPL version 3. 5 * See the LICENSE file that comes with this distribution. 6 ***************************************************************************/ 7 #ifndef HEURISTICBIBPARSER_H 8 #define HEURISTICBIBPARSER_H 9 10 #include "authorString.h" 11 #include "bibReference.h" 12 13 14 class bibParser; 15 class journalDB; 16 17 18 /** 19 bibParser helper class for heuristic recognition 20 21 @author Pere Constans 22 */ 23 class heuristicBibParser 24 { 25 26 public: 27 explicit heuristicBibParser(bibParser* bp); 28 ~heuristicBibParser(); 29 30 void guessFields(const QString& clean_text, const QString& tagged_text); 31 void heuristicFields(const QString& text); 32 33 34 private: 35 QRegExp* _author_b2_lc_rx; 36 QRegExp* _author_b2_uc_rx; 37 QRegExp* _author_b3_lc_rx; 38 QRegExp* _author_b3_uc_rx; 39 QRegExp* _author_b4_lc_rx; 40 QRegExp* _author_b4_uc_rx; 41 QRegExp* _author_b5_lc_rx; 42 QRegExp* _author_b5_uc_rx; 43 QRegExp _author_sb; 44 QRegExp _author_sb_lc; 45 QRegExp _author_sb_uc; 46 QRegExp _hyphens; 47 QRegExp _leading_non_letters; 48 QString _hyphen_nums; 49 QString _hyphen_pages; 50 QString guessAuthor_multi_block(); 51 QString guessAuthor_single_block(); 52 QStringList _word_prefix_lexicon; 53 author::encoder _aencoder; 54 bibParser* _bpP; 55 bibReference& _current_reference; 56 bool _reliable_number; 57 bool _reliable_pages; 58 bool _reliable_volume; 59 const QString _abstract; 60 const QString _addauthors; 61 const QString _author; 62 const QString _isbn; 63 const QString _keywords; 64 const QString _number; 65 const QString _pages; 66 const QString _title; 67 const QString _volume; 68 const QString _year; 69 const QStringList& _bibliographic_fields; 70 const journalDB& _journal_db; 71 int _max_year; 72 static int authorCount(const QString& authors); 73 void guessAbstract(const QString& text); 74 void guessAuthor(const QString& tagged_text); 75 void guessFromMetadata(const QString& text); 76 void guessISBN(const QString& text); 77 void guessJournal(const QString& text); 78 void guessKeywords(const QString& text); 79 void guessNumber(const QString& text); 80 void guessPages(const QString& text); 81 void guessTitle(const QString& text); 82 void guessVolume(const QString& text); 83 void guessVolumePagesYear(const QString& text); 84 void guessVolumeYearPages(const QString& text); 85 void guessYear(const QString& text); 86 void guessYearVolumePages(const QString& text); 87 88 inline void _debug_guess(const QString& 89 #ifdef C2B_DEBUG_HEURISTICBIBPARSER 90 procedure 91 #endif 92 ) const 93 { 94 #ifdef C2B_DEBUG_HEURISTICBIBPARSER 95 qDebug() << "[cb2bib] " << procedure; 96 for (int i = 0; i < _bibliographic_fields.count(); ++i) 97 { 98 if (!_current_reference.value(_bibliographic_fields.at(i)).isEmpty()) 99 qDebug() << QString(" %1: '%2'") 100 .arg(_bibliographic_fields.at(i)) 101 .arg(_current_reference.value(_bibliographic_fields.at(i))); 102 } 103 #endif 104 } 105 }; 106 107 #endif