"Fossies" - the Fresh Open Source Software Archive 
Member "cb2bib-2.0.1/src/c2b/networkQuery.cpp" (12 Feb 2021, 16840 Bytes) of package /linux/privat/cb2bib-2.0.1.tar.gz:
As a special service "Fossies" has tried to format the requested source page into HTML format using (guessed) C and C++ source code syntax highlighting (style:
standard) with prefixed line numbers and
code folding option.
Alternatively, you can view or download the uninterpreted source code file here.
For more information about "networkQuery.cpp" see the
Fossies "Dox" file reference documentation and the latest
Fossies "Diffs" side-by-side code changes report:
2.0.0_vs_2.0.1.
1 /***************************************************************************
2 * Copyright (C) 2004-2021 by Pere Constans
3 * constans@molspaces.com
4 * cb2Bib version 2.0.1. Licensed under the GNU GPL version 3.
5 * See the LICENSE file that comes with this distribution.
6 ***************************************************************************/
7 #include "networkQuery.h"
8
9 #include "bibParser.h"
10 #include "cb2bib_utilities.h"
11 #include "network.h"
12 #include "settings.h"
13
14 #include <QTimer>
15
16
17 networkQuery::networkQuery(bibParser* bp, QObject* parento) : QObject(parento), _bpP(bp)
18 {
19 Q_ASSERT_X(_bpP, "networkQuery", "bibParser was not instantiated");
20 _networkP = new network(this);
21 init();
22 }
23
24 networkQuery::networkQuery(bibParser* bp, network* net, QObject* parento) : QObject(parento), _bpP(bp), _networkP(net)
25 {
26 Q_ASSERT_X(_bpP, "networkQuery", "bibParser was not instantiated");
27 Q_ASSERT_X(_networkP, "networkQuery", "network was not instantiated");
28 init();
29 }
30
31
32 void networkQuery::init()
33 {
34 _settingsP = settings::instance();
35 _settingsP->setValue("networkQuery/isSupervised", true);
36 _networkquery_tmp_fn1 = _settingsP->tempPath() + "/cb2bib_query_tmp_html1_" + _settingsP->applicationPid();
37 _networkquery_tmp_fn2 = _settingsP->tempPath() + "/cb2bib_query_tmp_html2_" + _settingsP->applicationPid();
38 _timing = QDateTime::currentDateTime();
39 }
40
41 void networkQuery::submitQuery(const bibReference& reference, const QString& raw_reference, const bool check_document)
42 {
43 // Submission Initialization
44 _error_string.clear();
45 _query_parameter_count.clear();
46 _is_end_of_file = false;
47 _pdfurl_is_captured = false;
48 _check_document = check_document && _settingsP->value("cb2Bib/AutomaticPdfDownload").toBool();
49
50 // Query data
51 _Qtitle = reference.value("title");
52 _Qjournal = _bpP->fullJournal(reference.value("journal"));
53 _Qvolume = reference.value("volume");
54 _Qpage = c2bUtils::firstPage(reference.value("pages"));
55 _Qdoi = reference.value("doi").trimmed();
56 if (!_Qdoi.isEmpty() && !_Qdoi.contains(QRegExp("^10\\.[\\d\\.]+/\\S+$")))
57 {
58 _Qdoi.clear();
59 c2bUtils::warn(tr("Warning: DOI skipped: '%1' is not a valid DOI").arg(_Qdoi));
60 }
61 _Qauthor = reference.value("author");
62 _Qauthor.replace(QRegExp("(?:\\b\\w\\b|\\band\\b|\\W)"), " ");
63 _Qauthor = c2bUtils::simplifyString(_Qauthor);
64 _Qexcerpt = _bpP->excerpt(raw_reference, QStringList() << _Qdoi << _Qauthor << _Qtitle);
65 _Qeprint = reference.value("eprint").trimmed();
66
67 _raw_reference = raw_reference;
68 // Remove raw metadata in case user repeats the network query
69 _raw_reference.remove(QRegExp("\\n\\[Raw Metadata.+$"));
70
71 if (_timing.msecsTo(QDateTime::currentDateTime()) < 550)
72 QTimer::singleShot(550, this, SLOT(submitQuery1()));
73 else
74 submitQuery1();
75 }
76
77 void networkQuery::submitQuery1()
78 {
79 _raw_metadata.clear();
80
81 // Submission, first step, setting journal codes
82 if (!setQueryParameters())
83 {
84 _error_string = tr("No data for query.");
85 emit queryEnded(false, _targetQ, _networkquery_tmp_fn1);
86 return;
87 }
88 if (_is_end_of_file)
89 {
90 _error_string = tr("Performed %1 queries: No reference found.").arg(_query_parameter_count.count());
91 emit queryEnded(false, _targetQ, _networkquery_tmp_fn1);
92 return;
93 }
94
95 c2bUtils::debug(tr("Query Number = %1").arg(_query_parameter_count.count()));
96 c2bUtils::debug(tr("targetQ[%1]").arg(_targetQ));
97 c2bUtils::debug(tr("captionQ[%1]").arg(_captionQ));
98 c2bUtils::debug(tr("referenceurl_prefix[%1]").arg(_referenceurl_prefix));
99 c2bUtils::debug(tr("referenceurl_sufix[%1]").arg(_referenceurl_sufix));
100 c2bUtils::debug(tr("pdfurl_prefix[%1]").arg(_pdfurl_prefix));
101 c2bUtils::debug(tr("pdfurl_sufix[%1]").arg(_pdfurl_sufix));
102 c2bUtils::debug(tr("action[%1]").arg(_action));
103 c2bUtils::debug(tr("POST1[%1]").arg(_targetQ));
104
105 if (_action == "browse_query")
106 {
107 if (openFile(encodeUrl(_targetQ)))
108 {
109 _error_string = tr("Browsing query.");
110 emit queryEnded(true, QString(), QString());
111 }
112 else
113 {
114 _error_string = tr("Could not open URL '%1'.").arg(encodeUrl(_targetQ));
115 emit queryEnded(false, QString(), QString());
116 }
117 return;
118 }
119
120 _timing = QDateTime::currentDateTime();
121 if (_action == "htm2txt_query")
122 emit statusMessage(tr("Importing: %1.").arg(_targetQ));
123 else
124 emit statusMessage(tr("Query: %1.").arg(_targetQ));
125 _networkP->getFile(_targetQ, _networkquery_tmp_fn1, network::Copy, this, SLOT(submitQuery2(bool)),
126 !_settingsP->value("cb2Bib/KeepTmpNQFiles").toBool());
127 }
128
129 void networkQuery::submitQuery2(bool succeeded)
130 {
131 // Submission, second part: check query replay and PDF existence
132 if (!succeeded)
133 {
134 _error_string = _networkP->errorString();
135 emit statusMessage(tr("Query failed with %1.").arg(_error_string));
136 QTimer::singleShot(10, this, SLOT(submitQuery1()));
137 return;
138 }
139
140 QString lines(c2bUtils::fileToString(_networkquery_tmp_fn1, !_settingsP->value("cb2Bib/KeepTmpNQFiles").toBool()));
141
142 // For the single query cases with no htm2txt_query and referenceurl_prefix use
143 // non empty capture_from_query to check for result availability
144 QString captured;
145 if (!_captionQ.isEmpty())
146 {
147 QRegExp rx(_captionQ);
148 rx.setMinimal(true);
149 if (!rx.isValid())
150 c2bUtils::warn(tr("Warning: RegExp '%1' is not valid").arg(_captionQ));
151 const int ncap(rx.indexIn(lines));
152 if (ncap == -1)
153 {
154 QTimer::singleShot(10, this, SLOT(submitQuery1()));
155 return;
156 }
157 captured = fromHtmlString(rx.cap(1));
158 c2bUtils::debug(tr("CAPTURED[%1]").arg(captured));
159 }
160 if (_action == "htm2txt_query")
161 {
162 _error_string = tr("Importing query URL.");
163 emit queryEnded(true, QString(), fromHtmlString(lines, true));
164 return;
165 }
166 if (_action == "merge_all_metadata")
167 _raw_metadata = _raw_reference + c2bUtils::metadatasection.arg(lines.trimmed());
168 else if (_action == "merge_referenceurl_metadata")
169 _raw_metadata = _raw_reference;
170 else
171 _raw_metadata = c2bUtils::metadatasection.arg(lines.trimmed());
172 if (_referenceurl_prefix.isEmpty() && _pdfurl_prefix.isEmpty())
173 {
174 _error_string = tr("Importing query URL.");
175 emit queryEnded(true, QString(), _raw_metadata);
176 return;
177 }
178 if (captured.isEmpty())
179 {
180 QTimer::singleShot(10, this, SLOT(submitQuery1()));
181 return;
182 }
183 if (_referenceurl_prefix.isEmpty())
184 _targetBib.clear();
185 else
186 _targetBib = _referenceurl_prefix + captured + _referenceurl_sufix;
187 if (_check_document && !_pdfurl_prefix.isEmpty())
188 {
189 if (_pdfurl_is_captured)
190 _targetPDF = _pdfurl_prefix + _pdfurl_sufix;
191 else
192 _targetPDF = _pdfurl_prefix + captured + _pdfurl_sufix;
193 emit statusMessage(tr("Checking: %1").arg(_targetPDF));
194 _networkP->headFile(_targetPDF, this, SLOT(submitQuery3(bool)));
195 }
196 else
197 submitQuery3(false);
198 }
199
200 void networkQuery::submitQuery3(bool succeeded)
201 {
202 // Submission, third part: extracting reference location
203 if (!succeeded || !_networkP->mimetypeString().contains(QRegExp("\\b(chm|djvu|pdf|ps)\\b")))
204 _targetPDF.clear();
205 else
206 _targetPDF = _networkP->sourceFilename();
207
208 if (_referenceurl_prefix.isEmpty())
209 {
210 emit queryEnded(true, _targetPDF, _raw_metadata);
211 return;
212 }
213
214 c2bUtils::debug(tr("POST2[%1]").arg(_targetBib));
215 c2bUtils::debug(tr("POST3[%1]").arg(_targetPDF));
216
217 if (_action == "browse_referenceurl")
218 {
219 if (openFile(encodeUrl(_targetBib)))
220 {
221 _error_string = tr("Browsing reference.");
222 emit queryEnded(true, QString(), QString());
223 }
224 else
225 {
226 _error_string = tr("Could not open URL '%1'.").arg(encodeUrl(_targetBib));
227 emit queryEnded(false, QString(), QString());
228 }
229 return;
230 }
231 if (_action == "htm2txt_referenceurl")
232 emit statusMessage(tr("Importing: %1.").arg(_targetBib));
233 else
234 emit statusMessage(tr("Retrieving: %1.").arg(_targetBib));
235 _networkP->getFile(_targetBib, _networkquery_tmp_fn2, network::Copy, this, SLOT(queryDone(bool)),
236 !_settingsP->value("cb2Bib/KeepTmpNQFiles").toBool());
237 }
238
239 void networkQuery::queryDone(bool succeeded)
240 {
241 // Submission Done
242 if (!succeeded)
243 {
244 QTimer::singleShot(10, this, SLOT(submitQuery1()));
245 return;
246 }
247 QString lines(c2bUtils::fileToString(_networkquery_tmp_fn2, !_settingsP->value("cb2Bib/KeepTmpNQFiles").toBool()));
248 if (_action == "htm2txt_referenceurl")
249 {
250 _error_string = tr("Importing reference URL.");
251 emit queryEnded(true, _targetPDF, fromHtmlString(lines, true));
252 return;
253 }
254 if (_action == "merge_all_metadata" || _action == "merge_referenceurl_metadata")
255 _raw_metadata += c2bUtils::metadatasection.arg(lines.trimmed());
256 else
257 _raw_metadata = c2bUtils::metadatasection.arg(lines.trimmed());
258 emit queryEnded(true, _targetPDF, _raw_metadata);
259 }
260
261 bool networkQuery::setQueryParameters()
262 {
263 if (!checkQueryFile(_settingsP->fileName("cb2Bib/NetworkFile")))
264 return false;
265
266 QFile file(_settingsP->fileName("cb2Bib/NetworkFile"));
267 file.open(QIODevice::ReadOnly | QIODevice::Text);
268 QTextStream stream(&file);
269 stream.setCodec("UTF-8");
270 stream.setAutoDetectUnicode(true);
271 QRegExp Journal("journal=" + _Qjournal + "\\|");
272 QRegExp AnyJournal("journal=\\s*$");
273 uint readQueryParams(0);
274 _is_end_of_file = false;
275 QString line;
276 while (!stream.atEnd())
277 {
278 line = stream.readLine();
279 if (line.startsWith("%c2b_stop_parsing"))
280 break;
281 // Skip comments and blanks
282 if (!(line.isEmpty() || line.contains(QRegExp("^#"))))
283 {
284 if (line.contains(Journal))
285 {
286 const QStringList lc(line.split('|'));
287 if (lc.count() > 1)
288 _QjournalCode = lc.at(1);
289 else
290 _QjournalCode.clear();
291 }
292 else if (line.contains(AnyJournal))
293 {
294 _QjournalCode = _Qjournal;
295 _QjournalCode.replace(" & ", " and "); // Avoid sending '&' to confuse URLs
296 _QjournalCode.replace(QRegExp("\\W"), " ");
297 _QjournalCode = _QjournalCode.simplified();
298 _QjournalCode.replace(" ", "+");
299 }
300 // Get appropiate parameters for Journal or AnyJournal
301 if (line.contains(Journal) || line.contains(AnyJournal))
302 {
303 // Skip if already performed
304 if (!_query_parameter_count.contains(++readQueryParams))
305 {
306 while (line.contains(QRegExp("^journal=")))
307 line = stream.readLine();
308 _targetQ = line.remove(QRegExp("^query="));
309 line = stream.readLine();
310 _captionQ = line.remove(QRegExp("^capture_from_query="));
311 line = stream.readLine();
312 _referenceurl_prefix = line.remove(QRegExp("^referenceurl_prefix="));
313 line = stream.readLine();
314 _referenceurl_sufix = line.remove(QRegExp("^referenceurl_sufix="));
315 line = stream.readLine();
316 _pdfurl_prefix = line.remove(QRegExp("^pdfurl_prefix="));
317 line = stream.readLine();
318 _pdfurl_sufix = line.remove(QRegExp("^pdfurl_sufix="));
319 line = stream.readLine();
320 _action = line.remove(QRegExp("^action="));
321 // Setting Query Parameters
322 updateQueryPlaceholders();
323 // Finally, check for unresolved cb2Bib tags
324 if (areQueryParametersValid())
325 {
326 _query_parameter_count.append(readQueryParams);
327 return true;
328 }
329 }
330 }
331 }
332 }
333 file.close();
334 _is_end_of_file = true;
335 return (!_query_parameter_count.isEmpty());
336 }
337
338 void networkQuery::updateQueryPlaceholders()
339 {
340 const QString pdfurl_prefix(_pdfurl_prefix);
341 if (!_Qtitle.isEmpty())
342 {
343 _targetQ.replace("<<title>>", _Qtitle);
344 _captionQ.replace("<<title>>", _Qtitle);
345 _referenceurl_prefix.replace("<<title>>", _Qtitle);
346 _referenceurl_sufix.replace("<<title>>", _Qtitle);
347 _pdfurl_prefix.replace("<<title>>", _Qtitle);
348 _pdfurl_sufix.replace("<<title>>", _Qtitle);
349 }
350 if (!_QjournalCode.isEmpty())
351 {
352 _targetQ.replace("<<journal>>", _QjournalCode);
353 _captionQ.replace("<<journal>>", _QjournalCode);
354 _referenceurl_prefix.replace("<<journal>>", _QjournalCode);
355 _referenceurl_sufix.replace("<<journal>>", _QjournalCode);
356 _pdfurl_prefix.replace("<<journal>>", _QjournalCode);
357 _pdfurl_sufix.replace("<<journal>>", _QjournalCode);
358 }
359 if (!_Qpage.isEmpty())
360 {
361 _targetQ.replace("<<pages>>", _Qpage);
362 _captionQ.replace("<<pages>>", _Qpage);
363 _referenceurl_prefix.replace("<<pages>>", _Qpage);
364 _referenceurl_sufix.replace("<<pages>>", _Qpage);
365 _pdfurl_prefix.replace("<<pages>>", _Qpage);
366 _pdfurl_sufix.replace("<<pages>>", _Qpage);
367 }
368 if (!_Qvolume.isEmpty())
369 {
370 _targetQ.replace("<<volume>>", _Qvolume);
371 _captionQ.replace("<<volume>>", _Qvolume);
372 _referenceurl_prefix.replace("<<volume>>", _Qvolume);
373 _referenceurl_sufix.replace("<<volume>>", _Qvolume);
374 _pdfurl_prefix.replace("<<volume>>", _Qvolume);
375 _pdfurl_sufix.replace("<<volume>>", _Qvolume);
376 }
377 if (!_Qdoi.isEmpty())
378 {
379 _targetQ.replace("<<doi>>", _Qdoi);
380 _captionQ.replace("<<doi>>", _Qdoi);
381 _referenceurl_prefix.replace("<<doi>>", _Qdoi);
382 _referenceurl_sufix.replace("<<doi>>", _Qdoi);
383 _pdfurl_prefix.replace("<<doi>>", _Qdoi);
384 _pdfurl_sufix.replace("<<doi>>", _Qdoi);
385 }
386 if (!_Qexcerpt.isEmpty())
387 {
388 _targetQ.replace("<<excerpt>>", _Qexcerpt);
389 _captionQ.replace("<<excerpt>>", _Qexcerpt);
390 _referenceurl_prefix.replace("<<excerpt>>", _Qexcerpt);
391 _referenceurl_sufix.replace("<<excerpt>>", _Qexcerpt);
392 _pdfurl_prefix.replace("<<excerpt>>", _Qexcerpt);
393 _pdfurl_sufix.replace("<<excerpt>>", _Qexcerpt);
394 }
395 if (!_Qeprint.isEmpty())
396 {
397 _targetQ.replace("<<eprint>>", _Qeprint);
398 _captionQ.replace("<<eprint>>", QRegExp::escape(_Qeprint));
399 _referenceurl_prefix.replace("<<eprint>>", _Qeprint);
400 _referenceurl_sufix.replace("<<eprint>>", _Qeprint);
401 _pdfurl_prefix.replace("<<eprint>>", _Qeprint);
402 _pdfurl_sufix.replace("<<eprint>>", _Qeprint);
403 }
404 _pdfurl_is_captured = (pdfurl_prefix != _pdfurl_prefix);
405 }
406
407 bool networkQuery::areQueryParametersValid()
408 {
409 if (!_action.isEmpty())
410 {
411 if (_action == "browse_query" || _action == "browse_referenceurl")
412 {
413 if (!_settingsP->value("networkQuery/isSupervised").toBool())
414 return false;
415 }
416 else if (!(_action == "htm2txt_query" || _action == "htm2txt_referenceurl" || _action == "merge_all_metadata" ||
417 _action == "merge_referenceurl_metadata"))
418 return false;
419 }
420 const QString allParams(_targetQ + _captionQ + _referenceurl_prefix + _referenceurl_sufix + _pdfurl_prefix +
421 _pdfurl_sufix);
422 return !(
423 allParams.contains(QRegExp("(?:<<title>>|<<journal>>|<<pages>>|<<volume>>|<<doi>>|<<excerpt>>|<<eprint>>)")));
424 }
425
426 const QString networkQuery::encodeUrl(const QString& url) const
427 {
428 // Removes <<post>> tag if present and encodes URL to percent encoding
429 QString encoded_url(url);
430 encoded_url.remove(QRegExp("^<<post>>"));
431 encoded_url = QUrl::toPercentEncoding(encoded_url, "+:/?=&\\");
432 return encoded_url;
433 }
434
435 bool networkQuery::checkQueryFile(const QString& fn) const
436 {
437 if (fn.isEmpty())
438 {
439 c2bUtils::warn(tr("No network query file especified"));
440 return false;
441 }
442 QFileInfo fi(fn);
443 if (!fi.exists() || !fi.isReadable())
444 {
445 c2bUtils::warn(tr("Could not open network query file %1 for reading").arg(fn));
446 return false;
447 }
448 return true;
449 }