"Fossies" - the Fresh Open Source Software Archive 
Member "cb2bib-2.0.1/src/c2b/documentParser.cpp" (12 Feb 2021, 2839 Bytes) of package /linux/privat/cb2bib-2.0.1.tar.gz:
As a special service "Fossies" has tried to format the requested source page into HTML format using (guessed) C and C++ source code syntax highlighting (style:
standard) with prefixed line numbers and
code folding option.
Alternatively, you can
view or
download the uninterpreted source code file here.
For more information about "documentParser.cpp" see the
Fossies "Dox" file reference documentation and the latest
Fossies "Diffs" side-by-side code changes report:
2.0.0_vs_2.0.1.
1 /***************************************************************************
2 * Copyright (C) 2004-2021 by Pere Constans
3 * constans@molspaces.com
4 * cb2Bib version 2.0.1. Licensed under the GNU GPL version 3.
5 * See the LICENSE file that comes with this distribution.
6 ***************************************************************************/
7 #include "documentParser.h"
8
9
10 documentParser::documentParser(const QString& text, const int maximum)
11 : _space(QChar(' ')), _text(text), _maximum_words(maximum), _minimum_words(2), _text_length(text.length())
12 {
13 _current_nwords = 0;
14 _current_ssentence_length = 0;
15 _current_ssentence_origin = 0;
16 _nwords = 0;
17 _position = 0;
18
19 if (_text_length > 0)
20 {
21 if (_is_letter(_text.at(0)))
22 {
23 _ssentence_origin = 0;
24 _all_done = false;
25 }
26 else
27 {
28 _ssentence_origin = nextWord(0);
29 _position = _ssentence_origin;
30 _all_done = (_ssentence_origin == _text_length);
31 }
32 }
33 else
34 _all_done = true;
35 }
36
37 documentParser::~documentParser() {}
38
39
40 bool documentParser::parses()
41 {
42 if (_all_done)
43 return false;
44
45 // Assumed 0-terminated string, i.e., text[_text_length] = 0
46 const QChar* const text(_text.constData());
47 int i(_position);
48 while (i++ < _text_length)
49 {
50 const QChar& c(text[i]);
51 if (_is_letter_or_number(c))
52 continue;
53 ++_nwords;
54 // Contiguous words within a subsentence
55 if (c == _space && _is_letter(text[i + 1]))
56 {
57 if (_nwords < _minimum_words)
58 continue;
59 if (_nwords < _maximum_words)
60 {
61 _position = i + 1;
62 _current_nwords = _nwords;
63 _current_ssentence_origin = _ssentence_origin;
64 _current_ssentence_length = i - _ssentence_origin;
65 return true;
66 }
67 }
68 // Subsentence terminated at c
69 else if (_nwords < _minimum_words)
70 {
71 // Too few words in subsentence, move forward to next subsentence
72 _ssentence_origin = nextWord(i);
73 i = _ssentence_origin;
74 _nwords = 0;
75 continue;
76 }
77 _position = nextWord(_ssentence_origin);
78 _current_nwords = _nwords;
79 _current_ssentence_origin = _ssentence_origin;
80 _current_ssentence_length = i - _ssentence_origin;
81 _nwords = 0;
82 _ssentence_origin = _position;
83 return true;
84 }
85
86 _all_done = true;
87 return false;
88 }
89
90 int documentParser::nextWord(const int pos) const
91 {
92 for (int i = pos + 1; i < _text_length; ++i)
93 if (_is_letter(_text.at(i)) && !_is_letter_or_number(_text.at(i - 1)))
94 return i;
95 return _text_length;
96 }