"Fossies" - the Fresh Open Source Software Archive 
Member "cb2bib-2.0.1/src/c2b/texToHtml.cpp" (12 Feb 2021, 14002 Bytes) of package /linux/privat/cb2bib-2.0.1.tar.gz:
As a special service, "Fossies" has tried to format the requested source page into HTML using (guessed) C and C++ source code syntax highlighting (style: standard), with prefixed line numbers and a code folding option.
Alternatively, you can view or download the uninterpreted source code file here.
For more information about "texToHtml.cpp" see the
Fossies "Dox" file reference documentation and the latest
Fossies "Diffs" side-by-side code changes report:
2.0.0_vs_2.0.1.
1 /***************************************************************************
2 * Copyright (C) 2004-2021 by Pere Constans
3 * constans@molspaces.com
4 * cb2Bib version 2.0.1. Licensed under the GNU GPL version 3.
5 * See the LICENSE file that comes with this distribution.
6 ***************************************************************************/
7 #include "texToHtml.h"
8
9 #include "cb2bib_parameters.h"
10 #include "settings.h"
11
12 #include <QUrl>
13
14
15 texToHtml::texToHtml() : texParser()
16 {
17 _macro_arguments_rx = QRegExp("\\}\\s*\\[\\s*(\\d+)\\s*\\]\\s*\\{");
18 _named_extern_url_rx = QRegExp("_((?:file|http|https|ftp)://(?:[^\\s\\n\\r\\[]+))\\[([^\\]]*)\\]");
19 _named_url_rx = QRegExp("\\b((?:file|http|https|ftp)://(?:[^\\s\\n\\r\\[]+))\\[([^\\]]*)\\]");
20 _extern_url_rx = QRegExp("_((?:file|http|https|ftp)(?!.+\">)://(?:[^\\s\\n\\r\\[]+))(\\.[\\s\\n\\r]){0,1}");
21 _url_rx = QRegExp("\\b((?:file|http|https|ftp)(?!.+\">)://(?:[^\\s\\n\\r\\[]+))(\\.[\\s\\n\\r]){0,1}");
22
23 _close_subsection = false;
24 _close_subsubsection = false;
25 _make_index = false;
26 _use_mathjax_rendering = false;
27 _use_relative_links = false;
28 _index_anchors = 0;
29
30 _settingsP = settings::instance();
31 }
32
33
34 void texToHtml::toHtml(const QString& tex, const QString& fn)
35 {
36 _html_filename = fn;
37 c2bUtils::stringToFile(toHtml(tex), fn);
38 }
39
40 QString texToHtml::toHtml(const QString& tex)
41 {
42 _bibtex_directory.clear();
43 _cites.clear();
44 _close_subsection = false;
45 _close_subsubsection = false;
46 _html.clear();
47 _index = "<ul>\n";
48 _index_anchors = 0;
49 _make_index = false;
50 _use_mathjax_rendering = false;
51 _references.clear();
52 _tex_macro_names_rx.clear();
53 _tex_macros.clear();
54 _title.clear();
55
56 _use_relative_links = _settingsP->value("c2bAnnote/UseRelativeLinks").toBool();
57 if (_html_filename.isEmpty())
58 _use_relative_links = false;
59 else
60 _current_dir = QDir(QFileInfo(_html_filename).absolutePath());
61
62 QString tex2html(c2bUtils::fileToString(":/htm/htm/tex2html.html"));
63 QString mathjax_head(c2bUtils::fileToString(_settingsP->fileName("c2bAnnote/MathJaxHeaderFile")).trimmed());
64 if (!mathjax_head.isEmpty())
65 _use_mathjax_rendering = true;
66
67 parse(tex);
68
69 if (_close_subsubsection)
70 _index += "</ul></li>\n";
71 if (_close_subsection)
72 _index += "</ul></li>\n";
73 _index += "</ul>";
74 if (_use_mathjax_rendering)
75 {
76 if (!_tex_macros.isEmpty())
77 {
78 QString t("\n\n<script type=\"text/x-mathjax-config\">\n MathJax.Hub.Config({\n TeX: {\n Macros: "
79 "{%1\n }\n }\n });\n</script>\n");
80 mathjax_head += t.arg(_tex_macros);
81 }
82 tex2html.replace("GET_MATHJAX_HEAD", mathjax_head);
83 }
84 else
85 tex2html.remove("GET_MATHJAX_HEAD");
86 if (_settingsP->value("c2bAnnote/IncludeCSS").toBool())
87 {
88 QString css(c2bUtils::fileToString(_settingsP->fileName("c2bAnnote/AnnoteCssFile")).trimmed());
89 if (css.isEmpty())
90 css = c2bUtils::fileToString(":/htm/htm/tex2html.css");
91 tex2html.replace("GET_CSS", "<style type=\"text/css\">\n" + css.trimmed() + "\n</style>\n");
92 }
93 else
94 {
95 QString cssfn(_settingsP->fileName("c2bAnnote/AnnoteCssFile"));
96 if (cssfn.isEmpty())
97 tex2html.remove("GET_CSS");
98 else
99 {
100 if (_use_relative_links)
101 cssfn = _current_dir.relativeFilePath(cssfn);
102 tex2html.replace("GET_CSS", "\n <link href=\"" + cssfn + "\" rel=\"stylesheet\" type=\"text/css\"/>\n");
103 }
104 }
105 tex2html.replace("GET_TITLE", _title);
106 retrieveReferences();
107 QString reference_list_html;
108 referencesToHtml(&reference_list_html);
109 tex2html.replace("GET_REFERENCES", reference_list_html);
110 citesToHtml(&_html);
111 if (_make_index && _index_anchors > 1)
112 {
113 _index = c2bUtils::fileToString(":/htm/htm/tex2html_index.html").arg(_index);
114 tex2html.replace("GET_INDEX", _index);
115 }
116 else
117 tex2html.remove("GET_INDEX");
118 tex2html.replace("GET_HTML", _html);
119 tex2html.replace("GET_CB2BIB_VERSION_NUMBER", C2B_VERSION);
120
121 _html_filename.clear();
122 return tex2html;
123 }
124
125 void texToHtml::parseElement(const QString& p, const QString& e, const QString& v)
126 {
127 if (e == "section")
128 {
129 const QString sv(toHtmlString(v));
130 ++_index_anchors;
131 _html += QString("\n<h1><a id=\"sect%1\"></a>%2</h1>\n\n").arg(_index_anchors).arg(sv);
132 if (_close_subsubsection)
133 _index += "</ul></li>\n";
134 if (_close_subsection)
135 _index += "</ul></li>\n";
136 _index += QString("<li><a href=\"#sect%1\">%2</a></li>\n").arg(_index_anchors).arg(sv);
137 _close_subsubsection = false;
138 _close_subsection = false;
139 }
140 else if (e == "subsection")
141 {
142 const QString sv(toHtmlString(v));
143 ++_index_anchors;
144 _html += QString("\n<h2><a id=\"sect%1\"></a>%2</h2>\n\n").arg(_index_anchors).arg(sv);
145 if (_close_subsubsection)
146 _index += "</ul></li>\n";
147 if (!_close_subsection)
148 _index += "<li style=\"list-style: none\"><ul>\n";
149 _index += QString("<li><a href=\"#sect%1\">%2</a></li>\n").arg(_index_anchors).arg(sv);
150 _close_subsubsection = false;
151 _close_subsection = true;
152 }
153 else if (e == "subsubsection")
154 {
155 const QString sv(toHtmlString(v));
156 ++_index_anchors;
157 _html += QString("\n<h3><a id=\"sect%1\"></a>%2</h3>\n\n").arg(_index_anchors).arg(sv);
158 if (!_close_subsubsection)
159 _index += "<li style=\"list-style: none\"><ul>\n";
160 _index += QString("<li><a href=\"#sect%1\">%2</a></li>\n").arg(_index_anchors).arg(sv);
161 _close_subsubsection = true;
162 }
163 else if (e == "title")
164 _title = toHtmlString(v);
165 else if (e == "itemize")
166 {
167 const QStringList items(v.trimmed().split("\\item ", QString::SkipEmptyParts));
168 for (int i = 0; i < items.count(); ++i)
169 parseTextParagraph("- " + items.at(i).simplified());
170 }
171 else if (e == "abstract")
172 _html += QString("\n<div id=\"abstract\" class=\"abstract\">\n%1\n</div><br /><hr />\n\n")
173 .arg(toHtmlString(v.trimmed(), false));
174 else if (e == "verbatim")
175 _html += QString("\n<pre>%1</pre>\n\n").arg(v);
176 else if (e == "newcommand")
177 extractMacro(v);
178 else
179 _html += QString("\n<div>\n%1\n</div>\n\n").arg(p);
180 }
181
182 void texToHtml::parseComment(const QString& p)
183 {
184 if (p.startsWith("%\\c2b_bibtex_directory{"))
185 c2bUtils::inBraces(p.indexOf('{') + 1, p, &_bibtex_directory);
186 else if (p == "%\\c2b_makeindex")
187 _make_index = true;
188 }
189
190 void texToHtml::parseTextParagraph(const QString& p)
191 {
192 extractCites(p);
193 QString pstr(toHtmlString(p, false));
194 urlToHtml(&pstr);
195 if (p.startsWith("- "))
196 _html += QString("\n<ul><li>%1</li></ul>\n\n").arg(pstr.mid(2));
197 else
198 _html += QString("\n<p>%1</p>\n\n").arg(pstr);
199 }
200
201 void texToHtml::extractMacro(const QString& v)
202 {
203 QString name;
204 if (!c2bUtils::inBraces(1, v, &name))
205 return;
206 // Substitution '\macro' -> '$\macro[args]$'. MathJax will not process macros outside equation environments.
207 _tex_macro_names_rx.append(QRegExp("(\\" + name + "\\S*)"));
208 // To minimize clashes, the replacement must be performed from longest to shortest names.
209 // Sort now, even if this needs multiple sortings. This way, substitutions can be done while parsing.
210 std::sort(_tex_macro_names_rx.begin(), _tex_macro_names_rx.end(), c2bUtils::patternLengthMoreThan);
211 QString definition;
212 for (int i = name.length() + 2; i < v.length(); ++i)
213 if (v.at(i) == '{')
214 {
215 if (c2bUtils::inBraces(i + 1, v, &definition))
216 break;
217 else
218 return;
219 }
220 if (definition.isEmpty())
221 return;
222 name.remove("\\");
223 definition.remove("\\ensuremath");
224 definition.replace("\\", "\\\\");
225 if (_macro_arguments_rx.indexIn(v) > -1)
226 _tex_macros += QString("\n %1: ['%2',%3],").arg(name, definition, _macro_arguments_rx.cap(1));
227 else
228 _tex_macros += QString("\n %1: '%2',").arg(name, definition);
229 }
230
231 void texToHtml::extractCites(const QString& p)
232 {
233 int pos(p.indexOf("\\cite{", 0));
234 while (pos >= 0)
235 {
236 QString cites;
237 if (c2bUtils::inBraces(pos + 6, p, &cites))
238 {
239 const QStringList cite(cites.split(',', QString::SkipEmptyParts));
240 for (int i = 0; i < cite.count(); ++i)
241 {
242 const QString c(cite.at(i).trimmed());
243 if (!_cites.contains(c))
244 _cites.insert(c, -(1 + _cites.count()));
245 }
246 }
247 pos = p.indexOf("\\cite{", pos + 6);
248 }
249 }
250
251 void texToHtml::citesToHtml(QString* html)
252 {
253 QString key("\\b%1\\b");
254 QHash<QString, bibReference>::const_iterator i = _references.constBegin();
255 while (i != _references.constEnd())
256 {
257 const QString k(i.key());
258 html->replace(QRegExp(key.arg(k)), QString("<a href=\"#%1\">%2</a>").arg(k).arg(_cites.value(k)));
259 ++i;
260 }
261 html->replace(QRegExp("\\\\cite\\{([^\\{]+)\\}"), "<span class=\"citations\">[\\1]</span>");
262 }
263
264 /**
265 Retrieve references from BibTeX files directory
266 */
267 void texToHtml::retrieveReferences()
268 {
269 if (_bibtex_directory.isEmpty())
270 _bibtex_directory = QFileInfo(_settingsP->fileName("cb2Bib/BibTeXFile")).path();
271 const QStringList flist(c2bUtils::filesInDir(_bibtex_directory, QStringList() << "*.bib"));
272 const QStringList keys(_cites.keys());
273 QStringList fields;
274 fields.append("author");
275 fields.append("booktitle");
276 fields.append("doi");
277 fields.append("editor");
278 fields.append("file");
279 fields.append("journal");
280 fields.append("pages");
281 fields.append("title");
282 fields.append("url");
283 fields.append("volume");
284 fields.append("year");
285 bibReference ref;
286 _cbp.initReferenceParsing(_bibtex_directory + '/', fields, &ref);
287 bool done(false);
288 for (int i = 0; i < flist.count(); ++i)
289 {
290 if (done)
291 return;
292 const QString bib_contents(c2bUtils::fileToString(flist.at(i)));
293 done = true;
294 for (int j = 0; j < keys.count(); ++j)
295 {
296 if (_cites.value(keys.at(j)) > 0)
297 continue;
298 done = false;
299 const QString key(keys.at(j));
300 if (_cbp.referenceAtKey(key, bib_contents, &ref))
301 {
302 _references.insert(key, ref);
303 _cites[key] = -_cites[key];
304 }
305 }
306 }
307 }
308
309 void texToHtml::referencesToHtml(QString* reference_list_html)
310 {
311 reference_list_html->clear();
312 if (_references.count() == 0)
313 return;
314
315 QStringList keys(_references.keys());
316 QMap<int, QString> ordering;
317 for (int i = 0; i < keys.count(); ++i)
318 ordering.insert(_cites.value(keys.at(i)), keys.at(i));
319 keys = ordering.values();
320
321 const QString reference_item_html(c2bUtils::fileToString(":/htm/htm/reference_item.html"));
322 const QRegExp nonletters("[^\\w\\s-]");
323 QString references;
324 for (int i = 0; i < keys.count(); ++i)
325 {
326 const bibReference& ref = _references.value(keys.at(i));
327
328 QString author(ref.anyAuthor());
329 if (!author.isEmpty())
330 {
331 author = _cbp.authorFromBibTeX(author);
332 author.remove(nonletters);
333 author.replace(" and ", ", ");
334 c2bUtils::simplifyString(author);
335 author += '.';
336 }
337
338 QString title(ref.anyTitle());
339 c2bUtils::cleanTitle(title, true);
340
341 // Do not percent encode file (some browsers dont like encoded local files) and url
342 // (it might already be, if needed, decode prior to encode)
343 const QString doi(ref.value("doi"));
344 const QString file(ref.value("file"));
345 const QString url(ref.value("url"));
346 QString link;
347 if (!file.isEmpty())
348 {
349 if (_use_relative_links)
350 link = QDir::cleanPath(_current_dir.relativeFilePath(file));
351 else
352 link = QDir::cleanPath(file);
353 }
354 else if (!url.isEmpty())
355 link = url;
356 else if (!doi.isEmpty())
357 {
358 if (doi.startsWith("http"))
359 link = doi;
360 else
361 link = "https://dx.doi.org/" + QUrl::toPercentEncoding(doi);
362 }
363
364 const QString journal(ref.anyJournal());
365 const QString volume(ref.value("volume"));
366
367 QString pages(ref.value("pages"));
368 if (!pages.isEmpty())
369 {
370 if (!volume.isEmpty())
371 pages = ", " + pages.remove(' ');
372 else
373 pages = ' ' + pages.remove(' ');
374 }
375
376 QString year(ref.value("year"));
377 if (!year.isEmpty())
378 year = '(' + year + ").";
379
380 QString item(reference_item_html);
381 item.replace("GET_REFERENCE_ANCHOR_ID", ref.citeidName);
382 item.replace("GET_REFERENCE_ANCHOR_NAME", ref.citeidName);
383 item.replace("GET_REFERENCE_AUTHOR", author);
384 if (link.isEmpty())
385 item.remove("href=\"GET_REFERENCE_LINK\"");
386 else
387 item.replace("GET_REFERENCE_LINK", link);
388 item.replace("GET_REFERENCE_PAGES", pages);
389 item.replace("GET_REFERENCE_SOURCE", journal);
390 item.replace("GET_REFERENCE_TITLE", c2bUtils::toHtmlString(title));
391 item.replace("GET_REFERENCE_VOLUME", volume);
392 item.replace("GET_REFERENCE_YEAR", year);
393 references += item;
394
395 // Update _cites values according to this list
396 _cites[keys.at(i)] = i + 1;
397 }
398 *reference_list_html = c2bUtils::fileToString(":/htm/htm/reference_list.html");
399 reference_list_html->replace("GET_REFERENCES", references);
400 }