"Fossies" - the Fresh Open Source Software Archive

Member "cb2bib-2.0.1/src/c2b/texToHtml.cpp" (12 Feb 2021, 14002 Bytes) of package /linux/privat/cb2bib-2.0.1.tar.gz:


As a special service, "Fossies" has tried to format the requested source page into HTML using (guessed) C and C++ source code syntax highlighting (style: standard), with prefixed line numbers and a code folding option. Alternatively, you can view or download the uninterpreted source code file here. For more information about "texToHtml.cpp", see the Fossies "Dox" file reference documentation and the latest Fossies "Diffs" side-by-side code changes report: 2.0.0_vs_2.0.1.

    1 /***************************************************************************
    2  *   Copyright (C) 2004-2021 by Pere Constans
    3  *   constans@molspaces.com
    4  *   cb2Bib version 2.0.1. Licensed under the GNU GPL version 3.
    5  *   See the LICENSE file that comes with this distribution.
    6  ***************************************************************************/
    7 #include "texToHtml.h"
    8 
    9 #include "cb2bib_parameters.h"
   10 #include "settings.h"
   11 
   12 #include <QUrl>
   13 
   14 
   15 texToHtml::texToHtml() : texParser()
   16 {
   17     _macro_arguments_rx = QRegExp("\\}\\s*\\[\\s*(\\d+)\\s*\\]\\s*\\{");
   18     _named_extern_url_rx = QRegExp("_((?:file|http|https|ftp)://(?:[^\\s\\n\\r\\[]+))\\[([^\\]]*)\\]");
   19     _named_url_rx = QRegExp("\\b((?:file|http|https|ftp)://(?:[^\\s\\n\\r\\[]+))\\[([^\\]]*)\\]");
   20     _extern_url_rx = QRegExp("_((?:file|http|https|ftp)(?!.+\">)://(?:[^\\s\\n\\r\\[]+))(\\.[\\s\\n\\r]){0,1}");
   21     _url_rx = QRegExp("\\b((?:file|http|https|ftp)(?!.+\">)://(?:[^\\s\\n\\r\\[]+))(\\.[\\s\\n\\r]){0,1}");
   22 
   23     _close_subsection = false;
   24     _close_subsubsection = false;
   25     _make_index = false;
   26     _use_mathjax_rendering = false;
   27     _use_relative_links = false;
   28     _index_anchors = 0;
   29 
   30     _settingsP = settings::instance();
   31 }
   32 
   33 
   34 void texToHtml::toHtml(const QString& tex, const QString& fn)
   35 {
   36     _html_filename = fn;
   37     c2bUtils::stringToFile(toHtml(tex), fn);
   38 }
   39 
   40 QString texToHtml::toHtml(const QString& tex)
   41 {
   42     _bibtex_directory.clear();
   43     _cites.clear();
   44     _close_subsection = false;
   45     _close_subsubsection = false;
   46     _html.clear();
   47     _index = "<ul>\n";
   48     _index_anchors = 0;
   49     _make_index = false;
   50     _use_mathjax_rendering = false;
   51     _references.clear();
   52     _tex_macro_names_rx.clear();
   53     _tex_macros.clear();
   54     _title.clear();
   55 
   56     _use_relative_links = _settingsP->value("c2bAnnote/UseRelativeLinks").toBool();
   57     if (_html_filename.isEmpty())
   58         _use_relative_links = false;
   59     else
   60         _current_dir = QDir(QFileInfo(_html_filename).absolutePath());
   61 
   62     QString tex2html(c2bUtils::fileToString(":/htm/htm/tex2html.html"));
   63     QString mathjax_head(c2bUtils::fileToString(_settingsP->fileName("c2bAnnote/MathJaxHeaderFile")).trimmed());
   64     if (!mathjax_head.isEmpty())
   65         _use_mathjax_rendering = true;
   66 
   67     parse(tex);
   68 
   69     if (_close_subsubsection)
   70         _index += "</ul></li>\n";
   71     if (_close_subsection)
   72         _index += "</ul></li>\n";
   73     _index += "</ul>";
   74     if (_use_mathjax_rendering)
   75     {
   76         if (!_tex_macros.isEmpty())
   77         {
   78             QString t("\n\n<script type=\"text/x-mathjax-config\">\n  MathJax.Hub.Config({\n   TeX: {\n    Macros: "
   79                       "{%1\n    }\n   }\n  });\n</script>\n");
   80             mathjax_head += t.arg(_tex_macros);
   81         }
   82         tex2html.replace("GET_MATHJAX_HEAD", mathjax_head);
   83     }
   84     else
   85         tex2html.remove("GET_MATHJAX_HEAD");
   86     if (_settingsP->value("c2bAnnote/IncludeCSS").toBool())
   87     {
   88         QString css(c2bUtils::fileToString(_settingsP->fileName("c2bAnnote/AnnoteCssFile")).trimmed());
   89         if (css.isEmpty())
   90             css = c2bUtils::fileToString(":/htm/htm/tex2html.css");
   91         tex2html.replace("GET_CSS", "<style type=\"text/css\">\n" + css.trimmed() + "\n</style>\n");
   92     }
   93     else
   94     {
   95         QString cssfn(_settingsP->fileName("c2bAnnote/AnnoteCssFile"));
   96         if (cssfn.isEmpty())
   97             tex2html.remove("GET_CSS");
   98         else
   99         {
  100             if (_use_relative_links)
  101                 cssfn = _current_dir.relativeFilePath(cssfn);
  102             tex2html.replace("GET_CSS", "\n <link href=\"" + cssfn + "\" rel=\"stylesheet\" type=\"text/css\"/>\n");
  103         }
  104     }
  105     tex2html.replace("GET_TITLE", _title);
  106     retrieveReferences();
  107     QString reference_list_html;
  108     referencesToHtml(&reference_list_html);
  109     tex2html.replace("GET_REFERENCES", reference_list_html);
  110     citesToHtml(&_html);
  111     if (_make_index && _index_anchors > 1)
  112     {
  113         _index = c2bUtils::fileToString(":/htm/htm/tex2html_index.html").arg(_index);
  114         tex2html.replace("GET_INDEX", _index);
  115     }
  116     else
  117         tex2html.remove("GET_INDEX");
  118     tex2html.replace("GET_HTML", _html);
  119     tex2html.replace("GET_CB2BIB_VERSION_NUMBER", C2B_VERSION);
  120 
  121     _html_filename.clear();
  122     return tex2html;
  123 }
  124 
  125 void texToHtml::parseElement(const QString& p, const QString& e, const QString& v)
  126 {
  127     if (e == "section")
  128     {
  129         const QString sv(toHtmlString(v));
  130         ++_index_anchors;
  131         _html += QString("\n<h1><a id=\"sect%1\"></a>%2</h1>\n\n").arg(_index_anchors).arg(sv);
  132         if (_close_subsubsection)
  133             _index += "</ul></li>\n";
  134         if (_close_subsection)
  135             _index += "</ul></li>\n";
  136         _index += QString("<li><a href=\"#sect%1\">%2</a></li>\n").arg(_index_anchors).arg(sv);
  137         _close_subsubsection = false;
  138         _close_subsection = false;
  139     }
  140     else if (e == "subsection")
  141     {
  142         const QString sv(toHtmlString(v));
  143         ++_index_anchors;
  144         _html += QString("\n<h2><a id=\"sect%1\"></a>%2</h2>\n\n").arg(_index_anchors).arg(sv);
  145         if (_close_subsubsection)
  146             _index += "</ul></li>\n";
  147         if (!_close_subsection)
  148             _index += "<li style=\"list-style: none\"><ul>\n";
  149         _index += QString("<li><a href=\"#sect%1\">%2</a></li>\n").arg(_index_anchors).arg(sv);
  150         _close_subsubsection = false;
  151         _close_subsection = true;
  152     }
  153     else if (e == "subsubsection")
  154     {
  155         const QString sv(toHtmlString(v));
  156         ++_index_anchors;
  157         _html += QString("\n<h3><a id=\"sect%1\"></a>%2</h3>\n\n").arg(_index_anchors).arg(sv);
  158         if (!_close_subsubsection)
  159             _index += "<li style=\"list-style: none\"><ul>\n";
  160         _index += QString("<li><a href=\"#sect%1\">%2</a></li>\n").arg(_index_anchors).arg(sv);
  161         _close_subsubsection = true;
  162     }
  163     else if (e == "title")
  164         _title = toHtmlString(v);
  165     else if (e == "itemize")
  166     {
  167         const QStringList items(v.trimmed().split("\\item ", QString::SkipEmptyParts));
  168         for (int i = 0; i < items.count(); ++i)
  169             parseTextParagraph("- " + items.at(i).simplified());
  170     }
  171     else if (e == "abstract")
  172         _html += QString("\n<div id=\"abstract\" class=\"abstract\">\n%1\n</div><br /><hr />\n\n")
  173                  .arg(toHtmlString(v.trimmed(), false));
  174     else if (e == "verbatim")
  175         _html += QString("\n<pre>%1</pre>\n\n").arg(v);
  176     else if (e == "newcommand")
  177         extractMacro(v);
  178     else
  179         _html += QString("\n<div>\n%1\n</div>\n\n").arg(p);
  180 }
  181 
  182 void texToHtml::parseComment(const QString& p)
  183 {
  184     if (p.startsWith("%\\c2b_bibtex_directory{"))
  185         c2bUtils::inBraces(p.indexOf('{') + 1, p, &_bibtex_directory);
  186     else if (p == "%\\c2b_makeindex")
  187         _make_index = true;
  188 }
  189 
  190 void texToHtml::parseTextParagraph(const QString& p)
  191 {
  192     extractCites(p);
  193     QString pstr(toHtmlString(p, false));
  194     urlToHtml(&pstr);
  195     if (p.startsWith("- "))
  196         _html += QString("\n<ul><li>%1</li></ul>\n\n").arg(pstr.mid(2));
  197     else
  198         _html += QString("\n<p>%1</p>\n\n").arg(pstr);
  199 }
  200 
  201 void texToHtml::extractMacro(const QString& v)
  202 {
  203     QString name;
  204     if (!c2bUtils::inBraces(1, v, &name))
  205         return;
  206     // Substitution '\macro' -> '$\macro[args]$'. MathJax will not process macros outside equation environments.
  207     _tex_macro_names_rx.append(QRegExp("(\\" + name + "\\S*)"));
  208     // To minimize clashes, the replacement must be performed from longest to shortest names.
  209     // Sort now, even if this needs multiple sortings. This way, substitutions can be done while parsing.
  210     std::sort(_tex_macro_names_rx.begin(), _tex_macro_names_rx.end(), c2bUtils::patternLengthMoreThan);
  211     QString definition;
  212     for (int i = name.length() + 2; i < v.length(); ++i)
  213         if (v.at(i) == '{')
  214         {
  215             if (c2bUtils::inBraces(i + 1, v, &definition))
  216                 break;
  217             else
  218                 return;
  219         }
  220     if (definition.isEmpty())
  221         return;
  222     name.remove("\\");
  223     definition.remove("\\ensuremath");
  224     definition.replace("\\", "\\\\");
  225     if (_macro_arguments_rx.indexIn(v) > -1)
  226         _tex_macros += QString("\n     %1: ['%2',%3],").arg(name, definition, _macro_arguments_rx.cap(1));
  227     else
  228         _tex_macros += QString("\n     %1: '%2',").arg(name, definition);
  229 }
  230 
  231 void texToHtml::extractCites(const QString& p)
  232 {
  233     int pos(p.indexOf("\\cite{", 0));
  234     while (pos >= 0)
  235     {
  236         QString cites;
  237         if (c2bUtils::inBraces(pos + 6, p, &cites))
  238         {
  239             const QStringList cite(cites.split(',', QString::SkipEmptyParts));
  240             for (int i = 0; i < cite.count(); ++i)
  241             {
  242                 const QString c(cite.at(i).trimmed());
  243                 if (!_cites.contains(c))
  244                     _cites.insert(c, -(1 + _cites.count()));
  245             }
  246         }
  247         pos = p.indexOf("\\cite{", pos + 6);
  248     }
  249 }
  250 
  251 void texToHtml::citesToHtml(QString* html)
  252 {
  253     QString key("\\b%1\\b");
  254     QHash<QString, bibReference>::const_iterator i = _references.constBegin();
  255     while (i != _references.constEnd())
  256     {
  257         const QString k(i.key());
  258         html->replace(QRegExp(key.arg(k)), QString("<a href=\"#%1\">%2</a>").arg(k).arg(_cites.value(k)));
  259         ++i;
  260     }
  261     html->replace(QRegExp("\\\\cite\\{([^\\{]+)\\}"), "<span class=\"citations\">[\\1]</span>");
  262 }
  263 
  264 /**
  265     Retrieve references from BibTeX files directory
  266 */
  267 void texToHtml::retrieveReferences()
  268 {
  269     if (_bibtex_directory.isEmpty())
  270         _bibtex_directory = QFileInfo(_settingsP->fileName("cb2Bib/BibTeXFile")).path();
  271     const QStringList flist(c2bUtils::filesInDir(_bibtex_directory, QStringList() << "*.bib"));
  272     const QStringList keys(_cites.keys());
  273     QStringList fields;
  274     fields.append("author");
  275     fields.append("booktitle");
  276     fields.append("doi");
  277     fields.append("editor");
  278     fields.append("file");
  279     fields.append("journal");
  280     fields.append("pages");
  281     fields.append("title");
  282     fields.append("url");
  283     fields.append("volume");
  284     fields.append("year");
  285     bibReference ref;
  286     _cbp.initReferenceParsing(_bibtex_directory + '/', fields, &ref);
  287     bool done(false);
  288     for (int i = 0; i < flist.count(); ++i)
  289     {
  290         if (done)
  291             return;
  292         const QString bib_contents(c2bUtils::fileToString(flist.at(i)));
  293         done = true;
  294         for (int j = 0; j < keys.count(); ++j)
  295         {
  296             if (_cites.value(keys.at(j)) > 0)
  297                 continue;
  298             done = false;
  299             const QString key(keys.at(j));
  300             if (_cbp.referenceAtKey(key, bib_contents, &ref))
  301             {
  302                 _references.insert(key, ref);
  303                 _cites[key] = -_cites[key];
  304             }
  305         }
  306     }
  307 }
  308 
  309 void texToHtml::referencesToHtml(QString* reference_list_html)
  310 {
  311     reference_list_html->clear();
  312     if (_references.count() == 0)
  313         return;
  314 
  315     QStringList keys(_references.keys());
  316     QMap<int, QString> ordering;
  317     for (int i = 0; i < keys.count(); ++i)
  318         ordering.insert(_cites.value(keys.at(i)), keys.at(i));
  319     keys = ordering.values();
  320 
  321     const QString reference_item_html(c2bUtils::fileToString(":/htm/htm/reference_item.html"));
  322     const QRegExp nonletters("[^\\w\\s-]");
  323     QString references;
  324     for (int i = 0; i < keys.count(); ++i)
  325     {
  326         const bibReference& ref = _references.value(keys.at(i));
  327 
  328         QString author(ref.anyAuthor());
  329         if (!author.isEmpty())
  330         {
  331             author = _cbp.authorFromBibTeX(author);
  332             author.remove(nonletters);
  333             author.replace(" and ", ", ");
  334             c2bUtils::simplifyString(author);
  335             author += '.';
  336         }
  337 
  338         QString title(ref.anyTitle());
  339         c2bUtils::cleanTitle(title, true);
  340 
  341         // Do not percent encode file (some browsers dont like encoded local files) and url
  342         // (it might already be, if needed, decode prior to encode)
  343         const QString doi(ref.value("doi"));
  344         const QString file(ref.value("file"));
  345         const QString url(ref.value("url"));
  346         QString link;
  347         if (!file.isEmpty())
  348         {
  349             if (_use_relative_links)
  350                 link = QDir::cleanPath(_current_dir.relativeFilePath(file));
  351             else
  352                 link = QDir::cleanPath(file);
  353         }
  354         else if (!url.isEmpty())
  355             link = url;
  356         else if (!doi.isEmpty())
  357         {
  358             if (doi.startsWith("http"))
  359                 link = doi;
  360             else
  361                 link = "https://dx.doi.org/" + QUrl::toPercentEncoding(doi);
  362         }
  363 
  364         const QString journal(ref.anyJournal());
  365         const QString volume(ref.value("volume"));
  366 
  367         QString pages(ref.value("pages"));
  368         if (!pages.isEmpty())
  369         {
  370             if (!volume.isEmpty())
  371                 pages = ", " + pages.remove(' ');
  372             else
  373                 pages = ' ' + pages.remove(' ');
  374         }
  375 
  376         QString year(ref.value("year"));
  377         if (!year.isEmpty())
  378             year = '(' + year + ").";
  379 
  380         QString item(reference_item_html);
  381         item.replace("GET_REFERENCE_ANCHOR_ID", ref.citeidName);
  382         item.replace("GET_REFERENCE_ANCHOR_NAME", ref.citeidName);
  383         item.replace("GET_REFERENCE_AUTHOR", author);
  384         if (link.isEmpty())
  385             item.remove("href=\"GET_REFERENCE_LINK\"");
  386         else
  387             item.replace("GET_REFERENCE_LINK", link);
  388         item.replace("GET_REFERENCE_PAGES", pages);
  389         item.replace("GET_REFERENCE_SOURCE", journal);
  390         item.replace("GET_REFERENCE_TITLE", c2bUtils::toHtmlString(title));
  391         item.replace("GET_REFERENCE_VOLUME", volume);
  392         item.replace("GET_REFERENCE_YEAR", year);
  393         references += item;
  394 
  395         // Update _cites values according to this list
  396         _cites[keys.at(i)] = i + 1;
  397     }
  398     *reference_list_html = c2bUtils::fileToString(":/htm/htm/reference_list.html");
  399     reference_list_html->replace("GET_REFERENCES", references);
  400 }