"Fossies" - the Fresh Open Source Software Archive

Member "recoll-1.26.3/query/reslistpager.cpp" (24 Nov 2019, 17028 Bytes) of package /linux/privat/recoll-1.26.3.tar.gz:


As a special service "Fossies" has tried to format the requested source page into HTML format using (guessed) C and C++ source code syntax highlighting (style: standard) with prefixed line numbers and code folding option. Alternatively you can here view or download the uninterpreted source code file. For more information about "reslistpager.cpp" see the Fossies "Dox" file reference documentation and the last Fossies "Diffs" side-by-side code changes report: 1.25.23_vs_1.26.0.

    1 /* Copyright (C) 2007-2019 J.F.Dockes
    2  *   This program is free software; you can redistribute it and/or modify
    3  *   it under the terms of the GNU General Public License as published by
    4  *   the Free Software Foundation; either version 2 of the License, or
    5  *   (at your option) any later version.
    6  *
    7  *   This program is distributed in the hope that it will be useful,
    8  *   but WITHOUT ANY WARRANTY; without even the implied warranty of
    9  *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
   10  *   GNU General Public License for more details.
   11  *
   12  *   You should have received a copy of the GNU General Public License
   13  *   along with this program; if not, write to the
   14  *   Free Software Foundation, Inc.,
   15  * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
   16  */
   17 #include "autoconfig.h"
   18 
   19 #include <stdio.h>
   20 #include <stdlib.h>
   21 #include <math.h>
   22 #include <time.h>
   23 #include <stdint.h>
   24 
   25 #include <sstream>
   26 #include <iostream>
   27 #include <list>
   28 using std::ostringstream;
   29 using std::endl;
   30 using std::list;
   31 
   32 #include "cstr.h"
   33 #include "reslistpager.h"
   34 #include "log.h"
   35 #include "rclconfig.h"
   36 #include "smallut.h"
   37 #include "rclutil.h"
   38 #include "plaintorich.h"
   39 #include "mimehandler.h"
   40 #include "transcode.h"
   41 
   42 // Default highlighter. No need for locking, this is query-only.
   43 static const string cstr_hlfontcolor("<span style='color: blue;'>");
   44 static const string cstr_hlendfont("</span>");
   45 class PlainToRichHtReslist : public PlainToRich {
   46 public:
   47     virtual string startMatch(unsigned int) {
   48         return cstr_hlfontcolor;
   49     }
   50     virtual string endMatch() {
   51         return cstr_hlendfont;
   52     }
   53 };
   54 static PlainToRichHtReslist g_hiliter;
   55 
   56 ResListPager::ResListPager(int pagesize) 
   57     : m_pagesize(pagesize),
   58       m_newpagesize(pagesize),
   59       m_resultsInCurrentPage(0),
   60       m_winfirst(-1),
   61       m_hasNext(true),
   62       m_hiliter(&g_hiliter)
   63 {
   64 }
   65 
   66 void ResListPager::resultPageNext()
   67 {
   68     if (!m_docSource) {
   69         LOGDEB("ResListPager::resultPageNext: null source\n");
   70         return;
   71     }
   72 
   73     int resCnt = m_docSource->getResCnt();
   74     LOGDEB("ResListPager::resultPageNext: rescnt " << resCnt <<
   75            ", winfirst " << m_winfirst << "\n");
   76 
   77     if (m_winfirst < 0) {
   78         m_winfirst = 0;
   79     } else {
   80         m_winfirst += int(m_respage.size());
   81     }
   82     // Get the next page of results. Note that we look ahead by one to
   83     // determine if there is actually a next page
   84     vector<ResListEntry> npage;
   85     int pagelen = m_docSource->getSeqSlice(m_winfirst, m_pagesize + 1, npage);
   86 
   87     // If page was truncated, there is no next
   88     m_hasNext = (pagelen == m_pagesize + 1);
   89 
   90     // Get rid of the possible excess result
   91     if (pagelen == m_pagesize + 1) {
   92         npage.resize(m_pagesize);
   93         pagelen--;
   94     }
   95 
   96     if (pagelen <= 0) {
   97         // No results ? This can only happen on the first page or if the
   98         // actual result list size is a multiple of the page pref (else
   99         // there would have been no Next on the last page)
  100         if (m_winfirst > 0) {
  101             // Have already results. Let them show, just disable the
  102             // Next button. We'd need to remove the Next link from the page
  103             // too.
  104             // Restore the m_winfirst value, let the current result vector alone
  105             m_winfirst -= int(m_respage.size());
  106         } else {
  107             // No results at all (on first page)
  108             m_winfirst = -1;
  109         }
  110         return;
  111     }
  112     m_resultsInCurrentPage = pagelen;
  113     m_respage = npage;
  114 }
  115 static string maybeEscapeHtml(const string& fld)
  116 {
  117     if (fld.compare(0, cstr_fldhtm.size(), cstr_fldhtm))
  118         return escapeHtml(fld);
  119     else
  120         return fld.substr(cstr_fldhtm.size());
  121 }
  122 
  123 
  124 void ResListPager::resultPageFor(int docnum)
  125 {
  126     if (!m_docSource) {
  127         LOGDEB("ResListPager::resultPageFor: null source\n");
  128         return;
  129     }
  130 
  131     int resCnt = m_docSource->getResCnt();
  132     LOGDEB("ResListPager::resultPageFor(" << docnum << "): rescnt " <<
  133            resCnt << ", winfirst " << m_winfirst << "\n");
  134     m_winfirst = (docnum / m_pagesize) * m_pagesize;
  135 
  136     // Get the next page of results.
  137     vector<ResListEntry> npage;
  138     int pagelen = m_docSource->getSeqSlice(m_winfirst, m_pagesize, npage);
  139 
  140     // If page was truncated, there is no next
  141     m_hasNext = (pagelen == m_pagesize);
  142 
  143     if (pagelen <= 0) {
  144         m_winfirst = -1;
  145         return;
  146     }
  147     m_respage = npage;
  148 }
  149 
  150 void ResListPager::displayDoc(RclConfig *config, int i, Rcl::Doc& doc, 
  151                               const HighlightData& hdata, const string& sh)
  152 {
  153     ostringstream chunk;
  154 
  155     // Determine icon to display if any
  156     string iconurl = iconUrl(config, doc);
  157     
  158     // Printable url: either utf-8 if transcoding succeeds, or url-encoded
  159     string url;
  160     printableUrl(config->getDefCharset(), doc.url, url);
  161 
  162     // Same as url, but with file:// possibly stripped. output by %u instead
  163     // of %U. 
  164     string urlOrLocal;
  165     urlOrLocal = fileurltolocalpath(url);
  166     if (urlOrLocal.empty())
  167         urlOrLocal = url;
  168 
  169     // Make title out of file name if none yet
  170     string titleOrFilename;
  171     string utf8fn;
  172     doc.getmeta(Rcl::Doc::keytt, &titleOrFilename);
  173     doc.getmeta(Rcl::Doc::keyfn, &utf8fn);
  174     if (utf8fn.empty()) {
  175         utf8fn = path_getsimple(url);   
  176     }
  177     if (titleOrFilename.empty()) {
  178         titleOrFilename = utf8fn;
  179     }
  180 
  181     // Url for the parent directory. We strip the file:// part for local
  182     // paths
  183     string parenturl = url_parentfolder(url);
  184     {
  185         string localpath = fileurltolocalpath(parenturl);
  186         if (!localpath.empty())
  187             parenturl = localpath;
  188     }
  189 
  190     // Result number
  191     char numbuf[20];
  192     int docnumforlinks = m_winfirst + 1 + i;
  193     sprintf(numbuf, "%d", docnumforlinks);
  194 
  195     // Document date: either doc or file modification times
  196     string datebuf;
  197     if (!doc.dmtime.empty() || !doc.fmtime.empty()) {
  198         char cdate[100];
  199         cdate[0] = 0;
  200         time_t mtime = doc.dmtime.empty() ?
  201             atoll(doc.fmtime.c_str()) : atoll(doc.dmtime.c_str());
  202         struct tm *tm = localtime(&mtime);
  203         strftime(cdate, 99, dateFormat().c_str(), tm);
  204         transcode(cdate, datebuf, RclConfig::getLocaleCharset(), "UTF-8");
  205     }
  206 
  207     // Size information. We print both doc and file if they differ a lot
  208     int64_t fsize = -1, dsize = -1;
  209     if (!doc.dbytes.empty())
  210         dsize = static_cast<int64_t>(atoll(doc.dbytes.c_str()));
  211     if (!doc.fbytes.empty())
  212         fsize =  static_cast<int64_t>(atoll(doc.fbytes.c_str()));
  213     string sizebuf;
  214     if (dsize > 0) {
  215         sizebuf = displayableBytes(dsize);
  216         if (fsize > 10 * dsize && fsize - dsize > 1000)
  217             sizebuf += string(" / ") + displayableBytes(fsize);
  218     } else if (fsize >= 0) {
  219         sizebuf = displayableBytes(fsize);
  220     }
  221 
  222     string richabst;
  223     bool needabstract = parFormat().find("%A") != string::npos;
  224     if (needabstract && m_docSource) {
  225         vector<string> vabs;
  226         m_docSource->getAbstract(doc, vabs);
  227         m_hiliter->set_inputhtml(false);
  228 
  229         for (vector<string>::const_iterator it = vabs.begin();
  230              it != vabs.end(); it++) {
  231             if (!it->empty()) {
  232                 // No need to call escapeHtml(), plaintorich handles it
  233                 list<string> lr;
  234                 // There may be data like page numbers before the snippet text.
  235                 // will be in brackets.
  236                 string::size_type bckt = it->find("]");
  237                 if (bckt == string::npos) {
  238                     m_hiliter->plaintorich(*it, lr, hdata);
  239                 } else {
  240                     m_hiliter->plaintorich(it->substr(bckt), lr, hdata);
  241                     lr.front() = it->substr(0, bckt) + lr.front();
  242                 }
  243                 richabst += lr.front();
  244                 richabst += absSep();
  245             }
  246         }
  247     }
  248 
  249     // Links; Uses utilities from mimehandler.h
  250     ostringstream linksbuf;
  251     if (canIntern(&doc, config)) { 
  252         linksbuf << "<a href=\""<< linkPrefix()<< "P" << docnumforlinks << "\">" 
  253                  << trans("Preview") << "</a>&nbsp;&nbsp;";
  254     }
  255     if (canOpen(&doc, config)) {
  256         linksbuf << "<a href=\"" <<linkPrefix() + "E" <<docnumforlinks << "\">"  
  257                  << trans("Open") << "</a>";
  258     }
  259     ostringstream snipsbuf;
  260     if (doc.haspages) {
  261         snipsbuf << "<a href=\"" <<linkPrefix()<<"A" << docnumforlinks << "\">" 
  262                  << trans("Snippets") << "</a>&nbsp;&nbsp;";
  263         linksbuf << "&nbsp;&nbsp;" << snipsbuf.str();
  264     }
  265 
  266     string collapscnt;
  267     if (doc.getmeta(Rcl::Doc::keycc, &collapscnt) && !collapscnt.empty()) {
  268         ostringstream collpsbuf;
  269         int clc = atoi(collapscnt.c_str()) + 1;
  270         collpsbuf << "<a href=\""<<linkPrefix()<<"D" << docnumforlinks << "\">" 
  271                   << trans("Dups") << "(" << clc << ")" << "</a>&nbsp;&nbsp;";
  272         linksbuf << "&nbsp;&nbsp;" << collpsbuf.str();
  273     }
  274 
  275     // Build the result list paragraph:
  276 
  277     // Subheader: this is used by history
  278     if (!sh.empty())
  279         chunk << "<p style='clear: both;'><b>" << sh << "</p>\n<p>";
  280     else
  281         chunk << "<p style='margin: 0px;padding: 0px;clear: both;'>";
  282 
  283     char xdocidbuf[100];
  284     sprintf(xdocidbuf, "%lu", doc.xdocid);
  285     
  286     // Configurable stuff
  287     map<string, string> subs;
  288     subs["A"] = !richabst.empty() ? richabst : "";
  289     subs["D"] = datebuf;
  290     subs["E"] = snipsbuf.str();
  291     subs["I"] = iconurl;
  292     subs["i"] = doc.ipath;
  293     subs["K"] = !doc.meta[Rcl::Doc::keykw].empty() ? 
  294         string("[") + maybeEscapeHtml(doc.meta[Rcl::Doc::keykw]) + "]" : "";
  295     subs["L"] = linksbuf.str();
  296     subs["N"] = numbuf;
  297     subs["M"] = doc.mimetype;
  298     subs["P"] = parenturl;
  299     subs["R"] = doc.meta[Rcl::Doc::keyrr];
  300     subs["S"] = sizebuf;
  301     subs["T"] = maybeEscapeHtml(titleOrFilename);
  302     subs["t"] = maybeEscapeHtml(doc.meta[Rcl::Doc::keytt]);
  303     subs["U"] = url;
  304     subs["u"] = urlOrLocal;
  305     subs["x"] = xdocidbuf;
  306     
  307     // Let %(xx) access all metadata. HTML-neuter everything:
  308     for (const auto& entry : doc.meta) {
  309         if (!entry.first.empty()) 
  310             subs[entry.first] = maybeEscapeHtml(entry.second);
  311     }
  312 
  313     string formatted;
  314     pcSubst(parFormat(), formatted, subs);
  315     chunk << formatted;
  316 
  317     chunk << "</p>" << endl;
  318     // This was to force qt 4.x to clear the margins (which it should do
  319     // anyway because of the paragraph's style), but we finally took
  320     // the table approach for 1.15 for now (in guiutils.cpp)
  321 //      chunk << "<br style='clear:both;height:0;line-height:0;'>" << endl;
  322 
  323     LOGDEB2("Chunk: [" << chunk.rdbuf()->str() << "]\n");
  324     append(chunk.rdbuf()->str(), i, doc);
  325 }
  326 
  327 bool ResListPager::getDoc(int num, Rcl::Doc& doc)
  328 {
  329     if (m_winfirst < 0 || m_respage.size() == 0)
  330         return false;
  331     if (num < m_winfirst || num >= m_winfirst + int(m_respage.size()))
  332         return false;
  333     doc = m_respage[num-m_winfirst].doc;
  334     return true;
  335 }
  336 
  337 void ResListPager::displayPage(RclConfig *config)
  338 {
  339     LOGDEB("ResListPager::displayPage. linkPrefix: " << linkPrefix() << "\n");
  340     if (!m_docSource) {
  341         LOGDEB("ResListPager::displayPage: null source\n");
  342         return;
  343     }
  344     if (m_winfirst < 0 && !pageEmpty()) {
  345         LOGDEB("ResListPager::displayPage: sequence error: winfirst < 0\n");
  346         return;
  347     }
  348 
  349     ostringstream chunk;
  350 
  351     // Display list header
  352     // We could use a <title> but the textedit doesnt display
  353     // it prominently
  354     // Note: have to append text in chunks that make sense
  355     // html-wise. If we break things up too much, the editor
  356     // gets confused. Hence the use of the 'chunk' text
  357     // accumulator
  358     // Also note that there can be results beyond the estimated resCnt.
  359     chunk << "<html><head>" << endl
  360           << "<meta http-equiv=\"content-type\""
  361           << " content=\"text/html; charset=utf-8\">" << endl
  362           << headerContent()
  363           << "</head><body>" << endl
  364           << pageTop()
  365           << "<p><span style=\"font-size:110%;\"><b>"
  366           << m_docSource->title()
  367           << "</b></span>&nbsp;&nbsp;&nbsp;";
  368 
  369     if (pageEmpty()) {
  370         chunk << trans("<p><b>No results found</b><br>");
  371         string reason = m_docSource->getReason();
  372         if (!reason.empty()) {
  373             chunk << "<blockquote>" << escapeHtml(reason) << 
  374                 "</blockquote></p>";
  375         } else {
  376             HighlightData hldata;
  377             m_docSource->getTerms(hldata);
  378             vector<string> uterms(hldata.uterms.begin(), hldata.uterms.end());
  379             if (!uterms.empty()) {
  380                 map<string, vector<string> > spellings;
  381                 suggest(uterms, spellings);
  382                 if (!spellings.empty()) {
  383                     if (o_index_stripchars) {
  384                         chunk << 
  385                             trans("<p><i>Alternate spellings (accents suppressed): </i>")
  386                               << "<br /><blockquote>";
  387                     } else {
  388                         chunk << 
  389                             trans("<p><i>Alternate spellings: </i>")
  390                               << "<br /><blockquote>";
  391                     
  392                     }
  393 
  394                     for (const auto& entry: spellings) {
  395                         chunk << "<b>" << entry.first << "</b> : ";
  396                         for (const auto& spelling : entry.second) {
  397                             chunk << spelling << " ";
  398                         }
  399                         chunk << "<br />";
  400                     }
  401                     chunk << "</blockquote></p>";
  402                 }
  403             }
  404         }
  405     } else {
  406         unsigned int resCnt = m_docSource->getResCnt();
  407         if (m_winfirst + m_respage.size() < resCnt) {
  408             chunk << trans("Documents") << " <b>" << m_winfirst + 1
  409                   << "-" << m_winfirst + m_respage.size() << "</b> " 
  410                   << trans("out of at least") << " " 
  411                   << resCnt << " " << trans("for") << " " ;
  412         } else {
  413             chunk << trans("Documents") << " <b>" 
  414                   << m_winfirst + 1 << "-" << m_winfirst + m_respage.size()
  415                   << "</b> " << trans("for") << " ";
  416         }
  417     }
  418     chunk << detailsLink();
  419     if (hasPrev() || hasNext()) {
  420         chunk << "&nbsp;&nbsp;";
  421         if (hasPrev()) {
  422             chunk << "<a href=\"" << linkPrefix() + prevUrl() + "\"><b>"
  423                   << trans("Previous")
  424                   << "</b></a>&nbsp;&nbsp;&nbsp;";
  425         }
  426         if (hasNext()) {
  427             chunk << "<a href=\"" << linkPrefix() + nextUrl() + "\"><b>"
  428                   << trans("Next")
  429                   << "</b></a>";
  430         }
  431     }
  432     chunk << "</p>" << endl;
  433 
  434     append(chunk.rdbuf()->str());
  435     chunk.rdbuf()->str("");
  436     if (pageEmpty())
  437         return;
  438 
  439     HighlightData hdata;
  440     m_docSource->getTerms(hdata);
  441 
  442     // Emit data for result entry paragraph. Do it in chunks that make sense
  443     // html-wise, else our client may get confused
  444     for (int i = 0; i < (int)m_respage.size(); i++) {
  445         Rcl::Doc& doc(m_respage[i].doc);
  446         string& sh(m_respage[i].subHeader);
  447         displayDoc(config, i, doc, hdata, sh);
  448     }
  449 
  450     // Footer
  451     chunk << "<p align=\"center\">";
  452     if (hasPrev() || hasNext()) {
  453         if (hasPrev()) {
  454             chunk << "<a href=\"" + linkPrefix() + prevUrl() + "\"><b>" 
  455                   << trans("Previous")
  456                   << "</b></a>&nbsp;&nbsp;&nbsp;";
  457         }
  458         if (hasNext()) {
  459             chunk << "<a href=\"" << linkPrefix() + nextUrl() + "\"><b>"
  460                   << trans("Next")
  461                   << "</b></a>";
  462         }
  463     }
  464     chunk << "</p>" << endl;
  465     chunk << "</body></html>" << endl;
  466     append(chunk.rdbuf()->str());
  467 }
  468 
  469 // Default implementations for things that should be implemented by 
  470 // specializations
  471 string ResListPager::nextUrl()
  472 {
  473     return "n-1";
  474 }
  475 
  476 string ResListPager::prevUrl()
  477 {
  478     return "p-1";
  479 }
  480 
  481 string ResListPager::iconUrl(RclConfig *config, Rcl::Doc& doc)
  482 {
  483     string apptag;
  484     doc.getmeta(Rcl::Doc::keyapptg, &apptag);
  485 
  486     return path_pathtofileurl(config->getMimeIconPath(doc.mimetype, apptag));
  487 }
  488 
  489 bool ResListPager::append(const string& data)
  490 {
  491     fprintf(stderr, "%s", data.c_str());
  492     return true;
  493 }
  494 
  495 string ResListPager::trans(const string& in) 
  496 {
  497     return in;
  498 }
  499 
  500 string ResListPager::detailsLink()
  501 {
  502     string chunk = string("<a href=\"") + linkPrefix() + "H-1\">";
  503     chunk += trans("(show query)") + "</a>";
  504     return chunk;
  505 }
  506 
  507 const string &ResListPager::parFormat()
  508 {
  509     static const string cstr_format("<img src=\"%I\" align=\"left\">"
  510                                     "%R %S %L &nbsp;&nbsp;<b>%T</b><br>"
  511                                     "%M&nbsp;%D&nbsp;&nbsp;&nbsp;<i>%U</i><br>"
  512                                     "%A %K");
  513     return cstr_format;
  514 }
  515 
  516 const string &ResListPager::dateFormat()
  517 {
  518     static const string cstr_format("&nbsp;%Y-%m-%d&nbsp;%H:%M:%S&nbsp;%z");
  519     return cstr_format;
  520 }