"Fossies" - the Fresh Open Source Software Archive

Member "recoll-1.26.3/query/xadump.cpp" (4 Sep 2019, 9314 Bytes) of package /linux/privat/recoll-1.26.3.tar.gz:


As a special service, "Fossies" has tried to format the requested source page as HTML, using (guessed) C and C++ source code syntax highlighting (style: standard) with prefixed line numbers and a code-folding option. Alternatively, you can view or download the uninterpreted source code file here. For more information about "xadump.cpp", see the Fossies "Dox" file reference documentation.

    1 /* Copyright (C) 2004 J.F.Dockes
    2  *   This program is free software; you can redistribute it and/or modify
    3  *   it under the terms of the GNU General Public License as published by
    4  *   the Free Software Foundation; either version 2 of the License, or
    5  *   (at your option) any later version.
    6  *
    7  *   This program is distributed in the hope that it will be useful,
    8  *   but WITHOUT ANY WARRANTY; without even the implied warranty of
    9  *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
   10  *   GNU General Public License for more details.
   11  *
   12  *   You should have received a copy of the GNU General Public License
   13  *   along with this program; if not, write to the
   14  *   Free Software Foundation, Inc.,
   15  * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
   16  */
   17 
   18 #include "autoconfig.h"
   19 
   20 #include <stdio.h>
   21 #include <stdlib.h>
   22 #include <signal.h>
   23 #include <strings.h>
   24 
   25 #include <iostream>
   26 #include <string>
   27 #include <vector>
   28 
   29 #include "pathut.h"
   30 
   31 #ifndef NO_NAMESPACES
   32 using namespace std;
   33 #endif /* NO_NAMESPACES */
   34 
   35 #include "utf8iter.h"
   36 
   37 #include "xapian.h"
   38 
   39 static string thisprog;
   40 
   41 static string usage =
   42     " -d <dbdir> \n"
   43     "-e <output encoding>\n"
   44     " -i docid -D : get document data for docid\n"
   45     " -i docid -X : delete document docid\n"
   46     " -i docid -T : term list for doc docid\n"
   47     " -i docid -r : reconstructed text for docid\n"
   48     " -t term -E  : term existence test\n"
   49     " -t term -F  : retrieve term frequency data for given term\n"
   50     " -t term -P  : retrieve postings for term\n"
   51     " -T          : list all terms\n"
   52     "    -f       : precede each term in the list with its occurrence counts\n"
   53     "    -n : raw data (no [])\n"
   54     "    -l : don't list prefixed terms\n"
   55     " -x          : separate each output char with a space\n"
   56     " -s          : special mode to dump recoll stem db\n"
   57     " -q term [term ...] : perform AND query\n"
   58     "  \n\n"
   59     ;
   60 
   61 static void
   62 Usage(void)
   63 {
   64     cerr << thisprog  << ": usage:\n" << usage;
   65     exit(1);
   66 }
   67 
   68 static int        op_flags;
   69 #define OPT_D     0x1
   70 #define OPT_E     0x2
   71 #define OPT_F     0x4
   72 #define OPT_P     0x8
   73 #define OPT_T     0x10
   74 #define OPT_X     0x20
   75 #define OPT_d     0x80 
   76 #define OPT_e     0x100
   77 #define OPT_f     0x200
   78 #define OPT_i     0x400
   79 #define OPT_n     0x800
   80 #define OPT_q     0x1000
   81 #define OPT_t     0x4000
   82 #define OPT_x     0x8000
   83 #define OPT_l     0x10000
   84 #define OPT_r     0x20000
   85 
   86 // Compute an exploded version of string, inserting a space between each char.
   87 // (no character combining possible)
   88 static string detailstring(const string& in)
   89 {
   90     if (!(op_flags & OPT_x))
   91     return in;
   92     string out;
   93     Utf8Iter  it(in);
   94     for (; !it.eof(); it++) {
   95     it.appendchartostring(out);
   96     out += ' ';
   97     }
   98     // Strip last space
   99     if (!out.empty())
  100     out.resize(out.size()-1);
  101     return out;
  102 }
  103 
  104 Xapian::Database *db;
  105 
  106 static void cleanup()
  107 {
  108     delete db;
  109 }
  110 
  111 static void sigcleanup(int sig)
  112 {
  113     fprintf(stderr, "sigcleanup\n");
  114     cleanup();
  115     exit(1);
  116 }
  117 
  118 bool o_index_stripchars;
  119 
  120 inline bool has_prefix(const string& trm)
  121 {
  122     if (o_index_stripchars) {
  123     return trm.size() && 'A' <= trm[0] && trm[0] <= 'Z';
  124     } else {
  125     return trm.size() > 0 && trm[0] == ':';
  126     }
  127 }
  128 
  129 
  130 void wholedoc(Xapian::Database* db, int docid)
  131 {
  132     vector<string> buf;
  133     Xapian::TermIterator term;
  134     for (term = db->termlist_begin(docid);
  135          term != db->termlist_end(docid); term++) {
  136         Xapian::PositionIterator pos;
  137         for (pos = db->positionlist_begin(docid, *term);
  138              pos != db->positionlist_end(docid, *term); pos++) {
  139             if (buf.size() < *pos)
  140                 buf.resize(2*((*pos)+1));
  141             buf[(*pos)] = detailstring(*term);
  142         }
  143     }
  144     for (vector<string>::iterator it = buf.begin(); it != buf.end(); it++) {
  145         if (!it->empty())
  146             cout << *it << " ";
  147     }
  148 }
  149 
  150 int main(int argc, char **argv)
  151 {
  152     string dbdir = path_cat(path_home(), ".recoll/xapiandb");
  153     string outencoding = "ISO8859-1";
  154     int docid = 1;
  155     string aterm;
  156 
  157     thisprog = argv[0];
  158     argc--; argv++;
  159 
  160     while (argc > 0 && **argv == '-') {
  161     (*argv)++;
  162     if (!(**argv))
  163         /* Cas du "adb - core" */
  164         Usage();
  165     while (**argv)
  166         switch (*(*argv)++) {
  167         case 'D':   op_flags |= OPT_D; break;
  168         case 'd':   op_flags |= OPT_d; if (argc < 2)  Usage();
  169         dbdir = *(++argv);
  170         argc--; 
  171         goto b1;
  172         case 'E':   op_flags |= OPT_E; break;
  173         case 'e':   op_flags |= OPT_d; if (argc < 2)  Usage();
  174         outencoding = *(++argv);
  175         argc--; 
  176         goto b1;
  177         case 'F':   op_flags |= OPT_F; break;
  178         case 'f':   op_flags |= OPT_f; break;
  179         case 'i':   op_flags |= OPT_i; if (argc < 2)  Usage();
  180         if (sscanf(*(++argv), "%d", &docid) != 1) Usage();
  181         argc--; 
  182         goto b1;
  183         case 'l':   op_flags |= OPT_l; break;
  184         case 'n':   op_flags |= OPT_n; break;
  185         case 'P':   op_flags |= OPT_P; break;
  186         case 'q':   op_flags |= OPT_q; break;
  187         case 'r':   case 'b': op_flags |= OPT_r; break;
  188         case 'T':   op_flags |= OPT_T; break;
  189         case 't':   op_flags |= OPT_t; if (argc < 2)  Usage();
  190         aterm = *(++argv);
  191         argc--; 
  192         goto b1;
  193         case 'X':   op_flags |= OPT_X; break;
  194         case 'x':   op_flags |= OPT_x; break;
  195         default: Usage();   break;
  196         }
  197     b1: argc--; argv++;
  198     }
  199 
  200     vector<string> qterms;
  201     if (op_flags & OPT_q) {
  202     fprintf(stderr, "q argc %d\n", argc);
  203     if (argc < 1)
  204         Usage();
  205     while (argc > 0) {
  206         qterms.push_back(*argv++); argc--;
  207     }
  208     }
  209 
  210     if (argc != 0)
  211     Usage();
  212 
  213     atexit(cleanup);
  214     if (signal(SIGHUP, SIG_IGN) != SIG_IGN)
  215     signal(SIGHUP, sigcleanup);
  216     if (signal(SIGINT, SIG_IGN) != SIG_IGN)
  217     signal(SIGINT, sigcleanup);
  218     if (signal(SIGQUIT, SIG_IGN) != SIG_IGN)
  219     signal(SIGQUIT, sigcleanup);
  220     if (signal(SIGTERM, SIG_IGN) != SIG_IGN)
  221     signal(SIGTERM, sigcleanup);
  222 
  223     try {
  224     db = new Xapian::Database(dbdir);
  225     cout << "DB: ndocs " << db->get_doccount() << " lastdocid " <<
  226         db->get_lastdocid() << " avglength " << db->get_avlength() << endl;
  227 
  228     // If we have terms with a leading ':' it's a new style,
  229     // unstripped index
  230     {
  231         Xapian::TermIterator term = db->allterms_begin(":");
  232         if (term == db->allterms_end())
  233         o_index_stripchars = true;
  234         else
  235         o_index_stripchars = false;
  236         cout<<"DB: terms are "<<(o_index_stripchars?"stripped":"raw")<<endl;
  237     }
  238     
  239     if (op_flags & OPT_T) {
  240         Xapian::TermIterator term;
  241         string printable;
  242         string op = (op_flags & OPT_n) ? string(): "[";
  243         string cl = (op_flags & OPT_n) ? string(): "]";
  244         if (op_flags & OPT_i) {
  245         for (term = db->termlist_begin(docid); 
  246              term != db->termlist_end(docid);term++) {
  247             const string& s = *term;
  248             if ((op_flags&OPT_l) && has_prefix(s))
  249             continue;
  250             cout << op << detailstring(s) << cl << endl;
  251         }
  252         } else {
  253         for (term = db->allterms_begin(); 
  254              term != db->allterms_end();term++) {
  255             const string& s = *term;
  256             if ((op_flags&OPT_l) && has_prefix(s))
  257             continue;
  258             if (op_flags & OPT_f)
  259             cout <<  db->get_collection_freq(*term) << " " 
  260                  << term.get_termfreq() << " ";
  261             cout << op << detailstring(s) << cl << endl;
  262         }
  263         }
  264     } else if (op_flags & OPT_D) {
  265         Xapian::Document doc = db->get_document(docid);
  266         string data = doc.get_data();
  267         cout << data << endl;
  268     } else if (op_flags & OPT_r) {
  269         wholedoc(db, docid);
  270     } else if (op_flags & OPT_X) {
  271         Xapian::Document doc = db->get_document(docid);
  272         string data = doc.get_data();
  273         cout << data << endl;
  274         cout << "Really delete xapian document ?" << endl;
  275         string rep;
  276         cin >> rep;
  277         if (!rep.empty() && (rep[0] == 'y' || rep[0] == 'Y')) {
  278         Xapian::WritableDatabase wdb(dbdir,  Xapian::DB_OPEN);
  279         cout << "Deleting" << endl;
  280         wdb.delete_document(docid);
  281         }
  282     } else if (op_flags & OPT_P) {
  283         Xapian::PostingIterator doc;
  284         for (doc = db->postlist_begin(aterm);
  285          doc != db->postlist_end(aterm); doc++) {
  286         cout << *doc << "(" << doc.get_wdf() << ") : " ;
  287         Xapian::PositionIterator pos;
  288         for (pos = doc.positionlist_begin(); 
  289              pos != doc.positionlist_end(); pos++) {
  290             cout << *pos << " " ;
  291         }
  292         cout << endl;
  293         }
  294         
  295     } else if (op_flags & OPT_F) {
  296         cout << "FreqFor " << aterm << " : " <<
  297         db->get_termfreq(aterm) << endl;
  298     } else if (op_flags & OPT_E) {
  299         cout << "Exists [" << aterm << "] : " <<
  300         db->term_exists(aterm) << endl;
  301     }  else if (op_flags & OPT_q) {
  302         Xapian::Enquire enquire(*db);
  303 
  304         Xapian::Query query(Xapian::Query::OP_AND, qterms.begin(), 
  305                 qterms.end());
  306         cout << "Performing query `" <<
  307         query.get_description() << "'" << endl;
  308         enquire.set_query(query);
  309 
  310         Xapian::MSet matches = enquire.get_mset(0, 10);
  311         cout << "Estimated results: " << 
  312         matches.get_matches_lower_bound() << endl;
  313         Xapian::MSetIterator i;
  314         for (i = matches.begin(); i != matches.end(); ++i) {
  315         cout << "Document ID " << *i << "\t";
  316         cout << i.get_percent() << "% ";
  317         Xapian::Document doc = i.get_document();
  318         cout << "[" << doc.get_data() << "]" << endl;
  319         }
  320     }
  321     } catch (const Xapian::Error &e) {
  322     cout << "Exception: " << e.get_msg() << endl;
  323     } catch (const string &s) {
  324     cout << "Exception: " << s << endl;
  325     } catch (const char *s) {
  326     cout << "Exception: " << s << endl;
  327     } catch (...) {
  328     cout << "Caught unknown exception" << endl;
  329     }
  330     exit(0);
  331 }