"Fossies" - the Fresh Open Source Software Archive

Member "xapian-core-1.4.14/tests/api_matchspy.cc" (23 Nov 2019, 10504 Bytes) of package /linux/www/xapian-core-1.4.14.tar.xz:


As a special service, "Fossies" has tried to format the requested source page into HTML using (guessed) C and C++ source code syntax highlighting (style: standard), with prefixed line numbers and a code folding option. Alternatively, you can view or download the uninterpreted source code file here. See also the latest Fossies "Diffs" side-by-side code changes report for "api_matchspy.cc": 1.4.12_vs_1.4.13.

    1 /** @file api_matchspy.cc
    2  * @brief tests of MatchSpy usage
    3  */
    4 /* Copyright 2007,2009 Lemur Consulting Ltd
    5  * Copyright 2009,2011,2012,2015,2019 Olly Betts
    6  * Copyright 2010 Richard Boulton
    7  *
    8  * This program is free software; you can redistribute it and/or
    9  * modify it under the terms of the GNU General Public License as
   10  * published by the Free Software Foundation; either version 2 of the
   11  * License, or (at your option) any later version.
   12  *
   13  * This program is distributed in the hope that it will be useful,
   14  * but WITHOUT ANY WARRANTY; without even the implied warranty of
   15  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
   16  * GNU General Public License for more details.
   17  *
   18  * You should have received a copy of the GNU General Public License
   19  * along with this program; if not, write to the Free Software
   20  * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301
   21  * USA
   22  */
   23 
#include <config.h>

#include "api_matchspy.h"

#include <xapian.h>

#include <algorithm>
#include <cmath>
#include <map>
#include <vector>

#include "backendmanager.h"
#include "str.h"
#include "testsuite.h"
#include "testutils.h"
#include "apitest.h"
   39 
   40 using namespace std;
   41 
   42 // #######################################################################
   43 // # Tests start here
   44 
   45 class SimpleMatchSpy : public Xapian::MatchSpy {
   46   public:
   47     // Vector which will be filled with all the document contents seen.
   48     std::vector<std::string> seen;
   49 
   50     void operator()(const Xapian::Document &doc, double) {
   51     // Note that this is not recommended usage of get_data() - you
   52     // generally shouldn't call get_data() from inside a MatchSpy, because
   53     // it is (likely to be) a slow operation resulting in considerable IO.
   54     seen.push_back(doc.get_data());
   55     }
   56 };
   57 
   58 // Basic test of a matchspy.
   59 DEFINE_TESTCASE(matchspy1, backend && !remote) {
   60     Xapian::Database db(get_database("apitest_simpledata"));
   61     Xapian::Enquire enquire(db);
   62     enquire.set_query(Xapian::Query("this"));
   63 
   64     SimpleMatchSpy myspy;
   65 
   66     Xapian::MSet nospymset = enquire.get_mset(0, 100);
   67     enquire.add_matchspy(&myspy);
   68     Xapian::MSet spymset = enquire.get_mset(0, 100);
   69 
   70     // Check that the match estimates aren't affected by the matchspy.
   71     TEST_EQUAL(nospymset, spymset);
   72 
   73     vector<bool> docid_checked(db.get_lastdocid());
   74 
   75     // Check that we get the expected number of matches, and that the stored
   76     // document contents are right.
   77     Xapian::MSetIterator i = spymset.begin();
   78     TEST(i != spymset.end());
   79     TEST_EQUAL(spymset.size(), 6);
   80     TEST_EQUAL(myspy.seen.size(), spymset.size());
   81 
   82     std::sort(myspy.seen.begin(), myspy.seen.end());
   83 
   84     std::vector<std::string> seen2;
   85     for ( ; i != spymset.end(); ++i) {
   86     const Xapian::Document doc(i.get_document());
   87     seen2.push_back(doc.get_data());
   88     }
   89     std::sort(seen2.begin(), seen2.end());
   90 
   91     TEST_EQUAL(myspy.seen.size(), seen2.size());
   92     std::vector<std::string>::const_iterator j = myspy.seen.begin();
   93     std::vector<std::string>::const_iterator j2 = seen2.begin();
   94     for (; j != myspy.seen.end(); ++j, ++j2) {
   95     TEST_EQUAL(*j, *j2);
   96     }
   97 
   98     return true;
   99 }
  100 
  101 static string values_to_repr(const Xapian::ValueCountMatchSpy & spy) {
  102     string resultrepr("|");
  103     for (Xapian::TermIterator i = spy.values_begin();
  104      i != spy.values_end();
  105      ++i) {
  106     resultrepr += *i;
  107     resultrepr += ':';
  108     resultrepr += str(i.get_termfreq());
  109     resultrepr += '|';
  110     }
  111     return resultrepr;
  112 }
  113 
  114 static void
  115 make_matchspy2_db(Xapian::WritableDatabase &db, const string &)
  116 {
  117     for (int c = 1; c <= 25; ++c) {
  118     Xapian::Document doc;
  119     doc.set_data("Document " + str(c));
  120     int factors = 0;
  121     for (int factor = 1; factor <= c; ++factor) {
  122         doc.add_term("all");
  123         if (c % factor == 0) {
  124         doc.add_term("XFACT" + str(factor));
  125         ++factors;
  126         }
  127     }
  128 
  129     // Number of factors.
  130     doc.add_value(0, str(factors));
  131     // Units digits.
  132     doc.add_value(1, str(c % 10));
  133     // Constant.
  134     doc.add_value(2, "fish");
  135     // Number of digits.
  136     doc.add_value(3, str(str(c).size()));
  137 
  138     db.add_document(doc);
  139     }
  140 }
  141 
  142 DEFINE_TESTCASE(matchspy2, generated)
  143 {
  144     Xapian::Database db = get_database("matchspy2", make_matchspy2_db);
  145 
  146     Xapian::ValueCountMatchSpy spy0(0);
  147     Xapian::ValueCountMatchSpy spy1(1);
  148     Xapian::ValueCountMatchSpy spy3(3);
  149 
  150     Xapian::Enquire enq(db);
  151 
  152     enq.set_query(Xapian::Query("all"));
  153     if (startswith(get_dbtype(), "multi")) {
  154     // Without this, we short-cut on the second shard because we don't get
  155     // the documents in ascending weight order.
  156     enq.set_weighting_scheme(Xapian::CoordWeight());
  157     }
  158 
  159     enq.add_matchspy(&spy0);
  160     enq.add_matchspy(&spy1);
  161     enq.add_matchspy(&spy3);
  162     Xapian::MSet mset = enq.get_mset(0, 10);
  163 
  164     TEST_EQUAL(spy0.get_total(), 25);
  165     TEST_EQUAL(spy1.get_total(), 25);
  166     TEST_EQUAL(spy3.get_total(), 25);
  167 
  168     static const char * const results[] = {
  169     "|1:1|2:9|3:3|4:7|5:1|6:3|8:1|",
  170     "|0:2|1:3|2:3|3:3|4:3|5:3|6:2|7:2|8:2|9:2|",
  171     "|1:9|2:16|",
  172     };
  173     TEST_STRINGS_EQUAL(values_to_repr(spy0), results[0]);
  174     TEST_STRINGS_EQUAL(values_to_repr(spy1), results[1]);
  175     TEST_STRINGS_EQUAL(values_to_repr(spy3), results[2]);
  176 
  177     return true;
  178 }
  179 
  180 DEFINE_TESTCASE(matchspy4, generated)
  181 {
  182     XFAIL_FOR_BACKEND("multi_remote",
  183               "Matchspy counts hits on remote and locally");
  184     XFAIL_FOR_BACKEND("multi_glass_remote",
  185               "Matchspy counts hits on remote and locally");
  186 
  187     Xapian::Database db = get_database("matchspy2", make_matchspy2_db);
  188 
  189     // We're going to run the match twice - once sorted by relevance, and once
  190     // sorted by a value.  This is a regression test - the matcher used to fail
  191     // to show some documents to the spy when sorting by non-pure-relevance.
  192     Xapian::ValueCountMatchSpy spya0(0);
  193     Xapian::ValueCountMatchSpy spya1(1);
  194     Xapian::ValueCountMatchSpy spya3(3);
  195     Xapian::ValueCountMatchSpy spyb0(0);
  196     Xapian::ValueCountMatchSpy spyb1(1);
  197     Xapian::ValueCountMatchSpy spyb3(3);
  198 
  199     Xapian::Enquire enqa(db);
  200     Xapian::Enquire enqb(db);
  201 
  202     enqa.set_query(Xapian::Query("all"));
  203     if (startswith(get_dbtype(), "multi")) {
  204     // Without this, we short-cut on the second shard because we don't get
  205     // the documents in ascending weight order.
  206     enqa.set_weighting_scheme(Xapian::CoordWeight());
  207     }
  208     enqb.set_query(Xapian::Query("all"));
  209 
  210     enqa.add_matchspy(&spya0);
  211     enqa.add_matchspy(&spya1);
  212     enqa.add_matchspy(&spya3);
  213     enqb.add_matchspy(&spyb0);
  214     enqb.add_matchspy(&spyb1);
  215     enqb.add_matchspy(&spyb3);
  216 
  217     Xapian::MSet mseta = enqa.get_mset(0, 10);
  218     enqb.set_sort_by_value(0, false);
  219     Xapian::MSet msetb = enqb.get_mset(0, 10, 100);
  220 
  221     TEST_EQUAL(spya0.get_total(), 25);
  222     TEST_EQUAL(spya1.get_total(), 25);
  223     TEST_EQUAL(spya3.get_total(), 25);
  224     TEST_EQUAL(spyb0.get_total(), 25);
  225     TEST_EQUAL(spyb1.get_total(), 25);
  226     TEST_EQUAL(spyb3.get_total(), 25);
  227 
  228     static const char * const results[] = {
  229     "|2:9|4:7|3:3|6:3|1:1|5:1|8:1|",
  230     "|1:3|2:3|3:3|4:3|5:3|0:2|6:2|7:2|8:2|9:2|",
  231     "|",
  232     "|2:16|1:9|",
  233     "|2:9|4:7|3:3|6:3|1:1|5:1|8:1|",
  234     "|1:3|2:3|3:3|4:3|5:3|0:2|6:2|7:2|8:2|9:2|",
  235     "|",
  236     "|2:16|1:9|",
  237     NULL
  238     };
  239     std::vector<Xapian::ValueCountMatchSpy *> spies;
  240     spies.push_back(&spya0);
  241     spies.push_back(&spya1);
  242     spies.push_back(NULL);
  243     spies.push_back(&spya3);
  244     spies.push_back(&spyb0);
  245     spies.push_back(&spyb1);
  246     spies.push_back(NULL);
  247     spies.push_back(&spyb3);
  248     for (Xapian::valueno v = 0; results[v]; ++v) {
  249     tout << "value " << v << endl;
  250     Xapian::ValueCountMatchSpy * spy = spies[v];
  251     string allvals_str("|");
  252     if (spy != NULL) {
  253         size_t allvals_size = 0;
  254         for (Xapian::TermIterator i = spy->top_values_begin(100);
  255          i != spy->top_values_end(100);
  256          ++i, ++allvals_size) {
  257         allvals_str += *i;
  258         allvals_str += ':';
  259         allvals_str += str(i.get_termfreq());
  260         allvals_str += '|';
  261         }
  262         tout << allvals_str << endl;
  263         TEST_STRINGS_EQUAL(allvals_str, results[v]);
  264 
  265         for (size_t count = 0; count < allvals_size; ++count) {
  266         tout << "count " << count << endl;
  267         for (Xapian::TermIterator i = spy->top_values_begin(100),
  268              j = spy->top_values_begin(count);
  269              i != spy->top_values_end(100) &&
  270              j != spy->top_values_end(count);
  271              ++i, ++j) {
  272             tout << "j " << j << endl;
  273             TEST_EQUAL(*i, *j);
  274             TEST_EQUAL(i.get_termfreq(), j.get_termfreq());
  275         }
  276         }
  277     }
  278     }
  279 
  280     return true;
  281 }
  282 
  283 // Test builtin match spies
  284 DEFINE_TESTCASE(matchspy5, backend)
  285 {
  286     Xapian::Database db(get_database("apitest_simpledata"));
  287     Xapian::Enquire enquire(db);
  288     enquire.set_query(Xapian::Query("this"));
  289 
  290     Xapian::ValueCountMatchSpy myspy1(1);
  291     Xapian::ValueCountMatchSpy myspy2(1);
  292 
  293     enquire.add_matchspy(&myspy1);
  294     enquire.add_matchspy(&myspy2);
  295     Xapian::MSet mymset = enquire.get_mset(0, 100);
  296     TEST_EQUAL(mymset.size(), 6);
  297 
  298     Xapian::TermIterator i = myspy1.values_begin();
  299     TEST(i != myspy1.values_end());
  300     TEST(*i == "h");
  301     TEST_EQUAL(i.get_termfreq(), 5);
  302     ++i;
  303     TEST(i != myspy1.values_end());
  304     TEST(*i == "n");
  305     TEST_EQUAL(i.get_termfreq(), 1);
  306     ++i;
  307     TEST(i == myspy1.values_end());
  308 
  309     i = myspy2.values_begin();
  310     TEST(i != myspy2.values_end());
  311     TEST(*i == "h");
  312     TEST_EQUAL(i.get_termfreq(), 5);
  313     ++i;
  314     TEST(i != myspy2.values_end());
  315     TEST(*i == "n");
  316     TEST_EQUAL(i.get_termfreq(), 1);
  317     ++i;
  318     TEST(i == myspy2.values_end());
  319 
  320     return true;
  321 }
  322 
  323 class MySpy : public Xapian::MatchSpy {
  324     void operator()(const Xapian::Document &, double) {
  325     }
  326 };
  327 
  328 // Test exceptions from matchspy base class, and get_description method.
  329 DEFINE_TESTCASE(matchspy6, !backend)
  330 {
  331     MySpy spy;
  332 
  333     TEST_EXCEPTION(Xapian::UnimplementedError, spy.clone());
  334     TEST_EXCEPTION(Xapian::UnimplementedError, spy.name());
  335     TEST_EXCEPTION(Xapian::UnimplementedError, spy.serialise());
  336     TEST_EXCEPTION(Xapian::UnimplementedError,
  337            spy.unserialise(std::string(), Xapian::Registry()));
  338     TEST_EXCEPTION(Xapian::UnimplementedError, spy.serialise_results());
  339     TEST_EXCEPTION(Xapian::UnimplementedError,
  340            spy.merge_results(std::string()));
  341     TEST_EQUAL(spy.get_description(), "Xapian::MatchSpy()");
  342 
  343     return true;
  344 }
  345 
  346 /// Regression test for bug fixed in 1.4.12.
  347 DEFINE_TESTCASE(matchspy7, !backend)
  348 {
  349     Xapian::ValueCountMatchSpy myspy(1);
  350     string s = myspy.serialise_results();
  351     s += 'x';
  352     // This merge_results() call used to enter an infinite loop.
  353     TEST_EXCEPTION(Xapian::NetworkError, myspy.merge_results(s));
  354 
  355     return true;
  356 }