"Fossies" - the Fresh Open Source Software Archive

Member "xapian-core-1.4.14/tests/api_percentages.cc" (23 Nov 2019, 11288 Bytes) of package /linux/www/xapian-core-1.4.14.tar.xz:


As a special service, "Fossies" has tried to format the requested source page into HTML format using (guessed) C and C++ source code syntax highlighting (style: standard) with prefixed line numbers and a code folding option. Alternatively, you can view or download the uninterpreted source code file here. See also the latest Fossies "Diffs" side-by-side code changes report for "api_percentages.cc": 1.4.12_vs_1.4.13.

    1 /** @file api_percentages.cc
    2  * @brief Tests of percentage calculations.
    3  */
    4 /* Copyright (C) 2008,2009 Lemur Consulting Ltd
    5  * Copyright (C) 2008,2009,2010,2011,2012,2014 Olly Betts
    6  *
    7  * This program is free software; you can redistribute it and/or modify
    8  * it under the terms of the GNU General Public License as published by
    9  * the Free Software Foundation; either version 2 of the License, or
   10  * (at your option) any later version.
   11  *
   12  * This program is distributed in the hope that it will be useful,
   13  * but WITHOUT ANY WARRANTY; without even the implied warranty of
   14  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
   15  * GNU General Public License for more details.
   16  *
   17  * You should have received a copy of the GNU General Public License
   18  * along with this program; if not, write to the Free Software
   19  * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301 USA
   20  */
   21 
   22 #include <config.h>
   23 
   24 #include "api_percentages.h"
   25 
   26 #include <xapian.h>
   27 
   28 #include "apitest.h"
   29 #include "str.h"
   30 #include "testutils.h"
   31 
   32 #include <cfloat>
   33 
   34 using namespace std;
   35 
   36 // Test that percentages reported are the same regardless of which part of the
   37 // mset is returned, for sort-by-value search.  Regression test for bug#216 in
   38 // 1.0.10 and earlier with returned percentages.
   39 DEFINE_TESTCASE(consistency3, backend) {
   40     Xapian::Database db(get_database("apitest_sortconsist"));
   41     Xapian::Enquire enquire(db);
   42     enquire.set_query(Xapian::Query("foo"));
   43     enquire.set_sort_by_value(1, 0);
   44     Xapian::doccount lots = 3;
   45     Xapian::MSet bigmset = enquire.get_mset(0, lots);
   46     TEST_EQUAL(bigmset.size(), lots);
   47     for (Xapian::doccount start = 0; start < lots; ++start) {
   48     tout << *bigmset[start] << ":" << bigmset[start].get_weight() << ":"
   49          << bigmset[start].get_percent() << "%" << endl;
   50     for (Xapian::doccount size = 0; size < lots - start; ++size) {
   51         Xapian::MSet mset = enquire.get_mset(start, size);
   52         if (mset.size()) {
   53         TEST_EQUAL(start + mset.size(),
   54                min(start + size, bigmset.size()));
   55         } else if (size) {
   56         TEST(start >= bigmset.size());
   57         }
   58         for (Xapian::doccount i = 0; i < mset.size(); ++i) {
   59         TEST_EQUAL(*mset[i], *bigmset[start + i]);
   60         TEST_EQUAL_DOUBLE(mset[i].get_weight(),
   61                   bigmset[start + i].get_weight());
   62         TEST_EQUAL_DOUBLE(mset[i].get_percent(),
   63                   bigmset[start + i].get_percent());
   64         }
   65     }
   66     }
   67     return true;
   68 }
   69 
   70 class MyPostingSource : public Xapian::PostingSource {
   71     vector<pair<Xapian::docid, double>> weights;
   72     vector<pair<Xapian::docid, double>>::const_iterator i;
   73     bool started;
   74 
   75     MyPostingSource(const vector<pair<Xapian::docid, double>>& weights_,
   76             double max_wt)
   77     : weights(weights_), started(false)
   78     {
   79     set_maxweight(max_wt);
   80     }
   81 
   82   public:
   83     MyPostingSource() : started(false) { }
   84 
   85     PostingSource * clone() const
   86     {
   87     return new MyPostingSource(weights, get_maxweight());
   88     }
   89 
   90     void append_docweight(Xapian::docid did, double wt) {
   91     weights.push_back(make_pair(did, wt));
   92     if (wt > get_maxweight()) set_maxweight(wt);
   93     }
   94 
   95     void init(const Xapian::Database &) { started = false; }
   96 
   97     double get_weight() const { return i->second; }
   98 
   99     Xapian::doccount get_termfreq_min() const { return weights.size(); }
  100     Xapian::doccount get_termfreq_est() const { return weights.size(); }
  101     Xapian::doccount get_termfreq_max() const { return weights.size(); }
  102 
  103     void next(double /*wt*/) {
  104     if (!started) {
  105         i = weights.begin();
  106         started = true;
  107     } else {
  108         ++i;
  109     }
  110     }
  111 
  112     bool at_end() const {
  113     return (i == weights.end());
  114     }
  115 
  116     Xapian::docid get_docid() const { return i->first; }
  117 
  118     string get_description() const {
  119     return "MyPostingSource";
  120     }
  121 };
  122 
  123 /// Test for rounding errors in percentage weight calculations and cutoffs.
  124 DEFINE_TESTCASE(pctcutoff4, backend && !remote && !multi) {
  125     // Find the number of DBL_EPSILONs to subtract which result in the
  126     // percentage of the second hit being 49% instead of 50%.
  127     int epsilons = 0;
  128     Xapian::Database db(get_database("apitest_simpledata"));
  129     Xapian::Enquire enquire(db);
  130     while (true) {
  131     MyPostingSource source;
  132     source.append_docweight(1, 100);
  133     source.append_docweight(2, 50 - epsilons * DBL_EPSILON);
  134     enquire.set_query(Xapian::Query(&source));
  135     Xapian::MSet mset = enquire.get_mset(0, 10);
  136     TEST_EQUAL(mset.size(), 2);
  137     if (mset[1].get_percent() != 50) break;
  138     ++epsilons;
  139     }
  140 
  141     // Make a set of document weights including ones on either side of the
  142     // 49% / 50% boundary.
  143     MyPostingSource source;
  144     source.append_docweight(1, 100);
  145     source.append_docweight(2, 50);
  146     source.append_docweight(3, 50 - (epsilons - 1) * DBL_EPSILON);
  147     source.append_docweight(4, 50 - epsilons * DBL_EPSILON);
  148     source.append_docweight(5, 25);
  149 
  150     enquire.set_query(Xapian::Query(&source));
  151     Xapian::MSet mset1 = enquire.get_mset(0, 10);
  152     TEST_EQUAL(mset1.size(), 5);
  153     TEST_EQUAL(mset1[2].get_percent(), 50);
  154     TEST_EQUAL(mset1[3].get_percent(), 49);
  155 
  156     // Use various different percentage cutoffs, and check that the values
  157     // returned are as expected.
  158     int percent = 100;
  159     for (Xapian::MSetIterator i = mset1.begin(); i != mset1.end(); ++i) {
  160     int new_percent = mset1.convert_to_percent(i);
  161     tout << "mset1 item = " << i.get_percent() << "%\n";
  162     if (new_percent != percent) {
  163         enquire.set_cutoff(percent);
  164         Xapian::MSet mset2 = enquire.get_mset(0, 10);
  165         tout << "cutoff = " << percent << "%, "
  166             "mset size = " << mset2.size() << "\n";
  167         TEST_EQUAL(mset2.size(), i.get_rank());
  168         percent = new_percent;
  169     }
  170     }
  171 
  172     return true;
  173 }
  174 
  175 /// Check we throw for a percentage cutoff while sorting primarily by value.
  176 DEFINE_TESTCASE(pctcutoff5, backend) {
  177     Xapian::Database db(get_database("apitest_simpledata"));
  178     Xapian::Enquire enquire(db);
  179     enquire.set_query(Xapian::Query("test"));
  180     enquire.set_cutoff(42);
  181     Xapian::MSet mset;
  182 
  183     enquire.set_sort_by_value(0, false);
  184     TEST_EXCEPTION(Xapian::UnimplementedError, mset = enquire.get_mset(0, 10));
  185 
  186     enquire.set_sort_by_value(0, true);
  187     TEST_EXCEPTION(Xapian::UnimplementedError, mset = enquire.get_mset(0, 10));
  188 
  189     enquire.set_sort_by_value_then_relevance(0, false);
  190     TEST_EXCEPTION(Xapian::UnimplementedError, mset = enquire.get_mset(0, 10));
  191 
  192     enquire.set_sort_by_value_then_relevance(0, true);
  193     TEST_EXCEPTION(Xapian::UnimplementedError, mset = enquire.get_mset(0, 10));
  194 
  195     return true;
  196 }
  197 
  198 // Regression test for bug fixed in 1.0.14.
  199 DEFINE_TESTCASE(topercent3, backend) {
  200     Xapian::Database db = get_database("apitest_simpledata");
  201     Xapian::Enquire enquire(db);
  202     enquire.set_sort_by_value(1, false);
  203 
  204     static const char * const terms[] = { "paragraph", "banana" };
  205     enquire.set_query(Xapian::Query(Xapian::Query::OP_OR, terms, terms + 2));
  206 
  207     Xapian::MSet mset = enquire.get_mset(0, 20);
  208 
  209     Xapian::MSetIterator i;
  210     for (i = mset.begin(); i != mset.end(); ++i) {
  211     // We should never achieve 100%.
  212     TEST_REL(i.get_percent(),<,100);
  213     }
  214 
  215     return true;
  216 }
  217 
  218 // Regression test for bug introduced temporarily by the "percent without
  219 // termlist" patch.
  220 DEFINE_TESTCASE(topercent4, backend) {
  221     Xapian::Enquire enquire(get_database("apitest_simpledata"));
  222 
  223     Xapian::Query query(Xapian::Query::OP_FILTER,
  224             Xapian::Query("paragraph"),
  225             Xapian::Query("queri"));
  226     query = Xapian::Query(Xapian::Query::OP_XOR,
  227               query, Xapian::Query("rubbish"));
  228 
  229     enquire.set_query(query);
  230     Xapian::MSet mset = enquire.get_mset(0, 10);
  231 
  232     // We should get 50% not 33%.
  233     TEST(!mset.empty());
  234     TEST_EQUAL(mset[0].get_percent(), 50);
  235 
  236     return true;
  237 }
  238 
  239 /// Test that a search with a non-existent term doesn't get 100%.
  240 DEFINE_TESTCASE(topercent5, backend) {
  241     Xapian::Enquire enquire(get_database("apitest_simpledata"));
  242     Xapian::Query q(Xapian::Query::OP_OR,
  243             Xapian::Query("paragraph"), Xapian::Query("xyzzy"));
  244     enquire.set_query(q);
  245     Xapian::MSet mset = enquire.get_mset(0, 10);
  246     TEST(!mset.empty());
  247     TEST(mset[0].get_percent() < 100);
  248     // It would be odd if the non-existent term was worth more, but in 1.0.x
  249     // the top hit got 4% in this testcase.  In 1.2.x it gets 50%, which is
  250     // better, but >50% would be more natural.
  251     TEST_REL(mset[0].get_percent(), >=, 50);
  252     return true;
  253 }
  254 
  255 /// Test that OP_FILTER doesn't affect percentages.
  256 //  Regression test for bug#590 fixed in 1.3.1 and 1.2.10.
  257 DEFINE_TESTCASE(topercent6, backend) {
  258     Xapian::Enquire enquire(get_database("apitest_simpledata"));
  259     Xapian::Query q(Xapian::Query::OP_OR,
  260             Xapian::Query("rubbish"), Xapian::Query("letter"));
  261     enquire.set_query(q);
  262     Xapian::MSet mset = enquire.get_mset(0, 10);
  263     TEST(!mset.empty());
  264     TEST(mset[0].get_percent() < 100);
  265 
  266     q = Xapian::Query(q.OP_FILTER, q, Xapian::Query("this"));
  267     enquire.set_query(q);
  268     Xapian::MSet mset2 = enquire.get_mset(0, 10);
  269     TEST(!mset2.empty());
  270     TEST_EQUAL(mset[0].get_percent(), mset2[0].get_percent());
  271     return true;
  272 }
  273 
  274 static void
  275 make_topercent7_db(Xapian::WritableDatabase &db, const string &)
  276 {
  277     for (int i = 1; i <= 6; ++i) {
  278     Xapian::Document d;
  279     d.set_data(str(i));
  280     d.add_term("boom", 2 + (i - 4)*(i - 2));
  281     if (i != 5)
  282         d.add_boolean_term("XCAT122");
  283     db.add_document(d);
  284     }
  285     db.commit();
  286 }
  287 
  288 /// Test that a term with wdf always = 0 gets counted.
  289 //  Regression test for bug introduced in 1.2.10 by the original fix for #590,
  290 //  and fixed in 1.2.13 (and in trunk before 1.3.1 was released).
  291 DEFINE_TESTCASE(topercent7, generated) {
  292     Xapian::Database db(get_database("topercent7", make_topercent7_db));
  293 
  294     Xapian::Query q;
  295     q = Xapian::Query(q.OP_OR, Xapian::Query("tomb"), Xapian::Query("boom"));
  296     q = Xapian::Query(q.OP_AND, q, Xapian::Query("XCAT122"));
  297 
  298     Xapian::Enquire enq(db);
  299     enq.set_query(q);
  300     Xapian::MSet m = enq.get_mset(0, 10);
  301     TEST(!m.empty());
  302     TEST_REL(m[0].get_percent(),>,60);
  303     return true;
  304 }
  305 
  306 class ZWeight : public Xapian::Weight {
  307   public:
  308     ZWeight() { }
  309 
  310     void init(double) { }
  311 
  312     Weight * clone() const {
  313     return new ZWeight();
  314     }
  315 
  316     double get_sumpart(Xapian::termcount,
  317                Xapian::termcount,
  318                Xapian::termcount) const {
  319     return 0.0;
  320     }
  321 
  322     double get_maxpart() const {
  323     return 0.0;
  324     }
  325 
  326     double get_sumextra(Xapian::termcount doclen,
  327             Xapian::termcount) const {
  328     return 1.0 / doclen;
  329     }
  330 
  331     double get_maxextra() const {
  332     return 1.0;
  333     }
  334 };
  335 
  336 /// Regression test for bug introduced in 1.3.1 and fixed in 1.3.2.
  337 DEFINE_TESTCASE(checkzeromaxpartopt1, backend && !remote) {
  338     Xapian::Database db = get_database("apitest_simpledata");
  339     Xapian::Enquire enquire(db);
  340     // "this" indexes all documents, so will get replaced with MatchAll
  341     // internally.
  342     static const char * const terms[] = { "this", "spoken", "blank" };
  343     enquire.set_query(Xapian::Query(Xapian::Query::OP_OR, terms, terms + 3));
  344     ZWeight wt;
  345     enquire.set_weighting_scheme(wt);
  346     Xapian::MSet mset = enquire.get_mset(0, db.get_doccount());
  347     // No documents match all 3 terms, so the score shouldn't be 100%.
  348     TEST(mset[0].get_percent() != 100);
  349     // Make sure the percentage score isn't 0 or 1 though.
  350     TEST_REL(mset[0].get_percent(), >, 1);
  351     return true;
  352 }