"Fossies" - the Fresh Open Source Software Archive

Member "xdelta3-3.1.0/testing/checksum_test.cc" (1 Jan 2016, 16490 Bytes) of package /linux/misc/xdelta3-3.1.0.tar.gz:


As a special service "Fossies" has tried to format the requested source page into HTML format using (guessed) C and C++ source code syntax highlighting (style: standard) with prefixed line numbers and code folding option. Alternatively you can here view or download the uninterpreted source code file.

    1 /* Copyright (C) 2007 Josh MacDonald */
    2 
    3 #include "test.h"
    4 #include <assert.h>
    5 #include <list>
    6 #include <vector>
    7 #include <algorithm>
    8 
    9 #include "../cpp-btree/btree_map.h"
   10 
   11 extern "C" {
   12 uint32_t xd3_large32_cksum_old (xd3_hash_cfg *cfg, const uint8_t *base, const usize_t look);
   13 uint32_t xd3_large32_cksum_update_old (xd3_hash_cfg *cfg, uint32_t cksum, 
   14                        const uint8_t *base, const usize_t look);
   15 
   16 uint64_t xd3_large64_cksum_old (xd3_hash_cfg *cfg, const uint8_t *base, const usize_t look);
   17 uint64_t xd3_large64_cksum_update_old (xd3_hash_cfg *cfg, uint64_t cksum, 
   18                        const uint8_t *base, const usize_t look);
   19 }
   20 
   21 using btree::btree_map;
   22 using std::list;
   23 using std::vector;
   24 
   25 // MLCG parameters
   26 // a, a*
   27 uint32_t good_32bit_values[] = {
   28   1597334677U, // ...
   29   741103597U, 887987685U,
   30 };
   31 
   32 // a, a*
   33 uint64_t good_64bit_values[] = {
   34   1181783497276652981ULL, 4292484099903637661ULL,
   35   7664345821815920749ULL, // ...
   36 };
   37 
   38 void print_header() {
   39   static int hdr_cnt = 0;
   40   if (hdr_cnt++ % 20 == 0) {
   41     printf("%-32sConf\t\tCount\tUniq\tFull\tCover\tColls"
   42        "\tMB/s\tIters\t#Colls\n", "Name");
   43   }
   44 }
   45 
   46 struct true_type { };
   47 struct false_type { };
   48 
   49 template <typename Word>
   50 usize_t bitsof();
   51 
   52 template<>
   53 usize_t bitsof<unsigned int>() {
   54   return sizeof(unsigned int) * 8;
   55 }
   56 
   57 template<>
   58 usize_t bitsof<unsigned long>() {
   59   return sizeof(unsigned long) * 8;
   60 }
   61 
   62 template<>
   63 usize_t bitsof<unsigned long long>() {
   64   return sizeof(unsigned long long) * 8;
   65 }
   66 
   67 template <typename Word>
   68 struct hhash {  // shift "s" bits leaving the high bits as a hash value for
   69         // this checksum, which are the most "distant" in terms of the
   70         // spectral test for the rabin_karp MLCG.  For short windows,
   71         // the high bits aren't enough, XOR "mask" worth of these in.
   72   Word operator()(const Word t, const Word s, const Word mask) {
   73     return (t >> s) ^ (t & mask);
   74   }
   75 };
   76 
   77 template <typename Word>
   78 Word good_word();
   79 
   80 template<>
   81 uint32_t good_word<uint32_t>() {
   82   return good_32bit_values[0];
   83 }
   84 
   85 template<>
   86 uint64_t good_word<uint64_t>() {
   87   return good_64bit_values[0];
   88 }
   89 
   90 // CLASSES
   91 
   92 #define SELF Word, CksumSize, CksumSkip, Hash, Compaction
   93 #define MEMBER template <typename Word,     \
   94              int CksumSize,     \
   95              int CksumSkip,     \
   96              typename Hash,     \
   97                          int Compaction>
   98 
   99 MEMBER
  100 struct cksum_params {
  101   typedef Word word_type;
  102   typedef Hash hash_type;
  103 
  104   static const int cksum_size = CksumSize;
  105   static const int cksum_skip = CksumSkip;
  106   static const int compaction = Compaction;
  107 };
  108 
  109 MEMBER
  110 struct rabin_karp : public cksum_params<SELF> {
  111   // (a^cksum_size-1 c_0) + (a^cksum_size-2 c_1) ...
  112   rabin_karp()
  113     : powers(make_powers()),
  114       product(powers[0] * good_word<Word>()),
  115       incr_state(0) { }
  116 
  117   static Word* make_powers() {
  118     Word *p = new Word[CksumSize];
  119     p[CksumSize - 1] = 1;
  120     for (int i = CksumSize - 2; i >= 0; i--) {
  121       p[i] = p[i + 1] * good_word<Word>();
  122     }
  123     return p;
  124   }
  125 
  126   ~rabin_karp() {
  127     delete [] powers;
  128   }
  129 
  130   Word step(const uint8_t *ptr) {
  131     Word h = 0;
  132     for (int i = 0; i < CksumSize; i++) {
  133       h += (ptr[i]) * powers[i];
  134     }
  135     return h;
  136   }
  137 
  138   Word state0(const uint8_t *ptr) {
  139     incr_state = step(ptr);
  140     return incr_state;
  141   }
  142 
  143   Word incr(const uint8_t *ptr) {
  144     incr_state = good_word<Word>() * incr_state -
  145       product * (ptr[-1]) + (ptr[CksumSize - 1]);
  146     return incr_state;
  147   }
  148 
  149   const Word *const powers;
  150   const Word  product;
  151   Word        incr_state;
  152 };
  153 
  154 MEMBER
  155 struct with_stream : public cksum_params<SELF> {
  156   xd3_stream stream;
  157 
  158   with_stream()
  159   {
  160     xd3_config cfg;
  161     memset (&stream, 0, sizeof (stream));
  162     xd3_init_config (&cfg, 0);
  163     cfg.smatch_cfg = XD3_SMATCH_SOFT;
  164     cfg.smatcher_soft.large_look = CksumSize;
  165     cfg.smatcher_soft.large_step = CksumSkip;
  166     cfg.smatcher_soft.small_look = 4;
  167     cfg.smatcher_soft.small_chain = 4;
  168     cfg.smatcher_soft.small_lchain = 4;
  169     cfg.smatcher_soft.max_lazy = 4;
  170     cfg.smatcher_soft.long_enough = 4;
  171     CHECK_EQ(0, xd3_config_stream (&stream, &cfg));
  172 
  173     CHECK_EQ(0, xd3_size_hashtable (&stream,
  174                     1<<10 /* ignored */,
  175                     stream.smatcher.large_look,
  176                     & stream.large_hash));
  177   }
  178   ~with_stream() 
  179   {
  180     xd3_free_stream (&stream);
  181   }
  182 };
  183 
  184 MEMBER
  185 struct large_cksum : public with_stream<SELF> {
  186   Word step(const uint8_t *ptr) {
  187     return xd3_large_cksum (&this->stream.large_hash, ptr, CksumSize);
  188   }
  189 
  190   Word state0(const uint8_t *ptr) {
  191     incr_state = step(ptr);
  192     return incr_state;
  193   }
  194 
  195   Word incr(const uint8_t *ptr) {
  196     incr_state = xd3_large_cksum_update (&this->stream.large_hash, 
  197                      incr_state, ptr - 1, CksumSize);
  198     return incr_state;
  199   }
  200 
  201   Word incr_state;
  202 };
  203 
  204 #if SIZEOF_USIZE_T == 4
  205 #define xd3_large_cksum_old         xd3_large32_cksum_old
  206 #define xd3_large_cksum_update_old  xd3_large32_cksum_update_old
  207 #elif SIZEOF_USIZE_T == 8
  208 #define xd3_large_cksum_old         xd3_large64_cksum_old
  209 #define xd3_large_cksum_update_old  xd3_large64_cksum_update_old
  210 #endif
  211 
  212 MEMBER
  213 struct large_cksum_old : public with_stream<SELF> {
  214   Word step(const uint8_t *ptr) {
  215     return xd3_large_cksum_old (&this->stream.large_hash, ptr, CksumSize);
  216   }
  217 
  218   Word state0(const uint8_t *ptr) {
  219     incr_state = step(ptr);
  220     return incr_state;
  221   }
  222 
  223   Word incr(const uint8_t *ptr) {
  224     incr_state = xd3_large_cksum_update_old (&this->stream.large_hash, 
  225                          incr_state, ptr - 1, CksumSize);
  226     return incr_state;
  227   }
  228 
  229   Word incr_state;
  230 };
  231 
  232 // TESTS
  233 
  234 template <typename Word>
  235 struct file_stats {
  236   typedef const uint8_t* ptr_type;
  237   typedef Word word_type;
  238   typedef btree::btree_multimap<word_type, ptr_type> table_type;
  239   typedef typename table_type::iterator table_iterator;
  240 
  241   usize_t cksum_size;
  242   usize_t cksum_skip;
  243   usize_t unique;
  244   usize_t unique_values;
  245   usize_t count;
  246   table_type table;
  247 
  248   file_stats(usize_t size, usize_t skip)
  249     : cksum_size(size),
  250       cksum_skip(skip),
  251       unique(0),
  252       unique_values(0),
  253       count(0) {
  254   }
  255 
  256   void reset() {
  257     unique = 0;
  258     unique_values = 0;
  259     count = 0;
  260     table.clear();
  261   }
  262 
  263   void update(word_type word, ptr_type ptr) {
  264     table_iterator t_i = table.find(word);
  265 
  266     count++;
  267     if (t_i != table.end()) {
  268       int collisions = 0;
  269       for (table_iterator p_i = t_i;
  270        p_i != table.end() && p_i->first == word;
  271        ++p_i) {
  272     if (memcmp(p_i->second, ptr, cksum_size) == 0) {
  273       return;
  274     }
  275     collisions++;
  276       }
  277       if (collisions >= 1000) {
  278     fprintf(stderr, "Something is not right, lots of collisions=%d\n", 
  279         collisions);
  280     abort();
  281       }
  282     } else {
  283       unique_values++;
  284     }
  285     unique++;
  286     table.insert(std::make_pair(word, ptr));
  287     return;
  288   }
  289 
  290   void freeze() {
  291     table.clear();
  292   }
  293 };
  294 
  295 struct test_result_base;
  296 
  297 static vector<test_result_base*> all_tests;
  298 
  299 struct test_result_base {
  300   virtual ~test_result_base() {
  301   }
  302   virtual void reset() = 0;
  303   virtual void print() = 0;
  304   virtual void get(const uint8_t* buf, const size_t buf_size, 
  305            usize_t iters) = 0;
  306   virtual void stat() = 0;
  307   virtual usize_t count() = 0;
  308   virtual usize_t dups() = 0;
  309   virtual double uniqueness() = 0;
  310   virtual double fullness() = 0;
  311   virtual double collisions() = 0;
  312   virtual double coverage() = 0;
  313   virtual double compression() = 0;
  314   virtual double time() = 0;
  315   virtual double total_time() = 0;
  316   virtual usize_t total_count() = 0;
  317   virtual usize_t total_dups() = 0;
  318 };
  319 
  320 template <typename Checksum>
  321 struct test_result : public test_result_base {
  322   Checksum cksum;
  323   const char *test_name;
  324   file_stats<typename Checksum::word_type> fstats;
  325   usize_t test_size;
  326   usize_t n_steps;
  327   usize_t n_incrs;
  328   typename Checksum::word_type s_bits;
  329   typename Checksum::word_type s_mask;
  330   usize_t t_entries;
  331   usize_t h_bits;
  332   usize_t h_buckets_full;
  333   char *hash_table;
  334   long accum_millis;
  335   usize_t accum_iters;
  336 
  337   // These are not reset
  338   double accum_time;
  339   usize_t accum_count;
  340   usize_t accum_dups;
  341   usize_t accum_colls;
  342   size_t accum_size;
  343 
  344   test_result(const char *name)
  345     : test_name(name),
  346       fstats(Checksum::cksum_size, Checksum::cksum_skip),
  347       hash_table(NULL),
  348       accum_millis(0),
  349       accum_iters(0),
  350       accum_time(0.0),
  351       accum_count(0),
  352       accum_dups(0),
  353       accum_colls(0),
  354       accum_size(0) {
  355     all_tests.push_back(this);
  356   }
  357 
  358   ~test_result() {
  359     reset();
  360   }
  361 
  362   void reset() {
  363     // size of file
  364     test_size = 0;
  365 
  366     // count
  367     n_steps = 0;
  368     n_incrs = 0;
  369 
  370     // four values used by new_table()/summarize_table()
  371     s_bits = 0;
  372     s_mask = 0;
  373     t_entries = 0;
  374     h_bits = 0;
  375     h_buckets_full = 0;
  376 
  377     accum_millis = 0;
  378     accum_iters = 0;
  379 
  380     fstats.reset();
  381 
  382     // temporary
  383     if (hash_table) {
  384       delete(hash_table);
  385       hash_table = NULL;
  386     }
  387   }
  388 
  389   usize_t count() {
  390     if (Checksum::cksum_skip == 1) {
  391       return n_incrs;
  392     } else {
  393       return n_steps;
  394     }
  395   }
  396 
  397   usize_t dups() {
  398     return fstats.count - fstats.unique;
  399   }
  400 
  401   /* Fraction of distinct strings of length cksum_size which are not
  402    * represented in the hash table. */
  403   double collisions() {
  404     return (fstats.unique - fstats.unique_values) / (double) fstats.unique;
  405   }
  406   usize_t colls() {
  407     return (fstats.unique - fstats.unique_values);
  408   }
  409 
  410   double uniqueness() {
  411     return 1.0 - (double) dups() / count();
  412   }
  413 
  414   double fullness() {
  415     return (double) h_buckets_full / (1 << h_bits);
  416   }
  417 
  418   double coverage() {
  419     return (double) h_buckets_full / uniqueness() / count();
  420   }
  421 
  422   double compression() {
  423     return 1.0 - coverage();
  424   }
  425 
  426   double time() {
  427     return (double) accum_millis / accum_iters;
  428   }
  429 
  430   double total_time() {
  431     return accum_time;
  432   }
  433 
  434   usize_t total_count() {
  435     return accum_count;
  436   }
  437 
  438   usize_t total_dups() {
  439     return accum_dups;
  440   }
  441 
  442   usize_t total_colls() {
  443     return accum_dups;
  444   }
  445 
  446   void stat() {
  447     accum_time += time();
  448     accum_count += count();
  449     accum_dups += dups();
  450     accum_colls += colls();
  451     accum_size += test_size;
  452   }
  453 
  454   void print() {
  455     if (fstats.count != count()) {
  456       fprintf(stderr, "internal error: %" W "d != %" W "d\n", fstats.count, count());
  457       abort();
  458     }
  459     print_header();
  460     printf("%-32s%d/%d 2^%" W "u\t%" W "u\t%0.4f\t%.4f\t%.4f\t%.1e\t%.2f\t"
  461        "%" W "u\t%" W "u\n",
  462        test_name,
  463        Checksum::cksum_size,
  464        Checksum::cksum_skip,
  465        h_bits,
  466        count(),
  467        uniqueness(),
  468        fullness(),
  469        coverage(),
  470        collisions(),
  471        0.001 * accum_iters * test_size / accum_millis,
  472        accum_iters,
  473        colls());
  474   }
  475 
  476   usize_t size_log2 (usize_t slots) {
  477     usize_t bits = bitsof<typename Checksum::word_type>() - 1;
  478     usize_t i;
  479 
  480     for (i = 3; i <= bits; i += 1) {
  481       if (slots <= (1U << i)) {
  482     return i - Checksum::compaction;
  483       }
  484     }
  485 
  486     return bits;
  487   }
  488 
  489   void new_table(usize_t entries) {
  490     t_entries = entries;
  491     h_bits = size_log2(entries);
  492 
  493     usize_t n = 1 << h_bits;
  494 
  495     s_bits = bitsof<typename Checksum::word_type>() - h_bits;
  496     s_mask = n - 1U;
  497 
  498     hash_table = new char[n / 8];
  499     memset(hash_table, 0, n / 8);
  500   }
  501 
  502   int get_table_bit(usize_t i) {
  503     return hash_table[i/8] & (1 << i%8);
  504   }
  505 
  506   int set_table_bit(usize_t i) {
  507     return hash_table[i/8] |= (1 << i%8);
  508   }
  509 
  510   void summarize_table() {
  511     usize_t n = 1 << h_bits;
  512     usize_t f = 0;
  513     for (usize_t i = 0; i < n; i++) {
  514       if (get_table_bit(i)) {
  515     f++;
  516       }
  517     }
  518     h_buckets_full = f;
  519   }
  520 
  521   void get(const uint8_t* buf, const size_t buf_size, usize_t test_iters) {
  522     typename Checksum::hash_type hash;
  523     const uint8_t *ptr;
  524     const uint8_t *end;
  525     usize_t periods;
  526     int64_t last_offset;
  527     int64_t stop;
  528 
  529     test_size = buf_size;
  530     last_offset = buf_size - Checksum::cksum_size;
  531 
  532     if (last_offset < 0) {
  533       periods = 0;
  534       n_steps = 0;
  535       n_incrs = 0;
  536       stop = -Checksum::cksum_size;
  537     } else {
  538       periods = last_offset / Checksum::cksum_skip;
  539       n_steps = periods + 1;
  540       n_incrs = last_offset + 1;
  541       stop = last_offset - (periods + 1) * Checksum::cksum_skip;
  542     }
  543 
  544     // Compute file stats once.
  545     if (fstats.unique_values == 0) {
  546       if (Checksum::cksum_skip == 1) {
  547     for (size_t i = 0; i <= buf_size - Checksum::cksum_size; i++) {
  548       fstats.update(hash(cksum.step(buf + i), s_bits, s_mask), buf + i);
  549     }
  550       } else {
  551     ptr = buf + last_offset;
  552     end = buf + stop;
  553 
  554     for (; ptr != end; ptr -= Checksum::cksum_skip) {
  555       fstats.update(hash(cksum.step(ptr), s_bits, s_mask), ptr);
  556     }
  557       }
  558       fstats.freeze();
  559     }
  560 
  561     long start_test = get_millisecs_now();
  562 
  563     if (Checksum::cksum_skip != 1) {
  564       new_table(n_steps);
  565 
  566       for (usize_t i = 0; i < test_iters; i++) {
  567     ptr = buf + last_offset;
  568     end = buf + stop;
  569 
  570     for (; ptr != end; ptr -= Checksum::cksum_skip) {
  571       set_table_bit(hash(cksum.step(ptr), s_bits, s_mask));
  572     }
  573       }
  574 
  575       summarize_table();
  576     }
  577 
  578     stop = buf_size - Checksum::cksum_size + 1;
  579     if (stop < 0) {
  580       stop = 0;
  581     }
  582 
  583     if (Checksum::cksum_skip == 1) {
  584       new_table(n_incrs);
  585 
  586       for (usize_t i = 0; i < test_iters; i++) {
  587     ptr = buf;
  588     end = buf + stop;
  589 
  590     if (ptr != end) {
  591       set_table_bit(hash(cksum.state0(ptr++), s_bits, s_mask));
  592     }
  593 
  594     for (; ptr != end; ptr++) {
  595       typename Checksum::word_type w = cksum.incr(ptr);
  596       CHECK_EQ(w, cksum.step(ptr));
  597       set_table_bit(hash(w, s_bits, s_mask));
  598     }
  599       }
  600 
  601       summarize_table();
  602     }
  603 
  604     accum_iters += test_iters;
  605     accum_millis += get_millisecs_now() - start_test;
  606   }
  607 };
  608 
  609 static int read_whole_file(const char *name,
  610                uint8_t **buf_ptr,
  611                size_t *buf_len) {
  612   main_file file;
  613   int ret;
  614   xoff_t len;
  615   size_t nread;
  616   main_file_init(&file);
  617   file.filename = name;
  618   ret = main_file_open(&file, name, XO_READ);
  619   if (ret != 0) {
  620     fprintf(stderr, "open failed\n");
  621     goto exit;
  622   }
  623   ret = main_file_stat(&file, &len);
  624   if (ret != 0) {
  625     fprintf(stderr, "stat failed\n");
  626     goto exit;
  627   }
  628   
  629   (*buf_len) = (size_t)len;
  630   (*buf_ptr) = (uint8_t*) main_malloc(*buf_len);
  631   ret = main_file_read(&file, *buf_ptr, *buf_len, &nread,
  632                "read failed");
  633   if (ret == 0 && *buf_len == nread) {
  634     ret = 0;
  635   } else {
  636     fprintf(stderr, "invalid read\n");
  637     ret = XD3_INTERNAL;
  638   }
  639  exit:
  640   main_file_cleanup(&file);
  641   return ret;
  642 }
  643 
  644 int main(int argc, char** argv) {
  645   int i;
  646   uint8_t *buf = NULL;
  647   size_t buf_len = 0;
  648   int ret;
  649 
  650   if (argc <= 1) {
  651     fprintf(stderr, "usage: %s file ...\n", argv[0]);
  652     return 1;
  653   }
  654 
  655 // TODO: The xdelta3-hash.h code is identical now; add sameness test.
  656 // using rabin_karp<> template.
  657 #define TEST(T,Z,S,C)                   \
  658   test_result<large_cksum<T,Z,S,hhash<T>,C>>        \
  659     _xck_ ## T ## _ ## Z ## _ ## S ## _ ## C        \
  660     ("xck_" #T "_" #Z "_" #S "_" #C);           \
  661   test_result<large_cksum_old<T,Z,S,hhash<T>,C>>    \
  662     _old_ ## T ## _ ## Z ## _ ## S ## _ ## C        \
  663     ("old_" #T "_" #Z "_" #S "_" #C)
  664 
  665 #define TESTS(SIZE, SKIP)    \
  666   TEST(usize_t, SIZE, SKIP, 1);  \
  667   TEST(usize_t, SIZE, SKIP, 2)
  668    
  669   TESTS(5, 1);
  670   TESTS(6, 1);
  671   TESTS(7, 1);
  672   TESTS(8, 1);
  673   TESTS(9, 1);
  674   TESTS(10, 1);
  675   TESTS(11, 1);
  676   TESTS(12, 1);
  677   TESTS(13, 1);
  678   TESTS(14, 1);
  679   TESTS(15, 1);
  680   TESTS(16, 1);
  681   TESTS(17, 1);
  682   TESTS(18, 1);
  683   TESTS(19, 1);
  684   TESTS(20, 1);
  685   TESTS(21, 1);
  686   TESTS(22, 1);
  687   TESTS(23, 1);
  688   TESTS(24, 1);
  689   TESTS(25, 1);
  690   TESTS(26, 1);
  691   TESTS(27, 1);
  692   TESTS(28, 1);
  693   TESTS(29, 1);
  694   TESTS(30, 1);
  695   TESTS(31, 1);
  696   TESTS(32, 1);
  697   TESTS(33, 1);
  698   TESTS(34, 1);
  699   TESTS(35, 1);
  700   TESTS(36, 1);
  701   TESTS(37, 1);
  702   TESTS(38, 1);
  703   TESTS(39, 1);
  704 
  705 
  706   for (i = 1; i < argc; i++) {
  707     if ((ret = read_whole_file(argv[i],
  708                    & buf,
  709                    & buf_len))) {
  710       return 1;
  711     }
  712 
  713     fprintf(stderr, "file %s is %zu bytes\n",
  714         argv[i], buf_len);
  715 
  716     double min_time = -1.0;
  717     double min_compression = 0.0;
  718 
  719     for (vector<test_result_base*>::iterator iter = all_tests.begin();
  720      iter != all_tests.end(); ++iter) {
  721       test_result_base *test = *iter;
  722       test->reset();
  723 
  724       usize_t iters = 1;
  725       long start_test = get_millisecs_now();
  726 
  727       do {
  728     test->get(buf, buf_len, iters);
  729     iters *= 3;
  730     iters /= 2;
  731       } while (get_millisecs_now() - start_test < 2000);
  732 
  733       test->stat();
  734 
  735       if (min_time < 0.0) {
  736     min_compression = test->compression();
  737     min_time = test->time();
  738       }
  739 
  740       if (min_time > test->time()) {
  741     min_time = test->time();
  742       }
  743 
  744       if (min_compression > test->compression()) {
  745     min_compression = test->compression();
  746       }
  747 
  748       test->print();
  749     }
  750 
  751     main_free(buf);
  752     buf = NULL;
  753   }
  754 
  755   return 0;      
  756 }