"Fossies" - the Fresh Open Source Software Archive

Member "xdelta3-3.0.11/examples/checksum_test.cc" (24 Mar 2015, 15514 Bytes) of package /linux/misc/xdelta3-3.0.11.tar.gz:


As a special service "Fossies" has tried to format the requested source page into HTML format using (guessed) C and C++ source code syntax highlighting (style: standard) with prefixed line numbers and code folding option. Alternatively you can here view or download the uninterpreted source code file.

    1 /* Copyright (C) 2007 Josh MacDonald */
    2 
    3 extern "C" {
    4 #include "test.h"
    5 #include <assert.h>
    6 }
    7 
    8 #include <list>
    9 #include <vector>
   10 #include <map>
   11 #include <algorithm>
   12 
   13 using std::list;
   14 using std::map;
   15 using std::vector;
   16 
   17 // MLCG parameters
   18 // a, a*
   19 uint32_t good_32bit_values[] = {
   20     1597334677U, // ...
   21     741103597U, 887987685U,
   22 };
   23 
   24 // a, a*
   25 uint64_t good_64bit_values[] = {
   26     1181783497276652981ULL, 4292484099903637661ULL,
   27     7664345821815920749ULL, // ...
   28 };
   29 
   30 struct true_type { };
   31 struct false_type { };
   32 
   33 template <typename Word>
   34 int bitsof();
   35 
   36 template<>
   37 int bitsof<uint32_t>() {
   38     return 32;
   39 }
   40 
   41 template<>
   42 int bitsof<uint64_t>() {
   43     return 64;
   44 }
   45 
   46 struct plain {
   47     int operator()(const uint8_t &c) {
   48     return c;
   49     }
   50 };
   51 
   52 template <typename Word>
   53 struct hhash {  // take "h" of the high-bits as a hash value for this
   54         // checksum, which are the most "distant" in terms of the
   55         // spectral test for the rabin_karp MLCG.  For short windows,
   56         // the high bits aren't enough, XOR "mask" worth of these in.
   57     Word operator()(const Word& t, const int &h, const int &mask) {
   58     return (t >> h) ^ (t & mask);
   59     }
   60 };
   61 
   62 template <typename Word>
   63 Word good_word();
   64 
   65 template<>
   66 uint32_t good_word<uint32_t>() {
   67     return good_32bit_values[0];
   68 }
   69 
   70 template<>
   71 uint64_t good_word<uint64_t>() {
   72     return good_64bit_values[0];
   73 }
   74 
   75 // CLASSES
   76 
   77 #define SELF Word, CksumSize, CksumSkip, Permute, Hash, Compaction
   78 #define MEMBER template <typename Word, \
   79              int CksumSize, \
   80              int CksumSkip, \
   81              typename Permute, \
   82              typename Hash, \
   83                          int Compaction>
   84 
   85 MEMBER
   86 struct cksum_params {
   87     typedef Word word_type;
   88     typedef Permute permute_type;
   89     typedef Hash hash_type;
   90 
   91     enum { cksum_size = CksumSize,
   92        cksum_skip = CksumSkip,
   93        compaction = Compaction,
   94     };
   95 };
   96 
   97 
   98 MEMBER
   99 struct rabin_karp {
  100     typedef Word word_type;
  101     typedef Permute permute_type;
  102     typedef Hash hash_type;
  103 
  104     enum { cksum_size = CksumSize,
  105        cksum_skip = CksumSkip, 
  106        compaction = Compaction,
  107     };
  108 
  109     // (a^cksum_size-1 c_0) + (a^cksum_size-2 c_1) ...
  110     rabin_karp() {
  111     multiplier = good_word<Word>();
  112     powers = new Word[cksum_size];
  113     powers[cksum_size - 1] = 1;
  114     for (int i = cksum_size - 2; i >= 0; i--) {
  115         powers[i] = powers[i + 1] * multiplier;
  116     }
  117     product = powers[0] * multiplier;
  118     }
  119 
  120     ~rabin_karp() {
  121     delete [] powers;
  122     }
  123 
  124     Word step(const uint8_t *ptr) {
  125     Word h = 0;
  126     for (int i = 0; i < cksum_size; i++) {
  127         h += permute_type()(ptr[i]) * powers[i];
  128     }
  129     return h;
  130     }
  131 
  132     Word state0(const uint8_t *ptr) {
  133     incr_state = step(ptr);
  134     return incr_state;
  135     }
  136 
  137     Word incr(const uint8_t *ptr) {
  138     incr_state = multiplier * incr_state -
  139         product * permute_type()(ptr[-1]) +
  140         permute_type()(ptr[cksum_size - 1]);
  141     return incr_state;
  142     }
  143 
  144     Word *powers;
  145     Word  product;
  146     Word  multiplier;
  147     Word  incr_state;
  148 };
  149 
  150 MEMBER
  151 struct adler32_cksum {
  152     typedef Word word_type;
  153     typedef Permute permute_type;
  154     typedef Hash hash_type;
  155 
  156     enum { cksum_size = CksumSize,
  157        cksum_skip = CksumSkip, 
  158        compaction = Compaction,
  159     };
  160 
  161     Word step(const uint8_t *ptr) {
  162     return xd3_lcksum (ptr, cksum_size);
  163     }
  164 
  165     Word state0(const uint8_t *ptr) {
  166     incr_state = step(ptr);
  167     return incr_state;
  168     }
  169 
  170     Word incr(const uint8_t *ptr) {
  171     incr_state = xd3_large_cksum_update (incr_state, ptr - 1, cksum_size);
  172     return incr_state;
  173     }
  174 
  175     Word  incr_state;
  176 };
  177 
  178 // TESTS
  179 
  180 template <typename Word>
  181 struct file_stats {
  182     typedef list<const uint8_t*> ptr_list;
  183     typedef Word word_type;
  184     typedef map<word_type, ptr_list> table_type;
  185     typedef typename table_type::iterator table_iterator;
  186     typedef typename ptr_list::iterator ptr_iterator;
  187 
  188     int cksum_size;
  189     int cksum_skip;
  190     int unique;
  191     int unique_values;
  192     int count;
  193     table_type table;
  194 
  195     file_stats(int size, int skip)
  196     : cksum_size(size),
  197       cksum_skip(skip),
  198       unique(0),
  199       unique_values(0),
  200       count(0) {
  201     }
  202 
  203     void reset() {
  204     unique = 0;
  205     unique_values = 0;
  206     count = 0;
  207     table.clear();
  208     }
  209 
  210     void update(const word_type &word, const uint8_t *ptr) {
  211     table_iterator t_i = table.find(word);
  212 
  213     count++;
  214 
  215     if (t_i == table.end()) {
  216         table.insert(make_pair(word, ptr_list()));
  217     }
  218 
  219     ptr_list &pl = table[word];
  220 
  221     for (ptr_iterator p_i = pl.begin();
  222          p_i != pl.end();
  223          ++p_i) {
  224         if (memcmp(*p_i, ptr, cksum_size) == 0) {
  225         return;
  226         }
  227     }
  228 
  229     unique++;
  230     pl.push_back(ptr);
  231     }
  232 
  233     void freeze() {
  234     unique_values = table.size();
  235     table.clear();
  236     }
  237 };
  238 
  239 struct test_result_base;
  240 
  241 static vector<test_result_base*> all_tests;
  242 
  243 struct test_result_base {
  244     virtual ~test_result_base() {
  245     }
  246     virtual void reset() = 0;
  247     virtual void print() = 0;
  248     virtual void get(const uint8_t* buf, const int buf_size, int iters) = 0;
  249     virtual void stat() = 0;
  250     virtual int count() = 0;
  251     virtual int dups() = 0;
  252     virtual double uniqueness() = 0;
  253     virtual double fullness() = 0;
  254     virtual double collisions() = 0;
  255     virtual double coverage() = 0;
  256     virtual double compression() = 0;
  257     virtual double time() = 0;
  258     virtual double score() = 0;
  259     virtual void set_score(double min_dups_frac, double min_time) = 0;
  260     virtual double total_time() = 0;
  261     virtual int total_count() = 0;
  262     virtual int total_dups() = 0;
  263 };
  264 
  265 struct compare_h {
  266     bool operator()(test_result_base *a,
  267             test_result_base *b) {
  268     return a->score() < b->score();
  269     }
  270 };
  271 
  272 MEMBER
  273 struct test_result : public test_result_base {
  274     typedef Word word_type;
  275     typedef Permute permute_type;
  276     typedef Hash hash_type;
  277 
  278     enum { cksum_size = CksumSize,
  279        cksum_skip = CksumSkip, 
  280        compaction = Compaction,
  281     };
  282 
  283     const char *test_name;
  284     file_stats<Word> fstats;
  285     int test_size;
  286     int n_steps;
  287     int n_incrs;
  288     int s_bits;
  289     int s_mask;
  290     int t_entries;
  291     int h_bits;
  292     int h_buckets_full;
  293     double h_score;
  294     char *hash_table;
  295     long accum_millis;
  296     int accum_iters;
  297 
  298     // These are not reset
  299     double accum_time;
  300     int accum_count;
  301     int accum_dups;
  302     int accum_colls;
  303     int accum_size;
  304 
  305     test_result(const char *name)
  306     : test_name(name),
  307       fstats(cksum_size, cksum_skip),
  308       hash_table(NULL),
  309       accum_millis(0),
  310       accum_iters(0),
  311       accum_time(0.0),
  312       accum_count(0),
  313       accum_dups(0),
  314       accum_colls(0),
  315       accum_size(0) {
  316     all_tests.push_back(this);
  317     }
  318 
  319     ~test_result() {
  320     reset();
  321     }
  322 
  323     void reset() {
  324     // size of file
  325     test_size = -1;
  326 
  327     // count
  328     n_steps = -1;
  329     n_incrs = -1;
  330 
  331     // four values used by new_table()/summarize_table()
  332     s_bits = -1;
  333     s_mask = -1;
  334     t_entries = -1;
  335     h_bits = -1;
  336     h_buckets_full = -1;
  337 
  338     accum_millis = 0;
  339     accum_iters = 0;
  340 
  341     fstats.reset();
  342 
  343     // temporary
  344     if (hash_table) {
  345         delete(hash_table);
  346         hash_table = NULL;
  347     }
  348     }
  349 
  350     int count() {
  351     if (cksum_skip == 1) {
  352         return n_incrs;
  353     } else {
  354         return n_steps;
  355     }
  356     }
  357 
  358     int dups() {
  359     return fstats.count - fstats.unique;
  360     }
  361 
  362     int colls() {
  363     return fstats.unique - fstats.unique_values;
  364     }
  365 
  366     double uniqueness() {
  367     return 1.0 - (double) dups() / count();
  368     }
  369 
  370     double fullness() {
  371     return (double) h_buckets_full / (1 << h_bits);
  372     }
  373 
  374     double collisions() {
  375     return (double) colls() / fstats.unique;
  376     }
  377 
  378     double coverage() {
  379     return (double) h_buckets_full / uniqueness() / count();
  380     }
  381 
  382     double compression() {
  383     return 1.0 - coverage();
  384     }
  385 
  386     double time() {
  387     return (double) accum_millis / accum_iters;
  388     }
  389 
  390     double score() {
  391     return h_score;
  392     }
  393 
  394     void set_score(double min_compression, double min_time) {
  395     h_score = (compression() - 0.99 * min_compression)
  396             * (time() - 0.99 * min_time);
  397     }
  398 
  399     double total_time() {
  400     return accum_time;
  401     }
  402 
  403     int total_count() {
  404     return accum_count;
  405     }
  406 
  407     int total_dups() {
  408     return accum_dups;
  409     }
  410 
  411     int total_colls() {
  412     return accum_dups;
  413     }
  414 
  415     void stat() {
  416     accum_time += time();
  417     accum_count += count();
  418     accum_dups += dups();
  419     accum_colls += colls();
  420     accum_size += test_size;
  421     }
  422 
  423     void print() {
  424     if (fstats.count != count()) {
  425         fprintf(stderr, "internal error: %d != %d\n", fstats.count, count());
  426         abort();
  427     }
  428     printf("%s: (%u#%u) count %u uniq %0.2f%% full %u (%0.4f%% coll %0.4f%%) covers %0.2f%% w/ 2^%d @ %.4f MB/s %u iters\n",
  429            test_name,
  430            cksum_size,
  431            cksum_skip,
  432            count(),
  433            100.0 * uniqueness(),
  434            h_buckets_full,
  435            100.0 * fullness(),
  436            100.0 * collisions(),
  437            100.0 * coverage(),
  438            h_bits,
  439            0.001 * accum_iters * test_size / accum_millis,
  440            accum_iters);
  441     }
  442 
  443     int size_log2 (int slots)
  444     {
  445     int bits = bitsof<word_type>() - 1;
  446     int i;
  447 
  448     for (i = 3; i <= bits; i += 1) {
  449         if (slots <= (1 << i)) {
  450         return i - compaction;
  451         }
  452     }
  453 
  454     return bits;
  455     }
  456 
  457     void new_table(int entries) {
  458     t_entries = entries;
  459     h_bits = size_log2(entries);
  460 
  461     int n = 1 << h_bits;
  462 
  463     s_bits = bitsof<word_type>() - h_bits;
  464     s_mask = n - 1;
  465 
  466     hash_table = new char[n / 8];
  467     memset(hash_table, 0, n / 8);
  468     }
  469 
  470     int get_table_bit(int i) {
  471     return hash_table[i/8] & (1 << i%8);
  472     }
  473 
  474     int set_table_bit(int i) {
  475     return hash_table[i/8] |= (1 << i%8);
  476     }
  477 
  478     void summarize_table() {
  479     int n = 1 << h_bits;
  480     int f = 0;
  481     for (int i = 0; i < n; i++) {
  482         if (get_table_bit(i)) {
  483         f++;
  484         }
  485     }
  486     h_buckets_full = f;
  487     }
  488 
  489     void get(const uint8_t* buf, const int buf_size, int test_iters) {
  490     rabin_karp<SELF> test;
  491     //adler32_cksum<SELF> test;
  492     hash_type hash;
  493     const uint8_t *ptr;
  494     const uint8_t *end;
  495     int last_offset;
  496     int periods;
  497     int stop;
  498 
  499     test_size = buf_size;
  500     last_offset = buf_size - cksum_size;
  501 
  502     if (last_offset < 0) {
  503         periods = 0;
  504         n_steps = 0;
  505         n_incrs = 0;
  506         stop = -cksum_size;
  507     } else {
  508         periods = last_offset / cksum_skip;
  509         n_steps = periods + 1;
  510         n_incrs = last_offset + 1;
  511         stop = last_offset - (periods + 1) * cksum_skip;
  512     }
  513 
  514     // Compute file stats once.
  515     if (fstats.unique_values == 0) {
  516         if (cksum_skip == 1) {
  517         for (int i = 0; i <= buf_size - cksum_size; i++) {
  518             fstats.update(hash(test.step(buf + i), s_bits, s_mask), buf + i);
  519         }
  520         } else {
  521         ptr = buf + last_offset;
  522         end = buf + stop;
  523         
  524         for (; ptr != end; ptr -= cksum_skip) {
  525             fstats.update(hash(test.step(ptr), s_bits, s_mask), ptr);
  526         }
  527         }
  528         fstats.freeze();
  529     }
  530 
  531     long start_test = get_millisecs_now();
  532 
  533     if (cksum_skip != 1) {
  534         new_table(n_steps);
  535 
  536         for (int i = 0; i < test_iters; i++) {
  537         ptr = buf + last_offset;
  538         end = buf + stop;
  539 
  540         for (; ptr != end; ptr -= cksum_skip) {
  541             set_table_bit(hash(test.step(ptr), s_bits, s_mask));
  542         }
  543         }
  544 
  545         summarize_table();
  546     }
  547 
  548     stop = buf_size - cksum_size + 1;
  549     if (stop < 0) {
  550         stop = 0;
  551     }
  552 
  553     if (cksum_skip == 1) {
  554 
  555         new_table(n_incrs);
  556 
  557         for (int i = 0; i < test_iters; i++) {
  558         ptr = buf;
  559         end = buf + stop;
  560 
  561         if (ptr != end) {
  562             set_table_bit(hash(test.state0(ptr++), s_bits, s_mask));
  563         }
  564 
  565         for (; ptr != end; ptr++) {
  566             Word w = test.incr(ptr);
  567             assert(w == test.step(ptr));
  568             set_table_bit(hash(w, s_bits, s_mask));
  569         }
  570         }
  571 
  572         summarize_table();
  573     }
  574 
  575     accum_iters += test_iters;
  576     accum_millis += get_millisecs_now() - start_test;
  577     }
  578 };
  579 
  580 template <typename Word>
  581 void print_array(const char *tname) {
  582     printf("static const %s hash_multiplier[64] = {\n", tname);
  583     Word p = 1;
  584     for (int i = 0; i < 64; i++) {
  585     printf("  %uU,\n", p);
  586     p *= good_word<Word>();
  587     }
  588     printf("};\n", tname);
  589 }
  590 
  591 int main(int argc, char** argv) {
  592   int i;
  593   uint8_t *buf = NULL;
  594   size_t buf_len = 0;
  595   int ret;
  596 
  597   if (argc <= 1) {
  598     fprintf(stderr, "usage: %s file ...\n", argv[0]);
  599     return 1;
  600   }
  601 
  602   //print_array<uint32_t>("uint32_t");
  603 
  604 #define TEST(T,Z,S,P,H,C) test_result<T,Z,S,P,H<T>,C> \
  605       _ ## T ## _ ## Z ## _ ## S ## _ ## P ## _ ## H ## _ ## C \
  606       (#T "_" #Z "_" #S "_" #P "_" #H "_" #C)
  607 
  608 #if 0
  609 
  610   TEST(uint32_t, 4, SKIP, plain, hhash, 0); /* x */ \
  611   TEST(uint32_t, 4, SKIP, plain, hhash, 1); /* x */ \
  612   TEST(uint32_t, 4, SKIP, plain, hhash, 2); /* x */ \
  613   TEST(uint32_t, 4, SKIP, plain, hhash, 3); /* x */ \
  614 
  615 #endif
  616 
  617 #define TESTS(SKIP) \
  618   TEST(uint32_t, 9, SKIP, plain, hhash, 0); /* x */ \
  619   TEST(uint32_t, 9, SKIP, plain, hhash, 1); /* x */ \
  620   TEST(uint32_t, 9, SKIP, plain, hhash, 2); /* x */ \
  621   TEST(uint32_t, 9, SKIP, plain, hhash, 3)
  622   
  623 #define TESTS_ALL(SKIP) \
  624   TEST(uint32_t, 3, SKIP, plain, hhash, 0); \
  625   TEST(uint32_t, 3, SKIP, plain, hhash, 1); \
  626   TEST(uint32_t, 4, SKIP, plain, hhash, 0); /* x */ \
  627   TEST(uint32_t, 4, SKIP, plain, hhash, 1); /* x */ \
  628   TEST(uint32_t, 4, SKIP, plain, hhash, 2); /* x */ \
  629   TEST(uint32_t, 4, SKIP, plain, hhash, 3); /* x */ \
  630   TEST(uint32_t, 5, SKIP, plain, hhash, 0); \
  631   TEST(uint32_t, 5, SKIP, plain, hhash, 1); \
  632   TEST(uint32_t, 8, SKIP, plain, hhash, 0); \
  633   TEST(uint32_t, 8, SKIP, plain, hhash, 1); \
  634   TEST(uint32_t, 9, SKIP, plain, hhash, 0); /* x */ \
  635   TEST(uint32_t, 9, SKIP, plain, hhash, 1); /* x */ \
  636   TEST(uint32_t, 9, SKIP, plain, hhash, 2); /* x */ \
  637   TEST(uint32_t, 9, SKIP, plain, hhash, 3); /* x */ \
  638   TEST(uint32_t, 11, SKIP, plain, hhash, 0); /* x */ \
  639   TEST(uint32_t, 11, SKIP, plain, hhash, 1); /* x */ \
  640   TEST(uint32_t, 13, SKIP, plain, hhash, 0); \
  641   TEST(uint32_t, 13, SKIP, plain, hhash, 1); \
  642   TEST(uint32_t, 15, SKIP, plain, hhash, 0); /* x */ \
  643   TEST(uint32_t, 15, SKIP, plain, hhash, 1); /* x */ \
  644   TEST(uint32_t, 16, SKIP, plain, hhash, 0); /* x */ \
  645   TEST(uint32_t, 16, SKIP, plain, hhash, 1); /* x */ \
  646   TEST(uint32_t, 21, SKIP, plain, hhash, 0); \
  647   TEST(uint32_t, 21, SKIP, plain, hhash, 1); \
  648   TEST(uint32_t, 34, SKIP, plain, hhash, 0); \
  649   TEST(uint32_t, 34, SKIP, plain, hhash, 1); \
  650   TEST(uint32_t, 55, SKIP, plain, hhash, 0); \
  651   TEST(uint32_t, 55, SKIP, plain, hhash, 1)
  652 
  653   TESTS(1); // *
  654 //   TESTS(2); // *
  655 //   TESTS(3); // *
  656 //   TESTS(5); // *
  657 //   TESTS(8); // *
  658 //   TESTS(9);
  659 //   TESTS(11);
  660 //   TESTS(13); // *
  661   TESTS(15);
  662 //   TESTS(16);
  663 //   TESTS(21); // *
  664 //   TESTS(34); // *
  665 //   TESTS(55); // *
  666 //   TESTS(89); // *
  667 
  668   for (i = 1; i < argc; i++) {
  669     if ((ret = read_whole_file(argv[i],
  670                    & buf,
  671                    & buf_len))) {
  672       return 1;
  673     }
  674 
  675     fprintf(stderr, "file %s is %zu bytes\n",
  676         argv[i], buf_len);
  677 
  678     double min_time = -1.0;
  679     double min_compression = 0.0;
  680 
  681     for (vector<test_result_base*>::iterator i = all_tests.begin();
  682      i != all_tests.end(); ++i) {
  683     test_result_base *test = *i;
  684     test->reset();
  685 
  686     int iters = 100;
  687     long start_test = get_millisecs_now();
  688 
  689     do {
  690         test->get(buf, buf_len, iters);
  691         iters *= 3;
  692         iters /= 2;
  693     } while (get_millisecs_now() - start_test < 2000);
  694 
  695     test->stat();
  696 
  697     if (min_time < 0.0) {
  698         min_compression = test->compression();
  699         min_time = test->time();
  700     }
  701 
  702     if (min_time > test->time()) {
  703         min_time = test->time();
  704     }
  705 
  706     if (min_compression > test->compression()) {
  707         min_compression = test->compression();
  708     }
  709 
  710     test->print();
  711     }
  712 
  713 //     for (vector<test_result_base*>::iterator i = all_tests.begin();
  714 //   i != all_tests.end(); ++i) {
  715 //  test_result_base *test = *i;
  716 //  test->set_score(min_compression, min_time);
  717 //     }    
  718 
  719 //     sort(all_tests.begin(), all_tests.end(), compare_h());
  720     
  721 //     for (vector<test_result_base*>::iterator i = all_tests.begin();
  722 //   i != all_tests.end(); ++i) {
  723 //  test_result_base *test = *i;
  724 //  test->print();
  725 //     }    
  726     
  727     free(buf);
  728     buf = NULL;
  729   }
  730 
  731   return 0;      
  732 }