"Fossies" - the Fresh Open Source Software Archive 
Member "xdelta3-3.0.11/examples/checksum_test.cc" (24 Mar 2015, 15514 Bytes) of package /linux/misc/xdelta3-3.0.11.tar.gz:
As a special service "Fossies" has tried to format the requested source page into HTML format using (guessed) C and C++ source code syntax highlighting (style:
standard) with prefixed line numbers and
code folding option.
Alternatively you can here
view or
download the uninterpreted source code file.
1 /* Copyright (C) 2007 Josh MacDonald */
2
3 extern "C" {
4 #include "test.h"
5 #include <assert.h>
6 }
7
8 #include <list>
9 #include <vector>
10 #include <map>
11 #include <algorithm>
12
13 using std::list;
14 using std::map;
15 using std::vector;
16
17 // MLCG parameters
18 // a, a*
19 uint32_t good_32bit_values[] = {
20 1597334677U, // ...
21 741103597U, 887987685U,
22 };
23
24 // a, a*
25 uint64_t good_64bit_values[] = {
26 1181783497276652981ULL, 4292484099903637661ULL,
27 7664345821815920749ULL, // ...
28 };
29
30 struct true_type { };
31 struct false_type { };
32
33 template <typename Word>
34 int bitsof();
35
36 template<>
37 int bitsof<uint32_t>() {
38 return 32;
39 }
40
41 template<>
42 int bitsof<uint64_t>() {
43 return 64;
44 }
45
46 struct plain {
47 int operator()(const uint8_t &c) {
48 return c;
49 }
50 };
51
52 template <typename Word>
53 struct hhash { // take "h" of the high-bits as a hash value for this
54 // checksum, which are the most "distant" in terms of the
55 // spectral test for the rabin_karp MLCG. For short windows,
56 // the high bits aren't enough, XOR "mask" worth of these in.
57 Word operator()(const Word& t, const int &h, const int &mask) {
58 return (t >> h) ^ (t & mask);
59 }
60 };
61
62 template <typename Word>
63 Word good_word();
64
65 template<>
66 uint32_t good_word<uint32_t>() {
67 return good_32bit_values[0];
68 }
69
70 template<>
71 uint64_t good_word<uint64_t>() {
72 return good_64bit_values[0];
73 }
74
75 // CLASSES
76
77 #define SELF Word, CksumSize, CksumSkip, Permute, Hash, Compaction
78 #define MEMBER template <typename Word, \
79 int CksumSize, \
80 int CksumSkip, \
81 typename Permute, \
82 typename Hash, \
83 int Compaction>
84
85 MEMBER
86 struct cksum_params {
87 typedef Word word_type;
88 typedef Permute permute_type;
89 typedef Hash hash_type;
90
91 enum { cksum_size = CksumSize,
92 cksum_skip = CksumSkip,
93 compaction = Compaction,
94 };
95 };
96
97
98 MEMBER
99 struct rabin_karp {
100 typedef Word word_type;
101 typedef Permute permute_type;
102 typedef Hash hash_type;
103
104 enum { cksum_size = CksumSize,
105 cksum_skip = CksumSkip,
106 compaction = Compaction,
107 };
108
109 // (a^cksum_size-1 c_0) + (a^cksum_size-2 c_1) ...
110 rabin_karp() {
111 multiplier = good_word<Word>();
112 powers = new Word[cksum_size];
113 powers[cksum_size - 1] = 1;
114 for (int i = cksum_size - 2; i >= 0; i--) {
115 powers[i] = powers[i + 1] * multiplier;
116 }
117 product = powers[0] * multiplier;
118 }
119
120 ~rabin_karp() {
121 delete [] powers;
122 }
123
124 Word step(const uint8_t *ptr) {
125 Word h = 0;
126 for (int i = 0; i < cksum_size; i++) {
127 h += permute_type()(ptr[i]) * powers[i];
128 }
129 return h;
130 }
131
132 Word state0(const uint8_t *ptr) {
133 incr_state = step(ptr);
134 return incr_state;
135 }
136
137 Word incr(const uint8_t *ptr) {
138 incr_state = multiplier * incr_state -
139 product * permute_type()(ptr[-1]) +
140 permute_type()(ptr[cksum_size - 1]);
141 return incr_state;
142 }
143
144 Word *powers;
145 Word product;
146 Word multiplier;
147 Word incr_state;
148 };
149
150 MEMBER
151 struct adler32_cksum {
152 typedef Word word_type;
153 typedef Permute permute_type;
154 typedef Hash hash_type;
155
156 enum { cksum_size = CksumSize,
157 cksum_skip = CksumSkip,
158 compaction = Compaction,
159 };
160
161 Word step(const uint8_t *ptr) {
162 return xd3_lcksum (ptr, cksum_size);
163 }
164
165 Word state0(const uint8_t *ptr) {
166 incr_state = step(ptr);
167 return incr_state;
168 }
169
170 Word incr(const uint8_t *ptr) {
171 incr_state = xd3_large_cksum_update (incr_state, ptr - 1, cksum_size);
172 return incr_state;
173 }
174
175 Word incr_state;
176 };
177
178 // TESTS
179
180 template <typename Word>
181 struct file_stats {
182 typedef list<const uint8_t*> ptr_list;
183 typedef Word word_type;
184 typedef map<word_type, ptr_list> table_type;
185 typedef typename table_type::iterator table_iterator;
186 typedef typename ptr_list::iterator ptr_iterator;
187
188 int cksum_size;
189 int cksum_skip;
190 int unique;
191 int unique_values;
192 int count;
193 table_type table;
194
195 file_stats(int size, int skip)
196 : cksum_size(size),
197 cksum_skip(skip),
198 unique(0),
199 unique_values(0),
200 count(0) {
201 }
202
203 void reset() {
204 unique = 0;
205 unique_values = 0;
206 count = 0;
207 table.clear();
208 }
209
210 void update(const word_type &word, const uint8_t *ptr) {
211 table_iterator t_i = table.find(word);
212
213 count++;
214
215 if (t_i == table.end()) {
216 table.insert(make_pair(word, ptr_list()));
217 }
218
219 ptr_list &pl = table[word];
220
221 for (ptr_iterator p_i = pl.begin();
222 p_i != pl.end();
223 ++p_i) {
224 if (memcmp(*p_i, ptr, cksum_size) == 0) {
225 return;
226 }
227 }
228
229 unique++;
230 pl.push_back(ptr);
231 }
232
233 void freeze() {
234 unique_values = table.size();
235 table.clear();
236 }
237 };
238
239 struct test_result_base;
240
241 static vector<test_result_base*> all_tests;
242
243 struct test_result_base {
244 virtual ~test_result_base() {
245 }
246 virtual void reset() = 0;
247 virtual void print() = 0;
248 virtual void get(const uint8_t* buf, const int buf_size, int iters) = 0;
249 virtual void stat() = 0;
250 virtual int count() = 0;
251 virtual int dups() = 0;
252 virtual double uniqueness() = 0;
253 virtual double fullness() = 0;
254 virtual double collisions() = 0;
255 virtual double coverage() = 0;
256 virtual double compression() = 0;
257 virtual double time() = 0;
258 virtual double score() = 0;
259 virtual void set_score(double min_dups_frac, double min_time) = 0;
260 virtual double total_time() = 0;
261 virtual int total_count() = 0;
262 virtual int total_dups() = 0;
263 };
264
265 struct compare_h {
266 bool operator()(test_result_base *a,
267 test_result_base *b) {
268 return a->score() < b->score();
269 }
270 };
271
272 MEMBER
273 struct test_result : public test_result_base {
274 typedef Word word_type;
275 typedef Permute permute_type;
276 typedef Hash hash_type;
277
278 enum { cksum_size = CksumSize,
279 cksum_skip = CksumSkip,
280 compaction = Compaction,
281 };
282
283 const char *test_name;
284 file_stats<Word> fstats;
285 int test_size;
286 int n_steps;
287 int n_incrs;
288 int s_bits;
289 int s_mask;
290 int t_entries;
291 int h_bits;
292 int h_buckets_full;
293 double h_score;
294 char *hash_table;
295 long accum_millis;
296 int accum_iters;
297
298 // These are not reset
299 double accum_time;
300 int accum_count;
301 int accum_dups;
302 int accum_colls;
303 int accum_size;
304
305 test_result(const char *name)
306 : test_name(name),
307 fstats(cksum_size, cksum_skip),
308 hash_table(NULL),
309 accum_millis(0),
310 accum_iters(0),
311 accum_time(0.0),
312 accum_count(0),
313 accum_dups(0),
314 accum_colls(0),
315 accum_size(0) {
316 all_tests.push_back(this);
317 }
318
319 ~test_result() {
320 reset();
321 }
322
323 void reset() {
324 // size of file
325 test_size = -1;
326
327 // count
328 n_steps = -1;
329 n_incrs = -1;
330
331 // four values used by new_table()/summarize_table()
332 s_bits = -1;
333 s_mask = -1;
334 t_entries = -1;
335 h_bits = -1;
336 h_buckets_full = -1;
337
338 accum_millis = 0;
339 accum_iters = 0;
340
341 fstats.reset();
342
343 // temporary
344 if (hash_table) {
345 delete(hash_table);
346 hash_table = NULL;
347 }
348 }
349
350 int count() {
351 if (cksum_skip == 1) {
352 return n_incrs;
353 } else {
354 return n_steps;
355 }
356 }
357
358 int dups() {
359 return fstats.count - fstats.unique;
360 }
361
362 int colls() {
363 return fstats.unique - fstats.unique_values;
364 }
365
366 double uniqueness() {
367 return 1.0 - (double) dups() / count();
368 }
369
370 double fullness() {
371 return (double) h_buckets_full / (1 << h_bits);
372 }
373
374 double collisions() {
375 return (double) colls() / fstats.unique;
376 }
377
378 double coverage() {
379 return (double) h_buckets_full / uniqueness() / count();
380 }
381
382 double compression() {
383 return 1.0 - coverage();
384 }
385
386 double time() {
387 return (double) accum_millis / accum_iters;
388 }
389
390 double score() {
391 return h_score;
392 }
393
394 void set_score(double min_compression, double min_time) {
395 h_score = (compression() - 0.99 * min_compression)
396 * (time() - 0.99 * min_time);
397 }
398
399 double total_time() {
400 return accum_time;
401 }
402
403 int total_count() {
404 return accum_count;
405 }
406
407 int total_dups() {
408 return accum_dups;
409 }
410
411 int total_colls() {
412 return accum_dups;
413 }
414
415 void stat() {
416 accum_time += time();
417 accum_count += count();
418 accum_dups += dups();
419 accum_colls += colls();
420 accum_size += test_size;
421 }
422
423 void print() {
424 if (fstats.count != count()) {
425 fprintf(stderr, "internal error: %d != %d\n", fstats.count, count());
426 abort();
427 }
428 printf("%s: (%u#%u) count %u uniq %0.2f%% full %u (%0.4f%% coll %0.4f%%) covers %0.2f%% w/ 2^%d @ %.4f MB/s %u iters\n",
429 test_name,
430 cksum_size,
431 cksum_skip,
432 count(),
433 100.0 * uniqueness(),
434 h_buckets_full,
435 100.0 * fullness(),
436 100.0 * collisions(),
437 100.0 * coverage(),
438 h_bits,
439 0.001 * accum_iters * test_size / accum_millis,
440 accum_iters);
441 }
442
443 int size_log2 (int slots)
444 {
445 int bits = bitsof<word_type>() - 1;
446 int i;
447
448 for (i = 3; i <= bits; i += 1) {
449 if (slots <= (1 << i)) {
450 return i - compaction;
451 }
452 }
453
454 return bits;
455 }
456
457 void new_table(int entries) {
458 t_entries = entries;
459 h_bits = size_log2(entries);
460
461 int n = 1 << h_bits;
462
463 s_bits = bitsof<word_type>() - h_bits;
464 s_mask = n - 1;
465
466 hash_table = new char[n / 8];
467 memset(hash_table, 0, n / 8);
468 }
469
470 int get_table_bit(int i) {
471 return hash_table[i/8] & (1 << i%8);
472 }
473
474 int set_table_bit(int i) {
475 return hash_table[i/8] |= (1 << i%8);
476 }
477
478 void summarize_table() {
479 int n = 1 << h_bits;
480 int f = 0;
481 for (int i = 0; i < n; i++) {
482 if (get_table_bit(i)) {
483 f++;
484 }
485 }
486 h_buckets_full = f;
487 }
488
489 void get(const uint8_t* buf, const int buf_size, int test_iters) {
490 rabin_karp<SELF> test;
491 //adler32_cksum<SELF> test;
492 hash_type hash;
493 const uint8_t *ptr;
494 const uint8_t *end;
495 int last_offset;
496 int periods;
497 int stop;
498
499 test_size = buf_size;
500 last_offset = buf_size - cksum_size;
501
502 if (last_offset < 0) {
503 periods = 0;
504 n_steps = 0;
505 n_incrs = 0;
506 stop = -cksum_size;
507 } else {
508 periods = last_offset / cksum_skip;
509 n_steps = periods + 1;
510 n_incrs = last_offset + 1;
511 stop = last_offset - (periods + 1) * cksum_skip;
512 }
513
514 // Compute file stats once.
515 if (fstats.unique_values == 0) {
516 if (cksum_skip == 1) {
517 for (int i = 0; i <= buf_size - cksum_size; i++) {
518 fstats.update(hash(test.step(buf + i), s_bits, s_mask), buf + i);
519 }
520 } else {
521 ptr = buf + last_offset;
522 end = buf + stop;
523
524 for (; ptr != end; ptr -= cksum_skip) {
525 fstats.update(hash(test.step(ptr), s_bits, s_mask), ptr);
526 }
527 }
528 fstats.freeze();
529 }
530
531 long start_test = get_millisecs_now();
532
533 if (cksum_skip != 1) {
534 new_table(n_steps);
535
536 for (int i = 0; i < test_iters; i++) {
537 ptr = buf + last_offset;
538 end = buf + stop;
539
540 for (; ptr != end; ptr -= cksum_skip) {
541 set_table_bit(hash(test.step(ptr), s_bits, s_mask));
542 }
543 }
544
545 summarize_table();
546 }
547
548 stop = buf_size - cksum_size + 1;
549 if (stop < 0) {
550 stop = 0;
551 }
552
553 if (cksum_skip == 1) {
554
555 new_table(n_incrs);
556
557 for (int i = 0; i < test_iters; i++) {
558 ptr = buf;
559 end = buf + stop;
560
561 if (ptr != end) {
562 set_table_bit(hash(test.state0(ptr++), s_bits, s_mask));
563 }
564
565 for (; ptr != end; ptr++) {
566 Word w = test.incr(ptr);
567 assert(w == test.step(ptr));
568 set_table_bit(hash(w, s_bits, s_mask));
569 }
570 }
571
572 summarize_table();
573 }
574
575 accum_iters += test_iters;
576 accum_millis += get_millisecs_now() - start_test;
577 }
578 };
579
580 template <typename Word>
581 void print_array(const char *tname) {
582 printf("static const %s hash_multiplier[64] = {\n", tname);
583 Word p = 1;
584 for (int i = 0; i < 64; i++) {
585 printf(" %uU,\n", p);
586 p *= good_word<Word>();
587 }
588 printf("};\n", tname);
589 }
590
591 int main(int argc, char** argv) {
592 int i;
593 uint8_t *buf = NULL;
594 size_t buf_len = 0;
595 int ret;
596
597 if (argc <= 1) {
598 fprintf(stderr, "usage: %s file ...\n", argv[0]);
599 return 1;
600 }
601
602 //print_array<uint32_t>("uint32_t");
603
604 #define TEST(T,Z,S,P,H,C) test_result<T,Z,S,P,H<T>,C> \
605 _ ## T ## _ ## Z ## _ ## S ## _ ## P ## _ ## H ## _ ## C \
606 (#T "_" #Z "_" #S "_" #P "_" #H "_" #C)
607
608 #if 0
609
610 TEST(uint32_t, 4, SKIP, plain, hhash, 0); /* x */ \
611 TEST(uint32_t, 4, SKIP, plain, hhash, 1); /* x */ \
612 TEST(uint32_t, 4, SKIP, plain, hhash, 2); /* x */ \
613 TEST(uint32_t, 4, SKIP, plain, hhash, 3); /* x */ \
614
615 #endif
616
617 #define TESTS(SKIP) \
618 TEST(uint32_t, 9, SKIP, plain, hhash, 0); /* x */ \
619 TEST(uint32_t, 9, SKIP, plain, hhash, 1); /* x */ \
620 TEST(uint32_t, 9, SKIP, plain, hhash, 2); /* x */ \
621 TEST(uint32_t, 9, SKIP, plain, hhash, 3)
622
623 #define TESTS_ALL(SKIP) \
624 TEST(uint32_t, 3, SKIP, plain, hhash, 0); \
625 TEST(uint32_t, 3, SKIP, plain, hhash, 1); \
626 TEST(uint32_t, 4, SKIP, plain, hhash, 0); /* x */ \
627 TEST(uint32_t, 4, SKIP, plain, hhash, 1); /* x */ \
628 TEST(uint32_t, 4, SKIP, plain, hhash, 2); /* x */ \
629 TEST(uint32_t, 4, SKIP, plain, hhash, 3); /* x */ \
630 TEST(uint32_t, 5, SKIP, plain, hhash, 0); \
631 TEST(uint32_t, 5, SKIP, plain, hhash, 1); \
632 TEST(uint32_t, 8, SKIP, plain, hhash, 0); \
633 TEST(uint32_t, 8, SKIP, plain, hhash, 1); \
634 TEST(uint32_t, 9, SKIP, plain, hhash, 0); /* x */ \
635 TEST(uint32_t, 9, SKIP, plain, hhash, 1); /* x */ \
636 TEST(uint32_t, 9, SKIP, plain, hhash, 2); /* x */ \
637 TEST(uint32_t, 9, SKIP, plain, hhash, 3); /* x */ \
638 TEST(uint32_t, 11, SKIP, plain, hhash, 0); /* x */ \
639 TEST(uint32_t, 11, SKIP, plain, hhash, 1); /* x */ \
640 TEST(uint32_t, 13, SKIP, plain, hhash, 0); \
641 TEST(uint32_t, 13, SKIP, plain, hhash, 1); \
642 TEST(uint32_t, 15, SKIP, plain, hhash, 0); /* x */ \
643 TEST(uint32_t, 15, SKIP, plain, hhash, 1); /* x */ \
644 TEST(uint32_t, 16, SKIP, plain, hhash, 0); /* x */ \
645 TEST(uint32_t, 16, SKIP, plain, hhash, 1); /* x */ \
646 TEST(uint32_t, 21, SKIP, plain, hhash, 0); \
647 TEST(uint32_t, 21, SKIP, plain, hhash, 1); \
648 TEST(uint32_t, 34, SKIP, plain, hhash, 0); \
649 TEST(uint32_t, 34, SKIP, plain, hhash, 1); \
650 TEST(uint32_t, 55, SKIP, plain, hhash, 0); \
651 TEST(uint32_t, 55, SKIP, plain, hhash, 1)
652
653 TESTS(1); // *
654 // TESTS(2); // *
655 // TESTS(3); // *
656 // TESTS(5); // *
657 // TESTS(8); // *
658 // TESTS(9);
659 // TESTS(11);
660 // TESTS(13); // *
661 TESTS(15);
662 // TESTS(16);
663 // TESTS(21); // *
664 // TESTS(34); // *
665 // TESTS(55); // *
666 // TESTS(89); // *
667
668 for (i = 1; i < argc; i++) {
669 if ((ret = read_whole_file(argv[i],
670 & buf,
671 & buf_len))) {
672 return 1;
673 }
674
675 fprintf(stderr, "file %s is %zu bytes\n",
676 argv[i], buf_len);
677
678 double min_time = -1.0;
679 double min_compression = 0.0;
680
681 for (vector<test_result_base*>::iterator i = all_tests.begin();
682 i != all_tests.end(); ++i) {
683 test_result_base *test = *i;
684 test->reset();
685
686 int iters = 100;
687 long start_test = get_millisecs_now();
688
689 do {
690 test->get(buf, buf_len, iters);
691 iters *= 3;
692 iters /= 2;
693 } while (get_millisecs_now() - start_test < 2000);
694
695 test->stat();
696
697 if (min_time < 0.0) {
698 min_compression = test->compression();
699 min_time = test->time();
700 }
701
702 if (min_time > test->time()) {
703 min_time = test->time();
704 }
705
706 if (min_compression > test->compression()) {
707 min_compression = test->compression();
708 }
709
710 test->print();
711 }
712
713 // for (vector<test_result_base*>::iterator i = all_tests.begin();
714 // i != all_tests.end(); ++i) {
715 // test_result_base *test = *i;
716 // test->set_score(min_compression, min_time);
717 // }
718
719 // sort(all_tests.begin(), all_tests.end(), compare_h());
720
721 // for (vector<test_result_base*>::iterator i = all_tests.begin();
722 // i != all_tests.end(); ++i) {
723 // test_result_base *test = *i;
724 // test->print();
725 // }
726
727 free(buf);
728 buf = NULL;
729 }
730
731 return 0;
732 }