"Fossies" - the Fresh Open Source Software Archive 
Member "xdelta3-3.0.11/xdelta3-djw.h" (3 Nov 2015, 47489 Bytes) of package /linux/misc/xdelta3-3.0.11.tar.gz:
As a special service "Fossies" has tried to format the requested source page into HTML format using (guessed) C and C++ source code syntax highlighting (style:
standard) with prefixed line numbers and
code folding option.
Alternatively you can here
view or
download the uninterpreted source code file.
For more information about "xdelta3-djw.h" see the
Fossies "Dox" file reference documentation.
1 /* xdelta 3 - delta compression tools and library
2 * Copyright (C) 2002, 2006, 2007. Joshua P. MacDonald
3 *
4 * This program is free software; you can redistribute it and/or modify
5 * it under the terms of the GNU General Public License as published by
6 * the Free Software Foundation; either version 2 of the License, or
7 * (at your option) any later version.
8 *
9 * This program is distributed in the hope that it will be useful,
10 * but WITHOUT ANY WARRANTY; without even the implied warranty of
11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 * GNU General Public License for more details.
13 *
14 * You should have received a copy of the GNU General Public License
15 * along with this program; if not, write to the Free Software
16 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
17 */
18
19 /* TODO: This code needs a thorough round of commenting. There is
20 * some slop in the declaration of arrays, which may be one element
21 * larger than they need to be; comments would help clear this up. */
22
23 #ifndef _XDELTA3_DJW_H_
24 #define _XDELTA3_DJW_H_
25
26 /* The following people deserve much credit for the algorithms and
27 * techniques contained in this file:
28
29 Julian Seward
30 Bzip2 sources, implementation of the multi-table Huffman technique.
31
32 Jean-loup Gailly and Mark Adler and L. Peter Deutsch
33 Zlib source code, RFC 1951
34
35 Daniel S. Hirschberg and Debra A. LeLewer
36 "Efficient Decoding of Prefix Codes"
37 Communications of the ACM, April 1990 33(4).
38
39 David J. Wheeler
40 Program bred3.c, bexp3 and accompanying documents bred3.ps, huff.ps.
41 This contains the idea behind the multi-table Huffman and 1-2 coding
42 techniques.
43 ftp://ftp.cl.cam.ac.uk/users/djw3/
44
45 */
46
47 /* OPT: during the multi-table iteration, pick the worst-overall
48 * performing table and replace it with exactly the frequencies of the
49 * worst-overall performing sector or N-worst performing sectors. */
50
51 /* REF: See xdfs-0.222 and xdfs-0.226 for some old experiments with
52 * the Bzip prefix coding strategy. xdfs-0.256 contains the last of
53 * the other-format tests, including RFC1950 and the RFC1950+MTF
54 * tests. */
55
56 #define DJW_MAX_CODELEN 20U /* Maximum length of an alphabet code. */
57
58 /* Code lengths are themselves code-length encoded, so the full alphabet is
59 * [RUN_0, RUN_1, 1-DJW_MAX_CODELEN]: DJW_MAX_CODELEN+2 codes in total. */
60 #define DJW_TOTAL_CODES (DJW_MAX_CODELEN+2)
61
62 #define RUN_0 0U /* Symbols used in MTF+1/2 coding. */
63 #define RUN_1 1U
64
65 /* Number of code lengths always encoded (djw_encode_basic array) */
66 #define DJW_BASIC_CODES 5U
67 #define DJW_RUN_CODES 2U /* Number of run codes */
68
69 /* Offset of extra codes */
70 #define DJW_EXTRA_12OFFSET (DJW_BASIC_CODES + DJW_RUN_CODES)
71
72 /* Number of optionally encoded code lengths (djw_encode_extra array) */
73 #define DJW_EXTRA_CODES 15U
74
75 /* Number of bits to code [0-DJW_EXTRA_CODES] */
76 #define DJW_EXTRA_CODE_BITS 4U
77
78 #define DJW_MAX_GROUPS 8U /* Max number of group coding tables */
79 #define DJW_GROUP_BITS 3U /* Number of bits to code [1-DJW_MAX_GROUPS] */
80
81 #define DJW_SECTORSZ_MULT 5U /* Multiplier for encoded sectorsz */
82 #define DJW_SECTORSZ_BITS 5U /* Number of bits to code group size */
83 #define DJW_SECTORSZ_MAX ((1U << DJW_SECTORSZ_BITS) * DJW_SECTORSZ_MULT)
84
85 /* Maximum number of iterations to find group tables. */
86 #define DJW_MAX_ITER 6U
87 /* Minimum number of bits an iteration must reduce coding by. */
88 #define DJW_MIN_IMPROVEMENT 20U
89
90 /* Maximum code length of the code-length code */
91 #define DJW_MAX_CLCLEN 15U
92
93 /* Number of bits to code [0-DJW_MAX_CLCLEN] */
94 #define DJW_CLCLEN_BITS 4U
95
96 #define DJW_MAX_GBCLEN 7U /* Maximum code length of a group selector */
97
98 /* Number of bits to code [0-DJW_MAX_GBCLEN]
99 * TODO: Actually, should never have zero code lengths here, or else a group
100 * went unused. Write a test for this: if a group goes unused, eliminate
101 * it? */
102 #define DJW_GBCLEN_BITS 3U
103
104 /* It has to save at least this many bits... */
105 #define EFFICIENCY_BITS 16U
106
107 typedef struct _djw_stream djw_stream;
108 typedef struct _djw_heapen djw_heapen;
109 typedef struct _djw_prefix djw_prefix;
110 typedef uint32_t djw_weight;
111
112 struct _djw_heapen
113 {
114 uint32_t depth;
115 uint32_t freq;
116 uint32_t parent;
117 };
118
119 struct _djw_prefix
120 {
121 usize_t scount;
122 uint8_t *symbol;
123 usize_t mcount;
124 uint8_t *mtfsym;
125 uint8_t *repcnt;
126 };
127
128 struct _djw_stream
129 {
130 int unused;
131 };
132
133 /* Each Huffman table consists of 256 "code length" (CLEN) codes,
134 * which are themselves Huffman coded after eliminating repeats and
135 * move-to-front coding. The prefix consists of all the CLEN codes in
136 * djw_encode_basic plus a 4-bit value stating how many of the
137 * djw_encode_extra codes are actually coded (the rest are presumed
138 * zero, or unused CLEN codes).
139 *
140 * The values of these two arrays were arrived at by studying the
141 * distribution of min and max clen over a collection of DATA, INST,
142 * and ADDR inputs. The goal is to specify the order of
143 * djw_extra_codes that is most likely to minimize the number of extra
144 * codes that must be encoded.
145 *
146 * Results: 158896 sections were counted by compressing files (window
147 * size 512K) listed with: `find / -type f \( -user jmacd -o -perm +444
148 * \)`
149 *
150 * The distribution of CLEN codes for each efficient invocation of the
151 * secondary compressor (taking the best number of groups/sector size)
152 * was recorded. Then we look at the distribution of min and max clen
153 * values, counting the number of times the value C_low is less than
154 * the min and C_high is greater than the max. Values >= C_high and
155 * <= C_low will not have their lengths coded. The results are sorted
156 * and the least likely 15 are placed into the djw_encode_extra[]
157 * array in order. These values are used as the initial MTF ordering.
158
159 clow[1] = 155119
160 clow[2] = 140325
161 clow[3] = 84072
162 ---
163 clow[4] = 7225
164 clow[5] = 1093
165 clow[6] = 215
166 ---
167 chigh[4] = 1
168 chigh[5] = 30
169 chigh[6] = 218
170 chigh[7] = 2060
171 chigh[8] = 13271
172 ---
173 chigh[9] = 39463
174 chigh[10] = 77360
175 chigh[11] = 118298
176 chigh[12] = 141360
177 chigh[13] = 154086
178 chigh[14] = 157967
179 chigh[15] = 158603
180 chigh[16] = 158864
181 chigh[17] = 158893
182 chigh[18] = 158895
183 chigh[19] = 158896
184 chigh[20] = 158896
185
186 */
187
188 static const uint8_t djw_encode_12extra[DJW_EXTRA_CODES] =
189 {
190 9, 10, 3, 11, 2, 12, 13, 1, 14, 15, 16, 17, 18, 19, 20,
191 };
192
193 static const uint8_t djw_encode_12basic[DJW_BASIC_CODES] =
194 {
195 4, 5, 6, 7, 8,
196 };
197
198 /*********************************************************************/
199 /* DECLS */
200 /*********************************************************************/
201
202 static djw_stream* djw_alloc (xd3_stream *stream);
203 static int djw_init (xd3_stream *stream,
204 djw_stream *h,
205 int is_encode);
206 static void djw_destroy (xd3_stream *stream,
207 djw_stream *h);
208
209 #if XD3_ENCODER
210 static int xd3_encode_huff (xd3_stream *stream,
211 djw_stream *sec_stream,
212 xd3_output *input,
213 xd3_output *output,
214 xd3_sec_cfg *cfg);
215 #endif
216
217 static int xd3_decode_huff (xd3_stream *stream,
218 djw_stream *sec_stream,
219 const uint8_t **input,
220 const uint8_t *const input_end,
221 uint8_t **output,
222 const uint8_t *const output_end);
223
224 /*********************************************************************/
225 /* HUFFMAN */
226 /*********************************************************************/
227
228 static djw_stream*
229 djw_alloc (xd3_stream *stream)
230 {
231 return (djw_stream*) xd3_alloc (stream, sizeof (djw_stream), 1);
232 }
233
234 static int
235 djw_init (xd3_stream *stream, djw_stream *h, int is_encode)
236 {
237 /* Fields are initialized prior to use. */
238 return 0;
239 }
240
241 static void
242 djw_destroy (xd3_stream *stream,
243 djw_stream *h)
244 {
245 xd3_free (stream, h);
246 }
247
248
249 /*********************************************************************/
250 /* HEAP */
251 /*********************************************************************/
252
253 static inline int
254 heap_less (const djw_heapen *a, const djw_heapen *b)
255 {
256 return a->freq < b->freq ||
257 (a->freq == b->freq &&
258 a->depth < b->depth);
259 }
260
261 static inline void
262 heap_insert (usize_t *heap, const djw_heapen *ents, usize_t p, const usize_t e)
263 {
264 /* Insert ents[e] into next slot heap[p] */
265 usize_t pp = p/2; /* P's parent */
266
267 while (heap_less (& ents[e], & ents[heap[pp]]))
268 {
269 heap[p] = heap[pp];
270 p = pp;
271 pp = p/2;
272 }
273
274 heap[p] = e;
275 }
276
277 static inline djw_heapen*
278 heap_extract (usize_t *heap, const djw_heapen *ents, usize_t heap_last)
279 {
280 usize_t smallest = heap[1];
281 usize_t p, pc, t;
282
283 /* Caller decrements heap_last, so heap_last+1 is the replacement elt. */
284 heap[1] = heap[heap_last+1];
285
286 /* Re-heapify */
287 for (p = 1; ; p = pc)
288 {
289 pc = p*2;
290
291 /* Reached bottom of heap */
292 if (pc > heap_last) { break; }
293
294 /* See if second child is smaller. */
295 if (pc < heap_last && heap_less (& ents[heap[pc+1]], & ents[heap[pc]]))
296 {
297 pc += 1;
298 }
299
300 /* If pc is not smaller than p, heap property re-established. */
301 if (! heap_less (& ents[heap[pc]], & ents[heap[p]])) { break; }
302
303 t = heap[pc];
304 heap[pc] = heap[p];
305 heap[p] = t;
306 }
307
308 return (djw_heapen*) & ents[smallest];
309 }
310
311 #if XD3_DEBUG
312 static void
313 heap_check (usize_t *heap, djw_heapen *ents, usize_t heap_last)
314 {
315 usize_t i;
316 for (i = 1; i <= heap_last; i += 1)
317 {
318 /* Heap property: child not less than parent */
319 XD3_ASSERT (! heap_less (& ents[heap[i]], & ents[heap[i/2]]));
320
321 IF_DEBUG2 (DP(RINT "heap[%d] = %u\n", i, ents[heap[i]].freq));
322 }
323 }
324 #endif
325
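/* Illustrative sketch (not part of the library): minimal use of the heap
 * above.  Entry 0 is the always-smallest sentinel that terminates the walk
 * in heap_insert; real entries start at index 1 and are pushed with
 * ++heap_last, popped with --heap_last, exactly as djw_build_prefix does
 * below.  The function name and values here are illustrative only. */
#if 0
static void
djw_heap_example (void)
{
  djw_heapen ents[4];
  usize_t    heap[4];
  usize_t    heap_last = 0;

  /* Sentinel: parent of the smallest element, always less-than. */
  heap[0] = 0;
  ents[0].freq  = 0;
  ents[0].depth = 0;

  ents[1].freq = 5;  ents[1].depth = 0;
  ents[2].freq = 2;  ents[2].depth = 0;
  ents[3].freq = 9;  ents[3].depth = 0;

  heap_insert (heap, ents, ++heap_last, 1);
  heap_insert (heap, ents, ++heap_last, 2);
  heap_insert (heap, ents, ++heap_last, 3);

  /* Extraction yields frequencies 2, 5, 9 in that order. */
  XD3_ASSERT (heap_extract (heap, ents, --heap_last)->freq == 2);
  XD3_ASSERT (heap_extract (heap, ents, --heap_last)->freq == 5);
  XD3_ASSERT (heap_extract (heap, ents, --heap_last)->freq == 9);
}
#endif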
326 /*********************************************************************/
327 /* MTF, 1/2 */
328 /*********************************************************************/
329
330 static inline usize_t
331 djw_update_mtf (uint8_t *mtf, usize_t mtf_i)
332 {
333 int k;
334 usize_t sym = mtf[mtf_i];
335
336 for (k = mtf_i; k != 0; k -= 1) { mtf[k] = mtf[k-1]; }
337
338 mtf[0] = sym;
339 return sym;
340 }
341
342 static inline void
343 djw_update_1_2 (int *mtf_run, usize_t *mtf_i,
344 uint8_t *mtfsym, djw_weight *freq)
345 {
346 int code;
347
348 do
349 {
350 /* Offset by 1, since any number of RUN_ symbols implies run>0... */
351 *mtf_run -= 1;
352
353 code = (*mtf_run & 1) ? RUN_1 : RUN_0;
354
355 mtfsym[(*mtf_i)++] = code;
356 freq[code] += 1;
357 *mtf_run >>= 1;
358 }
359 while (*mtf_run >= 1);
360
361 *mtf_run = 0;
362 }
363
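/* Illustrative sketch (not part of the library): the loop above writes a run
 * length n > 0 as RUN_0/RUN_1 digits, least significant first, in the 1-2
 * coding credited to Wheeler above; a digit d at position s stands for
 * (d+1) << s repeats, which is how djw_decode_1_2 later adds them back up.
 * The function name below is illustrative only. */
#if 0
static int
djw_run_1_2_example (void)
{
  int n = 5;                       /* run length to encode */
  int run = n;
  int digits[32];
  int ndigits = 0;
  int decoded = 0;
  int s;

  /* Same loop as djw_update_1_2: emit RUN_0/RUN_1 digits, LSB first. */
  do
    {
      run -= 1;
      digits[ndigits++] = run & 1;   /* 0 == RUN_0, 1 == RUN_1 */
      run >>= 1;
    }
  while (run >= 1);

  /* Same accumulation as djw_decode_1_2: digit d at position s stands for
   * (d+1) << s repeats.  For n == 5 the digits are {RUN_0, RUN_1} and the
   * total comes back as 1 + 4 == 5. */
  for (s = 0; s < ndigits; s += 1)
    {
      decoded += (digits[s] + 1) << s;
    }

  return decoded;                  /* == n */
}
#endif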
364 static void
365 djw_init_clen_mtf_1_2 (uint8_t *clmtf)
366 {
367 usize_t i, cl_i = 0;
368
369 clmtf[cl_i++] = 0;
370 for (i = 0; i < DJW_BASIC_CODES; i += 1)
371 {
372 clmtf[cl_i++] = djw_encode_12basic[i];
373 }
374 for (i = 0; i < DJW_EXTRA_CODES; i += 1)
375 {
376 clmtf[cl_i++] = djw_encode_12extra[i];
377 }
378 }
379
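/* Worked note (derived from the arrays above): the resulting initial MTF
 * order is { 0, 4, 5, 6, 7, 8, 9, 10, 3, 11, 2, 12, 13, 1, 14, 15, 16, 17,
 * 18, 19, 20 }, i.e. the length 0 (unused symbol) first, then the five
 * always-coded basic lengths, then the optional extra lengths in the order
 * given by djw_encode_12extra. */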
380 /*********************************************************************/
381 /* PREFIX CODES */
382 /*********************************************************************/
383 #if XD3_ENCODER
384 static usize_t
385 djw_build_prefix (const djw_weight *freq, uint8_t *clen, usize_t asize, usize_t maxlen)
386 {
387 /* Heap with 0th entry unused, prefix tree with up to ALPHABET_SIZE-1
388 * internal nodes, never more than ALPHABET_SIZE entries actually in the
389 * heap (minimum weight subtrees during prefix construction). First
390 * ALPHABET_SIZE entries are the actual symbols, next ALPHABET_SIZE-1 are
391 * internal nodes. */
392 djw_heapen ents[ALPHABET_SIZE * 2];
393 usize_t heap[ALPHABET_SIZE + 1];
394
395 usize_t heap_last; /* Index of the last _valid_ heap entry. */
396 usize_t ents_size; /* Number of entries, including 0th fake entry */
397 usize_t overflow; /* Number of code lengths that overflow */
398 uint32_t total_bits;
399 usize_t i;
400
401 IF_DEBUG (uint32_t first_bits = 0);
402
403 /* Insert real symbol frequencies. */
404 for (i = 0; i < asize; i += 1)
405 {
406 ents[i+1].freq = freq[i];
407 IF_DEBUG2 (DP(RINT "ents[%d] = freq[%d] = %d\n",
408 i+1, i, freq[i]));
409 }
410
411 again:
412
413 /* The loop is re-entered each time an overflow occurs. Re-initialize... */
414 heap_last = 0;
415 ents_size = 1;
416 overflow = 0;
417 total_bits = 0;
418
419 /* 0th entry terminates the while loop in heap_insert (it's the parent of
420 * the smallest element, always less-than) */
421 heap[0] = 0;
422 ents[0].depth = 0;
423 ents[0].freq = 0;
424
425 /* Initial heap. */
426 for (i = 0; i < asize; i += 1, ents_size += 1)
427 {
428 ents[ents_size].depth = 0;
429 ents[ents_size].parent = 0;
430
431 if (ents[ents_size].freq != 0)
432 {
433 heap_insert (heap, ents, ++heap_last, ents_size);
434 }
435 }
436
437 IF_DEBUG (heap_check (heap, ents, heap_last));
438
439 /* Must be at least one symbol, or else we can't get here. */
440 XD3_ASSERT (heap_last != 0);
441
442 /* If there is only one symbol, fake a second to prevent zero-length
443 * codes. */
444 if (heap_last == 1)
445 {
446 /* Pick either the first or last symbol. */
447 int s = freq[0] ? asize-1 : 0;
448 ents[s+1].freq = 1;
449 goto again;
450 }
451
452 /* Build prefix tree. */
453 while (heap_last > 1)
454 {
455 djw_heapen *h1 = heap_extract (heap, ents, --heap_last);
456 djw_heapen *h2 = heap_extract (heap, ents, --heap_last);
457
458 ents[ents_size].freq = h1->freq + h2->freq;
459 ents[ents_size].depth = 1 + xd3_max (h1->depth, h2->depth);
460 ents[ents_size].parent = 0;
461
462 h1->parent = h2->parent = ents_size;
463
464 heap_insert (heap, ents, ++heap_last, ents_size++);
465 }
466
467 IF_DEBUG (heap_check (heap, ents, heap_last));
468
469 /* Now compute prefix code lengths, counting parents. */
470 for (i = 1; i < asize+1; i += 1)
471 {
472 usize_t b = 0;
473
474 if (ents[i].freq != 0)
475 {
476 usize_t p = i;
477
478 while ((p = ents[p].parent) != 0) { b += 1; }
479
480 if (b > maxlen) { overflow = 1; }
481
482 total_bits += b * freq[i-1];
483 }
484
485 /* clen is 0-origin, unlike ents. */
486 IF_DEBUG2 (DP(RINT "clen[%d] = %d\n", i-1, b));
487 clen[i-1] = b;
488 }
489
490 IF_DEBUG (if (first_bits == 0) first_bits = total_bits);
491
492 if (! overflow)
493 {
494 IF_DEBUG2 (if (first_bits != total_bits)
495 {
496 DP(RINT "code length overflow changed %u bits\n",
497 (usize_t)(total_bits - first_bits));
498 });
499 return total_bits;
500 }
501
502 /* OPT: There is a non-looping way to fix overflow shown in zlib, but this
503 * is easier (for now), as done in bzip2. */
504 for (i = 1; i < asize+1; i += 1)
505 {
506 ents[i].freq = ents[i].freq / 2 + 1;
507 }
508
509 goto again;
510 }
511
512 static void
513 djw_build_codes (usize_t *codes, const uint8_t *clen, usize_t asize, usize_t abs_max)
514 {
515 usize_t i, l;
516 usize_t min_clen = DJW_MAX_CODELEN;
517 usize_t max_clen = 0;
518 usize_t code = 0;
519
520 /* Find the min and max code length */
521 for (i = 0; i < asize; i += 1)
522 {
523 if (clen[i] > 0 && clen[i] < min_clen)
524 {
525 min_clen = clen[i];
526 }
527
528 max_clen = xd3_max (max_clen, (usize_t) clen[i]);
529 }
530
531 XD3_ASSERT (max_clen <= abs_max);
532
533 /* Generate a code for each symbol with the appropriate length. */
534 for (l = min_clen; l <= max_clen; l += 1)
535 {
536 for (i = 0; i < asize; i += 1)
537 {
538 if (clen[i] == l)
539 {
540 codes[i] = code++;
541 }
542 }
543
544 code <<= 1;
545 }
546
547 IF_DEBUG2 ({
548 for (i = 0; i < asize; i += 1)
549 {
550 DP(RINT "code[%d] = %u\n", i, codes[i]);
551 }
552 });
553 }
554
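/* Illustrative sketch (not part of the library): a small worked case of the
 * two routines above, with arbitrary frequencies.  The expected lengths and
 * canonical codes in the comments were derived by hand from the algorithms
 * as written; the function name is illustrative only. */
#if 0
static void
djw_prefix_example (void)
{
  static const djw_weight freq[6] = { 45, 13, 12, 16, 9, 5 };
  uint8_t  clen[6];
  usize_t  code[6];
  uint32_t bits;

  /* Expected code lengths: {1, 3, 3, 3, 4, 4}, 224 total bits. */
  bits = djw_build_prefix (freq, clen, 6, DJW_MAX_CODELEN);

  /* Canonical codes, shortest first, ties broken in symbol order:
   *   sym 0: "0"    sym 1: "100"   sym 2: "101"
   *   sym 3: "110"  sym 4: "1110"  sym 5: "1111"  */
  djw_build_codes (code, clen, 6, DJW_MAX_CODELEN);

  (void) bits;
}
#endif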
555 /*********************************************************************/
556 /* MOVE-TO-FRONT */
557 /*********************************************************************/
558 static void
559 djw_compute_mtf_1_2 (djw_prefix *prefix,
560 uint8_t *mtf,
561 djw_weight *freq_out,
562 usize_t nsym)
563 {
564 size_t i, j, k;
565 usize_t sym;
566 usize_t size = prefix->scount;
567 usize_t mtf_i = 0;
568 int mtf_run = 0;
569
570 /* This +2 is for the RUN_0, RUN_1 codes */
571 memset (freq_out, 0, sizeof (freq_out[0]) * (nsym+2));
572
573 for (i = 0; i < size; )
574 {
575 /* OPT: Bzip optimizes this algorithm a little by effectively checking
576 * j==0 before the MTF update. */
577 sym = prefix->symbol[i++];
578
579 for (j = 0; mtf[j] != sym; j += 1) { }
580
581 XD3_ASSERT (j <= nsym);
582
583 for (k = j; k >= 1; k -= 1) { mtf[k] = mtf[k-1]; }
584
585 mtf[0] = sym;
586
587 if (j == 0)
588 {
589 mtf_run += 1;
590 continue;
591 }
592
593 if (mtf_run > 0)
594 {
595 djw_update_1_2 (& mtf_run, & mtf_i, prefix->mtfsym, freq_out);
596 }
597
598 /* Non-zero symbols are offset by RUN_1 */
599 prefix->mtfsym[mtf_i++] = (uint8_t)(j+RUN_1);
600 freq_out[j+RUN_1] += 1;
601 }
602
603 if (mtf_run > 0)
604 {
605 djw_update_1_2 (& mtf_run, & mtf_i, prefix->mtfsym, freq_out);
606 }
607
608 prefix->mcount = mtf_i;
609 }
610
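/* Worked example (a sketch based on the code above, not in the original):
 * with the initial order from djw_init_clen_mtf_1_2 ({0, 4, 5, 6, 7, 8, 9,
 * 10, 3, ...}) and the input symbols {4, 4, 4, 5}, the first 4 sits at MTF
 * position 1 and is emitted as 1+RUN_1 = 2, the next two 4s form a run of 2
 * emitted as the single code RUN_1, and 5 (now at position 2) is emitted as
 * 2+RUN_1 = 3.  So mtfsym becomes {2, RUN_1, 3} with mcount == 3. */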
611 /* Counts character frequencies of the input buffer, returns the size. */
612 static usize_t
613 djw_count_freqs (djw_weight *freq, xd3_output *input)
614 {
615 xd3_output *in;
616 usize_t size = 0;
617
618 memset (freq, 0, sizeof (freq[0]) * ALPHABET_SIZE);
619
620 for (in = input; in; in = in->next_page)
621 {
622 const uint8_t *p = in->base;
623 const uint8_t *p_max = p + in->next;
624
625 size += in->next;
626
627 do
628 {
629 ++freq[*p];
630 }
631 while (++p < p_max);
632 }
633
634 IF_DEBUG2 ({int i;
635 DP(RINT "freqs: ");
636 for (i = 0; i < ALPHABET_SIZE; i += 1)
637 {
638 DP(RINT "%u ", freq[i]);
639 }
640 DP(RINT "\n");});
641
642 return size;
643 }
644
645 static void
646 djw_compute_multi_prefix (usize_t groups,
647 uint8_t clen[DJW_MAX_GROUPS][ALPHABET_SIZE],
648 djw_prefix *prefix)
649 {
650 usize_t gp, i;
651
652 prefix->scount = ALPHABET_SIZE;
653 memcpy (prefix->symbol, clen[0], ALPHABET_SIZE);
654
655 for (gp = 1; gp < groups; gp += 1)
656 {
657 for (i = 0; i < ALPHABET_SIZE; i += 1)
658 {
659 if (clen[gp][i] == 0)
660 {
661 continue;
662 }
663
664 prefix->symbol[prefix->scount++] = clen[gp][i];
665 }
666 }
667 }
668
669 static void
670 djw_compute_prefix_1_2 (djw_prefix *prefix, djw_weight *freq)
671 {
672 /* This +1 is for the 0 code-length. */
673 uint8_t clmtf[DJW_MAX_CODELEN+1];
674
675 djw_init_clen_mtf_1_2 (clmtf);
676
677 djw_compute_mtf_1_2 (prefix, clmtf, freq, DJW_MAX_CODELEN);
678 }
679
680 static int
681 djw_encode_prefix (xd3_stream *stream,
682 xd3_output **output,
683 bit_state *bstate,
684 djw_prefix *prefix)
685 {
686 int ret;
687 size_t i;
688 usize_t num_to_encode;
689 djw_weight clfreq[DJW_TOTAL_CODES];
690 uint8_t clclen[DJW_TOTAL_CODES];
691 usize_t clcode[DJW_TOTAL_CODES];
692
693 /* Move-to-front encode prefix symbols, count frequencies */
694 djw_compute_prefix_1_2 (prefix, clfreq);
695
696 /* Compute codes */
697 djw_build_prefix (clfreq, clclen, DJW_TOTAL_CODES, DJW_MAX_CLCLEN);
698 djw_build_codes (clcode, clclen, DJW_TOTAL_CODES, DJW_MAX_CLCLEN);
699
700 /* Compute number of extra codes beyond basic ones for this template. */
701 num_to_encode = DJW_TOTAL_CODES;
702 while (num_to_encode > DJW_EXTRA_12OFFSET && clclen[num_to_encode-1] == 0)
703 {
704 num_to_encode -= 1;
705 }
706 XD3_ASSERT (num_to_encode - DJW_EXTRA_12OFFSET < (1 << DJW_EXTRA_CODE_BITS));
707
708 /* Encode: # of extra codes */
709 if ((ret = xd3_encode_bits (stream, output, bstate, DJW_EXTRA_CODE_BITS,
710 num_to_encode - DJW_EXTRA_12OFFSET)))
711 {
712 return ret;
713 }
714
715 /* Encode: MTF code lengths */
716 for (i = 0; i < num_to_encode; i += 1)
717 {
718 if ((ret = xd3_encode_bits (stream, output, bstate,
719 DJW_CLCLEN_BITS, clclen[i])))
720 {
721 return ret;
722 }
723 }
724
725 /* Encode: CLEN code lengths */
726 for (i = 0; i < prefix->mcount; i += 1)
727 {
728 usize_t mtf_sym = prefix->mtfsym[i];
729 usize_t bits = clclen[mtf_sym];
730 usize_t code = clcode[mtf_sym];
731
732 if ((ret = xd3_encode_bits (stream, output, bstate, bits, code)))
733 {
734 return ret;
735 }
736 }
737
738 return 0;
739 }
740
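/* Layout note (a sketch read from djw_encode_prefix above and
 * djw_decode_clclen below): each prefix is serialized as
 *   DJW_EXTRA_CODE_BITS bits:             num_to_encode - DJW_EXTRA_12OFFSET
 *   num_to_encode * DJW_CLCLEN_BITS bits: code lengths of the CLEN code
 *   variable:                             the MTF+1/2 coded CLEN symbols,
 *                                         written with those code lengths
 * Trailing zero lengths past DJW_EXTRA_12OFFSET are implied, not sent. */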
741 static void
742 djw_compute_selector_1_2 (djw_prefix *prefix,
743 usize_t groups,
744 djw_weight *gbest_freq)
745 {
746 uint8_t grmtf[DJW_MAX_GROUPS];
747 usize_t i;
748
749 for (i = 0; i < groups; i += 1) { grmtf[i] = i; }
750
751 djw_compute_mtf_1_2 (prefix, grmtf, gbest_freq, groups);
752 }
753
754 static int
755 xd3_encode_howmany_groups (xd3_stream *stream,
756 xd3_sec_cfg *cfg,
757 usize_t input_size,
758 usize_t *ret_groups,
759 usize_t *ret_sector_size)
760 {
761 usize_t cfg_groups = 0;
762 usize_t cfg_sector_size = 0;
763 usize_t sugg_groups = 0;
764 usize_t sugg_sector_size = 0;
765
766 if (cfg->ngroups != 0)
767 {
768 if (cfg->ngroups > DJW_MAX_GROUPS)
769 {
770 stream->msg = "invalid secondary encoder group number";
771 return XD3_INTERNAL;
772 }
773
774 cfg_groups = cfg->ngroups;
775 }
776
777 if (cfg->sector_size != 0)
778 {
779 if (cfg->sector_size < DJW_SECTORSZ_MULT ||
780 cfg->sector_size > DJW_SECTORSZ_MAX ||
781 (cfg->sector_size % DJW_SECTORSZ_MULT) != 0)
782 {
783 stream->msg = "invalid secondary encoder sector size";
784 return XD3_INTERNAL;
785 }
786
787 cfg_sector_size = cfg->sector_size;
788 }
789
790 if (cfg_groups == 0 || cfg_sector_size == 0)
791 {
792 /* These values were found empirically using xdelta3-tune around version
793 * xdfs-0.256. */
794 switch (cfg->data_type)
795 {
796 case DATA_SECTION:
797 if (input_size < 1000) { sugg_groups = 1; sugg_sector_size = 0; }
798 else if (input_size < 4000) { sugg_groups = 2; sugg_sector_size = 10; }
799 else if (input_size < 7000) { sugg_groups = 3; sugg_sector_size = 10; }
800 else if (input_size < 10000) { sugg_groups = 4; sugg_sector_size = 10; }
801 else if (input_size < 25000) { sugg_groups = 5; sugg_sector_size = 10; }
802 else if (input_size < 50000) { sugg_groups = 7; sugg_sector_size = 20; }
803 else if (input_size < 100000) { sugg_groups = 8; sugg_sector_size = 30; }
804 else { sugg_groups = 8; sugg_sector_size = 70; }
805 break;
806 case INST_SECTION:
807 if (input_size < 7000) { sugg_groups = 1; sugg_sector_size = 0; }
808 else if (input_size < 10000) { sugg_groups = 2; sugg_sector_size = 50; }
809 else if (input_size < 25000) { sugg_groups = 3; sugg_sector_size = 50; }
810 else if (input_size < 50000) { sugg_groups = 6; sugg_sector_size = 40; }
811 else if (input_size < 100000) { sugg_groups = 8; sugg_sector_size = 40; }
812 else { sugg_groups = 8; sugg_sector_size = 40; }
813 break;
814 case ADDR_SECTION:
815 if (input_size < 9000) { sugg_groups = 1; sugg_sector_size = 0; }
816 else if (input_size < 25000) { sugg_groups = 2; sugg_sector_size = 130; }
817 else if (input_size < 50000) { sugg_groups = 3; sugg_sector_size = 130; }
818 else if (input_size < 100000) { sugg_groups = 5; sugg_sector_size = 130; }
819 else { sugg_groups = 7; sugg_sector_size = 130; }
820 break;
821 }
822
823 if (cfg_groups == 0)
824 {
825 cfg_groups = sugg_groups;
826 }
827
828 if (cfg_sector_size == 0)
829 {
830 cfg_sector_size = sugg_sector_size;
831 }
832 }
833
834 if (cfg_groups != 1 && cfg_sector_size == 0)
835 {
836 switch (cfg->data_type)
837 {
838 case DATA_SECTION:
839 cfg_sector_size = 20;
840 break;
841 case INST_SECTION:
842 cfg_sector_size = 50;
843 break;
844 case ADDR_SECTION:
845 cfg_sector_size = 130;
846 break;
847 }
848 }
849
850 (*ret_groups) = cfg_groups;
851 (*ret_sector_size) = cfg_sector_size;
852
853 XD3_ASSERT (cfg_groups > 0 && cfg_groups <= DJW_MAX_GROUPS);
854 XD3_ASSERT (cfg_groups == 1 ||
855 (cfg_sector_size >= DJW_SECTORSZ_MULT &&
856 cfg_sector_size <= DJW_SECTORSZ_MAX));
857
858 return 0;
859 }
860
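/* Worked example (a sketch based on the table above, not in the original):
 * a 30000-byte DATA section with no overrides gets groups = 7 and
 * sector_size = 20, i.e. 1 + (30000-1)/20 = 1500 sectors.  xd3_encode_huff
 * below then encodes groups-1 = 6 in DJW_GROUP_BITS and
 * (20/DJW_SECTORSZ_MULT)-1 = 3 in DJW_SECTORSZ_BITS. */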
861 static int
862 xd3_encode_huff (xd3_stream *stream,
863 djw_stream *h,
864 xd3_output *input,
865 xd3_output *output,
866 xd3_sec_cfg *cfg)
867 {
868 int ret;
869 usize_t groups, sector_size;
870 bit_state bstate = BIT_STATE_ENCODE_INIT;
871 xd3_output *in;
872 usize_t output_bits;
873 usize_t input_bits;
874 usize_t input_bytes;
875 usize_t initial_offset = output->next;
876 djw_weight real_freq[ALPHABET_SIZE];
877 uint8_t *gbest = NULL;
878 uint8_t *gbest_mtf = NULL;
879
880 input_bytes = djw_count_freqs (real_freq, input);
881 input_bits = input_bytes * 8;
882
883 XD3_ASSERT (input_bytes > 0);
884
885 if ((ret = xd3_encode_howmany_groups (stream, cfg, input_bytes,
886 & groups, & sector_size)))
887 {
888 return ret;
889 }
890
891 if (0)
892 {
893 regroup:
894 /* Sometimes we dynamically decide there are too many groups. Arrive
895 * here. */
896 output->next = initial_offset;
897 xd3_bit_state_encode_init (& bstate);
898 }
899
900 /* Encode: # of groups (3 bits) */
901 if ((ret = xd3_encode_bits (stream, & output, & bstate,
902 DJW_GROUP_BITS, groups-1))) { goto failure; }
903
904 if (groups == 1)
905 {
906 /* Single Huffman group. */
907 usize_t code[ALPHABET_SIZE]; /* Codes */
908 uint8_t clen[ALPHABET_SIZE];
909 uint8_t prefix_mtfsym[ALPHABET_SIZE];
910 djw_prefix prefix;
911
912 output_bits =
913 djw_build_prefix (real_freq, clen, ALPHABET_SIZE, DJW_MAX_CODELEN);
914 djw_build_codes (code, clen, ALPHABET_SIZE, DJW_MAX_CODELEN);
915
916 if (output_bits + EFFICIENCY_BITS >= input_bits && ! cfg->inefficient)
917 {
918 goto nosecond;
919 }
920
921 /* Encode: prefix */
922 prefix.mtfsym = prefix_mtfsym;
923 prefix.symbol = clen;
924 prefix.scount = ALPHABET_SIZE;
925
926 if ((ret = djw_encode_prefix (stream, & output, & bstate, & prefix)))
927 {
928 goto failure;
929 }
930
931 if (output_bits + (8 * output->next) + EFFICIENCY_BITS >=
932 input_bits && ! cfg->inefficient)
933 {
934 goto nosecond;
935 }
936
937 /* Encode: data */
938 for (in = input; in; in = in->next_page)
939 {
940 const uint8_t *p = in->base;
941 const uint8_t *p_max = p + in->next;
942
943 do
944 {
945 usize_t sym = *p++;
946 usize_t bits = clen[sym];
947
948 IF_DEBUG (output_bits -= bits);
949
950 if ((ret = xd3_encode_bits (stream, & output,
951 & bstate, bits, code[sym])))
952 {
953 goto failure;
954 }
955 }
956 while (p < p_max);
957 }
958
959 XD3_ASSERT (output_bits == 0);
960 }
961 else
962 {
963 /* DJW Huffman */
964 djw_weight evolve_freq[DJW_MAX_GROUPS][ALPHABET_SIZE];
965 uint8_t evolve_clen[DJW_MAX_GROUPS][ALPHABET_SIZE];
966 djw_weight left = input_bytes;
967 usize_t gp;
968 usize_t niter = 0;
969 usize_t select_bits;
970 usize_t sym1 = 0, sym2 = 0, s;
971 usize_t gcost[DJW_MAX_GROUPS];
972 usize_t gbest_code[DJW_MAX_GROUPS+2];
973 uint8_t gbest_clen[DJW_MAX_GROUPS+2];
974 usize_t gbest_max = 1 + (input_bytes - 1) / sector_size;
975 usize_t best_bits = 0;
976 usize_t gbest_no;
977 usize_t gpcnt;
978 const uint8_t *p;
979 IF_DEBUG2 (usize_t gcount[DJW_MAX_GROUPS]);
980
981 /* Encode: sector size (5 bits) */
982 if ((ret = xd3_encode_bits (stream, & output, & bstate,
983 DJW_SECTORSZ_BITS,
984 (sector_size/DJW_SECTORSZ_MULT)-1)))
985 {
986 goto failure;
987 }
988
989 /* Dynamic allocation. */
990 if (gbest == NULL)
991 {
992 if ((gbest = (uint8_t*) xd3_alloc (stream, gbest_max, 1)) == NULL)
993 {
994 ret = ENOMEM;
995 goto failure;
996 }
997 }
998
999 if (gbest_mtf == NULL)
1000 {
1001 if ((gbest_mtf = (uint8_t*) xd3_alloc (stream, gbest_max, 1)) == NULL)
1002 {
1003 ret = ENOMEM;
1004 goto failure;
1005 }
1006 }
1007
1008 /* OPT: Some of the inner loops can be optimized, as shown in bzip2 */
1009
1010 /* Generate initial code length tables. */
1011 for (gp = 0; gp < groups; gp += 1)
1012 {
1013 djw_weight sum = 0;
1014 djw_weight goal = left / (groups - gp);
1015
1016 IF_DEBUG2 (usize_t nz = 0);
1017
1018 /* Due to the single-code granularity of this distribution, it may
1019 * be that we can't generate a distribution for each group. In that
1020 * case subtract one group and try again. If (inefficient), we're
1021 * testing group behavior, so don't mess things up. */
1022 if (goal == 0 && !cfg->inefficient)
1023 {
1024 IF_DEBUG2 (DP(RINT "too many groups (%u), dropping one\n",
1025 groups));
1026 groups -= 1;
1027 goto regroup;
1028 }
1029
1030 /* Sum == goal is possible when (cfg->inefficient)... */
1031 while (sum < goal)
1032 {
1033 XD3_ASSERT (sym2 < ALPHABET_SIZE);
1034 IF_DEBUG2 (nz += real_freq[sym2] != 0);
1035 sum += real_freq[sym2++];
1036 }
1037
1038 IF_DEBUG2(DP(RINT "group %u has symbols %u..%u (%u non-zero) "
1039 "(%u/%u = %.3f)\n",
1040 gp, sym1, sym2, nz, sum,
1041 input_bytes, sum / (double)input_bytes););
1042
1043 for (s = 0; s < ALPHABET_SIZE; s += 1)
1044 {
1045 evolve_clen[gp][s] = (s >= sym1 && s <= sym2) ? 1 : 16;
1046 }
1047
1048 left -= sum;
1049 sym1 = sym2+1;
1050 }
1051
1052 repeat:
1053
1054 niter += 1;
1055 gbest_no = 0;
1056 memset (evolve_freq, 0, sizeof (evolve_freq[0]) * groups);
1057 IF_DEBUG2 (memset (gcount, 0, sizeof (gcount[0]) * groups));
1058
1059 /* For each input page (loop is irregular to allow non-pow2-size group
1060 * size). */
1061 in = input;
1062 p = in->base;
1063
1064 /* For each group-size sector. */
1065 do
1066 {
1067 const uint8_t *p0 = p;
1068 xd3_output *in0 = in;
1069 usize_t best = 0;
1070 usize_t winner = 0;
1071
1072 /* Select best group for each sector, update evolve_freq. */
1073 memset (gcost, 0, sizeof (gcost[0]) * groups);
1074
1075 /* For each byte in sector. */
1076 for (gpcnt = 0; gpcnt < sector_size; gpcnt += 1)
1077 {
1078 /* For each group. */
1079 for (gp = 0; gp < groups; gp += 1)
1080 {
1081 gcost[gp] += evolve_clen[gp][*p];
1082 }
1083
1084 /* Check end-of-input-page. */
1085 # define GP_PAGE() \
1086 if ((usize_t)(++p - in->base) == in->next) \
1087 { \
1088 in = in->next_page; \
1089 if (in == NULL) { break; } \
1090 p = in->base; \
1091 }
1092
1093 GP_PAGE ();
1094 }
1095
1096 /* Find min cost group for this sector */
1097 best = USIZE_T_MAX;
1098 for (gp = 0; gp < groups; gp += 1)
1099 {
1100 if (gcost[gp] < best)
1101 {
1102 best = gcost[gp];
1103 winner = gp;
1104 }
1105 }
1106
1107 XD3_ASSERT(gbest_no < gbest_max);
1108 gbest[gbest_no++] = winner;
1109 IF_DEBUG2 (gcount[winner] += 1);
1110
1111 p = p0;
1112 in = in0;
1113
1114 /* Update group frequencies. */
1115 for (gpcnt = 0; gpcnt < sector_size; gpcnt += 1)
1116 {
1117 evolve_freq[winner][*p] += 1;
1118
1119 GP_PAGE ();
1120 }
1121 }
1122 while (in != NULL);
1123
1124 XD3_ASSERT (gbest_no == gbest_max);
1125
1126 /* Recompute code lengths. */
1127 output_bits = 0;
1128 for (gp = 0; gp < groups; gp += 1)
1129 {
1130 int i;
1131 uint8_t evolve_zero[ALPHABET_SIZE];
1132 int any_zeros = 0;
1133
1134 memset (evolve_zero, 0, sizeof (evolve_zero));
1135
1136 /* Cannot allow a zero clen when the real frequency is non-zero.
1137 * Note: this means we are going to encode a fairly long code for
1138 * these unused entries. An improvement would be to implement a
1139 * NOTUSED code for when these are actually zero, but this requires
1140 * another data structure (evolve_zero) since we don't know when
1141 * evolve_freq[i] == 0... Briefly tested, looked worse. */
1142 for (i = 0; i < ALPHABET_SIZE; i += 1)
1143 {
1144 if (evolve_freq[gp][i] == 0 && real_freq[i] != 0)
1145 {
1146 evolve_freq[gp][i] = 1;
1147 evolve_zero[i] = 1;
1148 any_zeros = 1;
1149 }
1150 }
1151
1152 output_bits += djw_build_prefix (evolve_freq[gp], evolve_clen[gp],
1153 ALPHABET_SIZE, DJW_MAX_CODELEN);
1154
1155 /* The above faking of frequencies does not matter for the last
1156 * iteration, but we don't know when that is yet. It also skews the
1157 * output_bits computation, so correct for that here; this is needed for
1158 * accuracy and for the (output_bits==0) assert after all bits are output. */
1159 if (any_zeros)
1160 {
1161 IF_DEBUG2 (usize_t save_total = output_bits);
1162
1163 for (i = 0; i < ALPHABET_SIZE; i += 1)
1164 {
1165 if (evolve_zero[i]) { output_bits -= evolve_clen[gp][i]; }
1166 }
1167
1168 IF_DEBUG2 (DP(RINT "evolve_zero reduced %u bits in group %u\n",
1169 save_total - output_bits, gp));
1170 }
1171 }
1172
1173 IF_DEBUG2(
1174 DP(RINT "pass %u total bits: %u group uses: ", niter, output_bits);
1175 for (gp = 0; gp < groups; gp += 1) { DP(RINT "%u ", gcount[gp]); }
1176 DP(RINT "\n");
1177 );
1178
1179 /* End iteration. */
1180
1181 IF_DEBUG2 (if (niter > 1 && best_bits < output_bits) {
1182 DP(RINT "iteration lost %u bits\n", output_bits - best_bits); });
1183
1184 if (niter == 1 || (niter < DJW_MAX_ITER &&
1185 (best_bits - output_bits) >= DJW_MIN_IMPROVEMENT))
1186 {
1187 best_bits = output_bits;
1188 goto repeat;
1189 }
1190
1191 /* Efficiency check. */
1192 if (output_bits + EFFICIENCY_BITS >= input_bits && ! cfg->inefficient)
1193 {
1194 goto nosecond;
1195 }
1196
1197 IF_DEBUG2 (DP(RINT "djw compression: %u -> %0.3f\n",
1198 input_bytes, output_bits / 8.0));
1199
1200 /* Encode: prefix */
1201 {
1202 uint8_t prefix_symbol[DJW_MAX_GROUPS * ALPHABET_SIZE];
1203 uint8_t prefix_mtfsym[DJW_MAX_GROUPS * ALPHABET_SIZE];
1204 uint8_t prefix_repcnt[DJW_MAX_GROUPS * ALPHABET_SIZE];
1205 djw_prefix prefix;
1206
1207 prefix.symbol = prefix_symbol;
1208 prefix.mtfsym = prefix_mtfsym;
1209 prefix.repcnt = prefix_repcnt;
1210
1211 djw_compute_multi_prefix (groups, evolve_clen, & prefix);
1212 if ((ret = djw_encode_prefix (stream, & output, & bstate, & prefix)))
1213 {
1214 goto failure;
1215 }
1216 }
1217
1218 /* Encode: selector frequencies */
1219 {
1220 /* DJW_MAX_GROUPS +2 is for RUN_0, RUN_1 symbols. */
1221 djw_weight gbest_freq[DJW_MAX_GROUPS+2];
1222 djw_prefix gbest_prefix;
1223 usize_t i;
1224
1225 gbest_prefix.scount = gbest_no;
1226 gbest_prefix.symbol = gbest;
1227 gbest_prefix.mtfsym = gbest_mtf;
1228
1229 djw_compute_selector_1_2 (& gbest_prefix, groups, gbest_freq);
1230
1231 select_bits =
1232 djw_build_prefix (gbest_freq, gbest_clen, groups+1, DJW_MAX_GBCLEN);
1233 djw_build_codes (gbest_code, gbest_clen, groups+1, DJW_MAX_GBCLEN);
1234
1235 for (i = 0; i < groups+1; i += 1)
1236 {
1237 if ((ret = xd3_encode_bits (stream, & output, & bstate,
1238 DJW_GBCLEN_BITS, gbest_clen[i])))
1239 {
1240 goto failure;
1241 }
1242 }
1243
1244 for (i = 0; i < gbest_prefix.mcount; i += 1)
1245 {
1246 usize_t gp_mtf = gbest_mtf[i];
1247 usize_t gp_sel_bits = gbest_clen[gp_mtf];
1248 usize_t gp_sel_code = gbest_code[gp_mtf];
1249
1250 XD3_ASSERT (gp_mtf < groups+1);
1251
1252 if ((ret = xd3_encode_bits (stream, & output, & bstate,
1253 gp_sel_bits, gp_sel_code)))
1254 {
1255 goto failure;
1256 }
1257
1258 IF_DEBUG (select_bits -= gp_sel_bits);
1259 }
1260
1261 XD3_ASSERT (select_bits == 0);
1262 }
1263
1264 /* Efficiency check. */
1265 if (output_bits + select_bits + (8 * output->next) +
1266 EFFICIENCY_BITS >= input_bits && ! cfg->inefficient)
1267 {
1268 goto nosecond;
1269 }
1270
1271 /* Encode: data */
1272 {
1273 usize_t evolve_code[DJW_MAX_GROUPS][ALPHABET_SIZE];
1274 usize_t sector = 0;
1275
1276 /* Build code tables for each group. */
1277 for (gp = 0; gp < groups; gp += 1)
1278 {
1279 djw_build_codes (evolve_code[gp], evolve_clen[gp],
1280 ALPHABET_SIZE, DJW_MAX_CODELEN);
1281 }
1282
1283 /* Now loop over the input. */
1284 in = input;
1285 p = in->base;
1286
1287 do
1288 {
1289 /* For each sector. */
1290 usize_t gp_best = gbest[sector];
1291 usize_t *gp_codes = evolve_code[gp_best];
1292 uint8_t *gp_clens = evolve_clen[gp_best];
1293
1294 XD3_ASSERT (sector < gbest_no);
1295
1296 sector += 1;
1297
1298 /* Encode the sector data. */
1299 for (gpcnt = 0; gpcnt < sector_size; gpcnt += 1)
1300 {
1301 usize_t sym = *p;
1302 usize_t bits = gp_clens[sym];
1303 usize_t code = gp_codes[sym];
1304
1305 IF_DEBUG (output_bits -= bits);
1306
1307 if ((ret = xd3_encode_bits (stream, & output, & bstate,
1308 bits, code)))
1309 {
1310 goto failure;
1311 }
1312
1313 GP_PAGE ();
1314 }
1315 }
1316 while (in != NULL);
1317
1318 XD3_ASSERT (select_bits == 0);
1319 XD3_ASSERT (output_bits == 0);
1320 }
1321 }
1322
1323 ret = xd3_flush_bits (stream, & output, & bstate);
1324
1325 if (0)
1326 {
1327 nosecond:
1328 stream->msg = "secondary compression was inefficient";
1329 ret = XD3_NOSECOND;
1330 }
1331
1332 failure:
1333
1334 xd3_free (stream, gbest);
1335 xd3_free (stream, gbest_mtf);
1336 return ret;
1337 }
1338 #endif /* XD3_ENCODER */
1339
1340 /*********************************************************************/
1341 /* DECODE */
1342 /*********************************************************************/
1343
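/* Layout note (a sketch read from xd3_encode_huff above and xd3_decode_huff
 * below); fields marked (groups > 1) are absent for a single group:
 *   DJW_GROUP_BITS    bits: groups - 1
 *   DJW_SECTORSZ_BITS bits: sector_size/DJW_SECTORSZ_MULT - 1   (groups > 1)
 *   per-group CLEN prefix            (see djw_encode_prefix)
 *   (groups+1) * DJW_GBCLEN_BITS bits: selector code lengths    (groups > 1)
 *   MTF+1/2 coded per-sector group selectors                    (groups > 1)
 *   the data itself, one prefix code per byte, table switched per sector */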
1344 static void
1345 djw_build_decoder (xd3_stream *stream,
1346 usize_t asize,
1347 usize_t abs_max,
1348 const uint8_t *clen,
1349 uint8_t *inorder,
1350 usize_t *base,
1351 usize_t *limit,
1352 usize_t *min_clenp,
1353 usize_t *max_clenp)
1354 {
1355 usize_t i, l;
1356 const uint8_t *ci;
1357 usize_t nr_clen [DJW_TOTAL_CODES];
1358 usize_t tmp_base[DJW_TOTAL_CODES];
1359 usize_t min_clen;
1360 usize_t max_clen;
1361
1362 /* Assumption: the two temporary arrays are large enough to hold abs_max. */
1363 XD3_ASSERT (abs_max <= DJW_MAX_CODELEN);
1364
1365 /* This looks something like the start of zlib's inftrees.c */
1366 memset (nr_clen, 0, sizeof (nr_clen[0]) * (abs_max+1));
1367
1368 /* Count number of each code length */
1369 i = asize;
1370 ci = clen;
1371 do
1372 {
1373 /* Caller _must_ check that values are in-range. Most of the time the
1374 * caller decodes a specific number of bits, which implies the max value,
1375 * and the other times the caller decodes a huffman value, which must be
1376 * in-range. Therefore, it's an assertion and this function cannot
1377 * otherwise fail. */
1378 XD3_ASSERT (*ci <= abs_max);
1379
1380 nr_clen[*ci++]++;
1381 }
1382 while (--i != 0);
1383
1384 /* Compute min, max. */
1385 for (i = 1; i <= abs_max; i += 1) { if (nr_clen[i]) { break; } }
1386 min_clen = i;
1387 for (i = abs_max; i != 0; i -= 1) { if (nr_clen[i]) { break; } }
1388 max_clen = i;
1389
1390 /* Fill the BASE, LIMIT table. */
1391 tmp_base[min_clen] = 0;
1392 base[min_clen] = 0;
1393 limit[min_clen] = nr_clen[min_clen] - 1;
1394 for (i = min_clen + 1; i <= max_clen; i += 1)
1395 {
1396 usize_t last_limit = ((limit[i-1] + 1) << 1);
1397 tmp_base[i] = tmp_base[i-1] + nr_clen[i-1];
1398 limit[i] = last_limit + nr_clen[i] - 1;
1399 base[i] = last_limit - tmp_base[i];
1400 }
1401
1402 /* Fill the inorder array, canonically ordered codes. */
1403 ci = clen;
1404 for (i = 0; i < asize; i += 1)
1405 {
1406 if ((l = *ci++) != 0)
1407 {
1408 inorder[tmp_base[l]++] = i;
1409 }
1410 }
1411
1412 *min_clenp = min_clen;
1413 *max_clenp = max_clen;
1414 }
1415
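/* Worked example (a sketch based on the code above, not in the original):
 * for the code lengths {1, 3, 3, 3, 4, 4} from the djw_build_prefix example
 * earlier, the tables come out as
 *   inorder     = {0, 1, 2, 3, 4, 5}   (symbols already sorted by length)
 *   base[1..4]  = {0, 1, 3, 10}
 *   limit[1..4] = {0, 1, 6, 15}
 * Decoding "101": after 1 bit code=1 > limit[1], after 2 bits code=2 >
 * limit[2], after 3 bits code=5 <= limit[3], so the symbol is
 * inorder[5 - base[3]] = inorder[2] = 2, matching the canonical code "101"
 * assigned by djw_build_codes. */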
1416 static inline int
1417 djw_decode_symbol (xd3_stream *stream,
1418 bit_state *bstate,
1419 const uint8_t **input,
1420 const uint8_t *input_end,
1421 const uint8_t *inorder,
1422 const usize_t *base,
1423 const usize_t *limit,
1424 usize_t min_clen,
1425 usize_t max_clen,
1426 usize_t *sym,
1427 usize_t max_sym)
1428 {
1429 usize_t code = 0;
1430 usize_t bits = 0;
1431
1432 /* OPT: Supposedly a small lookup table improves speed here... */
1433
1434 /* Code outline is similar to xd3_decode_bits... */
1435 if (bstate->cur_mask == 0x100) { goto next_byte; }
1436
1437 for (;;)
1438 {
1439 do
1440 {
1441 if (bits == max_clen) { goto corrupt; }
1442
1443 bits += 1;
1444 code = (code << 1);
1445
1446 if (bstate->cur_byte & bstate->cur_mask) { code |= 1; }
1447
1448 bstate->cur_mask <<= 1;
1449
1450 if (bits >= min_clen && code <= limit[bits]) { goto done; }
1451 }
1452 while (bstate->cur_mask != 0x100);
1453
1454 next_byte:
1455
1456 if (*input == input_end)
1457 {
1458 stream->msg = "secondary decoder end of input";
1459 return XD3_INVALID_INPUT;
1460 }
1461
1462 bstate->cur_byte = *(*input)++;
1463 bstate->cur_mask = 1;
1464 }
1465
1466 done:
1467
1468 if (base[bits] <= code)
1469 {
1470 usize_t offset = code - base[bits];
1471
1472 if (offset <= max_sym)
1473 {
1474 IF_DEBUG2 (DP(RINT "(j) %u ", code));
1475 *sym = inorder[offset];
1476 return 0;
1477 }
1478 }
1479
1480 corrupt:
1481 stream->msg = "secondary decoder invalid code";
1482 return XD3_INVALID_INPUT;
1483 }
1484
1485 static int
1486 djw_decode_clclen (xd3_stream *stream,
1487 bit_state *bstate,
1488 const uint8_t **input,
1489 const uint8_t *input_end,
1490 uint8_t *cl_inorder,
1491 usize_t *cl_base,
1492 usize_t *cl_limit,
1493 usize_t *cl_minlen,
1494 usize_t *cl_maxlen,
1495 uint8_t *cl_mtf)
1496 {
1497 int ret;
1498 uint8_t cl_clen[DJW_TOTAL_CODES];
1499 usize_t num_codes, value;
1500 usize_t i;
1501
1502 /* How many extra code lengths to encode. */
1503 if ((ret = xd3_decode_bits (stream, bstate, input,
1504 input_end, DJW_EXTRA_CODE_BITS, & num_codes)))
1505 {
1506 return ret;
1507 }
1508
1509 num_codes += DJW_EXTRA_12OFFSET;
1510
1511 /* Read num_codes. */
1512 for (i = 0; i < num_codes; i += 1)
1513 {
1514 if ((ret = xd3_decode_bits (stream, bstate, input,
1515 input_end, DJW_CLCLEN_BITS, & value)))
1516 {
1517 return ret;
1518 }
1519
1520 cl_clen[i] = value;
1521 }
1522
1523 /* Set the rest to zero. */
1524 for (; i < DJW_TOTAL_CODES; i += 1) { cl_clen[i] = 0; }
1525
1526 /* No need to check for in-range clen values, because: */
1527 XD3_ASSERT (1 << DJW_CLCLEN_BITS == DJW_MAX_CLCLEN + 1);
1528
1529 /* Build the code-length decoder. */
1530 djw_build_decoder (stream, DJW_TOTAL_CODES, DJW_MAX_CLCLEN,
1531 cl_clen, cl_inorder, cl_base,
1532 cl_limit, cl_minlen, cl_maxlen);
1533
1534 /* Initialize the MTF state. */
1535 djw_init_clen_mtf_1_2 (cl_mtf);
1536
1537 return 0;
1538 }
1539
1540 static inline int
1541 djw_decode_1_2 (xd3_stream *stream,
1542 bit_state *bstate,
1543 const uint8_t **input,
1544 const uint8_t *input_end,
1545 const uint8_t *inorder,
1546 const usize_t *base,
1547 const usize_t *limit,
1548 const usize_t *minlen,
1549 const usize_t *maxlen,
1550 uint8_t *mtfvals,
1551 usize_t elts,
1552 usize_t skip_offset,
1553 uint8_t *values)
1554 {
1555 usize_t n = 0, rep = 0, mtf = 0, s = 0;
1556 int ret;
1557
1558 while (n < elts)
1559 {
1560 /* Special case inside generic code: CLEN only: If not the first group,
1561 * we already know the zero frequencies. */
1562 if (skip_offset != 0 && n >= skip_offset && values[n-skip_offset] == 0)
1563 {
1564 values[n++] = 0;
1565 continue;
1566 }
1567
1568 /* Repeat last symbol. */
1569 if (rep != 0)
1570 {
1571 values[n++] = mtfvals[0];
1572 rep -= 1;
1573 continue;
1574 }
1575
1576 /* Symbol following last repeat code. */
1577 if (mtf != 0)
1578 {
1579 usize_t sym = djw_update_mtf (mtfvals, mtf);
1580 values[n++] = sym;
1581 mtf = 0;
1582 continue;
1583 }
1584
1585 /* Decode next symbol/repeat code. */
1586 if ((ret = djw_decode_symbol (stream, bstate, input, input_end,
1587 inorder, base, limit, *minlen, *maxlen,
1588 & mtf, DJW_TOTAL_CODES))) { return ret; }
1589
1590 if (mtf <= RUN_1)
1591 {
1592 /* Repetition. */
1593 rep = ((mtf + 1) << s);
1594 mtf = 0;
1595 s += 1;
1596 }
1597 else
1598 {
1599 /* Remove the RUN_1 MTF offset. */
1600 mtf -= 1;
1601 s = 0;
1602 }
1603 }
1604
1605 /* If (rep != 0) there were too many codes received. */
1606 if (rep != 0)
1607 {
1608 stream->msg = "secondary decoder invalid repeat code";
1609 return XD3_INVALID_INPUT;
1610 }
1611
1612 return 0;
1613 }
1614
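/* Worked example (a sketch based on the code above, not in the original):
 * fed the MTF stream {2, RUN_1, 3} produced in the djw_compute_mtf_1_2
 * example earlier, with the same initial MTF order, this routine writes back
 * {4, 4, 4, 5}: code 2 is MTF position 1 (the symbol 4), RUN_1 repeats the
 * current front symbol (1+1) << 0 = 2 more times, and code 3 is MTF
 * position 2 (the symbol 5). */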
1615 static inline int
1616 djw_decode_prefix (xd3_stream *stream,
1617 bit_state *bstate,
1618 const uint8_t **input,
1619 const uint8_t *input_end,
1620 const uint8_t *cl_inorder,
1621 const usize_t *cl_base,
1622 const usize_t *cl_limit,
1623 const usize_t *cl_minlen,
1624 const usize_t *cl_maxlen,
1625 uint8_t *cl_mtf,
1626 usize_t groups,
1627 uint8_t *clen)
1628 {
1629 return djw_decode_1_2 (stream, bstate, input, input_end,
1630 cl_inorder, cl_base, cl_limit,
1631 cl_minlen, cl_maxlen, cl_mtf,
1632 ALPHABET_SIZE * groups, ALPHABET_SIZE, clen);
1633 }
1634
1635 static int
1636 xd3_decode_huff (xd3_stream *stream,
1637 djw_stream *h,
1638 const uint8_t **input_pos,
1639 const uint8_t *const input_end,
1640 uint8_t **output_pos,
1641 const uint8_t *const output_end)
1642 {
1643 const uint8_t *input = *input_pos;
1644 uint8_t *output = *output_pos;
1645 bit_state bstate = BIT_STATE_DECODE_INIT;
1646 uint8_t *sel_group = NULL;
1647 usize_t groups, gp;
1648 usize_t output_bytes = (usize_t)(output_end - output);
1649 usize_t sector_size;
1650 usize_t sectors;
1651 int ret;
1652
1653 /* Invalid input. */
1654 if (output_bytes == 0)
1655 {
1656 stream->msg = "secondary decoder invalid input";
1657 return XD3_INVALID_INPUT;
1658 }
1659
1660 /* Decode: number of groups */
1661 if ((ret = xd3_decode_bits (stream, & bstate, & input,
1662 input_end, DJW_GROUP_BITS, & groups)))
1663 {
1664 goto fail;
1665 }
1666
1667 groups += 1;
1668
1669 if (groups > 1)
1670 {
1671 /* Decode: group size */
1672 if ((ret = xd3_decode_bits (stream, & bstate, & input,
1673 input_end, DJW_SECTORSZ_BITS,
1674 & sector_size))) { goto fail; }
1675
1676 sector_size = (sector_size + 1) * DJW_SECTORSZ_MULT;
1677 }
1678 else
1679 {
1680 /* Default for groups == 1 */
1681 sector_size = output_bytes;
1682 }
1683
1684 sectors = 1 + (output_bytes - 1) / sector_size;
1685
1686 /* TODO: In the case of groups==1, lots of extra stack space gets used here.
1687 * Could dynamically allocate this memory, which would help with excess
1688 * parameter passing, too. Passing too many parameters in this file,
1689 * simplify it! */
1690
1691 /* Outer scope: per-group symbol decoder tables. */
1692 {
1693 uint8_t inorder[DJW_MAX_GROUPS][ALPHABET_SIZE];
1694 usize_t base [DJW_MAX_GROUPS][DJW_TOTAL_CODES];
1695 usize_t limit [DJW_MAX_GROUPS][DJW_TOTAL_CODES];
1696 usize_t minlen [DJW_MAX_GROUPS];
1697 usize_t maxlen [DJW_MAX_GROUPS];
1698
1699 /* Nested scope: code length decoder tables. */
1700 {
1701 uint8_t clen [DJW_MAX_GROUPS][ALPHABET_SIZE];
1702 uint8_t cl_inorder[DJW_TOTAL_CODES];
1703 usize_t cl_base [DJW_MAX_CLCLEN+2];
1704 usize_t cl_limit [DJW_MAX_CLCLEN+2];
1705 uint8_t cl_mtf [DJW_TOTAL_CODES];
1706 usize_t cl_minlen;
1707 usize_t cl_maxlen;
1708
1709 /* Compute the code length decoder. */
1710 if ((ret = djw_decode_clclen (stream, & bstate, & input, input_end,
1711 cl_inorder, cl_base, cl_limit, & cl_minlen,
1712 & cl_maxlen, cl_mtf))) { goto fail; }
1713
1714 /* Now decode each group decoder. */
1715 if ((ret = djw_decode_prefix (stream, & bstate, & input, input_end,
1716 cl_inorder, cl_base, cl_limit,
1717 & cl_minlen, & cl_maxlen, cl_mtf,
1718 groups, clen[0]))) { goto fail; }
1719
1720 /* Prepare the actual decoding tables. */
1721 for (gp = 0; gp < groups; gp += 1)
1722 {
1723 djw_build_decoder (stream, ALPHABET_SIZE, DJW_MAX_CODELEN,
1724 clen[gp], inorder[gp], base[gp], limit[gp],
1725 & minlen[gp], & maxlen[gp]);
1726 }
1727 }
1728
1729 /* Decode: selector clens. */
1730 {
1731 uint8_t sel_inorder[DJW_MAX_GROUPS+2];
1732 usize_t sel_base [DJW_MAX_GBCLEN+2];
1733 usize_t sel_limit [DJW_MAX_GBCLEN+2];
1734 uint8_t sel_mtf [DJW_MAX_GROUPS+2];
1735 usize_t sel_minlen;
1736 usize_t sel_maxlen;
1737
1738 /* Setup group selection. */
1739 if (groups > 1)
1740 {
1741 uint8_t sel_clen[DJW_MAX_GROUPS+1];
1742
1743 for (gp = 0; gp < groups+1; gp += 1)
1744 {
1745 usize_t value;
1746
1747 if ((ret = xd3_decode_bits (stream, & bstate, & input,
1748 input_end, DJW_GBCLEN_BITS,
1749 & value))) { goto fail; }
1750
1751 sel_clen[gp] = value;
1752 sel_mtf[gp] = gp;
1753 }
1754
1755 if ((sel_group = (uint8_t*) xd3_alloc (stream, sectors, 1)) == NULL)
1756 {
1757 ret = ENOMEM;
1758 goto fail;
1759 }
1760
1761 djw_build_decoder (stream, groups+1, DJW_MAX_GBCLEN, sel_clen,
1762 sel_inorder, sel_base, sel_limit,
1763 & sel_minlen, & sel_maxlen);
1764
1765 if ((ret = djw_decode_1_2 (stream, & bstate, & input, input_end,
1766 sel_inorder, sel_base,
1767 sel_limit, & sel_minlen,
1768 & sel_maxlen, sel_mtf,
1769 sectors, 0, sel_group))) { goto fail; }
1770 }
1771
1772 /* Now decode each sector. */
1773 {
1774 /* Initialize for (groups==1) case. */
1775 uint8_t *gp_inorder = inorder[0];
1776 usize_t *gp_base = base[0];
1777 usize_t *gp_limit = limit[0];
1778 usize_t gp_minlen = minlen[0];
1779 usize_t gp_maxlen = maxlen[0];
1780 usize_t c;
1781
1782 for (c = 0; c < sectors; c += 1)
1783 {
1784 usize_t n;
1785
1786 if (groups >= 2)
1787 {
1788 gp = sel_group[c];
1789
1790 XD3_ASSERT (gp < groups);
1791
1792 gp_inorder = inorder[gp];
1793 gp_base = base[gp];
1794 gp_limit = limit[gp];
1795 gp_minlen = minlen[gp];
1796 gp_maxlen = maxlen[gp];
1797 }
1798
1799 if (output_end < output)
1800 {
1801 stream->msg = "secondary decoder invalid input";
1802 return XD3_INVALID_INPUT;
1803 }
1804
1805 /* Decode next sector. */
1806 n = xd3_min (sector_size, (usize_t) (output_end - output));
1807
1808 do
1809 {
1810 usize_t sym;
1811
1812 if ((ret = djw_decode_symbol (stream, & bstate,
1813 & input, input_end,
1814 gp_inorder, gp_base,
1815 gp_limit, gp_minlen, gp_maxlen,
1816 & sym, ALPHABET_SIZE)))
1817 {
1818 goto fail;
1819 }
1820
1821 *output++ = sym;
1822 }
1823 while (--n);
1824 }
1825 }
1826 }
1827 }
1828
1829 IF_REGRESSION (if ((ret = xd3_test_clean_bits (stream, & bstate)))
1830 { goto fail; });
1831 XD3_ASSERT (ret == 0);
1832
1833 fail:
1834 xd3_free (stream, sel_group);
1835
1836 (*input_pos) = input;
1837 (*output_pos) = output;
1838 return ret;
1839 }
1840
1841 #endif