"Fossies" - the Fresh Open Source Software Archive 
Member "zsync-0.6.2/zlib/trees.c" (16 Sep 2010, 44041 Bytes) of package /linux/privat/old/zsync-0.6.2.tar.gz:
As a special service "Fossies" has tried to format the requested source page into HTML format using (guessed) C and C++ source code syntax highlighting (style:
standard) with prefixed line numbers and
code folding option.
Alternatively you can here
view or
download the uninterpreted source code file.
For more information about "trees.c" see the
Fossies "Dox" file reference documentation.
1 /* trees.c -- output deflated data using Huffman coding
2 * Copyright (C) 1995-2003 Jean-loup Gailly
3 * For conditions of distribution and use, see copyright notice in zlib.h
4 */
5
6 /*
7 * ALGORITHM
8 *
9 * The "deflation" process uses several Huffman trees. The more
10 * common source values are represented by shorter bit sequences.
11 *
12 * Each code tree is stored in a compressed form which is itself
13 * a Huffman encoding of the lengths of all the code strings (in
14 * ascending order by source values). The actual code strings are
15 * reconstructed from the lengths in the inflate process, as described
16 * in the deflate specification.
17 *
18 * REFERENCES
19 *
20 * Deutsch, L.P.,"'Deflate' Compressed Data Format Specification".
21 * Available in ftp.uu.net:/pub/archiving/zip/doc/deflate-1.1.doc
22 *
23 * Storer, James A.
24 * Data Compression: Methods and Theory, pp. 49-50.
25 * Computer Science Press, 1988. ISBN 0-7167-8156-5.
26 *
27 * Sedgewick, R.
28 * Algorithms, p290.
29 * Addison-Wesley, 1983. ISBN 0-201-06672-6.
30 */
31
32 /* @(#) $Id$ */
33
34 /* #define GEN_TREES_H */
35
36 #include "deflate.h"
37
38 #ifdef DEBUG
39 # include <ctype.h>
40 #endif
41
42 /* ===========================================================================
43 * Constants
44 */
45
46 #define MAX_BL_BITS 7
47 /* Bit length codes must not exceed MAX_BL_BITS bits */
48
49 #define END_BLOCK 256
50 /* end of block literal code */
51
52 #define REP_3_6 16
53 /* repeat previous bit length 3-6 times (2 bits of repeat count) */
54
55 #define REPZ_3_10 17
56 /* repeat a zero length 3-10 times (3 bits of repeat count) */
57
58 #define REPZ_11_138 18
59 /* repeat a zero length 11-138 times (7 bits of repeat count) */
60
61 local const int extra_lbits[LENGTH_CODES] /* extra bits for each length code */
62 = {0,0,0,0,0,0,0,0,1,1,1,1,2,2,2,2,3,3,3,3,4,4,4,4,5,5,5,5,0};
63
64 local const int extra_dbits[D_CODES] /* extra bits for each distance code */
65 = {0,0,0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7,8,8,9,9,10,10,11,11,12,12,13,13};
66
67 local const int extra_blbits[BL_CODES]/* extra bits for each bit length code */
68 = {0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2,3,7};
69
70 local const uch bl_order[BL_CODES]
71 = {16,17,18,0,8,7,9,6,10,5,11,4,12,3,13,2,14,1,15};
72 /* The lengths of the bit length codes are sent in order of decreasing
73 * probability, to avoid transmitting the lengths for unused bit length codes.
74 */
75
76 #define Buf_size (8 * 2*sizeof(char))
77 /* Number of bits used within bi_buf. (bi_buf might be implemented on
78 * more than 16 bits on some systems.)
79 */
80
81 /* ===========================================================================
82 * Local data. These are initialized only once.
83 */
84
85 #define DIST_CODE_LEN 512 /* see definition of array dist_code below */
86
87 #if defined(GEN_TREES_H) || !defined(STDC)
88 /* non ANSI compilers may not accept trees.h */
89
90 local ct_data static_ltree[L_CODES+2];
91 /* The static literal tree. Since the bit lengths are imposed, there is no
92 * need for the L_CODES extra codes used during heap construction. However
93 * The codes 286 and 287 are needed to build a canonical tree (see _tr_init
94 * below).
95 */
96
97 local ct_data static_dtree[D_CODES];
98 /* The static distance tree. (Actually a trivial tree since all codes use
99 * 5 bits.)
100 */
101
102 uch _dist_code[DIST_CODE_LEN];
103 /* Distance codes. The first 256 values correspond to the distances
104 * 3 .. 258, the last 256 values correspond to the top 8 bits of
105 * the 15 bit distances.
106 */
107
108 uch _length_code[MAX_MATCH-MIN_MATCH+1];
109 /* length code for each normalized match length (0 == MIN_MATCH) */
110
111 local int base_length[LENGTH_CODES];
112 /* First normalized length for each code (0 = MIN_MATCH) */
113
114 local int base_dist[D_CODES];
115 /* First normalized distance for each code (0 = distance of 1) */
116
117 #else
118 # include "trees.h"
119 #endif /* GEN_TREES_H */
120
121 struct static_tree_desc_s {
122 const ct_data *static_tree; /* static tree or NULL */
123 const intf *extra_bits; /* extra bits for each code or NULL */
124 int extra_base; /* base index for extra_bits */
125 int elems; /* max number of elements in the tree */
126 int max_length; /* max bit length for the codes */
127 };
128
129 local static_tree_desc static_l_desc =
130 {static_ltree, extra_lbits, LITERALS+1, L_CODES, MAX_BITS};
131
132 local static_tree_desc static_d_desc =
133 {static_dtree, extra_dbits, 0, D_CODES, MAX_BITS};
134
135 local static_tree_desc static_bl_desc =
136 {(const ct_data *)0, extra_blbits, 0, BL_CODES, MAX_BL_BITS};
137
138 /* ===========================================================================
139 * Local (static) routines in this file.
140 */
141
142 local void tr_static_init OF((void));
143 local void init_block OF((deflate_state *s));
144 local void pqdownheap OF((deflate_state *s, ct_data *tree, int k));
145 local void gen_bitlen OF((deflate_state *s, tree_desc *desc));
146 local void gen_codes OF((ct_data *tree, int max_code, ushf *bl_count));
147 local void build_tree OF((deflate_state *s, tree_desc *desc));
148 local void scan_tree OF((deflate_state *s, ct_data *tree, int max_code));
149 local void send_tree OF((deflate_state *s, ct_data *tree, int max_code));
150 local int build_bl_tree OF((deflate_state *s));
151 local void send_all_trees OF((deflate_state *s, int lcodes, int dcodes,
152 int blcodes));
153 local void compress_block OF((deflate_state *s, ct_data *ltree,
154 ct_data *dtree));
155 local void set_data_type OF((deflate_state *s));
156 local unsigned bi_reverse OF((unsigned value, int length));
157 local void bi_windup OF((deflate_state *s));
158 local void bi_flush OF((deflate_state *s));
159 local void copy_block OF((deflate_state *s, charf *buf, unsigned len,
160 int header));
161
162 #ifdef GEN_TREES_H
163 local void gen_trees_header OF((void));
164 #endif
165
166 #ifndef DEBUG
167 # define send_code(s, c, tree) send_bits(s, tree[c].Code, tree[c].Len)
168 /* Send a code of the given tree. c and tree must not have side effects */
169
170 #else /* DEBUG */
171 # define send_code(s, c, tree) \
172 { if (z_verbose>2) fprintf(stderr,"\ncd %3d ",(c)); \
173 send_bits(s, tree[c].Code, tree[c].Len); }
174 #endif
175
176 /* ===========================================================================
177 * Output a short LSB first on the stream.
178 * IN assertion: there is enough room in pendingBuf.
179 */
180 #define put_short(s, w) { \
181 put_byte(s, (uch)((w) & 0xff)); \
182 put_byte(s, (uch)((ush)(w) >> 8)); \
183 }
184
185 /* ===========================================================================
186 * Send a value on a given number of bits.
187 * IN assertion: length <= 16 and value fits in length bits.
188 */
189 #ifdef DEBUG
190 local void send_bits OF((deflate_state *s, int value, int length));
191
192 local void send_bits(s, value, length)
193 deflate_state *s;
194 int value; /* value to send */
195 int length; /* number of bits */
196 {
197 Tracevv((stderr," l %2d v %4x ", length, value));
198 Assert(length > 0 && length <= 15, "invalid length");
199 s->bits_sent += (ulg)length;
200
201 /* If not enough room in bi_buf, use (valid) bits from bi_buf and
202 * (16 - bi_valid) bits from value, leaving (width - (16-bi_valid))
203 * unused bits in value.
204 */
205 if (s->bi_valid > (int)Buf_size - length) {
206 s->bi_buf |= (value << s->bi_valid);
207 put_short(s, s->bi_buf);
208 s->bi_buf = (ush)value >> (Buf_size - s->bi_valid);
209 s->bi_valid += length - Buf_size;
210 } else {
211 s->bi_buf |= value << s->bi_valid;
212 s->bi_valid += length;
213 }
214 }
215 #else /* !DEBUG */
216
217 #define send_bits(s, value, length) \
218 { int len = length;\
219 if (s->bi_valid > (int)Buf_size - len) {\
220 int val = value;\
221 s->bi_buf |= (val << s->bi_valid);\
222 put_short(s, s->bi_buf);\
223 s->bi_buf = (ush)val >> (Buf_size - s->bi_valid);\
224 s->bi_valid += len - Buf_size;\
225 } else {\
226 s->bi_buf |= (value) << s->bi_valid;\
227 s->bi_valid += len;\
228 }\
229 }
230 #endif /* DEBUG */
231
232
233 /* the arguments must not have side effects */
234
235 /* ===========================================================================
236 * Initialize the various 'constant' tables.
237 */
238 local void tr_static_init()
239 {
240 #if defined(GEN_TREES_H) || !defined(STDC)
241 static int static_init_done = 0;
242 int n; /* iterates over tree elements */
243 int bits; /* bit counter */
244 int length; /* length value */
245 int code; /* code value */
246 int dist; /* distance index */
247 ush bl_count[MAX_BITS+1];
248 /* number of codes at each bit length for an optimal tree */
249
250 if (static_init_done) return;
251
252 /* For some embedded targets, global variables are not initialized: */
253 static_l_desc.static_tree = static_ltree;
254 static_l_desc.extra_bits = extra_lbits;
255 static_d_desc.static_tree = static_dtree;
256 static_d_desc.extra_bits = extra_dbits;
257 static_bl_desc.extra_bits = extra_blbits;
258
259 /* Initialize the mapping length (0..255) -> length code (0..28) */
260 length = 0;
261 for (code = 0; code < LENGTH_CODES-1; code++) {
262 base_length[code] = length;
263 for (n = 0; n < (1<<extra_lbits[code]); n++) {
264 _length_code[length++] = (uch)code;
265 }
266 }
267 Assert (length == 256, "tr_static_init: length != 256");
268 /* Note that the length 255 (match length 258) can be represented
269 * in two different ways: code 284 + 5 bits or code 285, so we
270 * overwrite length_code[255] to use the best encoding:
271 */
272 _length_code[length-1] = (uch)code;
273
274 /* Initialize the mapping dist (0..32K) -> dist code (0..29) */
275 dist = 0;
276 for (code = 0 ; code < 16; code++) {
277 base_dist[code] = dist;
278 for (n = 0; n < (1<<extra_dbits[code]); n++) {
279 _dist_code[dist++] = (uch)code;
280 }
281 }
282 Assert (dist == 256, "tr_static_init: dist != 256");
283 dist >>= 7; /* from now on, all distances are divided by 128 */
284 for ( ; code < D_CODES; code++) {
285 base_dist[code] = dist << 7;
286 for (n = 0; n < (1<<(extra_dbits[code]-7)); n++) {
287 _dist_code[256 + dist++] = (uch)code;
288 }
289 }
290 Assert (dist == 256, "tr_static_init: 256+dist != 512");
291
292 /* Construct the codes of the static literal tree */
293 for (bits = 0; bits <= MAX_BITS; bits++) bl_count[bits] = 0;
294 n = 0;
295 while (n <= 143) static_ltree[n++].Len = 8, bl_count[8]++;
296 while (n <= 255) static_ltree[n++].Len = 9, bl_count[9]++;
297 while (n <= 279) static_ltree[n++].Len = 7, bl_count[7]++;
298 while (n <= 287) static_ltree[n++].Len = 8, bl_count[8]++;
299 /* Codes 286 and 287 do not exist, but we must include them in the
300 * tree construction to get a canonical Huffman tree (longest code
301 * all ones)
302 */
303 gen_codes((ct_data *)static_ltree, L_CODES+1, bl_count);
304
305 /* The static distance tree is trivial: */
306 for (n = 0; n < D_CODES; n++) {
307 static_dtree[n].Len = 5;
308 static_dtree[n].Code = bi_reverse((unsigned)n, 5);
309 }
310 static_init_done = 1;
311
312 # ifdef GEN_TREES_H
313 gen_trees_header();
314 # endif
315 #endif /* defined(GEN_TREES_H) || !defined(STDC) */
316 }
317
318 /* ===========================================================================
319 * Genererate the file trees.h describing the static trees.
320 */
321 #ifdef GEN_TREES_H
322 # ifndef DEBUG
323 # include <stdio.h>
324 # endif
325
326 # define SEPARATOR(i, last, width) \
327 ((i) == (last)? "\n};\n\n" : \
328 ((i) % (width) == (width)-1 ? ",\n" : ", "))
329
330 void gen_trees_header()
331 {
332 FILE *header = fopen("trees.h", "w");
333 int i;
334
335 Assert (header != NULL, "Can't open trees.h");
336 fprintf(header,
337 "/* header created automatically with -DGEN_TREES_H */\n\n");
338
339 fprintf(header, "local const ct_data static_ltree[L_CODES+2] = {\n");
340 for (i = 0; i < L_CODES+2; i++) {
341 fprintf(header, "{{%3u},{%3u}}%s", static_ltree[i].Code,
342 static_ltree[i].Len, SEPARATOR(i, L_CODES+1, 5));
343 }
344
345 fprintf(header, "local const ct_data static_dtree[D_CODES] = {\n");
346 for (i = 0; i < D_CODES; i++) {
347 fprintf(header, "{{%2u},{%2u}}%s", static_dtree[i].Code,
348 static_dtree[i].Len, SEPARATOR(i, D_CODES-1, 5));
349 }
350
351 fprintf(header, "const uch _dist_code[DIST_CODE_LEN] = {\n");
352 for (i = 0; i < DIST_CODE_LEN; i++) {
353 fprintf(header, "%2u%s", _dist_code[i],
354 SEPARATOR(i, DIST_CODE_LEN-1, 20));
355 }
356
357 fprintf(header, "const uch _length_code[MAX_MATCH-MIN_MATCH+1]= {\n");
358 for (i = 0; i < MAX_MATCH-MIN_MATCH+1; i++) {
359 fprintf(header, "%2u%s", _length_code[i],
360 SEPARATOR(i, MAX_MATCH-MIN_MATCH, 20));
361 }
362
363 fprintf(header, "local const int base_length[LENGTH_CODES] = {\n");
364 for (i = 0; i < LENGTH_CODES; i++) {
365 fprintf(header, "%1u%s", base_length[i],
366 SEPARATOR(i, LENGTH_CODES-1, 20));
367 }
368
369 fprintf(header, "local const int base_dist[D_CODES] = {\n");
370 for (i = 0; i < D_CODES; i++) {
371 fprintf(header, "%5u%s", base_dist[i],
372 SEPARATOR(i, D_CODES-1, 10));
373 }
374
375 fclose(header);
376 }
377 #endif /* GEN_TREES_H */
378
379 /* ===========================================================================
380 * Initialize the tree data structures for a new zlib stream.
381 */
382 void _tr_init(s)
383 deflate_state *s;
384 {
385 tr_static_init();
386
387 s->l_desc.dyn_tree = s->dyn_ltree;
388 s->l_desc.stat_desc = &static_l_desc;
389
390 s->d_desc.dyn_tree = s->dyn_dtree;
391 s->d_desc.stat_desc = &static_d_desc;
392
393 s->bl_desc.dyn_tree = s->bl_tree;
394 s->bl_desc.stat_desc = &static_bl_desc;
395
396 s->bi_buf = 0;
397 s->bi_valid = 0;
398 s->last_eob_len = 8; /* enough lookahead for inflate */
399 #ifdef DEBUG
400 s->compressed_len = 0L;
401 s->bits_sent = 0L;
402 #endif
403
404 /* Initialize the first block of the first file: */
405 init_block(s);
406 }
407
408 /* ===========================================================================
409 * Initialize a new block.
410 */
411 local void init_block(s)
412 deflate_state *s;
413 {
414 int n; /* iterates over tree elements */
415
416 /* Initialize the trees. */
417 for (n = 0; n < L_CODES; n++) s->dyn_ltree[n].Freq = 0;
418 for (n = 0; n < D_CODES; n++) s->dyn_dtree[n].Freq = 0;
419 for (n = 0; n < BL_CODES; n++) s->bl_tree[n].Freq = 0;
420
421 s->dyn_ltree[END_BLOCK].Freq = 1;
422 s->opt_len = s->static_len = 0L;
423 s->last_lit = s->matches = 0;
424 }
425
426 #define SMALLEST 1
427 /* Index within the heap array of least frequent node in the Huffman tree */
428
429
430 /* ===========================================================================
431 * Remove the smallest element from the heap and recreate the heap with
432 * one less element. Updates heap and heap_len.
433 */
434 #define pqremove(s, tree, top) \
435 {\
436 top = s->heap[SMALLEST]; \
437 s->heap[SMALLEST] = s->heap[s->heap_len--]; \
438 pqdownheap(s, tree, SMALLEST); \
439 }
440
441 /* ===========================================================================
442 * Compares to subtrees, using the tree depth as tie breaker when
443 * the subtrees have equal frequency. This minimizes the worst case length.
444 */
445 #define smaller(tree, n, m, depth) \
446 (tree[n].Freq < tree[m].Freq || \
447 (tree[n].Freq == tree[m].Freq && depth[n] <= depth[m]))
448
449 /* ===========================================================================
450 * Restore the heap property by moving down the tree starting at node k,
451 * exchanging a node with the smallest of its two sons if necessary, stopping
452 * when the heap property is re-established (each father smaller than its
453 * two sons).
454 */
455 local void pqdownheap(s, tree, k)
456 deflate_state *s;
457 ct_data *tree; /* the tree to restore */
458 int k; /* node to move down */
459 {
460 int v = s->heap[k];
461 int j = k << 1; /* left son of k */
462 while (j <= s->heap_len) {
463 /* Set j to the smallest of the two sons: */
464 if (j < s->heap_len &&
465 smaller(tree, s->heap[j+1], s->heap[j], s->depth)) {
466 j++;
467 }
468 /* Exit if v is smaller than both sons */
469 if (smaller(tree, v, s->heap[j], s->depth)) break;
470
471 /* Exchange v with the smallest son */
472 s->heap[k] = s->heap[j]; k = j;
473
474 /* And continue down the tree, setting j to the left son of k */
475 j <<= 1;
476 }
477 s->heap[k] = v;
478 }
479
480 /* ===========================================================================
481 * Compute the optimal bit lengths for a tree and update the total bit length
482 * for the current block.
483 * IN assertion: the fields freq and dad are set, heap[heap_max] and
484 * above are the tree nodes sorted by increasing frequency.
485 * OUT assertions: the field len is set to the optimal bit length, the
486 * array bl_count contains the frequencies for each bit length.
487 * The length opt_len is updated; static_len is also updated if stree is
488 * not null.
489 */
490 local void gen_bitlen(s, desc)
491 deflate_state *s;
492 tree_desc *desc; /* the tree descriptor */
493 {
494 ct_data *tree = desc->dyn_tree;
495 int max_code = desc->max_code;
496 const ct_data *stree = desc->stat_desc->static_tree;
497 const intf *extra = desc->stat_desc->extra_bits;
498 int base = desc->stat_desc->extra_base;
499 int max_length = desc->stat_desc->max_length;
500 int h; /* heap index */
501 int n, m; /* iterate over the tree elements */
502 int bits; /* bit length */
503 int xbits; /* extra bits */
504 ush f; /* frequency */
505 int overflow = 0; /* number of elements with bit length too large */
506
507 for (bits = 0; bits <= MAX_BITS; bits++) s->bl_count[bits] = 0;
508
509 /* In a first pass, compute the optimal bit lengths (which may
510 * overflow in the case of the bit length tree).
511 */
512 tree[s->heap[s->heap_max]].Len = 0; /* root of the heap */
513
514 for (h = s->heap_max+1; h < HEAP_SIZE; h++) {
515 n = s->heap[h];
516 bits = tree[tree[n].Dad].Len + 1;
517 if (bits > max_length) bits = max_length, overflow++;
518 tree[n].Len = (ush)bits;
519 /* We overwrite tree[n].Dad which is no longer needed */
520
521 if (n > max_code) continue; /* not a leaf node */
522
523 s->bl_count[bits]++;
524 xbits = 0;
525 if (n >= base) xbits = extra[n-base];
526 f = tree[n].Freq;
527 s->opt_len += (ulg)f * (bits + xbits);
528 if (stree) s->static_len += (ulg)f * (stree[n].Len + xbits);
529 }
530 if (overflow == 0) return;
531
532 Trace((stderr,"\nbit length overflow\n"));
533 /* This happens for example on obj2 and pic of the Calgary corpus */
534
535 /* Find the first bit length which could increase: */
536 do {
537 bits = max_length-1;
538 while (s->bl_count[bits] == 0) bits--;
539 s->bl_count[bits]--; /* move one leaf down the tree */
540 s->bl_count[bits+1] += 2; /* move one overflow item as its brother */
541 s->bl_count[max_length]--;
542 /* The brother of the overflow item also moves one step up,
543 * but this does not affect bl_count[max_length]
544 */
545 overflow -= 2;
546 } while (overflow > 0);
547
548 /* Now recompute all bit lengths, scanning in increasing frequency.
549 * h is still equal to HEAP_SIZE. (It is simpler to reconstruct all
550 * lengths instead of fixing only the wrong ones. This idea is taken
551 * from 'ar' written by Haruhiko Okumura.)
552 */
553 for (bits = max_length; bits != 0; bits--) {
554 n = s->bl_count[bits];
555 while (n != 0) {
556 m = s->heap[--h];
557 if (m > max_code) continue;
558 if (tree[m].Len != (unsigned) bits) {
559 Trace((stderr,"code %d bits %d->%d\n", m, tree[m].Len, bits));
560 s->opt_len += ((long)bits - (long)tree[m].Len)
561 *(long)tree[m].Freq;
562 tree[m].Len = (ush)bits;
563 }
564 n--;
565 }
566 }
567 }
568
569 /* ===========================================================================
570 * Generate the codes for a given tree and bit counts (which need not be
571 * optimal).
572 * IN assertion: the array bl_count contains the bit length statistics for
573 * the given tree and the field len is set for all tree elements.
574 * OUT assertion: the field code is set for all tree elements of non
575 * zero code length.
576 */
577 local void gen_codes (tree, max_code, bl_count)
578 ct_data *tree; /* the tree to decorate */
579 int max_code; /* largest code with non zero frequency */
580 ushf *bl_count; /* number of codes at each bit length */
581 {
582 ush next_code[MAX_BITS+1]; /* next code value for each bit length */
583 ush code = 0; /* running code value */
584 int bits; /* bit index */
585 int n; /* code index */
586
587 /* The distribution counts are first used to generate the code values
588 * without bit reversal.
589 */
590 for (bits = 1; bits <= MAX_BITS; bits++) {
591 next_code[bits] = code = (code + bl_count[bits-1]) << 1;
592 }
593 /* Check that the bit counts in bl_count are consistent. The last code
594 * must be all ones.
595 */
596 Assert (code + bl_count[MAX_BITS]-1 == (1<<MAX_BITS)-1,
597 "inconsistent bit counts");
598 Tracev((stderr,"\ngen_codes: max_code %d ", max_code));
599
600 for (n = 0; n <= max_code; n++) {
601 int len = tree[n].Len;
602 if (len == 0) continue;
603 /* Now reverse the bits */
604 tree[n].Code = bi_reverse(next_code[len]++, len);
605
606 Tracecv(tree != static_ltree, (stderr,"\nn %3d %c l %2d c %4x (%x) ",
607 n, (isgraph(n) ? n : ' '), len, tree[n].Code, next_code[len]-1));
608 }
609 }
610
611 /* ===========================================================================
612 * Construct one Huffman tree and assigns the code bit strings and lengths.
613 * Update the total bit length for the current block.
614 * IN assertion: the field freq is set for all tree elements.
615 * OUT assertions: the fields len and code are set to the optimal bit length
616 * and corresponding code. The length opt_len is updated; static_len is
617 * also updated if stree is not null. The field max_code is set.
618 */
619 local void build_tree(s, desc)
620 deflate_state *s;
621 tree_desc *desc; /* the tree descriptor */
622 {
623 ct_data *tree = desc->dyn_tree;
624 const ct_data *stree = desc->stat_desc->static_tree;
625 int elems = desc->stat_desc->elems;
626 int n, m; /* iterate over heap elements */
627 int max_code = -1; /* largest code with non zero frequency */
628 int node; /* new node being created */
629
630 /* Construct the initial heap, with least frequent element in
631 * heap[SMALLEST]. The sons of heap[n] are heap[2*n] and heap[2*n+1].
632 * heap[0] is not used.
633 */
634 s->heap_len = 0, s->heap_max = HEAP_SIZE;
635
636 for (n = 0; n < elems; n++) {
637 if (tree[n].Freq != 0) {
638 s->heap[++(s->heap_len)] = max_code = n;
639 s->depth[n] = 0;
640 } else {
641 tree[n].Len = 0;
642 }
643 }
644
645 /* The pkzip format requires that at least one distance code exists,
646 * and that at least one bit should be sent even if there is only one
647 * possible code. So to avoid special checks later on we force at least
648 * two codes of non zero frequency.
649 */
650 while (s->heap_len < 2) {
651 node = s->heap[++(s->heap_len)] = (max_code < 2 ? ++max_code : 0);
652 tree[node].Freq = 1;
653 s->depth[node] = 0;
654 s->opt_len--; if (stree) s->static_len -= stree[node].Len;
655 /* node is 0 or 1 so it does not have extra bits */
656 }
657 desc->max_code = max_code;
658
659 /* The elements heap[heap_len/2+1 .. heap_len] are leaves of the tree,
660 * establish sub-heaps of increasing lengths:
661 */
662 for (n = s->heap_len/2; n >= 1; n--) pqdownheap(s, tree, n);
663
664 /* Construct the Huffman tree by repeatedly combining the least two
665 * frequent nodes.
666 */
667 node = elems; /* next internal node of the tree */
668 do {
669 pqremove(s, tree, n); /* n = node of least frequency */
670 m = s->heap[SMALLEST]; /* m = node of next least frequency */
671
672 s->heap[--(s->heap_max)] = n; /* keep the nodes sorted by frequency */
673 s->heap[--(s->heap_max)] = m;
674
675 /* Create a new node father of n and m */
676 tree[node].Freq = tree[n].Freq + tree[m].Freq;
677 s->depth[node] = (uch)((s->depth[n] >= s->depth[m] ?
678 s->depth[n] : s->depth[m]) + 1);
679 tree[n].Dad = tree[m].Dad = (ush)node;
680 #ifdef DUMP_BL_TREE
681 if (tree == s->bl_tree) {
682 fprintf(stderr,"\nnode %d(%d), sons %d(%d) %d(%d)",
683 node, tree[node].Freq, n, tree[n].Freq, m, tree[m].Freq);
684 }
685 #endif
686 /* and insert the new node in the heap */
687 s->heap[SMALLEST] = node++;
688 pqdownheap(s, tree, SMALLEST);
689
690 } while (s->heap_len >= 2);
691
692 s->heap[--(s->heap_max)] = s->heap[SMALLEST];
693
694 /* At this point, the fields freq and dad are set. We can now
695 * generate the bit lengths.
696 */
697 gen_bitlen(s, (tree_desc *)desc);
698
699 /* The field len is now set, we can generate the bit codes */
700 gen_codes ((ct_data *)tree, max_code, s->bl_count);
701 }
702
703 /* ===========================================================================
704 * Scan a literal or distance tree to determine the frequencies of the codes
705 * in the bit length tree.
706 */
707 local void scan_tree (s, tree, max_code)
708 deflate_state *s;
709 ct_data *tree; /* the tree to be scanned */
710 int max_code; /* and its largest code of non zero frequency */
711 {
712 int n; /* iterates over all tree elements */
713 int prevlen = -1; /* last emitted length */
714 int curlen; /* length of current code */
715 int nextlen = tree[0].Len; /* length of next code */
716 int count = 0; /* repeat count of the current code */
717 int max_count = 7; /* max repeat count */
718 int min_count = 4; /* min repeat count */
719
720 if (nextlen == 0) max_count = 138, min_count = 3;
721 tree[max_code+1].Len = (ush)0xffff; /* guard */
722
723 for (n = 0; n <= max_code; n++) {
724 curlen = nextlen; nextlen = tree[n+1].Len;
725 if (++count < max_count && curlen == nextlen) {
726 continue;
727 } else if (count < min_count) {
728 s->bl_tree[curlen].Freq += count;
729 } else if (curlen != 0) {
730 if (curlen != prevlen) s->bl_tree[curlen].Freq++;
731 s->bl_tree[REP_3_6].Freq++;
732 } else if (count <= 10) {
733 s->bl_tree[REPZ_3_10].Freq++;
734 } else {
735 s->bl_tree[REPZ_11_138].Freq++;
736 }
737 count = 0; prevlen = curlen;
738 if (nextlen == 0) {
739 max_count = 138, min_count = 3;
740 } else if (curlen == nextlen) {
741 max_count = 6, min_count = 3;
742 } else {
743 max_count = 7, min_count = 4;
744 }
745 }
746 }
747
748 /* ===========================================================================
749 * Send a literal or distance tree in compressed form, using the codes in
750 * bl_tree.
751 */
752 local void send_tree (s, tree, max_code)
753 deflate_state *s;
754 ct_data *tree; /* the tree to be scanned */
755 int max_code; /* and its largest code of non zero frequency */
756 {
757 int n; /* iterates over all tree elements */
758 int prevlen = -1; /* last emitted length */
759 int curlen; /* length of current code */
760 int nextlen = tree[0].Len; /* length of next code */
761 int count = 0; /* repeat count of the current code */
762 int max_count = 7; /* max repeat count */
763 int min_count = 4; /* min repeat count */
764
765 /* tree[max_code+1].Len = -1; */ /* guard already set */
766 if (nextlen == 0) max_count = 138, min_count = 3;
767
768 for (n = 0; n <= max_code; n++) {
769 curlen = nextlen; nextlen = tree[n+1].Len;
770 if (++count < max_count && curlen == nextlen) {
771 continue;
772 } else if (count < min_count) {
773 do { send_code(s, curlen, s->bl_tree); } while (--count != 0);
774
775 } else if (curlen != 0) {
776 if (curlen != prevlen) {
777 send_code(s, curlen, s->bl_tree); count--;
778 }
779 Assert(count >= 3 && count <= 6, " 3_6?");
780 send_code(s, REP_3_6, s->bl_tree); send_bits(s, count-3, 2);
781
782 } else if (count <= 10) {
783 send_code(s, REPZ_3_10, s->bl_tree); send_bits(s, count-3, 3);
784
785 } else {
786 send_code(s, REPZ_11_138, s->bl_tree); send_bits(s, count-11, 7);
787 }
788 count = 0; prevlen = curlen;
789 if (nextlen == 0) {
790 max_count = 138, min_count = 3;
791 } else if (curlen == nextlen) {
792 max_count = 6, min_count = 3;
793 } else {
794 max_count = 7, min_count = 4;
795 }
796 }
797 }
798
799 /* ===========================================================================
800 * Construct the Huffman tree for the bit lengths and return the index in
801 * bl_order of the last bit length code to send.
802 */
803 local int build_bl_tree(s)
804 deflate_state *s;
805 {
806 int max_blindex; /* index of last bit length code of non zero freq */
807
808 /* Determine the bit length frequencies for literal and distance trees */
809 scan_tree(s, (ct_data *)s->dyn_ltree, s->l_desc.max_code);
810 scan_tree(s, (ct_data *)s->dyn_dtree, s->d_desc.max_code);
811
812 /* Build the bit length tree: */
813 build_tree(s, (tree_desc *)(&(s->bl_desc)));
814 /* opt_len now includes the length of the tree representations, except
815 * the lengths of the bit lengths codes and the 5+5+4 bits for the counts.
816 */
817
818 /* Determine the number of bit length codes to send. The pkzip format
819 * requires that at least 4 bit length codes be sent. (appnote.txt says
820 * 3 but the actual value used is 4.)
821 */
822 for (max_blindex = BL_CODES-1; max_blindex >= 3; max_blindex--) {
823 if (s->bl_tree[bl_order[max_blindex]].Len != 0) break;
824 }
825 /* Update opt_len to include the bit length tree and counts */
826 s->opt_len += 3*(max_blindex+1) + 5+5+4;
827 Tracev((stderr, "\ndyn trees: dyn %ld, stat %ld",
828 s->opt_len, s->static_len));
829
830 return max_blindex;
831 }
832
833 /* ===========================================================================
834 * Send the header for a block using dynamic Huffman trees: the counts, the
835 * lengths of the bit length codes, the literal tree and the distance tree.
836 * IN assertion: lcodes >= 257, dcodes >= 1, blcodes >= 4.
837 */
838 local void send_all_trees(s, lcodes, dcodes, blcodes)
839 deflate_state *s;
840 int lcodes, dcodes, blcodes; /* number of codes for each tree */
841 {
842 int rank; /* index in bl_order */
843
844 Assert (lcodes >= 257 && dcodes >= 1 && blcodes >= 4, "not enough codes");
845 Assert (lcodes <= L_CODES && dcodes <= D_CODES && blcodes <= BL_CODES,
846 "too many codes");
847 Tracev((stderr, "\nbl counts: "));
848 send_bits(s, lcodes-257, 5); /* not +255 as stated in appnote.txt */
849 send_bits(s, dcodes-1, 5);
850 send_bits(s, blcodes-4, 4); /* not -3 as stated in appnote.txt */
851 for (rank = 0; rank < blcodes; rank++) {
852 Tracev((stderr, "\nbl code %2d ", bl_order[rank]));
853 send_bits(s, s->bl_tree[bl_order[rank]].Len, 3);
854 }
855 Tracev((stderr, "\nbl tree: sent %ld", s->bits_sent));
856
857 send_tree(s, (ct_data *)s->dyn_ltree, lcodes-1); /* literal tree */
858 Tracev((stderr, "\nlit tree: sent %ld", s->bits_sent));
859
860 send_tree(s, (ct_data *)s->dyn_dtree, dcodes-1); /* distance tree */
861 Tracev((stderr, "\ndist tree: sent %ld", s->bits_sent));
862 }
863
864 /* ===========================================================================
865 * Send a stored block
866 */
867 void _tr_stored_block(s, buf, stored_len, eof)
868 deflate_state *s;
869 charf *buf; /* input block */
870 ulg stored_len; /* length of input block */
871 int eof; /* true if this is the last block for a file */
872 {
873 send_bits(s, (STORED_BLOCK<<1)+eof, 3); /* send block type */
874 #ifdef DEBUG
875 s->compressed_len = (s->compressed_len + 3 + 7) & (ulg)~7L;
876 s->compressed_len += (stored_len + 4) << 3;
877 #endif
878 copy_block(s, buf, (unsigned)stored_len, 1); /* with header */
879 }
880
881 /* ===========================================================================
882 * Send one empty static block to give enough lookahead for inflate.
883 * This takes 10 bits, of which 7 may remain in the bit buffer.
884 * The current inflate code requires 9 bits of lookahead. If the
885 * last two codes for the previous block (real code plus EOB) were coded
886 * on 5 bits or less, inflate may have only 5+3 bits of lookahead to decode
887 * the last real code. In this case we send two empty static blocks instead
888 * of one. (There are no problems if the previous block is stored or fixed.)
889 * To simplify the code, we assume the worst case of last real code encoded
890 * on one bit only.
891 */
892 void _tr_align(s)
893 deflate_state *s;
894 {
895 send_bits(s, STATIC_TREES<<1, 3);
896 send_code(s, END_BLOCK, static_ltree);
897 #ifdef DEBUG
898 s->compressed_len += 10L; /* 3 for block type, 7 for EOB */
899 #endif
900 bi_flush(s);
901 /* Of the 10 bits for the empty block, we have already sent
902 * (10 - bi_valid) bits. The lookahead for the last real code (before
903 * the EOB of the previous block) was thus at least one plus the length
904 * of the EOB plus what we have just sent of the empty static block.
905 */
906 if (1 + s->last_eob_len + 10 - s->bi_valid < 9) {
907 send_bits(s, STATIC_TREES<<1, 3);
908 send_code(s, END_BLOCK, static_ltree);
909 #ifdef DEBUG
910 s->compressed_len += 10L;
911 #endif
912 bi_flush(s);
913 }
914 s->last_eob_len = 7;
915 }
916
917 /* ===========================================================================
918 * Determine the best encoding for the current block: dynamic trees, static
919 * trees or store, and output the encoded block to the zip file.
920 */
921 void _tr_flush_block(s, buf, stored_len, eof)
922 deflate_state *s;
923 charf *buf; /* input block, or NULL if too old */
924 ulg stored_len; /* length of input block */
925 int eof; /* true if this is the last block for a file */
926 {
927 ulg opt_lenb, static_lenb; /* opt_len and static_len in bytes */
928 int max_blindex = 0; /* index of last bit length code of non zero freq */
929
930 /* Build the Huffman trees unless a stored block is forced */
931 if (s->level > 0) {
932
933 /* Check if the file is ascii or binary */
934 if (s->data_type == Z_UNKNOWN) set_data_type(s);
935
936 /* Construct the literal and distance trees */
937 build_tree(s, (tree_desc *)(&(s->l_desc)));
938 Tracev((stderr, "\nlit data: dyn %ld, stat %ld", s->opt_len,
939 s->static_len));
940
941 build_tree(s, (tree_desc *)(&(s->d_desc)));
942 Tracev((stderr, "\ndist data: dyn %ld, stat %ld", s->opt_len,
943 s->static_len));
944 /* At this point, opt_len and static_len are the total bit lengths of
945 * the compressed block data, excluding the tree representations.
946 */
947
948 /* Build the bit length tree for the above two trees, and get the index
949 * in bl_order of the last bit length code to send.
950 */
951 max_blindex = build_bl_tree(s);
952
953 /* Determine the best encoding. Compute the block lengths in bytes. */
954 opt_lenb = (s->opt_len+3+7)>>3;
955 static_lenb = (s->static_len+3+7)>>3;
956
957 Tracev((stderr, "\nopt %lu(%lu) stat %lu(%lu) stored %lu lit %u ",
958 opt_lenb, s->opt_len, static_lenb, s->static_len, stored_len,
959 s->last_lit));
960
961 if (static_lenb <= opt_lenb) opt_lenb = static_lenb;
962
963 } else {
964 Assert(buf != (char*)0, "lost buf");
965 opt_lenb = static_lenb = stored_len + 5; /* force a stored block */
966 }
967
968 #ifdef FORCE_STORED
969 if (buf != (char*)0) { /* force stored block */
970 #else
971 if (stored_len+4 <= opt_lenb && buf != (char*)0) {
972 /* 4: two words for the lengths */
973 #endif
974 /* The test buf != NULL is only necessary if LIT_BUFSIZE > WSIZE.
975 * Otherwise we can't have processed more than WSIZE input bytes since
976 * the last block flush, because compression would have been
977 * successful. If LIT_BUFSIZE <= WSIZE, it is never too late to
978 * transform a block into a stored block.
979 */
980 _tr_stored_block(s, buf, stored_len, eof);
981
982 #ifdef FORCE_STATIC
983 } else if (static_lenb >= 0) { /* force static trees */
984 #else
985 } else if (static_lenb == opt_lenb) {
986 #endif
987 send_bits(s, (STATIC_TREES<<1)+eof, 3);
988 compress_block(s, (ct_data *)static_ltree, (ct_data *)static_dtree);
989 #ifdef DEBUG
990 s->compressed_len += 3 + s->static_len;
991 #endif
992 } else {
993 send_bits(s, (DYN_TREES<<1)+eof, 3);
994 send_all_trees(s, s->l_desc.max_code+1, s->d_desc.max_code+1,
995 max_blindex+1);
996 compress_block(s, (ct_data *)s->dyn_ltree, (ct_data *)s->dyn_dtree);
997 #ifdef DEBUG
998 s->compressed_len += 3 + s->opt_len;
999 #endif
1000 }
1001 Assert (s->compressed_len == s->bits_sent, "bad compressed size");
1002 /* The above check is made mod 2^32, for files larger than 512 MB
1003 * and uLong implemented on 32 bits.
1004 */
1005 init_block(s);
1006
1007 if (eof) {
1008 bi_windup(s);
1009 #ifdef DEBUG
1010 s->compressed_len += 7; /* align on byte boundary */
1011 #endif
1012 }
1013 Tracev((stderr,"\ncomprlen %lu(%lu) ", s->compressed_len>>3,
1014 s->compressed_len-7*eof));
1015 }
1016
1017 /* ===========================================================================
1018 * Save the match info and tally the frequency counts. Return true if
1019 * the current block must be flushed.
1020 */
1021 int _tr_tally (s, dist, lc)
1022 deflate_state *s;
1023 unsigned dist; /* distance of matched string */
1024 unsigned lc; /* match length-MIN_MATCH or unmatched char (if dist==0) */
1025 {
1026 s->d_buf[s->last_lit] = (ush)dist;
1027 s->l_buf[s->last_lit++] = (uch)lc;
1028 if (dist == 0) {
1029 /* lc is the unmatched char */
1030 s->dyn_ltree[lc].Freq++;
1031 } else {
1032 s->matches++;
1033 /* Here, lc is the match length - MIN_MATCH */
1034 dist--; /* dist = match distance - 1 */
1035 Assert((ush)dist < (ush)MAX_DIST(s) &&
1036 (ush)lc <= (ush)(MAX_MATCH-MIN_MATCH) &&
1037 (ush)d_code(dist) < (ush)D_CODES, "_tr_tally: bad match");
1038
1039 s->dyn_ltree[_length_code[lc]+LITERALS+1].Freq++;
1040 s->dyn_dtree[d_code(dist)].Freq++;
1041 }
1042
1043 #ifdef TRUNCATE_BLOCK
1044 /* Try to guess if it is profitable to stop the current block here */
1045 if ((s->last_lit & 0x1fff) == 0 && s->level > 2) {
1046 /* Compute an upper bound for the compressed length */
1047 ulg out_length = (ulg)s->last_lit*8L;
1048 ulg in_length = (ulg)((long)s->strstart - s->block_start);
1049 int dcode;
1050 for (dcode = 0; dcode < D_CODES; dcode++) {
1051 out_length += (ulg)s->dyn_dtree[dcode].Freq *
1052 (5L+extra_dbits[dcode]);
1053 }
1054 out_length >>= 3;
1055 Tracev((stderr,"\nlast_lit %u, in %ld, out ~%ld(%ld%%) ",
1056 s->last_lit, in_length, out_length,
1057 100L - out_length*100L/in_length));
1058 if (s->matches < s->last_lit/2 && out_length < in_length/2) return 1;
1059 }
1060 #endif
1061 return (s->last_lit == s->lit_bufsize-1);
1062 /* We avoid equality with lit_bufsize because of wraparound at 64K
1063 * on 16 bit machines and because stored blocks are restricted to
1064 * 64K-1 bytes.
1065 */
1066 }
1067
1068 /* ===========================================================================
1069 * Send the block data compressed using the given Huffman trees
1070 */
1071 local void compress_block(s, ltree, dtree)
1072 deflate_state *s;
1073 ct_data *ltree; /* literal tree */
1074 ct_data *dtree; /* distance tree */
1075 {
1076 unsigned dist; /* distance of matched string */
1077 int lc; /* match length or unmatched char (if dist == 0) */
1078 unsigned lx = 0; /* running index in l_buf */
1079 unsigned code; /* the code to send */
1080 int extra; /* number of extra bits to send */
1081
1082 if (s->last_lit != 0) do {
1083 dist = s->d_buf[lx];
1084 lc = s->l_buf[lx++];
1085 if (dist == 0) {
1086 send_code(s, lc, ltree); /* send a literal byte */
1087 Tracecv(isgraph(lc), (stderr," '%c' ", lc));
1088 } else {
1089 /* Here, lc is the match length - MIN_MATCH */
1090 code = _length_code[lc];
1091 send_code(s, code+LITERALS+1, ltree); /* send the length code */
1092 extra = extra_lbits[code];
1093 if (extra != 0) {
1094 lc -= base_length[code];
1095 send_bits(s, lc, extra); /* send the extra length bits */
1096 }
1097 dist--; /* dist is now the match distance - 1 */
1098 code = d_code(dist);
1099 Assert (code < D_CODES, "bad d_code");
1100
1101 send_code(s, code, dtree); /* send the distance code */
1102 extra = extra_dbits[code];
1103 if (extra != 0) {
1104 dist -= base_dist[code];
1105 send_bits(s, dist, extra); /* send the extra distance bits */
1106 }
1107 } /* literal or match pair ? */
1108
1109 /* Check that the overlay between pending_buf and d_buf+l_buf is ok: */
1110 Assert((uInt)(s->pending) < s->lit_bufsize + 2*lx,
1111 "pendingBuf overflow");
1112
1113 } while (lx < s->last_lit);
1114
1115 send_code(s, END_BLOCK, ltree);
1116 s->last_eob_len = ltree[END_BLOCK].Len;
1117 }
1118
1119 /* ===========================================================================
1120 * Set the data type to ASCII or BINARY, using a crude approximation:
1121 * binary if more than 20% of the bytes are <= 6 or >= 128, ascii otherwise.
1122 * IN assertion: the fields freq of dyn_ltree are set and the total of all
1123 * frequencies does not exceed 64K (to fit in an int on 16 bit machines).
1124 */
1125 local void set_data_type(s)
1126 deflate_state *s;
1127 {
1128 int n = 0;
1129 unsigned ascii_freq = 0;
1130 unsigned bin_freq = 0;
1131 while (n < 7) bin_freq += s->dyn_ltree[n++].Freq;
1132 while (n < 128) ascii_freq += s->dyn_ltree[n++].Freq;
1133 while (n < LITERALS) bin_freq += s->dyn_ltree[n++].Freq;
1134 s->data_type = (Byte)(bin_freq > (ascii_freq >> 2) ? Z_BINARY : Z_ASCII);
1135 }
1136
1137 /* ===========================================================================
1138 * Reverse the first len bits of a code, using straightforward code (a faster
1139 * method would use a table)
1140 * IN assertion: 1 <= len <= 15
1141 */
1142 local unsigned bi_reverse(code, len)
1143 unsigned code; /* the value to invert */
1144 int len; /* its bit length */
1145 {
1146 register unsigned res = 0;
1147 do {
1148 res |= code & 1;
1149 code >>= 1, res <<= 1;
1150 } while (--len > 0);
1151 return res >> 1;
1152 }
1153
1154 /* ===========================================================================
1155 * Flush the bit buffer, keeping at most 7 bits in it.
1156 */
1157 local void bi_flush(s)
1158 deflate_state *s;
1159 {
1160 if (s->bi_valid == 16) {
1161 put_short(s, s->bi_buf);
1162 s->bi_buf = 0;
1163 s->bi_valid = 0;
1164 } else if (s->bi_valid >= 8) {
1165 put_byte(s, (Byte)s->bi_buf);
1166 s->bi_buf >>= 8;
1167 s->bi_valid -= 8;
1168 }
1169 }
1170
1171 /* ===========================================================================
1172 * Flush the bit buffer and align the output on a byte boundary
1173 */
1174 local void bi_windup(s)
1175 deflate_state *s;
1176 {
1177 if (s->bi_valid > 8) {
1178 put_short(s, s->bi_buf);
1179 } else if (s->bi_valid > 0) {
1180 put_byte(s, (Byte)s->bi_buf);
1181 }
1182 s->bi_buf = 0;
1183 s->bi_valid = 0;
1184 #ifdef DEBUG
1185 s->bits_sent = (s->bits_sent+7) & ~7;
1186 #endif
1187 }
1188
1189 /* ===========================================================================
1190 * Copy a stored block, storing first the length and its
1191 * one's complement if requested.
1192 */
1193 local void copy_block(s, buf, len, header)
1194 deflate_state *s;
1195 charf *buf; /* the input data */
1196 unsigned len; /* its length */
1197 int header; /* true if block header must be written */
1198 {
1199 bi_windup(s); /* align on byte boundary */
1200 s->last_eob_len = 8; /* enough lookahead for inflate */
1201
1202 if (header) {
1203 put_short(s, (ush)len);
1204 put_short(s, (ush)~len);
1205 #ifdef DEBUG
1206 s->bits_sent += 2*16;
1207 #endif
1208 }
1209 #ifdef DEBUG
1210 s->bits_sent += (ulg)len<<3;
1211 #endif
1212 while (len--) {
1213 put_byte(s, *buf++);
1214 }
1215 }