"Fossies" - the Fresh Open Source Software Archive 
Member "muscle/zlib/zlib/trees.c" (21 Nov 2020, 43761 Bytes) of package /linux/privat/muscle7.62.zip:
As a special service "Fossies" has tried to format the requested source page into HTML format using (guessed) C and C++ source code syntax highlighting (style:
standard) with prefixed line numbers and
code folding option.
Alternatively you can here
view or
download the uninterpreted source code file.
For more information about "trees.c" see the
Fossies "Dox" file reference documentation and the latest
Fossies "Diffs" side-by-side code changes report:
7.61_vs_7.62.
1 /* trees.c -- output deflated data using Huffman coding
2 * Copyright (C) 1995-2017 Jean-loup Gailly
3 * detect_data_type() function provided freely by Cosmin Truta, 2006
4 * For conditions of distribution and use, see copyright notice in zlib.h
5 */
6
7 /*
8 * ALGORITHM
9 *
10 * The "deflation" process uses several Huffman trees. The more
11 * common source values are represented by shorter bit sequences.
12 *
13 * Each code tree is stored in a compressed form which is itself
14 * a Huffman encoding of the lengths of all the code strings (in
15 * ascending order by source values). The actual code strings are
16 * reconstructed from the lengths in the inflate process, as described
17 * in the deflate specification.
18 *
19 * REFERENCES
20 *
21 * Deutsch, L.P.,"'Deflate' Compressed Data Format Specification".
22 * Available in ftp.uu.net:/pub/archiving/zip/doc/deflate-1.1.doc
23 *
24 * Storer, James A.
25 * Data Compression: Methods and Theory, pp. 49-50.
26 * Computer Science Press, 1988. ISBN 0-7167-8156-5.
27 *
28 * Sedgewick, R.
29 * Algorithms, p290.
30 * Addison-Wesley, 1983. ISBN 0-201-06672-6.
31 */
32
33 /* @(#) $Id$ */
34
35 /* #define GEN_TREES_H */
36
37 #include "deflate.h"
38
39 #ifdef ZLIB_DEBUG
40 # include <ctype.h>
41 #endif
42
43 /* ===========================================================================
44 * Constants
45 */
46
47 #define MAX_BL_BITS 7
48 /* Bit length codes must not exceed MAX_BL_BITS bits */
49
50 #define END_BLOCK 256
51 /* end of block literal code */
52
53 #define REP_3_6 16
54 /* repeat previous bit length 3-6 times (2 bits of repeat count) */
55
56 #define REPZ_3_10 17
57 /* repeat a zero length 3-10 times (3 bits of repeat count) */
58
59 #define REPZ_11_138 18
60 /* repeat a zero length 11-138 times (7 bits of repeat count) */
61
62 local const int extra_lbits[LENGTH_CODES] /* extra bits for each length code */
63 = {0,0,0,0,0,0,0,0,1,1,1,1,2,2,2,2,3,3,3,3,4,4,4,4,5,5,5,5,0};
64
65 local const int extra_dbits[D_CODES] /* extra bits for each distance code */
66 = {0,0,0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7,8,8,9,9,10,10,11,11,12,12,13,13};
67
68 local const int extra_blbits[BL_CODES]/* extra bits for each bit length code */
69 = {0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2,3,7};
70
71 local const uch bl_order[BL_CODES]
72 = {16,17,18,0,8,7,9,6,10,5,11,4,12,3,13,2,14,1,15};
73 /* The lengths of the bit length codes are sent in order of decreasing
74 * probability, to avoid transmitting the lengths for unused bit length codes.
75 */
76
77 /* ===========================================================================
78 * Local data. These are initialized only once.
79 */
80
81 #define DIST_CODE_LEN 512 /* see definition of array dist_code below */
82
83 #if defined(GEN_TREES_H) || !defined(STDC)
84 /* non ANSI compilers may not accept trees.h */
85
86 local ct_data static_ltree[L_CODES+2];
87 /* The static literal tree. Since the bit lengths are imposed, there is no
88 * need for the L_CODES extra codes used during heap construction. However
89 * The codes 286 and 287 are needed to build a canonical tree (see _tr_init
90 * below).
91 */
92
93 local ct_data static_dtree[D_CODES];
94 /* The static distance tree. (Actually a trivial tree since all codes use
95 * 5 bits.)
96 */
97
98 uch _dist_code[DIST_CODE_LEN];
99 /* Distance codes. The first 256 values correspond to the distances
100 * 3 .. 258, the last 256 values correspond to the top 8 bits of
101 * the 15 bit distances.
102 */
103
104 uch _length_code[MAX_MATCH-MIN_MATCH+1];
105 /* length code for each normalized match length (0 == MIN_MATCH) */
106
107 local int base_length[LENGTH_CODES];
108 /* First normalized length for each code (0 = MIN_MATCH) */
109
110 local int base_dist[D_CODES];
111 /* First normalized distance for each code (0 = distance of 1) */
112
113 #else
114 # include "trees.h"
115 #endif /* GEN_TREES_H */
116
117 struct static_tree_desc_s {
118 const ct_data *static_tree; /* static tree or NULL */
119 const intf *extra_bits; /* extra bits for each code or NULL */
120 int extra_base; /* base index for extra_bits */
121 int elems; /* max number of elements in the tree */
122 int max_length; /* max bit length for the codes */
123 };
124
125 local const static_tree_desc static_l_desc =
126 {static_ltree, extra_lbits, LITERALS+1, L_CODES, MAX_BITS};
127
128 local const static_tree_desc static_d_desc =
129 {static_dtree, extra_dbits, 0, D_CODES, MAX_BITS};
130
131 local const static_tree_desc static_bl_desc =
132 {(const ct_data *)0, extra_blbits, 0, BL_CODES, MAX_BL_BITS};
133
134 /* ===========================================================================
135 * Local (static) routines in this file.
136 */
137
138 local void tr_static_init OF((void));
139 local void init_block OF((deflate_state *s));
140 local void pqdownheap OF((deflate_state *s, ct_data *tree, int k));
141 local void gen_bitlen OF((deflate_state *s, tree_desc *desc));
142 local void gen_codes OF((ct_data *tree, int max_code, ushf *bl_count));
143 local void build_tree OF((deflate_state *s, tree_desc *desc));
144 local void scan_tree OF((deflate_state *s, ct_data *tree, int max_code));
145 local void send_tree OF((deflate_state *s, ct_data *tree, int max_code));
146 local int build_bl_tree OF((deflate_state *s));
147 local void send_all_trees OF((deflate_state *s, int lcodes, int dcodes,
148 int blcodes));
149 local void compress_block OF((deflate_state *s, const ct_data *ltree,
150 const ct_data *dtree));
151 local int detect_data_type OF((deflate_state *s));
152 local unsigned bi_reverse OF((unsigned value, int length));
153 local void bi_windup OF((deflate_state *s));
154 local void bi_flush OF((deflate_state *s));
155
156 #ifdef GEN_TREES_H
157 local void gen_trees_header OF((void));
158 #endif
159
160 #ifndef ZLIB_DEBUG
161 # define send_code(s, c, tree) send_bits(s, tree[c].Code, tree[c].Len)
162 /* Send a code of the given tree. c and tree must not have side effects */
163
164 #else /* !ZLIB_DEBUG */
165 # define send_code(s, c, tree) \
166 { if (z_verbose>2) fprintf(stderr,"\ncd %3d ",(c)); \
167 send_bits(s, tree[c].Code, tree[c].Len); }
168 #endif
169
170 /* ===========================================================================
171 * Output a short LSB first on the stream.
172 * IN assertion: there is enough room in pendingBuf.
173 */
174 #define put_short(s, w) { \
175 put_byte(s, (uch)((w) & 0xff)); \
176 put_byte(s, (uch)((ush)(w) >> 8)); \
177 }
178
179 /* ===========================================================================
180 * Send a value on a given number of bits.
181 * IN assertion: length <= 16 and value fits in length bits.
182 */
183 #ifdef ZLIB_DEBUG
184 local void send_bits OF((deflate_state *s, int value, int length));
185
186 local void send_bits(s, value, length)
187 deflate_state *s;
188 int value; /* value to send */
189 int length; /* number of bits */
190 {
191 Tracevv((stderr," l %2d v %4x ", length, value));
192 Assert(length > 0 && length <= 15, "invalid length");
193 s->bits_sent += (ulg)length;
194
195 /* If not enough room in bi_buf, use (valid) bits from bi_buf and
196 * (16 - bi_valid) bits from value, leaving (width - (16-bi_valid))
197 * unused bits in value.
198 */
199 if (s->bi_valid > (int)Buf_size - length) {
200 s->bi_buf |= (ush)value << s->bi_valid;
201 put_short(s, s->bi_buf);
202 s->bi_buf = (ush)value >> (Buf_size - s->bi_valid);
203 s->bi_valid += length - Buf_size;
204 } else {
205 s->bi_buf |= (ush)value << s->bi_valid;
206 s->bi_valid += length;
207 }
208 }
209 #else /* !ZLIB_DEBUG */
210
211 #define send_bits(s, value, length) \
212 { int len = length;\
213 if (s->bi_valid > (int)Buf_size - len) {\
214 int val = (int)value;\
215 s->bi_buf |= (ush)val << s->bi_valid;\
216 put_short(s, s->bi_buf);\
217 s->bi_buf = (ush)val >> (Buf_size - s->bi_valid);\
218 s->bi_valid += len - Buf_size;\
219 } else {\
220 s->bi_buf |= (ush)(value) << s->bi_valid;\
221 s->bi_valid += len;\
222 }\
223 }
224 #endif /* ZLIB_DEBUG */
225
226
227 /* the arguments must not have side effects */
228
229 /* ===========================================================================
230 * Initialize the various 'constant' tables.
231 */
232 local void tr_static_init()
233 {
234 #if defined(GEN_TREES_H) || !defined(STDC)
235 static int static_init_done = 0;
236 int n; /* iterates over tree elements */
237 int bits; /* bit counter */
238 int length; /* length value */
239 int code; /* code value */
240 int dist; /* distance index */
241 ush bl_count[MAX_BITS+1];
242 /* number of codes at each bit length for an optimal tree */
243
244 if (static_init_done) return;
245
246 /* For some embedded targets, global variables are not initialized: */
247 #ifdef NO_INIT_GLOBAL_POINTERS
248 static_l_desc.static_tree = static_ltree;
249 static_l_desc.extra_bits = extra_lbits;
250 static_d_desc.static_tree = static_dtree;
251 static_d_desc.extra_bits = extra_dbits;
252 static_bl_desc.extra_bits = extra_blbits;
253 #endif
254
255 /* Initialize the mapping length (0..255) -> length code (0..28) */
256 length = 0;
257 for (code = 0; code < LENGTH_CODES-1; code++) {
258 base_length[code] = length;
259 for (n = 0; n < (1<<extra_lbits[code]); n++) {
260 _length_code[length++] = (uch)code;
261 }
262 }
263 Assert (length == 256, "tr_static_init: length != 256");
264 /* Note that the length 255 (match length 258) can be represented
265 * in two different ways: code 284 + 5 bits or code 285, so we
266 * overwrite length_code[255] to use the best encoding:
267 */
268 _length_code[length-1] = (uch)code;
269
270 /* Initialize the mapping dist (0..32K) -> dist code (0..29) */
271 dist = 0;
272 for (code = 0 ; code < 16; code++) {
273 base_dist[code] = dist;
274 for (n = 0; n < (1<<extra_dbits[code]); n++) {
275 _dist_code[dist++] = (uch)code;
276 }
277 }
278 Assert (dist == 256, "tr_static_init: dist != 256");
279 dist >>= 7; /* from now on, all distances are divided by 128 */
280 for ( ; code < D_CODES; code++) {
281 base_dist[code] = dist << 7;
282 for (n = 0; n < (1<<(extra_dbits[code]-7)); n++) {
283 _dist_code[256 + dist++] = (uch)code;
284 }
285 }
286 Assert (dist == 256, "tr_static_init: 256+dist != 512");
287
288 /* Construct the codes of the static literal tree */
289 for (bits = 0; bits <= MAX_BITS; bits++) bl_count[bits] = 0;
290 n = 0;
291 while (n <= 143) static_ltree[n++].Len = 8, bl_count[8]++;
292 while (n <= 255) static_ltree[n++].Len = 9, bl_count[9]++;
293 while (n <= 279) static_ltree[n++].Len = 7, bl_count[7]++;
294 while (n <= 287) static_ltree[n++].Len = 8, bl_count[8]++;
295 /* Codes 286 and 287 do not exist, but we must include them in the
296 * tree construction to get a canonical Huffman tree (longest code
297 * all ones)
298 */
299 gen_codes((ct_data *)static_ltree, L_CODES+1, bl_count);
300
301 /* The static distance tree is trivial: */
302 for (n = 0; n < D_CODES; n++) {
303 static_dtree[n].Len = 5;
304 static_dtree[n].Code = bi_reverse((unsigned)n, 5);
305 }
306 static_init_done = 1;
307
308 # ifdef GEN_TREES_H
309 gen_trees_header();
310 # endif
311 #endif /* defined(GEN_TREES_H) || !defined(STDC) */
312 }
313
314 /* ===========================================================================
315 * Genererate the file trees.h describing the static trees.
316 */
317 #ifdef GEN_TREES_H
318 # ifndef ZLIB_DEBUG
319 # include <stdio.h>
320 # endif
321
322 # define SEPARATOR(i, last, width) \
323 ((i) == (last)? "\n};\n\n" : \
324 ((i) % (width) == (width)-1 ? ",\n" : ", "))
325
326 void gen_trees_header()
327 {
328 FILE *header = fopen("trees.h", "w");
329 int i;
330
331 Assert (header != NULL, "Can't open trees.h");
332 fprintf(header,
333 "/* header created automatically with -DGEN_TREES_H */\n\n");
334
335 fprintf(header, "local const ct_data static_ltree[L_CODES+2] = {\n");
336 for (i = 0; i < L_CODES+2; i++) {
337 fprintf(header, "{{%3u},{%3u}}%s", static_ltree[i].Code,
338 static_ltree[i].Len, SEPARATOR(i, L_CODES+1, 5));
339 }
340
341 fprintf(header, "local const ct_data static_dtree[D_CODES] = {\n");
342 for (i = 0; i < D_CODES; i++) {
343 fprintf(header, "{{%2u},{%2u}}%s", static_dtree[i].Code,
344 static_dtree[i].Len, SEPARATOR(i, D_CODES-1, 5));
345 }
346
347 fprintf(header, "const uch ZLIB_INTERNAL _dist_code[DIST_CODE_LEN] = {\n");
348 for (i = 0; i < DIST_CODE_LEN; i++) {
349 fprintf(header, "%2u%s", _dist_code[i],
350 SEPARATOR(i, DIST_CODE_LEN-1, 20));
351 }
352
353 fprintf(header,
354 "const uch ZLIB_INTERNAL _length_code[MAX_MATCH-MIN_MATCH+1]= {\n");
355 for (i = 0; i < MAX_MATCH-MIN_MATCH+1; i++) {
356 fprintf(header, "%2u%s", _length_code[i],
357 SEPARATOR(i, MAX_MATCH-MIN_MATCH, 20));
358 }
359
360 fprintf(header, "local const int base_length[LENGTH_CODES] = {\n");
361 for (i = 0; i < LENGTH_CODES; i++) {
362 fprintf(header, "%1u%s", base_length[i],
363 SEPARATOR(i, LENGTH_CODES-1, 20));
364 }
365
366 fprintf(header, "local const int base_dist[D_CODES] = {\n");
367 for (i = 0; i < D_CODES; i++) {
368 fprintf(header, "%5u%s", base_dist[i],
369 SEPARATOR(i, D_CODES-1, 10));
370 }
371
372 fclose(header);
373 }
374 #endif /* GEN_TREES_H */
375
376 /* ===========================================================================
377 * Initialize the tree data structures for a new zlib stream.
378 */
379 void ZLIB_INTERNAL _tr_init(s)
380 deflate_state *s;
381 {
382 tr_static_init();
383
384 s->l_desc.dyn_tree = s->dyn_ltree;
385 s->l_desc.stat_desc = &static_l_desc;
386
387 s->d_desc.dyn_tree = s->dyn_dtree;
388 s->d_desc.stat_desc = &static_d_desc;
389
390 s->bl_desc.dyn_tree = s->bl_tree;
391 s->bl_desc.stat_desc = &static_bl_desc;
392
393 s->bi_buf = 0;
394 s->bi_valid = 0;
395 #ifdef ZLIB_DEBUG
396 s->compressed_len = 0L;
397 s->bits_sent = 0L;
398 #endif
399
400 /* Initialize the first block of the first file: */
401 init_block(s);
402 }
403
404 /* ===========================================================================
405 * Initialize a new block.
406 */
407 local void init_block(s)
408 deflate_state *s;
409 {
410 int n; /* iterates over tree elements */
411
412 /* Initialize the trees. */
413 for (n = 0; n < L_CODES; n++) s->dyn_ltree[n].Freq = 0;
414 for (n = 0; n < D_CODES; n++) s->dyn_dtree[n].Freq = 0;
415 for (n = 0; n < BL_CODES; n++) s->bl_tree[n].Freq = 0;
416
417 s->dyn_ltree[END_BLOCK].Freq = 1;
418 s->opt_len = s->static_len = 0L;
419 s->last_lit = s->matches = 0;
420 }
421
422 #define SMALLEST 1
423 /* Index within the heap array of least frequent node in the Huffman tree */
424
425
426 /* ===========================================================================
427 * Remove the smallest element from the heap and recreate the heap with
428 * one less element. Updates heap and heap_len.
429 */
430 #define pqremove(s, tree, top) \
431 {\
432 top = s->heap[SMALLEST]; \
433 s->heap[SMALLEST] = s->heap[s->heap_len--]; \
434 pqdownheap(s, tree, SMALLEST); \
435 }
436
437 /* ===========================================================================
438 * Compares to subtrees, using the tree depth as tie breaker when
439 * the subtrees have equal frequency. This minimizes the worst case length.
440 */
441 #define smaller(tree, n, m, depth) \
442 (tree[n].Freq < tree[m].Freq || \
443 (tree[n].Freq == tree[m].Freq && depth[n] <= depth[m]))
444
445 /* ===========================================================================
446 * Restore the heap property by moving down the tree starting at node k,
447 * exchanging a node with the smallest of its two sons if necessary, stopping
448 * when the heap property is re-established (each father smaller than its
449 * two sons).
450 */
451 local void pqdownheap(s, tree, k)
452 deflate_state *s;
453 ct_data *tree; /* the tree to restore */
454 int k; /* node to move down */
455 {
456 int v = s->heap[k];
457 int j = k << 1; /* left son of k */
458 while (j <= s->heap_len) {
459 /* Set j to the smallest of the two sons: */
460 if (j < s->heap_len &&
461 smaller(tree, s->heap[j+1], s->heap[j], s->depth)) {
462 j++;
463 }
464 /* Exit if v is smaller than both sons */
465 if (smaller(tree, v, s->heap[j], s->depth)) break;
466
467 /* Exchange v with the smallest son */
468 s->heap[k] = s->heap[j]; k = j;
469
470 /* And continue down the tree, setting j to the left son of k */
471 j <<= 1;
472 }
473 s->heap[k] = v;
474 }
475
476 /* ===========================================================================
477 * Compute the optimal bit lengths for a tree and update the total bit length
478 * for the current block.
479 * IN assertion: the fields freq and dad are set, heap[heap_max] and
480 * above are the tree nodes sorted by increasing frequency.
481 * OUT assertions: the field len is set to the optimal bit length, the
482 * array bl_count contains the frequencies for each bit length.
483 * The length opt_len is updated; static_len is also updated if stree is
484 * not null.
485 */
486 local void gen_bitlen(s, desc)
487 deflate_state *s;
488 tree_desc *desc; /* the tree descriptor */
489 {
490 ct_data *tree = desc->dyn_tree;
491 int max_code = desc->max_code;
492 const ct_data *stree = desc->stat_desc->static_tree;
493 const intf *extra = desc->stat_desc->extra_bits;
494 int base = desc->stat_desc->extra_base;
495 int max_length = desc->stat_desc->max_length;
496 int h; /* heap index */
497 int n, m; /* iterate over the tree elements */
498 int bits; /* bit length */
499 int xbits; /* extra bits */
500 ush f; /* frequency */
501 int overflow = 0; /* number of elements with bit length too large */
502
503 for (bits = 0; bits <= MAX_BITS; bits++) s->bl_count[bits] = 0;
504
505 /* In a first pass, compute the optimal bit lengths (which may
506 * overflow in the case of the bit length tree).
507 */
508 tree[s->heap[s->heap_max]].Len = 0; /* root of the heap */
509
510 for (h = s->heap_max+1; h < HEAP_SIZE; h++) {
511 n = s->heap[h];
512 bits = tree[tree[n].Dad].Len + 1;
513 if (bits > max_length) bits = max_length, overflow++;
514 tree[n].Len = (ush)bits;
515 /* We overwrite tree[n].Dad which is no longer needed */
516
517 if (n > max_code) continue; /* not a leaf node */
518
519 s->bl_count[bits]++;
520 xbits = 0;
521 if (n >= base) xbits = extra[n-base];
522 f = tree[n].Freq;
523 s->opt_len += (ulg)f * (unsigned)(bits + xbits);
524 if (stree) s->static_len += (ulg)f * (unsigned)(stree[n].Len + xbits);
525 }
526 if (overflow == 0) return;
527
528 Tracev((stderr,"\nbit length overflow\n"));
529 /* This happens for example on obj2 and pic of the Calgary corpus */
530
531 /* Find the first bit length which could increase: */
532 do {
533 bits = max_length-1;
534 while (s->bl_count[bits] == 0) bits--;
535 s->bl_count[bits]--; /* move one leaf down the tree */
536 s->bl_count[bits+1] += 2; /* move one overflow item as its brother */
537 s->bl_count[max_length]--;
538 /* The brother of the overflow item also moves one step up,
539 * but this does not affect bl_count[max_length]
540 */
541 overflow -= 2;
542 } while (overflow > 0);
543
544 /* Now recompute all bit lengths, scanning in increasing frequency.
545 * h is still equal to HEAP_SIZE. (It is simpler to reconstruct all
546 * lengths instead of fixing only the wrong ones. This idea is taken
547 * from 'ar' written by Haruhiko Okumura.)
548 */
549 for (bits = max_length; bits != 0; bits--) {
550 n = s->bl_count[bits];
551 while (n != 0) {
552 m = s->heap[--h];
553 if (m > max_code) continue;
554 if ((unsigned) tree[m].Len != (unsigned) bits) {
555 Tracev((stderr,"code %d bits %d->%d\n", m, tree[m].Len, bits));
556 s->opt_len += ((ulg)bits - tree[m].Len) * tree[m].Freq;
557 tree[m].Len = (ush)bits;
558 }
559 n--;
560 }
561 }
562 }
563
564 /* ===========================================================================
565 * Generate the codes for a given tree and bit counts (which need not be
566 * optimal).
567 * IN assertion: the array bl_count contains the bit length statistics for
568 * the given tree and the field len is set for all tree elements.
569 * OUT assertion: the field code is set for all tree elements of non
570 * zero code length.
571 */
572 local void gen_codes (tree, max_code, bl_count)
573 ct_data *tree; /* the tree to decorate */
574 int max_code; /* largest code with non zero frequency */
575 ushf *bl_count; /* number of codes at each bit length */
576 {
577 ush next_code[MAX_BITS+1]; /* next code value for each bit length */
578 unsigned code = 0; /* running code value */
579 int bits; /* bit index */
580 int n; /* code index */
581
582 /* The distribution counts are first used to generate the code values
583 * without bit reversal.
584 */
585 for (bits = 1; bits <= MAX_BITS; bits++) {
586 code = (code + bl_count[bits-1]) << 1;
587 next_code[bits] = (ush)code;
588 }
589 /* Check that the bit counts in bl_count are consistent. The last code
590 * must be all ones.
591 */
592 Assert (code + bl_count[MAX_BITS]-1 == (1<<MAX_BITS)-1,
593 "inconsistent bit counts");
594 Tracev((stderr,"\ngen_codes: max_code %d ", max_code));
595
596 for (n = 0; n <= max_code; n++) {
597 int len = tree[n].Len;
598 if (len == 0) continue;
599 /* Now reverse the bits */
600 tree[n].Code = (ush)bi_reverse(next_code[len]++, len);
601
602 Tracecv(tree != static_ltree, (stderr,"\nn %3d %c l %2d c %4x (%x) ",
603 n, (isgraph(n) ? n : ' '), len, tree[n].Code, next_code[len]-1));
604 }
605 }
606
607 /* ===========================================================================
608 * Construct one Huffman tree and assigns the code bit strings and lengths.
609 * Update the total bit length for the current block.
610 * IN assertion: the field freq is set for all tree elements.
611 * OUT assertions: the fields len and code are set to the optimal bit length
612 * and corresponding code. The length opt_len is updated; static_len is
613 * also updated if stree is not null. The field max_code is set.
614 */
615 local void build_tree(s, desc)
616 deflate_state *s;
617 tree_desc *desc; /* the tree descriptor */
618 {
619 ct_data *tree = desc->dyn_tree;
620 const ct_data *stree = desc->stat_desc->static_tree;
621 int elems = desc->stat_desc->elems;
622 int n, m; /* iterate over heap elements */
623 int max_code = -1; /* largest code with non zero frequency */
624 int node; /* new node being created */
625
626 /* Construct the initial heap, with least frequent element in
627 * heap[SMALLEST]. The sons of heap[n] are heap[2*n] and heap[2*n+1].
628 * heap[0] is not used.
629 */
630 s->heap_len = 0, s->heap_max = HEAP_SIZE;
631
632 for (n = 0; n < elems; n++) {
633 if (tree[n].Freq != 0) {
634 s->heap[++(s->heap_len)] = max_code = n;
635 s->depth[n] = 0;
636 } else {
637 tree[n].Len = 0;
638 }
639 }
640
641 /* The pkzip format requires that at least one distance code exists,
642 * and that at least one bit should be sent even if there is only one
643 * possible code. So to avoid special checks later on we force at least
644 * two codes of non zero frequency.
645 */
646 while (s->heap_len < 2) {
647 node = s->heap[++(s->heap_len)] = (max_code < 2 ? ++max_code : 0);
648 tree[node].Freq = 1;
649 s->depth[node] = 0;
650 s->opt_len--; if (stree) s->static_len -= stree[node].Len;
651 /* node is 0 or 1 so it does not have extra bits */
652 }
653 desc->max_code = max_code;
654
655 /* The elements heap[heap_len/2+1 .. heap_len] are leaves of the tree,
656 * establish sub-heaps of increasing lengths:
657 */
658 for (n = s->heap_len/2; n >= 1; n--) pqdownheap(s, tree, n);
659
660 /* Construct the Huffman tree by repeatedly combining the least two
661 * frequent nodes.
662 */
663 node = elems; /* next internal node of the tree */
664 do {
665 pqremove(s, tree, n); /* n = node of least frequency */
666 m = s->heap[SMALLEST]; /* m = node of next least frequency */
667
668 s->heap[--(s->heap_max)] = n; /* keep the nodes sorted by frequency */
669 s->heap[--(s->heap_max)] = m;
670
671 /* Create a new node father of n and m */
672 tree[node].Freq = tree[n].Freq + tree[m].Freq;
673 s->depth[node] = (uch)((s->depth[n] >= s->depth[m] ?
674 s->depth[n] : s->depth[m]) + 1);
675 tree[n].Dad = tree[m].Dad = (ush)node;
676 #ifdef DUMP_BL_TREE
677 if (tree == s->bl_tree) {
678 fprintf(stderr,"\nnode %d(%d), sons %d(%d) %d(%d)",
679 node, tree[node].Freq, n, tree[n].Freq, m, tree[m].Freq);
680 }
681 #endif
682 /* and insert the new node in the heap */
683 s->heap[SMALLEST] = node++;
684 pqdownheap(s, tree, SMALLEST);
685
686 } while (s->heap_len >= 2);
687
688 s->heap[--(s->heap_max)] = s->heap[SMALLEST];
689
690 /* At this point, the fields freq and dad are set. We can now
691 * generate the bit lengths.
692 */
693 gen_bitlen(s, (tree_desc *)desc);
694
695 /* The field len is now set, we can generate the bit codes */
696 gen_codes ((ct_data *)tree, max_code, s->bl_count);
697 }
698
699 /* ===========================================================================
700 * Scan a literal or distance tree to determine the frequencies of the codes
701 * in the bit length tree.
702 */
703 local void scan_tree (s, tree, max_code)
704 deflate_state *s;
705 ct_data *tree; /* the tree to be scanned */
706 int max_code; /* and its largest code of non zero frequency */
707 {
708 int n; /* iterates over all tree elements */
709 int prevlen = -1; /* last emitted length */
710 int curlen; /* length of current code */
711 int nextlen = tree[0].Len; /* length of next code */
712 int count = 0; /* repeat count of the current code */
713 int max_count = 7; /* max repeat count */
714 int min_count = 4; /* min repeat count */
715
716 if (nextlen == 0) max_count = 138, min_count = 3;
717 tree[max_code+1].Len = (ush)0xffff; /* guard */
718
719 for (n = 0; n <= max_code; n++) {
720 curlen = nextlen; nextlen = tree[n+1].Len;
721 if (++count < max_count && curlen == nextlen) {
722 continue;
723 } else if (count < min_count) {
724 s->bl_tree[curlen].Freq += count;
725 } else if (curlen != 0) {
726 if (curlen != prevlen) s->bl_tree[curlen].Freq++;
727 s->bl_tree[REP_3_6].Freq++;
728 } else if (count <= 10) {
729 s->bl_tree[REPZ_3_10].Freq++;
730 } else {
731 s->bl_tree[REPZ_11_138].Freq++;
732 }
733 count = 0; prevlen = curlen;
734 if (nextlen == 0) {
735 max_count = 138, min_count = 3;
736 } else if (curlen == nextlen) {
737 max_count = 6, min_count = 3;
738 } else {
739 max_count = 7, min_count = 4;
740 }
741 }
742 }
743
744 /* ===========================================================================
745 * Send a literal or distance tree in compressed form, using the codes in
746 * bl_tree.
747 */
748 local void send_tree (s, tree, max_code)
749 deflate_state *s;
750 ct_data *tree; /* the tree to be scanned */
751 int max_code; /* and its largest code of non zero frequency */
752 {
753 int n; /* iterates over all tree elements */
754 int prevlen = -1; /* last emitted length */
755 int curlen; /* length of current code */
756 int nextlen = tree[0].Len; /* length of next code */
757 int count = 0; /* repeat count of the current code */
758 int max_count = 7; /* max repeat count */
759 int min_count = 4; /* min repeat count */
760
761 /* tree[max_code+1].Len = -1; */ /* guard already set */
762 if (nextlen == 0) max_count = 138, min_count = 3;
763
764 for (n = 0; n <= max_code; n++) {
765 curlen = nextlen; nextlen = tree[n+1].Len;
766 if (++count < max_count && curlen == nextlen) {
767 continue;
768 } else if (count < min_count) {
769 do { send_code(s, curlen, s->bl_tree); } while (--count != 0);
770
771 } else if (curlen != 0) {
772 if (curlen != prevlen) {
773 send_code(s, curlen, s->bl_tree); count--;
774 }
775 Assert(count >= 3 && count <= 6, " 3_6?");
776 send_code(s, REP_3_6, s->bl_tree); send_bits(s, count-3, 2);
777
778 } else if (count <= 10) {
779 send_code(s, REPZ_3_10, s->bl_tree); send_bits(s, count-3, 3);
780
781 } else {
782 send_code(s, REPZ_11_138, s->bl_tree); send_bits(s, count-11, 7);
783 }
784 count = 0; prevlen = curlen;
785 if (nextlen == 0) {
786 max_count = 138, min_count = 3;
787 } else if (curlen == nextlen) {
788 max_count = 6, min_count = 3;
789 } else {
790 max_count = 7, min_count = 4;
791 }
792 }
793 }
794
795 /* ===========================================================================
796 * Construct the Huffman tree for the bit lengths and return the index in
797 * bl_order of the last bit length code to send.
798 */
799 local int build_bl_tree(s)
800 deflate_state *s;
801 {
802 int max_blindex; /* index of last bit length code of non zero freq */
803
804 /* Determine the bit length frequencies for literal and distance trees */
805 scan_tree(s, (ct_data *)s->dyn_ltree, s->l_desc.max_code);
806 scan_tree(s, (ct_data *)s->dyn_dtree, s->d_desc.max_code);
807
808 /* Build the bit length tree: */
809 build_tree(s, (tree_desc *)(&(s->bl_desc)));
810 /* opt_len now includes the length of the tree representations, except
811 * the lengths of the bit lengths codes and the 5+5+4 bits for the counts.
812 */
813
814 /* Determine the number of bit length codes to send. The pkzip format
815 * requires that at least 4 bit length codes be sent. (appnote.txt says
816 * 3 but the actual value used is 4.)
817 */
818 for (max_blindex = BL_CODES-1; max_blindex >= 3; max_blindex--) {
819 if (s->bl_tree[bl_order[max_blindex]].Len != 0) break;
820 }
821 /* Update opt_len to include the bit length tree and counts */
822 s->opt_len += 3*((ulg)max_blindex+1) + 5+5+4;
823 Tracev((stderr, "\ndyn trees: dyn %ld, stat %ld",
824 s->opt_len, s->static_len));
825
826 return max_blindex;
827 }
828
829 /* ===========================================================================
830 * Send the header for a block using dynamic Huffman trees: the counts, the
831 * lengths of the bit length codes, the literal tree and the distance tree.
832 * IN assertion: lcodes >= 257, dcodes >= 1, blcodes >= 4.
833 */
834 local void send_all_trees(s, lcodes, dcodes, blcodes)
835 deflate_state *s;
836 int lcodes, dcodes, blcodes; /* number of codes for each tree */
837 {
838 int rank; /* index in bl_order */
839
840 Assert (lcodes >= 257 && dcodes >= 1 && blcodes >= 4, "not enough codes");
841 Assert (lcodes <= L_CODES && dcodes <= D_CODES && blcodes <= BL_CODES,
842 "too many codes");
843 Tracev((stderr, "\nbl counts: "));
844 send_bits(s, lcodes-257, 5); /* not +255 as stated in appnote.txt */
845 send_bits(s, dcodes-1, 5);
846 send_bits(s, blcodes-4, 4); /* not -3 as stated in appnote.txt */
847 for (rank = 0; rank < blcodes; rank++) {
848 Tracev((stderr, "\nbl code %2d ", bl_order[rank]));
849 send_bits(s, s->bl_tree[bl_order[rank]].Len, 3);
850 }
851 Tracev((stderr, "\nbl tree: sent %ld", s->bits_sent));
852
853 send_tree(s, (ct_data *)s->dyn_ltree, lcodes-1); /* literal tree */
854 Tracev((stderr, "\nlit tree: sent %ld", s->bits_sent));
855
856 send_tree(s, (ct_data *)s->dyn_dtree, dcodes-1); /* distance tree */
857 Tracev((stderr, "\ndist tree: sent %ld", s->bits_sent));
858 }
859
860 /* ===========================================================================
861 * Send a stored block
862 */
863 void ZLIB_INTERNAL _tr_stored_block(s, buf, stored_len, last)
864 deflate_state *s;
865 charf *buf; /* input block */
866 ulg stored_len; /* length of input block */
867 int last; /* one if this is the last block for a file */
868 {
869 send_bits(s, (STORED_BLOCK<<1)+last, 3); /* send block type */
870 bi_windup(s); /* align on byte boundary */
871 put_short(s, (ush)stored_len);
872 put_short(s, (ush)~stored_len);
873 zmemcpy(s->pending_buf + s->pending, (Bytef *)buf, stored_len);
874 s->pending += stored_len;
875 #ifdef ZLIB_DEBUG
876 s->compressed_len = (s->compressed_len + 3 + 7) & (ulg)~7L;
877 s->compressed_len += (stored_len + 4) << 3;
878 s->bits_sent += 2*16;
879 s->bits_sent += stored_len<<3;
880 #endif
881 }
882
883 /* ===========================================================================
884 * Flush the bits in the bit buffer to pending output (leaves at most 7 bits)
885 */
886 void ZLIB_INTERNAL _tr_flush_bits(s)
887 deflate_state *s;
888 {
889 bi_flush(s);
890 }
891
892 /* ===========================================================================
893 * Send one empty static block to give enough lookahead for inflate.
894 * This takes 10 bits, of which 7 may remain in the bit buffer.
895 */
896 void ZLIB_INTERNAL _tr_align(s)
897 deflate_state *s;
898 {
899 send_bits(s, STATIC_TREES<<1, 3);
900 send_code(s, END_BLOCK, static_ltree);
901 #ifdef ZLIB_DEBUG
902 s->compressed_len += 10L; /* 3 for block type, 7 for EOB */
903 #endif
904 bi_flush(s);
905 }
906
907 /* ===========================================================================
908 * Determine the best encoding for the current block: dynamic trees, static
909 * trees or store, and write out the encoded block.
910 */
911 void ZLIB_INTERNAL _tr_flush_block(s, buf, stored_len, last)
912 deflate_state *s;
913 charf *buf; /* input block, or NULL if too old */
914 ulg stored_len; /* length of input block */
915 int last; /* one if this is the last block for a file */
916 {
917 ulg opt_lenb, static_lenb; /* opt_len and static_len in bytes */
918 int max_blindex = 0; /* index of last bit length code of non zero freq */
919
920 /* Build the Huffman trees unless a stored block is forced */
921 if (s->level > 0) {
922
923 /* Check if the file is binary or text */
924 if (s->strm->data_type == Z_UNKNOWN)
925 s->strm->data_type = detect_data_type(s);
926
927 /* Construct the literal and distance trees */
928 build_tree(s, (tree_desc *)(&(s->l_desc)));
929 Tracev((stderr, "\nlit data: dyn %ld, stat %ld", s->opt_len,
930 s->static_len));
931
932 build_tree(s, (tree_desc *)(&(s->d_desc)));
933 Tracev((stderr, "\ndist data: dyn %ld, stat %ld", s->opt_len,
934 s->static_len));
935 /* At this point, opt_len and static_len are the total bit lengths of
936 * the compressed block data, excluding the tree representations.
937 */
938
939 /* Build the bit length tree for the above two trees, and get the index
940 * in bl_order of the last bit length code to send.
941 */
942 max_blindex = build_bl_tree(s);
943
944 /* Determine the best encoding. Compute the block lengths in bytes. */
945 opt_lenb = (s->opt_len+3+7)>>3;
946 static_lenb = (s->static_len+3+7)>>3;
947
948 Tracev((stderr, "\nopt %lu(%lu) stat %lu(%lu) stored %lu lit %u ",
949 opt_lenb, s->opt_len, static_lenb, s->static_len, stored_len,
950 s->last_lit));
951
952 if (static_lenb <= opt_lenb) opt_lenb = static_lenb;
953
954 } else {
955 Assert(buf != (char*)0, "lost buf");
956 opt_lenb = static_lenb = stored_len + 5; /* force a stored block */
957 }
958
959 #ifdef FORCE_STORED
960 if (buf != (char*)0) { /* force stored block */
961 #else
962 if (stored_len+4 <= opt_lenb && buf != (char*)0) {
963 /* 4: two words for the lengths */
964 #endif
965 /* The test buf != NULL is only necessary if LIT_BUFSIZE > WSIZE.
966 * Otherwise we can't have processed more than WSIZE input bytes since
967 * the last block flush, because compression would have been
968 * successful. If LIT_BUFSIZE <= WSIZE, it is never too late to
969 * transform a block into a stored block.
970 */
971 _tr_stored_block(s, buf, stored_len, last);
972
973 #ifdef FORCE_STATIC
974 } else if (static_lenb >= 0) { /* force static trees */
975 #else
976 } else if (s->strategy == Z_FIXED || static_lenb == opt_lenb) {
977 #endif
978 send_bits(s, (STATIC_TREES<<1)+last, 3);
979 compress_block(s, (const ct_data *)static_ltree,
980 (const ct_data *)static_dtree);
981 #ifdef ZLIB_DEBUG
982 s->compressed_len += 3 + s->static_len;
983 #endif
984 } else {
985 send_bits(s, (DYN_TREES<<1)+last, 3);
986 send_all_trees(s, s->l_desc.max_code+1, s->d_desc.max_code+1,
987 max_blindex+1);
988 compress_block(s, (const ct_data *)s->dyn_ltree,
989 (const ct_data *)s->dyn_dtree);
990 #ifdef ZLIB_DEBUG
991 s->compressed_len += 3 + s->opt_len;
992 #endif
993 }
994 Assert (s->compressed_len == s->bits_sent, "bad compressed size");
995 /* The above check is made mod 2^32, for files larger than 512 MB
996 * and uLong implemented on 32 bits.
997 */
998 init_block(s);
999
1000 if (last) {
1001 bi_windup(s);
1002 #ifdef ZLIB_DEBUG
1003 s->compressed_len += 7; /* align on byte boundary */
1004 #endif
1005 }
1006 Tracev((stderr,"\ncomprlen %lu(%lu) ", s->compressed_len>>3,
1007 s->compressed_len-7*last));
1008 }
1009
1010 /* ===========================================================================
1011 * Save the match info and tally the frequency counts. Return true if
1012 * the current block must be flushed.
1013 */
1014 int ZLIB_INTERNAL _tr_tally (s, dist, lc)
1015 deflate_state *s;
1016 unsigned dist; /* distance of matched string */
1017 unsigned lc; /* match length-MIN_MATCH or unmatched char (if dist==0) */
1018 {
1019 s->d_buf[s->last_lit] = (ush)dist;
1020 s->l_buf[s->last_lit++] = (uch)lc;
1021 if (dist == 0) {
1022 /* lc is the unmatched char */
1023 s->dyn_ltree[lc].Freq++;
1024 } else {
1025 s->matches++;
1026 /* Here, lc is the match length - MIN_MATCH */
1027 dist--; /* dist = match distance - 1 */
1028 Assert((ush)dist < (ush)MAX_DIST(s) &&
1029 (ush)lc <= (ush)(MAX_MATCH-MIN_MATCH) &&
1030 (ush)d_code(dist) < (ush)D_CODES, "_tr_tally: bad match");
1031
1032 s->dyn_ltree[_length_code[lc]+LITERALS+1].Freq++;
1033 s->dyn_dtree[d_code(dist)].Freq++;
1034 }
1035
1036 #ifdef TRUNCATE_BLOCK
1037 /* Try to guess if it is profitable to stop the current block here */
1038 if ((s->last_lit & 0x1fff) == 0 && s->level > 2) {
1039 /* Compute an upper bound for the compressed length */
1040 ulg out_length = (ulg)s->last_lit*8L;
1041 ulg in_length = (ulg)((long)s->strstart - s->block_start);
1042 int dcode;
1043 for (dcode = 0; dcode < D_CODES; dcode++) {
1044 out_length += (ulg)s->dyn_dtree[dcode].Freq *
1045 (5L+extra_dbits[dcode]);
1046 }
1047 out_length >>= 3;
1048 Tracev((stderr,"\nlast_lit %u, in %ld, out ~%ld(%ld%%) ",
1049 s->last_lit, in_length, out_length,
1050 100L - out_length*100L/in_length));
1051 if (s->matches < s->last_lit/2 && out_length < in_length/2) return 1;
1052 }
1053 #endif
1054 return (s->last_lit == s->lit_bufsize-1);
1055 /* We avoid equality with lit_bufsize because of wraparound at 64K
1056 * on 16 bit machines and because stored blocks are restricted to
1057 * 64K-1 bytes.
1058 */
1059 }
1060
1061 /* ===========================================================================
1062 * Send the block data compressed using the given Huffman trees
1063 */
1064 local void compress_block(s, ltree, dtree)
1065 deflate_state *s;
1066 const ct_data *ltree; /* literal tree */
1067 const ct_data *dtree; /* distance tree */
1068 {
1069 unsigned dist; /* distance of matched string */
1070 int lc; /* match length or unmatched char (if dist == 0) */
1071 unsigned lx = 0; /* running index in l_buf */
1072 unsigned code; /* the code to send */
1073 int extra; /* number of extra bits to send */
1074
1075 if (s->last_lit != 0) do {
1076 dist = s->d_buf[lx];
1077 lc = s->l_buf[lx++];
1078 if (dist == 0) {
1079 send_code(s, lc, ltree); /* send a literal byte */
1080 Tracecv(isgraph(lc), (stderr," '%c' ", lc));
1081 } else {
1082 /* Here, lc is the match length - MIN_MATCH */
1083 code = _length_code[lc];
1084 send_code(s, code+LITERALS+1, ltree); /* send the length code */
1085 extra = extra_lbits[code];
1086 if (extra != 0) {
1087 lc -= base_length[code];
1088 send_bits(s, lc, extra); /* send the extra length bits */
1089 }
1090 dist--; /* dist is now the match distance - 1 */
1091 code = d_code(dist);
1092 Assert (code < D_CODES, "bad d_code");
1093
1094 send_code(s, code, dtree); /* send the distance code */
1095 extra = extra_dbits[code];
1096 if (extra != 0) {
1097 dist -= (unsigned)base_dist[code];
1098 send_bits(s, dist, extra); /* send the extra distance bits */
1099 }
1100 } /* literal or match pair ? */
1101
1102 /* Check that the overlay between pending_buf and d_buf+l_buf is ok: */
1103 Assert((uInt)(s->pending) < s->lit_bufsize + 2*lx,
1104 "pendingBuf overflow");
1105
1106 } while (lx < s->last_lit);
1107
1108 send_code(s, END_BLOCK, ltree);
1109 }
1110
1111 /* ===========================================================================
1112 * Check if the data type is TEXT or BINARY, using the following algorithm:
1113 * - TEXT if the two conditions below are satisfied:
1114 * a) There are no non-portable control characters belonging to the
1115 * "black list" (0..6, 14..25, 28..31).
1116 * b) There is at least one printable character belonging to the
1117 * "white list" (9 {TAB}, 10 {LF}, 13 {CR}, 32..255).
1118 * - BINARY otherwise.
1119 * - The following partially-portable control characters form a
1120 * "gray list" that is ignored in this detection algorithm:
1121 * (7 {BEL}, 8 {BS}, 11 {VT}, 12 {FF}, 26 {SUB}, 27 {ESC}).
1122 * IN assertion: the fields Freq of dyn_ltree are set.
1123 */
1124 local int detect_data_type(s)
1125 deflate_state *s;
1126 {
1127 /* black_mask is the bit mask of black-listed bytes
1128 * set bits 0..6, 14..25, and 28..31
1129 * 0xf3ffc07f = binary 11110011111111111100000001111111
1130 */
1131 unsigned long black_mask = 0xf3ffc07fUL;
1132 int n;
1133
1134 /* Check for non-textual ("black-listed") bytes. */
1135 for (n = 0; n <= 31; n++, black_mask >>= 1)
1136 if ((black_mask & 1) && (s->dyn_ltree[n].Freq != 0))
1137 return Z_BINARY;
1138
1139 /* Check for textual ("white-listed") bytes. */
1140 if (s->dyn_ltree[9].Freq != 0 || s->dyn_ltree[10].Freq != 0
1141 || s->dyn_ltree[13].Freq != 0)
1142 return Z_TEXT;
1143 for (n = 32; n < LITERALS; n++)
1144 if (s->dyn_ltree[n].Freq != 0)
1145 return Z_TEXT;
1146
1147 /* There are no "black-listed" or "white-listed" bytes:
1148 * this stream either is empty or has tolerated ("gray-listed") bytes only.
1149 */
1150 return Z_BINARY;
1151 }
1152
1153 /* ===========================================================================
1154 * Reverse the first len bits of a code, using straightforward code (a faster
1155 * method would use a table)
1156 * IN assertion: 1 <= len <= 15
1157 */
1158 local unsigned bi_reverse(code, len)
1159 unsigned code; /* the value to invert */
1160 int len; /* its bit length */
1161 {
1162 register unsigned res = 0;
1163 do {
1164 res |= code & 1;
1165 code >>= 1, res <<= 1;
1166 } while (--len > 0);
1167 return res >> 1;
1168 }
1169
1170 /* ===========================================================================
1171 * Flush the bit buffer, keeping at most 7 bits in it.
1172 */
1173 local void bi_flush(s)
1174 deflate_state *s;
1175 {
1176 if (s->bi_valid == 16) {
1177 put_short(s, s->bi_buf);
1178 s->bi_buf = 0;
1179 s->bi_valid = 0;
1180 } else if (s->bi_valid >= 8) {
1181 put_byte(s, (Byte)s->bi_buf);
1182 s->bi_buf >>= 8;
1183 s->bi_valid -= 8;
1184 }
1185 }
1186
1187 /* ===========================================================================
1188 * Flush the bit buffer and align the output on a byte boundary
1189 */
1190 local void bi_windup(s)
1191 deflate_state *s;
1192 {
1193 if (s->bi_valid > 8) {
1194 put_short(s, s->bi_buf);
1195 } else if (s->bi_valid > 0) {
1196 put_byte(s, (Byte)s->bi_buf);
1197 }
1198 s->bi_buf = 0;
1199 s->bi_valid = 0;
1200 #ifdef ZLIB_DEBUG
1201 s->bits_sent = (s->bits_sent+7) & ~7;
1202 #endif
1203 }