"Fossies" - the Fresh Open Source Software Archive

Member "xdelta3-3.0.11/xdelta3-djw.h" (3 Nov 2015, 47489 Bytes) of package /linux/misc/xdelta3-3.0.11.tar.gz:


As a special service "Fossies" has tried to format the requested source page into HTML format using (guessed) C and C++ source code syntax highlighting (style: standard) with prefixed line numbers and code folding option. Alternatively you can here view or download the uninterpreted source code file. For more information about "xdelta3-djw.h" see the Fossies "Dox" file reference documentation.

    1 /* xdelta 3 - delta compression tools and library
    2  * Copyright (C) 2002, 2006, 2007.  Joshua P. MacDonald
    3  *
    4  *  This program is free software; you can redistribute it and/or modify
    5  *  it under the terms of the GNU General Public License as published by
    6  *  the Free Software Foundation; either version 2 of the License, or
    7  *  (at your option) any later version.
    8  *
    9  *  This program is distributed in the hope that it will be useful,
   10  *  but WITHOUT ANY WARRANTY; without even the implied warranty of
   11  *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
   12  *  GNU General Public License for more details.
   13  *
   14  *  You should have received a copy of the GNU General Public License
   15  *  along with this program; if not, write to the Free Software
   16  *  Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
   17  */
   18 
   19 /* TODO: This code needs a thorough round of commenting.  There is
   20  * some slop in the declaration of arrays, which are maybe one element
   21  * larger than they need to be and comments would help clear it up. */
   22 
   23 #ifndef _XDELTA3_DJW_H_
   24 #define _XDELTA3_DJW_H_
   25 
   26 /* The following people deserve much credit for the algorithms and
   27  * techniques contained in this file:
   28 
   29  Julian Seward
   30  Bzip2 sources, implementation of the multi-table Huffman technique.
   31 
   32  Jean-loup Gailly and Mark Adler and L. Peter Deutsch
   33  Zlib source code, RFC 1951
   34 
   35  Daniel S. Hirschberg and Debra A. LeLewer
   36  "Efficient Decoding of Prefix Codes"
   37  Communications of the ACM, April 1990 33(4).
   38 
   39  David J. Wheeler
   40  Program bred3.c, bexp3 and accompanying documents bred3.ps, huff.ps.
   41  This contains the idea behind the multi-table Huffman and 1-2 coding
   42  techniques.
   43  ftp://ftp.cl.cam.ac.uk/users/djw3/
   44 
   45 */
   46 
   47 /* OPT: during the multi-table iteration, pick the worst-overall
   48  * performing table and replace it with exactly the frequencies of the
   49  * worst-overall performing sector or N-worst performing sectors. */
   50 
   51 /* REF: See xdfs-0.222 and xdfs-0.226 for some old experiments with
   52  * the Bzip prefix coding strategy.  xdfs-0.256 contains the last of
   53  * the other-format tests, including RFC1950 and the RFC1950+MTF
   54  * tests. */
   55 
   56 #define DJW_MAX_CODELEN      20U /* Maximum length of an alphabet code. */
   57 
   58 /* Code lengths are themselves code-length encoded, so the total number of
   59  * codes is: [RUN_0, RUN_1, 1-DJW_MAX_CODELEN] */
   60 #define DJW_TOTAL_CODES      (DJW_MAX_CODELEN+2)
   61 
   62 #define RUN_0                0U /* Symbols used in MTF+1/2 coding. */
   63 #define RUN_1                1U
   64 
   65 /* Number of code lengths always encoded (djw_encode_basic array) */
   66 #define DJW_BASIC_CODES      5U  
   67 #define DJW_RUN_CODES        2U  /* Number of run codes */
   68 
   69 /* Offset of extra codes */
   70 #define DJW_EXTRA_12OFFSET   (DJW_BASIC_CODES + DJW_RUN_CODES)
   71 
   72 /* Number of optionally encoded code lengths (djw_encode_extra array) */
   73 #define DJW_EXTRA_CODES      15U
   74 
   75 /* Number of bits to code [0-DJW_EXTRA_CODES] */
   76 #define DJW_EXTRA_CODE_BITS  4U  
   77 
   78 #define DJW_MAX_GROUPS       8U  /* Max number of group coding tables */
   79 #define DJW_GROUP_BITS       3U  /* Number of bits to code [1-DJW_MAX_GROUPS] */
   80 
   81 #define DJW_SECTORSZ_MULT     5U  /* Multiplier for encoded sectorsz */
   82 #define DJW_SECTORSZ_BITS     5U  /* Number of bits to code group size */
   83 #define DJW_SECTORSZ_MAX      ((1U << DJW_SECTORSZ_BITS) * DJW_SECTORSZ_MULT)
   84 
   85 /* Maximum number of iterations to find group tables. */
   86 #define DJW_MAX_ITER         6U
   87 /* Minimum number of bits an iteration must reduce coding by. */
   88 #define DJW_MIN_IMPROVEMENT  20U 
   89 
   90 /* Maximum code length of a prefix code length */
   91 #define DJW_MAX_CLCLEN       15U
   92 
   93 /* Number of bits to code [0-DJW_MAX_CLCLEN] */
   94 #define DJW_CLCLEN_BITS      4U  
   95 
   96 #define DJW_MAX_GBCLEN       7U  /* Maximum code length of a group selector */
   97 
   98 /* Number of bits to code [0-DJW_MAX_GBCLEN]
   99  * TODO: Actually, should never have zero code lengths here, or else a group
  100  * went unused.  Write a test for this: if a group goes unused, eliminate
  101  * it? */
  102 #define DJW_GBCLEN_BITS      3U
  103 
  104 /* It has to save at least this many bits... */
  105 #define EFFICIENCY_BITS      16U
  106 
  107 typedef struct _djw_stream   djw_stream;
  108 typedef struct _djw_heapen   djw_heapen;
  109 typedef struct _djw_prefix   djw_prefix;
  110 typedef uint32_t             djw_weight;
  111 
  112 struct _djw_heapen
  113 {
  114   uint32_t depth;
  115   uint32_t freq;
  116   uint32_t parent;
  117 };
  118 
  119 struct _djw_prefix
  120 {
  121   usize_t   scount;
  122   uint8_t *symbol;
  123   usize_t   mcount;
  124   uint8_t *mtfsym;
  125   uint8_t *repcnt;
  126 };
  127 
  128 struct _djw_stream
  129 {
  130   int unused;
  131 };
  132 
  133 /* Each Huffman table consists of 256 "code length" (CLEN) codes,
  134  * which are themselves Huffman coded after eliminating repeats and
  135  * move-to-front coding.  The prefix consists of all the CLEN codes in
  136  * djw_encode_basic plus a 4-bit value stating how many of the
  137  * djw_encode_extra codes are actually coded (the rest are presumed
  138  * zero, or unused CLEN codes).
  139  *
  140  * These values of these two arrays were arrived at by studying the
  141  * distribution of min and max clen over a collection of DATA, INST,
  142  * and ADDR inputs.  The goal is to specify the order of
  143  * djw_extra_codes that is most likely to minimize the number of extra
  144  * codes that must be encoded.
  145  *
  146  * Results: 158896 sections were counted by compressing files (window
  147  * size 512K) listed with: `find / -type f ( -user jmacd -o -perm +444
  148  * )`
  149  *
  150  * The distribution of CLEN codes for each efficient invocation of the
  151  * secondary compressor (taking the best number of groups/sector size)
  152  * was recorded.  Then we look at the distribution of min and max clen
  153  * values, counting the number of times the value C_low is less than
  154  * the min and C_high is greater than the max.  Values >= C_high and
  155  * <= C_low will not have their lengths coded.  The results are sorted
  156  * and the least likely 15 are placed into the djw_encode_extra[]
  157  * array in order.  These values are used as the initial MTF ordering.
  158 
  159  clow[1] = 155119
  160  clow[2] = 140325
  161  clow[3] = 84072
  162  ---
  163  clow[4] = 7225
  164  clow[5] = 1093
  165  clow[6] = 215
  166  ---
  167  chigh[4] = 1
  168  chigh[5] = 30
  169  chigh[6] = 218
  170  chigh[7] = 2060
  171  chigh[8] = 13271
  172  ---
  173  chigh[9] = 39463
  174  chigh[10] = 77360
  175  chigh[11] = 118298
  176  chigh[12] = 141360
  177  chigh[13] = 154086
  178  chigh[14] = 157967
  179  chigh[15] = 158603
  180  chigh[16] = 158864
  181  chigh[17] = 158893
  182  chigh[18] = 158895
  183  chigh[19] = 158896
  184  chigh[20] = 158896
  185 
  186 */
  187 
  188 static const uint8_t djw_encode_12extra[DJW_EXTRA_CODES] =
  189   {
  190     9, 10, 3, 11, 2, 12, 13, 1, 14, 15, 16, 17, 18, 19, 20,
  191   };
  192 
  193 static const uint8_t djw_encode_12basic[DJW_BASIC_CODES] =
  194   {
  195     4, 5, 6, 7, 8,
  196   };
  197 
  198 /*********************************************************************/
  199 /*                              DECLS                                */
  200 /*********************************************************************/
  201 
  202 static djw_stream*     djw_alloc           (xd3_stream *stream);
  203 static int             djw_init            (xd3_stream *stream, 
  204                         djw_stream *h,
  205                         int is_encode);
  206 static void            djw_destroy         (xd3_stream *stream,
  207                         djw_stream *h);
  208 
  209 #if XD3_ENCODER
  210 static int             xd3_encode_huff     (xd3_stream   *stream,
  211                         djw_stream  *sec_stream,
  212                         xd3_output   *input,
  213                         xd3_output   *output,
  214                         xd3_sec_cfg  *cfg);
  215 #endif
  216 
  217 static int             xd3_decode_huff     (xd3_stream     *stream,
  218                         djw_stream    *sec_stream,
  219                         const uint8_t **input,
  220                         const uint8_t  *const input_end,
  221                         uint8_t       **output,
  222                         const uint8_t  *const output_end);
  223 
  224 /*********************************************************************/
  225 /*                             HUFFMAN                               */
  226 /*********************************************************************/
  227 
  228 static djw_stream*
  229 djw_alloc (xd3_stream *stream)
  230 {
  231   return (djw_stream*) xd3_alloc (stream, sizeof (djw_stream), 1);
  232 }
  233 
  234 static int
  235 djw_init (xd3_stream *stream, djw_stream *h, int is_encode)
  236 {
  237   /* Fields are initialized prior to use. */
  238   return 0;
  239 }
  240 
  241 static void
  242 djw_destroy (xd3_stream *stream,
  243          djw_stream *h)
  244 {
  245   xd3_free (stream, h);
  246 }
  247 
  248 
  249 /*********************************************************************/
  250 /*                               HEAP                                */
  251 /*********************************************************************/
  252 
  253 static inline int
  254 heap_less (const djw_heapen *a, const djw_heapen *b)
  255 {
  256   return a->freq   < b->freq ||
  257     (a->freq  == b->freq &&
  258      a->depth  < b->depth);
  259 }
  260 
  261 static inline void
  262 heap_insert (usize_t *heap, const djw_heapen *ents, usize_t p, const usize_t e)
  263 {
  264   /* Insert ents[e] into next slot heap[p] */
  265   usize_t pp = p/2; /* P's parent */
  266 
  267   while (heap_less (& ents[e], & ents[heap[pp]]))
  268     {
  269       heap[p] = heap[pp];
  270       p  = pp;
  271       pp = p/2;
  272     }
  273 
  274   heap[p] = e;
  275 }
  276 
  277 static inline djw_heapen*
  278 heap_extract (usize_t *heap, const djw_heapen *ents, usize_t heap_last)
  279 {
  280   usize_t smallest = heap[1];
  281   usize_t p, pc, t;
  282 
  283   /* Caller decrements heap_last, so heap_last+1 is the replacement elt. */
  284   heap[1] = heap[heap_last+1];
  285 
  286   /* Re-heapify */
  287   for (p = 1; ; p = pc)
  288     {
  289       pc = p*2;
  290 
  291       /* Reached bottom of heap */
  292       if (pc > heap_last) { break; }
  293 
  294       /* See if second child is smaller. */
  295       if (pc < heap_last && heap_less (& ents[heap[pc+1]], & ents[heap[pc]]))
  296     {
  297       pc += 1;
  298     }
  299 
  300       /* If pc is not smaller than p, heap property re-established. */
  301       if (! heap_less (& ents[heap[pc]], & ents[heap[p]])) { break; }
  302 
  303       t = heap[pc];
  304       heap[pc] = heap[p];
  305       heap[p] = t;
  306     }
  307 
  308   return (djw_heapen*) & ents[smallest];
  309 }
  310 
  311 #if XD3_DEBUG
  312 static void
  313 heap_check (usize_t *heap, djw_heapen *ents, usize_t heap_last)
  314 {
  315   usize_t i;
  316   for (i = 1; i <= heap_last; i += 1)
  317     {
  318       /* Heap property: child not less than parent */
  319       XD3_ASSERT (! heap_less (& ents[heap[i]], & ents[heap[i/2]]));
  320 
  321       IF_DEBUG2 (DP(RINT "heap[%d] = %u\n", i, ents[heap[i]].freq));
  322     }
  323 }
  324 #endif
  325 
  326 /*********************************************************************/
  327 /*                             MTF, 1/2                              */
  328 /*********************************************************************/
  329 
  330 static inline usize_t
  331 djw_update_mtf (uint8_t *mtf, usize_t mtf_i)
  332 {
  333   int k;
  334   usize_t sym = mtf[mtf_i];
  335 
  336   for (k = mtf_i; k != 0; k -= 1) { mtf[k] = mtf[k-1]; }
  337 
  338   mtf[0] = sym;
  339   return sym;
  340 }
  341 
  342 static inline void
  343 djw_update_1_2 (int *mtf_run, usize_t *mtf_i,
  344         uint8_t *mtfsym, djw_weight *freq)
  345 {
  346   int code;
  347   
  348   do
  349     {
  350       /* Offset by 1, since any number of RUN_ symbols implies run>0... */
  351       *mtf_run -= 1;
  352 
  353       code = (*mtf_run & 1) ? RUN_1 : RUN_0;
  354 
  355       mtfsym[(*mtf_i)++] = code;
  356       freq[code] += 1;
  357       *mtf_run >>= 1;
  358     }
  359   while (*mtf_run >= 1);
  360 
  361   *mtf_run = 0;
  362 }
  363 
  364 static void
  365 djw_init_clen_mtf_1_2 (uint8_t *clmtf)
  366 {
  367   usize_t i, cl_i = 0;
  368 
  369   clmtf[cl_i++] = 0;
  370   for (i = 0; i < DJW_BASIC_CODES; i += 1)
  371     {
  372       clmtf[cl_i++] = djw_encode_12basic[i];
  373     }
  374   for (i = 0; i < DJW_EXTRA_CODES; i += 1)
  375     {
  376       clmtf[cl_i++] = djw_encode_12extra[i];
  377     }
  378 }
  379 
  380 /*********************************************************************/
  381 /*                           PREFIX CODES                            */
  382 /*********************************************************************/
  383 #if XD3_ENCODER
  384 static usize_t
  385 djw_build_prefix (const djw_weight *freq, uint8_t *clen, usize_t asize, usize_t maxlen)
  386 {
  387   /* Heap with 0th entry unused, prefix tree with up to ALPHABET_SIZE-1
  388    * internal nodes, never more than ALPHABET_SIZE entries actually in the
  389    * heap (minimum weight subtrees during prefix construction).  First
  390    * ALPHABET_SIZE entries are the actual symbols, next ALPHABET_SIZE-1 are
  391    * internal nodes. */
  392   djw_heapen ents[ALPHABET_SIZE * 2];
  393   usize_t heap[ALPHABET_SIZE + 1];
  394 
  395   usize_t heap_last; /* Index of the last _valid_ heap entry. */
  396   usize_t ents_size; /* Number of entries, including 0th fake entry */
  397   usize_t  overflow;  /* Number of code lengths that overflow */
  398   uint32_t total_bits;
  399   usize_t i;
  400 
  401   IF_DEBUG (uint32_t first_bits = 0);
  402 
  403   /* Insert real symbol frequences. */
  404   for (i = 0; i < asize; i += 1)
  405     {
  406       ents[i+1].freq = freq[i];
  407       IF_DEBUG2 (DP(RINT "ents[%d] = freq[%d] = %d\n",
  408             i+1, i, freq[i]));
  409     }
  410 
  411  again:
  412 
  413   /* The loop is re-entered each time an overflow occurs.  Re-initialize... */
  414   heap_last = 0;
  415   ents_size = 1;
  416   overflow  = 0;
  417   total_bits = 0;
  418 
  419   /* 0th entry terminates the while loop in heap_insert (it's the parent of
  420    * the smallest element, always less-than) */
  421   heap[0] = 0;
  422   ents[0].depth = 0;
  423   ents[0].freq  = 0;
  424 
  425   /* Initial heap. */
  426   for (i = 0; i < asize; i += 1, ents_size += 1)
  427     {
  428       ents[ents_size].depth  = 0;
  429       ents[ents_size].parent = 0;
  430 
  431       if (ents[ents_size].freq != 0)
  432     {
  433       heap_insert (heap, ents, ++heap_last, ents_size);
  434     }
  435     }
  436 
  437   IF_DEBUG (heap_check (heap, ents, heap_last));
  438 
  439   /* Must be at least one symbol, or else we can't get here. */
  440   XD3_ASSERT (heap_last != 0);
  441 
  442   /* If there is only one symbol, fake a second to prevent zero-length
  443    * codes. */
  444   if (heap_last == 1)
  445     {
  446       /* Pick either the first or last symbol. */
  447       int s = freq[0] ? asize-1 : 0;
  448       ents[s+1].freq = 1;
  449       goto again;
  450     }
  451 
  452   /* Build prefix tree. */
  453   while (heap_last > 1)
  454     {
  455       djw_heapen *h1 = heap_extract (heap, ents, --heap_last);
  456       djw_heapen *h2 = heap_extract (heap, ents, --heap_last);
  457 
  458       ents[ents_size].freq   = h1->freq + h2->freq;
  459       ents[ents_size].depth  = 1 + xd3_max (h1->depth, h2->depth);
  460       ents[ents_size].parent = 0;
  461 
  462       h1->parent = h2->parent = ents_size;
  463 
  464       heap_insert (heap, ents, ++heap_last, ents_size++);
  465     }
  466 
  467   IF_DEBUG (heap_check (heap, ents, heap_last));
  468 
  469   /* Now compute prefix code lengths, counting parents. */
  470   for (i = 1; i < asize+1; i += 1)
  471     {
  472       usize_t b = 0;
  473 
  474       if (ents[i].freq != 0)
  475     {
  476       usize_t p = i;
  477 
  478       while ((p = ents[p].parent) != 0) { b += 1; }
  479 
  480       if (b > maxlen) { overflow = 1; }
  481 
  482       total_bits += b * freq[i-1];
  483     }
  484 
  485       /* clen is 0-origin, unlike ents. */
  486       IF_DEBUG2 (DP(RINT "clen[%d] = %d\n", i-1, b));
  487       clen[i-1] = b;
  488     }
  489 
  490   IF_DEBUG (if (first_bits == 0) first_bits = total_bits);
  491 
  492   if (! overflow)
  493     {
  494       IF_DEBUG2 (if (first_bits != total_bits)
  495       {
  496     DP(RINT "code length overflow changed %u bits\n",
  497        (usize_t)(total_bits - first_bits));
  498       });
  499       return total_bits;
  500     }
  501 
  502   /* OPT: There is a non-looping way to fix overflow shown in zlib, but this
  503    * is easier (for now), as done in bzip2. */
  504   for (i = 1; i < asize+1; i += 1)
  505     {
  506       ents[i].freq = ents[i].freq / 2 + 1;
  507     }
  508 
  509   goto again;
  510 }
  511 
  512 static void
  513 djw_build_codes (usize_t *codes, const uint8_t *clen, usize_t asize, usize_t abs_max)
  514 {
  515   usize_t i, l;
  516   usize_t min_clen = DJW_MAX_CODELEN;
  517   usize_t max_clen = 0;
  518   usize_t code = 0;
  519 
  520   /* Find the min and max code length */
  521   for (i = 0; i < asize; i += 1)
  522     {
  523       if (clen[i] > 0 && clen[i] < min_clen)
  524     {
  525       min_clen = clen[i];
  526     }
  527 
  528       max_clen = xd3_max (max_clen, (usize_t) clen[i]);
  529     }
  530 
  531   XD3_ASSERT (max_clen <= abs_max);
  532 
  533   /* Generate a code for each symbol with the appropriate length. */
  534   for (l = min_clen; l <= max_clen; l += 1)
  535     {
  536       for (i = 0; i < asize; i += 1)
  537     {
  538       if (clen[i] == l)
  539         {
  540           codes[i] = code++;
  541         } 
  542     }
  543 
  544       code <<= 1;
  545     }
  546 
  547   IF_DEBUG2 ({
  548       for (i = 0; i < asize; i += 1)
  549     {
  550       DP(RINT "code[%d] = %u\n", i, codes[i]);
  551     }
  552     });
  553 }
  554 
  555 /*********************************************************************/
  556 /*                MOVE-TO-FRONT                          */
  557 /*********************************************************************/
  558 static void
  559 djw_compute_mtf_1_2 (djw_prefix  *prefix,
  560              uint8_t     *mtf,
  561              djw_weight  *freq_out,
  562              usize_t      nsym)
  563 {
  564   size_t i, j, k;
  565   usize_t sym;
  566   usize_t size = prefix->scount;
  567   usize_t mtf_i = 0;
  568   int mtf_run = 0;
  569 
  570   /* This +2 is for the RUN_0, RUN_1 codes */
  571   memset (freq_out, 0, sizeof (freq_out[0]) * (nsym+2));
  572 
  573   for (i = 0; i < size; )
  574     {
  575       /* OPT: Bzip optimizes this algorithm a little by effectively checking
  576        * j==0 before the MTF update. */
  577       sym = prefix->symbol[i++];
  578 
  579       for (j = 0; mtf[j] != sym; j += 1) { }
  580 
  581       XD3_ASSERT (j <= nsym);
  582 
  583       for (k = j; k >= 1; k -= 1) { mtf[k] = mtf[k-1]; }
  584 
  585       mtf[0] = sym;
  586 
  587       if (j == 0)
  588     {
  589       mtf_run += 1;
  590       continue;
  591     }
  592 
  593       if (mtf_run > 0)
  594     {
  595       djw_update_1_2 (& mtf_run, & mtf_i, prefix->mtfsym, freq_out);
  596     }
  597 
  598       /* Non-zero symbols are offset by RUN_1 */
  599       prefix->mtfsym[mtf_i++] = (uint8_t)(j+RUN_1);
  600       freq_out[j+RUN_1] += 1;
  601     }
  602 
  603   if (mtf_run > 0)
  604     {
  605       djw_update_1_2 (& mtf_run, & mtf_i, prefix->mtfsym, freq_out);
  606     }
  607 
  608   prefix->mcount = mtf_i;
  609 }
  610 
  611 /* Counts character frequencies of the input buffer, returns the size. */
  612 static usize_t
  613 djw_count_freqs (djw_weight *freq, xd3_output *input)
  614 {
  615   xd3_output *in;
  616   usize_t size = 0;
  617 
  618   memset (freq, 0, sizeof (freq[0]) * ALPHABET_SIZE);
  619 
  620   for (in = input; in; in = in->next_page)
  621     {
  622       const uint8_t *p     = in->base;
  623       const uint8_t *p_max = p + in->next;
  624 
  625       size += in->next;
  626 
  627       do
  628     {
  629       ++freq[*p];
  630     }
  631       while (++p < p_max);
  632     }
  633 
  634   IF_DEBUG2 ({int i;
  635   DP(RINT "freqs: ");
  636   for (i = 0; i < ALPHABET_SIZE; i += 1)
  637     {
  638       DP(RINT "%u ", freq[i]);
  639     }
  640   DP(RINT "\n");});
  641 
  642   return size;
  643 }
  644 
  645 static void
  646 djw_compute_multi_prefix (usize_t     groups,
  647               uint8_t     clen[DJW_MAX_GROUPS][ALPHABET_SIZE],
  648               djw_prefix *prefix)
  649 {
  650   usize_t gp, i;
  651       
  652   prefix->scount = ALPHABET_SIZE;
  653   memcpy (prefix->symbol, clen[0], ALPHABET_SIZE);
  654 
  655   for (gp = 1; gp < groups; gp += 1)
  656     {
  657       for (i = 0; i < ALPHABET_SIZE; i += 1)
  658     {
  659       if (clen[gp][i] == 0)
  660         {
  661           continue;
  662         }
  663 
  664       prefix->symbol[prefix->scount++] = clen[gp][i];
  665     }
  666     }
  667 }
  668 
  669 static void
  670 djw_compute_prefix_1_2 (djw_prefix *prefix, djw_weight *freq)
  671 {
  672   /* This +1 is for the 0 code-length. */
  673   uint8_t clmtf[DJW_MAX_CODELEN+1];
  674 
  675   djw_init_clen_mtf_1_2 (clmtf);
  676 
  677   djw_compute_mtf_1_2 (prefix, clmtf, freq, DJW_MAX_CODELEN);
  678 }
  679 
  680 static int
  681 djw_encode_prefix (xd3_stream   *stream,
  682            xd3_output  **output,
  683            bit_state    *bstate,
  684            djw_prefix   *prefix)
  685 {
  686   int ret;
  687   size_t i;
  688   usize_t num_to_encode;
  689   djw_weight clfreq[DJW_TOTAL_CODES];
  690   uint8_t    clclen[DJW_TOTAL_CODES];
  691   usize_t    clcode[DJW_TOTAL_CODES];
  692 
  693   /* Move-to-front encode prefix symbols, count frequencies */
  694   djw_compute_prefix_1_2 (prefix, clfreq);
  695 
  696   /* Compute codes */
  697   djw_build_prefix (clfreq, clclen, DJW_TOTAL_CODES, DJW_MAX_CLCLEN);
  698   djw_build_codes  (clcode, clclen, DJW_TOTAL_CODES, DJW_MAX_CLCLEN);
  699 
  700   /* Compute number of extra codes beyond basic ones for this template. */
  701   num_to_encode = DJW_TOTAL_CODES;
  702   while (num_to_encode > DJW_EXTRA_12OFFSET && clclen[num_to_encode-1] == 0)
  703     {
  704       num_to_encode -= 1;
  705     }
  706   XD3_ASSERT (num_to_encode - DJW_EXTRA_12OFFSET < (1 << DJW_EXTRA_CODE_BITS));
  707 
  708   /* Encode: # of extra codes */
  709   if ((ret = xd3_encode_bits (stream, output, bstate, DJW_EXTRA_CODE_BITS,
  710                   num_to_encode - DJW_EXTRA_12OFFSET)))
  711     {
  712       return ret;
  713     }
  714 
  715   /* Encode: MTF code lengths */
  716   for (i = 0; i < num_to_encode; i += 1)
  717     {
  718       if ((ret = xd3_encode_bits (stream, output, bstate,
  719                   DJW_CLCLEN_BITS, clclen[i])))
  720     {
  721       return ret;
  722     }
  723     }
  724 
  725   /* Encode: CLEN code lengths */
  726   for (i = 0; i < prefix->mcount; i += 1)
  727     {
  728       usize_t mtf_sym = prefix->mtfsym[i];
  729       usize_t bits    = clclen[mtf_sym];
  730       usize_t code    = clcode[mtf_sym];
  731 
  732       if ((ret = xd3_encode_bits (stream, output, bstate, bits, code)))
  733     {
  734       return ret;
  735     }
  736     }
  737 
  738   return 0;
  739 }
  740 
  741 static void
  742 djw_compute_selector_1_2 (djw_prefix *prefix,
  743               usize_t     groups,
  744               djw_weight *gbest_freq)
  745 {
  746   uint8_t grmtf[DJW_MAX_GROUPS];
  747   usize_t i;
  748 
  749   for (i = 0; i < groups; i += 1) { grmtf[i] = i; }
  750 
  751   djw_compute_mtf_1_2 (prefix, grmtf, gbest_freq, groups);
  752 }
  753 
  754 static int
  755 xd3_encode_howmany_groups (xd3_stream *stream,
  756                xd3_sec_cfg *cfg,
  757                usize_t input_size,
  758                usize_t *ret_groups,
  759                usize_t *ret_sector_size)
  760 {
  761   usize_t cfg_groups = 0;
  762   usize_t cfg_sector_size = 0;
  763   usize_t sugg_groups = 0;
  764   usize_t sugg_sector_size = 0;
  765 
  766   if (cfg->ngroups != 0)
  767     {
  768       if (cfg->ngroups > DJW_MAX_GROUPS)
  769     {
  770       stream->msg = "invalid secondary encoder group number";
  771       return XD3_INTERNAL;
  772     }
  773 
  774       cfg_groups = cfg->ngroups;
  775     }
  776 
  777   if (cfg->sector_size != 0)
  778     {
  779       if (cfg->sector_size < DJW_SECTORSZ_MULT ||
  780       cfg->sector_size > DJW_SECTORSZ_MAX ||
  781       (cfg->sector_size % DJW_SECTORSZ_MULT) != 0)
  782     {
  783       stream->msg = "invalid secondary encoder sector size";
  784       return XD3_INTERNAL;
  785     }
  786 
  787       cfg_sector_size = cfg->sector_size;
  788     }
  789 
  790   if (cfg_groups == 0 || cfg_sector_size == 0)
  791     {
  792       /* These values were found empirically using xdelta3-tune around version
  793        * xdfs-0.256. */
  794       switch (cfg->data_type)
  795     {
  796     case DATA_SECTION:
  797       if      (input_size < 1000)   { sugg_groups = 1; sugg_sector_size = 0; }
  798       else if (input_size < 4000)   { sugg_groups = 2; sugg_sector_size = 10; }
  799       else if (input_size < 7000)   { sugg_groups = 3; sugg_sector_size = 10; }
  800       else if (input_size < 10000)  { sugg_groups = 4; sugg_sector_size = 10; }
  801       else if (input_size < 25000)  { sugg_groups = 5; sugg_sector_size = 10; }
  802       else if (input_size < 50000)  { sugg_groups = 7; sugg_sector_size = 20; }
  803       else if (input_size < 100000) { sugg_groups = 8; sugg_sector_size = 30; }
  804       else                          { sugg_groups = 8; sugg_sector_size = 70; }
  805       break;
  806     case INST_SECTION:
  807       if      (input_size < 7000)   { sugg_groups = 1; sugg_sector_size = 0; }
  808       else if (input_size < 10000)  { sugg_groups = 2; sugg_sector_size = 50; }
  809       else if (input_size < 25000)  { sugg_groups = 3; sugg_sector_size = 50; }
  810       else if (input_size < 50000)  { sugg_groups = 6; sugg_sector_size = 40; }
  811       else if (input_size < 100000) { sugg_groups = 8; sugg_sector_size = 40; }
  812       else                          { sugg_groups = 8; sugg_sector_size = 40; }
  813       break;
  814     case ADDR_SECTION:
  815       if      (input_size < 9000)   { sugg_groups = 1; sugg_sector_size = 0; }
  816       else if (input_size < 25000)  { sugg_groups = 2; sugg_sector_size = 130; }
  817       else if (input_size < 50000)  { sugg_groups = 3; sugg_sector_size = 130; }
  818       else if (input_size < 100000) { sugg_groups = 5; sugg_sector_size = 130; }
  819       else                          { sugg_groups = 7; sugg_sector_size = 130; }
  820       break;
  821     }
  822 
  823       if (cfg_groups == 0)
  824     {
  825       cfg_groups = sugg_groups;
  826     }
  827 
  828       if (cfg_sector_size == 0)
  829     {
  830       cfg_sector_size = sugg_sector_size;
  831     }
  832     }
  833 
  834   if (cfg_groups != 1 && cfg_sector_size == 0)
  835     {
  836       switch (cfg->data_type)
  837     {
  838     case DATA_SECTION:
  839       cfg_sector_size = 20;
  840       break;
  841     case INST_SECTION:
  842       cfg_sector_size = 50;
  843       break;
  844     case ADDR_SECTION:
  845       cfg_sector_size = 130;
  846       break;
  847     }
  848     }
  849 
  850   (*ret_groups)     = cfg_groups;
  851   (*ret_sector_size) = cfg_sector_size;
  852 
  853   XD3_ASSERT (cfg_groups > 0 && cfg_groups <= DJW_MAX_GROUPS);
  854   XD3_ASSERT (cfg_groups == 1 ||
  855           (cfg_sector_size >= DJW_SECTORSZ_MULT &&
  856            cfg_sector_size <= DJW_SECTORSZ_MAX));
  857 
  858   return 0;
  859 }
  860 
  861 static int
  862 xd3_encode_huff (xd3_stream   *stream,
  863          djw_stream   *h,
  864          xd3_output   *input,
  865          xd3_output   *output,
  866          xd3_sec_cfg  *cfg)
  867 {
  868   int         ret;
  869   usize_t     groups, sector_size;
  870   bit_state   bstate = BIT_STATE_ENCODE_INIT;
  871   xd3_output *in;
  872   usize_t     output_bits;
  873   usize_t     input_bits;
  874   usize_t     input_bytes;
  875   usize_t     initial_offset = output->next;
  876   djw_weight  real_freq[ALPHABET_SIZE];
  877   uint8_t    *gbest = NULL;
  878   uint8_t    *gbest_mtf = NULL;
  879 
  880   input_bytes = djw_count_freqs (real_freq, input);
  881   input_bits  = input_bytes * 8;
  882 
  883   XD3_ASSERT (input_bytes > 0);
  884 
  885   if ((ret = xd3_encode_howmany_groups (stream, cfg, input_bytes,
  886                     & groups, & sector_size)))
  887     {
  888       return ret;
  889     }
  890 
  891   if (0)
  892     {
  893     regroup:
  894       /* Sometimes we dynamically decide there are too many groups.  Arrive
  895        * here. */
  896       output->next = initial_offset;
  897       xd3_bit_state_encode_init (& bstate);
  898     }
  899 
  900   /* Encode: # of groups (3 bits) */
  901   if ((ret = xd3_encode_bits (stream, & output, & bstate,
  902                   DJW_GROUP_BITS, groups-1))) { goto failure; }
  903 
  904   if (groups == 1)
  905     {
  906       /* Single Huffman group. */
  907       usize_t    code[ALPHABET_SIZE]; /* Codes */
  908       uint8_t    clen[ALPHABET_SIZE];
  909       uint8_t    prefix_mtfsym[ALPHABET_SIZE];
  910       djw_prefix prefix;
  911 
  912       output_bits =
  913     djw_build_prefix (real_freq, clen, ALPHABET_SIZE, DJW_MAX_CODELEN);
  914       djw_build_codes (code, clen, ALPHABET_SIZE, DJW_MAX_CODELEN);
  915 
  916       if (output_bits + EFFICIENCY_BITS >= input_bits && ! cfg->inefficient)
  917     {
  918       goto nosecond;
  919     }
  920 
  921       /* Encode: prefix */
  922       prefix.mtfsym = prefix_mtfsym;
  923       prefix.symbol = clen;
  924       prefix.scount = ALPHABET_SIZE;
  925 
  926       if ((ret = djw_encode_prefix (stream, & output, & bstate, & prefix)))
  927     {
  928       goto failure;
  929     }
  930 
  931       if (output_bits + (8 * output->next) + EFFICIENCY_BITS >=
  932       input_bits && ! cfg->inefficient)
  933     {
  934       goto nosecond;
  935     }
  936 
  937       /* Encode: data */
  938       for (in = input; in; in = in->next_page)
  939     {
  940       const uint8_t *p     = in->base;
  941       const uint8_t *p_max = p + in->next;
  942 
  943       do
  944         {
  945           usize_t sym  = *p++;
  946           usize_t bits = clen[sym];
  947 
  948           IF_DEBUG (output_bits -= bits);
  949 
  950           if ((ret = xd3_encode_bits (stream, & output,
  951                       & bstate, bits, code[sym])))
  952         {
  953           goto failure;
  954         }
  955         }
  956       while (p < p_max);
  957     }
  958 
  959       XD3_ASSERT (output_bits == 0);
  960     }
  961   else
  962     {
  963       /* DJW Huffman */
  964       djw_weight evolve_freq[DJW_MAX_GROUPS][ALPHABET_SIZE];
  965       uint8_t evolve_clen[DJW_MAX_GROUPS][ALPHABET_SIZE];
  966       djw_weight left = input_bytes;
  967       usize_t gp;
  968       usize_t niter = 0;
  969       usize_t select_bits;
  970       usize_t sym1 = 0, sym2 = 0, s;
  971       usize_t gcost[DJW_MAX_GROUPS];
  972       usize_t gbest_code[DJW_MAX_GROUPS+2];
  973       uint8_t gbest_clen[DJW_MAX_GROUPS+2];
  974       usize_t  gbest_max = 1 + (input_bytes - 1) / sector_size;
  975       usize_t best_bits = 0;
  976       usize_t  gbest_no;
  977       usize_t  gpcnt;
  978       const uint8_t *p;
  979       IF_DEBUG2 (usize_t gcount[DJW_MAX_GROUPS]);
  980 
  981       /* Encode: sector size (5 bits) */
  982       if ((ret = xd3_encode_bits (stream, & output, & bstate,
  983                   DJW_SECTORSZ_BITS,
  984                   (sector_size/DJW_SECTORSZ_MULT)-1)))
  985     {
  986       goto failure;
  987     }
  988 
  989       /* Dynamic allocation. */
  990       if (gbest == NULL)
  991     {
  992       if ((gbest = (uint8_t*) xd3_alloc (stream, gbest_max, 1)) == NULL)
  993         {
  994           ret = ENOMEM;
  995           goto failure;
  996         }
  997     }
  998 
  999       if (gbest_mtf == NULL)
 1000     {
 1001       if ((gbest_mtf = (uint8_t*) xd3_alloc (stream, gbest_max, 1)) == NULL)
 1002         {
 1003           ret = ENOMEM;
 1004           goto failure;
 1005         }
 1006     }
 1007 
 1008       /* OPT: Some of the inner loops can be optimized, as shown in bzip2 */
 1009 
 1010       /* Generate initial code length tables. */
 1011       for (gp = 0; gp < groups; gp += 1)
 1012     {
 1013       djw_weight sum  = 0;
 1014       djw_weight goal = left / (groups - gp);
 1015 
 1016       IF_DEBUG2 (usize_t nz = 0);
 1017 
 1018       /* Due to the single-code granularity of this distribution, it may
 1019        * be that we can't generate a distribution for each group.  In that
 1020        * case subtract one group and try again.  If (inefficient), we're
 1021        * testing group behavior, so don't mess things up. */
 1022       if (goal == 0 && !cfg->inefficient)
 1023         {
 1024           IF_DEBUG2 (DP(RINT "too many groups (%u), dropping one\n",
 1025                 groups));
 1026           groups -= 1;
 1027           goto regroup;
 1028         }
 1029 
 1030       /* Sum == goal is possible when (cfg->inefficient)... */
 1031       while (sum < goal)
 1032         {
 1033           XD3_ASSERT (sym2 < ALPHABET_SIZE);
 1034           IF_DEBUG2 (nz += real_freq[sym2] != 0);
 1035           sum += real_freq[sym2++];
 1036         }
 1037 
 1038       IF_DEBUG2(DP(RINT "group %u has symbols %u..%u (%u non-zero) "
 1039                "(%u/%u = %.3f)\n",
 1040                gp, sym1, sym2, nz, sum,
 1041                input_bytes, sum / (double)input_bytes););
 1042 
 1043       for (s = 0; s < ALPHABET_SIZE; s += 1)
 1044         {
 1045           evolve_clen[gp][s] = (s >= sym1 && s <= sym2) ? 1 : 16;
 1046         }
 1047 
 1048       left -= sum;
 1049       sym1  = sym2+1;
 1050     }
 1051 
 1052     repeat:
 1053 
 1054       niter += 1;
 1055       gbest_no = 0;
 1056       memset (evolve_freq, 0, sizeof (evolve_freq[0]) * groups);
 1057       IF_DEBUG2 (memset (gcount, 0, sizeof (gcount[0]) * groups));
 1058 
 1059       /* For each input page (loop is irregular to allow non-pow2-size group
 1060        * size. */
 1061       in = input;
 1062       p  = in->base;
 1063 
 1064       /* For each group-size sector. */
 1065       do
 1066     {
 1067       const uint8_t *p0  = p;
 1068       xd3_output    *in0 = in;
 1069       usize_t best   = 0;
 1070       usize_t winner = 0;
 1071 
 1072       /* Select best group for each sector, update evolve_freq. */
 1073       memset (gcost, 0, sizeof (gcost[0]) * groups);
 1074 
 1075       /* For each byte in sector. */
 1076       for (gpcnt = 0; gpcnt < sector_size; gpcnt += 1)
 1077         {
 1078           /* For each group. */
 1079           for (gp = 0; gp < groups; gp += 1)
 1080         {
 1081           gcost[gp] += evolve_clen[gp][*p];
 1082         }
 1083 
 1084           /* Check end-of-input-page. */
 1085 #             define GP_PAGE()                \
 1086           if ((usize_t)(++p - in->base) == in->next) \
 1087         {                             \
 1088           in = in->next_page;         \
 1089           if (in == NULL) { break; }  \
 1090           p  = in->base;              \
 1091         }
 1092 
 1093           GP_PAGE ();
 1094         }
 1095 
 1096       /* Find min cost group for this sector */
 1097       best = USIZE_T_MAX;
 1098       for (gp = 0; gp < groups; gp += 1)
 1099         {
 1100           if (gcost[gp] < best) 
 1101         { 
 1102           best = gcost[gp]; 
 1103           winner = gp; 
 1104         }
 1105         }
 1106 
 1107       XD3_ASSERT(gbest_no < gbest_max);
 1108       gbest[gbest_no++] = winner;
 1109       IF_DEBUG2 (gcount[winner] += 1);
 1110 
 1111       p  = p0;
 1112       in = in0;
 1113 
 1114       /* Update group frequencies. */
 1115       for (gpcnt = 0; gpcnt < sector_size; gpcnt += 1)
 1116         {
 1117           evolve_freq[winner][*p] += 1;
 1118 
 1119           GP_PAGE ();
 1120         }
 1121     }
 1122       while (in != NULL);
 1123 
 1124       XD3_ASSERT (gbest_no == gbest_max);
 1125 
 1126       /* Recompute code lengths. */
 1127       output_bits = 0;
 1128       for (gp = 0; gp < groups; gp += 1)
 1129     {
 1130       int i;
 1131       uint8_t evolve_zero[ALPHABET_SIZE];
 1132       int any_zeros = 0;
 1133 
 1134       memset (evolve_zero, 0, sizeof (evolve_zero));
 1135 
 1136       /* Cannot allow a zero clen when the real frequency is non-zero.
 1137        * Note: this means we are going to encode a fairly long code for
 1138        * these unused entries.  An improvement would be to implement a
 1139        * NOTUSED code for when these are actually zero, but this requires
 1140        * another data structure (evolve_zero) since we don't know when
 1141        * evolve_freq[i] == 0...  Briefly tested, looked worse. */
 1142       for (i = 0; i < ALPHABET_SIZE; i += 1)
 1143         {
 1144           if (evolve_freq[gp][i] == 0 && real_freq[i] != 0)
 1145         {
 1146           evolve_freq[gp][i] = 1;
 1147           evolve_zero[i] = 1;
 1148           any_zeros = 1;
 1149         }
 1150         }
 1151 
 1152       output_bits += djw_build_prefix (evolve_freq[gp], evolve_clen[gp],
 1153                        ALPHABET_SIZE, DJW_MAX_CODELEN);
 1154 
 1155       /* The above faking of frequencies does not matter for the last
 1156        * iteration, but we don't know when that is yet.  However, it also
 1157        * breaks the output_bits computation.  Necessary for accuracy, and
 1158        * for the (output_bits==0) assert after all bits are output. */
 1159       if (any_zeros)
 1160         {
 1161           IF_DEBUG2 (usize_t save_total = output_bits);
 1162 
 1163           for (i = 0; i < ALPHABET_SIZE; i += 1)
 1164         {
 1165           if (evolve_zero[i]) { output_bits -= evolve_clen[gp][i]; }
 1166         }
 1167 
 1168           IF_DEBUG2 (DP(RINT "evolve_zero reduced %u bits in group %u\n",
 1169                 save_total - output_bits, gp));
 1170         }
 1171     }
 1172 
 1173       IF_DEBUG2(
 1174     DP(RINT "pass %u total bits: %u group uses: ", niter, output_bits);
 1175     for (gp = 0; gp < groups; gp += 1) { DP(RINT "%u ", gcount[gp]); }
 1176     DP(RINT "\n");
 1177     );
 1178 
 1179       /* End iteration. */
 1180 
 1181       IF_DEBUG2 (if (niter > 1 && best_bits < output_bits) {
 1182     DP(RINT "iteration lost %u bits\n", output_bits - best_bits); });
 1183 
 1184       if (niter == 1 || (niter < DJW_MAX_ITER &&
 1185              (best_bits - output_bits) >= DJW_MIN_IMPROVEMENT))
 1186     {
 1187       best_bits = output_bits;
 1188       goto repeat;
 1189     }
 1190 
 1191       /* Efficiency check. */
 1192       if (output_bits + EFFICIENCY_BITS >= input_bits && ! cfg->inefficient)
 1193     {
 1194       goto nosecond;
 1195     }
 1196 
 1197       IF_DEBUG2 (DP(RINT "djw compression: %u -> %0.3f\n",
 1198             input_bytes, output_bits / 8.0));
 1199 
 1200       /* Encode: prefix */
 1201       {
 1202     uint8_t     prefix_symbol[DJW_MAX_GROUPS * ALPHABET_SIZE];
 1203     uint8_t     prefix_mtfsym[DJW_MAX_GROUPS * ALPHABET_SIZE];
 1204     uint8_t     prefix_repcnt[DJW_MAX_GROUPS * ALPHABET_SIZE];
 1205     djw_prefix prefix;
 1206 
 1207     prefix.symbol = prefix_symbol;
 1208     prefix.mtfsym = prefix_mtfsym;
 1209     prefix.repcnt = prefix_repcnt;
 1210 
 1211     djw_compute_multi_prefix (groups, evolve_clen, & prefix);
 1212     if ((ret = djw_encode_prefix (stream, & output, & bstate, & prefix)))
 1213       {
 1214         goto failure;
 1215       }
 1216       }
 1217 
 1218       /* Encode: selector frequencies */
 1219       {
 1220     /* DJW_MAX_GROUPS +2 is for RUN_0, RUN_1 symbols. */
 1221     djw_weight gbest_freq[DJW_MAX_GROUPS+2];
 1222     djw_prefix gbest_prefix;
 1223     usize_t i;
 1224 
 1225     gbest_prefix.scount = gbest_no;
 1226     gbest_prefix.symbol = gbest;
 1227     gbest_prefix.mtfsym = gbest_mtf;
 1228 
 1229     djw_compute_selector_1_2 (& gbest_prefix, groups, gbest_freq);
 1230 
 1231     select_bits =
 1232       djw_build_prefix (gbest_freq, gbest_clen, groups+1, DJW_MAX_GBCLEN);
 1233     djw_build_codes  (gbest_code, gbest_clen, groups+1, DJW_MAX_GBCLEN);
 1234 
 1235     for (i = 0; i < groups+1; i += 1)
 1236       {
 1237         if ((ret = xd3_encode_bits (stream, & output, & bstate,
 1238                     DJW_GBCLEN_BITS, gbest_clen[i])))
 1239           {
 1240         goto failure;
 1241           }
 1242       }
 1243 
 1244     for (i = 0; i < gbest_prefix.mcount; i += 1)
 1245       {
 1246         usize_t gp_mtf      = gbest_mtf[i];
 1247         usize_t gp_sel_bits = gbest_clen[gp_mtf];
 1248         usize_t gp_sel_code = gbest_code[gp_mtf];
 1249 
 1250         XD3_ASSERT (gp_mtf < groups+1);
 1251 
 1252         if ((ret = xd3_encode_bits (stream, & output, & bstate,
 1253                     gp_sel_bits, gp_sel_code)))
 1254           {
 1255         goto failure;
 1256           }
 1257 
 1258         IF_DEBUG (select_bits -= gp_sel_bits);
 1259       }
 1260 
 1261     XD3_ASSERT (select_bits == 0);
 1262       }
 1263 
 1264       /* Efficiency check. */
 1265       if (output_bits + select_bits + (8 * output->next) +
 1266       EFFICIENCY_BITS >= input_bits && ! cfg->inefficient)
 1267     {
 1268       goto nosecond;
 1269     }
 1270 
 1271       /* Encode: data */
 1272       {
 1273     usize_t evolve_code[DJW_MAX_GROUPS][ALPHABET_SIZE];
 1274     usize_t sector = 0;
 1275 
 1276     /* Build code tables for each group. */
 1277     for (gp = 0; gp < groups; gp += 1)
 1278       {
 1279         djw_build_codes (evolve_code[gp], evolve_clen[gp],
 1280                  ALPHABET_SIZE, DJW_MAX_CODELEN);
 1281       }
 1282 
 1283     /* Now loop over the input. */
 1284     in = input;
 1285     p  = in->base;
 1286 
 1287     do
 1288       {
 1289         /* For each sector. */
 1290         usize_t   gp_best  = gbest[sector];
 1291         usize_t *gp_codes = evolve_code[gp_best];
 1292         uint8_t *gp_clens = evolve_clen[gp_best];
 1293 
 1294         XD3_ASSERT (sector < gbest_no);
 1295 
 1296         sector += 1;
 1297 
 1298         /* Encode the sector data. */
 1299         for (gpcnt = 0; gpcnt < sector_size; gpcnt += 1)
 1300           {
 1301         usize_t sym  = *p;
 1302         usize_t bits = gp_clens[sym];
 1303         usize_t code = gp_codes[sym];
 1304 
 1305         IF_DEBUG (output_bits -= bits);
 1306 
 1307         if ((ret = xd3_encode_bits (stream, & output, & bstate,
 1308                         bits, code)))
 1309           {
 1310             goto failure;
 1311           }
 1312 
 1313         GP_PAGE ();
 1314           }
 1315       }
 1316     while (in != NULL);
 1317 
 1318     XD3_ASSERT (select_bits == 0);
 1319     XD3_ASSERT (output_bits == 0);
 1320       }
 1321     }
 1322 
 1323   ret = xd3_flush_bits (stream, & output, & bstate);
 1324 
 1325   if (0)
 1326     {
 1327     nosecond:
 1328       stream->msg = "secondary compression was inefficient";
 1329       ret = XD3_NOSECOND;
 1330     }
 1331 
 1332  failure:
 1333 
 1334   xd3_free (stream, gbest);
 1335   xd3_free (stream, gbest_mtf);
 1336   return ret;
 1337 }
 1338 #endif /* XD3_ENCODER */
 1339 
 1340 /*********************************************************************/
 1341 /*                              DECODE                               */
 1342 /*********************************************************************/
 1343 
 1344 static void
 1345 djw_build_decoder (xd3_stream    *stream,
 1346            usize_t        asize,
 1347            usize_t        abs_max,
 1348            const uint8_t *clen,
 1349            uint8_t       *inorder,
 1350            usize_t       *base,
 1351            usize_t       *limit,
 1352            usize_t       *min_clenp,
 1353            usize_t       *max_clenp)
 1354 {
 1355   usize_t i, l;
 1356   const uint8_t *ci;
 1357   usize_t nr_clen [DJW_TOTAL_CODES];
 1358   usize_t tmp_base[DJW_TOTAL_CODES];
 1359   usize_t min_clen;
 1360   usize_t max_clen;
 1361 
 1362   /* Assumption: the two temporary arrays are large enough to hold abs_max. */
 1363   XD3_ASSERT (abs_max <= DJW_MAX_CODELEN);
 1364 
 1365   /* This looks something like the start of zlib's inftrees.c */
 1366   memset (nr_clen, 0, sizeof (nr_clen[0]) * (abs_max+1));
 1367 
 1368   /* Count number of each code length */
 1369   i  = asize;
 1370   ci = clen;
 1371   do
 1372     {
 1373       /* Caller _must_ check that values are in-range.  Most of the time the
 1374        * caller decodes a specific number of bits, which imply the max value,
 1375        * and the other time the caller decodes a huffman value, which must be
 1376        * in-range.  Therefore, its an assertion and this function cannot
 1377        * otherwise fail. */
 1378       XD3_ASSERT (*ci <= abs_max);
 1379 
 1380       nr_clen[*ci++]++;
 1381     }
 1382   while (--i != 0);
 1383 
 1384   /* Compute min, max. */
 1385   for (i = 1; i <= abs_max; i += 1) { if (nr_clen[i]) { break; } }
 1386   min_clen = i;
 1387   for (i = abs_max; i != 0; i -= 1) { if (nr_clen[i]) { break; } }
 1388   max_clen = i;
 1389 
 1390   /* Fill the BASE, LIMIT table. */
 1391   tmp_base[min_clen] = 0;
 1392   base[min_clen]     = 0;
 1393   limit[min_clen]    = nr_clen[min_clen] - 1;
 1394   for (i = min_clen + 1; i <= max_clen; i += 1)
 1395     {
 1396       usize_t last_limit = ((limit[i-1] + 1) << 1);
 1397       tmp_base[i] = tmp_base[i-1] + nr_clen[i-1];
 1398       limit[i]    = last_limit + nr_clen[i] - 1;
 1399       base[i]     = last_limit - tmp_base[i];
 1400     }
 1401 
 1402   /* Fill the inorder array, canonically ordered codes. */
 1403   ci = clen;
 1404   for (i = 0; i < asize; i += 1)
 1405     {
 1406       if ((l = *ci++) != 0)
 1407     {
 1408       inorder[tmp_base[l]++] = i;
 1409     }
 1410     }
 1411 
 1412   *min_clenp = min_clen;
 1413   *max_clenp = max_clen;
 1414 }
 1415 
 1416 static inline int
 1417 djw_decode_symbol (xd3_stream     *stream,
 1418            bit_state      *bstate,
 1419            const uint8_t **input,
 1420            const uint8_t  *input_end,
 1421            const uint8_t  *inorder,
 1422            const usize_t  *base,
 1423            const usize_t  *limit,
 1424            usize_t         min_clen,
 1425            usize_t         max_clen,
 1426            usize_t         *sym,
 1427            usize_t          max_sym)
 1428 {
 1429   usize_t code = 0;
 1430   usize_t bits = 0;
 1431 
 1432   /* OPT: Supposedly a small lookup table improves speed here... */
 1433 
 1434   /* Code outline is similar to xd3_decode_bits... */
 1435   if (bstate->cur_mask == 0x100) { goto next_byte; }
 1436 
 1437   for (;;)
 1438     {
 1439       do
 1440     {
 1441       if (bits == max_clen) { goto corrupt; }
 1442 
 1443       bits += 1;
 1444       code  = (code << 1);
 1445 
 1446       if (bstate->cur_byte & bstate->cur_mask) { code |= 1; }
 1447 
 1448       bstate->cur_mask <<= 1;
 1449 
 1450       if (bits >= min_clen && code <= limit[bits]) { goto done; }
 1451     }
 1452       while (bstate->cur_mask != 0x100);
 1453 
 1454     next_byte:
 1455 
 1456       if (*input == input_end)
 1457     {
 1458       stream->msg = "secondary decoder end of input";
 1459       return XD3_INVALID_INPUT;
 1460     }
 1461 
 1462       bstate->cur_byte = *(*input)++;
 1463       bstate->cur_mask = 1;
 1464     }
 1465 
 1466  done:
 1467 
 1468   if (base[bits] <= code)
 1469     {
 1470       usize_t offset = code - base[bits];
 1471 
 1472       if (offset <= max_sym)
 1473     {
 1474       IF_DEBUG2 (DP(RINT "(j) %u ", code));
 1475       *sym = inorder[offset];
 1476       return 0;
 1477     }
 1478     }
 1479 
 1480  corrupt:
 1481   stream->msg = "secondary decoder invalid code";
 1482   return XD3_INVALID_INPUT;
 1483 }
 1484 
 1485 static int
 1486 djw_decode_clclen (xd3_stream     *stream,
 1487            bit_state      *bstate,
 1488            const uint8_t **input,
 1489            const uint8_t  *input_end,
 1490            uint8_t        *cl_inorder,
 1491            usize_t        *cl_base,
 1492            usize_t        *cl_limit,
 1493            usize_t        *cl_minlen,
 1494            usize_t        *cl_maxlen,
 1495            uint8_t        *cl_mtf)
 1496 {
 1497   int ret;
 1498   uint8_t cl_clen[DJW_TOTAL_CODES];
 1499   usize_t num_codes, value;
 1500   usize_t i;
 1501 
 1502   /* How many extra code lengths to encode. */
 1503   if ((ret = xd3_decode_bits (stream, bstate, input,
 1504                   input_end, DJW_EXTRA_CODE_BITS, & num_codes)))
 1505     {
 1506       return ret;
 1507     }
 1508 
 1509   num_codes += DJW_EXTRA_12OFFSET;
 1510 
 1511   /* Read num_codes. */
 1512   for (i = 0; i < num_codes; i += 1)
 1513     {
 1514       if ((ret = xd3_decode_bits (stream, bstate, input,
 1515                   input_end, DJW_CLCLEN_BITS, & value)))
 1516     {
 1517       return ret;
 1518     }
 1519 
 1520       cl_clen[i] = value;
 1521     }
 1522 
 1523   /* Set the rest to zero. */
 1524   for (; i < DJW_TOTAL_CODES; i += 1) { cl_clen[i] = 0; }
 1525 
 1526   /* No need to check for in-range clen values, because: */
 1527   XD3_ASSERT (1 << DJW_CLCLEN_BITS == DJW_MAX_CLCLEN + 1);
 1528 
 1529   /* Build the code-length decoder. */
 1530   djw_build_decoder (stream, DJW_TOTAL_CODES, DJW_MAX_CLCLEN,
 1531              cl_clen, cl_inorder, cl_base,
 1532              cl_limit, cl_minlen, cl_maxlen);
 1533 
 1534   /* Initialize the MTF state. */
 1535   djw_init_clen_mtf_1_2 (cl_mtf);
 1536 
 1537   return 0;
 1538 }
 1539 
 1540 static inline int
 1541 djw_decode_1_2 (xd3_stream     *stream,
 1542         bit_state      *bstate,
 1543         const uint8_t **input,
 1544         const uint8_t  *input_end,
 1545         const uint8_t  *inorder,
 1546         const usize_t  *base,
 1547         const usize_t  *limit,
 1548         const usize_t  *minlen,
 1549         const usize_t  *maxlen,
 1550         uint8_t        *mtfvals,
 1551         usize_t         elts,
 1552         usize_t         skip_offset,
 1553         uint8_t        *values)
 1554 {
 1555   usize_t n = 0, rep = 0, mtf = 0, s = 0;
 1556   int ret;
 1557   
 1558   while (n < elts)
 1559     {
 1560       /* Special case inside generic code: CLEN only: If not the first group,
 1561        * we already know the zero frequencies. */
 1562       if (skip_offset != 0 && n >= skip_offset && values[n-skip_offset] == 0)
 1563     {
 1564       values[n++] = 0;
 1565       continue;
 1566     }
 1567 
 1568       /* Repeat last symbol. */
 1569       if (rep != 0)
 1570     {
 1571       values[n++] = mtfvals[0];
 1572       rep -= 1;
 1573       continue;
 1574     }
 1575 
 1576       /* Symbol following last repeat code. */
 1577       if (mtf != 0)
 1578     {
 1579       usize_t sym = djw_update_mtf (mtfvals, mtf);
 1580       values[n++] = sym;
 1581       mtf = 0;
 1582       continue;
 1583     }
 1584 
 1585       /* Decode next symbol/repeat code. */
 1586       if ((ret = djw_decode_symbol (stream, bstate, input, input_end,
 1587                     inorder, base, limit, *minlen, *maxlen,
 1588                     & mtf, DJW_TOTAL_CODES))) { return ret; }
 1589 
 1590       if (mtf <= RUN_1)
 1591     {
 1592       /* Repetition. */
 1593       rep = ((mtf + 1) << s);
 1594       mtf = 0;
 1595       s += 1;
 1596     }
 1597       else
 1598     {
 1599       /* Remove the RUN_1 MTF offset. */
 1600       mtf -= 1;
 1601       s = 0;
 1602     }
 1603     }
 1604 
 1605   /* If (rep != 0) there were too many codes received. */
 1606   if (rep != 0)
 1607     {
 1608       stream->msg = "secondary decoder invalid repeat code";
 1609       return XD3_INVALID_INPUT;
 1610     }
 1611   
 1612   return 0;
 1613 }
 1614 
 1615 static inline int
 1616 djw_decode_prefix (xd3_stream     *stream,
 1617            bit_state      *bstate,
 1618            const uint8_t **input,
 1619            const uint8_t  *input_end,
 1620            const uint8_t  *cl_inorder,
 1621            const usize_t  *cl_base,
 1622            const usize_t  *cl_limit,
 1623            const usize_t  *cl_minlen,
 1624            const usize_t  *cl_maxlen,
 1625            uint8_t        *cl_mtf,
 1626            usize_t         groups,
 1627            uint8_t        *clen)
 1628 {
 1629   return djw_decode_1_2 (stream, bstate, input, input_end,
 1630              cl_inorder, cl_base, cl_limit,
 1631              cl_minlen, cl_maxlen, cl_mtf,
 1632              ALPHABET_SIZE * groups, ALPHABET_SIZE, clen);
 1633 }
 1634 
 1635 static int
 1636 xd3_decode_huff (xd3_stream     *stream,
 1637          djw_stream    *h,
 1638          const uint8_t **input_pos,
 1639          const uint8_t  *const input_end,
 1640          uint8_t       **output_pos,
 1641          const uint8_t  *const output_end)
 1642 {
 1643   const uint8_t *input = *input_pos;
 1644   uint8_t  *output = *output_pos;
 1645   bit_state bstate = BIT_STATE_DECODE_INIT;
 1646   uint8_t  *sel_group = NULL;
 1647   usize_t    groups, gp;
 1648   usize_t    output_bytes = (usize_t)(output_end - output);
 1649   usize_t    sector_size;
 1650   usize_t    sectors;
 1651   int ret;
 1652 
 1653   /* Invalid input. */
 1654   if (output_bytes == 0)
 1655     {
 1656       stream->msg = "secondary decoder invalid input";
 1657       return XD3_INVALID_INPUT;
 1658     }
 1659 
 1660   /* Decode: number of groups */
 1661   if ((ret = xd3_decode_bits (stream, & bstate, & input,
 1662                   input_end, DJW_GROUP_BITS, & groups)))
 1663     {
 1664       goto fail;
 1665     }
 1666 
 1667   groups += 1;
 1668 
 1669   if (groups > 1)
 1670     {
 1671       /* Decode: group size */
 1672       if ((ret = xd3_decode_bits (stream, & bstate, & input,
 1673                   input_end, DJW_SECTORSZ_BITS,
 1674                   & sector_size))) { goto fail; }
 1675       
 1676       sector_size = (sector_size + 1) * DJW_SECTORSZ_MULT;
 1677     }
 1678   else
 1679     {
 1680       /* Default for groups == 1 */
 1681       sector_size = output_bytes;
 1682     }
 1683 
 1684   sectors = 1 + (output_bytes - 1) / sector_size;
 1685 
 1686   /* TODO: In the case of groups==1, lots of extra stack space gets used here.
 1687    * Could dynamically allocate this memory, which would help with excess
 1688    * parameter passing, too.  Passing too many parameters in this file,
 1689    * simplify it! */
 1690 
 1691   /* Outer scope: per-group symbol decoder tables. */
 1692   {
 1693     uint8_t inorder[DJW_MAX_GROUPS][ALPHABET_SIZE];
 1694     usize_t base   [DJW_MAX_GROUPS][DJW_TOTAL_CODES];
 1695     usize_t limit  [DJW_MAX_GROUPS][DJW_TOTAL_CODES];
 1696     usize_t minlen [DJW_MAX_GROUPS];
 1697     usize_t maxlen [DJW_MAX_GROUPS];
 1698 
 1699     /* Nested scope: code length decoder tables. */
 1700     {
 1701       uint8_t clen      [DJW_MAX_GROUPS][ALPHABET_SIZE];
 1702       uint8_t cl_inorder[DJW_TOTAL_CODES];
 1703       usize_t cl_base   [DJW_MAX_CLCLEN+2];
 1704       usize_t cl_limit  [DJW_MAX_CLCLEN+2];
 1705       uint8_t cl_mtf    [DJW_TOTAL_CODES];
 1706       usize_t cl_minlen;
 1707       usize_t cl_maxlen;
 1708 
 1709       /* Compute the code length decoder. */
 1710       if ((ret = djw_decode_clclen (stream, & bstate, & input, input_end,
 1711                     cl_inorder, cl_base, cl_limit, & cl_minlen,
 1712                     & cl_maxlen, cl_mtf))) { goto fail; }
 1713 
 1714       /* Now decode each group decoder. */
 1715       if ((ret = djw_decode_prefix (stream, & bstate, & input, input_end,
 1716                     cl_inorder, cl_base, cl_limit,
 1717                     & cl_minlen, & cl_maxlen, cl_mtf,
 1718                     groups, clen[0]))) { goto fail; }
 1719 
 1720       /* Prepare the actual decoding tables. */
 1721       for (gp = 0; gp < groups; gp += 1)
 1722     {
 1723       djw_build_decoder (stream, ALPHABET_SIZE, DJW_MAX_CODELEN,
 1724                  clen[gp], inorder[gp], base[gp], limit[gp],
 1725                  & minlen[gp], & maxlen[gp]);
 1726     }
 1727     }
 1728 
 1729     /* Decode: selector clens. */
 1730     {
 1731       uint8_t sel_inorder[DJW_MAX_GROUPS+2];
 1732       usize_t sel_base   [DJW_MAX_GBCLEN+2];
 1733       usize_t sel_limit  [DJW_MAX_GBCLEN+2];
 1734       uint8_t sel_mtf    [DJW_MAX_GROUPS+2];
 1735       usize_t sel_minlen;
 1736       usize_t sel_maxlen;
 1737 
 1738       /* Setup group selection. */
 1739       if (groups > 1)
 1740     {
 1741       uint8_t sel_clen[DJW_MAX_GROUPS+1];
 1742 
 1743       for (gp = 0; gp < groups+1; gp += 1)
 1744         {
 1745           usize_t value;
 1746 
 1747           if ((ret = xd3_decode_bits (stream, & bstate, & input,
 1748                       input_end, DJW_GBCLEN_BITS,
 1749                       & value))) { goto fail; }
 1750 
 1751           sel_clen[gp] = value;
 1752           sel_mtf[gp]  = gp;
 1753         }
 1754 
 1755       if ((sel_group = (uint8_t*) xd3_alloc (stream, sectors, 1)) == NULL)
 1756         {
 1757           ret = ENOMEM;
 1758           goto fail;
 1759         }
 1760 
 1761       djw_build_decoder (stream, groups+1, DJW_MAX_GBCLEN, sel_clen,
 1762                  sel_inorder, sel_base, sel_limit,
 1763                  & sel_minlen, & sel_maxlen);
 1764 
 1765       if ((ret = djw_decode_1_2 (stream, & bstate, & input, input_end,
 1766                      sel_inorder, sel_base,
 1767                      sel_limit, & sel_minlen,
 1768                      & sel_maxlen, sel_mtf,
 1769                      sectors, 0, sel_group))) { goto fail; }
 1770     }
 1771 
 1772       /* Now decode each sector. */
 1773       {
 1774     /* Initialize for (groups==1) case. */
 1775     uint8_t *gp_inorder = inorder[0]; 
 1776     usize_t *gp_base    = base[0];
 1777     usize_t *gp_limit   = limit[0];
 1778     usize_t  gp_minlen  = minlen[0];
 1779     usize_t  gp_maxlen  = maxlen[0];
 1780     usize_t c;
 1781 
 1782     for (c = 0; c < sectors; c += 1)
 1783       {
 1784         usize_t n;
 1785 
 1786         if (groups >= 2)
 1787           {
 1788         gp = sel_group[c];
 1789 
 1790         XD3_ASSERT (gp < groups);
 1791 
 1792         gp_inorder = inorder[gp];
 1793         gp_base    = base[gp];
 1794         gp_limit   = limit[gp];
 1795         gp_minlen  = minlen[gp];
 1796         gp_maxlen  = maxlen[gp];
 1797           }
 1798 
 1799         if (output_end < output)
 1800           {
 1801         stream->msg = "secondary decoder invalid input";
 1802         return XD3_INVALID_INPUT;
 1803           }
 1804         
 1805         /* Decode next sector. */
 1806         n = xd3_min (sector_size, (usize_t) (output_end - output));
 1807 
 1808         do
 1809           {
 1810         usize_t sym;
 1811 
 1812         if ((ret = djw_decode_symbol (stream, & bstate,
 1813                           & input, input_end,
 1814                           gp_inorder, gp_base,
 1815                           gp_limit, gp_minlen, gp_maxlen,
 1816                           & sym, ALPHABET_SIZE)))
 1817           {
 1818             goto fail;
 1819           }
 1820 
 1821         *output++ = sym;
 1822           }
 1823         while (--n);
 1824       }
 1825       }
 1826     }
 1827   }
 1828 
 1829   IF_REGRESSION (if ((ret = xd3_test_clean_bits (stream, & bstate)))
 1830            { goto fail; });
 1831   XD3_ASSERT (ret == 0);
 1832 
 1833  fail:
 1834   xd3_free (stream, sel_group);
 1835 
 1836   (*input_pos) = input;
 1837   (*output_pos) = output;
 1838   return ret;
 1839 }
 1840 
 1841 #endif