"Fossies" - the Fresh Open Source Software Archive

Member "lzip-1.21/encoder.cc" (31 Dec 2018, 19703 Bytes) of package /linux/misc/lzip-1.21.tar.lz:


As a special service "Fossies" has tried to format the requested source page into HTML format using (guessed) C and C++ source code syntax highlighting (style: standard) with prefixed line numbers and code folding option. Alternatively you can here view or download the uninterpreted source code file. For more information about "encoder.cc" see the Fossies "Dox" file reference documentation and the latest Fossies "Diffs" side-by-side code changes report: 1.20_vs_1.21.

    1 /*  Lzip - LZMA lossless data compressor
    2     Copyright (C) 2008-2019 Antonio Diaz Diaz.
    3 
    4     This program is free software: you can redistribute it and/or modify
    5     it under the terms of the GNU General Public License as published by
    6     the Free Software Foundation, either version 2 of the License, or
    7     (at your option) any later version.
    8 
    9     This program is distributed in the hope that it will be useful,
   10     but WITHOUT ANY WARRANTY; without even the implied warranty of
   11     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
   12     GNU General Public License for more details.
   13 
   14     You should have received a copy of the GNU General Public License
   15     along with this program.  If not, see <http://www.gnu.org/licenses/>.
   16 */
   17 
   18 #define _FILE_OFFSET_BITS 64
   19 
   20 #include <algorithm>
   21 #include <cerrno>
   22 #include <cstdlib>
   23 #include <cstring>
   24 #include <string>
   25 #include <vector>
   26 #include <stdint.h>
   27 
   28 #include "lzip.h"
   29 #include "encoder_base.h"
   30 #include "encoder.h"
   31 
   32 
   33 const CRC32 crc32;
   34 
   35 
   36 int LZ_encoder::get_match_pairs( Pair * pairs )
   37   {
   38   int len_limit = match_len_limit;
   39   if( len_limit > available_bytes() )
   40     {
   41     len_limit = available_bytes();
   42     if( len_limit < 4 ) return 0;
   43     }
   44 
   45   int maxlen = 0;
   46   int num_pairs = 0;
   47   const int pos1 = pos + 1;
   48   const int min_pos = ( pos > dictionary_size ) ? pos - dictionary_size : 0;
   49   const uint8_t * const data = ptr_to_current_pos();
   50 
   51   unsigned tmp = crc32[data[0]] ^ data[1];
   52   const int key2 = tmp & ( num_prev_positions2 - 1 );
   53   tmp ^= (unsigned)data[2] << 8;
   54   const int key3 = num_prev_positions2 + ( tmp & ( num_prev_positions3 - 1 ) );
   55   const int key4 = num_prev_positions23 +
   56                    ( ( tmp ^ ( crc32[data[3]] << 5 ) ) & key4_mask );
   57 
   58   if( pairs )
   59     {
   60     int np2 = prev_positions[key2];
   61     int np3 = prev_positions[key3];
   62     if( np2 > min_pos && buffer[np2-1] == data[0] )
   63       {
   64       pairs[0].dis = pos - np2;
   65       pairs[0].len = maxlen = 2;
   66       num_pairs = 1;
   67       }
   68     if( np2 != np3 && np3 > min_pos && buffer[np3-1] == data[0] )
   69       {
   70       maxlen = 3;
   71       np2 = np3;
   72       pairs[num_pairs].dis = pos - np2;
   73       ++num_pairs;
   74       }
   75     if( num_pairs > 0 )
   76       {
   77       const int delta = pos1 - np2;
   78       while( maxlen < len_limit && data[maxlen-delta] == data[maxlen] )
   79         ++maxlen;
   80       pairs[num_pairs-1].len = maxlen;
   81       if( maxlen >= len_limit ) pairs = 0;  // done. now just skip
   82       }
   83     if( maxlen < 3 ) maxlen = 3;
   84     }
   85 
   86   prev_positions[key2] = pos1;
   87   prev_positions[key3] = pos1;
   88   int newpos1 = prev_positions[key4];
   89   prev_positions[key4] = pos1;
   90 
   91   int32_t * ptr0 = pos_array + ( cyclic_pos << 1 );
   92   int32_t * ptr1 = ptr0 + 1;
   93   int len = 0, len0 = 0, len1 = 0;
   94 
   95   for( int count = cycles; ; )
   96     {
   97     if( newpos1 <= min_pos || --count < 0 ) { *ptr0 = *ptr1 = 0; break; }
   98 
   99     const int delta = pos1 - newpos1;
  100     int32_t * const newptr = pos_array +
  101       ( ( cyclic_pos - delta +
  102           ( ( cyclic_pos >= delta ) ? 0 : dictionary_size + 1 ) ) << 1 );
  103     if( data[len-delta] == data[len] )
  104       {
  105       while( ++len < len_limit && data[len-delta] == data[len] ) {}
  106       if( pairs && maxlen < len )
  107         {
  108         pairs[num_pairs].dis = delta - 1;
  109         pairs[num_pairs].len = maxlen = len;
  110         ++num_pairs;
  111         }
  112       if( len >= len_limit )
  113         {
  114         *ptr0 = newptr[0];
  115         *ptr1 = newptr[1];
  116         break;
  117         }
  118       }
  119     if( data[len-delta] < data[len] )
  120       {
  121       *ptr0 = newpos1;
  122       ptr0 = newptr + 1;
  123       newpos1 = *ptr0;
  124       len0 = len; if( len1 < len ) len = len1;
  125       }
  126     else
  127       {
  128       *ptr1 = newpos1;
  129       ptr1 = newptr;
  130       newpos1 = *ptr1;
  131       len1 = len; if( len0 < len ) len = len0;
  132       }
  133     }
  134   return num_pairs;
  135   }
  136 
  137 
  138 void LZ_encoder::update_distance_prices()
  139   {
  140   for( int dis = start_dis_model; dis < modeled_distances; ++dis )
  141     {
  142     const int dis_slot = dis_slots[dis];
  143     const int direct_bits = ( dis_slot >> 1 ) - 1;
  144     const int base = ( 2 | ( dis_slot & 1 ) ) << direct_bits;
  145     const int price = price_symbol_reversed( bm_dis + ( base - dis_slot ),
  146                                              dis - base, direct_bits );
  147     for( int len_state = 0; len_state < len_states; ++len_state )
  148       dis_prices[len_state][dis] = price;
  149     }
  150 
  151   for( int len_state = 0; len_state < len_states; ++len_state )
  152     {
  153     int * const dsp = dis_slot_prices[len_state];
  154     const Bit_model * const bmds = bm_dis_slot[len_state];
  155     int slot = 0;
  156     for( ; slot < end_dis_model; ++slot )
  157       dsp[slot] = price_symbol6( bmds, slot );
  158     for( ; slot < num_dis_slots; ++slot )
  159       dsp[slot] = price_symbol6( bmds, slot ) +
  160                   (((( slot >> 1 ) - 1 ) - dis_align_bits ) << price_shift_bits );
  161 
  162     int * const dp = dis_prices[len_state];
  163     int dis = 0;
  164     for( ; dis < start_dis_model; ++dis )
  165       dp[dis] = dsp[dis];
  166     for( ; dis < modeled_distances; ++dis )
  167       dp[dis] += dsp[dis_slots[dis]];
  168     }
  169   }
  170 
  171 
  172 /* Returns the number of bytes advanced (ahead).
  173    trials[0]..trials[ahead-1] contain the steps to encode.
  174    ( trials[0].dis4 == -1 ) means literal.
  175    A match/rep longer or equal than match_len_limit finishes the sequence.
  176 */
  177 int LZ_encoder::sequence_optimizer( const int reps[num_rep_distances],
  178                                     const State state )
  179   {
  180   int num_pairs, num_trials;
  181 
  182   if( pending_num_pairs > 0 )           // from previous call
  183     {
  184     num_pairs = pending_num_pairs;
  185     pending_num_pairs = 0;
  186     }
  187   else
  188     num_pairs = read_match_distances();
  189   const int main_len = ( num_pairs > 0 ) ? pairs[num_pairs-1].len : 0;
  190 
  191   int replens[num_rep_distances];
  192   int rep_index = 0;
  193   for( int i = 0; i < num_rep_distances; ++i )
  194     {
  195     replens[i] = true_match_len( 0, reps[i] + 1 );
  196     if( replens[i] > replens[rep_index] ) rep_index = i;
  197     }
  198   if( replens[rep_index] >= match_len_limit )
  199     {
  200     trials[0].price = replens[rep_index];
  201     trials[0].dis4 = rep_index;
  202     move_and_update( replens[rep_index] );
  203     return replens[rep_index];
  204     }
  205 
  206   if( main_len >= match_len_limit )
  207     {
  208     trials[0].price = main_len;
  209     trials[0].dis4 = pairs[num_pairs-1].dis + num_rep_distances;
  210     move_and_update( main_len );
  211     return main_len;
  212     }
  213 
  214   const int pos_state = data_position() & pos_state_mask;
  215   const uint8_t prev_byte = peek( 1 );
  216   const uint8_t cur_byte = peek( 0 );
  217   const uint8_t match_byte = peek( reps[0] + 1 );
  218 
  219   trials[1].price = price0( bm_match[state()][pos_state] );
  220   if( state.is_char() )
  221     trials[1].price += price_literal( prev_byte, cur_byte );
  222   else
  223     trials[1].price += price_matched( prev_byte, cur_byte, match_byte );
  224   trials[1].dis4 = -1;                  // literal
  225 
  226   const int match_price = price1( bm_match[state()][pos_state] );
  227   const int rep_match_price = match_price + price1( bm_rep[state()] );
  228 
  229   if( match_byte == cur_byte )
  230     trials[1].update( rep_match_price + price_shortrep( state, pos_state ), 0, 0 );
  231 
  232   num_trials = std::max( main_len, replens[rep_index] );
  233 
  234   if( num_trials < min_match_len )
  235     {
  236     trials[0].price = 1;
  237     trials[0].dis4 = trials[1].dis4;
  238     move_pos();
  239     return 1;
  240     }
  241 
  242   trials[0].state = state;
  243   for( int i = 0; i < num_rep_distances; ++i )
  244     trials[0].reps[i] = reps[i];
  245 
  246   for( int len = min_match_len; len <= num_trials; ++len )
  247     trials[len].price = infinite_price;
  248 
  249   for( int rep = 0; rep < num_rep_distances; ++rep )
  250     {
  251     if( replens[rep] < min_match_len ) continue;
  252     const int price = rep_match_price + price_rep( rep, state, pos_state );
  253     for( int len = min_match_len; len <= replens[rep]; ++len )
  254       trials[len].update( price + rep_len_prices.price( len, pos_state ),
  255                           rep, 0 );
  256     }
  257 
  258   if( main_len > replens[0] )
  259     {
  260     const int normal_match_price = match_price + price0( bm_rep[state()] );
  261     int i = 0, len = std::max( replens[0] + 1, (int)min_match_len );
  262     while( len > pairs[i].len ) ++i;
  263     while( true )
  264       {
  265       const int dis = pairs[i].dis;
  266       trials[len].update( normal_match_price + price_pair( dis, len, pos_state ),
  267                           dis + num_rep_distances, 0 );
  268       if( ++len > pairs[i].len && ++i >= num_pairs ) break;
  269       }
  270     }
  271 
  272   int cur = 0;
  273   while( true )             // price optimization loop
  274     {
  275     move_pos();
  276     if( ++cur >= num_trials )       // no more initialized trials
  277       {
  278       backward( cur );
  279       return cur;
  280       }
  281 
  282     const int num_pairs = read_match_distances();
  283     const int newlen = ( num_pairs > 0 ) ? pairs[num_pairs-1].len : 0;
  284     if( newlen >= match_len_limit )
  285       {
  286       pending_num_pairs = num_pairs;
  287       backward( cur );
  288       return cur;
  289       }
  290 
  291     // give final values to current trial
  292     Trial & cur_trial = trials[cur];
  293     State cur_state;
  294     {
  295     const int dis4 = cur_trial.dis4;
  296     int prev_index = cur_trial.prev_index;
  297     const int prev_index2 = cur_trial.prev_index2;
  298 
  299     if( prev_index2 == single_step_trial )
  300       {
  301       cur_state = trials[prev_index].state;
  302       if( prev_index + 1 == cur )           // len == 1
  303         {
  304         if( dis4 == 0 ) cur_state.set_short_rep();
  305         else cur_state.set_char();          // literal
  306         }
  307       else if( dis4 < num_rep_distances ) cur_state.set_rep();
  308       else cur_state.set_match();
  309       }
  310     else
  311       {
  312       if( prev_index2 == dual_step_trial )  // dis4 == 0 (rep0)
  313         --prev_index;
  314       else                  // prev_index2 >= 0
  315         prev_index = prev_index2;
  316       cur_state.set_char_rep();
  317       }
  318     cur_trial.state = cur_state;
  319     for( int i = 0; i < num_rep_distances; ++i )
  320       cur_trial.reps[i] = trials[prev_index].reps[i];
  321     mtf_reps( dis4, cur_trial.reps );       // literal is ignored
  322     }
  323 
  324     const int pos_state = data_position() & pos_state_mask;
  325     const uint8_t prev_byte = peek( 1 );
  326     const uint8_t cur_byte = peek( 0 );
  327     const uint8_t match_byte = peek( cur_trial.reps[0] + 1 );
  328 
  329     int next_price = cur_trial.price +
  330                      price0( bm_match[cur_state()][pos_state] );
  331     if( cur_state.is_char() )
  332       next_price += price_literal( prev_byte, cur_byte );
  333     else
  334       next_price += price_matched( prev_byte, cur_byte, match_byte );
  335 
  336     // try last updates to next trial
  337     Trial & next_trial = trials[cur+1];
  338 
  339     next_trial.update( next_price, -1, cur );       // literal
  340 
  341     const int match_price = cur_trial.price + price1( bm_match[cur_state()][pos_state] );
  342     const int rep_match_price = match_price + price1( bm_rep[cur_state()] );
  343 
  344     if( match_byte == cur_byte && next_trial.dis4 != 0 &&
  345         next_trial.prev_index2 == single_step_trial )
  346       {
  347       const int price = rep_match_price + price_shortrep( cur_state, pos_state );
  348       if( price <= next_trial.price )
  349         {
  350         next_trial.price = price;
  351         next_trial.dis4 = 0;                // rep0
  352         next_trial.prev_index = cur;
  353         }
  354       }
  355 
  356     const int triable_bytes =
  357       std::min( available_bytes(), max_num_trials - 1 - cur );
  358     if( triable_bytes < min_match_len ) continue;
  359 
  360     const int len_limit = std::min( match_len_limit, triable_bytes );
  361 
  362     // try literal + rep0
  363     if( match_byte != cur_byte && next_trial.prev_index != cur )
  364       {
  365       const uint8_t * const data = ptr_to_current_pos();
  366       const int dis = cur_trial.reps[0] + 1;
  367       const int limit = std::min( match_len_limit + 1, triable_bytes );
  368       int len = 1;
  369       while( len < limit && data[len-dis] == data[len] ) ++len;
  370       if( --len >= min_match_len )
  371         {
  372         const int pos_state2 = ( pos_state + 1 ) & pos_state_mask;
  373         State state2 = cur_state; state2.set_char();
  374         const int price = next_price +
  375                           price1( bm_match[state2()][pos_state2] ) +
  376                           price1( bm_rep[state2()] ) +
  377                           price_rep0_len( len, state2, pos_state2 );
  378         while( num_trials < cur + 1 + len )
  379           trials[++num_trials].price = infinite_price;
  380         trials[cur+1+len].update2( price, cur + 1 );
  381         }
  382       }
  383 
  384     int start_len = min_match_len;
  385 
  386     // try rep distances
  387     for( int rep = 0; rep < num_rep_distances; ++rep )
  388       {
  389       const uint8_t * const data = ptr_to_current_pos();
  390       const int dis = cur_trial.reps[rep] + 1;
  391       int len;
  392 
  393       if( data[0-dis] != data[0] || data[1-dis] != data[1] ) continue;
  394       for( len = min_match_len; len < len_limit; ++len )
  395         if( data[len-dis] != data[len] ) break;
  396       while( num_trials < cur + len )
  397         trials[++num_trials].price = infinite_price;
  398       int price = rep_match_price + price_rep( rep, cur_state, pos_state );
  399       for( int i = min_match_len; i <= len; ++i )
  400         trials[cur+i].update( price + rep_len_prices.price( i, pos_state ),
  401                               rep, cur );
  402 
  403       if( rep == 0 ) start_len = len + 1;   // discard shorter matches
  404 
  405       // try rep + literal + rep0
  406       int len2 = len + 1;
  407       const int limit = std::min( match_len_limit + len2, triable_bytes );
  408       while( len2 < limit && data[len2-dis] == data[len2] ) ++len2;
  409       len2 -= len + 1;
  410       if( len2 < min_match_len ) continue;
  411 
  412       int pos_state2 = ( pos_state + len ) & pos_state_mask;
  413       State state2 = cur_state; state2.set_rep();
  414       price += rep_len_prices.price( len, pos_state ) +
  415                price0( bm_match[state2()][pos_state2] ) +
  416                price_matched( data[len-1], data[len], data[len-dis] );
  417       pos_state2 = ( pos_state2 + 1 ) & pos_state_mask;
  418       state2.set_char();
  419       price += price1( bm_match[state2()][pos_state2] ) +
  420                price1( bm_rep[state2()] ) +
  421                price_rep0_len( len2, state2, pos_state2 );
  422       while( num_trials < cur + len + 1 + len2 )
  423         trials[++num_trials].price = infinite_price;
  424       trials[cur+len+1+len2].update3( price, rep, cur + len + 1, cur );
  425       }
  426 
  427     // try matches
  428     if( newlen >= start_len && newlen <= len_limit )
  429       {
  430       const int normal_match_price = match_price +
  431                                      price0( bm_rep[cur_state()] );
  432 
  433       while( num_trials < cur + newlen )
  434         trials[++num_trials].price = infinite_price;
  435 
  436       int i = 0;
  437       while( pairs[i].len < start_len ) ++i;
  438       int dis = pairs[i].dis;
  439       for( int len = start_len; ; ++len )
  440         {
  441         int price = normal_match_price + price_pair( dis, len, pos_state );
  442         trials[cur+len].update( price, dis + num_rep_distances, cur );
  443 
  444         // try match + literal + rep0
  445         if( len == pairs[i].len )
  446           {
  447           const uint8_t * const data = ptr_to_current_pos();
  448           const int dis2 = dis + 1;
  449           int len2 = len + 1;
  450           const int limit = std::min( match_len_limit + len2, triable_bytes );
  451           while( len2 < limit && data[len2-dis2] == data[len2] ) ++len2;
  452           len2 -= len + 1;
  453           if( len2 >= min_match_len )
  454             {
  455             int pos_state2 = ( pos_state + len ) & pos_state_mask;
  456             State state2 = cur_state; state2.set_match();
  457             price += price0( bm_match[state2()][pos_state2] ) +
  458                      price_matched( data[len-1], data[len], data[len-dis2] );
  459             pos_state2 = ( pos_state2 + 1 ) & pos_state_mask;
  460             state2.set_char();
  461             price += price1( bm_match[state2()][pos_state2] ) +
  462                      price1( bm_rep[state2()] ) +
  463                      price_rep0_len( len2, state2, pos_state2 );
  464 
  465             while( num_trials < cur + len + 1 + len2 )
  466               trials[++num_trials].price = infinite_price;
  467             trials[cur+len+1+len2].update3( price, dis + num_rep_distances,
  468                                             cur + len + 1, cur );
  469             }
  470           if( ++i >= num_pairs ) break;
  471           dis = pairs[i].dis;
  472           }
  473         }
  474       }
  475     }
  476   }
  477 
  478 
  479 bool LZ_encoder::encode_member( const unsigned long long member_size )
  480   {
  481   const unsigned long long member_size_limit =
  482     member_size - Lzip_trailer::size - max_marker_size;
  483   const bool best = ( match_len_limit > 12 );
  484   const int dis_price_count = best ? 1 : 512;
  485   const int align_price_count = best ? 1 : dis_align_size;
  486   const int price_count = ( match_len_limit > 36 ) ? 1013 : 4093;
  487   int price_counter = 0;        // counters may decrement below 0
  488   int dis_price_counter = 0;
  489   int align_price_counter = 0;
  490   int reps[num_rep_distances];
  491   State state;
  492   for( int i = 0; i < num_rep_distances; ++i ) reps[i] = 0;
  493 
  494   if( data_position() != 0 || renc.member_position() != Lzip_header::size )
  495     return false;               // can be called only once
  496 
  497   if( !data_finished() )            // encode first byte
  498     {
  499     const uint8_t prev_byte = 0;
  500     const uint8_t cur_byte = peek( 0 );
  501     renc.encode_bit( bm_match[state()][0], 0 );
  502     encode_literal( prev_byte, cur_byte );
  503     crc32.update_byte( crc_, cur_byte );
  504     get_match_pairs();
  505     move_pos();
  506     }
  507 
  508   while( !data_finished() )
  509     {
  510     if( price_counter <= 0 && pending_num_pairs == 0 )
  511       {
  512       price_counter = price_count;  // recalculate prices every these bytes
  513       if( dis_price_counter <= 0 )
  514         { dis_price_counter = dis_price_count; update_distance_prices(); }
  515       if( align_price_counter <= 0 )
  516         {
  517         align_price_counter = align_price_count;
  518         for( int i = 0; i < dis_align_size; ++i )
  519           align_prices[i] = price_symbol_reversed( bm_align, i, dis_align_bits );
  520         }
  521       match_len_prices.update_prices();
  522       rep_len_prices.update_prices();
  523       }
  524 
  525     int ahead = sequence_optimizer( reps, state );
  526     price_counter -= ahead;
  527 
  528     for( int i = 0; ahead > 0; )
  529       {
  530       const int pos_state = ( data_position() - ahead ) & pos_state_mask;
  531       const int len = trials[i].price;
  532       int dis = trials[i].dis4;
  533 
  534       bool bit = ( dis < 0 );
  535       renc.encode_bit( bm_match[state()][pos_state], !bit );
  536       if( bit )                 // literal byte
  537         {
  538         const uint8_t prev_byte = peek( ahead + 1 );
  539         const uint8_t cur_byte = peek( ahead );
  540         crc32.update_byte( crc_, cur_byte );
  541         if( state.is_char_set_char() )
  542           encode_literal( prev_byte, cur_byte );
  543         else
  544           {
  545           const uint8_t match_byte = peek( ahead + reps[0] + 1 );
  546           encode_matched( prev_byte, cur_byte, match_byte );
  547           }
  548         }
  549       else                  // match or repeated match
  550         {
  551         crc32.update_buf( crc_, ptr_to_current_pos() - ahead, len );
  552         mtf_reps( dis, reps );
  553         bit = ( dis < num_rep_distances );
  554         renc.encode_bit( bm_rep[state()], bit );
  555         if( bit )               // repeated match
  556           {
  557           bit = ( dis == 0 );
  558           renc.encode_bit( bm_rep0[state()], !bit );
  559           if( bit )
  560             renc.encode_bit( bm_len[state()][pos_state], len > 1 );
  561           else
  562             {
  563             renc.encode_bit( bm_rep1[state()], dis > 1 );
  564             if( dis > 1 )
  565               renc.encode_bit( bm_rep2[state()], dis > 2 );
  566             }
  567           if( len == 1 ) state.set_short_rep();
  568           else
  569             {
  570             renc.encode_len( rep_len_model, len, pos_state );
  571             rep_len_prices.decrement_counter( pos_state );
  572             state.set_rep();
  573             }
  574           }
  575         else                    // match
  576           {
  577           dis -= num_rep_distances;
  578           encode_pair( dis, len, pos_state );
  579           if( dis >= modeled_distances ) --align_price_counter;
  580           --dis_price_counter;
  581           match_len_prices.decrement_counter( pos_state );
  582           state.set_match();
  583           }
  584         }
  585       ahead -= len; i += len;
  586       if( renc.member_position() >= member_size_limit )
  587         {
  588         if( !dec_pos( ahead ) ) return false;
  589         full_flush( state );
  590         return true;
  591         }
  592       }
  593     }
  594   full_flush( state );
  595   return true;
  596   }