"Fossies" - the Fresh Open Source Software Archive

Member "lzip-1.22-rc2/encoder.cc" (30 Apr 2020, 19733 Bytes) of package /linux/misc/lzip-1.22-rc2.tar.lz:


As a special service "Fossies" has tried to format the requested source page into HTML format using (guessed) C and C++ source code syntax highlighting (style: standard) with prefixed line numbers and code folding option. Alternatively you can here view or download the uninterpreted source code file. For more information about "encoder.cc" see the Fossies "Dox" file reference documentation and the last Fossies "Diffs" side-by-side code changes report: 1.21_vs_1.22-rc1.

    1 /* Lzip - LZMA lossless data compressor
    2    Copyright (C) 2008-2020 Antonio Diaz Diaz.
    3 
    4    This program is free software: you can redistribute it and/or modify
    5    it under the terms of the GNU General Public License as published by
    6    the Free Software Foundation, either version 2 of the License, or
    7    (at your option) any later version.
    8 
    9    This program is distributed in the hope that it will be useful,
   10    but WITHOUT ANY WARRANTY; without even the implied warranty of
   11    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
   12    GNU General Public License for more details.
   13 
   14    You should have received a copy of the GNU General Public License
   15    along with this program.  If not, see <http://www.gnu.org/licenses/>.
   16 */
   17 
   18 #define _FILE_OFFSET_BITS 64
   19 
   20 #include <algorithm>
   21 #include <cerrno>
   22 #include <cstdlib>
   23 #include <cstring>
   24 #include <string>
   25 #include <vector>
   26 #include <stdint.h>
   27 
   28 #include "lzip.h"
   29 #include "encoder_base.h"
   30 #include "encoder.h"
   31 
   32 
   33 const CRC32 crc32;
   34 
   35 
   36 int LZ_encoder::get_match_pairs( Pair * pairs )
   37   {
   38   int len_limit = match_len_limit;
   39   if( len_limit > available_bytes() )
   40     {
   41     len_limit = available_bytes();
   42     if( len_limit < 4 ) return 0;
   43     }
   44 
   45   int maxlen = 3;           // only used if pairs != 0
   46   int num_pairs = 0;
   47   const int min_pos = ( pos > dictionary_size ) ? pos - dictionary_size : 0;
   48   const uint8_t * const data = ptr_to_current_pos();
   49 
   50   unsigned tmp = crc32[data[0]] ^ data[1];
   51   const int key2 = tmp & ( num_prev_positions2 - 1 );
   52   tmp ^= (unsigned)data[2] << 8;
   53   const int key3 = num_prev_positions2 + ( tmp & ( num_prev_positions3 - 1 ) );
   54   const int key4 = num_prev_positions23 +
   55                    ( ( tmp ^ ( crc32[data[3]] << 5 ) ) & key4_mask );
   56 
   57   if( pairs )
   58     {
   59     const int np2 = prev_positions[key2];
   60     const int np3 = prev_positions[key3];
   61     if( np2 > min_pos && buffer[np2-1] == data[0] )
   62       {
   63       pairs[0].dis = pos - np2;
   64       pairs[0].len = maxlen = 2 + ( np2 == np3 );
   65       num_pairs = 1;
   66       }
   67     if( np2 != np3 && np3 > min_pos && buffer[np3-1] == data[0] )
   68       {
   69       maxlen = 3;
   70       pairs[num_pairs++].dis = pos - np3;
   71       }
   72     if( num_pairs > 0 )
   73       {
   74       const int delta = pairs[num_pairs-1].dis + 1;
   75       while( maxlen < len_limit && data[maxlen-delta] == data[maxlen] )
   76         ++maxlen;
   77       pairs[num_pairs-1].len = maxlen;
   78       if( maxlen < 3 ) maxlen = 3;
   79       if( maxlen >= len_limit ) pairs = 0;  // done. now just skip
   80       }
   81     }
   82 
   83   const int pos1 = pos + 1;
   84   prev_positions[key2] = pos1;
   85   prev_positions[key3] = pos1;
   86   int newpos1 = prev_positions[key4];
   87   prev_positions[key4] = pos1;
   88 
   89   int32_t * ptr0 = pos_array + ( cyclic_pos << 1 );
   90   int32_t * ptr1 = ptr0 + 1;
   91   int len = 0, len0 = 0, len1 = 0;
   92 
   93   for( int count = cycles; ; )
   94     {
   95     if( newpos1 <= min_pos || --count < 0 ) { *ptr0 = *ptr1 = 0; break; }
   96 
   97     const int delta = pos1 - newpos1;
   98     int32_t * const newptr = pos_array +
   99       ( ( cyclic_pos - delta +
  100           ( ( cyclic_pos >= delta ) ? 0 : dictionary_size + 1 ) ) << 1 );
  101     if( data[len-delta] == data[len] )
  102       {
  103       while( ++len < len_limit && data[len-delta] == data[len] ) {}
  104       if( pairs && maxlen < len )
  105         {
  106         pairs[num_pairs].dis = delta - 1;
  107         pairs[num_pairs].len = maxlen = len;
  108         ++num_pairs;
  109         }
  110       if( len >= len_limit )
  111         {
  112         *ptr0 = newptr[0];
  113         *ptr1 = newptr[1];
  114         break;
  115         }
  116       }
  117     if( data[len-delta] < data[len] )
  118       {
  119       *ptr0 = newpos1;
  120       ptr0 = newptr + 1;
  121       newpos1 = *ptr0;
  122       len0 = len; if( len1 < len ) len = len1;
  123       }
  124     else
  125       {
  126       *ptr1 = newpos1;
  127       ptr1 = newptr;
  128       newpos1 = *ptr1;
  129       len1 = len; if( len0 < len ) len = len0;
  130       }
  131     }
  132   return num_pairs;
  133   }
  134 
  135 
  136 void LZ_encoder::update_distance_prices()
  137   {
  138   for( int dis = start_dis_model; dis < modeled_distances; ++dis )
  139     {
  140     const int dis_slot = dis_slots[dis];
  141     const int direct_bits = ( dis_slot >> 1 ) - 1;
  142     const int base = ( 2 | ( dis_slot & 1 ) ) << direct_bits;
  143     const int price = price_symbol_reversed( bm_dis + ( base - dis_slot ),
  144                                              dis - base, direct_bits );
  145     for( int len_state = 0; len_state < len_states; ++len_state )
  146       dis_prices[len_state][dis] = price;
  147     }
  148 
  149   for( int len_state = 0; len_state < len_states; ++len_state )
  150     {
  151     int * const dsp = dis_slot_prices[len_state];
  152     const Bit_model * const bmds = bm_dis_slot[len_state];
  153     int slot = 0;
  154     for( ; slot < end_dis_model; ++slot )
  155       dsp[slot] = price_symbol6( bmds, slot );
  156     for( ; slot < num_dis_slots; ++slot )
  157       dsp[slot] = price_symbol6( bmds, slot ) +
  158                   (((( slot >> 1 ) - 1 ) - dis_align_bits ) << price_shift_bits );
  159 
  160     int * const dp = dis_prices[len_state];
  161     int dis = 0;
  162     for( ; dis < start_dis_model; ++dis )
  163       dp[dis] = dsp[dis];
  164     for( ; dis < modeled_distances; ++dis )
  165       dp[dis] += dsp[dis_slots[dis]];
  166     }
  167   }
  168 
  169 
  170 /* Returns the number of bytes advanced (ahead).
  171    trials[0]..trials[ahead-1] contain the steps to encode.
  172    ( trials[0].dis4 == -1 ) means literal.
  173    A match/rep longer or equal than match_len_limit finishes the sequence.
  174 */
  175 int LZ_encoder::sequence_optimizer( const int reps[num_rep_distances],
  176                                     const State state )
  177   {
  178   int num_pairs, num_trials;
  179 
  180   if( pending_num_pairs > 0 )           // from previous call
  181     {
  182     num_pairs = pending_num_pairs;
  183     pending_num_pairs = 0;
  184     }
  185   else
  186     num_pairs = read_match_distances();
  187   const int main_len = ( num_pairs > 0 ) ? pairs[num_pairs-1].len : 0;
  188 
  189   int replens[num_rep_distances];
  190   int rep_index = 0;
  191   for( int i = 0; i < num_rep_distances; ++i )
  192     {
  193     replens[i] = true_match_len( 0, reps[i] + 1 );
  194     if( replens[i] > replens[rep_index] ) rep_index = i;
  195     }
  196   if( replens[rep_index] >= match_len_limit )
  197     {
  198     trials[0].price = replens[rep_index];
  199     trials[0].dis4 = rep_index;
  200     move_and_update( replens[rep_index] );
  201     return replens[rep_index];
  202     }
  203 
  204   if( main_len >= match_len_limit )
  205     {
  206     trials[0].price = main_len;
  207     trials[0].dis4 = pairs[num_pairs-1].dis + num_rep_distances;
  208     move_and_update( main_len );
  209     return main_len;
  210     }
  211 
  212   const int pos_state = data_position() & pos_state_mask;
  213   const uint8_t prev_byte = peek( 1 );
  214   const uint8_t cur_byte = peek( 0 );
  215   const uint8_t match_byte = peek( reps[0] + 1 );
  216 
  217   trials[1].price = price0( bm_match[state()][pos_state] );
  218   if( state.is_char() )
  219     trials[1].price += price_literal( prev_byte, cur_byte );
  220   else
  221     trials[1].price += price_matched( prev_byte, cur_byte, match_byte );
  222   trials[1].dis4 = -1;                  // literal
  223 
  224   const int match_price = price1( bm_match[state()][pos_state] );
  225   const int rep_match_price = match_price + price1( bm_rep[state()] );
  226 
  227   if( match_byte == cur_byte )
  228     trials[1].update( rep_match_price + price_shortrep( state, pos_state ), 0, 0 );
  229 
  230   num_trials = std::max( main_len, replens[rep_index] );
  231 
  232   if( num_trials < min_match_len )
  233     {
  234     trials[0].price = 1;
  235     trials[0].dis4 = trials[1].dis4;
  236     move_pos();
  237     return 1;
  238     }
  239 
  240   trials[0].state = state;
  241   for( int i = 0; i < num_rep_distances; ++i )
  242     trials[0].reps[i] = reps[i];
  243 
  244   for( int len = min_match_len; len <= num_trials; ++len )
  245     trials[len].price = infinite_price;
  246 
  247   for( int rep = 0; rep < num_rep_distances; ++rep )
  248     {
  249     if( replens[rep] < min_match_len ) continue;
  250     const int price = rep_match_price + price_rep( rep, state, pos_state );
  251     for( int len = min_match_len; len <= replens[rep]; ++len )
  252       trials[len].update( price + rep_len_prices.price( len, pos_state ),
  253                           rep, 0 );
  254     }
  255 
  256   if( main_len > replens[0] )
  257     {
  258     const int normal_match_price = match_price + price0( bm_rep[state()] );
  259     int i = 0, len = std::max( replens[0] + 1, (int)min_match_len );
  260     while( len > pairs[i].len ) ++i;
  261     while( true )
  262       {
  263       const int dis = pairs[i].dis;
  264       trials[len].update( normal_match_price + price_pair( dis, len, pos_state ),
  265                           dis + num_rep_distances, 0 );
  266       if( ++len > pairs[i].len && ++i >= num_pairs ) break;
  267       }
  268     }
  269 
  270   int cur = 0;
  271   while( true )             // price optimization loop
  272     {
  273     move_pos();
  274     if( ++cur >= num_trials )       // no more initialized trials
  275       {
  276       backward( cur );
  277       return cur;
  278       }
  279 
  280     const int num_pairs = read_match_distances();
  281     const int newlen = ( num_pairs > 0 ) ? pairs[num_pairs-1].len : 0;
  282     if( newlen >= match_len_limit )
  283       {
  284       pending_num_pairs = num_pairs;
  285       backward( cur );
  286       return cur;
  287       }
  288 
  289     // give final values to current trial
  290     Trial & cur_trial = trials[cur];
  291     State cur_state;
  292     {
  293     const int dis4 = cur_trial.dis4;
  294     int prev_index = cur_trial.prev_index;
  295     const int prev_index2 = cur_trial.prev_index2;
  296 
  297     if( prev_index2 == single_step_trial )
  298       {
  299       cur_state = trials[prev_index].state;
  300       if( prev_index + 1 == cur )           // len == 1
  301         {
  302         if( dis4 == 0 ) cur_state.set_short_rep();
  303         else cur_state.set_char();          // literal
  304         }
  305       else if( dis4 < num_rep_distances ) cur_state.set_rep();
  306       else cur_state.set_match();
  307       }
  308     else
  309       {
  310       if( prev_index2 == dual_step_trial )  // dis4 == 0 (rep0)
  311         --prev_index;
  312       else                  // prev_index2 >= 0
  313         prev_index = prev_index2;
  314       cur_state.set_char_rep();
  315       }
  316     cur_trial.state = cur_state;
  317     for( int i = 0; i < num_rep_distances; ++i )
  318       cur_trial.reps[i] = trials[prev_index].reps[i];
  319     mtf_reps( dis4, cur_trial.reps );       // literal is ignored
  320     }
  321 
  322     const int pos_state = data_position() & pos_state_mask;
  323     const uint8_t prev_byte = peek( 1 );
  324     const uint8_t cur_byte = peek( 0 );
  325     const uint8_t match_byte = peek( cur_trial.reps[0] + 1 );
  326 
  327     int next_price = cur_trial.price +
  328                      price0( bm_match[cur_state()][pos_state] );
  329     if( cur_state.is_char() )
  330       next_price += price_literal( prev_byte, cur_byte );
  331     else
  332       next_price += price_matched( prev_byte, cur_byte, match_byte );
  333 
  334     // try last updates to next trial
  335     Trial & next_trial = trials[cur+1];
  336 
  337     next_trial.update( next_price, -1, cur );       // literal
  338 
  339     const int match_price = cur_trial.price + price1( bm_match[cur_state()][pos_state] );
  340     const int rep_match_price = match_price + price1( bm_rep[cur_state()] );
  341 
  342     if( match_byte == cur_byte && next_trial.dis4 != 0 &&
  343         next_trial.prev_index2 == single_step_trial )
  344       {
  345       const int price = rep_match_price + price_shortrep( cur_state, pos_state );
  346       if( price <= next_trial.price )
  347         {
  348         next_trial.price = price;
  349         next_trial.dis4 = 0;                // rep0
  350         next_trial.prev_index = cur;
  351         }
  352       }
  353 
  354     const int triable_bytes =
  355       std::min( available_bytes(), max_num_trials - 1 - cur );
  356     if( triable_bytes < min_match_len ) continue;
  357 
  358     const int len_limit = std::min( match_len_limit, triable_bytes );
  359 
  360     // try literal + rep0
  361     if( match_byte != cur_byte && next_trial.prev_index != cur )
  362       {
  363       const uint8_t * const data = ptr_to_current_pos();
  364       const int dis = cur_trial.reps[0] + 1;
  365       const int limit = std::min( match_len_limit + 1, triable_bytes );
  366       int len = 1;
  367       while( len < limit && data[len-dis] == data[len] ) ++len;
  368       if( --len >= min_match_len )
  369         {
  370         const int pos_state2 = ( pos_state + 1 ) & pos_state_mask;
  371         State state2 = cur_state; state2.set_char();
  372         const int price = next_price +
  373                           price1( bm_match[state2()][pos_state2] ) +
  374                           price1( bm_rep[state2()] ) +
  375                           price_rep0_len( len, state2, pos_state2 );
  376         while( num_trials < cur + 1 + len )
  377           trials[++num_trials].price = infinite_price;
  378         trials[cur+1+len].update2( price, cur + 1 );
  379         }
  380       }
  381 
  382     int start_len = min_match_len;
  383 
  384     // try rep distances
  385     for( int rep = 0; rep < num_rep_distances; ++rep )
  386       {
  387       const uint8_t * const data = ptr_to_current_pos();
  388       const int dis = cur_trial.reps[rep] + 1;
  389       int len;
  390 
  391       if( data[0-dis] != data[0] || data[1-dis] != data[1] ) continue;
  392       for( len = min_match_len; len < len_limit; ++len )
  393         if( data[len-dis] != data[len] ) break;
  394       while( num_trials < cur + len )
  395         trials[++num_trials].price = infinite_price;
  396       int price = rep_match_price + price_rep( rep, cur_state, pos_state );
  397       for( int i = min_match_len; i <= len; ++i )
  398         trials[cur+i].update( price + rep_len_prices.price( i, pos_state ),
  399                               rep, cur );
  400 
  401       if( rep == 0 ) start_len = len + 1;   // discard shorter matches
  402 
  403       // try rep + literal + rep0
  404       int len2 = len + 1;
  405       const int limit = std::min( match_len_limit + len2, triable_bytes );
  406       while( len2 < limit && data[len2-dis] == data[len2] ) ++len2;
  407       len2 -= len + 1;
  408       if( len2 < min_match_len ) continue;
  409 
  410       int pos_state2 = ( pos_state + len ) & pos_state_mask;
  411       State state2 = cur_state; state2.set_rep();
  412       price += rep_len_prices.price( len, pos_state ) +
  413                price0( bm_match[state2()][pos_state2] ) +
  414                price_matched( data[len-1], data[len], data[len-dis] );
  415       pos_state2 = ( pos_state2 + 1 ) & pos_state_mask;
  416       state2.set_char();
  417       price += price1( bm_match[state2()][pos_state2] ) +
  418                price1( bm_rep[state2()] ) +
  419                price_rep0_len( len2, state2, pos_state2 );
  420       while( num_trials < cur + len + 1 + len2 )
  421         trials[++num_trials].price = infinite_price;
  422       trials[cur+len+1+len2].update3( price, rep, cur + len + 1, cur );
  423       }
  424 
  425     // try matches
  426     if( newlen >= start_len && newlen <= len_limit )
  427       {
  428       const int normal_match_price = match_price +
  429                                      price0( bm_rep[cur_state()] );
  430 
  431       while( num_trials < cur + newlen )
  432         trials[++num_trials].price = infinite_price;
  433 
  434       int i = 0;
  435       while( pairs[i].len < start_len ) ++i;
  436       int dis = pairs[i].dis;
  437       for( int len = start_len; ; ++len )
  438         {
  439         int price = normal_match_price + price_pair( dis, len, pos_state );
  440         trials[cur+len].update( price, dis + num_rep_distances, cur );
  441 
  442         // try match + literal + rep0
  443         if( len == pairs[i].len )
  444           {
  445           const uint8_t * const data = ptr_to_current_pos();
  446           const int dis2 = dis + 1;
  447           int len2 = len + 1;
  448           const int limit = std::min( match_len_limit + len2, triable_bytes );
  449           while( len2 < limit && data[len2-dis2] == data[len2] ) ++len2;
  450           len2 -= len + 1;
  451           if( len2 >= min_match_len )
  452             {
  453             int pos_state2 = ( pos_state + len ) & pos_state_mask;
  454             State state2 = cur_state; state2.set_match();
  455             price += price0( bm_match[state2()][pos_state2] ) +
  456                      price_matched( data[len-1], data[len], data[len-dis2] );
  457             pos_state2 = ( pos_state2 + 1 ) & pos_state_mask;
  458             state2.set_char();
  459             price += price1( bm_match[state2()][pos_state2] ) +
  460                      price1( bm_rep[state2()] ) +
  461                      price_rep0_len( len2, state2, pos_state2 );
  462 
  463             while( num_trials < cur + len + 1 + len2 )
  464               trials[++num_trials].price = infinite_price;
  465             trials[cur+len+1+len2].update3( price, dis + num_rep_distances,
  466                                             cur + len + 1, cur );
  467             }
  468           if( ++i >= num_pairs ) break;
  469           dis = pairs[i].dis;
  470           }
  471         }
  472       }
  473     }
  474   }
  475 
  476 
  477 bool LZ_encoder::encode_member( const unsigned long long member_size )
  478   {
  479   const unsigned long long member_size_limit =
  480     member_size - Lzip_trailer::size - max_marker_size;
  481   const bool best = ( match_len_limit > 12 );
  482   const int dis_price_count = best ? 1 : 512;
  483   const int align_price_count = best ? 1 : dis_align_size;
  484   const int price_count = ( match_len_limit > 36 ) ? 1013 : 4093;
  485   int price_counter = 0;        // counters may decrement below 0
  486   int dis_price_counter = 0;
  487   int align_price_counter = 0;
  488   int reps[num_rep_distances];
  489   State state;
  490   for( int i = 0; i < num_rep_distances; ++i ) reps[i] = 0;
  491 
  492   if( data_position() != 0 || renc.member_position() != Lzip_header::size )
  493     return false;               // can be called only once
  494 
  495   if( !data_finished() )            // encode first byte
  496     {
  497     const uint8_t prev_byte = 0;
  498     const uint8_t cur_byte = peek( 0 );
  499     renc.encode_bit( bm_match[state()][0], 0 );
  500     encode_literal( prev_byte, cur_byte );
  501     crc32.update_byte( crc_, cur_byte );
  502     get_match_pairs();
  503     move_pos();
  504     }
  505 
  506   while( !data_finished() )
  507     {
  508     if( price_counter <= 0 && pending_num_pairs == 0 )
  509       {
  510       price_counter = price_count;  // recalculate prices every these bytes
  511       if( dis_price_counter <= 0 )
  512         { dis_price_counter = dis_price_count; update_distance_prices(); }
  513       if( align_price_counter <= 0 )
  514         {
  515         align_price_counter = align_price_count;
  516         for( int i = 0; i < dis_align_size; ++i )
  517           align_prices[i] = price_symbol_reversed( bm_align, i, dis_align_bits );
  518         }
  519       match_len_prices.update_prices();
  520       rep_len_prices.update_prices();
  521       }
  522 
  523     int ahead = sequence_optimizer( reps, state );
  524     price_counter -= ahead;
  525 
  526     for( int i = 0; ahead > 0; )
  527       {
  528       const int pos_state = ( data_position() - ahead ) & pos_state_mask;
  529       const int len = trials[i].price;
  530       int dis = trials[i].dis4;
  531 
  532       bool bit = ( dis < 0 );
  533       renc.encode_bit( bm_match[state()][pos_state], !bit );
  534       if( bit )                 // literal byte
  535         {
  536         const uint8_t prev_byte = peek( ahead + 1 );
  537         const uint8_t cur_byte = peek( ahead );
  538         crc32.update_byte( crc_, cur_byte );
  539         if( state.is_char_set_char() )
  540           encode_literal( prev_byte, cur_byte );
  541         else
  542           {
  543           const uint8_t match_byte = peek( ahead + reps[0] + 1 );
  544           encode_matched( prev_byte, cur_byte, match_byte );
  545           }
  546         }
  547       else                  // match or repeated match
  548         {
  549         crc32.update_buf( crc_, ptr_to_current_pos() - ahead, len );
  550         mtf_reps( dis, reps );
  551         bit = ( dis < num_rep_distances );
  552         renc.encode_bit( bm_rep[state()], bit );
  553         if( bit )               // repeated match
  554           {
  555           bit = ( dis == 0 );
  556           renc.encode_bit( bm_rep0[state()], !bit );
  557           if( bit )
  558             renc.encode_bit( bm_len[state()][pos_state], len > 1 );
  559           else
  560             {
  561             renc.encode_bit( bm_rep1[state()], dis > 1 );
  562             if( dis > 1 )
  563               renc.encode_bit( bm_rep2[state()], dis > 2 );
  564             }
  565           if( len == 1 ) state.set_short_rep();
  566           else
  567             {
  568             renc.encode_len( rep_len_model, len, pos_state );
  569             rep_len_prices.decrement_counter( pos_state );
  570             state.set_rep();
  571             }
  572           }
  573         else                    // match
  574           {
  575           dis -= num_rep_distances;
  576           encode_pair( dis, len, pos_state );
  577           if( dis >= modeled_distances ) --align_price_counter;
  578           --dis_price_counter;
  579           match_len_prices.decrement_counter( pos_state );
  580           state.set_match();
  581           }
  582         }
  583       ahead -= len; i += len;
  584       if( renc.member_position() >= member_size_limit )
  585         {
  586         if( !dec_pos( ahead ) ) return false;
  587         full_flush( state );
  588         return true;
  589         }
  590       }
  591     }
  592   full_flush( state );
  593   return true;
  594   }