"Fossies" - the Fresh Open Source Software Archive

Member "lzip-1.22-rc2/encoder_base.h" (30 Apr 2020, 14881 Bytes) of package /linux/misc/lzip-1.22-rc2.tar.lz:


As a special service "Fossies" has tried to format the requested source page into HTML format using (guessed) C and C++ source code syntax highlighting (style: standard) with prefixed line numbers and code folding option. Alternatively you can here view or download the uninterpreted source code file. For more information about "encoder_base.h" see the Fossies "Dox" file reference documentation and the last Fossies "Diffs" side-by-side code changes report: 1.21_vs_1.22-rc1.

    1 /* Lzip - LZMA lossless data compressor
    2    Copyright (C) 2008-2020 Antonio Diaz Diaz.
    3 
    4    This program is free software: you can redistribute it and/or modify
    5    it under the terms of the GNU General Public License as published by
    6    the Free Software Foundation, either version 2 of the License, or
    7    (at your option) any later version.
    8 
    9    This program is distributed in the hope that it will be useful,
   10    but WITHOUT ANY WARRANTY; without even the implied warranty of
   11    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
   12    GNU General Public License for more details.
   13 
   14    You should have received a copy of the GNU General Public License
   15    along with this program.  If not, see <http://www.gnu.org/licenses/>.
   16 */
   17 
   18 enum { price_shift_bits = 6,
   19        price_step_bits = 2,
   20        price_step = 1 << price_step_bits };
   21 
   22 class Dis_slots
   23   {
   24   uint8_t data[1<<10];
   25 
   26 public:
   27   void init()
   28     {
   29     for( int slot = 0; slot < 4; ++slot ) data[slot] = slot;
   30     for( int i = 4, size = 2, slot = 4; slot < 20; slot += 2 )
   31       {
   32       std::memset( &data[i], slot, size );
   33       std::memset( &data[i+size], slot + 1, size );
   34       size <<= 1;
   35       i += size;
   36       }
   37     }
   38 
   39   uint8_t operator[]( const int dis ) const { return data[dis]; }
   40   };
   41 
   42 extern Dis_slots dis_slots;
   43 
   44 inline uint8_t get_slot( const unsigned dis )
   45   {
   46   if( dis < (1 << 10) ) return dis_slots[dis];
   47   if( dis < (1 << 19) ) return dis_slots[dis>> 9] + 18;
   48   if( dis < (1 << 28) ) return dis_slots[dis>>18] + 36;
   49   return dis_slots[dis>>27] + 54;
   50   }
   51 
   52 
   53 class Prob_prices
   54   {
   55   short data[bit_model_total >> price_step_bits];
   56 
   57 public:
   58   void init()
   59     {
   60     for( int i = 0; i < bit_model_total >> price_step_bits; ++i )
   61       {
   62       unsigned val = ( i * price_step ) + ( price_step / 2 );
   63       int bits = 0;             // base 2 logarithm of val
   64       for( int j = 0; j < price_shift_bits; ++j )
   65         {
   66         val = val * val;
   67         bits <<= 1;
   68         while( val >= 1 << 16 ) { val >>= 1; ++bits; }
   69         }
   70       bits += 15;               // remaining bits in val
   71       data[i] = ( bit_model_total_bits << price_shift_bits ) - bits;
   72       }
   73     }
   74 
   75   int operator[]( const int probability ) const
   76     { return data[probability >> price_step_bits]; }
   77   };
   78 
   79 extern Prob_prices prob_prices;
   80 
   81 
   82 inline int price0( const Bit_model bm )
   83   { return prob_prices[bm.probability]; }
   84 
   85 inline int price1( const Bit_model bm )
   86   { return prob_prices[bit_model_total - bm.probability]; }
   87 
   88 inline int price_bit( const Bit_model bm, const bool bit )
   89   { return ( bit ? price1( bm ) : price0( bm ) ); }
   90 
   91 
   92 inline int price_symbol3( const Bit_model bm[], int symbol )
   93   {
   94   bool bit = symbol & 1;
   95   symbol |= 8; symbol >>= 1;
   96   int price = price_bit( bm[symbol], bit );
   97   bit = symbol & 1; symbol >>= 1; price += price_bit( bm[symbol], bit );
   98   return price + price_bit( bm[1], symbol & 1 );
   99   }
  100 
  101 
  102 inline int price_symbol6( const Bit_model bm[], unsigned symbol )
  103   {
  104   bool bit = symbol & 1;
  105   symbol |= 64; symbol >>= 1;
  106   int price = price_bit( bm[symbol], bit );
  107   bit = symbol & 1; symbol >>= 1; price += price_bit( bm[symbol], bit );
  108   bit = symbol & 1; symbol >>= 1; price += price_bit( bm[symbol], bit );
  109   bit = symbol & 1; symbol >>= 1; price += price_bit( bm[symbol], bit );
  110   bit = symbol & 1; symbol >>= 1; price += price_bit( bm[symbol], bit );
  111   return price + price_bit( bm[1], symbol & 1 );
  112   }
  113 
  114 
  115 inline int price_symbol8( const Bit_model bm[], int symbol )
  116   {
  117   bool bit = symbol & 1;
  118   symbol |= 0x100; symbol >>= 1;
  119   int price = price_bit( bm[symbol], bit );
  120   bit = symbol & 1; symbol >>= 1; price += price_bit( bm[symbol], bit );
  121   bit = symbol & 1; symbol >>= 1; price += price_bit( bm[symbol], bit );
  122   bit = symbol & 1; symbol >>= 1; price += price_bit( bm[symbol], bit );
  123   bit = symbol & 1; symbol >>= 1; price += price_bit( bm[symbol], bit );
  124   bit = symbol & 1; symbol >>= 1; price += price_bit( bm[symbol], bit );
  125   bit = symbol & 1; symbol >>= 1; price += price_bit( bm[symbol], bit );
  126   return price + price_bit( bm[1], symbol & 1 );
  127   }
  128 
  129 
  130 inline int price_symbol_reversed( const Bit_model bm[], int symbol,
  131                                   const int num_bits )
  132   {
  133   int price = 0;
  134   int model = 1;
  135   for( int i = num_bits; i > 0; --i )
  136     {
  137     const bool bit = symbol & 1;
  138     symbol >>= 1;
  139     price += price_bit( bm[model], bit );
  140     model <<= 1; model |= bit;
  141     }
  142   return price;
  143   }
  144 
  145 
  146 inline int price_matched( const Bit_model bm[], unsigned symbol,
  147                           unsigned match_byte )
  148   {
  149   int price = 0;
  150   unsigned mask = 0x100;
  151   symbol |= mask;
  152   while( true )
  153     {
  154     const unsigned match_bit = ( match_byte <<= 1 ) & mask;
  155     const bool bit = ( symbol <<= 1 ) & 0x100;
  156     price += price_bit( bm[(symbol>>9)+match_bit+mask], bit );
  157     if( symbol >= 0x10000 ) return price;
  158     mask &= ~(match_bit ^ symbol);  // if( match_bit != bit ) mask = 0;
  159     }
  160   }
  161 
  162 
  163 class Matchfinder_base
  164   {
  165   bool read_block();
  166   void normalize_pos();
  167 
  168   Matchfinder_base( const Matchfinder_base & ); // declared as private
  169   void operator=( const Matchfinder_base & );   // declared as private
  170 
  171 protected:
  172   unsigned long long partial_data_pos;
  173   uint8_t * buffer;     // input buffer
  174   int32_t * prev_positions; // 1 + last seen position of key. else 0
  175   int32_t * pos_array;      // may be tree or chain
  176   const int before_size;    // bytes to keep in buffer before dictionary
  177   int buffer_size;
  178   int dictionary_size;      // bytes to keep in buffer before pos
  179   int pos;          // current pos in buffer
  180   int cyclic_pos;       // cycles through [0, dictionary_size]
  181   int stream_pos;       // first byte not yet read from file
  182   int pos_limit;        // when reached, a new block must be read
  183   int key4_mask;
  184   const int num_prev_positions23;
  185   int num_prev_positions;   // size of prev_positions
  186   int pos_array_size;
  187   const int infd;       // input file descriptor
  188   bool at_stream_end;       // stream_pos shows real end of file
  189 
  190   Matchfinder_base( const int before_size_,
  191                     const int dict_size, const int after_size,
  192                     const int dict_factor, const int num_prev_positions23_,
  193                     const int pos_array_factor, const int ifd );
  194 
  195   ~Matchfinder_base()
  196     { delete[] prev_positions; std::free( buffer ); }
  197 
  198 public:
  199   uint8_t peek( const int distance ) const { return buffer[pos-distance]; }
  200   int available_bytes() const { return stream_pos - pos; }
  201   unsigned long long data_position() const { return partial_data_pos + pos; }
  202   bool data_finished() const { return at_stream_end && pos >= stream_pos; }
  203   const uint8_t * ptr_to_current_pos() const { return buffer + pos; }
  204 
  205   int true_match_len( const int index, const int distance ) const
  206     {
  207     const uint8_t * const data = buffer + pos;
  208     int i = index;
  209     const int len_limit = std::min( available_bytes(), (int)max_match_len );
  210     while( i < len_limit && data[i-distance] == data[i] ) ++i;
  211     return i;
  212     }
  213 
  214   void move_pos()
  215     {
  216     if( ++cyclic_pos > dictionary_size ) cyclic_pos = 0;
  217     if( ++pos >= pos_limit ) normalize_pos();
  218     }
  219 
  220   void reset();
  221   };
  222 
  223 
  224 class Range_encoder
  225   {
  226   enum { buffer_size = 65536 };
  227   uint64_t low;
  228   unsigned long long partial_member_pos;
  229   uint8_t * const buffer;   // output buffer
  230   int pos;          // current pos in buffer
  231   uint32_t range;
  232   unsigned ff_count;
  233   const int outfd;      // output file descriptor
  234   uint8_t cache;
  235   Lzip_header header;
  236 
  237   void shift_low()
  238     {
  239     if( low >> 24 != 0xFF )
  240       {
  241       const bool carry = ( low > 0xFFFFFFFFU );
  242       put_byte( cache + carry );
  243       for( ; ff_count > 0; --ff_count ) put_byte( 0xFF + carry );
  244       cache = low >> 24;
  245       }
  246     else ++ff_count;
  247     low = ( low & 0x00FFFFFFU ) << 8;
  248     }
  249 
  250   Range_encoder( const Range_encoder & );   // declared as private
  251   void operator=( const Range_encoder & );  // declared as private
  252 
  253 public:
  254   void reset( const unsigned dictionary_size )
  255     {
  256     low = 0;
  257     partial_member_pos = 0;
  258     pos = 0;
  259     range = 0xFFFFFFFFU;
  260     ff_count = 0;
  261     cache = 0;
  262     header.dictionary_size( dictionary_size );
  263     for( int i = 0; i < Lzip_header::size; ++i )
  264       put_byte( header.data[i] );
  265     }
  266 
  267   Range_encoder( const unsigned dictionary_size, const int ofd )
  268     :
  269     buffer( new uint8_t[buffer_size] ), outfd( ofd )
  270     {
  271     header.set_magic();
  272     reset( dictionary_size );
  273     }
  274 
  275   ~Range_encoder() { delete[] buffer; }
  276 
  277   unsigned long long member_position() const
  278     { return partial_member_pos + pos + ff_count; }
  279 
  280   void flush() { for( int i = 0; i < 5; ++i ) shift_low(); }
  281   void flush_data();
  282 
  283   void put_byte( const uint8_t b )
  284     {
  285     buffer[pos] = b;
  286     if( ++pos >= buffer_size ) flush_data();
  287     }
  288 
  289   void encode( const int symbol, const int num_bits )
  290     {
  291     for( unsigned mask = 1 << ( num_bits - 1 ); mask > 0; mask >>= 1 )
  292       {
  293       range >>= 1;
  294       if( symbol & mask ) low += range;
  295       if( range <= 0x00FFFFFFU ) { range <<= 8; shift_low(); }
  296       }
  297     }
  298 
  299   void encode_bit( Bit_model & bm, const bool bit )
  300     {
  301     const uint32_t bound = ( range >> bit_model_total_bits ) * bm.probability;
  302     if( !bit )
  303       {
  304       range = bound;
  305       bm.probability += (bit_model_total - bm.probability) >> bit_model_move_bits;
  306       }
  307     else
  308       {
  309       low += bound;
  310       range -= bound;
  311       bm.probability -= bm.probability >> bit_model_move_bits;
  312       }
  313     if( range <= 0x00FFFFFFU ) { range <<= 8; shift_low(); }
  314     }
  315 
  316   void encode_tree3( Bit_model bm[], const int symbol )
  317     {
  318     bool bit = ( symbol >> 2 ) & 1;
  319     encode_bit( bm[1], bit );
  320     int model = 2 | bit;
  321     bit = ( symbol >> 1 ) & 1;
  322     encode_bit( bm[model], bit ); model <<= 1; model |= bit;
  323     encode_bit( bm[model], symbol & 1 );
  324     }
  325 
  326   void encode_tree6( Bit_model bm[], const unsigned symbol )
  327     {
  328     bool bit = ( symbol >> 5 ) & 1;
  329     encode_bit( bm[1], bit );
  330     int model = 2 | bit;
  331     bit = ( symbol >> 4 ) & 1;
  332     encode_bit( bm[model], bit ); model <<= 1; model |= bit;
  333     bit = ( symbol >> 3 ) & 1;
  334     encode_bit( bm[model], bit ); model <<= 1; model |= bit;
  335     bit = ( symbol >> 2 ) & 1;
  336     encode_bit( bm[model], bit ); model <<= 1; model |= bit;
  337     bit = ( symbol >> 1 ) & 1;
  338     encode_bit( bm[model], bit ); model <<= 1; model |= bit;
  339     encode_bit( bm[model], symbol & 1 );
  340     }
  341 
  342   void encode_tree8( Bit_model bm[], const int symbol )
  343     {
  344     int model = 1;
  345     for( int i = 7; i >= 0; --i )
  346       {
  347       const bool bit = ( symbol >> i ) & 1;
  348       encode_bit( bm[model], bit );
  349       model <<= 1; model |= bit;
  350       }
  351     }
  352 
  353   void encode_tree_reversed( Bit_model bm[], int symbol, const int num_bits )
  354     {
  355     int model = 1;
  356     for( int i = num_bits; i > 0; --i )
  357       {
  358       const bool bit = symbol & 1;
  359       symbol >>= 1;
  360       encode_bit( bm[model], bit );
  361       model <<= 1; model |= bit;
  362       }
  363     }
  364 
  365   void encode_matched( Bit_model bm[], unsigned symbol, unsigned match_byte )
  366     {
  367     unsigned mask = 0x100;
  368     symbol |= mask;
  369     while( true )
  370       {
  371       const unsigned match_bit = ( match_byte <<= 1 ) & mask;
  372       const bool bit = ( symbol <<= 1 ) & 0x100;
  373       encode_bit( bm[(symbol>>9)+match_bit+mask], bit );
  374       if( symbol >= 0x10000 ) break;
  375       mask &= ~(match_bit ^ symbol);    // if( match_bit != bit ) mask = 0;
  376       }
  377     }
  378 
  379   void encode_len( Len_model & lm, int symbol, const int pos_state )
  380     {
  381     bool bit = ( ( symbol -= min_match_len ) >= len_low_symbols );
  382     encode_bit( lm.choice1, bit );
  383     if( !bit )
  384       encode_tree3( lm.bm_low[pos_state], symbol );
  385     else
  386       {
  387       bit = ( ( symbol -= len_low_symbols ) >= len_mid_symbols );
  388       encode_bit( lm.choice2, bit );
  389       if( !bit )
  390         encode_tree3( lm.bm_mid[pos_state], symbol );
  391       else
  392         encode_tree8( lm.bm_high, symbol - len_mid_symbols );
  393       }
  394     }
  395   };
  396 
  397 
  398 class LZ_encoder_base : public Matchfinder_base
  399   {
  400 protected:
  401   enum { max_marker_size = 16,
  402          num_rep_distances = 4 };   // must be 4
  403 
  404   uint32_t crc_;
  405 
  406   Bit_model bm_literal[1<<literal_context_bits][0x300];
  407   Bit_model bm_match[State::states][pos_states];
  408   Bit_model bm_rep[State::states];
  409   Bit_model bm_rep0[State::states];
  410   Bit_model bm_rep1[State::states];
  411   Bit_model bm_rep2[State::states];
  412   Bit_model bm_len[State::states][pos_states];
  413   Bit_model bm_dis_slot[len_states][1<<dis_slot_bits];
  414   Bit_model bm_dis[modeled_distances-end_dis_model+1];
  415   Bit_model bm_align[dis_align_size];
  416   Len_model match_len_model;
  417   Len_model rep_len_model;
  418   Range_encoder renc;
  419 
  420   LZ_encoder_base( const int before_size, const int dict_size,
  421                    const int after_size, const int dict_factor,
  422                    const int num_prev_positions23,
  423                    const int pos_array_factor,
  424                    const int ifd, const int outfd )
  425     :
  426     Matchfinder_base( before_size, dict_size, after_size, dict_factor,
  427                       num_prev_positions23, pos_array_factor, ifd ),
  428     crc_( 0xFFFFFFFFU ),
  429     renc( dictionary_size, outfd )
  430     {}
  431 
  432   unsigned crc() const { return crc_ ^ 0xFFFFFFFFU; }
  433 
  434   int price_literal( const uint8_t prev_byte, const uint8_t symbol ) const
  435     { return price_symbol8( bm_literal[get_lit_state(prev_byte)], symbol ); }
  436 
  437   int price_matched( const uint8_t prev_byte, const uint8_t symbol,
  438                      const uint8_t match_byte ) const
  439     { return ::price_matched( bm_literal[get_lit_state(prev_byte)], symbol,
  440                               match_byte ); }
  441 
  442   void encode_literal( const uint8_t prev_byte, const uint8_t symbol )
  443     { renc.encode_tree8( bm_literal[get_lit_state(prev_byte)], symbol ); }
  444 
  445   void encode_matched( const uint8_t prev_byte, const uint8_t symbol,
  446                        const uint8_t match_byte )
  447     { renc.encode_matched( bm_literal[get_lit_state(prev_byte)], symbol,
  448                            match_byte ); }
  449 
  450   void encode_pair( const unsigned dis, const int len, const int pos_state )
  451     {
  452     renc.encode_len( match_len_model, len, pos_state );
  453     const unsigned dis_slot = get_slot( dis );
  454     renc.encode_tree6( bm_dis_slot[get_len_state(len)], dis_slot );
  455 
  456     if( dis_slot >= start_dis_model )
  457       {
  458       const int direct_bits = ( dis_slot >> 1 ) - 1;
  459       const unsigned base = ( 2 | ( dis_slot & 1 ) ) << direct_bits;
  460       const unsigned direct_dis = dis - base;
  461 
  462       if( dis_slot < end_dis_model )
  463         renc.encode_tree_reversed( bm_dis + ( base - dis_slot ),
  464                                    direct_dis, direct_bits );
  465       else
  466         {
  467         renc.encode( direct_dis >> dis_align_bits, direct_bits - dis_align_bits );
  468         renc.encode_tree_reversed( bm_align, direct_dis, dis_align_bits );
  469         }
  470       }
  471     }
  472 
  473   void full_flush( const State state );
  474 
  475 public:
  476   virtual ~LZ_encoder_base() {}
  477 
  478   unsigned long long member_position() const { return renc.member_position(); }
  479   virtual void reset();
  480 
  481   virtual bool encode_member( const unsigned long long member_size ) = 0;
  482   };