"Fossies" - the Fresh Open Source Software Archive

Member "lzip-1.22-rc2/decoder.h" (30 Apr 2020, 8184 Bytes) of package /linux/misc/lzip-1.22-rc2.tar.lz:


As a special service, "Fossies" has tried to format the requested source page into HTML using (guessed) C and C++ source code syntax highlighting (style: standard), with prefixed line numbers and a code folding option. Alternatively, you can view or download the uninterpreted source code file here. For more information about "decoder.h", see the Fossies "Dox" file reference documentation and the latest Fossies "Diffs" side-by-side code changes report: 1.21_vs_1.22-rc1.

/* Lzip - LZMA lossless data compressor
   Copyright (C) 2008-2020 Antonio Diaz Diaz.

   This program is free software: you can redistribute it and/or modify
   it under the terms of the GNU General Public License as published by
   the Free Software Foundation, either version 2 of the License, or
   (at your option) any later version.

   This program is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
   GNU General Public License for more details.

   You should have received a copy of the GNU General Public License
   along with this program.  If not, see <http://www.gnu.org/licenses/>.
*/
   17 
   18 class Range_decoder
   19   {
   20   enum { buffer_size = 16384 };
   21   unsigned long long partial_member_pos;
   22   uint8_t * const buffer;   // input buffer
   23   int pos;          // current pos in buffer
   24   int stream_pos;       // when reached, a new block must be read
   25   uint32_t code;
   26   uint32_t range;
   27   const int infd;       // input file descriptor
   28   bool at_stream_end;
   29 
   30   bool read_block();
   31 
   32   Range_decoder( const Range_decoder & );   // declared as private
   33   void operator=( const Range_decoder & );  // declared as private
   34 
   35 public:
   36   explicit Range_decoder( const int ifd )
   37     :
   38     partial_member_pos( 0 ),
   39     buffer( new uint8_t[buffer_size] ),
   40     pos( 0 ),
   41     stream_pos( 0 ),
   42     code( 0 ),
   43     range( 0xFFFFFFFFU ),
   44     infd( ifd ),
   45     at_stream_end( false )
   46     {}
   47 
   48   ~Range_decoder() { delete[] buffer; }
   49 
   50   bool finished() { return pos >= stream_pos && !read_block(); }
   51 
   52   unsigned long long member_position() const
   53     { return partial_member_pos + pos; }
   54 
   55   void reset_member_position()
   56     { partial_member_pos = 0; partial_member_pos -= pos; }
   57 
   58   uint8_t get_byte()
   59     {
   60     // 0xFF avoids decoder error if member is truncated at EOS marker
   61     if( finished() ) return 0xFF;
   62     return buffer[pos++];
   63     }
   64 
   65   int read_data( uint8_t * const outbuf, const int size )
   66     {
   67     int sz = 0;
   68     while( sz < size && !finished() )
   69       {
   70       const int rd = std::min( size - sz, stream_pos - pos );
   71       std::memcpy( outbuf + sz, buffer + pos, rd );
   72       pos += rd;
   73       sz += rd;
   74       }
   75     return sz;
   76     }
   77 
   78   void load()
   79     {
   80     code = 0;
   81     for( int i = 0; i < 5; ++i ) code = ( code << 8 ) | get_byte();
   82     range = 0xFFFFFFFFU;
   83     code &= range;      // make sure that first byte is discarded
   84     }
   85 
   86   void normalize()
   87     {
   88     if( range <= 0x00FFFFFFU )
   89       { range <<= 8; code = ( code << 8 ) | get_byte(); }
   90     }
   91 
   92   unsigned decode( const int num_bits )
   93     {
   94     unsigned symbol = 0;
   95     for( int i = num_bits; i > 0; --i )
   96       {
   97       normalize();
   98       range >>= 1;
   99 //      symbol <<= 1;
  100 //      if( code >= range ) { code -= range; symbol |= 1; }
  101       const bool bit = ( code >= range );
  102       symbol <<= 1; symbol += bit;
  103       code -= range & ( 0U - bit );
  104       }
  105     return symbol;
  106     }
  107 
  108   unsigned decode_bit( Bit_model & bm )
  109     {
  110     normalize();
  111     const uint32_t bound = ( range >> bit_model_total_bits ) * bm.probability;
  112     if( code < bound )
  113       {
  114       range = bound;
  115       bm.probability +=
  116         ( bit_model_total - bm.probability ) >> bit_model_move_bits;
  117       return 0;
  118       }
  119     else
  120       {
  121       range -= bound;
  122       code -= bound;
  123       bm.probability -= bm.probability >> bit_model_move_bits;
  124       return 1;
  125       }
  126     }
  127 
  128   unsigned decode_tree3( Bit_model bm[] )
  129     {
  130     unsigned symbol = 2 | decode_bit( bm[1] );
  131     symbol = ( symbol << 1 ) | decode_bit( bm[symbol] );
  132     symbol = ( symbol << 1 ) | decode_bit( bm[symbol] );
  133     return symbol & 7;
  134     }
  135 
  136   unsigned decode_tree6( Bit_model bm[] )
  137     {
  138     unsigned symbol = 2 | decode_bit( bm[1] );
  139     symbol = ( symbol << 1 ) | decode_bit( bm[symbol] );
  140     symbol = ( symbol << 1 ) | decode_bit( bm[symbol] );
  141     symbol = ( symbol << 1 ) | decode_bit( bm[symbol] );
  142     symbol = ( symbol << 1 ) | decode_bit( bm[symbol] );
  143     symbol = ( symbol << 1 ) | decode_bit( bm[symbol] );
  144     return symbol & 0x3F;
  145     }
  146 
  147   unsigned decode_tree8( Bit_model bm[] )
  148     {
  149     unsigned symbol = 1;
  150     for( int i = 0; i < 8; ++i )
  151       symbol = ( symbol << 1 ) | decode_bit( bm[symbol] );
  152     return symbol & 0xFF;
  153     }
  154 
  155   unsigned decode_tree_reversed( Bit_model bm[], const int num_bits )
  156     {
  157     unsigned model = 1;
  158     unsigned symbol = 0;
  159     for( int i = 0; i < num_bits; ++i )
  160       {
  161       const unsigned bit = decode_bit( bm[model] );
  162       model <<= 1; model += bit;
  163       symbol |= ( bit << i );
  164       }
  165     return symbol;
  166     }
  167 
  168   unsigned decode_tree_reversed4( Bit_model bm[] )
  169     {
  170     unsigned symbol = decode_bit( bm[1] );
  171     symbol += decode_bit( bm[2+symbol] ) << 1;
  172     symbol += decode_bit( bm[4+symbol] ) << 2;
  173     symbol += decode_bit( bm[8+symbol] ) << 3;
  174     return symbol;
  175     }
  176 
  177   unsigned decode_matched( Bit_model bm[], unsigned match_byte )
  178     {
  179     Bit_model * const bm1 = bm + 0x100;
  180     unsigned symbol = 1;
  181     while( symbol < 0x100 )
  182       {
  183       const unsigned match_bit = ( match_byte <<= 1 ) & 0x100;
  184       const bool bit = decode_bit( bm1[symbol+match_bit] );
  185       symbol <<= 1; symbol |= bit;
  186       if( match_bit >> 8 != bit )
  187         {
  188         while( symbol < 0x100 )
  189           symbol = ( symbol << 1 ) | decode_bit( bm[symbol] );
  190         break;
  191         }
  192       }
  193     return symbol & 0xFF;
  194     }
  195 
  196   unsigned decode_len( Len_model & lm, const int pos_state )
  197     {
  198     if( decode_bit( lm.choice1 ) == 0 )
  199       return decode_tree3( lm.bm_low[pos_state] );
  200     if( decode_bit( lm.choice2 ) == 0 )
  201       return len_low_symbols + decode_tree3( lm.bm_mid[pos_state] );
  202     return len_low_symbols + len_mid_symbols + decode_tree8( lm.bm_high );
  203     }
  204   };
  205 
  206 
  207 class LZ_decoder
  208   {
  209   unsigned long long partial_data_pos;
  210   Range_decoder & rdec;
  211   const unsigned dictionary_size;
  212   uint8_t * const buffer;   // output buffer
  213   unsigned pos;         // current pos in buffer
  214   unsigned stream_pos;      // first byte not yet written to file
  215   uint32_t crc_;
  216   const int outfd;      // output file descriptor
  217   bool pos_wrapped;
  218 
  219   void flush_data();
  220   bool verify_trailer( const Pretty_print & pp ) const;
  221 
  222   uint8_t peek_prev() const
  223     { return buffer[((pos > 0) ? pos : dictionary_size)-1]; }
  224 
  225   uint8_t peek( const unsigned distance ) const
  226     {
  227     const unsigned i = ( ( pos > distance ) ? 0 : dictionary_size ) +
  228                        pos - distance - 1;
  229     return buffer[i];
  230     }
  231 
  232   void put_byte( const uint8_t b )
  233     {
  234     buffer[pos] = b;
  235     if( ++pos >= dictionary_size ) flush_data();
  236     }
  237 
  238   void copy_block( const unsigned distance, unsigned len )
  239     {
  240     unsigned lpos = pos, i = lpos - distance - 1;
  241     bool fast, fast2;
  242     if( lpos > distance )
  243       {
  244       fast = ( len < dictionary_size - lpos );
  245       fast2 = ( fast && len <= lpos - i );
  246       }
  247     else
  248       {
  249       i += dictionary_size;
  250       fast = ( len < dictionary_size - i ); // (i == pos) may happen
  251       fast2 = ( fast && len <= i - lpos );
  252       }
  253     if( fast )                  // no wrap
  254       {
  255       pos += len;
  256       if( fast2 )               // no wrap, no overlap
  257         std::memcpy( buffer + lpos, buffer + i, len );
  258       else
  259         for( ; len > 0; --len ) buffer[lpos++] = buffer[i++];
  260       }
  261     else for( ; len > 0; --len )
  262       {
  263       buffer[pos] = buffer[i];
  264       if( ++pos >= dictionary_size ) flush_data();
  265       if( ++i >= dictionary_size ) i = 0;
  266       }
  267     }
  268 
  269   LZ_decoder( const LZ_decoder & );     // declared as private
  270   void operator=( const LZ_decoder & );     // declared as private
  271 
  272 public:
  273   LZ_decoder( Range_decoder & rde, const unsigned dict_size, const int ofd )
  274     :
  275     partial_data_pos( 0 ),
  276     rdec( rde ),
  277     dictionary_size( dict_size ),
  278     buffer( new uint8_t[dictionary_size] ),
  279     pos( 0 ),
  280     stream_pos( 0 ),
  281     crc_( 0xFFFFFFFFU ),
  282     outfd( ofd ),
  283     pos_wrapped( false )
  284     // prev_byte of first byte; also for peek( 0 ) on corrupt file
  285     { buffer[dictionary_size-1] = 0; }
  286 
  287   ~LZ_decoder() { delete[] buffer; }
  288 
  289   unsigned crc() const { return crc_ ^ 0xFFFFFFFFU; }
  290   unsigned long long data_position() const { return partial_data_pos + pos; }
  291 
  292   int decode_member( const Pretty_print & pp );
  293   };