"Fossies" - the Fresh Open Source Software Archive

Member "lzip-1.22-rc2/lzip.h" (17 May 2020, 10967 Bytes) of package /linux/misc/lzip-1.22-rc2.tar.lz:


As a special service, "Fossies" has tried to format the requested source page into HTML using (guessed) C and C++ source code syntax highlighting (style: standard), with prefixed line numbers and a code folding option. Alternatively, you can view or download the uninterpreted source code file here. For more information about "lzip.h", see the Fossies "Dox" file reference documentation and the latest Fossies "Diffs" side-by-side code changes report: 1.21_vs_1.22-rc1.

    1 /* Lzip - LZMA lossless data compressor
    2    Copyright (C) 2008-2020 Antonio Diaz Diaz.
    3 
    4    This program is free software: you can redistribute it and/or modify
    5    it under the terms of the GNU General Public License as published by
    6    the Free Software Foundation, either version 2 of the License, or
    7    (at your option) any later version.
    8 
    9    This program is distributed in the hope that it will be useful,
   10    but WITHOUT ANY WARRANTY; without even the implied warranty of
   11    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
   12    GNU General Public License for more details.
   13 
   14    You should have received a copy of the GNU General Public License
   15    along with this program.  If not, see <http://www.gnu.org/licenses/>.
   16 */
   17 
   18 class State
   19   {
   20   int st;
   21 
   22 public:
   23   enum { states = 12 };
   24   State() : st( 0 ) {}
   25   int operator()() const { return st; }
   26   bool is_char() const { return st < 7; }
   27 
   28   void set_char()
   29     {
   30     static const int next[states] = { 0, 0, 0, 0, 1, 2, 3, 4, 5, 6, 4, 5 };
   31     st = next[st];
   32     }
   33   bool is_char_set_char()
   34     {
   35     if( st < 7 ) { st -= ( st < 4 ) ? st : 3; return true; }
   36     else { st -= ( st < 10 ) ? 3 : 6; return false; }
   37     }
   38   void set_char_rep()  { st = 8; }
   39   void set_match()     { st = ( st < 7 ) ? 7 : 10; }
   40   void set_rep()       { st = ( st < 7 ) ? 8 : 11; }
   41   void set_short_rep() { st = ( st < 7 ) ? 9 : 11; }
   42   };
   43 
   44 
   45 enum {
   46   min_dictionary_bits = 12,
   47   min_dictionary_size = 1 << min_dictionary_bits,   // >= modeled_distances
   48   max_dictionary_bits = 29,
   49   max_dictionary_size = 1 << max_dictionary_bits,
   50   min_member_size = 36,
   51   literal_context_bits = 3,
   52   literal_pos_state_bits = 0,               // not used
   53   pos_state_bits = 2,
   54   pos_states = 1 << pos_state_bits,
   55   pos_state_mask = pos_states - 1,
   56 
   57   len_states = 4,
   58   dis_slot_bits = 6,
   59   start_dis_model = 4,
   60   end_dis_model = 14,
   61   modeled_distances = 1 << (end_dis_model / 2),     // 128
   62   dis_align_bits = 4,
   63   dis_align_size = 1 << dis_align_bits,
   64 
   65   len_low_bits = 3,
   66   len_mid_bits = 3,
   67   len_high_bits = 8,
   68   len_low_symbols = 1 << len_low_bits,
   69   len_mid_symbols = 1 << len_mid_bits,
   70   len_high_symbols = 1 << len_high_bits,
   71   max_len_symbols = len_low_symbols + len_mid_symbols + len_high_symbols,
   72 
   73   min_match_len = 2,                    // must be 2
   74   max_match_len = min_match_len + max_len_symbols - 1,  // 273
   75   min_match_len_limit = 5 };
   76 
   77 inline int get_len_state( const int len )
   78   { return std::min( len - min_match_len, len_states - 1 ); }
   79 
   80 inline int get_lit_state( const uint8_t prev_byte )
   81   { return prev_byte >> ( 8 - literal_context_bits ); }
   82 
   83 
   84 enum { bit_model_move_bits = 5,
   85        bit_model_total_bits = 11,
   86        bit_model_total = 1 << bit_model_total_bits };
   87 
   88 struct Bit_model
   89   {
   90   int probability;
   91   void reset() { probability = bit_model_total / 2; }
   92   void reset( const int size )
   93     { for( int i = 0; i < size; ++i ) this[i].reset(); }
   94   Bit_model() { reset(); }
   95   };
   96 
   97 struct Len_model
   98   {
   99   Bit_model choice1;
  100   Bit_model choice2;
  101   Bit_model bm_low[pos_states][len_low_symbols];
  102   Bit_model bm_mid[pos_states][len_mid_symbols];
  103   Bit_model bm_high[len_high_symbols];
  104 
  105   void reset()
  106     {
  107     choice1.reset();
  108     choice2.reset();
  109     bm_low[0][0].reset( pos_states * len_low_symbols );
  110     bm_mid[0][0].reset( pos_states * len_mid_symbols );
  111     bm_high[0].reset( len_high_symbols );
  112     }
  113   };
  114 
  115 
  116 // defined in main.cc
  117 extern int verbosity;
  118 
  119 class Pretty_print      // requires global var 'int verbosity'
  120   {
  121   std::string name_;
  122   std::string padded_name;
  123   const char * const stdin_name;
  124   unsigned longest_name;
  125   mutable bool first_post;
  126 
  127 public:
  128   Pretty_print( const std::vector< std::string > & filenames )
  129     : stdin_name( "(stdin)" ), longest_name( 0 ), first_post( false )
  130     {
  131     if( verbosity <= 0 ) return;
  132     const unsigned stdin_name_len = std::strlen( stdin_name );
  133     for( unsigned i = 0; i < filenames.size(); ++i )
  134       {
  135       const std::string & s = filenames[i];
  136       const unsigned len = ( s == "-" ) ? stdin_name_len : s.size();
  137       if( longest_name < len ) longest_name = len;
  138       }
  139     if( longest_name == 0 ) longest_name = stdin_name_len;
  140     }
  141 
  142   void set_name( const std::string & filename )
  143     {
  144     if( filename.size() && filename != "-" ) name_ = filename;
  145     else name_ = stdin_name;
  146     padded_name = "  "; padded_name += name_; padded_name += ": ";
  147     if( longest_name > name_.size() )
  148       padded_name.append( longest_name - name_.size(), ' ' );
  149     first_post = true;
  150     }
  151 
  152   void reset() const { if( name_.size() ) first_post = true; }
  153   const char * name() const { return name_.c_str(); }
  154   void operator()( const char * const msg = 0 ) const;
  155   };
  156 
  157 
  158 class CRC32
  159   {
  160   uint32_t data[256];       // Table of CRCs of all 8-bit messages.
  161 
  162 public:
  163   CRC32()
  164     {
  165     for( unsigned n = 0; n < 256; ++n )
  166       {
  167       unsigned c = n;
  168       for( int k = 0; k < 8; ++k )
  169         { if( c & 1 ) c = 0xEDB88320U ^ ( c >> 1 ); else c >>= 1; }
  170       data[n] = c;
  171       }
  172     }
  173 
  174   uint32_t operator[]( const uint8_t byte ) const { return data[byte]; }
  175 
  176   void update_byte( uint32_t & crc, const uint8_t byte ) const
  177     { crc = data[(crc^byte)&0xFF] ^ ( crc >> 8 ); }
  178 
  179   void update_buf( uint32_t & crc, const uint8_t * const buffer,
  180                    const int size ) const
  181     {
  182     uint32_t c = crc;
  183     for( int i = 0; i < size; ++i )
  184       c = data[(c^buffer[i])&0xFF] ^ ( c >> 8 );
  185     crc = c;
  186     }
  187   };
  188 
  189 extern const CRC32 crc32;
  190 
  191 
  192 inline bool isvalid_ds( const unsigned dictionary_size )
  193   { return ( dictionary_size >= min_dictionary_size &&
  194              dictionary_size <= max_dictionary_size ); }
  195 
  196 
  197 inline int real_bits( unsigned value )
  198   {
  199   int bits = 0;
  200   while( value > 0 ) { value >>= 1; ++bits; }
  201   return bits;
  202   }
  203 
  204 
  205 const uint8_t lzip_magic[4] = { 0x4C, 0x5A, 0x49, 0x50 };   // "LZIP"
  206 
  207 struct Lzip_header
  208   {
  209   uint8_t data[6];          // 0-3 magic bytes
  210                     //   4 version
  211                     //   5 coded dictionary size
  212   enum { size = 6 };
  213 
  214   void set_magic() { std::memcpy( data, lzip_magic, 4 ); data[4] = 1; }
  215   bool verify_magic() const
  216     { return ( std::memcmp( data, lzip_magic, 4 ) == 0 ); }
  217 
  218   bool verify_prefix( const int sz ) const  // detect (truncated) header
  219     {
  220     for( int i = 0; i < sz && i < 4; ++i )
  221       if( data[i] != lzip_magic[i] ) return false;
  222     return ( sz > 0 );
  223     }
  224   bool verify_corrupt() const           // detect corrupt header
  225     {
  226     int matches = 0;
  227     for( int i = 0; i < 4; ++i )
  228       if( data[i] == lzip_magic[i] ) ++matches;
  229     return ( matches > 1 && matches < 4 );
  230     }
  231 
  232   uint8_t version() const { return data[4]; }
  233   bool verify_version() const { return ( data[4] == 1 ); }
  234 
  235   unsigned dictionary_size() const
  236     {
  237     unsigned sz = ( 1 << ( data[5] & 0x1F ) );
  238     if( sz > min_dictionary_size )
  239       sz -= ( sz / 16 ) * ( ( data[5] >> 5 ) & 7 );
  240     return sz;
  241     }
  242 
  243   bool dictionary_size( const unsigned sz )
  244     {
  245     if( !isvalid_ds( sz ) ) return false;
  246     data[5] = real_bits( sz - 1 );
  247     if( sz > min_dictionary_size )
  248       {
  249       const unsigned base_size = 1 << data[5];
  250       const unsigned fraction = base_size / 16;
  251       for( unsigned i = 7; i >= 1; --i )
  252         if( base_size - ( i * fraction ) >= sz )
  253           { data[5] |= ( i << 5 ); break; }
  254       }
  255     return true;
  256     }
  257 
  258   bool verify() const
  259     { return verify_magic() && verify_version() &&
  260              isvalid_ds( dictionary_size() ); }
  261   };
  262 
  263 
  264 struct Lzip_trailer
  265   {
  266   uint8_t data[20]; //  0-3  CRC32 of the uncompressed data
  267             //  4-11 size of the uncompressed data
  268             // 12-19 member size including header and trailer
  269   enum { size = 20 };
  270 
  271   unsigned data_crc() const
  272     {
  273     unsigned tmp = 0;
  274     for( int i = 3; i >= 0; --i ) { tmp <<= 8; tmp += data[i]; }
  275     return tmp;
  276     }
  277 
  278   void data_crc( unsigned crc )
  279     { for( int i = 0; i <= 3; ++i ) { data[i] = (uint8_t)crc; crc >>= 8; } }
  280 
  281   unsigned long long data_size() const
  282     {
  283     unsigned long long tmp = 0;
  284     for( int i = 11; i >= 4; --i ) { tmp <<= 8; tmp += data[i]; }
  285     return tmp;
  286     }
  287 
  288   void data_size( unsigned long long sz )
  289     { for( int i = 4; i <= 11; ++i ) { data[i] = (uint8_t)sz; sz >>= 8; } }
  290 
  291   unsigned long long member_size() const
  292     {
  293     unsigned long long tmp = 0;
  294     for( int i = 19; i >= 12; --i ) { tmp <<= 8; tmp += data[i]; }
  295     return tmp;
  296     }
  297 
  298   void member_size( unsigned long long sz )
  299     { for( int i = 12; i <= 19; ++i ) { data[i] = (uint8_t)sz; sz >>= 8; } }
  300 
  301   bool verify_consistency() const   // check internal consistency
  302     {
  303     const unsigned crc = data_crc();
  304     const unsigned long long dsize = data_size();
  305     if( ( crc == 0 ) != ( dsize == 0 ) ) return false;
  306     const unsigned long long msize = member_size();
  307     if( msize < min_member_size ) return false;
  308     const unsigned long long mlimit = ( 9 * dsize + 7 ) / 8 + min_member_size;
  309     if( mlimit > dsize && msize > mlimit ) return false;
  310     const unsigned long long dlimit = 7090 * ( msize - 26 ) - 1;
  311     if( dlimit > msize && dsize > dlimit ) return false;
  312     return true;
  313     }
  314   };
  315 
  316 
  317 struct Error
  318   {
  319   const char * const msg;
  320   explicit Error( const char * const s ) : msg( s ) {}
  321   };
  322 
  323 inline void set_retval( int & retval, const int new_val )
  324   { if( retval < new_val ) retval = new_val; }
  325 
  326 const char * const bad_magic_msg = "Bad magic number (file not in lzip format).";
  327 const char * const bad_dict_msg = "Invalid dictionary size in member header.";
  328 const char * const corrupt_mm_msg = "Corrupt header in multimember file.";
  329 const char * const trailing_msg = "Trailing data not allowed.";
  330 
  331 // defined in decoder.cc
  332 int readblock( const int fd, uint8_t * const buf, const int size );
  333 int writeblock( const int fd, const uint8_t * const buf, const int size );
  334 
  335 // defined in list.cc
  336 int list_files( const std::vector< std::string > & filenames,
  337                 const bool ignore_trailing, const bool loose_trailing );
  338 
  339 // defined in main.cc
  340 struct stat;
  341 const char * bad_version( const unsigned version );
  342 const char * format_ds( const unsigned dictionary_size );
  343 void show_header( const unsigned dictionary_size );
  344 int open_instream( const char * const name, struct stat * const in_statsp,
  345                    const bool one_to_one, const bool reg_only = false );
  346 void show_error( const char * const msg, const int errcode = 0,
  347                  const bool help = false );
  348 void show_file_error( const char * const filename, const char * const msg,
  349                       const int errcode = 0 );
  350 void internal_error( const char * const msg );
  351 class Matchfinder_base;
  352 void show_cprogress( const unsigned long long cfile_size = 0,
  353                      const unsigned long long partial_size = 0,
  354                      const Matchfinder_base * const m = 0,
  355                      const Pretty_print * const p = 0 );
  356 class Range_decoder;
  357 void show_dprogress( const unsigned long long cfile_size = 0,
  358                      const unsigned long long partial_size = 0,
  359                      const Range_decoder * const d = 0,
  360                      const Pretty_print * const p = 0 );