"Fossies" - the Fresh Open Source Software Archive

Member "zutils-1.10/zcmp.cc" (5 Jan 2021, 16427 Bytes) of package /linux/privat/zutils-1.10.tar.lz:


As a special service, "Fossies" has tried to format the requested source page into HTML format using (guessed) C and C++ source code syntax highlighting (style: standard) with prefixed line numbers and a code folding option. Alternatively, you can view or download the uninterpreted source code file here. For more information about "zcmp.cc", see the Fossies "Dox" file reference documentation and the latest Fossies "Diffs" side-by-side code change reports: 1.10-rc1_vs_1.10 or 1.9_vs_1.10.

    1 /* Zcmp - decompress and compare two files byte by byte
    2    Copyright (C) 2010-2021 Antonio Diaz Diaz.
    3 
    4    This program is free software: you can redistribute it and/or modify
    5    it under the terms of the GNU General Public License as published by
    6    the Free Software Foundation, either version 2 of the License, or
    7    (at your option) any later version.
    8 
    9    This program is distributed in the hope that it will be useful,
   10    but WITHOUT ANY WARRANTY; without even the implied warranty of
   11    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
   12    GNU General Public License for more details.
   13 
   14    You should have received a copy of the GNU General Public License
   15    along with this program.  If not, see <http://www.gnu.org/licenses/>.
   16 */
   17 
   18 #define _FILE_OFFSET_BITS 64
   19 
   20 #include <algorithm>
   21 #include <cctype>
   22 #include <cerrno>
   23 #include <climits>
   24 #include <csignal>
   25 #include <cstdio>
   26 #include <cstdlib>
   27 #include <cstring>
   28 #include <string>
   29 #include <vector>
   30 #include <fcntl.h>
   31 #include <stdint.h>
   32 #include <unistd.h>
   33 #include <sys/stat.h>
   34 #if defined(__MSVCRT__) || defined(__OS2__)
   35 #include <io.h>
   36 #endif
   37 
   38 #include "arg_parser.h"
   39 #include "rc.h"
   40 #include "zutils.h"
   41 
   42 #ifndef LLONG_MAX
   43 #define LLONG_MAX  0x7FFFFFFFFFFFFFFFLL
   44 #endif
   45 
   46 
   47 namespace {
   48 
   49 #include "zcmpdiff.cc"
   50 
   51 void show_help()
   52   {
   53   std::printf( "zcmp compares two files and, if they differ, writes to standard output the\n"
   54                "first byte and line number where they differ. Bytes and lines are numbered\n"
   55                "starting with 1. A hyphen '-' used as a file argument means standard input.\n"
   56                "If any file given is compressed, its decompressed content is used. Compressed\n"
   57                "files are decompressed on the fly; no temporary files are created.\n"
   58                "\nThe formats supported are bzip2, gzip, lzip, and xz.\n"
   59                "\nUsage: zcmp [options] file1 [file2]\n"
   60                "\nzcmp compares file1 to file2. The standard input is used only if file1 or\n"
   61                "file2 refers to standard input. If file2 is omitted zcmp tries the\n"
   62                "following:\n"
   63                "\n  - If file1 is compressed, compares its decompressed contents with\n"
   64                "  the corresponding uncompressed file (the name of file1 with the\n"
   65                "  extension removed).\n"
   66                "\n  - If file1 is uncompressed, compares it with the decompressed\n"
   67                "  contents of file1.[lz|bz2|gz|xz] (the first one that is found).\n"
   68                "\nExit status is 0 if inputs are identical, 1 if different, 2 if trouble.\n"
   69                "\nOptions:\n"
   70                "  -h, --help                        display this help and exit\n"
   71                "  -V, --version                     output version information and exit\n"
   72                "  -b, --print-bytes                 print differing bytes\n"
   73                "  -i, --ignore-initial=<n>[:<n2>]   ignore differences in the first <n> bytes\n"
   74                "  -l, --list                        list position, value of all differing bytes\n"
   75                "  -M, --format=<list>               process only the formats in <list>\n"
   76                "  -n, --bytes=<n>                   compare at most <n> bytes\n"
   77                "  -N, --no-rcfile                   don't read runtime configuration file\n"
   78                "  -O, --force-format=[<f1>][,<f2>]  force the formats given (bz2, gz, lz, xz)\n"
   79                "  -q, --quiet                       suppress all messages\n"
   80                "  -s, --silent                      (same as --quiet)\n"
   81                "  -v, --verbose                     verbose mode (same as --list)\n"
   82                "      --bz2=<command>               set compressor and options for bzip2 format\n"
   83                "      --gz=<command>                set compressor and options for gzip format\n"
   84                "      --lz=<command>                set compressor and options for lzip format\n"
   85                "      --xz=<command>                set compressor and options for xz format\n"
   86                "\nNumbers may be followed by a multiplier: k = kB = 10^3 = 1000,\n"
   87                "Ki = KiB = 2^10 = 1024, M = 10^6, Mi = 2^20, G = 10^9, Gi = 2^30, etc...\n" );
   88   show_help_addr();
   89   }
   90 
   91 
   92 long long getnum( const char * const ptr, const char ** const tailp = 0,
   93                   const long long llimit = 0,
   94                   const long long ulimit = LLONG_MAX )
   95   {
   96   char * tail;
   97   errno = 0;
   98   long long result = strtoll( ptr, &tail, 0 );
   99   if( tail == ptr )
  100     {
  101     show_error( "Bad or missing numerical argument.", 0, true );
  102     std::exit( 2 );
  103     }
  104   if( result < 0 ) errno = ERANGE;
  105 
  106   if( !errno && tail[0] && std::isalpha( tail[0] ) )
  107     {
  108     const unsigned char ch = *tail++;
  109     int factor;
  110     bool bsuf;                  // 'B' suffix is present
  111     if( tail[0] == 'i' ) { ++tail; factor = 1024; } else factor = 1000;
  112     if( tail[0] == 'B' ) { ++tail; bsuf = true; } else bsuf = false;
  113     int exponent = -1;              // -1 = bad multiplier
  114     switch( ch )
  115       {
  116       case 'Y': exponent = 8; break;
  117       case 'Z': exponent = 7; break;
  118       case 'E': exponent = 6; break;
  119       case 'P': exponent = 5; break;
  120       case 'T': exponent = 4; break;
  121       case 'G': exponent = 3; break;
  122       case 'M': exponent = 2; break;
  123       case 'K': if( factor == 1024 ) exponent = 1; break;
  124       case 'k': if( factor == 1000 ) exponent = 1; break;
  125       case 'B': if( factor == 1000 && !bsuf ) exponent = 0; break;
  126       }
  127     if( exponent < 0 )
  128       {
  129       show_error( "Bad multiplier in numerical argument.", 0, true );
  130       std::exit( 2 );
  131       }
  132     for( int i = 0; i < exponent; ++i )
  133       {
  134       if( ulimit / factor >= result ) result *= factor;
  135       else { errno = ERANGE; break; }
  136       }
  137     }
  138   if( !errno && ( result < llimit || result > ulimit ) ) errno = ERANGE;
  139   if( errno )
  140     {
  141     show_error( "Numerical argument out of limits." );
  142     std::exit( 2 );
  143     }
  144   if( tailp ) *tailp = tail;
  145   return result;
  146   }
  147 
  148 
  149 void parse_ignore_initial( const char * const arg, long long ignore_initial[2] )
  150   {
  151   const char * tail;
  152   ignore_initial[0] = getnum( arg, &tail );
  153   if( *tail == ':' || *tail == ',' )
  154     ignore_initial[1] = getnum( ++tail );
  155   else if( *tail == 0 ) ignore_initial[1] = ignore_initial[0];
  156   else
  157     {
  158     show_error( "Bad separator in argument of '--ignore-initial'", 0, true );
  159     std::exit( 2 );
  160     }
  161   }
  162 
  163 
  164 bool skip_ignore_initial( const long long ignore_initial, const int infd )
  165   {
  166   if( ignore_initial > 0 )
  167     {
  168     enum { buffer_size = 4096 };
  169     long long rest = ignore_initial;
  170     uint8_t buffer[buffer_size];
  171     while( rest > 0 )
  172       {
  173       const int size = std::min( rest, (long long)buffer_size );
  174       const int rd = readblock( infd, buffer, size );
  175       if( rd != size && errno ) return false;
  176       if( rd < size ) break;
  177       rest -= rd;
  178       }
  179     }
  180   return true;
  181   }
  182 
  183 
  184 // Put into buf the unsigned char c, making unprintable bytes
  185 // visible by quoting like cat -t does.
  186 void sprintc( char * const buf, unsigned char c )
  187   {
  188   int i = 0;
  189 
  190   if( c < 32 || c >= 127 )
  191     {
  192     if( c >= 128 ) { c -= 128; buf[i++] = 'M'; buf[i++] = '-'; }
  193     if( c < 32 ) { c += 64; buf[i++] = '^'; }
  194     else if( c == 127 ) { c = '?'; buf[i++] = '^'; }
  195     }
  196   buf[i++] = c;
  197   buf[i++] = 0;
  198   }
  199 
  200 
  201 int block_compare( const uint8_t * const buffer0,
  202                    const uint8_t * const buffer1,
  203                    unsigned long long * const line_numberp )
  204   {
  205   const uint8_t * p0 = buffer0;
  206   const uint8_t * p1 = buffer1;
  207 
  208   if( verbosity == 0 )
  209     {
  210     int nl_count = 0;
  211     while( *p0 == *p1 )
  212       { if( *p0 == '\n' ) { ++nl_count; } ++p0; ++p1; }
  213     *line_numberp += nl_count;
  214     }
  215   else while( *p0 == *p1 ) { ++p0; ++p1; }
  216   return p0 - buffer0;
  217   }
  218 
  219 
  220 int cmp( const long long max_size, const int infd[2],
  221          const std::string filenames[2], const bool print_bytes )
  222   {
  223   const int buffer_size = 4096;
  224   unsigned long long byte_number = 1;
  225   unsigned long long line_number = 1;
  226   // remaining number of bytes to compare
  227   long long rest = ( max_size >= 0 ) ? max_size : buffer_size;
  228   // buffers with space for sentinels at the end
  229   uint8_t * const buffer0 = new uint8_t[2*(buffer_size+1)];
  230   uint8_t * const buffer1 = buffer0 + buffer_size + 1;
  231   uint8_t * buffer[2];
  232   buffer[0] = buffer0; buffer[1] = buffer1;
  233   int different = 0;
  234 
  235   while( rest > 0 )
  236     {
  237     const int size = std::min( (long long)buffer_size, rest );
  238     if( max_size >= 0 ) rest -= size;
  239     int rd[2];          // number of bytes read from each file
  240     for( int i = 0; i < 2; ++i )
  241       {
  242       rd[i] = readblock( infd[i], buffer[i], size );
  243       if( rd[i] != size && errno )
  244         {
  245         show_file_error( filenames[i].c_str(), "Read error", errno );
  246         return 2;
  247         }
  248       }
  249 
  250     const int min_rd = std::min( rd[0], rd[1] );
  251     buffer0[min_rd] = 0;        // sentinels for the block compare
  252     buffer1[min_rd] = 1;
  253 
  254     int first_diff = block_compare( buffer0, buffer1, &line_number );
  255     byte_number += first_diff;
  256 
  257     if( first_diff < min_rd )
  258       {
  259       if( verbosity < 0 ) return 1;     // return status only
  260       if( verbosity == 0 )          // show first difference
  261         {
  262         if( !print_bytes )
  263           std::printf( "%s %s differ: byte %llu, line %llu\n",
  264                        filenames[0].c_str(), filenames[1].c_str(),
  265                        byte_number, line_number );
  266         else
  267           {
  268           const unsigned char c0 = buffer0[first_diff];
  269           const unsigned char c1 = buffer1[first_diff];
  270           char buf0[5], buf1[5];
  271           sprintc( buf0, c0 ); sprintc( buf1, c1 );
  272           std::printf( "%s %s differ: byte %llu, line %llu is %3o %s %3o %s\n",
  273                        filenames[0].c_str(), filenames[1].c_str(),
  274                        byte_number, line_number, c0, buf0, c1, buf1 );
  275           }
  276         std::fflush( stdout );
  277         return 1;
  278         }
  279       else          // verbosity > 0 ; show all differences
  280         {
  281         different = 1;
  282         for( ; first_diff < min_rd; ++byte_number, ++first_diff )
  283           {
  284           const unsigned char c0 = buffer0[first_diff];
  285           const unsigned char c1 = buffer1[first_diff];
  286           if( c0 != c1 )
  287             {
  288             if( !print_bytes )
  289               std::printf( "%llu %3o %3o\n", byte_number, c0, c1 );
  290             else
  291               {
  292               char buf0[5], buf1[5];
  293               sprintc( buf0, c0 ); sprintc( buf1, c1 );
  294               std::printf( "%llu %3o %-4s %3o %s\n",
  295                            byte_number, c0, buf0, c1, buf1 );
  296               }
  297             }
  298           }
  299         std::fflush( stdout );
  300         }
  301       }
  302 
  303     if( rd[0] != rd[1] )
  304       {
  305       if( verbosity >= 0 )
  306         std::fprintf( stderr, "%s: EOF on %s\n",
  307                       program_name, filenames[rd[1]<rd[0]].c_str() );
  308       return 1;
  309       }
  310     if( min_rd != buffer_size ) break;
  311     }
  312 
  313   delete[] buffer0;
  314   return different;
  315   }
  316 
  317 } // end namespace
  318 
  319 
  320 int main( const int argc, const char * const argv[] )
  321   {
  322   enum { bz2_opt = 256, gz_opt, lz_opt, xz_opt };
  323   // number of initial bytes ignored for each file
  324   long long ignore_initial[2] = { 0, 0 };
  325   long long max_size = -1;          // < 0 means unlimited size
  326   int format_types[2] = { -1, -1 };
  327   bool print_bytes = false;
  328   program_name = "zcmp";
  329   invocation_name = ( argc > 0 ) ? argv[0] : program_name;
  330 
  331   const Arg_parser::Option options[] =
  332     {
  333     { 'b', "print-bytes",    Arg_parser::no  },
  334     { 'h', "help",           Arg_parser::no  },
  335     { 'i', "ignore-initial", Arg_parser::yes },
  336     { 'l', "list",           Arg_parser::no  },
  337     { 'M', "format",         Arg_parser::yes },
  338     { 'n', "bytes",          Arg_parser::yes },
  339     { 'N', "no-rcfile",      Arg_parser::no  },
  340     { 'O', "force-format",   Arg_parser::yes },
  341     { 'q', "quiet",          Arg_parser::no  },
  342     { 's', "silent",         Arg_parser::no  },
  343     { 'v', "verbose",        Arg_parser::no  },
  344     { 'V', "version",        Arg_parser::no  },
  345     { bz2_opt,    "bz2",     Arg_parser::yes },
  346     { gz_opt,     "gz",      Arg_parser::yes },
  347     { lz_opt,     "lz",      Arg_parser::yes },
  348     { xz_opt,     "xz",      Arg_parser::yes },
  349     {  0 ,  0,               Arg_parser::no  } };
  350 
  351   const Arg_parser parser( argc, argv, options );
  352   if( parser.error().size() )               // bad option
  353     { show_error( parser.error().c_str(), 0, true ); return 2; }
  354 
  355   maybe_process_config_file( parser );
  356 
  357   int argind = 0;
  358   for( ; argind < parser.arguments(); ++argind )
  359     {
  360     const int code = parser.code( argind );
  361     if( !code ) break;                  // no more options
  362     const std::string & arg = parser.argument( argind );
  363     switch( code )
  364       {
  365       case 'b': print_bytes = true; break;
  366       case 'h': show_help(); return 0;
  367       case 'i': parse_ignore_initial( arg.c_str(), ignore_initial ); break;
  368       case 'l': verbosity = 1; break;
  369       case 'M': parse_format_list( arg ); break;
  370       case 'n': max_size = getnum( arg.c_str() ); break;
  371       case 'N': break;
  372       case 'O': parse_format_types2( arg, format_types ); break;
  373       case 'q':
  374       case 's': verbosity = -1; break;
  375       case 'v': verbosity = 1; break;
  376       case 'V': show_version(); return 0;
  377       case bz2_opt: parse_compressor( arg, fmt_bz2 ); break;
  378       case gz_opt: parse_compressor( arg, fmt_gz ); break;
  379       case lz_opt: parse_compressor( arg, fmt_lz ); break;
  380       case xz_opt: parse_compressor( arg, fmt_xz ); break;
  381       default : internal_error( "uncaught option." );
  382       }
  383     } // end process options
  384 
  385 #if defined(__MSVCRT__) || defined(__OS2__)
  386   setmode( STDIN_FILENO, O_BINARY );
  387   setmode( STDOUT_FILENO, O_BINARY );
  388 #endif
  389 
  390   if( argind >= parser.arguments() )
  391     { show_error( "No files given.", 0, true ); return 2; }
  392   if( argind + 2 < parser.arguments() )
  393     { show_error( "Too many files.", 0, true ); return 2; }
  394 
  395   const int files = parser.arguments() - argind;
  396   std::string filenames[2];     // file names of the two input files
  397   filenames[0] = parser.argument( argind );
  398   if( files == 2 ) filenames[1] = parser.argument( argind + 1 );
  399 
  400   int infd[2];              // file descriptors of the two files
  401   infd[0] = ( filenames[0] == "-" ) ?
  402     STDIN_FILENO : open_instream( filenames[0] );
  403   if( infd[0] < 0 ) return 2;
  404 
  405   if( files == 2 )
  406     {
  407     if( check_identical( filenames[0].c_str(), filenames[1].c_str() ) )
  408       {
  409       if( ignore_initial[0] == ignore_initial[1] ) return 0;
  410       else { show_error( "Can't compare parts of same file." ); return 2; }
  411       }
  412     infd[1] = ( filenames[1] == "-" ) ?
  413       STDIN_FILENO : open_instream( filenames[1] );
  414     if( infd[1] < 0 ) return 2;
  415     }
  416   else
  417     {
  418     if( filenames[0] == "-" )
  419       { show_error( "Missing operand after '-'.", 0, true ); return 2; }
  420     if( format_types[0] >= 0 || format_types[1] >= 0 )
  421       { show_error( "Two files must be given when format is specified.", 0, true );
  422         return 2; }
  423     filenames[1] = filenames[0];
  424     infd[1] = open_other_instream( filenames[1] );
  425     if( infd[1] < 0 )
  426       {
  427       if( verbosity >= 0 )
  428         std::fprintf( stderr, "%s: Can't find file to compare with '%s'.\n",
  429                       program_name, filenames[0].c_str() );
  430       show_error( 0, 0, true ); return 2;
  431       }
  432     }
  433 
  434   int old_infd[2];      // copy of file descriptors of the two files
  435   old_infd[0] = infd[0]; old_infd[1] = infd[1];
  436   Children children[2];
  437   if( !set_data_feeder( filenames[0], &infd[0], children[0], format_types[0] ) ||
  438       !set_data_feeder( filenames[1], &infd[1], children[1], format_types[1] ) )
  439     return 2;
  440 
  441   for( int i = 0; i < 2; ++i )
  442     if( !skip_ignore_initial( ignore_initial[i], infd[i] ) )
  443       {
  444       show_file_error( filenames[i].c_str(),
  445                        "Read error skipping initial bytes", errno );
  446       return 2;
  447       }
  448 
  449   int retval = cmp( max_size, infd, filenames, print_bytes );
  450 
  451   for( int i = 0; i < 2; ++i )
  452     if( !good_status( children[i], retval == 0 && max_size < 0 ) ) retval = 2;
  453 
  454   for( int i = 0; i < 2; ++i )
  455     {
  456     if( close( infd[i] ) != 0 )
  457       { show_close_error(); retval = 2; }
  458     if( filenames[i] != "-" && close( old_infd[i] ) != 0 )
  459       {
  460       show_file_error( filenames[i].c_str(), "Error closing input file", errno );
  461       retval = 2;
  462       }
  463     }
  464   if( std::fclose( stdout ) != 0 )
  465     {
  466     show_error( "Error closing stdout", errno );
  467     retval = 2;
  468     }
  469 
  470   return retval;
  471   }