"Fossies" - the Fresh Open Source Software Archive

Member "zutils-1.10/zcat.cc" (5 Jan 2021, 14291 Bytes) of package /linux/privat/zutils-1.10.tar.lz:


As a special service, "Fossies" has tried to format the requested source page into HTML using (guessed) C and C++ source code syntax highlighting (style: standard), with prefixed line numbers and a code folding option. Alternatively, you can view or download the uninterpreted source code file here. For more information about "zcat.cc", see the Fossies "Dox" file reference documentation and the latest Fossies "Diffs" side-by-side code change reports: 1.10-rc1_vs_1.10 or 1.9_vs_1.10.

    1 /* Zcat - decompress and concatenate files to standard output
    2    Copyright (C) 2010-2021 Antonio Diaz Diaz.
    3 
    4    This program is free software: you can redistribute it and/or modify
    5    it under the terms of the GNU General Public License as published by
    6    the Free Software Foundation, either version 2 of the License, or
    7    (at your option) any later version.
    8 
    9    This program is distributed in the hope that it will be useful,
   10    but WITHOUT ANY WARRANTY; without even the implied warranty of
   11    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
   12    GNU General Public License for more details.
   13 
   14    You should have received a copy of the GNU General Public License
   15    along with this program.  If not, see <http://www.gnu.org/licenses/>.
   16 */
   17 
   18 #define _FILE_OFFSET_BITS 64
   19 
   20 #include <cerrno>
   21 #include <climits>
   22 #include <csignal>
   23 #include <cstdio>
   24 #include <cstdlib>
   25 #include <cstring>
   26 #include <list>
   27 #include <string>
   28 #include <vector>
   29 #include <dirent.h>
   30 #include <fcntl.h>
   31 #include <stdint.h>
   32 #include <unistd.h>
   33 #include <sys/stat.h>
   34 #if defined(__MSVCRT__) || defined(__OS2__)
   35 #include <io.h>
   36 #endif
   37 
   38 #include "arg_parser.h"
   39 #include "rc.h"
   40 #include "zutils.h"
   41 
   42 
   43 namespace {
   44 
   45 #include "recursive.cc"
   46 #include "zcatgrep.cc"
   47 
   // Output-formatting switches selected by the cat-like command line options.
   // A default-constructed object requests a plain, unmodified copy.
   struct Cat_options
     {
     int number_lines;         // 0 = no, 1 = nonblank, 2 = all
     bool show_ends;           // append '$' before each newline
     bool show_nonprinting;    // use '^' and 'M-' notation
     bool show_tabs;           // show TAB as '^I'
     bool squeeze_blank;       // collapse runs of blank lines

     Cat_options()
       {
       number_lines = 0;
       show_ends = false;
       show_nonprinting = false;
       show_tabs = false;
       squeeze_blank = false;
       }
     };
   60 
   61 
   // Decimal line counter of unlimited size, kept as ready-to-print text.
   // The text is the number right-aligned in a field of at least 6 columns,
   // followed by a TAB. The field grows to the left once the number needs
   // more than 6 digits.
   class Line_number
     {
     typedef std::string::size_type Pos;

     std::string str;          // padding spaces + digits + trailing '\t'
     Pos first_digit_pos;      // index in 'str' of the most significant digit

   public:
     Line_number() : str( "     0\t" ), first_digit_pos( 5 ) {}

     // Advance the counter by one.
     void next()
       {
       // Propagate the carry from the least significant digit (the one just
       // before the trailing TAB) towards the most significant digit.
       for( Pos i = str.size() - 1; i > first_digit_pos; )
         {
         --i;
         if( str[i] < '9' ) { ++str[i]; return; }
         str[i] = '0';
         }
       // Every digit was '9': a new leading '1' is needed. Reuse a padding
       // space if one is left, else make the string one character longer.
       if( first_digit_pos > 0 ) str[--first_digit_pos] = '1';
       else str.insert( str.begin(), '1' );
       }

     // Copy the current text (not null-terminated) to 'buf' and return the
     // number of bytes copied. 'buf' must have room for that many bytes.
     int sprint( uint8_t * const buf ) const
       {
       std::memcpy( buf, str.data(), str.size() );
       return static_cast< int >( str.size() );
       }
     };

   // Single counter used by do_cat; being a global, it is not reset between
   // the files processed in one run.
   Line_number line_number;
   89 
   90 
   91 void show_help()
   92   {
   93   std::printf( "zcat copies each file argument to standard output in sequence. If any\n"
   94                "file given is compressed, its decompressed content is copied. If a file\n"
   95                "given does not exist, and its name does not end with one of the known\n"
   96                "extensions, zcat tries the compressed file names corresponding to the\n"
   97                "formats supported. If a file fails to decompress, zcat continues copying the\n"
   98                "rest of the files.\n"
   99                "\nIf a file is specified as '-', data are read from standard input,\n"
  100                "decompressed if needed, and sent to standard output. Data read from\n"
  101                "standard input must be of the same type; all uncompressed or all in the\n"
  102                "same compressed format.\n"
  103                "\nIf no files are specified, recursive searches examine the current\n"
  104                "working directory, and nonrecursive searches read standard input.\n"
  105                "\nThe formats supported are bzip2, gzip, lzip, and xz.\n"
  106                "\nUsage: zcat [options] [files]\n"
  107                "\nExit status is 0 if no errors occurred, 1 otherwise.\n"
  108                "\nOptions:\n"
  109                "  -h, --help                   display this help and exit\n"
  110                "  -V, --version                output version information and exit\n"
  111                "  -A, --show-all               equivalent to '-vET'\n"
  112                "  -b, --number-nonblank        number nonblank output lines\n"
  113                "  -e                           equivalent to '-vE'\n"
  114                "  -E, --show-ends              display '$' at end of each line\n"
  115                "  -M, --format=<list>          process only the formats in <list>\n"
  116                "  -n, --number                 number all output lines\n"
  117                "  -N, --no-rcfile              don't read runtime configuration file\n"
  118                "  -O, --force-format=<fmt>     force the format given (bz2, gz, lz, xz)\n"
  119                "  -q, --quiet                  suppress all messages\n"
  120                "  -r, --recursive              operate recursively on directories\n"
  121                "  -R, --dereference-recursive  recursively follow symbolic links\n"
  122                "  -s, --squeeze-blank          never more than one single blank line\n"
  123                "  -t                           equivalent to '-vT'\n"
  124                "  -T, --show-tabs              display TAB characters as '^I'\n"
  125                "  -v, --show-nonprinting       use '^' and 'M-' notation, except for LF and TAB\n"
  126                "      --verbose                verbose mode (show error messages)\n"
  127                "      --bz2=<command>          set compressor and options for bzip2 format\n"
  128                "      --gz=<command>           set compressor and options for gzip format\n"
  129                "      --lz=<command>           set compressor and options for lzip format\n"
  130                "      --xz=<command>           set compressor and options for xz format\n" );
  131   show_help_addr();
  132   }
  133 
  134 
  135 bool do_cat( const int infd, const int buffer_size,
  136              uint8_t * const inbuf, uint8_t * const outbuf,
  137              const std::string & input_filename,
  138              const Cat_options & cat_options )
  139   {
        // Copy all the data readable from 'infd' to standard output, applying
        // the transformations selected in 'cat_options' (line numbering, '$' at
        // end of line, quoting of nonprinting characters and TABs, squeezing of
        // blank lines).
        //
        // Data are read in blocks of up to 'buffer_size' bytes. 'inbuf' must
        // hold buffer_size + 1 bytes because a sentinel newline is stored just
        // past the data read, which lets the copy loops below stop on '\n'
        // without testing the buffer limit for every byte.
        // 'outbuf' is flushed only when it already holds 'buffer_size' bytes or
        // more, and the copy loops do not check its limit, so the caller must
        // make it large enough for the worst-case expansion of a whole input
        // block plus a line number.
        //
        // 'input_filename' is used only in the read error message.
        // Returns true at end of input (after flushing 'outbuf'), or false
        // after reporting a read or write error.
  140   static int at_bol = 1;    // at begin of line. 0 = false, 1 = true,
  141                 // 2 = at begin of second blank line.
        // 'at_bol' is static, so the line state carries over from one call
        // (one input file) to the next.
  142   int inpos = 0;        // positions in buffers
  143   int outpos = 0;
  144   int rd = -1;          // bytes read by the last readblock
  145   unsigned char c;
  146 
  147   while( true )
  148     {
          // This inner loop runs once per newline seen (real or sentinel), and
          // once at start-up, when rd == -1 forces the first read. It exits
          // with 'c' holding the first byte of a nonempty line.
  149     do {
  150       if( outpos >= buffer_size )
  151         {
  152         if( writeblock( STDOUT_FILENO, outbuf, outpos ) != outpos )
  153           { show_error( "Write error", errno ); return false; }
  154         outpos = 0;
  155         }
            // inpos > rd means the byte just consumed was the sentinel stored
            // at inbuf[rd] (or that nothing has been read yet).
  156       if( inpos > rd )          // inbuf is empty
  157         {
  158         rd = readblock( infd, inbuf, buffer_size );
              // NOTE(review): a short read counts as an error only if errno is
              // set; presumably readblock clears errno first - confirm.
  159         if( rd != buffer_size && errno )
  160           {
  161           show_file_error( input_filename.c_str(), "Read error", errno );
  162           return false;
  163           }
  164         if( rd == 0 )
  165           {
                // end of input: flush what is pending and finish
  166           if( writeblock( STDOUT_FILENO, outbuf, outpos ) != outpos )
  167             { show_error( "Write error", errno ); return false; }
  168           outpos = 0;
  169           return true;
  170           }
  171         inpos = 0;
  172         inbuf[rd] = '\n';       // sentinel newline
  173         }
  174       else              // a real newline was found
  175         {
              // at_bol goes 0 -> 1 when a nonempty line ends, 1 -> 2 on the
              // first blank line, and stays at 2 on further blank lines.
  176         if( at_bol > 1 )
  177           {
                // second or later consecutive blank line: drop it if squeezing
                // ('continue' jumps to the loop condition with the next byte)
  178           if( cat_options.squeeze_blank ) { c = inbuf[inpos++]; continue; }
  179           }
  180         else ++at_bol;
              // a blank line gets a number only when numbering all lines
  181         if( at_bol > 1 && cat_options.number_lines == 2 )
  182           {
  183           line_number.next();
  184           outpos += line_number.sprint( &outbuf[outpos] );
  185           }
  186         if( cat_options.show_ends ) outbuf[outpos++] = '$';
  187         outbuf[outpos++] = '\n';        // output the newline
  188         }
  189       c = inbuf[inpos++];
  190       }
  191     while( c == '\n' );
  192 
          // 'c' starts a nonempty line here if at_bol > 0; at_bol == 0 means a
          // line continuing from the previous input block. Nonempty lines are
          // numbered in both numbering modes (1 and 2).
  193     if( at_bol > 0 && cat_options.number_lines )
  194       {
  195       line_number.next();
  196       outpos += line_number.sprint( &outbuf[outpos] );
  197       }
  198     at_bol = 0;
  199 
  200     // the loops below continue until a newline (real or sentinel) is found
  201 
  202     if( cat_options.show_nonprinting )
  203       while( true )
  204         {
  205         if( c < 32 || c >= 127 )
  206           {
  207           if( c == '\n' ) break;
                // TAB is copied verbatim unless show_tabs is also set
  208           if( c != '\t' || cat_options.show_tabs )
  209             {
                  // bytes >= 128 get an "M-" prefix and are then quoted as
                  // the corresponding 7-bit character
  210             if( c >= 128 )
  211               { c -= 128; outbuf[outpos++] = 'M'; outbuf[outpos++] = '-'; }
                  // control characters become '^' + letter; DEL becomes "^?"
  212             if( c < 32 ) { c += 64; outbuf[outpos++] = '^'; }
  213             else if( c == 127 ) { c = '?'; outbuf[outpos++] = '^'; }
  214             }
  215           }
  216         outbuf[outpos++] = c;
  217         c = inbuf[inpos++];
  218         }
  219     else                // not quoting
  220       while( c != '\n' )
  221         {
                // '\t' + 64 == 'I', so a TAB is written as "^I"
  222         if( c == '\t' && cat_options.show_tabs )
  223           { c += 64; outbuf[outpos++] = '^'; }
  224         outbuf[outpos++] = c;
  225         c = inbuf[inpos++];
  226         }
  227     }
  228   }
  229 
  230 
  231 bool cat( int infd, const int format_index, const std::string & input_filename,
  232           const Cat_options & cat_options )
  233   {
  234   enum { buffer_size = 4096, outbuf_size = (5 * buffer_size) + 256 + 1 };
  235     // input buffer with space for sentinel newline at the end
  236   uint8_t * const inbuf = new uint8_t[buffer_size+1];
  237     // output buffer with space for character quoting, 255-digit line number,
  238     // worst case flushing respect to inbuf, and a canary byte.
  239   uint8_t * const outbuf = new uint8_t[outbuf_size];
  240   outbuf[outbuf_size-1] = 0;
  241   Children children;
  242   bool error = false;
  243 
  244   if( !set_data_feeder( input_filename, &infd, children, format_index ) ||
  245       !do_cat( infd, buffer_size, inbuf, outbuf, input_filename, cat_options ) )
  246     error = true;
  247   if( !good_status( children, !error ) ) error = true;
  248   if( !error && close( infd ) != 0 ) { show_close_error(); error = true; }
  249   if( outbuf[outbuf_size-1] != 0 ) internal_error( "buffer overflow." );
  250   delete[] outbuf; delete[] inbuf;
  251   return !error;
  252   }
  253 
  254 } // end namespace
  255 
  256 
  257 int main( const int argc, const char * const argv[] )
  258   {
  259   enum { verbose_opt = 256, bz2_opt, gz_opt, lz_opt, xz_opt };
  260   int format_index = -1;
  261   int recursive = 0;            // 1 = '-r', 2 = '-R'
  262   std::list< std::string > filenames;
  263   Cat_options cat_options;
  264   program_name = "zcat";
  265   invocation_name = ( argc > 0 ) ? argv[0] : program_name;
  266 
  267   const Arg_parser::Option options[] =
  268     {
  269     { 'A', "show-all",              Arg_parser::no  },  // cat
  270     { 'b', "number-nonblank",       Arg_parser::no  },  // cat
  271     { 'c', "stdout",                Arg_parser::no  },  // gzip
  272     { 'd', "decompress",            Arg_parser::no  },  // gzip
  273     { 'e',  0,                      Arg_parser::no  },  // cat
  274     { 'E', "show-ends",             Arg_parser::no  },  // cat
  275     { 'f', "force",                 Arg_parser::no  },  // gzip
  276     { 'h', "help",                  Arg_parser::no  },
  277     { 'l', "list",                  Arg_parser::no  },  // gzip
  278     { 'L', "license",               Arg_parser::no  },  // gzip
  279     { 'M', "format",                Arg_parser::yes },
  280     { 'n', "number",                Arg_parser::no  },  // cat
  281     { 'N', "no-rcfile",             Arg_parser::no  },
  282     { 'O', "force-format",          Arg_parser::yes },
  283     { 'q', "quiet",                 Arg_parser::no  },
  284     { 'r', "recursive",             Arg_parser::no  },
  285     { 'R', "dereference-recursive", Arg_parser::no  },
  286     { 's', "squeeze-blank",         Arg_parser::no  },  // cat
  287     { 't',  0,                      Arg_parser::no  },  // cat
  288     { 'T', "show-tabs",             Arg_parser::no  },  // cat
  289     { 'v', "show-nonprinting",      Arg_parser::no  },  // cat
  290     { 'V', "version",               Arg_parser::no  },
  291     { verbose_opt, "verbose",       Arg_parser::no  },
  292     { bz2_opt,     "bz2",           Arg_parser::yes },
  293     { gz_opt,      "gz",            Arg_parser::yes },
  294     { lz_opt,      "lz",            Arg_parser::yes },
  295     { xz_opt,      "xz",            Arg_parser::yes },
  296     {  0 ,  0,                      Arg_parser::no  } };
  297 
  298   const Arg_parser parser( argc, argv, options );
  299   if( parser.error().size() )               // bad option
  300     { show_error( parser.error().c_str(), 0, true ); return 1; }
  301 
  302   maybe_process_config_file( parser );
  303 
  304   int argind = 0;
  305   for( ; argind < parser.arguments(); ++argind )
  306     {
  307     const int code = parser.code( argind );
  308     if( !code ) break;                  // no more options
  309     const std::string & arg = parser.argument( argind );
  310     switch( code )
  311       {
  312       case 'A': cat_options.show_ends = true;
  313                 cat_options.show_nonprinting = true;
  314                 cat_options.show_tabs = true; break;
  315       case 'b': cat_options.number_lines = 1; break;
  316       case 'c': break;
  317       case 'd': break;
  318       case 'e': cat_options.show_nonprinting = true;    // fall through
  319       case 'E': cat_options.show_ends = true; break;
  320       case 'f': break;
  321       case 'h': show_help(); return 0;
  322       case 'l': break;
  323       case 'L': break;
  324       case 'M': parse_format_list( arg ); break;
  325       case 'n': if( cat_options.number_lines == 0 )
  326                   { cat_options.number_lines = 2; } break;
  327       case 'N': break;
  328       case 'O': format_index = parse_format_type( arg ); break;
  329       case 'q': verbosity = -1; break;
  330       case 'r': recursive = 1; break;
  331       case 'R': recursive = 2; break;
  332       case 's': cat_options.squeeze_blank = true; break;
  333       case 't': cat_options.show_nonprinting = true;    // fall through
  334       case 'T': cat_options.show_tabs = true; break;
  335       case 'v': cat_options.show_nonprinting = true; break;
  336       case 'V': show_version(); return 0;
  337       case verbose_opt: if( verbosity < 4 ) ++verbosity; break;
  338       case bz2_opt: parse_compressor( arg, fmt_bz2, 1 ); break;
  339       case gz_opt: parse_compressor( arg, fmt_gz, 1 ); break;
  340       case lz_opt: parse_compressor( arg, fmt_lz, 1 ); break;
  341       case xz_opt: parse_compressor( arg, fmt_xz, 1 ); break;
  342       default : internal_error( "uncaught option." );
  343       }
  344     } // end process options
  345 
  346 #if defined(__MSVCRT__) || defined(__OS2__)
  347   setmode( STDIN_FILENO, O_BINARY );
  348   setmode( STDOUT_FILENO, O_BINARY );
  349 #endif
  350 
  351   for( ; argind < parser.arguments(); ++argind )
  352     filenames.push_back( parser.argument( argind ) );
  353 
  354   if( filenames.empty() ) filenames.push_back( recursive ? "." : "-" );
  355 
  356   std::string input_filename;
  357   bool error = false;
  358   bool stdin_used = false;
  359   while( next_filename( filenames, input_filename, error, recursive ) )
  360     {
  361     int infd;
  362     if( input_filename == "." )
  363       {
  364       if( stdin_used ) continue; else stdin_used = true;
  365       infd = STDIN_FILENO; input_filename = "-";
  366       }
  367     else
  368       {
  369       infd = open_instream( input_filename, format_index < 0 );
  370       if( infd < 0 ) { error = true; continue; }
  371       }
  372 
  373     if( !cat( infd, format_index, input_filename, cat_options ) ) error = true;
  374 
  375     if( close( infd ) != 0 )
  376       { show_file_error( input_filename.c_str(), "Error closing input file",
  377                          errno ); error = true; }
  378     }
  379 
  380   if( std::fclose( stdout ) != 0 )
  381     {
  382     show_error( "Error closing stdout", errno );
  383     error = true;
  384     }
  385   return error;
  386   }