"Fossies" - the Fresh Open Source Software Archive

Member "zutils-1.10/zgrep.cc" (5 Jan 2021, 17330 Bytes) of package /linux/privat/zutils-1.10.tar.lz:


As a special service, "Fossies" has tried to format the requested source page into HTML using (guessed) C and C++ source code syntax highlighting (style: standard), with prefixed line numbers and a code folding option. Alternatively, you can view or download the uninterpreted source code file here. For more information about "zgrep.cc", see the Fossies "Dox" file reference documentation and the latest Fossies "Diffs" side-by-side code change reports: 1.10-rc1_vs_1.10 or 1.9_vs_1.10.

    1 /* Zgrep - search compressed files for a regular expression
    2    Copyright (C) 2010-2021 Antonio Diaz Diaz.
    3 
    4    This program is free software: you can redistribute it and/or modify
    5    it under the terms of the GNU General Public License as published by
    6    the Free Software Foundation, either version 2 of the License, or
    7    (at your option) any later version.
    8 
    9    This program is distributed in the hope that it will be useful,
   10    but WITHOUT ANY WARRANTY; without even the implied warranty of
   11    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
   12    GNU General Public License for more details.
   13 
   14    You should have received a copy of the GNU General Public License
   15    along with this program.  If not, see <http://www.gnu.org/licenses/>.
   16 */
   17 
   18 #define _FILE_OFFSET_BITS 64
   19 
   20 #include <cerrno>
   21 #include <climits>
   22 #include <csignal>
   23 #include <cstdio>
   24 #include <cstdlib>
   25 #include <cstring>
   26 #include <list>
   27 #include <string>
   28 #include <vector>
   29 #include <dirent.h>
   30 #include <fcntl.h>
   31 #include <stdint.h>
   32 #include <unistd.h>
   33 #include <sys/stat.h>
   34 #if defined(__MSVCRT__) || defined(__OS2__)
   35 #include <io.h>
   36 #endif
   37 
   38 #include "arg_parser.h"
   39 #include "rc.h"
   40 #include "zutils.h"
   41 
   42 
   43 namespace {
   44 
   45 #include "recursive.cc"
   46 #include "zcatgrep.cc"
   47 
   48 void show_help()
   49   {
   50   std::printf( "zgrep is a front end to the program grep that allows transparent search\n"
   51                "on any combination of compressed and uncompressed files. If any file\n"
   52                "given is compressed, its decompressed content is used. If a file given\n"
   53                "does not exist, and its name does not end with one of the known\n"
   54                "extensions, zgrep tries the compressed file names corresponding to the\n"
   55                "formats supported. If a file fails to decompress, zgrep continues\n"
   56                "searching the rest of the files.\n"
   57                "\nIf a file is specified as '-', data are read from standard input,\n"
   58                "decompressed if needed, and fed to grep. Data read from standard input\n"
   59                "must be of the same type; all uncompressed or all in the same\n"
   60                "compressed format.\n"
   61                "\nIf no files are specified, recursive searches examine the current\n"
   62                "working directory, and nonrecursive searches read standard input.\n"
   63                "\nThe formats supported are bzip2, gzip, lzip, and xz.\n"
   64                "\nUsage: zgrep [options] <pattern> [files]\n"
   65                "\nExit status is 0 if match, 1 if no match, 2 if trouble.\n"
   66                "Some options only work if the grep program used supports them.\n"
   67                "\nOptions:\n"
   68                "      --help                   display this help and exit\n"
   69                "  -V, --version                output version information and exit\n"
   70                "  -a, --text                   treat all files as text\n"
   71                "  -A, --after-context=<n>      print <n> lines of trailing context\n"
   72                "  -b, --byte-offset            print the byte offset of each line\n"
   73                "  -B, --before-context=<n>     print <n> lines of leading context\n"
   74                "  -c, --count                  only print a count of matching lines per file\n"
   75                "  -C, --context=<n>            print <n> lines of output context\n"
   76                "      --color[=<when>]         show matched strings in color\n"
   77                "  -e, --regexp=<pattern>       use <pattern> as the pattern to match\n"
   78                "  -E, --extended-regexp        <pattern> is an extended regular expression\n"
   79                "  -f, --file=<file>            obtain patterns from <file>\n"
   80                "  -F, --fixed-strings          <pattern> is a set of newline-separated strings\n"
   81                "  -h, --no-filename            suppress the prefixing filename on output\n"
   82                "  -H, --with-filename          print the filename for each match\n"
   83                "  -i, --ignore-case            ignore case distinctions\n"
   84                "  -I                           ignore binary files\n"
   85                "  -l, --files-with-matches     only print names of files containing matches\n"
   86                "  -L, --files-without-match    only print names of files containing no matches\n"
   87                "  -m, --max-count=<n>          stop after <n> matches\n"
   88                "  -M, --format=<list>          process only the formats in <list>\n"
   89                "  -n, --line-number            print the line number of each line\n"
   90                "  -N, --no-rcfile              don't read runtime configuration file\n"
   91                "  -o, --only-matching          show only the part of a line matching <pattern>\n"
   92                "  -O, --force-format=<fmt>     force the format given (bz2, gz, lz, xz)\n"
   93                "  -q, --quiet                  suppress all messages\n"
   94                "  -r, --recursive              operate recursively on directories\n"
   95                "  -R, --dereference-recursive  recursively follow symbolic links\n"
   96                "  -s, --no-messages            suppress error messages\n"
   97                "  -v, --invert-match           select non-matching lines\n"
   98                "      --verbose                verbose mode (show error messages)\n"
   99                "  -w, --word-regexp            match only whole words\n"
  100                "  -x, --line-regexp            match only whole lines\n"
  101                "      --bz2=<command>          set compressor and options for bzip2 format\n"
  102                "      --gz=<command>           set compressor and options for gzip format\n"
  103                "      --lz=<command>           set compressor and options for lzip format\n"
  104                "      --xz=<command>           set compressor and options for xz format\n"
  105                "\nNumbers may be followed by a multiplier: k = kB = 10^3 = 1000,\n"
  106                "Ki = KiB = 2^10 = 1024, M = 10^6, Mi = 2^20, G = 10^9, Gi = 2^30, etc...\n" );
  107   show_help_addr();
  108   }
  109 
  110 
  111 int zgrep_stdin( int infd, const int format_index,
  112                  const std::vector< const char * > & grep_args )
  113   {
  114   Children children;
  115   if( !set_data_feeder( "", &infd, children, format_index ) ) return 2;
  116   const pid_t grep_pid = fork();
  117   if( grep_pid == 0 )           // child (grep)
  118     {
  119     if( dup2( infd, STDIN_FILENO ) >= 0 && close( infd ) == 0 )
  120       {
  121       const char ** const argv = new const char *[grep_args.size()+2];
  122       argv[0] = GREP;
  123       for( unsigned i = 0; i < grep_args.size(); ++i )
  124         argv[i+1] = grep_args[i];
  125       argv[grep_args.size()+1] = 0;
  126       execvp( argv[0], (char **)argv );
  127       }
  128     show_exec_error( GREP );
  129     _exit( 2 );
  130     }
  131   if( grep_pid < 0 )            // parent
  132     { show_fork_error( GREP ); return 2; }
  133 
  134   int retval = wait_for_child( grep_pid, GREP );
  135 
  136   if( !good_status( children, retval == 1 ) ) retval = 2;
  137 
  138   if( close( infd ) != 0 )
  139     { show_close_error(); return 2; }
  140   return retval;
  141   }
  142 
  143 
  144 int zgrep_file( int infd, const int format_index,
  145                 const std::string & input_filename,
  146                 const std::vector< const char * > & grep_args,
  147                 const int list_mode, const bool show_name )
  148   {
  149   Children children;
  150   if( !set_data_feeder( input_filename, &infd, children, format_index ) )
  151     return 2;
  152   int fda[2];               // pipe from grep
  153   if( pipe( fda ) < 0 )
  154     { show_error( "Can't create pipe", errno ); return 2; }
  155   const pid_t grep_pid = fork();
  156   if( grep_pid == 0 )           // child (grep)
  157     {
  158     if( dup2( infd, STDIN_FILENO ) >= 0 &&
  159         dup2( fda[1], STDOUT_FILENO ) >= 0 &&
  160         close( infd ) == 0 && close( fda[0] ) == 0 && close( fda[1] ) == 0 )
  161       {
  162       const char ** const argv = new const char *[grep_args.size()+2];
  163       argv[0] = GREP;
  164       for( unsigned i = 0; i < grep_args.size(); ++i )
  165         argv[i+1] = grep_args[i];
  166       argv[grep_args.size()+1] = 0;
  167       execvp( argv[0], (char **)argv );
  168       }
  169     show_exec_error( GREP );
  170     _exit( 2 );
  171     }
  172   if( grep_pid < 0 )            // parent
  173     { show_fork_error( GREP ); return 2; }
  174 
  175   close( fda[1] );
  176   enum { buffer_size = 256 };
  177   uint8_t buffer[buffer_size];
  178   bool line_begin = true;
  179   while( true )
  180     {
  181     const int size = readblock( fda[0], buffer, buffer_size );
  182     if( size != buffer_size && errno )
  183       { show_error( "Read error", errno ); return 2; }
  184     if( size > 0 && !list_mode )
  185       {
  186       if( show_name )
  187         for( int i = 0; i < size; ++i )
  188           {
  189           if( line_begin )
  190             { line_begin = false; std::printf( "%s:", input_filename.c_str() ); }
  191           if( buffer[i] == '\n' ) line_begin = true;
  192           putchar( buffer[i] );
  193           }
  194       else if( std::fwrite( buffer, 1, size, stdout ) != (unsigned)size )
  195         { std::fflush( stdout ); show_error( "Write error", errno ); return 2; }
  196       std::fflush( stdout );
  197       }
  198     if( size < buffer_size ) break;     // end of grep's output
  199     }
  200 
  201   int retval = wait_for_child( grep_pid, GREP );
  202 
  203   if( !good_status( children, retval == 1 ) ) retval = 2;
  204 
  205   if( list_mode && (retval == 0) == (list_mode == 1) )
  206     { std::printf( "%s\n", input_filename.c_str() ); std::fflush( stdout ); }
  207   if( close( infd ) != 0 )
  208     { show_close_error(); return 2; }
  209   if( close( fda[0] ) != 0 )
  210     { show_close_error( GREP ); return 2; }
  211   return retval;
  212   }
  213 
  214 } // end namespace
  215 
  216 
  217 int main( const int argc, const char * const argv[] )
  218   {
  219   enum { help_opt = 256, verbose_opt, color_opt,
  220          bz2_opt, gz_opt, lz_opt, xz_opt };
  221   int format_index = -1;
  222   int list_mode = 0;        // 1 = list matches, -1 = list non-matches
  223   int recursive = 0;        // 1 = '-r', 2 = '-R'
  224   int show_name = -1;       // tri-state bool
  225   bool no_messages = false;
  226   std::list< std::string > filenames;
  227   std::vector< const char * > grep_args;    // args to grep, maybe empty
  228   std::string color_option;     // needed because of optional arg
  229   program_name = "zgrep";
  230   invocation_name = ( argc > 0 ) ? argv[0] : program_name;
  231 
  232   const Arg_parser::Option options[] =
  233     {
  234     { 'a', "text",                  Arg_parser::no  },  // grep GNU
  235     { 'A', "after-context",         Arg_parser::yes },  // grep GNU
  236     { 'b', "byte-offset",           Arg_parser::no  },  // grep GNU
  237     { 'B', "before-context",        Arg_parser::yes },  // grep GNU
  238     { 'c', "count",                 Arg_parser::no  },  // grep
  239     { 'C', "context",               Arg_parser::yes },  // grep GNU
  240     { 'e', "regexp",                Arg_parser::yes },  // grep
  241     { 'E', "extended-regexp",       Arg_parser::no  },  // grep
  242     { 'f', "file ",                 Arg_parser::yes },  // grep
  243     { 'F', "fixed-strings",         Arg_parser::no  },  // grep
  244     { 'h', "no-filename",           Arg_parser::no  },  // grep GNU
  245     { 'H', "with-filename",         Arg_parser::no  },  // grep GNU
  246     { 'i', "ignore-case",           Arg_parser::no  },  // grep
  247     { 'I',  0,                      Arg_parser::no  },  // grep GNU
  248     { 'l', "files-with-matches",    Arg_parser::no  },  // grep
  249     { 'L', "files-without-match",   Arg_parser::no  },  // grep GNU
  250     { 'm', "max-count",             Arg_parser::yes },  // grep GNU
  251     { 'M', "format",                Arg_parser::yes },
  252     { 'n', "line-number",           Arg_parser::no  },  // grep
  253     { 'N', "no-rcfile",             Arg_parser::no  },
  254     { 'o', "only-matching",         Arg_parser::no  },  // grep
  255     { 'O', "force-format",          Arg_parser::yes },
  256     { 'q', "quiet",                 Arg_parser::no  },
  257     { 'r', "recursive",             Arg_parser::no  },
  258     { 'R', "dereference-recursive", Arg_parser::no  },
  259     { 's', "no-messages",           Arg_parser::no  },  // grep
  260     { 'v', "invert-match",          Arg_parser::no  },  // grep
  261     { 'V', "version",               Arg_parser::no  },
  262     { 'w', "word-regexp",           Arg_parser::no  },  // grep GNU
  263     { 'x', "line-regexp",           Arg_parser::no  },  // grep
  264     { help_opt,    "help",          Arg_parser::no  },
  265     { verbose_opt, "verbose",       Arg_parser::no  },
  266     { color_opt,   "color",         Arg_parser::maybe },
  267     { bz2_opt,     "bz2",           Arg_parser::yes },
  268     { gz_opt,      "gz",            Arg_parser::yes },
  269     { lz_opt,      "lz",            Arg_parser::yes },
  270     { xz_opt,      "xz",            Arg_parser::yes },
  271     {  0 ,  0,                      Arg_parser::no  } };
  272 
  273   const Arg_parser parser( argc, argv, options );
  274   if( parser.error().size() )               // bad option
  275     { show_error( parser.error().c_str(), 0, true ); return 2; }
  276 
  277   maybe_process_config_file( parser );
  278 
  279   int argind = 0;
  280   bool pattern_found = false;
  281   for( ; argind < parser.arguments(); ++argind )
  282     {
  283     const int code = parser.code( argind );
  284     if( !code ) break;                  // no more options
  285     const std::string & arg = parser.argument( argind );
  286     switch( code )
  287       {
  288       case 'a': grep_args.push_back( "-a" ); break;
  289       case 'A': grep_args.push_back( "-A" );
  290                 grep_args.push_back( arg.c_str() ); break;
  291       case 'b': grep_args.push_back( "-b" ); break;
  292       case 'B': grep_args.push_back( "-B" );
  293                 grep_args.push_back( arg.c_str() ); break;
  294       case 'c': grep_args.push_back( "-c" ); break;
  295       case 'C': grep_args.push_back( "-C" );
  296                 grep_args.push_back( arg.c_str() ); break;
  297       case 'e': grep_args.push_back( "-e" );
  298                 grep_args.push_back( arg.c_str() ); pattern_found = true; break;
  299       case 'E': grep_args.push_back( "-E" ); break;
  300       case 'f': grep_args.push_back( "-f" );
  301                 grep_args.push_back( arg.c_str() ); pattern_found = true; break;
  302       case 'F': grep_args.push_back( "-F" ); break;
  303       case 'h': show_name = false; break;
  304       case 'H': show_name = true; break;
  305       case 'i': grep_args.push_back( "-i" ); break;
  306       case 'I': grep_args.push_back( "-I" ); break;
  307       case 'l': grep_args.push_back( "-l" ); list_mode = 1; break;
  308       case 'L': grep_args.push_back( "-L" ); list_mode = -1; break;
  309       case 'm': grep_args.push_back( "-m" );
  310                 grep_args.push_back( arg.c_str() ); break;
  311       case 'M': parse_format_list( arg ); break;
  312       case 'n': grep_args.push_back( "-n" ); break;
  313       case 'N': break;
  314       case 'o': grep_args.push_back( "-o" ); break;
  315       case 'O': format_index = parse_format_type( arg ); break;
  316       case 'q': grep_args.push_back( "-q" ); verbosity = -1; break;
  317       case 'r': recursive = 1; break;
  318       case 'R': recursive = 2; break;
  319       case 's': grep_args.push_back( "-s" ); no_messages = true; break;
  320       case 'v': grep_args.push_back( "-v" ); break;
  321       case 'V': show_version(); return 0;
  322       case 'w': grep_args.push_back( "-w" ); break;
  323       case 'x': grep_args.push_back( "-x" ); break;
  324       case help_opt   : show_help(); return 0;
  325       case verbose_opt: if( verbosity < 4 ) ++verbosity;
  326                         no_messages = false; break;
  327       case color_opt: color_option = "--color";
  328         if( !arg.empty() ) { color_option += '='; color_option += arg; }
  329         break;
  330       case bz2_opt: parse_compressor( arg, fmt_bz2 ); break;
  331       case gz_opt: parse_compressor( arg, fmt_gz ); break;
  332       case lz_opt: parse_compressor( arg, fmt_lz ); break;
  333       case xz_opt: parse_compressor( arg, fmt_xz ); break;
  334       default : internal_error( "uncaught option." );
  335       }
  336     } // end process options
  337 
  338   if( !color_option.empty() )       // push the last value set
  339     grep_args.push_back( color_option.c_str() );
  340 
  341 #if defined(__MSVCRT__) || defined(__OS2__)
  342   setmode( STDIN_FILENO, O_BINARY );
  343   setmode( STDOUT_FILENO, O_BINARY );
  344 #endif
  345 
  346   if( !pattern_found )
  347     {
  348     if( argind >= parser.arguments() )
  349       { show_error( "Pattern not found." ); return 2; }
  350     const std::string & arg = parser.argument( argind++ );
  351     if( arg.size() && arg[0] == '-' ) grep_args.push_back( "-e" );
  352     grep_args.push_back( arg.c_str() );
  353     }
  354 
  355   for( ; argind < parser.arguments(); ++argind )
  356     filenames.push_back( parser.argument( argind ) );
  357 
  358   if( filenames.empty() ) filenames.push_back( recursive ? "." : "-" );
  359 
  360   if( show_name < 0 ) show_name = ( filenames.size() != 1 || recursive );
  361 
  362   std::string input_filename;
  363   int retval = 1;
  364   bool error = false;
  365   bool stdin_used = false;
  366   while( next_filename( filenames, input_filename, error, recursive,
  367                         false, no_messages ) )
  368     {
  369     int infd;
  370     if( input_filename == "." )
  371       {
  372       if( stdin_used ) continue; else stdin_used = true;
  373       infd = STDIN_FILENO; input_filename = "-";
  374       }
  375     else
  376       {
  377       infd = open_instream( input_filename, format_index < 0, no_messages );
  378       if( infd < 0 ) { error = true; continue; }
  379       }
  380 
  381     int tmp;
  382     if( infd == STDIN_FILENO )
  383       tmp = zgrep_stdin( infd, format_index, grep_args );
  384     else tmp = zgrep_file( infd, format_index, input_filename, grep_args,
  385                            list_mode, show_name );
  386     if( tmp == 0 || ( tmp == 2 && retval == 1 ) ) retval = tmp;
  387 
  388     if( close( infd ) != 0 )
  389       { show_file_error( input_filename.c_str(), "Error closing input file",
  390                          errno ); error = true; }
  391     if( retval == 0 && verbosity < 0 ) break;
  392     }
  393 
  394   if( std::fclose( stdout ) != 0 )
  395     {
  396     show_error( "Error closing stdout", errno );
  397     error = true;
  398     }
  399   if( error && ( retval != 0 || verbosity >= 0 ) ) retval = 2;
  400   return retval;
  401   }