"Fossies" - the Fresh Open Source Software Archive

Member "pcre-8.45/pcregrep.c" (26 Jun 2018, 98200 Bytes) of package /linux/misc/pcre-8.45.tar.bz2:


As a special service, "Fossies" has tried to format the requested source page into HTML using (guessed) C and C++ source code syntax highlighting (style: standard), with prefixed line numbers and a code folding option. Alternatively, you can view or download the uninterpreted source code file here. For more information about "pcregrep.c", see the Fossies "Dox" file reference documentation and the last Fossies "Diffs" side-by-side code changes report: 8.42_vs_8.43.

    1 /*************************************************
    2 *               pcregrep program                 *
    3 *************************************************/
    4 
    5 /* This is a grep program that uses the PCRE regular expression library to do
    6 its pattern matching. On Unix-like, Windows, and native z/OS systems it can
    7 recurse into directories, and in z/OS it can handle PDS files.
    8 
    9 Note that for native z/OS, in addition to defining the NATIVE_ZOS macro, an
   10 additional header is required. That header is not included in the main PCRE
   11 distribution because other apparatus is needed to compile pcregrep for z/OS.
   12 The header can be found in the special z/OS distribution, which is available
   13 from www.zaconsultants.net or from www.cbttape.org.
   14 
   15            Copyright (c) 1997-2014 University of Cambridge
   16 
   17 -----------------------------------------------------------------------------
   18 Redistribution and use in source and binary forms, with or without
   19 modification, are permitted provided that the following conditions are met:
   20 
   21     * Redistributions of source code must retain the above copyright notice,
   22       this list of conditions and the following disclaimer.
   23 
   24     * Redistributions in binary form must reproduce the above copyright
   25       notice, this list of conditions and the following disclaimer in the
   26       documentation and/or other materials provided with the distribution.
   27 
   28     * Neither the name of the University of Cambridge nor the names of its
   29       contributors may be used to endorse or promote products derived from
   30       this software without specific prior written permission.
   31 
   32 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
   33 AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
   34 IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
   35 ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
   36 LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
   37 CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
   38 SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
   39 INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
   40 CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
   41 ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
   42 POSSIBILITY OF SUCH DAMAGE.
   43 -----------------------------------------------------------------------------
   44 */
   45 
   46 #ifdef HAVE_CONFIG_H
   47 #include "config.h"
   48 #endif
   49 
   50 #include <ctype.h>
   51 #include <locale.h>
   52 #include <stdio.h>
   53 #include <string.h>
   54 #include <stdlib.h>
   55 #include <errno.h>
   56 
   57 #include <sys/types.h>
   58 #include <sys/stat.h>
   59 
   60 #ifdef HAVE_UNISTD_H
   61 #include <unistd.h>
   62 #endif
   63 
   64 #ifdef SUPPORT_LIBZ
   65 #include <zlib.h>
   66 #endif
   67 
   68 #ifdef SUPPORT_LIBBZ2
   69 #include <bzlib.h>
   70 #endif
   71 
   72 #include "pcre.h"
   73 
   74 #define FALSE 0
   75 #define TRUE 1
   76 
   77 typedef int BOOL;
   78 
   79 #define OFFSET_SIZE 99
   80 
   81 #if BUFSIZ > 8192
   82 #define MAXPATLEN BUFSIZ
   83 #else
   84 #define MAXPATLEN 8192
   85 #endif
   86 
   87 #define PATBUFSIZE (MAXPATLEN + 10)   /* Allows for prefix+suffix */
   88 
   89 /* Values for the "filenames" variable, which specifies options for file name
   90 output. The order is important; it is assumed that a file name is wanted for
   91 all values greater than FN_DEFAULT. */
   92 
   93 enum { FN_NONE, FN_DEFAULT, FN_MATCH_ONLY, FN_NOMATCH_ONLY, FN_FORCE };
   94 
   95 /* File reading styles */
   96 
   97 enum { FR_PLAIN, FR_LIBZ, FR_LIBBZ2 };
   98 
   99 /* Actions for the -d and -D options */
  100 
  101 enum { dee_READ, dee_SKIP, dee_RECURSE };
  102 enum { DEE_READ, DEE_SKIP };
  103 
  104 /* Actions for special processing options (flag bits) */
  105 
  106 #define PO_WORD_MATCH     0x0001
  107 #define PO_LINE_MATCH     0x0002
  108 #define PO_FIXED_STRINGS  0x0004
  109 
  110 /* Line ending types */
  111 
  112 enum { EL_LF, EL_CR, EL_CRLF, EL_ANY, EL_ANYCRLF };
  113 
  114 /* Binary file options */
  115 
  116 enum { BIN_BINARY, BIN_NOMATCH, BIN_TEXT };
  117 
  118 /* In newer versions of gcc, with FORTIFY_SOURCE set (the default in some
  119 environments), a warning is issued if the value of fwrite() is ignored.
  120 Unfortunately, casting to (void) does not suppress the warning. To get round
  121 this, we use a macro that compiles a fudge. Oddly, this does not also seem to
  122 apply to fprintf(). */
  123 
  124 #define FWRITE(a,b,c,d) if (fwrite(a,b,c,d)) {}
  125 
  126 
  127 
  128 /*************************************************
  129 *               Global variables                 *
  130 *************************************************/
  131 
  132 /* Jeffrey Friedl has some debugging requirements that are not part of the
  133 regular code. */
  134 
  135 #ifdef JFRIEDL_DEBUG
  136 static int S_arg = -1;
  137 static unsigned int jfriedl_XR = 0; /* repeat regex attempt this many times */
  138 static unsigned int jfriedl_XT = 0; /* replicate text this many times */
  139 static const char *jfriedl_prefix = "";
  140 static const char *jfriedl_postfix = "";
  141 #endif
  142 
  143 static int  endlinetype;
  144 
  145 static char *colour_string = (char *)"1;31";
  146 static char *colour_option = NULL;
  147 static char *dee_option = NULL;
  148 static char *DEE_option = NULL;
  149 static char *locale = NULL;
  150 static char *main_buffer = NULL;
  151 static char *newline = NULL;
  152 static char *om_separator = (char *)"";
  153 static char *stdin_name = (char *)"(standard input)";
  154 
  155 static const unsigned char *pcretables = NULL;
  156 
  157 static int after_context = 0;
  158 static int before_context = 0;
  159 static int binary_files = BIN_BINARY;
  160 static int both_context = 0;
  161 static int bufthird = PCREGREP_BUFSIZE;
  162 static int bufsize = 3*PCREGREP_BUFSIZE;
  163 
  164 #if defined HAVE_WINDOWS_H && HAVE_WINDOWS_H
  165 static int dee_action = dee_SKIP;
  166 #else
  167 static int dee_action = dee_READ;
  168 #endif
  169 
  170 static int DEE_action = DEE_READ;
  171 static int error_count = 0;
  172 static int filenames = FN_DEFAULT;
  173 static int pcre_options = 0;
  174 static int process_options = 0;
  175 
  176 #ifdef SUPPORT_PCREGREP_JIT
  177 static int study_options = PCRE_STUDY_JIT_COMPILE;
  178 #else
  179 static int study_options = 0;
  180 #endif
  181 
  182 static unsigned long int match_limit = 0;
  183 static unsigned long int match_limit_recursion = 0;
  184 
  185 static BOOL count_only = FALSE;
  186 static BOOL do_colour = FALSE;
  187 static BOOL file_offsets = FALSE;
  188 static BOOL hyphenpending = FALSE;
  189 static BOOL invert = FALSE;
  190 static BOOL line_buffered = FALSE;
  191 static BOOL line_offsets = FALSE;
  192 static BOOL multiline = FALSE;
  193 static BOOL number = FALSE;
  194 static BOOL omit_zero_count = FALSE;
  195 static BOOL resource_error = FALSE;
  196 static BOOL quiet = FALSE;
  197 static BOOL show_only_matching = FALSE;
  198 static BOOL silent = FALSE;
  199 static BOOL utf8 = FALSE;
  200 
  201 /* Structure for list of --only-matching capturing numbers. */
  202 
  203 typedef struct omstr {
  204   struct omstr *next;
  205   int groupnum;
  206 } omstr;
  207 
  208 static omstr *only_matching = NULL;
  209 static omstr *only_matching_last = NULL;
  210 
  211 /* Structure for holding the two variables that describe a number chain. */
  212 
  213 typedef struct omdatastr {
  214   omstr **anchor;
  215   omstr **lastptr;
  216 } omdatastr;
  217 
  218 static omdatastr only_matching_data = { &only_matching, &only_matching_last };
  219 
  220 /* Structure for list of file names (for -f and --{in,ex}clude-from) */
  221 
  222 typedef struct fnstr {
  223   struct fnstr *next;
  224   char *name;
  225 } fnstr;
  226 
  227 static fnstr *exclude_from = NULL;
  228 static fnstr *exclude_from_last = NULL;
  229 static fnstr *include_from = NULL;
  230 static fnstr *include_from_last = NULL;
  231 
  232 static fnstr *file_lists = NULL;
  233 static fnstr *file_lists_last = NULL;
  234 static fnstr *pattern_files = NULL;
  235 static fnstr *pattern_files_last = NULL;
  236 
  237 /* Structure for holding the two variables that describe a file name chain. */
  238 
  239 typedef struct fndatastr {
  240   fnstr **anchor;
  241   fnstr **lastptr;
  242 } fndatastr;
  243 
  244 static fndatastr exclude_from_data = { &exclude_from, &exclude_from_last };
  245 static fndatastr include_from_data = { &include_from, &include_from_last };
  246 static fndatastr file_lists_data = { &file_lists, &file_lists_last };
  247 static fndatastr pattern_files_data = { &pattern_files, &pattern_files_last };
  248 
  249 /* Structure for pattern and its compiled form; used for matching patterns and
  250 also for include/exclude patterns. */
  251 
  252 typedef struct patstr {
  253   struct patstr *next;
  254   char *string;
  255   pcre *compiled;
  256   pcre_extra *hint;
  257 } patstr;
  258 
  259 static patstr *patterns = NULL;
  260 static patstr *patterns_last = NULL;
  261 static patstr *include_patterns = NULL;
  262 static patstr *include_patterns_last = NULL;
  263 static patstr *exclude_patterns = NULL;
  264 static patstr *exclude_patterns_last = NULL;
  265 static patstr *include_dir_patterns = NULL;
  266 static patstr *include_dir_patterns_last = NULL;
  267 static patstr *exclude_dir_patterns = NULL;
  268 static patstr *exclude_dir_patterns_last = NULL;
  269 
  270 /* Structure holding the two variables that describe a pattern chain. A pointer
  271 to such structures is used for each appropriate option. */
  272 
  273 typedef struct patdatastr {
  274   patstr **anchor;
  275   patstr **lastptr;
  276 } patdatastr;
  277 
  278 static patdatastr match_patdata = { &patterns, &patterns_last };
  279 static patdatastr include_patdata = { &include_patterns, &include_patterns_last };
  280 static patdatastr exclude_patdata = { &exclude_patterns, &exclude_patterns_last };
  281 static patdatastr include_dir_patdata = { &include_dir_patterns, &include_dir_patterns_last };
  282 static patdatastr exclude_dir_patdata = { &exclude_dir_patterns, &exclude_dir_patterns_last };
  283 
  284 static patstr **incexlist[4] = { &include_patterns, &exclude_patterns,
  285                                  &include_dir_patterns, &exclude_dir_patterns };
  286 
  287 static const char *incexname[4] = { "--include", "--exclude",
  288                                     "--include-dir", "--exclude-dir" };
  289 
  290 /* Structure for options and list of them */
  291 
  292 enum { OP_NODATA, OP_STRING, OP_OP_STRING, OP_NUMBER, OP_LONGNUMBER,
  293        OP_OP_NUMBER, OP_OP_NUMBERS, OP_PATLIST, OP_FILELIST, OP_BINFILES };
  294 
  295 typedef struct option_item {
  296   int type;
  297   int one_char;
  298   void *dataptr;
  299   const char *long_name;
  300   const char *help_text;
  301 } option_item;
  302 
  303 /* Options without a single-letter equivalent get a negative value. This can be
  304 used to identify them. */
  305 
  306 #define N_COLOUR       (-1)
  307 #define N_EXCLUDE      (-2)
  308 #define N_EXCLUDE_DIR  (-3)
  309 #define N_HELP         (-4)
  310 #define N_INCLUDE      (-5)
  311 #define N_INCLUDE_DIR  (-6)
  312 #define N_LABEL        (-7)
  313 #define N_LOCALE       (-8)
  314 #define N_NULL         (-9)
  315 #define N_LOFFSETS     (-10)
  316 #define N_FOFFSETS     (-11)
  317 #define N_LBUFFER      (-12)
  318 #define N_M_LIMIT      (-13)
  319 #define N_M_LIMIT_REC  (-14)
  320 #define N_BUFSIZE      (-15)
  321 #define N_NOJIT        (-16)
  322 #define N_FILE_LIST    (-17)
  323 #define N_BINARY_FILES (-18)
  324 #define N_EXCLUDE_FROM (-19)
  325 #define N_INCLUDE_FROM (-20)
  326 #define N_OM_SEPARATOR (-21)
  327 
  328 static option_item optionlist[] = {
  329   { OP_NODATA,     N_NULL,   NULL,              "",              "terminate options" },
  330   { OP_NODATA,     N_HELP,   NULL,              "help",          "display this help and exit" },
  331   { OP_NUMBER,     'A',      &after_context,    "after-context=number", "set number of following context lines" },
  332   { OP_NODATA,     'a',      NULL,              "text",          "treat binary files as text" },
  333   { OP_NUMBER,     'B',      &before_context,   "before-context=number", "set number of prior context lines" },
  334   { OP_BINFILES,   N_BINARY_FILES, NULL,        "binary-files=word", "set treatment of binary files" },
  335   { OP_NUMBER,     N_BUFSIZE,&bufthird,         "buffer-size=number", "set processing buffer size parameter" },
  336   { OP_OP_STRING,  N_COLOUR, &colour_option,    "color=option",  "matched text color option" },
  337   { OP_OP_STRING,  N_COLOUR, &colour_option,    "colour=option", "matched text colour option" },
  338   { OP_NUMBER,     'C',      &both_context,     "context=number", "set number of context lines, before & after" },
  339   { OP_NODATA,     'c',      NULL,              "count",         "print only a count of matching lines per FILE" },
  340   { OP_STRING,     'D',      &DEE_option,       "devices=action","how to handle devices, FIFOs, and sockets" },
  341   { OP_STRING,     'd',      &dee_option,       "directories=action", "how to handle directories" },
  342   { OP_PATLIST,    'e',      &match_patdata,    "regex(p)=pattern", "specify pattern (may be used more than once)" },
  343   { OP_NODATA,     'F',      NULL,              "fixed-strings", "patterns are sets of newline-separated strings" },
  344   { OP_FILELIST,   'f',      &pattern_files_data, "file=path",   "read patterns from file" },
  345   { OP_FILELIST,   N_FILE_LIST, &file_lists_data, "file-list=path","read files to search from file" },
  346   { OP_NODATA,     N_FOFFSETS, NULL,            "file-offsets",  "output file offsets, not text" },
  347   { OP_NODATA,     'H',      NULL,              "with-filename", "force the prefixing filename on output" },
  348   { OP_NODATA,     'h',      NULL,              "no-filename",   "suppress the prefixing filename on output" },
  349   { OP_NODATA,     'I',      NULL,              "",              "treat binary files as not matching (ignore)" },
  350   { OP_NODATA,     'i',      NULL,              "ignore-case",   "ignore case distinctions" },
  351 #ifdef SUPPORT_PCREGREP_JIT
  352   { OP_NODATA,     N_NOJIT,  NULL,              "no-jit",        "do not use just-in-time compiler optimization" },
  353 #else
  354   { OP_NODATA,     N_NOJIT,  NULL,              "no-jit",        "ignored: this pcregrep does not support JIT" },
  355 #endif
  356   { OP_NODATA,     'l',      NULL,              "files-with-matches", "print only FILE names containing matches" },
  357   { OP_NODATA,     'L',      NULL,              "files-without-match","print only FILE names not containing matches" },
  358   { OP_STRING,     N_LABEL,  &stdin_name,       "label=name",    "set name for standard input" },
  359   { OP_NODATA,     N_LBUFFER, NULL,             "line-buffered", "use line buffering" },
  360   { OP_NODATA,     N_LOFFSETS, NULL,            "line-offsets",  "output line numbers and offsets, not text" },
  361   { OP_STRING,     N_LOCALE, &locale,           "locale=locale", "use the named locale" },
  362   { OP_LONGNUMBER, N_M_LIMIT, &match_limit,     "match-limit=number", "set PCRE match limit option" },
  363   { OP_LONGNUMBER, N_M_LIMIT_REC, &match_limit_recursion, "recursion-limit=number", "set PCRE match recursion limit option" },
  364   { OP_NODATA,     'M',      NULL,              "multiline",     "run in multiline mode" },
  365   { OP_STRING,     'N',      &newline,          "newline=type",  "set newline type (CR, LF, CRLF, ANYCRLF or ANY)" },
  366   { OP_NODATA,     'n',      NULL,              "line-number",   "print line number with output lines" },
  367   { OP_OP_NUMBERS, 'o',      &only_matching_data, "only-matching=n", "show only the part of the line that matched" },
  368   { OP_STRING,     N_OM_SEPARATOR, &om_separator, "om-separator=text", "set separator for multiple -o output" },
  369   { OP_NODATA,     'q',      NULL,              "quiet",         "suppress output, just set return code" },
  370   { OP_NODATA,     'r',      NULL,              "recursive",     "recursively scan sub-directories" },
  371   { OP_PATLIST,    N_EXCLUDE,&exclude_patdata,  "exclude=pattern","exclude matching files when recursing" },
  372   { OP_PATLIST,    N_INCLUDE,&include_patdata,  "include=pattern","include matching files when recursing" },
  373   { OP_PATLIST,    N_EXCLUDE_DIR,&exclude_dir_patdata, "exclude-dir=pattern","exclude matching directories when recursing" },
  374   { OP_PATLIST,    N_INCLUDE_DIR,&include_dir_patdata, "include-dir=pattern","include matching directories when recursing" },
  375   { OP_FILELIST,   N_EXCLUDE_FROM,&exclude_from_data, "exclude-from=path", "read exclude list from file" },
  376   { OP_FILELIST,   N_INCLUDE_FROM,&include_from_data, "include-from=path", "read include list from file" },
  377 
  378   /* These two were accidentally implemented with underscores instead of
  379   hyphens in the option names. As this was not discovered for several releases,
  380   the incorrect versions are left in the table for compatibility. However, the
  381   --help function misses out any option that has an underscore in its name. */
  382 
  383   { OP_PATLIST,   N_EXCLUDE_DIR,&exclude_dir_patdata, "exclude_dir=pattern","exclude matching directories when recursing" },
  384   { OP_PATLIST,   N_INCLUDE_DIR,&include_dir_patdata, "include_dir=pattern","include matching directories when recursing" },
  385 
  386 #ifdef JFRIEDL_DEBUG
  387   { OP_OP_NUMBER, 'S',      &S_arg,            "jeffS",         "replace matched (sub)string with X" },
  388 #endif
  389   { OP_NODATA,    's',      NULL,              "no-messages",   "suppress error messages" },
  390   { OP_NODATA,    'u',      NULL,              "utf-8",         "use UTF-8 mode" },
  391   { OP_NODATA,    'V',      NULL,              "version",       "print version information and exit" },
  392   { OP_NODATA,    'v',      NULL,              "invert-match",  "select non-matching lines" },
  393   { OP_NODATA,    'w',      NULL,              "word-regex(p)", "force patterns to match only as words"  },
  394   { OP_NODATA,    'x',      NULL,              "line-regex(p)", "force patterns to match only whole lines" },
  395   { OP_NODATA,    0,        NULL,               NULL,            NULL }
  396 };
  397 
  398 /* Tables for prefixing and suffixing patterns, according to the -w, -x, and -F
  399 options. These set the 1, 2, and 4 bits in process_options, respectively. Note
  400 that the combination of -w and -x has the same effect as -x on its own, so we
  401 can treat them as the same. Note that the MAXPATLEN macro assumes the longest
  402 prefix+suffix is 10 characters; if anything longer is added, it must be
  403 adjusted. */
  404 
  405 static const char *prefix[] = {
  406   "", "\\b", "^(?:", "^(?:", "\\Q", "\\b\\Q", "^(?:\\Q", "^(?:\\Q" };
  407 
  408 static const char *suffix[] = {
  409   "", "\\b", ")$",   ")$",   "\\E", "\\E\\b", "\\E)$",   "\\E)$" };
  410 
  411 /* UTF-8 tables - used only when the newline setting is "any". */
  412 
  413 const int utf8_table3[] = { 0xff, 0x1f, 0x0f, 0x07, 0x03, 0x01};
  414 
  415 const char utf8_table4[] = {
  416   1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
  417   1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
  418   2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,
  419   3,3,3,3,3,3,3,3,4,4,4,4,5,5,5,5 };
  420 
  421 
  422 
  423 /*************************************************
  424 *         Exit from the program                  *
  425 *************************************************/
  426 
  427 /* If there has been a resource error, give a suitable message.
  428 
  429 Argument:  the return code
  430 Returns:   does not return
  431 */
  432 
  433 static void
  434 pcregrep_exit(int rc)
  435 {
  436 if (resource_error)
  437   {
  438   fprintf(stderr, "pcregrep: Error %d, %d or %d means that a resource limit "
  439     "was exceeded.\n", PCRE_ERROR_MATCHLIMIT, PCRE_ERROR_RECURSIONLIMIT,
  440     PCRE_ERROR_JIT_STACKLIMIT);
  441   fprintf(stderr, "pcregrep: Check your regex for nested unlimited loops.\n");
  442   }
  443 exit(rc);
  444 }
  445 
  446 
  447 /*************************************************
  448 *          Add item to chain of patterns         *
  449 *************************************************/
  450 
  451 /* Used to add an item onto a chain, or just return an unconnected item if the
  452 "after" argument is NULL.
  453 
  454 Arguments:
  455   s          pattern string to add
  456   after      if not NULL points to item to insert after
  457 
  458 Returns:     new pattern block or NULL on error
  459 */
  460 
  461 static patstr *
  462 add_pattern(char *s, patstr *after)
  463 {
  464 patstr *p = (patstr *)malloc(sizeof(patstr));
  465 if (p == NULL)
  466   {
  467   fprintf(stderr, "pcregrep: malloc failed\n");
  468   pcregrep_exit(2);
  469   }
  470 if (strlen(s) > MAXPATLEN)
  471   {
  472   fprintf(stderr, "pcregrep: pattern is too long (limit is %d bytes)\n",
  473     MAXPATLEN);
  474   free(p);
  475   return NULL;
  476   }
  477 p->next = NULL;
  478 p->string = s;
  479 p->compiled = NULL;
  480 p->hint = NULL;
  481 
  482 if (after != NULL)
  483   {
  484   p->next = after->next;
  485   after->next = p;
  486   }
  487 return p;
  488 }
  489 
  490 
  491 /*************************************************
  492 *           Free chain of patterns               *
  493 *************************************************/
  494 
  495 /* Used for several chains of patterns.
  496 
  497 Argument: pointer to start of chain
  498 Returns:  nothing
  499 */
  500 
  501 static void
  502 free_pattern_chain(patstr *pc)
  503 {
  504 while (pc != NULL)
  505   {
  506   patstr *p = pc;
  507   pc = p->next;
  508   if (p->hint != NULL) pcre_free_study(p->hint);
  509   if (p->compiled != NULL) pcre_free(p->compiled);
  510   free(p);
  511   }
  512 }
  513 
  514 
  515 /*************************************************
  516 *           Free chain of file names             *
  517 *************************************************/
  518 
  519 /*
  520 Argument: pointer to start of chain
  521 Returns:  nothing
  522 */
  523 
  524 static void
  525 free_file_chain(fnstr *fn)
  526 {
  527 while (fn != NULL)
  528   {
  529   fnstr *f = fn;
  530   fn = f->next;
  531   free(f);
  532   }
  533 }
  534 
  535 
  536 /*************************************************
  537 *            OS-specific functions               *
  538 *************************************************/
  539 
  540 /* These functions are defined so that they can be made system specific.
  541 At present there are versions for Unix-style environments, Windows, native
  542 z/OS, and "no support". */
  543 
  544 
  545 /************* Directory scanning Unix-style and z/OS ***********/
  546 
  547 #if (defined HAVE_SYS_STAT_H && defined HAVE_DIRENT_H && defined HAVE_SYS_TYPES_H) || defined NATIVE_ZOS
  548 #include <sys/types.h>
  549 #include <sys/stat.h>
  550 #include <dirent.h>
  551 
  552 #if defined NATIVE_ZOS
  553 /************* Directory and PDS/E scanning for z/OS ***********/
  554 /************* z/OS looks mostly like Unix with USS ************/
  555 /* However, z/OS needs the #include statements in this header */
  556 #include "pcrzosfs.h"
  557 /* That header is not included in the main PCRE distribution because
  558    other apparatus is needed to compile pcregrep for z/OS. The header
  559    can be found in the special z/OS distribution, which is available
  560    from www.zaconsultants.net or from www.cbttape.org. */
  561 #endif
  562 
  563 typedef DIR directory_type;
  564 #define FILESEP '/'
  565 
  566 static int
  567 isdirectory(char *filename)
  568 {
  569 struct stat statbuf;
  570 if (stat(filename, &statbuf) < 0)
  571   return 0;        /* In the expectation that opening as a file will fail */
  572 return (statbuf.st_mode & S_IFMT) == S_IFDIR;
  573 }
  574 
  575 static directory_type *
  576 opendirectory(char *filename)
  577 {
  578 return opendir(filename);
  579 }
  580 
  581 static char *
  582 readdirectory(directory_type *dir)
  583 {
  584 for (;;)
  585   {
  586   struct dirent *dent = readdir(dir);
  587   if (dent == NULL) return NULL;
  588   if (strcmp(dent->d_name, ".") != 0 && strcmp(dent->d_name, "..") != 0)
  589     return dent->d_name;
  590   }
  591 /* Control never reaches here */
  592 }
  593 
  594 static void
  595 closedirectory(directory_type *dir)
  596 {
  597 closedir(dir);
  598 }
  599 
  600 
  601 /************* Test for regular file, Unix-style **********/
  602 
  603 static int
  604 isregfile(char *filename)
  605 {
  606 struct stat statbuf;
  607 if (stat(filename, &statbuf) < 0)
  608   return 1;        /* In the expectation that opening as a file will fail */
  609 return (statbuf.st_mode & S_IFMT) == S_IFREG;
  610 }
  611 
  612 
  613 #if defined NATIVE_ZOS
  614 /************* Test for a terminal in z/OS **********/
  615 /* isatty() does not work in a TSO environment, so always give FALSE.*/
  616 
  617 static BOOL
  618 is_stdout_tty(void)
  619 {
  620 return FALSE;
  621 }
  622 
  623 static BOOL
  624 is_file_tty(FILE *f)
  625 {
  626 return FALSE;
  627 }
  628 
  629 
  630 /************* Test for a terminal, Unix-style **********/
  631 
  632 #else
  633 static BOOL
  634 is_stdout_tty(void)
  635 {
  636 return isatty(fileno(stdout));
  637 }
  638 
  639 static BOOL
  640 is_file_tty(FILE *f)
  641 {
  642 return isatty(fileno(f));
  643 }
  644 #endif
  645 
  646 /* End of Unix-style or native z/OS environment functions. */
  647 
  648 
  649 /************* Directory scanning in Windows ***********/
  650 
  651 /* I (Philip Hazel) have no means of testing this code. It was contributed by
  652 Lionel Fourquaux. David Burgess added a patch to define INVALID_FILE_ATTRIBUTES
  653 when it did not exist. David Byron added a patch that moved the #include of
  654 <windows.h> to before the INVALID_FILE_ATTRIBUTES definition rather than after.
  655 The double test below stops gcc 4.4.4 grumbling that HAVE_WINDOWS_H is
  656 undefined when it is indeed undefined. */
  657 
  658 #elif defined HAVE_WINDOWS_H && HAVE_WINDOWS_H
  659 
  660 #ifndef STRICT
  661 # define STRICT
  662 #endif
  663 #ifndef WIN32_LEAN_AND_MEAN
  664 # define WIN32_LEAN_AND_MEAN
  665 #endif
  666 
  667 #include <windows.h>
  668 
  669 #ifndef INVALID_FILE_ATTRIBUTES
  670 #define INVALID_FILE_ATTRIBUTES 0xFFFFFFFF
  671 #endif
  672 
  673 typedef struct directory_type
  674 {
  675 HANDLE handle;
  676 BOOL first;
  677 WIN32_FIND_DATA data;
  678 } directory_type;
  679 
  680 #define FILESEP '/'
  681 
  682 int
  683 isdirectory(char *filename)
  684 {
  685 DWORD attr = GetFileAttributes(filename);
  686 if (attr == INVALID_FILE_ATTRIBUTES)
  687   return 0;
  688 return (attr & FILE_ATTRIBUTE_DIRECTORY) != 0;
  689 }
  690 
  691 directory_type *
  692 opendirectory(char *filename)
  693 {
  694 size_t len;
  695 char *pattern;
  696 directory_type *dir;
  697 DWORD err;
  698 len = strlen(filename);
  699 pattern = (char *)malloc(len + 3);
  700 dir = (directory_type *)malloc(sizeof(*dir));
  701 if ((pattern == NULL) || (dir == NULL))
  702   {
  703   fprintf(stderr, "pcregrep: malloc failed\n");
  704   pcregrep_exit(2);
  705   }
  706 memcpy(pattern, filename, len);
  707 memcpy(&(pattern[len]), "\\*", 3);
  708 dir->handle = FindFirstFile(pattern, &(dir->data));
  709 if (dir->handle != INVALID_HANDLE_VALUE)
  710   {
  711   free(pattern);
  712   dir->first = TRUE;
  713   return dir;
  714   }
  715 err = GetLastError();
  716 free(pattern);
  717 free(dir);
  718 errno = (err == ERROR_ACCESS_DENIED) ? EACCES : ENOENT;
  719 return NULL;
  720 }
  721 
  722 char *
  723 readdirectory(directory_type *dir)
  724 {
  725 for (;;)
  726   {
  727   if (!dir->first)
  728     {
  729     if (!FindNextFile(dir->handle, &(dir->data)))
  730       return NULL;
  731     }
  732   else
  733     {
  734     dir->first = FALSE;
  735     }
  736   if (strcmp(dir->data.cFileName, ".") != 0 && strcmp(dir->data.cFileName, "..") != 0)
  737     return dir->data.cFileName;
  738   }
  739 #ifndef _MSC_VER
  740 return NULL;   /* Keep compiler happy; never executed */
  741 #endif
  742 }
  743 
  744 void
  745 closedirectory(directory_type *dir)
  746 {
  747 FindClose(dir->handle);
  748 free(dir);
  749 }
  750 
  751 
  752 /************* Test for regular file in Windows **********/
  753 
  754 /* I don't know how to do this, or if it can be done; assume all paths are
  755 regular if they are not directories. */
  756 
  757 int isregfile(char *filename)
  758 {
  759 return !isdirectory(filename);
  760 }
  761 
  762 
  763 /************* Test for a terminal in Windows **********/
  764 
  765 /* I don't know how to do this; assume never */
  766 
  767 static BOOL
  768 is_stdout_tty(void)
  769 {
  770 return FALSE;
  771 }
  772 
  773 static BOOL
  774 is_file_tty(FILE *f)
  775 {
  776 return FALSE;
  777 }
  778 
  779 /* End of Windows functions */
  780 
  781 
  782 /************* Directory scanning when we can't do it ***********/
  783 
  784 /* The type is void, and apart from isdirectory(), the functions do nothing. */
  785 
  786 #else
  787 
  788 #define FILESEP 0
  789 typedef void directory_type;
  790 
  791 int isdirectory(char *filename) { return 0; }
  792 directory_type * opendirectory(char *filename) { return (directory_type*)0;}
  793 char *readdirectory(directory_type *dir) { return (char*)0;}
  794 void closedirectory(directory_type *dir) {}
  795 
  796 
  797 /************* Test for regular file when we can't do it **********/
  798 
  799 /* Assume all files are regular. */
  800 
  801 int isregfile(char *filename) { return 1; }
  802 
  803 
  804 /************* Test for a terminal when we can't do it **********/
  805 
  806 static BOOL
  807 is_stdout_tty(void)
  808 {
  809 return FALSE;
  810 }
  811 
  812 static BOOL
  813 is_file_tty(FILE *f)
  814 {
  815 return FALSE;
  816 }
  817 
  818 #endif  /* End of system-specific functions */
  819 
  820 
  821 
  822 #ifndef HAVE_STRERROR
  823 /*************************************************
  824 *     Provide strerror() for non-ANSI libraries  *
  825 *************************************************/
  826 
  827 /* Some old-fashioned systems still around (e.g. SunOS4) don't have strerror()
  828 in their libraries, but can provide the same facility by this simple
  829 alternative function. */
  830 
  831 extern int   sys_nerr;
  832 extern char *sys_errlist[];
  833 
  834 char *
  835 strerror(int n)
  836 {
  837 if (n < 0 || n >= sys_nerr) return "unknown error number";
  838 return sys_errlist[n];
  839 }
  840 #endif /* HAVE_STRERROR */
  841 
  842 
  843 
  844 /*************************************************
  845 *                Usage function                  *
  846 *************************************************/
  847 
  848 static int
  849 usage(int rc)
  850 {
  851 option_item *op;
  852 fprintf(stderr, "Usage: pcregrep [-");
  853 for (op = optionlist; op->one_char != 0; op++)
  854   {
  855   if (op->one_char > 0) fprintf(stderr, "%c", op->one_char);
  856   }
  857 fprintf(stderr, "] [long options] [pattern] [files]\n");
  858 fprintf(stderr, "Type `pcregrep --help' for more information and the long "
  859   "options.\n");
  860 return rc;
  861 }
  862 
  863 
  864 
  865 /*************************************************
  866 *                Help function                   *
  867 *************************************************/
  868 
  869 static void
  870 help(void)
  871 {
  872 option_item *op;
  873 
  874 printf("Usage: pcregrep [OPTION]... [PATTERN] [FILE1 FILE2 ...]\n");
  875 printf("Search for PATTERN in each FILE or standard input.\n");
  876 printf("PATTERN must be present if neither -e nor -f is used.\n");
  877 printf("\"-\" can be used as a file name to mean STDIN.\n");
  878 
  879 #ifdef SUPPORT_LIBZ
  880 printf("Files whose names end in .gz are read using zlib.\n");
  881 #endif
  882 
  883 #ifdef SUPPORT_LIBBZ2
  884 printf("Files whose names end in .bz2 are read using bzlib2.\n");
  885 #endif
  886 
  887 #if defined SUPPORT_LIBZ || defined SUPPORT_LIBBZ2
  888 printf("Other files and the standard input are read as plain files.\n\n");
  889 #else
  890 printf("All files are read as plain files, without any interpretation.\n\n");
  891 #endif
  892 
  893 printf("Example: pcregrep -i 'hello.*world' menu.h main.c\n\n");
  894 printf("Options:\n");
  895 
  896 for (op = optionlist; op->one_char != 0; op++)
  897   {
  898   int n;
  899   char s[4];
  900 
  901   /* Two options were accidentally implemented and documented with underscores
  902   instead of hyphens in their names, something that was not noticed for quite a
  903   few releases. When fixing this, I left the underscored versions in the list
  904   in case people were using them. However, we don't want to display them in the
  905   help data. There are no other options that contain underscores, and we do not
  906   expect ever to implement such options. Therefore, just omit any option that
  907   contains an underscore. */
  908 
  909   if (strchr(op->long_name, '_') != NULL) continue;
  910 
  911   if (op->one_char > 0 && (op->long_name)[0] == 0)
  912     n = 31 - printf("  -%c", op->one_char);
  913   else
  914     {
  915     if (op->one_char > 0) sprintf(s, "-%c,", op->one_char);
  916       else strcpy(s, "   ");
  917     n = 31 - printf("  %s --%s", s, op->long_name);
  918     }
  919 
  920   if (n < 1) n = 1;
  921   printf("%.*s%s\n", n, "                           ", op->help_text);
  922   }
  923 
  924 printf("\nNumbers may be followed by K or M, e.g. --buffer-size=100K.\n");
  925 printf("The default value for --buffer-size is %d.\n", PCREGREP_BUFSIZE);
  926 printf("When reading patterns or file names from a file, trailing white\n");
  927 printf("space is removed and blank lines are ignored.\n");
  928 printf("The maximum size of any pattern is %d bytes.\n", MAXPATLEN);
  929 
  930 printf("\nWith no FILEs, read standard input. If fewer than two FILEs given, assume -h.\n");
  931 printf("Exit status is 0 if any matches, 1 if no matches, and 2 if trouble.\n");
  932 }
  933 
  934 
  935 
  936 /*************************************************
  937 *            Test exclude/includes               *
  938 *************************************************/
  939 
  940 /* If any exclude pattern matches, the path is excluded. Otherwise, unless
  941 there are no includes, the path must match an include pattern.
  942 
  943 Arguments:
  944   path      the path to be matched
  945   ip        the chain of include patterns
  946   ep        the chain of exclude patterns
  947 
  948 Returns:    TRUE if the path is not excluded
  949 */
  950 
  951 static BOOL
  952 test_incexc(char *path, patstr *ip, patstr *ep)
  953 {
  954 int plen = strlen(path);
  955 
  956 for (; ep != NULL; ep = ep->next)
  957   {
  958   if (pcre_exec(ep->compiled, NULL, path, plen, 0, 0, NULL, 0) >= 0)
  959     return FALSE;
  960   }
  961 
  962 if (ip == NULL) return TRUE;
  963 
  964 for (; ip != NULL; ip = ip->next)
  965   {
  966   if (pcre_exec(ip->compiled, NULL, path, plen, 0, 0, NULL, 0) >= 0)
  967     return TRUE;
  968   }
  969 
  970 return FALSE;
  971 }
  972 
  973 
  974 
  975 /*************************************************
  976 *         Decode integer argument value          *
  977 *************************************************/
  978 
  979 /* Integer arguments can be followed by K or M. Avoid the use of strtoul()
  980 because SunOS4 doesn't have it. This is used only for unpicking arguments, so
  981 just keep it simple.
  982 
  983 Arguments:
  984   option_data   the option data string
  985   op            the option item (for error messages)
  986   longop        TRUE if option given in long form
  987 
  988 Returns:        a long integer
  989 */
  990 
  991 static long int
  992 decode_number(char *option_data, option_item *op, BOOL longop)
  993 {
  994 unsigned long int n = 0;
  995 char *endptr = option_data;
  996 while (*endptr != 0 && isspace((unsigned char)(*endptr))) endptr++;
  997 while (isdigit((unsigned char)(*endptr)))
  998   n = n * 10 + (int)(*endptr++ - '0');
  999 if (toupper(*endptr) == 'K')
 1000   {
 1001   n *= 1024;
 1002   endptr++;
 1003   }
 1004 else if (toupper(*endptr) == 'M')
 1005   {
 1006   n *= 1024*1024;
 1007   endptr++;
 1008   }
 1009 
 1010 if (*endptr != 0)   /* Error */
 1011   {
 1012   if (longop)
 1013     {
 1014     char *equals = strchr(op->long_name, '=');
 1015     int nlen = (equals == NULL)? (int)strlen(op->long_name) :
 1016       (int)(equals - op->long_name);
 1017     fprintf(stderr, "pcregrep: Malformed number \"%s\" after --%.*s\n",
 1018       option_data, nlen, op->long_name);
 1019     }
 1020   else
 1021     fprintf(stderr, "pcregrep: Malformed number \"%s\" after -%c\n",
 1022       option_data, op->one_char);
 1023   pcregrep_exit(usage(2));
 1024   }
 1025 
 1026 return n;
 1027 }
 1028 
 1029 
 1030 
 1031 /*************************************************
 1032 *       Add item to a chain of numbers           *
 1033 *************************************************/
 1034 
 1035 /* Used to add an item onto a chain, or just return an unconnected item if the
 1036 "after" argument is NULL.
 1037 
 1038 Arguments:
 1039   n          the number to add
 1040   after      if not NULL points to item to insert after
 1041 
 1042 Returns:     new number block
 1043 */
 1044 
 1045 static omstr *
 1046 add_number(int n, omstr *after)
 1047 {
 1048 omstr *om = (omstr *)malloc(sizeof(omstr));
 1049 
 1050 if (om == NULL)
 1051   {
 1052   fprintf(stderr, "pcregrep: malloc failed\n");
 1053   pcregrep_exit(2);
 1054   }
 1055 om->next = NULL;
 1056 om->groupnum = n;
 1057 
 1058 if (after != NULL)
 1059   {
 1060   om->next = after->next;
 1061   after->next = om;
 1062   }
 1063 return om;
 1064 }
 1065 
 1066 
 1067 
 1068 /*************************************************
 1069 *            Read one line of input              *
 1070 *************************************************/
 1071 
 1072 /* Normally, input is read using fread() into a large buffer, so many lines may
 1073 be read at once. However, doing this for tty input means that no output appears
 1074 until a lot of input has been typed. Instead, tty input is handled line by
 1075 line. We cannot use fgets() for this, because it does not stop at a binary
 1076 zero, and therefore there is no way of telling how many characters it has read,
 1077 because there may be binary zeros embedded in the data.
 1078 
 1079 Arguments:
 1080   buffer     the buffer to read into
 1081   length     the maximum number of characters to read
 1082   f          the file
 1083 
 1084 Returns:     the number of characters read, zero at end of file
 1085 */
 1086 
 1087 static unsigned int
 1088 read_one_line(char *buffer, int length, FILE *f)
 1089 {
 1090 int c;
 1091 int yield = 0;
 1092 while ((c = fgetc(f)) != EOF)
 1093   {
 1094   buffer[yield++] = c;
 1095   if (c == '\n' || yield >= length) break;
 1096   }
 1097 return yield;
 1098 }
 1099 
 1100 
 1101 
 1102 /*************************************************
 1103 *             Find end of line                   *
 1104 *************************************************/
 1105 
 1106 /* The length of the endline sequence that is found is set via lenptr. This may
 1107 be zero at the very end of the file if there is no line-ending sequence there.
 1108 
 1109 Arguments:
 1110   p         current position in line
 1111   endptr    end of available data
 1112   lenptr    where to put the length of the eol sequence
 1113 
 1114 Returns:    pointer after the last byte of the line,
 1115             including the newline byte(s)
 1116 */
 1117 
 1118 static char *
 1119 end_of_line(char *p, char *endptr, int *lenptr)
 1120 {
 1121 switch(endlinetype)
 1122   {
 1123   default:      /* Just in case */
 1124   case EL_LF:
 1125   while (p < endptr && *p != '\n') p++;
 1126   if (p < endptr)
 1127     {
 1128     *lenptr = 1;
 1129     return p + 1;
 1130     }
 1131   *lenptr = 0;
 1132   return endptr;
 1133 
 1134   case EL_CR:
 1135   while (p < endptr && *p != '\r') p++;
 1136   if (p < endptr)
 1137     {
 1138     *lenptr = 1;
 1139     return p + 1;
 1140     }
 1141   *lenptr = 0;
 1142   return endptr;
 1143 
 1144   case EL_CRLF:
 1145   for (;;)
 1146     {
 1147     while (p < endptr && *p != '\r') p++;
 1148     if (++p >= endptr)
 1149       {
 1150       *lenptr = 0;
 1151       return endptr;
 1152       }
 1153     if (*p == '\n')
 1154       {
 1155       *lenptr = 2;
 1156       return p + 1;
 1157       }
 1158     }
 1159   break;
 1160 
 1161   case EL_ANYCRLF:
 1162   while (p < endptr)
 1163     {
 1164     int extra = 0;
 1165     register int c = *((unsigned char *)p);
 1166 
 1167     if (utf8 && c >= 0xc0)
 1168       {
 1169       int gcii, gcss;
 1170       extra = utf8_table4[c & 0x3f];  /* Number of additional bytes */
 1171       gcss = 6*extra;
 1172       c = (c & utf8_table3[extra]) << gcss;
 1173       for (gcii = 1; gcii <= extra; gcii++)
 1174         {
 1175         gcss -= 6;
 1176         c |= (p[gcii] & 0x3f) << gcss;
 1177         }
 1178       }
 1179 
 1180     p += 1 + extra;
 1181 
 1182     switch (c)
 1183       {
 1184       case '\n':
 1185       *lenptr = 1;
 1186       return p;
 1187 
 1188       case '\r':
 1189       if (p < endptr && *p == '\n')
 1190         {
 1191         *lenptr = 2;
 1192         p++;
 1193         }
 1194       else *lenptr = 1;
 1195       return p;
 1196 
 1197       default:
 1198       break;
 1199       }
 1200     }   /* End of loop for ANYCRLF case */
 1201 
 1202   *lenptr = 0;  /* Must have hit the end */
 1203   return endptr;
 1204 
 1205   case EL_ANY:
 1206   while (p < endptr)
 1207     {
 1208     int extra = 0;
 1209     register int c = *((unsigned char *)p);
 1210 
 1211     if (utf8 && c >= 0xc0)
 1212       {
 1213       int gcii, gcss;
 1214       extra = utf8_table4[c & 0x3f];  /* Number of additional bytes */
 1215       gcss = 6*extra;
 1216       c = (c & utf8_table3[extra]) << gcss;
 1217       for (gcii = 1; gcii <= extra; gcii++)
 1218         {
 1219         gcss -= 6;
 1220         c |= (p[gcii] & 0x3f) << gcss;
 1221         }
 1222       }
 1223 
 1224     p += 1 + extra;
 1225 
 1226     switch (c)
 1227       {
 1228       case '\n':    /* LF */
 1229       case '\v':    /* VT */
 1230       case '\f':    /* FF */
 1231       *lenptr = 1;
 1232       return p;
 1233 
 1234       case '\r':    /* CR */
 1235       if (p < endptr && *p == '\n')
 1236         {
 1237         *lenptr = 2;
 1238         p++;
 1239         }
 1240       else *lenptr = 1;
 1241       return p;
 1242 
 1243 #ifndef EBCDIC
 1244       case 0x85:    /* Unicode NEL */
 1245       *lenptr = utf8? 2 : 1;
 1246       return p;
 1247 
 1248       case 0x2028:  /* Unicode LS */
 1249       case 0x2029:  /* Unicode PS */
 1250       *lenptr = 3;
 1251       return p;
 1252 #endif  /* Not EBCDIC */
 1253 
 1254       default:
 1255       break;
 1256       }
 1257     }   /* End of loop for ANY case */
 1258 
 1259   *lenptr = 0;  /* Must have hit the end */
 1260   return endptr;
 1261   }     /* End of overall switch */
 1262 }
 1263 
 1264 
 1265 
 1266 /*************************************************
 1267 *         Find start of previous line            *
 1268 *************************************************/
 1269 
 1270 /* This is called when looking back for before lines to print.
 1271 
 1272 Arguments:
 1273   p         start of the subsequent line
 1274   startptr  start of available data
 1275 
 1276 Returns:    pointer to the start of the previous line
 1277 */
 1278 
 1279 static char *
 1280 previous_line(char *p, char *startptr)
 1281 {
 1282 switch(endlinetype)
 1283   {
 1284   default:      /* Just in case */
 1285   case EL_LF:
 1286   p--;
 1287   while (p > startptr && p[-1] != '\n') p--;
 1288   return p;
 1289 
 1290   case EL_CR:
 1291   p--;
 1292   while (p > startptr && p[-1] != '\n') p--;
 1293   return p;
 1294 
 1295   case EL_CRLF:
 1296   for (;;)
 1297     {
 1298     p -= 2;
 1299     while (p > startptr && p[-1] != '\n') p--;
 1300     if (p <= startptr + 1 || p[-2] == '\r') return p;
 1301     }
 1302   /* Control can never get here */
 1303 
 1304   case EL_ANY:
 1305   case EL_ANYCRLF:
 1306   if (*(--p) == '\n' && p > startptr && p[-1] == '\r') p--;
 1307   if (utf8) while ((*p & 0xc0) == 0x80) p--;
 1308 
 1309   while (p > startptr)
 1310     {
 1311     register unsigned int c;
 1312     char *pp = p - 1;
 1313 
 1314     if (utf8)
 1315       {
 1316       int extra = 0;
 1317       while ((*pp & 0xc0) == 0x80) pp--;
 1318       c = *((unsigned char *)pp);
 1319       if (c >= 0xc0)
 1320         {
 1321         int gcii, gcss;
 1322         extra = utf8_table4[c & 0x3f];  /* Number of additional bytes */
 1323         gcss = 6*extra;
 1324         c = (c & utf8_table3[extra]) << gcss;
 1325         for (gcii = 1; gcii <= extra; gcii++)
 1326           {
 1327           gcss -= 6;
 1328           c |= (pp[gcii] & 0x3f) << gcss;
 1329           }
 1330         }
 1331       }
 1332     else c = *((unsigned char *)pp);
 1333 
 1334     if (endlinetype == EL_ANYCRLF) switch (c)
 1335       {
 1336       case '\n':    /* LF */
 1337       case '\r':    /* CR */
 1338       return p;
 1339 
 1340       default:
 1341       break;
 1342       }
 1343 
 1344     else switch (c)
 1345       {
 1346       case '\n':    /* LF */
 1347       case '\v':    /* VT */
 1348       case '\f':    /* FF */
 1349       case '\r':    /* CR */
 1350 #ifndef EBCDIE
 1351       case 0x85:    /* Unicode NEL */
 1352       case 0x2028:  /* Unicode LS */
 1353       case 0x2029:  /* Unicode PS */
 1354 #endif  /* Not EBCDIC */
 1355       return p;
 1356 
 1357       default:
 1358       break;
 1359       }
 1360 
 1361     p = pp;  /* Back one character */
 1362     }        /* End of loop for ANY case */
 1363 
 1364   return startptr;  /* Hit start of data */
 1365   }     /* End of overall switch */
 1366 }
 1367 
 1368 
 1369 
 1370 
 1371 
 1372 /*************************************************
 1373 *       Print the previous "after" lines         *
 1374 *************************************************/
 1375 
 1376 /* This is called if we are about to lose said lines because of buffer filling,
 1377 and at the end of the file. The data in the line is written using fwrite() so
 1378 that a binary zero does not terminate it.
 1379 
 1380 Arguments:
 1381   lastmatchnumber   the number of the last matching line, plus one
 1382   lastmatchrestart  where we restarted after the last match
 1383   endptr            end of available data
 1384   printname         filename for printing
 1385 
 1386 Returns:            nothing
 1387 */
 1388 
 1389 static void
 1390 do_after_lines(unsigned long int lastmatchnumber, char *lastmatchrestart,
 1391   char *endptr, char *printname)
 1392 {
 1393 if (after_context > 0 && lastmatchnumber > 0)
 1394   {
 1395   int count = 0;
 1396   while (lastmatchrestart < endptr && count++ < after_context)
 1397     {
 1398     int ellength;
 1399     char *pp = lastmatchrestart;
 1400     if (printname != NULL) fprintf(stdout, "%s-", printname);
 1401     if (number) fprintf(stdout, "%lu-", lastmatchnumber++);
 1402     pp = end_of_line(pp, endptr, &ellength);
 1403     FWRITE(lastmatchrestart, 1, pp - lastmatchrestart, stdout);
 1404     lastmatchrestart = pp;
 1405     }
 1406   hyphenpending = TRUE;
 1407   }
 1408 }
 1409 
 1410 
 1411 
 1412 /*************************************************
 1413 *   Apply patterns to subject till one matches   *
 1414 *************************************************/
 1415 
 1416 /* This function is called to run through all patterns, looking for a match. It
 1417 is used multiple times for the same subject when colouring is enabled, in order
 1418 to find all possible matches.
 1419 
 1420 Arguments:
 1421   matchptr     the start of the subject
 1422   length       the length of the subject to match
 1423   options      options for pcre_exec
 1424   startoffset  where to start matching
 1425   offsets      the offets vector to fill in
 1426   mrc          address of where to put the result of pcre_exec()
 1427 
 1428 Returns:      TRUE if there was a match
 1429               FALSE if there was no match
 1430               invert if there was a non-fatal error
 1431 */
 1432 
 1433 static BOOL
 1434 match_patterns(char *matchptr, size_t length, unsigned int options,
 1435   int startoffset, int *offsets, int *mrc)
 1436 {
 1437 int i;
 1438 size_t slen = length;
 1439 patstr *p = patterns;
 1440 const char *msg = "this text:\n\n";
 1441 
 1442 if (slen > 200)
 1443   {
 1444   slen = 200;
 1445   msg = "text that starts:\n\n";
 1446   }
 1447 for (i = 1; p != NULL; p = p->next, i++)
 1448   {
 1449   *mrc = pcre_exec(p->compiled, p->hint, matchptr, (int)length,
 1450     startoffset, options, offsets, OFFSET_SIZE);
 1451   if (*mrc >= 0) return TRUE;
 1452   if (*mrc == PCRE_ERROR_NOMATCH) continue;
 1453   fprintf(stderr, "pcregrep: pcre_exec() gave error %d while matching ", *mrc);
 1454   if (patterns->next != NULL) fprintf(stderr, "pattern number %d to ", i);
 1455   fprintf(stderr, "%s", msg);
 1456   FWRITE(matchptr, 1, slen, stderr);   /* In case binary zero included */
 1457   fprintf(stderr, "\n\n");
 1458   if (*mrc == PCRE_ERROR_MATCHLIMIT || *mrc == PCRE_ERROR_RECURSIONLIMIT ||
 1459       *mrc == PCRE_ERROR_JIT_STACKLIMIT)
 1460     resource_error = TRUE;
 1461   if (error_count++ > 20)
 1462     {
 1463     fprintf(stderr, "pcregrep: Too many errors - abandoned.\n");
 1464     pcregrep_exit(2);
 1465     }
 1466   return invert;    /* No more matching; don't show the line again */
 1467   }
 1468 
 1469 return FALSE;  /* No match, no errors */
 1470 }
 1471 
 1472 
 1473 
 1474 /*************************************************
 1475 *            Grep an individual file             *
 1476 *************************************************/
 1477 
 1478 /* This is called from grep_or_recurse() below. It uses a buffer that is three
 1479 times the value of bufthird. The matching point is never allowed to stray into
 1480 the top third of the buffer, thus keeping more of the file available for
 1481 context printing or for multiline scanning. For large files, the pointer will
 1482 be in the middle third most of the time, so the bottom third is available for
 1483 "before" context printing.
 1484 
 1485 Arguments:
 1486   handle       the fopened FILE stream for a normal file
 1487                the gzFile pointer when reading is via libz
 1488                the BZFILE pointer when reading is via libbz2
 1489   frtype       FR_PLAIN, FR_LIBZ, or FR_LIBBZ2
 1490   filename     the file name or NULL (for errors)
 1491   printname    the file name if it is to be printed for each match
 1492                or NULL if the file name is not to be printed
 1493                it cannot be NULL if filenames[_nomatch]_only is set
 1494 
 1495 Returns:       0 if there was at least one match
 1496                1 otherwise (no matches)
 1497                2 if an overlong line is encountered
 1498                3 if there is a read error on a .bz2 file
 1499 */
 1500 
 1501 static int
 1502 pcregrep(void *handle, int frtype, char *filename, char *printname)
 1503 {
 1504 int rc = 1;
 1505 int filepos = 0;
 1506 int offsets[OFFSET_SIZE];
 1507 unsigned long int linenumber = 1;
 1508 unsigned long int lastmatchnumber = 0;
 1509 unsigned long int count = 0;
 1510 char *lastmatchrestart = NULL;
 1511 char *ptr = main_buffer;
 1512 char *endptr;
 1513 size_t bufflength;
 1514 BOOL binary = FALSE;
 1515 BOOL endhyphenpending = FALSE;
 1516 BOOL input_line_buffered = line_buffered;
 1517 FILE *in = NULL;                    /* Ensure initialized */
 1518 
 1519 #ifdef SUPPORT_LIBZ
 1520 gzFile ingz = NULL;
 1521 #endif
 1522 
 1523 #ifdef SUPPORT_LIBBZ2
 1524 BZFILE *inbz2 = NULL;
 1525 #endif
 1526 
 1527 
 1528 /* Do the first read into the start of the buffer and set up the pointer to end
 1529 of what we have. In the case of libz, a non-zipped .gz file will be read as a
 1530 plain file. However, if a .bz2 file isn't actually bzipped, the first read will
 1531 fail. */
 1532 
 1533 (void)frtype;
 1534 
 1535 #ifdef SUPPORT_LIBZ
 1536 if (frtype == FR_LIBZ)
 1537   {
 1538   ingz = (gzFile)handle;
 1539   bufflength = gzread (ingz, main_buffer, bufsize);
 1540   }
 1541 else
 1542 #endif
 1543 
 1544 #ifdef SUPPORT_LIBBZ2
 1545 if (frtype == FR_LIBBZ2)
 1546   {
 1547   inbz2 = (BZFILE *)handle;
 1548   bufflength = BZ2_bzread(inbz2, main_buffer, bufsize);
 1549   if ((int)bufflength < 0) return 2;   /* Gotcha: bufflength is size_t; */
 1550   }                                    /* without the cast it is unsigned. */
 1551 else
 1552 #endif
 1553 
 1554   {
 1555   in = (FILE *)handle;
 1556   if (is_file_tty(in)) input_line_buffered = TRUE;
 1557   bufflength = input_line_buffered?
 1558     read_one_line(main_buffer, bufsize, in) :
 1559     fread(main_buffer, 1, bufsize, in);
 1560   }
 1561 
 1562 endptr = main_buffer + bufflength;
 1563 
 1564 /* Unless binary-files=text, see if we have a binary file. This uses the same
 1565 rule as GNU grep, namely, a search for a binary zero byte near the start of the
 1566 file. */
 1567 
 1568 if (binary_files != BIN_TEXT)
 1569   {
 1570   binary =
 1571     memchr(main_buffer, 0, (bufflength > 1024)? 1024 : bufflength) != NULL;
 1572   if (binary && binary_files == BIN_NOMATCH) return 1;
 1573   }
 1574 
 1575 /* Loop while the current pointer is not at the end of the file. For large
 1576 files, endptr will be at the end of the buffer when we are in the middle of the
 1577 file, but ptr will never get there, because as soon as it gets over 2/3 of the
 1578 way, the buffer is shifted left and re-filled. */
 1579 
 1580 while (ptr < endptr)
 1581   {
 1582   int endlinelength;
 1583   int mrc = 0;
 1584   int startoffset = 0;
 1585   int prevoffsets[2];
 1586   unsigned int options = 0;
 1587   BOOL match;
 1588   char *matchptr = ptr;
 1589   char *t = ptr;
 1590   size_t length, linelength;
 1591 
 1592   prevoffsets[0] = prevoffsets[1] = -1;
 1593 
 1594   /* At this point, ptr is at the start of a line. We need to find the length
 1595   of the subject string to pass to pcre_exec(). In multiline mode, it is the
 1596   length remainder of the data in the buffer. Otherwise, it is the length of
 1597   the next line, excluding the terminating newline. After matching, we always
 1598   advance by the length of the next line. In multiline mode the PCRE_FIRSTLINE
 1599   option is used for compiling, so that any match is constrained to be in the
 1600   first line. */
 1601 
 1602   t = end_of_line(t, endptr, &endlinelength);
 1603   linelength = t - ptr - endlinelength;
 1604   length = multiline? (size_t)(endptr - ptr) : linelength;
 1605 
 1606   /* Check to see if the line we are looking at extends right to the very end
 1607   of the buffer without a line terminator. This means the line is too long to
 1608   handle. */
 1609 
 1610   if (endlinelength == 0 && t == main_buffer + bufsize)
 1611     {
 1612     fprintf(stderr, "pcregrep: line %lu%s%s is too long for the internal buffer\n"
 1613                     "pcregrep: check the --buffer-size option\n",
 1614                     linenumber,
 1615                     (filename == NULL)? "" : " of file ",
 1616                     (filename == NULL)? "" : filename);
 1617     return 2;
 1618     }
 1619 
 1620   /* Extra processing for Jeffrey Friedl's debugging. */
 1621 
 1622 #ifdef JFRIEDL_DEBUG
 1623   if (jfriedl_XT || jfriedl_XR)
 1624   {
 1625 #     include <sys/time.h>
 1626 #     include <time.h>
 1627       struct timeval start_time, end_time;
 1628       struct timezone dummy;
 1629       int i;
 1630 
 1631       if (jfriedl_XT)
 1632       {
 1633           unsigned long newlen = length * jfriedl_XT + strlen(jfriedl_prefix) + strlen(jfriedl_postfix);
 1634           const char *orig = ptr;
 1635           ptr = malloc(newlen + 1);
 1636           if (!ptr) {
 1637                   printf("out of memory");
 1638                   pcregrep_exit(2);
 1639           }
 1640           endptr = ptr;
 1641           strcpy(endptr, jfriedl_prefix); endptr += strlen(jfriedl_prefix);
 1642           for (i = 0; i < jfriedl_XT; i++) {
 1643                   strncpy(endptr, orig,  length);
 1644                   endptr += length;
 1645           }
 1646           strcpy(endptr, jfriedl_postfix); endptr += strlen(jfriedl_postfix);
 1647           length = newlen;
 1648       }
 1649 
 1650       if (gettimeofday(&start_time, &dummy) != 0)
 1651               perror("bad gettimeofday");
 1652 
 1653 
 1654       for (i = 0; i < jfriedl_XR; i++)
 1655           match = (pcre_exec(patterns->compiled, patterns->hint, ptr, length, 0,
 1656               PCRE_NOTEMPTY, offsets, OFFSET_SIZE) >= 0);
 1657 
 1658       if (gettimeofday(&end_time, &dummy) != 0)
 1659               perror("bad gettimeofday");
 1660 
 1661       double delta = ((end_time.tv_sec + (end_time.tv_usec / 1000000.0))
 1662                       -
 1663                       (start_time.tv_sec + (start_time.tv_usec / 1000000.0)));
 1664 
 1665       printf("%s TIMER[%.4f]\n", match ? "MATCH" : "FAIL", delta);
 1666       return 0;
 1667   }
 1668 #endif
 1669 
 1670   /* We come back here after a match when show_only_matching is set, in order
 1671   to find any further matches in the same line. This applies to
 1672   --only-matching, --file-offsets, and --line-offsets. */
 1673 
 1674   ONLY_MATCHING_RESTART:
 1675 
 1676   /* Run through all the patterns until one matches or there is an error other
 1677   than NOMATCH. This code is in a subroutine so that it can be re-used for
 1678   finding subsequent matches when colouring matched lines. After finding one
 1679   match, set PCRE_NOTEMPTY to disable any further matches of null strings in
 1680   this line. */
 1681 
 1682   match = match_patterns(matchptr, length, options, startoffset, offsets, &mrc);
 1683   options = PCRE_NOTEMPTY;
 1684 
 1685   /* If it's a match or a not-match (as required), do what's wanted. */
 1686 
 1687   if (match != invert)
 1688     {
 1689     BOOL hyphenprinted = FALSE;
 1690 
 1691     /* We've failed if we want a file that doesn't have any matches. */
 1692 
 1693     if (filenames == FN_NOMATCH_ONLY) return 1;
 1694 
 1695     /* If all we want is a yes/no answer, stop now. */
 1696 
 1697     if (quiet) return 0;
 1698 
 1699     /* Just count if just counting is wanted. */
 1700 
 1701     else if (count_only) count++;
 1702 
 1703     /* When handling a binary file and binary-files==binary, the "binary"
 1704     variable will be set true (it's false in all other cases). In this
 1705     situation we just want to output the file name. No need to scan further. */
 1706 
 1707     else if (binary)
 1708       {
 1709       fprintf(stdout, "Binary file %s matches\n", filename);
 1710       return 0;
 1711       }
 1712 
 1713     /* If all we want is a file name, there is no need to scan any more lines
 1714     in the file. */
 1715 
 1716     else if (filenames == FN_MATCH_ONLY)
 1717       {
 1718       fprintf(stdout, "%s\n", printname);
 1719       return 0;
 1720       }
 1721 
 1722     /* The --only-matching option prints just the substring that matched,
 1723     and/or one or more captured portions of it, as long as these strings are
 1724     not empty. The --file-offsets and --line-offsets options output offsets for
 1725     the matching substring (all three set show_only_matching). None of these
 1726     mutually exclusive options prints any context. Afterwards, adjust the start
 1727     and then jump back to look for further matches in the same line. If we are
 1728     in invert mode, however, nothing is printed and we do not restart - this
 1729     could still be useful because the return code is set. */
 1730 
 1731     else if (show_only_matching)
 1732       {
 1733       if (!invert)
 1734         {
 1735         int oldstartoffset = startoffset;
 1736 
 1737         /* It is possible, when a lookbehind assertion contains \K, for the
 1738         same string to be found again. The code below advances startoffset, but
 1739         until it is past the "bumpalong" offset that gave the match, the same
 1740         substring will be returned. The PCRE1 library does not return the
 1741         bumpalong offset, so all we can do is ignore repeated strings. (PCRE2
 1742         does this better.) */
 1743 
 1744         if (prevoffsets[0] != offsets[0] || prevoffsets[1] != offsets[1])
 1745           {
 1746           prevoffsets[0] = offsets[0];
 1747           prevoffsets[1] = offsets[1];
 1748 
 1749           if (printname != NULL) fprintf(stdout, "%s:", printname);
 1750           if (number) fprintf(stdout, "%lu:", linenumber);
 1751 
 1752           /* Handle --line-offsets */
 1753 
 1754           if (line_offsets)
 1755             fprintf(stdout, "%d,%d\n", (int)(matchptr + offsets[0] - ptr),
 1756               offsets[1] - offsets[0]);
 1757 
 1758           /* Handle --file-offsets */
 1759 
 1760           else if (file_offsets)
 1761             fprintf(stdout, "%d,%d\n",
 1762               (int)(filepos + matchptr + offsets[0] - ptr),
 1763               offsets[1] - offsets[0]);
 1764 
 1765           /* Handle --only-matching, which may occur many times */
 1766 
 1767           else
 1768             {
 1769             BOOL printed = FALSE;
 1770             omstr *om;
 1771 
 1772             for (om = only_matching; om != NULL; om = om->next)
 1773               {
 1774               int n = om->groupnum;
 1775               if (n < mrc)
 1776                 {
 1777                 int plen = offsets[2*n + 1] - offsets[2*n];
 1778                 if (plen > 0)
 1779                   {
 1780                   if (printed) fprintf(stdout, "%s", om_separator);
 1781                   if (do_colour) fprintf(stdout, "%c[%sm", 0x1b, colour_string);
 1782                   FWRITE(matchptr + offsets[n*2], 1, plen, stdout);
 1783                   if (do_colour) fprintf(stdout, "%c[00m", 0x1b);
 1784                   printed = TRUE;
 1785                   }
 1786                 }
 1787               }
 1788 
 1789             if (printed || printname != NULL || number) fprintf(stdout, "\n");
 1790             }
 1791           }
 1792 
        /* Prepare to repeat to find the next match. If the pattern contained
        a lookbehind that included \K, it is possible that the end of the match
        might be at or before the actual starting offset we have just used. We
        need to start one character further on. Unfortunately, for unanchored
        patterns, the actual start offset can be greater than the one that was
        set as a result of "bumpalong". PCRE1 does not return the actual start
        offset, so we have to check against the original start offset. This may
        lead to duplicates - so we need the fudge above to avoid printing them.
        (PCRE2 does this better.) */
 1802 
 1803         match = FALSE;
 1804         if (line_buffered) fflush(stdout);
 1805         rc = 0;                      /* Had some success */
 1806 
 1807         startoffset = offsets[1];    /* Restart after the match */
 1808         if (startoffset <= oldstartoffset)
 1809           {
 1810           if ((size_t)startoffset >= length)
 1811             goto END_ONE_MATCH;              /* We were at the end */
 1812           startoffset = oldstartoffset + 1;
 1813           if (utf8)
 1814             while ((matchptr[startoffset] & 0xc0) == 0x80) startoffset++;
 1815           }
 1816 
 1817         /* If the current match ended past the end of the line (only possible
 1818         in multiline mode), we must move on to the line in which it did end
 1819         before searching for more matches. */
 1820 
 1821         while (startoffset > (int)linelength)
 1822           {
 1823           matchptr = ptr += linelength + endlinelength;
 1824           filepos += (int)(linelength + endlinelength);
 1825           linenumber++;
 1826           startoffset -= (int)(linelength + endlinelength);
 1827           t = end_of_line(ptr, endptr, &endlinelength);
 1828           linelength = t - ptr - endlinelength;
 1829           length = (size_t)(endptr - ptr);
 1830           }
 1831 
 1832         goto ONLY_MATCHING_RESTART;
 1833         }
 1834       }
 1835 
    /* This is the default case when none of the above options is set. We print
    the matching line(s), possibly preceded and/or followed by other lines of
    context. */
 1839 
 1840     else
 1841       {
 1842       /* See if there is a requirement to print some "after" lines from a
 1843       previous match. We never print any overlaps. */
 1844 
 1845       if (after_context > 0 && lastmatchnumber > 0)
 1846         {
 1847         int ellength;
 1848         int linecount = 0;
 1849         char *p = lastmatchrestart;
 1850 
 1851         while (p < ptr && linecount < after_context)
 1852           {
 1853           p = end_of_line(p, ptr, &ellength);
 1854           linecount++;
 1855           }
 1856 
 1857         /* It is important to advance lastmatchrestart during this printing so
 1858         that it interacts correctly with any "before" printing below. Print
 1859         each line's data using fwrite() in case there are binary zeroes. */
 1860 
 1861         while (lastmatchrestart < p)
 1862           {
 1863           char *pp = lastmatchrestart;
 1864           if (printname != NULL) fprintf(stdout, "%s-", printname);
 1865           if (number) fprintf(stdout, "%lu-", lastmatchnumber++);
 1866           pp = end_of_line(pp, endptr, &ellength);
 1867           FWRITE(lastmatchrestart, 1, pp - lastmatchrestart, stdout);
 1868           lastmatchrestart = pp;
 1869           }
 1870         if (lastmatchrestart != ptr) hyphenpending = TRUE;
 1871         }
 1872 
 1873       /* If there were non-contiguous lines printed above, insert hyphens. */
 1874 
 1875       if (hyphenpending)
 1876         {
 1877         fprintf(stdout, "--\n");
 1878         hyphenpending = FALSE;
 1879         hyphenprinted = TRUE;
 1880         }
 1881 
 1882       /* See if there is a requirement to print some "before" lines for this
 1883       match. Again, don't print overlaps. */
 1884 
 1885       if (before_context > 0)
 1886         {
 1887         int linecount = 0;
 1888         char *p = ptr;
 1889 
 1890         while (p > main_buffer && (lastmatchnumber == 0 || p > lastmatchrestart) &&
 1891                linecount < before_context)
 1892           {
 1893           linecount++;
 1894           p = previous_line(p, main_buffer);
 1895           }
 1896 
 1897         if (lastmatchnumber > 0 && p > lastmatchrestart && !hyphenprinted)
 1898           fprintf(stdout, "--\n");
 1899 
 1900         while (p < ptr)
 1901           {
 1902           int ellength;
 1903           char *pp = p;
 1904           if (printname != NULL) fprintf(stdout, "%s-", printname);
 1905           if (number) fprintf(stdout, "%lu-", linenumber - linecount--);
 1906           pp = end_of_line(pp, endptr, &ellength);
 1907           FWRITE(p, 1, pp - p, stdout);
 1908           p = pp;
 1909           }
 1910         }
 1911 
 1912       /* Now print the matching line(s); ensure we set hyphenpending at the end
 1913       of the file if any context lines are being output. */
 1914 
 1915       if (after_context > 0 || before_context > 0)
 1916         endhyphenpending = TRUE;
 1917 
 1918       if (printname != NULL) fprintf(stdout, "%s:", printname);
 1919       if (number) fprintf(stdout, "%lu:", linenumber);
 1920 
 1921       /* In multiline mode, we want to print to the end of the line in which
 1922       the end of the matched string is found, so we adjust linelength and the
 1923       line number appropriately, but only when there actually was a match
 1924       (invert not set). Because the PCRE_FIRSTLINE option is set, the start of
 1925       the match will always be before the first newline sequence. */
 1926 
 1927       if (multiline & !invert)
 1928         {
 1929         char *endmatch = ptr + offsets[1];
 1930         t = ptr;
 1931         while (t <= endmatch)
 1932           {
 1933           t = end_of_line(t, endptr, &endlinelength);
 1934           if (t < endmatch) linenumber++; else break;
 1935           }
 1936         linelength = t - ptr - endlinelength;
 1937         }
 1938 
 1939       /*** NOTE: Use only fwrite() to output the data line, so that binary
 1940       zeroes are treated as just another data character. */
 1941 
 1942       /* This extra option, for Jeffrey Friedl's debugging requirements,
 1943       replaces the matched string, or a specific captured string if it exists,
 1944       with X. When this happens, colouring is ignored. */
 1945 
 1946 #ifdef JFRIEDL_DEBUG
 1947       if (S_arg >= 0 && S_arg < mrc)
 1948         {
 1949         int first = S_arg * 2;
 1950         int last  = first + 1;
 1951         FWRITE(ptr, 1, offsets[first], stdout);
 1952         fprintf(stdout, "X");
 1953         FWRITE(ptr + offsets[last], 1, linelength - offsets[last], stdout);
 1954         }
 1955       else
 1956 #endif
 1957 
 1958       /* We have to split the line(s) up if colouring, and search for further
 1959       matches, but not of course if the line is a non-match. */
 1960 
 1961       if (do_colour && !invert)
 1962         {
 1963         int plength;
 1964         FWRITE(ptr, 1, offsets[0], stdout);
 1965         fprintf(stdout, "%c[%sm", 0x1b, colour_string);
 1966         FWRITE(ptr + offsets[0], 1, offsets[1] - offsets[0], stdout);
 1967         fprintf(stdout, "%c[00m", 0x1b);
 1968         for (;;)
 1969           {
 1970           startoffset = offsets[1];
 1971           if (startoffset >= (int)linelength + endlinelength ||
 1972               !match_patterns(matchptr, length, options, startoffset, offsets,
 1973                 &mrc))
 1974             break;
 1975           FWRITE(matchptr + startoffset, 1, offsets[0] - startoffset, stdout);
 1976           fprintf(stdout, "%c[%sm", 0x1b, colour_string);
 1977           FWRITE(matchptr + offsets[0], 1, offsets[1] - offsets[0], stdout);
 1978           fprintf(stdout, "%c[00m", 0x1b);
 1979           }
 1980 
 1981         /* In multiline mode, we may have already printed the complete line
 1982         and its line-ending characters (if they matched the pattern), so there
 1983         may be no more to print. */
 1984 
 1985         plength = (int)((linelength + endlinelength) - startoffset);
 1986         if (plength > 0) FWRITE(ptr + startoffset, 1, plength, stdout);
 1987         }
 1988 
 1989       /* Not colouring; no need to search for further matches */
 1990 
 1991       else FWRITE(ptr, 1, linelength + endlinelength, stdout);
 1992       }
 1993 
 1994     /* End of doing what has to be done for a match. If --line-buffered was
 1995     given, flush the output. */
 1996 
 1997     if (line_buffered) fflush(stdout);
 1998     rc = 0;    /* Had some success */
 1999 
 2000     /* Remember where the last match happened for after_context. We remember
 2001     where we are about to restart, and that line's number. */
 2002 
 2003     lastmatchrestart = ptr + linelength + endlinelength;
 2004     lastmatchnumber = linenumber + 1;
 2005     }
 2006 
 2007   /* For a match in multiline inverted mode (which of course did not cause
 2008   anything to be printed), we have to move on to the end of the match before
 2009   proceeding. */
 2010 
 2011   if (multiline && invert && match)
 2012     {
 2013     int ellength;
 2014     char *endmatch = ptr + offsets[1];
 2015     t = ptr;
 2016     while (t < endmatch)
 2017       {
 2018       t = end_of_line(t, endptr, &ellength);
 2019       if (t <= endmatch) linenumber++; else break;
 2020       }
 2021     endmatch = end_of_line(endmatch, endptr, &ellength);
 2022     linelength = endmatch - ptr - ellength;
 2023     }
 2024 
 2025   /* Advance to after the newline and increment the line number. The file
 2026   offset to the current line is maintained in filepos. */
 2027 
 2028   END_ONE_MATCH:
 2029   ptr += linelength + endlinelength;
 2030   filepos += (int)(linelength + endlinelength);
 2031   linenumber++;
 2032 
 2033   /* If input is line buffered, and the buffer is not yet full, read another
 2034   line and add it into the buffer. */
 2035 
 2036   if (input_line_buffered && bufflength < (size_t)bufsize)
 2037     {
 2038     int add = read_one_line(ptr, bufsize - (int)(ptr - main_buffer), in);
 2039     bufflength += add;
 2040     endptr += add;
 2041     }
 2042 
 2043   /* If we haven't yet reached the end of the file (the buffer is full), and
 2044   the current point is in the top 1/3 of the buffer, slide the buffer down by
 2045   1/3 and refill it. Before we do this, if some unprinted "after" lines are
 2046   about to be lost, print them. */
 2047 
 2048   if (bufflength >= (size_t)bufsize && ptr > main_buffer + 2*bufthird)
 2049     {
 2050     if (after_context > 0 &&
 2051         lastmatchnumber > 0 &&
 2052         lastmatchrestart < main_buffer + bufthird)
 2053       {
 2054       do_after_lines(lastmatchnumber, lastmatchrestart, endptr, printname);
 2055       lastmatchnumber = 0;
 2056       }
 2057 
 2058     /* Now do the shuffle */
 2059 
 2060     memmove(main_buffer, main_buffer + bufthird, 2*bufthird);
 2061     ptr -= bufthird;
 2062 
 2063 #ifdef SUPPORT_LIBZ
 2064     if (frtype == FR_LIBZ)
 2065       bufflength = 2*bufthird +
 2066         gzread (ingz, main_buffer + 2*bufthird, bufthird);
 2067     else
 2068 #endif
 2069 
 2070 #ifdef SUPPORT_LIBBZ2
 2071     if (frtype == FR_LIBBZ2)
 2072       bufflength = 2*bufthird +
 2073         BZ2_bzread(inbz2, main_buffer + 2*bufthird, bufthird);
 2074     else
 2075 #endif
 2076 
 2077     bufflength = 2*bufthird +
 2078       (input_line_buffered?
 2079        read_one_line(main_buffer + 2*bufthird, bufthird, in) :
 2080        fread(main_buffer + 2*bufthird, 1, bufthird, in));
 2081     endptr = main_buffer + bufflength;
 2082 
 2083     /* Adjust any last match point */
 2084 
 2085     if (lastmatchnumber > 0) lastmatchrestart -= bufthird;
 2086     }
 2087   }     /* Loop through the whole file */
 2088 
 2089 /* End of file; print final "after" lines if wanted; do_after_lines sets
 2090 hyphenpending if it prints something. */
 2091 
 2092 if (!show_only_matching && !count_only)
 2093   {
 2094   do_after_lines(lastmatchnumber, lastmatchrestart, endptr, printname);
 2095   hyphenpending |= endhyphenpending;
 2096   }
 2097 
 2098 /* Print the file name if we are looking for those without matches and there
 2099 were none. If we found a match, we won't have got this far. */
 2100 
 2101 if (filenames == FN_NOMATCH_ONLY)
 2102   {
 2103   fprintf(stdout, "%s\n", printname);
 2104   return 0;
 2105   }
 2106 
 2107 /* Print the match count if wanted */
 2108 
 2109 if (count_only && !quiet)
 2110   {
 2111   if (count > 0 || !omit_zero_count)
 2112     {
 2113     if (printname != NULL && filenames != FN_NONE)
 2114       fprintf(stdout, "%s:", printname);
 2115     fprintf(stdout, "%lu\n", count);
 2116     }
 2117   }
 2118 
 2119 return rc;
 2120 }
 2121 
 2122 
 2123 
 2124 /*************************************************
 2125 *     Grep a file or recurse into a directory    *
 2126 *************************************************/
 2127 
 2128 /* Given a path name, if it's a directory, scan all the files if we are
 2129 recursing; if it's a file, grep it.
 2130 
 2131 Arguments:
 2132   pathname          the path to investigate
 2133   dir_recurse       TRUE if recursing is wanted (-r or -drecurse)
 2134   only_one_at_top   TRUE if the path is the only one at toplevel
 2135 
 2136 Returns:  -1 the file/directory was skipped
 2137            0 if there was at least one match
 2138            1 if there were no matches
 2139            2 there was some kind of error
 2140 
 2141 However, file opening failures are suppressed if "silent" is set.
 2142 */
 2143 
 2144 static int
 2145 grep_or_recurse(char *pathname, BOOL dir_recurse, BOOL only_one_at_top)
 2146 {
 2147 int rc = 1;
 2148 int frtype;
 2149 void *handle;
 2150 char *lastcomp;
 2151 FILE *in = NULL;           /* Ensure initialized */
 2152 
 2153 #ifdef SUPPORT_LIBZ
 2154 gzFile ingz = NULL;
 2155 #endif
 2156 
 2157 #ifdef SUPPORT_LIBBZ2
 2158 BZFILE *inbz2 = NULL;
 2159 #endif
 2160 
 2161 #if defined SUPPORT_LIBZ || defined SUPPORT_LIBBZ2
 2162 int pathlen;
 2163 #endif
 2164 
 2165 #if defined NATIVE_ZOS
 2166 int zos_type;
 2167 FILE *zos_test_file;
 2168 #endif
 2169 
 2170 /* If the file name is "-" we scan stdin */
 2171 
 2172 if (strcmp(pathname, "-") == 0)
 2173   {
 2174   return pcregrep(stdin, FR_PLAIN, stdin_name,
 2175     (filenames > FN_DEFAULT || (filenames == FN_DEFAULT && !only_one_at_top))?
 2176       stdin_name : NULL);
 2177   }
 2178 
 2179 /* Inclusion and exclusion: --include-dir and --exclude-dir apply only to
 2180 directories, whereas --include and --exclude apply to everything else. The test
 2181 is against the final component of the path. */
 2182 
 2183 lastcomp = strrchr(pathname, FILESEP);
 2184 lastcomp = (lastcomp == NULL)? pathname : lastcomp + 1;
 2185 
 2186 /* If the file is a directory, skip if not recursing or if explicitly excluded.
 2187 Otherwise, scan the directory and recurse for each path within it. The scanning
 2188 code is localized so it can be made system-specific. */
 2189 
 2190 
 2191 /* For z/OS, determine the file type. */
 2192 
 2193 #if defined NATIVE_ZOS
 2194 zos_test_file =  fopen(pathname,"rb");
 2195 
 2196 if (zos_test_file == NULL)
 2197    {
 2198    if (!silent) fprintf(stderr, "pcregrep: failed to test next file %s\n",
 2199      pathname, strerror(errno));
 2200    return -1;
 2201    }
 2202 zos_type = identifyzosfiletype (zos_test_file);
 2203 fclose (zos_test_file);
 2204 
 2205 /* Handle a PDS in separate code */
 2206 
 2207 if (zos_type == __ZOS_PDS || zos_type == __ZOS_PDSE)
 2208    {
 2209    return travelonpdsdir (pathname, only_one_at_top);
 2210    }
 2211 
 2212 /* Deal with regular files in the normal way below. These types are:
 2213    zos_type == __ZOS_PDS_MEMBER
 2214    zos_type == __ZOS_PS
 2215    zos_type == __ZOS_VSAM_KSDS
 2216    zos_type == __ZOS_VSAM_ESDS
 2217    zos_type == __ZOS_VSAM_RRDS
 2218 */
 2219 
 2220 /* Handle a z/OS directory using common code. */
 2221 
 2222 else if (zos_type == __ZOS_HFS)
 2223  {
 2224 #endif  /* NATIVE_ZOS */
 2225 
 2226 
 2227 /* Handle directories: common code for all OS */
 2228 
 2229 if (isdirectory(pathname))
 2230   {
 2231   if (dee_action == dee_SKIP ||
 2232       !test_incexc(lastcomp, include_dir_patterns, exclude_dir_patterns))
 2233     return -1;
 2234 
 2235   if (dee_action == dee_RECURSE)
 2236     {
 2237     char buffer[2048];
 2238     char *nextfile;
 2239     directory_type *dir = opendirectory(pathname);
 2240 
 2241     if (dir == NULL)
 2242       {
 2243       if (!silent)
 2244         fprintf(stderr, "pcregrep: Failed to open directory %s: %s\n", pathname,
 2245           strerror(errno));
 2246       return 2;
 2247       }
 2248 
 2249     while ((nextfile = readdirectory(dir)) != NULL)
 2250       {
 2251       int frc;
 2252       int fnlength = strlen(pathname) + strlen(nextfile) + 2;
 2253       if (fnlength > 2048)
 2254         {
 2255         fprintf(stderr, "pcregrep: recursive filename is too long\n");
 2256         rc = 2;
 2257         break;
 2258         }
 2259       sprintf(buffer, "%s%c%s", pathname, FILESEP, nextfile);
 2260       frc = grep_or_recurse(buffer, dir_recurse, FALSE);
 2261       if (frc > 1) rc = frc;
 2262        else if (frc == 0 && rc == 1) rc = 0;
 2263       }
 2264 
 2265     closedirectory(dir);
 2266     return rc;
 2267     }
 2268   }
 2269 
 2270 #if defined NATIVE_ZOS
 2271  }
 2272 #endif
 2273 
 2274 /* If the file is not a directory, check for a regular file, and if it is not,
 2275 skip it if that's been requested. Otherwise, check for an explicit inclusion or
 2276 exclusion. */
 2277 
 2278 else if (
 2279 #if defined NATIVE_ZOS
 2280         (zos_type == __ZOS_NOFILE && DEE_action == DEE_SKIP) ||
 2281 #else  /* all other OS */
 2282         (!isregfile(pathname) && DEE_action == DEE_SKIP) ||
 2283 #endif
 2284         !test_incexc(lastcomp, include_patterns, exclude_patterns))
 2285   return -1;  /* File skipped */
 2286 
 2287 /* Control reaches here if we have a regular file, or if we have a directory
 2288 and recursion or skipping was not requested, or if we have anything else and
 2289 skipping was not requested. The scan proceeds. If this is the first and only
 2290 argument at top level, we don't show the file name, unless we are only showing
 2291 the file name, or the filename was forced (-H). */
 2292 
 2293 #if defined SUPPORT_LIBZ || defined SUPPORT_LIBBZ2
 2294 pathlen = (int)(strlen(pathname));
 2295 #endif
 2296 
 2297 /* Open using zlib if it is supported and the file name ends with .gz. */
 2298 
 2299 #ifdef SUPPORT_LIBZ
 2300 if (pathlen > 3 && strcmp(pathname + pathlen - 3, ".gz") == 0)
 2301   {
 2302   ingz = gzopen(pathname, "rb");
 2303   if (ingz == NULL)
 2304     {
 2305     if (!silent)
 2306       fprintf(stderr, "pcregrep: Failed to open %s: %s\n", pathname,
 2307         strerror(errno));
 2308     return 2;
 2309     }
 2310   handle = (void *)ingz;
 2311   frtype = FR_LIBZ;
 2312   }
 2313 else
 2314 #endif
 2315 
 2316 /* Otherwise open with bz2lib if it is supported and the name ends with .bz2. */
 2317 
 2318 #ifdef SUPPORT_LIBBZ2
 2319 if (pathlen > 4 && strcmp(pathname + pathlen - 4, ".bz2") == 0)
 2320   {
 2321   inbz2 = BZ2_bzopen(pathname, "rb");
 2322   handle = (void *)inbz2;
 2323   frtype = FR_LIBBZ2;
 2324   }
 2325 else
 2326 #endif
 2327 
 2328 /* Otherwise use plain fopen(). The label is so that we can come back here if
 2329 an attempt to read a .bz2 file indicates that it really is a plain file. */
 2330 
 2331 #ifdef SUPPORT_LIBBZ2
 2332 PLAIN_FILE:
 2333 #endif
 2334   {
 2335   in = fopen(pathname, "rb");
 2336   handle = (void *)in;
 2337   frtype = FR_PLAIN;
 2338   }
 2339 
 2340 /* All the opening methods return errno when they fail. */
 2341 
 2342 if (handle == NULL)
 2343   {
 2344   if (!silent)
 2345     fprintf(stderr, "pcregrep: Failed to open %s: %s\n", pathname,
 2346       strerror(errno));
 2347   return 2;
 2348   }
 2349 
 2350 /* Now grep the file */
 2351 
 2352 rc = pcregrep(handle, frtype, pathname, (filenames > FN_DEFAULT ||
 2353   (filenames == FN_DEFAULT && !only_one_at_top))? pathname : NULL);
 2354 
 2355 /* Close in an appropriate manner. */
 2356 
 2357 #ifdef SUPPORT_LIBZ
 2358 if (frtype == FR_LIBZ)
 2359   gzclose(ingz);
 2360 else
 2361 #endif
 2362 
 2363 /* If it is a .bz2 file and the result is 3, it means that the first attempt to
 2364 read failed. If the error indicates that the file isn't in fact bzipped, try
 2365 again as a normal file. */
 2366 
 2367 #ifdef SUPPORT_LIBBZ2
 2368 if (frtype == FR_LIBBZ2)
 2369   {
 2370   if (rc == 3)
 2371     {
 2372     int errnum;
 2373     const char *err = BZ2_bzerror(inbz2, &errnum);
 2374     if (errnum == BZ_DATA_ERROR_MAGIC)
 2375       {
 2376       BZ2_bzclose(inbz2);
 2377       goto PLAIN_FILE;
 2378       }
 2379     else if (!silent)
 2380       fprintf(stderr, "pcregrep: Failed to read %s using bzlib: %s\n",
 2381         pathname, err);
 2382     rc = 2;    /* The normal "something went wrong" code */
 2383     }
 2384   BZ2_bzclose(inbz2);
 2385   }
 2386 else
 2387 #endif
 2388 
 2389 /* Normal file close */
 2390 
 2391 fclose(in);
 2392 
 2393 /* Pass back the yield from pcregrep(). */
 2394 
 2395 return rc;
 2396 }
 2397 
 2398 
 2399 
 2400 /*************************************************
 2401 *    Handle a single-letter, no data option      *
 2402 *************************************************/
 2403 
 2404 static int
 2405 handle_option(int letter, int options)
 2406 {
 2407 switch(letter)
 2408   {
 2409   case N_FOFFSETS: file_offsets = TRUE; break;
 2410   case N_HELP: help(); pcregrep_exit(0);
 2411   case N_LBUFFER: line_buffered = TRUE; break;
 2412   case N_LOFFSETS: line_offsets = number = TRUE; break;
 2413   case N_NOJIT: study_options &= ~PCRE_STUDY_JIT_COMPILE; break;
 2414   case 'a': binary_files = BIN_TEXT; break;
 2415   case 'c': count_only = TRUE; break;
 2416   case 'F': process_options |= PO_FIXED_STRINGS; break;
 2417   case 'H': filenames = FN_FORCE; break;
 2418   case 'I': binary_files = BIN_NOMATCH; break;
 2419   case 'h': filenames = FN_NONE; break;
 2420   case 'i': options |= PCRE_CASELESS; break;
 2421   case 'l': omit_zero_count = TRUE; filenames = FN_MATCH_ONLY; break;
 2422   case 'L': filenames = FN_NOMATCH_ONLY; break;
 2423   case 'M': multiline = TRUE; options |= PCRE_MULTILINE|PCRE_FIRSTLINE; break;
 2424   case 'n': number = TRUE; break;
 2425 
 2426   case 'o':
 2427   only_matching_last = add_number(0, only_matching_last);
 2428   if (only_matching == NULL) only_matching = only_matching_last;
 2429   break;
 2430 
 2431   case 'q': quiet = TRUE; break;
 2432   case 'r': dee_action = dee_RECURSE; break;
 2433   case 's': silent = TRUE; break;
 2434   case 'u': options |= PCRE_UTF8; utf8 = TRUE; break;
 2435   case 'v': invert = TRUE; break;
 2436   case 'w': process_options |= PO_WORD_MATCH; break;
 2437   case 'x': process_options |= PO_LINE_MATCH; break;
 2438 
 2439   case 'V':
 2440   fprintf(stdout, "pcregrep version %s\n", pcre_version());
 2441   pcregrep_exit(0);
 2442   break;
 2443 
 2444   default:
 2445   fprintf(stderr, "pcregrep: Unknown option -%c\n", letter);
 2446   pcregrep_exit(usage(2));
 2447   }
 2448 
 2449 return options;
 2450 }
 2451 
 2452 
 2453 
 2454 
 2455 /*************************************************
 2456 *          Construct printed ordinal             *
 2457 *************************************************/
 2458 
 2459 /* This turns a number into "1st", "3rd", etc. */
 2460 
 2461 static char *
 2462 ordin(int n)
 2463 {
 2464 static char buffer[14];
 2465 char *p = buffer;
 2466 sprintf(p, "%d", n);
 2467 while (*p != 0) p++;
 2468 switch (n%10)
 2469   {
 2470   case 1: strcpy(p, "st"); break;
 2471   case 2: strcpy(p, "nd"); break;
 2472   case 3: strcpy(p, "rd"); break;
 2473   default: strcpy(p, "th"); break;
 2474   }
 2475 return buffer;
 2476 }
 2477 
 2478 
 2479 
 2480 /*************************************************
 2481 *          Compile a single pattern              *
 2482 *************************************************/
 2483 
 2484 /* Do nothing if the pattern has already been compiled. This is the case for
 2485 include/exclude patterns read from a file.
 2486 
 2487 When the -F option has been used, each "pattern" may be a list of strings,
 2488 separated by line breaks. They will be matched literally. We split such a
 2489 string and compile the first substring, inserting an additional block into the
 2490 pattern chain.
 2491 
 2492 Arguments:
 2493   p              points to the pattern block
 2494   options        the PCRE options
 2495   popts          the processing options
 2496   fromfile       TRUE if the pattern was read from a file
 2497   fromtext       file name or identifying text (e.g. "include")
 2498   count          0 if this is the only command line pattern, or
 2499                  number of the command line pattern, or
 2500                  linenumber for a pattern from a file
 2501 
 2502 Returns:         TRUE on success, FALSE after an error
 2503 */
 2504 
 2505 static BOOL
 2506 compile_pattern(patstr *p, int options, int popts, int fromfile,
 2507   const char *fromtext, int count)
 2508 {
 2509 char buffer[PATBUFSIZE];
 2510 const char *error;
 2511 char *ps = p->string;
 2512 int patlen = strlen(ps);
 2513 int errptr;
 2514 
 2515 if (p->compiled != NULL) return TRUE;
 2516 
 2517 if ((popts & PO_FIXED_STRINGS) != 0)
 2518   {
 2519   int ellength;
 2520   char *eop = ps + patlen;
 2521   char *pe = end_of_line(ps, eop, &ellength);
 2522 
 2523   if (ellength != 0)
 2524     {
 2525     if (add_pattern(pe, p) == NULL) return FALSE;
 2526     patlen = (int)(pe - ps - ellength);
 2527     }
 2528   }
 2529 
 2530 if (snprintf(buffer, PATBUFSIZE, "%s%.*s%s", prefix[popts], patlen, ps,
 2531       suffix[popts]) > PATBUFSIZE)
 2532   {
 2533   fprintf(stderr, "pcregrep: Buffer overflow while compiling \"%s\"\n",
 2534     ps);
 2535   return FALSE;
 2536   }
 2537 
 2538 p->compiled = pcre_compile(buffer, options, &error, &errptr, pcretables);
 2539 if (p->compiled != NULL) return TRUE;
 2540 
 2541 /* Handle compile errors */
 2542 
 2543 errptr -= (int)strlen(prefix[popts]);
 2544 if (errptr > patlen) errptr = patlen;
 2545 
 2546 if (fromfile)
 2547   {
 2548   fprintf(stderr, "pcregrep: Error in regex in line %d of %s "
 2549     "at offset %d: %s\n", count, fromtext, errptr, error);
 2550   }
 2551 else
 2552   {
 2553   if (count == 0)
 2554     fprintf(stderr, "pcregrep: Error in %s regex at offset %d: %s\n",
 2555       fromtext, errptr, error);
 2556   else
 2557     fprintf(stderr, "pcregrep: Error in %s %s regex at offset %d: %s\n",
 2558       ordin(count), fromtext, errptr, error);
 2559   }
 2560 
 2561 return FALSE;
 2562 }
 2563 
 2564 
 2565 
 2566 /*************************************************
 2567 *     Read and compile a file of patterns        *
 2568 *************************************************/
 2569 
 2570 /* This is used for --filelist, --include-from, and --exclude-from.
 2571 
 2572 Arguments:
 2573   name         the name of the file; "-" is stdin
 2574   patptr       pointer to the pattern chain anchor
 2575   patlastptr   pointer to the last pattern pointer
 2576   popts        the process options to pass to pattern_compile()
 2577 
 2578 Returns:       TRUE if all went well
 2579 */
 2580 
 2581 static BOOL
 2582 read_pattern_file(char *name, patstr **patptr, patstr **patlastptr, int popts)
 2583 {
 2584 int linenumber = 0;
 2585 FILE *f;
 2586 char *filename;
 2587 char buffer[PATBUFSIZE];
 2588 
 2589 if (strcmp(name, "-") == 0)
 2590   {
 2591   f = stdin;
 2592   filename = stdin_name;
 2593   }
 2594 else
 2595   {
 2596   f = fopen(name, "r");
 2597   if (f == NULL)
 2598     {
 2599     fprintf(stderr, "pcregrep: Failed to open %s: %s\n", name, strerror(errno));
 2600     return FALSE;
 2601     }
 2602   filename = name;
 2603   }
 2604 
 2605 while (fgets(buffer, PATBUFSIZE, f) != NULL)
 2606   {
 2607   char *s = buffer + (int)strlen(buffer);
 2608   while (s > buffer && isspace((unsigned char)(s[-1]))) s--;
 2609   *s = 0;
 2610   linenumber++;
 2611   if (buffer[0] == 0) continue;   /* Skip blank lines */
 2612 
 2613   /* Note: this call to add_pattern() puts a pointer to the local variable
 2614   "buffer" into the pattern chain. However, that pointer is used only when
 2615   compiling the pattern, which happens immediately below, so we flatten it
 2616   afterwards, as a precaution against any later code trying to use it. */
 2617 
 2618   *patlastptr = add_pattern(buffer, *patlastptr);
 2619   if (*patlastptr == NULL)
 2620     {
 2621     if (f != stdin) fclose(f);
 2622     return FALSE;
 2623     }
 2624   if (*patptr == NULL) *patptr = *patlastptr;
 2625 
 2626   /* This loop is needed because compiling a "pattern" when -F is set may add
 2627   on additional literal patterns if the original contains a newline. In the
 2628   common case, it never will, because fgets() stops at a newline. However,
 2629   the -N option can be used to give pcregrep a different newline setting. */
 2630 
 2631   for(;;)
 2632     {
 2633     if (!compile_pattern(*patlastptr, pcre_options, popts, TRUE, filename,
 2634         linenumber))
 2635       {
 2636       if (f != stdin) fclose(f);
 2637       return FALSE;
 2638       }
 2639     (*patlastptr)->string = NULL;            /* Insurance */
 2640     if ((*patlastptr)->next == NULL) break;
 2641     *patlastptr = (*patlastptr)->next;
 2642     }
 2643   }
 2644 
 2645 if (f != stdin) fclose(f);
 2646 return TRUE;
 2647 }
 2648 
 2649 
 2650 
 2651 /*************************************************
 2652 *                Main program                    *
 2653 *************************************************/
 2654 
 2655 /* Returns 0 if something matched, 1 if nothing matched, 2 after an error. */
 2656 
 2657 int
 2658 main(int argc, char **argv)
 2659 {
 2660 int i, j;
 2661 int rc = 1;
 2662 BOOL only_one_at_top;
 2663 patstr *cp;
 2664 fnstr *fn;
 2665 const char *locale_from = "--locale";
 2666 const char *error;
 2667 
 2668 #ifdef SUPPORT_PCREGREP_JIT
 2669 pcre_jit_stack *jit_stack = NULL;
 2670 #endif
 2671 
/* Set the default line ending value from the default in the PCRE library;
"lf", "cr", "crlf", "any", and "anycrlf" are supported. Anything else is
treated as "lf". Note that the return values from pcre_config(), though derived
from the ASCII codes, are the same in EBCDIC environments, so we must use the
actual values rather than escapes such as '\r'. */
 2677 
 2678 (void)pcre_config(PCRE_CONFIG_NEWLINE, &i);
 2679 switch(i)
 2680   {
 2681   default:               newline = (char *)"lf"; break;
 2682   case 13:               newline = (char *)"cr"; break;
 2683   case (13 << 8) | 10:   newline = (char *)"crlf"; break;
 2684   case -1:               newline = (char *)"any"; break;
 2685   case -2:               newline = (char *)"anycrlf"; break;
 2686   }
 2687 
 2688 /* Process the options */
 2689 
 2690 for (i = 1; i < argc; i++)
 2691   {
 2692   option_item *op = NULL;
 2693   char *option_data = (char *)"";    /* default to keep compiler happy */
 2694   BOOL longop;
 2695   BOOL longopwasequals = FALSE;
 2696 
 2697   if (argv[i][0] != '-') break;
 2698 
 2699   /* If we hit an argument that is just "-", it may be a reference to STDIN,
 2700   but only if we have previously had -e or -f to define the patterns. */
 2701 
 2702   if (argv[i][1] == 0)
 2703     {
 2704     if (pattern_files != NULL || patterns != NULL) break;
 2705       else pcregrep_exit(usage(2));
 2706     }
 2707 
 2708   /* Handle a long name option, or -- to terminate the options */
 2709 
 2710   if (argv[i][1] == '-')
 2711     {
 2712     char *arg = argv[i] + 2;
 2713     char *argequals = strchr(arg, '=');
 2714 
 2715     if (*arg == 0)    /* -- terminates options */
 2716       {
 2717       i++;
 2718       break;                /* out of the options-handling loop */
 2719       }
 2720 
 2721     longop = TRUE;
 2722 
 2723     /* Some long options have data that follows after =, for example file=name.
 2724     Some options have variations in the long name spelling: specifically, we
 2725     allow "regexp" because GNU grep allows it, though I personally go along
 2726     with Jeffrey Friedl and Larry Wall in preferring "regex" without the "p".
 2727     These options are entered in the table as "regex(p)". Options can be in
 2728     both these categories. */
 2729 
 2730     for (op = optionlist; op->one_char != 0; op++)
 2731       {
 2732       char *opbra = strchr(op->long_name, '(');
 2733       char *equals = strchr(op->long_name, '=');
 2734 
 2735       /* Handle options with only one spelling of the name */
 2736 
 2737       if (opbra == NULL)     /* Does not contain '(' */
 2738         {
 2739         if (equals == NULL)  /* Not thing=data case */
 2740           {
 2741           if (strcmp(arg, op->long_name) == 0) break;
 2742           }
 2743         else                 /* Special case xxx=data */
 2744           {
 2745           int oplen = (int)(equals - op->long_name);
 2746           int arglen = (argequals == NULL)?
 2747             (int)strlen(arg) : (int)(argequals - arg);
 2748           if (oplen == arglen && strncmp(arg, op->long_name, oplen) == 0)
 2749             {
 2750             option_data = arg + arglen;
 2751             if (*option_data == '=')
 2752               {
 2753               option_data++;
 2754               longopwasequals = TRUE;
 2755               }
 2756             break;
 2757             }
 2758           }
 2759         }
 2760 
 2761       /* Handle options with an alternate spelling of the name */
 2762 
 2763       else
 2764         {
 2765         char buff1[24];
 2766         char buff2[24];
 2767 
 2768         int baselen = (int)(opbra - op->long_name);
 2769         int fulllen = (int)(strchr(op->long_name, ')') - op->long_name + 1);
 2770         int arglen = (argequals == NULL || equals == NULL)?
 2771           (int)strlen(arg) : (int)(argequals - arg);
 2772 
 2773         if (snprintf(buff1, sizeof(buff1), "%.*s", baselen, op->long_name) >
 2774               (int)sizeof(buff1) ||
 2775             snprintf(buff2, sizeof(buff2), "%s%.*s", buff1,
 2776               fulllen - baselen - 2, opbra + 1) > (int)sizeof(buff2))
 2777           {
 2778           fprintf(stderr, "pcregrep: Buffer overflow when parsing %s option\n",
 2779             op->long_name);
 2780           pcregrep_exit(2);
 2781           }
 2782 
 2783         if (strncmp(arg, buff1, arglen) == 0 ||
 2784            strncmp(arg, buff2, arglen) == 0)
 2785           {
 2786           if (equals != NULL && argequals != NULL)
 2787             {
 2788             option_data = argequals;
 2789             if (*option_data == '=')
 2790               {
 2791               option_data++;
 2792               longopwasequals = TRUE;
 2793               }
 2794             }
 2795           break;
 2796           }
 2797         }
 2798       }
 2799 
 2800     if (op->one_char == 0)
 2801       {
 2802       fprintf(stderr, "pcregrep: Unknown option %s\n", argv[i]);
 2803       pcregrep_exit(usage(2));
 2804       }
 2805     }
 2806 
 2807   /* Jeffrey Friedl's debugging harness uses these additional options which
 2808   are not in the right form for putting in the option table because they use
 2809   only one hyphen, yet are more than one character long. By putting them
 2810   separately here, they will not get displayed as part of the help() output,
 2811   but I don't think Jeffrey will care about that. */
 2812 
 2813 #ifdef JFRIEDL_DEBUG
 2814   else if (strcmp(argv[i], "-pre") == 0) {
 2815           jfriedl_prefix = argv[++i];
 2816           continue;
 2817   } else if (strcmp(argv[i], "-post") == 0) {
 2818           jfriedl_postfix = argv[++i];
 2819           continue;
 2820   } else if (strcmp(argv[i], "-XT") == 0) {
 2821           sscanf(argv[++i], "%d", &jfriedl_XT);
 2822           continue;
 2823   } else if (strcmp(argv[i], "-XR") == 0) {
 2824           sscanf(argv[++i], "%d", &jfriedl_XR);
 2825           continue;
 2826   }
 2827 #endif
 2828 
 2829 
 2830   /* One-char options; many that have no data may be in a single argument; we
 2831   continue till we hit the last one or one that needs data. */
 2832 
 2833   else
 2834     {
 2835     char *s = argv[i] + 1;
 2836     longop = FALSE;
 2837 
 2838     while (*s != 0)
 2839       {
 2840       for (op = optionlist; op->one_char != 0; op++)
 2841         {
 2842         if (*s == op->one_char) break;
 2843         }
 2844       if (op->one_char == 0)
 2845         {
 2846         fprintf(stderr, "pcregrep: Unknown option letter '%c' in \"%s\"\n",
 2847           *s, argv[i]);
 2848         pcregrep_exit(usage(2));
 2849         }
 2850 
 2851       option_data = s+1;
 2852 
 2853       /* Break out if this is the last character in the string; it's handled
 2854       below like a single multi-char option. */
 2855 
 2856       if (*option_data == 0) break;
 2857 
 2858       /* Check for a single-character option that has data: OP_OP_NUMBER(S)
 2859       are used for ones that either have a numerical number or defaults, i.e.
 2860       the data is optional. If a digit follows, there is data; if not, carry on
 2861       with other single-character options in the same string. */
 2862 
 2863       if (op->type == OP_OP_NUMBER || op->type == OP_OP_NUMBERS)
 2864         {
 2865         if (isdigit((unsigned char)s[1])) break;
 2866         }
 2867       else   /* Check for an option with data */
 2868         {
 2869         if (op->type != OP_NODATA) break;
 2870         }
 2871 
 2872       /* Handle a single-character option with no data, then loop for the
 2873       next character in the string. */
 2874 
 2875       pcre_options = handle_option(*s++, pcre_options);
 2876       }
 2877     }
 2878 
 2879   /* At this point we should have op pointing to a matched option. If the type
 2880   is NO_DATA, it means that there is no data, and the option might set
 2881   something in the PCRE options. */
 2882 
 2883   if (op->type == OP_NODATA)
 2884     {
 2885     pcre_options = handle_option(op->one_char, pcre_options);
 2886     continue;
 2887     }
 2888 
 2889   /* If the option type is OP_OP_STRING or OP_OP_NUMBER(S), it's an option that
 2890   either has a value or defaults to something. It cannot have data in a
 2891   separate item. At the moment, the only such options are "colo(u)r",
 2892   "only-matching", and Jeffrey Friedl's special -S debugging option. */
 2893 
 2894   if (*option_data == 0 &&
 2895       (op->type == OP_OP_STRING || op->type == OP_OP_NUMBER ||
 2896        op->type == OP_OP_NUMBERS))
 2897     {
 2898     switch (op->one_char)
 2899       {
 2900       case N_COLOUR:
 2901       colour_option = (char *)"auto";
 2902       break;
 2903 
 2904       case 'o':
 2905       only_matching_last = add_number(0, only_matching_last);
 2906       if (only_matching == NULL) only_matching = only_matching_last;
 2907       break;
 2908 
 2909 #ifdef JFRIEDL_DEBUG
 2910       case 'S':
 2911       S_arg = 0;
 2912       break;
 2913 #endif
 2914       }
 2915     continue;
 2916     }
 2917 
 2918   /* Otherwise, find the data string for the option. */
 2919 
 2920   if (*option_data == 0)
 2921     {
 2922     if (i >= argc - 1 || longopwasequals)
 2923       {
 2924       fprintf(stderr, "pcregrep: Data missing after %s\n", argv[i]);
 2925       pcregrep_exit(usage(2));
 2926       }
 2927     option_data = argv[++i];
 2928     }
 2929 
 2930   /* If the option type is OP_OP_NUMBERS, the value is a number that is to be
 2931   added to a chain of numbers. */
 2932 
 2933   if (op->type == OP_OP_NUMBERS)
 2934     {
 2935     unsigned long int n = decode_number(option_data, op, longop);
 2936     omdatastr *omd = (omdatastr *)op->dataptr;
 2937     *(omd->lastptr) = add_number((int)n, *(omd->lastptr));
 2938     if (*(omd->anchor) == NULL) *(omd->anchor) = *(omd->lastptr);
 2939     }
 2940 
 2941   /* If the option type is OP_PATLIST, it's the -e option, or one of the
 2942   include/exclude options, which can be called multiple times to create lists
 2943   of patterns. */
 2944 
 2945   else if (op->type == OP_PATLIST)
 2946     {
 2947     patdatastr *pd = (patdatastr *)op->dataptr;
 2948     *(pd->lastptr) = add_pattern(option_data, *(pd->lastptr));
 2949     if (*(pd->lastptr) == NULL) goto EXIT2;
 2950     if (*(pd->anchor) == NULL) *(pd->anchor) = *(pd->lastptr);
 2951     }
 2952 
 2953   /* If the option type is OP_FILELIST, it's one of the options that names a
 2954   file. */
 2955 
 2956   else if (op->type == OP_FILELIST)
 2957     {
 2958     fndatastr *fd = (fndatastr *)op->dataptr;
 2959     fn = (fnstr *)malloc(sizeof(fnstr));
 2960     if (fn == NULL)
 2961       {
 2962       fprintf(stderr, "pcregrep: malloc failed\n");
 2963       goto EXIT2;
 2964       }
 2965     fn->next = NULL;
 2966     fn->name = option_data;
 2967     if (*(fd->anchor) == NULL)
 2968       *(fd->anchor) = fn;
 2969     else
 2970       (*(fd->lastptr))->next = fn;
 2971     *(fd->lastptr) = fn;
 2972     }
 2973 
 2974   /* Handle OP_BINARY_FILES */
 2975 
 2976   else if (op->type == OP_BINFILES)
 2977     {
 2978     if (strcmp(option_data, "binary") == 0)
 2979       binary_files = BIN_BINARY;
 2980     else if (strcmp(option_data, "without-match") == 0)
 2981       binary_files = BIN_NOMATCH;
 2982     else if (strcmp(option_data, "text") == 0)
 2983       binary_files = BIN_TEXT;
 2984     else
 2985       {
 2986       fprintf(stderr, "pcregrep: unknown value \"%s\" for binary-files\n",
 2987         option_data);
 2988       pcregrep_exit(usage(2));
 2989       }
 2990     }
 2991 
 2992   /* Otherwise, deal with a single string or numeric data value. */
 2993 
 2994   else if (op->type != OP_NUMBER && op->type != OP_LONGNUMBER &&
 2995            op->type != OP_OP_NUMBER)
 2996     {
 2997     *((char **)op->dataptr) = option_data;
 2998     }
 2999   else
 3000     {
 3001     unsigned long int n = decode_number(option_data, op, longop);
 3002     if (op->type == OP_LONGNUMBER) *((unsigned long int *)op->dataptr) = n;
 3003       else *((int *)op->dataptr) = n;
 3004     }
 3005   }
 3006 
 3007 /* Options have been decoded. If -C was used, its value is used as a default
 3008 for -A and -B. */
 3009 
 3010 if (both_context > 0)
 3011   {
 3012   if (after_context == 0) after_context = both_context;
 3013   if (before_context == 0) before_context = both_context;
 3014   }
 3015 
 3016 /* Only one of --only-matching, --file-offsets, or --line-offsets is permitted.
 3017 However, all three set show_only_matching because they display, each in their
 3018 own way, only the data that has matched. */
 3019 
 3020 if ((only_matching != NULL && (file_offsets || line_offsets)) ||
 3021     (file_offsets && line_offsets))
 3022   {
 3023   fprintf(stderr, "pcregrep: Cannot mix --only-matching, --file-offsets "
 3024     "and/or --line-offsets\n");
 3025   pcregrep_exit(usage(2));
 3026   }
 3027 
 3028 if (only_matching != NULL || file_offsets || line_offsets)
 3029   show_only_matching = TRUE;
 3030 
 3031 /* If a locale has not been provided as an option, see if the LC_CTYPE or
 3032 LC_ALL environment variable is set, and if so, use it. */
 3033 
 3034 if (locale == NULL)
 3035   {
 3036   locale = getenv("LC_ALL");
 3037   locale_from = "LC_ALL";
 3038   }
 3039 
 3040 if (locale == NULL)
 3041   {
 3042   locale = getenv("LC_CTYPE");
 3043   locale_from = "LC_CTYPE";
 3044   }
 3045 
 3046 /* If a locale is set, use it to generate the tables the PCRE needs. Otherwise,
 3047 pcretables==NULL, which causes the use of default tables. */
 3048 
 3049 if (locale != NULL)
 3050   {
 3051   if (setlocale(LC_CTYPE, locale) == NULL)
 3052     {
 3053     fprintf(stderr, "pcregrep: Failed to set locale %s (obtained from %s)\n",
 3054       locale, locale_from);
 3055     goto EXIT2;
 3056     }
 3057   pcretables = pcre_maketables();
 3058   }
 3059 
 3060 /* Sort out colouring */
 3061 
 3062 if (colour_option != NULL && strcmp(colour_option, "never") != 0)
 3063   {
 3064   if (strcmp(colour_option, "always") == 0) do_colour = TRUE;
 3065   else if (strcmp(colour_option, "auto") == 0) do_colour = is_stdout_tty();
 3066   else
 3067     {
 3068     fprintf(stderr, "pcregrep: Unknown colour setting \"%s\"\n",
 3069       colour_option);
 3070     goto EXIT2;
 3071     }
 3072   if (do_colour)
 3073     {
 3074     char *cs = getenv("PCREGREP_COLOUR");
 3075     if (cs == NULL) cs = getenv("PCREGREP_COLOR");
 3076     if (cs != NULL) colour_string = cs;
 3077     }
 3078   }
 3079 
 3080 /* Interpret the newline type; the default settings are Unix-like. */
 3081 
 3082 if (strcmp(newline, "cr") == 0 || strcmp(newline, "CR") == 0)
 3083   {
 3084   pcre_options |= PCRE_NEWLINE_CR;
 3085   endlinetype = EL_CR;
 3086   }
 3087 else if (strcmp(newline, "lf") == 0 || strcmp(newline, "LF") == 0)
 3088   {
 3089   pcre_options |= PCRE_NEWLINE_LF;
 3090   endlinetype = EL_LF;
 3091   }
 3092 else if (strcmp(newline, "crlf") == 0 || strcmp(newline, "CRLF") == 0)
 3093   {
 3094   pcre_options |= PCRE_NEWLINE_CRLF;
 3095   endlinetype = EL_CRLF;
 3096   }
 3097 else if (strcmp(newline, "any") == 0 || strcmp(newline, "ANY") == 0)
 3098   {
 3099   pcre_options |= PCRE_NEWLINE_ANY;
 3100   endlinetype = EL_ANY;
 3101   }
 3102 else if (strcmp(newline, "anycrlf") == 0 || strcmp(newline, "ANYCRLF") == 0)
 3103   {
 3104   pcre_options |= PCRE_NEWLINE_ANYCRLF;
 3105   endlinetype = EL_ANYCRLF;
 3106   }
 3107 else
 3108   {
 3109   fprintf(stderr, "pcregrep: Invalid newline specifier \"%s\"\n", newline);
 3110   goto EXIT2;
 3111   }
 3112 
 3113 /* Interpret the text values for -d and -D */
 3114 
 3115 if (dee_option != NULL)
 3116   {
 3117   if (strcmp(dee_option, "read") == 0) dee_action = dee_READ;
 3118   else if (strcmp(dee_option, "recurse") == 0) dee_action = dee_RECURSE;
 3119   else if (strcmp(dee_option, "skip") == 0) dee_action = dee_SKIP;
 3120   else
 3121     {
 3122     fprintf(stderr, "pcregrep: Invalid value \"%s\" for -d\n", dee_option);
 3123     goto EXIT2;
 3124     }
 3125   }
 3126 
 3127 if (DEE_option != NULL)
 3128   {
 3129   if (strcmp(DEE_option, "read") == 0) DEE_action = DEE_READ;
 3130   else if (strcmp(DEE_option, "skip") == 0) DEE_action = DEE_SKIP;
 3131   else
 3132     {
 3133     fprintf(stderr, "pcregrep: Invalid value \"%s\" for -D\n", DEE_option);
 3134     goto EXIT2;
 3135     }
 3136   }
 3137 
 3138 /* Check the values for Jeffrey Friedl's debugging options. */
 3139 
 3140 #ifdef JFRIEDL_DEBUG
 3141 if (S_arg > 9)
 3142   {
 3143   fprintf(stderr, "pcregrep: bad value for -S option\n");
 3144   return 2;
 3145   }
 3146 if (jfriedl_XT != 0 || jfriedl_XR != 0)
 3147   {
 3148   if (jfriedl_XT == 0) jfriedl_XT = 1;
 3149   if (jfriedl_XR == 0) jfriedl_XR = 1;
 3150   }
 3151 #endif
 3152 
 3153 /* Get memory for the main buffer. */
 3154 
 3155 bufsize = 3*bufthird;
 3156 main_buffer = (char *)malloc(bufsize);
 3157 
 3158 if (main_buffer == NULL)
 3159   {
 3160   fprintf(stderr, "pcregrep: malloc failed\n");
 3161   goto EXIT2;
 3162   }
 3163 
 3164 /* If no patterns were provided by -e, and there are no files provided by -f,
 3165 the first argument is the one and only pattern, and it must exist. */
 3166 
 3167 if (patterns == NULL && pattern_files == NULL)
 3168   {
 3169   if (i >= argc) return usage(2);
 3170   patterns = patterns_last = add_pattern(argv[i++], NULL);
 3171   if (patterns == NULL) goto EXIT2;
 3172   }
 3173 
 3174 /* Compile the patterns that were provided on the command line, either by
 3175 multiple uses of -e or as a single unkeyed pattern. We cannot do this until
 3176 after all the command-line options are read so that we know which PCRE options
 3177 to use. When -F is used, compile_pattern() may add another block into the
 3178 chain, so we must not access the next pointer till after the compile. */
 3179 
 3180 for (j = 1, cp = patterns; cp != NULL; j++, cp = cp->next)
 3181   {
 3182   if (!compile_pattern(cp, pcre_options, process_options, FALSE, "command-line",
 3183        (j == 1 && patterns->next == NULL)? 0 : j))
 3184     goto EXIT2;
 3185   }
 3186 
 3187 /* Read and compile the regular expressions that are provided in files. */
 3188 
 3189 for (fn = pattern_files; fn != NULL; fn = fn->next)
 3190   {
 3191   if (!read_pattern_file(fn->name, &patterns, &patterns_last, process_options))
 3192     goto EXIT2;
 3193   }
 3194 
 3195 /* Study the regular expressions, as we will be running them many times. If an
 3196 extra block is needed for a limit, set PCRE_STUDY_EXTRA_NEEDED so that one is
 3197 returned, even if studying produces no data. */
 3198 
 3199 if (match_limit > 0 || match_limit_recursion > 0)
 3200   study_options |= PCRE_STUDY_EXTRA_NEEDED;
 3201 
 3202 /* Unless JIT has been explicitly disabled, arrange a stack for it to use. */
 3203 
 3204 #ifdef SUPPORT_PCREGREP_JIT
 3205 if ((study_options & PCRE_STUDY_JIT_COMPILE) != 0)
 3206   jit_stack = pcre_jit_stack_alloc(32*1024, 1024*1024);
 3207 #endif
 3208 
 3209 for (j = 1, cp = patterns; cp != NULL; j++, cp = cp->next)
 3210   {
 3211   cp->hint = pcre_study(cp->compiled, study_options, &error);
 3212   if (error != NULL)
 3213     {
 3214     if (patterns->next == NULL)
 3215       fprintf(stderr, "pcregrep: Error while studying regex: %s\n", error);
 3216     else
 3217       fprintf(stderr, "pcregrep: Error while studying regex number %d: %s\n",
 3218         j, error);
 3219     goto EXIT2;
 3220     }
 3221 #ifdef SUPPORT_PCREGREP_JIT
 3222   if (jit_stack != NULL && cp->hint != NULL)
 3223     pcre_assign_jit_stack(cp->hint, NULL, jit_stack);
 3224 #endif
 3225   }
 3226 
 3227 /* If --match-limit or --recursion-limit was set, put the value(s) into the
 3228 pcre_extra block for each pattern. There will always be an extra block because
 3229 of the use of PCRE_STUDY_EXTRA_NEEDED above. */
 3230 
 3231 for (cp = patterns; cp != NULL; cp = cp->next)
 3232   {
 3233   if (match_limit > 0)
 3234     {
 3235     cp->hint->flags |= PCRE_EXTRA_MATCH_LIMIT;
 3236     cp->hint->match_limit = match_limit;
 3237     }
 3238 
 3239   if (match_limit_recursion > 0)
 3240     {
 3241     cp->hint->flags |= PCRE_EXTRA_MATCH_LIMIT_RECURSION;
 3242     cp->hint->match_limit_recursion = match_limit_recursion;
 3243     }
 3244   }
 3245 
 3246 /* If there are include or exclude patterns read from the command line, compile
 3247 them. -F, -w, and -x do not apply, so the third argument of compile_pattern is
 3248 0. */
 3249 
 3250 for (j = 0; j < 4; j++)
 3251   {
 3252   int k;
 3253   for (k = 1, cp = *(incexlist[j]); cp != NULL; k++, cp = cp->next)
 3254     {
 3255     if (!compile_pattern(cp, pcre_options, 0, FALSE, incexname[j],
 3256          (k == 1 && cp->next == NULL)? 0 : k))
 3257       goto EXIT2;
 3258     }
 3259   }
 3260 
 3261 /* Read and compile include/exclude patterns from files. */
 3262 
 3263 for (fn = include_from; fn != NULL; fn = fn->next)
 3264   {
 3265   if (!read_pattern_file(fn->name, &include_patterns, &include_patterns_last, 0))
 3266     goto EXIT2;
 3267   }
 3268 
 3269 for (fn = exclude_from; fn != NULL; fn = fn->next)
 3270   {
 3271   if (!read_pattern_file(fn->name, &exclude_patterns, &exclude_patterns_last, 0))
 3272     goto EXIT2;
 3273   }
 3274 
 3275 /* If there are no files that contain lists of files to search, and there are
 3276 no file arguments, search stdin, and then exit. */
 3277 
 3278 if (file_lists == NULL && i >= argc)
 3279   {
 3280   rc = pcregrep(stdin, FR_PLAIN, stdin_name,
 3281     (filenames > FN_DEFAULT)? stdin_name : NULL);
 3282   goto EXIT;
 3283   }
 3284 
 3285 /* If any files that contains a list of files to search have been specified,
 3286 read them line by line and search the given files. */
 3287 
 3288 for (fn = file_lists; fn != NULL; fn = fn->next)
 3289   {
 3290   char buffer[PATBUFSIZE];
 3291   FILE *fl;
 3292   if (strcmp(fn->name, "-") == 0) fl = stdin; else
 3293     {
 3294     fl = fopen(fn->name, "rb");
 3295     if (fl == NULL)
 3296       {
 3297       fprintf(stderr, "pcregrep: Failed to open %s: %s\n", fn->name,
 3298         strerror(errno));
 3299       goto EXIT2;
 3300       }
 3301     }
 3302   while (fgets(buffer, PATBUFSIZE, fl) != NULL)
 3303     {
 3304     int frc;
 3305     char *end = buffer + (int)strlen(buffer);
 3306     while (end > buffer && isspace(end[-1])) end--;
 3307     *end = 0;
 3308     if (*buffer != 0)
 3309       {
 3310       frc = grep_or_recurse(buffer, dee_action == dee_RECURSE, FALSE);
 3311       if (frc > 1) rc = frc;
 3312         else if (frc == 0 && rc == 1) rc = 0;
 3313       }
 3314     }
 3315   if (fl != stdin) fclose(fl);
 3316   }
 3317 
 3318 /* After handling file-list, work through remaining arguments. Pass in the fact
 3319 that there is only one argument at top level - this suppresses the file name if
 3320 the argument is not a directory and filenames are not otherwise forced. */
 3321 
 3322 only_one_at_top = i == argc - 1 && file_lists == NULL;
 3323 
 3324 for (; i < argc; i++)
 3325   {
 3326   int frc = grep_or_recurse(argv[i], dee_action == dee_RECURSE,
 3327     only_one_at_top);
 3328   if (frc > 1) rc = frc;
 3329     else if (frc == 0 && rc == 1) rc = 0;
 3330   }
 3331 
 3332 EXIT:
 3333 #ifdef SUPPORT_PCREGREP_JIT
 3334 if (jit_stack != NULL) pcre_jit_stack_free(jit_stack);
 3335 #endif
 3336 
 3337 free(main_buffer);
 3338 free((void *)pcretables);
 3339 
 3340 free_pattern_chain(patterns);
 3341 free_pattern_chain(include_patterns);
 3342 free_pattern_chain(include_dir_patterns);
 3343 free_pattern_chain(exclude_patterns);
 3344 free_pattern_chain(exclude_dir_patterns);
 3345 
 3346 free_file_chain(exclude_from);
 3347 free_file_chain(include_from);
 3348 free_file_chain(pattern_files);
 3349 free_file_chain(file_lists);
 3350 
 3351 while (only_matching != NULL)
 3352   {
 3353   omstr *this = only_matching;
 3354   only_matching = this->next;
 3355   free(this);
 3356   }
 3357 
 3358 pcregrep_exit(rc);
 3359 
 3360 EXIT2:
 3361 rc = 2;
 3362 goto EXIT;
 3363 }
 3364 
 3365 /* End of pcregrep */