"Fossies" - the Fresh Open Source Software Archive 
Member "pcre-8.45/pcregrep.c" (26 Jun 2018, 98200 Bytes) of package /linux/misc/pcre-8.45.tar.bz2:
As a special service, "Fossies" has tried to format the requested source page into HTML using (guessed) C and C++ source code syntax highlighting (style:
standard) with prefixed line numbers and a
code folding option.
Alternatively, you can
view or
download the uninterpreted source code file here.
For more information about "pcregrep.c" see the
Fossies "Dox" file reference documentation and the last
Fossies "Diffs" side-by-side code changes report:
8.42_vs_8.43.
1 /*************************************************
2 * pcregrep program *
3 *************************************************/
4
5 /* This is a grep program that uses the PCRE regular expression library to do
6 its pattern matching. On Unix-like, Windows, and native z/OS systems it can
7 recurse into directories, and in z/OS it can handle PDS files.
8
9 Note that for native z/OS, in addition to defining the NATIVE_ZOS macro, an
10 additional header is required. That header is not included in the main PCRE
11 distribution because other apparatus is needed to compile pcregrep for z/OS.
12 The header can be found in the special z/OS distribution, which is available
13 from www.zaconsultants.net or from www.cbttape.org.
14
15 Copyright (c) 1997-2014 University of Cambridge
16
17 -----------------------------------------------------------------------------
18 Redistribution and use in source and binary forms, with or without
19 modification, are permitted provided that the following conditions are met:
20
21 * Redistributions of source code must retain the above copyright notice,
22 this list of conditions and the following disclaimer.
23
24 * Redistributions in binary form must reproduce the above copyright
25 notice, this list of conditions and the following disclaimer in the
26 documentation and/or other materials provided with the distribution.
27
28 * Neither the name of the University of Cambridge nor the names of its
29 contributors may be used to endorse or promote products derived from
30 this software without specific prior written permission.
31
32 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
33 AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
34 IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
35 ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
36 LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
37 CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
38 SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
39 INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
40 CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
41 ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
42 POSSIBILITY OF SUCH DAMAGE.
43 -----------------------------------------------------------------------------
44 */
45
46 #ifdef HAVE_CONFIG_H
47 #include "config.h"
48 #endif
49
50 #include <ctype.h>
51 #include <locale.h>
52 #include <stdio.h>
53 #include <string.h>
54 #include <stdlib.h>
55 #include <errno.h>
56
57 #include <sys/types.h>
58 #include <sys/stat.h>
59
60 #ifdef HAVE_UNISTD_H
61 #include <unistd.h>
62 #endif
63
64 #ifdef SUPPORT_LIBZ
65 #include <zlib.h>
66 #endif
67
68 #ifdef SUPPORT_LIBBZ2
69 #include <bzlib.h>
70 #endif
71
72 #include "pcre.h"
73
/* Boolean type used for flags, together with its two truth values. */

typedef int BOOL;

#define TRUE 1
#define FALSE 0

/* OFFSET_SIZE is consumed elsewhere in the file; presumably it sizes the
vector that receives match offsets - confirm at its point of use. */

#define OFFSET_SIZE 99

/* The longest pattern accepted is BUFSIZ bytes, but never less than 8192. */

#if BUFSIZ <= 8192
#define MAXPATLEN 8192
#else
#define MAXPATLEN BUFSIZ
#endif

/* A pattern buffer has 10 bytes to spare beyond MAXPATLEN so that a prefix and
a suffix can be wrapped round the pattern. */

#define PATBUFSIZE (MAXPATLEN + 10)
88
/* Settings for the "filenames" variable, which selects how file names are
output. Do not reorder: code relies on a file name being wanted for every value
greater than FN_DEFAULT. */

enum {
  FN_NONE,
  FN_DEFAULT,
  FN_MATCH_ONLY,
  FN_NOMATCH_ONLY,
  FN_FORCE
};

/* The ways in which a file can be read */

enum {
  FR_PLAIN,
  FR_LIBZ,
  FR_LIBBZ2
};

/* What to do for the -d option (lower case names) and for the -D option
(upper case names) */

enum {
  dee_READ,
  dee_SKIP,
  dee_RECURSE
};

enum {
  DEE_READ,
  DEE_SKIP
};

/* Flag bits that request special processing of patterns */

#define PO_WORD_MATCH     0x0001
#define PO_LINE_MATCH     0x0002
#define PO_FIXED_STRINGS  0x0004

/* The kinds of line ending */

enum {
  EL_LF,
  EL_CR,
  EL_CRLF,
  EL_ANY,
  EL_ANYCRLF
};

/* The ways of treating binary files */

enum {
  BIN_BINARY,
  BIN_NOMATCH,
  BIN_TEXT
};
117
/* In newer versions of gcc, with FORTIFY_SOURCE set (the default in some
environments), a warning is issued if the value of fwrite() is ignored.
Unfortunately, casting to (void) does not suppress the warning. To get round
this, the macro tests the result in an "if" that has an empty body. Oddly, the
warning does not also seem to apply to fprintf().

The test is wrapped in do { } while (0) so that "FWRITE(...);" is exactly one
statement. Without the wrapper, using the macro as the body of an "if" that has
an "else" does not compile, because the semicolon after the expansion ends the
outer "if" before the "else" is seen. */

#define FWRITE(a,b,c,d) do { if (fwrite(a,b,c,d)) {} } while (0)
125
126
127
128 /*************************************************
129 * Global variables *
130 *************************************************/
131
132 /* Jeffrey Friedl has some debugging requirements that are not part of the
133 regular code. */
134
135 #ifdef JFRIEDL_DEBUG
136 static int S_arg = -1;
137 static unsigned int jfriedl_XR = 0; /* repeat regex attempt this many times */
138 static unsigned int jfriedl_XT = 0; /* replicate text this many times */
139 static const char *jfriedl_prefix = "";
140 static const char *jfriedl_postfix = "";
141 #endif
142
143 static int endlinetype;
144
145 static char *colour_string = (char *)"1;31";
146 static char *colour_option = NULL;
147 static char *dee_option = NULL;
148 static char *DEE_option = NULL;
149 static char *locale = NULL;
150 static char *main_buffer = NULL;
151 static char *newline = NULL;
152 static char *om_separator = (char *)"";
153 static char *stdin_name = (char *)"(standard input)";
154
155 static const unsigned char *pcretables = NULL;
156
157 static int after_context = 0;
158 static int before_context = 0;
159 static int binary_files = BIN_BINARY;
160 static int both_context = 0;
161 static int bufthird = PCREGREP_BUFSIZE;
162 static int bufsize = 3*PCREGREP_BUFSIZE;
163
164 #if defined HAVE_WINDOWS_H && HAVE_WINDOWS_H
165 static int dee_action = dee_SKIP;
166 #else
167 static int dee_action = dee_READ;
168 #endif
169
170 static int DEE_action = DEE_READ;
171 static int error_count = 0;
172 static int filenames = FN_DEFAULT;
173 static int pcre_options = 0;
174 static int process_options = 0;
175
176 #ifdef SUPPORT_PCREGREP_JIT
177 static int study_options = PCRE_STUDY_JIT_COMPILE;
178 #else
179 static int study_options = 0;
180 #endif
181
182 static unsigned long int match_limit = 0;
183 static unsigned long int match_limit_recursion = 0;
184
185 static BOOL count_only = FALSE;
186 static BOOL do_colour = FALSE;
187 static BOOL file_offsets = FALSE;
188 static BOOL hyphenpending = FALSE;
189 static BOOL invert = FALSE;
190 static BOOL line_buffered = FALSE;
191 static BOOL line_offsets = FALSE;
192 static BOOL multiline = FALSE;
193 static BOOL number = FALSE;
194 static BOOL omit_zero_count = FALSE;
195 static BOOL resource_error = FALSE;
196 static BOOL quiet = FALSE;
197 static BOOL show_only_matching = FALSE;
198 static BOOL silent = FALSE;
199 static BOOL utf8 = FALSE;
200
/* One item in the list of capturing group numbers given with
--only-matching. */

typedef struct omstr {
  struct omstr *next;
  int groupnum;
} omstr;

/* A number chain is described by two variables, the anchor of the list and a
pointer to its last item. This structure holds pointers to both of them. */

typedef struct omdatastr {
  omstr **anchor;
  omstr **lastptr;
} omdatastr;

/* The --only-matching chain itself, initially empty, and its descriptor. */

static omstr *only_matching = NULL;
static omstr *only_matching_last = NULL;

static omdatastr only_matching_data = { &only_matching, &only_matching_last };
219
/* One item in a list of file names (for -f, --file-list, and
--{in,ex}clude-from) */

typedef struct fnstr {
  struct fnstr *next;
  char *name;
} fnstr;

/* A file name chain is described by two variables, the anchor of the list and
a pointer to its last item. This structure holds pointers to both of them. */

typedef struct fndatastr {
  fnstr **anchor;
  fnstr **lastptr;
} fndatastr;

/* The four file name chains, all initially empty, each followed by its
descriptor. */

static fnstr *exclude_from = NULL;
static fnstr *exclude_from_last = NULL;
static fndatastr exclude_from_data = { &exclude_from, &exclude_from_last };

static fnstr *include_from = NULL;
static fnstr *include_from_last = NULL;
static fndatastr include_from_data = { &include_from, &include_from_last };

static fnstr *file_lists = NULL;
static fnstr *file_lists_last = NULL;
static fndatastr file_lists_data = { &file_lists, &file_lists_last };

static fnstr *pattern_files = NULL;
static fnstr *pattern_files_last = NULL;
static fndatastr pattern_files_data = { &pattern_files, &pattern_files_last };
248
249 /* Structure for pattern and its compiled form; used for matching patterns and
250 also for include/exclude patterns. */
251
252 typedef struct patstr {
253 struct patstr *next;
254 char *string;
255 pcre *compiled;
256 pcre_extra *hint;
257 } patstr;
258
259 static patstr *patterns = NULL;
260 static patstr *patterns_last = NULL;
261 static patstr *include_patterns = NULL;
262 static patstr *include_patterns_last = NULL;
263 static patstr *exclude_patterns = NULL;
264 static patstr *exclude_patterns_last = NULL;
265 static patstr *include_dir_patterns = NULL;
266 static patstr *include_dir_patterns_last = NULL;
267 static patstr *exclude_dir_patterns = NULL;
268 static patstr *exclude_dir_patterns_last = NULL;
269
270 /* Structure holding the two variables that describe a pattern chain. A pointer
271 to such structures is used for each appropriate option. */
272
273 typedef struct patdatastr {
274 patstr **anchor;
275 patstr **lastptr;
276 } patdatastr;
277
278 static patdatastr match_patdata = { &patterns, &patterns_last };
279 static patdatastr include_patdata = { &include_patterns, &include_patterns_last };
280 static patdatastr exclude_patdata = { &exclude_patterns, &exclude_patterns_last };
281 static patdatastr include_dir_patdata = { &include_dir_patterns, &include_dir_patterns_last };
282 static patdatastr exclude_dir_patdata = { &exclude_dir_patterns, &exclude_dir_patterns_last };
283
284 static patstr **incexlist[4] = { &include_patterns, &exclude_patterns,
285 &include_dir_patterns, &exclude_dir_patterns };
286
287 static const char *incexname[4] = { "--include", "--exclude",
288 "--include-dir", "--exclude-dir" };
289
/* The kinds of option, as held in the "type" field of an option table entry */

enum {
  OP_NODATA,
  OP_STRING,
  OP_OP_STRING,
  OP_NUMBER,
  OP_LONGNUMBER,
  OP_OP_NUMBER,
  OP_OP_NUMBERS,
  OP_PATLIST,
  OP_FILELIST,
  OP_BINFILES
};

/* An entry in the option table: the option's type, its single-letter form (or
a negative N_xxx code), a pointer to the associated data, the long name, and
the text shown by --help. */

typedef struct option_item {
  int type;
  int one_char;
  void *dataptr;
  const char *long_name;
  const char *help_text;
} option_item;

/* Options that have no single-letter equivalent are given a negative value,
which can be used to identify them. */

#define N_COLOUR        (-1)
#define N_EXCLUDE       (-2)
#define N_EXCLUDE_DIR   (-3)
#define N_HELP          (-4)
#define N_INCLUDE       (-5)
#define N_INCLUDE_DIR   (-6)
#define N_LABEL         (-7)
#define N_LOCALE        (-8)
#define N_NULL          (-9)
#define N_LOFFSETS      (-10)
#define N_FOFFSETS      (-11)
#define N_LBUFFER       (-12)
#define N_M_LIMIT       (-13)
#define N_M_LIMIT_REC   (-14)
#define N_BUFSIZE       (-15)
#define N_NOJIT         (-16)
#define N_FILE_LIST     (-17)
#define N_BINARY_FILES  (-18)
#define N_EXCLUDE_FROM  (-19)
#define N_INCLUDE_FROM  (-20)
#define N_OM_SEPARATOR  (-21)
327
328 static option_item optionlist[] = {
329 { OP_NODATA, N_NULL, NULL, "", "terminate options" },
330 { OP_NODATA, N_HELP, NULL, "help", "display this help and exit" },
331 { OP_NUMBER, 'A', &after_context, "after-context=number", "set number of following context lines" },
332 { OP_NODATA, 'a', NULL, "text", "treat binary files as text" },
333 { OP_NUMBER, 'B', &before_context, "before-context=number", "set number of prior context lines" },
334 { OP_BINFILES, N_BINARY_FILES, NULL, "binary-files=word", "set treatment of binary files" },
335 { OP_NUMBER, N_BUFSIZE,&bufthird, "buffer-size=number", "set processing buffer size parameter" },
336 { OP_OP_STRING, N_COLOUR, &colour_option, "color=option", "matched text color option" },
337 { OP_OP_STRING, N_COLOUR, &colour_option, "colour=option", "matched text colour option" },
338 { OP_NUMBER, 'C', &both_context, "context=number", "set number of context lines, before & after" },
339 { OP_NODATA, 'c', NULL, "count", "print only a count of matching lines per FILE" },
340 { OP_STRING, 'D', &DEE_option, "devices=action","how to handle devices, FIFOs, and sockets" },
341 { OP_STRING, 'd', &dee_option, "directories=action", "how to handle directories" },
342 { OP_PATLIST, 'e', &match_patdata, "regex(p)=pattern", "specify pattern (may be used more than once)" },
343 { OP_NODATA, 'F', NULL, "fixed-strings", "patterns are sets of newline-separated strings" },
344 { OP_FILELIST, 'f', &pattern_files_data, "file=path", "read patterns from file" },
345 { OP_FILELIST, N_FILE_LIST, &file_lists_data, "file-list=path","read files to search from file" },
346 { OP_NODATA, N_FOFFSETS, NULL, "file-offsets", "output file offsets, not text" },
347 { OP_NODATA, 'H', NULL, "with-filename", "force the prefixing filename on output" },
348 { OP_NODATA, 'h', NULL, "no-filename", "suppress the prefixing filename on output" },
349 { OP_NODATA, 'I', NULL, "", "treat binary files as not matching (ignore)" },
350 { OP_NODATA, 'i', NULL, "ignore-case", "ignore case distinctions" },
351 #ifdef SUPPORT_PCREGREP_JIT
352 { OP_NODATA, N_NOJIT, NULL, "no-jit", "do not use just-in-time compiler optimization" },
353 #else
354 { OP_NODATA, N_NOJIT, NULL, "no-jit", "ignored: this pcregrep does not support JIT" },
355 #endif
356 { OP_NODATA, 'l', NULL, "files-with-matches", "print only FILE names containing matches" },
357 { OP_NODATA, 'L', NULL, "files-without-match","print only FILE names not containing matches" },
358 { OP_STRING, N_LABEL, &stdin_name, "label=name", "set name for standard input" },
359 { OP_NODATA, N_LBUFFER, NULL, "line-buffered", "use line buffering" },
360 { OP_NODATA, N_LOFFSETS, NULL, "line-offsets", "output line numbers and offsets, not text" },
361 { OP_STRING, N_LOCALE, &locale, "locale=locale", "use the named locale" },
362 { OP_LONGNUMBER, N_M_LIMIT, &match_limit, "match-limit=number", "set PCRE match limit option" },
363 { OP_LONGNUMBER, N_M_LIMIT_REC, &match_limit_recursion, "recursion-limit=number", "set PCRE match recursion limit option" },
364 { OP_NODATA, 'M', NULL, "multiline", "run in multiline mode" },
365 { OP_STRING, 'N', &newline, "newline=type", "set newline type (CR, LF, CRLF, ANYCRLF or ANY)" },
366 { OP_NODATA, 'n', NULL, "line-number", "print line number with output lines" },
367 { OP_OP_NUMBERS, 'o', &only_matching_data, "only-matching=n", "show only the part of the line that matched" },
368 { OP_STRING, N_OM_SEPARATOR, &om_separator, "om-separator=text", "set separator for multiple -o output" },
369 { OP_NODATA, 'q', NULL, "quiet", "suppress output, just set return code" },
370 { OP_NODATA, 'r', NULL, "recursive", "recursively scan sub-directories" },
371 { OP_PATLIST, N_EXCLUDE,&exclude_patdata, "exclude=pattern","exclude matching files when recursing" },
372 { OP_PATLIST, N_INCLUDE,&include_patdata, "include=pattern","include matching files when recursing" },
373 { OP_PATLIST, N_EXCLUDE_DIR,&exclude_dir_patdata, "exclude-dir=pattern","exclude matching directories when recursing" },
374 { OP_PATLIST, N_INCLUDE_DIR,&include_dir_patdata, "include-dir=pattern","include matching directories when recursing" },
375 { OP_FILELIST, N_EXCLUDE_FROM,&exclude_from_data, "exclude-from=path", "read exclude list from file" },
376 { OP_FILELIST, N_INCLUDE_FROM,&include_from_data, "include-from=path", "read include list from file" },
377
378 /* These two were accidentally implemented with underscores instead of
379 hyphens in the option names. As this was not discovered for several releases,
380 the incorrect versions are left in the table for compatibility. However, the
381 --help function misses out any option that has an underscore in its name. */
382
383 { OP_PATLIST, N_EXCLUDE_DIR,&exclude_dir_patdata, "exclude_dir=pattern","exclude matching directories when recursing" },
384 { OP_PATLIST, N_INCLUDE_DIR,&include_dir_patdata, "include_dir=pattern","include matching directories when recursing" },
385
386 #ifdef JFRIEDL_DEBUG
387 { OP_OP_NUMBER, 'S', &S_arg, "jeffS", "replace matched (sub)string with X" },
388 #endif
389 { OP_NODATA, 's', NULL, "no-messages", "suppress error messages" },
390 { OP_NODATA, 'u', NULL, "utf-8", "use UTF-8 mode" },
391 { OP_NODATA, 'V', NULL, "version", "print version information and exit" },
392 { OP_NODATA, 'v', NULL, "invert-match", "select non-matching lines" },
393 { OP_NODATA, 'w', NULL, "word-regex(p)", "force patterns to match only as words" },
394 { OP_NODATA, 'x', NULL, "line-regex(p)", "force patterns to match only whole lines" },
395 { OP_NODATA, 0, NULL, NULL, NULL }
396 };
397
/* Text to be put before and after a pattern, selected by the -w, -x, and -F
options, which set the 1, 2, and 4 bits in process_options, respectively. The
combination of -w and -x has the same effect as -x on its own, so the entries
for the two cases are the same. The longest prefix+suffix is 10 characters,
which is the allowance that is made on top of MAXPATLEN; if anything longer is
added, that allowance must be adjusted. */

static const char *prefix[] = {
  "",           /* 0: none */
  "\\b",        /* 1: -w */
  "^(?:",       /* 2: -x */
  "^(?:",       /* 3: -w -x */
  "\\Q",        /* 4: -F */
  "\\b\\Q",     /* 5: -F -w */
  "^(?:\\Q",    /* 6: -F -x */
  "^(?:\\Q"     /* 7: -F -w -x */
};

static const char *suffix[] = {
  "",           /* 0: none */
  "\\b",        /* 1: -w */
  ")$",         /* 2: -x */
  ")$",         /* 3: -w -x */
  "\\E",        /* 4: -F */
  "\\E\\b",     /* 5: -F -w */
  "\\E)$",      /* 6: -F -x */
  "\\E)$"       /* 7: -F -w -x */
};
410
/* Tables for handling UTF-8; they are needed only when the newline setting is
"any". NOTE(review): the first looks like a set of masks for the data bits in
the first byte of a character, and the second like a count of additional bytes
indexed by the low six bits of a first byte - confirm against the code that
uses them. */

const int utf8_table3[] = {
  0xff,
  0x1f,
  0x0f,
  0x07,
  0x03,
  0x01
};

const char utf8_table4[] = {
  1, 1, 1, 1, 1, 1, 1, 1,   1, 1, 1, 1, 1, 1, 1, 1,
  1, 1, 1, 1, 1, 1, 1, 1,   1, 1, 1, 1, 1, 1, 1, 1,
  2, 2, 2, 2, 2, 2, 2, 2,   2, 2, 2, 2, 2, 2, 2, 2,
  3, 3, 3, 3, 3, 3, 3, 3,   4, 4, 4, 4, 5, 5, 5, 5
};
420
421
422
423 /*************************************************
424 * Exit from the program *
425 *************************************************/
426
427 /* If there has been a resource error, give a suitable message.
428
429 Argument: the return code
430 Returns: does not return
431 */
432
433 static void
434 pcregrep_exit(int rc)
435 {
436 if (resource_error)
437 {
438 fprintf(stderr, "pcregrep: Error %d, %d or %d means that a resource limit "
439 "was exceeded.\n", PCRE_ERROR_MATCHLIMIT, PCRE_ERROR_RECURSIONLIMIT,
440 PCRE_ERROR_JIT_STACKLIMIT);
441 fprintf(stderr, "pcregrep: Check your regex for nested unlimited loops.\n");
442 }
443 exit(rc);
444 }
445
446
447 /*************************************************
448 * Add item to chain of patterns *
449 *************************************************/
450
451 /* Used to add an item onto a chain, or just return an unconnected item if the
452 "after" argument is NULL.
453
454 Arguments:
455 s pattern string to add
456 after if not NULL points to item to insert after
457
458 Returns: new pattern block or NULL on error
459 */
460
461 static patstr *
462 add_pattern(char *s, patstr *after)
463 {
464 patstr *p = (patstr *)malloc(sizeof(patstr));
465 if (p == NULL)
466 {
467 fprintf(stderr, "pcregrep: malloc failed\n");
468 pcregrep_exit(2);
469 }
470 if (strlen(s) > MAXPATLEN)
471 {
472 fprintf(stderr, "pcregrep: pattern is too long (limit is %d bytes)\n",
473 MAXPATLEN);
474 free(p);
475 return NULL;
476 }
477 p->next = NULL;
478 p->string = s;
479 p->compiled = NULL;
480 p->hint = NULL;
481
482 if (after != NULL)
483 {
484 p->next = after->next;
485 after->next = p;
486 }
487 return p;
488 }
489
490
491 /*************************************************
492 * Free chain of patterns *
493 *************************************************/
494
495 /* Used for several chains of patterns.
496
497 Argument: pointer to start of chain
498 Returns: nothing
499 */
500
501 static void
502 free_pattern_chain(patstr *pc)
503 {
504 while (pc != NULL)
505 {
506 patstr *p = pc;
507 pc = p->next;
508 if (p->hint != NULL) pcre_free_study(p->hint);
509 if (p->compiled != NULL) pcre_free(p->compiled);
510 free(p);
511 }
512 }
513
514
515 /*************************************************
516 * Free chain of file names *
517 *************************************************/
518
519 /*
520 Argument: pointer to start of chain
521 Returns: nothing
522 */
523
524 static void
525 free_file_chain(fnstr *fn)
526 {
527 while (fn != NULL)
528 {
529 fnstr *f = fn;
530 fn = f->next;
531 free(f);
532 }
533 }
534
535
536 /*************************************************
537 * OS-specific functions *
538 *************************************************/
539
540 /* These functions are defined so that they can be made system specific.
541 At present there are versions for Unix-style environments, Windows, native
542 z/OS, and "no support". */
543
544
545 /************* Directory scanning Unix-style and z/OS ***********/
546
547 #if (defined HAVE_SYS_STAT_H && defined HAVE_DIRENT_H && defined HAVE_SYS_TYPES_H) || defined NATIVE_ZOS
548 #include <sys/types.h>
549 #include <sys/stat.h>
550 #include <dirent.h>
551
552 #if defined NATIVE_ZOS
553 /************* Directory and PDS/E scanning for z/OS ***********/
554 /************* z/OS looks mostly like Unix with USS ************/
555 /* However, z/OS needs the #include statements in this header */
556 #include "pcrzosfs.h"
557 /* That header is not included in the main PCRE distribution because
558 other apparatus is needed to compile pcregrep for z/OS. The header
559 can be found in the special z/OS distribution, which is available
560 from www.zaconsultants.net or from www.cbttape.org. */
561 #endif
562
/* Path components are separated by a slash, and an open directory is
represented by the standard DIR type. */

#define FILESEP '/'
typedef DIR directory_type;
565
/* Test whether a path names a directory.

Argument:   the path to test
Returns:    1 if stat() succeeds and reports a directory, otherwise 0
*/

static int
isdirectory(char *filename)
{
  struct stat info;

  /* When stat() fails, say "not a directory", in the expectation that opening
  as a file will fail */

  if (stat(filename, &info) < 0) return 0;
  return S_ISDIR(info.st_mode) != 0;
}
574
575 static directory_type *
576 opendirectory(char *filename)
577 {
578 return opendir(filename);
579 }
580
581 static char *
582 readdirectory(directory_type *dir)
583 {
584 for (;;)
585 {
586 struct dirent *dent = readdir(dir);
587 if (dent == NULL) return NULL;
588 if (strcmp(dent->d_name, ".") != 0 && strcmp(dent->d_name, "..") != 0)
589 return dent->d_name;
590 }
591 /* Control never reaches here */
592 }
593
594 static void
595 closedirectory(directory_type *dir)
596 {
597 closedir(dir);
598 }
599
600
601 /************* Test for regular file, Unix-style **********/
602
/* Test whether a path names a regular file.

Argument:   the path to test
Returns:    1 if stat() fails or reports a regular file, otherwise 0
*/

static int
isregfile(char *filename)
{
  struct stat info;

  /* When stat() fails, say "regular", in the expectation that opening as a
  file will fail */

  if (stat(filename, &info) < 0) return 1;
  return S_ISREG(info.st_mode) != 0;
}
611
612
613 #if defined NATIVE_ZOS
614 /************* Test for a terminal in z/OS **********/
615 /* isatty() does not work in a TSO environment, so always give FALSE.*/
616
617 static BOOL
618 is_stdout_tty(void)
619 {
620 return FALSE;
621 }
622
623 static BOOL
624 is_file_tty(FILE *f)
625 {
626 return FALSE;
627 }
628
629
630 /************* Test for a terminal, Unix-style **********/
631
632 #else
633 static BOOL
634 is_stdout_tty(void)
635 {
636 return isatty(fileno(stdout));
637 }
638
639 static BOOL
640 is_file_tty(FILE *f)
641 {
642 return isatty(fileno(f));
643 }
644 #endif
645
646 /* End of Unix-style or native z/OS environment functions. */
647
648
649 /************* Directory scanning in Windows ***********/
650
651 /* I (Philip Hazel) have no means of testing this code. It was contributed by
652 Lionel Fourquaux. David Burgess added a patch to define INVALID_FILE_ATTRIBUTES
653 when it did not exist. David Byron added a patch that moved the #include of
654 <windows.h> to before the INVALID_FILE_ATTRIBUTES definition rather than after.
655 The double test below stops gcc 4.4.4 grumbling that HAVE_WINDOWS_H is
656 undefined when it is indeed undefined. */
657
658 #elif defined HAVE_WINDOWS_H && HAVE_WINDOWS_H
659
660 #ifndef STRICT
661 # define STRICT
662 #endif
663 #ifndef WIN32_LEAN_AND_MEAN
664 # define WIN32_LEAN_AND_MEAN
665 #endif
666
667 #include <windows.h>
668
669 #ifndef INVALID_FILE_ATTRIBUTES
670 #define INVALID_FILE_ATTRIBUTES 0xFFFFFFFF
671 #endif
672
673 typedef struct directory_type
674 {
675 HANDLE handle;
676 BOOL first;
677 WIN32_FIND_DATA data;
678 } directory_type;
679
680 #define FILESEP '/'
681
682 int
683 isdirectory(char *filename)
684 {
685 DWORD attr = GetFileAttributes(filename);
686 if (attr == INVALID_FILE_ATTRIBUTES)
687 return 0;
688 return (attr & FILE_ATTRIBUTE_DIRECTORY) != 0;
689 }
690
691 directory_type *
692 opendirectory(char *filename)
693 {
694 size_t len;
695 char *pattern;
696 directory_type *dir;
697 DWORD err;
698 len = strlen(filename);
699 pattern = (char *)malloc(len + 3);
700 dir = (directory_type *)malloc(sizeof(*dir));
701 if ((pattern == NULL) || (dir == NULL))
702 {
703 fprintf(stderr, "pcregrep: malloc failed\n");
704 pcregrep_exit(2);
705 }
706 memcpy(pattern, filename, len);
707 memcpy(&(pattern[len]), "\\*", 3);
708 dir->handle = FindFirstFile(pattern, &(dir->data));
709 if (dir->handle != INVALID_HANDLE_VALUE)
710 {
711 free(pattern);
712 dir->first = TRUE;
713 return dir;
714 }
715 err = GetLastError();
716 free(pattern);
717 free(dir);
718 errno = (err == ERROR_ACCESS_DENIED) ? EACCES : ENOENT;
719 return NULL;
720 }
721
722 char *
723 readdirectory(directory_type *dir)
724 {
725 for (;;)
726 {
727 if (!dir->first)
728 {
729 if (!FindNextFile(dir->handle, &(dir->data)))
730 return NULL;
731 }
732 else
733 {
734 dir->first = FALSE;
735 }
736 if (strcmp(dir->data.cFileName, ".") != 0 && strcmp(dir->data.cFileName, "..") != 0)
737 return dir->data.cFileName;
738 }
739 #ifndef _MSC_VER
740 return NULL; /* Keep compiler happy; never executed */
741 #endif
742 }
743
744 void
745 closedirectory(directory_type *dir)
746 {
747 FindClose(dir->handle);
748 free(dir);
749 }
750
751
752 /************* Test for regular file in Windows **********/
753
/* I don't know how to do this, or if it can be done; any path that is not a
directory is assumed to be a regular file.

Argument:   the path to test
Returns:    1 if isdirectory() gives 0 for the path, otherwise 0
*/

int
isregfile(char *filename)
{
  return isdirectory(filename) == 0;
}
761
762
763 /************* Test for a terminal in Windows **********/
764
765 /* I don't know how to do this; assume never */
766
767 static BOOL
768 is_stdout_tty(void)
769 {
770 return FALSE;
771 }
772
773 static BOOL
774 is_file_tty(FILE *f)
775 {
776 return FALSE;
777 }
778
779 /* End of Windows functions */
780
781
782 /************* Directory scanning when we can't do it ***********/
783
784 /* The type is void, and apart from isdirectory(), the functions do nothing. */
785
786 #else
787
/* There is no path separator, and no real directory type. */

#define FILESEP 0
typedef void directory_type;

/* Nothing is ever reported to be a directory. */

int
isdirectory(char *filename)
{
  (void)filename;
  return 0;
}

/* No directory can be opened. */

directory_type *
opendirectory(char *filename)
{
  (void)filename;
  return (directory_type *)0;
}

/* There are never any entries to read. */

char *
readdirectory(directory_type *dir)
{
  (void)dir;
  return (char *)0;
}

/* There is never anything to close. */

void
closedirectory(directory_type *dir)
{
  (void)dir;
}
795
796
797 /************* Test for regular file when we can't do it **********/
798
/* There is no way of testing, so every file is assumed to be regular. */

int
isregfile(char *filename)
{
  (void)filename;
  return 1;
}
802
803
804 /************* Test for a terminal when we can't do it **********/
805
806 static BOOL
807 is_stdout_tty(void)
808 {
809 return FALSE;
810 }
811
812 static BOOL
813 is_file_tty(FILE *f)
814 {
815 return FALSE;
816 }
817
818 #endif /* End of system-specific functions */
819
820
821
822 #ifndef HAVE_STRERROR
823 /*************************************************
824 * Provide strerror() for non-ANSI libraries *
825 *************************************************/
826
/* Some old-fashioned systems that are still around (e.g. SunOS4) do not have
strerror() in their libraries, but the same facility can be had from the
sys_errlist table, which has sys_nerr entries.

Argument:   an error number
Returns:    the text for the error, or a fixed message for a number that is
              outside the table
*/

extern int sys_nerr;
extern char *sys_errlist[];

char *
strerror(int n)
{
  return (n >= 0 && n < sys_nerr)? sys_errlist[n] : "unknown error number";
}
840 #endif /* HAVE_STRERROR */
841
842
843
844 /*************************************************
845 * Usage function *
846 *************************************************/
847
848 static int
849 usage(int rc)
850 {
851 option_item *op;
852 fprintf(stderr, "Usage: pcregrep [-");
853 for (op = optionlist; op->one_char != 0; op++)
854 {
855 if (op->one_char > 0) fprintf(stderr, "%c", op->one_char);
856 }
857 fprintf(stderr, "] [long options] [pattern] [files]\n");
858 fprintf(stderr, "Type `pcregrep --help' for more information and the long "
859 "options.\n");
860 return rc;
861 }
862
863
864
865 /*************************************************
866 * Help function *
867 *************************************************/
868
869 static void
870 help(void)
871 {
872 option_item *op;
873
874 printf("Usage: pcregrep [OPTION]... [PATTERN] [FILE1 FILE2 ...]\n");
875 printf("Search for PATTERN in each FILE or standard input.\n");
876 printf("PATTERN must be present if neither -e nor -f is used.\n");
877 printf("\"-\" can be used as a file name to mean STDIN.\n");
878
879 #ifdef SUPPORT_LIBZ
880 printf("Files whose names end in .gz are read using zlib.\n");
881 #endif
882
883 #ifdef SUPPORT_LIBBZ2
884 printf("Files whose names end in .bz2 are read using bzlib2.\n");
885 #endif
886
887 #if defined SUPPORT_LIBZ || defined SUPPORT_LIBBZ2
888 printf("Other files and the standard input are read as plain files.\n\n");
889 #else
890 printf("All files are read as plain files, without any interpretation.\n\n");
891 #endif
892
893 printf("Example: pcregrep -i 'hello.*world' menu.h main.c\n\n");
894 printf("Options:\n");
895
896 for (op = optionlist; op->one_char != 0; op++)
897 {
898 int n;
899 char s[4];
900
901 /* Two options were accidentally implemented and documented with underscores
902 instead of hyphens in their names, something that was not noticed for quite a
903 few releases. When fixing this, I left the underscored versions in the list
904 in case people were using them. However, we don't want to display them in the
905 help data. There are no other options that contain underscores, and we do not
906 expect ever to implement such options. Therefore, just omit any option that
907 contains an underscore. */
908
909 if (strchr(op->long_name, '_') != NULL) continue;
910
911 if (op->one_char > 0 && (op->long_name)[0] == 0)
912 n = 31 - printf(" -%c", op->one_char);
913 else
914 {
915 if (op->one_char > 0) sprintf(s, "-%c,", op->one_char);
916 else strcpy(s, " ");
917 n = 31 - printf(" %s --%s", s, op->long_name);
918 }
919
920 if (n < 1) n = 1;
921 printf("%.*s%s\n", n, " ", op->help_text);
922 }
923
924 printf("\nNumbers may be followed by K or M, e.g. --buffer-size=100K.\n");
925 printf("The default value for --buffer-size is %d.\n", PCREGREP_BUFSIZE);
926 printf("When reading patterns or file names from a file, trailing white\n");
927 printf("space is removed and blank lines are ignored.\n");
928 printf("The maximum size of any pattern is %d bytes.\n", MAXPATLEN);
929
930 printf("\nWith no FILEs, read standard input. If fewer than two FILEs given, assume -h.\n");
931 printf("Exit status is 0 if any matches, 1 if no matches, and 2 if trouble.\n");
932 }
933
934
935
936 /*************************************************
937 * Test exclude/includes *
938 *************************************************/
939
940 /* If any exclude pattern matches, the path is excluded. Otherwise, unless
941 there are no includes, the path must match an include pattern.
942
943 Arguments:
944 path the path to be matched
945 ip the chain of include patterns
946 ep the chain of exclude patterns
947
948 Returns: TRUE if the path is not excluded
949 */
950
951 static BOOL
952 test_incexc(char *path, patstr *ip, patstr *ep)
953 {
954 int plen = strlen(path);
955
956 for (; ep != NULL; ep = ep->next)
957 {
958 if (pcre_exec(ep->compiled, NULL, path, plen, 0, 0, NULL, 0) >= 0)
959 return FALSE;
960 }
961
962 if (ip == NULL) return TRUE;
963
964 for (; ip != NULL; ip = ip->next)
965 {
966 if (pcre_exec(ip->compiled, NULL, path, plen, 0, 0, NULL, 0) >= 0)
967 return TRUE;
968 }
969
970 return FALSE;
971 }
972
973
974
975 /*************************************************
976 * Decode integer argument value *
977 *************************************************/
978
979 /* Integer arguments can be followed by K or M. Avoid the use of strtoul()
980 because SunOS4 doesn't have it. This is used only for unpicking arguments, so
981 just keep it simple.
982
983 Arguments:
984 option_data the option data string
985 op the option item (for error messages)
986 longop TRUE if option given in long form
987
988 Returns: a long integer
989 */
990
991 static long int
992 decode_number(char *option_data, option_item *op, BOOL longop)
993 {
994 unsigned long int n = 0;
995 char *endptr = option_data;
996 while (*endptr != 0 && isspace((unsigned char)(*endptr))) endptr++;
997 while (isdigit((unsigned char)(*endptr)))
998 n = n * 10 + (int)(*endptr++ - '0');
999 if (toupper(*endptr) == 'K')
1000 {
1001 n *= 1024;
1002 endptr++;
1003 }
1004 else if (toupper(*endptr) == 'M')
1005 {
1006 n *= 1024*1024;
1007 endptr++;
1008 }
1009
1010 if (*endptr != 0) /* Error */
1011 {
1012 if (longop)
1013 {
1014 char *equals = strchr(op->long_name, '=');
1015 int nlen = (equals == NULL)? (int)strlen(op->long_name) :
1016 (int)(equals - op->long_name);
1017 fprintf(stderr, "pcregrep: Malformed number \"%s\" after --%.*s\n",
1018 option_data, nlen, op->long_name);
1019 }
1020 else
1021 fprintf(stderr, "pcregrep: Malformed number \"%s\" after -%c\n",
1022 option_data, op->one_char);
1023 pcregrep_exit(usage(2));
1024 }
1025
1026 return n;
1027 }
1028
1029
1030
1031 /*************************************************
1032 * Add item to a chain of numbers *
1033 *************************************************/
1034
1035 /* Used to add an item onto a chain, or just return an unconnected item if the
1036 "after" argument is NULL.
1037
1038 Arguments:
1039 n the number to add
1040 after if not NULL points to item to insert after
1041
1042 Returns: new number block
1043 */
1044
1045 static omstr *
1046 add_number(int n, omstr *after)
1047 {
1048 omstr *om = (omstr *)malloc(sizeof(omstr));
1049
1050 if (om == NULL)
1051 {
1052 fprintf(stderr, "pcregrep: malloc failed\n");
1053 pcregrep_exit(2);
1054 }
1055 om->next = NULL;
1056 om->groupnum = n;
1057
1058 if (after != NULL)
1059 {
1060 om->next = after->next;
1061 after->next = om;
1062 }
1063 return om;
1064 }
1065
1066
1067
1068 /*************************************************
1069 * Read one line of input *
1070 *************************************************/
1071
/* Normally, input is read using fread() into a large buffer, so many lines may
be read at once. For tty input that would mean no output appears until a lot
of input has been typed, so tty input is handled a line at a time by this
function instead. fgets() is not suitable because the data may contain binary
zeros, which would make it impossible to tell how many characters were read;
reading byte by byte gives an exact count.

Reading stops after a newline has been stored, when "length" characters have
been stored, or at end of file, whichever comes first.

Arguments:
  buffer     the buffer to read into
  length     the maximum number of characters to read
  f          the file

Returns:     the number of characters read, zero at end of file
*/

static unsigned int
read_one_line(char *buffer, int length, FILE *f)
{
int count = 0;

for (;;)
  {
  int ch = fgetc(f);
  if (ch == EOF) break;
  buffer[count++] = ch;
  if (ch == '\n') break;        /* Newline is kept in the buffer */
  if (count >= length) break;   /* No room for any more */
  }

return count;
}
1099
1100
1101
1102 /*************************************************
1103 * Find end of line *
1104 *************************************************/
1105
1106 /* The length of the endline sequence that is found is set via lenptr. This may
1107 be zero at the very end of the file if there is no line-ending sequence there.
1108
1109 Arguments:
1110 p current position in line
1111 endptr end of available data
1112 lenptr where to put the length of the eol sequence
1113
1114 Returns: pointer after the last byte of the line,
1115 including the newline byte(s)
1116 */
1117
1118 static char *
1119 end_of_line(char *p, char *endptr, int *lenptr)
1120 {
1121 switch(endlinetype)
1122 {
1123 default: /* Just in case */
1124 case EL_LF:
1125 while (p < endptr && *p != '\n') p++;
1126 if (p < endptr)
1127 {
1128 *lenptr = 1;
1129 return p + 1;
1130 }
1131 *lenptr = 0;
1132 return endptr;
1133
1134 case EL_CR:
1135 while (p < endptr && *p != '\r') p++;
1136 if (p < endptr)
1137 {
1138 *lenptr = 1;
1139 return p + 1;
1140 }
1141 *lenptr = 0;
1142 return endptr;
1143
1144 case EL_CRLF:
1145 for (;;)
1146 {
1147 while (p < endptr && *p != '\r') p++;
1148 if (++p >= endptr)
1149 {
1150 *lenptr = 0;
1151 return endptr;
1152 }
1153 if (*p == '\n')
1154 {
1155 *lenptr = 2;
1156 return p + 1;
1157 }
1158 }
1159 break;
1160
1161 case EL_ANYCRLF:
1162 while (p < endptr)
1163 {
1164 int extra = 0;
1165 register int c = *((unsigned char *)p);
1166
1167 if (utf8 && c >= 0xc0)
1168 {
1169 int gcii, gcss;
1170 extra = utf8_table4[c & 0x3f]; /* Number of additional bytes */
1171 gcss = 6*extra;
1172 c = (c & utf8_table3[extra]) << gcss;
1173 for (gcii = 1; gcii <= extra; gcii++)
1174 {
1175 gcss -= 6;
1176 c |= (p[gcii] & 0x3f) << gcss;
1177 }
1178 }
1179
1180 p += 1 + extra;
1181
1182 switch (c)
1183 {
1184 case '\n':
1185 *lenptr = 1;
1186 return p;
1187
1188 case '\r':
1189 if (p < endptr && *p == '\n')
1190 {
1191 *lenptr = 2;
1192 p++;
1193 }
1194 else *lenptr = 1;
1195 return p;
1196
1197 default:
1198 break;
1199 }
1200 } /* End of loop for ANYCRLF case */
1201
1202 *lenptr = 0; /* Must have hit the end */
1203 return endptr;
1204
1205 case EL_ANY:
1206 while (p < endptr)
1207 {
1208 int extra = 0;
1209 register int c = *((unsigned char *)p);
1210
1211 if (utf8 && c >= 0xc0)
1212 {
1213 int gcii, gcss;
1214 extra = utf8_table4[c & 0x3f]; /* Number of additional bytes */
1215 gcss = 6*extra;
1216 c = (c & utf8_table3[extra]) << gcss;
1217 for (gcii = 1; gcii <= extra; gcii++)
1218 {
1219 gcss -= 6;
1220 c |= (p[gcii] & 0x3f) << gcss;
1221 }
1222 }
1223
1224 p += 1 + extra;
1225
1226 switch (c)
1227 {
1228 case '\n': /* LF */
1229 case '\v': /* VT */
1230 case '\f': /* FF */
1231 *lenptr = 1;
1232 return p;
1233
1234 case '\r': /* CR */
1235 if (p < endptr && *p == '\n')
1236 {
1237 *lenptr = 2;
1238 p++;
1239 }
1240 else *lenptr = 1;
1241 return p;
1242
1243 #ifndef EBCDIC
1244 case 0x85: /* Unicode NEL */
1245 *lenptr = utf8? 2 : 1;
1246 return p;
1247
1248 case 0x2028: /* Unicode LS */
1249 case 0x2029: /* Unicode PS */
1250 *lenptr = 3;
1251 return p;
1252 #endif /* Not EBCDIC */
1253
1254 default:
1255 break;
1256 }
1257 } /* End of loop for ANY case */
1258
1259 *lenptr = 0; /* Must have hit the end */
1260 return endptr;
1261 } /* End of overall switch */
1262 }
1263
1264
1265
1266 /*************************************************
1267 * Find start of previous line *
1268 *************************************************/
1269
1270 /* This is called when looking back for before lines to print.
1271
1272 Arguments:
1273 p start of the subsequent line
1274 startptr start of available data
1275
1276 Returns: pointer to the start of the previous line
1277 */
1278
1279 static char *
1280 previous_line(char *p, char *startptr)
1281 {
1282 switch(endlinetype)
1283 {
1284 default: /* Just in case */
1285 case EL_LF:
1286 p--;
1287 while (p > startptr && p[-1] != '\n') p--;
1288 return p;
1289
1290 case EL_CR:
1291 p--;
1292 while (p > startptr && p[-1] != '\n') p--;
1293 return p;
1294
1295 case EL_CRLF:
1296 for (;;)
1297 {
1298 p -= 2;
1299 while (p > startptr && p[-1] != '\n') p--;
1300 if (p <= startptr + 1 || p[-2] == '\r') return p;
1301 }
1302 /* Control can never get here */
1303
1304 case EL_ANY:
1305 case EL_ANYCRLF:
1306 if (*(--p) == '\n' && p > startptr && p[-1] == '\r') p--;
1307 if (utf8) while ((*p & 0xc0) == 0x80) p--;
1308
1309 while (p > startptr)
1310 {
1311 register unsigned int c;
1312 char *pp = p - 1;
1313
1314 if (utf8)
1315 {
1316 int extra = 0;
1317 while ((*pp & 0xc0) == 0x80) pp--;
1318 c = *((unsigned char *)pp);
1319 if (c >= 0xc0)
1320 {
1321 int gcii, gcss;
1322 extra = utf8_table4[c & 0x3f]; /* Number of additional bytes */
1323 gcss = 6*extra;
1324 c = (c & utf8_table3[extra]) << gcss;
1325 for (gcii = 1; gcii <= extra; gcii++)
1326 {
1327 gcss -= 6;
1328 c |= (pp[gcii] & 0x3f) << gcss;
1329 }
1330 }
1331 }
1332 else c = *((unsigned char *)pp);
1333
1334 if (endlinetype == EL_ANYCRLF) switch (c)
1335 {
1336 case '\n': /* LF */
1337 case '\r': /* CR */
1338 return p;
1339
1340 default:
1341 break;
1342 }
1343
1344 else switch (c)
1345 {
1346 case '\n': /* LF */
1347 case '\v': /* VT */
1348 case '\f': /* FF */
1349 case '\r': /* CR */
1350 #ifndef EBCDIE
1351 case 0x85: /* Unicode NEL */
1352 case 0x2028: /* Unicode LS */
1353 case 0x2029: /* Unicode PS */
1354 #endif /* Not EBCDIC */
1355 return p;
1356
1357 default:
1358 break;
1359 }
1360
1361 p = pp; /* Back one character */
1362 } /* End of loop for ANY case */
1363
1364 return startptr; /* Hit start of data */
1365 } /* End of overall switch */
1366 }
1367
1368
1369
1370
1371
1372 /*************************************************
1373 * Print the previous "after" lines *
1374 *************************************************/
1375
1376 /* This is called if we are about to lose said lines because of buffer filling,
1377 and at the end of the file. The data in the line is written using fwrite() so
1378 that a binary zero does not terminate it.
1379
1380 Arguments:
1381 lastmatchnumber the number of the last matching line, plus one
1382 lastmatchrestart where we restarted after the last match
1383 endptr end of available data
1384 printname filename for printing
1385
1386 Returns: nothing
1387 */
1388
1389 static void
1390 do_after_lines(unsigned long int lastmatchnumber, char *lastmatchrestart,
1391 char *endptr, char *printname)
1392 {
1393 if (after_context > 0 && lastmatchnumber > 0)
1394 {
1395 int count = 0;
1396 while (lastmatchrestart < endptr && count++ < after_context)
1397 {
1398 int ellength;
1399 char *pp = lastmatchrestart;
1400 if (printname != NULL) fprintf(stdout, "%s-", printname);
1401 if (number) fprintf(stdout, "%lu-", lastmatchnumber++);
1402 pp = end_of_line(pp, endptr, &ellength);
1403 FWRITE(lastmatchrestart, 1, pp - lastmatchrestart, stdout);
1404 lastmatchrestart = pp;
1405 }
1406 hyphenpending = TRUE;
1407 }
1408 }
1409
1410
1411
1412 /*************************************************
1413 * Apply patterns to subject till one matches *
1414 *************************************************/
1415
1416 /* This function is called to run through all patterns, looking for a match. It
1417 is used multiple times for the same subject when colouring is enabled, in order
1418 to find all possible matches.
1419
1420 Arguments:
1421 matchptr the start of the subject
1422 length the length of the subject to match
1423 options options for pcre_exec
1424 startoffset where to start matching
1425 offsets the offets vector to fill in
1426 mrc address of where to put the result of pcre_exec()
1427
1428 Returns: TRUE if there was a match
1429 FALSE if there was no match
1430 invert if there was a non-fatal error
1431 */
1432
1433 static BOOL
1434 match_patterns(char *matchptr, size_t length, unsigned int options,
1435 int startoffset, int *offsets, int *mrc)
1436 {
1437 int i;
1438 size_t slen = length;
1439 patstr *p = patterns;
1440 const char *msg = "this text:\n\n";
1441
1442 if (slen > 200)
1443 {
1444 slen = 200;
1445 msg = "text that starts:\n\n";
1446 }
1447 for (i = 1; p != NULL; p = p->next, i++)
1448 {
1449 *mrc = pcre_exec(p->compiled, p->hint, matchptr, (int)length,
1450 startoffset, options, offsets, OFFSET_SIZE);
1451 if (*mrc >= 0) return TRUE;
1452 if (*mrc == PCRE_ERROR_NOMATCH) continue;
1453 fprintf(stderr, "pcregrep: pcre_exec() gave error %d while matching ", *mrc);
1454 if (patterns->next != NULL) fprintf(stderr, "pattern number %d to ", i);
1455 fprintf(stderr, "%s", msg);
1456 FWRITE(matchptr, 1, slen, stderr); /* In case binary zero included */
1457 fprintf(stderr, "\n\n");
1458 if (*mrc == PCRE_ERROR_MATCHLIMIT || *mrc == PCRE_ERROR_RECURSIONLIMIT ||
1459 *mrc == PCRE_ERROR_JIT_STACKLIMIT)
1460 resource_error = TRUE;
1461 if (error_count++ > 20)
1462 {
1463 fprintf(stderr, "pcregrep: Too many errors - abandoned.\n");
1464 pcregrep_exit(2);
1465 }
1466 return invert; /* No more matching; don't show the line again */
1467 }
1468
1469 return FALSE; /* No match, no errors */
1470 }
1471
1472
1473
1474 /*************************************************
1475 * Grep an individual file *
1476 *************************************************/
1477
1478 /* This is called from grep_or_recurse() below. It uses a buffer that is three
1479 times the value of bufthird. The matching point is never allowed to stray into
1480 the top third of the buffer, thus keeping more of the file available for
1481 context printing or for multiline scanning. For large files, the pointer will
1482 be in the middle third most of the time, so the bottom third is available for
1483 "before" context printing.
1484
1485 Arguments:
1486 handle the fopened FILE stream for a normal file
1487 the gzFile pointer when reading is via libz
1488 the BZFILE pointer when reading is via libbz2
1489 frtype FR_PLAIN, FR_LIBZ, or FR_LIBBZ2
1490 filename the file name or NULL (for errors)
1491 printname the file name if it is to be printed for each match
1492 or NULL if the file name is not to be printed
1493 it cannot be NULL if filenames[_nomatch]_only is set
1494
1495 Returns: 0 if there was at least one match
1496 1 otherwise (no matches)
1497 2 if an overlong line is encountered
1498 3 if there is a read error on a .bz2 file
1499 */
1500
1501 static int
1502 pcregrep(void *handle, int frtype, char *filename, char *printname)
1503 {
1504 int rc = 1;
1505 int filepos = 0;
1506 int offsets[OFFSET_SIZE];
1507 unsigned long int linenumber = 1;
1508 unsigned long int lastmatchnumber = 0;
1509 unsigned long int count = 0;
1510 char *lastmatchrestart = NULL;
1511 char *ptr = main_buffer;
1512 char *endptr;
1513 size_t bufflength;
1514 BOOL binary = FALSE;
1515 BOOL endhyphenpending = FALSE;
1516 BOOL input_line_buffered = line_buffered;
1517 FILE *in = NULL; /* Ensure initialized */
1518
1519 #ifdef SUPPORT_LIBZ
1520 gzFile ingz = NULL;
1521 #endif
1522
1523 #ifdef SUPPORT_LIBBZ2
1524 BZFILE *inbz2 = NULL;
1525 #endif
1526
1527
1528 /* Do the first read into the start of the buffer and set up the pointer to end
1529 of what we have. In the case of libz, a non-zipped .gz file will be read as a
1530 plain file. However, if a .bz2 file isn't actually bzipped, the first read will
1531 fail. */
1532
1533 (void)frtype;
1534
1535 #ifdef SUPPORT_LIBZ
1536 if (frtype == FR_LIBZ)
1537 {
1538 ingz = (gzFile)handle;
1539 bufflength = gzread (ingz, main_buffer, bufsize);
1540 }
1541 else
1542 #endif
1543
1544 #ifdef SUPPORT_LIBBZ2
1545 if (frtype == FR_LIBBZ2)
1546 {
1547 inbz2 = (BZFILE *)handle;
1548 bufflength = BZ2_bzread(inbz2, main_buffer, bufsize);
1549 if ((int)bufflength < 0) return 2; /* Gotcha: bufflength is size_t; */
1550 } /* without the cast it is unsigned. */
1551 else
1552 #endif
1553
1554 {
1555 in = (FILE *)handle;
1556 if (is_file_tty(in)) input_line_buffered = TRUE;
1557 bufflength = input_line_buffered?
1558 read_one_line(main_buffer, bufsize, in) :
1559 fread(main_buffer, 1, bufsize, in);
1560 }
1561
1562 endptr = main_buffer + bufflength;
1563
1564 /* Unless binary-files=text, see if we have a binary file. This uses the same
1565 rule as GNU grep, namely, a search for a binary zero byte near the start of the
1566 file. */
1567
1568 if (binary_files != BIN_TEXT)
1569 {
1570 binary =
1571 memchr(main_buffer, 0, (bufflength > 1024)? 1024 : bufflength) != NULL;
1572 if (binary && binary_files == BIN_NOMATCH) return 1;
1573 }
1574
1575 /* Loop while the current pointer is not at the end of the file. For large
1576 files, endptr will be at the end of the buffer when we are in the middle of the
1577 file, but ptr will never get there, because as soon as it gets over 2/3 of the
1578 way, the buffer is shifted left and re-filled. */
1579
1580 while (ptr < endptr)
1581 {
1582 int endlinelength;
1583 int mrc = 0;
1584 int startoffset = 0;
1585 int prevoffsets[2];
1586 unsigned int options = 0;
1587 BOOL match;
1588 char *matchptr = ptr;
1589 char *t = ptr;
1590 size_t length, linelength;
1591
1592 prevoffsets[0] = prevoffsets[1] = -1;
1593
1594 /* At this point, ptr is at the start of a line. We need to find the length
1595 of the subject string to pass to pcre_exec(). In multiline mode, it is the
1596 length remainder of the data in the buffer. Otherwise, it is the length of
1597 the next line, excluding the terminating newline. After matching, we always
1598 advance by the length of the next line. In multiline mode the PCRE_FIRSTLINE
1599 option is used for compiling, so that any match is constrained to be in the
1600 first line. */
1601
1602 t = end_of_line(t, endptr, &endlinelength);
1603 linelength = t - ptr - endlinelength;
1604 length = multiline? (size_t)(endptr - ptr) : linelength;
1605
1606 /* Check to see if the line we are looking at extends right to the very end
1607 of the buffer without a line terminator. This means the line is too long to
1608 handle. */
1609
1610 if (endlinelength == 0 && t == main_buffer + bufsize)
1611 {
1612 fprintf(stderr, "pcregrep: line %lu%s%s is too long for the internal buffer\n"
1613 "pcregrep: check the --buffer-size option\n",
1614 linenumber,
1615 (filename == NULL)? "" : " of file ",
1616 (filename == NULL)? "" : filename);
1617 return 2;
1618 }
1619
1620 /* Extra processing for Jeffrey Friedl's debugging. */
1621
1622 #ifdef JFRIEDL_DEBUG
1623 if (jfriedl_XT || jfriedl_XR)
1624 {
1625 # include <sys/time.h>
1626 # include <time.h>
1627 struct timeval start_time, end_time;
1628 struct timezone dummy;
1629 int i;
1630
1631 if (jfriedl_XT)
1632 {
1633 unsigned long newlen = length * jfriedl_XT + strlen(jfriedl_prefix) + strlen(jfriedl_postfix);
1634 const char *orig = ptr;
1635 ptr = malloc(newlen + 1);
1636 if (!ptr) {
1637 printf("out of memory");
1638 pcregrep_exit(2);
1639 }
1640 endptr = ptr;
1641 strcpy(endptr, jfriedl_prefix); endptr += strlen(jfriedl_prefix);
1642 for (i = 0; i < jfriedl_XT; i++) {
1643 strncpy(endptr, orig, length);
1644 endptr += length;
1645 }
1646 strcpy(endptr, jfriedl_postfix); endptr += strlen(jfriedl_postfix);
1647 length = newlen;
1648 }
1649
1650 if (gettimeofday(&start_time, &dummy) != 0)
1651 perror("bad gettimeofday");
1652
1653
1654 for (i = 0; i < jfriedl_XR; i++)
1655 match = (pcre_exec(patterns->compiled, patterns->hint, ptr, length, 0,
1656 PCRE_NOTEMPTY, offsets, OFFSET_SIZE) >= 0);
1657
1658 if (gettimeofday(&end_time, &dummy) != 0)
1659 perror("bad gettimeofday");
1660
1661 double delta = ((end_time.tv_sec + (end_time.tv_usec / 1000000.0))
1662 -
1663 (start_time.tv_sec + (start_time.tv_usec / 1000000.0)));
1664
1665 printf("%s TIMER[%.4f]\n", match ? "MATCH" : "FAIL", delta);
1666 return 0;
1667 }
1668 #endif
1669
1670 /* We come back here after a match when show_only_matching is set, in order
1671 to find any further matches in the same line. This applies to
1672 --only-matching, --file-offsets, and --line-offsets. */
1673
1674 ONLY_MATCHING_RESTART:
1675
1676 /* Run through all the patterns until one matches or there is an error other
1677 than NOMATCH. This code is in a subroutine so that it can be re-used for
1678 finding subsequent matches when colouring matched lines. After finding one
1679 match, set PCRE_NOTEMPTY to disable any further matches of null strings in
1680 this line. */
1681
1682 match = match_patterns(matchptr, length, options, startoffset, offsets, &mrc);
1683 options = PCRE_NOTEMPTY;
1684
1685 /* If it's a match or a not-match (as required), do what's wanted. */
1686
1687 if (match != invert)
1688 {
1689 BOOL hyphenprinted = FALSE;
1690
1691 /* We've failed if we want a file that doesn't have any matches. */
1692
1693 if (filenames == FN_NOMATCH_ONLY) return 1;
1694
1695 /* If all we want is a yes/no answer, stop now. */
1696
1697 if (quiet) return 0;
1698
1699 /* Just count if just counting is wanted. */
1700
1701 else if (count_only) count++;
1702
1703 /* When handling a binary file and binary-files==binary, the "binary"
1704 variable will be set true (it's false in all other cases). In this
1705 situation we just want to output the file name. No need to scan further. */
1706
1707 else if (binary)
1708 {
1709 fprintf(stdout, "Binary file %s matches\n", filename);
1710 return 0;
1711 }
1712
1713 /* If all we want is a file name, there is no need to scan any more lines
1714 in the file. */
1715
1716 else if (filenames == FN_MATCH_ONLY)
1717 {
1718 fprintf(stdout, "%s\n", printname);
1719 return 0;
1720 }
1721
1722 /* The --only-matching option prints just the substring that matched,
1723 and/or one or more captured portions of it, as long as these strings are
1724 not empty. The --file-offsets and --line-offsets options output offsets for
1725 the matching substring (all three set show_only_matching). None of these
1726 mutually exclusive options prints any context. Afterwards, adjust the start
1727 and then jump back to look for further matches in the same line. If we are
1728 in invert mode, however, nothing is printed and we do not restart - this
1729 could still be useful because the return code is set. */
1730
1731 else if (show_only_matching)
1732 {
1733 if (!invert)
1734 {
1735 int oldstartoffset = startoffset;
1736
1737 /* It is possible, when a lookbehind assertion contains \K, for the
1738 same string to be found again. The code below advances startoffset, but
1739 until it is past the "bumpalong" offset that gave the match, the same
1740 substring will be returned. The PCRE1 library does not return the
1741 bumpalong offset, so all we can do is ignore repeated strings. (PCRE2
1742 does this better.) */
1743
1744 if (prevoffsets[0] != offsets[0] || prevoffsets[1] != offsets[1])
1745 {
1746 prevoffsets[0] = offsets[0];
1747 prevoffsets[1] = offsets[1];
1748
1749 if (printname != NULL) fprintf(stdout, "%s:", printname);
1750 if (number) fprintf(stdout, "%lu:", linenumber);
1751
1752 /* Handle --line-offsets */
1753
1754 if (line_offsets)
1755 fprintf(stdout, "%d,%d\n", (int)(matchptr + offsets[0] - ptr),
1756 offsets[1] - offsets[0]);
1757
1758 /* Handle --file-offsets */
1759
1760 else if (file_offsets)
1761 fprintf(stdout, "%d,%d\n",
1762 (int)(filepos + matchptr + offsets[0] - ptr),
1763 offsets[1] - offsets[0]);
1764
1765 /* Handle --only-matching, which may occur many times */
1766
1767 else
1768 {
1769 BOOL printed = FALSE;
1770 omstr *om;
1771
1772 for (om = only_matching; om != NULL; om = om->next)
1773 {
1774 int n = om->groupnum;
1775 if (n < mrc)
1776 {
1777 int plen = offsets[2*n + 1] - offsets[2*n];
1778 if (plen > 0)
1779 {
1780 if (printed) fprintf(stdout, "%s", om_separator);
1781 if (do_colour) fprintf(stdout, "%c[%sm", 0x1b, colour_string);
1782 FWRITE(matchptr + offsets[n*2], 1, plen, stdout);
1783 if (do_colour) fprintf(stdout, "%c[00m", 0x1b);
1784 printed = TRUE;
1785 }
1786 }
1787 }
1788
1789 if (printed || printname != NULL || number) fprintf(stdout, "\n");
1790 }
1791 }
1792
1793 /* Prepare to repeat to find the next match. If the pattern contained
1794 a lookbehind that included \K, it is possible that the end of the match
1795 might be at or before the actual starting offset we have just used. We
1796 need to start one character further on. Unfortunately, for unanchored
1797 patterns, the actual start offset can be greater than the one that was
1798 set as a result of "bumpalong". PCRE1 does not return the actual start
1799 offset, so we have to check against the original start offset. This may
1800 lead to duplicates - so we need the fudge above to avoid printing them.
1801 (PCRE2 does this better.) */
1802
1803 match = FALSE;
1804 if (line_buffered) fflush(stdout);
1805 rc = 0; /* Had some success */
1806
1807 startoffset = offsets[1]; /* Restart after the match */
1808 if (startoffset <= oldstartoffset)
1809 {
1810 if ((size_t)startoffset >= length)
1811 goto END_ONE_MATCH; /* We were at the end */
1812 startoffset = oldstartoffset + 1;
1813 if (utf8)
1814 while ((matchptr[startoffset] & 0xc0) == 0x80) startoffset++;
1815 }
1816
1817 /* If the current match ended past the end of the line (only possible
1818 in multiline mode), we must move on to the line in which it did end
1819 before searching for more matches. */
1820
1821 while (startoffset > (int)linelength)
1822 {
1823 matchptr = ptr += linelength + endlinelength;
1824 filepos += (int)(linelength + endlinelength);
1825 linenumber++;
1826 startoffset -= (int)(linelength + endlinelength);
1827 t = end_of_line(ptr, endptr, &endlinelength);
1828 linelength = t - ptr - endlinelength;
1829 length = (size_t)(endptr - ptr);
1830 }
1831
1832 goto ONLY_MATCHING_RESTART;
1833 }
1834 }
1835
1836 /* This is the default case when none of the above options is set. We print
1837 the matching lines(s), possibly preceded and/or followed by other lines of
1838 context. */
1839
1840 else
1841 {
1842 /* See if there is a requirement to print some "after" lines from a
1843 previous match. We never print any overlaps. */
1844
1845 if (after_context > 0 && lastmatchnumber > 0)
1846 {
1847 int ellength;
1848 int linecount = 0;
1849 char *p = lastmatchrestart;
1850
1851 while (p < ptr && linecount < after_context)
1852 {
1853 p = end_of_line(p, ptr, &ellength);
1854 linecount++;
1855 }
1856
1857 /* It is important to advance lastmatchrestart during this printing so
1858 that it interacts correctly with any "before" printing below. Print
1859 each line's data using fwrite() in case there are binary zeroes. */
1860
1861 while (lastmatchrestart < p)
1862 {
1863 char *pp = lastmatchrestart;
1864 if (printname != NULL) fprintf(stdout, "%s-", printname);
1865 if (number) fprintf(stdout, "%lu-", lastmatchnumber++);
1866 pp = end_of_line(pp, endptr, &ellength);
1867 FWRITE(lastmatchrestart, 1, pp - lastmatchrestart, stdout);
1868 lastmatchrestart = pp;
1869 }
1870 if (lastmatchrestart != ptr) hyphenpending = TRUE;
1871 }
1872
1873 /* If there were non-contiguous lines printed above, insert hyphens. */
1874
1875 if (hyphenpending)
1876 {
1877 fprintf(stdout, "--\n");
1878 hyphenpending = FALSE;
1879 hyphenprinted = TRUE;
1880 }
1881
1882 /* See if there is a requirement to print some "before" lines for this
1883 match. Again, don't print overlaps. */
1884
1885 if (before_context > 0)
1886 {
1887 int linecount = 0;
1888 char *p = ptr;
1889
1890 while (p > main_buffer && (lastmatchnumber == 0 || p > lastmatchrestart) &&
1891 linecount < before_context)
1892 {
1893 linecount++;
1894 p = previous_line(p, main_buffer);
1895 }
1896
1897 if (lastmatchnumber > 0 && p > lastmatchrestart && !hyphenprinted)
1898 fprintf(stdout, "--\n");
1899
1900 while (p < ptr)
1901 {
1902 int ellength;
1903 char *pp = p;
1904 if (printname != NULL) fprintf(stdout, "%s-", printname);
1905 if (number) fprintf(stdout, "%lu-", linenumber - linecount--);
1906 pp = end_of_line(pp, endptr, &ellength);
1907 FWRITE(p, 1, pp - p, stdout);
1908 p = pp;
1909 }
1910 }
1911
1912 /* Now print the matching line(s); ensure we set hyphenpending at the end
1913 of the file if any context lines are being output. */
1914
1915 if (after_context > 0 || before_context > 0)
1916 endhyphenpending = TRUE;
1917
1918 if (printname != NULL) fprintf(stdout, "%s:", printname);
1919 if (number) fprintf(stdout, "%lu:", linenumber);
1920
1921 /* In multiline mode, we want to print to the end of the line in which
1922 the end of the matched string is found, so we adjust linelength and the
1923 line number appropriately, but only when there actually was a match
1924 (invert not set). Because the PCRE_FIRSTLINE option is set, the start of
1925 the match will always be before the first newline sequence. */
1926
1927 if (multiline & !invert)
1928 {
1929 char *endmatch = ptr + offsets[1];
1930 t = ptr;
1931 while (t <= endmatch)
1932 {
1933 t = end_of_line(t, endptr, &endlinelength);
1934 if (t < endmatch) linenumber++; else break;
1935 }
1936 linelength = t - ptr - endlinelength;
1937 }
1938
1939 /*** NOTE: Use only fwrite() to output the data line, so that binary
1940 zeroes are treated as just another data character. */
1941
1942 /* This extra option, for Jeffrey Friedl's debugging requirements,
1943 replaces the matched string, or a specific captured string if it exists,
1944 with X. When this happens, colouring is ignored. */
1945
1946 #ifdef JFRIEDL_DEBUG
1947 if (S_arg >= 0 && S_arg < mrc)
1948 {
1949 int first = S_arg * 2;
1950 int last = first + 1;
1951 FWRITE(ptr, 1, offsets[first], stdout);
1952 fprintf(stdout, "X");
1953 FWRITE(ptr + offsets[last], 1, linelength - offsets[last], stdout);
1954 }
1955 else
1956 #endif
1957
1958 /* We have to split the line(s) up if colouring, and search for further
1959 matches, but not of course if the line is a non-match. */
1960
1961 if (do_colour && !invert)
1962 {
1963 int plength;
1964 FWRITE(ptr, 1, offsets[0], stdout);
1965 fprintf(stdout, "%c[%sm", 0x1b, colour_string);
1966 FWRITE(ptr + offsets[0], 1, offsets[1] - offsets[0], stdout);
1967 fprintf(stdout, "%c[00m", 0x1b);
1968 for (;;)
1969 {
1970 startoffset = offsets[1];
1971 if (startoffset >= (int)linelength + endlinelength ||
1972 !match_patterns(matchptr, length, options, startoffset, offsets,
1973 &mrc))
1974 break;
1975 FWRITE(matchptr + startoffset, 1, offsets[0] - startoffset, stdout);
1976 fprintf(stdout, "%c[%sm", 0x1b, colour_string);
1977 FWRITE(matchptr + offsets[0], 1, offsets[1] - offsets[0], stdout);
1978 fprintf(stdout, "%c[00m", 0x1b);
1979 }
1980
1981 /* In multiline mode, we may have already printed the complete line
1982 and its line-ending characters (if they matched the pattern), so there
1983 may be no more to print. */
1984
1985 plength = (int)((linelength + endlinelength) - startoffset);
1986 if (plength > 0) FWRITE(ptr + startoffset, 1, plength, stdout);
1987 }
1988
1989 /* Not colouring; no need to search for further matches */
1990
1991 else FWRITE(ptr, 1, linelength + endlinelength, stdout);
1992 }
1993
1994 /* End of doing what has to be done for a match. If --line-buffered was
1995 given, flush the output. */
1996
1997 if (line_buffered) fflush(stdout);
1998 rc = 0; /* Had some success */
1999
2000 /* Remember where the last match happened for after_context. We remember
2001 where we are about to restart, and that line's number. */
2002
2003 lastmatchrestart = ptr + linelength + endlinelength;
2004 lastmatchnumber = linenumber + 1;
2005 }
2006
2007 /* For a match in multiline inverted mode (which of course did not cause
2008 anything to be printed), we have to move on to the end of the match before
2009 proceeding. */
2010
2011 if (multiline && invert && match)
2012 {
2013 int ellength;
2014 char *endmatch = ptr + offsets[1];
2015 t = ptr;
2016 while (t < endmatch)
2017 {
2018 t = end_of_line(t, endptr, &ellength);
2019 if (t <= endmatch) linenumber++; else break;
2020 }
2021 endmatch = end_of_line(endmatch, endptr, &ellength);
2022 linelength = endmatch - ptr - ellength;
2023 }
2024
2025 /* Advance to after the newline and increment the line number. The file
2026 offset to the current line is maintained in filepos. */
2027
2028 END_ONE_MATCH:
2029 ptr += linelength + endlinelength;
2030 filepos += (int)(linelength + endlinelength);
2031 linenumber++;
2032
2033 /* If input is line buffered, and the buffer is not yet full, read another
2034 line and add it into the buffer. */
2035
2036 if (input_line_buffered && bufflength < (size_t)bufsize)
2037 {
2038 int add = read_one_line(ptr, bufsize - (int)(ptr - main_buffer), in);
2039 bufflength += add;
2040 endptr += add;
2041 }
2042
2043 /* If we haven't yet reached the end of the file (the buffer is full), and
2044 the current point is in the top 1/3 of the buffer, slide the buffer down by
2045 1/3 and refill it. Before we do this, if some unprinted "after" lines are
2046 about to be lost, print them. */
2047
2048 if (bufflength >= (size_t)bufsize && ptr > main_buffer + 2*bufthird)
2049 {
2050 if (after_context > 0 &&
2051 lastmatchnumber > 0 &&
2052 lastmatchrestart < main_buffer + bufthird)
2053 {
2054 do_after_lines(lastmatchnumber, lastmatchrestart, endptr, printname);
2055 lastmatchnumber = 0;
2056 }
2057
2058 /* Now do the shuffle */
2059
2060 memmove(main_buffer, main_buffer + bufthird, 2*bufthird);
2061 ptr -= bufthird;
2062
2063 #ifdef SUPPORT_LIBZ
2064 if (frtype == FR_LIBZ)
2065 bufflength = 2*bufthird +
2066 gzread (ingz, main_buffer + 2*bufthird, bufthird);
2067 else
2068 #endif
2069
2070 #ifdef SUPPORT_LIBBZ2
2071 if (frtype == FR_LIBBZ2)
2072 bufflength = 2*bufthird +
2073 BZ2_bzread(inbz2, main_buffer + 2*bufthird, bufthird);
2074 else
2075 #endif
2076
2077 bufflength = 2*bufthird +
2078 (input_line_buffered?
2079 read_one_line(main_buffer + 2*bufthird, bufthird, in) :
2080 fread(main_buffer + 2*bufthird, 1, bufthird, in));
2081 endptr = main_buffer + bufflength;
2082
2083 /* Adjust any last match point */
2084
2085 if (lastmatchnumber > 0) lastmatchrestart -= bufthird;
2086 }
2087 } /* Loop through the whole file */
2088
2089 /* End of file; print final "after" lines if wanted; do_after_lines sets
2090 hyphenpending if it prints something. */
2091
2092 if (!show_only_matching && !count_only)
2093 {
2094 do_after_lines(lastmatchnumber, lastmatchrestart, endptr, printname);
2095 hyphenpending |= endhyphenpending;
2096 }
2097
2098 /* Print the file name if we are looking for those without matches and there
2099 were none. If we found a match, we won't have got this far. */
2100
2101 if (filenames == FN_NOMATCH_ONLY)
2102 {
2103 fprintf(stdout, "%s\n", printname);
2104 return 0;
2105 }
2106
2107 /* Print the match count if wanted */
2108
2109 if (count_only && !quiet)
2110 {
2111 if (count > 0 || !omit_zero_count)
2112 {
2113 if (printname != NULL && filenames != FN_NONE)
2114 fprintf(stdout, "%s:", printname);
2115 fprintf(stdout, "%lu\n", count);
2116 }
2117 }
2118
2119 return rc;
2120 }
2121
2122
2123
2124 /*************************************************
2125 * Grep a file or recurse into a directory *
2126 *************************************************/
2127
2128 /* Given a path name, if it's a directory, scan all the files if we are
2129 recursing; if it's a file, grep it.
2130
2131 Arguments:
2132 pathname the path to investigate
2133 dir_recurse TRUE if recursing is wanted (-r or -drecurse)
2134 only_one_at_top TRUE if the path is the only one at toplevel
2135
2136 Returns: -1 the file/directory was skipped
2137 0 if there was at least one match
2138 1 if there were no matches
2139 2 there was some kind of error
2140
2141 However, file opening failures are suppressed if "silent" is set.
2142 */
2143
2144 static int
2145 grep_or_recurse(char *pathname, BOOL dir_recurse, BOOL only_one_at_top)
2146 {
2147 int rc = 1;
2148 int frtype;
2149 void *handle;
2150 char *lastcomp;
2151 FILE *in = NULL; /* Ensure initialized */
2152
2153 #ifdef SUPPORT_LIBZ
2154 gzFile ingz = NULL;
2155 #endif
2156
2157 #ifdef SUPPORT_LIBBZ2
2158 BZFILE *inbz2 = NULL;
2159 #endif
2160
2161 #if defined SUPPORT_LIBZ || defined SUPPORT_LIBBZ2
2162 int pathlen;
2163 #endif
2164
2165 #if defined NATIVE_ZOS
2166 int zos_type;
2167 FILE *zos_test_file;
2168 #endif
2169
2170 /* If the file name is "-" we scan stdin */
2171
2172 if (strcmp(pathname, "-") == 0)
2173 {
2174 return pcregrep(stdin, FR_PLAIN, stdin_name,
2175 (filenames > FN_DEFAULT || (filenames == FN_DEFAULT && !only_one_at_top))?
2176 stdin_name : NULL);
2177 }
2178
2179 /* Inclusion and exclusion: --include-dir and --exclude-dir apply only to
2180 directories, whereas --include and --exclude apply to everything else. The test
2181 is against the final component of the path. */
2182
2183 lastcomp = strrchr(pathname, FILESEP);
2184 lastcomp = (lastcomp == NULL)? pathname : lastcomp + 1;
2185
2186 /* If the file is a directory, skip if not recursing or if explicitly excluded.
2187 Otherwise, scan the directory and recurse for each path within it. The scanning
2188 code is localized so it can be made system-specific. */
2189
2190
2191 /* For z/OS, determine the file type. */
2192
2193 #if defined NATIVE_ZOS
2194 zos_test_file = fopen(pathname,"rb");
2195
2196 if (zos_test_file == NULL)
2197 {
2198 if (!silent) fprintf(stderr, "pcregrep: failed to test next file %s\n",
2199 pathname, strerror(errno));
2200 return -1;
2201 }
2202 zos_type = identifyzosfiletype (zos_test_file);
2203 fclose (zos_test_file);
2204
2205 /* Handle a PDS in separate code */
2206
2207 if (zos_type == __ZOS_PDS || zos_type == __ZOS_PDSE)
2208 {
2209 return travelonpdsdir (pathname, only_one_at_top);
2210 }
2211
2212 /* Deal with regular files in the normal way below. These types are:
2213 zos_type == __ZOS_PDS_MEMBER
2214 zos_type == __ZOS_PS
2215 zos_type == __ZOS_VSAM_KSDS
2216 zos_type == __ZOS_VSAM_ESDS
2217 zos_type == __ZOS_VSAM_RRDS
2218 */
2219
2220 /* Handle a z/OS directory using common code. */
2221
2222 else if (zos_type == __ZOS_HFS)
2223 {
2224 #endif /* NATIVE_ZOS */
2225
2226
2227 /* Handle directories: common code for all OS */
2228
2229 if (isdirectory(pathname))
2230 {
2231 if (dee_action == dee_SKIP ||
2232 !test_incexc(lastcomp, include_dir_patterns, exclude_dir_patterns))
2233 return -1;
2234
2235 if (dee_action == dee_RECURSE)
2236 {
2237 char buffer[2048];
2238 char *nextfile;
2239 directory_type *dir = opendirectory(pathname);
2240
2241 if (dir == NULL)
2242 {
2243 if (!silent)
2244 fprintf(stderr, "pcregrep: Failed to open directory %s: %s\n", pathname,
2245 strerror(errno));
2246 return 2;
2247 }
2248
2249 while ((nextfile = readdirectory(dir)) != NULL)
2250 {
2251 int frc;
2252 int fnlength = strlen(pathname) + strlen(nextfile) + 2;
2253 if (fnlength > 2048)
2254 {
2255 fprintf(stderr, "pcregrep: recursive filename is too long\n");
2256 rc = 2;
2257 break;
2258 }
2259 sprintf(buffer, "%s%c%s", pathname, FILESEP, nextfile);
2260 frc = grep_or_recurse(buffer, dir_recurse, FALSE);
2261 if (frc > 1) rc = frc;
2262 else if (frc == 0 && rc == 1) rc = 0;
2263 }
2264
2265 closedirectory(dir);
2266 return rc;
2267 }
2268 }
2269
2270 #if defined NATIVE_ZOS
2271 }
2272 #endif
2273
2274 /* If the file is not a directory, check for a regular file, and if it is not,
2275 skip it if that's been requested. Otherwise, check for an explicit inclusion or
2276 exclusion. */
2277
2278 else if (
2279 #if defined NATIVE_ZOS
2280 (zos_type == __ZOS_NOFILE && DEE_action == DEE_SKIP) ||
2281 #else /* all other OS */
2282 (!isregfile(pathname) && DEE_action == DEE_SKIP) ||
2283 #endif
2284 !test_incexc(lastcomp, include_patterns, exclude_patterns))
2285 return -1; /* File skipped */
2286
2287 /* Control reaches here if we have a regular file, or if we have a directory
2288 and recursion or skipping was not requested, or if we have anything else and
2289 skipping was not requested. The scan proceeds. If this is the first and only
2290 argument at top level, we don't show the file name, unless we are only showing
2291 the file name, or the filename was forced (-H). */
2292
2293 #if defined SUPPORT_LIBZ || defined SUPPORT_LIBBZ2
2294 pathlen = (int)(strlen(pathname));
2295 #endif
2296
2297 /* Open using zlib if it is supported and the file name ends with .gz. */
2298
2299 #ifdef SUPPORT_LIBZ
2300 if (pathlen > 3 && strcmp(pathname + pathlen - 3, ".gz") == 0)
2301 {
2302 ingz = gzopen(pathname, "rb");
2303 if (ingz == NULL)
2304 {
2305 if (!silent)
2306 fprintf(stderr, "pcregrep: Failed to open %s: %s\n", pathname,
2307 strerror(errno));
2308 return 2;
2309 }
2310 handle = (void *)ingz;
2311 frtype = FR_LIBZ;
2312 }
2313 else
2314 #endif
2315
2316 /* Otherwise open with bz2lib if it is supported and the name ends with .bz2. */
2317
2318 #ifdef SUPPORT_LIBBZ2
2319 if (pathlen > 4 && strcmp(pathname + pathlen - 4, ".bz2") == 0)
2320 {
2321 inbz2 = BZ2_bzopen(pathname, "rb");
2322 handle = (void *)inbz2;
2323 frtype = FR_LIBBZ2;
2324 }
2325 else
2326 #endif
2327
2328 /* Otherwise use plain fopen(). The label is so that we can come back here if
2329 an attempt to read a .bz2 file indicates that it really is a plain file. */
2330
2331 #ifdef SUPPORT_LIBBZ2
2332 PLAIN_FILE:
2333 #endif
2334 {
2335 in = fopen(pathname, "rb");
2336 handle = (void *)in;
2337 frtype = FR_PLAIN;
2338 }
2339
2340 /* All the opening methods return errno when they fail. */
2341
2342 if (handle == NULL)
2343 {
2344 if (!silent)
2345 fprintf(stderr, "pcregrep: Failed to open %s: %s\n", pathname,
2346 strerror(errno));
2347 return 2;
2348 }
2349
2350 /* Now grep the file */
2351
2352 rc = pcregrep(handle, frtype, pathname, (filenames > FN_DEFAULT ||
2353 (filenames == FN_DEFAULT && !only_one_at_top))? pathname : NULL);
2354
2355 /* Close in an appropriate manner. */
2356
2357 #ifdef SUPPORT_LIBZ
2358 if (frtype == FR_LIBZ)
2359 gzclose(ingz);
2360 else
2361 #endif
2362
2363 /* If it is a .bz2 file and the result is 3, it means that the first attempt to
2364 read failed. If the error indicates that the file isn't in fact bzipped, try
2365 again as a normal file. */
2366
2367 #ifdef SUPPORT_LIBBZ2
2368 if (frtype == FR_LIBBZ2)
2369 {
2370 if (rc == 3)
2371 {
2372 int errnum;
2373 const char *err = BZ2_bzerror(inbz2, &errnum);
2374 if (errnum == BZ_DATA_ERROR_MAGIC)
2375 {
2376 BZ2_bzclose(inbz2);
2377 goto PLAIN_FILE;
2378 }
2379 else if (!silent)
2380 fprintf(stderr, "pcregrep: Failed to read %s using bzlib: %s\n",
2381 pathname, err);
2382 rc = 2; /* The normal "something went wrong" code */
2383 }
2384 BZ2_bzclose(inbz2);
2385 }
2386 else
2387 #endif
2388
2389 /* Normal file close */
2390
2391 fclose(in);
2392
2393 /* Pass back the yield from pcregrep(). */
2394
2395 return rc;
2396 }
2397
2398
2399
2400 /*************************************************
2401 * Handle a single-letter, no data option *
2402 *************************************************/
2403
2404 static int
2405 handle_option(int letter, int options)
2406 {
2407 switch(letter)
2408 {
2409 case N_FOFFSETS: file_offsets = TRUE; break;
2410 case N_HELP: help(); pcregrep_exit(0);
2411 case N_LBUFFER: line_buffered = TRUE; break;
2412 case N_LOFFSETS: line_offsets = number = TRUE; break;
2413 case N_NOJIT: study_options &= ~PCRE_STUDY_JIT_COMPILE; break;
2414 case 'a': binary_files = BIN_TEXT; break;
2415 case 'c': count_only = TRUE; break;
2416 case 'F': process_options |= PO_FIXED_STRINGS; break;
2417 case 'H': filenames = FN_FORCE; break;
2418 case 'I': binary_files = BIN_NOMATCH; break;
2419 case 'h': filenames = FN_NONE; break;
2420 case 'i': options |= PCRE_CASELESS; break;
2421 case 'l': omit_zero_count = TRUE; filenames = FN_MATCH_ONLY; break;
2422 case 'L': filenames = FN_NOMATCH_ONLY; break;
2423 case 'M': multiline = TRUE; options |= PCRE_MULTILINE|PCRE_FIRSTLINE; break;
2424 case 'n': number = TRUE; break;
2425
2426 case 'o':
2427 only_matching_last = add_number(0, only_matching_last);
2428 if (only_matching == NULL) only_matching = only_matching_last;
2429 break;
2430
2431 case 'q': quiet = TRUE; break;
2432 case 'r': dee_action = dee_RECURSE; break;
2433 case 's': silent = TRUE; break;
2434 case 'u': options |= PCRE_UTF8; utf8 = TRUE; break;
2435 case 'v': invert = TRUE; break;
2436 case 'w': process_options |= PO_WORD_MATCH; break;
2437 case 'x': process_options |= PO_LINE_MATCH; break;
2438
2439 case 'V':
2440 fprintf(stdout, "pcregrep version %s\n", pcre_version());
2441 pcregrep_exit(0);
2442 break;
2443
2444 default:
2445 fprintf(stderr, "pcregrep: Unknown option -%c\n", letter);
2446 pcregrep_exit(usage(2));
2447 }
2448
2449 return options;
2450 }
2451
2452
2453
2454
2455 /*************************************************
2456 * Construct printed ordinal *
2457 *************************************************/
2458
2459 /* This turns a number into "1st", "3rd", etc. */
2460
/* Turn a number into its English ordinal: "1st", "2nd", "3rd", "4th", "11th",
"21st", and so on. The text is built in a static buffer, so the result is
overwritten by the next call and must be used before ordin() is called again.

Argument:   n    the number
Returns:    pointer to the static buffer holding the ordinal text
*/

static char *
ordin(int n)
{
static char buffer[14];
const char *ending = "th";
int len = sprintf(buffer, "%d", n);
int lasttwo = n % 100;

/* Numbers whose last two digits are 11, 12, or 13 take "th" ("11th", "112th"),
not the "st"/"nd"/"rd" that their final digit alone would suggest. For every
other number the final digit decides; negative remainders fall through to
"th". */

if (lasttwo < 11 || lasttwo > 13)
  {
  switch (n % 10)
    {
    case 1: ending = "st"; break;
    case 2: ending = "nd"; break;
    case 3: ending = "rd"; break;
    default: break;
    }
  }

strcpy(buffer + len, ending);
return buffer;
}
2477
2478
2479
2480 /*************************************************
2481 * Compile a single pattern *
2482 *************************************************/
2483
2484 /* Do nothing if the pattern has already been compiled. This is the case for
2485 include/exclude patterns read from a file.
2486
2487 When the -F option has been used, each "pattern" may be a list of strings,
2488 separated by line breaks. They will be matched literally. We split such a
2489 string and compile the first substring, inserting an additional block into the
2490 pattern chain.
2491
2492 Arguments:
2493 p points to the pattern block
2494 options the PCRE options
2495 popts the processing options
2496 fromfile TRUE if the pattern was read from a file
2497 fromtext file name or identifying text (e.g. "include")
2498 count 0 if this is the only command line pattern, or
2499 number of the command line pattern, or
2500 linenumber for a pattern from a file
2501
2502 Returns: TRUE on success, FALSE after an error
2503 */
2504
2505 static BOOL
2506 compile_pattern(patstr *p, int options, int popts, int fromfile,
2507 const char *fromtext, int count)
2508 {
2509 char buffer[PATBUFSIZE];
2510 const char *error;
2511 char *ps = p->string;
2512 int patlen = strlen(ps);
2513 int errptr;
2514
2515 if (p->compiled != NULL) return TRUE;
2516
2517 if ((popts & PO_FIXED_STRINGS) != 0)
2518 {
2519 int ellength;
2520 char *eop = ps + patlen;
2521 char *pe = end_of_line(ps, eop, &ellength);
2522
2523 if (ellength != 0)
2524 {
2525 if (add_pattern(pe, p) == NULL) return FALSE;
2526 patlen = (int)(pe - ps - ellength);
2527 }
2528 }
2529
2530 if (snprintf(buffer, PATBUFSIZE, "%s%.*s%s", prefix[popts], patlen, ps,
2531 suffix[popts]) > PATBUFSIZE)
2532 {
2533 fprintf(stderr, "pcregrep: Buffer overflow while compiling \"%s\"\n",
2534 ps);
2535 return FALSE;
2536 }
2537
2538 p->compiled = pcre_compile(buffer, options, &error, &errptr, pcretables);
2539 if (p->compiled != NULL) return TRUE;
2540
2541 /* Handle compile errors */
2542
2543 errptr -= (int)strlen(prefix[popts]);
2544 if (errptr > patlen) errptr = patlen;
2545
2546 if (fromfile)
2547 {
2548 fprintf(stderr, "pcregrep: Error in regex in line %d of %s "
2549 "at offset %d: %s\n", count, fromtext, errptr, error);
2550 }
2551 else
2552 {
2553 if (count == 0)
2554 fprintf(stderr, "pcregrep: Error in %s regex at offset %d: %s\n",
2555 fromtext, errptr, error);
2556 else
2557 fprintf(stderr, "pcregrep: Error in %s %s regex at offset %d: %s\n",
2558 ordin(count), fromtext, errptr, error);
2559 }
2560
2561 return FALSE;
2562 }
2563
2564
2565
2566 /*************************************************
2567 * Read and compile a file of patterns *
2568 *************************************************/
2569
2570 /* This is used for --filelist, --include-from, and --exclude-from.
2571
2572 Arguments:
2573 name the name of the file; "-" is stdin
2574 patptr pointer to the pattern chain anchor
2575 patlastptr pointer to the last pattern pointer
2576 popts the process options to pass to compile_pattern()
2577
2578 Returns: TRUE if all went well
2579 */
2580
2581 static BOOL
2582 read_pattern_file(char *name, patstr **patptr, patstr **patlastptr, int popts)
2583 {
2584 int linenumber = 0;
2585 FILE *f;
2586 char *filename;
2587 char buffer[PATBUFSIZE];
2588
2589 if (strcmp(name, "-") == 0)
2590 {
2591 f = stdin;
2592 filename = stdin_name;
2593 }
2594 else
2595 {
2596 f = fopen(name, "r");
2597 if (f == NULL)
2598 {
2599 fprintf(stderr, "pcregrep: Failed to open %s: %s\n", name, strerror(errno));
2600 return FALSE;
2601 }
2602 filename = name;
2603 }
2604
2605 while (fgets(buffer, PATBUFSIZE, f) != NULL)
2606 {
2607 char *s = buffer + (int)strlen(buffer);
2608 while (s > buffer && isspace((unsigned char)(s[-1]))) s--;
2609 *s = 0;
2610 linenumber++;
2611 if (buffer[0] == 0) continue; /* Skip blank lines */
2612
2613 /* Note: this call to add_pattern() puts a pointer to the local variable
2614 "buffer" into the pattern chain. However, that pointer is used only when
2615 compiling the pattern, which happens immediately below, so we flatten it
2616 afterwards, as a precaution against any later code trying to use it. */
2617
2618 *patlastptr = add_pattern(buffer, *patlastptr);
2619 if (*patlastptr == NULL)
2620 {
2621 if (f != stdin) fclose(f);
2622 return FALSE;
2623 }
2624 if (*patptr == NULL) *patptr = *patlastptr;
2625
2626 /* This loop is needed because compiling a "pattern" when -F is set may add
2627 on additional literal patterns if the original contains a newline. In the
2628 common case, it never will, because fgets() stops at a newline. However,
2629 the -N option can be used to give pcregrep a different newline setting. */
2630
2631 for(;;)
2632 {
2633 if (!compile_pattern(*patlastptr, pcre_options, popts, TRUE, filename,
2634 linenumber))
2635 {
2636 if (f != stdin) fclose(f);
2637 return FALSE;
2638 }
2639 (*patlastptr)->string = NULL; /* Insurance */
2640 if ((*patlastptr)->next == NULL) break;
2641 *patlastptr = (*patlastptr)->next;
2642 }
2643 }
2644
2645 if (f != stdin) fclose(f);
2646 return TRUE;
2647 }
2648
2649
2650
2651 /*************************************************
2652 * Main program *
2653 *************************************************/
2654
2655 /* Returns 0 if something matched, 1 if nothing matched, 2 after an error. */
2656
2657 int
2658 main(int argc, char **argv)
2659 {
2660 int i, j;
2661 int rc = 1;
2662 BOOL only_one_at_top;
2663 patstr *cp;
2664 fnstr *fn;
2665 const char *locale_from = "--locale";
2666 const char *error;
2667
2668 #ifdef SUPPORT_PCREGREP_JIT
2669 pcre_jit_stack *jit_stack = NULL;
2670 #endif
2671
2672 /* Set the default line ending value from the default in the PCRE library;
2673 "lf", "cr", "crlf", and "any" are supported. Anything else is treated as "lf".
2674 Note that the return values from pcre_config(), though derived from the ASCII
2675 codes, are the same in EBCDIC environments, so we must use the actual values
2676 rather than escapes such as '\r'. */
2677
2678 (void)pcre_config(PCRE_CONFIG_NEWLINE, &i);
2679 switch(i)
2680 {
2681 default: newline = (char *)"lf"; break;
2682 case 13: newline = (char *)"cr"; break;
2683 case (13 << 8) | 10: newline = (char *)"crlf"; break;
2684 case -1: newline = (char *)"any"; break;
2685 case -2: newline = (char *)"anycrlf"; break;
2686 }
2687
2688 /* Process the options */
2689
2690 for (i = 1; i < argc; i++)
2691 {
2692 option_item *op = NULL;
2693 char *option_data = (char *)""; /* default to keep compiler happy */
2694 BOOL longop;
2695 BOOL longopwasequals = FALSE;
2696
2697 if (argv[i][0] != '-') break;
2698
2699 /* If we hit an argument that is just "-", it may be a reference to STDIN,
2700 but only if we have previously had -e or -f to define the patterns. */
2701
2702 if (argv[i][1] == 0)
2703 {
2704 if (pattern_files != NULL || patterns != NULL) break;
2705 else pcregrep_exit(usage(2));
2706 }
2707
2708 /* Handle a long name option, or -- to terminate the options */
2709
2710 if (argv[i][1] == '-')
2711 {
2712 char *arg = argv[i] + 2;
2713 char *argequals = strchr(arg, '=');
2714
2715 if (*arg == 0) /* -- terminates options */
2716 {
2717 i++;
2718 break; /* out of the options-handling loop */
2719 }
2720
2721 longop = TRUE;
2722
2723 /* Some long options have data that follows after =, for example file=name.
2724 Some options have variations in the long name spelling: specifically, we
2725 allow "regexp" because GNU grep allows it, though I personally go along
2726 with Jeffrey Friedl and Larry Wall in preferring "regex" without the "p".
2727 These options are entered in the table as "regex(p)". Options can be in
2728 both these categories. */
2729
2730 for (op = optionlist; op->one_char != 0; op++)
2731 {
2732 char *opbra = strchr(op->long_name, '(');
2733 char *equals = strchr(op->long_name, '=');
2734
2735 /* Handle options with only one spelling of the name */
2736
2737 if (opbra == NULL) /* Does not contain '(' */
2738 {
2739 if (equals == NULL) /* Not thing=data case */
2740 {
2741 if (strcmp(arg, op->long_name) == 0) break;
2742 }
2743 else /* Special case xxx=data */
2744 {
2745 int oplen = (int)(equals - op->long_name);
2746 int arglen = (argequals == NULL)?
2747 (int)strlen(arg) : (int)(argequals - arg);
2748 if (oplen == arglen && strncmp(arg, op->long_name, oplen) == 0)
2749 {
2750 option_data = arg + arglen;
2751 if (*option_data == '=')
2752 {
2753 option_data++;
2754 longopwasequals = TRUE;
2755 }
2756 break;
2757 }
2758 }
2759 }
2760
2761 /* Handle options with an alternate spelling of the name */
2762
2763 else
2764 {
2765 char buff1[24];
2766 char buff2[24];
2767
2768 int baselen = (int)(opbra - op->long_name);
2769 int fulllen = (int)(strchr(op->long_name, ')') - op->long_name + 1);
2770 int arglen = (argequals == NULL || equals == NULL)?
2771 (int)strlen(arg) : (int)(argequals - arg);
2772
2773 if (snprintf(buff1, sizeof(buff1), "%.*s", baselen, op->long_name) >
2774 (int)sizeof(buff1) ||
2775 snprintf(buff2, sizeof(buff2), "%s%.*s", buff1,
2776 fulllen - baselen - 2, opbra + 1) > (int)sizeof(buff2))
2777 {
2778 fprintf(stderr, "pcregrep: Buffer overflow when parsing %s option\n",
2779 op->long_name);
2780 pcregrep_exit(2);
2781 }
2782
2783 if (strncmp(arg, buff1, arglen) == 0 ||
2784 strncmp(arg, buff2, arglen) == 0)
2785 {
2786 if (equals != NULL && argequals != NULL)
2787 {
2788 option_data = argequals;
2789 if (*option_data == '=')
2790 {
2791 option_data++;
2792 longopwasequals = TRUE;
2793 }
2794 }
2795 break;
2796 }
2797 }
2798 }
2799
2800 if (op->one_char == 0)
2801 {
2802 fprintf(stderr, "pcregrep: Unknown option %s\n", argv[i]);
2803 pcregrep_exit(usage(2));
2804 }
2805 }
2806
2807 /* Jeffrey Friedl's debugging harness uses these additional options which
2808 are not in the right form for putting in the option table because they use
2809 only one hyphen, yet are more than one character long. By putting them
2810 separately here, they will not get displayed as part of the help() output,
2811 but I don't think Jeffrey will care about that. */
2812
2813 #ifdef JFRIEDL_DEBUG
2814 else if (strcmp(argv[i], "-pre") == 0) {
2815 jfriedl_prefix = argv[++i];
2816 continue;
2817 } else if (strcmp(argv[i], "-post") == 0) {
2818 jfriedl_postfix = argv[++i];
2819 continue;
2820 } else if (strcmp(argv[i], "-XT") == 0) {
2821 sscanf(argv[++i], "%d", &jfriedl_XT);
2822 continue;
2823 } else if (strcmp(argv[i], "-XR") == 0) {
2824 sscanf(argv[++i], "%d", &jfriedl_XR);
2825 continue;
2826 }
2827 #endif
2828
2829
2830 /* One-char options; many that have no data may be in a single argument; we
2831 continue till we hit the last one or one that needs data. */
2832
2833 else
2834 {
2835 char *s = argv[i] + 1;
2836 longop = FALSE;
2837
2838 while (*s != 0)
2839 {
2840 for (op = optionlist; op->one_char != 0; op++)
2841 {
2842 if (*s == op->one_char) break;
2843 }
2844 if (op->one_char == 0)
2845 {
2846 fprintf(stderr, "pcregrep: Unknown option letter '%c' in \"%s\"\n",
2847 *s, argv[i]);
2848 pcregrep_exit(usage(2));
2849 }
2850
2851 option_data = s+1;
2852
2853 /* Break out if this is the last character in the string; it's handled
2854 below like a single multi-char option. */
2855
2856 if (*option_data == 0) break;
2857
2858 /* Check for a single-character option that has data: OP_OP_NUMBER(S)
2859 are used for ones that either have a numerical number or defaults, i.e.
2860 the data is optional. If a digit follows, there is data; if not, carry on
2861 with other single-character options in the same string. */
2862
2863 if (op->type == OP_OP_NUMBER || op->type == OP_OP_NUMBERS)
2864 {
2865 if (isdigit((unsigned char)s[1])) break;
2866 }
2867 else /* Check for an option with data */
2868 {
2869 if (op->type != OP_NODATA) break;
2870 }
2871
2872 /* Handle a single-character option with no data, then loop for the
2873 next character in the string. */
2874
2875 pcre_options = handle_option(*s++, pcre_options);
2876 }
2877 }
2878
2879 /* At this point we should have op pointing to a matched option. If the type
2880 is OP_NODATA, it means that there is no data, and the option might set
2881 something in the PCRE options. */
2882
2883 if (op->type == OP_NODATA)
2884 {
2885 pcre_options = handle_option(op->one_char, pcre_options);
2886 continue;
2887 }
2888
2889 /* If the option type is OP_OP_STRING or OP_OP_NUMBER(S), it's an option that
2890 either has a value or defaults to something. It cannot have data in a
2891 separate item. At the moment, the only such options are "colo(u)r",
2892 "only-matching", and Jeffrey Friedl's special -S debugging option. */
2893
2894 if (*option_data == 0 &&
2895 (op->type == OP_OP_STRING || op->type == OP_OP_NUMBER ||
2896 op->type == OP_OP_NUMBERS))
2897 {
2898 switch (op->one_char)
2899 {
2900 case N_COLOUR:
2901 colour_option = (char *)"auto";
2902 break;
2903
2904 case 'o':
2905 only_matching_last = add_number(0, only_matching_last);
2906 if (only_matching == NULL) only_matching = only_matching_last;
2907 break;
2908
2909 #ifdef JFRIEDL_DEBUG
2910 case 'S':
2911 S_arg = 0;
2912 break;
2913 #endif
2914 }
2915 continue;
2916 }
2917
2918 /* Otherwise, find the data string for the option. */
2919
2920 if (*option_data == 0)
2921 {
2922 if (i >= argc - 1 || longopwasequals)
2923 {
2924 fprintf(stderr, "pcregrep: Data missing after %s\n", argv[i]);
2925 pcregrep_exit(usage(2));
2926 }
2927 option_data = argv[++i];
2928 }
2929
2930 /* If the option type is OP_OP_NUMBERS, the value is a number that is to be
2931 added to a chain of numbers. */
2932
2933 if (op->type == OP_OP_NUMBERS)
2934 {
2935 unsigned long int n = decode_number(option_data, op, longop);
2936 omdatastr *omd = (omdatastr *)op->dataptr;
2937 *(omd->lastptr) = add_number((int)n, *(omd->lastptr));
2938 if (*(omd->anchor) == NULL) *(omd->anchor) = *(omd->lastptr);
2939 }
2940
2941 /* If the option type is OP_PATLIST, it's the -e option, or one of the
2942 include/exclude options, which can be called multiple times to create lists
2943 of patterns. */
2944
2945 else if (op->type == OP_PATLIST)
2946 {
2947 patdatastr *pd = (patdatastr *)op->dataptr;
2948 *(pd->lastptr) = add_pattern(option_data, *(pd->lastptr));
2949 if (*(pd->lastptr) == NULL) goto EXIT2;
2950 if (*(pd->anchor) == NULL) *(pd->anchor) = *(pd->lastptr);
2951 }
2952
2953 /* If the option type is OP_FILELIST, it's one of the options that names a
2954 file. */
2955
2956 else if (op->type == OP_FILELIST)
2957 {
2958 fndatastr *fd = (fndatastr *)op->dataptr;
2959 fn = (fnstr *)malloc(sizeof(fnstr));
2960 if (fn == NULL)
2961 {
2962 fprintf(stderr, "pcregrep: malloc failed\n");
2963 goto EXIT2;
2964 }
2965 fn->next = NULL;
2966 fn->name = option_data;
2967 if (*(fd->anchor) == NULL)
2968 *(fd->anchor) = fn;
2969 else
2970 (*(fd->lastptr))->next = fn;
2971 *(fd->lastptr) = fn;
2972 }
2973
2974 /* Handle OP_BINFILES */
2975
2976 else if (op->type == OP_BINFILES)
2977 {
2978 if (strcmp(option_data, "binary") == 0)
2979 binary_files = BIN_BINARY;
2980 else if (strcmp(option_data, "without-match") == 0)
2981 binary_files = BIN_NOMATCH;
2982 else if (strcmp(option_data, "text") == 0)
2983 binary_files = BIN_TEXT;
2984 else
2985 {
2986 fprintf(stderr, "pcregrep: unknown value \"%s\" for binary-files\n",
2987 option_data);
2988 pcregrep_exit(usage(2));
2989 }
2990 }
2991
2992 /* Otherwise, deal with a single string or numeric data value. */
2993
2994 else if (op->type != OP_NUMBER && op->type != OP_LONGNUMBER &&
2995 op->type != OP_OP_NUMBER)
2996 {
2997 *((char **)op->dataptr) = option_data;
2998 }
2999 else
3000 {
3001 unsigned long int n = decode_number(option_data, op, longop);
3002 if (op->type == OP_LONGNUMBER) *((unsigned long int *)op->dataptr) = n;
3003 else *((int *)op->dataptr) = n;
3004 }
3005 }
3006
3007 /* Options have been decoded. If -C was used, its value is used as a default
3008 for -A and -B. */
3009
3010 if (both_context > 0)
3011 {
3012 if (after_context == 0) after_context = both_context;
3013 if (before_context == 0) before_context = both_context;
3014 }
3015
3016 /* Only one of --only-matching, --file-offsets, or --line-offsets is permitted.
3017 However, all three set show_only_matching because they display, each in their
3018 own way, only the data that has matched. */
3019
3020 if ((only_matching != NULL && (file_offsets || line_offsets)) ||
3021 (file_offsets && line_offsets))
3022 {
3023 fprintf(stderr, "pcregrep: Cannot mix --only-matching, --file-offsets "
3024 "and/or --line-offsets\n");
3025 pcregrep_exit(usage(2));
3026 }
3027
3028 if (only_matching != NULL || file_offsets || line_offsets)
3029 show_only_matching = TRUE;
3030
3031 /* If a locale has not been provided as an option, see if the LC_CTYPE or
3032 LC_ALL environment variable is set, and if so, use it. */
3033
3034 if (locale == NULL)
3035 {
3036 locale = getenv("LC_ALL");
3037 locale_from = "LC_ALL";
3038 }
3039
3040 if (locale == NULL)
3041 {
3042 locale = getenv("LC_CTYPE");
3043 locale_from = "LC_CTYPE";
3044 }
3045
3046 /* If a locale is set, use it to generate the tables the PCRE needs. Otherwise,
3047 pcretables==NULL, which causes the use of default tables. */
3048
3049 if (locale != NULL)
3050 {
3051 if (setlocale(LC_CTYPE, locale) == NULL)
3052 {
3053 fprintf(stderr, "pcregrep: Failed to set locale %s (obtained from %s)\n",
3054 locale, locale_from);
3055 goto EXIT2;
3056 }
3057 pcretables = pcre_maketables();
3058 }
3059
3060 /* Sort out colouring */
3061
3062 if (colour_option != NULL && strcmp(colour_option, "never") != 0)
3063 {
3064 if (strcmp(colour_option, "always") == 0) do_colour = TRUE;
3065 else if (strcmp(colour_option, "auto") == 0) do_colour = is_stdout_tty();
3066 else
3067 {
3068 fprintf(stderr, "pcregrep: Unknown colour setting \"%s\"\n",
3069 colour_option);
3070 goto EXIT2;
3071 }
3072 if (do_colour)
3073 {
3074 char *cs = getenv("PCREGREP_COLOUR");
3075 if (cs == NULL) cs = getenv("PCREGREP_COLOR");
3076 if (cs != NULL) colour_string = cs;
3077 }
3078 }
3079
3080 /* Interpret the newline type; the default settings are Unix-like. */
3081
3082 if (strcmp(newline, "cr") == 0 || strcmp(newline, "CR") == 0)
3083 {
3084 pcre_options |= PCRE_NEWLINE_CR;
3085 endlinetype = EL_CR;
3086 }
3087 else if (strcmp(newline, "lf") == 0 || strcmp(newline, "LF") == 0)
3088 {
3089 pcre_options |= PCRE_NEWLINE_LF;
3090 endlinetype = EL_LF;
3091 }
3092 else if (strcmp(newline, "crlf") == 0 || strcmp(newline, "CRLF") == 0)
3093 {
3094 pcre_options |= PCRE_NEWLINE_CRLF;
3095 endlinetype = EL_CRLF;
3096 }
3097 else if (strcmp(newline, "any") == 0 || strcmp(newline, "ANY") == 0)
3098 {
3099 pcre_options |= PCRE_NEWLINE_ANY;
3100 endlinetype = EL_ANY;
3101 }
3102 else if (strcmp(newline, "anycrlf") == 0 || strcmp(newline, "ANYCRLF") == 0)
3103 {
3104 pcre_options |= PCRE_NEWLINE_ANYCRLF;
3105 endlinetype = EL_ANYCRLF;
3106 }
3107 else
3108 {
3109 fprintf(stderr, "pcregrep: Invalid newline specifier \"%s\"\n", newline);
3110 goto EXIT2;
3111 }
3112
3113 /* Interpret the text values for -d and -D */
3114
3115 if (dee_option != NULL)
3116 {
3117 if (strcmp(dee_option, "read") == 0) dee_action = dee_READ;
3118 else if (strcmp(dee_option, "recurse") == 0) dee_action = dee_RECURSE;
3119 else if (strcmp(dee_option, "skip") == 0) dee_action = dee_SKIP;
3120 else
3121 {
3122 fprintf(stderr, "pcregrep: Invalid value \"%s\" for -d\n", dee_option);
3123 goto EXIT2;
3124 }
3125 }
3126
3127 if (DEE_option != NULL)
3128 {
3129 if (strcmp(DEE_option, "read") == 0) DEE_action = DEE_READ;
3130 else if (strcmp(DEE_option, "skip") == 0) DEE_action = DEE_SKIP;
3131 else
3132 {
3133 fprintf(stderr, "pcregrep: Invalid value \"%s\" for -D\n", DEE_option);
3134 goto EXIT2;
3135 }
3136 }
3137
3138 /* Check the values for Jeffrey Friedl's debugging options. */
3139
3140 #ifdef JFRIEDL_DEBUG
3141 if (S_arg > 9)
3142 {
3143 fprintf(stderr, "pcregrep: bad value for -S option\n");
3144 return 2;
3145 }
3146 if (jfriedl_XT != 0 || jfriedl_XR != 0)
3147 {
3148 if (jfriedl_XT == 0) jfriedl_XT = 1;
3149 if (jfriedl_XR == 0) jfriedl_XR = 1;
3150 }
3151 #endif
3152
3153 /* Get memory for the main buffer. */
3154
3155 bufsize = 3*bufthird;
3156 main_buffer = (char *)malloc(bufsize);
3157
3158 if (main_buffer == NULL)
3159 {
3160 fprintf(stderr, "pcregrep: malloc failed\n");
3161 goto EXIT2;
3162 }
3163
3164 /* If no patterns were provided by -e, and there are no files provided by -f,
3165 the first argument is the one and only pattern, and it must exist. */
3166
3167 if (patterns == NULL && pattern_files == NULL)
3168 {
3169 if (i >= argc) return usage(2);
3170 patterns = patterns_last = add_pattern(argv[i++], NULL);
3171 if (patterns == NULL) goto EXIT2;
3172 }
3173
3174 /* Compile the patterns that were provided on the command line, either by
3175 multiple uses of -e or as a single unkeyed pattern. We cannot do this until
3176 after all the command-line options are read so that we know which PCRE options
3177 to use. When -F is used, compile_pattern() may add another block into the
3178 chain, so we must not access the next pointer till after the compile. */
3179
3180 for (j = 1, cp = patterns; cp != NULL; j++, cp = cp->next)
3181 {
3182 if (!compile_pattern(cp, pcre_options, process_options, FALSE, "command-line",
3183 (j == 1 && patterns->next == NULL)? 0 : j))
3184 goto EXIT2;
3185 }
3186
3187 /* Read and compile the regular expressions that are provided in files. */
3188
3189 for (fn = pattern_files; fn != NULL; fn = fn->next)
3190 {
3191 if (!read_pattern_file(fn->name, &patterns, &patterns_last, process_options))
3192 goto EXIT2;
3193 }
3194
3195 /* Study the regular expressions, as we will be running them many times. If an
3196 extra block is needed for a limit, set PCRE_STUDY_EXTRA_NEEDED so that one is
3197 returned, even if studying produces no data. */
3198
3199 if (match_limit > 0 || match_limit_recursion > 0)
3200 study_options |= PCRE_STUDY_EXTRA_NEEDED;
3201
3202 /* Unless JIT has been explicitly disabled, arrange a stack for it to use. */
3203
3204 #ifdef SUPPORT_PCREGREP_JIT
3205 if ((study_options & PCRE_STUDY_JIT_COMPILE) != 0)
3206 jit_stack = pcre_jit_stack_alloc(32*1024, 1024*1024);
3207 #endif
3208
3209 for (j = 1, cp = patterns; cp != NULL; j++, cp = cp->next)
3210 {
3211 cp->hint = pcre_study(cp->compiled, study_options, &error);
3212 if (error != NULL)
3213 {
3214 if (patterns->next == NULL)
3215 fprintf(stderr, "pcregrep: Error while studying regex: %s\n", error);
3216 else
3217 fprintf(stderr, "pcregrep: Error while studying regex number %d: %s\n",
3218 j, error);
3219 goto EXIT2;
3220 }
3221 #ifdef SUPPORT_PCREGREP_JIT
3222 if (jit_stack != NULL && cp->hint != NULL)
3223 pcre_assign_jit_stack(cp->hint, NULL, jit_stack);
3224 #endif
3225 }
3226
3227 /* If --match-limit or --recursion-limit was set, put the value(s) into the
3228 pcre_extra block for each pattern. There will always be an extra block because
3229 of the use of PCRE_STUDY_EXTRA_NEEDED above. */
3230
3231 for (cp = patterns; cp != NULL; cp = cp->next)
3232 {
3233 if (match_limit > 0)
3234 {
3235 cp->hint->flags |= PCRE_EXTRA_MATCH_LIMIT;
3236 cp->hint->match_limit = match_limit;
3237 }
3238
3239 if (match_limit_recursion > 0)
3240 {
3241 cp->hint->flags |= PCRE_EXTRA_MATCH_LIMIT_RECURSION;
3242 cp->hint->match_limit_recursion = match_limit_recursion;
3243 }
3244 }
3245
3246 /* If there are include or exclude patterns read from the command line, compile
3247 them. -F, -w, and -x do not apply, so the third argument of compile_pattern is
3248 0. */
3249
3250 for (j = 0; j < 4; j++)
3251 {
3252 int k;
3253 for (k = 1, cp = *(incexlist[j]); cp != NULL; k++, cp = cp->next)
3254 {
3255 if (!compile_pattern(cp, pcre_options, 0, FALSE, incexname[j],
3256 (k == 1 && cp->next == NULL)? 0 : k))
3257 goto EXIT2;
3258 }
3259 }
3260
3261 /* Read and compile include/exclude patterns from files. */
3262
3263 for (fn = include_from; fn != NULL; fn = fn->next)
3264 {
3265 if (!read_pattern_file(fn->name, &include_patterns, &include_patterns_last, 0))
3266 goto EXIT2;
3267 }
3268
3269 for (fn = exclude_from; fn != NULL; fn = fn->next)
3270 {
3271 if (!read_pattern_file(fn->name, &exclude_patterns, &exclude_patterns_last, 0))
3272 goto EXIT2;
3273 }
3274
3275 /* If there are no files that contain lists of files to search, and there are
3276 no file arguments, search stdin, and then exit. */
3277
3278 if (file_lists == NULL && i >= argc)
3279 {
3280 rc = pcregrep(stdin, FR_PLAIN, stdin_name,
3281 (filenames > FN_DEFAULT)? stdin_name : NULL);
3282 goto EXIT;
3283 }
3284
3285 /* If any files that contain a list of files to search have been specified,
3286 read them line by line and search the given files. */
3287
3288 for (fn = file_lists; fn != NULL; fn = fn->next)
3289 {
3290 char buffer[PATBUFSIZE];
3291 FILE *fl;
3292 if (strcmp(fn->name, "-") == 0) fl = stdin; else
3293 {
3294 fl = fopen(fn->name, "rb");
3295 if (fl == NULL)
3296 {
3297 fprintf(stderr, "pcregrep: Failed to open %s: %s\n", fn->name,
3298 strerror(errno));
3299 goto EXIT2;
3300 }
3301 }
3302 while (fgets(buffer, PATBUFSIZE, fl) != NULL)
3303 {
3304 int frc;
3305 char *end = buffer + (int)strlen(buffer);
3306 while (end > buffer && isspace(end[-1])) end--;
3307 *end = 0;
3308 if (*buffer != 0)
3309 {
3310 frc = grep_or_recurse(buffer, dee_action == dee_RECURSE, FALSE);
3311 if (frc > 1) rc = frc;
3312 else if (frc == 0 && rc == 1) rc = 0;
3313 }
3314 }
3315 if (fl != stdin) fclose(fl);
3316 }
3317
3318 /* After handling file-list, work through remaining arguments. Pass in the fact
3319 that there is only one argument at top level - this suppresses the file name if
3320 the argument is not a directory and filenames are not otherwise forced. */
3321
3322 only_one_at_top = i == argc - 1 && file_lists == NULL;
3323
3324 for (; i < argc; i++)
3325 {
3326 int frc = grep_or_recurse(argv[i], dee_action == dee_RECURSE,
3327 only_one_at_top);
3328 if (frc > 1) rc = frc;
3329 else if (frc == 0 && rc == 1) rc = 0;
3330 }
3331
3332 EXIT:
3333 #ifdef SUPPORT_PCREGREP_JIT
3334 if (jit_stack != NULL) pcre_jit_stack_free(jit_stack);
3335 #endif
3336
3337 free(main_buffer);
3338 free((void *)pcretables);
3339
3340 free_pattern_chain(patterns);
3341 free_pattern_chain(include_patterns);
3342 free_pattern_chain(include_dir_patterns);
3343 free_pattern_chain(exclude_patterns);
3344 free_pattern_chain(exclude_dir_patterns);
3345
3346 free_file_chain(exclude_from);
3347 free_file_chain(include_from);
3348 free_file_chain(pattern_files);
3349 free_file_chain(file_lists);
3350
3351 while (only_matching != NULL)
3352 {
3353 omstr *this = only_matching;
3354 only_matching = this->next;
3355 free(this);
3356 }
3357
3358 pcregrep_exit(rc);
3359
3360 EXIT2:
3361 rc = 2;
3362 goto EXIT;
3363 }
3364
3365 /* End of pcregrep */