"Fossies" - the Fresh Open Source Software Archive  

Source code changes of the file "src/pcre2grep.c" between
pcre2-10.35.tar.bz2 and pcre2-10.36.tar.bz2

About: The PCRE2 library implements Perl-compatible regular expression pattern matching. It is the newer version of PCRE, with a revised API.

pcre2grep.c  (pcre2-10.35.tar.bz2):pcre2grep.c  (pcre2-10.36.tar.bz2)
skipping to change at line 167 skipping to change at line 167
/* Actions for special processing options (flag bits) */ /* Actions for special processing options (flag bits) */
#define PO_WORD_MATCH 0x0001 #define PO_WORD_MATCH 0x0001
#define PO_LINE_MATCH 0x0002 #define PO_LINE_MATCH 0x0002
#define PO_FIXED_STRINGS 0x0004 #define PO_FIXED_STRINGS 0x0004
/* Binary file options */ /* Binary file options */
enum { BIN_BINARY, BIN_NOMATCH, BIN_TEXT }; enum { BIN_BINARY, BIN_NOMATCH, BIN_TEXT };
/* Return values from decode_dollar_escape() */
enum { DDE_ERROR, DDE_CAPTURE, DDE_CHAR };
/* In newer versions of gcc, with FORTIFY_SOURCE set (the default in some /* In newer versions of gcc, with FORTIFY_SOURCE set (the default in some
environments), a warning is issued if the value of fwrite() is ignored. environments), a warning is issued if the value of fwrite() is ignored.
Unfortunately, casting to (void) does not suppress the warning. To get round Unfortunately, casting to (void) does not suppress the warning. To get round
this, we use a macro that compiles a fudge. Oddly, this does not also seem to this, we use a macro that compiles a fudge. Oddly, this does not also seem to
apply to fprintf(). */ apply to fprintf(). */
#define FWRITE_IGNORE(a,b,c,d) if (fwrite(a,b,c,d)) {} #define FWRITE_IGNORE(a,b,c,d) if (fwrite(a,b,c,d)) {}
/* Under Windows, we have to set stdout to be binary, so that it does not /* Under Windows, we have to set stdout to be binary, so that it does not
convert \r\n at the ends of output lines to \r\r\n. However, that means that convert \r\n at the ends of output lines to \r\r\n. However, that means that
any messages written to stdout must have \r\n as their line terminator. This is any messages written to stdout must have \r\n as their line terminator. This is
handled by using STDOUT_NL as the newline string. We also use a normal double handled by using STDOUT_NL as the newline string. We also use a normal double
quote for the example, as single quotes aren't usually available. */ quote for the example, as single quotes aren't usually available. */
#ifdef WIN32 #ifdef WIN32
#define STDOUT_NL "\r\n" #define STDOUT_NL "\r\n"
#define QUOT "\"" #define STDOUT_NL_LEN 2
#define QUOT "\""
#else #else
#define STDOUT_NL "\n" #define STDOUT_NL "\n"
#define QUOT "'" #define STDOUT_NL_LEN 1
#define QUOT "'"
#endif #endif
/* This code is returned from decode_dollar_escape() when $n is encountered,
and used to mean "output STDOUT_NL". It is, of course, not a valid Unicode code
point. */
#define STDOUT_NL_CODE 0x7fffffffu
/************************************************* /*************************************************
* Global variables * * Global variables *
*************************************************/ *************************************************/
/* Jeffrey Friedl has some debugging requirements that are not part of the /* Jeffrey Friedl has some debugging requirements that are not part of the
regular code. */ regular code. */
#ifdef JFRIEDL_DEBUG #ifdef JFRIEDL_DEBUG
static int S_arg = -1; static int S_arg = -1;
static unsigned int jfriedl_XR = 0; /* repeat regex attempt this many times */ static unsigned int jfriedl_XR = 0; /* repeat regex attempt this many times */
skipping to change at line 225 skipping to change at line 237
static int after_context = 0; static int after_context = 0;
static int before_context = 0; static int before_context = 0;
static int binary_files = BIN_BINARY; static int binary_files = BIN_BINARY;
static int both_context = 0; static int both_context = 0;
static int bufthird = PCRE2GREP_BUFSIZE; static int bufthird = PCRE2GREP_BUFSIZE;
static int max_bufthird = PCRE2GREP_MAX_BUFSIZE; static int max_bufthird = PCRE2GREP_MAX_BUFSIZE;
static int bufsize = 3*PCRE2GREP_BUFSIZE; static int bufsize = 3*PCRE2GREP_BUFSIZE;
static int endlinetype; static int endlinetype;
static unsigned long int total_count = 0; static int count_limit = -1; /* Not long, so that it works with OP_NUMBER */
static unsigned long int counts_printed = 0; static unsigned long int counts_printed = 0;
static unsigned long int total_count = 0;
#ifdef WIN32 #ifdef WIN32
static int dee_action = dee_SKIP; static int dee_action = dee_SKIP;
#else #else
static int dee_action = dee_READ; static int dee_action = dee_READ;
#endif #endif
static int DEE_action = DEE_READ; static int DEE_action = DEE_READ;
static int error_count = 0; static int error_count = 0;
static int filenames = FN_DEFAULT; static int filenames = FN_DEFAULT;
skipping to change at line 278 skipping to change at line 291
static BOOL line_offsets = FALSE; static BOOL line_offsets = FALSE;
static BOOL multiline = FALSE; static BOOL multiline = FALSE;
static BOOL number = FALSE; static BOOL number = FALSE;
static BOOL omit_zero_count = FALSE; static BOOL omit_zero_count = FALSE;
static BOOL resource_error = FALSE; static BOOL resource_error = FALSE;
static BOOL quiet = FALSE; static BOOL quiet = FALSE;
static BOOL show_total_count = FALSE; static BOOL show_total_count = FALSE;
static BOOL silent = FALSE; static BOOL silent = FALSE;
static BOOL utf = FALSE; static BOOL utf = FALSE;
static uint8_t utf8_buffer[8];
/* Structure for list of --only-matching capturing numbers. */ /* Structure for list of --only-matching capturing numbers. */
typedef struct omstr { typedef struct omstr {
struct omstr *next; struct omstr *next;
int groupnum; int groupnum;
} omstr; } omstr;
static omstr *only_matching = NULL; static omstr *only_matching = NULL;
static omstr *only_matching_last = NULL; static omstr *only_matching_last = NULL;
static int only_matching_count; static int only_matching_count;
skipping to change at line 444 skipping to change at line 459
{ OP_NODATA, 'L', NULL, "files-without-match","print only FILE names not containing matches" }, { OP_NODATA, 'L', NULL, "files-without-match","print only FILE names not containing matches" },
{ OP_STRING, N_LABEL, &stdin_name, "label=name", "set name for standard input" }, { OP_STRING, N_LABEL, &stdin_name, "label=name", "set name for standard input" },
{ OP_NODATA, N_LBUFFER, NULL, "line-buffered", "use line buffering" }, { OP_NODATA, N_LBUFFER, NULL, "line-buffered", "use line buffering" },
{ OP_NODATA, N_LOFFSETS, NULL, "line-offsets", "output line numbers and offsets, not text" }, { OP_NODATA, N_LOFFSETS, NULL, "line-offsets", "output line numbers and offsets, not text" },
{ OP_STRING, N_LOCALE, &locale, "locale=locale", "use the named locale" }, { OP_STRING, N_LOCALE, &locale, "locale=locale", "use the named locale" },
{ OP_SIZE, N_H_LIMIT, &heap_limit, "heap-limit=number", "set PCRE2 heap limit option (kibibytes)" }, { OP_SIZE, N_H_LIMIT, &heap_limit, "heap-limit=number", "set PCRE2 heap limit option (kibibytes)" },
{ OP_U32NUMBER, N_M_LIMIT, &match_limit, "match-limit=number", "set PCRE2 match limit option" }, { OP_U32NUMBER, N_M_LIMIT, &match_limit, "match-limit=number", "set PCRE2 match limit option" },
{ OP_U32NUMBER, N_M_LIMIT_DEP, &depth_limit, "depth-limit=number", "set PCRE2 depth limit option" }, { OP_U32NUMBER, N_M_LIMIT_DEP, &depth_limit, "depth-limit=number", "set PCRE2 depth limit option" },
{ OP_U32NUMBER, N_M_LIMIT_DEP, &depth_limit, "recursion-limit=number", "obsolete synonym for depth-limit" }, { OP_U32NUMBER, N_M_LIMIT_DEP, &depth_limit, "recursion-limit=number", "obsolete synonym for depth-limit" },
{ OP_NODATA, 'M', NULL, "multiline", "run in multiline mode" }, { OP_NODATA, 'M', NULL, "multiline", "run in multiline mode" },
{ OP_NUMBER, 'm', &count_limit, "max-count=number", "stop after <number> matched lines" },
{ OP_STRING, 'N', &newline_arg, "newline=type", "set newline type (CR, LF, CRLF, ANYCRLF, ANY, or NUL)" }, { OP_STRING, 'N', &newline_arg, "newline=type", "set newline type (CR, LF, CRLF, ANYCRLF, ANY, or NUL)" },
{ OP_NODATA, 'n', NULL, "line-number", "print line number with output lines" }, { OP_NODATA, 'n', NULL, "line-number", "print line number with output lines" },
#ifdef SUPPORT_PCRE2GREP_JIT #ifdef SUPPORT_PCRE2GREP_JIT
{ OP_NODATA, N_NOJIT, NULL, "no-jit", "do not use just-in-time compiler optimization" }, { OP_NODATA, N_NOJIT, NULL, "no-jit", "do not use just-in-time compiler optimization" },
#else #else
{ OP_NODATA, N_NOJIT, NULL, "no-jit", "ignored: this pcre2grep does not support JIT" }, { OP_NODATA, N_NOJIT, NULL, "no-jit", "ignored: this pcre2grep does not support JIT" },
#endif #endif
{ OP_STRING, 'O', &output_text, "output=text", "show only this text (possibly expanded)" }, { OP_STRING, 'O', &output_text, "output=text", "show only this text (possibly expanded)" },
{ OP_OP_NUMBERS, 'o', &only_matching_data, "only-matching=n", "show only the part of the line that matched" }, { OP_OP_NUMBERS, 'o', &only_matching_data, "only-matching=n", "show only the part of the line that matched" },
{ OP_STRING, N_OM_SEPARATOR, &om_separator, "om-separator=text", "set separator for multiple -o output" }, { OP_STRING, N_OM_SEPARATOR, &om_separator, "om-separator=text", "set separator for multiple -o output" },
skipping to change at line 483 skipping to change at line 499
{ OP_NODATA, 'x', NULL, "line-regex(p)", "force patterns to match only whole lines" }, { OP_NODATA, 'x', NULL, "line-regex(p)", "force patterns to match only whole lines" },
{ OP_NODATA, 0, NULL, NULL, NULL } { OP_NODATA, 0, NULL, NULL, NULL }
}; };
/* Table of names for newline types. Must be kept in step with the definitions /* Table of names for newline types. Must be kept in step with the definitions
of PCRE2_NEWLINE_xx in pcre2.h. */ of PCRE2_NEWLINE_xx in pcre2.h. */
static const char *newlines[] = { static const char *newlines[] = {
"DEFAULT", "CR", "LF", "CRLF", "ANY", "ANYCRLF", "NUL" }; "DEFAULT", "CR", "LF", "CRLF", "ANY", "ANYCRLF", "NUL" };
/* UTF-8 tables - used only when the newline setting is "any". */ /* UTF-8 tables */
const int utf8_table1[] =
{ 0x7f, 0x7ff, 0xffff, 0x1fffff, 0x3ffffff, 0x7fffffff};
const int utf8_table1_size = sizeof(utf8_table1) / sizeof(int);
const int utf8_table2[] = { 0, 0xc0, 0xe0, 0xf0, 0xf8, 0xfc};
const int utf8_table3[] = { 0xff, 0x1f, 0x0f, 0x07, 0x03, 0x01}; const int utf8_table3[] = { 0xff, 0x1f, 0x0f, 0x07, 0x03, 0x01};
const char utf8_table4[] = { const char utf8_table4[] = {
1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2, 2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,
3,3,3,3,3,3,3,3,4,4,4,4,5,5,5,5 }; 3,3,3,3,3,3,3,3,4,4,4,4,5,5,5,5 };
#if !defined(VPCOMPAT) && !defined(HAVE_MEMMOVE) #if !defined(VPCOMPAT) && !defined(HAVE_MEMMOVE)
/************************************************* /*************************************************
skipping to change at line 531 skipping to change at line 552
for (i = 0; i < n; ++i) *dest++ = *src++; for (i = 0; i < n; ++i) *dest++ = *src++;
return (void *)(dest - n); return (void *)(dest - n);
} }
#endif /* not HAVE_BCOPY */ #endif /* not HAVE_BCOPY */
} }
#undef memmove #undef memmove
#define memmove(d,s,n) emulated_memmove(d,s,n) #define memmove(d,s,n) emulated_memmove(d,s,n)
#endif /* not VPCOMPAT && not HAVE_MEMMOVE */ #endif /* not VPCOMPAT && not HAVE_MEMMOVE */
/************************************************* /*************************************************
* Convert code point to UTF-8 *
*************************************************/
/* A static buffer is used. Returns the number of bytes. */
static int
ord2utf8(uint32_t value)
{
/* The encoded bytes are left in the file-level utf8_buffer; the return value
is the number of bytes stored there. */

int extra = 0;   /* Number of continuation bytes needed */
int pos;

/* Find the first entry in utf8_table1 that is large enough to hold the value;
its index is the number of continuation bytes that follow the lead byte. */

while (extra < utf8_table1_size && value > (uint32_t)utf8_table1[extra])
  extra++;

/* Store the continuation bytes from the last one back to the first, taking six
bits of the value each time. */

for (pos = extra; pos > 0; pos--)
  {
  utf8_buffer[pos] = 0x80 | (value & 0x3f);
  value >>= 6;
  }

/* What remains of the value is combined with the lead-byte marker taken from
utf8_table2. */

utf8_buffer[0] = utf8_table2[extra] | value;
return extra + 1;
}
/*************************************************
* Case-independent string compare * * Case-independent string compare *
*************************************************/ *************************************************/
static int static int
strcmpic(const char *str1, const char *str2) strcmpic(const char *str1, const char *str2)
{ {
unsigned int c1, c2; unsigned int c1, c2;
while (*str1 != '\0' || *str2 != '\0') while (*str1 != '\0' || *str2 != '\0')
{ {
c1 = tolower(*str1++); c1 = tolower(*str1++);
skipping to change at line 1743 skipping to change at line 1787
int i; int i;
PCRE2_SIZE slen = length; PCRE2_SIZE slen = length;
patstr *p = patterns; patstr *p = patterns;
const char *msg = "this text:\n\n"; const char *msg = "this text:\n\n";
if (slen > 200) if (slen > 200)
{ {
slen = 200; slen = 200;
msg = "text that starts:\n\n"; msg = "text that starts:\n\n";
} }
for (i = 1; p != NULL; p = p->next, i++) for (i = 1; p != NULL; p = p->next, i++)
{ {
*mrc = pcre2_match(p->compiled, (PCRE2_SPTR)matchptr, (int)length, *mrc = pcre2_match(p->compiled, (PCRE2_SPTR)matchptr, (int)length,
startoffset, options, match_data, match_context); startoffset, options, match_data, match_context);
if (*mrc >= 0) return TRUE; if (*mrc >= 0) return TRUE;
if (*mrc == PCRE2_ERROR_NOMATCH) continue; if (*mrc == PCRE2_ERROR_NOMATCH) continue;
fprintf(stderr, "pcre2grep: pcre2_match() gave error %d while matching ", *mrc ); fprintf(stderr, "pcre2grep: pcre2_match() gave error %d while matching ", *mrc );
if (patterns->next != NULL) fprintf(stderr, "pattern number %d to ", i); if (patterns->next != NULL) fprintf(stderr, "pattern number %d to ", i);
fprintf(stderr, "%s", msg); fprintf(stderr, "%s", msg);
FWRITE_IGNORE(matchptr, 1, slen, stderr); /* In case binary zero included */ FWRITE_IGNORE(matchptr, 1, slen, stderr); /* In case binary zero included */
skipping to change at line 1778 skipping to change at line 1823
fprintf(stderr, "pcre2grep: Too many errors - abandoned.\n"); fprintf(stderr, "pcre2grep: Too many errors - abandoned.\n");
pcre2grep_exit(2); pcre2grep_exit(2);
} }
return invert; /* No more matching; don't show the line again */ return invert; /* No more matching; don't show the line again */
} }
return FALSE; /* No match, no errors */ return FALSE; /* No match, no errors */
} }
/************************************************* /*************************************************
* Check output text for errors * * Decode dollar escape sequence *
*************************************************/ *************************************************/
static BOOL /* Called from various places to decode $ escapes in output strings. The escape
syntax_check_output_text(PCRE2_SPTR string, BOOL callout) sequences are as follows:
$<digits> or ${<digits>} returns a capture number. However, if callout is TRUE,
zero is never returned; '0' is substituted.
$a returns bell.
$b returns backspace.
$e returns escape.
$f returns form feed.
$n returns newline.
$r returns carriage return.
$t returns tab.
$v returns vertical tab.
$o<digits> returns the character represented by the given octal
number; up to three digits are processed.
$o{<digits>} does the same, up to 7 digits, but gives an error for mode-invalid
code points.
$x<digits> returns the character represented by the given hexadecimal
number; up to two digits are processed.
$x{<digits>} does the same, up to 6 digits, but gives an error for mode-invalid
code points.
Any other character is substituted by itself. E.g: $$ is replaced by a single
dollar.
Arguments:
begin the start of the whole string
string points to the $
callout TRUE if in a callout (inhibits error messages)
value where to return a value
last where to return pointer to the last used character
Returns: DDE_ERROR after a syntax error
DDE_CAPTURE if *value is a capture number
DDE_CHAR if *value is a character code
*/
static int
decode_dollar_escape(PCRE2_SPTR begin, PCRE2_SPTR string, BOOL callout,
uint32_t *value, PCRE2_SPTR *last)
{ {
PCRE2_SPTR begin = string; uint32_t c = 0;
for (; *string != 0; string++) int base = 10;
{ int dcount;
if (*string == '$') int rc = DDE_CHAR;
BOOL brace = FALSE;
switch (*(++string))
{
case 0: /* Syntax error: a character must be present after $. */
if (!callout)
fprintf(stderr, "pcre2grep: Error in output text at offset %d: %s\n",
(int)(string - begin), "no character after $");
*last = string;
return DDE_ERROR;
case '{':
brace = TRUE;
string++;
if (!isdigit(*string)) /* Syntax error: a decimal number required. */
{ {
PCRE2_SIZE capture_id = 0; if (!callout)
BOOL brace = FALSE; fprintf(stderr, "pcre2grep: Error in output text at offset %d: %s\n",
(int)(string - begin), "decimal number expected");
rc = DDE_ERROR;
break;
}
/* Fall through */
/* The maximum capture number is 65535, so any number greater than that will
always be an unknown capture number. We just stop incrementing, in order to
avoid overflow. */
case '0': case '1': case '2': case '3': case '4':
case '5': case '6': case '7': case '8': case '9':
do
{
if (c <= 65535) c = c * 10 + (*string - '0');
string++; string++;
}
while (*string >= '0' && *string <= '9');
string--; /* Point to last digit */
/* Syntax error: a character must be present after $. */ /* In a callout, capture number 0 is not available. No error can be given,
if (*string == 0) so just return the character '0'. */
{
if (!callout)
fprintf(stderr, "pcre2grep: Error in output text at offset %d: %s\n",
(int)(string - begin), "no character after $");
return FALSE;
}
if (*string == '{') if (callout && c == 0)
{ {
/* Must be a decimal number in braces, e.g: {5} or {38} */ *value = '0';
string++; }
else
{
*value = c;
rc = DDE_CAPTURE;
}
break;
brace = TRUE; /* Limit octal numbers to 3 digits without braces, or up to 7 with braces,
} for valid Unicode code points. */
if ((*string >= '1' && *string <= '9') || (!callout && *string == '0')) case 'o':
{ base = 8;
do string++;
{ if (*string == '{')
/* Maximum capture id is 65535. */ {
if (capture_id <= 65535) brace = TRUE;
capture_id = capture_id * 10 + (*string - '0'); string++;
dcount = 7;
}
else dcount = 3;
for (; dcount > 0; dcount--)
{
if (*string < '0' || *string > '7') break;
c = c * 8 + (*string++ - '0');
}
*value = c;
string--; /* Point to last digit */
break;
string++; /* Limit hex numbers to 2 digits without braces, or up to 6 with braces,
} for valid Unicode code points. */
while (*string >= '0' && *string <= '9');
case 'x':
base = 16;
string++;
if (*string == '{')
{
brace = TRUE;
string++;
dcount = 6;
}
else dcount = 2;
for (; dcount > 0; dcount--)
{
if (!isxdigit(*string)) break;
if (*string >= '0' && *string <= '9')
c = c *16 + *string++ - '0';
else
c = c * 16 + (*string++ | 0x20) - 'a' + 10;
}
*value = c;
string--; /* Point to last digit */
break;
if (brace) case 'a': *value = '\a'; break;
case 'b': *value = '\b'; break;
#ifndef EBCDIC
case 'e': *value = '\033'; break;
#else
case 'e': *value = '\047'; break;
#endif
case 'f': *value = '\f'; break;
case 'n': *value = STDOUT_NL_CODE; break;
case 'r': *value = '\r'; break;
case 't': *value = '\t'; break;
case 'v': *value = '\v'; break;
default: *value = *string; break;
}
if (brace)
{
c = string[1];
if (c != '}')
{
rc = DDE_ERROR;
if (!callout)
{
if ((base == 8 && c >= '0' && c <= '7') ||
(base == 16 && isxdigit(c)))
{ {
/* Syntax error: closing brace is missing. */ fprintf(stderr, "pcre2grep: Error in output text at offset %d: "
if (*string != '}') "too many %s digits\n", (int)(string - begin),
{ (base == 8)? "octal" : "hex");
if (!callout)
fprintf(stderr, "pcre2grep: Error in output text at offset %d: %s\n"
,
(int)(string - begin), "missing closing brace");
return FALSE;
}
} }
else else
{ {
/* To negate the effect of the for. */
string--;
}
}
else if (brace)
{
/* Syntax error: a decimal number required. */
if (!callout)
fprintf(stderr, "pcre2grep: Error in output text at offset %d: %s\n", fprintf(stderr, "pcre2grep: Error in output text at offset %d: %s\n",
(int)(string - begin), "decimal number expected"); (int)(string - begin), "missing closing brace");
return FALSE;
}
else if (*string == 'o')
{
string++;
if (*string < '0' || *string > '7')
{
/* Syntax error: an octal number required. */
if (!callout)
fprintf(stderr, "pcre2grep: Error in output text at offset %d: %s\n",
(int)(string - begin), "octal number expected");
return FALSE;
} }
} }
else if (*string == 'x') }
{ else string++;
string++; }
if (!isxdigit((unsigned char)*string)) /* Check maximum code point values, but take note of STDOUT_NL_CODE. */
{
/* Syntax error: a hexdecimal number required. */ if (rc == DDE_CHAR && *value != STDOUT_NL_CODE)
if (!callout) {
fprintf(stderr, "pcre2grep: Error in output text at offset %d: %s\n", uint32_t max = utf? 0x0010ffffu : 0xffu;
(int)(string - begin), "hexadecimal number expected"); if (*value > max)
return FALSE; {
} if (!callout)
} fprintf(stderr, "pcre2grep: Error in output text at offset %d: "
"code point greater than 0x%x is invalid\n", (int)(string - begin), max)
;
rc = DDE_ERROR;
} }
} }
return TRUE; *last = string;
return rc;
} }
/************************************************* /*************************************************
* Display output text * * Check output text for errors *
*************************************************/ *************************************************/
/* Display the output text, which is assumed to have already been syntax /* Called early, to get errors before doing anything for -O text; also called
checked. Output may contain escape sequences started by the dollar sign. The from callouts to check before outputting.
escape sequences are substituted as follows:
$<digits> or ${<digits>} is replaced by the captured substring of the given Arguments:
decimal number; zero will substitute the whole match. If the number is string an --output text string
greater than the number of capturing substrings, or if the capture is unset, callout TRUE if in a callout (stops printing errors)
the replacement is empty.
$a is replaced by bell.
$b is replaced by backspace.
$e is replaced by escape.
$f is replaced by form feed.
$n is replaced by newline.
$r is replaced by carriage return.
$t is replaced by tab.
$v is replaced by vertical tab.
$o<digits> is replaced by the character represented by the given octal Returns: TRUE if OK, FALSE on error
number; up to three digits are processed. */
$x<digits> is replaced by the character represented by the given hexadecimal static BOOL
number; up to two digits are processed. syntax_check_output_text(PCRE2_SPTR string, BOOL callout)
{
uint32_t value;
PCRE2_SPTR begin = string;
Any other character is substituted by itself. E.g: $$ is replaced by a single for (; *string != 0; string++)
dollar. {
if (*string == '$' &&
decode_dollar_escape(begin, string, callout, &value, &string) == DDE_ERROR)
return FALSE;
}
return TRUE;
}
/*************************************************
* Display output text *
*************************************************/
/* Display the output text, which is assumed to have already been syntax
checked. Output may contain escape sequences started by the dollar sign.
Arguments: Arguments:
string: the output text string: the output text
callout: TRUE for the builtin callout, FALSE for --output callout: TRUE for the builtin callout, FALSE for --output
subject the start of the subject subject the start of the subject
ovector: capture offsets ovector: capture offsets
capture_top: number of captures capture_top: number of captures
Returns: TRUE if something was output, other than newline Returns: TRUE if something was output, other than newline
FALSE if nothing was output, or newline was last output FALSE if nothing was output, or newline was last output
*/ */
static BOOL static BOOL
display_output_text(PCRE2_SPTR string, BOOL callout, PCRE2_SPTR subject, display_output_text(PCRE2_SPTR string, BOOL callout, PCRE2_SPTR subject,
PCRE2_SIZE *ovector, PCRE2_SIZE capture_top) PCRE2_SIZE *ovector, PCRE2_SIZE capture_top)
{ {
uint32_t value;
BOOL printed = FALSE; BOOL printed = FALSE;
PCRE2_SPTR begin = string;
for (; *string != 0; string++) for (; *string != 0; string++)
{ {
int ch = EOF;
if (*string == '$') if (*string == '$')
{ {
PCRE2_SIZE capture_id = 0; switch(decode_dollar_escape(begin, string, callout, &value, &string))
BOOL brace = FALSE;
string++;
if (*string == '{')
{ {
/* Must be a decimal number in braces, e.g: {5} or {38} */ case DDE_CHAR:
string++; if (value == STDOUT_NL_CODE)
brace = TRUE;
}
if ((*string >= '1' && *string <= '9') || (!callout && *string == '0'))
{
do
{
/* Maximum capture id is 65535. */
if (capture_id <= 65535)
capture_id = capture_id * 10 + (*string - '0');
string++;
}
while (*string >= '0' && *string <= '9');
if (!brace)
{ {
/* To negate the effect of the for. */ fprintf(stdout, STDOUT_NL);
string--; printed = FALSE;
continue;
} }
break; /* Will print value */
if (capture_id < capture_top) case DDE_CAPTURE:
if (value < capture_top)
{ {
PCRE2_SIZE capturesize; PCRE2_SIZE capturesize;
capture_id *= 2; value *= 2;
capturesize = ovector[value + 1] - ovector[value];
capturesize = ovector[capture_id + 1] - ovector[capture_id];
if (capturesize > 0) if (capturesize > 0)
{ {
print_match(subject + ovector[capture_id], capturesize); print_match(subject + ovector[value], capturesize);
printed = TRUE; printed = TRUE;
} }
} }
} continue;
else if (*string == 'a') ch = '\a';
else if (*string == 'b') ch = '\b';
#ifndef EBCDIC
else if (*string == 'e') ch = '\033';
#else
else if (*string == 'e') ch = '\047';
#endif
else if (*string == 'f') ch = '\f';
else if (*string == 'r') ch = '\r';
else if (*string == 't') ch = '\t';
else if (*string == 'v') ch = '\v';
else if (*string == 'n')
{
fprintf(stdout, STDOUT_NL);
printed = FALSE;
}
else if (*string == 'o')
{
string++;
ch = *string - '0';
if (string[1] >= '0' && string[1] <= '7')
{
string++;
ch = ch * 8 + (*string - '0');
}
if (string[1] >= '0' && string[1] <= '7')
{
string++;
ch = ch * 8 + (*string - '0');
}
}
else if (*string == 'x')
{
string++;
if (*string >= '0' && *string <= '9') default: /* Should not occur */
ch = *string - '0'; break;
else
ch = (*string | 0x20) - 'a' + 10;
if (isxdigit((unsigned char)string[1]))
{
string++;
ch *= 16;
if (*string >= '0' && *string <= '9')
ch += *string - '0';
else
ch += (*string | 0x20) - 'a' + 10;
}
}
else
{
ch = *string;
} }
} }
else
{ else value = *string; /* Not a $ escape */
ch = *string;
} if (utf && value <= 127) fprintf(stdout, "%c", *string); else
if (ch != EOF)
{ {
fprintf(stdout, "%c", ch); int i;
printed = TRUE; int n = ord2utf8(value);
for (i = 0; i < n; i++) fputc(utf8_buffer[i], stdout);
} }
printed = TRUE;
} }
return printed; return printed;
} }
#ifdef SUPPORT_PCRE2GREP_CALLOUT #ifdef SUPPORT_PCRE2GREP_CALLOUT
/************************************************* /*************************************************
* Parse and execute callout scripts * * Parse and execute callout scripts *
*************************************************/ *************************************************/
skipping to change at line 2118 skipping to change at line 2207
char **argsvector; char **argsvector;
char **argsvectorptr; char **argsvectorptr;
#ifndef WIN32 #ifndef WIN32
pid_t pid; pid_t pid;
#endif #endif
int result = 0; int result = 0;
#endif /* SUPPORT_PCRE2GREP_CALLOUT_FORK */ #endif /* SUPPORT_PCRE2GREP_CALLOUT_FORK */
(void)unused; /* Avoid compiler warning */ (void)unused; /* Avoid compiler warning */
/* Only callout with strings are supported. */ /* Only callouts with strings are supported. */
if (string == NULL || length == 0) return 0; if (string == NULL || length == 0) return 0;
/* If there's no command, output the remainder directly. */ /* If there's no command, output the remainder directly. */
if (*string == '|') if (*string == '|')
{ {
string++; string++;
if (!syntax_check_output_text(string, TRUE)) return 0; if (!syntax_check_output_text(string, TRUE)) return 0;
(void)display_output_text(string, TRUE, subject, ovector, capture_top); (void)display_output_text(string, TRUE, subject, ovector, capture_top);
return 0; return 0;
} }
#ifndef SUPPORT_PCRE2GREP_CALLOUT_FORK #ifndef SUPPORT_PCRE2GREP_CALLOUT_FORK
return 0; return 0;
#else #else
/* Checking syntax and compute the number of string fragments. Callout strings /* Checking syntax and compute the number of string fragments. Callout strings
are ignored in case of a syntax error. */ are silently ignored in the event of a syntax error. */
while (length > 0) while (length > 0)
{ {
if (*string == '|') if (*string == '|')
{ {
argsvectorlen++; argsvectorlen++;
if (argsvectorlen > 10000) return 0; /* Too many args */
/* Maximum 10000 arguments allowed. */
if (argsvectorlen > 10000) return 0;
} }
else if (*string == '$') else if (*string == '$')
{ {
PCRE2_SIZE capture_id = 0; uint32_t value;
PCRE2_SPTR begin = string;
string++;
length--;
/* Syntax error: a character must be present after $. */ switch (decode_dollar_escape(begin, string, TRUE, &value, &string))
if (length == 0) return 0;
if (*string >= '1' && *string <= '9')
{ {
do case DDE_CAPTURE:
if (value < capture_top)
{ {
/* Maximum capture id is 65535. */ value *= 2;
if (capture_id <= 65535) argslen += ovector[value + 1] - ovector[value];
capture_id = capture_id * 10 + (*string - '0');
string++;
length--;
} }
while (length > 0 && *string >= '0' && *string <= '9'); argslen--; /* Negate the effect of argslen++ below. */
break;
/* To negate the effect of string++ below. */
string--;
length++;
}
else if (*string == '{')
{
/* Must be a decimal number in braces, e.g: {5} or {38} */
string++;
length--;
/* Syntax error: a decimal number required. */
if (length == 0) return 0;
if (*string < '1' || *string > '9') return 0;
do
{
/* Maximum capture id is 65535. */
if (capture_id <= 65535)
capture_id = capture_id * 10 + (*string - '0');
string++;
length--;
/* Syntax error: no more characters */ case DDE_CHAR:
if (length == 0) return 0; if (value == STDOUT_NL_CODE) argslen += STDOUT_NL_LEN - 1;
} else if (utf && value > 127) argslen += ord2utf8(value) - 1;
while (*string >= '0' && *string <= '9'); break;
/* Syntax error: closing brace is missing. */ default: /* Should not occur */
if (*string != '}') return 0; case DDE_ERROR:
return 0;
} }
if (capture_id > 0) length -= (string - begin);
{
if (capture_id < capture_top)
{
capture_id *= 2;
argslen += ovector[capture_id + 1] - ovector[capture_id];
}
/* To negate the effect of argslen++ below. */
argslen--;
}
} }
string++; string++;
length--; length--;
argslen++; argslen++;
} }
/* Get memory for the argument vector and its strings. */
args = (char*)malloc(argslen); args = (char*)malloc(argslen);
if (args == NULL) return 0; if (args == NULL) return 0;
argsvector = (char**)malloc(argsvectorlen * sizeof(char*)); argsvector = (char**)malloc(argsvectorlen * sizeof(char*));
if (argsvector == NULL) if (argsvector == NULL)
{ {
free(args); free(args);
return 0; return 0;
} }
/* Now reprocess the string and set up the arguments. */
argsptr = args; argsptr = args;
argsvectorptr = argsvector; argsvectorptr = argsvector;
*argsvectorptr++ = argsptr; *argsvectorptr++ = argsptr;
length = calloutptr->callout_string_length; length = calloutptr->callout_string_length;
string = calloutptr->callout_string; string = calloutptr->callout_string;
while (length > 0) while (length > 0)
{ {
if (*string == '|') if (*string == '|')
{ {
*argsptr++ = '\0'; *argsptr++ = '\0';
*argsvectorptr++ = argsptr; *argsvectorptr++ = argsptr;
} }
else if (*string == '$') else if (*string == '$')
{ {
string++; uint32_t value;
length--; PCRE2_SPTR begin = string;
if ((*string >= '1' && *string <= '9') || *string == '{') switch (decode_dollar_escape(begin, string, TRUE, &value, &string))
{ {
PCRE2_SIZE capture_id = 0; case DDE_CAPTURE:
if (value < capture_top)
if (*string != '{')
{ {
do PCRE2_SIZE capturesize;
{ value *= 2;
/* Maximum capture id is 65535. */ capturesize = ovector[value + 1] - ovector[value];
if (capture_id <= 65535) memcpy(argsptr, subject + ovector[value], capturesize);
capture_id = capture_id * 10 + (*string - '0'); argsptr += capturesize;
}
string++; break;
length--;
}
while (length > 0 && *string >= '0' && *string <= '9');
/* To negate the effect of string++ below. */ case DDE_CHAR:
string--; if (value == STDOUT_NL_CODE)
length++; {
memcpy(argsptr, STDOUT_NL, STDOUT_NL_LEN);
argsptr += STDOUT_NL_LEN;
}
else if (utf && value > 127)
{
int n = ord2utf8(value);
memcpy(argsptr, utf8_buffer, n);
argsptr += n;
} }
else else
{ {
string++; *argsptr++ = value;
length--;
do
{
/* Maximum capture id is 65535. */
if (capture_id <= 65535)
capture_id = capture_id * 10 + (*string - '0');
string++;
length--;
}
while (*string != '}');
} }
break;
if (capture_id < capture_top) default: /* Even though this should not occur, the string having */
{
PCRE2_SIZE capturesize; case DDE_ERROR: /* been checked above, we need to include the free() */
capture_id *= 2; free(args); /* calls so that source checkers do not complain. */
free(argsvector);
capturesize = ovector[capture_id + 1] - ovector[capture_id]; return 0;
memcpy(argsptr, subject + ovector[capture_id], capturesize);
argsptr += capturesize;
}
}
else
{
*argsptr++ = *string;
} }
length -= (string - begin);
} }
else
{ else *argsptr++ = *string;
*argsptr++ = *string;
} /* Advance along the string */
string++; string++;
length--; length--;
} }
*argsptr++ = '\0'; *argsptr++ = '\0';
*argsvectorptr = NULL; *argsvectorptr = NULL;
/* Running an external command is system-dependent. Handle Windows and VMS as /* Running an external command is system-dependent. Handle Windows and VMS as
necessary, otherwise assume fork(). */ necessary, otherwise assume fork(). */
skipping to change at line 2427 skipping to change at line 2467
*/ */
static int static int
pcre2grep(void *handle, int frtype, const char *filename, const char *printname) pcre2grep(void *handle, int frtype, const char *filename, const char *printname)
{ {
int rc = 1; int rc = 1;
int filepos = 0; int filepos = 0;
unsigned long int linenumber = 1; unsigned long int linenumber = 1;
unsigned long int lastmatchnumber = 0; unsigned long int lastmatchnumber = 0;
unsigned long int count = 0; unsigned long int count = 0;
long int count_matched_lines = 0;
char *lastmatchrestart = main_buffer; char *lastmatchrestart = main_buffer;
char *ptr = main_buffer; char *ptr = main_buffer;
char *endptr; char *endptr;
PCRE2_SIZE bufflength; PCRE2_SIZE bufflength;
BOOL binary = FALSE; BOOL binary = FALSE;
BOOL endhyphenpending = FALSE; BOOL endhyphenpending = FALSE;
BOOL lines_printed = FALSE; BOOL lines_printed = FALSE;
BOOL input_line_buffered = line_buffered; BOOL input_line_buffered = line_buffered;
FILE *in = NULL; /* Ensure initialized */ FILE *in = NULL; /* Ensure initialized */
skipping to change at line 2453 skipping to change at line 2494
{ {
in = (FILE *)handle; in = (FILE *)handle;
if (is_file_tty(in)) input_line_buffered = TRUE; if (is_file_tty(in)) input_line_buffered = TRUE;
} }
else input_line_buffered = FALSE; else input_line_buffered = FALSE;
bufflength = fill_buffer(handle, frtype, main_buffer, bufsize, bufflength = fill_buffer(handle, frtype, main_buffer, bufsize,
input_line_buffered); input_line_buffered);
#ifdef SUPPORT_LIBBZ2 #ifdef SUPPORT_LIBBZ2
if (frtype == FR_LIBBZ2 && (int)bufflength < 0) return 2; /* Gotcha: bufflength is PCRE2_SIZE; */ if (frtype == FR_LIBBZ2 && (int)bufflength < 0) return 2; /* Gotcha: bufflength is PCRE2_SIZE */
#endif #endif
endptr = main_buffer + bufflength; endptr = main_buffer + bufflength;
/* Unless binary-files=text, see if we have a binary file. This uses the same /* Unless binary-files=text, see if we have a binary file. This uses the same
rule as GNU grep, namely, a search for a binary zero byte near the start of the rule as GNU grep, namely, a search for a binary zero byte near the start of the
file. However, when the newline convention is binary zero, we can't do this. */ file. However, when the newline convention is binary zero, we can't do this. */
if (binary_files != BIN_TEXT) if (binary_files != BIN_TEXT)
{ {
skipping to change at line 2481 skipping to change at line 2522
files, endptr will be at the end of the buffer when we are in the middle of the files, endptr will be at the end of the buffer when we are in the middle of the
file, but ptr will never get there, because as soon as it gets over 2/3 of the file, but ptr will never get there, because as soon as it gets over 2/3 of the
way, the buffer is shifted left and re-filled. */ way, the buffer is shifted left and re-filled. */
while (ptr < endptr) while (ptr < endptr)
{ {
int endlinelength; int endlinelength;
int mrc = 0; int mrc = 0;
unsigned int options = 0; unsigned int options = 0;
BOOL match; BOOL match;
BOOL line_matched = FALSE;
char *t = ptr; char *t = ptr;
PCRE2_SIZE length, linelength; PCRE2_SIZE length, linelength;
PCRE2_SIZE startoffset = 0; PCRE2_SIZE startoffset = 0;
/* If the -m option set a limit for the number of matched or non-matched
lines, check it here. A limit of zero means that no matching is ever done.
For stdin from a file, set the file position. */
if (count_limit >= 0 && count_matched_lines >= count_limit)
{
if (frtype == FR_PLAIN && filename == stdin_name && !is_file_tty(handle))
(void)fseek(handle, (long int)filepos, SEEK_SET);
rc = (count_limit == 0)? 1 : 0;
break;
}
/* At this point, ptr is at the start of a line. We need to find the length /* At this point, ptr is at the start of a line. We need to find the length
of the subject string to pass to pcre2_match(). In multiline mode, it is the of the subject string to pass to pcre2_match(). In multiline mode, it is the
length remainder of the data in the buffer. Otherwise, it is the length of length remainder of the data in the buffer. Otherwise, it is the length of
the next line, excluding the terminating newline. After matching, we always the next line, excluding the terminating newline. After matching, we always
advance by the length of the next line. In multiline mode the PCRE2_FIRSTLINE advance by the length of the next line. In multiline mode the PCRE2_FIRSTLINE
option is used for compiling, so that any match is constrained to be in the option is used for compiling, so that any match is constrained to be in the
first line. */ first line. */
t = end_of_line(t, endptr, &endlinelength); t = end_of_line(t, endptr, &endlinelength);
linelength = t - ptr - endlinelength; linelength = t - ptr - endlinelength;
skipping to change at line 2633 skipping to change at line 2687
another data character. */ another data character. */
if (match != invert) if (match != invert)
{ {
BOOL hyphenprinted = FALSE; BOOL hyphenprinted = FALSE;
/* We've failed if we want a file that doesn't have any matches. */ /* We've failed if we want a file that doesn't have any matches. */
if (filenames == FN_NOMATCH_ONLY) return 1; if (filenames == FN_NOMATCH_ONLY) return 1;
/* Remember that this line matched (for counting matched lines) */
line_matched = TRUE;
/* If all we want is a yes/no answer, we can return immediately. */ /* If all we want is a yes/no answer, we can return immediately. */
if (quiet) return 0; if (quiet) return 0;
/* Just count if just counting is wanted. */ /* Just count if just counting is wanted. */
else if (count_only || show_total_count) count++; else if (count_only || show_total_count) count++;
/* When handling a binary file and binary-files==binary, the "binary" /* When handling a binary file and binary-files==binary, the "binary"
variable will be set true (it's false in all other cases). In this variable will be set true (it's false in all other cases). In this
skipping to change at line 3014 skipping to change at line 3072
} }
/* Advance to after the newline and increment the line number. The file /* Advance to after the newline and increment the line number. The file
offset to the current line is maintained in filepos. */ offset to the current line is maintained in filepos. */
END_ONE_MATCH: END_ONE_MATCH:
ptr += linelength + endlinelength; ptr += linelength + endlinelength;
filepos += (int)(linelength + endlinelength); filepos += (int)(linelength + endlinelength);
linenumber++; linenumber++;
/* If there was at least one match (or a non-match, as required) in the line,
increment the count for the -m option. */
if (line_matched) count_matched_lines++;
/* If input is line buffered, and the buffer is not yet full, read another /* If input is line buffered, and the buffer is not yet full, read another
line and add it into the buffer. */ line and add it into the buffer. */
if (input_line_buffered && bufflength < (PCRE2_SIZE)bufsize) if (input_line_buffered && bufflength < (PCRE2_SIZE)bufsize)
{ {
int add = read_one_line(ptr, bufsize - (int)(ptr - main_buffer), in); int add = read_one_line(ptr, bufsize - (int)(ptr - main_buffer), in);
bufflength += add; bufflength += add;
endptr += add; endptr += add;
} }
 End of changes. 83 change blocks. 
312 lines changed or deleted 376 lines changed or added

Home  |  About  |  Features  |  All  |  Newest  |  Dox  |  Diffs  |  RSS Feeds  |  Screenshots  |  Comments  |  Imprint  |  Privacy  |  HTTP(S)