leafnode  1.12.0
About: Leafnode is a store & forward NNTP proxy for small (dialup) sites.
  Fossies Dox: leafnode-1.12.0.tar.xz  ("unofficial" and yet experimental doxygen-generated source code documentation)  

pcre_extract.c
Go to the documentation of this file.
1/* ln_pcre_extract.c -- compile and exec a PCRE and return captured strings
2 * (C) 2002 - 2021 by Matthias Andree <matthias.andree@gmx.de>
3 *
4 * This library is free software; you can redistribute it and/or modify
5 * it under the terms of version 2 of the GNU Lesser General Public License as
6 * published by the Free Software Foundation.
7 *
8 * This library is distributed in the hope that it will be useful, but
9 * WITHOUT ANY WARRANTY; without even the implied warranty of
10 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
11 * Lesser General Public License for more details.
12 *
13 * You should have received a copy of the GNU Lesser General Public
14 * License along with this library (look for the COPYING.LGPL file); if
15 * not, write to the Free Software Foundation, Inc., 59 Temple Place,
16 * Suite 330, Boston, MA 02111-1307 USA
17 */
18
19#include "leafnode.h"
20
21#include <string.h>
22#include <limits.h>
23#ifdef TEST
24#include <stdio.h>
25#endif
26
27#include "config.h"
28#include "critmem.h"
29#include "ln_log.h"
30#include "strlcpy.h"
31
32/* This function compiles and executes the regular expression in
33 * "pattern" against the string in "input" and can optionally put the
34 * matching string and captured substring into the output vector.
35 *
36 * Output may be NULL if and only if num is zero.
37 *
38 * If num is nonzero, the matching substring is strdup()ed into
39 * output[0]. If num is > 1, ln_pcre_extract copies the first num - 1
40 * captured substrings into output[...]. If a capturing subpattern did
41 * not match, the corresponding output[] element is NULL.
42 *
43 * The return value is the same as that of pcre_exec. Short: 0 if num
44 * too small to hold all captured strings, otherwise the number of
45 * captured patterns, where the matching string is always captured, or
46 * the PCRE error code.
47 *
48 * WARNING: if compiling the pattern fails, the program logs this
49 * condition and aborts. This function is NOT safe to be used with
50 * user-accessible patterns. Use constant patterns.
51 *
52 * You MUST free() the valid parts in your output[] vector yourself. You
53 * can use ln_pcre_extract_free (which see) to do this.
54 */
55int ln_pcre_extract(const unsigned char *input, const unsigned char *pattern, char **output, size_t num)
56{
57 size_t errpos, i;
58 int errcode, match;
59 const int options = PCRE2_CASELESS|PCRE2_DOTALL|PCRE2_MULTILINE;
60 PCRE2_SIZE *ovec;
61 pcre2_code_8 *re;
62 pcre2_match_data_8 *match_data;
63
64 if (num > INT_MAX / 3) {
65 ln_log(LNLOG_SERR, LNLOG_CTOP, "ln_pcre_extract: array size too large, aborting.");
66 abort();
67 }
68
69 re = pcre2_compile_8(pattern, PCRE2_ZERO_TERMINATED, options, &errcode, &errpos, NULL);
70 if (NULL == re) {
71 unsigned char errstr[SIZE_lineout];
72 int len = pcre2_get_error_message_8(errcode, errstr, sizeof(errstr));
73 ln_log(LNLOG_SERR, LNLOG_CTOP, "ln_pcre_extract: cannot compile \"%s\": %s%s at pos. #%lu",
74 pattern, errstr, len == PCRE2_ERROR_NOMEMORY ? "[...]" : "", (unsigned long)errpos);
75 return -1;
76 }
77
78 match_data = pcre2_match_data_create_from_pattern_8(re, NULL);
79 if (NULL == match_data) {
80 ln_log(LNLOG_SERR, LNLOG_CTOP, "ln_pcre_extract: out of memory allocating match_data");
81 return -1;
82 }
83
84 match = pcre2_match_8(re, input, PCRE2_ZERO_TERMINATED, 0, 0, match_data, NULL);
85 if (match >= 0) {
86 uint32_t ocnt = pcre2_get_ovector_count_8(match_data);
87 ovec = pcre2_get_ovector_pointer_8(match_data);
88 for (i = 0 ; i < num && i < ocnt ; i++) {
89 size_t l = ovec[i*2 + 1] - ovec[i*2];
90 if (ovec[i*2 + 1] < ovec[i * 2]) l = 0; /* can happen, for instance, with \K */
91 output[i] = critmalloc(l + 1, "ln_pcre_extract");
92 (void)memcpy(output[i], input + ovec[i*2], l);
93 output[i][l] = '\0';
94 }
95 }
96 pcre2_match_data_free_8(match_data);
97 pcre2_code_free_8(re);
98 return match;
99}
100
/* Release the strings stored in a vector filled by ln_pcre_extract.
 *
 * vec   - the vector that was passed as "output" to ln_pcre_extract
 * count - number of leading elements to release, normally the value
 *         returned by ln_pcre_extract; zero and negative values are
 *         accepted and make this function a no-op.
 *
 * Each of the first count elements is freed and then reset to NULL.
 * Elements that are already NULL are fine.
 *
 * vec must NOT be NULL unless count is zero or negative.
 */
void ln_pcre_extract_free(char **vec, int count)
{
    char **slot = vec;
    int remaining;

    for (remaining = count; remaining > 0; remaining--) {
        free(*slot);    /* free(NULL) is a no-op, so no guard is needed */
        *slot++ = NULL;
    }
}
119
#ifdef TEST
#define MAX 30

int debug = 0;
int verbose = 0;

/* test ln_pcre_extract capturing.
 *
 * usage: prog string PCRE [num]
 *
 * Matches PCRE against string and prints the return value of
 * ln_pcre_extract followed by the extracted substrings. The optional
 * num limits how many substrings are extracted; it must be a
 * non-negative decimal number and is clamped to MAX.
 */
int main(int argc, char **argv)
{
    char *out[MAX] = { NULL }; /* RATS: ignore */
    int rc, n = MAX;

    if (argc < 3 || argc > 4) {
        fprintf(stderr, "usage: %s string PCRE [num]\n", argv[0]);
        exit(1);
    }

    if (argc == 4) {
        char *end;
        long req = strtol(argv[3], &end, 10);

        /* a negative num would turn into a huge size_t and make
         * ln_pcre_extract abort, so reject it up front */
        if (end == argv[3] || *end != '\0' || req < 0) {
            fprintf(stderr, "%s: invalid num \"%s\"\n", argv[0], argv[3]);
            exit(1);
        }
        if (req > MAX) {
            /* report the requested value before clamping it */
            printf("warning: clamping max from %ld to %d\n", req, MAX);
            req = MAX;
        }
        n = (int)req;
    }
    /* pass the clamped n, never the raw argument: out[] only has MAX slots */
    rc = ln_pcre_extract((unsigned char *)argv[1], (unsigned char *)argv[2], out, (size_t)n);
    printf("ln_pcre_extract returned %d\n", rc);
    if (rc >= 0) {
        int i;
        /* rc may exceed n; only the first n slots can have been filled */
        int shown = rc < n ? rc : n;
        for(i = 0; i < shown; i++) {
            printf("substring #%d: \"%s\"\n", i, out[i] ? out[i] : "(NULL)");
        }
        /* out[] was NULL-initialized, so releasing all n slots is safe */
        ln_pcre_extract_free(out, n);
    }

    exit(0);
}
#endif
int main(void)
Definition: amiroot.c:12
int verbose
Definition: applyfilter.c:31
int debug
Definition: applyfilter.c:30
char * critmalloc(size_t size, const char *message)
Definition: critmem.c:61
#define SIZE_lineout
Definition: leafnode.h:279
void ln_log(int sev, int ctx, const char *format,...)
Definition: ln_log.c:103
#define LNLOG_SERR
Definition: ln_log.h:13
#define LNLOG_CTOP
Definition: ln_log.h:22
#define len
Definition: mastring.c:31
void ln_pcre_extract_free(char **vec, int count)
Definition: pcre_extract.c:108
int ln_pcre_extract(const unsigned char *input, const unsigned char *pattern, char **output, size_t num)
Definition: pcre_extract.c:55
static int rc
Definition: xsnprintf.c:11