"Fossies" - the Fresh Open Source Software Archive 
Member "tin-2.6.2/src/regex.c" (9 Dec 2022, 8927 Bytes) of package /linux/misc/tin-2.6.2.tar.xz:
As a special service "Fossies" has tried to format the requested source page into HTML format using (guessed) C and C++ source code syntax highlighting (style:
standard) with prefixed line numbers and
code folding option.
Alternatively, you can view or
download the uninterpreted source code file here.
For more information about "regex.c" see the
Fossies "Dox" file reference documentation and the latest
Fossies "Diffs" side-by-side code changes report:
2.6.1_vs_2.6.2.
1 /*
2 * Project : tin - a Usenet reader
3 * Module : regex.c
4 * Author : Jason Faultless <jason@altarstone.com>
5 * Created : 1997-02-21
6 * Updated : 2022-08-29
7 * Notes : Regular expression subroutines
8 * Credits :
9 *
10 * Copyright (c) 1997-2023 Jason Faultless <jason@altarstone.com>
11 * All rights reserved.
12 *
13 * Redistribution and use in source and binary forms, with or without
14 * modification, are permitted provided that the following conditions
15 * are met:
16 *
17 * 1. Redistributions of source code must retain the above copyright notice,
18 * this list of conditions and the following disclaimer.
19 *
20 * 2. Redistributions in binary form must reproduce the above copyright
21 * notice, this list of conditions and the following disclaimer in the
22 * documentation and/or other materials provided with the distribution.
23 *
24 * 3. Neither the name of the copyright holder nor the names of its
25 * contributors may be used to endorse or promote products derived from
26 * this software without specific prior written permission.
27 *
28 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
29 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
30 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
31 * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
32 * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
33 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
34 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
35 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
36 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
37 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
38 * POSSIBILITY OF SUCH DAMAGE.
39 */
40
41
42 #ifndef TIN_H
43 # include "tin.h"
44 #endif /* !TIN_H */
45 #ifndef TCURSES_H
46 # include "tcurses.h"
47 #endif /* !TCURSES_H */
48
49 /*
50 * See if pattern is matched in string. Return TRUE or FALSE
51 * if icase=TRUE then ignore case in the compare
52 * if a precompiled regex is provided it will be used instead of pattern
53 *
54 * If you use match_regex() with full regexes within a loop you should always
55 * provide a precompiled error because if the compilation of the regex fails
56 * an error message will be display on each execution of match_regex()
57 */
58 t_bool
59 match_regex(
60 const char *string,
61 char *pattern,
62 struct regex_cache *cache,
63 t_bool icase)
64 {
65 int error;
66 struct regex_cache tmp_cache = REGEX_CACHE_INITIALIZER;
67 struct regex_cache *ptr_cache;
68
69 if (!tinrc.wildcard) /* wildmat matching */
70 return wildmat(string, pattern, icase);
71
72 /* full regexes */
73 if (cache != NULL && cache->re != NULL)
74 ptr_cache = cache; /* use the provided regex cache */
75 else {
76 /* compile the regex internally */
77 if (!compile_regex(pattern, &tmp_cache, (icase ? REGEX_CASELESS : 0)))
78 return FALSE;
79
80 ptr_cache = &tmp_cache;
81 }
82
83 error = match_regex_ex(string, (int) strlen(string), 0, 0, ptr_cache);
84 if (error >= 0) {
85 regex_cache_destroy(&tmp_cache);
86 return TRUE;
87 }
88
89 /*
90 * match_regex() is mostly used within loops and we don't want to display
91 * an error message on each call
92 */
93 #if 0
94 if (error != REGEX_ERROR_NOMATCH)
95 error_message(2, _(txt_pcre_error_num), error);
96 #endif /* 0 */
97
98 regex_cache_destroy(&tmp_cache);
99 return FALSE;
100 }
101
102
103 /*
104 * See if pattern is matched in string. Return the number of captured strings,
105 * if so, like pcre and pcre2, or a negative error.
106 *
107 * A precompiled regex MUST be provided.
108 *
109 */
110 int
111 match_regex_ex(
112 const char *string,
113 int length,
114 int offset,
115 REGEX_OPTIONS options,
116 struct regex_cache *regex)
117 {
118 #ifndef HAVE_LIB_PCRE2
119 int error;
120
121 error = pcre_exec(regex->re, regex->extra, string, length, offset, options, regex->ovector, regex->ovecalloc);
122 if (error >= 0) {
123 /* error == 0 means 'matched, but not enough space in ovector' */
124 regex->oveccount = error;
125 if (regex->oveccount == 0 && regex->ovecmax > 0)
126 regex->oveccount = 1;
127 /* should not happen ... */
128 if (regex->oveccount > regex->ovecmax)
129 regex->oveccount = regex->ovecmax;
130
131 } else
132 regex->oveccount = 0;
133
134 return error;
135 #else
136 return pcre2_match_8(regex->re, (const PCRE2_UCHAR8*)string, length, offset, options, regex->match, NULL);
137 #endif /* !HAVE_LIB_PCRE2 */
138 }
139
140
141 REGEX_NOFFSET
142 regex_get_ovector_count(
143 struct regex_cache *regex)
144 {
145 #ifdef HAVE_LIB_PCRE2
146 return pcre2_get_ovector_count_8(regex->match);
147 #else
148 return regex->oveccount;
149 #endif /* HAVE_LIB_PCRE2 */
150 }
151
152
153 REGEX_SIZE
154 *regex_get_ovector_pointer(
155 struct regex_cache *regex)
156 {
157 #ifdef HAVE_LIB_PCRE2
158 return pcre2_get_ovector_pointer_8(regex->match);
159 #else
160 return regex->ovector;
161 #endif /* HAVE_LIB_PCRE2 */
162 }
163
164
165 /*
166 * Compile and optimise 'regex'. Return TRUE if all went well
167 */
168 t_bool
169 compile_regex(
170 const char *regex,
171 struct regex_cache *cache,
172 REGEX_OPTIONS options)
173 {
174 #ifdef HAVE_LIB_PCRE2
175 int regex_errcode;
176 PCRE2_SIZE regex_errpos;
177
178 if (regex_use_utf8())
179 options |= PCRE2_UTF;
180
181 cache->re = pcre2_compile_8((const PCRE2_UCHAR8*)regex, PCRE2_ZERO_TERMINATED, options,
182 ®ex_errcode, ®ex_errpos, NULL);
183 if (cache->re == NULL) {
184 PCRE2_UCHAR8 regex_errmsg[256];
185 pcre2_get_error_message_8(regex_errcode, regex_errmsg, sizeof(regex_errmsg));
186 error_message(2, _(txt_pcre_error_at), regex_errmsg, regex_errpos, regex);
187 } else {
188 cache->match = pcre2_match_data_create_from_pattern_8(cache->re, NULL);
189 if (cache->match == NULL) {
190 /* out of memory ... */
191 regex_cache_destroy(cache);
192 regex_cache_init(cache);
193 } else
194 return TRUE;
195 }
196
197 return FALSE;
198
199 #else
200 const char *regex_errmsg = NULL;
201 int regex_errpos;
202
203 if (regex_use_utf8())
204 options |= PCRE_UTF8;
205
206
207 if ((cache->re = pcre_compile(regex, options, ®ex_errmsg, ®ex_errpos, NULL)) == NULL)
208 error_message(2, _(txt_pcre_error_at), regex_errmsg, regex_errpos, regex);
209 else {
210 cache->extra = pcre_study(cache->re, 0, ®ex_errmsg);
211 if (regex_errmsg != NULL) {
212 /* we failed, clean up */
213 regex_cache_destroy(cache);
214 regex_cache_init(cache);
215 error_message(2, _(txt_pcre_error_text), regex_errmsg);
216 } else {
217 int n;
218 int error;
219
220 error = pcre_fullinfo(cache->re, cache->extra, PCRE_INFO_CAPTURECOUNT, &n);
221 if (error != 0)
222 error_message(2, _(txt_pcre_error_num), error);
223 else {
224 if (n <= 0)
225 n = 1;
226
227 cache->ovecalloc = (n + 1) * 3;
228 cache->ovecmax = n;
229 cache->oveccount = 0;
230 cache->ovector = my_malloc(cache->ovecalloc * sizeof(int));
231 return TRUE;
232 }
233 }
234 }
235
236 return FALSE;
237
238 #endif /* HAVE_LIB_PCRE2 */
239 }
240
241
242 /*
243 * Highlight any string on 'row' that match 'regex'
244 */
245 void
246 highlight_regexes(
247 int row,
248 struct regex_cache *regex,
249 int color)
250 {
251 char *ptr;
252 #ifdef USE_CURSES
253 char buf[LEN];
254 #else
255 char *buf;
256 #endif /* USE_CURSES */
257
258 /* Get contents of line from the screen */
259 #ifdef USE_CURSES
260 screen_contents(row, 0, buf);
261 #else
262 buf = screen[row].col;
263 #endif /* USE_CURSES */
264 ptr = buf;
265
266 /* also check for 0 as offsets[] might be too small to hold all captured subpatterns */
267 while (match_regex_ex(ptr, (int) strlen(ptr), 0, 0, regex) >= 0) {
268 REGEX_SIZE *offsets = regex_get_ovector_pointer(regex);
269 /* we have a match */
270 if (color >= 0) /* color the matching text */
271 word_highlight_string(row, (int) ((ptr - buf) + offsets[0]), offsets[1] - offsets[0], color);
272 else
273 /* inverse the matching text */
274 highlight_string(row, (int) ((ptr - buf) + offsets[0]), offsets[1] - offsets[0]);
275
276 if (!tinrc.word_h_display_marks) {
277 #ifdef USE_CURSES
278 screen_contents(row, 0, buf);
279 #endif /* USE_CURSES */
280 ptr += offsets[1] - 2;
281 } else
282 ptr += offsets[1];
283 }
284 }
285
286
287 void
288 regex_cache_init(
289 struct regex_cache *regex)
290 {
291 #ifdef HAVE_LIB_PCRE2
292 regex->re = NULL;
293 regex->match = NULL;
294 #else
295 regex->re = NULL;
296 regex->extra = NULL;
297 regex->ovector = NULL;
298 regex->ovecalloc = 0;
299 regex->ovecmax = 0;
300 regex->oveccount = 0;
301 #endif /* HAVE_LIB_PCRE2 */
302 }
303
304
305 void
306 regex_cache_destroy(
307 struct regex_cache *regex)
308 {
309 #ifdef HAVE_LIB_PCRE2
310 pcre2_code_free_8(regex->re);
311 regex->re = NULL;
312 pcre2_match_data_free_8(regex->match);
313 regex->match = NULL;
314 #else
315 FreeAndNull(regex->re);
316 FreeAndNull(regex->extra);
317 FreeAndNull(regex->ovector);
318 regex->ovecalloc = 0;
319 regex->ovecmax = 0;
320 regex->oveccount = 0;
321 #endif /* HAVE_LIB_PCRE2 */
322 }
323
324
325 t_bool
326 regex_use_utf8(
327 void)
328 {
329 /* TODO: clarify PCRE_MAJOR, as it does not seem to be set by any
330 * configure variant anymore */
331 #if defined(MULTIBYTE_ABLE) && !defined(NO_LOCALE)
332 int i = 0;
333
334 # ifdef HAVE_LIB_PCRE2
335 (void) pcre2_config_8(PCRE2_CONFIG_UNICODE, &i);
336 # else
337 # if defined(PCRE_MAJOR) && PCRE_MAJOR >= 4
338 (void) pcre_config(PCRE_CONFIG_UTF8, &i);
339 # else
340 /* nothing */
341 # endif /* defined(PCRE_MAJOR) && PCRE_MAJOR >= 4 */
342 # endif /* HAVE_LIB_PCRE2 */
343
344 return (IS_LOCAL_CHARSET("UTF-8") && i ? TRUE : FALSE);
345
346 #else
347
348 return FALSE;
349
350 #endif /* MULTIBYTE_ABLE && !NO_LOCALE */
351 }