tin  2.4.5
About: TIN is a threaded NNTP and spool based UseNet newsreader.
  Fossies Dox: tin-2.4.5.tar.xz  ("unofficial" and yet experimental doxygen-generated source code documentation)  

pcre_get.c
Go to the documentation of this file.
1 /*************************************************
2 * Perl-Compatible Regular Expressions *
3 *************************************************/
4 
5 /* PCRE is a library of functions to support regular expressions whose syntax
6 and semantics are as close as possible to those of the Perl 5 language.
7 
8  Written by Philip Hazel
9  Copyright (c) 1997-2006 University of Cambridge
10 
11 -----------------------------------------------------------------------------
12 Redistribution and use in source and binary forms, with or without
13 modification, are permitted provided that the following conditions are met:
14 
15  * Redistributions of source code must retain the above copyright notice,
16  this list of conditions and the following disclaimer.
17 
18  * Redistributions in binary form must reproduce the above copyright
19  notice, this list of conditions and the following disclaimer in the
20  documentation and/or other materials provided with the distribution.
21 
22  * Neither the name of the University of Cambridge nor the names of its
23  contributors may be used to endorse or promote products derived from
24  this software without specific prior written permission.
25 
26 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
27 AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
28 IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
29 ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
30 LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
31 CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
32 SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
33 INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
34 CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
35 ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
36 POSSIBILITY OF SUCH DAMAGE.
37 -----------------------------------------------------------------------------
38 */
39 
40 
41 /* This module contains some convenience functions for extracting substrings
42 from the subject string after a regex match has succeeded. The original idea
43 for these functions came from Scott Wimer. */
44 
45 
46 #include "pcre_internal.h"
47 
48 
49 /*************************************************
50 * Find number for named string *
51 *************************************************/
52 
53 /* This function is used by the get_first_set() function below, as well
54 as being generally available. It assumes that names are unique.
55 
56 Arguments:
57  code the compiled regex
58  stringname the name whose number is required
59 
60 Returns: the number of the named parentheses, or a negative number
61  (PCRE_ERROR_NOSUBSTRING) if not found
62 */
63 
64 int
65 pcre_get_stringnumber(const pcre *code, const char *stringname)
66 {
67 int rc;
68 int entrysize;
69 int top, bot;
70 uschar *nametable;
71 
72 if ((rc = pcre_fullinfo(code, NULL, PCRE_INFO_NAMECOUNT, &top)) != 0)
73  return rc;
74 if (top <= 0) return PCRE_ERROR_NOSUBSTRING;
75 
76 if ((rc = pcre_fullinfo(code, NULL, PCRE_INFO_NAMEENTRYSIZE, &entrysize)) != 0)
77  return rc;
78 if ((rc = pcre_fullinfo(code, NULL, PCRE_INFO_NAMETABLE, &nametable)) != 0)
79  return rc;
80 
81 bot = 0;
82 while (top > bot)
83  {
84  int mid = (top + bot) / 2;
85  uschar *entry = nametable + entrysize*mid;
86  int c = strcmp(stringname, (char *)(entry + 2));
87  if (c == 0) return (entry[0] << 8) + entry[1];
88  if (c > 0) bot = mid + 1; else top = mid;
89  }
90 
92 }
93 
94 
95 
96 /*************************************************
97 * Find (multiple) entries for named string *
98 *************************************************/
99 
100 /* This is used by the get_first_set() function below, as well as being
101 generally available. It is used when duplicated names are permitted.
102 
103 Arguments:
104  code the compiled regex
105  stringname the name whose entries required
106  firstptr where to put the pointer to the first entry
107  lastptr where to put the pointer to the last entry
108 
109 Returns: the length of each entry, or a negative number
110  (PCRE_ERROR_NOSUBSTRING) if not found
111 */
112 
113 int
114 pcre_get_stringtable_entries(const pcre *code, const char *stringname,
115  char **firstptr, char **lastptr)
116 {
117 int rc;
118 int entrysize;
119 int top, bot;
120 uschar *nametable, *lastentry;
121 
122 if ((rc = pcre_fullinfo(code, NULL, PCRE_INFO_NAMECOUNT, &top)) != 0)
123  return rc;
124 if (top <= 0) return PCRE_ERROR_NOSUBSTRING;
125 
126 if ((rc = pcre_fullinfo(code, NULL, PCRE_INFO_NAMEENTRYSIZE, &entrysize)) != 0)
127  return rc;
128 if ((rc = pcre_fullinfo(code, NULL, PCRE_INFO_NAMETABLE, &nametable)) != 0)
129  return rc;
130 
131 lastentry = nametable + entrysize * (top - 1);
132 bot = 0;
133 while (top > bot)
134  {
135  int mid = (top + bot) / 2;
136  uschar *entry = nametable + entrysize*mid;
137  int c = strcmp(stringname, (char *)(entry + 2));
138  if (c == 0)
139  {
140  uschar *first = entry;
141  uschar *last = entry;
142  while (first > nametable)
143  {
144  if (strcmp(stringname, (char *)(first - entrysize + 2)) != 0) break;
145  first -= entrysize;
146  }
147  while (last < lastentry)
148  {
149  if (strcmp(stringname, (char *)(last + entrysize + 2)) != 0) break;
150  last += entrysize;
151  }
152  *firstptr = (char *)first;
153  *lastptr = (char *)last;
154  return entrysize;
155  }
156  if (c > 0) bot = mid + 1; else top = mid;
157  }
158 
160 }
161 
162 
163 
164 /*************************************************
165 * Find first set of multiple named strings *
166 *************************************************/
167 
168 /* This function allows for duplicate names in the table of named substrings.
169 It returns the number of the first one that was set in a pattern match.
170 
171 Arguments:
172  code the compiled regex
173  stringname the name of the capturing substring
174  ovector the vector of matched substrings
175 
176 Returns: the number of the first that is set,
177  or the number of the last one if none are set,
178  or a negative number on error
179 */
180 
181 static int
182 get_first_set(const pcre *code, const char *stringname, int *ovector)
183 {
184 const real_pcre *re = (const real_pcre *)code;
185 int entrysize;
186 char *first, *last;
187 uschar *entry;
188 if ((re->options & (PCRE_DUPNAMES | PCRE_JCHANGED)) == 0)
189  return pcre_get_stringnumber(code, stringname);
190 entrysize = pcre_get_stringtable_entries(code, stringname, &first, &last);
191 if (entrysize <= 0) return entrysize;
192 for (entry = (uschar *)first; entry <= (uschar *)last; entry += entrysize)
193  {
194  int n = (entry[0] << 8) + entry[1];
195  if (ovector[n*2] >= 0) return n;
196  }
197 return (first[0] << 8) + first[1];
198 }
199 
200 
201 
202 
203 /*************************************************
204 * Copy captured string to given buffer *
205 *************************************************/
206 
207 /* This function copies a single captured substring into a given buffer.
208 Note that we use memcpy() rather than strncpy() in case there are binary zeros
209 in the string.
210 
211 Arguments:
212  subject the subject string that was matched
213  ovector pointer to the offsets table
214  stringcount the number of substrings that were captured
215  (i.e. the yield of the pcre_exec call, unless
216  that was zero, in which case it should be 1/3
217  of the offset table size)
218  stringnumber the number of the required substring
219  buffer where to put the substring
220  size the size of the buffer
221 
222 Returns: if successful:
223  the length of the copied string, not including the zero
224  that is put on the end; can be zero
225  if not successful:
226  PCRE_ERROR_NOMEMORY (-6) buffer too small
227  PCRE_ERROR_NOSUBSTRING (-7) no such captured substring
228 */
229 
230 int
231 pcre_copy_substring(const char *subject, int *ovector, int stringcount,
232  int stringnumber, char *buffer, int size)
233 {
234 int yield;
235 if (stringnumber < 0 || stringnumber >= stringcount)
236  return PCRE_ERROR_NOSUBSTRING;
237 stringnumber *= 2;
238 yield = ovector[stringnumber+1] - ovector[stringnumber];
239 if (size < yield + 1) return PCRE_ERROR_NOMEMORY;
240 memcpy(buffer, subject + ovector[stringnumber], yield);
241 buffer[yield] = 0;
242 return yield;
243 }
244 
245 
246 
247 /*************************************************
248 * Copy named captured string to given buffer *
249 *************************************************/
250 
251 /* This function copies a single captured substring into a given buffer,
252 identifying it by name. If the regex permits duplicate names, the first
253 substring that is set is chosen.
254 
255 Arguments:
256  code the compiled regex
257  subject the subject string that was matched
258  ovector pointer to the offsets table
259  stringcount the number of substrings that were captured
260  (i.e. the yield of the pcre_exec call, unless
261  that was zero, in which case it should be 1/3
262  of the offset table size)
263  stringname the name of the required substring
264  buffer where to put the substring
265  size the size of the buffer
266 
267 Returns: if successful:
268  the length of the copied string, not including the zero
269  that is put on the end; can be zero
270  if not successful:
271  PCRE_ERROR_NOMEMORY (-6) buffer too small
272  PCRE_ERROR_NOSUBSTRING (-7) no such captured substring
273 */
274 
275 int
276 pcre_copy_named_substring(const pcre *code, const char *subject, int *ovector,
277  int stringcount, const char *stringname, char *buffer, int size)
278 {
279 int n = get_first_set(code, stringname, ovector);
280 if (n <= 0) return n;
281 return pcre_copy_substring(subject, ovector, stringcount, n, buffer, size);
282 }
283 
284 
285 
286 /*************************************************
287 * Copy all captured strings to new store *
288 *************************************************/
289 
290 /* This function gets one chunk of store and builds a list of pointers and all
291 of the captured substrings in it. A NULL pointer is put on the end of the list.
292 
293 Arguments:
294  subject the subject string that was matched
295  ovector pointer to the offsets table
296  stringcount the number of substrings that were captured
297  (i.e. the yield of the pcre_exec call, unless
298  that was zero, in which case it should be 1/3
299  of the offset table size)
300  listptr set to point to the list of pointers
301 
302 Returns: if successful: 0
303  if not successful:
304  PCRE_ERROR_NOMEMORY (-6) failed to get store
305 */
306 
307 int
308 pcre_get_substring_list(const char *subject, int *ovector, int stringcount,
309  const char ***listptr)
310 {
311 int i;
312 int size = sizeof(char *);
313 int double_count = stringcount * 2;
314 char **stringlist;
315 char *p;
316 
317 for (i = 0; i < double_count; i += 2)
318  size += sizeof(char *) + ovector[i+1] - ovector[i] + 1;
319 
320 stringlist = (char **)(pcre_malloc)(size);
321 if (stringlist == NULL) return PCRE_ERROR_NOMEMORY;
322 
323 *listptr = (const char **)stringlist;
324 p = (char *)(stringlist + stringcount + 1);
325 
326 for (i = 0; i < double_count; i += 2)
327  {
328  int len = ovector[i+1] - ovector[i];
329  memcpy(p, subject + ovector[i], len);
330  *stringlist++ = p;
331  p += len;
332  *p++ = 0;
333  }
334 
335 *stringlist = NULL;
336 return 0;
337 }
338 
339 
340 
341 /*************************************************
342 * Free store obtained by get_substring_list *
343 *************************************************/
344 
345 /* This function exists for the benefit of people calling PCRE from non-C
346 programs that can call its functions, but not free() or (pcre_free)() directly.
347 
348 Argument: the result of a previous pcre_get_substring_list()
349 Returns: nothing
350 */
351 
352 void
353 pcre_free_substring_list(const char **pointer)
354 {
355 (pcre_free)((void *)pointer);
356 }
357 
358 
359 
360 /*************************************************
361 * Copy captured string to new store *
362 *************************************************/
363 
364 /* This function copies a single captured substring into a piece of new
365 store
366 
367 Arguments:
368  subject the subject string that was matched
369  ovector pointer to the offsets table
370  stringcount the number of substrings that were captured
371  (i.e. the yield of the pcre_exec call, unless
372  that was zero, in which case it should be 1/3
373  of the offset table size)
374  stringnumber the number of the required substring
375  stringptr where to put a pointer to the substring
376 
377 Returns: if successful:
378  the length of the string, not including the zero that
379  is put on the end; can be zero
380  if not successful:
381  PCRE_ERROR_NOMEMORY (-6) failed to get store
382  PCRE_ERROR_NOSUBSTRING (-7) substring not present
383 */
384 
385 int
386 pcre_get_substring(const char *subject, int *ovector, int stringcount,
387  int stringnumber, const char **stringptr)
388 {
389 int yield;
390 char *substring;
391 if (stringnumber < 0 || stringnumber >= stringcount)
392  return PCRE_ERROR_NOSUBSTRING;
393 stringnumber *= 2;
394 yield = ovector[stringnumber+1] - ovector[stringnumber];
395 substring = (char *)(pcre_malloc)(yield + 1);
396 if (substring == NULL) return PCRE_ERROR_NOMEMORY;
397 memcpy(substring, subject + ovector[stringnumber], yield);
398 substring[yield] = 0;
399 *stringptr = substring;
400 return yield;
401 }
402 
403 
404 
405 /*************************************************
406 * Copy named captured string to new store *
407 *************************************************/
408 
409 /* This function copies a single captured substring, identified by name, into
410 new store. If the regex permits duplicate names, the first substring that is
411 set is chosen.
412 
413 Arguments:
414  code the compiled regex
415  subject the subject string that was matched
416  ovector pointer to the offsets table
417  stringcount the number of substrings that were captured
418  (i.e. the yield of the pcre_exec call, unless
419  that was zero, in which case it should be 1/3
420  of the offset table size)
421  stringname the name of the required substring
422  stringptr where to put the pointer
423 
424 Returns: if successful:
425  the length of the copied string, not including the zero
426  that is put on the end; can be zero
427  if not successful:
428  PCRE_ERROR_NOMEMORY (-6) couldn't get memory
429  PCRE_ERROR_NOSUBSTRING (-7) no such captured substring
430 */
431 
432 int
433 pcre_get_named_substring(const pcre *code, const char *subject, int *ovector,
434  int stringcount, const char *stringname, const char **stringptr)
435 {
436 int n = get_first_set(code, stringname, ovector);
437 if (n <= 0) return n;
438 return pcre_get_substring(subject, ovector, stringcount, n, stringptr);
439 }
440 
441 
442 
443 
444 /*************************************************
445 * Free store obtained by get_substring *
446 *************************************************/
447 
448 /* This function exists for the benefit of people calling PCRE from non-C
449 programs that can call its functions, but not free() or (pcre_free)() directly.
450 
451 Argument: the result of a previous pcre_get_substring()
452 Returns: nothing
453 */
454 
455 void
456 pcre_free_substring(const char *pointer)
457 {
458 (pcre_free)((void *)pointer);
459 }
460 
461 /* End of pcre_get.c */
#define PCRE_INFO_NAMETABLE
Definition: pcre.h:162
#define PCRE_INFO_NAMEENTRYSIZE
Definition: pcre.h:160
void *(* pcre_malloc)(size_t)
Definition: pcre_globals.c:75
void(* pcre_free)(void *)
Definition: pcre_globals.c:76
#define PCRE_INFO_NAMECOUNT
Definition: pcre.h:161
#define PCRE_DUPNAMES
Definition: pcre.h:117
#define PCRE_ERROR_NOMEMORY
Definition: pcre.h:131
int pcre_fullinfo(const pcre *, const pcre_extra *, int, void *)
Definition: pcre_fullinfo.c:65
#define PCRE_ERROR_NOSUBSTRING
Definition: pcre.h:132
static int get_first_set(const pcre *code, const char *stringname, int *ovector)
Definition: pcre_get.c:182
int pcre_get_substring_list(const char *subject, int *ovector, int stringcount, const char ***listptr)
Definition: pcre_get.c:308
int pcre_get_substring(const char *subject, int *ovector, int stringcount, int stringnumber, const char **stringptr)
Definition: pcre_get.c:386
int pcre_copy_substring(const char *subject, int *ovector, int stringcount, int stringnumber, char *buffer, int size)
Definition: pcre_get.c:231
void pcre_free_substring(const char *pointer)
Definition: pcre_get.c:456
void pcre_free_substring_list(const char **pointer)
Definition: pcre_get.c:353
int pcre_get_stringtable_entries(const pcre *code, const char *stringname, char **firstptr, char **lastptr)
Definition: pcre_get.c:114
int pcre_copy_named_substring(const pcre *code, const char *subject, int *ovector, int stringcount, const char *stringname, char *buffer, int size)
Definition: pcre_get.c:276
int pcre_get_stringnumber(const pcre *code, const char *stringname)
Definition: pcre_get.c:65
int pcre_get_named_substring(const pcre *code, const char *subject, int *ovector, int stringcount, const char *stringname, const char **stringptr)
Definition: pcre_get.c:433
unsigned char uschar
#define PCRE_JCHANGED
static uschar * buffer
Definition: pcretest.c:154
int code
Definition: signal.c:116
pcre_uint32 options