"Fossies" - the Fresh Open Source Software Archive

Member "tin-2.6.2/src/regex.c" (9 Dec 2022, 8927 Bytes) of package /linux/misc/tin-2.6.2.tar.xz:


As a special service "Fossies" has tried to format the requested source page into HTML format using (guessed) C and C++ source code syntax highlighting (style: standard) with prefixed line numbers and code folding option. Alternatively you can here view or download the uninterpreted source code file. For more information about "regex.c" see the Fossies "Dox" file reference documentation and the latest Fossies "Diffs" side-by-side code changes report: 2.6.1_vs_2.6.2.

    1 /*
    2  *  Project   : tin - a Usenet reader
    3  *  Module    : regex.c
    4  *  Author    : Jason Faultless <jason@altarstone.com>
    5  *  Created   : 1997-02-21
    6  *  Updated   : 2022-08-29
    7  *  Notes     : Regular expression subroutines
    8  *  Credits   :
    9  *
   10  * Copyright (c) 1997-2023 Jason Faultless <jason@altarstone.com>
   11  * All rights reserved.
   12  *
   13  * Redistribution and use in source and binary forms, with or without
   14  * modification, are permitted provided that the following conditions
   15  * are met:
   16  *
   17  * 1. Redistributions of source code must retain the above copyright notice,
   18  *    this list of conditions and the following disclaimer.
   19  *
   20  * 2. Redistributions in binary form must reproduce the above copyright
   21  *    notice, this list of conditions and the following disclaimer in the
   22  *    documentation and/or other materials provided with the distribution.
   23  *
   24  * 3. Neither the name of the copyright holder nor the names of its
   25  *    contributors may be used to endorse or promote products derived from
   26  *    this software without specific prior written permission.
   27  *
   28  * * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
   29  * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
   30  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
   31  * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
   32  * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
   33  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
   34  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
   35  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
   36  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
   37  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
   38  * POSSIBILITY OF SUCH DAMAGE.
   39  */
   40 
   41 
   42 #ifndef TIN_H
   43 #   include "tin.h"
   44 #endif /* !TIN_H */
   45 #ifndef TCURSES_H
   46 #   include "tcurses.h"
   47 #endif /* !TCURSES_H */
   48 
   49 /*
   50  * See if pattern is matched in string. Return TRUE or FALSE
   51  * if icase=TRUE then ignore case in the compare
   52  * if a precompiled regex is provided it will be used instead of pattern
   53  *
   54  * If you use match_regex() with full regexes within a loop you should always
   55  * provide a precompiled error because if the compilation of the regex fails
   56  * an error message will be display on each execution of match_regex()
   57  */
   58 t_bool
   59 match_regex(
   60     const char *string,
   61     char *pattern,
   62     struct regex_cache *cache,
   63     t_bool icase)
   64 {
   65     int error;
   66     struct regex_cache tmp_cache = REGEX_CACHE_INITIALIZER;
   67     struct regex_cache *ptr_cache;
   68 
   69     if (!tinrc.wildcard)    /* wildmat matching */
   70         return wildmat(string, pattern, icase);
   71 
   72     /* full regexes */
   73     if (cache != NULL && cache->re != NULL)
   74         ptr_cache = cache;  /* use the provided regex cache */
   75     else {
   76         /* compile the regex internally */
   77         if (!compile_regex(pattern, &tmp_cache, (icase ? REGEX_CASELESS : 0)))
   78             return FALSE;
   79 
   80         ptr_cache = &tmp_cache;
   81     }
   82 
   83     error = match_regex_ex(string, (int) strlen(string), 0, 0, ptr_cache);
   84     if (error >= 0) {
   85         regex_cache_destroy(&tmp_cache);
   86         return TRUE;
   87     }
   88 
   89     /*
   90      * match_regex() is mostly used within loops and we don't want to display
   91      * an error message on each call
   92      */
   93 #if 0
   94     if (error != REGEX_ERROR_NOMATCH)
   95         error_message(2, _(txt_pcre_error_num), error);
   96 #endif /* 0 */
   97 
   98     regex_cache_destroy(&tmp_cache);
   99     return FALSE;
  100 }
  101 
  102 
  103 /*
  104  * See if pattern is matched in string. Return the number of captured strings,
  105  * if so, like pcre and pcre2, or a negative error.
  106  *
  107  * A precompiled regex MUST be provided.
  108  *
  109  */
  110 int
  111 match_regex_ex(
  112     const char *string,
  113     int length,
  114     int offset,
  115     REGEX_OPTIONS options,
  116     struct regex_cache *regex)
  117 {
  118 #ifndef HAVE_LIB_PCRE2
  119     int error;
  120 
  121     error = pcre_exec(regex->re, regex->extra, string, length, offset, options, regex->ovector, regex->ovecalloc);
  122     if (error >= 0) {
  123         /* error == 0 means 'matched, but not enough space in ovector' */
  124         regex->oveccount = error;
  125         if (regex->oveccount == 0 && regex->ovecmax > 0)
  126             regex->oveccount = 1;
  127         /* should not happen ... */
  128         if (regex->oveccount > regex->ovecmax)
  129             regex->oveccount = regex->ovecmax;
  130 
  131     } else
  132         regex->oveccount = 0;
  133 
  134     return error;
  135 #else
  136     return pcre2_match_8(regex->re, (const PCRE2_UCHAR8*)string, length, offset, options, regex->match, NULL);
  137 #endif /* !HAVE_LIB_PCRE2 */
  138 }
  139 
  140 
  141 REGEX_NOFFSET
  142 regex_get_ovector_count(
  143     struct regex_cache *regex)
  144 {
  145 #ifdef HAVE_LIB_PCRE2
  146     return pcre2_get_ovector_count_8(regex->match);
  147 #else
  148     return regex->oveccount;
  149 #endif /* HAVE_LIB_PCRE2 */
  150 }
  151 
  152 
  153 REGEX_SIZE
  154 *regex_get_ovector_pointer(
  155     struct regex_cache *regex)
  156 {
  157 #ifdef HAVE_LIB_PCRE2
  158     return pcre2_get_ovector_pointer_8(regex->match);
  159 #else
  160     return regex->ovector;
  161 #endif /* HAVE_LIB_PCRE2 */
  162 }
  163 
  164 
  165 /*
  166  * Compile and optimise 'regex'. Return TRUE if all went well
  167  */
  168 t_bool
  169 compile_regex(
  170     const char *regex,
  171     struct regex_cache *cache,
  172     REGEX_OPTIONS options)
  173 {
  174 #ifdef HAVE_LIB_PCRE2
  175     int regex_errcode;
  176     PCRE2_SIZE regex_errpos;
  177 
  178     if (regex_use_utf8())
  179         options |= PCRE2_UTF;
  180 
  181     cache->re = pcre2_compile_8((const PCRE2_UCHAR8*)regex, PCRE2_ZERO_TERMINATED, options,
  182             &regex_errcode, &regex_errpos, NULL);
  183     if (cache->re == NULL) {
  184         PCRE2_UCHAR8 regex_errmsg[256];
  185         pcre2_get_error_message_8(regex_errcode, regex_errmsg, sizeof(regex_errmsg));
  186         error_message(2, _(txt_pcre_error_at), regex_errmsg, regex_errpos, regex);
  187     } else {
  188         cache->match = pcre2_match_data_create_from_pattern_8(cache->re, NULL);
  189         if (cache->match == NULL) {
  190             /* out of memory ... */
  191             regex_cache_destroy(cache);
  192             regex_cache_init(cache);
  193         } else
  194             return TRUE;
  195     }
  196 
  197     return FALSE;
  198 
  199 #else
  200     const char *regex_errmsg = NULL;
  201     int regex_errpos;
  202 
  203     if (regex_use_utf8())
  204         options |= PCRE_UTF8;
  205 
  206 
  207     if ((cache->re = pcre_compile(regex, options, &regex_errmsg, &regex_errpos, NULL)) == NULL)
  208         error_message(2, _(txt_pcre_error_at), regex_errmsg, regex_errpos, regex);
  209     else {
  210         cache->extra = pcre_study(cache->re, 0, &regex_errmsg);
  211         if (regex_errmsg != NULL) {
  212             /* we failed, clean up */
  213             regex_cache_destroy(cache);
  214             regex_cache_init(cache);
  215             error_message(2, _(txt_pcre_error_text), regex_errmsg);
  216         } else {
  217             int n;
  218             int error;
  219 
  220             error = pcre_fullinfo(cache->re, cache->extra, PCRE_INFO_CAPTURECOUNT, &n);
  221             if (error != 0)
  222                 error_message(2, _(txt_pcre_error_num), error);
  223             else {
  224                 if (n <= 0)
  225                     n = 1;
  226 
  227                 cache->ovecalloc = (n + 1) * 3;
  228                 cache->ovecmax = n;
  229                 cache->oveccount = 0;
  230                 cache->ovector = my_malloc(cache->ovecalloc * sizeof(int));
  231                 return TRUE;
  232             }
  233         }
  234     }
  235 
  236     return FALSE;
  237 
  238 #endif /* HAVE_LIB_PCRE2 */
  239 }
  240 
  241 
  242 /*
  243  * Highlight any string on 'row' that match 'regex'
  244  */
  245 void
  246 highlight_regexes(
  247     int row,
  248     struct regex_cache *regex,
  249     int color)
  250 {
  251     char *ptr;
  252 #ifdef USE_CURSES
  253     char buf[LEN];
  254 #else
  255     char *buf;
  256 #endif /* USE_CURSES */
  257 
  258     /* Get contents of line from the screen */
  259 #ifdef USE_CURSES
  260     screen_contents(row, 0, buf);
  261 #else
  262     buf = screen[row].col;
  263 #endif /* USE_CURSES */
  264     ptr = buf;
  265 
  266     /* also check for 0 as offsets[] might be too small to hold all captured subpatterns */
  267     while (match_regex_ex(ptr, (int) strlen(ptr), 0, 0, regex) >= 0) {
  268         REGEX_SIZE *offsets = regex_get_ovector_pointer(regex);
  269         /* we have a match */
  270         if (color >= 0) /* color the matching text */
  271             word_highlight_string(row, (int) ((ptr - buf) + offsets[0]), offsets[1] - offsets[0], color);
  272         else
  273             /* inverse the matching text */
  274             highlight_string(row, (int) ((ptr - buf) + offsets[0]), offsets[1] - offsets[0]);
  275 
  276         if (!tinrc.word_h_display_marks) {
  277 #ifdef USE_CURSES
  278             screen_contents(row, 0, buf);
  279 #endif /* USE_CURSES */
  280             ptr += offsets[1] - 2;
  281         } else
  282             ptr += offsets[1];
  283     }
  284 }
  285 
  286 
  287 void
  288 regex_cache_init(
  289     struct regex_cache *regex)
  290 {
  291 #ifdef HAVE_LIB_PCRE2
  292     regex->re = NULL;
  293     regex->match = NULL;
  294 #else
  295     regex->re = NULL;
  296     regex->extra = NULL;
  297     regex->ovector = NULL;
  298     regex->ovecalloc = 0;
  299     regex->ovecmax = 0;
  300     regex->oveccount = 0;
  301 #endif /* HAVE_LIB_PCRE2 */
  302 }
  303 
  304 
  305 void
  306 regex_cache_destroy(
  307     struct regex_cache *regex)
  308 {
  309 #ifdef HAVE_LIB_PCRE2
  310     pcre2_code_free_8(regex->re);
  311     regex->re = NULL;
  312     pcre2_match_data_free_8(regex->match);
  313     regex->match = NULL;
  314 #else
  315     FreeAndNull(regex->re);
  316     FreeAndNull(regex->extra);
  317     FreeAndNull(regex->ovector);
  318     regex->ovecalloc = 0;
  319     regex->ovecmax = 0;
  320     regex->oveccount = 0;
  321 #endif /* HAVE_LIB_PCRE2 */
  322 }
  323 
  324 
  325 t_bool
  326 regex_use_utf8(
  327     void)
  328 {
  329     /* TODO: clarify PCRE_MAJOR, as it does not seem to be set by any
  330      * configure variant anymore */
  331 #if defined(MULTIBYTE_ABLE) && !defined(NO_LOCALE)
  332     int i = 0;
  333 
  334 #   ifdef HAVE_LIB_PCRE2
  335     (void) pcre2_config_8(PCRE2_CONFIG_UNICODE, &i);
  336 #   else
  337 #       if defined(PCRE_MAJOR) && PCRE_MAJOR >= 4
  338             (void) pcre_config(PCRE_CONFIG_UTF8, &i);
  339 #       else
  340             /* nothing */
  341 #       endif /* defined(PCRE_MAJOR) && PCRE_MAJOR >= 4 */
  342 #   endif /* HAVE_LIB_PCRE2 */
  343 
  344     return (IS_LOCAL_CHARSET("UTF-8") && i ? TRUE : FALSE);
  345 
  346 #else
  347 
  348     return FALSE;
  349 
  350 #endif /* MULTIBYTE_ABLE && !NO_LOCALE */
  351 }