"Fossies" - the Fresh Open Source Software Archive

Member "pcre-8.43/doc/html/pcredemo.html" (23 Feb 2019, 16192 Bytes) of package /linux/misc/pcre-8.43.tar.bz2:


As a special service, "Fossies" has tried to format the requested source page into HTML using (guessed) HTML source code syntax highlighting (style: standard) with prefixed line numbers. Alternatively, you can view or download the uninterpreted source code file here.

    1 <html>
    2 <head>
    3 <title>pcredemo specification</title>
    4 </head>
    5 <body bgcolor="#FFFFFF" text="#00005A" link="#0066FF" alink="#3399FF" vlink="#2222BB">
    6 <h1>pcredemo man page</h1>
    7 <p>
    8 Return to the <a href="index.html">PCRE index page</a>.
    9 </p>
   10 <p>
   11 This page is part of the PCRE HTML documentation. It was generated automatically
   12 from the original man page. If there is any nonsense in it, please consult the
   13 man page, in case the conversion went wrong.
   14 <br>
   15 <ul>
   16 </ul>
   17 <PRE>
   18 /*************************************************
   19 *           PCRE DEMONSTRATION PROGRAM           *
   20 *************************************************/
   21 
   22 /* This is a demonstration program to illustrate the most straightforward ways
   23 of calling the PCRE regular expression library from a C program. See the
   24 pcresample documentation for a short discussion ("man pcresample" if you have
   25 the PCRE man pages installed).
   26 
   27 In Unix-like environments, if PCRE is installed in your standard system
   28 libraries, you should be able to compile this program using this command:
   29 
   30 gcc -Wall pcredemo.c -lpcre -o pcredemo
   31 
   32 If PCRE is not installed in a standard place, it is likely to be installed with
   33 support for the pkg-config mechanism. If you have pkg-config, you can compile
   34 this program using this command:
   35 
   36 gcc -Wall pcredemo.c `pkg-config --cflags --libs libpcre` -o pcredemo
   37 
   38 If you do not have pkg-config, you may have to use this:
   39 
   40 gcc -Wall pcredemo.c -I/usr/local/include -L/usr/local/lib \
   41   -R/usr/local/lib -lpcre -o pcredemo
   42 
   43 Replace "/usr/local/include" and "/usr/local/lib" with wherever the include and
   44 library files for PCRE are installed on your system. Only some operating
   45 systems (e.g. Solaris) use the -R option.
   46 
   47 Building under Windows:
   48 
   49 If you want to statically link this program against a non-dll .a file, you must
   50 define PCRE_STATIC before including pcre.h, otherwise the pcre_malloc() and
   51 pcre_free() exported functions will be declared __declspec(dllimport), with
   52 unwanted results. So in this environment, uncomment the following line. */
   53 
   54 /* #define PCRE_STATIC */
   55 
   56 #include &lt;stdio.h&gt;
   57 #include &lt;string.h&gt;
   58 #include &lt;pcre.h&gt;
   59 
   60 #define OVECCOUNT 30    /* ovector length in ints; should be a multiple of 3 */
   61 
   62 /* pcredemo [-g] pattern subject: returns 0 on success, 1 on failure or no match. */
   63 int main(int argc, char **argv)
   64 {
   65 pcre *re;
   66 const char *error;
   67 char *pattern;
   68 char *subject;
   69 unsigned char *name_table;
   70 unsigned long int option_bits;   /* PCRE_INFO_OPTIONS stores an unsigned long */
   71 int erroffset;
   72 int find_all;
   73 int crlf_is_newline;
   74 int namecount;
   75 int name_entry_size;
   76 int ovector[OVECCOUNT];
   77 int subject_length;
   78 int rc, i;
   79 int utf8;
   80 
   81 
   82 /**************************************************************************
   83 * First, sort out the command line. There is only one possible option at  *
   84 * the moment, "-g" to request repeated matching to find all occurrences,  *
   85 * like Perl's /g option. We set the variable find_all to a non-zero value *
   86 * if the -g option is present. Apart from that, there must be exactly two *
   87 * arguments.                                                              *
   88 **************************************************************************/
   89 
   90 find_all = 0;
   91 for (i = 1; i &lt; argc; i++)
   92   {
   93   if (strcmp(argv[i], "-g") == 0) find_all = 1;
   94     else break;
   95   }
   96 
   97 /* After the options, we require exactly two arguments, which are the pattern,
   98 and the subject string. */
   99 
  100 if (argc - i != 2)
  101   {
  102   printf("Two arguments required: a regex and a subject string\n");
  103   return 1;
  104   }
  105 
  106 pattern = argv[i];
  107 subject = argv[i+1];
  108 subject_length = (int)strlen(subject);
  109 
  110 
  111 /*************************************************************************
  112 * Now we are going to compile the regular expression pattern, and handle *
  113 * any errors that are detected.                                          *
  114 *************************************************************************/
  115 
  116 re = pcre_compile(
  117   pattern,              /* the pattern */
  118   0,                    /* default options */
  119   &amp;error,               /* for error message */
  120   &amp;erroffset,           /* for error offset */
  121   NULL);                /* use default character tables */
  122 
  123 /* Compilation failed: print the error message and exit */
  124 
  125 if (re == NULL)
  126   {
  127   printf("PCRE compilation failed at offset %d: %s\n", erroffset, error);
  128   return 1;
  129   }
  130 
  131 
  132 /*************************************************************************
  133 * If the compilation succeeded, we call PCRE again, in order to do a     *
  134 * pattern match against the subject string. This does just ONE match. If *
  135 * further matching is needed, it will be done below.                     *
  136 *************************************************************************/
  137 
  138 rc = pcre_exec(
  139   re,                   /* the compiled pattern */
  140   NULL,                 /* no extra data - we didn't study the pattern */
  141   subject,              /* the subject string */
  142   subject_length,       /* the length of the subject */
  143   0,                    /* start at offset 0 in the subject */
  144   0,                    /* default options */
  145   ovector,              /* output vector for substring information */
  146   OVECCOUNT);           /* number of elements in the output vector */
  147 
  148 /* Matching failed: handle error cases */
  149 
  150 if (rc &lt; 0)
  151   {
  152   switch(rc)
  153     {
  154     case PCRE_ERROR_NOMATCH: printf("No match\n"); break;
  155     /*
  156     Handle other special cases if you like
  157     */
  158     default: printf("Matching error %d\n", rc); break;
  159     }
  160   pcre_free(re);     /* Release memory used for the compiled pattern */
  161   return 1;
  162   }
  163 
  164 /* Match succeeded */
  165 
  166 printf("\nMatch succeeded at offset %d\n", ovector[0]);
  167 
  168 
  169 /*************************************************************************
  170 * We have found the first match within the subject string. If the output *
  171 * vector wasn't big enough, say so. Then output any substrings that were *
  172 * captured.                                                              *
  173 *************************************************************************/
  174 
  175 /* The output vector wasn't big enough */
  176 
  177 if (rc == 0)
  178   {
  179   rc = OVECCOUNT/3;
  180   printf("ovector only has room for %d captured substrings\n", rc - 1);
  181   }
  182 
  183 /* Show substrings stored in the output vector by number. Obviously, in a real
  184 application you might want to do things other than print them. */
  185 
  186 for (i = 0; i &lt; rc; i++)
  187   {
  188   char *substring_start = subject + ovector[2*i];
  189   int substring_length = ovector[2*i+1] - ovector[2*i];
  190   printf("%2d: %.*s\n", i, substring_length, substring_start);
  191   }
  192 
  193 
  194 /**************************************************************************
  195 * That concludes the basic part of this demonstration program. We have    *
  196 * compiled a pattern, and performed a single match. The code that follows *
  197 * shows first how to access named substrings, and then how to code for    *
  198 * repeated matches on the same subject.                                   *
  199 **************************************************************************/
  200 
  201 /* See if there are any named substrings, and if so, show them by name. First
  202 we have to extract the count of named parentheses from the pattern. */
  203 
  204 (void)pcre_fullinfo(
  205   re,                   /* the compiled pattern */
  206   NULL,                 /* no extra data - we didn't study the pattern */
  207   PCRE_INFO_NAMECOUNT,  /* number of named substrings */
  208   &amp;namecount);          /* where to put the answer */
  209 
  210 if (namecount &lt;= 0) printf("No named substrings\n"); else
  211   {
  212   unsigned char *tabptr;
  213   printf("Named substrings\n");
  214 
  215   /* Before we can access the substrings, we must extract the table for
  216   translating names to numbers, and the size of each entry in the table. */
  217 
  218   (void)pcre_fullinfo(
  219     re,                       /* the compiled pattern */
  220     NULL,                     /* no extra data - we didn't study the pattern */
  221     PCRE_INFO_NAMETABLE,      /* address of the table */
  222     &amp;name_table);             /* where to put the answer */
  223 
  224   (void)pcre_fullinfo(
  225     re,                       /* the compiled pattern */
  226     NULL,                     /* no extra data - we didn't study the pattern */
  227     PCRE_INFO_NAMEENTRYSIZE,  /* size of each entry in the table */
  228     &amp;name_entry_size);        /* where to put the answer */
  229 
  230   /* Now we can scan the table and, for each entry, print the number, the name,
  231   and the substring itself. Groups beyond ovector's capacity are skipped. */
  232 
  233   tabptr = name_table;
  234   for (i = 0; i &lt; namecount; i++)
  235     {
  236     int n = (tabptr[0] &lt;&lt; 8) | tabptr[1];
  237     if (n &lt; OVECCOUNT/3) printf("(%d) %*s: %.*s\n", n, name_entry_size - 3,
  238       tabptr + 2, ovector[2*n+1] - ovector[2*n], subject + ovector[2*n]);
  239     tabptr += name_entry_size;
  240     }
  241   }
  242 
  243 
  244 /*************************************************************************
  245 * If the "-g" option was given on the command line, we want to continue  *
  246 * to search for additional matches in the subject string, in a similar   *
  247 * way to the /g option in Perl. This turns out to be trickier than you   *
  248 * might think because of the possibility of matching an empty string.    *
  249 * What happens is as follows:                                            *
  250 *                                                                        *
  251 * If the previous match was NOT for an empty string, we can just start   *
  252 * the next match at the end of the previous one.                         *
  253 *                                                                        *
  254 * If the previous match WAS for an empty string, we can't do that, as it *
  255 * would lead to an infinite loop. Instead, a special call of pcre_exec() *
  256 * is made with the PCRE_NOTEMPTY_ATSTART and PCRE_ANCHORED flags set.    *
  257 * The first of these tells PCRE that an empty string at the start of the *
  258 * subject is not a valid match; other possibilities must be tried. The   *
  259 * second flag restricts PCRE to one match attempt at the initial string  *
  260 * position. If this match succeeds, an alternative to the empty string   *
  261 * match has been found, and we can print it and proceed round the loop,  *
  262 * advancing by the length of whatever was found. If this match does not  *
  263 * succeed, we still stay in the loop, advancing by just one character.   *
  264 * In UTF-8 mode, which can be set by (*UTF8) in the pattern, this may be *
  265 * more than one byte.                                                    *
  266 *                                                                        *
  267 * However, there is a complication concerned with newlines. When the     *
  268 * newline convention is such that CRLF is a valid newline, we must       *
  269 * advance by two characters rather than one. The newline convention can  *
  270 * be set in the regex by (*CR), etc.; if not, we must find the default.  *
  271 *************************************************************************/
  272 
  273 if (!find_all)     /* Check for -g */
  274   {
  275   pcre_free(re);   /* Release the memory used for the compiled pattern */
  276   return 0;        /* Finish unless -g was given */
  277   }
  278 
  279 /* Before running the loop, check for UTF-8 and whether CRLF is a valid newline
  280 sequence. First, find the options with which the regex was compiled; extract
  281 the UTF-8 state, and mask off all but the newline options. */
  282 
  283 (void)pcre_fullinfo(re, NULL, PCRE_INFO_OPTIONS, &amp;option_bits);
  284 utf8 = (option_bits &amp; PCRE_UTF8) != 0;
  285 option_bits &amp;= PCRE_NEWLINE_CR|PCRE_NEWLINE_LF|PCRE_NEWLINE_CRLF|
  286                PCRE_NEWLINE_ANY|PCRE_NEWLINE_ANYCRLF;
  287 
  288 /* If no newline options were set, find the default newline convention from the
  289 build configuration. */
  290 
  291 if (option_bits == 0)
  292   {
  293   int d;
  294   (void)pcre_config(PCRE_CONFIG_NEWLINE, &amp;d);
  295   /* Note that these values are always the ASCII ones, even in
  296   EBCDIC environments. CR = 13, NL = 10. */
  297   option_bits = (d == 13)? PCRE_NEWLINE_CR :
  298           (d == 10)? PCRE_NEWLINE_LF :
  299           (d == (13&lt;&lt;8 | 10))? PCRE_NEWLINE_CRLF :
  300           (d == -2)? PCRE_NEWLINE_ANYCRLF :
  301           (d == -1)? PCRE_NEWLINE_ANY : 0;
  302   }
  303 
  304 /* See if CRLF is a valid newline sequence. */
  305 
  306 crlf_is_newline =
  307      option_bits == PCRE_NEWLINE_ANY ||
  308      option_bits == PCRE_NEWLINE_CRLF ||
  309      option_bits == PCRE_NEWLINE_ANYCRLF;
  310 
  311 /* Loop for second and subsequent matches */
  312 
  313 for (;;)
  314   {
  315   int options = 0;                 /* Normally no options */
  316   int start_offset = ovector[1];   /* Start at end of previous match */
  317 
  318   /* If the previous match was for an empty string, we are finished if we are
  319   at the end of the subject. Otherwise, arrange to run another match at the
  320   same point to see if a non-empty match can be found. */
  321 
  322   if (ovector[0] == ovector[1])
  323     {
  324     if (ovector[0] == subject_length) break;
  325     options = PCRE_NOTEMPTY_ATSTART | PCRE_ANCHORED;
  326     }
  327 
  328   /* Run the next matching operation */
  329 
  330   rc = pcre_exec(
  331     re,                   /* the compiled pattern */
  332     NULL,                 /* no extra data - we didn't study the pattern */
  333     subject,              /* the subject string */
  334     subject_length,       /* the length of the subject */
  335     start_offset,         /* starting offset in the subject */
  336     options,              /* options */
  337     ovector,              /* output vector for substring information */
  338     OVECCOUNT);           /* number of elements in the output vector */
  339 
  340   /* This time, a result of NOMATCH isn't an error. If the value in "options"
  341   is zero, it just means we have found all possible matches, so the loop ends.
  342   Otherwise, it means we have failed to find a non-empty-string match at a
  343   point where there was a previous empty-string match. In this case, we do what
  344   Perl does: advance the matching position by one character, and continue. We
  345   do this by setting the "end of previous match" offset, because that is picked
  346   up at the top of the loop as the point at which to start again.
  347 
  348   There are two complications: (a) When CRLF is a valid newline sequence, and
  349   the current position is just before it, advance by an extra byte. (b)
  350   Otherwise we must ensure that we skip an entire UTF-8 character if we are in
  351   UTF-8 mode. */
  352 
  353   if (rc == PCRE_ERROR_NOMATCH)
  354     {
  355     if (options == 0) break;                    /* All matches found */
  356     ovector[1] = start_offset + 1;              /* Advance one byte */
  357     if (crlf_is_newline &amp;&amp;                      /* If CRLF is newline &amp; */
  358         start_offset &lt; subject_length - 1 &amp;&amp;    /* we are at CRLF, */
  359         subject[start_offset] == '\r' &amp;&amp;
  360         subject[start_offset + 1] == '\n')
  361       ovector[1] += 1;                          /* Advance by one more. */
  362     else if (utf8)                              /* Otherwise, ensure we */
  363       {                                         /* advance a whole UTF-8 */
  364       while (ovector[1] &lt; subject_length)       /* character. */
  365         {
  366         if ((subject[ovector[1]] &amp; 0xc0) != 0x80) break;
  367         ovector[1] += 1;
  368         }
  369       }
  370     continue;    /* Go round the loop again */
  371     }
  372 
  373   /* Other matching errors are not recoverable. */
  374 
  375   if (rc &lt; 0)
  376     {
  377     printf("Matching error %d\n", rc);
  378     pcre_free(re);    /* Release memory used for the compiled pattern */
  379     return 1;
  380     }
  381 
  382   /* Match succeeded */
  383 
  384   printf("\nMatch succeeded again at offset %d\n", ovector[0]);
  385 
  386   /* The match succeeded, but the output vector wasn't big enough. */
  387 
  388   if (rc == 0)
  389     {
  390     rc = OVECCOUNT/3;
  391     printf("ovector only has room for %d captured substrings\n", rc - 1);
  392     }
  393 
  394   /* As before, show substrings stored in the output vector by number, and then
  395   also any named substrings whose group number fits in the output vector. */
  396 
  397   for (i = 0; i &lt; rc; i++)
  398     {
  399     char *substring_start = subject + ovector[2*i];
  400     int substring_length = ovector[2*i+1] - ovector[2*i];
  401     printf("%2d: %.*s\n", i, substring_length, substring_start);
  402     }
  403 
  404   if (namecount &lt;= 0) printf("No named substrings\n"); else
  405     {
  406     unsigned char *tabptr = name_table;
  407     printf("Named substrings\n");
  408     for (i = 0; i &lt; namecount; i++)
  409       {
  410       int n = (tabptr[0] &lt;&lt; 8) | tabptr[1];
  411       if (n &lt; OVECCOUNT/3) printf("(%d) %*s: %.*s\n", n, name_entry_size - 3,
  412         tabptr + 2, ovector[2*n+1] - ovector[2*n], subject + ovector[2*n]);
  413       tabptr += name_entry_size;
  414       }
  415     }
  416   }      /* End of loop to find second and subsequent matches */
  417 
  418 printf("\n");
  419 pcre_free(re);       /* Release memory used for the compiled pattern */
  420 return 0;
  421 }
  422 
  423 /* End of pcredemo.c */
  424 </PRE><p>
  425 Return to the <a href="index.html">PCRE index page</a>.
  426 </p>