"Fossies" - the Fresh Open Source Software Archive

Member "tin-2.4.1/pcre/pcredemo.c" (28 Aug 2013, 11732 Bytes) of archive /linux/misc/tin-2.4.1.tar.gz:


As a special service "Fossies" has tried to format the requested source page into HTML format using (guessed) C and C++ source code syntax highlighting (style: standard) with prefixed line numbers and code folding option. Alternatively, you can view or download the uninterpreted source code file here. For more information about "pcredemo.c" see the Fossies "Dox" file reference documentation.

    1 /*************************************************
    2 *           PCRE DEMONSTRATION PROGRAM           *
    3 *************************************************/
    4 
    5 /* This is a demonstration program to illustrate the most straightforward ways
    6 of calling the PCRE regular expression library from a C program. See the
    7 pcresample documentation for a short discussion.
    8 
    9 Compile thuswise:
   10   gcc -Wall pcredemo.c -I/usr/local/include -L/usr/local/lib \
   11     -R/usr/local/lib -lpcre
   12 
   13 Replace "/usr/local/include" and "/usr/local/lib" with wherever the include and
   14 library files for PCRE are installed on your system. Only some operating
   15 systems (e.g. Solaris) use the -R option.
   16 */
   17 
   18 
   19 #include <stdio.h>
   20 #include <string.h>
   21 #include <pcre.h>
   22 
   23 #define OVECCOUNT 30    /* number of ints in the offset vector handed to pcre_exec(); should be a multiple of 3 - main() treats OVECCOUNT/3 as the number of substring offset pairs the vector can hold */
   24 
   25 
   26 int main(int argc, char **argv)
   27 {
   28 pcre *re;
   29 const char *error;
   30 char *pattern;
   31 char *subject;
   32 unsigned char *name_table;
   33 int erroffset;
   34 int find_all;
   35 int namecount;
   36 int name_entry_size;
   37 int ovector[OVECCOUNT];
   38 int subject_length;
   39 int rc, i;
   40 
   41 
   42 /**************************************************************************
   43 * First, sort out the command line. There is only one possible option at  *
   44 * the moment, "-g" to request repeated matching to find all occurrences,  *
   45 * like Perl's /g option. We set the variable find_all to a non-zero value *
   46 * if the -g option is present. Apart from that, there must be exactly two *
   47 * arguments.                                                              *
   48 **************************************************************************/
   49 
   50 find_all = 0;
   51 for (i = 1; i < argc; i++)
   52   {
   53   if (strcmp(argv[i], "-g") == 0) find_all = 1;
   54     else break;
   55   }
   56 
   57 /* After the options, we require exactly two arguments, which are the pattern,
   58 and the subject string. */
   59 
   60 if (argc - i != 2)
   61   {
   62   printf("Two arguments required: a regex and a subject string\n");
   63   return 1;
   64   }
   65 
   66 pattern = argv[i];
   67 subject = argv[i+1];
   68 subject_length = (int)strlen(subject);
   69 
   70 
   71 /*************************************************************************
   72 * Now we are going to compile the regular expression pattern, and handle *
   73 * and errors that are detected.                                          *
   74 *************************************************************************/
   75 
   76 re = pcre_compile(
   77   pattern,              /* the pattern */
   78   0,                    /* default options */
   79   &error,               /* for error message */
   80   &erroffset,           /* for error offset */
   81   NULL);                /* use default character tables */
   82 
   83 /* Compilation failed: print the error message and exit */
   84 
   85 if (re == NULL)
   86   {
   87   printf("PCRE compilation failed at offset %d: %s\n", erroffset, error);
   88   return 1;
   89   }
   90 
   91 
   92 /*************************************************************************
   93 * If the compilation succeeded, we call PCRE again, in order to do a     *
   94 * pattern match against the subject string. This does just ONE match. If *
   95 * further matching is needed, it will be done below.                     *
   96 *************************************************************************/
   97 
   98 rc = pcre_exec(
   99   re,                   /* the compiled pattern */
  100   NULL,                 /* no extra data - we didn't study the pattern */
  101   subject,              /* the subject string */
  102   subject_length,       /* the length of the subject */
  103   0,                    /* start at offset 0 in the subject */
  104   0,                    /* default options */
  105   ovector,              /* output vector for substring information */
  106   OVECCOUNT);           /* number of elements in the output vector */
  107 
  108 /* Matching failed: handle error cases */
  109 
  110 if (rc < 0)
  111   {
  112   switch(rc)
  113     {
  114     case PCRE_ERROR_NOMATCH: printf("No match\n"); break;
  115     /*
  116     Handle other special cases if you like
  117     */
  118     default: printf("Matching error %d\n", rc); break;
  119     }
  120   pcre_free(re);     /* Release memory used for the compiled pattern */
  121   return 1;
  122   }
  123 
  124 /* Match succeded */
  125 
  126 printf("\nMatch succeeded at offset %d\n", ovector[0]);
  127 
  128 
  129 /*************************************************************************
  130 * We have found the first match within the subject string. If the output *
  131 * vector wasn't big enough, set its size to the maximum. Then output any *
  132 * substrings that were captured.                                         *
  133 *************************************************************************/
  134 
  135 /* The output vector wasn't big enough */
  136 
  137 if (rc == 0)
  138   {
  139   rc = OVECCOUNT/3;
  140   printf("ovector only has room for %d captured substrings\n", rc - 1);
  141   }
  142 
  143 /* Show substrings stored in the output vector by number. Obviously, in a real
  144 application you might want to do things other than print them. */
  145 
  146 for (i = 0; i < rc; i++)
  147   {
  148   char *substring_start = subject + ovector[2*i];
  149   int substring_length = ovector[2*i+1] - ovector[2*i];
  150   printf("%2d: %.*s\n", i, substring_length, substring_start);
  151   }
  152 
  153 
  154 /**************************************************************************
  155 * That concludes the basic part of this demonstration program. We have    *
  156 * compiled a pattern, and performed a single match. The code that follows *
  157 * first shows how to access named substrings, and then how to code for    *
  158 * repeated matches on the same subject.                                   *
  159 **************************************************************************/
  160 
  161 /* See if there are any named substrings, and if so, show them by name. First
  162 we have to extract the count of named parentheses from the pattern. */
  163 
  164 (void)pcre_fullinfo(
  165   re,                   /* the compiled pattern */
  166   NULL,                 /* no extra data - we didn't study the pattern */
  167   PCRE_INFO_NAMECOUNT,  /* number of named substrings */
  168   &namecount);          /* where to put the answer */
  169 
  170 if (namecount <= 0) printf("No named substrings\n"); else
  171   {
  172   unsigned char *tabptr;
  173   printf("Named substrings\n");
  174 
  175   /* Before we can access the substrings, we must extract the table for
  176   translating names to numbers, and the size of each entry in the table. */
  177 
  178   (void)pcre_fullinfo(
  179     re,                       /* the compiled pattern */
  180     NULL,                     /* no extra data - we didn't study the pattern */
  181     PCRE_INFO_NAMETABLE,      /* address of the table */
  182     &name_table);             /* where to put the answer */
  183 
  184   (void)pcre_fullinfo(
  185     re,                       /* the compiled pattern */
  186     NULL,                     /* no extra data - we didn't study the pattern */
  187     PCRE_INFO_NAMEENTRYSIZE,  /* size of each entry in the table */
  188     &name_entry_size);        /* where to put the answer */
  189 
  190   /* Now we can scan the table and, for each entry, print the number, the name,
  191   and the substring itself. */
  192 
  193   tabptr = name_table;
  194   for (i = 0; i < namecount; i++)
  195     {
  196     int n = (tabptr[0] << 8) | tabptr[1];
  197     printf("(%d) %*s: %.*s\n", n, name_entry_size - 3, tabptr + 2,
  198       ovector[2*n+1] - ovector[2*n], subject + ovector[2*n]);
  199     tabptr += name_entry_size;
  200     }
  201   }
  202 
  203 
  204 /*************************************************************************
  205 * If the "-g" option was given on the command line, we want to continue  *
  206 * to search for additional matches in the subject string, in a similar   *
  207 * way to the /g option in Perl. This turns out to be trickier than you   *
  208 * might think because of the possibility of matching an empty string.    *
  209 * What happens is as follows:                                            *
  210 *                                                                        *
  211 * If the previous match was NOT for an empty string, we can just start   *
  212 * the next match at the end of the previous one.                         *
  213 *                                                                        *
  214 * If the previous match WAS for an empty string, we can't do that, as it *
  215 * would lead to an infinite loop. Instead, a special call of pcre_exec() *
  216 * is made with the PCRE_NOTEMPTY and PCRE_ANCHORED flags set. The first  *
  217 * of these tells PCRE that an empty string is not a valid match; other   *
  218 * possibilities must be tried. The second flag restricts PCRE to one     *
  219 * match attempt at the initial string position. If this match succeeds,  *
  220 * an alternative to the empty string match has been found, and we can    *
  221 * proceed round the loop.                                                *
  222 *************************************************************************/
  223 
  224 if (!find_all)
  225   {
  226   pcre_free(re);   /* Release the memory used for the compiled pattern */
  227   return 0;        /* Finish unless -g was given */
  228   }
  229 
  230 /* Loop for second and subsequent matches */
  231 
  232 for (;;)
  233   {
  234   int options = 0;                 /* Normally no options */
  235   int start_offset = ovector[1];   /* Start at end of previous match */
  236 
  237   /* If the previous match was for an empty string, we are finished if we are
  238   at the end of the subject. Otherwise, arrange to run another match at the
  239   same point to see if a non-empty match can be found. */
  240 
  241   if (ovector[0] == ovector[1])
  242     {
  243     if (ovector[0] == subject_length) break;
  244     options = PCRE_NOTEMPTY | PCRE_ANCHORED;
  245     }
  246 
  247   /* Run the next matching operation */
  248 
  249   rc = pcre_exec(
  250     re,                   /* the compiled pattern */
  251     NULL,                 /* no extra data - we didn't study the pattern */
  252     subject,              /* the subject string */
  253     subject_length,       /* the length of the subject */
  254     start_offset,         /* starting offset in the subject */
  255     options,              /* options */
  256     ovector,              /* output vector for substring information */
  257     OVECCOUNT);           /* number of elements in the output vector */
  258 
  259   /* This time, a result of NOMATCH isn't an error. If the value in "options"
  260   is zero, it just means we have found all possible matches, so the loop ends.
  261   Otherwise, it means we have failed to find a non-empty-string match at a
  262   point where there was a previous empty-string match. In this case, we do what
  263   Perl does: advance the matching position by one, and continue. We do this by
  264   setting the "end of previous match" offset, because that is picked up at the
  265   top of the loop as the point at which to start again. */
  266 
  267   if (rc == PCRE_ERROR_NOMATCH)
  268     {
  269     if (options == 0) break;
  270     ovector[1] = start_offset + 1;
  271     continue;    /* Go round the loop again */
  272     }
  273 
  274   /* Other matching errors are not recoverable. */
  275 
  276   if (rc < 0)
  277     {
  278     printf("Matching error %d\n", rc);
  279     pcre_free(re);    /* Release memory used for the compiled pattern */
  280     return 1;
  281     }
  282 
  283   /* Match succeded */
  284 
  285   printf("\nMatch succeeded again at offset %d\n", ovector[0]);
  286 
  287   /* The match succeeded, but the output vector wasn't big enough. */
  288 
  289   if (rc == 0)
  290     {
  291     rc = OVECCOUNT/3;
  292     printf("ovector only has room for %d captured substrings\n", rc - 1);
  293     }
  294 
  295   /* As before, show substrings stored in the output vector by number, and then
  296   also any named substrings. */
  297 
  298   for (i = 0; i < rc; i++)
  299     {
  300     char *substring_start = subject + ovector[2*i];
  301     int substring_length = ovector[2*i+1] - ovector[2*i];
  302     printf("%2d: %.*s\n", i, substring_length, substring_start);
  303     }
  304 
  305   if (namecount <= 0) printf("No named substrings\n"); else
  306     {
  307     unsigned char *tabptr = name_table;
  308     printf("Named substrings\n");
  309     for (i = 0; i < namecount; i++)
  310       {
  311       int n = (tabptr[0] << 8) | tabptr[1];
  312       printf("(%d) %*s: %.*s\n", n, name_entry_size - 3, tabptr + 2,
  313         ovector[2*n+1] - ovector[2*n], subject + ovector[2*n]);
  314       tabptr += name_entry_size;
  315       }
  316     }
  317   }      /* End of loop to find second and subsequent matches */
  318 
  319 printf("\n");
  320 pcre_free(re);       /* Release memory used for the compiled pattern */
  321 return 0;
  322 }
  323 
  324 /* End of pcredemo.c */