"Fossies" - the Fresh Open Source Software Archive

Member "pcre-8.43/pcredemo.c" (31 Jan 2014, 15520 Bytes) of package /linux/misc/pcre-8.43.tar.bz2:


As a special service, "Fossies" has tried to format the requested source page into HTML using (guessed) C and C++ source code syntax highlighting (style: standard), with prefixed line numbers and a code folding option. Alternatively, you can view or download the uninterpreted source code file here. For more information about "pcredemo.c", see the Fossies "Dox" file reference documentation.

    1 /*************************************************
    2 *           PCRE DEMONSTRATION PROGRAM           *
    3 *************************************************/
    4 
    5 /* This is a demonstration program to illustrate the most straightforward ways
    6 of calling the PCRE regular expression library from a C program. See the
    7 pcresample documentation for a short discussion ("man pcresample" if you have
    8 the PCRE man pages installed).
    9 
   10 In Unix-like environments, if PCRE is installed in your standard system
   11 libraries, you should be able to compile this program using this command:
   12 
   13 gcc -Wall pcredemo.c -lpcre -o pcredemo
   14 
   15 If PCRE is not installed in a standard place, it is likely to be installed with
   16 support for the pkg-config mechanism. If you have pkg-config, you can compile
   17 this program using this command:
   18 
   19 gcc -Wall pcredemo.c `pkg-config --cflags --libs libpcre` -o pcredemo
   20 
   21 If you do not have pkg-config, you may have to use this:
   22 
   23 gcc -Wall pcredemo.c -I/usr/local/include -L/usr/local/lib \
   24   -R/usr/local/lib -lpcre -o pcredemo
   25 
   26 Replace "/usr/local/include" and "/usr/local/lib" with wherever the include and
   27 library files for PCRE are installed on your system. Only some operating
   28 systems (e.g. Solaris) use the -R option.
   29 
   30 Building under Windows:
   31 
   32 If you want to statically link this program against a non-dll .a file, you must
   33 define PCRE_STATIC before including pcre.h, otherwise the pcre_malloc() and
   34 pcre_free() exported functions will be declared __declspec(dllimport), with
   35 unwanted results. So in this environment, uncomment the following line. */
   36 
   37 /* #define PCRE_STATIC */
   38 
   39 #include <stdio.h>
   40 #include <string.h>
   41 #include <pcre.h>
   42 
   43 #define OVECCOUNT 30    /* size of the ovector array given to pcre_exec(); should be a multiple of 3 */
   44 
   45 
   46 int main(int argc, char **argv)
   47 {
      /* Usage: pcredemo [-g] <pattern> <subject>. Compiles the pattern, matches it
      against the subject, and prints the captured substrings by number and by
      name; with -g it goes on to print every further match in the subject.
      Returns 1 for a wrong argument count, a compile failure, a failed first
      match, or a matching error inside the -g loop, and 0 otherwise. */
   48 pcre *re;                    /* compiled pattern returned by pcre_compile() */
   49 const char *error;           /* error message filled in by pcre_compile() */
   50 char *pattern;               /* first non-option argument */
   51 char *subject;               /* second non-option argument */
   52 unsigned char *name_table;   /* name table obtained from pcre_fullinfo() */
   53 unsigned int option_bits;    /* compile options, later masked to newline bits */
   54 int erroffset;               /* error offset filled in by pcre_compile() */
   55 int find_all;                /* non-zero when -g was given */
   56 int crlf_is_newline;         /* non-zero when CRLF counts as a newline */
   57 int namecount;               /* number of named substrings in the pattern */
   58 int name_entry_size;         /* size of one name table entry */
   59 int ovector[OVECCOUNT];      /* output vector passed to pcre_exec() */
   60 int subject_length;          /* strlen(subject), cached */
   61 int rc, i;
   62 int utf8;                    /* non-zero when PCRE_UTF8 is among the options */
   63 
   64 
   65 /**************************************************************************
   66 * First, sort out the command line. There is only one possible option at  *
   67 * the moment, "-g" to request repeated matching to find all occurrences,  *
   68 * like Perl's /g option. We set the variable find_all to a non-zero value *
   69 * if the -g option is present. Apart from that, there must be exactly two *
   70 * arguments.                                                              *
   71 **************************************************************************/
   72 
      /* The scan stops at the first argument that is not "-g", leaving i as the
      index of the pattern argument. */
   73 find_all = 0;
   74 for (i = 1; i < argc; i++)
   75   {
   76   if (strcmp(argv[i], "-g") == 0) find_all = 1;
   77     else break;
   78   }
   79 
   80 /* After the options, we require exactly two arguments, which are the pattern,
   81 and the subject string. */
   82 
   83 if (argc - i != 2)
   84   {
   85   printf("Two arguments required: a regex and a subject string\n");
   86   return 1;
   87   }
   88 
   89 pattern = argv[i];
   90 subject = argv[i+1];
   91 subject_length = (int)strlen(subject);
   92 
   93 
   94 /*************************************************************************
   95 * Now we are going to compile the regular expression pattern, and handle *
   96 * any errors that are detected.                                          *
   97 *************************************************************************/
   98 
   99 re = pcre_compile(
  100   pattern,              /* the pattern */
  101   0,                    /* default options */
  102   &error,               /* for error message */
  103   &erroffset,           /* for error offset */
  104   NULL);                /* use default character tables */
  105 
  106 /* Compilation failed: print the error message and exit */
  107 
  108 if (re == NULL)
  109   {
  110   printf("PCRE compilation failed at offset %d: %s\n", erroffset, error);
  111   return 1;
  112   }
  113 
  114 
  115 /*************************************************************************
  116 * If the compilation succeeded, we call PCRE again, in order to do a     *
  117 * pattern match against the subject string. This does just ONE match. If *
  118 * further matching is needed, it will be done below.                     *
  119 *************************************************************************/
  120 
  121 rc = pcre_exec(
  122   re,                   /* the compiled pattern */
  123   NULL,                 /* no extra data - we didn't study the pattern */
  124   subject,              /* the subject string */
  125   subject_length,       /* the length of the subject */
  126   0,                    /* start at offset 0 in the subject */
  127   0,                    /* default options */
  128   ovector,              /* output vector for substring information */
  129   OVECCOUNT);           /* number of elements in the output vector */
  130 
  131 /* Matching failed: handle error cases. Note that "no match" on this first
      attempt also ends the program with a return value of 1. */
  132 
  133 if (rc < 0)
  134   {
  135   switch(rc)
  136     {
  137     case PCRE_ERROR_NOMATCH: printf("No match\n"); break;
  138     /*
  139     Handle other special cases if you like
  140     */
  141     default: printf("Matching error %d\n", rc); break;
  142     }
  143   pcre_free(re);     /* Release memory used for the compiled pattern */
  144   return 1;
  145   }
  146 
  147 /* Match succeeded */
  148 
  149 printf("\nMatch succeeded at offset %d\n", ovector[0]);
  150 
  151 
  152 /*************************************************************************
  153 * We have found the first match within the subject string. If the output *
  154 * vector wasn't big enough, say so. Then output any substrings that were *
  155 * captured.                                                              *
  156 *************************************************************************/
  157 
  158 /* The output vector wasn't big enough. rc is reset to OVECCOUNT/3 so that the
      loop below prints that many offset pairs. */
  159 
  160 if (rc == 0)
  161   {
  162   rc = OVECCOUNT/3;
  163   printf("ovector only has room for %d captured substrings\n", rc - 1);
  164   }
  165 
  166 /* Show substrings stored in the output vector by number. Obviously, in a real
  167 application you might want to do things other than print them. Entry i uses
      ovector[2*i] as the start offset and ovector[2*i+1] as the end offset.
      NOTE(review): the offsets are not checked for negative values before being
      added to subject - confirm how pcre_exec() reports groups that did not take
      part in the match. */
  168 
  169 for (i = 0; i < rc; i++)
  170   {
  171   char *substring_start = subject + ovector[2*i];
  172   int substring_length = ovector[2*i+1] - ovector[2*i];
  173   printf("%2d: %.*s\n", i, substring_length, substring_start);
  174   }
  175 
  176 
  177 /**************************************************************************
  178 * That concludes the basic part of this demonstration program. We have    *
  179 * compiled a pattern, and performed a single match. The code that follows *
  180 * shows first how to access named substrings, and then how to code for    *
  181 * repeated matches on the same subject.                                   *
  182 **************************************************************************/
  183 
  184 /* See if there are any named substrings, and if so, show them by name. First
  185 we have to extract the count of named parentheses from the pattern.
      NOTE(review): the return value of pcre_fullinfo() is discarded and namecount
      has no initializer, so the test below relies on the call having stored a
      value - confirm this cannot fail here. */
  186 
  187 (void)pcre_fullinfo(
  188   re,                   /* the compiled pattern */
  189   NULL,                 /* no extra data - we didn't study the pattern */
  190   PCRE_INFO_NAMECOUNT,  /* number of named substrings */
  191   &namecount);          /* where to put the answer */
  192 
  193 if (namecount <= 0) printf("No named substrings\n"); else
  194   {
  195   unsigned char *tabptr;
  196   printf("Named substrings\n");
  197 
  198   /* Before we can access the substrings, we must extract the table for
  199   translating names to numbers, and the size of each entry in the table. */
  200 
  201   (void)pcre_fullinfo(
  202     re,                       /* the compiled pattern */
  203     NULL,                     /* no extra data - we didn't study the pattern */
  204     PCRE_INFO_NAMETABLE,      /* address of the table */
  205     &name_table);             /* where to put the answer */
  206 
  207   (void)pcre_fullinfo(
  208     re,                       /* the compiled pattern */
  209     NULL,                     /* no extra data - we didn't study the pattern */
  210     PCRE_INFO_NAMEENTRYSIZE,  /* size of each entry in the table */
  211     &name_entry_size);        /* where to put the answer */
  212 
  213   /* Now we can scan the table and, for each entry, print the number, the name,
  214   and the substring itself. The first two bytes of an entry give the group
        number, most significant byte first, and the name starts at the third
        byte; tabptr steps through the table name_entry_size bytes at a time.
        NOTE(review): n is used to index ovector without being checked against
        rc or OVECCOUNT - confirm that a named group whose number lies beyond
        the part of ovector filled in by pcre_exec() cannot reach this point. */
  215 
  216   tabptr = name_table;
  217   for (i = 0; i < namecount; i++)
  218     {
  219     int n = (tabptr[0] << 8) | tabptr[1];
  220     printf("(%d) %*s: %.*s\n", n, name_entry_size - 3, tabptr + 2,
  221       ovector[2*n+1] - ovector[2*n], subject + ovector[2*n]);
  222     tabptr += name_entry_size;
  223     }
  224   }
  225 
  226 
  227 /*************************************************************************
  228 * If the "-g" option was given on the command line, we want to continue  *
  229 * to search for additional matches in the subject string, in a similar   *
  230 * way to the /g option in Perl. This turns out to be trickier than you   *
  231 * might think because of the possibility of matching an empty string.    *
  232 * What happens is as follows:                                            *
  233 *                                                                        *
  234 * If the previous match was NOT for an empty string, we can just start   *
  235 * the next match at the end of the previous one.                         *
  236 *                                                                        *
  237 * If the previous match WAS for an empty string, we can't do that, as it *
  238 * would lead to an infinite loop. Instead, a special call of pcre_exec() *
  239 * is made with the PCRE_NOTEMPTY_ATSTART and PCRE_ANCHORED flags set.    *
  240 * The first of these tells PCRE that an empty string at the start of the *
  241 * subject is not a valid match; other possibilities must be tried. The   *
  242 * second flag restricts PCRE to one match attempt at the initial string  *
  243 * position. If this match succeeds, an alternative to the empty string   *
  244 * match has been found, and we can print it and proceed round the loop,  *
  245 * advancing by the length of whatever was found. If this match does not  *
  246 * succeed, we still stay in the loop, advancing by just one character.   *
  247 * In UTF-8 mode, which can be set by (*UTF8) in the pattern, this may be *
  248 * more than one byte.                                                    *
  249 *                                                                        *
  250 * However, there is a complication concerned with newlines. When the     *
  251 * newline convention is such that CRLF is a valid newline, we must       *
  252 * advance by two characters rather than one. The newline convention can  *
  253 * be set in the regex by (*CR), etc.; if not, we must find the default.  *
  254 *************************************************************************/
  255 
  256 if (!find_all)     /* Check for -g */
  257   {
  258   pcre_free(re);   /* Release the memory used for the compiled pattern */
  259   return 0;        /* Finish unless -g was given */
  260   }
  261 
  262 /* Before running the loop, check for UTF-8 and whether CRLF is a valid newline
  263 sequence. First, find the options with which the regex was compiled; extract
  264 the UTF-8 state, and mask off all but the newline options. */
  265 
  266 (void)pcre_fullinfo(re, NULL, PCRE_INFO_OPTIONS, &option_bits);
  267 utf8 = option_bits & PCRE_UTF8;
  268 option_bits &= PCRE_NEWLINE_CR|PCRE_NEWLINE_LF|PCRE_NEWLINE_CRLF|
  269                PCRE_NEWLINE_ANY|PCRE_NEWLINE_ANYCRLF;
  270 
  271 /* If no newline options were set, find the default newline convention from the
  272 build configuration. The value reported by pcre_config() is translated back
      into the corresponding PCRE_NEWLINE_xxx option bit, or 0 if it is none of
      the values tested for. */
  273 
  274 if (option_bits == 0)
  275   {
  276   int d;
  277   (void)pcre_config(PCRE_CONFIG_NEWLINE, &d);
  278   /* Note that these values are always the ASCII ones, even in
  279   EBCDIC environments. CR = 13, NL = 10. */
  280   option_bits = (d == 13)? PCRE_NEWLINE_CR :
  281           (d == 10)? PCRE_NEWLINE_LF :
  282           (d == (13<<8 | 10))? PCRE_NEWLINE_CRLF :
  283           (d == -2)? PCRE_NEWLINE_ANYCRLF :
  284           (d == -1)? PCRE_NEWLINE_ANY : 0;
  285   }
  286 
  287 /* See if CRLF is a valid newline sequence. */
  288 
  289 crlf_is_newline =
  290      option_bits == PCRE_NEWLINE_ANY ||
  291      option_bits == PCRE_NEWLINE_CRLF ||
  292      option_bits == PCRE_NEWLINE_ANYCRLF;
  293 
  294 /* Loop for second and subsequent matches. On entry to each iteration,
      ovector[0] and ovector[1] describe the previous match (or, after a failed
      retry, ovector[1] holds the position at which to resume). */
  295 
  296 for (;;)
  297   {
  298   int options = 0;                 /* Normally no options */
  299   int start_offset = ovector[1];   /* Start at end of previous match */
  300 
  301   /* If the previous match was for an empty string, we are finished if we are
  302   at the end of the subject. Otherwise, arrange to run another match at the
  303   same point to see if a non-empty match can be found. */
  304 
  305   if (ovector[0] == ovector[1])
  306     {
  307     if (ovector[0] == subject_length) break;
  308     options = PCRE_NOTEMPTY_ATSTART | PCRE_ANCHORED;
  309     }
  310 
  311   /* Run the next matching operation */
  312 
  313   rc = pcre_exec(
  314     re,                   /* the compiled pattern */
  315     NULL,                 /* no extra data - we didn't study the pattern */
  316     subject,              /* the subject string */
  317     subject_length,       /* the length of the subject */
  318     start_offset,         /* starting offset in the subject */
  319     options,              /* options */
  320     ovector,              /* output vector for substring information */
  321     OVECCOUNT);           /* number of elements in the output vector */
  322 
  323   /* This time, a result of NOMATCH isn't an error. If the value in "options"
  324   is zero, it just means we have found all possible matches, so the loop ends.
  325   Otherwise, it means we have failed to find a non-empty-string match at a
  326   point where there was a previous empty-string match. In this case, we do what
  327   Perl does: advance the matching position by one character, and continue. We
  328   do this by setting the "end of previous match" offset, because that is picked
  329   up at the top of the loop as the point at which to start again.
  330 
  331   There are two complications: (a) When CRLF is a valid newline sequence, and
  332   the current position is just before it, advance by an extra byte. (b)
  333   Otherwise we must ensure that we skip an entire UTF-8 character if we are in
  334   UTF-8 mode.
        
        The test start_offset < subject_length - 1 guarantees that
        subject[start_offset + 1] is still inside the subject. In the UTF-8 case
        the inner loop keeps advancing while the byte at ovector[1] has the form
        10xxxxxx, stopping at the first byte that does not or at the end of the
        subject. */
  335 
  336   if (rc == PCRE_ERROR_NOMATCH)
  337     {
  338     if (options == 0) break;                    /* All matches found */
  339     ovector[1] = start_offset + 1;              /* Advance one byte */
  340     if (crlf_is_newline &&                      /* If CRLF is newline & */
  341         start_offset < subject_length - 1 &&    /* we are at CRLF, */
  342         subject[start_offset] == '\r' &&
  343         subject[start_offset + 1] == '\n')
  344       ovector[1] += 1;                          /* Advance by one more. */
  345     else if (utf8)                              /* Otherwise, ensure we */
  346       {                                         /* advance a whole UTF-8 */
  347       while (ovector[1] < subject_length)       /* character. */
  348         {
  349         if ((subject[ovector[1]] & 0xc0) != 0x80) break;
  350         ovector[1] += 1;
  351         }
  352       }
  353     continue;    /* Go round the loop again */
  354     }
  355 
  356   /* Other matching errors are not recoverable. */
  357 
  358   if (rc < 0)
  359     {
  360     printf("Matching error %d\n", rc);
  361     pcre_free(re);    /* Release memory used for the compiled pattern */
  362     return 1;
  363     }
  364 
  365   /* Match succeeded */
  366 
  367   printf("\nMatch succeeded again at offset %d\n", ovector[0]);
  368 
  369   /* The match succeeded, but the output vector wasn't big enough. */
  370 
  371   if (rc == 0)
  372     {
  373     rc = OVECCOUNT/3;
  374     printf("ovector only has room for %d captured substrings\n", rc - 1);
  375     }
  376 
  377   /* As before, show substrings stored in the output vector by number, and then
  378   also any named substrings. name_table and name_entry_size are reused from the
        first pass, where they were filled in under the same namecount > 0
        condition. NOTE(review): as in the first pass, n is not checked against rc
        or OVECCOUNT before it is used to index ovector. */
  379 
  380   for (i = 0; i < rc; i++)
  381     {
  382     char *substring_start = subject + ovector[2*i];
  383     int substring_length = ovector[2*i+1] - ovector[2*i];
  384     printf("%2d: %.*s\n", i, substring_length, substring_start);
  385     }
  386 
  387   if (namecount <= 0) printf("No named substrings\n"); else
  388     {
  389     unsigned char *tabptr = name_table;
  390     printf("Named substrings\n");
  391     for (i = 0; i < namecount; i++)
  392       {
  393       int n = (tabptr[0] << 8) | tabptr[1];
  394       printf("(%d) %*s: %.*s\n", n, name_entry_size - 3, tabptr + 2,
  395         ovector[2*n+1] - ovector[2*n], subject + ovector[2*n]);
  396       tabptr += name_entry_size;
  397       }
  398     }
  399   }      /* End of loop to find second and subsequent matches */
  400 
  401 printf("\n");
  402 pcre_free(re);       /* Release memory used for the compiled pattern */
  403 return 0;
  404 }
  405 
  406 /* End of pcredemo.c */