"Fossies" - the Fresh Open Source Software Archive

Member "tin-2.4.1/pcre/pcre_printint.src" (28 Aug 2013, 13710 Bytes) of archive /linux/misc/tin-2.4.1.tar.gz:


As a special service, "Fossies" has tried to format the requested text file into HTML format (style: standard) with prefixed line numbers. Alternatively, you can view or download the uninterpreted source code file here.

    1 /*************************************************
    2 *      Perl-Compatible Regular Expressions       *
    3 *************************************************/
    4 
    5 /* PCRE is a library of functions to support regular expressions whose syntax
    6 and semantics are as close as possible to those of the Perl 5 language.
    7 
    8                        Written by Philip Hazel
    9            Copyright (c) 1997-2005 University of Cambridge
   10 
   11 -----------------------------------------------------------------------------
   12 Redistribution and use in source and binary forms, with or without
   13 modification, are permitted provided that the following conditions are met:
   14 
   15     * Redistributions of source code must retain the above copyright notice,
   16       this list of conditions and the following disclaimer.
   17 
   18     * Redistributions in binary form must reproduce the above copyright
   19       notice, this list of conditions and the following disclaimer in the
   20       documentation and/or other materials provided with the distribution.
   21 
   22     * Neither the name of the University of Cambridge nor the names of its
   23       contributors may be used to endorse or promote products derived from
   24       this software without specific prior written permission.
   25 
   26 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
   27 AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
   28 IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
   29 ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
   30 LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
   31 CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
   32 SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
   33 INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
   34 CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
   35 ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
   36 POSSIBILITY OF SUCH DAMAGE.
   37 -----------------------------------------------------------------------------
   38 */
   39 
   40 
   41 /* This module contains a PCRE private debugging function for printing out the
   42 internal form of a compiled regular expression, along with some supporting
   43 local functions. This source file is used in two places:
   44 
   45 (1) It is #included by pcre_compile.c when it is compiled in debugging mode
   46 (DEBUG defined in pcre_internal.h). It is not included in production compiles.
   47 
   48 (2) It is always #included by pcretest.c, which can be asked to print out a
   49 compiled regex for debugging purposes. */
   50 
   51 
   52 /* Macro that decides whether a character should be output as a literal or in
   53 hexadecimal. We don't use isprint() because that can vary from system to system
   54 (even without the use of locales) and we want the output always to be the same,
   55 for testing purposes. This macro is used in pcretest as well as in this file. */
   56 
   /* Non-zero for the codes 32 to 126 inclusive, zero for everything else. The
   argument may be evaluated twice, so it must not have side effects. */
   #define PRINTABLE(c) (!((c) < 32 || (c) >= 127))
   58 
   59 /* The table of operator names. */
   60 
   /* The entries come from the OP_NAME_LIST macro, which is not defined in this
   file. The code below looks names up by using an opcode byte as the index
   (for example OP_names[*code]), so the list has to be in opcode order. */
   61 static const char *OP_names[] = { OP_NAME_LIST };
   62 
   63 
   64 
   65 /*************************************************
   66 *       Print single- or multi-byte character    *
   67 *************************************************/
   68 
   69 static int
   70 print_char(FILE *f, uschar *ptr, BOOL utf8)
   71 {
   72 int c = *ptr;
   73 
   74 if (!utf8 || (c & 0xc0) != 0xc0)
   75   {
   76   if (PRINTABLE(c)) fprintf(f, "%c", c); else fprintf(f, "\\x%02x", c);
   77   return 0;
   78   }
   79 else
   80   {
   81   int i;
   82   int a = _pcre_utf8_table4[c & 0x3f];  /* Number of additional bytes */
   83   int s = 6*a;
   84   c = (c & _pcre_utf8_table3[a]) << s;
   85   for (i = 1; i <= a; i++)
   86     {
   87     /* This is a check for malformed UTF-8; it should only occur if the sanity
   88     check has been turned off. Rather than swallow random bytes, just stop if
   89     we hit a bad one. Print it with \X instead of \x as an indication. */
   90 
   91     if ((ptr[i] & 0xc0) != 0x80)
   92       {
   93       fprintf(f, "\\X{%x}", c);
   94       return i - 1;
   95       }
   96 
   97     /* The byte is OK */
   98 
   99     s -= 6;
  100     c |= (ptr[i] & 0x3f) << s;
  101     }
  102   if (c < 128) fprintf(f, "\\x%02x", c); else fprintf(f, "\\x{%x}", c);
  103   return a;
  104   }
  105 }
  106 
  107 
  108 
  109 /*************************************************
  110 *          Find Unicode property name            *
  111 *************************************************/
  112 
  /* Find the printable name of a Unicode property.

  Arguments:
    ptype    the property type to look for
    pvalue   the property value to look for

  Returns:   the name field of the matching _pcre_utt entry, or "??" if no
             entry matches or if SUPPORT_UCP is not defined

  The table is searched from its last entry down to its first. _pcre_utt_size
  (defined outside this file) is taken to be the number of entries in
  _pcre_utt, so the last valid index is _pcre_utt_size - 1; starting the scan
  at _pcre_utt_size itself would read one element beyond the end of the
  table. */

  static const char *
  get_ucpname(int ptype, int pvalue)
  {
  #ifdef SUPPORT_UCP
  int i;
  for (i = _pcre_utt_size - 1; i >= 0; i--)
    {
    if (ptype == _pcre_utt[i].type && pvalue == _pcre_utt[i].value)
      return _pcre_utt[i].name;
    }
  return "??";
  #else
  /* There is no table without UCP support. The casts mark the arguments as
  used so that compilers do not warn about them, without doing any arithmetic
  that could overflow. */
  (void)ptype;
  (void)pvalue;
  return "??";
  #endif
  }
  129 
  130 
  131 
  132 /*************************************************
  133 *         Print compiled regex                   *
  134 *************************************************/
  135 
  136 /* Make this function work for a regex with integers in either byte order.
  137 However, we assume that what we are passed is a compiled regex. */
  138 
  139 static void
  140 pcre_printint(pcre *external_re, FILE *f)
  141 {
      /* Write a readable listing of the compiled pattern to a stream. Each item
      is printed on its own line, prefixed by its offset from the start of the
      compiled code (consecutive OP_CHAR or OP_CHARNC items share one line). The
      function returns after printing the OP_END item and a line of dashes.

      Arguments:
        external_re   points to the compiled pattern
        f             the stream to write to

      Returns:        nothing */

  142 real_pcre *re = (real_pcre *)external_re;
  143 uschar *codestart, *code;
  144 BOOL utf8;
  145 
  146 unsigned int options = re->options;
  147 int offset = re->name_table_offset;
  148 int count = re->name_count;
  149 int size = re->name_entry_size;
  150 
      /* If the magic number does not match, the header fields are assumed to be
      in the opposite byte order and are swapped: the three name table fields as
      two-byte values and the options as a four-byte value. */

  151 if (re->magic_number != MAGIC_NUMBER)
  152   {
  153   offset = ((offset << 8) & 0xff00) | ((offset >> 8) & 0xff);
  154   count = ((count << 8) & 0xff00) | ((count >> 8) & 0xff);
  155   size = ((size << 8) & 0xff00) | ((size >> 8) & 0xff);
  156   options = ((options << 24) & 0xff000000) |
  157             ((options <<  8) & 0x00ff0000) |
  158             ((options >>  8) & 0x0000ff00) |
  159             ((options >> 24) & 0x000000ff);
  160   }
  161 
      /* The compiled code is located directly after the name table, which takes
      up count entries of size bytes each, starting offset bytes into the
      block. */

  162 code = codestart = (uschar *)re + offset + count * size;
  163 utf8 = (options & PCRE_UTF8) != 0;
  164 
  165 for(;;)
  166   {
  167   uschar *ccode;
  168   int c;
        /* Bytes used by the current item over and above _pcre_OP_lengths[*code];
        added in when code is advanced at the bottom of the loop. */
  169   int extra = 0;
  170 
  171   fprintf(f, "%3d ", (int)(code - codestart));
  172 
  173   switch(*code)
  174     {
  175     case OP_END:
  176     fprintf(f, "    %s\n", OP_names[*code]);
  177     fprintf(f, "------------------------------------------------------------------\n");
  178     return;
  179 
  180     case OP_OPT:
  181     fprintf(f, " %.2x %s", code[1], OP_names[*code]);
  182     break;
  183 
          /* A run of consecutive OP_CHAR items is printed as one string. The loop
          advances code itself, so "continue" is used to bypass the common advance
          and newline at the bottom of the main loop. */

  184     case OP_CHAR:
  185     fprintf(f, "    ");
  186     do
  187       {
  188       code++;
  189       code += 1 + print_char(f, code, utf8);
  190       }
  191     while (*code == OP_CHAR);
  192     fprintf(f, "\n");
  193     continue;
  194 
          /* The same treatment for a run of OP_CHARNC items, marked with "NC". */

  195     case OP_CHARNC:
  196     fprintf(f, " NC ");
  197     do
  198       {
  199       code++;
  200       code += 1 + print_char(f, code, utf8);
  201       }
  202     while (*code == OP_CHARNC);
  203     fprintf(f, "\n");
  204     continue;
  205 
          /* These two print the GET() value at code+1, the opcode name, and the
          two-byte number that follows the LINK_SIZE-byte field. */

  206     case OP_CBRA:
  207     case OP_SCBRA:
  208     fprintf(f, "%3d %s %d", GET(code, 1), OP_names[*code],
  209       GET2(code, 1+LINK_SIZE));
  210     break;
  211 
          /* Items printed as the GET() value at code+1 followed by the name. */

  212     case OP_BRA:
  213     case OP_SBRA:
  214     case OP_KETRMAX:
  215     case OP_KETRMIN:
  216     case OP_ALT:
  217     case OP_KET:
  218     case OP_ASSERT:
  219     case OP_ASSERT_NOT:
  220     case OP_ASSERTBACK:
  221     case OP_ASSERTBACK_NOT:
  222     case OP_ONCE:
  223     case OP_COND:
  224     case OP_SCOND:
  225     case OP_REVERSE:
  226     fprintf(f, "%3d %s", GET(code, 1), OP_names[*code]);
  227     break;
  228 
  229     case OP_CREF:
  230     fprintf(f, "%3d %s", GET2(code,1), OP_names[*code]);
  231     break;
  232 
  233     case OP_RREF:
  234     c = GET2(code, 1);
  235     if (c == RREF_ANY)
  236       fprintf(f, "    Cond recurse any");
  237     else
  238       fprintf(f, "    Cond recurse %d", c);
  239     break;
  240 
  241     case OP_DEF:
  242     fprintf(f, "    Cond def");
  243     break;
  244 
          /* Single-item repeats without a count. The test against OP_TYPESTAR
          separates the two kinds, which relies on the OP_TYPExxx opcodes having the
          higher values of this group: for those, code[1] is itself an opcode whose
          name is printed; for the others, a literal character starts at code+1. */

  245     case OP_STAR:
  246     case OP_MINSTAR:
  247     case OP_POSSTAR:
  248     case OP_PLUS:
  249     case OP_MINPLUS:
  250     case OP_POSPLUS:
  251     case OP_QUERY:
  252     case OP_MINQUERY:
  253     case OP_POSQUERY:
  254     case OP_TYPESTAR:
  255     case OP_TYPEMINSTAR:
  256     case OP_TYPEPOSSTAR:
  257     case OP_TYPEPLUS:
  258     case OP_TYPEMINPLUS:
  259     case OP_TYPEPOSPLUS:
  260     case OP_TYPEQUERY:
  261     case OP_TYPEMINQUERY:
  262     case OP_TYPEPOSQUERY:
  263     fprintf(f, "    ");
  264     if (*code >= OP_TYPESTAR)
  265       {
  266       fprintf(f, "%s", OP_names[code[1]]);
            /* A property item is followed by two more bytes, type and value. */
  267       if (code[1] == OP_PROP || code[1] == OP_NOTPROP)
  268         {
  269         fprintf(f, " %s ", get_ucpname(code[2], code[3]));
  270         extra = 2;
  271         }
  272       }
  273     else extra = print_char(f, code+1, utf8);
  274     fprintf(f, "%s", OP_names[*code]);
  275     break;
  276 
          /* Counted repeats of a literal character: the two-byte count is at
          code+1 and the character starts at code+3. Anything other than OP_EXACT
          is shown with a minimum of zero. */

  277     case OP_EXACT:
  278     case OP_UPTO:
  279     case OP_MINUPTO:
  280     case OP_POSUPTO:
  281     fprintf(f, "    ");
  282     extra = print_char(f, code+3, utf8);
  283     fprintf(f, "{");
  284     if (*code != OP_EXACT) fprintf(f, "0,");
  285     fprintf(f, "%d}", GET2(code,1));
  286     if (*code == OP_MINUPTO) fprintf(f, "?");
  287       else if (*code == OP_POSUPTO) fprintf(f, "+");
  288     break;
  289 
          /* Counted repeats of a character type: the count is at code+1 and the
          type opcode at code[3], followed by two property bytes if it is OP_PROP
          or OP_NOTPROP. */

  290     case OP_TYPEEXACT:
  291     case OP_TYPEUPTO:
  292     case OP_TYPEMINUPTO:
  293     case OP_TYPEPOSUPTO:
  294     fprintf(f, "    %s", OP_names[code[3]]);
  295     if (code[3] == OP_PROP || code[3] == OP_NOTPROP)
  296       {
  297       fprintf(f, " %s ", get_ucpname(code[4], code[5]));
  298       extra = 2;
  299       }
  300     fprintf(f, "{");
  301     if (*code != OP_TYPEEXACT) fprintf(f, "0,");
  302     fprintf(f, "%d}", GET2(code,1));
  303     if (*code == OP_TYPEMINUPTO) fprintf(f, "?");
  304       else if (*code == OP_TYPEPOSUPTO) fprintf(f, "+");
  305     break;
  306 
          /* The OP_NOTxxx items print the single byte they hold as [^x]; it is not
          passed through print_char(), so no UTF-8 decoding is done here. */

  307     case OP_NOT:
  308     c = code[1];
  309     if (PRINTABLE(c)) fprintf(f, "    [^%c]", c);
  310       else fprintf(f, "    [^\\x%02x]", c);
  311     break;
  312 
  313     case OP_NOTSTAR:
  314     case OP_NOTMINSTAR:
  315     case OP_NOTPOSSTAR:
  316     case OP_NOTPLUS:
  317     case OP_NOTMINPLUS:
  318     case OP_NOTPOSPLUS:
  319     case OP_NOTQUERY:
  320     case OP_NOTMINQUERY:
  321     case OP_NOTPOSQUERY:
  322     c = code[1];
  323     if (PRINTABLE(c)) fprintf(f, "    [^%c]", c);
  324       else fprintf(f, "    [^\\x%02x]", c);
  325     fprintf(f, "%s", OP_names[*code]);
  326     break;
  327 
  328     case OP_NOTEXACT:
  329     case OP_NOTUPTO:
  330     case OP_NOTMINUPTO:
  331     case OP_NOTPOSUPTO:
  332     c = code[3];
  333     if (PRINTABLE(c)) fprintf(f, "    [^%c]{", c);
  334       else fprintf(f, "    [^\\x%02x]{", c);
  335     if (*code != OP_NOTEXACT) fprintf(f, "0,");
  336     fprintf(f, "%d}", GET2(code,1));
  337     if (*code == OP_NOTMINUPTO) fprintf(f, "?");
  338       else if (*code == OP_NOTPOSUPTO) fprintf(f, "+");
  339     break;
  340 
  341     case OP_RECURSE:
  342     fprintf(f, "%3d %s", GET(code, 1), OP_names[*code]);
  343     break;
  344 
          /* A back reference may be followed by a repeat item. ccode is pointed at
          whatever follows the reference, and control jumps into the class-handling
          block below to share its repeat-printing code. */

  345     case OP_REF:
  346     fprintf(f, "    \\%d", GET2(code,1));
  347     ccode = code + _pcre_OP_lengths[*code];
  348     goto CLASS_REF_REPEAT;
  349 
  350     case OP_CALLOUT:
  351     fprintf(f, "    %s %d %d %d", OP_names[*code], code[1], GET(code,2),
  352       GET(code, 2 + LINK_SIZE));
  353     break;
  354 
  355     case OP_PROP:
  356     case OP_NOTPROP:
  357     fprintf(f, "    %s %s", OP_names[*code], get_ucpname(code[1], code[2]));
  358     break;
  359 
  360     /* OP_XCLASS can only occur in UTF-8 mode. However, there's no harm in
  361     having this code always here, and it makes it less messy without all those
  362     #ifdefs. */
  363 
  364     case OP_CLASS:
  365     case OP_NCLASS:
  366     case OP_XCLASS:
  367       {
  368       int i, min, max;
  369       BOOL printmap;
  370 
  371       fprintf(f, "    [");
  372 
            /* For OP_XCLASS the GET() value at code+1 becomes the extra length, and
            the byte after that field holds flags: XCL_MAP says whether a bit map is
            present and XCL_NOT whether the class is negated. The other two class
            opcodes always have a bit map straight after the opcode. */

  373       if (*code == OP_XCLASS)
  374         {
  375         extra = GET(code, 1);
  376         ccode = code + LINK_SIZE + 1;
  377         printmap = (*ccode & XCL_MAP) != 0;
  378         if ((*ccode++ & XCL_NOT) != 0) fprintf(f, "^");
  379         }
  380       else
  381         {
  382         printmap = TRUE;
  383         ccode = code + 1;
  384         }
  385 
  386       /* Print a bit map */
  387 
  388       if (printmap)
  389         {
  390         for (i = 0; i < 256; i++)
  391           {
  392           if ((ccode[i/8] & (1 << (i&7))) != 0)
  393             {
                  /* Find the end of the run of set bits that starts at i. After
                  the decrement below, j is the last member of the run. A run of
                  exactly two is printed as two characters with no hyphen. */
  394             int j;
  395             for (j = i+1; j < 256; j++)
  396               if ((ccode[j/8] & (1 << (j&7))) == 0) break;
  397             if (i == '-' || i == ']') fprintf(f, "\\");
  398             if (PRINTABLE(i)) fprintf(f, "%c", i);
  399               else fprintf(f, "\\x%02x", i);
  400             if (--j > i)
  401               {
  402               if (j != i + 1) fprintf(f, "-");
  403               if (j == '-' || j == ']') fprintf(f, "\\");
  404               if (PRINTABLE(j)) fprintf(f, "%c", j);
  405                 else fprintf(f, "\\x%02x", j);
  406               }
                  /* Resume the outer scan after the run. */
  407             i = j;
  408             }
  409           }
                /* Step over the 32-byte (256-bit) map. */
  410         ccode += 32;
  411         }
  412 
  413       /* For an XCLASS there is always some additional data */
  414 
  415       if (*code == OP_XCLASS)
  416         {
  417         int ch;
  418         while ((ch = *ccode++) != XCL_END)
  419           {
  420           if (ch == XCL_PROP)
  421             {
  422             int ptype = *ccode++;
  423             int pvalue = *ccode++;
  424             fprintf(f, "\\p{%s}", get_ucpname(ptype, pvalue));
  425             }
  426           else if (ch == XCL_NOTPROP)
  427             {
  428             int ptype = *ccode++;
  429             int pvalue = *ccode++;
  430             fprintf(f, "\\P{%s}", get_ucpname(ptype, pvalue));
  431             }
  432           else
  433             {
                  /* Any other entry holds one character, or two when it is an
                  XCL_RANGE. These are always decoded as UTF-8, whatever the
                  pattern's own option setting. */
  434             ccode += 1 + print_char(f, ccode, TRUE);
  435             if (ch == XCL_RANGE)
  436               {
  437               fprintf(f, "-");
  438               ccode += 1 + print_char(f, ccode, TRUE);
  439               }
  440             }
  441           }
  442         }
  443 
  444       /* Indicate a non-UTF8 class which was created by negation */
  445 
  446       fprintf(f, "]%s", (*code == OP_NCLASS)? " (neg)" : "");
  447 
  448       /* Handle repeats after a class or a back reference */
  449 
            /* This label is reached by falling through from the class code above
            and by the goto in the OP_REF case. In both cases ccode points at the
            item that follows. If that item is a repeat it is printed on the same
            line and its length is added to extra so that it is skipped. */

  450       CLASS_REF_REPEAT:
  451       switch(*ccode)
  452         {
  453         case OP_CRSTAR:
  454         case OP_CRMINSTAR:
  455         case OP_CRPLUS:
  456         case OP_CRMINPLUS:
  457         case OP_CRQUERY:
  458         case OP_CRMINQUERY:
  459         fprintf(f, "%s", OP_names[*ccode]);
  460         extra += _pcre_OP_lengths[*ccode];
  461         break;
  462 
  463         case OP_CRRANGE:
  464         case OP_CRMINRANGE:
  465         min = GET2(ccode,1);
  466         max = GET2(ccode,3);
              /* A maximum of zero is shown as an open-ended range. */
  467         if (max == 0) fprintf(f, "{%d,}", min);
  468         else fprintf(f, "{%d,%d}", min, max);
  469         if (*ccode == OP_CRMINRANGE) fprintf(f, "?");
  470         extra += _pcre_OP_lengths[*ccode];
  471         break;
  472 
  473         /* Do nothing if it's not a repeat; this code stops picky compilers
  474         warning about the lack of a default code path. */
  475 
  476         default:
  477         break;
  478         }
  479       }
  480     break;
  481 
  482     /* Anything else is just an item with no data*/
  483 
  484     default:
  485     fprintf(f, "    %s", OP_names[*code]);
  486     break;
  487     }
  488 
        /* Common exit for every case that ends in "break": move past the item's
        table length plus any extra bytes noted above, and end the line. */

  489   code += _pcre_OP_lengths[*code] + extra;
  490   fprintf(f, "\n");
  491   }
  492 }
  493 
  494 /* End of pcre_printint.src */