"Fossies" - the Fresh Open Source Software Archive

Member "pcre-8.43/perltest.pl" (15 Sep 2014, 6273 Bytes) of package /linux/misc/pcre-8.43.tar.bz2:


As a special service "Fossies" has tried to format the requested source page into HTML format using (guessed) Perl source code syntax highlighting (style: standard) with prefixed line numbers and code folding option. Alternatively you can here view or download the uninterpreted source code file. For more information about "perltest.pl" see the Fossies "Dox" file reference documentation.

    1 #! /usr/bin/env perl
    2 
    3 # Program for testing regular expressions with perl to check that PCRE handles
    4 # them the same. This version needs to have "use utf8" at the start for running
    5 # the UTF-8 tests, but *not* for the other tests. The only way I've found for
    6 # doing this is to cat this line in explicitly in the RunPerlTest script. I've
    7 # also used this method to supply "require Encode" for the UTF-8 tests, so that
    8 # the main test will still run where Encode is not installed.
    9 
   10 #use utf8;
   11 #require Encode;
   12 
   13 # Function for turning a string into a string of printing chars.
   14 
   15 sub pchars {
   16 my($t) = "";
   17 
   18 if ($utf8)
   19   {
   20   @p = unpack('U*', $_[0]);
   21   foreach $c (@p)
   22     {
   23     if ($c >= 32 && $c < 127) { $t .= chr $c; }
   24       else { $t .= sprintf("\\x{%02x}", $c);
   25       }
   26     }
   27   }
   28 else
   29   {
   30   foreach $c (split(//, $_[0]))
   31     {
   32     if (ord $c >= 32 && ord $c < 127) { $t .= $c; }
   33       else { $t .= sprintf("\\x%02x", ord $c); }
   34     }
   35   }
   36 
   37 $t;
   38 }
   39 
   40 
   41 # Read lines from named file or stdin and write to named file or stdout; lines
   42 # consist of a regular expression, in delimiters and optionally followed by
   43 # options, followed by a set of test data, terminated by an empty line.
   44 
   45 # Sort out the input and output files
   46 
   47 if (@ARGV > 0)
   48   {
   49   open(INFILE, "<$ARGV[0]") || die "Failed to open $ARGV[0]\n";
   50   $infile = "INFILE";
   51   }
   52 else { $infile = "STDIN"; }
   53 
   54 if (@ARGV > 1)
   55   {
   56   open(OUTFILE, ">$ARGV[1]") || die "Failed to open $ARGV[1]\n";
   57   $outfile = "OUTFILE";
   58   }
   59 else { $outfile = "STDOUT"; }
   60 
   61 printf($outfile "Perl $] Regular Expressions\n\n");
   62 
   63 # Main loop
   64 
   65 NEXT_RE:
   66 for (;;)
   67   {
   68   printf "  re> " if $infile eq "STDIN";
   69   last if ! ($_ = <$infile>);
   70   printf $outfile "$_" if $infile ne "STDIN";
   71   next if ($_ =~ /^\s*$/ || $_ =~ /^< forbid/);
   72 
   73   $pattern = $_;
   74 
   75   while ($pattern !~ /^\s*(.).*\1/s)
   76     {
   77     printf "    > " if $infile eq "STDIN";
   78     last if ! ($_ = <$infile>);
   79     printf $outfile "$_" if $infile ne "STDIN";
   80     $pattern .= $_;
   81     }
   82 
   83   chomp($pattern);
   84   $pattern =~ s/\s+$//;
   85 
   86   # The private /+ modifier means "print $' afterwards".
   87 
   88   $showrest = ($pattern =~ s/\+(?=[a-zA-Z]*$)//);
   89 
   90   # A doubled version is used by pcretest to print remainders after captures
   91 
   92   $pattern =~ s/\+(?=[a-zA-Z]*$)//;
   93 
   94   # Remove /8 from a UTF-8 pattern.
   95 
   96   $utf8 = $pattern =~ s/8(?=[a-zA-Z]*$)//;
   97 
   98   # Remove /J from a pattern with duplicate names.
   99 
  100   $pattern =~ s/J(?=[a-zA-Z]*$)//;
  101 
  102   # Remove /K from a pattern (asks pcretest to check MARK data) */
  103 
  104   $pattern =~ s/K(?=[a-zA-Z]*$)//;
  105 
  106   # /W asks pcretest to set PCRE_UCP; change this to /u for Perl
  107 
  108   $pattern =~ s/W(?=[a-zA-Z]*$)/u/;
  109 
  110   # Remove /S or /SS from a pattern (asks pcretest to study or not to study)
  111 
  112   $pattern =~ s/S(?=[a-zA-Z]*$)//g;
  113 
  114   # Remove /Y and /O from a pattern (disable PCRE optimizations)
  115 
  116   $pattern =~ s/[YO](?=[a-zA-Z]*$)//;
  117 
  118   # Check that the pattern is valid
  119 
  120   eval "\$_ =~ ${pattern}";
  121   if ($@)
  122     {
  123     printf $outfile "Error: $@";
  124     if ($infile != "STDIN")
  125       {
  126       for (;;)
  127         {
  128         last if ! ($_ = <$infile>);
  129         last if $_ =~ /^\s*$/;
  130         }
  131       }
  132     next NEXT_RE;
  133     }
  134 
  135   # If the /g modifier is present, we want to put a loop round the matching;
  136   # otherwise just a single "if".
  137 
  138   $cmd = ($pattern =~ /g[a-z]*$/)? "while" : "if";
  139 
  140   # If the pattern is actually the null string, Perl uses the most recently
  141   # executed (and successfully compiled) regex is used instead. This is a
  142   # nasty trap for the unwary! The PCRE test suite does contain null strings
  143   # in places - if they are allowed through here all sorts of weird and
  144   # unexpected effects happen. To avoid this, we replace such patterns with
  145   # a non-null pattern that has the same effect.
  146 
  147   $pattern = "/(?#)/$2" if ($pattern =~ /^(.)\1(.*)$/);
  148 
  149   # Read data lines and test them
  150 
  151   for (;;)
  152     {
  153     printf "data> " if $infile eq "STDIN";
  154     last NEXT_RE if ! ($_ = <$infile>);
  155     chomp;
  156     printf $outfile "$_\n" if $infile ne "STDIN";
  157 
  158     s/\s+$//;  # Remove trailing space
  159     s/^\s+//;  # Remove leading space
  160     s/\\Y//g;  # Remove \Y (pcretest flag to set PCRE_NO_START_OPTIMIZE)
  161 
  162     last if ($_ eq "");
  163     $x = eval "\"$_\"";   # To get escapes processed
  164 
  165     # Empty array for holding results, ensure $REGERROR and $REGMARK are
  166     # unset, then do the matching.
  167 
  168     @subs = ();
  169 
  170     $pushes = "push \@subs,\$&;" .
  171          "push \@subs,\$1;" .
  172          "push \@subs,\$2;" .
  173          "push \@subs,\$3;" .
  174          "push \@subs,\$4;" .
  175          "push \@subs,\$5;" .
  176          "push \@subs,\$6;" .
  177          "push \@subs,\$7;" .
  178          "push \@subs,\$8;" .
  179          "push \@subs,\$9;" .
  180          "push \@subs,\$10;" .
  181          "push \@subs,\$11;" .
  182          "push \@subs,\$12;" .
  183          "push \@subs,\$13;" .
  184          "push \@subs,\$14;" .
  185          "push \@subs,\$15;" .
  186          "push \@subs,\$16;" .
  187          "push \@subs,\$'; }";
  188 
  189     undef $REGERROR;
  190     undef $REGMARK;
  191 
  192     eval "${cmd} (\$x =~ ${pattern}) {" . $pushes;
  193 
  194     if ($@)
  195       {
  196       printf $outfile "Error: $@\n";
  197       next NEXT_RE;
  198       }
  199     elsif (scalar(@subs) == 0)
  200       {
  201       printf $outfile "No match";
  202       if (defined $REGERROR && $REGERROR != 1)
  203         { printf $outfile (", mark = %s", &pchars($REGERROR)); }
  204       printf $outfile "\n";
  205       }
  206     else
  207       {
  208       while (scalar(@subs) != 0)
  209         {
  210         printf $outfile (" 0: %s\n", &pchars($subs[0]));
  211         printf $outfile (" 0+ %s\n", &pchars($subs[17])) if $showrest;
  212         $last_printed = 0;
  213         for ($i = 1; $i <= 16; $i++)
  214           {
  215           if (defined $subs[$i])
  216             {
  217             while ($last_printed++ < $i-1)
  218               { printf $outfile ("%2d: <unset>\n", $last_printed); }
  219             printf $outfile ("%2d: %s\n", $i, &pchars($subs[$i]));
  220             $last_printed = $i;
  221             }
  222           }
  223         splice(@subs, 0, 18);
  224         }
  225 
  226       # It seems that $REGMARK is not marked as UTF-8 even when use utf8 is
  227       # set and the input pattern was a UTF-8 string. We can, however, force
  228       # it to be so marked.
  229 
  230       if (defined $REGMARK && $REGMARK != 1)
  231         {
  232         $xx = $REGMARK;
  233         $xx = Encode::decode_utf8($xx) if $utf8;
  234         printf $outfile ("MK: %s\n", &pchars($xx));
  235         }
  236       }
  237     }
  238   }
  239 
  240 # printf $outfile "\n";
  241 
  242 # End