"Fossies" - the Fresh Open Source Software Archive

Member "tin-2.4.1/tools/w2r.pl" (28 Aug 2013, 3347 Bytes) of package /linux/misc/tin-2.4.1.tar.gz:


As a special service "Fossies" has tried to format the requested source page into HTML format using (guessed) Perl source code syntax highlighting (style: standard) with prefixed line numbers and code folding option. Alternatively you can here view or download the uninterpreted source code file.

    1 #! /usr/bin/perl -w
    2 #
    3 # reads a tin filter file with wildmat filters on STDIN, converts it to
    4 # regexp filters and returns it on STDOUT
    5 #
    6 # 2000-04-27 <urs@tin.org>
    7 #
    8 # NOTE: don't use w2r.pl on regexp filters
    9 #
   10 # for case optimization of your regexp filters use opt-case.pl, i.e.:
   11 # w2r.pl < wildmat-filter-file | opt-case.pl > regexp-filter-file
   12 #
   13 # for joining regexp filters with the same group= and score= use
   14 # joinf.pl (not written yet)
   15 
   16 # perl 5 is needed for lookahead assertions and perl < 5.004 is know
   17 # to be buggy
   18 require 5.004;
   19 
   20 # version Number
   21 # $VERSION = "0.2.7";
   22 
   23 while (defined($line = <>)) {
   24     chomp $line;
   25 
   26     # ignore comments etc.
   27     if ($line =~ m/^(?:[#\s]|$)/o) {
   28         print "$line\n";
   29         next;
   30     }
   31 
   32     # skip 'empty' patterns, they are nonsense
   33     next if ($line =~ m/^[^=]+=$/o);
   34 
   35     # lines which needs to be translated
   36     if ($line =~ m/^(subj|from|msgid(?:|_last|_only)|refs_only|xref)=(.*)$/o) {
   37         printf ("$1=%s\n", w2p($2));
   38         next;
   39     }
   40 
   41     # other lines don't need to be translated
   42     print "$line\n";
   43 }
   44 
   45 
   46 # turns a wildmat into a regexp
   47 sub w2p {
   48     local ($wild)  = @_;    # input line
   49     my $cchar = "";     # current char
   50     my $lchar = "";     # last char
   51     my $reg = "";       # translated char
   52     $bmode = 0;     # inside [] ?
   53     $rval = "";     # output line
   54 
   55     # break line into chars
   56     while ($wild =~ s/(.)//) {
   57         $cchar = $1;
   58 
   59         # if char is a [, and we arn't allreay in []
   60         if ($lchar !~ m/\\/o && $cchar =~ m/\[/o) {
   61             $bmode++;
   62             $reg = $cchar;
   63         }
   64 
   65         # if char is a ], and we were in []
   66         if ($lchar !~ m/\\/o && $cchar =~ m/\]/o) {
   67             $bmode--;
   68             $reg = $cchar;
   69         }
   70 
   71         # usual cases
   72         if ($bmode == 0 && $lchar !~ m/\\/o) {
   73             $reg = $cchar;
   74             $reg =~ s/\t/\\t/o; # translate tabs
   75             $reg =~ s/\./\\./o; # quote .
   76             $reg =~ s/\)/\\)/o; # quote )
   77             $reg =~ s/\(/\\(/o; # quote (
   78             $reg =~ s/\*/\.*/o; # translate *
   79             $reg =~ s/\?/\./o;  # translate ?
   80             $reg =~ s/\^/\\^/o; # quote ^
   81             $reg =~ s/\$/\\\$/o;    # quote $
   82         }
   83 
   84         # if last char was a qute, current char can't be a meta
   85         if ($lchar =~ m/\\/o || $bmode != 0) {
   86             $reg = $cchar;
   87             $cchar =~ s/\\//o;  # skip 2nd \\ inside []
   88         }
   89 
   90         $lchar = $cchar;    # store last char
   91         $rval = $rval.$reg; # build return string
   92     }
   93 
   94     # common abbreviations
   95     # TODO: make this global
   96     # replace [0-9] with [\d] in the first []
   97     # replace [a-zA-Z0-9_] with [\w] in the first []
   98     # replace [a-zA-Z0-9] with [^\W_] in the first []
   99     # replace [a-zA-Z] with [^\W\d_] in the first []
  100     $rval =~ s/^([^\[]*)\[0-9\]/$1\[\\d\]/o;
  101     $rval =~ s/([^\[]*)\[a-za-z0-9_\]/$1\[\\w\]/io;
  102     $rval =~ s/([^\[]*)\[a-za-z0-9\]/$1\[^\\W_\]/io;
  103     $rval =~ s/([^\[]*)\[a-za-z\]/$1\[^\\W\\d_\]/io;
  104 
  105     # optimizations
  106     #
  107     # add ^-anchor if needed
  108     $rval =~ s/^(?!\.\*)(.*)/\^$1/o;
  109     # add $-anchor if needed
  110     $rval =~ s/^((?:.*)(?:[^.][^*]))$/$1\$/o;
  111     # remove leading .* if allowed
  112     $rval =~ s/^\.\*(?!$)//o;
  113     # remove tailing .* if allowed
  114     $rval =~ s/(.+)\.\*$/$1/o;
  115 
  116     return $rval;
  117 }
  118 
  119 __END__
  120 
  121 =head1 NAME
  122 
  123 w2r.pl - Convert tin wildmat filters to tin regexp filters
  124 
  125 =head1 SYNOPSIS
  126 
  127 B<w2r.pl> E<lt> I<input> [E<gt> I<output>]
  128 
  129 =head1 DESCRIPTION
  130 
  131 B<w2r.pl> reads a B<tin>(1) filter file with wildmat filters on STDIN,
  132 converts it to regexp filters and returns it on STDOUT.
  133 
  134 =head1 NOTES
  135 
  136 Don't use B<w2r.pl> on regexp filter files
  137 
  138 =head1 AUTHOR
  139 
  140 Urs Janssen E<lt>urs@tin.orgE<gt>
  141 
  142 =head1 SEE ALSO
  143 
  144 B<tin>(1), B<tin>(5)
  145 
  146 =cut