"Fossies" - the Fresh Open Source Software Archive

Member "tin-2.6.2/tools/w2r.pl" (23 Aug 2021, 3353 Bytes) of package /linux/misc/tin-2.6.2.tar.xz:


As a special service "Fossies" has tried to format the requested source page into HTML format using (guessed) Perl source code syntax highlighting (style: standard) with prefixed line numbers and code folding option. Alternatively you can here view or download the uninterpreted source code file. For more information about "w2r.pl" see the Fossies "Dox" file reference documentation and the last Fossies "Diffs" side-by-side code changes report: 2.4.4_vs_2.4.5.

    1 #! /usr/bin/perl -w
    2 #
    3 # reads a tin filter file with wildmat filters on STDIN, converts it to
    4 # regexp filters and returns it on STDOUT
    5 #
    6 # 2020-11-10 <urs@tin.org>
    7 #
    8 # NOTE: don't use w2r.pl on regexp filters
    9 #
   10 # for case optimization of your regexp filters use opt-case.pl, i.e.:
   11 # w2r.pl < wildmat-filter-file | opt-case.pl > regexp-filter-file
   12 #
   13 # for joining regexp filters with the same group= and score= use
   14 # joinf.pl (not written yet)
   15 
   16 # perl 5 is needed for lookahead assertions and perl < 5.004 is know
   17 # to be buggy
   18 require 5.004;
   19 
   20 # version Number
   21 # $VERSION = "0.2.8";
   22 
   23 while (defined($line = <>)) {
   24     chomp $line;
   25 
   26     # ignore comments etc.
   27     if ($line =~ m/^(?:[#\s]|$)/o) {
   28         print "$line\n";
   29         next;
   30     }
   31 
   32     # skip 'empty' patterns, they are nonsense
   33     next if ($line =~ m/^[^=]+=$/o);
   34 
   35     # lines which needs to be translated
   36     if ($line =~ m/^(subj|from|msgid(?:|_last|_only)|refs_only|xref|path)=(.*)$/o) {
   37         printf ("$1=%s\n", w2p($2));
   38         next;
   39     }
   40 
   41     # other lines don't need to be translated
   42     print "$line\n";
   43 }
   44 
   45 
   46 # turns a wildmat into a regexp
   47 sub w2p {
   48     local ($wild)  = @_;    # input line
   49     my $cchar = "";     # current char
   50     my $lchar = "";     # last char
   51     my $reg = "";       # translated char
   52     $bmode = 0;     # inside [] ?
   53     $rval = "";     # output line
   54 
   55     # break line into chars
   56     while ($wild =~ s/(.)//) {
   57         $cchar = $1;
   58 
   59         # if char is a [, and we aren't allreay in []
   60         if ($lchar !~ m/\\/o && $cchar =~ m/\[/o) {
   61             $bmode++;
   62             $reg = $cchar;
   63         }
   64 
   65         # if char is a ], and we were in []
   66         if ($lchar !~ m/\\/o && $cchar =~ m/\]/o) {
   67             $bmode--;
   68             $reg = $cchar;
   69         }
   70 
   71         # usual cases
   72         if ($bmode == 0 && $lchar !~ m/\\/o) {
   73             $reg = $cchar;
   74             $reg =~ s/\t/\\t/o; # translate tabs
   75             $reg =~ s/\./\\./o; # quote .
   76             $reg =~ s/\)/\\)/o; # quote )
   77             $reg =~ s/\(/\\(/o; # quote (
   78             $reg =~ s/\*/\.*/o; # translate *
   79             $reg =~ s/\?/\./o;  # translate ?
   80             $reg =~ s/\^/\\^/o; # quote ^
   81             $reg =~ s/\$/\\\$/o;    # quote $
   82         }
   83 
   84         # if last char was a qute, current char can't be a meta
   85         if ($lchar =~ m/\\/o || $bmode != 0) {
   86             $reg = $cchar;
   87             $cchar =~ s/\\//o;  # skip 2nd \\ inside []
   88         }
   89 
   90         $lchar = $cchar;    # store last char
   91         $rval = $rval.$reg; # build return string
   92     }
   93 
   94     # common abbreviations
   95     # TODO: make this global
   96     # replace [0-9] with [\d] in the first []
   97     # replace [a-zA-Z0-9_] with [\w] in the first []
   98     # replace [a-zA-Z0-9] with [^\W_] in the first []
   99     # replace [a-zA-Z] with [^\W\d_] in the first []
  100     $rval =~ s/^([^\[]*)\[0-9\]/$1\[\\d\]/o;
  101     $rval =~ s/([^\[]*)\[a-za-z0-9_\]/$1\[\\w\]/io;
  102     $rval =~ s/([^\[]*)\[a-za-z0-9\]/$1\[^\\W_\]/io;
  103     $rval =~ s/([^\[]*)\[a-za-z\]/$1\[^\\W\\d_\]/io;
  104 
  105     # optimizations
  106     #
  107     # add ^-anchor if needed
  108     $rval =~ s/^(?!\.\*)(.*)/\^$1/o;
  109     # add $-anchor if needed
  110     $rval =~ s/^((?:.*)(?:[^.][^*]))$/$1\$/o;
  111     # remove leading .* if allowed
  112     $rval =~ s/^\.\*(?!$)//o;
  113     # remove tailing .* if allowed
  114     $rval =~ s/(.+)\.\*$/$1/o;
  115 
  116     return $rval;
  117 }
  118 
  119 __END__
  120 
  121 =head1 NAME
  122 
  123 w2r.pl - Convert tin wildmat filters to tin regexp filters
  124 
  125 =head1 SYNOPSIS
  126 
  127 B<w2r.pl> E<lt> I<input> [E<gt> I<output>]
  128 
  129 =head1 DESCRIPTION
  130 
  131 B<w2r.pl> reads a L<tin(1)> filter file with wildmat filters on STDIN,
  132 converts it to regexp filters and returns it on STDOUT.
  133 
  134 =head1 NOTES
  135 
  136 Don't use B<w2r.pl> on regexp filter files
  137 
  138 =head1 AUTHOR
  139 
  140 Urs Janssen E<lt>urs@tin.orgE<gt>
  141 
  142 =head1 SEE ALSO
  143 
  144 L<tin(1)>, L<tin(5)>
  145 
  146 =cut