"Fossies" - the Fresh Open Source Software Archive 
Member "tin-2.6.2/tools/w2r.pl" (23 Aug 2021, 3353 Bytes) of package /linux/misc/tin-2.6.2.tar.xz:
As a special service "Fossies" has tried to format the requested source page into HTML format using (guessed) Perl source code syntax highlighting (style:
standard) with prefixed line numbers and
code folding option.
Alternatively you can here
view or
download the uninterpreted source code file.
For more information about "w2r.pl" see the
Fossies "Dox" file reference documentation and the last
Fossies "Diffs" side-by-side code changes report:
2.4.4_vs_2.4.5.
1 #! /usr/bin/perl -w
2 #
3 # reads a tin filter file with wildmat filters on STDIN, converts it to
4 # regexp filters and returns it on STDOUT
5 #
6 # 2020-11-10 <urs@tin.org>
7 #
8 # NOTE: don't use w2r.pl on regexp filters
9 #
10 # for case optimization of your regexp filters use opt-case.pl, i.e.:
11 # w2r.pl < wildmat-filter-file | opt-case.pl > regexp-filter-file
12 #
13 # for joining regexp filters with the same group= and score= use
14 # joinf.pl (not written yet)
15
16 # perl 5 is needed for lookahead assertions and perl < 5.004 is know
17 # to be buggy
18 require 5.004;
19
20 # version Number
21 # $VERSION = "0.2.8";
22
23 while (defined($line = <>)) {
24 chomp $line;
25
26 # ignore comments etc.
27 if ($line =~ m/^(?:[#\s]|$)/o) {
28 print "$line\n";
29 next;
30 }
31
32 # skip 'empty' patterns, they are nonsense
33 next if ($line =~ m/^[^=]+=$/o);
34
35 # lines which needs to be translated
36 if ($line =~ m/^(subj|from|msgid(?:|_last|_only)|refs_only|xref|path)=(.*)$/o) {
37 printf ("$1=%s\n", w2p($2));
38 next;
39 }
40
41 # other lines don't need to be translated
42 print "$line\n";
43 }
44
45
46 # turns a wildmat into a regexp
47 sub w2p {
48 local ($wild) = @_; # input line
49 my $cchar = ""; # current char
50 my $lchar = ""; # last char
51 my $reg = ""; # translated char
52 $bmode = 0; # inside [] ?
53 $rval = ""; # output line
54
55 # break line into chars
56 while ($wild =~ s/(.)//) {
57 $cchar = $1;
58
59 # if char is a [, and we aren't allreay in []
60 if ($lchar !~ m/\\/o && $cchar =~ m/\[/o) {
61 $bmode++;
62 $reg = $cchar;
63 }
64
65 # if char is a ], and we were in []
66 if ($lchar !~ m/\\/o && $cchar =~ m/\]/o) {
67 $bmode--;
68 $reg = $cchar;
69 }
70
71 # usual cases
72 if ($bmode == 0 && $lchar !~ m/\\/o) {
73 $reg = $cchar;
74 $reg =~ s/\t/\\t/o; # translate tabs
75 $reg =~ s/\./\\./o; # quote .
76 $reg =~ s/\)/\\)/o; # quote )
77 $reg =~ s/\(/\\(/o; # quote (
78 $reg =~ s/\*/\.*/o; # translate *
79 $reg =~ s/\?/\./o; # translate ?
80 $reg =~ s/\^/\\^/o; # quote ^
81 $reg =~ s/\$/\\\$/o; # quote $
82 }
83
84 # if last char was a qute, current char can't be a meta
85 if ($lchar =~ m/\\/o || $bmode != 0) {
86 $reg = $cchar;
87 $cchar =~ s/\\//o; # skip 2nd \\ inside []
88 }
89
90 $lchar = $cchar; # store last char
91 $rval = $rval.$reg; # build return string
92 }
93
94 # common abbreviations
95 # TODO: make this global
96 # replace [0-9] with [\d] in the first []
97 # replace [a-zA-Z0-9_] with [\w] in the first []
98 # replace [a-zA-Z0-9] with [^\W_] in the first []
99 # replace [a-zA-Z] with [^\W\d_] in the first []
100 $rval =~ s/^([^\[]*)\[0-9\]/$1\[\\d\]/o;
101 $rval =~ s/([^\[]*)\[a-za-z0-9_\]/$1\[\\w\]/io;
102 $rval =~ s/([^\[]*)\[a-za-z0-9\]/$1\[^\\W_\]/io;
103 $rval =~ s/([^\[]*)\[a-za-z\]/$1\[^\\W\\d_\]/io;
104
105 # optimizations
106 #
107 # add ^-anchor if needed
108 $rval =~ s/^(?!\.\*)(.*)/\^$1/o;
109 # add $-anchor if needed
110 $rval =~ s/^((?:.*)(?:[^.][^*]))$/$1\$/o;
111 # remove leading .* if allowed
112 $rval =~ s/^\.\*(?!$)//o;
113 # remove tailing .* if allowed
114 $rval =~ s/(.+)\.\*$/$1/o;
115
116 return $rval;
117 }
118
119 __END__
120
121 =head1 NAME
122
123 w2r.pl - Convert tin wildmat filters to tin regexp filters
124
125 =head1 SYNOPSIS
126
127 B<w2r.pl> E<lt> I<input> [E<gt> I<output>]
128
129 =head1 DESCRIPTION
130
131 B<w2r.pl> reads a L<tin(1)> filter file with wildmat filters on STDIN,
132 converts it to regexp filters and returns it on STDOUT.
133
134 =head1 NOTES
135
136 Don't use B<w2r.pl> on regexp filter files
137
138 =head1 AUTHOR
139
140 Urs Janssen E<lt>urs@tin.orgE<gt>
141
142 =head1 SEE ALSO
143
144 L<tin(1)>, L<tin(5)>
145
146 =cut