"Fossies" - the Fresh Open Source Software Archive

Member "Perl-Tidy-20190915/examples/perlmask.pl" (6 Aug 2013, 8702 Bytes) of package /linux/misc/Perl-Tidy-20190915.tar.gz:


As a special service "Fossies" has tried to format the requested source page into HTML format using (guessed) Perl source code syntax highlighting (style: standard) with prefixed line numbers and code folding option. Alternatively you can here view or download the uninterpreted source code file.

    1 #!/usr/bin/perl -w
    2 use strict;
    3 
    4 # Walk through a perl script and create a masked file which is
    5 # similar but which masks comments, quotes, patterns, and non-code
    6 # lines so that it is easy to parse with regular expressions.
    7 #
    8 # usage:
    9 # perlmask [-cn]  myfile.pl >myfile.new
   10 # perlmask [-cn] <myfile.pl >myfile.new
   11 #
   12 # In the masked file,
   13 #  -comments and pod will be masked (or removed)
   14 #  -here-doc text lines will be masked (or removed)
   15 #  -quotes and patterns, qw quotes, and here doc << operators will be
   16 #   replaced by the letters 'Q', 'q', or 'h'
   17 #
   18 # The result is a file in which all braces, parens, and square brackets
   19 # are balanced, and it can be parsed relatively easily by regular
   20 # expressions.
   21 #
   22 # -cn is an optional 'compression' flag.  By default the masked file will have
   23 # the same number of characters as the input file, with the difference being
   24 # that certain characters will be changed (masked).
   25 #
   26 # If character position correspondence is not required, the size of the masked
   27 # file can be significantly reduced by increasing the 'compression' level as
   28 # follows:
   29 #
   30 # -c0 all mask file line numbers and character positions agree with
   31 #     original file (DEFAULT)
   32 # -c1 line numbers agree and character positions agree within lines of code
   33 # -c2 line numbers agree but character positions do not
   34 # -c3 no correspondence between line numbers or character positions
   35 #
   36 # Try each of these on a file of significant size to see how they work.
   37 # The default, -c0, is required if you are working with character positions
   38 # that span multiple lines.  The other levels may be useful if you
   39 # do not need this level of correspondence.
   40 #
   41 # This file is one of the examples distributed with perltidy and demonstrates
   42 # using a callback object with Perl::Tidy to walk through a perl file and find
   43 # all of its tokens.  It can be useful for simple perl code parsing tasks.  It
   44 # might even be helpful in debugging.  Or you may want to modify it to suit
   45 # your own purposes.
   46 #
   47 use Getopt::Std;
   48 use IO::File;
   49 $| = 1;
   50 use vars qw($opt_c $opt_h);
   51 my $usage = <<EOM;
   52    usage: perlmask [ -cn ] filename >outfile
   53 EOM
   54 getopts('c:h') or die "$usage";
   55 if ($opt_h) { die $usage }
   56 unless ( defined($opt_c) ) { $opt_c = 0 }
   57 if (@ARGV > 1) { die $usage }
   58 
   59 my $source=$ARGV[0];   # an undefined filename will become stdin
   60 
   61 # strings to hold the files (arrays could be used to)
   62 my ( $masked_file, $original_file );  
   63 
   64 PerlMask::perlmask(
   65     _source         => $source,
   66     _rmasked_file   => \$masked_file,
   67     _roriginal_file => \$original_file,    # optional
   68     _compression    => $opt_c              # optional, default=0
   69 );
   70 
   71 # Now we have the masked and original files in strings of equal length.
   72 # We could search for specific text in the masked file here.  But here
   73 # we'll just print the masked file:
   74 if ($masked_file) { print $masked_file; }
   75 
   76 #####################################################################
   77 #
   78 # The PerlMask package is an interface to perltidy which accepts a
   79 # source filehandle and returns a 'masked' version of the source as
   80 # a string or array.  It can also optionally return the original file
   81 # as a string or array.
   82 #
   83 # It works by making a callback object with a write_line() method to
   84 # receive tokenized lines from perltidy.  This write_line method
   85 # selectively replaces tokens with either their original text or with a
   86 # benign masking character (such as '#' or 'Q').
   87 #
   88 # Usage:
   89 #
   90 #   PerlMask::perlmask(
   91 #       _source         => $fh,             # required source
   92 #       _rmasked_file   => \$masked_file,   # required ref to ARRAY or SCALAR
   93 #       _roriginal_file => \$original_file, # optional ref to ARRAY or SCALAR
   94 #       _compression    => $opt_c           # optional
   95 #   );
   96 #
   97 # _source is any source that perltidy will accept, including a
   98 # filehandle or reference to SCALAR or ARRAY
   99 #
  100 # The compression flag may have these values:
  101 #  0 all mask file line numbers and character positions agree with
  102 #    original file (DEFAULT)
  103 #  1 line numbers agree and character positions agree within lines of code
  104 #  2 line numbers agree but character positions do not
  105 #  3 no correspondence between line numbers or character positions
  106 #
  107 #####################################################################
  108 
  109 package PerlMask;
  110 use Carp;
  111 use Perl::Tidy;
  112 
  113 sub perlmask {
  114 
  115     my %args = ( _compression => 0, @_ );
  116     my $rfile = $args{_rmasked_file};
  117     unless ( defined($rfile) ) {
  118         croak
  119           "Missing required parameter '_rmasked_file' in call to perlmask\n";
  120     }
  121     my $ref=ref($rfile);
  122     unless ( $ref =~ /^(SCALAR|ARRAY)$/ ) {
  123             croak <<EOM;
  124 Expecting _rmasked_file = ref to SCALAR or ARRAY in perlmask but got : ($ref)
  125 EOM
  126     }
  127 
  128     # run perltidy, which will call $formatter's write_line() for each line
  129     my $err=perltidy(
  130         'source'    => $args{_source},
  131         'formatter' => bless( \%args, __PACKAGE__ ),    # callback object
  132         'argv'        => "-npro -se",    # -npro : ignore .perltidyrc,
  133                                          # -se   : errors to STDOUT
  134     );
  135     if ($err) {
  136         die "Error calling perltidy\n";
  137     }
  138 }
  139 
  140 sub print_line {
  141 
  142     # called from write_line to dispatch one line (either masked or original)..
  143     # here we'll either append it to a string or array, as appropriate
  144     my ( $rfile, $line ) = @_;
  145     if ( defined($rfile) ) {
  146         if ( ref($rfile) eq 'SCALAR' ) {
  147             $$rfile .= $line . "\n";
  148         }
  149         elsif ( ref($rfile) eq 'ARRAY' ) {
  150             push @{$rfile}, $line . "\n";
  151         }
  152     }
  153 }
  154 
  155 sub write_line {
  156 
  157     # This is called from perltidy line-by-line
  158     my ( $self, $line_of_tokens ) = @_;
  159     my $rmasked_file   = $self->{_rmasked_file};
  160     my $roriginal_file = $self->{_roriginal_file};
  161     my $opt_c          = $self->{_compression};
  162 
  163     my $line_type         = $line_of_tokens->{_line_type};
  164     my $input_line_number = $line_of_tokens->{_line_number};
  165     my $input_line        = $line_of_tokens->{_line_text};
  166     my $rtoken_type       = $line_of_tokens->{_rtoken_type};
  167     my $rtokens           = $line_of_tokens->{_rtokens};
  168     chomp $input_line;
  169 
  170     # mask non-CODE lines
  171     if ( $line_type ne 'CODE' ) {
  172         return if ( $opt_c == 3 );
  173         my $len = length($input_line);
  174         if ( $opt_c == 0 && $len > 0 ) {
  175             print_line( $roriginal_file, $input_line ) if $roriginal_file;
  176             print_line( $rmasked_file, '#' x $len ); 
  177         }
  178         else {
  179             print_line( $roriginal_file, $input_line ) if $roriginal_file;
  180             print_line( $rmasked_file, "" );
  181         }
  182         return;
  183     }
  184 
  185     # we'll build the masked line token by token
  186     my $masked_line = "";
  187 
  188     # add leading spaces if not in a higher compression mode
  189     if ( $opt_c <= 1 ) {
  190 
  191         # Find leading whitespace.  But be careful..we don't want the
  192         # whitespace if it is part of quoted text, because it will 
  193         # already be contained in a token.
  194         if ( $input_line =~ /^(\s+)/ && !$line_of_tokens->{_starting_in_quote} )
  195         {
  196             $masked_line = $1;
  197         }
  198     }
  199 
  200     # loop over tokens to construct one masked line
  201     for ( my $j = 0 ; $j < @$rtoken_type ; $j++ ) {
  202 
  203         # Mask certain token types by replacing them with their type code:
  204         # type  definition
  205         # ----  ----------
  206         # Q     quote or pattern
  207         # q     qw quote
  208         # h     << here doc operator
  209         # #     comment
  210         #
  211         # This choice will produce a mask file that has balanced
  212         # container tokens and does not cause parsing problems.
  213         if ( $$rtoken_type[$j] =~ /^[Qqh]$/ ) {
  214             if ( $opt_c <= 1 ) {
  215                 $masked_line .= $$rtoken_type[$j] x length( $$rtokens[$j] );
  216             }
  217             else {
  218                 $masked_line .= $$rtoken_type[$j];
  219             }
  220         }
  221 
  222         # Mask a comment
  223         elsif ( $$rtoken_type[$j] eq '#' ) {
  224             if ( $opt_c == 0 ) {
  225                 $masked_line .= '#' x length( $$rtokens[$j] );
  226             }
  227         }
  228 
  229         # All other tokens go out verbatim
  230         else {
  231             $masked_line .= $$rtokens[$j];
  232         }
  233     }
  234     print_line( $roriginal_file, $input_line ) if $roriginal_file;
  235     print_line( $rmasked_file, $masked_line );
  236 
  237     # self-check lengths; this error should never happen
  238     if ( $opt_c == 0 && length($masked_line) != length($input_line) ) {
  239         my $lmask  = length($masked_line);
  240         my $linput = length($input_line);
  241         print STDERR
  242 "$input_line_number: length ERROR, masked length=$lmask but input length=$linput\n";
  243     }
  244 }
  245 
  246 # called once after the last line of a file
  247 sub finish_formatting {
  248     my $self = shift;
  249     return;
  250 }