"Fossies" - the Fresh Open Source Software Archive  

Source code changes of the file "lib/Perl/Tidy/Tokenizer.pm" between
Perl-Tidy-20210402.tar.gz and Perl-Tidy-20210717.tar.gz

About: Perltidy is a Perl script indenter and reformatter (beautifier).

Tokenizer.pm  (Perl-Tidy-20210402):Tokenizer.pm  (Perl-Tidy-20210717)
skipping to change at line 24 skipping to change at line 24
# The Tokenizer returns a reference to a data structure 'line_of_tokens' # The Tokenizer returns a reference to a data structure 'line_of_tokens'
# containing one tokenized line for each call to its get_line() method. # containing one tokenized line for each call to its get_line() method.
# #
# WARNING: This is not a real class. Only one tokenizer my be used. # WARNING: This is not a real class. Only one tokenizer my be used.
# #
######################################################################## ########################################################################
package Perl::Tidy::Tokenizer; package Perl::Tidy::Tokenizer;
use strict; use strict;
use warnings; use warnings;
our $VERSION = '20210402'; our $VERSION = '20210717';
use Perl::Tidy::LineBuffer; use Perl::Tidy::LineBuffer;
use Carp; use Carp;
# PACKAGE VARIABLES for processing an entire FILE. # PACKAGE VARIABLES for processing an entire FILE.
# These must be package variables because most may get localized during # These must be package variables because most may get localized during
# processing. Most are initialized in sub prepare_for_a_new_file. # processing. Most are initialized in sub prepare_for_a_new_file.
use vars qw{ use vars qw{
$tokenizer_self $tokenizer_self
skipping to change at line 103 skipping to change at line 103
@closing_brace_names @closing_brace_names
%is_keyword_taking_list %is_keyword_taking_list
%is_keyword_taking_optional_arg %is_keyword_taking_optional_arg
%is_keyword_rejecting_slash_as_pattern_delimiter %is_keyword_rejecting_slash_as_pattern_delimiter
%is_keyword_rejecting_question_as_pattern_delimiter %is_keyword_rejecting_question_as_pattern_delimiter
%is_q_qq_qw_qx_qr_s_y_tr_m %is_q_qq_qw_qx_qr_s_y_tr_m
%is_sub %is_sub
%is_package %is_package
%is_comma_question_colon %is_comma_question_colon
%other_line_endings %other_line_endings
$code_skipping_pattern_begin
$code_skipping_pattern_end
}; };
# GLOBAL VARIABLES which are constant after being configured by user-supplied
# parameters. They remain constant as a file is being processed.
my (
$rOpts_code_skipping,
$code_skipping_pattern_begin,
$code_skipping_pattern_end,
);
# possible values of operator_expected() # possible values of operator_expected()
use constant TERM => -1; use constant TERM => -1;
use constant UNKNOWN => 0; use constant UNKNOWN => 0;
use constant OPERATOR => 1; use constant OPERATOR => 1;
# possible values of context # possible values of context
use constant SCALAR_CONTEXT => -1; use constant SCALAR_CONTEXT => -1;
use constant UNKNOWN_CONTEXT => 0; use constant UNKNOWN_CONTEXT => 0;
use constant LIST_CONTEXT => 1; use constant LIST_CONTEXT => 1;
skipping to change at line 132 skipping to change at line 143
use constant { use constant {
_rhere_target_list_ => $i++, _rhere_target_list_ => $i++,
_in_here_doc_ => $i++, _in_here_doc_ => $i++,
_here_doc_target_ => $i++, _here_doc_target_ => $i++,
_here_quote_character_ => $i++, _here_quote_character_ => $i++,
_in_data_ => $i++, _in_data_ => $i++,
_in_end_ => $i++, _in_end_ => $i++,
_in_format_ => $i++, _in_format_ => $i++,
_in_error_ => $i++, _in_error_ => $i++,
_in_pod_ => $i++, _in_pod_ => $i++,
_in_skipped_ => $i++,
_in_attribute_list_ => $i++, _in_attribute_list_ => $i++,
_in_quote_ => $i++, _in_quote_ => $i++,
_quote_target_ => $i++, _quote_target_ => $i++,
_line_start_quote_ => $i++, _line_start_quote_ => $i++,
_starting_level_ => $i++, _starting_level_ => $i++,
_know_starting_level_ => $i++, _know_starting_level_ => $i++,
_tabsize_ => $i++, _tabsize_ => $i++,
_indent_columns_ => $i++, _indent_columns_ => $i++,
_look_for_hash_bang_ => $i++, _look_for_hash_bang_ => $i++,
_trim_qw_ => $i++, _trim_qw_ => $i++,
skipping to change at line 214 skipping to change at line 226
Error detected in package '$my_package', version $VERSION Error detected in package '$my_package', version $VERSION
Received unexpected AUTOLOAD call for sub '$AUTOLOAD' Received unexpected AUTOLOAD call for sub '$AUTOLOAD'
Called from package: '$pkg' Called from package: '$pkg'
Called from File '$fname' at line '$lno' Called from File '$fname' at line '$lno'
This error is probably due to a recent programming change This error is probably due to a recent programming change
====================================================================== ======================================================================
EOM EOM
exit 1; exit 1;
} }
sub Die {
my ($msg) = @_;
Perl::Tidy::Die($msg);
croak "unexpected return from Perl::Tidy::Die";
}
sub bad_pattern {
# See if a pattern will compile. We have to use a string eval here,
# but it should be safe because the pattern has been constructed
# by this program.
my ($pattern) = @_;
eval "'##'=~/$pattern/";
return $@;
}
sub make_code_skipping_pattern {
my ( $rOpts, $opt_name, $default ) = @_;
my $param = $rOpts->{$opt_name};
unless ($param) { $param = $default }
$param =~ s/^\s*//; # allow leading spaces to be like format-skipping
if ( $param !~ /^#/ ) {
Die("ERROR: the $opt_name parameter '$param' must begin with '#'\n");
}
my $pattern = '^\s*' . $param . '\b';
if ( bad_pattern($pattern) ) {
Die(
"ERROR: the $opt_name parameter '$param' causes the invalid regex '$pattern'\n"
);
}
return $pattern;
}
sub check_options { sub check_options {
# Check Tokenizer parameters # Check Tokenizer parameters
my $rOpts = shift; my $rOpts = shift;
%is_sub = (); %is_sub = ();
$is_sub{'sub'} = 1; $is_sub{'sub'} = 1;
# Install any aliases to 'sub' # Install any aliases to 'sub'
if ( $rOpts->{'sub-alias-list'} ) { if ( $rOpts->{'sub-alias-list'} ) {
# Note that any 'sub-alias-list' has been preprocessed to # Note that any 'sub-alias-list' has been preprocessed to
# be a trimmed, space-separated list which includes 'sub' # be a trimmed, space-separated list which includes 'sub'
# for example, it might be 'sub method fun' # for example, it might be 'sub method fun'
my @sub_alias_list = split /\s+/, $rOpts->{'sub-alias-list'}; my @sub_alias_list = split /\s+/, $rOpts->{'sub-alias-list'};
foreach my $word (@sub_alias_list) { foreach my $word (@sub_alias_list) {
$is_sub{$word} = 1; $is_sub{$word} = 1;
} }
} }
$rOpts_code_skipping = $rOpts->{'code-skipping'};
$code_skipping_pattern_begin =
make_code_skipping_pattern( $rOpts, 'code-skipping-begin', '#<<V' );
$code_skipping_pattern_end =
make_code_skipping_pattern( $rOpts, 'code-skipping-end', '#>>V' );
return; return;
} }
sub new { sub new {
my ( $class, @args ) = @_; my ( $class, @args ) = @_;
# Note: 'tabs' and 'indent_columns' are temporary and should be # Note: 'tabs' and 'indent_columns' are temporary and should be
# removed asap # removed asap
my %defaults = ( my %defaults = (
skipping to change at line 276 skipping to change at line 327
# Tokenizer state data is as follows: # Tokenizer state data is as follows:
# _rhere_target_list_ reference to list of here-doc targets # _rhere_target_list_ reference to list of here-doc targets
# _here_doc_target_ the target string for a here document # _here_doc_target_ the target string for a here document
# _here_quote_character_ the type of here-doc quoting (" ' ` or none) # _here_quote_character_ the type of here-doc quoting (" ' ` or none)
# to determine if interpolation is done # to determine if interpolation is done
# _quote_target_ character we seek if chasing a quote # _quote_target_ character we seek if chasing a quote
# _line_start_quote_ line where we started looking for a long quote # _line_start_quote_ line where we started looking for a long quote
# _in_here_doc_ flag indicating if we are in a here-doc # _in_here_doc_ flag indicating if we are in a here-doc
# _in_pod_ flag set if we are in pod documentation # _in_pod_ flag set if we are in pod documentation
# _in_skipped_ flag set if we are in a skipped section
# _in_error_ flag set if we saw severe error (binary in script) # _in_error_ flag set if we saw severe error (binary in script)
# _in_data_ flag set if we are in __DATA__ section # _in_data_ flag set if we are in __DATA__ section
# _in_end_ flag set if we are in __END__ section # _in_end_ flag set if we are in __END__ section
# _in_format_ flag set if we are in a format description # _in_format_ flag set if we are in a format description
# _in_attribute_list_ flag telling if we are looking for attributes # _in_attribute_list_ flag telling if we are looking for attributes
# _in_quote_ flag telling if we are chasing a quote # _in_quote_ flag telling if we are chasing a quote
# _starting_level_ indentation level of first line # _starting_level_ indentation level of first line
# _line_buffer_object_ object with get_line() method to supply source code # _line_buffer_object_ object with get_line() method to supply source code
# _diagnostics_object_ place to write debugging information # _diagnostics_object_ place to write debugging information
# _unexpected_error_count_ error count used to limit output # _unexpected_error_count_ error count used to limit output
skipping to change at line 299 skipping to change at line 351
my $self = []; my $self = [];
$self->[_rhere_target_list_] = []; $self->[_rhere_target_list_] = [];
$self->[_in_here_doc_] = 0; $self->[_in_here_doc_] = 0;
$self->[_here_doc_target_] = ""; $self->[_here_doc_target_] = "";
$self->[_here_quote_character_] = ""; $self->[_here_quote_character_] = "";
$self->[_in_data_] = 0; $self->[_in_data_] = 0;
$self->[_in_end_] = 0; $self->[_in_end_] = 0;
$self->[_in_format_] = 0; $self->[_in_format_] = 0;
$self->[_in_error_] = 0; $self->[_in_error_] = 0;
$self->[_in_pod_] = 0; $self->[_in_pod_] = 0;
$self->[_in_skipped_] = 0;
$self->[_in_attribute_list_] = 0; $self->[_in_attribute_list_] = 0;
$self->[_in_quote_] = 0; $self->[_in_quote_] = 0;
$self->[_quote_target_] = ""; $self->[_quote_target_] = "";
$self->[_line_start_quote_] = -1; $self->[_line_start_quote_] = -1;
$self->[_starting_level_] = $args{starting_level}; $self->[_starting_level_] = $args{starting_level};
$self->[_know_starting_level_] = defined( $args{starting_level} ); $self->[_know_starting_level_] = defined( $args{starting_level} );
$self->[_tabsize_] = $args{tabsize}; $self->[_tabsize_] = $args{tabsize};
$self->[_indent_columns_] = $args{indent_columns}; $self->[_indent_columns_] = $args{indent_columns};
$self->[_look_for_hash_bang_] = $args{look_for_hash_bang}; $self->[_look_for_hash_bang_] = $args{look_for_hash_bang};
$self->[_trim_qw_] = $args{trim_qw}; $self->[_trim_qw_] = $args{trim_qw};
skipping to change at line 516 skipping to change at line 569
&& !$tokenizer_self->[_saw_hash_bang_] ) && !$tokenizer_self->[_saw_hash_bang_] )
{ {
warning( warning(
"hit EOF without seeing hash-bang line; maybe don't need -x?\n"); "hit EOF without seeing hash-bang line; maybe don't need -x?\n");
} }
if ( $tokenizer_self->[_in_format_] ) { if ( $tokenizer_self->[_in_format_] ) {
warning("hit EOF while in format description\n"); warning("hit EOF while in format description\n");
} }
if ( $tokenizer_self->[_in_skipped_] ) {
write_logfile_entry(
"hit EOF while in lines skipped with --code-skipping\n");
}
if ( $tokenizer_self->[_in_pod_] ) { if ( $tokenizer_self->[_in_pod_] ) {
# Just write log entry if this is after __END__ or __DATA__ # Just write log entry if this is after __END__ or __DATA__
# because this happens to often, and it is not likely to be # because this happens to often, and it is not likely to be
# a parsing error. # a parsing error.
if ( $tokenizer_self->[_saw_data_] || $tokenizer_self->[_saw_end_] ) { if ( $tokenizer_self->[_saw_data_] || $tokenizer_self->[_saw_end_] ) {
write_logfile_entry( write_logfile_entry(
"hit eof while in pod documentation (no =cut seen)\n\tthis can cause trouble wit h some pod utilities\n" "hit eof while in pod documentation (no =cut seen)\n\tthis can cause trouble wit h some pod utilities\n"
); );
} }
skipping to change at line 656 skipping to change at line 714
# USES GLOBAL VARIABLES: $tokenizer_self, $brace_depth, # USES GLOBAL VARIABLES: $tokenizer_self, $brace_depth,
# $square_bracket_depth, $paren_depth # $square_bracket_depth, $paren_depth
my $input_line = $tokenizer_self->[_line_buffer_object_]->get_line(); my $input_line = $tokenizer_self->[_line_buffer_object_]->get_line();
$tokenizer_self->[_line_of_text_] = $input_line; $tokenizer_self->[_line_of_text_] = $input_line;
return unless ($input_line); return unless ($input_line);
my $input_line_number = ++$tokenizer_self->[_last_line_number_]; my $input_line_number = ++$tokenizer_self->[_last_line_number_];
my $write_logfile_entry = sub {
my ($msg) = @_;
write_logfile_entry("Line $input_line_number: $msg");
};
# Find and remove what characters terminate this line, including any # Find and remove what characters terminate this line, including any
# control r # control r
my $input_line_separator = ""; my $input_line_separator = "";
if ( chomp($input_line) ) { $input_line_separator = $/ } if ( chomp($input_line) ) { $input_line_separator = $/ }
# The first test here very significantly speeds things up, but be sure to # The first test here very significantly speeds things up, but be sure to
# keep the regex and hash %other_line_endings the same. # keep the regex and hash %other_line_endings the same.
if ( $other_line_endings{ substr( $input_line, -1 ) } ) { if ( $other_line_endings{ substr( $input_line, -1 ) } ) {
if ( $input_line =~ s/((\r|\035|\032)+)$// ) { if ( $input_line =~ s/((\r|\035|\032)+)$// ) {
$input_line_separator = $2 . $input_line_separator; $input_line_separator = $2 . $input_line_separator;
skipping to change at line 742 skipping to change at line 805
chomp $candidate_target; chomp $candidate_target;
# Handle <<~ targets, which are indicated here by a leading space on # Handle <<~ targets, which are indicated here by a leading space on
# the here quote character # the here quote character
if ( $here_quote_character =~ /^\s/ ) { if ( $here_quote_character =~ /^\s/ ) {
$candidate_target =~ s/^\s*//; $candidate_target =~ s/^\s*//;
} }
if ( $candidate_target eq $here_doc_target ) { if ( $candidate_target eq $here_doc_target ) {
$tokenizer_self->[_nearly_matched_here_target_at_] = undef; $tokenizer_self->[_nearly_matched_here_target_at_] = undef;
$line_of_tokens->{_line_type} = 'HERE_END'; $line_of_tokens->{_line_type} = 'HERE_END';
write_logfile_entry("Exiting HERE document $here_doc_target\n"); $write_logfile_entry->("Exiting HERE document $here_doc_target\n");
my $rhere_target_list = $tokenizer_self->[_rhere_target_list_]; my $rhere_target_list = $tokenizer_self->[_rhere_target_list_];
if ( @{$rhere_target_list} ) { # there can be multiple here targets if ( @{$rhere_target_list} ) { # there can be multiple here targets
( $here_doc_target, $here_quote_character ) = ( $here_doc_target, $here_quote_character ) =
@{ shift @{$rhere_target_list} }; @{ shift @{$rhere_target_list} };
$tokenizer_self->[_here_doc_target_] = $here_doc_target; $tokenizer_self->[_here_doc_target_] = $here_doc_target;
$tokenizer_self->[_here_quote_character_] = $tokenizer_self->[_here_quote_character_] =
$here_quote_character; $here_quote_character;
write_logfile_entry( $write_logfile_entry->(
"Entering HERE document $here_doc_target\n"); "Entering HERE document $here_doc_target\n");
$tokenizer_self->[_nearly_matched_here_target_at_] = undef; $tokenizer_self->[_nearly_matched_here_target_at_] = undef;
$tokenizer_self->[_started_looking_for_here_target_at_] = $tokenizer_self->[_started_looking_for_here_target_at_] =
$input_line_number; $input_line_number;
} }
else { else {
$tokenizer_self->[_in_here_doc_] = 0; $tokenizer_self->[_in_here_doc_] = 0;
$tokenizer_self->[_here_doc_target_] = ""; $tokenizer_self->[_here_doc_target_] = "";
$tokenizer_self->[_here_quote_character_] = ""; $tokenizer_self->[_here_quote_character_] = "";
} }
skipping to change at line 777 skipping to change at line 840
$candidate_target =~ s/\s*$//; $candidate_target =~ s/\s*$//;
$candidate_target =~ s/^\s*//; $candidate_target =~ s/^\s*//;
if ( $candidate_target eq $here_doc_target ) { if ( $candidate_target eq $here_doc_target ) {
$tokenizer_self->[_nearly_matched_here_target_at_] = $tokenizer_self->[_nearly_matched_here_target_at_] =
$input_line_number; $input_line_number;
} }
} }
return $line_of_tokens; return $line_of_tokens;
} }
# must print line unchanged if we are in a format section # Print line unchanged if we are in a format section
elsif ( $tokenizer_self->[_in_format_] ) { elsif ( $tokenizer_self->[_in_format_] ) {
if ( $input_line =~ /^\.[\s#]*$/ ) { if ( $input_line =~ /^\.[\s#]*$/ ) {
write_logfile_entry("Exiting format section\n");
$tokenizer_self->[_in_format_] = 0; # Decrement format depth count at a '.' after a 'format'
$line_of_tokens->{_line_type} = 'FORMAT_END'; $tokenizer_self->[_in_format_]--;
# This is the end when count reaches 0
if ( !$tokenizer_self->[_in_format_] ) {
$write_logfile_entry->("Exiting format section\n");
$line_of_tokens->{_line_type} = 'FORMAT_END';
}
} }
else { else {
$line_of_tokens->{_line_type} = 'FORMAT'; $line_of_tokens->{_line_type} = 'FORMAT';
if ( $input_line =~ /^\s*format\s+\w+/ ) {
# Increment format depth count at a 'format' within a 'format'
# This is a simple way to handle nested formats (issue c019).
$tokenizer_self->[_in_format_]++;
}
} }
return $line_of_tokens; return $line_of_tokens;
} }
# must print line unchanged if we are in pod documentation # must print line unchanged if we are in pod documentation
elsif ( $tokenizer_self->[_in_pod_] ) { elsif ( $tokenizer_self->[_in_pod_] ) {
$line_of_tokens->{_line_type} = 'POD'; $line_of_tokens->{_line_type} = 'POD';
if ( $input_line =~ /^=cut/ ) { if ( $input_line =~ /^=cut/ ) {
$line_of_tokens->{_line_type} = 'POD_END'; $line_of_tokens->{_line_type} = 'POD_END';
write_logfile_entry("Exiting POD section\n"); $write_logfile_entry->("Exiting POD section\n");
$tokenizer_self->[_in_pod_] = 0; $tokenizer_self->[_in_pod_] = 0;
} }
if ( $input_line =~ /^\#\!.*perl\b/ && !$tokenizer_self->[_in_end_] ) { if ( $input_line =~ /^\#\!.*perl\b/ && !$tokenizer_self->[_in_end_] ) {
warning( warning(
"Hash-bang in pod can cause older versions of perl to fail! \n" "Hash-bang in pod can cause older versions of perl to fail! \n"
); );
} }
return $line_of_tokens; return $line_of_tokens;
} }
# print line unchanged if in skipped section
elsif ( $tokenizer_self->[_in_skipped_] ) {
# NOTE: marked as the existing type 'FORMAT' to keep html working
$line_of_tokens->{_line_type} = 'FORMAT';
if ( $input_line =~ /$code_skipping_pattern_end/ ) {
$write_logfile_entry->("Exiting code-skipping section\n");
$tokenizer_self->[_in_skipped_] = 0;
}
return $line_of_tokens;
}
# must print line unchanged if we have seen a severe error (i.e., we # must print line unchanged if we have seen a severe error (i.e., we
# are seeing illegal tokens and cannot continue. Syntax errors do # are seeing illegal tokens and cannot continue. Syntax errors do
# not pass this route). Calling routine can decide what to do, but # not pass this route). Calling routine can decide what to do, but
# the default can be to just pass all lines as if they were after __END__ # the default can be to just pass all lines as if they were after __END__
elsif ( $tokenizer_self->[_in_error_] ) { elsif ( $tokenizer_self->[_in_error_] ) {
$line_of_tokens->{_line_type} = 'ERROR'; $line_of_tokens->{_line_type} = 'ERROR';
return $line_of_tokens; return $line_of_tokens;
} }
# print line unchanged if we are __DATA__ section # print line unchanged if we are __DATA__ section
elsif ( $tokenizer_self->[_in_data_] ) { elsif ( $tokenizer_self->[_in_data_] ) {
# ...but look for POD # ...but look for POD
# Note that the _in_data and _in_end flags remain set # Note that the _in_data and _in_end flags remain set
# so that we return to that state after seeing the # so that we return to that state after seeing the
# end of a pod section # end of a pod section
if ( $input_line =~ /^=(\w+)\b/ && $1 ne 'cut' ) { if ( $input_line =~ /^=(\w+)\b/ && $1 ne 'cut' ) {
$line_of_tokens->{_line_type} = 'POD_START'; $line_of_tokens->{_line_type} = 'POD_START';
write_logfile_entry("Entering POD section\n"); $write_logfile_entry->("Entering POD section\n");
$tokenizer_self->[_in_pod_] = 1; $tokenizer_self->[_in_pod_] = 1;
return $line_of_tokens; return $line_of_tokens;
} }
else { else {
$line_of_tokens->{_line_type} = 'DATA'; $line_of_tokens->{_line_type} = 'DATA';
return $line_of_tokens; return $line_of_tokens;
} }
} }
# print line unchanged if we are in __END__ section # print line unchanged if we are in __END__ section
elsif ( $tokenizer_self->[_in_end_] ) { elsif ( $tokenizer_self->[_in_end_] ) {
# ...but look for POD # ...but look for POD
# Note that the _in_data and _in_end flags remain set # Note that the _in_data and _in_end flags remain set
# so that we return to that state after seeing the # so that we return to that state after seeing the
# end of a pod section # end of a pod section
if ( $input_line =~ /^=(\w+)\b/ && $1 ne 'cut' ) { if ( $input_line =~ /^=(\w+)\b/ && $1 ne 'cut' ) {
$line_of_tokens->{_line_type} = 'POD_START'; $line_of_tokens->{_line_type} = 'POD_START';
write_logfile_entry("Entering POD section\n"); $write_logfile_entry->("Entering POD section\n");
$tokenizer_self->[_in_pod_] = 1; $tokenizer_self->[_in_pod_] = 1;
return $line_of_tokens; return $line_of_tokens;
} }
else { else {
$line_of_tokens->{_line_type} = 'END'; $line_of_tokens->{_line_type} = 'END';
return $line_of_tokens; return $line_of_tokens;
} }
} }
# check for a hash-bang line if we haven't seen one # check for a hash-bang line if we haven't seen one
skipping to change at line 936 skipping to change at line 1023
# a first line of the form ': #' will be marked as SYSTEM # a first line of the form ': #' will be marked as SYSTEM
# since lines of this form may be used by tcsh # since lines of this form may be used by tcsh
if ( $input_line_number == 1 && $input_line =~ /^\s*\:\s*\#/ ) { if ( $input_line_number == 1 && $input_line =~ /^\s*\:\s*\#/ ) {
$line_of_tokens->{_line_type} = 'SYSTEM'; $line_of_tokens->{_line_type} = 'SYSTEM';
return $line_of_tokens; return $line_of_tokens;
} }
# now we know that it is ok to tokenize the line... # now we know that it is ok to tokenize the line...
# the line tokenizer will modify any of these private variables: # the line tokenizer will modify any of these private variables:
# _rhere_target_list # _rhere_target_list_
# _in_data # _in_data_
# _in_end # _in_end_
# _in_format # _in_format_
# _in_error # _in_error_
# _in_pod # _in_skipped_
# _in_quote # _in_pod_
# _in_quote_
my $ending_in_quote_last = $tokenizer_self->[_in_quote_]; my $ending_in_quote_last = $tokenizer_self->[_in_quote_];
tokenize_this_line($line_of_tokens); tokenize_this_line($line_of_tokens);
# Now finish defining the return structure and return it # Now finish defining the return structure and return it
$line_of_tokens->{_ending_in_quote} = $tokenizer_self->[_in_quote_]; $line_of_tokens->{_ending_in_quote} = $tokenizer_self->[_in_quote_];
# handle severe error (binary data in script) # handle severe error (binary data in script)
if ( $tokenizer_self->[_in_error_] ) { if ( $tokenizer_self->[_in_error_] ) {
$tokenizer_self->[_in_quote_] = 0; # to avoid any more messages $tokenizer_self->[_in_quote_] = 0; # to avoid any more messages
warning("Giving up after error\n"); warning("Giving up after error\n");
skipping to change at line 977 skipping to change at line 1065
{ {
complain("=cut while not in pod ignored\n"); complain("=cut while not in pod ignored\n");
$tokenizer_self->[_in_pod_] = 0; $tokenizer_self->[_in_pod_] = 0;
$line_of_tokens->{_line_type} = 'POD_END'; $line_of_tokens->{_line_type} = 'POD_END';
} }
else { else {
$line_of_tokens->{_line_type} = 'POD_START'; $line_of_tokens->{_line_type} = 'POD_START';
warning( warning(
"=cut starts a pod section .. this can fool pod utilities.\n" "=cut starts a pod section .. this can fool pod utilities.\n"
); );
write_logfile_entry("Entering POD section\n"); $write_logfile_entry->("Entering POD section\n");
} }
} }
else { else {
$line_of_tokens->{_line_type} = 'POD_START'; $line_of_tokens->{_line_type} = 'POD_START';
write_logfile_entry("Entering POD section\n"); $write_logfile_entry->("Entering POD section\n");
} }
return $line_of_tokens; return $line_of_tokens;
} }
# handle start of skipped section
if ( $tokenizer_self->[_in_skipped_] ) {
# NOTE: marked as the existing type 'FORMAT' to keep html working
$line_of_tokens->{_line_type} = 'FORMAT';
$write_logfile_entry->("Entering code-skipping section\n");
return $line_of_tokens;
}
# Update indentation levels for log messages. # Update indentation levels for log messages.
# Skip blank lines and also block comments, unless a logfile is requested. # Skip blank lines and also block comments, unless a logfile is requested.
# Note that _line_of_text_ is the input line but trimmed from left to right. # Note that _line_of_text_ is the input line but trimmed from left to right.
my $lot = $tokenizer_self->[_line_of_text_]; my $lot = $tokenizer_self->[_line_of_text_];
if ( $lot && ( $self->[_rOpts_logfile_] || substr( $lot, 0, 1 ) ne '#' ) ) { if ( $lot && ( $self->[_rOpts_logfile_] || substr( $lot, 0, 1 ) ne '#' ) ) {
my $rlevels = $line_of_tokens->{_rlevels}; my $rlevels = $line_of_tokens->{_rlevels};
$line_of_tokens->{_guessed_indentation_level} = $line_of_tokens->{_guessed_indentation_level} =
guess_old_indentation_level($input_line); guess_old_indentation_level($input_line);
} }
# see if this line contains here doc targets # see if this line contains here doc targets
my $rhere_target_list = $tokenizer_self->[_rhere_target_list_]; my $rhere_target_list = $tokenizer_self->[_rhere_target_list_];
if ( @{$rhere_target_list} ) { if ( @{$rhere_target_list} ) {
my ( $here_doc_target, $here_quote_character ) = my ( $here_doc_target, $here_quote_character ) =
@{ shift @{$rhere_target_list} }; @{ shift @{$rhere_target_list} };
$tokenizer_self->[_in_here_doc_] = 1; $tokenizer_self->[_in_here_doc_] = 1;
$tokenizer_self->[_here_doc_target_] = $here_doc_target; $tokenizer_self->[_here_doc_target_] = $here_doc_target;
$tokenizer_self->[_here_quote_character_] = $here_quote_character; $tokenizer_self->[_here_quote_character_] = $here_quote_character;
write_logfile_entry("Entering HERE document $here_doc_target\n"); $write_logfile_entry->("Entering HERE document $here_doc_target\n");
$tokenizer_self->[_started_looking_for_here_target_at_] = $tokenizer_self->[_started_looking_for_here_target_at_] =
$input_line_number; $input_line_number;
} }
# NOTE: __END__ and __DATA__ statements are written unformatted # NOTE: __END__ and __DATA__ statements are written unformatted
# because they can theoretically contain additional characters # because they can theoretically contain additional characters
# which are not tokenized (and cannot be read with <DATA> either!). # which are not tokenized (and cannot be read with <DATA> either!).
if ( $tokenizer_self->[_in_data_] ) { if ( $tokenizer_self->[_in_data_] ) {
$line_of_tokens->{_line_type} = 'DATA_START'; $line_of_tokens->{_line_type} = 'DATA_START';
write_logfile_entry("Starting __DATA__ section\n"); $write_logfile_entry->("Starting __DATA__ section\n");
$tokenizer_self->[_saw_data_] = 1; $tokenizer_self->[_saw_data_] = 1;
# keep parsing after __DATA__ if use SelfLoader was seen # keep parsing after __DATA__ if use SelfLoader was seen
if ( $tokenizer_self->[_saw_selfloader_] ) { if ( $tokenizer_self->[_saw_selfloader_] ) {
$tokenizer_self->[_in_data_] = 0; $tokenizer_self->[_in_data_] = 0;
write_logfile_entry( $write_logfile_entry->(
"SelfLoader seen, continuing; -nlsl deactivates\n"); "SelfLoader seen, continuing; -nlsl deactivates\n");
} }
return $line_of_tokens; return $line_of_tokens;
} }
elsif ( $tokenizer_self->[_in_end_] ) { elsif ( $tokenizer_self->[_in_end_] ) {
$line_of_tokens->{_line_type} = 'END_START'; $line_of_tokens->{_line_type} = 'END_START';
write_logfile_entry("Starting __END__ section\n"); $write_logfile_entry->("Starting __END__ section\n");
$tokenizer_self->[_saw_end_] = 1; $tokenizer_self->[_saw_end_] = 1;
# keep parsing after __END__ if use AutoLoader was seen # keep parsing after __END__ if use AutoLoader was seen
if ( $tokenizer_self->[_saw_autoloader_] ) { if ( $tokenizer_self->[_saw_autoloader_] ) {
$tokenizer_self->[_in_end_] = 0; $tokenizer_self->[_in_end_] = 0;
write_logfile_entry( $write_logfile_entry->(
"AutoLoader seen, continuing; -nlal deactivates\n"); "AutoLoader seen, continuing; -nlal deactivates\n");
} }
return $line_of_tokens; return $line_of_tokens;
} }
# now, finally, we know that this line is type 'CODE' # now, finally, we know that this line is type 'CODE'
$line_of_tokens->{_line_type} = 'CODE'; $line_of_tokens->{_line_type} = 'CODE';
# remember if we have seen any real code # remember if we have seen any real code
if ( !$tokenizer_self->[_started_tokenizing_] if ( !$tokenizer_self->[_started_tokenizing_]
skipping to change at line 1064 skipping to change at line 1161
} }
if ( $tokenizer_self->[_debugger_object_] ) { if ( $tokenizer_self->[_debugger_object_] ) {
$tokenizer_self->[_debugger_object_] $tokenizer_self->[_debugger_object_]
->write_debug_entry($line_of_tokens); ->write_debug_entry($line_of_tokens);
} }
# Note: if keyword 'format' occurs in this line code, it is still CODE # Note: if keyword 'format' occurs in this line code, it is still CODE
# (keyword 'format' need not start a line) # (keyword 'format' need not start a line)
if ( $tokenizer_self->[_in_format_] ) { if ( $tokenizer_self->[_in_format_] ) {
write_logfile_entry("Entering format section\n"); $write_logfile_entry->("Entering format section\n");
} }
if ( $tokenizer_self->[_in_quote_] if ( $tokenizer_self->[_in_quote_]
and ( $tokenizer_self->[_line_start_quote_] < 0 ) ) and ( $tokenizer_self->[_line_start_quote_] < 0 ) )
{ {
#if ( ( my $quote_target = get_quote_target() ) !~ /^\s*$/ ) { #if ( ( my $quote_target = get_quote_target() ) !~ /^\s*$/ ) {
if ( ( my $quote_target = $tokenizer_self->[_quote_target_] ) !~ if ( ( my $quote_target = $tokenizer_self->[_quote_target_] ) !~
/^\s*$/ ) /^\s*$/ )
{ {
$tokenizer_self->[_line_start_quote_] = $input_line_number; $tokenizer_self->[_line_start_quote_] = $input_line_number;
write_logfile_entry( $write_logfile_entry->(
"Start multi-line quote or pattern ending in $quote_target\n"); "Start multi-line quote or pattern ending in $quote_target\n");
} }
} }
elsif ( ( $tokenizer_self->[_line_start_quote_] >= 0 ) elsif ( ( $tokenizer_self->[_line_start_quote_] >= 0 )
&& !$tokenizer_self->[_in_quote_] ) && !$tokenizer_self->[_in_quote_] )
{ {
$tokenizer_self->[_line_start_quote_] = -1; $tokenizer_self->[_line_start_quote_] = -1;
write_logfile_entry("End of multi-line quote or pattern\n"); $write_logfile_entry->("End of multi-line quote or pattern\n");
} }
# we are returning a line of CODE # we are returning a line of CODE
return $line_of_tokens; return $line_of_tokens;
} }
sub find_starting_indentation_level { sub find_starting_indentation_level {
# We need to find the indentation level of the first line of the # We need to find the indentation level of the first line of the
# script being formatted. Often it will be zero for an entire file, # script being formatted. Often it will be zero for an entire file,
skipping to change at line 2001 skipping to change at line 2098
$container_type = $statement_type; $container_type = $statement_type;
} }
else { else {
$container_type = $last_nonblank_token; $container_type = $last_nonblank_token;
# We can check for a syntax error here of unexpected '(', # We can check for a syntax error here of unexpected '(',
# but this is going to get messy... # but this is going to get messy...
if ( if (
$expecting == OPERATOR $expecting == OPERATOR
# be sure this is not a method call of the form # Be sure this is not a method call of the form
# &method(...), $method->(..), &{method}(...), # &method(...), $method->(..), &{method}(...),
# $ref[2](list) is ok & short for $ref[2]->(list) # $ref[2](list) is ok & short for $ref[2]->(list)
# NOTE: at present, braces in something like &{ xxx } # NOTE: at present, braces in something like &{ xxx }
# are not marked as a block, we might have a method call # are not marked as a block, we might have a method call.
&& $last_nonblank_token !~ /^([\]\}\&]|\-\>)/ # Added ')' to fix case c017, something like ()()()
&& $last_nonblank_token !~ /^([\]\}\)\&]|\-\>)/
) )
{ {
# ref: camel 3 p 703. # ref: camel 3 p 703.
if ( $last_last_nonblank_token eq 'do' ) { if ( $last_last_nonblank_token eq 'do' ) {
complain( complain(
"do SUBROUTINE is deprecated; consider & or -> notation\n" "do SUBROUTINE is deprecated; consider & or -> notation\n"
); );
} }
else { else {
# if this is an empty list, (), then it is not an # if this is an empty list, (), then it is not an
# error; for example, we might have a constant pi and # error; for example, we might have a constant pi and
# invoke it with pi() or just pi; # invoke it with pi() or just pi;
my ( $next_nonblank_token, $i_next ) = my ( $next_nonblank_token, $i_next ) =
find_next_nonblank_token( $i, $rtokens, find_next_nonblank_token( $i, $rtokens,
$max_token_index ); $max_token_index );
if ( $next_nonblank_token ne ')' ) {
# Patch for c029: give up error check if
# a side comment follows
if ( $next_nonblank_token ne ')'
&& $next_nonblank_token ne '#' )
{
my $hint; my $hint;
# FIXME: this gives an error parsing something like
# $subsubs[0]()(0);
# which is a valid syntax (see subsub.t). We may
# need to revise this coding.
error_if_expecting_OPERATOR('('); error_if_expecting_OPERATOR('(');
if ( $last_nonblank_type eq 'C' ) { if ( $last_nonblank_type eq 'C' ) {
$hint = $hint =
"$last_nonblank_token has a void prototype\n"; "$last_nonblank_token has a void prototype\n";
} }
elsif ( $last_nonblank_type eq 'i' ) { elsif ( $last_nonblank_type eq 'i' ) {
if ( $i_tok > 0 if ( $i_tok > 0
&& $last_nonblank_token =~ /^\$/ ) && $last_nonblank_token =~ /^\$/ )
{ {
skipping to change at line 2135 skipping to change at line 2234
} }
} }
if ( $paren_depth > 0 ) { $paren_depth-- } if ( $paren_depth > 0 ) { $paren_depth-- }
}, },
',' => sub { ',' => sub {
if ( $last_nonblank_type eq ',' ) { if ( $last_nonblank_type eq ',' ) {
complain("Repeated ','s \n"); complain("Repeated ','s \n");
} }
# Note that we have to check both token and type here because a
# comma following a qw list can have last token='(' but type = 'q'
elsif ( $last_nonblank_token eq '(' && $last_nonblank_type eq '{' )
{
warning("Unexpected leading ',' after a '('\n");
}
# patch for operator_expected: note if we are in the list (use.t) # patch for operator_expected: note if we are in the list (use.t)
if ( $statement_type eq 'use' ) { $statement_type = '_use' } if ( $statement_type eq 'use' ) { $statement_type = '_use' }
## FIXME: need to move this elsewhere, perhaps check after a '('
## elsif ($last_nonblank_token eq '(') {
## warning("Leading ','s illegal in some versions of perl\n")
;
## }
}, },
';' => sub { ';' => sub {
$context = UNKNOWN_CONTEXT; $context = UNKNOWN_CONTEXT;
$statement_type = ''; $statement_type = '';
$want_paren = ""; $want_paren = "";
# /^(for|foreach)$/ # /^(for|foreach)$/
if ( $is_for_foreach{ $paren_type[$paren_depth] } ) if ( $is_for_foreach{ $paren_type[$paren_depth] } )
{ # mark ; in for loop { # mark ; in for loop
skipping to change at line 2193 skipping to change at line 2296
if ( $expecting == OPERATOR ); if ( $expecting == OPERATOR );
$in_quote = 1; $in_quote = 1;
$type = 'Q'; $type = 'Q';
$allowed_quote_modifiers = ""; $allowed_quote_modifiers = "";
}, },
'/' => sub { '/' => sub {
my $is_pattern; my $is_pattern;
# a pattern cannot follow certain keywords which take optional # a pattern cannot follow certain keywords which take optional
# arguments, like 'shift' and 'pop'. See also '?'. # arguments, like 'shift' and 'pop'. See also '?'.
if ( $last_nonblank_type eq 'k' if (
$last_nonblank_type eq 'k'
&& $is_keyword_rejecting_slash_as_pattern_delimiter{ && $is_keyword_rejecting_slash_as_pattern_delimiter{
$last_nonblank_token} ) $last_nonblank_token}
)
{ {
$is_pattern = 0; $is_pattern = 0;
} }
elsif ( $expecting == UNKNOWN ) { # indeterminate, must guess.. elsif ( $expecting == UNKNOWN ) { # indeterminate, must guess..
my $msg; my $msg;
( $is_pattern, $msg ) = ( $is_pattern, $msg ) =
guess_if_pattern_or_division( $i, $rtokens, $rtoken_map, guess_if_pattern_or_division( $i, $rtokens, $rtoken_map,
$max_token_index ); $max_token_index );
if ($msg) { if ($msg) {
skipping to change at line 2225 skipping to change at line 2330
$allowed_quote_modifiers = '[msixpodualngc]'; $allowed_quote_modifiers = '[msixpodualngc]';
} }
else { # not a pattern; check for a /= token else { # not a pattern; check for a /= token
if ( $rtokens->[ $i + 1 ] eq '=' ) { # form token /= if ( $rtokens->[ $i + 1 ] eq '=' ) { # form token /=
$i++; $i++;
$tok = '/='; $tok = '/=';
$type = $tok; $type = $tok;
} }
#DEBUG - collecting info on what tokens follow a divide #DEBUG - collecting info on what tokens follow a divide
# for development of guessing algorithm # for development of guessing algorithm
#if ( numerator_expected( $i, $rtokens, $max_token_index ) < 0 ) { #if ( is_possible_numerator( $i, $rtokens, $max_token_index ) < 0 ) {
# #write_diagnostics( "DIVIDE? $input_line\n" ); # #write_diagnostics( "DIVIDE? $input_line\n" );
#} #}
} }
}, },
'{' => sub { '{' => sub {
# if we just saw a ')', we will label this block with # if we just saw a ')', we will label this block with
# its type. We need to do this to allow sub # its type. We need to do this to allow sub
# code_block_type to determine if this brace starts a # code_block_type to determine if this brace starts a
# code block or anonymous hash. (The type of a paren # code block or anonymous hash. (The type of a paren
# pair is the preceding token, such as 'if', 'else', # pair is the preceding token, such as 'if', 'else',
# etc). # etc).
$container_type = ""; $container_type = "";
# ATTRS: for a '{' following an attribute list, reset # ATTRS: for a '{' following an attribute list, reset
# things to look like we just saw the sub name # things to look like we just saw the sub name
# FIXME: need to end with \b here?? if ( $statement_type =~ /^sub\b/ ) {
if ( $statement_type =~ /^sub/ ) {
$last_nonblank_token = $statement_type; $last_nonblank_token = $statement_type;
$last_nonblank_type = 'i'; $last_nonblank_type = 'i';
$statement_type = ""; $statement_type = "";
} }
# patch for SWITCH/CASE: hide these keywords from an immediately # patch for SWITCH/CASE: hide these keywords from an immediately
# following opening brace # following opening brace
elsif ( ( $statement_type eq 'case' || $statement_type eq 'when' ) elsif ( ( $statement_type eq 'case' || $statement_type eq 'when' )
&& $statement_type eq $last_nonblank_token ) && $statement_type eq $last_nonblank_token )
{ {
skipping to change at line 2435 skipping to change at line 2539
else { else {
} }
}, },
'?' => sub { # ?: conditional or starting pattern? '?' => sub { # ?: conditional or starting pattern?
my $is_pattern; my $is_pattern;
# Patch for rt #126965 # Patch for rt #126965
# a pattern cannot follow certain keywords which take optional # a pattern cannot follow certain keywords which take optional
# arguments, like 'shift' and 'pop'. See also '/'. # arguments, like 'shift' and 'pop'. See also '/'.
if ( $last_nonblank_type eq 'k' if (
$last_nonblank_type eq 'k'
&& $is_keyword_rejecting_question_as_pattern_delimiter{ && $is_keyword_rejecting_question_as_pattern_delimiter{
$last_nonblank_token} ) $last_nonblank_token}
)
{ {
$is_pattern = 0; $is_pattern = 0;
} }
# patch for RT#131288, user constant function without prototype # patch for RT#131288, user constant function without prototype
# last type is 'U' followed by ?. # last type is 'U' followed by ?.
elsif ( $last_nonblank_type =~ /^[FUY]$/ ) { elsif ( $last_nonblank_type =~ /^[FUY]$/ ) {
$is_pattern = 0; $is_pattern = 0;
} }
elsif ( $expecting == UNKNOWN ) { elsif ( $expecting == UNKNOWN ) {
skipping to change at line 2481 skipping to change at line 2587
$allowed_quote_modifiers = '[msixpodualngc]'; $allowed_quote_modifiers = '[msixpodualngc]';
} }
else { else {
( $type_sequence, $indent_flag ) = ( $type_sequence, $indent_flag ) =
increase_nesting_depth( QUESTION_COLON, increase_nesting_depth( QUESTION_COLON,
$rtoken_map->[$i_tok] ); $rtoken_map->[$i_tok] );
} }
}, },
'*' => sub { # typeglob, or multiply? '*' => sub { # typeglob, or multiply?
if ( $expecting == UNKNOWN && $last_nonblank_type eq 'Z' ) {
if ( $next_type ne 'b'
&& $next_type ne '('
&& $next_type ne '#' ) # Fix c036
{
$expecting = TERM;
}
}
if ( $expecting == TERM ) { if ( $expecting == TERM ) {
scan_identifier_fast(); scan_identifier_fast();
} }
else { else {
if ( $rtokens->[ $i + 1 ] eq '=' ) { if ( $rtokens->[ $i + 1 ] eq '=' ) {
$tok = '*='; $tok = '*=';
$type = $tok; $type = $tok;
$i++; $i++;
} }
skipping to change at line 2530 skipping to change at line 2644
$type = 'J'; $type = 'J';
} }
# ATTRS: check for a ':' which introduces an attribute list # ATTRS: check for a ':' which introduces an attribute list
# either after a 'sub' keyword or within a paren list # either after a 'sub' keyword or within a paren list
elsif ( $statement_type =~ /^sub\b/ ) { elsif ( $statement_type =~ /^sub\b/ ) {
$type = 'A'; $type = 'A';
$in_attribute_list = 1; $in_attribute_list = 1;
} }
# Withing a signature, unless we are in a ternary. For example, # Within a signature, unless we are in a ternary. For example,
# from 't/filter_example.t': # from 't/filter_example.t':
# method foo4 ( $class: $bar ) { $class->bar($bar) } # method foo4 ( $class: $bar ) { $class->bar($bar) }
elsif ( $paren_type[$paren_depth] =~ /^sub\b/ elsif ( $paren_type[$paren_depth] =~ /^sub\b/
&& !is_balanced_closing_container(QUESTION_COLON) ) && !is_balanced_closing_container(QUESTION_COLON) )
{ {
$type = 'A'; $type = 'A';
$in_attribute_list = 1; $in_attribute_list = 1;
} }
# check for scalar attribute, such as # check for scalar attribute, such as
skipping to change at line 2597 skipping to change at line 2711
} }
}, },
'@' => sub { '@' => sub {
error_if_expecting_OPERATOR("Array") error_if_expecting_OPERATOR("Array")
if ( $expecting == OPERATOR ); if ( $expecting == OPERATOR );
scan_identifier_fast(); scan_identifier_fast();
}, },
'%' => sub { # hash or modulo? '%' => sub { # hash or modulo?
# first guess is hash if no following blank # first guess is hash if no following blank or paren
if ( $expecting == UNKNOWN ) { if ( $expecting == UNKNOWN ) {
if ( $next_type ne 'b' ) { $expecting = TERM } if ( $next_type ne 'b' && $next_type ne '(' ) {
$expecting = TERM;
}
} }
if ( $expecting == TERM ) { if ( $expecting == TERM ) {
scan_identifier_fast(); scan_identifier_fast();
} }
}, },
'[' => sub { '[' => sub {
$square_bracket_type[ ++$square_bracket_depth ] = $square_bracket_type[ ++$square_bracket_depth ] =
$last_nonblank_token; $last_nonblank_token;
( $type_sequence, $indent_flag ) = ( $type_sequence, $indent_flag ) =
increase_nesting_depth( SQUARE_BRACKET, $rtoken_map->[$i_tok] ); increase_nesting_depth( SQUARE_BRACKET, $rtoken_map->[$i_tok] );
skipping to change at line 2804 skipping to change at line 2920
} }
} }
} }
else { else {
} }
}, },
'->' => sub { '->' => sub {
# if -> points to a bare word, we must scan for an identifier, # if -> points to a bare word, we must scan for an identifier,
# otherwise something like ->y would look like the y operator # otherwise something like ->y would look like the y operator
# NOTE: this will currently allow things like
# '->@array' '->*VAR' '->%hash'
# to get parsed as identifiers, even though these are not currently
# allowed syntax. To catch syntax errors like this we could first
# check that the next character and skip this call if it is one of
# ' @ % * '. A disadvantage with doing this is that this would
# have to be fixed if the perltidy syntax is ever extended to make
# any of these valid. So for now this check is not done.
scan_identifier_fast(); scan_identifier_fast();
}, },
# type = 'pp' for pre-increment, '++' for post-increment # type = 'pp' for pre-increment, '++' for post-increment
'++' => sub { '++' => sub {
if ( $expecting == TERM ) { $type = 'pp' } if ( $expecting == TERM ) { $type = 'pp' }
elsif ( $expecting == UNKNOWN ) { elsif ( $expecting == UNKNOWN ) {
my ( $next_nonblank_token, $i_next ) = my ( $next_nonblank_token, $i_next ) =
find_next_nonblank_token( $i, $rtokens, $max_token_index ); find_next_nonblank_token( $i, $rtokens, $max_token_index );
# Fix for c042: look past a side comment
if ( $next_nonblank_token eq '#' ) {
( $next_nonblank_token, $i_next ) =
find_next_nonblank_token( $max_token_index,
$rtokens, $max_token_index );
}
if ( $next_nonblank_token eq '$' ) { $type = 'pp' } if ( $next_nonblank_token eq '$' ) { $type = 'pp' }
} }
}, },
'=>' => sub { '=>' => sub {
if ( $last_nonblank_type eq $tok ) { if ( $last_nonblank_type eq $tok ) {
complain("Repeated '=>'s \n"); complain("Repeated '=>'s \n");
} }
# patch for operator_expected: note if we are in the list (use.t) # patch for operator_expected: note if we are in the list (use.t)
skipping to change at line 2834 skipping to change at line 2968
if ( $statement_type eq 'use' ) { $statement_type = '_use' } if ( $statement_type eq 'use' ) { $statement_type = '_use' }
}, },
# type = 'mm' for pre-decrement, '--' for post-decrement # type = 'mm' for pre-decrement, '--' for post-decrement
'--' => sub { '--' => sub {
if ( $expecting == TERM ) { $type = 'mm' } if ( $expecting == TERM ) { $type = 'mm' }
elsif ( $expecting == UNKNOWN ) { elsif ( $expecting == UNKNOWN ) {
my ( $next_nonblank_token, $i_next ) = my ( $next_nonblank_token, $i_next ) =
find_next_nonblank_token( $i, $rtokens, $max_token_index ); find_next_nonblank_token( $i, $rtokens, $max_token_index );
# Fix for c042: look past a side comment
if ( $next_nonblank_token eq '#' ) {
( $next_nonblank_token, $i_next ) =
find_next_nonblank_token( $max_token_index,
$rtokens, $max_token_index );
}
if ( $next_nonblank_token eq '$' ) { $type = 'mm' } if ( $next_nonblank_token eq '$' ) { $type = 'mm' }
} }
}, },
'&&' => sub { '&&' => sub {
error_if_expecting_TERM() error_if_expecting_TERM()
if ( $expecting == TERM ); if ( $expecting == TERM && $last_nonblank_token ne ',' ); #c015
}, },
'||' => sub { '||' => sub {
error_if_expecting_TERM() error_if_expecting_TERM()
if ( $expecting == TERM ); if ( $expecting == TERM && $last_nonblank_token ne ',' ); #c015
}, },
'//' => sub { '//' => sub {
error_if_expecting_TERM() error_if_expecting_TERM()
if ( $expecting == TERM ); if ( $expecting == TERM );
}, },
}; };
# ------------------------------------------------------------ # ------------------------------------------------------------
# end hash of code for handling individual token types # end hash of code for handling individual token types
skipping to change at line 2903 skipping to change at line 3045
my %is_redo_last_next_goto; my %is_redo_last_next_goto;
@_ = qw(redo last next goto); @_ = qw(redo last next goto);
@is_redo_last_next_goto{@_} = (1) x scalar(@_); @is_redo_last_next_goto{@_} = (1) x scalar(@_);
my %is_use_require; my %is_use_require;
@_ = qw(use require); @_ = qw(use require);
@is_use_require{@_} = (1) x scalar(@_); @is_use_require{@_} = (1) x scalar(@_);
# This hash holds the array index in $tokenizer_self for these keywords: # This hash holds the array index in $tokenizer_self for these keywords:
my %is_format_END_DATA = ( # Fix for issue c035: removed 'format' from this hash
'format' => _in_format_, my %is_END_DATA = (
'__END__' => _in_end_, '__END__' => _in_end_,
'__DATA__' => _in_data_, '__DATA__' => _in_data_,
); );
# original ref: camel 3 p 147, # original ref: camel 3 p 147,
# but perl may accept undocumented flags # but perl may accept undocumented flags
# perl 5.10 adds 'p' (preserve) # perl 5.10 adds 'p' (preserve)
# Perl version 5.22 added 'n' # Perl version 5.22 added 'n'
# From http://perldoc.perl.org/perlop.html we have # From http://perldoc.perl.org/perlop.html we have
# /PATTERN/msixpodualngc or m?PATTERN?msixpodualngc # /PATTERN/msixpodualngc or m?PATTERN?msixpodualngc
skipping to change at line 3117 skipping to change at line 3259
$block_type = $last_nonblank_block_type; $block_type = $last_nonblank_block_type;
$container_type = $last_nonblank_container_type; $container_type = $last_nonblank_container_type;
$type_sequence = $last_nonblank_type_sequence; $type_sequence = $last_nonblank_type_sequence;
$indent_flag = 0; $indent_flag = 0;
$peeked_ahead = 0; $peeked_ahead = 0;
# tokenization is done in two stages.. # tokenization is done in two stages..
# stage 1 is a very simple pre-tokenization # stage 1 is a very simple pre-tokenization
my $max_tokens_wanted = 0; # this signals pre_tokenize to get all tokens my $max_tokens_wanted = 0; # this signals pre_tokenize to get all tokens
# a little optimization for a full-line comment # optimize for a full-line comment
if ( !$in_quote && substr( $input_line, 0, 1 ) eq '#' ) { if ( !$in_quote && substr( $input_line, 0, 1 ) eq '#' ) {
$max_tokens_wanted = 1 # no use tokenizing a comment $max_tokens_wanted = 1; # no use tokenizing a comment
# and check for skipped section
if ( $rOpts_code_skipping
&& $input_line =~ /$code_skipping_pattern_begin/ )
{
$tokenizer_self->[_in_skipped_] = 1;
return;
}
} }
# start by breaking the line into pre-tokens # start by breaking the line into pre-tokens
( $rtokens, $rtoken_map, $rtoken_type ) = ( $rtokens, $rtoken_map, $rtoken_type ) =
pre_tokenize( $input_line, $max_tokens_wanted ); pre_tokenize( $input_line, $max_tokens_wanted );
$max_token_index = scalar( @{$rtokens} ) - 1; $max_token_index = scalar( @{$rtokens} ) - 1;
push( @{$rtokens}, ' ', ' ', ' ' ); # extra whitespace simplifies logic push( @{$rtokens}, ' ', ' ', ' ' ); # extra whitespace simplifies logic
push( @{$rtoken_map}, 0, 0, 0 ); # shouldn't be referenced push( @{$rtoken_map}, 0, 0, 0 ); # shouldn't be referenced
push( @{$rtoken_type}, 'b', 'b', 'b' ); push( @{$rtoken_type}, 'b', 'b', 'b' );
skipping to change at line 3302 skipping to change at line 3452
$last_nonblank_container_type; $last_nonblank_container_type;
$last_last_nonblank_type_sequence = $last_last_nonblank_type_sequence =
$last_nonblank_type_sequence; $last_nonblank_type_sequence;
$last_nonblank_token = $tok; $last_nonblank_token = $tok;
$last_nonblank_type = $type; $last_nonblank_type = $type;
$last_nonblank_prototype = $prototype; $last_nonblank_prototype = $prototype;
$last_nonblank_block_type = $block_type; $last_nonblank_block_type = $block_type;
$last_nonblank_container_type = $container_type; $last_nonblank_container_type = $container_type;
$last_nonblank_type_sequence = $type_sequence; $last_nonblank_type_sequence = $type_sequence;
$last_nonblank_i = $i_tok; $last_nonblank_i = $i_tok;
# Patch for c030: Fix things in case a '->' got separated from
# the subsequent identifier by a side comment. We need the
# last_nonblank_token to have a leading -> to avoid triggering
# an operator expected error message at the next '('. See also
# fix for git #63.
if ( $last_last_nonblank_token eq '->' ) {
if ( $last_nonblank_type eq 'w'
|| $last_nonblank_type eq 'i'
&& substr( $last_nonblank_token, 0, 1 ) eq '$' )
{
$last_nonblank_token = '->' . $last_nonblank_token;
$last_nonblank_type = 'i';
}
}
} }
# store previous token type # store previous token type
if ( $i_tok >= 0 ) { if ( $i_tok >= 0 ) {
$routput_token_type->[$i_tok] = $type; $routput_token_type->[$i_tok] = $type;
$routput_block_type->[$i_tok] = $block_type; $routput_block_type->[$i_tok] = $block_type;
$routput_container_type->[$i_tok] = $container_type; $routput_container_type->[$i_tok] = $container_type;
$routput_type_sequence->[$i_tok] = $type_sequence; $routput_type_sequence->[$i_tok] = $type_sequence;
$routput_indent_flag->[$i_tok] = $indent_flag; $routput_indent_flag->[$i_tok] = $indent_flag;
} }
skipping to change at line 3445 skipping to change at line 3610
local $" = ')('; local $" = ')(';
my @debug_list = ( my @debug_list = (
$last_nonblank_token, $tok, $last_nonblank_token, $tok,
$next_tok, $brace_depth, $next_tok, $brace_depth,
$brace_type[$brace_depth], $paren_depth, $brace_type[$brace_depth], $paren_depth,
$paren_type[$paren_depth] $paren_type[$paren_depth]
); );
print STDOUT "TOKENIZE:(@debug_list)\n"; print STDOUT "TOKENIZE:(@debug_list)\n";
}; };
# turn off attribute list on first non-blank, non-bareword # Turn off attribute list on first non-blank, non-bareword.
if ( $pre_type ne 'w' ) { $in_attribute_list = 0 } # Added '#' to fix c038.
if ( $pre_type ne 'w' && $pre_type ne '#' ) {
$in_attribute_list = 0;
}
############################################################### ###############################################################
# We have the next token, $tok. # We have the next token, $tok.
# Now we have to examine this token and decide what it is # Now we have to examine this token and decide what it is
# and define its $type # and define its $type
# #
# section 1: bare words # section 1: bare words
############################################################### ###############################################################
if ( $pre_type eq 'w' ) { if ( $pre_type eq 'w' ) {
$expecting = $expecting =
operator_expected( [ $prev_type, $tok, $next_type ] ); operator_expected( [ $prev_type, $tok, $next_type ] );
# Patch for c043, part 3: A bareword after '->' expects a TERM
# FIXME: It would be cleaner to give method calls a new type 'M'
# and update sub operator_expected to handle this.
if ( $last_nonblank_type eq '->' ) {
$expecting = TERM;
}
my ( $next_nonblank_token, $i_next ) = my ( $next_nonblank_token, $i_next ) =
find_next_nonblank_token( $i, $rtokens, $max_token_index ); find_next_nonblank_token( $i, $rtokens, $max_token_index );
# ATTRS: handle sub and variable attributes # ATTRS: handle sub and variable attributes
if ($in_attribute_list) { if ($in_attribute_list) {
# treat bare word followed by open paren like qw( # treat bare word followed by open paren like qw(
if ( $next_nonblank_token eq '(' ) { if ( $next_nonblank_token eq '(' ) {
# For something like: # For something like:
skipping to change at line 3589 skipping to change at line 3765
$last_nonblank_type eq 'L' $last_nonblank_type eq 'L'
|| ( $last_nonblank_type eq 'm' || ( $last_nonblank_type eq 'm'
&& $last_last_nonblank_type eq 'L' ) && $last_last_nonblank_type eq 'L' )
) )
) )
{ {
$type = 'w'; $type = 'w';
next; next;
} }
# Scan a bare word following a -> as an identifir; it could
# have a long package name. Fixes c037, c041.
if ( $last_nonblank_token eq '->' ) {
scan_bare_identifier();
# Patch for c043, part 4; use type 'w' after a '->'.
# This is just a safety check on sub scan_bare_identifier,
# which should get this case correct.
$type = 'w';
next;
}
# a bare word immediately followed by :: is not a keyword; # a bare word immediately followed by :: is not a keyword;
# use $tok_kw when testing for keywords to avoid a mistake # use $tok_kw when testing for keywords to avoid a mistake
my $tok_kw = $tok; my $tok_kw = $tok;
if ( $rtokens->[ $i + 1 ] eq ':' if ( $rtokens->[ $i + 1 ] eq ':'
&& $rtokens->[ $i + 2 ] eq ':' ) && $rtokens->[ $i + 2 ] eq ':' )
{ {
$tok_kw .= '::'; $tok_kw .= '::';
} }
# Decide if 'sub :' can be the start of a sub attribute list.
# We will decide based on if the colon is followed by a
# bareword which is not a keyword.
my $sub_attribute_ok_here;
if ( $is_sub{$tok_kw}
&& $expecting != OPERATOR
&& $next_nonblank_token eq ':' )
{
my ( $nn_nonblank_token, $i_nn ) =
find_next_nonblank_token( $i_next + 1,
$rtokens, $max_token_index );
$sub_attribute_ok_here =
$nn_nonblank_token =~ /^\w/
&& $nn_nonblank_token !~ /^\d/
&& !$is_keyword{$nn_nonblank_token};
}
# handle operator x (now we know it isn't $x=) # handle operator x (now we know it isn't $x=)
if ( $expecting == OPERATOR if ( $expecting == OPERATOR
&& substr( $tok, 0, 1 ) eq 'x' && substr( $tok, 0, 1 ) eq 'x'
&& $tok =~ /^x\d*$/ ) && $tok =~ /^x\d*$/ )
{ {
if ( $tok eq 'x' ) { if ( $tok eq 'x' ) {
if ( $rtokens->[ $i + 1 ] eq '=' ) { # x= if ( $rtokens->[ $i + 1 ] eq '=' ) { # x=
$tok = 'x='; $tok = 'x=';
$type = $tok; $type = $tok;
$i++; $i++;
} }
else { else {
$type = 'x'; $type = 'x';
} }
} }
# FIXME: Patch: mark something like x4 as an integer for now # NOTE: mark something like x4 as an integer for now
# It gets fixed downstream. This is easier than # It gets fixed downstream. This is easier than
# splitting the pretoken. # splitting the pretoken.
else { else {
$type = 'n'; $type = 'n';
} }
} }
elsif ( $tok_kw eq 'CORE::' ) { elsif ( $tok_kw eq 'CORE::' ) {
$type = $tok = $tok_kw; $type = $tok = $tok_kw;
$i += 2; $i += 2;
} }
skipping to change at line 3702 skipping to change at line 3907
# NOTE: This warning is deactivated because recent # NOTE: This warning is deactivated because recent
# versions of perl do not complain here, but # versions of perl do not complain here, but
# the coding is retained for reference. # the coding is retained for reference.
if ( 0 && $next_nonblank_token ne 'qw' ) { if ( 0 && $next_nonblank_token ne 'qw' ) {
warning( warning(
"Attempting to define constant '$next_nonblank_token' which is a perl keyword\n" "Attempting to define constant '$next_nonblank_token' which is a perl keyword\n"
); );
} }
} }
# FIXME: could check for error in which next token is
# not a word (number, punctuation, ..)
else { else {
$is_constant{$current_package}{$next_nonblank_token} $is_constant{$current_package}{$next_nonblank_token}
= 1; = 1;
} }
} }
} }
# various quote operators # various quote operators
elsif ( $is_q_qq_qw_qx_qr_s_y_tr_m{$tok} ) { elsif ( $is_q_qq_qw_qx_qr_s_y_tr_m{$tok} ) {
##NICOL PATCH ##NICOL PATCH
skipping to change at line 3761 skipping to change at line 3964
( $tok eq 'qw' && $tokenizer_self->[_trim_qw_] ) ( $tok eq 'qw' && $tokenizer_self->[_trim_qw_] )
? 'q' ? 'q'
: 'Q'; : 'Q';
$quote_type = $type; $quote_type = $type;
} }
# check for a statement label # check for a statement label
elsif ( elsif (
( $next_nonblank_token eq ':' ) ( $next_nonblank_token eq ':' )
&& ( $rtokens->[ $i_next + 1 ] ne ':' ) && ( $rtokens->[ $i_next + 1 ] ne ':' )
&& ( $i_next <= $max_token_index ) # colon on same line && ( $i_next <= $max_token_index ) # colon on same line
&& !$sub_attribute_ok_here # like 'sub : lvalue' ?
&& label_ok() && label_ok()
) )
{ {
if ( $tok !~ /[A-Z]/ ) { if ( $tok !~ /[A-Z]/ ) {
push @{ $tokenizer_self->[_rlower_case_labels_at_] }, push @{ $tokenizer_self->[_rlower_case_labels_at_] },
$input_line_number; $input_line_number;
} }
$type = 'J'; $type = 'J';
$tok .= ':'; $tok .= ':';
$i = $i_next; $i = $i_next;
skipping to change at line 3790 skipping to change at line 3994
scan_id(); scan_id();
} }
# 'package' # 'package'
elsif ( $is_package{$tok_kw} ) { elsif ( $is_package{$tok_kw} ) {
error_if_expecting_OPERATOR() error_if_expecting_OPERATOR()
if ( $expecting == OPERATOR ); if ( $expecting == OPERATOR );
scan_id(); scan_id();
} }
# Fix for c035: split 'format' from 'is_format_END_DATA' to be
# more restrictive. Require a new statement to be ok here.
elsif ( $tok_kw eq 'format' && new_statement_ok() ) {
$type = ';'; # make tokenizer look for TERM next
$tokenizer_self->[_in_format_] = 1;
last;
}
# Note on token types for format, __DATA__, __END__: # Note on token types for format, __DATA__, __END__:
# It simplifies things to give these type ';', so that when we # It simplifies things to give these type ';', so that when we
# start rescanning we will be expecting a token of type TERM. # start rescanning we will be expecting a token of type TERM.
# We will switch to type 'k' before outputting the tokens. # We will switch to type 'k' before outputting the tokens.
elsif ( $is_format_END_DATA{$tok_kw} ) { elsif ( $is_END_DATA{$tok_kw} ) {
$type = ';'; # make tokenizer look for TERM next $type = ';'; # make tokenizer look for TERM next
# Remember that we are in one of these three sections # Remember that we are in one of these three sections
$tokenizer_self->[ $is_format_END_DATA{$tok_kw} ] = 1; $tokenizer_self->[ $is_END_DATA{$tok_kw} ] = 1;
last; last;
} }
elsif ( $is_keyword{$tok_kw} ) { elsif ( $is_keyword{$tok_kw} ) {
$type = 'k'; $type = 'k';
# Since for and foreach may not be followed immediately # Since for and foreach may not be followed immediately
# by an opening paren, we have to remember which keyword # by an opening paren, we have to remember which keyword
# is associated with the next '(' # is associated with the next '('
if ( $is_for_foreach{$tok} ) { if ( $is_for_foreach{$tok} ) {
skipping to change at line 3946 skipping to change at line 4158
# '-' => \&sse_sub, # '-' => \&sse_sub,
# '*' => \&sse_mul, # '*' => \&sse_mul,
# '/' => \&sse_div; # '/' => \&sse_div;
# FIXME: this should eventually be generalized # FIXME: this should eventually be generalized
if ( $saw_use_module{$current_package}->{'RPerl'} if ( $saw_use_module{$current_package}->{'RPerl'}
&& $tok =~ /^sse_(mul|div|add|sub)$/ ) && $tok =~ /^sse_(mul|div|add|sub)$/ )
{ {
} }
# Fix part 1 for git #63 in which a comment falls
# between an -> and the following word. An
# alternate fix would be to change operator_expected
# to return an UNKNOWN for this type.
elsif ( $last_nonblank_type eq '->' ) {
}
# don't complain about possible indirect object # don't complain about possible indirect object
# notation. # notation.
# For example: # For example:
# package main; # package main;
# sub new($) { ... } # sub new($) { ... }
# $b = new A::; # calls A::new # $b = new A::; # calls A::new
# $c = new A; # same thing but suspicious # $c = new A; # same thing but suspicious
# This will call A::new but we have a 'new' in # This will call A::new but we have a 'new' in
# main:: which looks like a constant. # main:: which looks like a constant.
# #
skipping to change at line 3973 skipping to change at line 4193
} }
else { else {
error_if_expecting_OPERATOR("bareword"); error_if_expecting_OPERATOR("bareword");
} }
} }
# mark bare words immediately followed by a paren as # mark bare words immediately followed by a paren as
# functions # functions
$next_tok = $rtokens->[ $i + 1 ]; $next_tok = $rtokens->[ $i + 1 ];
if ( $next_tok eq '(' ) { if ( $next_tok eq '(' ) {
$type = 'U';
# Fix part 2 for git #63. Leave type as 'w' to keep
# the type the same as if the -> were not separated
$type = 'U' unless ( $last_nonblank_type eq '->' );
} }
# underscore after file test operator is file handle # underscore after file test operator is file handle
if ( $tok eq '_' && $last_nonblank_type eq 'F' ) { if ( $tok eq '_' && $last_nonblank_type eq 'F' ) {
$type = 'Z'; $type = 'Z';
} }
# patch for SWITCH/CASE if 'case' and 'when are # patch for SWITCH/CASE if 'case' and 'when are
# not treated as keywords: # not treated as keywords:
if ( if (
skipping to change at line 4513 skipping to change at line 4736
{ {
$in_statement_continuation = 0; $in_statement_continuation = 0;
} }
} }
# ...and include all block types except user subs with # ...and include all block types except user subs with
# block prototypes and these: (sort|grep|map|do|eval) # block prototypes and these: (sort|grep|map|do|eval)
# /^(\}|\{|BEGIN|END|CHECK|INIT|AUTOLOAD|DESTROY|UNITCHECK|continue|;|if|elsif|e lse|unless|while|until|for|foreach)$/ # /^(\}|\{|BEGIN|END|CHECK|INIT|AUTOLOAD|DESTROY|UNITCHECK|continue|;|if|elsif|e lse|unless|while|until|for|foreach)$/
elsif ( elsif (
$is_zero_continuation_block_type{ $is_zero_continuation_block_type{
$routput_block_type->[$i] } ) $routput_block_type->[$i]
}
)
{ {
$in_statement_continuation = 0; $in_statement_continuation = 0;
} }
# ..but these are not terminal types: # ..but these are not terminal types:
# /^(sort|grep|map|do|eval)$/ ) # /^(sort|grep|map|do|eval)$/ )
elsif ( elsif (
$is_not_zero_continuation_block_type{ $is_not_zero_continuation_block_type{
$routput_block_type->[$i] } ) $routput_block_type->[$i]
}
)
{ {
} }
# ..and a block introduced by a label # ..and a block introduced by a label
# /^\w+\s*:$/gc ) { # /^\w+\s*:$/gc ) {
elsif ( $routput_block_type->[$i] =~ /:$/ ) { elsif ( $routput_block_type->[$i] =~ /:$/ ) {
$in_statement_continuation = 0; $in_statement_continuation = 0;
} }
# user function with block prototype # user function with block prototype
skipping to change at line 4740 skipping to change at line 4967
} }
} # end tokenize_this_line } # end tokenize_this_line
#########i############################################################# #########i#############################################################
# Tokenizer routines which assist in identifying token types # Tokenizer routines which assist in identifying token types
####################################################################### #######################################################################
# hash lookup table of operator expected values # hash lookup table of operator expected values
my %op_expected_table; my %op_expected_table;
# exceptions to perl's weird parsing rules after type 'Z'
my %is_weird_parsing_rule_exception;
BEGIN { BEGIN {
# Always expecting TERM following these types: # Always expecting TERM following these types:
# note: this is identical to '@value_requestor_type' defined later. # note: this is identical to '@value_requestor_type' defined later.
my @q = qw( my @q = qw(
; ! + x & ? F J - p / Y : % f U ~ A G j L * . | ^ < = [ m { \ > t ; ! + x & ? F J - p / Y : % f U ~ A G j L * . | ^ < = [ m { \ > t
|| >= != mm *= => .. !~ == && |= .= pp -= =~ += <= %= ^= x= ~~ ** << /= || >= != mm *= => .. !~ == && |= .= pp -= =~ += <= %= ^= x= ~~ ** << /=
&= // >> ~. &. |. ^. &= // >> ~. &. |. ^.
... **= <<= >>= &&= ||= //= <=> !~~ &.= |.= ^.= <<~ ... **= <<= >>= &&= ||= //= <=> !~~ &.= |.= ^.= <<~
); );
push @q, ','; push @q, ',';
push @q, '('; # for completeness, not currently a token type push @q, '('; # for completeness, not currently a token type
@{op_expected_table}{@q} = (TERM) x scalar(@q); @{op_expected_table}{@q} = (TERM) x scalar(@q);
# Always UNKNOWN following these types: # Always UNKNOWN following these types:
@q = qw( w ); # Fix for c030: added '->' to this list
@q = qw( w -> );
@{op_expected_table}{@q} = (UNKNOWN) x scalar(@q); @{op_expected_table}{@q} = (UNKNOWN) x scalar(@q);
# Always expecting OPERATOR ... # Always expecting OPERATOR ...
# 'n' and 'v' are currently excluded because they might be VERSION numbers # 'n' and 'v' are currently excluded because they might be VERSION numbers
# 'i' is currently excluded because it might be a package # 'i' is currently excluded because it might be a package
# 'q' is currently excluded because it might be a prototype # 'q' is currently excluded because it might be a prototype
@q = qw( -- C -> h R ++ ] Q <> ); ## n v q i ); # Fix for c030: removed '->' from this list:
@q = qw( -- C h R ++ ] Q <> ); ## n v q i );
push @q, ')'; push @q, ')';
@{op_expected_table}{@q} = (OPERATOR) x scalar(@q); @{op_expected_table}{@q} = (OPERATOR) x scalar(@q);
# Fix for git #62: added '*' and '%'
@q = qw( < ? * % );
@{is_weird_parsing_rule_exception}{@q} = (OPERATOR) x scalar(@q);
} }
use constant DEBUG_OPERATOR_EXPECTED => 0;
sub operator_expected { sub operator_expected {
# Returns a parameter indicating what types of tokens can occur next # Returns a parameter indicating what types of tokens can occur next
# Call format: # Call format:
# $op_expected = operator_expected( [ $prev_type, $tok, $next_type ] ); # $op_expected = operator_expected( [ $prev_type, $tok, $next_type ] );
# where # where
# $prev_type is the type of the previous token (blank or not) # $prev_type is the type of the previous token (blank or not)
# $tok is the current token # $tok is the current token
# $next_type is the type of the next token (blank or not) # $next_type is the type of the next token (blank or not)
skipping to change at line 4819 skipping to change at line 5057
# that it uses, which are initialized in the BEGIN section. # that it uses, which are initialized in the BEGIN section.
# USES GLOBAL VARIABLES: $last_nonblank_type, $last_nonblank_token, # USES GLOBAL VARIABLES: $last_nonblank_type, $last_nonblank_token,
# $statement_type # $statement_type
# When possible, token types should be selected such that we can determine # When possible, token types should be selected such that we can determine
# the 'operator_expected' value by a simple hash lookup. If there are # the 'operator_expected' value by a simple hash lookup. If there are
# exceptions, that is an indication that a new type is needed. # exceptions, that is an indication that a new type is needed.
my ($rarg) = @_; my ($rarg) = @_;
my $msg = "";
############## ##############
# Table lookup # Table lookup
############## ##############
# Many types are can be obtained by a table lookup given the previous type. # Many types are can be obtained by a table lookup given the previous type.
# This typically handles half or more of the calls. # This typically handles half or more of the calls.
my $op_expected = $op_expected_table{$last_nonblank_type}; my $op_expected = $op_expected_table{$last_nonblank_type};
goto RETURN if ( defined($op_expected) ); if ( defined($op_expected) ) {
$msg = "Table lookup";
goto RETURN;
}
###################### ######################
# Handle special cases # Handle special cases
###################### ######################
$op_expected = UNKNOWN; $op_expected = UNKNOWN;
my ( $prev_type, $tok, $next_type ) = @{$rarg}; my ( $prev_type, $tok, $next_type ) = @{$rarg};
# Types 'k', '}' and 'Z' depend on context # Types 'k', '}' and 'Z' depend on context
# FIXME: Types 'i', 'n', 'v', 'q' currently also temporarily depend on # FIXME: Types 'i', 'n', 'v', 'q' currently also temporarily depend on
skipping to change at line 4866 skipping to change at line 5109
$op_expected = TERM; $op_expected = TERM;
if ( $expecting_operator_token{$last_nonblank_token} ) { if ( $expecting_operator_token{$last_nonblank_token} ) {
$op_expected = OPERATOR; $op_expected = OPERATOR;
} }
elsif ( $expecting_term_token{$last_nonblank_token} ) { elsif ( $expecting_term_token{$last_nonblank_token} ) {
# Exceptions from TERM: # Exceptions from TERM:
# // may follow perl functions which may be unary operators # // may follow perl functions which may be unary operators
# see test file dor.t (defined or); # see test file dor.t (defined or);
if ( $tok eq '/' if (
$tok eq '/'
&& $next_type eq '/' && $next_type eq '/'
&& $is_keyword_rejecting_slash_as_pattern_delimiter{ && $is_keyword_rejecting_slash_as_pattern_delimiter{
$last_nonblank_token} ) $last_nonblank_token}
)
{ {
$op_expected = OPERATOR; $op_expected = OPERATOR;
} }
# Patch to allow a ? following 'split' to be a depricated pattern # Patch to allow a ? following 'split' to be a depricated pattern
# delimiter. This patch is coordinated with the omission of split # delimiter. This patch is coordinated with the omission of split
# from the list # from the list
# %is_keyword_rejecting_question_as_pattern_delimiter. This patch # %is_keyword_rejecting_question_as_pattern_delimiter. This patch
# will force perltidy to guess. # will force perltidy to guess.
elsif ($tok eq '?' elsif ($tok eq '?'
skipping to change at line 4992 skipping to change at line 5237
$op_expected = UNKNOWN; $op_expected = UNKNOWN;
# angle.t # angle.t
if ( $last_nonblank_token =~ /^\w/ ) { if ( $last_nonblank_token =~ /^\w/ ) {
$op_expected = UNKNOWN; $op_expected = UNKNOWN;
} }
# The 'weird parsing rules' of next section do not work for '<' and '?' # The 'weird parsing rules' of next section do not work for '<' and '?'
# It is best to mark them as unknown. Test case: # It is best to mark them as unknown. Test case:
# print $fh <DATA>; # print $fh <DATA>;
elsif ( $tok =~ /^[\<\?]$/ ) { elsif ( $is_weird_parsing_rule_exception{$tok} ) {
$op_expected = UNKNOWN; $op_expected = UNKNOWN;
} }
# For possible file handle like "$a", Perl uses weird parsing rules. # For possible file handle like "$a", Perl uses weird parsing rules.
# For example: # For example:
# print $a/2,"/hi"; - division # print $a/2,"/hi"; - division
# print $a / 2,"/hi"; - division # print $a / 2,"/hi"; - division
# print $a/ 2,"/hi"; - division # print $a/ 2,"/hi"; - division
# print $a /2,"/hi"; - pattern (and error)! # print $a /2,"/hi"; - pattern (and error)!
# Some examples where this logic works okay, for '&','*','+': # Some examples where this logic works okay, for '&','*','+':
skipping to change at line 5035 skipping to change at line 5280
} }
} }
# anything else... # anything else...
else { else {
$op_expected = UNKNOWN; $op_expected = UNKNOWN;
} }
RETURN: RETURN:
# debug and diagnostics can go here.. DEBUG_OPERATOR_EXPECTED && do {
0 && do {
print STDOUT print STDOUT
"EXPECT: returns $op_expected for last type $last_nonblank_type token $last_nonb lank_token\n"; "OPERATOR_EXPECTED: $msg: returns $op_expected for last type $last_nonblank_type token $last_nonblank_token\n";
}; };
return $op_expected; return $op_expected;
} ## end of sub operator_expected } ## end of sub operator_expected
sub new_statement_ok { sub new_statement_ok {
# return true if the current token can start a new statement # return true if the current token can start a new statement
# USES GLOBAL VARIABLES: $last_nonblank_type # USES GLOBAL VARIABLES: $last_nonblank_type
skipping to change at line 5355 skipping to change at line 5598
&& $pre_tokens[$jbeg] !~ /^(s|m|y|tr|qr|q|qq|qx)$/ && $pre_tokens[$jbeg] !~ /^(s|m|y|tr|qr|q|qq|qx)$/
) )
# or a => # or a =>
|| ( $pre_types[$j] eq '=' && $pre_types[ ++$j ] eq '>' ) || ( $pre_types[$j] eq '=' && $pre_types[ ++$j ] eq '>' )
) )
{ {
$code_block_type = ""; $code_block_type = "";
} }
} }
if ($code_block_type) {
# Patch for cases b1085 b1128: It is uncertain if this is a block.
# If this brace follows a bareword, then append a space as a signal
# to the formatter that this may not be a block brace. To find the
# corresponding code in Formatter.pm search for 'b1085'.
$code_block_type .= " " if ( $code_block_type =~ /^\w/ );
}
} }
return $code_block_type; return $code_block_type;
} }
sub report_unexpected { sub report_unexpected {
# report unexpected token type and show where it is # report unexpected token type and show where it is
# USES GLOBAL VARIABLES: $tokenizer_self # USES GLOBAL VARIABLES: $tokenizer_self
my ( $found, $expecting, $i_tok, $last_nonblank_i, $rpretoken_map, my ( $found, $expecting, $i_tok, $last_nonblank_i, $rpretoken_map,
skipping to change at line 5724 skipping to change at line 5976
my ( $rtok, $rmap, $rtype ) = my ( $rtok, $rmap, $rtype ) =
pre_tokenize( $line, 2 ); # only need 2 pre-tokens pre_tokenize( $line, 2 ); # only need 2 pre-tokens
my $j = $max_token_index + 1; my $j = $max_token_index + 1;
foreach my $tok ( @{$rtok} ) { foreach my $tok ( @{$rtok} ) {
last if ( $tok =~ "\n" ); last if ( $tok =~ "\n" );
$rtokens->[ ++$j ] = $tok; $rtokens->[ ++$j ] = $tok;
} }
last; last;
} }
return $rtokens; return;
} }
#########i############################################################# #########i#############################################################
# Tokenizer guessing routines for ambiguous situations # Tokenizer guessing routines for ambiguous situations
####################################################################### #######################################################################
sub guess_if_pattern_or_conditional { sub guess_if_pattern_or_conditional {
# this routine is called when we have encountered a ? following an # this routine is called when we have encountered a ? following an
# unknown bareword, and we must decide if it starts a pattern or not # unknown bareword, and we must decide if it starts a pattern or not
# input parameters: # input parameters:
# $i - token index of the ? starting possible pattern # $i - token index of the ? starting possible pattern
# output parameters: # output parameters:
# $is_pattern = 0 if probably not pattern, =1 if probably a pattern # $is_pattern = 0 if probably not pattern, =1 if probably a pattern
# msg = a warning or diagnostic message # msg = a warning or diagnostic message
# USES GLOBAL VARIABLES: $last_nonblank_token # USES GLOBAL VARIABLES: $last_nonblank_token
# FIXME: this needs to be rewritten
my ( $i, $rtokens, $rtoken_map, $max_token_index ) = @_; my ( $i, $rtokens, $rtoken_map, $max_token_index ) = @_;
my $is_pattern = 0; my $is_pattern = 0;
my $msg = "guessing that ? after $last_nonblank_token starts a "; my $msg = "guessing that ? after $last_nonblank_token starts a ";
if ( $i >= $max_token_index ) { if ( $i >= $max_token_index ) {
$msg .= "conditional (no end to pattern found on the line)\n"; $msg .= "conditional (no end to pattern found on the line)\n";
} }
else { else {
my $ibeg = $i; my $ibeg = $i;
$i = $ibeg + 1; $i = $ibeg + 1;
skipping to change at line 5841 skipping to change at line 6091
# USES GLOBAL VARIABLES: $last_nonblank_token # USES GLOBAL VARIABLES: $last_nonblank_token
my ( $i, $rtokens, $rtoken_map, $max_token_index ) = @_; my ( $i, $rtokens, $rtoken_map, $max_token_index ) = @_;
my $is_pattern = 0; my $is_pattern = 0;
my $msg = "guessing that / after $last_nonblank_token starts a "; my $msg = "guessing that / after $last_nonblank_token starts a ";
if ( $i >= $max_token_index ) { if ( $i >= $max_token_index ) {
$msg .= "division (no end to pattern found on the line)\n"; $msg .= "division (no end to pattern found on the line)\n";
} }
else { else {
my $ibeg = $i; my $ibeg = $i;
my $divide_expected = my $divide_possible =
numerator_expected( $i, $rtokens, $max_token_index ); is_possible_numerator( $i, $rtokens, $max_token_index );
if ( $divide_possible < 0 ) {
$msg = "pattern (division not possible here)\n";
$is_pattern = 1;
goto RETURN;
}
$i = $ibeg + 1; $i = $ibeg + 1;
my $next_token = $rtokens->[$i]; # first token after slash my $next_token = $rtokens->[$i]; # first token after slash
# One of the things we can look at is the spacing around the slash. # One of the things we can look at is the spacing around the slash.
# There # are four possible spacings around the first slash: # There # are four possible spacings around the first slash:
# #
# return pi/two;#/; -/- # return pi/two;#/; -/-
# return pi/ two;#/; -/+ # return pi/ two;#/; -/+
# return pi / two;#/; +/+ # return pi / two;#/; +/+
# return pi /two;#/; +/- <-- possible pattern # return pi /two;#/; +/- <-- possible pattern
skipping to change at line 5877 skipping to change at line 6134
$i, $in_quote, $quote_character, $quote_pos, $quote_depth, $i, $in_quote, $quote_character, $quote_pos, $quote_depth,
$quoted_string $quoted_string
) )
= follow_quoted_string( $ibeg, $in_quote, $rtokens, $quote_character, = follow_quoted_string( $ibeg, $in_quote, $rtokens, $quote_character,
$quote_pos, $quote_depth, $max_token_index ); $quote_pos, $quote_depth, $max_token_index );
if ($in_quote) { if ($in_quote) {
# we didn't find an ending / on this line, so we bias towards # we didn't find an ending / on this line, so we bias towards
# division # division
if ( $divide_expected >= 0 ) { if ( $divide_possible >= 0 ) {
$is_pattern = 0; $is_pattern = 0;
$msg .= "division (no ending / on this line)\n"; $msg .= "division (no ending / on this line)\n";
} }
else { else {
# assuming a multi-line pattern ... this is risky, but division # assuming a multi-line pattern ... this is risky, but division
# does not seem possible. If this fails, it would either be due # does not seem possible. If this fails, it would either be due
# to a syntax error in the code, or the division_expected logic # to a syntax error in the code, or the division_expected logic
# needs to be fixed. # needs to be fixed.
$msg = "multi-line pattern (division not possible)\n"; $msg = "multi-line pattern (division not possible)\n";
skipping to change at line 5901 skipping to change at line 6158
# we found an ending /, so we bias slightly towards a pattern # we found an ending /, so we bias slightly towards a pattern
else { else {
my $pattern_expected = my $pattern_expected =
pattern_expected( $i, $rtokens, $max_token_index ); pattern_expected( $i, $rtokens, $max_token_index );
if ( $pattern_expected >= 0 ) { if ( $pattern_expected >= 0 ) {
# pattern looks possible... # pattern looks possible...
if ( $divide_expected >= 0 ) { if ( $divide_possible >= 0 ) {
# Both pattern and divide can work here... # Both pattern and divide can work here...
# Increase weight of divide if a pure number follows # Increase weight of divide if a pure number follows
$divide_expected += $next_token =~ /^\d+$/; $divide_possible += $next_token =~ /^\d+$/;
# Check for known constants in the numerator, like 'pi' # Check for known constants in the numerator, like 'pi'
if ( $is_known_constant{$last_nonblank_token} ) { if ( $is_known_constant{$last_nonblank_token} ) {
$msg .= $msg .=
"division (pattern works too but saw known constant '$last_nonblank_token')\n"; "division (pattern works too but saw known constant '$last_nonblank_token')\n";
$is_pattern = 0; $is_pattern = 0;
} }
# A very common bare word in pattern expressions is 'ok' # A very common bare word in pattern expressions is 'ok'
elsif ( $is_known_function{$last_nonblank_token} ) { elsif ( $is_known_function{$last_nonblank_token} ) {
$msg .= $msg .=
"pattern (division works too but saw '$last_nonblank_token')\n"; "pattern (division works too but saw '$last_nonblank_token')\n";
$is_pattern = 1; $is_pattern = 1;
} }
# If one rule is more definite, use it # If one rule is more definite, use it
elsif ( $divide_expected > $pattern_expected ) { elsif ( $divide_possible > $pattern_expected ) {
$msg .= $msg .=
"division (more likely based on following tokens)\n"; "division (more likely based on following tokens)\n";
$is_pattern = 0; $is_pattern = 0;
} }
# otherwise, use the spacing rule # otherwise, use the spacing rule
elsif ($is_pattern_by_spacing) { elsif ($is_pattern_by_spacing) {
$msg .= $msg .=
"pattern (guess on spacing, but division possible too)\n"; "pattern (guess on spacing, but division possible too)\n";
$is_pattern = 1; $is_pattern = 1;
} }
else { else {
$msg .= $msg .=
"division (guess on spacing, but pattern is possible too)\n"; "division (guess on spacing, but pattern is possible too)\n";
$is_pattern = 0; $is_pattern = 0;
} }
} }
# divide_expected < 0 means divide can not work here # divide_possible < 0 means divide can not work here
else { else {
$is_pattern = 1; $is_pattern = 1;
$msg .= "pattern (division not possible)\n"; $msg .= "pattern (division not possible)\n";
} }
} }
# pattern does not look possible... # pattern does not look possible...
else { else {
if ( $divide_expected >= 0 ) { if ( $divide_possible >= 0 ) {
$is_pattern = 0; $is_pattern = 0;
$msg .= "division (pattern not possible)\n"; $msg .= "division (pattern not possible)\n";
} }
# Neither pattern nor divide look possible...go by spacing # Neither pattern nor divide look possible...go by spacing
else { else {
if ($is_pattern_by_spacing) { if ($is_pattern_by_spacing) {
$msg .= "pattern (guess on spacing)\n"; $msg .= "pattern (guess on spacing)\n";
$is_pattern = 1; $is_pattern = 1;
} }
else { else {
$msg .= "division (guess on spacing)\n"; $msg .= "division (guess on spacing)\n";
$is_pattern = 0; $is_pattern = 0;
} }
} }
} }
} }
} }
RETURN:
return ( $is_pattern, $msg ); return ( $is_pattern, $msg );
} }
# try to resolve here-doc vs. shift by looking ahead for # try to resolve here-doc vs. shift by looking ahead for
# non-code or the end token (currently only looks for end token) # non-code or the end token (currently only looks for end token)
# returns 1 if it is probably a here doc, 0 if not # returns 1 if it is probably a here doc, 0 if not
sub guess_if_here_doc { sub guess_if_here_doc {
# This is how many lines we will search for a target as part of the # This is how many lines we will search for a target as part of the
# guessing strategy. It is a constant because there is probably # guessing strategy. It is a constant because there is probably
skipping to change at line 6088 skipping to change at line 6347
$pos--; $pos--;
} }
$package =~ s/\'/::/g; $package =~ s/\'/::/g;
if ( $package =~ /^\:/ ) { $package = 'main' . $package } if ( $package =~ /^\:/ ) { $package = 'main' . $package }
$package =~ s/::$//; $package =~ s/::$//;
} }
else { else {
$package = $current_package; $package = $current_package;
if ( $is_keyword{$tok} ) { # patched for c043, part 1: keyword does not follow '->'
if ( $is_keyword{$tok} && $last_nonblank_type ne '->' ) {
$type = 'k'; $type = 'k';
} }
} }
# if it is a bareword.. # if it is a bareword.. patched for c043, part 2: not following '->'
if ( $type eq 'w' ) { if ( $type eq 'w' && $last_nonblank_type ne '->' ) {
# check for v-string with leading 'v' type character # check for v-string with leading 'v' type character
# (This seems to have precedence over filehandle, type 'Y') # (This seems to have precedence over filehandle, type 'Y')
if ( $tok =~ /^v\d[_\d]*$/ ) { if ( $tok =~ /^v\d[_\d]*$/ ) {
# we only have the first part - something like 'v101' - # we only have the first part - something like 'v101' -
# look for more # look for more
if ( $input_line =~ m/\G(\.\d[_\d]*)+/gc ) { if ( $input_line =~ m/\G(\.\d[_\d]*)+/gc ) {
$pos = pos($input_line); $pos = pos($input_line);
$numc = $pos - $pos_beg; $numc = $pos - $pos_beg;
skipping to change at line 6452 skipping to change at line 6712
my ( $next_nonblank_token, $i_next ) = my ( $next_nonblank_token, $i_next ) =
find_next_nonblank_token( $i, $rtokens, $max_token_index ); find_next_nonblank_token( $i, $rtokens, $max_token_index );
# check that something recognizable follows, but do not parse. # check that something recognizable follows, but do not parse.
# A VERSION number will be parsed later as a number or v-string in the # A VERSION number will be parsed later as a number or v-string in the
# normal way. What is important is to set the statement type if # normal way. What is important is to set the statement type if
# everything looks okay so that the operator_expected() routine # everything looks okay so that the operator_expected() routine
# knows that the number is in a package statement. # knows that the number is in a package statement.
# Examples of valid primitive tokens that might follow are: # Examples of valid primitive tokens that might follow are:
# 1235 . ; { } v3 v # 1235 . ; { } v3 v
if ( $next_nonblank_token =~ /^([v\.\d;\{\}])|v\d|\d+$/ ) { # FIX: added a '#' since a side comment may also follow
if ( $next_nonblank_token =~ /^([v\.\d;\{\}\#])|v\d|\d+$/ ) {
$statement_type = $tok; $statement_type = $tok;
} }
else { else {
warning( warning(
"Unexpected '$next_nonblank_token' after package name '$tok'\n" "Unexpected '$next_nonblank_token' after package name '$tok'\n"
); );
} }
} }
# no match but line not blank -- # no match but line not blank --
skipping to change at line 6624 skipping to change at line 6885
$identifier .= $tok; $identifier .= $tok;
# Perl will accept leading digits in identifiers, # Perl will accept leading digits in identifiers,
# although they may not always produce useful results. # although they may not always produce useful results.
# Something like $main::0 is ok. But this also works: # Something like $main::0 is ok. But this also works:
# #
# sub howdy::123::bubba{ print "bubba $54321!\n" } # sub howdy::123::bubba{ print "bubba $54321!\n" }
# howdy::123::bubba(); # howdy::123::bubba();
# #
} }
elsif ( $tok eq '#' ) {
# $# and POSTDEFREF ->$# # side comment or identifier?
elsif ( if (
( $tok eq '#' )
&& ( $identifier =~ /\$$/ )
# a # inside a prototype or signature can only start a comment # A '#' starts a comment if it follows a space. For example,
&& !$in_prototype_or_signature # the following is equivalent to $ans=40.
) # my $ #
{ # ans = 40;
# A '#' starts a comment if it follows a space. For example, !$last_tok_is_blank
# the following is equivalent to $ans=40.
# my $ # # a # inside a prototype or signature can only start a
# ans = 40; # comment
if ($last_tok_is_blank) { && !$in_prototype_or_signature
$type = 'i';
if ( $id_scan_state eq '$' ) { $type = 't' } # these are valid punctuation vars: *# %# @# $#
# May also be '$#array' or POSTDEFREF ->$#
&& ( $identifier =~ /^[\%\@\$\*]$/ || $identifier =~ /\$$/ )
)
{
$identifier .= $tok; # keep same state, a $ could follow
}
else {
# otherwise it is a side comment
if ( $identifier eq '->' ) { }
elsif ( $id_scan_state eq '$' ) { $type = 't' }
else { $type = 'i' }
$i = $i_save; $i = $i_save;
$id_scan_state = ''; $id_scan_state = '';
last; last;
} }
# May be '$#' or '$#array'
$identifier .= $tok; # keep same state, a $ could follow
} }
elsif ( $tok eq '{' ) { elsif ( $tok eq '{' ) {
# check for something like ${#} or ${} # check for something like ${#} or ${}
if ( if (
( (
$identifier eq '$' $identifier eq '$'
|| $identifier eq '@' || $identifier eq '@'
|| $identifier eq '$#' || $identifier eq '$#'
skipping to change at line 6983 skipping to change at line 7253
# We have to be careful here. If we are in an unknown state, # We have to be careful here. If we are in an unknown state,
# we will reject the punctuation variable. In the following # we will reject the punctuation variable. In the following
# example the '&' is a binary operator but we are in an unknown # example the '&' is a binary operator but we are in an unknown
# state because there is no sigil on 'Prima', so we don't # state because there is no sigil on 'Prima', so we don't
# know what it is. But it is a bad guess that # know what it is. But it is a bad guess that
# '&~' is a function variable. # '&~' is a function variable.
# $self->{text}->{colorMap}->[ # $self->{text}->{colorMap}->[
# Prima::PodView::COLOR_CODE_FOREGROUND # Prima::PodView::COLOR_CODE_FOREGROUND
# & ~tb::COLOR_INDEX ] = # & ~tb::COLOR_INDEX ] =
# $sec->{ColorCode} # $sec->{ColorCode}
if ( $identifier eq '&' && $expecting ) {
# Fix for case c033: a '#' here starts a side comment
if ( $identifier eq '&' && $expecting && $tok ne '#' ) {
$identifier .= $tok; $identifier .= $tok;
} }
else { else {
$identifier = ''; $identifier = '';
$i = $i_save; $i = $i_save;
$type = '&'; $type = '&';
} }
$id_scan_state = ''; $id_scan_state = '';
last; last;
} }
skipping to change at line 7086 skipping to change at line 7358
print STDOUT print STDOUT
"SCANID: called from $a $b $c with tok, i, state, identifier =$tok_begin, $i_beg in, $id_scan_state_begin, $identifier_begin\n"; "SCANID: called from $a $b $c with tok, i, state, identifier =$tok_begin, $i_beg in, $id_scan_state_begin, $identifier_begin\n";
print STDOUT print STDOUT
"SCANID: returned with tok, i, state, identifier =$tok, $i, $id_scan_state, $ide ntifier\n"; "SCANID: returned with tok, i, state, identifier =$tok, $i, $id_scan_state, $ide ntifier\n";
}; };
return ( $i, $tok, $type, $id_scan_state, $identifier ); return ( $i, $tok, $type, $id_scan_state, $identifier );
} }
{ ## closure for sub do_scan_sub { ## closure for sub do_scan_sub
my %warn_if_lexical;
BEGIN {
# lexical subs with these names can cause parsing errors in this version
my @q = qw( m q qq qr qw qx s tr y );
@{warn_if_lexical}{@q} = (1) x scalar(@q);
}
# saved package and subnames in case prototype is on separate line # saved package and subnames in case prototype is on separate line
my ( $package_saved, $subname_saved ); my ( $package_saved, $subname_saved );
# initialize subname each time a new 'sub' keyword is encountered # initialize subname each time a new 'sub' keyword is encountered
sub initialize_subname { sub initialize_subname {
$package_saved = ""; $package_saved = "";
$subname_saved = ""; $subname_saved = "";
return; return;
} }
skipping to change at line 7156 skipping to change at line 7437
my $input_line = $rinput_hash->{input_line}; my $input_line = $rinput_hash->{input_line};
my $i = $rinput_hash->{i}; my $i = $rinput_hash->{i};
my $i_beg = $rinput_hash->{i_beg}; my $i_beg = $rinput_hash->{i_beg};
my $tok = $rinput_hash->{tok}; my $tok = $rinput_hash->{tok};
my $type = $rinput_hash->{type}; my $type = $rinput_hash->{type};
my $rtokens = $rinput_hash->{rtokens}; my $rtokens = $rinput_hash->{rtokens};
my $rtoken_map = $rinput_hash->{rtoken_map}; my $rtoken_map = $rinput_hash->{rtoken_map};
my $id_scan_state = $rinput_hash->{id_scan_state}; my $id_scan_state = $rinput_hash->{id_scan_state};
my $max_token_index = $rinput_hash->{max_token_index}; my $max_token_index = $rinput_hash->{max_token_index};
my $i_entry = $i;
# Determine the CALL TYPE # Determine the CALL TYPE
# 1=sub # 1=sub
# 2=( # 2=(
# 3=prototype # 3=prototype
my $call_type = my $call_type =
$tok eq 'prototype' ? PROTOTYPE_CALL $tok eq 'prototype' ? PROTOTYPE_CALL
: $tok eq '(' ? PAREN_CALL : $tok eq '(' ? PAREN_CALL
: SUB_CALL; : SUB_CALL;
$id_scan_state = ""; # normally we get everything in one call $id_scan_state = ""; # normally we get everything in one call
skipping to change at line 7187 skipping to change at line 7470
$call_type == SUB_CALL $call_type == SUB_CALL
&& $input_line =~ m/\G\s* && $input_line =~ m/\G\s*
((?:\w*(?:'|::))*) # package - something that ends in :: or ' ((?:\w*(?:'|::))*) # package - something that ends in :: or '
(\w+) # NAME - required (\w+) # NAME - required
/gcx /gcx
) )
{ {
$match = 1; $match = 1;
$subname = $2; $subname = $2;
$package = ( defined($1) && $1 ) ? $1 : $current_package; my $is_lexical_sub =
$package =~ s/\'/::/g; $last_nonblank_type eq 'k' && $last_nonblank_token eq 'my';
if ( $package =~ /^\:/ ) { $package = 'main' . $package } if ( $is_lexical_sub && $1 ) {
$package =~ s/::$//; warning("'my' sub $subname cannot be in package '$1'\n");
$is_lexical_sub = 0;
}
if ($is_lexical_sub) {
# lexical subs use the block sequence number as a package name
my $seqno =
$current_sequence_number[BRACE][ $current_depth[BRACE] ];
$seqno = 1 unless ( defined($seqno) );
$package = $seqno;
if ( $warn_if_lexical{$subname} ) {
warning(
"'my' sub '$subname' matches a builtin name and may not be handled correctly in
this perltidy version.\n"
);
}
}
else {
$package = ( defined($1) && $1 ) ? $1 : $current_package;
$package =~ s/\'/::/g;
if ( $package =~ /^\:/ ) { $package = 'main' . $package }
$package =~ s/::$//;
}
my $pos = pos($input_line); my $pos = pos($input_line);
my $numc = $pos - $pos_beg; my $numc = $pos - $pos_beg;
$tok = 'sub ' . substr( $input_line, $pos_beg, $numc ); $tok = 'sub ' . substr( $input_line, $pos_beg, $numc );
$type = 'i'; $type = 'i';
# remember the sub name in case another call is needed to # remember the sub name in case another call is needed to
# get the prototype # get the prototype
$package_saved = $package; $package_saved = $package;
$subname_saved = $subname; $subname_saved = $subname;
} }
skipping to change at line 7229 skipping to change at line 7535
if ( if (
$input_line =~ m/\G(\s*\([^\)\(\}\{\,#A-Za-z]*\))? # PROTO $input_line =~ m/\G(\s*\([^\)\(\}\{\,#A-Za-z]*\))? # PROTO
(\s*:)? # ATTRS leading ':' (\s*:)? # ATTRS leading ':'
/gcx /gcx
&& ( $1 || $2 ) && ( $1 || $2 )
) )
{ {
$proto = $1; $proto = $1;
$attrs = $2; $attrs = $2;
# Append the prototype to the starting token if it is 'sub' or # Append the prototype to the starting token if it is 'sub' or
# 'prototype'. This is not necessary but for compatibility with previous # 'prototype'. This is not necessary but for compatibility with
# versions when the -csc flag is used: # previous versions when the -csc flag is used:
if ( $proto && ( $match || $call_type == PROTOTYPE_CALL ) ) { if ( $proto && ( $match || $call_type == PROTOTYPE_CALL ) ) {
$tok .= $proto; $tok .= $proto;
} }
# If we just entered the sub at an opening paren on this call, not # If we just entered the sub at an opening paren on this call, not
# a following :prototype, label it with the previous token. This is # a following :prototype, label it with the previous token. This is
# necessary to propagate the sub name to its opening block. # necessary to propagate the sub name to its opening block.
elsif ( $call_type == PAREN_CALL ) { elsif ( $call_type == PAREN_CALL ) {
$tok = $last_nonblank_token; $tok = $last_nonblank_token;
} }
$match ||= 1; $match ||= 1;
# Patch part #1 to fixes cases b994 and b1053:
# Mark an anonymous sub keyword without prototype as type 'k', i.e.
# 'sub : lvalue { ...'
$type = 'i'; $type = 'i';
if ( $tok eq 'sub' && !$proto ) { $type = 'k' }
} }
if ($match) { if ($match) {
# ATTRS: if there are attributes, back up and let the ':' be # ATTRS: if there are attributes, back up and let the ':' be
# found later by the scanner. # found later by the scanner.
my $pos = pos($input_line); my $pos = pos($input_line);
if ($attrs) { if ($attrs) {
$pos -= length($attrs); $pos -= length($attrs);
} }
skipping to change at line 7274 skipping to change at line 7585
# Otherwise, if we found a match we must convert back from # Otherwise, if we found a match we must convert back from
# string position to the pre_token index for continued parsing. # string position to the pre_token index for continued parsing.
else { else {
# I don't think an error flag can occur here ..but ? # I don't think an error flag can occur here ..but ?
my $error; my $error;
( $i, $error ) = inverse_pretoken_map( $i, $pos, $rtoken_map, ( $i, $error ) = inverse_pretoken_map( $i, $pos, $rtoken_map,
$max_token_index ); $max_token_index );
if ($error) { warning("Possibly invalid sub\n") } if ($error) { warning("Possibly invalid sub\n") }
# Patch part #2 to fixes cases b994 and b1053:
# Do not let spaces be part of the token of an anonymous sub keyword
# which we marked as type 'k' above...i.e. for something like:
# 'sub : lvalue { ...'
# Back up and let it be parsed as a blank
if ( $type eq 'k'
&& $attrs
&& $i > $i_entry
&& substr( $rtokens->[$i], 0, 1 ) eq ' ' )
{
$i--;
}
# check for multiple definitions of a sub # check for multiple definitions of a sub
( $next_nonblank_token, my $i_next ) = ( $next_nonblank_token, my $i_next ) =
find_next_nonblank_token_on_this_line( $i, $rtokens, find_next_nonblank_token_on_this_line( $i, $rtokens,
$max_token_index ); $max_token_index );
} }
if ( $next_nonblank_token =~ /^(\s*|#)$/ ) if ( $next_nonblank_token =~ /^(\s*|#)$/ )
{ # skip blank or side comment { # skip blank or side comment
my ( $rpre_tokens, $rpre_types ) = my ( $rpre_tokens, $rpre_types ) =
peek_ahead_for_n_nonblank_pre_tokens(1); peek_ahead_for_n_nonblank_pre_tokens(1);
skipping to change at line 7299 skipping to change at line 7623
} }
} }
# See what's next... # See what's next...
if ( $next_nonblank_token eq '{' ) { if ( $next_nonblank_token eq '{' ) {
if ($subname) { if ($subname) {
# Check for multiple definitions of a sub, but # Check for multiple definitions of a sub, but
# it is ok to have multiple sub BEGIN, etc, # it is ok to have multiple sub BEGIN, etc,
# so we do not complain if name is all caps # so we do not complain if name is all caps
if ( $saw_function_definition{$package}{$subname} if ( $saw_function_definition{$subname}{$package}
&& $subname !~ /^[A-Z]+$/ ) && $subname !~ /^[A-Z]+$/ )
{ {
my $lno = $saw_function_definition{$package}{$subname}; my $lno = $saw_function_definition{$subname}{$package};
warning( if ( $package =~ /^\d/ ) {
warning(
"already saw definition of lexical 'sub $subname' at line $lno\n"
);
}
else {
warning(
"already saw definition of 'sub $subname' in package '$package' at line $lno\n" "already saw definition of 'sub $subname' in package '$package' at line $lno\n"
); );
}
} }
$saw_function_definition{$package}{$subname} = $saw_function_definition{$subname}{$package} =
$tokenizer_self->[_last_line_number_]; $tokenizer_self->[_last_line_number_];
} }
} }
elsif ( $next_nonblank_token eq ';' ) { elsif ( $next_nonblank_token eq ';' ) {
} }
elsif ( $next_nonblank_token eq '}' ) { elsif ( $next_nonblank_token eq '}' ) {
} }
# ATTRS - if an attribute list follows, remember the name # ATTRS - if an attribute list follows, remember the name
# of the sub so the next opening brace can be labeled. # of the sub so the next opening brace can be labeled.
# Setting 'statement_type' causes any ':'s to introduce # Setting 'statement_type' causes any ':'s to introduce
# attributes. # attributes.
elsif ( $next_nonblank_token eq ':' ) { elsif ( $next_nonblank_token eq ':' ) {
$statement_type = $tok if ( $call_type == SUB_CALL ); if ( $call_type == SUB_CALL ) {
$statement_type =
substr( $tok, 0, 3 ) eq 'sub' ? $tok : 'sub';
}
} }
# if we stopped before an open paren ... # if we stopped before an open paren ...
elsif ( $next_nonblank_token eq '(' ) { elsif ( $next_nonblank_token eq '(' ) {
# If we DID NOT see this paren above then it must be on the # If we DID NOT see this paren above then it must be on the
# next line so we will set a flag to come back here and see if # next line so we will set a flag to come back here and see if
# it is a PROTOTYPE # it is a PROTOTYPE
# Otherwise, we assume it is a SIGNATURE rather than a # Otherwise, we assume it is a SIGNATURE rather than a
# PROTOTYPE and let the normal tokenizer handle it as a list # PROTOTYPE and let the normal tokenizer handle it as a list
if ( !$saw_opening_paren ) { if ( !$saw_opening_paren ) {
$id_scan_state = 'sub'; # we must come back to get proto $id_scan_state = 'sub'; # we must come back to get proto
} }
$statement_type = $tok if ( $call_type == SUB_CALL ); if ( $call_type == SUB_CALL ) {
$statement_type =
substr( $tok, 0, 3 ) eq 'sub' ? $tok : 'sub';
}
} }
elsif ($next_nonblank_token) { # EOF technically ok elsif ($next_nonblank_token) { # EOF technically ok
$subname = "" unless defined($subname); $subname = "" unless defined($subname);
warning( warning(
"expecting ':' or ';' or '{' after definition or declaration of sub '$subname' b ut saw '$next_nonblank_token'\n" "expecting ':' or ';' or '{' after definition or declaration of sub '$subname' b ut saw '$next_nonblank_token'\n"
); );
} }
check_prototype( $proto, $package, $subname ); check_prototype( $proto, $package, $subname );
} }
# no match to either sub name or prototype, but line not blank # no match to either sub name or prototype, but line not blank
else { else {
skipping to change at line 7362 skipping to change at line 7700
} }
} }
#########i############################################################### #########i###############################################################
# Tokenizer utility routines which may use CONSTANTS but no other GLOBALS # Tokenizer utility routines which may use CONSTANTS but no other GLOBALS
######################################################################### #########################################################################
sub find_next_nonblank_token { sub find_next_nonblank_token {
my ( $i, $rtokens, $max_token_index ) = @_; my ( $i, $rtokens, $max_token_index ) = @_;
# Returns the next nonblank token after the token at index $i
# To skip past a side comment, and any subsequent block comments
# and blank lines, call with i=$max_token_index
if ( $i >= $max_token_index ) { if ( $i >= $max_token_index ) {
if ( !peeked_ahead() ) { if ( !peeked_ahead() ) {
peeked_ahead(1); peeked_ahead(1);
$rtokens = peek_ahead_for_nonblank_token( $rtokens, $max_token_index );
peek_ahead_for_nonblank_token( $rtokens, $max_token_index );
} }
} }
my $next_nonblank_token = $rtokens->[ ++$i ]; my $next_nonblank_token = $rtokens->[ ++$i ];
return ( " ", $i ) unless defined($next_nonblank_token);
if ( $next_nonblank_token =~ /^\s*$/ ) { if ( $next_nonblank_token =~ /^\s*$/ ) {
$next_nonblank_token = $rtokens->[ ++$i ]; $next_nonblank_token = $rtokens->[ ++$i ];
return ( " ", $i ) unless defined($next_nonblank_token);
} }
return ( $next_nonblank_token, $i ); return ( $next_nonblank_token, $i );
} }
sub numerator_expected { sub is_possible_numerator {
# this is a filter for a possible numerator, in support of guessing # Look at the next non-comment character and decide if it could be a
# for the / pattern delimiter token. # numerator. Return
# returns -
# 1 - yes # 1 - yes
# 0 - can't tell # 0 - can't tell
# -1 - no # -1 - no
# Note: I am using the convention that variables ending in
# _expected have these 3 possible values.
my ( $i, $rtokens, $max_token_index ) = @_; my ( $i, $rtokens, $max_token_index ) = @_;
my $numerator_expected = 0; my $is_possible_numerator = 0;
my $next_token = $rtokens->[ $i + 1 ]; my $next_token = $rtokens->[ $i + 1 ];
if ( $next_token eq '=' ) { $i++; } # handle /= if ( $next_token eq '=' ) { $i++; } # handle /=
my ( $next_nonblank_token, $i_next ) = my ( $next_nonblank_token, $i_next ) =
find_next_nonblank_token( $i, $rtokens, $max_token_index ); find_next_nonblank_token( $i, $rtokens, $max_token_index );
if ( $next_nonblank_token eq '#' ) {
( $next_nonblank_token, $i_next ) =
find_next_nonblank_token( $max_token_index, $rtokens,
$max_token_index );
}
if ( $next_nonblank_token =~ /(\(|\$|\w|\.|\@)/ ) { if ( $next_nonblank_token =~ /(\(|\$|\w|\.|\@)/ ) {
$numerator_expected = 1; $is_possible_numerator = 1;
}
elsif ( $next_nonblank_token =~ /^\s*$/ ) {
$is_possible_numerator = 0;
} }
else { else {
$is_possible_numerator = -1;
if ( $next_nonblank_token =~ /^\s*$/ ) {
$numerator_expected = 0;
}
else {
$numerator_expected = -1;
}
} }
return $numerator_expected;
return $is_possible_numerator;
} }
{ ## closure for sub pattern_expected { ## closure for sub pattern_expected
my %pattern_test; my %pattern_test;
BEGIN { BEGIN {
# List of tokens which may follow a pattern. Note that we will not # List of tokens which may follow a pattern. Note that we will not
# have formed digraphs at this point, so we will see '&' instead of # have formed digraphs at this point, so we will see '&' instead of
# '&&' and '|' instead of '||' # '&&' and '|' instead of '||'
skipping to change at line 7449 skipping to change at line 7795
$i++; $i++;
} # skip possible modifier } # skip possible modifier
my ( $next_nonblank_token, $i_next ) = my ( $next_nonblank_token, $i_next ) =
find_next_nonblank_token( $i, $rtokens, $max_token_index ); find_next_nonblank_token( $i, $rtokens, $max_token_index );
if ( $pattern_test{$next_nonblank_token} ) { if ( $pattern_test{$next_nonblank_token} ) {
$is_pattern = 1; $is_pattern = 1;
} }
else { else {
if ( $next_nonblank_token =~ /^\s*$/ ) { # Added '#' to fix issue c044
if ( $next_nonblank_token =~ /^\s*$/
|| $next_nonblank_token eq '#' )
{
$is_pattern = 0; $is_pattern = 0;
} }
else { else {
$is_pattern = -1; $is_pattern = -1;
} }
} }
return $is_pattern; return $is_pattern;
} }
} }
skipping to change at line 7665 skipping to change at line 8014
# Look for bad starting characters; Shouldn't happen.. # Look for bad starting characters; Shouldn't happen..
if ( $first_char !~ /[\d\.\+\-Ee]/ ) { if ( $first_char !~ /[\d\.\+\-Ee]/ ) {
warning("Program bug - scan_number given character $first_char\n"); warning("Program bug - scan_number given character $first_char\n");
report_definite_bug(); report_definite_bug();
return ( $i, $type, $number ); return ( $i, $type, $number );
} }
# handle v-string without leading 'v' character ('Two Dot' rule) # handle v-string without leading 'v' character ('Two Dot' rule)
# (vstring.t) # (vstring.t)
# TODO: v-strings may contain underscores # Here is the format prior to including underscores:
## if ( $input_line =~ /\G((\d+)?\.\d+(\.\d+)+)/g ) {
pos($input_line) = $pos_beg; pos($input_line) = $pos_beg;
if ( $input_line =~ /\G((\d+)?\.\d+(\.\d+)+)/g ) { if ( $input_line =~ /\G((\d[_\d]*)?\.[\d_]+(\.[\d_]+)+)/g ) {
$pos = pos($input_line); $pos = pos($input_line);
my $numc = $pos - $pos_beg; my $numc = $pos - $pos_beg;
$number = substr( $input_line, $pos_beg, $numc ); $number = substr( $input_line, $pos_beg, $numc );
$type = 'v'; $type = 'v';
report_v_string($number); report_v_string($number);
} }
# handle octal, hex, binary # handle octal, hex, binary
if ( !defined($number) ) { if ( !defined($number) ) {
pos($input_line) = $pos_beg; pos($input_line) = $pos_beg;
skipping to change at line 7698 skipping to change at line 8048
# a hex float, i.e. '0x0.b17217f7d1cf78p0' # a hex float, i.e. '0x0.b17217f7d1cf78p0'
([xX][0-9a-fA-F_]* # X and optional leading digits ([xX][0-9a-fA-F_]* # X and optional leading digits
(\.([0-9a-fA-F][0-9a-fA-F_]*)?)? # optional decimal and fraction (\.([0-9a-fA-F][0-9a-fA-F_]*)?)? # optional decimal and fraction
[Pp][+-]?[0-9a-fA-F] # REQUIRED exponent with digit [Pp][+-]?[0-9a-fA-F] # REQUIRED exponent with digit
[0-9a-fA-F_]*) # optional Additional exponent digits [0-9a-fA-F_]*) # optional Additional exponent digits
# or hex integer # or hex integer
|([xX][0-9a-fA-F_]+) |([xX][0-9a-fA-F_]+)
# or octal fraction # or octal fraction
|([0-7_]+ # string of octal digits |([oO]?[0-7_]+ # string of octal digits
(\.([0-7][0-7_]*)?)? # optional decimal and fraction (\.([0-7][0-7_]*)?)? # optional decimal and fraction
[Pp][+-]?[0-7] # REQUIRED exponent, no underscore [Pp][+-]?[0-7] # REQUIRED exponent, no underscore
[0-7_]*) # Additonal exponent digits, with underscores [0-7_]*) # Additional exponent digits with underscores
# or octal integer # or octal integer
|([0-7_]+) # string of octal digits |([oO]?[0-7_]+) # string of octal digits
# or a binary float # or a binary float
|([bB][01_]* # 'b' with string of binary digits |([bB][01_]* # 'b' with string of binary digits
(\.([01][01_]*)?)? # optional decimal and fraction (\.([01][01_]*)?)? # optional decimal and fraction
[Pp][+-]?[01] # Required exponent indicator, no underscore [Pp][+-]?[01] # Required exponent indicator, no underscore
[01_]*) # additional exponent bits [01_]*) # additional exponent bits
# or binary integer # or binary integer
|([bB][01_]+) # 'b' with string of binary digits |([bB][01_]+) # 'b' with string of binary digits
 End of changes. 135 change blocks. 
154 lines changed or deleted 504 lines changed or added

Home  |  About  |  Features  |  All  |  Newest  |  Dox  |  Diffs  |  RSS Feeds  |  Screenshots  |  Comments  |  Imprint  |  Privacy  |  HTTP(S)