"Fossies" - the Fresh Open Source Software Archive

Member "Perl-Tidy-20200110/lib/Perl/Tidy/Formatter.pm" (7 Jan 2020, 668308 Bytes) of package /linux/misc/Perl-Tidy-20200110.tar.gz:


As a special service "Fossies" has tried to format the requested source page into HTML format using (guessed) Perl source code syntax highlighting (style: standard) with prefixed line numbers and code folding option. Alternatively you can here view or download the uninterpreted source code file. For more information about "Formatter.pm" see the Fossies "Dox" file reference documentation and the latest Fossies "Diffs" side-by-side code changes report: 20191203_vs_20200110.

    1 #####################################################################
    2 #
    3 # The Perl::Tidy::Formatter package adds indentation, whitespace, and
    4 # line breaks to the token stream
    5 #
    6 # WARNING: This is not a real class for speed reasons.  Only one
    7 # Formatter may be used.
    8 #
    9 #####################################################################
   10 
   11 package Perl::Tidy::Formatter;
   12 use strict;
   13 use warnings;
   14 use Carp;
   15 our $VERSION = '20200110';
   16 
   17 # The Tokenizer will be loaded with the Formatter
   18 ##use Perl::Tidy::Tokenizer;    # for is_keyword()
   19 
   20 sub Die {
   21     my ($msg) = @_;
   22     Perl::Tidy::Die($msg);
   23     croak "unexpected return from Perl::Tidy::Die";
   24 }
   25 
   26 sub Warn {
   27     my ($msg) = @_;
   28     Perl::Tidy::Warn($msg);
   29     return;
   30 }
   31 
   32 sub Exit {
   33     my ($msg) = @_;
   34     Perl::Tidy::Exit($msg);
   35     croak "unexpected return from Perl::Tidy::Exit";
   36 }
   37 
   38 BEGIN {
   39 
   40     # Codes for insertion and deletion of blanks
   41     use constant DELETE => 0;
   42     use constant STABLE => 1;
   43     use constant INSERT => 2;
   44 
   45     # Caution: these debug flags produce a lot of output
   46     # They should all be 0 except when debugging small scripts
   47     use constant FORMATTER_DEBUG_FLAG_RECOMBINE   => 0;
   48     use constant FORMATTER_DEBUG_FLAG_BOND_TABLES => 0;
   49     use constant FORMATTER_DEBUG_FLAG_BOND        => 0;
   50     use constant FORMATTER_DEBUG_FLAG_BREAK       => 0;
   51     use constant FORMATTER_DEBUG_FLAG_CI          => 0;
   52     use constant FORMATTER_DEBUG_FLAG_FLUSH       => 0;
   53     use constant FORMATTER_DEBUG_FLAG_FORCE       => 0;
   54     use constant FORMATTER_DEBUG_FLAG_LIST        => 0;
   55     use constant FORMATTER_DEBUG_FLAG_NOBREAK     => 0;
   56     use constant FORMATTER_DEBUG_FLAG_OUTPUT      => 0;
   57     use constant FORMATTER_DEBUG_FLAG_SPARSE      => 0;
   58     use constant FORMATTER_DEBUG_FLAG_STORE       => 0;
   59     use constant FORMATTER_DEBUG_FLAG_UNDOBP      => 0;
   60     use constant FORMATTER_DEBUG_FLAG_WHITE       => 0;
   61 
   62     my $debug_warning = sub {
   63         print STDOUT "FORMATTER_DEBUGGING with key $_[0]\n";
   64     };
   65 
   66     FORMATTER_DEBUG_FLAG_RECOMBINE   && $debug_warning->('RECOMBINE');
   67     FORMATTER_DEBUG_FLAG_BOND_TABLES && $debug_warning->('BOND_TABLES');
   68     FORMATTER_DEBUG_FLAG_BOND        && $debug_warning->('BOND');
   69     FORMATTER_DEBUG_FLAG_BREAK       && $debug_warning->('BREAK');
   70     FORMATTER_DEBUG_FLAG_CI          && $debug_warning->('CI');
   71     FORMATTER_DEBUG_FLAG_FLUSH       && $debug_warning->('FLUSH');
   72     FORMATTER_DEBUG_FLAG_FORCE       && $debug_warning->('FORCE');
   73     FORMATTER_DEBUG_FLAG_LIST        && $debug_warning->('LIST');
   74     FORMATTER_DEBUG_FLAG_NOBREAK     && $debug_warning->('NOBREAK');
   75     FORMATTER_DEBUG_FLAG_OUTPUT      && $debug_warning->('OUTPUT');
   76     FORMATTER_DEBUG_FLAG_SPARSE      && $debug_warning->('SPARSE');
   77     FORMATTER_DEBUG_FLAG_STORE       && $debug_warning->('STORE');
   78     FORMATTER_DEBUG_FLAG_UNDOBP      && $debug_warning->('UNDOBP');
   79     FORMATTER_DEBUG_FLAG_WHITE       && $debug_warning->('WHITE');
   80 }
   81 
   82 use vars qw{
   83 
   84   @gnu_stack
   85   $max_gnu_stack_index
   86   $gnu_position_predictor
   87   $line_start_index_to_go
   88   $last_indentation_written
   89   $last_unadjusted_indentation
   90   $last_leading_token
   91   $last_output_short_opening_token
   92   $peak_batch_size
   93 
   94   $saw_VERSION_in_this_file
   95   $saw_END_or_DATA_
   96 
   97   @gnu_item_list
   98   $max_gnu_item_index
   99   $gnu_sequence_number
  100   $last_output_indentation
  101   %last_gnu_equals
  102   %gnu_comma_count
  103   %gnu_arrow_count
  104 
  105   @block_type_to_go
  106   @type_sequence_to_go
  107   @container_environment_to_go
  108   @bond_strength_to_go
  109   @forced_breakpoint_to_go
  110   @token_lengths_to_go
  111   @summed_lengths_to_go
  112   @levels_to_go
  113   @leading_spaces_to_go
  114   @reduced_spaces_to_go
  115   @mate_index_to_go
  116   @ci_levels_to_go
  117   @nesting_depth_to_go
  118   @nobreak_to_go
  119   @old_breakpoint_to_go
  120   @tokens_to_go
  121   @K_to_go
  122   @types_to_go
  123   @inext_to_go
  124   @iprev_to_go
  125 
  126   %saved_opening_indentation
  127 
  128   $max_index_to_go
  129   $comma_count_in_batch
  130   $last_nonblank_index_to_go
  131   $last_nonblank_type_to_go
  132   $last_nonblank_token_to_go
  133   $last_last_nonblank_index_to_go
  134   $last_last_nonblank_type_to_go
  135   $last_last_nonblank_token_to_go
  136   @nonblank_lines_at_depth
  137   $starting_in_quote
  138   $ending_in_quote
  139   @whitespace_level_stack
  140   $whitespace_last_level
  141 
  142   $format_skipping_pattern_begin
  143   $format_skipping_pattern_end
  144 
  145   $forced_breakpoint_count
  146   $forced_breakpoint_undo_count
  147   @forced_breakpoint_undo_stack
  148   %postponed_breakpoint
  149 
  150   $tabbing
  151   $embedded_tab_count
  152   $first_embedded_tab_at
  153   $last_embedded_tab_at
  154   $deleted_semicolon_count
  155   $first_deleted_semicolon_at
  156   $last_deleted_semicolon_at
  157   $added_semicolon_count
  158   $first_added_semicolon_at
  159   $last_added_semicolon_at
  160   $first_tabbing_disagreement
  161   $last_tabbing_disagreement
  162   $in_tabbing_disagreement
  163   $tabbing_disagreement_count
  164   $input_line_tabbing
  165 
  166   $last_line_leading_type
  167   $last_line_leading_level
  168   $last_last_line_leading_level
  169 
  170   %block_leading_text
  171   %block_opening_line_number
  172   $csc_new_statement_ok
  173   $csc_last_label
  174   %csc_block_label
  175   $accumulating_text_for_block
  176   $leading_block_text
  177   $rleading_block_if_elsif_text
  178   $leading_block_text_level
  179   $leading_block_text_length_exceeded
  180   $leading_block_text_line_length
  181   $leading_block_text_line_number
  182   $closing_side_comment_prefix_pattern
  183   $closing_side_comment_list_pattern
  184 
  185   $blank_lines_after_opening_block_pattern
  186   $blank_lines_before_closing_block_pattern
  187 
  188   $last_nonblank_token
  189   $last_nonblank_type
  190   $last_last_nonblank_token
  191   $last_last_nonblank_type
  192   $last_nonblank_block_type
  193   $last_output_level
  194   %is_do_follower
  195   %is_if_brace_follower
  196   %space_after_keyword
  197   $rbrace_follower
  198   $looking_for_else
  199   %is_last_next_redo_return
  200   %is_other_brace_follower
  201   %is_else_brace_follower
  202   %is_anon_sub_brace_follower
  203   %is_anon_sub_1_brace_follower
  204   %is_sort_map_grep
  205   %is_sort_map_grep_eval
  206   %want_one_line_block
  207   %is_sort_map_grep_eval_do
  208   %is_block_without_semicolon
  209   %is_if_unless
  210   %is_and_or
  211   %is_assignment
  212   %is_chain_operator
  213   %is_if_unless_and_or_last_next_redo_return
  214   %ok_to_add_semicolon_for_block_type
  215 
  216   @has_broken_sublist
  217   @dont_align
  218   @want_comma_break
  219 
  220   $is_static_block_comment
  221   $index_start_one_line_block
  222   $semicolons_before_block_self_destruct
  223   $index_max_forced_break
  224   $input_line_number
  225   $diagnostics_object
  226   $vertical_aligner_object
  227   $logger_object
  228   $file_writer_object
  229   $formatter_self
  230   @ci_stack
  231   %want_break_before
  232   %outdent_keyword
  233   $static_block_comment_pattern
  234   $static_side_comment_pattern
  235   %opening_vertical_tightness
  236   %closing_vertical_tightness
  237   %closing_token_indentation
  238   $some_closing_token_indentation
  239 
  240   %opening_token_right
  241   %stack_opening_token
  242   %stack_closing_token
  243 
  244   $block_brace_vertical_tightness_pattern
  245   $keyword_group_list_pattern
  246   $keyword_group_list_comment_pattern
  247 
  248   $rOpts_add_newlines
  249   $rOpts_add_whitespace
  250   $rOpts_block_brace_tightness
  251   $rOpts_block_brace_vertical_tightness
  252   $rOpts_brace_left_and_indent
  253   $rOpts_comma_arrow_breakpoints
  254   $rOpts_break_at_old_keyword_breakpoints
  255   $rOpts_break_at_old_comma_breakpoints
  256   $rOpts_break_at_old_logical_breakpoints
  257   $rOpts_break_at_old_method_breakpoints
  258   $rOpts_break_at_old_ternary_breakpoints
  259   $rOpts_break_at_old_attribute_breakpoints
  260   $rOpts_closing_side_comment_else_flag
  261   $rOpts_closing_side_comment_maximum_text
  262   $rOpts_continuation_indentation
  263   $rOpts_delete_old_whitespace
  264   $rOpts_fuzzy_line_length
  265   $rOpts_indent_columns
  266   $rOpts_line_up_parentheses
  267   $rOpts_maximum_fields_per_table
  268   $rOpts_maximum_line_length
  269   $rOpts_variable_maximum_line_length
  270   $rOpts_short_concatenation_item_length
  271   $rOpts_keep_old_blank_lines
  272   $rOpts_ignore_old_breakpoints
  273   $rOpts_format_skipping
  274   $rOpts_space_function_paren
  275   $rOpts_space_keyword_paren
  276   $rOpts_keep_interior_semicolons
  277   $rOpts_ignore_side_comment_lengths
  278   $rOpts_stack_closing_block_brace
  279   $rOpts_space_backslash_quote
  280   $rOpts_whitespace_cycle
  281   $rOpts_one_line_block_semicolons
  282 
  283   %is_opening_type
  284   %is_closing_type
  285   %is_keyword_returning_list
  286   %tightness
  287   %matching_token
  288   $rOpts
  289   %right_bond_strength
  290   %left_bond_strength
  291   %binary_ws_rules
  292   %want_left_space
  293   %want_right_space
  294   %is_digraph
  295   %is_trigraph
  296   $bli_pattern
  297   $bli_list_string
  298   %is_closing_type
  299   %is_opening_type
  300   %is_closing_token
  301   %is_opening_token
  302 
  303   %weld_len_left_closing
  304   %weld_len_right_closing
  305   %weld_len_left_opening
  306   %weld_len_right_opening
  307 
  308   $rcuddled_block_types
  309 
  310   $SUB_PATTERN
  311   $ASUB_PATTERN
  312 
  313   $NVARS
  314 
  315 };
  316 
  317 BEGIN {
  318 
  319     # Array index names for token variables
  320     my $i = 0;
  321     use constant {
  322         _BLOCK_TYPE_            => $i++,
  323         _CI_LEVEL_              => $i++,
  324         _CONTAINER_ENVIRONMENT_ => $i++,
  325         _CONTAINER_TYPE_        => $i++,
  326         _CUMULATIVE_LENGTH_     => $i++,
  327         _LINE_INDEX_            => $i++,
  328         _KNEXT_SEQ_ITEM_        => $i++,
  329         _LEVEL_                 => $i++,
  330         _LEVEL_TRUE_            => $i++,
  331         _SLEVEL_                => $i++,
  332         _TOKEN_                 => $i++,
  333         _TYPE_                  => $i++,
  334         _TYPE_SEQUENCE_         => $i++,
  335     };
  336     $NVARS = 1 + _TYPE_SEQUENCE_;
  337 
  338     # default list of block types for which -bli would apply
  339     $bli_list_string = 'if else elsif unless while for foreach do : sub';
  340 
  341     my @q;
  342 
  343     @q = qw(
  344       .. :: << >> ** && .. || // -> => += -= .= %= &= |= ^= *= <>
  345       <= >= == =~ !~ != ++ -- /= x=
  346     );
  347     @is_digraph{@q} = (1) x scalar(@q);
  348 
  349     @q = qw( ... **= <<= >>= &&= ||= //= <=> <<~ );
  350     @is_trigraph{@q} = (1) x scalar(@q);
  351 
  352     @q = qw(
  353       = **= += *= &= <<= &&=
  354       -= /= |= >>= ||= //=
  355       .= %= ^=
  356       x=
  357     );
  358     @is_assignment{@q} = (1) x scalar(@q);
  359 
  360     @q = qw(
  361       grep
  362       keys
  363       map
  364       reverse
  365       sort
  366       split
  367     );
  368     @is_keyword_returning_list{@q} = (1) x scalar(@q);
  369 
  370     @q = qw(is if unless and or err last next redo return);
  371     @is_if_unless_and_or_last_next_redo_return{@q} = (1) x scalar(@q);
  372 
  373     @q = qw(last next redo return);
  374     @is_last_next_redo_return{@q} = (1) x scalar(@q);
  375 
  376     @q = qw(sort map grep);
  377     @is_sort_map_grep{@q} = (1) x scalar(@q);
  378 
  379     @q = qw(sort map grep eval);
  380     @is_sort_map_grep_eval{@q} = (1) x scalar(@q);
  381 
  382     @q = qw(sort map grep eval do);
  383     @is_sort_map_grep_eval_do{@q} = (1) x scalar(@q);
  384 
  385     @q = qw(if unless);
  386     @is_if_unless{@q} = (1) x scalar(@q);
  387 
  388     @q = qw(and or err);
  389     @is_and_or{@q} = (1) x scalar(@q);
  390 
  391     # Identify certain operators which often occur in chains.
  392     # Note: the minus (-) causes a side effect of padding of the first line in
  393     # something like this (by sub set_logical_padding):
  394     #    Checkbutton => 'Transmission checked',
  395     #   -variable    => \$TRANS
  396     # This usually improves appearance so it seems ok.
  397     @q = qw(&& || and or : ? . + - * /);
  398     @is_chain_operator{@q} = (1) x scalar(@q);
  399 
  400     # We can remove semicolons after blocks preceded by these keywords
  401     @q =
  402       qw(BEGIN END CHECK INIT AUTOLOAD DESTROY UNITCHECK continue if elsif else
  403       unless while until for foreach given when default);
  404     @is_block_without_semicolon{@q} = (1) x scalar(@q);
  405 
  406     # We will allow semicolons to be added within these block types
  407     # as well as sub and package blocks.
  408     # NOTES:
  409     # 1. Note that these keywords are omitted:
  410     #     switch case given when default sort map grep
  411     # 2. It is also ok to add for sub and package blocks and a labeled block
  412     # 3. But not okay for other perltidy types including:
  413     #     { } ; G t
  414     # 4. Test files: blktype.t, blktype1.t, semicolon.t
  415     @q =
  416       qw( BEGIN END CHECK INIT AUTOLOAD DESTROY UNITCHECK continue if elsif else
  417       unless do while until eval for foreach );
  418     @ok_to_add_semicolon_for_block_type{@q} = (1) x scalar(@q);
  419 
  420     # 'L' is token for opening { at hash key
  421     @q = qw< L { ( [ >;
  422     @is_opening_type{@q} = (1) x scalar(@q);
  423 
  424     # 'R' is token for closing } at hash key
  425     @q = qw< R } ) ] >;
  426     @is_closing_type{@q} = (1) x scalar(@q);
  427 
  428     @q = qw< { ( [ >;
  429     @is_opening_token{@q} = (1) x scalar(@q);
  430 
  431     @q = qw< } ) ] >;
  432     @is_closing_token{@q} = (1) x scalar(@q);
  433 
  434     # Patterns for standardizing matches to block types for regular subs and
  435     # anonymous subs. Examples
  436     #  'sub process' is a named sub
  437     #  'sub ::m' is a named sub
  438     #  'sub' is an anonymous sub
  439     #  'sub:' is a label, not a sub
  440     #  'substr' is a keyword
  441     $SUB_PATTERN  = '^sub\s+(::|\w)';
  442     $ASUB_PATTERN = '^sub$';
  443 }
  444 
  445 # whitespace codes
  446 use constant WS_YES      => 1;
  447 use constant WS_OPTIONAL => 0;
  448 use constant WS_NO       => -1;
  449 
  450 # Token bond strengths.
  451 use constant NO_BREAK    => 10000;
  452 use constant VERY_STRONG => 100;
  453 use constant STRONG      => 2.1;
  454 use constant NOMINAL     => 1.1;
  455 use constant WEAK        => 0.8;
  456 use constant VERY_WEAK   => 0.55;
  457 
  458 # values for testing indexes in output array
  459 use constant UNDEFINED_INDEX => -1;
  460 
  461 # Maximum number of little messages; probably need not be changed.
  462 use constant MAX_NAG_MESSAGES => 6;
  463 
  464 # increment between sequence numbers for each type
  465 # For example, ?: pairs might have numbers 7,11,15,...
  466 use constant TYPE_SEQUENCE_INCREMENT => 4;
  467 
  468 {
  469 
  470     # methods to count instances
  471     my $_count = 0;
  472     sub get_count        { return $_count; }
  473     sub _increment_count { return ++$_count }
  474     sub _decrement_count { return --$_count }
  475 }
  476 
  477 sub trim {
  478 
  479     # trim leading and trailing whitespace from a string
  480     my $str = shift;
  481     $str =~ s/\s+$//;
  482     $str =~ s/^\s+//;
  483     return $str;
  484 }
  485 
  486 sub max {
  487     my @vals = @_;
  488     my $max  = shift @vals;
  489     foreach my $val (@vals) {
  490         $max = ( $max < $val ) ? $val : $max;
  491     }
  492     return $max;
  493 }
  494 
  495 sub min {
  496     my @vals = @_;
  497     my $min  = shift @vals;
  498     foreach my $val (@vals) {
  499         $min = ( $min > $val ) ? $val : $min;
  500     }
  501     return $min;
  502 }
  503 
  504 sub split_words {
  505 
  506     # given a string containing words separated by whitespace,
  507     # return the list of words
  508     my ($str) = @_;
  509     return unless $str;
  510     $str =~ s/\s+$//;
  511     $str =~ s/^\s+//;
  512     return split( /\s+/, $str );
  513 }
  514 
  515 sub check_keys {
  516     my ( $rtest, $rvalid, $msg, $exact_match ) = @_;
  517 
  518     # Check the keys of a hash:
  519     # $rtest   = ref to hash to test
  520     # $rvalid  = ref to hash with valid keys
  521 
  522     # $msg = a message to write in case of error
  523     # $exact_match defines the type of check:
  524     #     = false: test hash must not have unknown key
  525     #     = true:  test hash must have exactly same keys as known hash
  526     my @unknown_keys =
  527       grep { !exists $rvalid->{$_} } keys %{$rtest};
  528     my @missing_keys =
  529       grep { !exists $rtest->{$_} } keys %{$rvalid};
  530     my $error = @unknown_keys;
  531     if ($exact_match) { $error ||= @missing_keys }
  532     if ($error) {
  533         local $" = ')(';
  534         my @expected_keys = sort keys %{$rvalid};
  535         @unknown_keys = sort @unknown_keys;
  536         Die(<<EOM);
  537 ------------------------------------------------------------------------
  538 Program error detected checking hash keys
  539 Message is: '$msg'
  540 Expected keys: (@expected_keys)
  541 Unknown key(s): (@unknown_keys)
  542 Missing key(s): (@missing_keys)
  543 ------------------------------------------------------------------------
  544 EOM
  545     }
  546     return;
  547 }
  548 
  549 # interface to Perl::Tidy::Logger routines
  550 sub warning {
  551     my ($msg) = @_;
  552     if ($logger_object) { $logger_object->warning($msg); }
  553     return;
  554 }
  555 
  556 sub complain {
  557     my ($msg) = @_;
  558     if ($logger_object) {
  559         $logger_object->complain($msg);
  560     }
  561     return;
  562 }
  563 
  564 sub write_logfile_entry {
  565     my @msg = @_;
  566     if ($logger_object) {
  567         $logger_object->write_logfile_entry(@msg);
  568     }
  569     return;
  570 }
  571 
  572 sub black_box {
  573     my @msg = @_;
  574     if ($logger_object) { $logger_object->black_box(@msg); }
  575     return;
  576 }
  577 
  578 sub report_definite_bug {
  579     if ($logger_object) {
  580         $logger_object->report_definite_bug();
  581     }
  582     return;
  583 }
  584 
  585 sub get_saw_brace_error {
  586     if ($logger_object) {
  587         return $logger_object->get_saw_brace_error();
  588     }
  589     return;
  590 }
  591 
  592 sub we_are_at_the_last_line {
  593     if ($logger_object) {
  594         $logger_object->we_are_at_the_last_line();
  595     }
  596     return;
  597 }
  598 
  599 # interface to Perl::Tidy::Diagnostics routine
  600 sub write_diagnostics {
  601     my $msg = shift;
  602     if ($diagnostics_object) { $diagnostics_object->write_diagnostics($msg); }
  603     return;
  604 }
  605 
  606 sub get_added_semicolon_count {
  607     my $self = shift;
  608     return $added_semicolon_count;
  609 }
  610 
  611 sub DESTROY {
  612     my $self = shift;
  613     $self->_decrement_count();
  614     return;
  615 }
  616 
  617 sub get_output_line_number {
  618     return $vertical_aligner_object->get_output_line_number();
  619 }
  620 
  621 sub new {
  622 
  623     my ( $class, @args ) = @_;
  624 
  625     # we are given an object with a write_line() method to take lines
  626     my %defaults = (
  627         sink_object        => undef,
  628         diagnostics_object => undef,
  629         logger_object      => undef,
  630     );
  631     my %args = ( %defaults, @args );
  632 
  633     $logger_object      = $args{logger_object};
  634     $diagnostics_object = $args{diagnostics_object};
  635 
  636     # we create another object with a get_line() and peek_ahead() method
  637     my $sink_object = $args{sink_object};
  638     $file_writer_object =
  639       Perl::Tidy::FileWriter->new( $sink_object, $rOpts, $logger_object );
  640 
  641     # initialize the leading whitespace stack to negative levels
  642     # so that we can never run off the end of the stack
  643     $peak_batch_size        = 0;    # flag to determine if we have output code
  644     $gnu_position_predictor = 0;    # where the current token is predicted to be
  645     $max_gnu_stack_index    = 0;
  646     $max_gnu_item_index     = -1;
  647     $gnu_stack[0]                = new_lp_indentation_item( 0, -1, -1, 0, 0 );
  648     @gnu_item_list               = ();
  649     $last_output_indentation     = 0;
  650     $last_indentation_written    = 0;
  651     $last_unadjusted_indentation = 0;
  652     $last_leading_token          = "";
  653     $last_output_short_opening_token = 0;
  654 
  655     $saw_VERSION_in_this_file = !$rOpts->{'pass-version-line'};
  656     $saw_END_or_DATA_         = 0;
  657 
  658     @block_type_to_go            = ();
  659     @type_sequence_to_go         = ();
  660     @container_environment_to_go = ();
  661     @bond_strength_to_go         = ();
  662     @forced_breakpoint_to_go     = ();
  663     @summed_lengths_to_go        = ();    # line length to start of ith token
  664     @token_lengths_to_go         = ();
  665     @levels_to_go                = ();
  666     @mate_index_to_go            = ();
  667     @ci_levels_to_go             = ();
  668     @nesting_depth_to_go         = (0);
  669     @nobreak_to_go               = ();
  670     @old_breakpoint_to_go        = ();
  671     @tokens_to_go                = ();
  672     @K_to_go                     = ();
  673     @types_to_go                 = ();
  674     @leading_spaces_to_go        = ();
  675     @reduced_spaces_to_go        = ();
  676     @inext_to_go                 = ();
  677     @iprev_to_go                 = ();
  678 
  679     @whitespace_level_stack = ();
  680     $whitespace_last_level  = -1;
  681 
  682     @dont_align         = ();
  683     @has_broken_sublist = ();
  684     @want_comma_break   = ();
  685 
  686     @ci_stack                   = ("");
  687     $first_tabbing_disagreement = 0;
  688     $last_tabbing_disagreement  = 0;
  689     $tabbing_disagreement_count = 0;
  690     $in_tabbing_disagreement    = 0;
  691     $input_line_tabbing         = undef;
  692 
  693     $last_last_line_leading_level = 0;
  694     $last_line_leading_level      = 0;
  695     $last_line_leading_type       = '#';
  696 
  697     $last_nonblank_token        = ';';
  698     $last_nonblank_type         = ';';
  699     $last_last_nonblank_token   = ';';
  700     $last_last_nonblank_type    = ';';
  701     $last_nonblank_block_type   = "";
  702     $last_output_level          = 0;
  703     $looking_for_else           = 0;
  704     $embedded_tab_count         = 0;
  705     $first_embedded_tab_at      = 0;
  706     $last_embedded_tab_at       = 0;
  707     $deleted_semicolon_count    = 0;
  708     $first_deleted_semicolon_at = 0;
  709     $last_deleted_semicolon_at  = 0;
  710     $added_semicolon_count      = 0;
  711     $first_added_semicolon_at   = 0;
  712     $last_added_semicolon_at    = 0;
  713     $is_static_block_comment    = 0;
  714     %postponed_breakpoint       = ();
  715 
  716     # variables for adding side comments
  717     %block_leading_text        = ();
  718     %block_opening_line_number = ();
  719     $csc_new_statement_ok      = 1;
  720     %csc_block_label           = ();
  721 
  722     %saved_opening_indentation = ();
  723 
  724     reset_block_text_accumulator();
  725 
  726     prepare_for_new_input_lines();
  727 
  728     $vertical_aligner_object =
  729       Perl::Tidy::VerticalAligner->initialize( $rOpts, $file_writer_object,
  730         $logger_object, $diagnostics_object );
  731 
  732     if ( $rOpts->{'entab-leading-whitespace'} ) {
  733         write_logfile_entry(
  734 "Leading whitespace will be entabbed with $rOpts->{'entab-leading-whitespace'} spaces per tab\n"
  735         );
  736     }
  737     elsif ( $rOpts->{'tabs'} ) {
  738         write_logfile_entry("Indentation will be with a tab character\n");
  739     }
  740     else {
  741         write_logfile_entry(
  742             "Indentation will be with $rOpts->{'indent-columns'} spaces\n");
  743     }
  744 
  745     # This hash holds the main data structures for formatting
  746     # All hash keys must be defined here.
  747     $formatter_self = {
  748         rlines              => [],       # = ref to array of lines of the file
  749         rlines_new          => [],       # = ref to array of output lines
  750                                          #   (FOR FUTURE DEVELOPMENT)
  751         rLL                 => [],       # = ref to array with all tokens
  752                                          # in the file. LL originally meant
  753                                          # 'Linked List'. Linked lists were a
  754                                          # bad idea but LL is easy to type.
  755         Klimit              => undef,    # = maximum K index for rLL. This is
  756                                          # needed to catch any autovivification
  757                                          # problems.
  758         rnested_pairs       => [],       # for welding decisions
  759         K_opening_container => {},       # for quickly traversing structure
  760         K_closing_container => {},       # for quickly traversing structure
  761         K_opening_ternary   => {},       # for quickly traversing structure
  762         K_closing_ternary   => {},       # for quickly traversing structure
  763         rcontainer_map      => {},       # hierarchical map of containers
  764         rK_phantom_semicolons =>
  765           undef,    # for undoing phantom semicolons if iterating
  766         rpaired_to_inner_container => {},
  767         rbreak_container           => {},    # prevent one-line blocks
  768         rshort_nested              => {},    # blocks not forced open
  769         rvalid_self_keys           => [],    # for checking
  770         valign_batch_count         => 0,
  771     };
  772     my @valid_keys = keys %{$formatter_self};
  773     $formatter_self->{rvalid_self_keys} = \@valid_keys;
  774 
  775     bless $formatter_self, $class;
  776 
  777     # Safety check..this is not a class yet
  778     if ( _increment_count() > 1 ) {
  779         confess
  780 "Attempt to create more than 1 object in $class, which is not a true class yet\n";
  781     }
  782     return $formatter_self;
  783 }
  784 
  785 # Future routines for storing new lines
  786 sub push_line {
  787     my ( $self, $rline ) = @_;
  788 
  789     # my $rline = $rlines->[$index_old];
  790     # push @{$rlines_new}, $rline;
  791     return;
  792 }
  793 
  794 sub push_old_line {
  795     my ( $self, $index_old ) = @_;
  796 
  797     # TODO: This will copy line with index $index_old to the new line array
  798     # my $rlines = $self->{rlines};
  799     # my $rline = $rlines->[$index_old];
  800     # $self->push_line($rline);
  801     return;
  802 }
  803 
  804 sub push_blank_line {
  805     my ($self) = @_;
  806 
  807     # my $rline = ...
  808     # $self->push_line($rline);
  809     return;
  810 }
  811 
  812 sub push_CODE_line {
  813     my ( $self, $Kmin, $Kmax ) = @_;
  814 
  815     # TODO: This will store the values for one new line of CODE
  816     # CHECK TOKEN RANGE HERE
  817     # $self->push_line($rline);
  818     return;
  819 }
  820 
  821 sub increment_valign_batch_count {
  822     my ($self) = shift;
  823     return ++$self->{valign_batch_count};
  824 }
  825 
  826 sub get_valign_batch_count {
  827     my ($self) = shift;
  828     return $self->{valign_batch_count};
  829 }
  830 
  831 sub Fault {
  832     my ($msg) = @_;
  833 
  834     # This routine is called for errors that really should not occur
  835     # except if there has been a bug introduced by a recent program change
  836     my ( $package0, $filename0, $line0, $subroutine0 ) = caller(0);
  837     my ( $package1, $filename1, $line1, $subroutine1 ) = caller(1);
  838     my ( $package2, $filename2, $line2, $subroutine2 ) = caller(2);
  839     my $input_stream_name = $logger_object->get_input_stream_name();
  840 
  841     Die(<<EOM);
  842 ==============================================================================
  843 While operating on input stream with name: '$input_stream_name'
  844 A fault was detected at line $line0 of sub '$subroutine1'
  845 in file '$filename1'
  846 which was called from line $line1 of sub '$subroutine2'
  847 Message: '$msg'
  848 This is probably an error introduced by a recent programming change. 
  849 ==============================================================================
  850 EOM
  851 
  852     # This is for Perl-Critic
  853     return;
  854 }
  855 
  856 sub check_self_hash {
  857     my $self            = shift;
  858     my @valid_self_keys = @{ $self->{rvalid_self_keys} };
  859     my %valid_self_hash;
  860     @valid_self_hash{@valid_self_keys} = (1) x scalar(@valid_self_keys);
  861     check_keys( $self, \%valid_self_hash, "Checkpoint: self error", 1 );
  862     return;
  863 }
  864 
  865 sub check_token_array {
  866     my $self = shift;
  867 
  868     # Check for errors in the array of tokens
  869     # Uses package variable $NVARS
  870     $self->check_self_hash();
  871     my $rLL = $self->{rLL};
  872     for ( my $KK = 0 ; $KK < @{$rLL} ; $KK++ ) {
  873         my $nvars = @{ $rLL->[$KK] };
  874         if ( $nvars != $NVARS ) {
  875             my $type = $rLL->[$KK]->[_TYPE_];
  876             $type = '*' unless defined($type);
  877             Fault(
  878 "number of vars for node $KK, type '$type', is $nvars but should be $NVARS"
  879             );
  880         }
  881         foreach my $var ( _TOKEN_, _TYPE_ ) {
  882             if ( !defined( $rLL->[$KK]->[$var] ) ) {
  883                 my $iline = $rLL->[$KK]->[_LINE_INDEX_];
  884                 Fault("Undefined variable $var for K=$KK, line=$iline\n");
  885             }
  886         }
  887     }
  888     return;
  889 }
  890 
  891 sub set_rLL_max_index {
  892     my $self = shift;
  893 
  894     # Set the limit of the rLL array, assuming that it is correct.
  895     # This should only be called by routines after they make changes
  896     # to tokenization
  897     my $rLL = $self->{rLL};
  898     if ( !defined($rLL) ) {
  899 
  900         # Shouldn't happen because rLL was initialized to be an array ref
  901         Fault("Undefined Memory rLL");
  902     }
  903     my $Klimit_old = $self->{Klimit};
  904     my $num        = @{$rLL};
  905     my $Klimit;
  906     if ( $num > 0 ) { $Klimit = $num - 1 }
  907     $self->{Klimit} = $Klimit;
  908     return ($Klimit);
  909 }
  910 
  911 sub get_rLL_max_index {
  912     my $self = shift;
  913 
  914     # the memory location $rLL and number of tokens should be obtained
  915     # from this routine so that any autovivication can be immediately caught.
  916     my $rLL    = $self->{rLL};
  917     my $Klimit = $self->{Klimit};
  918     if ( !defined($rLL) ) {
  919 
  920         # Shouldn't happen because rLL was initialized to be an array ref
  921         Fault("Undefined Memory rLL");
  922     }
  923     my $num = @{$rLL};
  924     if (   $num == 0 && defined($Klimit)
  925         || $num > 0 && !defined($Klimit)
  926         || $num > 0 && $Klimit != $num - 1 )
  927     {
  928 
  929         # Possible autovivification problem...
  930         if ( !defined($Klimit) ) { $Klimit = '*' }
  931         Fault("Error getting rLL: Memory items=$num and Klimit=$Klimit");
  932     }
  933     return ($Klimit);
  934 }
  935 
  936 sub prepare_for_new_input_lines {
  937 
  938     # Remember the largest batch size processed. This is needed
  939     # by the pad routine to avoid padding the first nonblank token
  940     if ( $max_index_to_go && $max_index_to_go > $peak_batch_size ) {
  941         $peak_batch_size = $max_index_to_go;
  942     }
  943 
  944     $gnu_sequence_number++;    # increment output batch counter
  945     %last_gnu_equals                = ();
  946     %gnu_comma_count                = ();
  947     %gnu_arrow_count                = ();
  948     $line_start_index_to_go         = 0;
  949     $max_gnu_item_index             = UNDEFINED_INDEX;
  950     $index_max_forced_break         = UNDEFINED_INDEX;
  951     $max_index_to_go                = UNDEFINED_INDEX;
  952     $last_nonblank_index_to_go      = UNDEFINED_INDEX;
  953     $last_nonblank_type_to_go       = '';
  954     $last_nonblank_token_to_go      = '';
  955     $last_last_nonblank_index_to_go = UNDEFINED_INDEX;
  956     $last_last_nonblank_type_to_go  = '';
  957     $last_last_nonblank_token_to_go = '';
  958     $forced_breakpoint_count        = 0;
  959     $forced_breakpoint_undo_count   = 0;
  960     $rbrace_follower                = undef;
  961     $summed_lengths_to_go[0]        = 0;
  962     $comma_count_in_batch           = 0;
  963     $starting_in_quote              = 0;
  964 
  965     destroy_one_line_block();
  966     return;
  967 }
  968 
  969 sub keyword_group_scan {
  970     my $self = shift;
  971 
  972     # Manipulate blank lines around keyword groups (kgb* flags)
  973     # Scan all lines looking for runs of consecutive lines beginning with
  974     # selected keywords.  Example keywords are 'my', 'our', 'local', ... but
  975     # they may be anything.  We will set flags requesting that blanks be
  976     # inserted around and within them according to input parameters.  Note
  977     # that we are scanning the lines as they came in in the input stream, so
  978     # they are not necessarily well formatted.
  979 
  980     # The output of this sub is a return hash ref whose keys are the indexes of
  981     # lines after which we desire a blank line.  For line index i:
  982     #     $rhash_of_desires->{$i} = 1 means we want a blank line AFTER line $i
  983     #     $rhash_of_desires->{$i} = 2 means we want blank line $i removed
  984     my $rhash_of_desires = {};
  985 
  986     my $Opt_blanks_before = $rOpts->{'keyword-group-blanks-before'};   # '-kgbb'
  987     my $Opt_blanks_after  = $rOpts->{'keyword-group-blanks-after'};    # '-kgba'
  988     my $Opt_blanks_inside = $rOpts->{'keyword-group-blanks-inside'};   # '-kgbi'
  989     my $Opt_blanks_delete = $rOpts->{'keyword-group-blanks-delete'};   # '-kgbd'
  990     my $Opt_size          = $rOpts->{'keyword-group-blanks-size'};     # '-kgbs'
  991 
  992     # A range of sizes can be input with decimal notation like 'min.max' with
  993     # any number of dots between the two numbers. Examples:
  994     #    string    =>    min    max  matches
  995     #    1.1             1      1    exactly 1
  996     #    1.3             1      3    1,2, or 3
  997     #    1..3            1      3    1,2, or 3
  998     #    5               5      -    5 or more
  999     #    6.              6      -    6 or more
 1000     #    .2              -      2    up to 2
 1001     #    1.0             1      0    nothing
 1002     my ( $Opt_size_min, $Opt_size_max ) = split /\.+/, $Opt_size;
 1003     if (   $Opt_size_min && $Opt_size_min !~ /^\d+$/
 1004         || $Opt_size_max && $Opt_size_max !~ /^\d+$/ )
 1005     {
 1006         Warn(<<EOM);
 1007 Unexpected value for -kgbs: '$Opt_size'; expecting 'min' or 'min.max'; 
 1008 ignoring all -kgb flags
 1009 EOM
 1010         return $rhash_of_desires;
 1011     }
 1012     $Opt_size_min = 1 unless ($Opt_size_min);
 1013 
 1014     if ( $Opt_size_max && $Opt_size_max < $Opt_size_min ) {
 1015         return $rhash_of_desires;
 1016     }
 1017 
 1018     # codes for $Opt_blanks_before and $Opt_blanks_after:
 1019     # 0 = never (delete if exist)
 1020     # 1 = stable (keep unchanged)
 1021     # 2 = always (insert if missing)
 1022 
 1023     return $rhash_of_desires
 1024       unless $Opt_size_min > 0
 1025       && ( $Opt_blanks_before != 1
 1026         || $Opt_blanks_after != 1
 1027         || $Opt_blanks_inside
 1028         || $Opt_blanks_delete );
 1029 
 1030     my $Opt_pattern         = $keyword_group_list_pattern;
 1031     my $Opt_comment_pattern = $keyword_group_list_comment_pattern;
 1032     my $Opt_repeat_count =
 1033       $rOpts->{'keyword-group-blanks-repeat-count'};    # '-kgbr'
 1034 
 1035     my $rlines              = $self->{rlines};
 1036     my $rLL                 = $self->{rLL};
 1037     my $K_closing_container = $self->{K_closing_container};
 1038 
 1039     # variables for the current group and subgroups:
 1040     my ( $ibeg, $iend, $count, $level_beg, $K_closing, @iblanks, @group,
 1041         @subgroup );
 1042 
 1043     # Definitions:
 1044     # ($ibeg, $iend) = starting and ending line indexes of this entire group
 1045     #         $count = total number of keywords seen in this entire group
 1046     #     $level_beg = indententation level of this group
 1047     #         @group = [ $i, $token, $count ] =list of all keywords & blanks
 1048     #      @subgroup =  $j, index of group where token changes
 1049     #       @iblanks = line indexes of blank lines in input stream in this group
 1050     #  where i=starting line index
 1051     #        token (the keyword)
 1052     #        count = number of this token in this subgroup
 1053     #            j = index in group where token changes
 1054     #
 1055     # These vars will contain values for the most recently seen line:
 1056     my ( $line_type, $CODE_type, $K_first, $K_last );
 1057 
 1058     my $number_of_groups_seen = 0;
 1059 
 1060     ####################
 1061     # helper subroutines
 1062     ####################
 1063 
 1064     my $insert_blank_after = sub {
 1065         my ($i) = @_;
 1066         $rhash_of_desires->{$i} = 1;
 1067         my $ip = $i + 1;
 1068         if ( defined( $rhash_of_desires->{$ip} )
 1069             && $rhash_of_desires->{$ip} == 2 )
 1070         {
 1071             $rhash_of_desires->{$ip} = 0;
 1072         }
 1073         return;
 1074     };
 1075 
 1076     my $split_into_sub_groups = sub {
 1077 
 1078         # place blanks around long sub-groups of keywords
 1079         # ...if requested
 1080         return unless ($Opt_blanks_inside);
 1081 
 1082         # loop over sub-groups, index k
 1083         push @subgroup, scalar @group;
 1084         my $kbeg = 1;
 1085         my $kend = @subgroup - 1;
 1086         for ( my $k = $kbeg ; $k <= $kend ; $k++ ) {
 1087 
 1088             # index j runs through all keywords found
 1089             my $j_b = $subgroup[ $k - 1 ];
 1090             my $j_e = $subgroup[$k] - 1;
 1091 
 1092             # index i is the actual line number of a keyword
 1093             my ( $i_b, $tok_b, $count_b ) = @{ $group[$j_b] };
 1094             my ( $i_e, $tok_e, $count_e ) = @{ $group[$j_e] };
 1095             my $num = $count_e - $count_b + 1;
 1096 
 1097             # This subgroup runs from line $ib to line $ie-1, but may contain
 1098             # blank lines
 1099             if ( $num >= $Opt_size_min ) {
 1100 
 1101                 # if there are blank lines, we require that at least $num lines
 1102                 # be non-blank up to the boundary with the next subgroup.
 1103                 my $nog_b = my $nog_e = 1;
 1104                 if ( @iblanks && !$Opt_blanks_delete ) {
 1105                     my $j_bb = $j_b + $num - 1;
 1106                     my ( $i_bb, $tok_bb, $count_bb ) = @{ $group[$j_bb] };
 1107                     $nog_b = $count_bb - $count_b + 1 == $num;
 1108 
 1109                     my $j_ee = $j_e - ( $num - 1 );
 1110                     my ( $i_ee, $tok_ee, $count_ee ) = @{ $group[$j_ee] };
 1111                     $nog_e = $count_e - $count_ee + 1 == $num;
 1112                 }
 1113                 if ( $nog_b && $k > $kbeg ) {
 1114                     $insert_blank_after->( $i_b - 1 );
 1115                 }
 1116                 if ( $nog_e && $k < $kend ) {
 1117                     my ( $i_ep, $tok_ep, $count_ep ) = @{ $group[ $j_e + 1 ] };
 1118                     $insert_blank_after->( $i_ep - 1 );
 1119                 }
 1120             }
 1121         }
 1122     };
 1123 
 1124     my $delete_if_blank = sub {
 1125         my ($i) = @_;
 1126 
 1127         # delete line $i if it is blank
 1128         return unless ( $i >= 0 && $i < @{$rlines} );
 1129         my $line_type = $rlines->[$i]->{_line_type};
 1130         return if ( $line_type ne 'CODE' );
 1131         my $code_type = $rlines->[$i]->{_code_type};
 1132         if ( $code_type eq 'BL' ) { $rhash_of_desires->{$i} = 2; }
 1133         return;
 1134     };
 1135 
 1136     my $delete_inner_blank_lines = sub {
 1137 
 1138         # always remove unwanted trailing blank lines from our list
 1139         return unless (@iblanks);
 1140         while ( my $ibl = pop(@iblanks) ) {
 1141             if ( $ibl < $iend ) { push @iblanks, $ibl; last }
 1142             $iend = $ibl;
 1143         }
 1144 
 1145         # now mark mark interior blank lines for deletion if requested
 1146         return unless ($Opt_blanks_delete);
 1147 
 1148         while ( my $ibl = pop(@iblanks) ) { $rhash_of_desires->{$ibl} = 2 }
 1149 
 1150     };
 1151 
 1152     my $end_group = sub {
 1153 
 1154         # end a group of keywords
 1155         my ($bad_ending) = @_;
 1156         if ( defined($ibeg) && $ibeg >= 0 ) {
 1157 
 1158             # then handle sufficiently large groups
 1159             if ( $count >= $Opt_size_min ) {
 1160 
 1161                 $number_of_groups_seen++;
 1162 
 1163                 # do any blank deletions regardless of the count
 1164                 $delete_inner_blank_lines->();
 1165 
 1166                 if ( $ibeg > 0 ) {
 1167                     my $code_type = $rlines->[ $ibeg - 1 ]->{_code_type};
 1168 
 1169                     # patch for hash bang line which is not currently marked as
 1170                     # a comment; mark it as a comment
 1171                     if ( $ibeg == 1 && !$code_type ) {
 1172                         my $line_text = $rlines->[ $ibeg - 1 ]->{_line_text};
 1173                         $code_type = 'BC'
 1174                           if ( $line_text && $line_text =~ /^#/ );
 1175                     }
 1176 
 1177                     # Do not insert a blank after a comment
 1178                     # (this could be subject to a flag in the future)
 1179                     if ( $code_type !~ /(BC|SBC|SBCX)/ ) {
 1180                         if ( $Opt_blanks_before == INSERT ) {
 1181                             $insert_blank_after->( $ibeg - 1 );
 1182 
 1183                         }
 1184                         elsif ( $Opt_blanks_before == DELETE ) {
 1185                             $delete_if_blank->( $ibeg - 1 );
 1186                         }
 1187                     }
 1188                 }
 1189 
 1190                 # We will only put blanks before code lines. We could loosen
 1191                 # this rule a little, but we have to be very careful because
 1192                 # for example we certainly don't want to drop a blank line
 1193                 # after a line like this:
 1194                 #   my $var = <<EOM;
 1195                 if ( $line_type eq 'CODE' && defined($K_first) ) {
 1196 
 1197                     # - Do not put a blank before a line of different level
 1198                     # - Do not put a blank line if we ended the search badly
 1199                     # - Do not put a blank at the end of the file
 1200                     # - Do not put a blank line before a hanging side comment
 1201                     my $level    = $rLL->[$K_first]->[_LEVEL_];
 1202                     my $ci_level = $rLL->[$K_first]->[_CI_LEVEL_];
 1203 
 1204                     if (   $level == $level_beg
 1205                         && $ci_level == 0
 1206                         && !$bad_ending
 1207                         && $iend < @{$rlines}
 1208                         && $CODE_type ne 'HSC' )
 1209                     {
 1210                         if ( $Opt_blanks_after == INSERT ) {
 1211                             $insert_blank_after->($iend);
 1212                         }
 1213                         elsif ( $Opt_blanks_after == DELETE ) {
 1214                             $delete_if_blank->( $iend + 1 );
 1215                         }
 1216                     }
 1217                 }
 1218             }
 1219             $split_into_sub_groups->();
 1220         }
 1221 
 1222         # reset for another group
 1223         $ibeg      = -1;
 1224         $iend      = undef;
 1225         $level_beg = -1;
 1226         $K_closing = undef;
 1227         @group     = ();
 1228         @subgroup  = ();
 1229         @iblanks   = ();
 1230     };
 1231 
 1232     my $find_container_end = sub {
 1233 
 1234         # If the keyword lines ends with an open token, find the closing token
 1235         # '$K_closing' so that we can easily skip past the contents of the
 1236         # container.
 1237         return if ( $K_last <= $K_first );
 1238         my $KK        = $K_last;
 1239         my $type_last = $rLL->[$KK]->[_TYPE_];
 1240         my $tok_last  = $rLL->[$KK]->[_TOKEN_];
 1241         if ( $type_last eq '#' ) {
 1242             $KK       = $self->K_previous_nonblank($KK);
 1243             $tok_last = $rLL->[$KK]->[_TOKEN_];
 1244         }
 1245         if ( $KK > $K_first && $tok_last =~ /^[\(\{\[]$/ ) {
 1246 
 1247             my $type_sequence = $rLL->[$KK]->[_TYPE_SEQUENCE_];
 1248             my $lev           = $rLL->[$KK]->[_LEVEL_];
 1249             if ( $lev == $level_beg ) {
 1250                 $K_closing = $K_closing_container->{$type_sequence};
 1251             }
 1252         }
 1253     };
 1254 
 1255     my $add_to_group = sub {
 1256         my ( $i, $token, $level ) = @_;
 1257 
 1258         # End the previous group if we have reached the maximum
 1259         # group size
 1260         if ( $Opt_size_max && @group >= $Opt_size_max ) {
 1261             $end_group->();
 1262         }
 1263 
 1264         if ( @group == 0 ) {
 1265             $ibeg      = $i;
 1266             $level_beg = $level;
 1267             $count     = 0;
 1268         }
 1269 
 1270         $count++;
 1271         $iend = $i;
 1272 
 1273         # New sub-group?
 1274         if ( !@group || $token ne $group[-1]->[1] ) {
 1275             push @subgroup, scalar(@group);
 1276         }
 1277         push @group, [ $i, $token, $count ];
 1278 
 1279         # remember if this line ends in an open container
 1280         $find_container_end->();
 1281 
 1282         return;
 1283     };
 1284 
 1285     ###################################
 1286     # loop over all lines of the source
 1287     ###################################
 1288     $end_group->();
 1289     my $i = -1;
 1290     foreach my $line_of_tokens ( @{$rlines} ) {
 1291 
 1292         $i++;
 1293         last
 1294           if ( $Opt_repeat_count > 0
 1295             && $number_of_groups_seen >= $Opt_repeat_count );
 1296 
 1297         $CODE_type = "";
 1298         $K_first   = undef;
 1299         $K_last    = undef;
 1300         $line_type = $line_of_tokens->{_line_type};
 1301 
 1302         # always end a group at non-CODE
 1303         if ( $line_type ne 'CODE' ) { $end_group->(); next }
 1304 
 1305         $CODE_type = $line_of_tokens->{_code_type};
 1306 
 1307         # end any group at a format skipping line
 1308         if ( $CODE_type && $CODE_type eq 'FS' ) {
 1309             $end_group->();
 1310             next;
 1311         }
 1312 
 1313         # continue in a verbatim (VB) type; it may be quoted text
 1314         if ( $CODE_type eq 'VB' ) {
 1315             if ( $ibeg >= 0 ) { $iend = $i; }
 1316             next;
 1317         }
 1318 
 1319         # and continue in blank (BL) types
 1320         if ( $CODE_type eq 'BL' ) {
 1321             if ( $ibeg >= 0 ) {
 1322                 $iend = $i;
 1323                 push @{iblanks}, $i;
 1324 
 1325                 # propagate current subgroup token
 1326                 my $tok = $group[-1]->[1];
 1327                 push @group, [ $i, $tok, $count ];
 1328             }
 1329             next;
 1330         }
 1331 
 1332         # examine the first token of this line
 1333         my $rK_range = $line_of_tokens->{_rK_range};
 1334         ( $K_first, $K_last ) = @{$rK_range};
 1335         if ( !defined($K_first) ) {
 1336 
 1337             # Unexpected blank line..shouldn't happen
 1338             # $rK_range should be defined for line type CODE
 1339             Warn(
 1340 "Programming Error: Unexpected Blank Line in sub 'keyword_group_scan'. Ignoring"
 1341             );
 1342             return $rhash_of_desires;
 1343         }
 1344 
 1345         my $level    = $rLL->[$K_first]->[_LEVEL_];
 1346         my $type     = $rLL->[$K_first]->[_TYPE_];
 1347         my $token    = $rLL->[$K_first]->[_TOKEN_];
 1348         my $ci_level = $rLL->[$K_first]->[_CI_LEVEL_];
 1349 
 1350         # see if this is a code type we seek (i.e. comment)
 1351         if (   $CODE_type
 1352             && $Opt_comment_pattern
 1353             && $CODE_type =~ /$Opt_comment_pattern/o )
 1354         {
 1355 
 1356             my $tok = $CODE_type;
 1357 
 1358             # Continuing a group
 1359             if ( $ibeg >= 0 && $level == $level_beg ) {
 1360                 $add_to_group->( $i, $tok, $level );
 1361             }
 1362 
 1363             # Start new group
 1364             else {
 1365 
 1366                 # first end old group if any; we might be starting new
 1367                 # keywords at different level
 1368                 if ( $ibeg > 0 ) { $end_group->(); }
 1369                 $add_to_group->( $i, $tok, $level );
 1370             }
 1371             next;
 1372         }
 1373 
 1374         # See if it is a keyword we seek, but never start a group in a
 1375         # continuation line; the code may be badly formatted.
 1376         if (   $ci_level == 0
 1377             && $type eq 'k'
 1378             && $token =~ /$Opt_pattern/o )
 1379         {
 1380 
 1381             # Continuing a keyword group
 1382             if ( $ibeg >= 0 && $level == $level_beg ) {
 1383                 $add_to_group->( $i, $token, $level );
 1384             }
 1385 
 1386             # Start new keyword group
 1387             else {
 1388 
 1389                 # first end old group if any; we might be starting new
 1390                 # keywords at different level
 1391                 if ( $ibeg > 0 ) { $end_group->(); }
 1392                 $add_to_group->( $i, $token, $level );
 1393             }
 1394             next;
 1395         }
 1396 
 1397         # This is not one of our keywords, but we are in a keyword group
 1398         # so see if we should continue or quit
 1399         elsif ( $ibeg >= 0 ) {
 1400 
 1401             # - bail out on a large level change; we may have walked into a
 1402             #   data structure or anoymous sub code.
 1403             if ( $level > $level_beg + 1 || $level < $level_beg ) {
 1404                 $end_group->();
 1405                 next;
 1406             }
 1407 
 1408             # - keep going on a continuation line of the same level, since
 1409             #   it is probably a continuation of our previous keyword,
 1410             # - and keep going past hanging side comments because we never
 1411             #   want to interrupt them.
 1412             if ( ( ( $level == $level_beg ) && $ci_level > 0 )
 1413                 || $CODE_type eq 'HSC' )
 1414             {
 1415                 $iend = $i;
 1416                 next;
 1417             }
 1418 
 1419             # - continue if if we are within in a container which started with
 1420             # the line of the previous keyword.
 1421             if ( defined($K_closing) && $K_first <= $K_closing ) {
 1422 
 1423                 # continue if entire line is within container
 1424                 if ( $K_last <= $K_closing ) { $iend = $i; next }
 1425 
 1426                 # continue at ); or }; or ];
 1427                 my $KK = $K_closing + 1;
 1428                 if ( $rLL->[$KK]->[_TYPE_] eq ';' ) {
 1429                     if ( $KK < $K_last ) {
 1430                         if ( $rLL->[ ++$KK ]->[_TYPE_] eq 'b' ) { ++$KK }
 1431                         if ( $KK > $K_last || $rLL->[$KK]->[_TYPE_] ne '#' ) {
 1432                             $end_group->(1);
 1433                             next;
 1434                         }
 1435                     }
 1436                     $iend = $i;
 1437                     next;
 1438                 }
 1439 
 1440                 $end_group->(1);
 1441                 next;
 1442             }
 1443 
 1444             # - end the group if none of the above
 1445             $end_group->();
 1446             next;
 1447         }
 1448 
 1449         # not in a keyword group; continue
 1450         else { next }
 1451     }
 1452 
 1453     # end of loop over all lines
 1454     $end_group->();
 1455     return $rhash_of_desires;
 1456 }
 1457 
 1458 sub break_lines {
 1459 
 1460     # Loop over old lines to set new line break points
 1461 
 1462     my $self   = shift;
 1463     my $rlines = $self->{rlines};
 1464 
 1465     # Note for RT#118553, leave only one newline at the end of a file.
 1466     # Example code to do this is in comments below:
 1467     # my $Opt_trim_ending_blank_lines = 0;
 1468     # if ($Opt_trim_ending_blank_lines) {
 1469     #     while ( my $line_of_tokens = pop @{$rlines} ) {
 1470     #         my $line_type = $line_of_tokens->{_line_type};
 1471     #         if ( $line_type eq 'CODE' ) {
 1472     #             my $CODE_type = $line_of_tokens->{_code_type};
 1473     #             next if ( $CODE_type eq 'BL' );
 1474     #         }
 1475     #         push @{$rlines}, $line_of_tokens;
 1476     #         last;
 1477     #     }
 1478     # }
 1479 
 1480    # But while this would be a trivial update, it would have very undesirable
 1481    # side effects when perltidy is run from within an editor on a small snippet.
 1482    # So this is best done with a separate filter, such
 1483    # as 'delete_ending_blank_lines.pl' in the examples folder.
 1484 
 1485     # Flag to prevent blank lines when POD occurs in a format skipping sect.
 1486     my $in_format_skipping_section;
 1487 
 1488     # set locations for blanks around long runs of keywords
 1489     my $rwant_blank_line_after = $self->keyword_group_scan();
 1490 
 1491     my $line_type = "";
 1492     my $i         = -1;
 1493     foreach my $line_of_tokens ( @{$rlines} ) {
 1494         $i++;
 1495 
 1496         # insert blank lines requested for keyword sequences
 1497         if (   $i > 0
 1498             && defined( $rwant_blank_line_after->{ $i - 1 } )
 1499             && $rwant_blank_line_after->{ $i - 1 } == 1 )
 1500         {
 1501             $self->want_blank_line();
 1502         }
 1503 
 1504         my $last_line_type = $line_type;
 1505         $line_type = $line_of_tokens->{_line_type};
 1506         my $input_line = $line_of_tokens->{_line_text};
 1507 
 1508         # _line_type codes are:
 1509         #   SYSTEM         - system-specific code before hash-bang line
 1510         #   CODE           - line of perl code (including comments)
 1511         #   POD_START      - line starting pod, such as '=head'
 1512         #   POD            - pod documentation text
 1513         #   POD_END        - last line of pod section, '=cut'
 1514         #   HERE           - text of here-document
 1515         #   HERE_END       - last line of here-doc (target word)
 1516         #   FORMAT         - format section
 1517         #   FORMAT_END     - last line of format section, '.'
 1518         #   DATA_START     - __DATA__ line
 1519         #   DATA           - unidentified text following __DATA__
 1520         #   END_START      - __END__ line
 1521         #   END            - unidentified text following __END__
 1522         #   ERROR          - we are in big trouble, probably not a perl script
 1523 
 1524         # put a blank line after an =cut which comes before __END__ and __DATA__
 1525         # (required by podchecker)
 1526         if ( $last_line_type eq 'POD_END' && !$saw_END_or_DATA_ ) {
 1527             $file_writer_object->reset_consecutive_blank_lines();
 1528             if ( !$in_format_skipping_section && $input_line !~ /^\s*$/ ) {
 1529                 $self->want_blank_line();
 1530             }
 1531         }
 1532 
 1533         # handle line of code..
 1534         if ( $line_type eq 'CODE' ) {
 1535 
 1536             my $CODE_type = $line_of_tokens->{_code_type};
 1537             $in_format_skipping_section = $CODE_type eq 'FS';
 1538 
 1539             # Handle blank lines
 1540             if ( $CODE_type eq 'BL' ) {
 1541 
 1542                 # If keep-old-blank-lines is zero, we delete all
 1543                 # old blank lines and let the blank line rules generate any
 1544                 # needed blanks.
 1545 
 1546                 # We also delete lines requested by the keyword-group logic
 1547                 my $kgb_keep = !( defined( $rwant_blank_line_after->{$i} )
 1548                     && $rwant_blank_line_after->{$i} == 2 );
 1549 
 1550                 # But the keep-old-blank-lines flag has priority over kgb flags
 1551                 $kgb_keep = 1 if ( $rOpts_keep_old_blank_lines == 2 );
 1552 
 1553                 if ( $rOpts_keep_old_blank_lines && $kgb_keep ) {
 1554                     $self->flush();
 1555                     $file_writer_object->write_blank_code_line(
 1556                         $rOpts_keep_old_blank_lines == 2 );
 1557                     $last_line_leading_type = 'b';
 1558                 }
 1559                 next;
 1560             }
 1561             else {
 1562 
 1563                 # let logger see all non-blank lines of code
 1564                 my $output_line_number = get_output_line_number();
 1565                 black_box( $line_of_tokens, $output_line_number );
 1566             }
 1567 
 1568             # Handle Format Skipping (FS) and Verbatim (VB) Lines
 1569             if ( $CODE_type eq 'VB' || $CODE_type eq 'FS' ) {
 1570                 $self->write_unindented_line("$input_line");
 1571                 $file_writer_object->reset_consecutive_blank_lines();
 1572                 next;
 1573             }
 1574 
 1575             # Handle block comment to be deleted
 1576             elsif ( $CODE_type eq 'DEL' ) {
 1577                 $self->flush();
 1578                 next;
 1579             }
 1580 
 1581             # Handle all other lines of code
 1582             $self->print_line_of_tokens($line_of_tokens);
 1583         }
 1584 
 1585         # handle line of non-code..
 1586         else {
 1587 
 1588             # set special flags
 1589             my $skip_line = 0;
 1590             my $tee_line  = 0;
 1591             if ( $line_type =~ /^POD/ ) {
 1592 
 1593                 # Pod docs should have a preceding blank line.  But stay
 1594                 # out of __END__ and __DATA__ sections, because
 1595                 # the user may be using this section for any purpose whatsoever
 1596                 if ( $rOpts->{'delete-pod'} ) { $skip_line = 1; }
 1597                 if ( $rOpts->{'tee-pod'} )    { $tee_line = 1; }
 1598                 if ( $rOpts->{'trim-pod'} )   { $input_line =~ s/\s+$// }
 1599                 if (   !$skip_line
 1600                     && !$in_format_skipping_section
 1601                     && $line_type eq 'POD_START'
 1602                     && !$saw_END_or_DATA_ )
 1603                 {
 1604                     $self->want_blank_line();
 1605                 }
 1606             }
 1607 
 1608             # leave the blank counters in a predictable state
 1609             # after __END__ or __DATA__
 1610             elsif ( $line_type =~ /^(END_START|DATA_START)$/ ) {
 1611                 $file_writer_object->reset_consecutive_blank_lines();
 1612                 $saw_END_or_DATA_ = 1;
 1613             }
 1614 
 1615             # write unindented non-code line
 1616             if ( !$skip_line ) {
 1617                 if ($tee_line) { $file_writer_object->tee_on() }
 1618                 $self->write_unindented_line($input_line);
 1619                 if ($tee_line) { $file_writer_object->tee_off() }
 1620             }
 1621         }
 1622     }
 1623     return;
 1624 }
 1625 
 1626 {    ## Beginning of routine to check line hashes
 1627 
 1628     my %valid_line_hash;
 1629 
 1630     BEGIN {
 1631 
 1632         # These keys are defined for each line in the formatter
 1633         # Each line must have exactly these quantities
 1634         my @valid_line_keys = qw(
 1635           _curly_brace_depth
 1636           _ending_in_quote
 1637           _guessed_indentation_level
 1638           _line_number
 1639           _line_text
 1640           _line_type
 1641           _paren_depth
 1642           _quote_character
 1643           _rK_range
 1644           _square_bracket_depth
 1645           _starting_in_quote
 1646           _ended_in_blank_token
 1647           _code_type
 1648 
 1649           _ci_level_0
 1650           _level_0
 1651           _nesting_blocks_0
 1652           _nesting_tokens_0
 1653         );
 1654 
 1655         @valid_line_hash{@valid_line_keys} = (1) x scalar(@valid_line_keys);
 1656     }
 1657 
 1658     sub check_line_hashes {
 1659         my $self = shift;
 1660         $self->check_self_hash();
 1661         my $rlines = $self->{rlines};
 1662         foreach my $rline ( @{$rlines} ) {
 1663             my $iline     = $rline->{_line_number};
 1664             my $line_type = $rline->{_line_type};
 1665             check_keys( $rline, \%valid_line_hash,
 1666                 "Checkpoint: line number =$iline,  line_type=$line_type", 1 );
 1667         }
 1668         return;
 1669     }
 1670 
 1671 }    ## End check line hashes
 1672 
 1673 sub write_line {
 1674 
 1675     # We are caching tokenized lines as they arrive and converting them to the
 1676     # format needed for the final formatting.
 1677     my ( $self, $line_of_tokens_old ) = @_;
 1678     my $rLL        = $self->{rLL};
 1679     my $Klimit     = $self->{Klimit};
 1680     my $rlines_new = $self->{rlines};
 1681 
 1682     my $Kfirst;
 1683     my $line_of_tokens = {};
 1684     foreach my $key (
 1685         qw(
 1686         _curly_brace_depth
 1687         _ending_in_quote
 1688         _guessed_indentation_level
 1689         _line_number
 1690         _line_text
 1691         _line_type
 1692         _paren_depth
 1693         _quote_character
 1694         _square_bracket_depth
 1695         _starting_in_quote
 1696         )
 1697       )
 1698     {
 1699         $line_of_tokens->{$key} = $line_of_tokens_old->{$key};
 1700     }
 1701 
 1702     # Data needed by Logger
 1703     $line_of_tokens->{_level_0}          = 0;
 1704     $line_of_tokens->{_ci_level_0}       = 0;
 1705     $line_of_tokens->{_nesting_blocks_0} = "";
 1706     $line_of_tokens->{_nesting_tokens_0} = "";
 1707 
 1708     # Needed to avoid trimming quotes
 1709     $line_of_tokens->{_ended_in_blank_token} = undef;
 1710 
 1711     my $line_type     = $line_of_tokens_old->{_line_type};
 1712     my $input_line_no = $line_of_tokens_old->{_line_number} - 1;
 1713     if ( $line_type eq 'CODE' ) {
 1714 
 1715         my $rtokens         = $line_of_tokens_old->{_rtokens};
 1716         my $rtoken_type     = $line_of_tokens_old->{_rtoken_type};
 1717         my $rblock_type     = $line_of_tokens_old->{_rblock_type};
 1718         my $rcontainer_type = $line_of_tokens_old->{_rcontainer_type};
 1719         my $rcontainer_environment =
 1720           $line_of_tokens_old->{_rcontainer_environment};
 1721         my $rtype_sequence  = $line_of_tokens_old->{_rtype_sequence};
 1722         my $rlevels         = $line_of_tokens_old->{_rlevels};
 1723         my $rslevels        = $line_of_tokens_old->{_rslevels};
 1724         my $rci_levels      = $line_of_tokens_old->{_rci_levels};
 1725         my $rnesting_blocks = $line_of_tokens_old->{_rnesting_blocks};
 1726         my $rnesting_tokens = $line_of_tokens_old->{_rnesting_tokens};
 1727 
 1728         my $jmax = @{$rtokens} - 1;
 1729         if ( $jmax >= 0 ) {
 1730             $Kfirst = defined($Klimit) ? $Klimit + 1 : 0;
 1731             foreach my $j ( 0 .. $jmax ) {
 1732 
 1733                 # Clip negative nesting depths to zero to avoid problems.
 1734                 # Negative values can occur in files with unbalanced containers
 1735                 my $slevel = $rslevels->[$j];
 1736                 if ( $slevel < 0 ) { $slevel = 0 }
 1737 
 1738                 my @tokary;
 1739                 @tokary[
 1740                   _TOKEN_,                 _TYPE_,
 1741                   _BLOCK_TYPE_,            _CONTAINER_TYPE_,
 1742                   _CONTAINER_ENVIRONMENT_, _TYPE_SEQUENCE_,
 1743                   _LEVEL_,                 _LEVEL_TRUE_,
 1744                   _SLEVEL_,                _CI_LEVEL_,
 1745                   _LINE_INDEX_,
 1746                   ]
 1747                   = (
 1748                     $rtokens->[$j],                $rtoken_type->[$j],
 1749                     $rblock_type->[$j],            $rcontainer_type->[$j],
 1750                     $rcontainer_environment->[$j], $rtype_sequence->[$j],
 1751                     $rlevels->[$j],                $rlevels->[$j],
 1752                     $slevel,                       $rci_levels->[$j],
 1753                     $input_line_no,
 1754                   );
 1755                 push @{$rLL}, \@tokary;
 1756             }
 1757 
 1758             $Klimit = @{$rLL} - 1;
 1759 
 1760             # Need to remember if we can trim the input line
 1761             $line_of_tokens->{_ended_in_blank_token} =
 1762               $rtoken_type->[$jmax] eq 'b';
 1763 
 1764             $line_of_tokens->{_level_0}          = $rlevels->[0];
 1765             $line_of_tokens->{_ci_level_0}       = $rci_levels->[0];
 1766             $line_of_tokens->{_nesting_blocks_0} = $rnesting_blocks->[0];
 1767             $line_of_tokens->{_nesting_tokens_0} = $rnesting_tokens->[0];
 1768         }
 1769     }
 1770 
 1771     $line_of_tokens->{_rK_range}  = [ $Kfirst, $Klimit ];
 1772     $line_of_tokens->{_code_type} = "";
 1773     $self->{Klimit}               = $Klimit;
 1774 
 1775     push @{$rlines_new}, $line_of_tokens;
 1776     return;
 1777 }
 1778 
 1779 sub initialize_whitespace_hashes {
 1780 
 1781     # initialize these global hashes, which control the use of
 1782     # whitespace around tokens:
 1783     #
 1784     # %binary_ws_rules
 1785     # %want_left_space
 1786     # %want_right_space
 1787     # %space_after_keyword
 1788     #
 1789     # Many token types are identical to the tokens themselves.
 1790     # See the tokenizer for a complete list. Here are some special types:
 1791     #   k = perl keyword
 1792     #   f = semicolon in for statement
 1793     #   m = unary minus
 1794     #   p = unary plus
 1795     # Note that :: is excluded since it should be contained in an identifier
 1796     # Note that '->' is excluded because it never gets space
 1797     # parentheses and brackets are excluded since they are handled specially
 1798     # curly braces are included but may be overridden by logic, such as
 1799     # newline logic.
 1800 
 1801     # NEW_TOKENS: create a whitespace rule here.  This can be as
 1802     # simple as adding your new letter to @spaces_both_sides, for
 1803     # example.
 1804 
 1805     my @opening_type = qw< L { ( [ >;
 1806     @is_opening_type{@opening_type} = (1) x scalar(@opening_type);
 1807 
 1808     my @closing_type = qw< R } ) ] >;
 1809     @is_closing_type{@closing_type} = (1) x scalar(@closing_type);
 1810 
 1811     my @spaces_both_sides = qw#
 1812       + - * / % ? = . : x < > | & ^ .. << >> ** && .. || // => += -=
 1813       .= %= x= &= |= ^= *= <> <= >= == =~ !~ /= != ... <<= >>= ~~ !~~
 1814       &&= ||= //= <=> A k f w F n C Y U G v
 1815       #;
 1816 
 1817     my @spaces_left_side = qw<
 1818       t ! ~ m p { \ h pp mm Z j
 1819     >;
 1820     push( @spaces_left_side, '#' );    # avoids warning message
 1821 
 1822     my @spaces_right_side = qw<
 1823       ; } ) ] R J ++ -- **=
 1824     >;
 1825     push( @spaces_right_side, ',' );    # avoids warning message
 1826 
 1827     # Note that we are in a BEGIN block here.  Later in processing
 1828     # the values of %want_left_space and  %want_right_space
 1829     # may be overridden by any user settings specified by the
 1830     # -wls and -wrs parameters.  However the binary_whitespace_rules
 1831     # are hardwired and have priority.
 1832     @want_left_space{@spaces_both_sides} =
 1833       (1) x scalar(@spaces_both_sides);
 1834     @want_right_space{@spaces_both_sides} =
 1835       (1) x scalar(@spaces_both_sides);
 1836     @want_left_space{@spaces_left_side} =
 1837       (1) x scalar(@spaces_left_side);
 1838     @want_right_space{@spaces_left_side} =
 1839       (-1) x scalar(@spaces_left_side);
 1840     @want_left_space{@spaces_right_side} =
 1841       (-1) x scalar(@spaces_right_side);
 1842     @want_right_space{@spaces_right_side} =
 1843       (1) x scalar(@spaces_right_side);
 1844     $want_left_space{'->'}      = WS_NO;
 1845     $want_right_space{'->'}     = WS_NO;
 1846     $want_left_space{'**'}      = WS_NO;
 1847     $want_right_space{'**'}     = WS_NO;
 1848     $want_right_space{'CORE::'} = WS_NO;
 1849 
 1850     # These binary_ws_rules are hardwired and have priority over the above
 1851     # settings.  It would be nice to allow adjustment by the user,
 1852     # but it would be complicated to specify.
 1853     #
 1854     # hash type information must stay tightly bound
 1855     # as in :  ${xxxx}
 1856     $binary_ws_rules{'i'}{'L'} = WS_NO;
 1857     $binary_ws_rules{'i'}{'{'} = WS_YES;
 1858     $binary_ws_rules{'k'}{'{'} = WS_YES;
 1859     $binary_ws_rules{'U'}{'{'} = WS_YES;
 1860     $binary_ws_rules{'i'}{'['} = WS_NO;
 1861     $binary_ws_rules{'R'}{'L'} = WS_NO;
 1862     $binary_ws_rules{'R'}{'{'} = WS_NO;
 1863     $binary_ws_rules{'t'}{'L'} = WS_NO;
 1864     $binary_ws_rules{'t'}{'{'} = WS_NO;
 1865     $binary_ws_rules{'}'}{'L'} = WS_NO;
 1866     $binary_ws_rules{'}'}{'{'} = WS_OPTIONAL;    # RT#129850; was WS_NO
 1867     $binary_ws_rules{'$'}{'L'} = WS_NO;
 1868     $binary_ws_rules{'$'}{'{'} = WS_NO;
 1869     $binary_ws_rules{'@'}{'L'} = WS_NO;
 1870     $binary_ws_rules{'@'}{'{'} = WS_NO;
 1871     $binary_ws_rules{'='}{'L'} = WS_YES;
 1872     $binary_ws_rules{'J'}{'J'} = WS_YES;
 1873 
 1874     # the following includes ') {'
 1875     # as in :    if ( xxx ) { yyy }
 1876     $binary_ws_rules{']'}{'L'} = WS_NO;
 1877     $binary_ws_rules{']'}{'{'} = WS_NO;
 1878     $binary_ws_rules{')'}{'{'} = WS_YES;
 1879     $binary_ws_rules{')'}{'['} = WS_NO;
 1880     $binary_ws_rules{']'}{'['} = WS_NO;
 1881     $binary_ws_rules{']'}{'{'} = WS_NO;
 1882     $binary_ws_rules{'}'}{'['} = WS_NO;
 1883     $binary_ws_rules{'R'}{'['} = WS_NO;
 1884 
 1885     $binary_ws_rules{']'}{'++'} = WS_NO;
 1886     $binary_ws_rules{']'}{'--'} = WS_NO;
 1887     $binary_ws_rules{')'}{'++'} = WS_NO;
 1888     $binary_ws_rules{')'}{'--'} = WS_NO;
 1889 
 1890     $binary_ws_rules{'R'}{'++'} = WS_NO;
 1891     $binary_ws_rules{'R'}{'--'} = WS_NO;
 1892 
 1893     $binary_ws_rules{'i'}{'Q'} = WS_YES;
 1894     $binary_ws_rules{'n'}{'('} = WS_YES;    # occurs in 'use package n ()'
 1895 
 1896     # FIXME: we could to split 'i' into variables and functions
 1897     # and have no space for functions but space for variables.  For now,
 1898     # I have a special patch in the special rules below
 1899     $binary_ws_rules{'i'}{'('} = WS_NO;
 1900 
 1901     $binary_ws_rules{'w'}{'('} = WS_NO;
 1902     $binary_ws_rules{'w'}{'{'} = WS_YES;
 1903     return;
 1904 
 1905 } ## end initialize_whitespace_hashes
 1906 
 1907 sub set_whitespace_flags {
 1908 
 1909     #    This routine examines each pair of nonblank tokens and
 1910     #    sets a flag indicating if white space is needed.
 1911     #
 1912     #    $rwhitespace_flags->[$j] is a flag indicating whether a white space
 1913     #    BEFORE token $j is needed, with the following values:
 1914     #
 1915     #             WS_NO      = -1 do not want a space before token $j
 1916     #             WS_OPTIONAL=  0 optional space or $j is a whitespace
 1917     #             WS_YES     =  1 want a space before token $j
 1918     #
 1919 
 1920     my $self = shift;
 1921     my $rLL  = $self->{rLL};
 1922 
 1923     my $rwhitespace_flags = [];
 1924 
 1925     my ( $last_token, $last_type, $last_block_type, $last_input_line_no,
 1926         $token, $type, $block_type, $input_line_no );
 1927     my $j_tight_closing_paren = -1;
 1928 
 1929     $token              = ' ';
 1930     $type               = 'b';
 1931     $block_type         = '';
 1932     $input_line_no      = 0;
 1933     $last_token         = ' ';
 1934     $last_type          = 'b';
 1935     $last_block_type    = '';
 1936     $last_input_line_no = 0;
 1937 
 1938     my $jmax = @{$rLL} - 1;
 1939 
 1940     my ($ws);
 1941 
 1942     # This is some logic moved to a sub to avoid deep nesting of if stmts
 1943     my $ws_in_container = sub {
 1944 
 1945         my ($j) = @_;
 1946         my $ws = WS_YES;
 1947         if ( $j + 1 > $jmax ) { return (WS_NO) }
 1948 
 1949         # Patch to count '-foo' as single token so that
 1950         # each of  $a{-foo} and $a{foo} and $a{'foo'} do
 1951         # not get spaces with default formatting.
 1952         my $j_here = $j;
 1953         ++$j_here
 1954           if ( $token eq '-'
 1955             && $last_token eq '{'
 1956             && $rLL->[ $j + 1 ]->[_TYPE_] eq 'w' );
 1957 
 1958         # $j_next is where a closing token should be if
 1959         # the container has a single token
 1960         if ( $j_here + 1 > $jmax ) { return (WS_NO) }
 1961         my $j_next =
 1962           ( $rLL->[ $j_here + 1 ]->[_TYPE_] eq 'b' )
 1963           ? $j_here + 2
 1964           : $j_here + 1;
 1965 
 1966         if ( $j_next > $jmax ) { return WS_NO }
 1967         my $tok_next  = $rLL->[$j_next]->[_TOKEN_];
 1968         my $type_next = $rLL->[$j_next]->[_TYPE_];
 1969 
 1970         # for tightness = 1, if there is just one token
 1971         # within the matching pair, we will keep it tight
 1972         if (
 1973             $tok_next eq $matching_token{$last_token}
 1974 
 1975             # but watch out for this: [ [ ]    (misc.t)
 1976             && $last_token ne $token
 1977 
 1978             # double diamond is usually spaced
 1979             && $token ne '<<>>'
 1980 
 1981           )
 1982         {
 1983 
 1984             # remember where to put the space for the closing paren
 1985             $j_tight_closing_paren = $j_next;
 1986             return (WS_NO);
 1987         }
 1988         return (WS_YES);
 1989     };
 1990 
 1991     # main loop over all tokens to define the whitespace flags
 1992     for ( my $j = 0 ; $j <= $jmax ; $j++ ) {
 1993 
 1994         my $rtokh = $rLL->[$j];
 1995 
 1996         # Set a default
 1997         $rwhitespace_flags->[$j] = WS_OPTIONAL;
 1998 
 1999         if ( $rtokh->[_TYPE_] eq 'b' ) {
 2000             next;
 2001         }
 2002 
 2003         # set a default value, to be changed as needed
 2004         $ws                 = undef;
 2005         $last_token         = $token;
 2006         $last_type          = $type;
 2007         $last_block_type    = $block_type;
 2008         $last_input_line_no = $input_line_no;
 2009         $token              = $rtokh->[_TOKEN_];
 2010         $type               = $rtokh->[_TYPE_];
 2011         $block_type         = $rtokh->[_BLOCK_TYPE_];
 2012         $input_line_no      = $rtokh->[_LINE_INDEX_];
 2013 
 2014         #---------------------------------------------------------------
 2015         # Whitespace Rules Section 1:
 2016         # Handle space on the inside of opening braces.
 2017         #---------------------------------------------------------------
 2018 
 2019         #    /^[L\{\(\[]$/
 2020         if ( $is_opening_type{$last_type} ) {
 2021 
 2022             $j_tight_closing_paren = -1;
 2023 
 2024             # let us keep empty matched braces together: () {} []
 2025             # except for BLOCKS
 2026             if ( $token eq $matching_token{$last_token} ) {
 2027                 if ($block_type) {
 2028                     $ws = WS_YES;
 2029                 }
 2030                 else {
 2031                     $ws = WS_NO;
 2032                 }
 2033             }
 2034             else {
 2035 
 2036                 # we're considering the right of an opening brace
 2037                 # tightness = 0 means always pad inside with space
 2038                 # tightness = 1 means pad inside if "complex"
 2039                 # tightness = 2 means never pad inside with space
 2040 
 2041                 my $tightness;
 2042                 if (   $last_type eq '{'
 2043                     && $last_token eq '{'
 2044                     && $last_block_type )
 2045                 {
 2046                     $tightness = $rOpts_block_brace_tightness;
 2047                 }
 2048                 else { $tightness = $tightness{$last_token} }
 2049 
 2050                #=============================================================
 2051                # Patch for test problem <<snippets/fabrice_bug.in>>
 2052                # We must always avoid spaces around a bare word beginning
 2053                # with ^ as in:
 2054                #    my $before = ${^PREMATCH};
 2055                # Because all of the following cause an error in perl:
 2056                #    my $before = ${ ^PREMATCH };
 2057                #    my $before = ${ ^PREMATCH};
 2058                #    my $before = ${^PREMATCH };
 2059                # So if brace tightness flag is -bt=0 we must temporarily reset
 2060                # to bt=1.  Note that here we must set tightness=1 and not 2 so
 2061                # that the closing space
 2062                # is also avoided (via the $j_tight_closing_paren flag in coding)
 2063                 if ( $type eq 'w' && $token =~ /^\^/ ) { $tightness = 1 }
 2064 
 2065                 #=============================================================
 2066 
 2067                 if ( $tightness <= 0 ) {
 2068                     $ws = WS_YES;
 2069                 }
 2070                 elsif ( $tightness > 1 ) {
 2071                     $ws = WS_NO;
 2072                 }
 2073                 else {
 2074                     $ws = $ws_in_container->($j);
 2075                 }
 2076             }
 2077         }    # end setting space flag inside opening tokens
 2078         my $ws_1;
 2079         $ws_1 = $ws
 2080           if FORMATTER_DEBUG_FLAG_WHITE;
 2081 
 2082         #---------------------------------------------------------------
 2083         # Whitespace Rules Section 2:
 2084         # Handle space on inside of closing brace pairs.
 2085         #---------------------------------------------------------------
 2086 
 2087         #   /[\}\)\]R]/
 2088         if ( $is_closing_type{$type} ) {
 2089 
 2090             if ( $j == $j_tight_closing_paren ) {
 2091 
 2092                 $j_tight_closing_paren = -1;
 2093                 $ws                    = WS_NO;
 2094             }
 2095             else {
 2096 
 2097                 if ( !defined($ws) ) {
 2098 
 2099                     my $tightness;
 2100                     if ( $type eq '}' && $token eq '}' && $block_type ) {
 2101                         $tightness = $rOpts_block_brace_tightness;
 2102                     }
 2103                     else { $tightness = $tightness{$token} }
 2104 
 2105                     $ws = ( $tightness > 1 ) ? WS_NO : WS_YES;
 2106                 }
 2107             }
 2108         }    # end setting space flag inside closing tokens
 2109 
 2110         my $ws_2;
 2111         $ws_2 = $ws
 2112           if FORMATTER_DEBUG_FLAG_WHITE;
 2113 
 2114         #---------------------------------------------------------------
 2115         # Whitespace Rules Section 3:
 2116         # Use the binary rule table.
 2117         #---------------------------------------------------------------
 2118         if ( !defined($ws) ) {
 2119             $ws = $binary_ws_rules{$last_type}{$type};
 2120         }
 2121         my $ws_3;
 2122         $ws_3 = $ws
 2123           if FORMATTER_DEBUG_FLAG_WHITE;
 2124 
 2125         #---------------------------------------------------------------
 2126         # Whitespace Rules Section 4:
 2127         # Handle some special cases.
 2128         #---------------------------------------------------------------
 2129         if ( $token eq '(' ) {
 2130 
 2131             # This will have to be tweaked as tokenization changes.
 2132             # We usually want a space at '} (', for example:
 2133             # <<snippets/space1.in>>
 2134             #     map { 1 * $_; } ( $y, $M, $w, $d, $h, $m, $s );
 2135             #
 2136             # But not others:
 2137             #     &{ $_->[1] }( delete $_[$#_]{ $_->[0] } );
 2138             # At present, the above & block is marked as type L/R so this case
 2139             # won't go through here.
 2140             if ( $last_type eq '}' ) { $ws = WS_YES }
 2141 
 2142             # NOTE: some older versions of Perl had occasional problems if
 2143             # spaces are introduced between keywords or functions and opening
 2144             # parens.  So the default is not to do this except is certain
 2145             # cases.  The current Perl seems to tolerate spaces.
 2146 
 2147             # Space between keyword and '('
 2148             elsif ( $last_type eq 'k' ) {
 2149                 $ws = WS_NO
 2150                   unless ( $rOpts_space_keyword_paren
 2151                     || $space_after_keyword{$last_token} );
 2152             }
 2153 
 2154             # Space between function and '('
 2155             # -----------------------------------------------------
 2156             # 'w' and 'i' checks for something like:
 2157             #   myfun(    &myfun(   ->myfun(
 2158             # -----------------------------------------------------
 2159             elsif (( $last_type =~ /^[wUG]$/ )
 2160                 || ( $last_type =~ /^[wi]$/ && $last_token =~ /^(\&|->)/ ) )
 2161             {
 2162                 $ws = WS_NO unless ($rOpts_space_function_paren);
 2163             }
 2164 
 2165             # space between something like $i and ( in <<snippets/space2.in>>
 2166             # for $i ( 0 .. 20 ) {
 2167             # FIXME: eventually, type 'i' needs to be split into multiple
 2168             # token types so this can be a hardwired rule.
 2169             elsif ( $last_type eq 'i' && $last_token =~ /^[\$\%\@]/ ) {
 2170                 $ws = WS_YES;
 2171             }
 2172 
 2173             # allow constant function followed by '()' to retain no space
 2174             elsif ($last_type eq 'C'
 2175                 && $rLL->[ $j + 1 ]->[_TOKEN_] eq ')' )
 2176             {
 2177                 $ws = WS_NO;
 2178             }
 2179         }
 2180 
 2181         # patch for SWITCH/CASE: make space at ']{' optional
 2182         # since the '{' might begin a case or when block
 2183         elsif ( ( $token eq '{' && $type ne 'L' ) && $last_token eq ']' ) {
 2184             $ws = WS_OPTIONAL;
 2185         }
 2186 
 2187         # keep space between 'sub' and '{' for anonymous sub definition
 2188         if ( $type eq '{' ) {
 2189             if ( $last_token eq 'sub' ) {
 2190                 $ws = WS_YES;
 2191             }
 2192 
 2193             # this is needed to avoid no space in '){'
 2194             if ( $last_token eq ')' && $token eq '{' ) { $ws = WS_YES }
 2195 
 2196             # avoid any space before the brace or bracket in something like
 2197             #  @opts{'a','b',...}
 2198             if ( $last_type eq 'i' && $last_token =~ /^\@/ ) {
 2199                 $ws = WS_NO;
 2200             }
 2201         }
 2202 
 2203         elsif ( $type eq 'i' ) {
 2204 
 2205             # never a space before ->
 2206             if ( $token =~ /^\-\>/ ) {
 2207                 $ws = WS_NO;
 2208             }
 2209         }
 2210 
 2211         # retain any space between '-' and bare word
 2212         elsif ( $type eq 'w' || $type eq 'C' ) {
 2213             $ws = WS_OPTIONAL if $last_type eq '-';
 2214 
 2215             # never a space before ->
 2216             if ( $token =~ /^\-\>/ ) {
 2217                 $ws = WS_NO;
 2218             }
 2219         }
 2220 
 2221         # retain any space between '-' and bare word; for example
 2222         # avoid space between 'USER' and '-' here: <<snippets/space2.in>>
 2223         #   $myhash{USER-NAME}='steve';
 2224         elsif ( $type eq 'm' || $type eq '-' ) {
 2225             $ws = WS_OPTIONAL if ( $last_type eq 'w' );
 2226         }
 2227 
 2228         # always space before side comment
 2229         elsif ( $type eq '#' ) { $ws = WS_YES if $j > 0 }
 2230 
 2231         # always preserver whatever space was used after a possible
 2232         # filehandle (except _) or here doc operator
 2233         if (
 2234             $type ne '#'
 2235             && ( ( $last_type eq 'Z' && $last_token ne '_' )
 2236                 || $last_type eq 'h' )
 2237           )
 2238         {
 2239             $ws = WS_OPTIONAL;
 2240         }
 2241 
 2242         # space_backslash_quote; RT #123774  <<snippets/rt123774.in>>
 2243         # allow a space between a backslash and single or double quote
 2244         # to avoid fooling html formatters
 2245         elsif ( $last_type eq '\\' && $type eq 'Q' && $token =~ /^[\"\']/ ) {
 2246             if ($rOpts_space_backslash_quote) {
 2247                 if ( $rOpts_space_backslash_quote == 1 ) {
 2248                     $ws = WS_OPTIONAL;
 2249                 }
 2250                 elsif ( $rOpts_space_backslash_quote == 2 ) { $ws = WS_YES }
 2251                 else { }    # shouldnt happen
 2252             }
 2253             else {
 2254                 $ws = WS_NO;
 2255             }
 2256         }
 2257 
 2258         my $ws_4;
 2259         $ws_4 = $ws
 2260           if FORMATTER_DEBUG_FLAG_WHITE;
 2261 
 2262         #---------------------------------------------------------------
 2263         # Whitespace Rules Section 5:
 2264         # Apply default rules not covered above.
 2265         #---------------------------------------------------------------
 2266 
 2267         # If we fall through to here, look at the pre-defined hash tables for
 2268         # the two tokens, and:
 2269         #  if (they are equal) use the common value
 2270         #  if (either is zero or undef) use the other
 2271         #  if (either is -1) use it
 2272         # That is,
 2273         # left  vs right
 2274         #  1    vs    1     -->  1
 2275         #  0    vs    0     -->  0
 2276         # -1    vs   -1     --> -1
 2277         #
 2278         #  0    vs   -1     --> -1
 2279         #  0    vs    1     -->  1
 2280         #  1    vs    0     -->  1
 2281         # -1    vs    0     --> -1
 2282         #
 2283         # -1    vs    1     --> -1
 2284         #  1    vs   -1     --> -1
 2285         if ( !defined($ws) ) {
 2286             my $wl = $want_left_space{$type};
 2287             my $wr = $want_right_space{$last_type};
 2288             if ( !defined($wl) ) { $wl = 0 }
 2289             if ( !defined($wr) ) { $wr = 0 }
 2290             $ws = ( ( $wl == $wr ) || ( $wl == -1 ) || !$wr ) ? $wl : $wr;
 2291         }
 2292 
 2293         if ( !defined($ws) ) {
 2294             $ws = 0;
 2295             write_diagnostics(
 2296                 "WS flag is undefined for tokens $last_token $token\n");
 2297         }
 2298 
 2299         # Treat newline as a whitespace. Otherwise, we might combine
 2300         # 'Send' and '-recipients' here according to the above rules:
 2301         # <<snippets/space3.in>>
 2302         #    my $msg = new Fax::Send
 2303         #      -recipients => $to,
 2304         #      -data => $data;
 2305         if ( $ws == 0 && $input_line_no != $last_input_line_no ) { $ws = 1 }
 2306 
 2307         if (   ( $ws == 0 )
 2308             && $j > 0
 2309             && $j < $jmax
 2310             && ( $last_type !~ /^[Zh]$/ ) )
 2311         {
 2312 
 2313             # If this happens, we have a non-fatal but undesirable
 2314             # hole in the above rules which should be patched.
 2315             write_diagnostics(
 2316                 "WS flag is zero for tokens $last_token $token\n");
 2317         }
 2318 
 2319         $rwhitespace_flags->[$j] = $ws;
 2320 
 2321         FORMATTER_DEBUG_FLAG_WHITE && do {
 2322             my $str = substr( $last_token, 0, 15 );
 2323             $str .= ' ' x ( 16 - length($str) );
 2324             if ( !defined($ws_1) ) { $ws_1 = "*" }
 2325             if ( !defined($ws_2) ) { $ws_2 = "*" }
 2326             if ( !defined($ws_3) ) { $ws_3 = "*" }
 2327             if ( !defined($ws_4) ) { $ws_4 = "*" }
 2328             print STDOUT
 2329 "NEW WHITE:  i=$j $str $last_type $type $ws_1 : $ws_2 : $ws_3 : $ws_4 : $ws \n";
 2330         };
 2331     } ## end main loop
 2332 
 2333     if ( $rOpts->{'tight-secret-operators'} ) {
 2334         new_secret_operator_whitespace( $rLL, $rwhitespace_flags );
 2335     }
 2336     return $rwhitespace_flags;
 2337 } ## end sub set_whitespace_flags
 2338 
 2339 sub respace_tokens {
 2340 
 2341     my $self = shift;
 2342     return if $rOpts->{'indent-only'};
 2343 
 2344     # This routine makes all necessary changes to the tokenization after the
 2345     # file has been read. This consists mostly of inserting and deleting spaces
 2346     # according to the selected parameters. In a few cases non-space characters
 2347     # are added, deleted or modified.
 2348 
 2349     # The old tokens are copied one-by-one, with changes, from the old
 2350     # linear storage array to a new array.
 2351 
 2352     my $rLL                        = $self->{rLL};
 2353     my $Klimit_old                 = $self->{Klimit};
 2354     my $rlines                     = $self->{rlines};
 2355     my $rpaired_to_inner_container = $self->{rpaired_to_inner_container};
 2356 
 2357     my $rLL_new = [];    # This is the new array
 2358     my $KK      = 0;
 2359     my $rtoken_vars;
 2360     my $Kmax = @{$rLL} - 1;
 2361 
 2362     # Set the whitespace flags, which indicate the token spacing preference.
 2363     my $rwhitespace_flags = $self->set_whitespace_flags();
 2364 
 2365     # we will be setting token lengths as we go
 2366     my $cumulative_length = 0;
 2367 
 2368     # We also define these hash indexes giving container token array indexes
 2369     # as a function of the container sequence numbers.  For example,
 2370     my $K_opening_container = {};    # opening [ { or (
 2371     my $K_closing_container = {};    # closing ] } or )
 2372     my $K_opening_ternary   = {};    # opening ? of ternary
 2373     my $K_closing_ternary   = {};    # closing : of ternary
 2374 
 2375     # List of new K indexes of phantom semicolons
 2376     # This will be needed if we want to undo them for iterations
 2377     my $rK_phantom_semicolons = [];
 2378 
 2379     # Temporary hashes for adding semicolons
 2380     ##my $rKfirst_new               = {};
 2381 
 2382     # a sub to link preceding nodes forward to a new node type
 2383     my $link_back = sub {
 2384         my ( $Ktop, $key ) = @_;
 2385 
 2386         my $Kprev = $Ktop - 1;
 2387         while ( $Kprev >= 0
 2388             && !defined( $rLL_new->[$Kprev]->[$key] ) )
 2389         {
 2390             $rLL_new->[$Kprev]->[$key] = $Ktop;
 2391             $Kprev -= 1;
 2392         }
 2393     };
 2394 
 2395     # A sub to store one token in the new array
 2396     # All new tokens must be stored by this sub so that it can update
 2397     # all data structures on the fly.
 2398     my $last_nonblank_type       = ';';
 2399     my $last_nonblank_token      = ';';
 2400     my $last_nonblank_block_type = '';
 2401     my $store_token              = sub {
 2402         my ($item) = @_;
 2403 
 2404         # This will be the index of this item in the new array
 2405         my $KK_new = @{$rLL_new};
 2406 
 2407         # check for a sequenced item (i.e., container or ?/:)
 2408         my $type_sequence = $item->[_TYPE_SEQUENCE_];
 2409         if ($type_sequence) {
 2410 
 2411             $link_back->( $KK_new, _KNEXT_SEQ_ITEM_ );
 2412 
 2413             my $token = $item->[_TOKEN_];
 2414             if ( $is_opening_token{$token} ) {
 2415 
 2416                 $K_opening_container->{$type_sequence} = $KK_new;
 2417             }
 2418             elsif ( $is_closing_token{$token} ) {
 2419 
 2420                 $K_closing_container->{$type_sequence} = $KK_new;
 2421             }
 2422 
 2423             # These are not yet used but could be useful
 2424             else {
 2425                 if ( $token eq '?' ) {
 2426                     $K_opening_ternary->{$type_sequence} = $KK_new;
 2427                 }
 2428                 elsif ( $token eq ':' ) {
 2429                     $K_closing_ternary->{$type_sequence} = $KK_new;
 2430                 }
 2431                 else {
 2432                     # shouldn't happen
 2433                     Fault("Ugh: shouldn't happen");
 2434                 }
 2435             }
 2436         }
 2437 
 2438         # find the length of this token
 2439         my $token_length = length( $item->[_TOKEN_] );
 2440 
 2441         # and update the cumulative length
 2442         $cumulative_length += $token_length;
 2443 
 2444         # Save the length sum to just AFTER this token
 2445         $item->[_CUMULATIVE_LENGTH_] = $cumulative_length;
 2446 
 2447         my $type = $item->[_TYPE_];
 2448 
 2449         # trim side comments
 2450         if ( $type eq '#' ) {
 2451             $item->[_TOKEN_] =~ s/\s*$//;
 2452         }
 2453 
 2454         if ( $type && $type ne 'b' && $type ne '#' ) {
 2455             $last_nonblank_type       = $type;
 2456             $last_nonblank_token      = $item->[_TOKEN_];
 2457             $last_nonblank_block_type = $item->[_BLOCK_TYPE_];
 2458         }
 2459 
 2460         # and finally, add this item to the new array
 2461         push @{$rLL_new}, $item;
 2462     };
 2463 
 2464     my $store_token_and_space = sub {
 2465         my ( $item, $want_space ) = @_;
 2466 
 2467         # store a token with preceding space if requested and needed
 2468 
 2469         # First store the space
 2470         if (   $want_space
 2471             && @{$rLL_new}
 2472             && $rLL_new->[-1]->[_TYPE_] ne 'b'
 2473             && $rOpts_add_whitespace )
 2474         {
 2475             my $rcopy = copy_token_as_type( $item, 'b', ' ' );
 2476             $rcopy->[_LINE_INDEX_] =
 2477               $rLL_new->[-1]->[_LINE_INDEX_];
 2478             $store_token->($rcopy);
 2479         }
 2480 
 2481         # then the token
 2482         $store_token->($item);
 2483     };
 2484 
 2485     my $K_end_q = sub {
 2486         my ($KK)  = @_;
 2487         my $K_end = $KK;
 2488         my $Kn    = $self->K_next_nonblank($KK);
 2489         while ( defined($Kn) && $rLL->[$Kn]->[_TYPE_] eq 'q' ) {
 2490             $K_end = $Kn;
 2491             $Kn    = $self->K_next_nonblank($Kn);
 2492         }
 2493         return $K_end;
 2494     };
 2495 
 2496     my $add_phantom_semicolon = sub {
 2497 
 2498         my ($KK) = @_;
 2499 
 2500         my $Kp = $self->K_previous_nonblank( undef, $rLL_new );
 2501         return unless ( defined($Kp) );
 2502 
 2503         # we are only adding semicolons for certain block types
 2504         my $block_type = $rLL->[$KK]->[_BLOCK_TYPE_];
 2505         return
 2506           unless ( $ok_to_add_semicolon_for_block_type{$block_type}
 2507             || $block_type =~ /^(sub|package)/
 2508             || $block_type =~ /^\w+\:$/ );
 2509 
 2510         my $type_sequence = $rLL->[$KK]->[_TYPE_SEQUENCE_];
 2511 
 2512         my $previous_nonblank_type  = $rLL_new->[$Kp]->[_TYPE_];
 2513         my $previous_nonblank_token = $rLL_new->[$Kp]->[_TOKEN_];
 2514 
 2515         # Do not add a semicolon if...
 2516         return
 2517           if (
 2518 
 2519             # it would follow a comment (and be isolated)
 2520             $previous_nonblank_type eq '#'
 2521 
 2522             # it follows a code block ( because they are not always wanted
 2523             # there and may add clutter)
 2524             || $rLL_new->[$Kp]->[_BLOCK_TYPE_]
 2525 
 2526             # it would follow a label
 2527             || $previous_nonblank_type eq 'J'
 2528 
 2529             # it would be inside a 'format' statement (and cause syntax error)
 2530             || (   $previous_nonblank_type eq 'k'
 2531                 && $previous_nonblank_token =~ /format/ )
 2532 
 2533             # if it would prevent welding two containers
 2534             || $rpaired_to_inner_container->{$type_sequence}
 2535 
 2536           );
 2537 
 2538         # We will insert an empty semicolon here as a placeholder.  Later, if
 2539         # it becomes the last token on a line, we will bring it to life.  The
 2540         # advantage of doing this is that (1) we just have to check line
 2541         # endings, and (2) the phantom semicolon has zero width and therefore
 2542         # won't cause needless breaks of one-line blocks.
 2543         my $Ktop = -1;
 2544         if (   $rLL_new->[$Ktop]->[_TYPE_] eq 'b'
 2545             && $want_left_space{';'} == WS_NO )
 2546         {
 2547 
 2548             # convert the blank into a semicolon..
 2549             # be careful: we are working on the new stack top
 2550             # on a token which has been stored.
 2551             my $rcopy = copy_token_as_type( $rLL_new->[$Ktop], 'b', ' ' );
 2552 
 2553             # Convert the existing blank to:
 2554             #   a phantom semicolon for one_line_block option = 0 or 1
 2555             #   a real semicolon    for one_line_block option = 2
 2556             my $tok = $rOpts_one_line_block_semicolons == 2 ? ';' : '';
 2557 
 2558             $rLL_new->[$Ktop]->[_TOKEN_] = $tok;    # zero length if phantom
 2559             $rLL_new->[$Ktop]->[_TYPE_]  = ';';
 2560             $rLL_new->[$Ktop]->[_SLEVEL_] =
 2561               $rLL->[$KK]->[_SLEVEL_];
 2562 
 2563             push @{$rK_phantom_semicolons}, @{$rLL_new} - 1;
 2564 
 2565             # Then store a new blank
 2566             $store_token->($rcopy);
 2567         }
 2568         else {
 2569 
 2570             # insert a new token
 2571             my $rcopy = copy_token_as_type( $rLL_new->[$Kp], ';', '' );
 2572             $rcopy->[_SLEVEL_] = $rLL->[$KK]->[_SLEVEL_];
 2573             $store_token->($rcopy);
 2574             push @{$rK_phantom_semicolons}, @{$rLL_new} - 1;
 2575         }
 2576     };
 2577 
 2578     my $check_Q = sub {
 2579 
 2580         # Check that a quote looks okay
 2581         # This sub works but needs to by sync'd with the log file output
 2582         # before it can be used.
 2583         my ( $KK, $Kfirst ) = @_;
 2584         my $token = $rLL->[$KK]->[_TOKEN_];
 2585         note_embedded_tab() if ( $token =~ "\t" );
 2586 
 2587         my $Kp = $self->K_previous_nonblank( undef, $rLL_new );
 2588         return unless ( defined($Kp) );
 2589         my $previous_nonblank_type  = $rLL_new->[$Kp]->[_TYPE_];
 2590         my $previous_nonblank_token = $rLL_new->[$Kp]->[_TOKEN_];
 2591 
 2592         my $previous_nonblank_type_2  = 'b';
 2593         my $previous_nonblank_token_2 = "";
 2594         my $Kpp = $self->K_previous_nonblank( $Kp, $rLL_new );
 2595         if ( defined($Kpp) ) {
 2596             $previous_nonblank_type_2  = $rLL_new->[$Kpp]->[_TYPE_];
 2597             $previous_nonblank_token_2 = $rLL_new->[$Kpp]->[_TOKEN_];
 2598         }
 2599 
 2600         my $Kn                  = $self->K_next_nonblank($KK);
 2601         my $next_nonblank_token = "";
 2602         if ( defined($Kn) ) {
 2603             $next_nonblank_token = $rLL->[$Kn]->[_TOKEN_];
 2604         }
 2605 
 2606         my $token_0 = $rLL->[$Kfirst]->[_TOKEN_];
 2607         my $type_0  = $rLL->[$Kfirst]->[_TYPE_];
 2608 
 2609         # make note of something like '$var = s/xxx/yyy/;'
 2610         # in case it should have been '$var =~ s/xxx/yyy/;'
 2611         if (
 2612                $token =~ /^(s|tr|y|m|\/)/
 2613             && $previous_nonblank_token =~ /^(=|==|!=)$/
 2614 
 2615             # preceded by simple scalar
 2616             && $previous_nonblank_type_2 eq 'i'
 2617             && $previous_nonblank_token_2 =~ /^\$/
 2618 
 2619             # followed by some kind of termination
 2620             # (but give complaint if we can not see far enough ahead)
 2621             && $next_nonblank_token =~ /^[; \)\}]$/
 2622 
 2623             # scalar is not declared
 2624             && !( $type_0 eq 'k' && $token_0 =~ /^(my|our|local)$/ )
 2625           )
 2626         {
 2627             my $guess = substr( $last_nonblank_token, 0, 1 ) . '~';
 2628             complain(
 2629 "Note: be sure you want '$previous_nonblank_token' instead of '$guess' here\n"
 2630             );
 2631         }
 2632     };
 2633 
 2634     # Main loop over all lines of the file
 2635     my $last_K_out;
 2636     my $CODE_type = "";
 2637     my $line_type = "";
 2638 
 2639     # Testing option to break qw.  Do not use; it can make a mess.
 2640     my $ALLOW_BREAK_MULTILINE_QW = 0;
 2641     my $in_multiline_qw;
 2642     foreach my $line_of_tokens ( @{$rlines} ) {
 2643 
 2644         $input_line_number = $line_of_tokens->{_line_number};
 2645         my $last_line_type = $line_type;
 2646         $line_type = $line_of_tokens->{_line_type};
 2647         next unless ( $line_type eq 'CODE' );
 2648         my $last_CODE_type = $CODE_type;
 2649         $CODE_type = $line_of_tokens->{_code_type};
 2650         my $rK_range = $line_of_tokens->{_rK_range};
 2651         my ( $Kfirst, $Klast ) = @{$rK_range};
 2652         next unless defined($Kfirst);
 2653 
 2654         # Check for correct sequence of token indexes...
 2655         # An error here means that sub write_line() did not correctly
 2656         # package the tokenized lines as it received them.
 2657         if ( defined($last_K_out) ) {
 2658             if ( $Kfirst != $last_K_out + 1 ) {
 2659                 Fault(
 2660                     "Program Bug: last K out was $last_K_out but Kfirst=$Kfirst"
 2661                 );
 2662             }
 2663         }
 2664         else {
 2665             if ( $Kfirst != 0 ) {
 2666                 Fault("Program Bug: first K is $Kfirst but should be 0");
 2667             }
 2668         }
 2669         $last_K_out = $Klast;
 2670 
 2671         # Handle special lines of code
 2672         if ( $CODE_type && $CODE_type ne 'NIN' && $CODE_type ne 'VER' ) {
 2673 
 2674             # CODE_types are as follows.
 2675             # 'BL' = Blank Line
 2676             # 'VB' = Verbatim - line goes out verbatim
 2677             # 'FS' = Format Skipping - line goes out verbatim, no blanks
 2678             # 'IO' = Indent Only - only indentation may be changed
 2679             # 'NIN' = No Internal Newlines - line does not get broken
 2680             # 'HSC'=Hanging Side Comment - fix this hanging side comment
 2681             # 'BC'=Block Comment - an ordinary full line comment
 2682             # 'SBC'=Static Block Comment - a block comment which does not get
 2683             #      indented
 2684             # 'SBCX'=Static Block Comment Without Leading Space
 2685             # 'DEL'=Delete this line
 2686             # 'VER'=VERSION statement
 2687             # '' or (undefined) - no restructions
 2688 
 2689             # For a hanging side comment we insert an empty quote before
 2690             # the comment so that it becomes a normal side comment and
 2691             # will be aligned by the vertical aligner
 2692             if ( $CODE_type eq 'HSC' ) {
 2693 
 2694                 # Safety Check: This must be a line with one token (a comment)
 2695                 my $rtoken_vars = $rLL->[$Kfirst];
 2696                 if ( $Kfirst == $Klast && $rtoken_vars->[_TYPE_] eq '#' ) {
 2697 
 2698                     # Note that even if the flag 'noadd-whitespace' is set, we
 2699                     # will make an exception here and allow a blank to be
 2700                     # inserted to push the comment to the right.  We can think
 2701                     # of this as an adjustment of indentation rather than
 2702                     # whitespace between tokens. This will also prevent the
 2703                     # hanging side comment from getting converted to a block
 2704                     # comment if whitespace gets deleted, as for example with
 2705                     # the -extrude and -mangle options.
 2706                     my $rcopy = copy_token_as_type( $rtoken_vars, 'q', '' );
 2707                     $store_token->($rcopy);
 2708                     $rcopy = copy_token_as_type( $rtoken_vars, 'b', ' ' );
 2709                     $store_token->($rcopy);
 2710                     $store_token->($rtoken_vars);
 2711                     next;
 2712                 }
 2713                 else {
 2714 
 2715                     # This line was mis-marked by sub scan_comment
 2716                     Fault(
 2717                         "Program bug. A hanging side comment has been mismarked"
 2718                     );
 2719                 }
 2720             }
 2721 
 2722             # Copy tokens unchanged
 2723             foreach my $KK ( $Kfirst .. $Klast ) {
 2724                 $store_token->( $rLL->[$KK] );
 2725             }
 2726             next;
 2727         }
 2728 
 2729         # Handle normal line..
 2730 
 2731         # Insert any essential whitespace between lines
 2732         # if last line was normal CODE.
 2733         # Patch for rt #125012: use K_previous_code rather than '_nonblank'
 2734         # because comments may disappear.
 2735         my $type_next  = $rLL->[$Kfirst]->[_TYPE_];
 2736         my $token_next = $rLL->[$Kfirst]->[_TOKEN_];
 2737         my $Kp         = $self->K_previous_code( undef, $rLL_new );
 2738         if (   $last_line_type eq 'CODE'
 2739             && $type_next ne 'b'
 2740             && defined($Kp) )
 2741         {
 2742             my $token_p = $rLL_new->[$Kp]->[_TOKEN_];
 2743             my $type_p  = $rLL_new->[$Kp]->[_TYPE_];
 2744 
 2745             my ( $token_pp, $type_pp );
 2746             my $Kpp = $self->K_previous_code( $Kp, $rLL_new );
 2747             if ( defined($Kpp) ) {
 2748                 $token_pp = $rLL_new->[$Kpp]->[_TOKEN_];
 2749                 $type_pp  = $rLL_new->[$Kpp]->[_TYPE_];
 2750             }
 2751             else {
 2752                 $token_pp = ";";
 2753                 $type_pp  = ';';
 2754             }
 2755 
 2756             if (
 2757                 is_essential_whitespace(
 2758                     $token_pp, $type_pp,    $token_p,
 2759                     $type_p,   $token_next, $type_next,
 2760                 )
 2761               )
 2762             {
 2763 
 2764                 # Copy this first token as blank, but use previous line number
 2765                 my $rcopy = copy_token_as_type( $rLL->[$Kfirst], 'b', ' ' );
 2766                 $rcopy->[_LINE_INDEX_] =
 2767                   $rLL_new->[-1]->[_LINE_INDEX_];
 2768                 $store_token->($rcopy);
 2769             }
 2770         }
 2771 
 2772         # loop to copy all tokens on this line, with any changes
 2773         my $type_sequence;
 2774         for ( my $KK = $Kfirst ; $KK <= $Klast ; $KK++ ) {
 2775             $rtoken_vars = $rLL->[$KK];
 2776             my $token              = $rtoken_vars->[_TOKEN_];
 2777             my $type               = $rtoken_vars->[_TYPE_];
 2778             my $last_type_sequence = $type_sequence;
 2779             $type_sequence = $rtoken_vars->[_TYPE_SEQUENCE_];
 2780 
 2781             # Handle a blank space ...
 2782             if ( $type eq 'b' ) {
 2783 
 2784                 # Delete it if not wanted by whitespace rules
 2785                 # or we are deleting all whitespace
 2786                 # Note that whitespace flag is a flag indicating whether a
 2787                 # white space BEFORE the token is needed
 2788                 next if ( $KK >= $Klast );    # skip terminal blank
 2789                 my $Knext = $KK + 1;
 2790                 my $ws    = $rwhitespace_flags->[$Knext];
 2791                 if (   $ws == -1
 2792                     || $rOpts_delete_old_whitespace )
 2793                 {
 2794 
 2795                     # FIXME: maybe switch to using _new
 2796                     my $Kp = $self->K_previous_nonblank($KK);
 2797                     next unless defined($Kp);
 2798                     my $token_p = $rLL->[$Kp]->[_TOKEN_];
 2799                     my $type_p  = $rLL->[$Kp]->[_TYPE_];
 2800 
 2801                     my ( $token_pp, $type_pp );
 2802 
 2803                     #my $Kpp = $K_previous_nonblank->($Kp);
 2804                     my $Kpp = $self->K_previous_nonblank($Kp);
 2805                     if ( defined($Kpp) ) {
 2806                         $token_pp = $rLL->[$Kpp]->[_TOKEN_];
 2807                         $type_pp  = $rLL->[$Kpp]->[_TYPE_];
 2808                     }
 2809                     else {
 2810                         $token_pp = ";";
 2811                         $type_pp  = ';';
 2812                     }
 2813                     my $token_next = $rLL->[$Knext]->[_TOKEN_];
 2814                     my $type_next  = $rLL->[$Knext]->[_TYPE_];
 2815 
 2816                     my $do_not_delete = is_essential_whitespace(
 2817                         $token_pp, $type_pp,    $token_p,
 2818                         $type_p,   $token_next, $type_next,
 2819                     );
 2820 
 2821                     next unless ($do_not_delete);
 2822                 }
 2823 
 2824                 # make it just one character if allowed
 2825                 if ($rOpts_add_whitespace) {
 2826                     $rtoken_vars->[_TOKEN_] = ' ';
 2827                 }
 2828                 $store_token->($rtoken_vars);
 2829                 next;
 2830             }
 2831 
 2832             # Handle a nonblank token...
 2833 
 2834             # check for a qw quote
 2835             if ( $type eq 'q' ) {
 2836 
 2837                 # trim blanks from right of qw quotes
 2838                 # (To avoid trimming qw quotes use -ntqw; the tokenizer handles
 2839                 # this)
 2840                 $token =~ s/\s*$//;
 2841                 $rtoken_vars->[_TOKEN_] = $token;
 2842                 note_embedded_tab() if ( $token =~ "\t" );
 2843 
 2844                 if ($in_multiline_qw) {
 2845 
 2846                     # If we are at the end of a multiline qw ..
 2847                     if ( $in_multiline_qw == $KK ) {
 2848 
 2849                  # Split off the closing delimiter character
 2850                  # so that the formatter can put a line break there if necessary
 2851                         my $part1 = $token;
 2852                         my $part2 = substr( $part1, -1, 1, "" );
 2853 
 2854                         if ($part1) {
 2855                             my $rcopy =
 2856                               copy_token_as_type( $rtoken_vars, 'q', $part1 );
 2857                             $store_token->($rcopy);
 2858                             $token = $part2;
 2859                             $rtoken_vars->[_TOKEN_] = $token;
 2860 
 2861                         }
 2862                         $in_multiline_qw = undef;
 2863 
 2864                         # store without preceding blank
 2865                         $store_token->($rtoken_vars);
 2866                         next;
 2867                     }
 2868                     else {
 2869                         # continuing a multiline qw
 2870                         $store_token->($rtoken_vars);
 2871                         next;
 2872                     }
 2873                 }
 2874 
 2875                 else {
 2876 
 2877                     # we are encountered new qw token...see if multiline
 2878                     my $K_end = $K_end_q->($KK);
 2879                     if ( $ALLOW_BREAK_MULTILINE_QW && $K_end != $KK ) {
 2880 
 2881                         # Starting multiline qw...
 2882                         # set flag equal to the ending K
 2883                         $in_multiline_qw = $K_end;
 2884 
 2885                  # Split off the leading part
 2886                  # so that the formatter can put a line break there if necessary
 2887                         if ( $token =~ /^(qw\s*.)(.*)$/ ) {
 2888                             my $part1 = $1;
 2889                             my $part2 = $2;
 2890                             if ($part2) {
 2891                                 my $rcopy =
 2892                                   copy_token_as_type( $rtoken_vars, 'q',
 2893                                     $part1 );
 2894                                 $store_token_and_space->(
 2895                                     $rcopy, $rwhitespace_flags->[$KK] == WS_YES
 2896                                 );
 2897                                 $token = $part2;
 2898                                 $rtoken_vars->[_TOKEN_] = $token;
 2899 
 2900                                 # Second part goes without intermediate blank
 2901                                 $store_token->($rtoken_vars);
 2902                                 next;
 2903                             }
 2904                         }
 2905                     }
 2906                     else {
 2907 
 2908                         # this is a new single token qw -
 2909                         # store with possible preceding blank
 2910                         $store_token_and_space->(
 2911                             $rtoken_vars, $rwhitespace_flags->[$KK] == WS_YES
 2912                         );
 2913                         next;
 2914                     }
 2915                 }
 2916             } ## end if ( $type eq 'q' )
 2917 
 2918             # Modify certain tokens here for whitespace
 2919             # The following is not yet done, but could be:
 2920             #   sub (x x x)
 2921             elsif ( $type =~ /^[wit]$/ ) {
 2922 
 2923                 # Examples: <<snippets/space1.in>>
 2924                 # change '$  var'  to '$var' etc
 2925                 #        '-> new'  to '->new'
 2926                 if ( $token =~ /^([\$\&\%\*\@]|\-\>)\s/ ) {
 2927                     $token =~ s/\s*//g;
 2928                     $rtoken_vars->[_TOKEN_] = $token;
 2929                 }
 2930 
 2931                 # Split identifiers with leading arrows, inserting blanks if
 2932                 # necessary.  It is easier and safer here than in the
 2933                 # tokenizer.  For example '->new' becomes two tokens, '->' and
 2934                 # 'new' with a possible blank between.
 2935                 #
 2936                 # Note: there is a related patch in sub set_whitespace_flags
 2937                 if ( $token =~ /^\-\>(.*)$/ && $1 ) {
 2938                     my $token_save = $1;
 2939                     my $type_save  = $type;
 2940 
 2941                     # store a blank to left of arrow if necessary
 2942                     my $Kprev = $self->K_previous_nonblank($KK);
 2943                     if (   defined($Kprev)
 2944                         && $rLL->[$Kprev]->[_TYPE_] ne 'b'
 2945                         && $rOpts_add_whitespace
 2946                         && $want_left_space{'->'} == WS_YES )
 2947                     {
 2948                         my $rcopy =
 2949                           copy_token_as_type( $rtoken_vars, 'b', ' ' );
 2950                         $store_token->($rcopy);
 2951                     }
 2952 
 2953                     # then store the arrow
 2954                     my $rcopy = copy_token_as_type( $rtoken_vars, '->', '->' );
 2955                     $store_token->($rcopy);
 2956 
 2957                     # then reset the current token to be the remainder,
 2958                     # and reset the whitespace flag according to the arrow
 2959                     $token = $rtoken_vars->[_TOKEN_] = $token_save;
 2960                     $type  = $rtoken_vars->[_TYPE_]  = $type_save;
 2961                     $store_token->($rtoken_vars);
 2962                     next;
 2963                 }
 2964 
 2965                 if ( $token =~ /$SUB_PATTERN/ ) {
 2966 
 2967                     # -spp = 0 : no space before opening prototype paren
 2968                     # -spp = 1 : stable (follow input spacing)
 2969                     # -spp = 2 : always space before opening prototype paren
 2970                     my $spp = $rOpts->{'space-prototype-paren'};
 2971                     if ( defined($spp) ) {
 2972                         if    ( $spp == 0 ) { $token =~ s/\s+\(/\(/; }
 2973                         elsif ( $spp == 2 ) { $token =~ s/\(/ (/; }
 2974                     }
 2975 
 2976                     # one space max, and no tabs
 2977                     $token =~ s/\s+/ /g;
 2978                     $rtoken_vars->[_TOKEN_] = $token;
 2979                 }
 2980 
 2981                 # trim identifiers of trailing blanks which can occur
 2982                 # under some unusual circumstances, such as if the
 2983                 # identifier 'witch' has trailing blanks on input here:
 2984                 #
 2985                 # sub
 2986                 # witch
 2987                 # ()   # prototype may be on new line ...
 2988                 # ...
 2989                 if ( $type eq 'i' ) {
 2990                     $token =~ s/\s+$//g;
 2991                     $rtoken_vars->[_TOKEN_] = $token;
 2992                 }
 2993             }
 2994 
 2995             # change 'LABEL   :'   to 'LABEL:'
 2996             elsif ( $type eq 'J' ) {
 2997                 $token =~ s/\s+//g;
 2998                 $rtoken_vars->[_TOKEN_] = $token;
 2999             }
 3000 
 3001             # patch to add space to something like "x10"
 3002             # This avoids having to split this token in the pre-tokenizer
 3003             elsif ( $type eq 'n' ) {
 3004                 if ( $token =~ /^x\d+/ ) {
 3005                     $token =~ s/x/x /;
 3006                     $rtoken_vars->[_TOKEN_] = $token;
 3007                 }
 3008             }
 3009 
 3010             # check a quote for problems
 3011             elsif ( $type eq 'Q' ) {
 3012                 $check_Q->( $KK, $Kfirst );
 3013             }
 3014 
 3015             # handle semicolons
 3016             elsif ( $type eq ';' ) {
 3017 
 3018                 # Remove unnecessary semicolons, but not after bare
 3019                 # blocks, where it could be unsafe if the brace is
 3020                 # mistokenized.
 3021                 if (
 3022                     $rOpts->{'delete-semicolons'}
 3023                     && (
 3024                         (
 3025                             $last_nonblank_type eq '}'
 3026                             && (
 3027                                 $is_block_without_semicolon{
 3028                                     $last_nonblank_block_type}
 3029                                 || $last_nonblank_block_type =~ /$SUB_PATTERN/
 3030                                 || $last_nonblank_block_type =~ /^\w+:$/ )
 3031                         )
 3032                         || $last_nonblank_type eq ';'
 3033                     )
 3034                   )
 3035                 {
 3036 
 3037                     # This looks like a deletable semicolon, but even if a
 3038                     # semicolon can be deleted it is necessarily best to do so.
 3039                     # We apply these additional rules for deletion:
 3040                     # - Always ok to delete a ';' at the end of a line
 3041                     # - Never delete a ';' before a '#' because it would
 3042                     #   promote it to a block comment.
 3043                     # - If a semicolon is not at the end of line, then only
 3044                     #   delete if it is followed by another semicolon or closing
 3045                     #   token.  This includes the comment rule.  It may take
 3046                     #   two passes to get to a final state, but it is a little
 3047                     #   safer.  For example, keep the first semicolon here:
 3048                     #      eval { sub bubba { ok(0) }; ok(0) } || ok(1);
 3049                     #   It is not required but adds some clarity.
 3050                     my $ok_to_delete = 1;
 3051                     if ( $KK < $Klast ) {
 3052                         my $Kn = $self->K_next_nonblank($KK);
 3053                         if ( defined($Kn) && $Kn <= $Klast ) {
 3054                             my $next_nonblank_token_type =
 3055                               $rLL->[$Kn]->[_TYPE_];
 3056                             $ok_to_delete = $next_nonblank_token_type eq ';'
 3057                               || $next_nonblank_token_type eq '}';
 3058                         }
 3059                     }
 3060 
 3061                     if ($ok_to_delete) {
 3062                         note_deleted_semicolon();
 3063                         next;
 3064                     }
 3065                     else {
 3066                         write_logfile_entry("Extra ';'\n");
 3067                     }
 3068                 }
 3069             }
 3070 
 3071             elsif ($type_sequence) {
 3072 
 3073                 #                if ( $is_opening_token{$token} ) {
 3074                 #                }
 3075 
 3076                 if ( $is_closing_token{$token} ) {
 3077 
 3078                     # Insert a tentative missing semicolon if the next token is
 3079                     # a closing block brace
 3080                     if (
 3081                            $type eq '}'
 3082                         && $token eq '}'
 3083 
 3084                         # not preceded by a ';'
 3085                         && $last_nonblank_type ne ';'
 3086 
 3087                    # and this is not a VERSION stmt (is all one line, we are not
 3088                    # inserting semicolons on one-line blocks)
 3089                         && $CODE_type ne 'VER'
 3090 
 3091                         # and we are allowed to add semicolons
 3092                         && $rOpts->{'add-semicolons'}
 3093                       )
 3094                     {
 3095                         $add_phantom_semicolon->($KK);
 3096                     }
 3097                 }
 3098             }
 3099 
 3100             # Store this token with possible previous blank
 3101             $store_token_and_space->(
 3102                 $rtoken_vars, $rwhitespace_flags->[$KK] == WS_YES
 3103             );
 3104 
 3105         }    # End token loop
 3106     }    # End line loop
 3107 
 3108     # Reset memory to be the new array
 3109     $self->{rLL} = $rLL_new;
 3110     $self->set_rLL_max_index();
 3111     $self->{K_opening_container}   = $K_opening_container;
 3112     $self->{K_closing_container}   = $K_closing_container;
 3113     $self->{K_opening_ternary}     = $K_opening_ternary;
 3114     $self->{K_closing_ternary}     = $K_closing_ternary;
 3115     $self->{rK_phantom_semicolons} = $rK_phantom_semicolons;
 3116 
 3117     # make sure the new array looks okay
 3118     $self->check_token_array();
 3119 
 3120     # reset the token limits of each line
 3121     $self->resync_lines_and_tokens();
 3122 
 3123     return;
 3124 }
 3125 
 3126 {    # scan_comments
 3127 
 3128     my $Last_line_had_side_comment;
 3129     my $In_format_skipping_section;
 3130     my $Saw_VERSION_in_this_file;
 3131 
 3132     sub scan_comments {
 3133         my $self   = shift;
 3134         my $rlines = $self->{rlines};
 3135 
 3136         $Last_line_had_side_comment = undef;
 3137         $In_format_skipping_section = undef;
 3138         $Saw_VERSION_in_this_file   = undef;
 3139 
 3140         # Loop over all lines
 3141         foreach my $line_of_tokens ( @{$rlines} ) {
 3142             my $line_type = $line_of_tokens->{_line_type};
 3143             next unless ( $line_type eq 'CODE' );
 3144             my $CODE_type = $self->get_CODE_type($line_of_tokens);
 3145             $line_of_tokens->{_code_type} = $CODE_type;
 3146         }
 3147         return;
 3148     }
 3149 
 3150     sub get_CODE_type {
 3151         my ( $self, $line_of_tokens ) = @_;
 3152 
 3153         # We are looking at a line of code and setting a flag to
 3154         # describe any special processing that it requires
 3155 
 3156         # Possible CODE_types are as follows.
 3157         # 'BL' = Blank Line
 3158         # 'VB' = Verbatim - line goes out verbatim
 3159         # 'IO' = Indent Only - line goes out unchanged except for indentation
 3160         # 'NIN' = No Internal Newlines - line does not get broken
 3161         # 'HSC'=Hanging Side Comment - fix this hanging side comment
 3162         # 'BC'=Block Comment - an ordinary full line comment
 3163         # 'SBC'=Static Block Comment - a block comment which does not get
 3164         #      indented
 3165         # 'SBCX'=Static Block Comment Without Leading Space
 3166         # 'DEL'=Delete this line
 3167         # 'VER'=VERSION statement
 3168         # '' or (undefined) - no restructions
 3169 
 3170         my $rLL    = $self->{rLL};
 3171         my $Klimit = $self->{Klimit};
 3172 
 3173         my $CODE_type            = $rOpts->{'indent-only'} ? 'IO' : "";
 3174         my $no_internal_newlines = 1 - $rOpts_add_newlines;
 3175         if ( !$CODE_type && $no_internal_newlines ) { $CODE_type = 'NIN' }
 3176 
 3177         # extract what we need for this line..
 3178 
 3179         # Global value for error messages:
 3180         $input_line_number = $line_of_tokens->{_line_number};
 3181 
 3182         my $rK_range = $line_of_tokens->{_rK_range};
 3183         my ( $Kfirst, $Klast ) = @{$rK_range};
 3184         my $jmax = -1;
 3185         if ( defined($Kfirst) ) { $jmax = $Klast - $Kfirst }
 3186         my $input_line         = $line_of_tokens->{_line_text};
 3187         my $in_continued_quote = my $starting_in_quote =
 3188           $line_of_tokens->{_starting_in_quote};
 3189         my $in_quote        = $line_of_tokens->{_ending_in_quote};
 3190         my $ending_in_quote = $in_quote;
 3191         my $guessed_indentation_level =
 3192           $line_of_tokens->{_guessed_indentation_level};
 3193 
 3194         my $is_static_block_comment = 0;
 3195 
 3196         # Handle a continued quote..
 3197         if ($in_continued_quote) {
 3198 
 3199             # A line which is entirely a quote or pattern must go out
 3200             # verbatim.  Note: the \n is contained in $input_line.
 3201             if ( $jmax <= 0 ) {
 3202                 if ( ( $input_line =~ "\t" ) ) {
 3203                     note_embedded_tab();
 3204                 }
 3205                 $Last_line_had_side_comment = 0;
 3206                 return 'VB';
 3207             }
 3208         }
 3209 
 3210         my $is_block_comment =
 3211           ( $jmax == 0 && $rLL->[$Kfirst]->[_TYPE_] eq '#' );
 3212 
 3213         # Write line verbatim if we are in a formatting skip section
 3214         if ($In_format_skipping_section) {
 3215             $Last_line_had_side_comment = 0;
 3216 
 3217             # Note: extra space appended to comment simplifies pattern matching
 3218             if ( $is_block_comment
 3219                 && ( $rLL->[$Kfirst]->[_TOKEN_] . " " ) =~
 3220                 /$format_skipping_pattern_end/o )
 3221             {
 3222                 $In_format_skipping_section = 0;
 3223                 write_logfile_entry("Exiting formatting skip section\n");
 3224             }
 3225             return 'FS';
 3226         }
 3227 
 3228         # See if we are entering a formatting skip section
 3229         if (   $rOpts_format_skipping
 3230             && $is_block_comment
 3231             && ( $rLL->[$Kfirst]->[_TOKEN_] . " " ) =~
 3232             /$format_skipping_pattern_begin/o )
 3233         {
 3234             $In_format_skipping_section = 1;
 3235             write_logfile_entry("Entering formatting skip section\n");
 3236             $Last_line_had_side_comment = 0;
 3237             return 'FS';
 3238         }
 3239 
 3240         # ignore trailing blank tokens (they will get deleted later)
 3241         if ( $jmax > 0 && $rLL->[$Klast]->[_TYPE_] eq 'b' ) {
 3242             $jmax--;
 3243         }
 3244 
 3245         # Handle a blank line..
 3246         if ( $jmax < 0 ) {
 3247             $Last_line_had_side_comment = 0;
 3248             return 'BL';
 3249         }
 3250 
 3251         # see if this is a static block comment (starts with ## by default)
 3252         my $is_static_block_comment_without_leading_space = 0;
 3253         if (   $is_block_comment
 3254             && $rOpts->{'static-block-comments'}
 3255             && $input_line =~ /$static_block_comment_pattern/o )
 3256         {
 3257             $is_static_block_comment = 1;
 3258             $is_static_block_comment_without_leading_space =
 3259               substr( $input_line, 0, 1 ) eq '#';
 3260         }
 3261 
 3262         # Check for comments which are line directives
 3263         # Treat exactly as static block comments without leading space
 3264         # reference: perlsyn, near end, section Plain Old Comments (Not!)
 3265         # example: '# line 42 "new_filename.plx"'
 3266         if (
 3267                $is_block_comment
 3268             && $input_line =~ /^\#   \s*
 3269                                line \s+ (\d+)   \s*
 3270                                (?:\s("?)([^"]+)\2)? \s*
 3271                                $/x
 3272           )
 3273         {
 3274             $is_static_block_comment                       = 1;
 3275             $is_static_block_comment_without_leading_space = 1;
 3276         }
 3277 
 3278         # look for hanging side comment
 3279         if (
 3280                $is_block_comment
 3281             && $Last_line_had_side_comment  # last line had side comment
 3282             && $input_line =~ /^\s/         # there is some leading space
 3283             && !$is_static_block_comment    # do not make static comment hanging
 3284             && $rOpts->{'hanging-side-comments'}    # user is allowing
 3285                                                     # hanging side comments
 3286                                                     # like this
 3287           )
 3288         {
 3289             $Last_line_had_side_comment = 1;
 3290             return 'HSC';
 3291         }
 3292 
 3293         # remember if this line has a side comment
 3294         $Last_line_had_side_comment =
 3295           ( $jmax > 0 && $rLL->[$Klast]->[_TYPE_] eq '#' );
 3296 
 3297         # Handle a block (full-line) comment..
 3298         if ($is_block_comment) {
 3299 
 3300             if ( $rOpts->{'delete-block-comments'} ) { return 'DEL' }
 3301 
 3302             # TRIM COMMENTS -- This could be turned off as a option
 3303             $rLL->[$Kfirst]->[_TOKEN_] =~ s/\s*$//;    # trim right end
 3304 
 3305             if ($is_static_block_comment_without_leading_space) {
 3306                 return 'SBCX';
 3307             }
 3308             elsif ($is_static_block_comment) {
 3309                 return 'SBC';
 3310             }
 3311             else {
 3312                 return 'BC';
 3313             }
 3314         }
 3315 
 3316         #   Patch needed for MakeMaker.  Do not break a statement
 3317         #   in which $VERSION may be calculated.  See MakeMaker.pm;
 3318         #   this is based on the coding in it.
 3319         #   The first line of a file that matches this will be eval'd:
 3320         #       /([\$*])(([\w\:\']*)\bVERSION)\b.*\=/
 3321         #   Examples:
 3322         #     *VERSION = \'1.01';
 3323         #     ( $VERSION ) = '$Revision: 1.74 $ ' =~ /\$Revision:\s+([^\s]+)/;
 3324         #   We will pass such a line straight through without breaking
 3325         #   it unless -npvl is used.
 3326 
 3327         #   Patch for problem reported in RT #81866, where files
 3328         #   had been flattened into a single line and couldn't be
 3329         #   tidied without -npvl.  There are two parts to this patch:
 3330         #   First, it is not done for a really long line (80 tokens for now).
 3331         #   Second, we will only allow up to one semicolon
 3332         #   before the VERSION.  We need to allow at least one semicolon
 3333         #   for statements like this:
 3334         #      require Exporter;  our $VERSION = $Exporter::VERSION;
 3335         #   where both statements must be on a single line for MakeMaker
 3336 
 3337         my $is_VERSION_statement = 0;
 3338         if (  !$Saw_VERSION_in_this_file
 3339             && $jmax < 80
 3340             && $input_line =~
 3341             /^[^;]*;?[^;]*([\$*])(([\w\:\']*)\bVERSION)\b.*\=/ )
 3342         {
 3343             $Saw_VERSION_in_this_file = 1;
 3344             write_logfile_entry("passing VERSION line; -npvl deactivates\n");
 3345             $CODE_type = 'VER';
 3346         }
 3347         return $CODE_type;
 3348     }
 3349 }
 3350 
 3351 sub find_nested_pairs {
 3352     my $self = shift;
 3353 
 3354     my $rLL = $self->{rLL};
 3355     return unless ( defined($rLL) && @{$rLL} );
 3356 
 3357     # We define an array of pairs of nested containers
 3358     my @nested_pairs;
 3359 
 3360     # We also set the following hash values to identify container pairs for
 3361     # which the opening and closing tokens are adjacent in the token stream:
 3362     # $rpaired_to_inner_container->{$seqno_out}=$seqno_in where $seqno_out and
 3363     # $seqno_in are the seqence numbers of the outer and inner containers of
 3364     # the pair We need these later to decide if we can insert a missing
 3365     # semicolon
 3366     my $rpaired_to_inner_container = {};
 3367 
 3368     # This local hash remembers if an outer container has a close following
 3369     # inner container;
 3370     # The key is the outer sequence number
 3371     # The value is the token_hash of the inner container
 3372 
 3373     my %has_close_following_opening;
 3374 
 3375     # Names of calling routines can either be marked as 'i' or 'w',
 3376     # and they may invoke a sub call with an '->'. We will consider
 3377     # any consecutive string of such types as a single unit when making
 3378     # weld decisions.  We also allow a leading !
 3379     my $is_name_type = {
 3380         'i'  => 1,
 3381         'w'  => 1,
 3382         'U'  => 1,
 3383         '->' => 1,
 3384         '!'  => 1,
 3385     };
 3386 
 3387     my $is_name = sub {
 3388         my $type = shift;
 3389         return $type && $is_name_type->{$type};
 3390     };
 3391 
 3392     my $last_container;
 3393     my $last_last_container;
 3394     my $last_nonblank_token_vars;
 3395     my $last_count;
 3396 
 3397     my $nonblank_token_count = 0;
 3398 
 3399     # loop over all tokens
 3400     foreach my $rtoken_vars ( @{$rLL} ) {
 3401 
 3402         my $type = $rtoken_vars->[_TYPE_];
 3403 
 3404         next if ( $type eq 'b' );
 3405 
 3406         # long identifier-like items are counted as a single item
 3407         $nonblank_token_count++
 3408           unless ( $is_name->($type)
 3409             && $is_name->( $last_nonblank_token_vars->[_TYPE_] ) );
 3410 
 3411         my $type_sequence = $rtoken_vars->[_TYPE_SEQUENCE_];
 3412         if ($type_sequence) {
 3413 
 3414             my $token = $rtoken_vars->[_TOKEN_];
 3415 
 3416             if ( $is_opening_token{$token} ) {
 3417 
 3418                 # following previous opening token ...
 3419                 if (   $last_container
 3420                     && $is_opening_token{ $last_container->[_TOKEN_] } )
 3421                 {
 3422 
 3423                     # adjacent to this one
 3424                     my $tok_diff = $nonblank_token_count - $last_count;
 3425 
 3426                     my $last_tok = $last_nonblank_token_vars->[_TOKEN_];
 3427 
 3428                     if (   $tok_diff == 1
 3429                         || $tok_diff == 2 && $last_container->[_TOKEN_] eq '(' )
 3430                     {
 3431 
 3432                         # remember this pair...
 3433                         my $outer_seqno = $last_container->[_TYPE_SEQUENCE_];
 3434                         my $inner_seqno = $type_sequence;
 3435                         $has_close_following_opening{$outer_seqno} =
 3436                           $rtoken_vars;
 3437                     }
 3438                 }
 3439             }
 3440 
 3441             elsif ( $is_closing_token{$token} ) {
 3442 
 3443                 # if the corresponding opening token had an adjacent opening
 3444                 if (   $has_close_following_opening{$type_sequence}
 3445                     && $is_closing_token{ $last_container->[_TOKEN_] }
 3446                     && $has_close_following_opening{$type_sequence}
 3447                     ->[_TYPE_SEQUENCE_] == $last_container->[_TYPE_SEQUENCE_] )
 3448                 {
 3449 
 3450                     # The closing weld tokens must be adjacent
 3451                     # NOTE: so intermediate commas and semicolons
 3452                     # can currently block a weld.  This is something
 3453                     # that could be fixed in the future by including
 3454                     # a flag to delete un-necessary commas and semicolons.
 3455                     my $tok_diff = $nonblank_token_count - $last_count;
 3456 
 3457                     if ( $tok_diff == 1 ) {
 3458 
 3459                         # This is a closely nested pair ..
 3460                         my $inner_seqno = $last_container->[_TYPE_SEQUENCE_];
 3461                         my $outer_seqno = $type_sequence;
 3462                         $rpaired_to_inner_container->{$outer_seqno} =
 3463                           $inner_seqno;
 3464 
 3465                         push @nested_pairs, [ $inner_seqno, $outer_seqno ];
 3466                     }
 3467                 }
 3468             }
 3469 
 3470             $last_last_container = $last_container;
 3471             $last_container      = $rtoken_vars;
 3472             $last_count          = $nonblank_token_count;
 3473         }
 3474         $last_nonblank_token_vars = $rtoken_vars;
 3475     }
 3476     $self->{rnested_pairs}              = \@nested_pairs;
 3477     $self->{rpaired_to_inner_container} = $rpaired_to_inner_container;
 3478     return;
 3479 }
 3480 
 3481 sub dump_tokens {
 3482 
 3483     # a debug routine, not normally used
 3484     my ( $self, $msg ) = @_;
 3485     my $rLL   = $self->{rLL};
 3486     my $nvars = @{$rLL};
 3487     print STDERR "$msg\n";
 3488     print STDERR "ntokens=$nvars\n";
 3489     print STDERR "K\t_TOKEN_\t_TYPE_\n";
 3490     my $K = 0;
 3491 
 3492     foreach my $item ( @{$rLL} ) {
 3493         print STDERR "$K\t$item->[_TOKEN_]\t$item->[_TYPE_]\n";
 3494         $K++;
 3495     }
 3496     return;
 3497 }
 3498 
 3499 sub get_old_line_index {
 3500     my ( $self, $K ) = @_;
 3501     my $rLL = $self->{rLL};
 3502     return 0 unless defined($K);
 3503     return $rLL->[$K]->[_LINE_INDEX_];
 3504 }
 3505 
 3506 sub get_old_line_count {
 3507     my ( $self, $Kbeg, $Kend ) = @_;
 3508     my $rLL = $self->{rLL};
 3509     return 0 unless defined($Kbeg);
 3510     return 0 unless defined($Kend);
 3511     return $rLL->[$Kend]->[_LINE_INDEX_] - $rLL->[$Kbeg]->[_LINE_INDEX_] + 1;
 3512 }
 3513 
 3514 sub K_next_code {
 3515     my ( $self, $KK, $rLL ) = @_;
 3516 
 3517     # return the index K of the next nonblank, non-comment token
 3518     return unless ( defined($KK) && $KK >= 0 );
 3519 
 3520     # use the standard array unless given otherwise
 3521     $rLL = $self->{rLL} unless ( defined($rLL) );
 3522     my $Num  = @{$rLL};
 3523     my $Knnb = $KK + 1;
 3524     while ( $Knnb < $Num ) {
 3525         if ( !defined( $rLL->[$Knnb] ) ) {
 3526             Fault("Undefined entry for k=$Knnb");
 3527         }
 3528         if (   $rLL->[$Knnb]->[_TYPE_] ne 'b'
 3529             && $rLL->[$Knnb]->[_TYPE_] ne '#' )
 3530         {
 3531             return $Knnb;
 3532         }
 3533         $Knnb++;
 3534     }
 3535     return;
 3536 }
 3537 
 3538 sub K_next_nonblank {
 3539     my ( $self, $KK, $rLL ) = @_;
 3540 
 3541     # return the index K of the next nonblank token
 3542     return unless ( defined($KK) && $KK >= 0 );
 3543 
 3544     # use the standard array unless given otherwise
 3545     $rLL = $self->{rLL} unless ( defined($rLL) );
 3546     my $Num  = @{$rLL};
 3547     my $Knnb = $KK + 1;
 3548     while ( $Knnb < $Num ) {
 3549         if ( !defined( $rLL->[$Knnb] ) ) {
 3550             Fault("Undefined entry for k=$Knnb");
 3551         }
 3552         if ( $rLL->[$Knnb]->[_TYPE_] ne 'b' ) { return $Knnb }
 3553         $Knnb++;
 3554     }
 3555     return;
 3556 }
 3557 
 3558 sub K_previous_code {
 3559 
 3560     # return the index K of the previous nonblank, non-comment token
 3561     # Call with $KK=undef to start search at the top of the array
 3562     my ( $self, $KK, $rLL ) = @_;
 3563 
 3564     # use the standard array unless given otherwise
 3565     $rLL = $self->{rLL} unless ( defined($rLL) );
 3566     my $Num = @{$rLL};
 3567     if ( !defined($KK) ) { $KK = $Num }
 3568     elsif ( $KK > $Num ) {
 3569 
 3570         # The caller should make the first call with KK_new=undef to
 3571         # avoid this error
 3572         Fault(
 3573 "Program Bug: K_previous_nonblank_new called with K=$KK which exceeds $Num"
 3574         );
 3575     }
 3576     my $Kpnb = $KK - 1;
 3577     while ( $Kpnb >= 0 ) {
 3578         if (   $rLL->[$Kpnb]->[_TYPE_] ne 'b'
 3579             && $rLL->[$Kpnb]->[_TYPE_] ne '#' )
 3580         {
 3581             return $Kpnb;
 3582         }
 3583         $Kpnb--;
 3584     }
 3585     return;
 3586 }
 3587 
 3588 sub K_previous_nonblank {
 3589 
 3590     # return index of previous nonblank token before item K;
 3591     # Call with $KK=undef to start search at the top of the array
 3592     my ( $self, $KK, $rLL ) = @_;
 3593 
 3594     # use the standard array unless given otherwise
 3595     $rLL = $self->{rLL} unless ( defined($rLL) );
 3596     my $Num = @{$rLL};
 3597     if ( !defined($KK) ) { $KK = $Num }
 3598     elsif ( $KK > $Num ) {
 3599 
 3600         # The caller should make the first call with KK_new=undef to
 3601         # avoid this error
 3602         Fault(
 3603 "Program Bug: K_previous_nonblank_new called with K=$KK which exceeds $Num"
 3604         );
 3605     }
 3606     my $Kpnb = $KK - 1;
 3607     while ( $Kpnb >= 0 ) {
 3608         if ( $rLL->[$Kpnb]->[_TYPE_] ne 'b' ) { return $Kpnb }
 3609         $Kpnb--;
 3610     }
 3611     return;
 3612 }
 3613 
 3614 sub map_containers {
 3615 
 3616     # Maps the container hierarchy
 3617     my $self = shift;
 3618     my $rLL  = $self->{rLL};
 3619     return unless ( defined($rLL) && @{$rLL} );
 3620 
 3621     my $K_opening_container = $self->{K_opening_container};
 3622     my $K_closing_container = $self->{K_closing_container};
 3623     my $rcontainer_map      = $self->{rcontainer_map};
 3624 
 3625     # loop over containers
 3626     my @stack;    # stack of container sequence numbers
 3627     my $KNEXT = 0;
 3628     while ( defined($KNEXT) ) {
 3629         my $KK = $KNEXT;
 3630         $KNEXT = $rLL->[$KNEXT]->[_KNEXT_SEQ_ITEM_];
 3631         my $rtoken_vars   = $rLL->[$KK];
 3632         my $type_sequence = $rtoken_vars->[_TYPE_SEQUENCE_];
 3633         if ( !$type_sequence ) {
 3634             next if ( $KK == 0 );    # first token in file may not be container
 3635             Fault("sequence = $type_sequence not defined at K=$KK");
 3636         }
 3637 
 3638         my $token = $rtoken_vars->[_TOKEN_];
 3639         if ( $is_opening_token{$token} ) {
 3640             if (@stack) {
 3641                 $rcontainer_map->{$type_sequence} = $stack[-1];
 3642             }
 3643             push @stack, $type_sequence;
 3644         }
 3645         if ( $is_closing_token{$token} ) {
 3646             if (@stack) {
 3647                 my $seqno = pop @stack;
 3648                 if ( $seqno != $type_sequence ) {
 3649 
 3650                     # shouldn't happen unless file is garbage
 3651                 }
 3652             }
 3653         }
 3654     }
 3655 
 3656     # the stack should be empty for a good file
 3657     if (@stack) {
 3658 
 3659         # unbalanced containers; file probably bad
 3660     }
 3661     else {
 3662         # ok
 3663     }
 3664     return;
 3665 }
 3666 
 3667 sub mark_short_nested_blocks {
 3668 
 3669     # This routine looks at the entire file and marks any short nested blocks
 3670     # which should not be broken.  The results are stored in the hash
 3671     #     $rshort_nested->{$type_sequence}
 3672     # which will be true if the container should remain intact.
 3673     #
 3674     # For example, consider the following line:
 3675 
 3676     #   sub cxt_two { sort { $a <=> $b } test_if_list() }
 3677 
 3678     # The 'sort' block is short and nested within an outer sub block.
 3679     # Normally, the existance of the 'sort' block will force the sub block to
 3680     # break open, but this is not always desirable. Here we will set a flag for
 3681     # the sort block to prevent this.  To give the user control, we will
 3682     # follow the input file formatting.  If either of the blocks is broken in
 3683     # the input file then we will allow it to remain broken. Otherwise we will
 3684     # set a flag to keep it together in later formatting steps.
 3685 
 3686     # The flag which is set here will be checked in two places:
 3687     # 'sub print_line_of_tokens' and 'sub starting_one_line_block'
 3688 
 3689     my $self = shift;
 3690     my $rLL  = $self->{rLL};
 3691     return unless ( defined($rLL) && @{$rLL} );
 3692 
 3693     return unless ( $rOpts->{'one-line-block-nesting'} );
 3694 
 3695     my $K_opening_container = $self->{K_opening_container};
 3696     my $K_closing_container = $self->{K_closing_container};
 3697     my $rbreak_container    = $self->{rbreak_container};
 3698     my $rshort_nested       = $self->{rshort_nested};
 3699     my $rcontainer_map      = $self->{rcontainer_map};
 3700     my $rlines              = $self->{rlines};
 3701 
 3702     # Variables needed for estimating line lengths
 3703     my $starting_indent;
 3704     my $starting_lentot;
 3705     my $length_tol = 1;
 3706 
 3707     my $excess_length_to_K = sub {
 3708         my ($K) = @_;
 3709 
 3710         # Estimate the length from the line start to a given token
 3711         my $length = $self->cumulative_length_before_K($K) - $starting_lentot;
 3712         my $excess_length =
 3713           $starting_indent + $length + $length_tol - $rOpts_maximum_line_length;
 3714         return ($excess_length);
 3715     };
 3716 
 3717     my $is_broken_block = sub {
 3718 
 3719         # a block is broken if the input line numbers of the braces differ
 3720         my ($seqno) = @_;
 3721         my $K_opening = $K_opening_container->{$seqno};
 3722         return unless ( defined($K_opening) );
 3723         my $K_closing = $K_closing_container->{$seqno};
 3724         return unless ( defined($K_closing) );
 3725         return $rbreak_container->{$seqno}
 3726           || $rLL->[$K_closing]->[_LINE_INDEX_] !=
 3727           $rLL->[$K_opening]->[_LINE_INDEX_];
 3728     };
 3729 
 3730     # loop over all containers
 3731     my @open_block_stack;
 3732     my $iline = -1;
 3733     my $KNEXT = 0;
 3734     while ( defined($KNEXT) ) {
 3735         my $KK = $KNEXT;
 3736         $KNEXT = $rLL->[$KNEXT]->[_KNEXT_SEQ_ITEM_];
 3737         my $rtoken_vars   = $rLL->[$KK];
 3738         my $type_sequence = $rtoken_vars->[_TYPE_SEQUENCE_];
 3739         if ( !$type_sequence ) {
 3740             next if ( $KK == 0 );    # first token in file may not be container
 3741 
 3742             # an error here is most likely due to a recent programming change
 3743             Fault("sequence = $type_sequence not defined at K=$KK");
 3744         }
 3745 
 3746         # We are just looking at code blocks
 3747         my $token = $rtoken_vars->[_TOKEN_];
 3748         my $type  = $rtoken_vars->[_TYPE_];
 3749         next unless ( $type eq $token );
 3750         my $block_type = $rtoken_vars->[_BLOCK_TYPE_];
 3751         next unless ($block_type);
 3752 
 3753         # Keep a stack of all acceptable block braces seen.
 3754         # Only consider blocks entirely on one line so dump the stack when line
 3755         # changes.
 3756         my $iline_last = $iline;
 3757         $iline = $rLL->[$KK]->[_LINE_INDEX_];
 3758         if ( $iline != $iline_last ) { @open_block_stack = () }
 3759 
 3760         if ( $token eq '}' ) {
 3761             if (@open_block_stack) { pop @open_block_stack }
 3762         }
 3763         next unless ( $token eq '{' );
 3764 
 3765         # block must be balanced (bad scripts may be unbalanced)
 3766         my $K_opening = $K_opening_container->{$type_sequence};
 3767         my $K_closing = $K_closing_container->{$type_sequence};
 3768         next unless ( defined($K_opening) && defined($K_closing) );
 3769 
 3770         # require that this block be entirely on one line
 3771         next if ( $is_broken_block->($type_sequence) );
 3772 
 3773         # See if this block fits on one line of allowed length (which may
 3774         # be different from the input script)
 3775         $starting_lentot =
 3776           $KK <= 0 ? 0 : $rLL->[ $KK - 1 ]->[_CUMULATIVE_LENGTH_];
 3777         $starting_indent = 0;
 3778         if ( !$rOpts_variable_maximum_line_length ) {
 3779             my $level = $rLL->[$KK]->[_LEVEL_];
 3780             $starting_indent = $rOpts_indent_columns * $level;
 3781         }
 3782 
 3783         # Dump the stack if block is too long and skip this block
 3784         if ( $excess_length_to_K->($K_closing) > 0 ) {
 3785             @open_block_stack = ();
 3786             next;
 3787         }
 3788 
 3789         # OK, Block passes tests, remember it
 3790         push @open_block_stack, $type_sequence;
 3791 
 3792         # We are only marking nested code blocks,
 3793         # so check for a previous block on the stack
 3794         next unless ( @open_block_stack > 1 );
 3795 
 3796         # Looks OK, mark this as a short nested block
 3797         $rshort_nested->{$type_sequence} = 1;
 3798 
 3799     }
 3800     return;
 3801 }
 3802 
 3803 sub weld_containers {
 3804 
 3805     # do any welding operations
 3806     my $self = shift;
 3807 
 3808   # initialize weld length hashes needed later for checking line lengths
 3809   # TODO: These should eventually be stored in $self rather than be package vars
 3810     %weld_len_left_closing  = ();
 3811     %weld_len_right_closing = ();
 3812     %weld_len_left_opening  = ();
 3813     %weld_len_right_opening = ();
 3814 
 3815     return if ( $rOpts->{'indent-only'} );
 3816     return unless ($rOpts_add_newlines);
 3817 
 3818     if ( $rOpts->{'weld-nested-containers'} ) {
 3819 
 3820         # if called, weld_nested_containers must be called before other weld
 3821         # operations.  # This is because weld_nested_containers could overwrite
 3822         # hash values written by weld_cuddled_blocks and weld_nested_quotes.
 3823         $self->weld_nested_containers();
 3824 
 3825         $self->weld_nested_quotes();
 3826     }
 3827 
 3828     # Note that weld_nested_containers() changes the _LEVEL_ values, so
 3829     # weld_cuddled_blocks must use the _TRUE_LEVEL_ values instead.
 3830 
 3831     # Here is a good test case to  Be sure that both cuddling and welding
 3832     # are working and not interfering with each other: <<snippets/ce_wn1.in>>
 3833 
 3834     #   perltidy -wn -ce
 3835 
 3836    # if ($BOLD_MATH) { (
 3837    #     $labels, $comment,
 3838    #     join( '', '<B>', &make_math( $mode, '', '', $_ ), '</B>' )
 3839    # ) } else { (
 3840    #     &process_math_in_latex( $mode, $math_style, $slevel, "\\mbox{$text}" ),
 3841    #     $after
 3842    # ) }
 3843 
 3844     $self->weld_cuddled_blocks();
 3845 
 3846     return;
 3847 }
 3848 
 3849 sub cumulative_length_before_K {
 3850     my ( $self, $KK ) = @_;
 3851     my $rLL = $self->{rLL};
 3852     return ( $KK <= 0 ) ? 0 : $rLL->[ $KK - 1 ]->[_CUMULATIVE_LENGTH_];
 3853 }
 3854 
 3855 sub cumulative_length_after_K {
 3856     my ( $self, $KK ) = @_;
 3857     my $rLL = $self->{rLL};
 3858     return $rLL->[$KK]->[_CUMULATIVE_LENGTH_];
 3859 }
 3860 
 3861 sub weld_cuddled_blocks {
 3862     my $self = shift;
 3863 
 3864     # This routine implements the -cb flag by finding the appropriate
 3865     # closing and opening block braces and welding them together.
 3866     return unless ( %{$rcuddled_block_types} );
 3867 
 3868     my $rLL = $self->{rLL};
 3869     return unless ( defined($rLL) && @{$rLL} );
 3870     my $rbreak_container = $self->{rbreak_container};
 3871 
 3872     my $K_opening_container = $self->{K_opening_container};
 3873     my $K_closing_container = $self->{K_closing_container};
 3874 
 3875     my $length_to_opening_seqno = sub {
 3876         my ($seqno) = @_;
 3877         my $KK      = $K_opening_container->{$seqno};
 3878         my $lentot  = $KK <= 0 ? 0 : $rLL->[ $KK - 1 ]->[_CUMULATIVE_LENGTH_];
 3879         return $lentot;
 3880     };
 3881     my $length_to_closing_seqno = sub {
 3882         my ($seqno) = @_;
 3883         my $KK      = $K_closing_container->{$seqno};
 3884         my $lentot  = $KK <= 0 ? 0 : $rLL->[ $KK - 1 ]->[_CUMULATIVE_LENGTH_];
 3885         return $lentot;
 3886     };
 3887 
 3888     my $is_broken_block = sub {
 3889 
 3890         # a block is broken if the input line numbers of the braces differ
 3891         # we can only cuddle between broken blocks
 3892         my ($seqno) = @_;
 3893         my $K_opening = $K_opening_container->{$seqno};
 3894         return unless ( defined($K_opening) );
 3895         my $K_closing = $K_closing_container->{$seqno};
 3896         return unless ( defined($K_closing) );
 3897         return $rbreak_container->{$seqno}
 3898           || $rLL->[$K_closing]->[_LINE_INDEX_] !=
 3899           $rLL->[$K_opening]->[_LINE_INDEX_];
 3900     };
 3901 
 3902     # A stack to remember open chains at all levels:
 3903     # $in_chain[$level] = [$chain_type, $type_sequence];
 3904     my @in_chain;
 3905     my $CBO = $rOpts->{'cuddled-break-option'};
 3906 
 3907     # loop over structure items to find cuddled pairs
 3908     my $level = 0;
 3909     my $KNEXT = 0;
 3910     while ( defined($KNEXT) ) {
 3911         my $KK = $KNEXT;
 3912         $KNEXT = $rLL->[$KNEXT]->[_KNEXT_SEQ_ITEM_];
 3913         my $rtoken_vars   = $rLL->[$KK];
 3914         my $type_sequence = $rtoken_vars->[_TYPE_SEQUENCE_];
 3915         if ( !$type_sequence ) {
 3916             next if ( $KK == 0 );    # first token in file may not be container
 3917             Fault("sequence = $type_sequence not defined at K=$KK");
 3918         }
 3919 
 3920         # We use the original levels because they get changed by sub
 3921         # 'weld_nested_containers'. So if this were to be called before that
 3922         # routine, the levels would be wrong and things would go bad.
 3923         my $last_level = $level;
 3924         $level = $rtoken_vars->[_LEVEL_TRUE_];
 3925 
 3926         if    ( $level < $last_level ) { $in_chain[$last_level] = undef }
 3927         elsif ( $level > $last_level ) { $in_chain[$level]      = undef }
 3928 
 3929         # We are only looking at code blocks
 3930         my $token = $rtoken_vars->[_TOKEN_];
 3931         my $type  = $rtoken_vars->[_TYPE_];
 3932         next unless ( $type eq $token );
 3933 
 3934         if ( $token eq '{' ) {
 3935 
 3936             my $block_type = $rtoken_vars->[_BLOCK_TYPE_];
 3937             if ( !$block_type ) {
 3938 
 3939                 # patch for unrecognized block types which may not be labeled
 3940                 my $Kp = $self->K_previous_nonblank($KK);
 3941                 while ( $Kp && $rLL->[$Kp]->[_TYPE_] eq '#' ) {
 3942                     $Kp = $self->K_previous_nonblank($Kp);
 3943                 }
 3944                 next unless $Kp;
 3945                 $block_type = $rLL->[$Kp]->[_TOKEN_];
 3946             }
 3947             if ( $in_chain[$level] ) {
 3948 
 3949                 # we are in a chain and are at an opening block brace.
 3950                 # See if we are welding this opening brace with the previous
 3951                 # block brace.  Get their identification numbers:
 3952                 my $closing_seqno = $in_chain[$level]->[1];
 3953                 my $opening_seqno = $type_sequence;
 3954 
 3955                 # The preceding block must be on multiple lines so that its
 3956                 # closing brace will start a new line.
 3957                 if ( !$is_broken_block->($closing_seqno) ) {
 3958                     next unless ( $CBO == 2 );
 3959                     $rbreak_container->{$closing_seqno} = 1;
 3960                 }
 3961 
 3962                 # we will let the trailing block be either broken or intact
 3963                 ## && $is_broken_block->($opening_seqno);
 3964 
 3965                 # We can weld the closing brace to its following word ..
 3966                 my $Ko  = $K_closing_container->{$closing_seqno};
 3967                 my $Kon = $self->K_next_nonblank($Ko);
 3968 
 3969                 # ..unless it is a comment
 3970                 if ( $rLL->[$Kon]->[_TYPE_] ne '#' ) {
 3971                     my $dlen =
 3972                       $rLL->[$Kon]->[_CUMULATIVE_LENGTH_] -
 3973                       $rLL->[ $Ko - 1 ]->[_CUMULATIVE_LENGTH_];
 3974                     $weld_len_right_closing{$closing_seqno} = $dlen;
 3975 
 3976                     # Set flag that we want to break the next container
 3977                     # so that the cuddled line is balanced.
 3978                     $rbreak_container->{$opening_seqno} = 1
 3979                       if ($CBO);
 3980                 }
 3981 
 3982             }
 3983             else {
 3984 
 3985                 # We are not in a chain. Start a new chain if we see the
 3986                 # starting block type.
 3987                 if ( $rcuddled_block_types->{$block_type} ) {
 3988                     $in_chain[$level] = [ $block_type, $type_sequence ];
 3989                 }
 3990                 else {
 3991                     $block_type = '*';
 3992                     $in_chain[$level] = [ $block_type, $type_sequence ];
 3993                 }
 3994             }
 3995         }
 3996         elsif ( $token eq '}' ) {
 3997             if ( $in_chain[$level] ) {
 3998 
 3999                 # We are in a chain at a closing brace.  See if this chain
 4000                 # continues..
 4001                 my $Knn = $self->K_next_code($KK);
 4002                 next unless $Knn;
 4003 
 4004                 my $chain_type          = $in_chain[$level]->[0];
 4005                 my $next_nonblank_token = $rLL->[$Knn]->[_TOKEN_];
 4006                 if (
 4007                     $rcuddled_block_types->{$chain_type}->{$next_nonblank_token}
 4008                   )
 4009                 {
 4010 
 4011                     # Note that we do not weld yet because we must wait until
 4012                     # we we are sure that an opening brace for this follows.
 4013                     $in_chain[$level]->[1] = $type_sequence;
 4014                 }
 4015                 else { $in_chain[$level] = undef }
 4016             }
 4017         }
 4018     }
 4019 
 4020     return;
 4021 }
 4022 
 4023 sub weld_nested_containers {
 4024     my $self = shift;
 4025 
 4026     # This routine implements the -wn flag by "welding together"
 4027     # the nested closing and opening tokens which were previously
 4028     # identified by sub 'find_nested_pairs'.  "welding" simply
 4029     # involves setting certain hash values which will be checked
 4030     # later during formatting.
 4031 
 4032     my $rLL                 = $self->{rLL};
 4033     my $Klimit              = $self->get_rLL_max_index();
 4034     my $rnested_pairs       = $self->{rnested_pairs};
 4035     my $rlines              = $self->{rlines};
 4036     my $K_opening_container = $self->{K_opening_container};
 4037     my $K_closing_container = $self->{K_closing_container};
 4038 
 4039     # Return unless there are nested pairs to weld
 4040     return unless defined($rnested_pairs) && @{$rnested_pairs};
 4041 
 4042     # This array will hold the sequence numbers of the tokens to be welded.
 4043     my @welds;
 4044 
 4045     # Variables needed for estimating line lengths
 4046     my $starting_indent;
 4047     my $starting_lentot;
 4048 
 4049     # A tolerance to the length for length estimates.  In some rare cases
 4050     # this can avoid problems where a final weld slightly exceeds the
 4051     # line length and gets broken in a bad spot.
 4052     my $length_tol = 1;
 4053 
 4054     my $excess_length_to_K = sub {
 4055         my ($K) = @_;
 4056 
 4057         # Estimate the length from the line start to a given token
 4058         my $length = $self->cumulative_length_before_K($K) - $starting_lentot;
 4059         my $excess_length =
 4060           $starting_indent + $length + $length_tol - $rOpts_maximum_line_length;
 4061         return ($excess_length);
 4062     };
 4063 
 4064     my $length_to_opening_seqno = sub {
 4065         my ($seqno) = @_;
 4066         my $KK      = $K_opening_container->{$seqno};
 4067         my $lentot  = $KK <= 0 ? 0 : $rLL->[ $KK - 1 ]->[_CUMULATIVE_LENGTH_];
 4068         return $lentot;
 4069     };
 4070 
 4071     my $length_to_closing_seqno = sub {
 4072         my ($seqno) = @_;
 4073         my $KK      = $K_closing_container->{$seqno};
 4074         my $lentot  = $KK <= 0 ? 0 : $rLL->[ $KK - 1 ]->[_CUMULATIVE_LENGTH_];
 4075         return $lentot;
 4076     };
 4077 
 4078     # Abbreviations:
 4079     #  _oo=outer opening, i.e. first of  { {
 4080     #  _io=inner opening, i.e. second of { {
 4081     #  _oc=outer closing, i.e. second of } {
 4082     #  _ic=inner closing, i.e. first of  } }
 4083 
 4084     my $previous_pair;
 4085 
 4086     # We are working from outermost to innermost pairs so that
 4087     # level changes will be complete when we arrive at the inner pairs.
 4088 
 4089     while ( my $item = pop( @{$rnested_pairs} ) ) {
 4090         my ( $inner_seqno, $outer_seqno ) = @{$item};
 4091 
 4092         my $Kouter_opening = $K_opening_container->{$outer_seqno};
 4093         my $Kinner_opening = $K_opening_container->{$inner_seqno};
 4094         my $Kouter_closing = $K_closing_container->{$outer_seqno};
 4095         my $Kinner_closing = $K_closing_container->{$inner_seqno};
 4096 
 4097         my $outer_opening = $rLL->[$Kouter_opening];
 4098         my $inner_opening = $rLL->[$Kinner_opening];
 4099         my $outer_closing = $rLL->[$Kouter_closing];
 4100         my $inner_closing = $rLL->[$Kinner_closing];
 4101 
 4102         my $iline_oo = $outer_opening->[_LINE_INDEX_];
 4103         my $iline_io = $inner_opening->[_LINE_INDEX_];
 4104 
 4105         # Set flag saying if this pair starts a new weld
 4106         my $starting_new_weld = !( @welds && $outer_seqno == $welds[-1]->[0] );
 4107 
 4108         # Set flag saying if this pair is adjacent to the previous nesting pair
 4109         # (even if previous pair was rejected as a weld)
 4110         my $touch_previous_pair =
 4111           defined($previous_pair) && $outer_seqno == $previous_pair->[0];
 4112         $previous_pair = $item;
 4113 
 4114         # Set a flag if we should not weld. It sometimes looks best not to weld
 4115         # when the opening and closing tokens are very close.  However, there
 4116         # is a danger that we will create a "blinker", which oscillates between
 4117         # two semi-stable states, if we do not weld.  So the rules for
 4118         # not welding have to be carefully defined and tested.
 4119         my $do_not_weld;
 4120         if ( !$touch_previous_pair ) {
 4121 
 4122             # If this pair is not adjacent to the previous pair (skipped or
 4123             # not), then measure lengths from the start of line of oo
 4124 
 4125             my $rK_range = $rlines->[$iline_oo]->{_rK_range};
 4126             my ( $Kfirst, $Klast ) = @{$rK_range};
 4127             $starting_lentot =
 4128               $Kfirst <= 0 ? 0 : $rLL->[ $Kfirst - 1 ]->[_CUMULATIVE_LENGTH_];
 4129             $starting_indent = 0;
 4130             if ( !$rOpts_variable_maximum_line_length ) {
 4131                 my $level = $rLL->[$Kfirst]->[_LEVEL_];
 4132                 $starting_indent = $rOpts_indent_columns * $level;
 4133             }
 4134 
 4135             # DO-NOT-WELD RULE 1:
 4136             # Do not weld something that looks like the start of a two-line
 4137             # function call, like this: <<snippets/wn6.in>>
 4138             #    $trans->add_transformation(
 4139             #        PDL::Graphics::TriD::Scale->new( $sx, $sy, $sz ) );
 4140             # We will look for a semicolon after the closing paren.
 4141 
 4142             # We want to weld something complex, like this though
 4143             # my $compass = uc( opposite_direction( line_to_canvas_direction(
 4144             #     @{ $coords[0] }, @{ $coords[1] } ) ) );
 4145             # Otherwise we will get a 'blinker'
 4146 
 4147             my $iline_oc = $outer_closing->[_LINE_INDEX_];
 4148             if ( $iline_oc <= $iline_oo + 1 ) {
 4149 
 4150                 # Look for following semicolon...
 4151                 my $Knext_nonblank = $self->K_next_nonblank($Kouter_closing);
 4152                 my $next_nonblank_type =
 4153                   defined($Knext_nonblank)
 4154                   ? $rLL->[$Knext_nonblank]->[_TYPE_]
 4155                   : 'b';
 4156                 if ( $next_nonblank_type eq ';' ) {
 4157 
 4158                     # Then do not weld if no other containers between inner
 4159                     # opening and closing.
 4160                     my $Knext_seq_item = $inner_opening->[_KNEXT_SEQ_ITEM_];
 4161                     if ( $Knext_seq_item == $Kinner_closing ) {
 4162                         $do_not_weld ||= 1;
 4163                     }
 4164                 }
 4165             }
 4166         }
 4167 
 4168         my $iline_ic = $inner_closing->[_LINE_INDEX_];
 4169 
 4170         # DO-NOT-WELD RULE 2:
 4171         # Do not weld an opening paren to an inner one line brace block
 4172         # We will just use old line numbers for this test and require
 4173         # iterations if necessary for convergence
 4174 
 4175         # For example, otherwise we could cause the opening paren
 4176         # in the following example to separate from the caller name
 4177         # as here:
 4178 
 4179         #    $_[0]->code_handler
 4180         #       ( sub { $more .= $_[1] . ":" . $_[0] . "\n" } );
 4181 
 4182         # Here is another example where we do not want to weld:
 4183         #  $wrapped->add_around_modifier(
 4184         #    sub { push @tracelog => 'around 1'; $_[0]->(); } );
 4185 
 4186         # If the one line sub block gets broken due to length or by the
 4187         # user, then we can weld.  The result will then be:
 4188         # $wrapped->add_around_modifier( sub {
 4189         #    push @tracelog => 'around 1';
 4190         #    $_[0]->();
 4191         # } );
 4192 
 4193         if ( $iline_ic == $iline_io ) {
 4194 
 4195             my $token_oo      = $outer_opening->[_TOKEN_];
 4196             my $block_type_io = $inner_opening->[_BLOCK_TYPE_];
 4197             my $token_io      = $inner_opening->[_TOKEN_];
 4198             $do_not_weld ||= $token_oo eq '(' && $token_io eq '{';
 4199         }
 4200 
 4201         # DO-NOT-WELD RULE 3:
 4202         # Do not weld if this makes our line too long
 4203         $do_not_weld ||= $excess_length_to_K->($Kinner_opening) > 0;
 4204 
 4205         # DO-NOT-WELD RULE 4; implemented for git#10:
 4206         # Do not weld an opening -ce brace if the next container is on a single
 4207         # line, different from the opening brace. (This is very rare).  For
 4208         # example, given the following with -ce, we will avoid joining the {
 4209         # and [
 4210 
 4211         #  } else {
 4212         #      [ $_, length($_) ]
 4213         #  }
 4214 
 4215         # because this would produce a terminal one-line block:
 4216 
 4217         #  } else { [ $_, length($_) ]  }
 4218 
 4219         # which may not be what is desired. But given this input:
 4220 
 4221         #  } else { [ $_, length($_) ]  }
 4222 
 4223         # then we will do the weld and retain the one-line block
 4224         if ( $rOpts->{'cuddled-else'} ) {
 4225             my $block_type = $rLL->[$Kouter_opening]->[_BLOCK_TYPE_];
 4226             if ( $block_type && $rcuddled_block_types->{'*'}->{$block_type} ) {
 4227                 my $io_line = $inner_opening->[_LINE_INDEX_];
 4228                 my $ic_line = $inner_closing->[_LINE_INDEX_];
 4229                 my $oo_line = $outer_opening->[_LINE_INDEX_];
 4230                 $do_not_weld ||=
 4231                   ( $oo_line < $io_line && $ic_line == $io_line );
 4232             }
 4233         }
 4234 
 4235         if ($do_not_weld) {
 4236 
 4237             # After neglecting a pair, we start measuring from start of point io
 4238             $starting_lentot =
 4239               $self->cumulative_length_before_K($Kinner_opening);
 4240             $starting_indent = 0;
 4241             if ( !$rOpts_variable_maximum_line_length ) {
 4242                 my $level = $inner_opening->[_LEVEL_];
 4243                 $starting_indent = $rOpts_indent_columns * $level;
 4244             }
 4245 
 4246             # Normally, a broken pair should not decrease indentation of
 4247             # intermediate tokens:
 4248             ##      if ( $last_pair_broken ) { next }
 4249             # However, for long strings of welded tokens, such as '{{{{{{...'
 4250             # we will allow broken pairs to also remove indentation.
 4251             # This will keep very long strings of opening and closing
 4252             # braces from marching off to the right.  We will do this if the
 4253             # number of tokens in a weld before the broken weld is 4 or more.
 4254             # This rule will mainly be needed for test scripts, since typical
 4255             # welds have fewer than about 4 welded tokens.
 4256             if ( !@welds || @{ $welds[-1] } < 4 ) { next }
 4257         }
 4258 
 4259         # otherwise start new weld ...
 4260         elsif ($starting_new_weld) {
 4261             push @welds, $item;
 4262         }
 4263 
 4264         # ... or extend current weld
 4265         else {
 4266             unshift @{ $welds[-1] }, $inner_seqno;
 4267         }
 4268 
 4269         # After welding, reduce the indentation level if all intermediate tokens
 4270         my $dlevel = $outer_opening->[_LEVEL_] - $inner_opening->[_LEVEL_];
 4271         if ( $dlevel != 0 ) {
 4272             my $Kstart = $Kinner_opening;
 4273             my $Kstop  = $Kinner_closing;
 4274             for ( my $KK = $Kstart ; $KK <= $Kstop ; $KK++ ) {
 4275                 $rLL->[$KK]->[_LEVEL_] += $dlevel;
 4276             }
 4277         }
 4278     }
 4279 
 4280     # Define weld lengths needed later to set line breaks
 4281     foreach my $item (@welds) {
 4282 
 4283         # sweep from inner to outer
 4284 
 4285         my $inner_seqno;
 4286         my $len_close = 0;
 4287         my $len_open  = 0;
 4288         foreach my $outer_seqno ( @{$item} ) {
 4289             if ($inner_seqno) {
 4290 
 4291                 my $dlen_opening =
 4292                   $length_to_opening_seqno->($inner_seqno) -
 4293                   $length_to_opening_seqno->($outer_seqno);
 4294 
 4295                 my $dlen_closing =
 4296                   $length_to_closing_seqno->($outer_seqno) -
 4297                   $length_to_closing_seqno->($inner_seqno);
 4298 
 4299                 $len_open  += $dlen_opening;
 4300                 $len_close += $dlen_closing;
 4301 
 4302             }
 4303 
 4304             $weld_len_left_closing{$outer_seqno}  = $len_close;
 4305             $weld_len_right_opening{$outer_seqno} = $len_open;
 4306 
 4307             $inner_seqno = $outer_seqno;
 4308         }
 4309 
 4310         # sweep from outer to inner
 4311         foreach my $seqno ( reverse @{$item} ) {
 4312             $weld_len_right_closing{$seqno} =
 4313               $len_close - $weld_len_left_closing{$seqno};
 4314             $weld_len_left_opening{$seqno} =
 4315               $len_open - $weld_len_right_opening{$seqno};
 4316         }
 4317     }
 4318 
 4319     #####################################
 4320     # DEBUG
 4321     #####################################
 4322     if (0) {
 4323         my $count = 0;
 4324         local $" = ')(';
 4325         foreach my $weld (@welds) {
 4326             print "\nWeld number $count has seq: (@{$weld})\n";
 4327             foreach my $seq ( @{$weld} ) {
 4328                 print <<EOM;
 4329     seq=$seq
 4330         left_opening=$weld_len_left_opening{$seq};
 4331         right_opening=$weld_len_right_opening{$seq};
 4332         left_closing=$weld_len_left_closing{$seq};
 4333         right_closing=$weld_len_right_closing{$seq};
 4334 EOM
 4335             }
 4336 
 4337             $count++;
 4338         }
 4339     }
 4340     return;
 4341 }
 4342 
 4343 sub weld_nested_quotes {
 4344     my $self = shift;
 4345 
 4346     my $rLL = $self->{rLL};
 4347     return unless ( defined($rLL) && @{$rLL} );
 4348 
 4349     my $K_opening_container = $self->{K_opening_container};
 4350     my $K_closing_container = $self->{K_closing_container};
 4351     my $rlines              = $self->{rlines};
 4352 
 4353     my $is_single_quote = sub {
 4354         my ( $Kbeg, $Kend, $quote_type ) = @_;
 4355         foreach my $K ( $Kbeg .. $Kend ) {
 4356             my $test_type = $rLL->[$K]->[_TYPE_];
 4357             next   if ( $test_type eq 'b' );
 4358             return if ( $test_type ne $quote_type );
 4359         }
 4360         return 1;
 4361     };
 4362 
 4363     my $excess_line_length = sub {
 4364         my ( $KK, $Ktest ) = @_;
 4365 
 4366         # what is the excess length if we add token $Ktest to the line with $KK?
 4367         my $iline    = $rLL->[$KK]->[_LINE_INDEX_];
 4368         my $rK_range = $rlines->[$iline]->{_rK_range};
 4369         my ( $Kfirst, $Klast ) = @{$rK_range};
 4370         my $starting_lentot =
 4371           $Kfirst <= 0 ? 0 : $rLL->[ $Kfirst - 1 ]->[_CUMULATIVE_LENGTH_];
 4372         my $starting_indent = 0;
 4373         my $length_tol      = 1;
 4374         if ( !$rOpts_variable_maximum_line_length ) {
 4375             my $level = $rLL->[$Kfirst]->[_LEVEL_];
 4376             $starting_indent = $rOpts_indent_columns * $level;
 4377         }
 4378 
 4379         my $length = $rLL->[$Ktest]->[_CUMULATIVE_LENGTH_] - $starting_lentot;
 4380         my $excess_length =
 4381           $starting_indent + $length + $length_tol - $rOpts_maximum_line_length;
 4382         return $excess_length;
 4383     };
 4384 
 4385     # look for single qw quotes nested in containers
 4386     my $KNEXT = 0;
 4387     while ( defined($KNEXT) ) {
 4388         my $KK = $KNEXT;
 4389         $KNEXT = $rLL->[$KNEXT]->[_KNEXT_SEQ_ITEM_];
 4390         my $rtoken_vars = $rLL->[$KK];
 4391         my $outer_seqno = $rtoken_vars->[_TYPE_SEQUENCE_];
 4392         if ( !$outer_seqno ) {
 4393             next if ( $KK == 0 );    # first token in file may not be container
 4394             Fault("sequence = $outer_seqno not defined at K=$KK");
 4395         }
 4396 
 4397         my $token = $rtoken_vars->[_TOKEN_];
 4398         if ( $is_opening_token{$token} ) {
 4399 
 4400             # see if the next token is a quote of some type
 4401             my $Kn = $self->K_next_nonblank($KK);
 4402             next unless $Kn;
 4403             my $next_token = $rLL->[$Kn]->[_TOKEN_];
 4404             my $next_type  = $rLL->[$Kn]->[_TYPE_];
 4405             next
 4406               unless ( ( $next_type eq 'q' || $next_type eq 'Q' )
 4407                 && $next_token =~ /^q/ );
 4408 
 4409             # The token before the closing container must also be a quote
 4410             my $K_closing = $K_closing_container->{$outer_seqno};
 4411             my $Kt_end    = $self->K_previous_nonblank($K_closing);
 4412             next unless $rLL->[$Kt_end]->[_TYPE_] eq $next_type;
 4413 
 4414             # Do not weld to single-line quotes. Nothing is gained, and it may
 4415             # look bad.
 4416             next if ( $Kt_end == $Kn );
 4417 
 4418             # Only weld to quotes delimited with container tokens. This is
 4419             # because welding to arbitrary quote delimiters can produce code
 4420             # which is less readable than without welding.
 4421             my $closing_delimiter = substr( $rLL->[$Kt_end]->[_TOKEN_], -1, 1 );
 4422             next
 4423               unless ( $is_closing_token{$closing_delimiter}
 4424                 || $closing_delimiter eq '>' );
 4425 
 4426             # Now make sure that there is just a single quote in the container
 4427             next
 4428               unless ( $is_single_quote->( $Kn + 1, $Kt_end - 1, $next_type ) );
 4429 
 4430             # If welded, the line must not exceed allowed line length
 4431             # Assume old line breaks for this estimate.
 4432             next if ( $excess_line_length->( $KK, $Kn ) > 0 );
 4433 
 4434             # OK to weld
 4435             # FIXME: Are these always correct?
 4436             $weld_len_left_closing{$outer_seqno}  = 1;
 4437             $weld_len_right_opening{$outer_seqno} = 2;
 4438 
 4439             # QW PATCH 1 (Testing)
 4440             # undo CI for welded quotes
 4441             foreach my $K ( $Kn .. $Kt_end ) {
 4442                 $rLL->[$K]->[_CI_LEVEL_] = 0;
 4443             }
 4444 
 4445             # Change the level of a closing qw token to be that of the outer
 4446             # containing token. This will allow -lp indentation to function
 4447             # correctly in the vertical aligner.
 4448             $rLL->[$Kt_end]->[_LEVEL_] = $rLL->[$K_closing]->[_LEVEL_];
 4449         }
 4450     }
 4451     return;
 4452 }
 4453 
 4454 sub weld_len_left {
 4455 
 4456     my ( $seqno, $type_or_tok ) = @_;
 4457 
 4458     # Given the sequence number of a token, and the token or its type,
 4459     # return the length of any weld to its left
 4460 
 4461     my $weld_len;
 4462     if ($seqno) {
 4463         if ( $is_closing_type{$type_or_tok} ) {
 4464             $weld_len = $weld_len_left_closing{$seqno};
 4465         }
 4466         elsif ( $is_opening_type{$type_or_tok} ) {
 4467             $weld_len = $weld_len_left_opening{$seqno};
 4468         }
 4469     }
 4470     if ( !defined($weld_len) ) { $weld_len = 0 }
 4471     return $weld_len;
 4472 }
 4473 
 4474 sub weld_len_right {
 4475 
 4476     my ( $seqno, $type_or_tok ) = @_;
 4477 
 4478     # Given the sequence number of a token, and the token or its type,
 4479     # return the length of any weld to its right
 4480 
 4481     my $weld_len;
 4482     if ($seqno) {
 4483         if ( $is_closing_type{$type_or_tok} ) {
 4484             $weld_len = $weld_len_right_closing{$seqno};
 4485         }
 4486         elsif ( $is_opening_type{$type_or_tok} ) {
 4487             $weld_len = $weld_len_right_opening{$seqno};
 4488         }
 4489     }
 4490     if ( !defined($weld_len) ) { $weld_len = 0 }
 4491     return $weld_len;
 4492 }
 4493 
 4494 sub weld_len_left_to_go {
 4495     my ($i) = @_;
 4496 
 4497     # Given the index of a token in the 'to_go' array
 4498     # return the length of any weld to its left
 4499     return if ( $i < 0 );
 4500     my $weld_len =
 4501       weld_len_left( $type_sequence_to_go[$i], $types_to_go[$i] );
 4502     return $weld_len;
 4503 }
 4504 
 4505 sub weld_len_right_to_go {
 4506     my ($i) = @_;
 4507 
 4508     # Given the index of a token in the 'to_go' array
 4509     # return the length of any weld to its right
 4510     return if ( $i < 0 );
 4511     if ( $i > 0 && $types_to_go[$i] eq 'b' ) { $i-- }
 4512     my $weld_len =
 4513       weld_len_right( $type_sequence_to_go[$i], $types_to_go[$i] );
 4514     return $weld_len;
 4515 }
 4516 
 4517 sub link_sequence_items {
 4518 
 4519     # This has been merged into 'respace_tokens' but retained for reference
 4520     my $self   = shift;
 4521     my $rlines = $self->{rlines};
 4522     my $rLL    = $self->{rLL};
 4523 
 4524     # We walk the token list and make links to the next sequence item.
 4525     # We also define these hashes to container tokens using sequence number as
 4526     # the key:
 4527     my $K_opening_container = {};    # opening [ { or (
 4528     my $K_closing_container = {};    # closing ] } or )
 4529     my $K_opening_ternary   = {};    # opening ? of ternary
 4530     my $K_closing_ternary   = {};    # closing : of ternary
 4531 
 4532     # sub to link preceding nodes forward to a new node type
 4533     my $link_back = sub {
 4534         my ( $Ktop, $key ) = @_;
 4535 
 4536         my $Kprev = $Ktop - 1;
 4537         while ( $Kprev >= 0
 4538             && !defined( $rLL->[$Kprev]->[$key] ) )
 4539         {
 4540             $rLL->[$Kprev]->[$key] = $Ktop;
 4541             $Kprev -= 1;
 4542         }
 4543     };
 4544 
 4545     for ( my $KK = 0 ; $KK < @{$rLL} ; $KK++ ) {
 4546 
 4547         $rLL->[$KK]->[_KNEXT_SEQ_ITEM_] = undef;
 4548 
 4549         my $type = $rLL->[$KK]->[_TYPE_];
 4550 
 4551         next if ( $type eq 'b' );
 4552 
 4553         my $type_sequence = $rLL->[$KK]->[_TYPE_SEQUENCE_];
 4554         if ($type_sequence) {
 4555 
 4556             $link_back->( $KK, _KNEXT_SEQ_ITEM_ );
 4557 
 4558             my $token = $rLL->[$KK]->[_TOKEN_];
 4559             if ( $is_opening_token{$token} ) {
 4560 
 4561                 $K_opening_container->{$type_sequence} = $KK;
 4562             }
 4563             elsif ( $is_closing_token{$token} ) {
 4564 
 4565                 $K_closing_container->{$type_sequence} = $KK;
 4566             }
 4567 
 4568             # These are not yet used but could be useful
 4569             else {
 4570                 if ( $token eq '?' ) {
 4571                     $K_opening_ternary->{$type_sequence} = $KK;
 4572                 }
 4573                 elsif ( $token eq ':' ) {
 4574                     $K_closing_ternary->{$type_sequence} = $KK;
 4575                 }
 4576                 else {
 4577                     Fault(<<EOM);
 4578 Unknown sequenced token type '$type'.  Expecting one of '{[(?:)]}'
 4579 EOM
 4580                 }
 4581             }
 4582         }
 4583     }
 4584 
 4585     $self->{K_opening_container} = $K_opening_container;
 4586     $self->{K_closing_container} = $K_closing_container;
 4587     $self->{K_opening_ternary}   = $K_opening_ternary;
 4588     $self->{K_closing_ternary}   = $K_closing_ternary;
 4589     return;
 4590 }
 4591 
 4592 sub sum_token_lengths {
 4593     my $self = shift;
 4594 
 4595     # This has been merged into 'respace_tokens' but retained for reference
 4596     my $rLL               = $self->{rLL};
 4597     my $cumulative_length = 0;
 4598     for ( my $KK = 0 ; $KK < @{$rLL} ; $KK++ ) {
 4599 
 4600         # now set the length of this token
 4601         my $token_length = length( $rLL->[$KK]->[_TOKEN_] );
 4602 
 4603         $cumulative_length += $token_length;
 4604 
 4605         # Save the length sum to just AFTER this token
 4606         $rLL->[$KK]->[_CUMULATIVE_LENGTH_] = $cumulative_length;
 4607 
 4608     }
 4609     return;
 4610 }
 4611 
 4612 sub resync_lines_and_tokens {
 4613 
 4614     my $self   = shift;
 4615     my $rLL    = $self->{rLL};
 4616     my $Klimit = $self->{Klimit};
 4617     my $rlines = $self->{rlines};
 4618 
 4619     # Re-construct the arrays of tokens associated with the original input lines
 4620     # since they have probably changed due to inserting and deleting blanks
 4621     # and a few other tokens.
 4622 
 4623     my $Kmax = -1;
 4624 
 4625     # This is the next token and its line index:
 4626     my $Knext = 0;
 4627     my $inext;
 4628     if ( defined($rLL) && @{$rLL} ) {
 4629         $Kmax  = @{$rLL} - 1;
 4630         $inext = $rLL->[$Knext]->[_LINE_INDEX_];
 4631     }
 4632 
 4633     my $get_inext = sub {
 4634         if ( $Knext < 0 || $Knext > $Kmax ) { $inext = undef }
 4635         else {
 4636             $inext = $rLL->[$Knext]->[_LINE_INDEX_];
 4637         }
 4638         return $inext;
 4639     };
 4640 
 4641     # Remember the most recently output token index
 4642     my $Klast_out;
 4643 
 4644     my $iline = -1;
 4645     foreach my $line_of_tokens ( @{$rlines} ) {
 4646         $iline++;
 4647         my $line_type = $line_of_tokens->{_line_type};
 4648         if ( $line_type eq 'CODE' ) {
 4649 
 4650             my @K_array;
 4651             my $rK_range;
 4652             $inext = $get_inext->();
 4653             while ( defined($inext) && $inext <= $iline ) {
 4654                 push @{K_array}, $Knext;
 4655                 $Knext += 1;
 4656                 $inext = $get_inext->();
 4657             }
 4658 
 4659             # Delete any terminal blank token
 4660             if (@K_array) {
 4661                 if ( $rLL->[ $K_array[-1] ]->[_TYPE_] eq 'b' ) {
 4662                     pop @K_array;
 4663                 }
 4664             }
 4665 
 4666             # Define the range of K indexes for the line:
 4667             # $Kfirst = index of first token on line
 4668             # $Klast_out = index of last token on line
 4669             my ( $Kfirst, $Klast );
 4670             if (@K_array) {
 4671                 $Kfirst    = $K_array[0];
 4672                 $Klast     = $K_array[-1];
 4673                 $Klast_out = $Klast;
 4674             }
 4675 
 4676             # It is only safe to trim the actual line text if the input
 4677             # line had a terminal blank token. Otherwise, we may be
 4678             # in a quote.
 4679             if ( $line_of_tokens->{_ended_in_blank_token} ) {
 4680                 $line_of_tokens->{_line_text} =~ s/\s+$//;
 4681             }
 4682             $line_of_tokens->{_rK_range} = [ $Kfirst, $Klast ];
 4683 
 4684             # Deleting semicolons can create new empty code lines
 4685             # which should be marked as blank
 4686             if ( !defined($Kfirst) ) {
 4687                 my $code_type = $line_of_tokens->{_code_type};
 4688                 if ( !$code_type ) {
 4689                     $line_of_tokens->{_code_type} = 'BL';
 4690                 }
 4691             }
 4692         }
 4693     }
 4694 
 4695     # There shouldn't be any nodes beyond the last one unless we start
 4696     # allowing 'link_after' calls
 4697     if ( defined($inext) ) {
 4698 
 4699         Fault("unexpected tokens at end of file when reconstructing lines");
 4700     }
 4701 
 4702     return;
 4703 }
 4704 
 4705 sub dump_verbatim {
 4706     my $self   = shift;
 4707     my $rlines = $self->{rlines};
 4708     foreach my $line ( @{$rlines} ) {
 4709         my $input_line = $line->{_line_text};
 4710         $self->write_unindented_line($input_line);
 4711     }
 4712     return;
 4713 }
 4714 
 4715 sub finish_formatting {
 4716 
 4717     my ( $self, $severe_error ) = @_;
 4718 
 4719     # The file has been tokenized and is ready to be formatted.
 4720     # All of the relevant data is stored in $self, ready to go.
 4721 
 4722     # output file verbatim if severe error or no formatting requested
 4723     if ( $severe_error || $rOpts->{notidy} ) {
 4724         $self->dump_verbatim();
 4725         $self->wrapup();
 4726         return;
 4727     }
 4728 
 4729     # Make a pass through the lines, looking at lines of CODE and identifying
 4730     # special processing needs, such format skipping sections marked by
 4731     # special comments
 4732     $self->scan_comments();
 4733 
 4734     # Find nested pairs of container tokens for any welding. This information
 4735     # is also needed for adding semicolons, so it is split apart from the
 4736     # welding step.
 4737     $self->find_nested_pairs();
 4738 
 4739     # Make sure everything looks good
 4740     $self->check_line_hashes();
 4741 
 4742     # Future: Place to Begin future Iteration Loop
 4743     # foreach my $it_count(1..$maxit) {
 4744 
 4745     # Future: We must reset some things after the first iteration.
 4746     # This includes:
 4747     #   - resetting levels if there was any welding
 4748     #   - resetting any phantom semicolons
 4749     #   - dealing with any line numbering issues so we can relate final lines
 4750     #     line numbers with input line numbers.
 4751     #
 4752     # If ($it_count>1) {
 4753     #   Copy {level_raw} to [_LEVEL_] if ($it_count>1)
 4754     #   Renumber lines
 4755     # }
 4756 
 4757     # Make a pass through all tokens, adding or deleting any whitespace as
 4758     # required.  Also make any other changes, such as adding semicolons.
 4759     # All token changes must be made here so that the token data structure
 4760     # remains fixed for the rest of this iteration.
 4761     $self->respace_tokens();
 4762 
 4763     # Make a hierarchical map of the containers
 4764     $self->map_containers();
 4765 
 4766     # Implement any welding needed for the -wn or -cb options
 4767     $self->weld_containers();
 4768 
 4769     # Locate small nested blocks which should not be broken
 4770     $self->mark_short_nested_blocks();
 4771 
 4772     # Finishes formatting and write the result to the line sink.
 4773     # Eventually this call should just change the 'rlines' data according to the
 4774     # new line breaks and then return so that we can do an internal iteration
 4775     # before continuing with the next stages of formatting.
 4776     $self->break_lines();
 4777 
 4778     ############################################################
 4779     # A possible future decomposition of 'break_lines()' follows.
 4780     # Benefits:
 4781     # - allow perltidy to do an internal iteration which eliminates
 4782     #   many unnecessary steps, such as re-parsing and vertical alignment.
 4783     #   This will allow iterations to be automatic.
 4784     # - consolidate all length calculations to allow utf8 alignment
 4785     ############################################################
 4786 
 4787     # Future: Check for convergence of beginning tokens on CODE lines
 4788 
 4789     # Future: End of Iteration Loop
 4790 
 4791     # Future: add_padding($rargs);
 4792 
 4793     # Future: add_closing_side_comments($rargs);
 4794 
 4795     # Future: vertical_alignment($rargs);
 4796 
 4797     # Future: output results
 4798 
 4799     # A final routine to tie up any loose ends
 4800     $self->wrapup();
 4801     return;
 4802 }
 4803 
 4804 sub create_one_line_block {
 4805     ( $index_start_one_line_block, $semicolons_before_block_self_destruct ) =
 4806       @_;
 4807     return;
 4808 }
 4809 
 4810 sub destroy_one_line_block {
 4811     $index_start_one_line_block            = UNDEFINED_INDEX;
 4812     $semicolons_before_block_self_destruct = 0;
 4813     return;
 4814 }
 4815 
 4816 sub leading_spaces_to_go {
 4817 
 4818     # return the number of indentation spaces for a token in the output stream;
 4819     # these were previously stored by 'set_leading_whitespace'.
 4820 
 4821     my $ii = shift;
 4822     if ( $ii < 0 ) { $ii = 0 }
 4823     return get_spaces( $leading_spaces_to_go[$ii] );
 4824 
 4825 }
 4826 
 4827 sub get_spaces {
 4828 
 4829     # return the number of leading spaces associated with an indentation
 4830     # variable $indentation is either a constant number of spaces or an object
 4831     # with a get_spaces method.
 4832     my $indentation = shift;
 4833     return ref($indentation) ? $indentation->get_spaces() : $indentation;
 4834 }
 4835 
 4836 sub get_recoverable_spaces {
 4837 
 4838     # return the number of spaces (+ means shift right, - means shift left)
 4839     # that we would like to shift a group of lines with the same indentation
 4840     # to get them to line up with their opening parens
 4841     my $indentation = shift;
 4842     return ref($indentation) ? $indentation->get_recoverable_spaces() : 0;
 4843 }
 4844 
 4845 sub get_available_spaces_to_go {
 4846 
 4847     my $ii   = shift;
 4848     my $item = $leading_spaces_to_go[$ii];
 4849 
 4850     # return the number of available leading spaces associated with an
 4851     # indentation variable.  $indentation is either a constant number of
 4852     # spaces or an object with a get_available_spaces method.
 4853     return ref($item) ? $item->get_available_spaces() : 0;
 4854 }
 4855 
 4856 sub new_lp_indentation_item {
 4857 
 4858     # this is an interface to the IndentationItem class
 4859     my ( $spaces, $level, $ci_level, $available_spaces, $align_paren ) = @_;
 4860 
 4861     # A negative level implies not to store the item in the item_list
 4862     my $index = 0;
 4863     if ( $level >= 0 ) { $index = ++$max_gnu_item_index; }
 4864 
 4865     my $item = Perl::Tidy::IndentationItem->new(
 4866         $spaces,      $level,
 4867         $ci_level,    $available_spaces,
 4868         $index,       $gnu_sequence_number,
 4869         $align_paren, $max_gnu_stack_index,
 4870         $line_start_index_to_go,
 4871     );
 4872 
 4873     if ( $level >= 0 ) {
 4874         $gnu_item_list[$max_gnu_item_index] = $item;
 4875     }
 4876 
 4877     return $item;
 4878 }
 4879 
 4880 sub set_leading_whitespace {
 4881 
 4882     # This routine defines leading whitespace
 4883     # given: the level and continuation_level of a token,
 4884     # define: space count of leading string which would apply if it
 4885     # were the first token of a new line.
 4886 
 4887     my ( $level_abs, $ci_level, $in_continued_quote ) = @_;
 4888 
 4889     # Adjust levels if necessary to recycle whitespace:
 4890     # given $level_abs, the absolute level
 4891     # define $level, a possibly reduced level for whitespace
 4892     my $level = $level_abs;
 4893     if ( $rOpts_whitespace_cycle && $rOpts_whitespace_cycle > 0 ) {
 4894         if ( $level_abs < $whitespace_last_level ) {
 4895             pop(@whitespace_level_stack);
 4896         }
 4897         if ( !@whitespace_level_stack ) {
 4898             push @whitespace_level_stack, $level_abs;
 4899         }
 4900         elsif ( $level_abs > $whitespace_last_level ) {
 4901             $level = $whitespace_level_stack[-1] +
 4902               ( $level_abs - $whitespace_last_level );
 4903 
 4904             if (
 4905                 # 1 Try to break at a block brace
 4906                 (
 4907                        $level > $rOpts_whitespace_cycle
 4908                     && $last_nonblank_type eq '{'
 4909                     && $last_nonblank_token eq '{'
 4910                 )
 4911 
 4912                 # 2 Then either a brace or bracket
 4913                 || (   $level > $rOpts_whitespace_cycle + 1
 4914                     && $last_nonblank_token =~ /^[\{\[]$/ )
 4915 
 4916                 # 3 Then a paren too
 4917                 || $level > $rOpts_whitespace_cycle + 2
 4918               )
 4919             {
 4920                 $level = 1;
 4921             }
 4922             push @whitespace_level_stack, $level;
 4923         }
 4924         $level = $whitespace_level_stack[-1];
 4925     }
 4926     $whitespace_last_level = $level_abs;
 4927 
 4928     # modify for -bli, which adds one continuation indentation for
 4929     # opening braces
 4930     if (   $rOpts_brace_left_and_indent
 4931         && $max_index_to_go == 0
 4932         && $block_type_to_go[$max_index_to_go] =~ /$bli_pattern/o )
 4933     {
 4934         $ci_level++;
 4935     }
 4936 
 4937     # patch to avoid trouble when input file has negative indentation.
 4938     # other logic should catch this error.
 4939     if ( $level < 0 ) { $level = 0 }
 4940 
 4941     #-------------------------------------------
 4942     # handle the standard indentation scheme
 4943     #-------------------------------------------
 4944     unless ($rOpts_line_up_parentheses) {
 4945         my $space_count =
 4946           $ci_level * $rOpts_continuation_indentation +
 4947           $level * $rOpts_indent_columns;
 4948         my $ci_spaces =
 4949           ( $ci_level == 0 ) ? 0 : $rOpts_continuation_indentation;
 4950 
 4951         if ($in_continued_quote) {
 4952             $space_count = 0;
 4953             $ci_spaces   = 0;
 4954         }
 4955         $leading_spaces_to_go[$max_index_to_go] = $space_count;
 4956         $reduced_spaces_to_go[$max_index_to_go] = $space_count - $ci_spaces;
 4957         return;
 4958     }
 4959 
 4960     #-------------------------------------------------------------
 4961     # handle case of -lp indentation..
 4962     #-------------------------------------------------------------
 4963 
 4964     # The continued_quote flag means that this is the first token of a
 4965     # line, and it is the continuation of some kind of multi-line quote
 4966     # or pattern.  It requires special treatment because it must have no
 4967     # added leading whitespace. So we create a special indentation item
 4968     # which is not in the stack.
 4969     if ($in_continued_quote) {
 4970         my $space_count     = 0;
 4971         my $available_space = 0;
 4972         $level = -1;    # flag to prevent storing in item_list
 4973         $leading_spaces_to_go[$max_index_to_go] =
 4974           $reduced_spaces_to_go[$max_index_to_go] =
 4975           new_lp_indentation_item( $space_count, $level, $ci_level,
 4976             $available_space, 0 );
 4977         return;
 4978     }
 4979 
 4980     # get the top state from the stack
 4981     my $space_count      = $gnu_stack[$max_gnu_stack_index]->get_spaces();
 4982     my $current_level    = $gnu_stack[$max_gnu_stack_index]->get_level();
 4983     my $current_ci_level = $gnu_stack[$max_gnu_stack_index]->get_ci_level();
 4984 
 4985     my $type        = $types_to_go[$max_index_to_go];
 4986     my $token       = $tokens_to_go[$max_index_to_go];
 4987     my $total_depth = $nesting_depth_to_go[$max_index_to_go];
 4988 
 4989     if ( $type eq '{' || $type eq '(' ) {
 4990 
 4991         $gnu_comma_count{ $total_depth + 1 } = 0;
 4992         $gnu_arrow_count{ $total_depth + 1 } = 0;
 4993 
 4994         # If we come to an opening token after an '=' token of some type,
 4995         # see if it would be helpful to 'break' after the '=' to save space
 4996         my $last_equals = $last_gnu_equals{$total_depth};
 4997         if ( $last_equals && $last_equals > $line_start_index_to_go ) {
 4998 
 4999             # find the position if we break at the '='
 5000             my $i_test = $last_equals;
 5001             if ( $types_to_go[ $i_test + 1 ] eq 'b' ) { $i_test++ }
 5002 
 5003             # TESTING
 5004             ##my $too_close = ($i_test==$max_index_to_go-1);
 5005 
 5006             my $test_position = total_line_length( $i_test, $max_index_to_go );
 5007             my $mll           = maximum_line_length($i_test);
 5008 
 5009             if (
 5010 
 5011                 # the equals is not just before an open paren (testing)
 5012                 ##!$too_close &&
 5013 
 5014                 # if we are beyond the midpoint
 5015                 $gnu_position_predictor > $mll - $rOpts_maximum_line_length / 2
 5016 
 5017                 # or we are beyond the 1/4 point and there was an old
 5018                 # break at the equals
 5019                 || (
 5020                     $gnu_position_predictor >
 5021                     $mll - $rOpts_maximum_line_length * 3 / 4
 5022                     && (
 5023                         $old_breakpoint_to_go[$last_equals]
 5024                         || (   $last_equals > 0
 5025                             && $old_breakpoint_to_go[ $last_equals - 1 ] )
 5026                         || (   $last_equals > 1
 5027                             && $types_to_go[ $last_equals - 1 ] eq 'b'
 5028                             && $old_breakpoint_to_go[ $last_equals - 2 ] )
 5029                     )
 5030                 )
 5031               )
 5032             {
 5033 
 5034                 # then make the switch -- note that we do not set a real
 5035                 # breakpoint here because we may not really need one; sub
 5036                 # scan_list will do that if necessary
 5037                 $line_start_index_to_go = $i_test + 1;
 5038                 $gnu_position_predictor = $test_position;
 5039             }
 5040         }
 5041     }
 5042 
 5043     my $halfway =
 5044       maximum_line_length_for_level($level) - $rOpts_maximum_line_length / 2;
 5045 
 5046     # Check for decreasing depth ..
 5047     # Note that one token may have both decreasing and then increasing
 5048     # depth. For example, (level, ci) can go from (1,1) to (2,0).  So,
 5049     # in this example we would first go back to (1,0) then up to (2,0)
 5050     # in a single call.
 5051     if ( $level < $current_level || $ci_level < $current_ci_level ) {
 5052 
 5053         # loop to find the first entry at or completely below this level
 5054         my ( $lev, $ci_lev );
 5055         while (1) {
 5056             if ($max_gnu_stack_index) {
 5057 
 5058                 # save index of token which closes this level
 5059                 $gnu_stack[$max_gnu_stack_index]->set_closed($max_index_to_go);
 5060 
 5061                 # Undo any extra indentation if we saw no commas
 5062                 my $available_spaces =
 5063                   $gnu_stack[$max_gnu_stack_index]->get_available_spaces();
 5064 
 5065                 my $comma_count = 0;
 5066                 my $arrow_count = 0;
 5067                 if ( $type eq '}' || $type eq ')' ) {
 5068                     $comma_count = $gnu_comma_count{$total_depth};
 5069                     $arrow_count = $gnu_arrow_count{$total_depth};
 5070                     $comma_count = 0 unless $comma_count;
 5071                     $arrow_count = 0 unless $arrow_count;
 5072                 }
 5073                 $gnu_stack[$max_gnu_stack_index]->set_comma_count($comma_count);
 5074                 $gnu_stack[$max_gnu_stack_index]->set_arrow_count($arrow_count);
 5075 
 5076                 if ( $available_spaces > 0 ) {
 5077 
 5078                     if ( $comma_count <= 0 || $arrow_count > 0 ) {
 5079 
 5080                         my $i = $gnu_stack[$max_gnu_stack_index]->get_index();
 5081                         my $seqno =
 5082                           $gnu_stack[$max_gnu_stack_index]
 5083                           ->get_sequence_number();
 5084 
 5085                         # Be sure this item was created in this batch.  This
 5086                         # should be true because we delete any available
 5087                         # space from open items at the end of each batch.
 5088                         if (   $gnu_sequence_number != $seqno
 5089                             || $i > $max_gnu_item_index )
 5090                         {
 5091                             warning(
 5092 "Program bug with -lp.  seqno=$seqno should be $gnu_sequence_number and i=$i should be less than max=$max_gnu_item_index\n"
 5093                             );
 5094                             report_definite_bug();
 5095                         }
 5096 
 5097                         else {
 5098                             if ( $arrow_count == 0 ) {
 5099                                 $gnu_item_list[$i]
 5100                                   ->permanently_decrease_available_spaces(
 5101                                     $available_spaces);
 5102                             }
 5103                             else {
 5104                                 $gnu_item_list[$i]
 5105                                   ->tentatively_decrease_available_spaces(
 5106                                     $available_spaces);
 5107                             }
 5108                             foreach my $j ( $i + 1 .. $max_gnu_item_index ) {
 5109                                 $gnu_item_list[$j]
 5110                                   ->decrease_SPACES($available_spaces);
 5111                             }
 5112                         }
 5113                     }
 5114                 }
 5115 
 5116                 # go down one level
 5117                 --$max_gnu_stack_index;
 5118                 $lev    = $gnu_stack[$max_gnu_stack_index]->get_level();
 5119                 $ci_lev = $gnu_stack[$max_gnu_stack_index]->get_ci_level();
 5120 
 5121                 # stop when we reach a level at or below the current level
 5122                 if ( $lev <= $level && $ci_lev <= $ci_level ) {
 5123                     $space_count =
 5124                       $gnu_stack[$max_gnu_stack_index]->get_spaces();
 5125                     $current_level    = $lev;
 5126                     $current_ci_level = $ci_lev;
 5127                     last;
 5128                 }
 5129             }
 5130 
 5131             # reached bottom of stack .. should never happen because
 5132             # only negative levels can get here, and $level was forced
 5133             # to be positive above.
 5134             else {
 5135                 warning(
 5136 "program bug with -lp: stack_error. level=$level; lev=$lev; ci_level=$ci_level; ci_lev=$ci_lev; rerun with -nlp\n"
 5137                 );
 5138                 report_definite_bug();
 5139                 last;
 5140             }
 5141         }
 5142     }
 5143 
 5144     # handle increasing depth
 5145     if ( $level > $current_level || $ci_level > $current_ci_level ) {
 5146 
 5147