"Fossies" - the Fresh Open Source Software Archive

Member "Perl-Tidy-20210717/lib/Perl/Tidy/VerticalAligner.pm" (14 Jul 2021, 203710 Bytes) of package /linux/misc/Perl-Tidy-20210717.tar.gz:


As a special service "Fossies" has tried to format the requested source page into HTML format using (guessed) Perl source code syntax highlighting (style: standard) with prefixed line numbers and code folding option. Alternatively you can here view or download the uninterpreted source code file.

    1 package Perl::Tidy::VerticalAligner;
    2 use strict;
    3 use warnings;
    4 our $VERSION = '20210717';
    5 
    6 use Perl::Tidy::VerticalAligner::Alignment;
    7 use Perl::Tidy::VerticalAligner::Line;
    8 
    9 # The Perl::Tidy::VerticalAligner package collects output lines and
   10 # attempts to line up certain common tokens, such as => and #, which are
   11 # identified by the calling routine.
   12 #
   13 # Usage:
   14 #   - Initiate an object with a call to new().
   15 #   - Write lines one-by-one with calls to valign_input().
   16 #   - Make a final call to flush() to empty the pipeline.
   17 #
   18 # The sub valign_input collects lines into groups.  When a group reaches
   19 # the maximum possible size it is processed for alignment and output.
   20 # The maximum group size is reached whenerver there is a change in indentation
   21 # level, a blank line, a block comment, or an external flush call.  The calling
   22 # routine may also force a break in alignment at any time.
   23 #
   24 # If the calling routine needs to interrupt the output and send other text to
   25 # the output, it must first call flush() to empty the output pipeline.  This
   26 # might occur for example if a block of pod text needs to be sent to the output
   27 # between blocks of code.
   28 
   29 # It is essential that a final call to flush() be made. Otherwise some
   30 # final lines of text will be lost.
   31 
   32 # Index...
   33 # CODE SECTION 1: Preliminary code, global definitions and sub new
   34 #                 sub new
   35 # CODE SECTION 2: Some Basic Utilities
   36 # CODE SECTION 3: Code to accept input and form groups
   37 #                 sub valign_input
   38 # CODE SECTION 4: Code to process comment lines
   39 #                 sub _flush_comment_lines
   40 # CODE SECTION 5: Code to process groups of code lines
   41 #                 sub _flush_group_lines
   42 # CODE SECTION 6: Output Step A
   43 #                 sub valign_output_step_A
   44 # CODE SECTION 7: Output Step B
   45 #                 sub valign_output_step_B
   46 # CODE SECTION 8: Output Step C
   47 #                 sub valign_output_step_C
   48 # CODE SECTION 9: Output Step D
   49 #                 sub valign_output_step_D
   50 # CODE SECTION 10: Summary
   51 #                 sub report_anything_unusual
   52 
   53 ##################################################################
   54 # CODE SECTION 1: Preliminary code, global definitions and sub new
   55 ##################################################################
   56 
   57 sub AUTOLOAD {
   58 
   59     # Catch any undefined sub calls so that we are sure to get
   60     # some diagnostic information.  This sub should never be called
   61     # except for a programming error.
   62     our $AUTOLOAD;
   63     return if ( $AUTOLOAD =~ /\bDESTROY$/ );
   64     my ( $pkg, $fname, $lno ) = caller();
   65     my $my_package = __PACKAGE__;
   66     print STDERR <<EOM;
   67 ======================================================================
   68 Error detected in package '$my_package', version $VERSION
   69 Received unexpected AUTOLOAD call for sub '$AUTOLOAD'
   70 Called from package: '$pkg'  
   71 Called from File '$fname'  at line '$lno'
   72 This error is probably due to a recent programming change
   73 ======================================================================
   74 EOM
   75     exit 1;
   76 }
   77 
   78 sub DESTROY {
   79 
   80     # required to avoid call to AUTOLOAD in some versions of perl
   81 }
   82 
   83 BEGIN {
   84 
   85     # Define the fixed indexes for variables in $self, which is an array
   86     # reference.  Note the convention of leading and trailing underscores to
   87     # keep them unique.
   88     my $i = 0;
   89     use constant {
   90         _file_writer_object_ => $i++,
   91         _logger_object_      => $i++,
   92         _diagnostics_object_ => $i++,
   93         _length_function_    => $i++,
   94 
   95         _rOpts_                              => $i++,
   96         _rOpts_indent_columns_               => $i++,
   97         _rOpts_tabs_                         => $i++,
   98         _rOpts_entab_leading_whitespace_     => $i++,
   99         _rOpts_fixed_position_side_comment_  => $i++,
  100         _rOpts_minimum_space_to_comment_     => $i++,
  101         _rOpts_maximum_line_length_          => $i++,
  102         _rOpts_variable_maximum_line_length_ => $i++,
  103         _rOpts_valign_                       => $i++,
  104 
  105         _last_level_written_            => $i++,
  106         _last_side_comment_column_      => $i++,
  107         _last_side_comment_line_number_ => $i++,
  108         _last_side_comment_length_      => $i++,
  109         _last_side_comment_level_       => $i++,
  110         _outdented_line_count_          => $i++,
  111         _first_outdented_line_at_       => $i++,
  112         _last_outdented_line_at_        => $i++,
  113         _consecutive_block_comments_    => $i++,
  114 
  115         _rgroup_lines_                => $i++,
  116         _group_level_                 => $i++,
  117         _group_type_                  => $i++,
  118         _zero_count_                  => $i++,
  119         _last_leading_space_count_    => $i++,
  120         _comment_leading_space_count_ => $i++,
  121     };
  122 
  123     # Debug flag. This is a relic from the original program development
  124     # looking for problems with tab characters.  Caution: this debug flag can
  125     # produce a lot of output It should be 0 except when debugging small
  126     # scripts.
  127 
  128     use constant DEBUG_TABS => 0;
  129 
  130     my $debug_warning = sub {
  131         print STDOUT "VALIGN_DEBUGGING with key $_[0]\n";
  132         return;
  133     };
  134 
  135     DEBUG_TABS && $debug_warning->('TABS');
  136 
  137 }
  138 
  139 sub new {
  140 
  141     my ( $class, @args ) = @_;
  142 
  143     my %defaults = (
  144         rOpts              => undef,
  145         file_writer_object => undef,
  146         logger_object      => undef,
  147         diagnostics_object => undef,
  148         length_function    => sub { return length( $_[0] ) },
  149     );
  150     my %args = ( %defaults, @args );
  151 
  152     # Initialize other caches and buffers
  153     initialize_step_B_cache();
  154     initialize_valign_buffer();
  155     initialize_leading_string_cache();
  156     initialize_decode();
  157 
  158     # Initialize all variables in $self.
  159     # To add an item to $self, first define a new constant index in the BEGIN
  160     # section.
  161     my $self = [];
  162 
  163     # objects
  164     $self->[_file_writer_object_] = $args{file_writer_object};
  165     $self->[_logger_object_]      = $args{logger_object};
  166     $self->[_diagnostics_object_] = $args{diagnostics_object};
  167     $self->[_length_function_]    = $args{length_function};
  168 
  169     # shortcuts to user options
  170     my $rOpts = $args{rOpts};
  171 
  172     $self->[_rOpts_]                = $rOpts;
  173     $self->[_rOpts_indent_columns_] = $rOpts->{'indent-columns'};
  174     $self->[_rOpts_tabs_]           = $rOpts->{'tabs'};
  175     $self->[_rOpts_entab_leading_whitespace_] =
  176       $rOpts->{'entab-leading-whitespace'};
  177     $self->[_rOpts_fixed_position_side_comment_] =
  178       $rOpts->{'fixed-position-side-comment'};
  179     $self->[_rOpts_minimum_space_to_comment_] =
  180       $rOpts->{'minimum-space-to-comment'};
  181     $self->[_rOpts_maximum_line_length_] = $rOpts->{'maximum-line-length'};
  182     $self->[_rOpts_variable_maximum_line_length_] =
  183       $rOpts->{'variable-maximum-line-length'};
  184     $self->[_rOpts_valign_] = $rOpts->{'valign'};
  185 
  186     # Batch of lines being collected
  187     $self->[_rgroup_lines_]                = [];
  188     $self->[_group_level_]                 = 0;
  189     $self->[_group_type_]                  = "";
  190     $self->[_zero_count_]                  = 0;
  191     $self->[_comment_leading_space_count_] = 0;
  192     $self->[_last_leading_space_count_]    = 0;
  193 
  194     # Memory of what has been processed
  195     $self->[_last_level_written_]            = -1;
  196     $self->[_last_side_comment_column_]      = 0;
  197     $self->[_last_side_comment_line_number_] = 0;
  198     $self->[_last_side_comment_length_]      = 0;
  199     $self->[_last_side_comment_level_]       = -1;
  200     $self->[_outdented_line_count_]          = 0;
  201     $self->[_first_outdented_line_at_]       = 0;
  202     $self->[_last_outdented_line_at_]        = 0;
  203     $self->[_consecutive_block_comments_]    = 0;
  204 
  205     bless $self, $class;
  206     return $self;
  207 }
  208 
  209 #################################
  210 # CODE SECTION 2: Basic Utilities
  211 #################################
  212 
  213 sub flush {
  214 
  215     # flush() is the external call to completely empty the pipeline.
  216     my ($self) = @_;
  217 
  218     # push things out the pipline...
  219 
  220     # push out any current group lines
  221     $self->_flush_group_lines();
  222 
  223     # then anything left in the cache of step_B
  224     $self->_flush_cache();
  225 
  226     # then anything left in the buffer of step_C
  227     $self->dump_valign_buffer();
  228 
  229     return;
  230 }
  231 
  232 sub initialize_for_new_group {
  233     my ($self) = @_;
  234 
  235     $self->[_rgroup_lines_]                = [];
  236     $self->[_group_type_]                  = "";
  237     $self->[_zero_count_]                  = 0;
  238     $self->[_comment_leading_space_count_] = 0;
  239     $self->[_last_leading_space_count_]    = 0;
  240 
  241     # Note that the value for _group_level_ is
  242     # handled separately in sub valign_input
  243     return;
  244 }
  245 
  246 sub group_line_count {
  247     return +@{ $_[0]->[_rgroup_lines_] };
  248 }
  249 
  250 # interface to Perl::Tidy::Diagnostics routines
  251 # For debugging; not currently used
  252 sub write_diagnostics {
  253     my ( $self, $msg ) = @_;
  254     my $diagnostics_object = $self->[_diagnostics_object_];
  255     if ($diagnostics_object) {
  256         $diagnostics_object->write_diagnostics($msg);
  257     }
  258     return;
  259 }
  260 
  261 # interface to Perl::Tidy::Logger routines
  262 sub warning {
  263     my ( $self, $msg ) = @_;
  264     my $logger_object = $self->[_logger_object_];
  265     if ($logger_object) {
  266         $logger_object->warning($msg);
  267     }
  268     return;
  269 }
  270 
  271 sub write_logfile_entry {
  272     my ( $self, $msg ) = @_;
  273     my $logger_object = $self->[_logger_object_];
  274     if ($logger_object) {
  275         $logger_object->write_logfile_entry($msg);
  276     }
  277     return;
  278 }
  279 
  280 sub report_definite_bug {
  281     my ( $self, $msg ) = @_;
  282     my $logger_object = $self->[_logger_object_];
  283     if ($logger_object) {
  284         $logger_object->report_definite_bug();
  285     }
  286     return;
  287 }
  288 
  289 sub get_cached_line_count {
  290     my $self = shift;
  291     return $self->group_line_count() + ( get_cached_line_type() ? 1 : 0 );
  292 }
  293 
  294 sub get_spaces {
  295 
  296     # return the number of leading spaces associated with an indentation
  297     # variable $indentation is either a constant number of spaces or an
  298     # object with a get_spaces method.
  299     my $indentation = shift;
  300     return ref($indentation) ? $indentation->get_spaces() : $indentation;
  301 }
  302 
  303 sub get_recoverable_spaces {
  304 
  305     # return the number of spaces (+ means shift right, - means shift left)
  306     # that we would like to shift a group of lines with the same indentation
  307     # to get them to line up with their opening parens
  308     my $indentation = shift;
  309     return ref($indentation) ? $indentation->get_recoverable_spaces() : 0;
  310 }
  311 
  312 sub maximum_line_length_for_level {
  313 
  314     # return maximum line length for line starting with a given level
  315     my ( $self, $level ) = @_;
  316     my $maximum_line_length = $self->[_rOpts_maximum_line_length_];
  317     if ( $self->[_rOpts_variable_maximum_line_length_] ) {
  318         if ( $level < 0 ) { $level = 0 }
  319         $maximum_line_length += $level * $self->[_rOpts_indent_columns_];
  320     }
  321     return $maximum_line_length;
  322 }
  323 
  324 ######################################################
  325 # CODE SECTION 3: Code to accept input and form groups
  326 ######################################################
  327 
  328 sub push_group_line {
  329 
  330     my ( $self, $new_line ) = @_;
  331     my $rgroup_lines = $self->[_rgroup_lines_];
  332     push @{$rgroup_lines}, $new_line;
  333     return;
  334 }
  335 
  336 use constant DEBUG_VALIGN      => 0;
  337 use constant SC_LONG_LINE_DIFF => 12;
  338 
  339 sub valign_input {
  340 
  341     # Place one line in the current vertical group.
  342     #
  343     # The input parameters are:
  344     #     $level = indentation level of this line
  345     #     $rfields = reference to array of fields
  346     #     $rpatterns = reference to array of patterns, one per field
  347     #     $rtokens   = reference to array of tokens starting fields 1,2,..
  348     #
  349     # Here is an example of what this package does.  In this example,
  350     # we are trying to line up both the '=>' and the '#'.
  351     #
  352     #         '18' => 'grave',    #   \`
  353     #         '19' => 'acute',    #   `'
  354     #         '20' => 'caron',    #   \v
  355     # <-tabs-><f1-><--field 2 ---><-f3->
  356     # |            |              |    |
  357     # |            |              |    |
  358     # col1        col2         col3 col4
  359     #
  360     # The calling routine has already broken the entire line into 3 fields as
  361     # indicated.  (So the work of identifying promising common tokens has
  362     # already been done).
  363     #
  364     # In this example, there will be 2 tokens being matched: '=>' and '#'.
  365     # They are the leading parts of fields 2 and 3, but we do need to know
  366     # what they are so that we can dump a group of lines when these tokens
  367     # change.
  368     #
  369     # The fields contain the actual characters of each field.  The patterns
  370     # are like the fields, but they contain mainly token types instead
  371     # of tokens, so they have fewer characters.  They are used to be
  372     # sure we are matching fields of similar type.
  373     #
  374     # In this example, there will be 4 column indexes being adjusted.  The
  375     # first one is always at zero.  The interior columns are at the start of
  376     # the matching tokens, and the last one tracks the maximum line length.
  377     #
  378     # Each time a new line comes in, it joins the current vertical
  379     # group if possible.  Otherwise it causes the current group to be flushed
  380     # and a new group is started.
  381     #
  382     # For each new group member, the column locations are increased, as
  383     # necessary, to make room for the new fields.  When the group is finally
  384     # output, these column numbers are used to compute the amount of spaces of
  385     # padding needed for each field.
  386     #
  387     # Programming note: the fields are assumed not to have any tab characters.
  388     # Tabs have been previously removed except for tabs in quoted strings and
  389     # side comments.  Tabs in these fields can mess up the column counting.
  390     # The log file warns the user if there are any such tabs.
  391 
  392     my ( $self, $rline_hash ) = @_;
  393 
  394     my $level                     = $rline_hash->{level};
  395     my $level_end                 = $rline_hash->{level_end};
  396     my $level_adj                 = $rline_hash->{level_adj};
  397     my $indentation               = $rline_hash->{indentation};
  398     my $list_seqno                = $rline_hash->{list_seqno};
  399     my $outdent_long_lines        = $rline_hash->{outdent_long_lines};
  400     my $is_terminal_ternary       = $rline_hash->{is_terminal_ternary};
  401     my $rvertical_tightness_flags = $rline_hash->{rvertical_tightness_flags};
  402     my $level_jump                = $rline_hash->{level_jump};
  403     my $rfields                   = $rline_hash->{rfields};
  404     my $rtokens                   = $rline_hash->{rtokens};
  405     my $rpatterns                 = $rline_hash->{rpatterns};
  406     my $rfield_lengths            = $rline_hash->{rfield_lengths};
  407     my $terminal_block_type       = $rline_hash->{terminal_block_type};
  408     my $batch_count               = $rline_hash->{batch_count};
  409     my $break_alignment_before    = $rline_hash->{break_alignment_before};
  410     my $break_alignment_after     = $rline_hash->{break_alignment_after};
  411     my $Kend                      = $rline_hash->{Kend};
  412     my $ci_level                  = $rline_hash->{ci_level};
  413 
  414     # The index '$Kend' is a value which passed along with the line text to sub
  415     # 'write_code_line' for a convergence check.
  416 
  417     # number of fields is $jmax
  418     # number of tokens between fields is $jmax-1
  419     my $jmax = @{$rfields} - 1;
  420 
  421     my $leading_space_count = get_spaces($indentation);
  422 
  423     # set outdented flag to be sure we either align within statements or
  424     # across statement boundaries, but not both.
  425     my $is_outdented =
  426       $self->[_last_leading_space_count_] > $leading_space_count;
  427     $self->[_last_leading_space_count_] = $leading_space_count;
  428 
  429     # Identify a hanging side comment.  Hanging side comments have an empty
  430     # initial field.
  431     my $is_hanging_side_comment =
  432       ( $jmax == 1 && $rtokens->[0] eq '#' && $rfields->[0] =~ /^\s*$/ );
  433 
  434     # Undo outdented flag for a hanging side comment
  435     $is_outdented = 0 if $is_hanging_side_comment;
  436 
  437     # Identify a block comment.
  438     my $is_block_comment = $jmax == 0 && substr( $rfields->[0], 0, 1 ) eq '#';
  439 
  440     # Block comment .. update count
  441     if ($is_block_comment) {
  442         $self->[_consecutive_block_comments_]++;
  443     }
  444 
  445     # Not a block comment ..
  446     # Forget side comment column if we saw 2 or more block comments,
  447     # and reset the count
  448     else {
  449 
  450         if ( $self->[_consecutive_block_comments_] > 1 ) {
  451             $self->forget_side_comment();
  452         }
  453         $self->[_consecutive_block_comments_] = 0;
  454     }
  455 
  456     # Reset side comment location if we are entering a new block from level 0.
  457     # This is intended to keep them from drifting too far to the right.
  458     if ( $terminal_block_type && $level_adj == 0 && $level_end > $level ) {
  459         $self->forget_side_comment();
  460     }
  461 
  462     my $group_level = $self->[_group_level_];
  463 
  464     DEBUG_VALIGN && do {
  465         my $nlines = $self->group_line_count();
  466         print STDOUT
  467 "Entering valign_input: lines=$nlines new #fields= $jmax, leading_count=$leading_space_count, level_jump=$level_jump, level=$level, group_level=$group_level, level_jump=$level_jump\n";
  468     };
  469 
  470     # Validate cached line if necessary: If we can produce a container
  471     # with just 2 lines total by combining an existing cached opening
  472     # token with the closing token to follow, then we will mark both
  473     # cached flags as valid.
  474     my $cached_line_type = get_cached_line_type();
  475     if ($cached_line_type) {
  476         my $cached_line_flag = get_cached_line_flag();
  477         if ($rvertical_tightness_flags) {
  478             my $cached_seqno = get_cached_seqno();
  479             if (   $cached_seqno
  480                 && $self->group_line_count() <= 1
  481                 && $rvertical_tightness_flags->[2]
  482                 && $rvertical_tightness_flags->[2] == $cached_seqno )
  483             {
  484                 $rvertical_tightness_flags->[3] ||= 1;
  485                 set_cached_line_valid(1);
  486             }
  487         }
  488 
  489         # do not join an opening block brace with an unbalanced line
  490         # unless requested with a flag value of 2
  491         if (   $cached_line_type == 3
  492             && !$self->group_line_count()
  493             && $cached_line_flag < 2
  494             && $level_jump != 0 )
  495         {
  496             set_cached_line_valid(0);
  497         }
  498     }
  499 
  500     # shouldn't happen:
  501     if ( $level < 0 ) { $level = 0 }
  502 
  503     # do not align code across indentation level changes
  504     # or if vertical alignment is turned off for debugging
  505     if ( $level != $group_level || $is_outdented || !$self->[_rOpts_valign_] ) {
  506 
  507         $self->_flush_group_lines( $level - $group_level );
  508 
  509         $group_level = $level;
  510         $self->[_group_level_] = $group_level;
  511 
  512         # wait until after the above flush to get the leading space
  513         # count because it may have been changed if the -icp flag is in
  514         # effect
  515         $leading_space_count = get_spaces($indentation);
  516 
  517     }
  518 
  519     # --------------------------------------------------------------------
  520     # Collect outdentable block COMMENTS
  521     # --------------------------------------------------------------------
  522     my $is_blank_line = "";
  523     if ( $self->[_group_type_] eq 'COMMENT' ) {
  524         if (
  525             (
  526                    $is_block_comment
  527                 && $outdent_long_lines
  528                 && $leading_space_count ==
  529                 $self->[_comment_leading_space_count_]
  530             )
  531             || $is_blank_line
  532           )
  533         {
  534 
  535             # Note that for a comment group we are not storing a line
  536             # but rather just the text and its length.
  537             $self->push_group_line(
  538                 [ $rfields->[0], $rfield_lengths->[0], $Kend ] );
  539             return;
  540         }
  541         else {
  542             $self->_flush_group_lines();
  543         }
  544     }
  545 
  546     my $rgroup_lines = $self->[_rgroup_lines_];
  547     if ( $break_alignment_before && @{$rgroup_lines} ) {
  548         $rgroup_lines->[-1]->set_end_group(1);
  549     }
  550 
  551     # --------------------------------------------------------------------
  552     # add dummy fields for terminal ternary
  553     # --------------------------------------------------------------------
  554     my $j_terminal_match;
  555 
  556     if ( $is_terminal_ternary && @{$rgroup_lines} ) {
  557         $j_terminal_match =
  558           fix_terminal_ternary( $rgroup_lines->[-1], $rfields, $rtokens,
  559             $rpatterns, $rfield_lengths, $group_level, );
  560         $jmax = @{$rfields} - 1;
  561     }
  562 
  563     # --------------------------------------------------------------------
  564     # add dummy fields for else statement
  565     # --------------------------------------------------------------------
  566 
  567     # Note the trailing space after 'else' here. If there were no space between
  568     # the else and the next '{' then we would not be able to do vertical
  569     # alignment of the '{'.
  570     if (   $rfields->[0] eq 'else '
  571         && @{$rgroup_lines}
  572         && $level_jump == 0 )
  573     {
  574 
  575         $j_terminal_match =
  576           fix_terminal_else( $rgroup_lines->[-1], $rfields, $rtokens,
  577             $rpatterns, $rfield_lengths );
  578         $jmax = @{$rfields} - 1;
  579     }
  580 
  581     # --------------------------------------------------------------------
  582     # Handle simple line of code with no fields to match.
  583     # --------------------------------------------------------------------
  584     if ( $jmax <= 0 ) {
  585         $self->[_zero_count_]++;
  586 
  587         if ( @{$rgroup_lines}
  588             && !get_recoverable_spaces( $rgroup_lines->[0]->get_indentation() )
  589           )
  590         {
  591 
  592             # flush the current group if it has some aligned columns..
  593             # or we haven't seen a comment lately
  594             if (   $rgroup_lines->[0]->get_jmax() > 1
  595                 || $self->[_zero_count_] > 3 )
  596             {
  597                 $self->_flush_group_lines();
  598             }
  599         }
  600 
  601         # start new COMMENT group if this comment may be outdented
  602         if (   $is_block_comment
  603             && $outdent_long_lines
  604             && !$self->group_line_count() )
  605         {
  606             $self->[_group_type_]                  = 'COMMENT';
  607             $self->[_comment_leading_space_count_] = $leading_space_count;
  608             $self->push_group_line(
  609                 [ $rfields->[0], $rfield_lengths->[0], $Kend ] );
  610             return;
  611         }
  612 
  613         # just write this line directly if no current group, no side comment,
  614         # and no space recovery is needed.
  615         if (   !$self->group_line_count()
  616             && !get_recoverable_spaces($indentation) )
  617         {
  618 
  619             $self->valign_output_step_B(
  620                 {
  621                     leading_space_count       => $leading_space_count,
  622                     line                      => $rfields->[0],
  623                     line_length               => $rfield_lengths->[0],
  624                     side_comment_length       => 0,
  625                     outdent_long_lines        => $outdent_long_lines,
  626                     rvertical_tightness_flags => $rvertical_tightness_flags,
  627                     level                     => $level,
  628                     level_end                 => $level_end,
  629                     Kend                      => $Kend,
  630                 }
  631             );
  632 
  633             return;
  634         }
  635     }
  636     else {
  637         $self->[_zero_count_] = 0;
  638     }
  639 
  640     my $maximum_line_length_for_level =
  641       $self->maximum_line_length_for_level($level);
  642 
  643     # --------------------------------------------------------------------
  644     # It simplifies things to create a zero length side comment
  645     # if none exists.
  646     # --------------------------------------------------------------------
  647     $self->make_side_comment( $rtokens, $rfields, $rpatterns, $rfield_lengths );
  648     $jmax = @{$rfields} - 1;
  649 
  650     # --------------------------------------------------------------------
  651     # create an object to hold this line
  652     # --------------------------------------------------------------------
  653     my $new_line = Perl::Tidy::VerticalAligner::Line->new(
  654         {
  655             jmax                      => $jmax,
  656             rtokens                   => $rtokens,
  657             rfields                   => $rfields,
  658             rpatterns                 => $rpatterns,
  659             rfield_lengths            => $rfield_lengths,
  660             indentation               => $indentation,
  661             leading_space_count       => $leading_space_count,
  662             outdent_long_lines        => $outdent_long_lines,
  663             list_seqno                => $list_seqno,
  664             list_type                 => "",
  665             is_hanging_side_comment   => $is_hanging_side_comment,
  666             maximum_line_length       => $maximum_line_length_for_level,
  667             rvertical_tightness_flags => $rvertical_tightness_flags,
  668             is_terminal_ternary       => $is_terminal_ternary,
  669             j_terminal_match          => $j_terminal_match,
  670             end_group                 => $break_alignment_after,
  671             Kend                      => $Kend,
  672             ci_level                  => $ci_level,
  673             level                     => $level,
  674             level_end                 => $level_end,
  675             imax_pair                 => -1,
  676         }
  677     );
  678 
  679     # --------------------------------------------------------------------
  680     # Decide if this is a simple list of items.
  681     # We use this to be less restrictive in deciding what to align.
  682     # --------------------------------------------------------------------
  683     decide_if_list($new_line) if ($list_seqno);
  684 
  685     # --------------------------------------------------------------------
  686     # Append this line to the current group (or start new group)
  687     # --------------------------------------------------------------------
  688 
  689     $self->push_group_line($new_line);
  690 
  691     # output this group if it ends in a terminal else or ternary line
  692     if ( defined($j_terminal_match) ) {
  693         $self->_flush_group_lines();
  694     }
  695 
  696     # Force break after jump to lower level
  697     if ( $level_jump < 0 ) {
  698         $self->_flush_group_lines($level_jump);
  699     }
  700 
  701     # --------------------------------------------------------------------
  702     # Some old debugging stuff
  703     # --------------------------------------------------------------------
  704     DEBUG_VALIGN && do {
  705         print STDOUT "exiting valign_input fields:";
  706         dump_array( @{$rfields} );
  707         print STDOUT "exiting valign_input tokens:";
  708         dump_array( @{$rtokens} );
  709         print STDOUT "exiting valign_input patterns:";
  710         dump_array( @{$rpatterns} );
  711     };
  712 
  713     return;
  714 }
  715 
  716 sub join_hanging_comment {
  717 
  718     # Add dummy fields to a hanging side comment to make it look
  719     # like the first line in its potential group.  This simplifies
  720     # the coding.
  721     my ( $new_line, $old_line ) = @_;
  722 
  723     my $jmax = $new_line->get_jmax();
  724 
  725     # must be 2 fields
  726     return 0 unless $jmax == 1;
  727     my $rtokens = $new_line->get_rtokens();
  728 
  729     # the second field must be a comment
  730     return 0 unless $rtokens->[0] eq '#';
  731     my $rfields = $new_line->get_rfields();
  732 
  733     # the first field must be empty
  734     return 0 unless $rfields->[0] =~ /^\s*$/;
  735 
  736     # the current line must have fewer fields
  737     my $maximum_field_index = $old_line->get_jmax();
  738     return 0
  739       unless $maximum_field_index > $jmax;
  740 
  741     # looks ok..
  742     my $rpatterns      = $new_line->get_rpatterns();
  743     my $rfield_lengths = $new_line->get_rfield_lengths();
  744 
  745     $new_line->set_is_hanging_side_comment(1);
  746     $jmax = $maximum_field_index;
  747     $new_line->set_jmax($jmax);
  748     $rfields->[$jmax]         = $rfields->[1];
  749     $rfield_lengths->[$jmax]  = $rfield_lengths->[1];
  750     $rtokens->[ $jmax - 1 ]   = $rtokens->[0];
  751     $rpatterns->[ $jmax - 1 ] = $rpatterns->[0];
  752     foreach my $j ( 1 .. $jmax - 1 ) {
  753         $rfields->[$j]         = '';
  754         $rfield_lengths->[$j]  = 0;
  755         $rtokens->[ $j - 1 ]   = "";
  756         $rpatterns->[ $j - 1 ] = "";
  757     }
  758     return 1;
  759 }
  760 
  761 sub make_side_comment {
  762 
  763     # create an empty side comment if none exists
  764 
  765     my ( $self, $rtokens, $rfields, $rpatterns, $rfield_lengths ) = @_;
  766 
  767     my $jmax = @{$rfields} - 1;
  768 
  769     # if line does not have a side comment...
  770     if ( ( $jmax == 0 ) || ( $rtokens->[ $jmax - 1 ] ne '#' ) ) {
  771         $jmax += 1;
  772         $rtokens->[ $jmax - 1 ]  = '#';
  773         $rfields->[$jmax]        = '';
  774         $rfield_lengths->[$jmax] = 0;
  775         $rpatterns->[$jmax]      = '#';
  776     }
  777     return;
  778 }
  779 
  780 {    ## closure for sub decide_if_list
  781 
  782     my %is_comma_token;
  783 
  784     BEGIN {
  785 
  786         my @q = qw( => );
  787         push @q, ',';
  788         @is_comma_token{@q} = (1) x scalar(@q);
  789     }
  790 
  791     sub decide_if_list {
  792 
  793         my $line = shift;
  794 
  795         # A list will be taken to be a line with a forced break in which all
  796         # of the field separators are commas or comma-arrows (except for the
  797         # trailing #)
  798 
  799         my $rtokens    = $line->get_rtokens();
  800         my $test_token = $rtokens->[0];
  801         my ( $raw_tok, $lev, $tag, $tok_count ) =
  802           decode_alignment_token($test_token);
  803         if ( $is_comma_token{$raw_tok} ) {
  804             my $list_type = $test_token;
  805             my $jmax      = $line->get_jmax();
  806 
  807             foreach ( 1 .. $jmax - 2 ) {
  808                 ( $raw_tok, $lev, $tag, $tok_count ) =
  809                   decode_alignment_token( $rtokens->[$_] );
  810                 if ( !$is_comma_token{$raw_tok} ) {
  811                     $list_type = "";
  812                     last;
  813                 }
  814             }
  815             $line->set_list_type($list_type);
  816         }
  817         return;
  818     }
  819 }
  820 
  821 sub fix_terminal_ternary {
  822 
  823     # Add empty fields as necessary to align a ternary term
  824     # like this:
  825     #
  826     #  my $leapyear =
  827     #      $year % 4   ? 0
  828     #    : $year % 100 ? 1
  829     #    : $year % 400 ? 0
  830     #    :               1;
  831     #
  832     # returns the index of the terminal question token, if any
  833 
  834     my ( $old_line, $rfields, $rtokens, $rpatterns, $rfield_lengths,
  835         $group_level )
  836       = @_;
  837 
  838     return unless ($old_line);
  839     use constant EXPLAIN_TERNARY => 0;
  840 
  841     my $jmax        = @{$rfields} - 1;
  842     my $rfields_old = $old_line->get_rfields();
  843 
  844     my $rpatterns_old       = $old_line->get_rpatterns();
  845     my $rtokens_old         = $old_line->get_rtokens();
  846     my $maximum_field_index = $old_line->get_jmax();
  847 
  848     # look for the question mark after the :
  849     my ($jquestion);
  850     my $depth_question;
  851     my $pad        = "";
  852     my $pad_length = 0;
  853     foreach my $j ( 0 .. $maximum_field_index - 1 ) {
  854         my $tok = $rtokens_old->[$j];
  855         my ( $raw_tok, $lev, $tag, $tok_count ) = decode_alignment_token($tok);
  856         if ( $raw_tok eq '?' ) {
  857             $depth_question = $lev;
  858 
  859             # depth must be correct
  860             next unless ( $depth_question eq $group_level );
  861 
  862             $jquestion = $j;
  863             if ( $rfields_old->[ $j + 1 ] =~ /^(\?\s*)/ ) {
  864                 $pad_length = length($1);
  865                 $pad        = " " x $pad_length;
  866             }
  867             else {
  868                 return;    # shouldn't happen
  869             }
  870             last;
  871         }
  872     }
  873     return unless ( defined($jquestion) );    # shouldn't happen
  874 
  875     # Now splice the tokens and patterns of the previous line
  876     # into the else line to insure a match.  Add empty fields
  877     # as necessary.
  878     my $jadd = $jquestion;
  879 
  880     # Work on copies of the actual arrays in case we have
  881     # to return due to an error
  882     my @fields        = @{$rfields};
  883     my @patterns      = @{$rpatterns};
  884     my @tokens        = @{$rtokens};
  885     my @field_lengths = @{$rfield_lengths};
  886 
  887     EXPLAIN_TERNARY && do {
  888         local $" = '><';
  889         print STDOUT "CURRENT FIELDS=<@{$rfields_old}>\n";
  890         print STDOUT "CURRENT TOKENS=<@{$rtokens_old}>\n";
  891         print STDOUT "CURRENT PATTERNS=<@{$rpatterns_old}>\n";
  892         print STDOUT "UNMODIFIED FIELDS=<@{$rfields}>\n";
  893         print STDOUT "UNMODIFIED TOKENS=<@{$rtokens}>\n";
  894         print STDOUT "UNMODIFIED PATTERNS=<@{$rpatterns}>\n";
  895     };
  896 
  897     # handle cases of leading colon on this line
  898     if ( $fields[0] =~ /^(:\s*)(.*)$/ ) {
  899 
  900         my ( $colon, $therest ) = ( $1, $2 );
  901 
  902         # Handle sub-case of first field with leading colon plus additional code
  903         # This is the usual situation as at the '1' below:
  904         #  ...
  905         #  : $year % 400 ? 0
  906         #  :               1;
  907         if ($therest) {
  908 
  909             # Split the first field after the leading colon and insert padding.
  910             # Note that this padding will remain even if the terminal value goes
  911             # out on a separate line.  This does not seem to look to bad, so no
  912             # mechanism has been included to undo it.
  913             my $field1        = shift @fields;
  914             my $field_length1 = shift @field_lengths;
  915             my $len_colon     = length($colon);
  916             unshift @fields, ( $colon, $pad . $therest );
  917             unshift @field_lengths,
  918               ( $len_colon, $pad_length + $field_length1 - $len_colon );
  919 
  920             # change the leading pattern from : to ?
  921             return unless ( $patterns[0] =~ s/^\:/?/ );
  922 
  923             # install leading tokens and patterns of existing line
  924             unshift( @tokens,   @{$rtokens_old}[ 0 .. $jquestion ] );
  925             unshift( @patterns, @{$rpatterns_old}[ 0 .. $jquestion ] );
  926 
  927             # insert appropriate number of empty fields
  928             splice( @fields,        1, 0, ('') x $jadd ) if $jadd;
  929             splice( @field_lengths, 1, 0, (0) x $jadd )  if $jadd;
  930         }
  931 
  932         # handle sub-case of first field just equal to leading colon.
  933         # This can happen for example in the example below where
  934         # the leading '(' would create a new alignment token
  935         # : ( $name =~ /[]}]$/ ) ? ( $mname = $name )
  936         # :                        ( $mname = $name . '->' );
  937         else {
  938 
  939             return unless ( $jmax > 0 && $tokens[0] ne '#' ); # shouldn't happen
  940 
  941             # prepend a leading ? onto the second pattern
  942             $patterns[1] = "?b" . $patterns[1];
  943 
  944             # pad the second field
  945             $fields[1]        = $pad . $fields[1];
  946             $field_lengths[1] = $pad_length + $field_lengths[1];
  947 
  948             # install leading tokens and patterns of existing line, replacing
  949             # leading token and inserting appropriate number of empty fields
  950             splice( @tokens,   0, 1, @{$rtokens_old}[ 0 .. $jquestion ] );
  951             splice( @patterns, 1, 0, @{$rpatterns_old}[ 1 .. $jquestion ] );
  952             splice( @fields,        1, 0, ('') x $jadd ) if $jadd;
  953             splice( @field_lengths, 1, 0, (0) x $jadd )  if $jadd;
  954         }
  955     }
  956 
  957     # Handle case of no leading colon on this line.  This will
  958     # be the case when -wba=':' is used.  For example,
  959     #  $year % 400 ? 0 :
  960     #                1;
  961     else {
  962 
  963         # install leading tokens and patterns of existing line
  964         $patterns[0] = '?' . 'b' . $patterns[0];
  965         unshift( @tokens,   @{$rtokens_old}[ 0 .. $jquestion ] );
  966         unshift( @patterns, @{$rpatterns_old}[ 0 .. $jquestion ] );
  967 
  968         # insert appropriate number of empty fields
  969         $jadd             = $jquestion + 1;
  970         $fields[0]        = $pad . $fields[0];
  971         $field_lengths[0] = $pad_length + $field_lengths[0];
  972         splice( @fields,        0, 0, ('') x $jadd ) if $jadd;
  973         splice( @field_lengths, 0, 0, (0) x $jadd )  if $jadd;
  974     }
  975 
  976     EXPLAIN_TERNARY && do {
  977         local $" = '><';
  978         print STDOUT "MODIFIED TOKENS=<@tokens>\n";
  979         print STDOUT "MODIFIED PATTERNS=<@patterns>\n";
  980         print STDOUT "MODIFIED FIELDS=<@fields>\n";
  981     };
  982 
  983     # all ok .. update the arrays
  984     @{$rfields}        = @fields;
  985     @{$rtokens}        = @tokens;
  986     @{$rpatterns}      = @patterns;
  987     @{$rfield_lengths} = @field_lengths;
  988 
  989     # force a flush after this line
  990     return $jquestion;
  991 }
  992 
  993 sub fix_terminal_else {
  994 
  995     # Add empty fields as necessary to align a balanced terminal
  996     # else block to a previous if/elsif/unless block,
  997     # like this:
  998     #
  999     #  if   ( 1 || $x ) { print "ok 13\n"; }
 1000     #  else             { print "not ok 13\n"; }
 1001     #
 1002     # returns a positive value if the else block should be indented
 1003     #
 1004     my ( $old_line, $rfields, $rtokens, $rpatterns, $rfield_lengths ) = @_;
 1005 
 1006     return unless ($old_line);
 1007     my $jmax = @{$rfields} - 1;
 1008     return unless ( $jmax > 0 );
 1009 
 1010     # check for balanced else block following if/elsif/unless
 1011     my $rfields_old = $old_line->get_rfields();
 1012 
 1013     # TBD: add handling for 'case'
 1014     return unless ( $rfields_old->[0] =~ /^(if|elsif|unless)\s*$/ );
 1015 
 1016     # look for the opening brace after the else, and extract the depth
 1017     my $tok_brace = $rtokens->[0];
 1018     my $depth_brace;
 1019     if ( $tok_brace =~ /^\{(\d+)/ ) { $depth_brace = $1; }
 1020 
 1021     # probably:  "else # side_comment"
 1022     else { return }
 1023 
 1024     my $rpatterns_old       = $old_line->get_rpatterns();
 1025     my $rtokens_old         = $old_line->get_rtokens();
 1026     my $maximum_field_index = $old_line->get_jmax();
 1027 
 1028     # be sure the previous if/elsif is followed by an opening paren
 1029     my $jparen    = 0;
 1030     my $tok_paren = '(' . $depth_brace;
 1031     my $tok_test  = $rtokens_old->[$jparen];
 1032     return unless ( $tok_test eq $tok_paren );    # shouldn't happen
 1033 
 1034     # Now find the opening block brace
 1035     my ($jbrace);
 1036     foreach my $j ( 1 .. $maximum_field_index - 1 ) {
 1037         my $tok = $rtokens_old->[$j];
 1038         if ( $tok eq $tok_brace ) {
 1039             $jbrace = $j;
 1040             last;
 1041         }
 1042     }
 1043     return unless ( defined($jbrace) );           # shouldn't happen
 1044 
 1045     # Now splice the tokens and patterns of the previous line
 1046     # into the else line to insure a match.  Add empty fields
 1047     # as necessary.
 1048     my $jadd = $jbrace - $jparen;
 1049     splice( @{$rtokens},   0, 0, @{$rtokens_old}[ $jparen .. $jbrace - 1 ] );
 1050     splice( @{$rpatterns}, 1, 0, @{$rpatterns_old}[ $jparen + 1 .. $jbrace ] );
 1051     splice( @{$rfields},        1, 0, ('') x $jadd );
 1052     splice( @{$rfield_lengths}, 1, 0, (0) x $jadd );
 1053 
 1054     # force a flush after this line if it does not follow a case
 1055     if   ( $rfields_old->[0] =~ /^case\s*$/ ) { return }
 1056     else                                      { return $jbrace }
 1057 }
 1058 
 1059 my %is_closing_block_type;
 1060 
 1061 BEGIN {
 1062     @_ = qw< } ] >;
 1063     @is_closing_block_type{@_} = (1) x scalar(@_);
 1064 }
 1065 
 1066 sub check_match {
 1067 
 1068     # See if the current line matches the current vertical alignment group.
 1069 
 1070     my ( $self, $new_line, $base_line, $prev_line ) = @_;
 1071 
 1072     # Given:
 1073     #  $new_line  = the line being considered for group inclusion
 1074     #  $base_line = the first line of the current group
 1075     #  $prev_line = the line just before $new_line
 1076 
 1077     # returns a flag and a value as follows:
 1078     #    return (0, $imax_align)     if the line does not match
 1079     #    return (1, $imax_align)     if the line matches but does not fit
 1080     #    return (2, $imax_align)     if the line matches and fits
 1081 
 1082     # Returns '$imax_align' which is the index of the maximum matching token.
 1083     # It will be used in the subsequent left-to-right sweep to align as many
 1084     # tokens as possible for lines which partially match.
 1085     my $imax_align = -1;
 1086 
 1087     # variable $GoToMsg explains reason for no match, for debugging
 1088     my $GoToMsg = "";
 1089     use constant EXPLAIN_CHECK_MATCH => 0;
 1090 
 1091     # This is a flag for testing alignment by sub sweep_left_to_right only.
 1092     # This test can help find problems with the alignment logic.
 1093     # This flag should normally be zero.
 1094     use constant TEST_SWEEP_ONLY => 0;
 1095 
 1096     my $jmax                = $new_line->get_jmax();
 1097     my $maximum_field_index = $base_line->get_jmax();
 1098 
 1099     my $jlimit = $jmax - 2;
 1100     if ( $jmax > $maximum_field_index ) {
 1101         $jlimit = $maximum_field_index - 2;
 1102     }
 1103 
 1104     if ( $new_line->get_is_hanging_side_comment() ) {
 1105 
 1106         # HSC's can join the group if they fit
 1107     }
 1108 
 1109     # Everything else
 1110     else {
 1111 
 1112         # A group with hanging side comments ends with the first non hanging
 1113         # side comment.
 1114         if ( $base_line->get_is_hanging_side_comment() ) {
 1115             $GoToMsg = "end of hanging side comments";
 1116             goto NO_MATCH;
 1117         }
 1118 
 1119         # The number of tokens that this line shares with the previous line
 1120         # has been stored with the previous line.  This value was calculated
 1121         # and stored by sub 'match_line_pair'.
 1122         $imax_align = $prev_line->get_imax_pair();
 1123 
 1124         if ( $imax_align != $jlimit ) {
 1125             $GoToMsg = "Not all tokens match: $imax_align != $jlimit\n";
 1126             goto NO_MATCH;
 1127         }
 1128 
 1129     }
 1130 
 1131     # The tokens match, but the lines must have identical number of
 1132     # tokens to join the group.
 1133     if ( $maximum_field_index != $jmax ) {
 1134         $GoToMsg = "token count differs";
 1135         goto NO_MATCH;
 1136     }
 1137 
 1138     # The tokens match. Now See if there is space for this line in the
 1139     # current group.
 1140     if ( $self->check_fit( $new_line, $base_line ) && !TEST_SWEEP_ONLY ) {
 1141 
 1142         EXPLAIN_CHECK_MATCH
 1143           && print "match and fit, imax_align=$imax_align, jmax=$jmax\n";
 1144         return ( 2, $jlimit );
 1145     }
 1146     else {
 1147 
 1148         EXPLAIN_CHECK_MATCH
 1149           && print "match but no fit, imax_align=$imax_align, jmax=$jmax\n";
 1150         return ( 1, $jlimit );
 1151     }
 1152 
 1153   NO_MATCH:
 1154 
 1155     EXPLAIN_CHECK_MATCH
 1156       && print
 1157       "no match because $GoToMsg, max match index =i $imax_align, jmax=$jmax\n";
 1158 
 1159     return ( 0, $imax_align );
 1160 }
 1161 
 1162 sub check_fit {
 1163 
 1164     my ( $self, $new_line, $old_line ) = @_;
 1165 
 1166     # The new line has alignments identical to the current group. Now we have
 1167     # to fit the new line into the group without causing a field to exceed the
 1168     # line length limit.
 1169     #   return true if successful
 1170     #   return false if not successful
 1171 
 1172     my $jmax                = $new_line->get_jmax();
 1173     my $leading_space_count = $new_line->get_leading_space_count();
 1174     my $rfield_lengths      = $new_line->get_rfield_lengths();
 1175     my $padding_available   = $old_line->get_available_space_on_right();
 1176     my $jmax_old            = $old_line->get_jmax();
 1177 
 1178     # Safety check ... only lines with equal array sizes should arrive here
 1179     # from sub check_match.  So if this error occurs, look at recent changes in
 1180     # sub check_match.  It is only supposed to check the fit of lines with
 1181     # identical numbers of alignment tokens.
 1182     if ( $jmax_old ne $jmax ) {
 1183 
 1184         $self->warning(<<EOM);
 1185 Program bug detected in Perl::Tidy::VerticalAligner sub check_fit 
 1186 unexpected difference in array lengths: $jmax != $jmax_old
 1187 EOM
 1188         return;
 1189     }
 1190 
 1191     # Save current columns in case this line does not fit.
 1192     my @alignments = $old_line->get_alignments();
 1193     foreach my $alignment (@alignments) {
 1194         $alignment->save_column();
 1195     }
 1196 
 1197     my $is_hanging_side_comment = $new_line->get_is_hanging_side_comment();
 1198 
 1199     # Loop over all alignments ...
 1200     my $maximum_field_index = $old_line->get_jmax();
 1201     for my $j ( 0 .. $jmax ) {
 1202 
 1203         my $pad = $rfield_lengths->[$j] - $old_line->current_field_width($j);
 1204 
 1205         if ( $j == 0 ) {
 1206             $pad += $leading_space_count;
 1207         }
 1208 
 1209         # Keep going if this field does not need any space.
 1210         next if $pad < 0;
 1211 
 1212         # See if it needs too much space.
 1213         if ( $pad > $padding_available ) {
 1214 
 1215             ################################################
 1216             # Line does not fit -- revert to starting state
 1217             ################################################
 1218             foreach my $alignment (@alignments) {
 1219                 $alignment->restore_column();
 1220             }
 1221             return;
 1222         }
 1223 
 1224         # make room for this field
 1225         $old_line->increase_field_width( $j, $pad );
 1226         $padding_available -= $pad;
 1227     }
 1228 
 1229     ######################################
 1230     # The line fits, the match is accepted
 1231     ######################################
 1232     return 1;
 1233 
 1234 }
 1235 
 1236 sub install_new_alignments {
 1237 
 1238     my ($new_line) = @_;
 1239 
 1240     my $jmax           = $new_line->get_jmax();
 1241     my $rfield_lengths = $new_line->get_rfield_lengths();
 1242     my $col            = $new_line->get_leading_space_count();
 1243 
 1244     for my $j ( 0 .. $jmax ) {
 1245         $col += $rfield_lengths->[$j];
 1246 
 1247         # create initial alignments for the new group
 1248         my $alignment =
 1249           Perl::Tidy::VerticalAligner::Alignment->new( { column => $col } );
 1250         $new_line->set_alignment( $j, $alignment );
 1251     }
 1252     return;
 1253 }
 1254 
 1255 sub copy_old_alignments {
 1256     my ( $new_line, $old_line ) = @_;
 1257     my @new_alignments = $old_line->get_alignments();
 1258     $new_line->set_alignments(@new_alignments);
 1259     return;
 1260 }
 1261 
 1262 sub dump_array {
 1263 
 1264     # debug routine to dump array contents
 1265     local $" = ')(';
 1266     print STDOUT "(@_)\n";
 1267     return;
 1268 }
 1269 
 1270 sub level_change {
 1271 
 1272     # compute decrease in level when we remove $diff spaces from the
 1273     # leading spaces
 1274     my ( $self, $leading_space_count, $diff, $level ) = @_;
 1275 
 1276     my $rOpts_indent_columns = $self->[_rOpts_indent_columns_];
 1277     if ($rOpts_indent_columns) {
 1278         my $olev =
 1279           int( ( $leading_space_count + $diff ) / $rOpts_indent_columns );
 1280         my $nlev = int( $leading_space_count / $rOpts_indent_columns );
 1281         $level -= ( $olev - $nlev );
 1282         if ( $level < 0 ) { $level = 0 }
 1283     }
 1284     return $level;
 1285 }
 1286 
 1287 ###############################################
 1288 # CODE SECTION 4: Code to process comment lines
 1289 ###############################################
 1290 
 1291 sub _flush_comment_lines {
 1292 
 1293     # Output a group consisting of COMMENT lines
 1294 
 1295     my ($self) = @_;
 1296     my $rgroup_lines = $self->[_rgroup_lines_];
 1297     return unless ( @{$rgroup_lines} );
 1298     my $group_level         = $self->[_group_level_];
 1299     my $leading_space_count = $self->[_comment_leading_space_count_];
 1300     my $leading_string =
 1301       $self->get_leading_string( $leading_space_count, $group_level );
 1302 
 1303     # look for excessively long lines
 1304     my $max_excess = 0;
 1305     foreach my $item ( @{$rgroup_lines} ) {
 1306         my ( $str, $str_len ) = @{$item};
 1307         my $excess =
 1308           $str_len +
 1309           $leading_space_count -
 1310           $self->maximum_line_length_for_level($group_level);
 1311         if ( $excess > $max_excess ) {
 1312             $max_excess = $excess;
 1313         }
 1314     }
 1315 
 1316     # zero leading space count if any lines are too long
 1317     if ( $max_excess > 0 ) {
 1318         $leading_space_count -= $max_excess;
 1319         if ( $leading_space_count < 0 ) { $leading_space_count = 0 }
 1320         my $file_writer_object = $self->[_file_writer_object_];
 1321         my $last_outdented_line_at =
 1322           $file_writer_object->get_output_line_number();
 1323         $self->[_last_outdented_line_at_] = $last_outdented_line_at;
 1324         my $outdented_line_count = $self->[_outdented_line_count_];
 1325         unless ($outdented_line_count) {
 1326             $self->[_first_outdented_line_at_] = $last_outdented_line_at;
 1327         }
 1328         my $nlines = @{$rgroup_lines};
 1329         $outdented_line_count += $nlines;
 1330         $self->[_outdented_line_count_] = $outdented_line_count;
 1331     }
 1332 
 1333     # write the lines
 1334     my $outdent_long_lines = 0;
 1335 
 1336     foreach my $item ( @{$rgroup_lines} ) {
 1337         my ( $str, $str_len, $Kend ) = @{$item};
 1338         $self->valign_output_step_B(
 1339             {
 1340                 leading_space_count       => $leading_space_count,
 1341                 line                      => $str,
 1342                 line_length               => $str_len,
 1343                 side_comment_length       => 0,
 1344                 outdent_long_lines        => $outdent_long_lines,
 1345                 rvertical_tightness_flags => "",
 1346                 level                     => $group_level,
 1347                 level_end                 => $group_level,
 1348                 Kend                      => $Kend,
 1349             }
 1350         );
 1351     }
 1352 
 1353     $self->initialize_for_new_group();
 1354     return;
 1355 }
 1356 
 1357 ######################################################
 1358 # CODE SECTION 5: Code to process groups of code lines
 1359 ######################################################
 1360 
 1361 sub _flush_group_lines {
 1362 
 1363     # This is the vertical aligner internal flush, which leaves the cache
 1364     # intact
 1365     my ( $self, $level_jump ) = @_;
 1366 
 1367     # $level_jump = $next_level-$group_level, if known
 1368     #             = undef if not known
 1369 
 1370     my $rgroup_lines = $self->[_rgroup_lines_];
 1371     return unless ( @{$rgroup_lines} );
 1372     my $group_type  = $self->[_group_type_];
 1373     my $group_level = $self->[_group_level_];
 1374 
 1375     # Debug
 1376     0 && do {
 1377         my ( $a, $b, $c ) = caller();
 1378         my $nlines = @{$rgroup_lines};
 1379         print STDOUT
 1380 "APPEND0: _flush_group_lines called from $a $b $c lines=$nlines, type=$group_type \n";
 1381     };
 1382 
 1383     ############################################
 1384     # Section 1: Handle a group of COMMENT lines
 1385     ############################################
 1386     if ( $group_type eq 'COMMENT' ) {
 1387         $self->_flush_comment_lines();
 1388         return;
 1389     }
 1390 
 1391     #########################################################################
 1392     # Section 2: Handle line(s) of CODE.  Most of the actual work of vertical
 1393     # aligning happens here in the following steps:
 1394     #########################################################################
 1395 
 1396     # STEP 1: Remove most unmatched tokens. They block good alignments.
 1397     my ( $max_lev_diff, $saw_side_comment ) =
 1398       delete_unmatched_tokens( $rgroup_lines, $group_level );
 1399 
 1400     # STEP 2: Sweep top to bottom, forming subgroups of lines with exactly
 1401     # matching common alignments.  The indexes of these subgroups are in the
 1402     # return variable.
 1403     my $rgroups = $self->sweep_top_down( $rgroup_lines, $group_level );
 1404 
 1405     # STEP 3: Sweep left to right through the lines, looking for leading
 1406     # alignment tokens shared by groups.
 1407     sweep_left_to_right( $rgroup_lines, $rgroups, $group_level )
 1408       if ( @{$rgroups} > 1 );
 1409 
 1410     # STEP 4: Move side comments to a common column if possible.
 1411     if ($saw_side_comment) {
 1412         $self->align_side_comments( $rgroup_lines, $rgroups );
 1413     }
 1414 
 1415     # STEP 5: For the -lp option, increase the indentation of lists
 1416     # to the desired amount, but do not exceed the line length limit.
 1417 
 1418     # We are allowed to shift a group of lines to the right if:
 1419     #  (1) its level is greater than the level of the previous group, and
 1420     #  (2) its level is greater than the level of the next line to be written.
 1421 
 1422     my $extra_indent_ok;
 1423     if ( $group_level > $self->[_last_level_written_] ) {
 1424 
 1425         # Use the level jump to next line to come, if given
 1426         if ( defined($level_jump) ) {
 1427             $extra_indent_ok = $level_jump < 0;
 1428         }
 1429 
 1430         # Otherwise, assume the next line has the level of the end of last line.
 1431         # This fixes case c008.
 1432         else {
 1433             my $level_end = $rgroup_lines->[-1]->get_level_end();
 1434             $extra_indent_ok = $group_level > $level_end;
 1435         }
 1436     }
 1437 
 1438     my $extra_leading_spaces =
 1439       $extra_indent_ok
 1440       ? get_extra_leading_spaces( $rgroup_lines, $rgroups )
 1441       : 0;
 1442 
 1443     # STEP 6: Output the lines.
 1444     # All lines in this batch have the same basic leading spacing:
 1445     my $group_leader_length = $rgroup_lines->[0]->get_leading_space_count();
 1446 
 1447     foreach my $line ( @{$rgroup_lines} ) {
 1448         $self->valign_output_step_A(
 1449             {
 1450                 line                 => $line,
 1451                 min_ci_gap           => 0,
 1452                 do_not_align         => 0,
 1453                 group_leader_length  => $group_leader_length,
 1454                 extra_leading_spaces => $extra_leading_spaces,
 1455                 level                => $group_level,
 1456             }
 1457         );
 1458     }
 1459 
 1460     $self->initialize_for_new_group();
 1461     return;
 1462 }
 1463 
 1464 {    ## closure for sub sweep_top_down
 1465 
 1466     my $rall_lines;         # all of the lines
 1467     my $grp_level;          # level of all lines
 1468     my $rgroups;            # describes the partition of lines we will make here
 1469     my $group_line_count;   # number of lines in current partition
 1470 
 1471     BEGIN { $rgroups = [] }
 1472 
 1473     sub initialize_for_new_rgroup {
 1474         $group_line_count = 0;
 1475         return;
 1476     }
 1477 
 1478     sub add_to_rgroup {
 1479 
 1480         my ($jend) = @_;
 1481         my $rline = $rall_lines->[$jend];
 1482 
 1483         my $jbeg = $jend;
 1484         if ( $group_line_count == 0 ) {
 1485             install_new_alignments($rline);
 1486         }
 1487         else {
 1488             my $rvals = pop @{$rgroups};
 1489             $jbeg = $rvals->[0];
 1490             copy_old_alignments( $rline, $rall_lines->[$jbeg] );
 1491         }
 1492         push @{$rgroups}, [ $jbeg, $jend, undef ];
 1493         $group_line_count++;
 1494         return;
 1495     }
 1496 
 1497     sub get_rgroup_jrange {
 1498 
 1499         return unless @{$rgroups};
 1500         return unless ( $group_line_count > 0 );
 1501         my ( $jbeg, $jend ) = @{ $rgroups->[-1] };
 1502         return ( $jbeg, $jend );
 1503     }
 1504 
 1505     sub end_rgroup {
 1506 
 1507         my ($imax_align) = @_;
 1508         return unless @{$rgroups};
 1509         return unless ( $group_line_count > 0 );
 1510 
 1511         my ( $jbeg, $jend ) = @{ pop @{$rgroups} };
 1512         push @{$rgroups}, [ $jbeg, $jend, $imax_align ];
 1513 
 1514         # Undo some alignments of poor two-line combinations.
 1515         # We had to wait until now to know the line count.
 1516         if ( $jend - $jbeg == 1 ) {
 1517             my $line_0 = $rall_lines->[$jbeg];
 1518             my $line_1 = $rall_lines->[$jend];
 1519 
 1520             my $imax_pair = $line_1->get_imax_pair();
 1521             if ( $imax_pair > $imax_align ) { $imax_align = $imax_pair }
 1522 
 1523             ## flag for possible future use:
 1524             ## my $is_isolated_pair = $imax_pair < 0
 1525             ##  && ( $jbeg == 0
 1526             ##    || $rall_lines->[ $jbeg - 1 ]->get_imax_pair() < 0 );
 1527 
 1528             my $imax_prev =
 1529               $jbeg > 0 ? $rall_lines->[ $jbeg - 1 ]->get_imax_pair() : -1;
 1530 
 1531             my ( $is_marginal, $imax_align_fix ) =
 1532               is_marginal_match( $line_0, $line_1, $grp_level, $imax_align,
 1533                 $imax_prev );
 1534             if ($is_marginal) {
 1535                 combine_fields( $line_0, $line_1, $imax_align_fix );
 1536             }
 1537         }
 1538 
 1539         initialize_for_new_rgroup();
 1540         return;
 1541     }
 1542 
 1543     sub block_penultimate_match {
 1544 
 1545         # emergency reset to prevent sweep_left_to_right from trying to match a
 1546         # failed terminal else match
 1547         return unless @{$rgroups} > 1;
 1548         $rgroups->[-2]->[2] = -1;
 1549         return;
 1550     }
 1551 
 1552     sub sweep_top_down {
 1553         my ( $self, $rlines, $group_level ) = @_;
 1554 
 1555         # Partition the set of lines into final alignment subgroups
 1556         # and store the alignments with the lines.
 1557 
 1558         # The alignment subgroups we are making here are groups of consecutive
 1559         # lines which have (1) identical alignment tokens and (2) do not
 1560         # exceed the allowable maximum line length.  A later sweep from
 1561         # left-to-right ('sweep_lr') will handle additional alignments.
 1562 
 1563         # transfer args to closure variables
 1564         $rall_lines = $rlines;
 1565         $grp_level  = $group_level;
 1566         $rgroups    = [];
 1567         initialize_for_new_rgroup();
 1568         return unless @{$rlines};    # shouldn't happen
 1569 
 1570         # Unset the _end_group flag for the last line if it it set because it
 1571         # is not needed and can causes problems for -lp formatting
 1572         $rall_lines->[-1]->set_end_group(0);
 1573 
 1574         # Loop over all lines ...
 1575         my $jline = -1;
 1576         foreach my $new_line ( @{$rall_lines} ) {
 1577             $jline++;
 1578 
 1579             # Start a new subgroup if necessary
 1580             if ( !$group_line_count ) {
 1581                 add_to_rgroup($jline);
 1582                 if ( $new_line->get_end_group() ) {
 1583                     end_rgroup(-1);
 1584                 }
 1585                 next;
 1586             }
 1587 
 1588             my $j_terminal_match = $new_line->get_j_terminal_match();
 1589             my ( $jbeg, $jend ) = get_rgroup_jrange();
 1590             if ( !defined($jbeg) ) {
 1591 
 1592                 # safety check, shouldn't happen
 1593                 $self->warning(<<EOM);
 1594 Program bug detected in Perl::Tidy::VerticalAligner sub sweep_top_down 
 1595 undefined index for group line count $group_line_count
 1596 EOM
 1597                 $jbeg = $jline;
 1598             }
 1599             my $base_line = $rall_lines->[$jbeg];
 1600 
 1601             # Initialize a global flag saying if the last line of the group
 1602             # should match end of group and also terminate the group.  There
 1603             # should be no returns between here and where the flag is handled
 1604             # at the bottom.
 1605             my $col_matching_terminal = 0;
 1606             if ( defined($j_terminal_match) ) {
 1607 
 1608                 # remember the column of the terminal ? or { to match with
 1609                 $col_matching_terminal =
 1610                   $base_line->get_column($j_terminal_match);
 1611 
 1612                 # Ignore an undefined value as a defensive step; shouldn't
 1613                 # normally happen.
 1614                 $col_matching_terminal = 0
 1615                   unless defined($col_matching_terminal);
 1616             }
 1617 
 1618             # -------------------------------------------------------------
 1619             # Allow hanging side comment to join current group, if any.  The
 1620             # only advantage is to keep the other tokens in the same group. For
 1621             # example, this would make the '=' align here:
 1622             #  $ax         = 1;           # side comment
 1623             #                             # hanging side comment
 1624             #  $boondoggle = 5;           # side comment
 1625             #  $beetle     = 5;           # side comment
 1626 
 1627             # here is another example..
 1628 
 1629             #  _rtoc_name_count   => {},                   # hash to track ..
 1630             #  _rpackage_stack    => [],                   # stack to check ..
 1631             #                                              # name changes
 1632             #  _rlast_level       => \$last_level,         # brace indentation
 1633             #
 1634             #
 1635             # If this were not desired, the next step could be skipped.
 1636             # -------------------------------------------------------------
 1637             if ( $new_line->get_is_hanging_side_comment() ) {
 1638                 join_hanging_comment( $new_line, $base_line );
 1639             }
 1640 
 1641             # If this line has no matching tokens, then flush out the lines
 1642             # BEFORE this line unless both it and the previous line have side
 1643             # comments.  This prevents this line from pushing side coments out
 1644             # to the right.
 1645             elsif ( $new_line->get_jmax() == 1 ) {
 1646 
 1647                 # There are no matching tokens, so now check side comments.
 1648                 # Programming note: accessing arrays with index -1 is
 1649                 # risky in Perl, but we have verified there is at least one
 1650                 # line in the group and that there is at least one field.
 1651                 my $prev_comment =
 1652                   $rall_lines->[ $jline - 1 ]->get_rfields()->[-1];
 1653                 my $side_comment = $new_line->get_rfields()->[-1];
 1654                 end_rgroup(-1) unless ( $side_comment && $prev_comment );
 1655             }
 1656 
 1657             # See if the new line matches and fits the current group,
 1658             # if it still exists. Flush the current group if not.
 1659             my $match_code;
 1660             if ($group_line_count) {
 1661                 ( $match_code, my $imax_align ) =
 1662                   $self->check_match( $new_line, $base_line,
 1663                     $rall_lines->[ $jline - 1 ] );
 1664                 if ( $match_code != 2 ) { end_rgroup($imax_align) }
 1665             }
 1666 
 1667             # Store the new line
 1668             add_to_rgroup($jline);
 1669 
 1670             if ( defined($j_terminal_match) ) {
 1671 
 1672                 # Decide if we should fix a terminal match. We can either:
 1673                 # 1. fix it and prevent the sweep_lr from changing it, or
 1674                 # 2. leave it alone and let sweep_lr try to fix it.
 1675 
 1676                 # The current logic is to fix it if:
 1677                 # -it has not joined to previous lines,
 1678                 # -and either the previous subgroup has just 1 line, or
 1679                 # -this line matched but did not fit (so sweep won't work)
 1680                 my $fixit;
 1681                 if ( $group_line_count == 1 ) {
 1682                     $fixit ||= $match_code;
 1683                     if ( !$fixit ) {
 1684                         if ( @{$rgroups} > 1 ) {
 1685                             my ( $jbegx, $jendx ) = @{ $rgroups->[-2] };
 1686                             my $nlines = $jendx - $jbegx + 1;
 1687                             $fixit ||= $nlines <= 1;
 1688                         }
 1689                     }
 1690                 }
 1691 
 1692                 if ($fixit) {
 1693                     $base_line = $new_line;
 1694                     my $col_now = $base_line->get_column($j_terminal_match);
 1695 
 1696                     # Ignore an undefined value as a defensive step; shouldn't
 1697                     # normally happen.
 1698                     $col_now = 0 unless defined($col_now);
 1699 
 1700                     my $pad = $col_matching_terminal - $col_now;
 1701                     my $padding_available =
 1702                       $base_line->get_available_space_on_right();
 1703                     if ( $col_now && $pad > 0 && $pad <= $padding_available ) {
 1704                         $base_line->increase_field_width( $j_terminal_match,
 1705                             $pad );
 1706                     }
 1707 
 1708                     # do not let sweep_left_to_right change an isolated 'else'
 1709                     if ( !$new_line->get_is_terminal_ternary() ) {
 1710                         block_penultimate_match();
 1711                     }
 1712                 }
 1713                 end_rgroup(-1);
 1714             }
 1715 
 1716             # end the group if we know we cannot match next line.
 1717             elsif ( $new_line->get_end_group() ) {
 1718                 end_rgroup(-1);
 1719             }
 1720         } ## end loop over lines
 1721 
 1722         end_rgroup(-1);
 1723         return ($rgroups);
 1724     }
 1725 }
 1726 
 1727 sub two_line_pad {
 1728 
 1729     my ( $line_m, $line, $imax_min ) = @_;
 1730 
 1731     # Given:
 1732     #  two isolated (list) lines
 1733     #  imax_min = number of common alignment tokens
 1734     # Return:
 1735     #  $pad_max = maximum suggested pad distnce
 1736     #           = 0 if alignment not recommended
 1737     # Note that this is only for two lines which do not have alignment tokens
 1738     # in common with any other lines.  It is intended for lists, but it might
 1739     # also be used for two non-list lines with a common leading '='.
 1740 
 1741     # Allow alignment if the difference in the two unpadded line lengths
 1742     # is not more than either line length.  The idea is to avoid
 1743     # aligning lines with very different field lengths, like these two:
 1744 
 1745     #   [
 1746     #       'VARCHAR', DBI::SQL_VARCHAR, undef, "'", "'", undef, 0, 1,
 1747     #       1, 0, 0, 0, undef, 0, 0
 1748     #   ];
 1749     my $rfield_lengths   = $line->get_rfield_lengths();
 1750     my $rfield_lengths_m = $line_m->get_rfield_lengths();
 1751 
 1752     # Safety check - shouldn't happen
 1753     return 0
 1754       unless $imax_min < @{$rfield_lengths} && $imax_min < @{$rfield_lengths_m};
 1755 
 1756     my $lensum_m = 0;
 1757     my $lensum   = 0;
 1758     for ( my $i = 0 ; $i <= $imax_min ; $i++ ) {
 1759         $lensum_m += $rfield_lengths_m->[$i];
 1760         $lensum   += $rfield_lengths->[$i];
 1761     }
 1762 
 1763     my ( $lenmin, $lenmax ) =
 1764       $lensum >= $lensum_m ? ( $lensum_m, $lensum ) : ( $lensum, $lensum_m );
 1765 
 1766     my $patterns_match;
 1767     if ( $line_m->get_list_type() && $line->get_list_type() ) {
 1768         $patterns_match = 1;
 1769         my $rpatterns_m = $line_m->get_rpatterns();
 1770         my $rpatterns   = $line->get_rpatterns();
 1771         for ( my $i = 0 ; $i <= $imax_min ; $i++ ) {
 1772             my $pat   = $rpatterns->[$i];
 1773             my $pat_m = $rpatterns_m->[$i];
 1774             if ( $pat ne $pat_m ) { $patterns_match = 0; last }
 1775         }
 1776     }
 1777 
 1778     my $pad_max = $lenmax;
 1779     if ( !$patterns_match && $lenmax > 2 * $lenmin ) { $pad_max = 0 }
 1780 
 1781     return $pad_max;
 1782 }
 1783 
 1784 sub sweep_left_to_right {
 1785 
 1786     my ( $rlines, $rgroups, $group_level ) = @_;
 1787 
 1788     # So far we have divided the lines into groups having an equal number of
 1789     # identical alignments.  Here we are going to look for common leading
 1790     # alignments between the different groups and align them when possible.
 1791     # For example, the three lines below are in three groups because each line
 1792     # has a different number of commas.  In this routine we will sweep from
 1793     # left to right, aligning the leading commas as we go, but stopping if we
 1794     # hit the line length limit.
 1795 
 1796     #  my ( $num, $numi, $numj,  $xyza, $ka,   $xyzb, $kb, $aff, $error );
 1797     #  my ( $i,   $j,    $error, $aff,  $asum, $avec );
 1798     #  my ( $km,  $area, $varea );
 1799 
 1800     # nothing to do if just one group
 1801     my $ng_max = @{$rgroups} - 1;
 1802     return unless ( $ng_max > 0 );
 1803 
 1804     ############################################################################
 1805     # Step 1: Loop over groups to find all common leading alignment tokens
 1806     ############################################################################
 1807 
 1808     my $line;
 1809     my $rtokens;
 1810     my $imax;     # index of maximum non-side-comment alignment token
 1811     my $istop;    # an optional stopping index
 1812     my $jbeg;     # starting line index
 1813     my $jend;     # ending line index
 1814 
 1815     my $line_m;
 1816     my $rtokens_m;
 1817     my $imax_m;
 1818     my $istop_m;
 1819     my $jbeg_m;
 1820     my $jend_m;
 1821 
 1822     my $istop_mm;
 1823 
 1824     # Look at neighboring pairs of groups and form a simple list
 1825     # of all common leading alignment tokens. Foreach such match we
 1826     # store [$i, $ng], where
 1827     #  $i = index of the token in the line (0,1,...)
 1828     #  $ng is the second of the two groups with this common token
 1829     my @icommon;
 1830 
 1831     # Hash to hold the maximum alignment change for any group
 1832     my %max_move;
 1833 
 1834     # a small number of columns
 1835     my $short_pad = 4;
 1836 
 1837     my $ng = -1;
 1838     foreach my $item ( @{$rgroups} ) {
 1839         $ng++;
 1840 
 1841         $istop_mm = $istop_m;
 1842 
 1843         # save _m values of previous group
 1844         $line_m    = $line;
 1845         $rtokens_m = $rtokens;
 1846         $imax_m    = $imax;
 1847         $istop_m   = $istop;
 1848         $jbeg_m    = $jbeg;
 1849         $jend_m    = $jend;
 1850 
 1851         # Get values for this group. Note that we just have to use values for
 1852         # one of the lines of the group since all members have the same
 1853         # alignments.
 1854         ( $jbeg, $jend, $istop ) = @{$item};
 1855 
 1856         $line    = $rlines->[$jbeg];
 1857         $rtokens = $line->get_rtokens();
 1858         $imax    = $line->get_jmax() - 2;
 1859         $istop   = -1 unless ( defined($istop) );
 1860         $istop   = $imax if ( $istop > $imax );
 1861 
 1862         # Initialize on first group
 1863         next if ( $ng == 0 );
 1864 
 1865         # Use the minimum index limit of the two groups
 1866         my $imax_min = $imax > $imax_m ? $imax_m : $imax;
 1867 
 1868         # Also impose a limit if given.
 1869         if ( $istop_m < $imax_min ) {
 1870             $imax_min = $istop_m;
 1871         }
 1872 
 1873         # Special treatment of two one-line groups isolated from other lines,
 1874         # unless they form a simple list or a terminal match.  Otherwise the
 1875         # alignment can look strange in some cases.
 1876         my $list_type = $rlines->[$jbeg]->get_list_type();
 1877         if (
 1878                $jend == $jbeg
 1879             && $jend_m == $jbeg_m
 1880             && ( $ng == 1 || $istop_mm < 0 )
 1881             && ( $ng == $ng_max || $istop < 0 )
 1882             && !$line->get_j_terminal_match()
 1883 
 1884             # Only do this for imperfect matches. This is normally true except
 1885             # when two perfect matches cannot form a group because the line
 1886             # length limit would be exceeded. In that case we can still try
 1887             # to match as many alignments as possible.
 1888             && ( $imax != $imax_m || $istop_m != $imax_m )
 1889           )
 1890         {
 1891 
 1892             # We will just align assignments and simple lists
 1893             next unless ( $imax_min >= 0 );
 1894             next
 1895               unless ( $rtokens->[0] =~ /^=\d/
 1896                 || $list_type );
 1897 
 1898             # In this case we will limit padding to a short distance.  This
 1899             # is a compromise to keep some vertical alignment but prevent large
 1900             # gaps, which do not look good for just two lines.
 1901             my $pad_max =
 1902               two_line_pad( $rlines->[$jbeg], $rlines->[$jbeg_m], $imax_min );
 1903             next unless ($pad_max);
 1904             my $ng_m = $ng - 1;
 1905             $max_move{"$ng_m"} = $pad_max;
 1906             $max_move{"$ng"}   = $pad_max;
 1907         }
 1908 
 1909         # Loop to find all common leading tokens.
 1910         if ( $imax_min >= 0 ) {
 1911             foreach my $i ( 0 .. $imax_min ) {
 1912                 my $tok   = $rtokens->[$i];
 1913                 my $tok_m = $rtokens_m->[$i];
 1914                 last if ( $tok ne $tok_m );
 1915                 push @icommon, [ $i, $ng, $tok ];
 1916             }
 1917         }
 1918     }
 1919     return unless @icommon;
 1920 
 1921     ###########################################################
 1922     # Step 2: Reorder and consolidate the list into a task list
 1923     ###########################################################
 1924 
 1925     # We have to work first from lowest token index to highest, then by group,
 1926     # sort our list first on token index then group number
 1927     @icommon = sort { $a->[0] <=> $b->[0] || $a->[1] <=> $b->[1] } @icommon;
 1928 
 1929     # Make a task list of the form
 1930     #   [$i, ng_beg, $ng_end, $tok], ..
 1931     # where
 1932     #   $i is the index of the token to be aligned
 1933     #   $ng_beg..$ng_end is the group range for this action
 1934     my @todo;
 1935     my ( $i, $ng_end, $tok );
 1936     foreach my $item (@icommon) {
 1937         my $ng_last = $ng_end;
 1938         my $i_last  = $i;
 1939         ( $i, $ng_end, $tok ) = @{$item};
 1940         my $ng_beg = $ng_end - 1;
 1941         if ( defined($ng_last) && $ng_beg == $ng_last && $i == $i_last ) {
 1942             my $var = pop(@todo);
 1943             $ng_beg = $var->[1];
 1944         }
 1945         my ( $raw_tok, $lev, $tag, $tok_count ) = decode_alignment_token($tok);
 1946         push @todo, [ $i, $ng_beg, $ng_end, $raw_tok, $lev ];
 1947     }
 1948 
 1949     ###############################
 1950     # Step 3: Execute the task list
 1951     ###############################
 1952     do_left_to_right_sweep( $rlines, $rgroups, \@todo, \%max_move, $short_pad,
 1953         $group_level );
 1954     return;
 1955 }
 1956 
 1957 {    ## closure for sub do_left_to_right_sweep
 1958 
 1959     my %is_good_alignment_token;
 1960 
 1961     BEGIN {
 1962 
 1963         # One of the most difficult aspects of vertical alignment is knowing
 1964         # when not to align.  Alignment can go from looking very nice to very
 1965         # bad when overdone.  In the sweep algorithm there are two special
 1966         # cases where we may need to limit padding to a '$short_pad' distance
 1967         # to avoid some very ugly formatting:
 1968 
 1969         # 1. Two isolated lines with partial alignment
 1970         # 2. A 'tail-wag-dog' situation, in which a single terminal
 1971         #    line with partial alignment could cause a significant pad
 1972         #    increase in many previous lines if allowed to join the alignment.
 1973 
 1974         # For most alignment tokens, we will allow only a small pad to be
 1975         # introduced (the hardwired $short_pad variable) . But for some 'good'
 1976         # alignments we can be less restrictive.
 1977 
 1978         # These are 'good' alignments, which are allowed more padding:
 1979         my @q = qw(
 1980           => = ? if unless or || {
 1981         );
 1982         push @q, ',';
 1983         @is_good_alignment_token{@q} = (0) x scalar(@q);
 1984 
 1985         # Promote a few of these to 'best', with essentially no pad limit:
 1986         $is_good_alignment_token{'='}      = 1;
 1987         $is_good_alignment_token{'if'}     = 1;
 1988         $is_good_alignment_token{'unless'} = 1;
 1989         $is_good_alignment_token{'=>'}     = 1
 1990 
 1991           # Note the hash values are set so that:
 1992           #         if ($is_good_alignment_token{$raw_tok}) => best
 1993           # if defined ($is_good_alignment_token{$raw_tok}) => good or best
 1994 
 1995     }
 1996 
 1997     sub do_left_to_right_sweep {
 1998         my ( $rlines, $rgroups, $rtodo, $rmax_move, $short_pad, $group_level )
 1999           = @_;
 2000 
 2001         # $blocking_level[$nj is the level at a match failure between groups
 2002         # $ng-1 and $ng
 2003         my @blocking_level;
 2004         my $group_list_type = $rlines->[0]->get_list_type();
 2005 
 2006         my $move_to_common_column = sub {
 2007 
 2008             # Move the alignment column of token $itok to $col_want for a
 2009             # sequence of groups.
 2010             my ( $ngb, $nge, $itok, $col_want, $raw_tok ) = @_;
 2011             return unless ( defined($ngb) && $nge > $ngb );
 2012             foreach my $ng ( $ngb .. $nge ) {
 2013 
 2014                 my ( $jbeg, $jend ) = @{ $rgroups->[$ng] };
 2015                 my $line  = $rlines->[$jbeg];
 2016                 my $col   = $line->get_column($itok);
 2017                 my $avail = $line->get_available_space_on_right();
 2018                 my $move  = $col_want - $col;
 2019                 if ( $move > 0 ) {
 2020 
 2021                     # limit padding increase in isolated two lines
 2022                     next
 2023                       if ( defined( $rmax_move->{$ng} )
 2024                         && $move > $rmax_move->{$ng}
 2025                         && !$is_good_alignment_token{$raw_tok} );
 2026 
 2027                     $line->increase_field_width( $itok, $move );
 2028                 }
 2029                 elsif ( $move < 0 ) {
 2030 
 2031                     # spot to take special action on failure to move
 2032                 }
 2033             }
 2034         };
 2035 
 2036         foreach my $task ( @{$rtodo} ) {
 2037             my ( $itok, $ng_beg, $ng_end, $raw_tok, $lev ) = @{$task};
 2038 
 2039             # Nothing to do for a single group
 2040             next unless ( $ng_end > $ng_beg );
 2041 
 2042             my $ng_first;  # index of the first group of a continuous sequence
 2043             my $col_want;  # the common alignment column of a sequence of groups
 2044             my $col_limit; # maximum column before bumping into max line length
 2045             my $line_count_ng_m = 0;
 2046             my $jmax_m;
 2047             my $it_stop_m;
 2048 
 2049             # Loop over the groups
 2050             # 'ix_' = index in the array of lines
 2051             # 'ng_' = index in the array of groups
 2052             # 'it_' = index in the array of tokens
 2053             my $ix_min      = $rgroups->[$ng_beg]->[0];
 2054             my $ix_max      = $rgroups->[$ng_end]->[1];
 2055             my $lines_total = $ix_max - $ix_min + 1;
 2056             foreach my $ng ( $ng_beg .. $ng_end ) {
 2057                 my ( $ix_beg, $ix_end, $it_stop ) = @{ $rgroups->[$ng] };
 2058                 my $line_count_ng = $ix_end - $ix_beg + 1;
 2059 
 2060                 # Important: note that since all lines in a group have a common
 2061                 # alignments object, we just have to work on one of the lines
 2062                 # (the first line).  All of the rest will be changed
 2063                 # automatically.
 2064                 my $line = $rlines->[$ix_beg];
 2065                 my $jmax = $line->get_jmax();
 2066 
 2067                 # the maximum space without exceeding the line length:
 2068                 my $avail   = $line->get_available_space_on_right();
 2069                 my $col     = $line->get_column($itok);
 2070                 my $col_max = $col + $avail;
 2071 
 2072                 # Initialize on first group
 2073                 if ( !defined($col_want) ) {
 2074                     $ng_first        = $ng;
 2075                     $col_want        = $col;
 2076                     $col_limit       = $col_max;
 2077                     $line_count_ng_m = $line_count_ng;
 2078                     $jmax_m          = $jmax;
 2079                     $it_stop_m       = $it_stop;
 2080                     next;
 2081                 }
 2082 
 2083                 # RULE: Throw a blocking flag upon encountering a token level
 2084                 # different from the level of the first blocking token.  For
 2085                 # example, in the following example, if the = matches get
 2086                 # blocked between two groups as shown, then we want to start
 2087                 # blocking matches at the commas, which are at deeper level, so
 2088                 # that we do not get the big gaps shown here:
 2089 
 2090                 #  my $unknown3 = pack( "v",          -2 );
 2091                 #  my $unknown4 = pack( "v",          0x09 );
 2092                 #  my $unknown5 = pack( "VVV",        0x06, 0x00, 0x00 );
 2093                 #  my $num_bbd_blocks  = pack( "V",   $num_lists );
 2094                 #  my $root_startblock = pack( "V",   $root_start );
 2095                 #  my $unknown6        = pack( "VV",  0x00, 0x1000 );
 2096 
 2097                 # On the other hand, it is okay to keep matching at the same
 2098                 # level such as in a simple list of commas and/or fat arrors.
 2099 
 2100                 my $is_blocked = defined( $blocking_level[$ng] )
 2101                   && $lev > $blocking_level[$ng];
 2102 
 2103                 # TAIL-WAG-DOG RULE: prevent a 'tail-wag-dog' syndrom, meaning:
 2104                 # Do not let one or two lines with a **different number of
 2105                 # alignments** open up a big gap in a large block.  For
 2106                 # example, we will prevent something like this, where the first
 2107                 # line prys open the rest:
 2108 
 2109             #  $worksheet->write( "B7", "http://www.perl.com", undef, $format );
 2110             #  $worksheet->write( "C7", "",                    $format );
 2111             #  $worksheet->write( "D7", "",                    $format );
 2112             #  $worksheet->write( "D8", "",                    $format );
 2113             #  $worksheet->write( "D8", "",                    $format );
 2114 
 2115                 # We should exclude from consideration two groups which are
 2116                 # effectively the same but separated because one does not
 2117                 # fit in the maximum allowed line length.
 2118                 my $is_same_group =
 2119                   $jmax == $jmax_m && $it_stop_m == $jmax_m - 2;
 2120 
 2121                 my $lines_above = $ix_beg - $ix_min;
 2122                 my $lines_below = $lines_total - $lines_above;
 2123 
 2124                 # Increase the tolerable gap for certain favorable factors
 2125                 my $factor    = 1;
 2126                 my $top_level = $lev == $group_level;
 2127 
 2128                 # Align best top level alignment tokens like '=', 'if', ...
 2129                 # A factor of 10 allows a gap of up to 40 spaces
 2130                 if ( $top_level && $is_good_alignment_token{$raw_tok} ) {
 2131                     $factor = 10;
 2132                 }
 2133 
 2134                 # Otherwise allow some minimal padding of good alignments
 2135                 elsif (
 2136 
 2137                     defined( $is_good_alignment_token{$raw_tok} )
 2138 
 2139                     # We have to be careful if there are just 2 lines.  This
 2140                     # two-line factor allows large gaps only for 2 lines which
 2141                     # are simple lists with fewer items on the second line. It
 2142                     # gives results similar to previous versions of perltidy.
 2143                     && (   $lines_total > 2
 2144                         || $group_list_type && $jmax < $jmax_m && $top_level )
 2145                   )
 2146                 {
 2147                     $factor += 1;
 2148                     if ($top_level) {
 2149                         $factor += 1;
 2150                     }
 2151                 }
 2152 
 2153                 my $is_big_gap;
 2154                 if ( !$is_same_group ) {
 2155                     $is_big_gap ||=
 2156                       (      $lines_above == 1
 2157                           || $lines_above == 2 && $lines_below >= 4 )
 2158                       && $col_want > $col + $short_pad * $factor;
 2159                     $is_big_gap ||=
 2160                       (      $lines_below == 1
 2161                           || $lines_below == 2 && $lines_above >= 4 )
 2162                       && $col > $col_want + $short_pad * $factor;
 2163                 }
 2164 
 2165                 # if match is limited by gap size, stop aligning at this level
 2166                 if ($is_big_gap) {
 2167                     $blocking_level[$ng] = $lev - 1;
 2168                 }
 2169 
 2170                 # quit and restart if it cannot join this batch
 2171                 if (   $col_want > $col_max
 2172                     || $col > $col_limit
 2173                     || $is_big_gap
 2174                     || $is_blocked )
 2175                 {
 2176 
 2177                     # remember the level of the first blocking token
 2178                     if ( !defined( $blocking_level[$ng] ) ) {
 2179                         $blocking_level[$ng] = $lev;
 2180                     }
 2181 
 2182                     $move_to_common_column->(
 2183                         $ng_first, $ng - 1, $itok, $col_want, $raw_tok
 2184                     );
 2185                     $ng_first        = $ng;
 2186                     $col_want        = $col;
 2187                     $col_limit       = $col_max;
 2188                     $line_count_ng_m = $line_count_ng;
 2189                     $jmax_m          = $jmax;
 2190                     $it_stop_m       = $it_stop;
 2191                     next;
 2192                 }
 2193 
 2194                 $line_count_ng_m += $line_count_ng;
 2195 
 2196                 # update the common column and limit
 2197                 if ( $col > $col_want )      { $col_want  = $col }
 2198                 if ( $col_max < $col_limit ) { $col_limit = $col_max }
 2199 
 2200             } ## end loop over groups
 2201 
 2202             if ( $ng_end > $ng_first ) {
 2203                 $move_to_common_column->(
 2204                     $ng_first, $ng_end, $itok, $col_want, $raw_tok
 2205                 );
 2206             } ## end loop over groups for one task
 2207         } ## end loop over tasks
 2208 
 2209         return;
 2210     }
 2211 }
 2212 
 2213 sub delete_selected_tokens {
 2214 
 2215     my ( $line_obj, $ridel ) = @_;
 2216 
 2217     # $line_obj    is the line to be modified
 2218     # $ridel       is a ref to list of indexes to be deleted
 2219 
 2220     # remove an unused alignment token(s) to improve alignment chances
 2221 
 2222     return unless ( defined($line_obj) && defined($ridel) && @{$ridel} );
 2223 
 2224     my $jmax_old           = $line_obj->get_jmax();
 2225     my $rfields_old        = $line_obj->get_rfields();
 2226     my $rfield_lengths_old = $line_obj->get_rfield_lengths();
 2227     my $rpatterns_old      = $line_obj->get_rpatterns();
 2228     my $rtokens_old        = $line_obj->get_rtokens();
 2229     my $j_terminal_match   = $line_obj->get_j_terminal_match();
 2230 
 2231     use constant EXPLAIN_DELETE_SELECTED => 0;
 2232 
 2233     local $" = '> <';
 2234     EXPLAIN_DELETE_SELECTED && print <<EOM;
 2235 delete indexes: <@{$ridel}>
 2236 old jmax: $jmax_old
 2237 old tokens: <@{$rtokens_old}>
 2238 old patterns: <@{$rpatterns_old}>
 2239 old fields: <@{$rfields_old}>
 2240 old field_lengths: <@{$rfield_lengths_old}>
 2241 EOM
 2242 
 2243     my $rfields_new        = [];
 2244     my $rpatterns_new      = [];
 2245     my $rtokens_new        = [];
 2246     my $rfield_lengths_new = [];
 2247 
 2248     # Convert deletion list to a hash to allow any order, multiple entries,
 2249     # and avoid problems with index values out of range
 2250     my %delete_me;
 2251     @delete_me{ @{$ridel} } = (1) x scalar( @{$ridel} );
 2252 
 2253     my $pattern      = $rpatterns_old->[0];
 2254     my $field        = $rfields_old->[0];
 2255     my $field_length = $rfield_lengths_old->[0];
 2256     push @{$rfields_new},        $field;
 2257     push @{$rfield_lengths_new}, $field_length;
 2258     push @{$rpatterns_new},      $pattern;
 2259 
 2260     # Loop to either copy items or concatenate fields and patterns
 2261     my $jmin_del;
 2262     for ( my $j = 0 ; $j < $jmax_old ; $j++ ) {
 2263         my $token        = $rtokens_old->[$j];
 2264         my $field        = $rfields_old->[ $j + 1 ];
 2265         my $field_length = $rfield_lengths_old->[ $j + 1 ];
 2266         my $pattern      = $rpatterns_old->[ $j + 1 ];
 2267         if ( !$delete_me{$j} ) {
 2268             push @{$rtokens_new},        $token;
 2269             push @{$rfields_new},        $field;
 2270             push @{$rpatterns_new},      $pattern;
 2271             push @{$rfield_lengths_new}, $field_length;
 2272         }
 2273         else {
 2274             if ( !defined($jmin_del) ) { $jmin_del = $j }
 2275             $rfields_new->[-1] .= $field;
 2276             $rfield_lengths_new->[-1] += $field_length;
 2277             $rpatterns_new->[-1] .= $pattern;
 2278         }
 2279     }
 2280 
 2281     # ----- x ------ x ------ x ------
 2282     #t      0        1        2        <- token indexing
 2283     #f   0      1        2        3    <- field and pattern
 2284 
 2285     my $jmax_new = @{$rfields_new} - 1;
 2286     $line_obj->set_rtokens($rtokens_new);
 2287     $line_obj->set_rpatterns($rpatterns_new);
 2288     $line_obj->set_rfields($rfields_new);
 2289     $line_obj->set_rfield_lengths($rfield_lengths_new);
 2290     $line_obj->set_jmax($jmax_new);
 2291 
 2292     # The value of j_terminal_match will be incorrect if we delete tokens prior
 2293     # to it. We will have to give up on aligning the terminal tokens if this
 2294     # happens.
 2295     if ( defined($j_terminal_match) && $jmin_del <= $j_terminal_match ) {
 2296         $line_obj->set_j_terminal_match(undef);
 2297     }
 2298 
 2299     # update list type -
 2300     if ( $line_obj->get_list_seqno() ) {
 2301 
 2302         ## This works, but for efficiency see if we need to make a change:
 2303         ## decide_if_list($line_obj);
 2304 
 2305         # An existing list will still be a list but with possibly different
 2306         # leading token
 2307         my $old_list_type = $line_obj->get_list_type();
 2308         my $new_list_type = "";
 2309         if ( $rtokens_new->[0] =~ /^(=>|,)/ ) {
 2310             $new_list_type = $rtokens_new->[0];
 2311         }
 2312         if ( !$old_list_type || $old_list_type ne $new_list_type ) {
 2313             decide_if_list($line_obj);
 2314         }
 2315     }
 2316 
 2317     EXPLAIN_DELETE_SELECTED && print <<EOM;
 2318 
 2319 new jmax: $jmax_new
 2320 new tokens: <@{$rtokens_new}>
 2321 new patterns: <@{$rpatterns_new}>
 2322 new fields: <@{$rfields_new}>
 2323 EOM
 2324     return;
 2325 }
 2326 
 2327 {    ## closure for sub decode_alignment_token
 2328 
 2329     # This routine is called repeatedly for each token, so it needs to be
 2330     # efficient.  We can speed things up by remembering the inputs and outputs
 2331     # in a hash.
 2332     my %decoded_token;
 2333 
 2334     sub initialize_decode {
 2335 
 2336         # We will re-initialize the hash for each file. Otherwise, there is
 2337         # a danger that the hash can become arbitrarily large if a very large
 2338         # number of files is processed at once.
 2339         %decoded_token = ();
 2340         return;
 2341     }
 2342 
 2343     sub decode_alignment_token {
 2344 
 2345         # Unpack the values packed in an alignment token
 2346         #
 2347         # Usage:
 2348         #        my ( $raw_tok, $lev, $tag, $tok_count ) =
 2349         #          decode_alignment_token($token);
 2350 
 2351         # Alignment tokens have a trailing decimal level and optional tag (for
 2352         # commas):
 2353         # For example, the first comma in the following line
 2354         #     sub banner  { crlf; report( shift, '/', shift ); crlf }
 2355         # is decorated as follows:
 2356         #    ,2+report-6  => (tok,lev,tag) =qw( ,   2   +report-6)
 2357 
 2358         # An optional token count may be appended with a leading dot.
 2359         # Currently this is only done for '=' tokens but this could change.
 2360         # For example, consider the following line:
 2361         #   $nport   = $port = shift || $name;
 2362         # The first '=' may either be '=0' or '=0.1' [level 0, first equals]
 2363         # The second '=' will be '=0.2' [level 0, second equals]
 2364         my ($tok) = @_;
 2365 
 2366         if ( defined( $decoded_token{$tok} ) ) {
 2367             return @{ $decoded_token{$tok} };
 2368         }
 2369 
 2370         my ( $raw_tok, $lev, $tag, $tok_count ) = ( $tok, 0, "", 1 );
 2371         if ( $tok =~ /^(\D+)(\d+)([^\.]*)(\.(\d+))?$/ ) {
 2372             $raw_tok   = $1;
 2373             $lev       = $2;
 2374             $tag       = $3 if ($3);
 2375             $tok_count = $5 if ($5);
 2376         }
 2377         my @vals = ( $raw_tok, $lev, $tag, $tok_count );
 2378         $decoded_token{$tok} = \@vals;
 2379         return @vals;
 2380     }
 2381 }
 2382 
 2383 {    ## closure for sub delete_unmatched_tokens
 2384 
 2385     my %is_assignment;
 2386     my %keep_after_deleted_assignment;
 2387 
 2388     BEGIN {
 2389         my @q;
 2390 
 2391         @q = qw(
 2392           = **= += *= &= <<= &&=
 2393           -= /= |= >>= ||= //=
 2394           .= %= ^=
 2395           x=
 2396         );
 2397         @is_assignment{@q} = (1) x scalar(@q);
 2398 
 2399         # These tokens may be kept following an = deletion
 2400         @q = qw(
 2401           if unless or ||
 2402         );
 2403         @keep_after_deleted_assignment{@q} = (1) x scalar(@q);
 2404 
 2405     }
 2406 
 2407     # This flag is for testing only and should normally be zero.
 2408     use constant TEST_DELETE_NULL => 0;
 2409 
 2410     sub delete_unmatched_tokens {
 2411         my ( $rlines, $group_level ) = @_;
 2412 
 2413         # This is a preliminary step in vertical alignment in which we remove
 2414         # as many obviously un-needed alignment tokens as possible.  This will
 2415         # prevent them from interfering with the final alignment.
 2416 
 2417         # These are the return values
 2418         my $max_lev_diff     = 0;    # used to avoid a call to prune_tree
 2419         my $saw_side_comment = 0;    # used to avoid a call for side comments
 2420 
 2421         # Handle no lines -- shouldn't happen
 2422         return unless @{$rlines};
 2423 
 2424         # Handle a single line
 2425         if ( @{$rlines} == 1 ) {
 2426             my $line   = $rlines->[0];
 2427             my $jmax   = $line->get_jmax();
 2428             my $length = $line->get_rfield_lengths()->[$jmax];
 2429             $saw_side_comment = $length > 0;
 2430             return ( $max_lev_diff, $saw_side_comment );
 2431         }
 2432 
 2433         my $has_terminal_match = $rlines->[-1]->get_j_terminal_match();
 2434 
 2435         # ignore hanging side comments in these operations
 2436         my @filtered   = grep { !$_->get_is_hanging_side_comment() } @{$rlines};
 2437         my $rnew_lines = \@filtered;
 2438 
 2439         $saw_side_comment = @filtered != @{$rlines};
 2440         $max_lev_diff     = 0;
 2441 
 2442         # nothing to do if all lines were hanging side comments
 2443         my $jmax = @{$rnew_lines} - 1;
 2444         return ( $max_lev_diff, $saw_side_comment ) unless ( $jmax >= 0 );
 2445 
 2446         my @equals_info;
 2447         my @line_info;
 2448         my %is_good_tok;
 2449 
 2450         # create a hash of tokens for each line
 2451         my $rline_hashes = [];
 2452         foreach my $line ( @{$rnew_lines} ) {
 2453             my $rhash     = {};
 2454             my $rtokens   = $line->get_rtokens();
 2455             my $rpatterns = $line->get_rpatterns();
 2456             my $i         = 0;
 2457             my ( $i_eq, $tok_eq, $pat_eq );
 2458             my ( $lev_min, $lev_max );
 2459             foreach my $tok ( @{$rtokens} ) {
 2460                 my ( $raw_tok, $lev, $tag, $tok_count ) =
 2461                   decode_alignment_token($tok);
 2462 
 2463                 if ( $tok ne '#' ) {
 2464                     if ( !defined($lev_min) ) {
 2465                         $lev_min = $lev;
 2466                         $lev_max = $lev;
 2467                     }
 2468                     else {
 2469                         if ( $lev < $lev_min ) { $lev_min = $lev }
 2470                         if ( $lev > $lev_max ) { $lev_max = $lev }
 2471                     }
 2472                 }
 2473                 else {
 2474                     if ( !$saw_side_comment ) {
 2475                         my $length = $line->get_rfield_lengths()->[ $i + 1 ];
 2476                         $saw_side_comment ||= $length;
 2477                     }
 2478                 }
 2479 
 2480                 # Possible future upgrade: for multiple matches,
 2481                 # record [$i1, $i2, ..] instead of $i
 2482                 $rhash->{$tok} =
 2483                   [ $i, undef, undef, $raw_tok, $lev, $tag, $tok_count ];
 2484 
 2485                 # remember the first equals at line level
 2486                 if ( !defined($i_eq) && $raw_tok eq '=' ) {
 2487 
 2488                     if ( $lev eq $group_level ) {
 2489                         $i_eq   = $i;
 2490                         $tok_eq = $tok;
 2491                         $pat_eq = $rpatterns->[$i];
 2492                     }
 2493                 }
 2494                 $i++;
 2495             }
 2496             push @{$rline_hashes}, $rhash;
 2497             push @equals_info, [ $i_eq, $tok_eq, $pat_eq ];
 2498             push @line_info, [ $lev_min, $lev_max ];
 2499             if ( defined($lev_min) ) {
 2500                 my $lev_diff = $lev_max - $lev_min;
 2501                 if ( $lev_diff > $max_lev_diff ) { $max_lev_diff = $lev_diff }
 2502             }
 2503         }
 2504 
 2505         # compare each line pair and record matches
 2506         my $rtok_hash = {};
 2507         my $nr        = 0;
 2508         for ( my $jl = 0 ; $jl < $jmax ; $jl++ ) {
 2509             my $nl = $nr;
 2510             $nr = 0;
 2511             my $jr      = $jl + 1;
 2512             my $rhash_l = $rline_hashes->[$jl];
 2513             my $rhash_r = $rline_hashes->[$jr];
 2514             my $count   = 0;                      # UNUSED NOW?
 2515             my $ntoks   = 0;
 2516             foreach my $tok ( keys %{$rhash_l} ) {
 2517                 $ntoks++;
 2518                 if ( defined( $rhash_r->{$tok} ) ) {
 2519                     if ( $tok ne '#' ) { $count++; }
 2520                     my $il = $rhash_l->{$tok}->[0];
 2521                     my $ir = $rhash_r->{$tok}->[0];
 2522                     $rhash_l->{$tok}->[2] = $ir;
 2523                     $rhash_r->{$tok}->[1] = $il;
 2524                     if ( $tok ne '#' ) {
 2525                         push @{ $rtok_hash->{$tok} }, ( $jl, $jr );
 2526                         $nr++;
 2527                     }
 2528                 }
 2529             }
 2530 
 2531             # Set a line break if no matching tokens between these lines
 2532             # (this is not strictly necessary now but does not hurt)
 2533             if ( $nr == 0 && $nl > 0 ) {
 2534                 $rnew_lines->[$jl]->set_end_group(1);
 2535             }
 2536 
 2537             # Also set a line break if both lines have simple equals but with
 2538             # different leading characters in patterns.  This check is similar
 2539             # to one in sub check_match, and will prevent sub
 2540             # prune_alignment_tree from removing alignments which otherwise
 2541             # should be kept. This fix is rarely needed, but it can
 2542             # occasionally improve formatting.
 2543             # For example:
 2544             #     my $name = $this->{Name};
 2545             #     $type = $this->ctype($genlooptype) if defined $genlooptype;
 2546             #     my $declini = ( $asgnonly ? ""          : "\t$type *" );
 2547             #     my $cast    = ( $type     ? "($type *)" : "" );
 2548             # The last two lines start with 'my' and will not match the
 2549             # previous line starting with $type, so we do not want
 2550             # prune_alignment tree to delete their ? : alignments at a deeper
 2551             # level.
 2552             my ( $i_eq_l, $tok_eq_l, $pat_eq_l ) = @{ $equals_info[$jl] };
 2553             my ( $i_eq_r, $tok_eq_r, $pat_eq_r ) = @{ $equals_info[$jr] };
 2554             if ( defined($i_eq_l) && defined($i_eq_r) ) {
 2555 
 2556                 # Also, do not align equals across a change in ci level
 2557                 my $ci_jump = $rnew_lines->[$jl]->get_ci_level() !=
 2558                   $rnew_lines->[$jr]->get_ci_level();
 2559 
 2560                 if (
 2561                        $tok_eq_l eq $tok_eq_r
 2562                     && $i_eq_l == 0
 2563                     && $i_eq_r == 0
 2564                     && ( substr( $pat_eq_l, 0, 1 ) ne substr( $pat_eq_r, 0, 1 )
 2565                         || $ci_jump )
 2566                   )
 2567                 {
 2568                     $rnew_lines->[$jl]->set_end_group(1);
 2569                 }
 2570             }
 2571         }
 2572 
 2573         # find subgroups
 2574         my @subgroups;
 2575         push @subgroups, [ 0, $jmax ];
 2576         for ( my $jl = 0 ; $jl < $jmax ; $jl++ ) {
 2577             if ( $rnew_lines->[$jl]->get_end_group() ) {
 2578                 $subgroups[-1]->[1] = $jl;
 2579                 push @subgroups, [ $jl + 1, $jmax ];
 2580             }
 2581         }
 2582 
 2583         # flag to allow skipping pass 2
 2584         my $saw_large_group;
 2585 
 2586         ############################################################
 2587         # PASS 1 over subgroups to remove unmatched alignment tokens
 2588         ############################################################
 2589         foreach my $item (@subgroups) {
 2590             my ( $jbeg, $jend ) = @{$item};
 2591 
 2592             my $nlines = $jend - $jbeg + 1;
 2593 
 2594             ####################################################
 2595             # Look for complete if/elsif/else and ternary blocks
 2596             ####################################################
 2597 
 2598             # We are looking for a common '$dividing_token' like these:
 2599 
 2600             #    if    ( $b and $s ) { $p->{'type'} = 'a'; }
 2601             #    elsif ($b)          { $p->{'type'} = 'b'; }
 2602             #    elsif ($s)          { $p->{'type'} = 's'; }
 2603             #    else                { $p->{'type'} = ''; }
 2604             #                        ^----------- dividing_token
 2605 
 2606             #   my $severity =
 2607             #      !$routine                     ? '[PFX]'
 2608             #     : $routine =~ /warn.*_d\z/     ? '[DS]'
 2609             #     : $routine =~ /ck_warn/        ? 'W'
 2610             #     : $routine =~ /ckWARN\d*reg_d/ ? 'S'
 2611             #     : $routine =~ /ckWARN\d*reg/   ? 'W'
 2612             #     : $routine =~ /vWARN\d/        ? '[WDS]'
 2613             #     :                                '[PFX]';
 2614             #                                    ^----------- dividing_token
 2615 
 2616             # Only look for groups which are more than 2 lines long.  Two lines
 2617             # can get messed up doing this, probably due to the various
 2618             # two-line rules.
 2619 
 2620             my $dividing_token;
 2621             my %token_line_count;
 2622             if ( $nlines > 2 ) {
 2623 
 2624                 for ( my $jj = $jbeg ; $jj <= $jend ; $jj++ ) {
 2625                     my %seen;
 2626                     my $line    = $rnew_lines->[$jj];
 2627                     my $rtokens = $line->get_rtokens();
 2628                     foreach my $tok ( @{$rtokens} ) {
 2629                         if ( !$seen{$tok} ) {
 2630                             $seen{$tok}++;
 2631                             $token_line_count{$tok}++;
 2632                         }
 2633                     }
 2634                 }
 2635 
 2636                 foreach my $tok ( keys %token_line_count ) {
 2637                     if ( $token_line_count{$tok} == $nlines ) {
 2638                         if (   substr( $tok, 0, 1 ) eq '?'
 2639                             || substr( $tok, 0, 1 ) eq '{'
 2640                             && $tok =~ /^\{\d+if/ )
 2641                         {
 2642                             $dividing_token = $tok;
 2643                             last;
 2644                         }
 2645                     }
 2646                 }
 2647             }
 2648 
 2649             #####################################################
 2650             # Loop over lines to remove unwanted alignment tokens
 2651             #####################################################
 2652             for ( my $jj = $jbeg ; $jj <= $jend ; $jj++ ) {
 2653                 my $line    = $rnew_lines->[$jj];
 2654                 my $rtokens = $line->get_rtokens();
 2655                 my $rhash   = $rline_hashes->[$jj];
 2656                 my $i_eq    = $equals_info[$jj]->[0];
 2657                 my @idel;
 2658                 my $imax = @{$rtokens} - 2;
 2659                 my $delete_above_level;
 2660                 my $deleted_assignment_token;
 2661 
 2662                 my $saw_dividing_token = "";
 2663                 $saw_large_group ||= $nlines > 2 && $imax > 1;
 2664 
 2665                 # Loop over all alignment tokens
 2666                 for ( my $i = 0 ; $i <= $imax ; $i++ ) {
 2667                     my $tok = $rtokens->[$i];
 2668                     next if ( $tok eq '#' );    # shouldn't happen
 2669                     my ( $iii, $il, $ir, $raw_tok, $lev, $tag, $tok_count ) =
 2670                       @{ $rhash->{$tok} };
 2671 
 2672                     #######################################################
 2673                     # Here is the basic RULE: remove an unmatched alignment
 2674                     # which does not occur in the surrounding lines.
 2675                     #######################################################
 2676                     my $delete_me = !defined($il) && !defined($ir);
 2677 
 2678                     # But now we modify this with exceptions...
 2679 
 2680                     # EXCEPTION 1: If we are in a complete ternary or
 2681                     # if/elsif/else group, and this token is not on every line
 2682                     # of the group, should we delete it to preserve overall
 2683                     # alignment?
 2684                     if ($dividing_token) {
 2685                         if ( $token_line_count{$tok} >= $nlines ) {
 2686                             $saw_dividing_token ||= $tok eq $dividing_token;
 2687                         }
 2688                         else {
 2689 
 2690                             # For shorter runs, delete toks to save alignment.
 2691                             # For longer runs, keep toks after the '{' or '?'
 2692                             # to allow sub-alignments within braces.  The
 2693                             # number 5 lines is arbitrary but seems to work ok.
 2694                             $delete_me ||=
 2695                               ( $nlines < 5 || !$saw_dividing_token );
 2696                         }
 2697                     }
 2698 
 2699                     # EXCEPTION 2: Remove all tokens above a certain level
 2700                     # following a previous deletion.  For example, we have to
 2701                     # remove tagged higher level alignment tokens following a
 2702                     # '=>' deletion because the tags of higher level tokens
 2703                     # will now be incorrect. For example, this will prevent
 2704                     # aligning commas as follows after deleting the second '=>'
 2705                     #    $w->insert(
 2706                     #   ListBox => origin => [ 270, 160 ],
 2707                     #   size    => [ 200,           55 ],
 2708                     #    );
 2709                     if ( defined($delete_above_level) ) {
 2710                         if ( $lev > $delete_above_level ) {
 2711                             $delete_me ||= 1;    #$tag;
 2712                         }
 2713                         else { $delete_above_level = undef }
 2714                     }
 2715 
 2716                     # EXCEPTION 3: Remove all but certain tokens after an
 2717                     # assignment deletion.
 2718                     if (
 2719                         $deleted_assignment_token
 2720                         && ( $lev > $group_level
 2721                             || !$keep_after_deleted_assignment{$raw_tok} )
 2722                       )
 2723                     {
 2724                         $delete_me ||= 1;
 2725                     }
 2726 
 2727                     # EXCEPTION 4: Do not touch the first line of a 2 line
 2728                     # terminal match, such as below, because j_terminal has
 2729                     # already been set.
 2730                     #    if ($tag) { $tago = "<$tag>"; $tagc = "</$tag>"; }
 2731                     #    else      { $tago = $tagc = ''; }
 2732                     # But see snippets 'else1.t' and 'else2.t'
 2733                     $delete_me = 0
 2734                       if ( $jj == $jbeg
 2735                         && $has_terminal_match
 2736                         && $nlines == 2 );
 2737 
 2738                     # EXCEPTION 5: misc additional rules for commas and equals
 2739                     if ($delete_me) {
 2740 
 2741                         # okay to delete second and higher copies of a token
 2742                         if ( $tok_count == 1 ) {
 2743 
 2744                             # for a comma...
 2745                             if ( $raw_tok eq ',' ) {
 2746 
 2747                                 # Do not delete commas before an equals
 2748                                 $delete_me = 0
 2749                                   if ( defined($i_eq) && $i < $i_eq );
 2750 
 2751                                 # Do not delete line-level commas
 2752                                 $delete_me = 0 if ( $lev <= $group_level );
 2753                             }
 2754 
 2755                             # For an assignment at group level..
 2756                             if (   $is_assignment{$raw_tok}
 2757                                 && $lev == $group_level )
 2758                             {
 2759 
 2760                                 # Do not delete if it is the last alignment of
 2761                                 # multiple tokens; this will prevent some
 2762                                 # undesirable alignments
 2763                                 if ( $imax > 0 && $i == $imax ) {
 2764                                     $delete_me = 0;
 2765                                 }
 2766 
 2767                                 # Otherwise, set a flag to delete most
 2768                                 # remaining tokens
 2769                                 else { $deleted_assignment_token = $raw_tok }
 2770                             }
 2771                         }
 2772                     }
 2773 
 2774                     #####################################
 2775                     # Add this token to the deletion list
 2776                     #####################################
 2777                     if ($delete_me) {
 2778                         push @idel, $i;
 2779 
 2780                         # update deletion propagation flags
 2781                         if ( !defined($delete_above_level)
 2782                             || $lev < $delete_above_level )
 2783                         {
 2784 
 2785                             # delete all following higher level alignments
 2786                             $delete_above_level = $lev;
 2787 
 2788                             # but keep deleting after => to next lower level
 2789                             # to avoid some bizarre alignments
 2790                             if ( $raw_tok eq '=>' ) {
 2791                                 $delete_above_level = $lev - 1;
 2792                             }
 2793                         }
 2794                     }
 2795                 }    # End loop over alignment tokens
 2796 
 2797                 # Process all deletion requests for this line
 2798                 if (@idel) {
 2799                     delete_selected_tokens( $line, \@idel );
 2800                 }
 2801             }    # End loopover lines
 2802         }    # End loop over subgroups
 2803 
 2804         #################################################
 2805         # PASS 2 over subgroups to remove null alignments
 2806         #################################################
 2807 
 2808         # This pass is only used for testing. It is helping to identify
 2809         # alignment situations which might be improved with a future more
 2810         # general algorithm which adds a tail matching capability.
 2811         if (TEST_DELETE_NULL) {
 2812             delete_null_alignments( $rnew_lines, $rline_hashes, \@subgroups )
 2813               if ($saw_large_group);
 2814         }
 2815 
 2816         # PASS 3: Construct a tree of matched lines and delete some small deeper
 2817         # levels of tokens.  They also block good alignments.
 2818         prune_alignment_tree($rnew_lines) if ($max_lev_diff);
 2819 
 2820         # PASS 4: compare all lines for common tokens
 2821         match_line_pairs( $rlines, $rnew_lines, \@subgroups, $group_level );
 2822 
 2823         return ( $max_lev_diff, $saw_side_comment );
 2824     }
 2825 }
 2826 
 2827 sub delete_null_alignments {
 2828     my ( $rnew_lines, $rline_hashes, $rsubgroups ) = @_;
 2829 
 2830     # This is an optional second pass for deleting alignment tokens which can
 2831     # occasionally improve alignment.  We look for and remove 'null
 2832     # alignments', which are alignments that require no padding.  So we can
 2833     # 'cheat' and delete them. For example, notice the '=~' alignment in the
 2834     # first two lines of the following code:
 2835 
 2836     #    $sysname .= 'del' if $self->label =~ /deletion/;
 2837     #    $sysname .= 'ins' if $self->label =~ /insertion/;
 2838     #    $sysname .= uc $self->allele_ori->seq if $self->allele_ori->seq;
 2839 
 2840     # These '=~' tokens are already aligned because they are both the same
 2841     # distance from the previous alignment token, the 'if'.  So we can
 2842     # eliminate them as alignments.  The advantage is that in some cases, such
 2843     # as this one, this will allow other tokens to be aligned. In this case we
 2844     # then get the 'if' tokens to align:
 2845 
 2846     #   $sysname .= 'del'                     if $self->label =~ /deletion/;
 2847     #   $sysname .= 'ins'                     if $self->label =~ /insertion/;
 2848     #   $sysname .= uc $self->allele_ori->seq if $self->allele_ori->seq;
 2849 
 2850     # The following rules for limiting this operation have been found to
 2851     # work well and avoid problems:
 2852 
 2853     # Rule 1. We only consider a sequence of lines which have the same
 2854     # sequence of alignment tokens.
 2855 
 2856     # Rule 2. We never eliminate the first alignment token.  One reason is that
 2857     # lines may have different leading indentation spaces, so keeping the
 2858     # first alignment token insures that our length measurements start at
 2859     # a well-defined point.  Another reason is that nothing is gained because
 2860     # the left-to-right sweep can always handle alignment of this token.
 2861 
 2862     # Rule 3. We require that the first alignment token exist in either
 2863     # a previous line or a subsequent line.  The reason is that this avoids
 2864     # changing two-line matches which go through special logic.
 2865 
 2866     # Rule 4. Do not delete a token which occurs in a previous or subsequent
 2867     # line. For example, in the above example, it was ok to eliminate the '=~'
 2868     # token from two lines because it did not occur in a surrounding line.
 2869     # If it did occur in a surrounding line, the result could be confusing
 2870     # or even incorrectly aligned.
 2871 
 2872     # A consequence of these rules is that we only need to consider subgroups
 2873     # with at least 3 lines and 2 alignment tokens.
 2874 
 2875     # The subgroup line index range
 2876     my ( $jbeg, $jend );
 2877 
 2878     # Vars to keep track of the start of a current sequence of matching
 2879     # lines.
 2880     my $rtokens_match;
 2881     my $rfield_lengths_match;
 2882     my $j_match_beg;
 2883     my $j_match_end;
 2884     my $imax_match;
 2885     my $rneed_pad;
 2886 
 2887     # Vars for a line being tested
 2888     my $rtokens;
 2889     my $rfield_lengths;
 2890     my $imax;
 2891 
 2892     my $start_match = sub {
 2893         my ($jj) = @_;
 2894         $rtokens_match        = $rtokens;
 2895         $rfield_lengths_match = $rfield_lengths;
 2896         $j_match_beg          = $jj;
 2897         $j_match_end          = $jj;
 2898         $imax_match           = $imax;
 2899         $rneed_pad            = [];
 2900         return;
 2901     };
 2902 
 2903     my $add_to_match = sub {
 2904         my ($jj) = @_;
 2905         $j_match_end = $jj;
 2906 
 2907         # Keep track of any padding that would be needed for each token
 2908         for ( my $i = 0 ; $i <= $imax ; $i++ ) {
 2909             next if ( $rneed_pad->[$i] );
 2910             my $length       = $rfield_lengths->[$i];
 2911             my $length_match = $rfield_lengths_match->[$i];
 2912             if ( $length ne $length_match ) { $rneed_pad->[$i] = 1 }
 2913         }
 2914     };
 2915 
 2916     my $end_match = sub {
 2917         return unless ( $j_match_end > $j_match_beg );
 2918         my $nlines    = $j_match_end - $j_match_beg + 1;
 2919         my $rhash_beg = $rline_hashes->[$j_match_beg];
 2920         my $rhash_end = $rline_hashes->[$j_match_end];
 2921         my @idel;
 2922 
 2923         # Do not delete unless the first token also occurs in a surrounding line
 2924         my $tok0 = $rtokens_match->[0];
 2925         return
 2926           unless (
 2927             (
 2928                    $j_match_beg > $jbeg
 2929                 && $rnew_lines->[ $j_match_beg - 1 ]->get_rtokens()->[0] eq
 2930                 $tok0
 2931             )
 2932             || (   $j_match_end < $jend
 2933                 && $rnew_lines->[ $j_match_end + 1 ]->get_rtokens()->[0] eq
 2934                 $tok0 )
 2935           );
 2936 
 2937         # Note that we are skipping the token at i=0
 2938         for ( my $i = 1 ; $i <= $imax_match ; $i++ ) {
 2939 
 2940             # do not delete a token which requires padding to align
 2941             next if ( $rneed_pad->[$i] );
 2942 
 2943             my $tok = $rtokens_match->[$i];
 2944 
 2945             # Do not delete a token which occurs in a surrounding line
 2946             next
 2947               if ( $j_match_beg > $jbeg
 2948                 && defined( $rline_hashes->[ $j_match_beg - 1 ]->{$tok} ) );
 2949             next
 2950               if ( $j_match_end < $jend
 2951                 && defined( $rline_hashes->[ $j_match_end + 1 ]->{$tok} ) );
 2952 
 2953             # ok to delete
 2954             push @idel, $i;
 2955             ##print "ok to delete tok=$tok\n";
 2956         }
 2957         if (@idel) {
 2958             foreach my $j ( $j_match_beg .. $j_match_end ) {
 2959                 delete_selected_tokens( $rnew_lines->[$j], \@idel );
 2960             }
 2961         }
 2962     };
 2963 
 2964     foreach my $item ( @{$rsubgroups} ) {
 2965         ( $jbeg, $jend ) = @{$item};
 2966         my $nlines = $jend - $jbeg + 1;
 2967         next unless ( $nlines > 2 );
 2968 
 2969         for ( my $jj = $jbeg ; $jj <= $jend ; $jj++ ) {
 2970             my $line = $rnew_lines->[$jj];
 2971             $rtokens        = $line->get_rtokens();
 2972             $rfield_lengths = $line->get_rfield_lengths();
 2973             $imax           = @{$rtokens} - 2;
 2974 
 2975             # start a new match group
 2976             if ( $jj == $jbeg ) {
 2977                 $start_match->($jj);
 2978                 next;
 2979             }
 2980 
 2981             # see if all tokens of this line match the current group
 2982             my $match;
 2983             if ( $imax == $imax_match ) {
 2984                 for ( my $i = 0 ; $i <= $imax ; $i++ ) {
 2985                     my $tok       = $rtokens->[$i];
 2986                     my $tok_match = $rtokens_match->[$i];
 2987                     last if ( $tok ne $tok_match );
 2988                 }
 2989                 $match = 1;
 2990             }
 2991 
 2992             # yes, they all match
 2993             if ($match) {
 2994                 $add_to_match->($jj);
 2995             }
 2996 
 2997             # now, this line does not match
 2998             else {
 2999                 $end_match->();
 3000                 $start_match->($jj);
 3001             }
 3002         }    # End loopover lines
 3003         $end_match->();
 3004     }    # End loop over subgroups
 3005     return;
 3006 } ## end sub delete_null_alignments
 3007 
 3008 sub match_line_pairs {
 3009     my ( $rlines, $rnew_lines, $rsubgroups, $group_level ) = @_;
 3010 
 3011     # Compare each pair of lines and save information about common matches
 3012     # $rlines     = list of lines including hanging side comments
 3013     # $rnew_lines = list of lines without any hanging side comments
 3014     # $rsubgroups = list of subgroups of the new lines
 3015 
 3016     # TODO:
 3017     # Maybe change: imax_pair => pair_match_info = ref to array
 3018     #  = [$imax_align, $rMsg, ... ]
 3019     #  This may eventually have multi-level match info
 3020 
 3021     # Previous line vars
 3022     my ( $line_m, $rtokens_m, $rpatterns_m, $rfield_lengths_m, $imax_m,
 3023         $list_type_m, $ci_level_m );
 3024 
 3025     # Current line vars
 3026     my ( $line, $rtokens, $rpatterns, $rfield_lengths, $imax, $list_type,
 3027         $ci_level );
 3028 
 3029     use constant EXPLAIN_COMPARE_PATTERNS => 0;
 3030 
 3031     my $compare_patterns = sub {
 3032 
 3033         # helper routine to decide if patterns match well enough..
 3034         # return code:
 3035         #   0 = patterns match, continue
 3036         #   1 = no match
 3037         #   2 = no match, and lines do not match at all
 3038 
 3039         my ( $tok, $tok_m, $pat, $pat_m, $pad ) = @_;
 3040         my $GoToMsg     = "";
 3041         my $return_code = 1;
 3042 
 3043         my ( $alignment_token, $lev, $tag, $tok_count ) =
 3044           decode_alignment_token($tok);
 3045 
 3046         # We have to be very careful about aligning commas
 3047         # when the pattern's don't match, because it can be
 3048         # worse to create an alignment where none is needed
 3049         # than to omit one.  Here's an example where the ','s
 3050         # are not in named containers.  The first line below
 3051         # should not match the next two:
 3052         #   ( $a, $b ) = ( $b, $r );
 3053         #   ( $x1, $x2 ) = ( $x2 - $q * $x1, $x1 );
 3054         #   ( $y1, $y2 ) = ( $y2 - $q * $y1, $y1 );
 3055         if ( $alignment_token eq ',' ) {
 3056 
 3057             # do not align commas unless they are in named
 3058             # containers
 3059             $GoToMsg = "do not align commas in unnamed containers";
 3060             goto NO_MATCH unless ( $tok =~ /[A-Za-z]/ );
 3061         }
 3062 
 3063         # do not align parens unless patterns match;
 3064         # large ugly spaces can occur in math expressions.
 3065         elsif ( $alignment_token eq '(' ) {
 3066 
 3067             # But we can allow a match if the parens don't
 3068             # require any padding.
 3069             $GoToMsg = "do not align '(' unless patterns match or pad=0";
 3070             if ( $pad != 0 ) { goto NO_MATCH }
 3071         }
 3072 
 3073         # Handle an '=' alignment with different patterns to
 3074         # the left.
 3075         elsif ( $alignment_token eq '=' ) {
 3076 
 3077             # It is best to be a little restrictive when
 3078             # aligning '=' tokens.  Here is an example of
 3079             # two lines that we will not align:
 3080             #       my $variable=6;
 3081             #       $bb=4;
 3082             # The problem is that one is a 'my' declaration,
 3083             # and the other isn't, so they're not very similar.
 3084             # We will filter these out by comparing the first
 3085             # letter of the pattern.  This is crude, but works
 3086             # well enough.
 3087             if ( substr( $pat_m, 0, 1 ) ne substr( $pat, 0, 1 ) ) {
 3088                 $GoToMsg = "first character before equals differ";
 3089                 goto NO_MATCH;
 3090             }
 3091 
 3092             # The introduction of sub 'prune_alignment_tree'
 3093             # enabled alignment of lists left of the equals with
 3094             # other scalar variables. For example:
 3095             # my ( $D, $s, $e ) = @_;
 3096             # my $d             = length $D;
 3097             # my $c             = $e - $s - $d;
 3098 
 3099             # But this would change formatting of a lot of scripts,
 3100             # so for now we prevent alignment of comma lists on the
 3101             # left with scalars on the left.  We will also prevent
 3102             # any partial alignments.
 3103 
 3104           # set return code 2 if the = is at line level, but
 3105           # set return code 1 if the = is below line level, i.e.
 3106           #    sub new { my ( $p, $v ) = @_; bless \$v, $p }
 3107           #    sub iter { my ($x) = @_; return undef if $$x < 0; return $$x--; }
 3108 
 3109             elsif (
 3110                 ( index( $pat_m, ',' ) >= 0 ) ne ( index( $pat, ',' ) >= 0 ) )
 3111             {
 3112                 $GoToMsg = "mixed commas/no-commas before equals";
 3113                 if ( $lev eq $group_level ) {
 3114                     $return_code = 2;
 3115                 }
 3116                 goto NO_MATCH;
 3117             }
 3118         }
 3119 
 3120       MATCH:
 3121         return ( 0, \$GoToMsg );
 3122 
 3123       NO_MATCH:
 3124 
 3125         EXPLAIN_COMPARE_PATTERNS
 3126           && print STDERR "no match because $GoToMsg\n";
 3127 
 3128         return ( $return_code, \$GoToMsg );
 3129 
 3130     };    ## end of $compare_patterns->()
 3131 
 3132     # loop over subgroups
 3133     foreach my $item ( @{$rsubgroups} ) {
 3134         my ( $jbeg, $jend ) = @{$item};
 3135         my $nlines = $jend - $jbeg + 1;
 3136         next unless ( $nlines > 1 );
 3137 
 3138         # loop over lines in a subgroup
 3139         for ( my $jj = $jbeg ; $jj <= $jend ; $jj++ ) {
 3140 
 3141             $line_m           = $line;
 3142             $rtokens_m        = $rtokens;
 3143             $rpatterns_m      = $rpatterns;
 3144             $rfield_lengths_m = $rfield_lengths;
 3145             $imax_m           = $imax;
 3146             $list_type_m      = $list_type;
 3147             $ci_level_m       = $ci_level;
 3148 
 3149             $line           = $rnew_lines->[$jj];
 3150             $rtokens        = $line->get_rtokens();
 3151             $rpatterns      = $line->get_rpatterns();
 3152             $rfield_lengths = $line->get_rfield_lengths();
 3153             $imax           = @{$rtokens} - 2;
 3154             $list_type      = $line->get_list_type();
 3155             $ci_level       = $line->get_ci_level();
 3156 
 3157             # nothing to do for first line
 3158             next if ( $jj == $jbeg );
 3159 
 3160             my $ci_jump = $ci_level - $ci_level_m;
 3161 
 3162             my $imax_min = $imax_m < $imax ? $imax_m : $imax;
 3163 
 3164             my $imax_align = -1;
 3165 
 3166             # find number of leading common tokens
 3167 
 3168             #################################
 3169             # No match to hanging side comment
 3170             #################################
 3171             if ( $line->get_is_hanging_side_comment() ) {
 3172 
 3173                 # Should not get here; HSC's have been filtered out
 3174                 $imax_align = -1;
 3175             }
 3176 
 3177             ##############################
 3178             # Handle comma-separated lists
 3179             ##############################
 3180             elsif ( $list_type && $list_type eq $list_type_m ) {
 3181 
 3182                 # do not align lists across a ci jump with new list method
 3183                 if ($ci_jump) { $imax_min = -1 }
 3184 
 3185                 my $i_nomatch = $imax_min + 1;
 3186                 for ( my $i = 0 ; $i <= $imax_min ; $i++ ) {
 3187                     my $tok   = $rtokens->[$i];
 3188                     my $tok_m = $rtokens_m->[$i];
 3189                     if ( $tok ne $tok_m ) {
 3190                         $i_nomatch = $i;
 3191                         last;
 3192                     }
 3193                 }
 3194 
 3195                 $imax_align = $i_nomatch - 1;
 3196             }
 3197 
 3198             ##################
 3199             # Handle non-lists
 3200             ##################
 3201             else {
 3202                 my $i_nomatch = $imax_min + 1;
 3203                 for ( my $i = 0 ; $i <= $imax_min ; $i++ ) {
 3204                     my $tok   = $rtokens->[$i];
 3205                     my $tok_m = $rtokens_m->[$i];
 3206                     if ( $tok ne $tok_m ) {
 3207                         $i_nomatch = $i;
 3208                         last;
 3209                     }
 3210 
 3211                     my $pat   = $rpatterns->[$i];
 3212                     my $pat_m = $rpatterns_m->[$i];
 3213 
 3214                     # If patterns don't match, we have to be careful...
 3215                     if ( $pat_m ne $pat ) {
 3216                         my $pad =
 3217                           $rfield_lengths->[$i] - $rfield_lengths_m->[$i];
 3218                         my ( $match_code, $rmsg ) = $compare_patterns->(
 3219                             $tok, $tok_m, $pat, $pat_m, $pad
 3220                         );
 3221                         if ($match_code) {
 3222                             if    ( $match_code eq 1 ) { $i_nomatch = $i }
 3223                             elsif ( $match_code eq 2 ) { $i_nomatch = 0 }
 3224                             last;
 3225                         }
 3226                     }
 3227                 }
 3228                 $imax_align = $i_nomatch - 1;
 3229             }
 3230 
 3231             $line_m->set_imax_pair($imax_align);
 3232 
 3233         } ## end loop over lines
 3234 
 3235         # Put fence at end of subgroup
 3236         $line->set_imax_pair(-1);
 3237 
 3238     } ## end loop over subgroups
 3239 
 3240     # if there are hanging side comments, propagate the pair info down to them
 3241     # so that lines can just look back one line for their pair info.
 3242     if ( @{$rlines} > @{$rnew_lines} ) {
 3243         my $last_pair_info = -1;
 3244         foreach my $line ( @{$rlines} ) {
 3245             if ( $line->get_is_hanging_side_comment() ) {
 3246                 $line->set_imax_pair($last_pair_info);
 3247             }
 3248             else {
 3249                 $last_pair_info = $line->get_imax_pair();
 3250             }
 3251         }
 3252     }
 3253     return;
 3254 }
 3255 
 3256 sub fat_comma_to_comma {
 3257     my ($str) = @_;
 3258 
 3259     # We are changing '=>' to ',' and removing any trailing decimal count
 3260     # because currently fat commas have a count and commas do not.
 3261     # For example, we will change '=>2+{-3.2' into ',2+{-3'
 3262     if ( $str =~ /^=>([^\.]*)/ ) { $str = ',' . $1 }
 3263     return $str;
 3264 }
 3265 
 3266 sub get_line_token_info {
 3267 
 3268     # scan lines of tokens and return summary information about the range of
 3269     # levels and patterns.
 3270     my ($rlines) = @_;
 3271 
 3272     # First scan to check monotonicity. Here is an example of several
 3273     # lines which are monotonic. The = is the lowest level, and
 3274     # the commas are all one level deeper. So this is not nonmonotonic.
 3275     #  $$d{"weeks"}   = [ "w",  "wk",  "wks", "week", "weeks" ];
 3276     #  $$d{"days"}    = [ "d",  "day", "days" ];
 3277     #  $$d{"hours"}   = [ "h",  "hr",  "hrs", "hour", "hours" ];
 3278     my @all_token_info;
 3279     my $all_monotonic = 1;
 3280     for ( my $jj = 0 ; $jj < @{$rlines} ; $jj++ ) {
 3281         my ($line) = $rlines->[$jj];
 3282         my $rtokens = $line->get_rtokens();
 3283         my $last_lev;
 3284         my $is_monotonic = 1;
 3285         my $i            = -1;
 3286         foreach my $tok ( @{$rtokens} ) {
 3287             $i++;
 3288             my ( $raw_tok, $lev, $tag, $tok_count ) =
 3289               decode_alignment_token($tok);
 3290             push @{ $all_token_info[$jj] },
 3291               [ $raw_tok, $lev, $tag, $tok_count ];
 3292             last if ( $tok eq '#' );
 3293             if ( $i > 0 && $lev < $last_lev ) { $is_monotonic = 0 }
 3294             $last_lev = $lev;
 3295         }
 3296         if ( !$is_monotonic ) { $all_monotonic = 0 }
 3297     }
 3298 
 3299     my $rline_values = [];
 3300     for ( my $jj = 0 ; $jj < @{$rlines} ; $jj++ ) {
 3301         my ($line) = $rlines->[$jj];
 3302 
 3303         my $rtokens = $line->get_rtokens();
 3304         my $i       = -1;
 3305         my ( $lev_min, $lev_max );
 3306         my $token_pattern_max = "";
 3307         my %saw_level;
 3308         my @token_info;
 3309         my $is_monotonic = 1;
 3310 
 3311         # find the index of the last token before the side comment
 3312         my $imax      = @{$rtokens} - 2;
 3313         my $imax_true = $imax;
 3314 
 3315         # If the entire group is monotonic, and the line ends in a comma list,
 3316         # walk it back to the first such comma. this will have the effect of
 3317         # making all trailing ragged comma lists match in the prune tree
 3318         # routine.  these trailing comma lists can better be handled by later
 3319         # alignment rules.
 3320 
 3321         # Treat fat commas the same as commas here by converting them to
 3322         # commas.  This will improve the chance of aligning the leading parts
 3323         # of ragged lists.
 3324 
 3325         my $tok_end = fat_comma_to_comma( $rtokens->[$imax] );
 3326         if ( $all_monotonic && $tok_end =~ /^,/ ) {
 3327             my $i = $imax - 1;
 3328             while ( $i >= 0
 3329                 && fat_comma_to_comma( $rtokens->[$i] ) eq $tok_end )
 3330             {
 3331                 $imax = $i;
 3332                 $i--;
 3333             }
 3334         }
 3335 
 3336         # make a first pass to find level range
 3337         my $last_lev;
 3338         foreach my $tok ( @{$rtokens} ) {
 3339             $i++;
 3340             last if ( $i > $imax );
 3341             last if ( $tok eq '#' );
 3342             my ( $raw_tok, $lev, $tag, $tok_count ) =
 3343               @{ $all_token_info[$jj]->[$i] };
 3344 
 3345             last if ( $tok eq '#' );
 3346             $token_pattern_max .= $tok;
 3347             $saw_level{$lev}++;
 3348             if ( !defined($lev_min) ) {
 3349                 $lev_min = $lev;
 3350                 $lev_max = $lev;
 3351             }
 3352             else {
 3353                 if ( $lev < $lev_min )  { $lev_min      = $lev; }
 3354                 if ( $lev > $lev_max )  { $lev_max      = $lev; }
 3355                 if ( $lev < $last_lev ) { $is_monotonic = 0 }
 3356             }
 3357             $last_lev = $lev;
 3358         }
 3359 
 3360         # handle no levels
 3361         my $rtoken_patterns = {};
 3362         my $rtoken_indexes  = {};
 3363         my @levs            = sort keys %saw_level;
 3364         if ( !defined($lev_min) ) {
 3365             $lev_min                     = -1;
 3366             $lev_max                     = -1;
 3367             $levs[0]                     = -1;
 3368             $rtoken_patterns->{$lev_min} = "";
 3369             $rtoken_indexes->{$lev_min}  = [];
 3370         }
 3371 
 3372         # handle one level
 3373         elsif ( $lev_max == $lev_min ) {
 3374             $rtoken_patterns->{$lev_max} = $token_pattern_max;
 3375             $rtoken_indexes->{$lev_max}  = [ ( 0 .. $imax ) ];
 3376         }
 3377 
 3378         # handle multiple levels
 3379         else {
 3380             $rtoken_patterns->{$lev_max} = $token_pattern_max;
 3381             $rtoken_indexes->{$lev_max}  = [ ( 0 .. $imax ) ];
 3382 
 3383             my $debug   = 0;
 3384             my $lev_top = pop @levs;    # alread did max level
 3385             my $itok    = -1;
 3386             foreach my $tok ( @{$rtokens} ) {
 3387                 $itok++;
 3388                 last if ( $itok > $imax );
 3389                 my ( $raw_tok, $lev, $tag, $tok_count ) =
 3390                   @{ $all_token_info[$jj]->[$itok] };
 3391                 last if ( $raw_tok eq '#' );
 3392                 foreach my $lev_test (@levs) {
 3393                     next if ( $lev > $lev_test );
 3394                     $rtoken_patterns->{$lev_test} .= $tok;
 3395                     push @{ $rtoken_indexes->{$lev_test} }, $itok;
 3396                 }
 3397             }
 3398             push @levs, $lev_top;
 3399         }
 3400 
 3401         push @{$rline_values},
 3402           [
 3403             $lev_min,        $lev_max,      $rtoken_patterns, \@levs,
 3404             $rtoken_indexes, $is_monotonic, $imax_true,       $imax,
 3405           ];
 3406 
 3407         # debug
 3408         0 && do {
 3409             local $" = ')(';
 3410             print "lev_min=$lev_min, lev_max=$lev_max, levels=(@levs)\n";
 3411             foreach my $key ( sort keys %{$rtoken_patterns} ) {
 3412                 print "$key => $rtoken_patterns->{$key}\n";
 3413                 print "$key => @{$rtoken_indexes->{$key}}\n";
 3414             }
 3415         };
 3416     } ## end loop over lines
 3417     return ( $rline_values, $all_monotonic );
 3418 }
 3419 
 3420 sub prune_alignment_tree {
 3421     my ($rlines) = @_;
 3422     my $jmax = @{$rlines} - 1;
 3423     return unless $jmax > 0;
 3424 
 3425     # Vertical alignment in perltidy is done as an iterative process.  The
 3426     # starting point is to mark all possible alignment tokens ('=', ',', '=>',
 3427     # etc) for vertical alignment.  Then we have to delete all alignments
 3428     # which, if actually made, would detract from overall alignment.  This
 3429     # is done in several phases of which this is one.
 3430 
 3431     # In this routine we look at the alignments of a group of lines as a
 3432     # hierarchical tree.  We will 'prune' the tree to limited depths if that
 3433     # will improve overall alignment at the lower depths.
 3434     # For each line we will be looking at its alignment patterns down to
 3435     # different fixed depths. For each depth, we include all lower depths and
 3436     # ignore all higher depths.  We want to see if we can get alignment of a
 3437     # larger group of lines if we ignore alignments at some lower depth.
 3438     # Here is an # example:
 3439 
 3440     # for (
 3441     #     [ '$var',     sub { join $_, "bar" },            0, "bar" ],
 3442     #     [ 'CONSTANT', sub { join "foo", "bar" },         0, "bar" ],
 3443     #     [ 'CONSTANT', sub { join "foo", "bar", 3 },      1, "barfoo3" ],
 3444     #     [ '$myvar',   sub { my $var; join $var, "bar" }, 0, "bar" ],
 3445     # );
 3446 
 3447     # In the above example, all lines have three commas at the lowest depth
 3448     # (zero), so if there were no other alignements, these lines would all
 3449     # align considering only the zero depth alignment token.  But some lines
 3450     # have additional comma alignments at the next depth, so we need to decide
 3451     # if we should drop those to keep the top level alignments, or keep those
 3452     # for some additional low level alignments at the expense losing some top
 3453     # level alignments.  In this case we will drop the deeper level commas to
 3454     # keep the entire collection aligned.  But in some cases the decision could
 3455     # go the other way.
 3456 
 3457     # The tree for this example at the zero depth has one node containing
 3458     # all four lines, since they are identical at zero level (three commas).
 3459     # At depth one, there are three 'children' nodes, namely:
 3460     # - lines 1 and 2, which have a single comma in the 'sub' at depth 1
 3461     # - line 3, which has 2 commas at depth 1
 3462     # - line4, which has a ';' and a ',' at depth 1
 3463     # There are no deeper alignments in this example.
 3464     # so the tree structure for this example is:
 3465     #
 3466     #    depth 0         depth 1      depth 2
 3467     #    [lines 1-4] --  [line 1-2] -  (empty)
 3468     #                 |  [line 3]   -  (empty)
 3469     #                 |  [line 4]   -  (empty)
 3470 
 3471     # We can carry this to any depth, but it is not really useful to go below
 3472     # depth 2. To cleanly stop there, we will consider depth 2 to contain all
 3473     # alignments at depth >=2.
 3474 
 3475     use constant EXPLAIN_PRUNE => 0;
 3476 
 3477     ####################################################################
 3478     # Prune Tree Step 1. Start by scanning the lines and collecting info
 3479     ####################################################################
 3480 
 3481     # Note that the caller had this info but we have to redo this now because
 3482     # alignment tokens may have been deleted.
 3483     my ( $rline_values, $all_monotonic ) = get_line_token_info($rlines);
 3484 
 3485     # If all the lines have levels which increase monotonically from left to
 3486     # right, then the sweep-left-to-right pass can do a better job of alignment
 3487     # than pruning, and without deleting alignments.
 3488     return if ($all_monotonic);
 3489 
 3490     # Contents of $rline_values
 3491     #   [
 3492     #     $lev_min,        $lev_max,      $rtoken_patterns, \@levs,
 3493     #     $rtoken_indexes, $is_monotonic, $imax_true,       $imax,
 3494     #   ];
 3495 
 3496     # We can work to any depth, but there is little advantage to working
 3497     # to a a depth greater than 2
 3498     my $MAX_DEPTH = 2;
 3499 
 3500     # This arrays will hold the tree of alignment tokens at different depths
 3501     # for these lines.
 3502     my @match_tree;
 3503 
 3504     # Tree nodes contain these values:
 3505     # $match_tree[$depth] = [$jbeg, $jend, $n_parent, $level, $pattern,
 3506     #                        $nc_beg_p, $nc_end_p, $rindexes];
 3507     # where
 3508     #      $depth = 0,1,2 = index of depth of the match
 3509 
 3510     #  $jbeg beginning index j of the range of lines in this match
 3511     #  $jend ending index j of the range of lines in this match
 3512     #  $n_parent = index of the containing group at $depth-1, if it exists
 3513     #  $level = actual level of code being matched in this group
 3514     #  $pattern = alignment pattern being matched
 3515     #  $nc_beg_p = first child
 3516     #  $nc_end_p = last child
 3517     #  $rindexes = ref to token indexes
 3518 
 3519     # the patterns and levels of the current group being formed at each depth
 3520     my ( @token_patterns_current, @levels_current, @token_indexes_current );
 3521 
 3522     # the patterns and levels of the next line being tested at each depth
 3523     my ( @token_patterns_next, @levels_next, @token_indexes_next );
 3524 
 3525     #########################################################
 3526     # define a recursive worker subroutine for tree construction
 3527     #########################################################
 3528 
 3529     # This is a recursive routine which is called if a match condition changes
 3530     # at any depth when a new line is encountered.  It ends the match node
 3531     # which changed plus all deeper nodes attached to it.
 3532     my $end_node;
 3533     $end_node = sub {
 3534         my ( $depth, $jl, $n_parent ) = @_;
 3535 
 3536         # $depth is the tree depth
 3537         # $jl is the  index of the line
 3538         # $n_parent is index of the parent node of this node
 3539 
 3540         return if ( $depth > $MAX_DEPTH );
 3541 
 3542         # end any current group at this depth
 3543         if (   $jl >= 0
 3544             && defined( $match_tree[$depth] )
 3545             && @{ $match_tree[$depth] }
 3546             && defined( $levels_current[$depth] ) )
 3547         {
 3548             $match_tree[$depth]->[-1]->[1] = $jl;
 3549         }
 3550 
 3551         # Define the index of the node we will create below
 3552         my $ng_self = 0;
 3553         if ( defined( $match_tree[$depth] ) ) {
 3554             $ng_self = @{ $match_tree[$depth] };
 3555         }
 3556 
 3557         # end any next deeper child node(s)
 3558         $end_node->( $depth + 1, $jl, $ng_self );
 3559 
 3560         # update the levels being matched
 3561         $token_patterns_current[$depth] = $token_patterns_next[$depth];
 3562         $token_indexes_current[$depth]  = $token_indexes_next[$depth];
 3563         $levels_current[$depth]         = $levels_next[$depth];
 3564 
 3565         # Do not start a new group at this level if it is not being used
 3566         if ( !defined( $levels_next[$depth] )
 3567             || $depth > 0
 3568             && $levels_next[$depth] <= $levels_next[ $depth - 1 ] )
 3569         {
 3570             return;
 3571         }
 3572 
 3573         # Create a node for the next group at this depth. We initially assume
 3574         # that it will continue to $jmax, and correct that later if the node
 3575         # ends earlier.
 3576         push @{ $match_tree[$depth] },
 3577           [
 3578             $jl + 1, $jmax, $n_parent, $levels_current[$depth],
 3579             $token_patterns_current[$depth],
 3580             undef, undef, $token_indexes_current[$depth],
 3581           ];
 3582 
 3583         return;
 3584     };    ## end sub end_node
 3585 
 3586     ######################################################
 3587     # Prune Tree Step 2. Loop to form the tree of matches.
 3588     ######################################################
 3589     for ( my $jp = 0 ; $jp <= $jmax ; $jp++ ) {
 3590 
 3591         # working with two adjacent line indexes, 'm'=minus, 'p'=plus
 3592         my $jm = $jp - 1;
 3593 
 3594         # Pull out needed values for the next line
 3595         my ( $lev_min, $lev_max, $rtoken_patterns, $rlevs, $rtoken_indexes,
 3596             $is_monotonic, $imax_true, $imax )
 3597           = @{ $rline_values->[$jp] };
 3598 
 3599         # Transfer levels and patterns for this line to the working arrays.
 3600         # If the number of levels differs from our chosen MAX_DEPTH ...
 3601         # if fewer than MAX_DEPTH: leave levels at missing depths undefined
 3602         # if more than MAX_DEPTH: set the MAX_DEPTH level to be the maximum
 3603         @levels_next = @{$rlevs}[ 0 .. $MAX_DEPTH ];
 3604         if ( @{$rlevs} > $MAX_DEPTH ) {
 3605             $levels_next[$MAX_DEPTH] = $rlevs->[-1];
 3606         }
 3607         my $depth = 0;
 3608         foreach (@levels_next) {
 3609             $token_patterns_next[$depth] =
 3610               defined($_) ? $rtoken_patterns->{$_} : undef;
 3611             $token_indexes_next[$depth] =
 3612               defined($_) ? $rtoken_indexes->{$_} : undef;
 3613             $depth++;
 3614         }
 3615 
 3616         # Look for a change in match groups...
 3617 
 3618         # Initialize on the first line
 3619         if ( $jp == 0 ) {
 3620             my $n_parent;
 3621             $end_node->( 0, $jm, $n_parent );
 3622         }
 3623 
 3624         # End groups if a hard flag has been set
 3625         elsif ( $rlines->[$jm]->get_end_group() ) {
 3626             my $n_parent;
 3627             $end_node->( 0, $jm, $n_parent );
 3628         }
 3629 
 3630         # Continue at hanging side comment
 3631         elsif ( $rlines->[$jp]->get_is_hanging_side_comment() ) {
 3632             next;
 3633         }
 3634 
 3635         # Otherwise see if anything changed and update the tree if so
 3636         else {
 3637             foreach my $depth ( 0 .. $MAX_DEPTH ) {
 3638 
 3639                 my $def_current = defined( $token_patterns_current[$depth] );
 3640                 my $def_next    = defined( $token_patterns_next[$depth] );
 3641                 last unless ( $def_current || $def_next );
 3642                 if (   !$def_current
 3643                     || !$def_next
 3644                     || $token_patterns_current[$depth] ne
 3645                     $token_patterns_next[$depth] )
 3646                 {
 3647                     my $n_parent;
 3648                     if ( $depth > 0 && defined( $match_tree[ $depth - 1 ] ) ) {
 3649                         $n_parent = @{ $match_tree[ $depth - 1 ] } - 1;
 3650                     }
 3651                     $end_node->( $depth, $jm, $n_parent );
 3652                     last;
 3653                 }
 3654             }
 3655         }
 3656     } ## end loop to form tree of matches
 3657 
 3658     ##########################################################
 3659     # Prune Tree Step 3. Make links from parent to child nodes
 3660     ##########################################################
 3661 
 3662     # It seemed cleaner to do this as a separate step rather than during tree
 3663     # construction.  The children nodes have links up to the parent node which
 3664     # created them.  Now make links in the opposite direction, so the parents
 3665     # can find the children.  We store the range of children nodes ($nc_beg,
 3666     # $nc_end) of each parent with two additional indexes in the orignal array.
 3667     # These will be undef if no children.
 3668     for ( my $depth = $MAX_DEPTH ; $depth > 0 ; $depth-- ) {
 3669         next unless defined( $match_tree[$depth] );
 3670         my $nc_max = @{ $match_tree[$depth] } - 1;
 3671         my $np_now;
 3672         foreach my $nc ( 0 .. $nc_max ) {
 3673             my $np = $match_tree[$depth]->[$nc]->[2];
 3674             if ( !defined($np) ) {
 3675 
 3676                 # shouldn't happen
 3677                 #print STDERR "lost child $np at depth $depth\n";
 3678                 next;
 3679             }
 3680             if ( !defined($np_now) || $np != $np_now ) {
 3681                 $np_now = $np;
 3682                 $match_tree[ $depth - 1 ]->[$np]->[5] = $nc;
 3683             }
 3684             $match_tree[ $depth - 1 ]->[$np]->[6] = $nc;
 3685         }
 3686     } ## end loop to make links down to the child nodes
 3687 
 3688     EXPLAIN_PRUNE > 0 && do {
 3689         print "Tree complete. Found these groups:\n";
 3690         foreach my $depth ( 0 .. $MAX_DEPTH ) {
 3691             Dump_tree_groups( \@{ $match_tree[$depth] }, "depth=$depth" );
 3692         }
 3693     };
 3694 
 3695     #######################################################
 3696     # Prune Tree Step 4. Make a list of nodes to be deleted
 3697     #######################################################
 3698 
 3699     #  list of lines with tokens to be deleted:
 3700     #  [$jbeg, $jend, $level_keep]
 3701     #  $jbeg..$jend is the range of line indexes,
 3702     #  $level_keep is the minimum level to keep
 3703     my @delete_list;
 3704 
 3705     #  Groups with ending comma lists and their range of sizes:
 3706     #  $ragged_comma_group{$id} = [ imax_group_min, imax_group_max ]
 3707     my %ragged_comma_group;
 3708 
 3709     # Define a threshold line count for forcing a break
 3710     my $nlines_break = 3;
 3711 
 3712     # We work with a list of nodes to visit at the next deeper depth.
 3713     my @todo_list;
 3714     if ( defined( $match_tree[0] ) ) {
 3715         @todo_list = ( 0 .. @{ $match_tree[0] } - 1 );
 3716     }
 3717 
 3718     for ( my $depth = 0 ; $depth <= $MAX_DEPTH ; $depth++ ) {
 3719         last unless (@todo_list);
 3720         my @todo_next;
 3721         foreach my $np (@todo_list) {
 3722             my ( $jbeg_p, $jend_p, $np_p, $lev_p, $pat_p, $nc_beg_p, $nc_end_p,
 3723                 $rindexes_p )
 3724               = @{ $match_tree[$depth]->[$np] };
 3725             my $nlines_p = $jend_p - $jbeg_p + 1;
 3726 
 3727             # nothing to do if no children
 3728             next unless defined($nc_beg_p);
 3729 
 3730             # Define the number of lines to either keep or delete a child node.
 3731             # This is the key decision we have to make.  We want to delete
 3732             # short runs of matched lines, and keep long runs.  It seems easier
 3733             # for the eye to follow breaks in monotonic level changes than
 3734             # non-monotonic level changes.  For example, the following looks
 3735             # best if we delete the lower level alignments:
 3736 
 3737             #  [1]                  ~~ [];
 3738             #  [ ["foo"], ["bar"] ] ~~ [ qr/o/, qr/a/ ];
 3739             #  [ qr/o/, qr/a/ ]     ~~ [ ["foo"], ["bar"] ];
 3740             #  [ "foo", "bar" ]     ~~ [ qr/o/, qr/a/ ];
 3741             #  [ qr/o/, qr/a/ ]     ~~ [ "foo", "bar" ];
 3742             #  $deep1               ~~ $deep1;
 3743 
 3744             # So we will use two thresholds.
 3745             my $nmin_mono     = $depth + 2;
 3746             my $nmin_non_mono = $depth + 6;
 3747             if ( $nmin_mono > $nlines_p - 1 ) {
 3748                 $nmin_mono = $nlines_p - 1;
 3749             }
 3750             if ( $nmin_non_mono > $nlines_p - 1 ) {
 3751                 $nmin_non_mono = $nlines_p - 1;
 3752             }
 3753 
 3754             # loop to keep or delete each child node
 3755             foreach my $nc ( $nc_beg_p .. $nc_end_p ) {
 3756                 my ( $jbeg_c, $jend_c, $np_c, $lev_c, $pat_c, $nc_beg_c,
 3757                     $nc_end_c )
 3758                   = @{ $match_tree[ $depth + 1 ]->[$nc] };
 3759                 my $nlines_c     = $jend_c - $jbeg_c + 1;
 3760                 my $is_monotonic = $rline_values->[$jbeg_c]->[5];
 3761                 my $nmin         = $is_monotonic ? $nmin_mono : $nmin_non_mono;
 3762                 if ( $nlines_c < $nmin ) {
 3763 ##print "deleting child, nlines=$nlines_c, nmin=$nmin\n";
 3764                     push @delete_list, [ $jbeg_c, $jend_c, $lev_p ];
 3765                 }
 3766                 else {
 3767 ##print "keeping child, nlines=$nlines_c, nmin=$nmin\n";
 3768                     push @todo_next, $nc;
 3769                 }
 3770             }
 3771         }
 3772         @todo_list = @todo_next;
 3773     } ## end loop to mark nodes to delete
 3774 
 3775     #############################################################
 3776     # Prune Tree Step 5. Loop to delete selected alignment tokens
 3777     #############################################################
 3778     foreach my $item (@delete_list) {
 3779         my ( $jbeg, $jend, $level_keep ) = @{$item};
 3780         foreach my $jj ( $jbeg .. $jend ) {
 3781             my $line = $rlines->[$jj];
 3782             my @idel;
 3783             my $rtokens = $line->get_rtokens();
 3784             my $imax    = @{$rtokens} - 2;
 3785             for ( my $i = 0 ; $i <= $imax ; $i++ ) {
 3786                 my $tok = $rtokens->[$i];
 3787                 my ( $raw_tok, $lev, $tag, $tok_count ) =
 3788                   decode_alignment_token($tok);
 3789                 if ( $lev > $level_keep ) {
 3790                     push @idel, $i;
 3791                 }
 3792             }
 3793             if (@idel) {
 3794                 delete_selected_tokens( $line, \@idel );
 3795             }
 3796         }
 3797     } ## end loop to delete selected alignment tokens
 3798 
 3799     return;
 3800 } ## end sub prune_alignment_tree
 3801 
 3802 sub Dump_tree_groups {
 3803     my ( $rgroup, $msg ) = @_;
 3804     print "$msg\n";
 3805     local $" = ')(';
 3806     foreach my $item ( @{$rgroup} ) {
 3807         my @fix = @{$item};
 3808         foreach (@fix) { $_ = "undef" unless defined $_; }
 3809         $fix[4] = "...";
 3810         print "(@fix)\n";
 3811     }
 3812     return;
 3813 }
 3814 
 3815 {    ## closure for sub is_marginal_match
 3816 
 3817     my %is_if_or;
 3818     my %is_assignment;
 3819     my %is_good_alignment;
 3820 
 3821     # This test did not give sufficiently better results to use as an update,
 3822     # but the flag is worth keeping as a starting point for future testing.
 3823     use constant TEST_MARGINAL_EQ_ALIGNMENT => 0;
 3824 
 3825     BEGIN {
 3826 
 3827         my @q = qw(
 3828           if unless or ||
 3829         );
 3830         @is_if_or{@q} = (1) x scalar(@q);
 3831 
 3832         @q = qw(
 3833           = **= += *= &= <<= &&=
 3834           -= /= |= >>= ||= //=
 3835           .= %= ^=
 3836           x=
 3837         );
 3838         @is_assignment{@q} = (1) x scalar(@q);
 3839 
 3840         # Vertically aligning on certain "good" tokens is usually okay
 3841         # so we can be less restrictive in marginal cases.
 3842         @q = qw( { ? => = );
 3843         push @q, (',');
 3844         @is_good_alignment{@q} = (1) x scalar(@q);
 3845     }
 3846 
 3847     sub is_marginal_match {
 3848 
 3849         my ( $line_0, $line_1, $group_level, $imax_align, $imax_prev ) = @_;
 3850 
 3851         # Decide if we should undo some or all of the common alignments of a
 3852         # group of just two lines.
 3853 
 3854         # Given:
 3855         #   $line_0 and $line_1 - the two lines
 3856         #   $group_level = the indentation level of the group being processed
 3857         #   $imax_align = the maximum index of the common alignment tokens
 3858         #                 of the two lines
 3859         #   $imax_prev  = the maximum index of the common alignment tokens
 3860         #                 with the line before $line_0 (=-1 of does not exist)
 3861 
 3862         # Return:
 3863         #   $is_marginal = true if the two lines should NOT be fully aligned
 3864         #                = false if the two lines can remain fully aligned
 3865         #   $imax_align  = the index of the highest alignment token shared by
 3866         #                  these two lines to keep if the match is marginal.
 3867 
 3868         # When we have an alignment group of just two lines like this, we are
 3869         # working in the twilight zone of what looks good and what looks bad.
 3870         # This routine is a collection of rules which work have been found to
 3871         # work fairly well, but it will need to be updated from time to time.
 3872 
 3873         my $is_marginal = 0;
 3874 
 3875         # always keep alignments of a terminal else or ternary
 3876         goto RETURN if ( defined( $line_1->get_j_terminal_match() ) );
 3877 
 3878         # always align lists
 3879         my $group_list_type = $line_0->get_list_type();
 3880         goto RETURN if ($group_list_type);
 3881 
 3882         # always align hanging side comments
 3883         my $is_hanging_side_comment = $line_1->get_is_hanging_side_comment();
 3884         goto RETURN if ($is_hanging_side_comment);
 3885 
 3886         my $jmax_0           = $line_0->get_jmax();
 3887         my $jmax_1           = $line_1->get_jmax();
 3888         my $rtokens_1        = $line_1->get_rtokens();
 3889         my $rtokens_0        = $line_0->get_rtokens();
 3890         my $rfield_lengths_0 = $line_0->get_rfield_lengths();
 3891         my $rfield_lengths_1 = $line_1->get_rfield_lengths();
 3892         my $rpatterns_0      = $line_0->get_rpatterns();
 3893         my $rpatterns_1      = $line_1->get_rpatterns();
 3894         my $imax_next        = $line_1->get_imax_pair();
 3895 
 3896         # We will scan the alignment tokens and set a flag '$is_marginal' if
 3897         # it seems that the an alignment would look bad.
 3898         my $max_pad            = 0;
 3899         my $saw_good_alignment = 0;
 3900         my $saw_if_or;        # if we saw an 'if' or 'or' at group level
 3901         my $raw_tokb = "";    # first token seen at group level
 3902         my $jfirst_bad;
 3903         my $line_ending_fat_comma;    # is last token just a '=>' ?
 3904         my $j0_eq_pad;
 3905         my $j0_max_pad = 0;
 3906 
 3907         for ( my $j = 0 ; $j < $jmax_1 - 1 ; $j++ ) {
 3908             my ( $raw_tok, $lev, $tag, $tok_count ) =
 3909               decode_alignment_token( $rtokens_1->[$j] );
 3910             if ( $raw_tok && $lev == $group_level ) {
 3911                 if ( !$raw_tokb ) { $raw_tokb = $raw_tok }
 3912                 $saw_if_or ||= $is_if_or{$raw_tok};
 3913             }
 3914 
 3915             # When the first of the two lines ends in a bare '=>' this will
 3916             # probably be marginal match.  (For a bare =>, the next field length
 3917             # will be 2 or 3, depending on side comment)
 3918             $line_ending_fat_comma =
 3919                  $j == $jmax_1 - 2
 3920               && $raw_tok eq '=>'
 3921               && $rfield_lengths_0->[ $j + 1 ] <= 3;
 3922 
 3923             my $pad = $rfield_lengths_1->[$j] - $rfield_lengths_0->[$j];
 3924             if ( $j == 0 ) {
 3925                 $pad += $line_1->get_leading_space_count() -
 3926                   $line_0->get_leading_space_count();
 3927 
 3928                 # Remember the pad at a leading equals
 3929                 if ( $raw_tok eq '=' && $lev == $group_level ) {
 3930                     $j0_eq_pad = $pad;
 3931                     $j0_max_pad =
 3932                       0.5 * ( $rfield_lengths_1->[0] + $rfield_lengths_0->[0] );
 3933                     $j0_max_pad = 4 if ( $j0_max_pad < 4 );
 3934                 }
 3935             }
 3936 
 3937             if ( $pad < 0 )        { $pad     = -$pad }
 3938             if ( $pad > $max_pad ) { $max_pad = $pad }
 3939             if ( $is_good_alignment{$raw_tok} && !$line_ending_fat_comma ) {
 3940                 $saw_good_alignment = 1;
 3941             }
 3942             else {
 3943                 $jfirst_bad = $j unless defined($jfirst_bad);
 3944             }
 3945             if ( $rpatterns_0->[$j] ne $rpatterns_1->[$j] ) {
 3946 
 3947                 # Flag this as a marginal match since patterns differ.
 3948                 # Normally, we will not allow just two lines to match if
 3949                 # marginal. But we can allow matching in some specific cases.
 3950 
 3951                 $jfirst_bad  = $j if ( !defined($jfirst_bad) );
 3952                 $is_marginal = 1  if ( $is_marginal == 0 );
 3953                 if ( $raw_tok eq '=' ) {
 3954 
 3955                     # Here is an example of a marginal match:
 3956                     #       $done{$$op} = 1;
 3957                     #       $op         = compile_bblock($op);
 3958                     # The left tokens are both identifiers, but
 3959                     # one accesses a hash and the other doesn't.
 3960                     # We'll let this be a tentative match and undo
 3961                     # it later if we don't find more than 2 lines
 3962                     # in the group.
 3963                     $is_marginal = 2;
 3964                 }
 3965             }
 3966         }
 3967 
 3968         $is_marginal = 1 if ( $is_marginal == 0 && $line_ending_fat_comma );
 3969 
 3970         # Turn off the "marginal match" flag in some cases...
 3971         # A "marginal match" occurs when the alignment tokens agree
 3972         # but there are differences in the other tokens (patterns).
 3973         # If we leave the marginal match flag set, then the rule is that we
 3974         # will align only if there are more than two lines in the group.
 3975         # We will turn of the flag if we almost have a match
 3976         # and either we have seen a good alignment token or we
 3977         # just need a small pad (2 spaces) to fit.  These rules are
 3978         # the result of experimentation.  Tokens which misaligned by just
 3979         # one or two characters are annoying.  On the other hand,
 3980         # large gaps to less important alignment tokens are also annoying.
 3981         if ( $is_marginal == 1
 3982             && ( $saw_good_alignment || $max_pad < 3 ) )
 3983         {
 3984             $is_marginal = 0;
 3985         }
 3986 
 3987         # We will use the line endings to help decide on alignments...
 3988         # See if the lines end with semicolons...
 3989         my $sc_term0;
 3990         my $sc_term1;
 3991         if ( $jmax_0 < 1 || $jmax_1 < 1 ) {
 3992 
 3993             # shouldn't happen
 3994         }
 3995         else {
 3996             my $pat0 = $rpatterns_0->[ $jmax_0 - 1 ];
 3997             my $pat1 = $rpatterns_1->[ $jmax_1 - 1 ];
 3998             $sc_term0 = $pat0 =~ /;b?$/;
 3999             $sc_term1 = $pat1 =~ /;b?$/;
 4000         }
 4001 
 4002         if ( !$is_marginal && !$sc_term0 ) {
 4003 
 4004             # First line of assignment should be semicolon terminated.
 4005             # For example, do not align here:
 4006             #  $$href{-NUM_TEXT_FILES} = $$href{-NUM_BINARY_FILES} =
 4007             #    $$href{-NUM_DIRS} = 0;
 4008             if ( $is_assignment{$raw_tokb} ) {
 4009                 $is_marginal = 1;
 4010             }
 4011         }
 4012 
 4013         # Try to avoid some undesirable alignments of opening tokens
 4014         # for example, the space between grep and { here:
 4015         #  return map { ( $_ => $_ ) }
 4016         #    grep     { /$handles/ } $self->_get_delegate_method_list;
 4017         $is_marginal ||=
 4018              ( $raw_tokb eq '(' || $raw_tokb eq '{' )
 4019           && $jmax_1 == 2
 4020           && $sc_term0 ne $sc_term1;
 4021 
 4022         ########################################
 4023         # return unless this is a marginal match
 4024         ########################################
 4025         goto RETURN if ( !$is_marginal );
 4026 
 4027         # Undo the marginal match flag in certain cases,
 4028 
 4029         # Two lines with a leading equals-like operator are allowed to
 4030         # align if the patterns to the left of the equals are the same.
 4031         # For example the following two lines are a marginal match but have
 4032         # the same left side patterns, so we will align the equals.
 4033         #     my $orig = my $format = "^<<<<< ~~\n";
 4034         #     my $abc  = "abc";
 4035         # But these have a different left pattern so they will not be
 4036         # aligned
 4037         #     $xmldoc .= $`;
 4038         #     $self->{'leftovers'} .= "<bx-seq:seq" . $';
 4039 
 4040         # First line semicolon terminated but second not, usually ok:
 4041         #               my $want = "'ab', 'a', 'b'";
 4042         #               my $got  = join( ", ",
 4043         #                    map { defined($_) ? "'$_'" : "undef" }
 4044         #                          @got );
 4045         #  First line not semicolon terminated, Not OK to match:
 4046         #   $$href{-NUM_TEXT_FILES} = $$href{-NUM_BINARY_FILES} =
 4047         #      $$href{-NUM_DIRS} = 0;
 4048         my $pat0 = $rpatterns_0->[0];
 4049         my $pat1 = $rpatterns_1->[0];
 4050 
 4051         ##########################################################
 4052         # Turn off the marginal flag for some types of assignments
 4053         ##########################################################
 4054         if ( $is_assignment{$raw_tokb} ) {
 4055 
 4056             # undo marginal flag if first line is semicolon terminated
 4057             # and leading patters match
 4058             if ($sc_term0) {    # && $sc_term1) {
 4059                 $is_marginal = $pat0 ne $pat1;
 4060             }
 4061         }
 4062         elsif ( $raw_tokb eq '=>' ) {
 4063 
 4064             # undo marginal flag if patterns match
 4065             $is_marginal = $pat0 ne $pat1 || $line_ending_fat_comma;
 4066         }
 4067         elsif ( $raw_tokb eq '=~' ) {
 4068 
 4069             # undo marginal flag if both lines are semicolon terminated
 4070             # and leading patters match
 4071             if ( $sc_term1 && $sc_term0 ) {
 4072                 $is_marginal = $pat0 ne $pat1;
 4073             }
 4074         }
 4075 
 4076         ######################################################
 4077         # Turn off the marginal flag if we saw an 'if' or 'or'
 4078         ######################################################
 4079 
 4080         # A trailing 'if' and 'or' often gives a good alignment
 4081         # For example, we can align these:
 4082         #  return -1     if $_[0] =~ m/^CHAPT|APPENDIX/;
 4083         #  return $1 + 0 if $_[0] =~ m/^SECT(\d*)$/;
 4084 
 4085         # or
 4086         #  $d_in_m[2] = 29          if ( &Date_LeapYear($y) );
 4087         #  $d         = $d_in_m[$m] if ( $d > $d_in_m[$m] );
 4088 
 4089         if ($saw_if_or) {
 4090 
 4091             # undo marginal flag if both lines are semicolon terminated
 4092             if ( $sc_term0 && $sc_term1 ) {
 4093                 $is_marginal = 0;
 4094             }
 4095         }
 4096 
 4097         # For a marginal match, only keep matches before the first 'bad' match
 4098         if (   $is_marginal
 4099             && defined($jfirst_bad)
 4100             && $imax_align > $jfirst_bad - 1 )
 4101         {
 4102             $imax_align = $jfirst_bad - 1;
 4103         }
 4104 
 4105         ###########################################################
 4106         # Allow sweep to match lines with leading '=' in some cases
 4107         ###########################################################
 4108         if ( $imax_align < 0 && defined($j0_eq_pad) ) {
 4109 
 4110             if (
 4111 
 4112                 # If there is a following line with leading equals, or
 4113                 # preceding line with leading equals, then let the sweep align
 4114                 # them without restriction.  For example, the first two lines
 4115                 # here are a marginal match, but they are followed by a line
 4116                 # with leading equals, so the sweep-lr logic can align all of
 4117                 # the lines:
 4118 
 4119                 #  $date[1] = $month_to_num{ $date[1] };            # <--line_0
 4120                 #  @xdate   = split( /[:\/\s]/, $log->field('t') ); # <--line_1
 4121                 #  $day     = sprintf( "%04d/%02d/%02d", @date[ 2, 1, 0 ] );
 4122                 #  $time    = sprintf( "%02d:%02d:%02d", @date[ 3 .. 5 ] );
 4123 
 4124                 # Likewise, if we reverse the two pairs we want the same result
 4125 
 4126                 #  $day     = sprintf( "%04d/%02d/%02d", @date[ 2, 1, 0 ] );
 4127                 #  $time    = sprintf( "%02d:%02d:%02d", @date[ 3 .. 5 ] );
 4128                 #  $date[1] = $month_to_num{ $date[1] };            # <--line_0
 4129                 #  @xdate   = split( /[:\/\s]/, $log->field('t') ); # <--line_1
 4130 
 4131                 (
 4132                        $imax_next >= 0
 4133                     || $imax_prev >= 0
 4134                     || TEST_MARGINAL_EQ_ALIGNMENT
 4135                 )
 4136                 && $j0_eq_pad >= -$j0_max_pad
 4137                 && $j0_eq_pad <= $j0_max_pad
 4138               )
 4139             {
 4140 
 4141                 # But do not do this if there is a comma before the '='.
 4142                 # For example, the first two lines below have commas and
 4143                 # therefore are not allowed to align with lines 3 & 4:
 4144 
 4145                 # my ( $x, $y ) = $self->Size();                      #<--line_0
 4146                 # my ( $left, $top, $right, $bottom ) = $self->Window(); #<--l_1
 4147                 # my $vx = $right - $left;
 4148                 # my $vy = $bottom - $top;
 4149 
 4150                 if ( $rpatterns_0->[0] !~ /,/ && $rpatterns_1->[0] !~ /,/ ) {
 4151                     $imax_align = 0;
 4152                 }
 4153             }
 4154         }
 4155 
 4156       RETURN:
 4157         return ( $is_marginal, $imax_align );
 4158     }
 4159 }
 4160 
 4161 sub get_extra_leading_spaces {
 4162 
 4163     my ( $rlines, $rgroups ) = @_;
 4164 
 4165     #----------------------------------------------------------
 4166     # Define any extra indentation space (for the -lp option).
 4167     # Here is why:
 4168     # If a list has side comments, sub scan_list must dump the
 4169     # list before it sees everything.  When this happens, it sets
 4170     # the indentation to the standard scheme, but notes how
 4171     # many spaces it would have liked to use.  We may be able
 4172     # to recover that space here in the event that all of the
 4173     # lines of a list are back together again.
 4174     #----------------------------------------------------------
 4175 
 4176     return 0 unless ( @{$rlines} && @{$rgroups} );
 4177 
 4178     my $object = $rlines->[0]->get_indentation();
 4179     return 0 unless ( ref($object) );
 4180     my $extra_leading_spaces            = 0;
 4181     my $extra_indentation_spaces_wanted = get_recoverable_spaces($object);
 4182     return ($extra_leading_spaces) unless ($extra_indentation_spaces_wanted);
 4183 
 4184     my $min_spaces = $extra_indentation_spaces_wanted;
 4185     if ( $min_spaces > 0 ) { $min_spaces = 0 }
 4186 
 4187     # loop over all groups
 4188     my $ng      = -1;
 4189     my $ngroups = @{$rgroups};
 4190     foreach my $item ( @{$rgroups} ) {
 4191         $ng++;
 4192         my ( $jbeg, $jend ) = @{$item};
 4193         foreach my $j ( $jbeg .. $jend ) {
 4194             next if ( $j == 0 );
 4195 
 4196             # all indentation objects must be the same
 4197             if ( $object != $rlines->[$j]->get_indentation() ) {
 4198                 return 0;
 4199             }
 4200         }
 4201 
 4202        # find the maximum space without exceeding the line length for this group
 4203         my $avail = $rlines->[$jbeg]->get_available_space_on_right();
 4204         my $spaces =
 4205           ( $avail > $extra_indentation_spaces_wanted )
 4206           ? $extra_indentation_spaces_wanted
 4207           : $avail;
 4208 
 4209         #########################################################
 4210         # Note: min spaces can be negative; for example with -gnu
 4211         # f(
 4212         #   do { 1; !!(my $x = bless []); }
 4213         #  );
 4214         #########################################################
 4215         # The following rule is needed to match older formatting:
 4216         # For multiple groups, we will keep spaces non-negative.
 4217         # For a single group, we will allow a negative space.
 4218         if ( $ngroups > 1 && $spaces < 0 ) { $spaces = 0 }
 4219 
 4220         # update the minimum spacing
 4221         if ( $ng == 0 || $spaces < $extra_leading_spaces ) {
 4222             $extra_leading_spaces = $spaces;
 4223         }
 4224     }
 4225 
 4226     # update the indentation object because with -icp the terminal
 4227     # ');' will use the same adjustment.
 4228     $object->permanently_decrease_available_spaces( -$extra_leading_spaces );
 4229     return $extra_leading_spaces;
 4230 }
 4231 
 4232 sub forget_side_comment {
 4233     my ($self) = @_;
 4234     $self->[_last_side_comment_column_] = 0;
 4235     return;
 4236 }
 4237 
 4238 sub is_good_side_comment_column {
 4239     my ( $self, $line, $line_number, $level, $num5 ) = @_;
 4240 
 4241     # Upon encountering the first side comment of a group, decide if
 4242     # a previous side comment should be forgotten.  This involves
 4243     # checking several rules.
 4244 
 4245     # Return true to keep old comment location
 4246     # Return false to forget old comment location
 4247 
 4248     my $rfields                 = $line->get_rfields();
 4249     my $is_hanging_side_comment = $line->get_is_hanging_side_comment();
 4250 
 4251     # RULE1: Never forget comment before a hanging side comment
 4252     goto KEEP if ($is_hanging_side_comment);
 4253 
 4254     # RULE2: Forget a side comment after a short line difference,
 4255     # where 'short line difference' is computed from a formula.
 4256     # Using a smooth formula helps minimize sudden large changes.
 4257     my $line_diff = $line_number - $self->[_last_side_comment_line_number_];
 4258     my $alev_diff = abs( $level - $self->[_last_side_comment_level_] );
 4259 
 4260     # '$num5' is the number of comments in the first 5 lines after the first
 4261     # comment.  It is needed to keep a compact group of side comments from
 4262     # being influenced by a more distant side comment.
 4263     $num5 = 1 unless ($num5);
 4264 
 4265     # Some values:
 4266 
 4267     #        $adiff  $num5   $short_diff
 4268     #        0       *       12
 4269     #        1       1       6
 4270     #        1       2       4
 4271     #        1       3       3
 4272     #        1       4       2
 4273     #        2       1       4
 4274     #        2       2       2
 4275     #        2       3       1
 4276     #        3       1       3
 4277     #        3       2       1
 4278 
 4279     my $short_diff = SC_LONG_LINE_DIFF / ( 1 + $alev_diff * $num5 );
 4280 
 4281     goto FORGET
 4282       if ( $line_diff > $short_diff );
 4283 
 4284     # RULE3: Forget a side comment if this line is at lower level and
 4285     # ends a block
 4286     my $last_sc_level = $self->[_last_side_comment_level_];
 4287     goto FORGET
 4288       if ( $level < $last_sc_level
 4289         && $is_closing_block_type{ substr( $rfields->[0], 0, 1 ) } );
 4290 
 4291     # RULE 4: Forget the last side comment if this comment might join a cached
 4292     # line ...
 4293     if ( my $cached_line_type = get_cached_line_type() ) {
 4294 
 4295         # ... otherwise side comment alignment will get messed up.
 4296         # For example, in the following test script
 4297         # with using 'perltidy -sct -act=2', the last comment would try to
 4298         # align with the previous and then be in the wrong column when
 4299         # the lines are combined:
 4300 
 4301         # foreach $line (
 4302         #    [0, 1, 2], [3, 4, 5], [6, 7, 8],    # rows
 4303         #    [0, 3, 6], [1, 4, 7], [2, 5, 8],    # columns
 4304         #    [0, 4, 8], [2, 4, 6]
 4305         #  )                                     # diagonals
 4306         goto FORGET
 4307           if ( $cached_line_type == 2 || $cached_line_type == 4 );
 4308     }
 4309 
 4310     # Otherwise, keep it alive
 4311     goto KEEP;
 4312 
 4313   FORGET:
 4314     return 0;
 4315 
 4316   KEEP:
 4317     return 1;
 4318 }
 4319 
 4320 sub align_side_comments {
 4321 
 4322     my ( $self, $rlines, $rgroups ) = @_;
 4323 
 4324     # Align any side comments in this batch of lines
 4325 
 4326     # Given:
 4327     #  $rlines  - the lines
 4328     #  $rgroups - the partition of the lines into groups
 4329     #
 4330     # We will be working group-by-group because all side comments
 4331     # (real or fake) in each group are already aligned. So we just have
 4332     # to make alignments between groups wherever possible.
 4333 
 4334     # An unusual aspect is that within each group we have aligned both real
 4335     # and fake side comments.  This has the consequence that the lengths of
 4336     # long lines without real side comments can cause 'push' all side comments
 4337     # to the right.  This seems unusual, but testing with and without this
 4338     # feature shows that it is usually better this way.  Othewise, side
 4339     # comments can be hidden between long lines without side comments and
 4340     # thus be harder to read.
 4341 
 4342     my $group_level        = $self->[_group_level_];
 4343     my $continuing_sc_flow = $self->[_last_side_comment_length_] > 0
 4344       && $group_level == $self->[_last_level_written_];
 4345 
 4346     # Find groups with side comments, and remember the first nonblank comment
 4347     my $j_sc_beg;
 4348     my @todo;
 4349     my $ng = -1;
 4350     foreach my $item ( @{$rgroups} ) {
 4351         $ng++;
 4352         my ( $jbeg, $jend ) = @{$item};
 4353         foreach my $j ( $jbeg .. $jend ) {
 4354             my $line = $rlines->[$j];
 4355             my $jmax = $line->get_jmax();
 4356             if ( $line->get_rfield_lengths()->[$jmax] ) {
 4357 
 4358                 # this group has a line with a side comment
 4359                 push @todo, $ng;
 4360                 if ( !defined($j_sc_beg) ) {
 4361                     $j_sc_beg = $j;
 4362                 }
 4363                 last;
 4364             }
 4365         }
 4366     }
 4367 
 4368     # done if no groups with side comments
 4369     return unless @todo;
 4370 
 4371     # Count $num5 = number of comments in the 5 lines after the first comment
 4372     # This is an important factor in a decision formula
 4373     my $num5 = 1;
 4374     for ( my $jj = $j_sc_beg + 1 ; $jj < @{$rlines} ; $jj++ ) {
 4375         my $ldiff = $jj - $j_sc_beg;
 4376         last if ( $ldiff > 5 );
 4377         my $line   = $rlines->[$jj];
 4378         my $jmax   = $line->get_jmax();
 4379         my $sc_len = $line->get_rfield_lengths()->[$jmax];
 4380         next unless ($sc_len);
 4381         $num5++;
 4382     }
 4383 
 4384     # Forget the old side comment location if necessary
 4385     my $line = $rlines->[$j_sc_beg];
 4386     my $lnum =
 4387       $j_sc_beg + $self->[_file_writer_object_]->get_output_line_number();
 4388     my $keep_it =
 4389       $self->is_good_side_comment_column( $line, $lnum, $group_level, $num5 );
 4390     my $last_side_comment_column =
 4391       $keep_it ? $self->[_last_side_comment_column_] : 0;
 4392 
 4393     # If there are multiple groups we will do two passes
 4394     # so that we can find a common alignment for all groups.
 4395     my $MAX_PASS = @todo > 1 ? 2 : 1;
 4396 
 4397     # Loop over passes
 4398     my $max_comment_column = $last_side_comment_column;
 4399     for ( my $PASS = 1 ; $PASS <= $MAX_PASS ; $PASS++ ) {
 4400 
 4401         # If there are two passes, then on the last pass make the old column
 4402         # equal to the largest of the group.  This will result in the comments
 4403         # being aligned if possible.
 4404         if ( $PASS == $MAX_PASS ) {
 4405             $last_side_comment_column = $max_comment_column;
 4406         }
 4407 
 4408         # Loop over the groups with side comments
 4409         my $column_limit;
 4410         foreach my $ng (@todo) {
 4411             my ( $jbeg, $jend ) = @{ $rgroups->[$ng] };
 4412 
 4413             # Note that since all lines in a group have common alignments, we
 4414             # just have to work on one of the lines (the first line).
 4415             my $line                    = $rlines->[$jbeg];
 4416             my $jmax                    = $line->get_jmax();
 4417             my $is_hanging_side_comment = $line->get_is_hanging_side_comment();
 4418             last
 4419               if ( $PASS < $MAX_PASS && $is_hanging_side_comment );
 4420 
 4421             # the maximum space without exceeding the line length:
 4422             my $avail = $line->get_available_space_on_right();
 4423 
 4424             # try to use the previous comment column
 4425             my $side_comment_column = $line->get_column( $jmax - 1 );
 4426             my $move = $last_side_comment_column - $side_comment_column;
 4427 
 4428             # Remember the maximum possible column of the first line with
 4429             # side comment
 4430             if ( !defined($column_limit) ) {
 4431                 $column_limit = $side_comment_column + $avail;
 4432             }
 4433 
 4434             next if ( $jmax <= 0 );
 4435 
 4436             # but if this doesn't work, give up and use the minimum space
 4437             my $min_move = $self->[_rOpts_minimum_space_to_comment_] - 1;
 4438             if ( $move > $avail ) {
 4439                 $move = $min_move;
 4440             }
 4441 
 4442             # but we want some minimum space to the comment
 4443             if (   $move >= 0
 4444                 && $j_sc_beg == 0
 4445                 && $continuing_sc_flow )
 4446             {
 4447                 $min_move = 0;
 4448             }
 4449 
 4450             # remove constraints on hanging side comments
 4451             if ($is_hanging_side_comment) { $min_move = 0 }
 4452 
 4453             if ( $move < $min_move ) {
 4454                 $move = $min_move;
 4455             }
 4456 
 4457             # don't exceed the available space
 4458             if ( $move > $avail ) { $move = $avail }
 4459 
 4460             # We can only increase space, never decrease.
 4461             if ( $move < 0 ) { $move = 0 }
 4462 
 4463             # Discover the largest column on the preliminary  pass
 4464             if ( $PASS < $MAX_PASS ) {
 4465                 my $col = $line->get_column( $jmax - 1 ) + $move;
 4466 
 4467                 # but ignore columns too large for the starting line
 4468                 if ( $col > $max_comment_column && $col < $column_limit ) {
 4469                     $max_comment_column = $col;
 4470                 }
 4471             }
 4472 
 4473             # Make the changes on the final pass
 4474             else {
 4475                 $line->increase_field_width( $jmax - 1, $move );
 4476 
 4477                 # remember this column for the next group
 4478                 $last_side_comment_column = $line->get_column( $jmax - 1 );
 4479             }
 4480         } ## end loop over groups
 4481     } ## end loop over passes
 4482 
 4483     # Find the last side comment
 4484     my $j_sc_last;
 4485     my $ng_last = $todo[-1];
 4486     my ( $jbeg, $jend ) = @{ $rgroups->[$ng_last] };
 4487     for ( my $jj = $jend ; $jj >= $jbeg ; $jj-- ) {
 4488         my $line = $rlines->[$jj];
 4489         my $jmax = $line->get_jmax();
 4490         if ( $line->get_rfield_lengths()->[$jmax] ) {
 4491             $j_sc_last = $jj;
 4492             last;
 4493         }
 4494     }
 4495 
 4496     # Save final side comment info for possible use by the next batch
 4497     if ( defined($j_sc_last) ) {
 4498         my $line_number =
 4499           $self->[_file_writer_object_]->get_output_line_number() + $j_sc_last;
 4500         $self->[_last_side_comment_column_]      = $last_side_comment_column;
 4501         $self->[_last_side_comment_line_number_] = $line_number;
 4502         $self->[_last_side_comment_level_]       = $group_level;
 4503     }
 4504     return;
 4505 }
 4506 
 4507 ###############################
 4508 # CODE SECTION 6: Output Step A
 4509 ###############################
 4510 
 4511 sub valign_output_step_A {
 4512 
 4513     ###############################################################
 4514     # This is Step A in writing vertically aligned lines.
 4515     # The line is prepared according to the alignments which have
 4516     # been found. Then it is shipped to the next step.
 4517     ###############################################################
 4518 
 4519     my ( $self, $rinput_hash ) = @_;
 4520 
 4521     my $line                 = $rinput_hash->{line};
 4522     my $min_ci_gap           = $rinput_hash->{min_ci_gap};
 4523     my $do_not_align         = $rinput_hash->{do_not_align};
 4524     my $group_leader_length  = $rinput_hash->{group_leader_length};
 4525     my $extra_leading_spaces = $rinput_hash->{extra_leading_spaces};
 4526     my $level                = $rinput_hash->{level};
 4527 
 4528     my $rfields                   = $line->get_rfields();
 4529     my $rfield_lengths            = $line->get_rfield_lengths();
 4530     my $leading_space_count       = $line->get_leading_space_count();
 4531     my $outdent_long_lines        = $line->get_outdent_long_lines();
 4532     my $maximum_field_index       = $line->get_jmax();
 4533     my $rvertical_tightness_flags = $line->get_rvertical_tightness_flags();
 4534     my $Kend                      = $line->get_Kend();
 4535     my $level_end                 = $line->get_level_end();
 4536 
 4537     # add any extra spaces
 4538     if ( $leading_space_count > $group_leader_length ) {
 4539         $leading_space_count += $min_ci_gap;
 4540     }
 4541 
 4542     my $str     = $rfields->[0];
 4543     my $str_len = $rfield_lengths->[0];
 4544 
 4545     # loop to concatenate all fields of this line and needed padding
 4546     my $total_pad_count = 0;
 4547     for my $j ( 1 .. $maximum_field_index ) {
 4548 
 4549         # skip zero-length side comments
 4550         last
 4551           if (
 4552             ( $j == $maximum_field_index )
 4553             && ( !defined( $rfields->[$j] )
 4554                 || ( $rfield_lengths->[$j] == 0 ) )
 4555           );
 4556 
 4557         # compute spaces of padding before this field
 4558         my $col = $line->get_column( $j - 1 );
 4559         my $pad = $col - ( $str_len + $leading_space_count );
 4560 
 4561         if ($do_not_align) {
 4562             $pad =
 4563               ( $j < $maximum_field_index )
 4564               ? 0
 4565               : $self->[_rOpts_minimum_space_to_comment_] - 1;
 4566         }
 4567 
 4568         # if the -fpsc flag is set, move the side comment to the selected
 4569         # column if and only if it is possible, ignoring constraints on
 4570         # line length and minimum space to comment
 4571         if (   $self->[_rOpts_fixed_position_side_comment_]
 4572             && $j == $maximum_field_index )
 4573         {
 4574             my $newpad =
 4575               $pad + $self->[_rOpts_fixed_position_side_comment_] - $col - 1;
 4576             if ( $newpad >= 0 ) { $pad = $newpad; }
 4577         }
 4578 
 4579         # accumulate the padding
 4580         if ( $pad > 0 ) { $total_pad_count += $pad; }
 4581 
 4582         # only add padding when we have a finite field;
 4583         # this avoids extra terminal spaces if we have empty fields
 4584         if ( $rfield_lengths->[$j] > 0 ) {
 4585             $str .= ' ' x $total_pad_count;
 4586             $str_len += $total_pad_count;
 4587             $total_pad_count = 0;
 4588             $str .= $rfields->[$j];
 4589             $str_len += $rfield_lengths->[$j];
 4590         }
 4591         else {
 4592             $total_pad_count = 0;
 4593         }
 4594     }
 4595 
 4596     my $side_comment_length = $rfield_lengths->[$maximum_field_index];
 4597 
 4598     # ship this line off
 4599     $self->valign_output_step_B(
 4600         {
 4601             leading_space_count => $leading_space_count + $extra_leading_spaces,
 4602             line                => $str,
 4603             line_length         => $str_len,
 4604             side_comment_length => $side_comment_length,
 4605             outdent_long_lines  => $outdent_long_lines,
 4606             rvertical_tightness_flags => $rvertical_tightness_flags,
 4607             level                     => $level,
 4608             level_end                 => $level_end,
 4609             Kend                      => $Kend,
 4610         }
 4611     );
 4612     return;
 4613 }
 4614 
 4615 sub combine_fields {
 4616 
 4617     # We have a group of two lines for which we do not want to align tokens
 4618     # between index $imax_align and the side comment.  So we will delete fields
 4619     # between $imax_align and the side comment.  Alignments have already
 4620     # been set so we have to adjust them.
 4621 
 4622     my ( $line_0, $line_1, $imax_align ) = @_;
 4623 
 4624     if ( !defined($imax_align) ) { $imax_align = -1 }
 4625 
 4626     # First delete the unwanted tokens
 4627     my $jmax_old       = $line_0->get_jmax();
 4628     my @old_alignments = $line_0->get_alignments();
 4629     my @idel           = ( $imax_align + 1 .. $jmax_old - 2 );
 4630 
 4631     return unless (@idel);
 4632 
 4633     foreach my $line ( $line_0, $line_1 ) {
 4634         delete_selected_tokens( $line, \@idel );
 4635     }
 4636 
 4637     # Now adjust the alignments.  Note that the side comment alignment
 4638     # is always at jmax-1, and there is an ending alignment at jmax.
 4639     my @new_alignments;
 4640     if ( $imax_align >= 0 ) {
 4641         @new_alignments[ 0 .. $imax_align ] =
 4642           @old_alignments[ 0 .. $imax_align ];
 4643     }
 4644 
 4645     my $jmax_new = $line_0->get_jmax();
 4646 
 4647     $new_alignments[ $jmax_new - 1 ] = $old_alignments[ $jmax_old - 1 ];
 4648     $new_alignments[$jmax_new] = $old_alignments[$jmax_old];
 4649     $line_0->set_alignments(@new_alignments);
 4650     $line_1->set_alignments(@new_alignments);
 4651     return;
 4652 }
 4653 
 4654 sub get_output_line_number {
 4655 
 4656     # The output line number reported to a caller =
 4657     # the number of items still in the buffer +
 4658     # the number of items written.
 4659     return $_[0]->group_line_count() +
 4660       $_[0]->[_file_writer_object_]->get_output_line_number();
 4661 }
 4662 
 4663 ###############################
 4664 # CODE SECTION 7: Output Step B
 4665 ###############################
 4666 
 4667 {    ## closure for sub valign_output_step_B
 4668 
 4669     # These are values for a cache used by valign_output_step_B.
 4670     my $cached_line_text;
 4671     my $cached_line_text_length;
 4672     my $cached_line_type;
 4673     my $cached_line_flag;
 4674     my $cached_seqno;
 4675     my $cached_line_valid;
 4676     my $cached_line_leading_space_count;
 4677     my $cached_seqno_string;
 4678     my $cached_line_Kend;
 4679     my $seqno_string;
 4680     my $last_nonblank_seqno_string;
 4681 
 4682     sub get_seqno_string {
 4683         return $seqno_string;
 4684     }
 4685 
 4686     sub get_last_nonblank_seqno_string {
 4687         return $last_nonblank_seqno_string;
 4688     }
 4689 
 4690     sub set_last_nonblank_seqno_string {
 4691         my ($val) = @_;
 4692         $last_nonblank_seqno_string = $val;
 4693         return;
 4694     }
 4695 
 4696     sub get_cached_line_flag {
 4697         return $cached_line_flag;
 4698     }
 4699 
 4700     sub get_cached_line_type {
 4701         return $cached_line_type;
 4702     }
 4703 
 4704     sub set_cached_line_valid {
 4705         my ($val) = @_;
 4706         $cached_line_valid = $val;
 4707         return;
 4708     }
 4709 
 4710     sub get_cached_seqno {
 4711         return $cached_seqno;
 4712     }
 4713 
 4714     sub initialize_step_B_cache {
 4715 
 4716         # valign_output_step_B cache:
 4717         $cached_line_text                = "";
 4718         $cached_line_text_length         = 0;
 4719         $cached_line_type                = 0;
 4720         $cached_line_flag                = 0;
 4721         $cached_seqno                    = 0;
 4722         $cached_line_valid               = 0;
 4723         $cached_line_leading_space_count = 0;
 4724         $cached_seqno_string             = "";
 4725         $cached_line_Kend                = undef;
 4726 
 4727         # These vars hold a string of sequence numbers joined together used by
 4728         # the cache
 4729         $seqno_string               = "";
 4730         $last_nonblank_seqno_string = "";
 4731         return;
 4732     }
 4733 
 4734     sub _flush_cache {
 4735         my ($self) = @_;
 4736         if ($cached_line_type) {
 4737             $seqno_string = $cached_seqno_string;
 4738             $self->valign_output_step_C(
 4739                 $cached_line_text,
 4740                 $cached_line_leading_space_count,
 4741                 $self->[_last_level_written_],
 4742                 $cached_line_Kend,
 4743             );
 4744             $cached_line_type        = 0;
 4745             $cached_line_text        = "";
 4746             $cached_line_text_length = 0;
 4747             $cached_seqno_string     = "";
 4748             $cached_line_Kend        = undef;
 4749         }
 4750         return;
 4751     }
 4752 
 4753     sub valign_output_step_B {
 4754 
 4755         ###############################################################
 4756         # This is Step B in writing vertically aligned lines.
 4757         # Vertical tightness is applied according to preset flags.
 4758         # In particular this routine handles stacking of opening
 4759         # and closing tokens.
 4760         ###############################################################
 4761 
 4762         my ( $self, $rinput ) = @_;
 4763 
 4764         my $leading_space_count       = $rinput->{leading_space_count};
 4765         my $str                       = $rinput->{line};
 4766         my $str_length                = $rinput->{line_length};
 4767         my $side_comment_length       = $rinput->{side_comment_length};
 4768         my $outdent_long_lines        = $rinput->{outdent_long_lines};
 4769         my $rvertical_tightness_flags = $rinput->{rvertical_tightness_flags};
 4770         my $level                     = $rinput->{level};
 4771         my $level_end                 = $rinput->{level_end};
 4772         my $Kend                      = $rinput->{Kend};
 4773 
 4774         my $last_level_written = $self->[_last_level_written_];
 4775 
 4776         # Useful -gcs test cases for wide characters are
 4777         # perl527/(method.t.2, reg_mesg.t, mime-header.t)
 4778 
 4779         # handle outdenting of long lines:
 4780         my $is_outdented_line;
 4781         if ($outdent_long_lines) {
 4782             my $excess =
 4783               $str_length -
 4784               $side_comment_length +
 4785               $leading_space_count -
 4786               $self->maximum_line_length_for_level($level);
 4787             if ( $excess > 0 ) {
 4788                 $leading_space_count = 0;
 4789                 my $file_writer_object = $self->[_file_writer_object_];
 4790                 my $last_outdented_line_at =
 4791                   $file_writer_object->get_output_line_number();
 4792                 $self->[_last_outdented_line_at_] = $last_outdented_line_at;
 4793 
 4794                 my $outdented_line_count = $self->[_outdented_line_count_];
 4795                 unless ($outdented_line_count) {
 4796                     $self->[_first_outdented_line_at_] =
 4797                       $last_outdented_line_at;
 4798                 }
 4799                 $outdented_line_count++;
 4800                 $self->[_outdented_line_count_] = $outdented_line_count;
 4801                 $is_outdented_line = 1;
 4802             }
 4803         }
 4804 
 4805         # Make preliminary leading whitespace.  It could get changed
 4806         # later by entabbing, so we have to keep track of any changes
 4807         # to the leading_space_count from here on.
 4808         my $leading_string =
 4809           $leading_space_count > 0 ? ( ' ' x $leading_space_count ) : "";
 4810         my $leading_string_length = length($leading_string);
 4811 
 4812         # Unpack any recombination data; it was packed by
 4813         # sub send_lines_to_vertical_aligner. Contents:
 4814         #
 4815         #   [0] type: 1=opening non-block    2=closing non-block
 4816         #             3=opening block brace  4=closing block brace
 4817         #   [1] flag: if opening: 1=no multiple steps, 2=multiple steps ok
 4818         #             if closing: spaces of padding to use
 4819         #   [2] sequence number of container
 4820         #   [3] valid flag: do not append if this flag is false
 4821         #
 4822         my ( $open_or_close, $tightness_flag, $seqno, $valid, $seqno_beg,
 4823             $seqno_end );
 4824         if ($rvertical_tightness_flags) {
 4825             (
 4826                 $open_or_close, $tightness_flag, $seqno, $valid, $seqno_beg,
 4827                 $seqno_end
 4828             ) = @{$rvertical_tightness_flags};
 4829         }
 4830 
 4831         $seqno_string = $seqno_end;
 4832 
 4833         # handle any cached line ..
 4834         # either append this line to it or write it out
 4835         # Note: the function length() is used in this next test out of caution.
 4836         # All testing has shown that the variable $cached_line_text_length is
 4837         # correct, but its calculation is complex and a loss of cached text
 4838         # would be a disaster.
 4839         if ( length($cached_line_text) ) {
 4840 
 4841             # Dump an invalid cached line
 4842             if ( !$cached_line_valid ) {
 4843                 $self->valign_output_step_C(
 4844                     $cached_line_text,   $cached_line_leading_space_count,
 4845                     $last_level_written, $cached_line_Kend
 4846                 );
 4847             }
 4848 
 4849             # Handle cached line ending in OPENING tokens
 4850             elsif ( $cached_line_type == 1 || $cached_line_type == 3 ) {
 4851 
 4852                 my $gap = $leading_space_count - $cached_line_text_length;
 4853 
 4854                 # handle option of just one tight opening per line:
 4855                 if ( $cached_line_flag == 1 ) {
 4856                     if ( defined($open_or_close) && $open_or_close == 1 ) {
 4857                         $gap = -1;
 4858                     }
 4859                 }
 4860 
 4861                 # Do not join the lines if this might produce a one-line
 4862                 # container which exceeds the maximum line length.  This is
 4863                 # necessary prevent blinking, particularly with the combination
 4864                 # -xci -pvt=2.  In that case a one-line block alternately forms
 4865                 # and breaks, causing -xci to alternately turn on and off (case
 4866                 # b765).
 4867                 # Patched to fix cases b656 b862 b971 b972: always do the check
 4868                 # if -vmll is set.  The reason is that the -vmll option can
 4869                 # cause changes in the maximum line length, leading to blinkers
 4870                 # if not checked.
 4871                 if (
 4872                     $gap >= 0
 4873                     && ( $self->[_rOpts_variable_maximum_line_length_]
 4874                         || ( defined($level_end) && $level > $level_end ) )
 4875                   )
 4876                 {
 4877                     my $test_line_length =
 4878                       $cached_line_text_length + $gap + $str_length;
 4879                     my $maximum_line_length =
 4880                       $self->maximum_line_length_for_level($last_level_written);
 4881 
 4882                     # Add a small tolerance in the length test (fixes case b862)
 4883                     if ( $test_line_length > $maximum_line_length - 2 ) {
 4884                         $gap = -1;
 4885                     }
 4886                 }
 4887 
 4888                 if ( $gap >= 0 && defined($seqno_beg) ) {
 4889                     $leading_string        = $cached_line_text . ' ' x $gap;
 4890                     $leading_string_length = $cached_line_text_length + $gap;
 4891                     $leading_space_count   = $cached_line_leading_space_count;
 4892                     $seqno_string = $cached_seqno_string . ':' . $seqno_beg;
 4893                     $level        = $last_level_written;
 4894                 }
 4895                 else {
 4896                     $self->valign_output_step_C(
 4897                         $cached_line_text,   $cached_line_leading_space_count,
 4898                         $last_level_written, $cached_line_Kend
 4899                     );
 4900                 }
 4901             }
 4902 
 4903             # Handle cached line ending in CLOSING tokens
 4904             else {
 4905                 my $test_line =
 4906                   $cached_line_text . ' ' x $cached_line_flag . $str;
 4907                 my $test_line_length =
 4908                   $cached_line_text_length + $cached_line_flag + $str_length;
 4909                 if (
 4910 
 4911                     # The new line must start with container
 4912                     $seqno_beg
 4913 
 4914                     # The container combination must be okay..
 4915                     && (
 4916 
 4917                         # okay to combine like types
 4918                         ( $open_or_close == $cached_line_type )
 4919 
 4920                         # closing block brace may append to non-block
 4921                         || ( $cached_line_type == 2 && $open_or_close == 4 )
 4922 
 4923                         # something like ');'
 4924                         || ( !$open_or_close && $cached_line_type == 2 )
 4925 
 4926                     )
 4927 
 4928                     # The combined line must fit
 4929                     && (
 4930                         $test_line_length <=
 4931                         $self->maximum_line_length_for_level(
 4932                             $last_level_written)
 4933                     )
 4934                   )
 4935                 {
 4936 
 4937                     $seqno_string = $cached_seqno_string . ':' . $seqno_beg;
 4938 
 4939                     # Patch to outdent closing tokens ending # in ');' If we
 4940                     # are joining a line like ');' to a previous stacked set of
 4941                     # closing tokens, then decide if we may outdent the
 4942                     # combined stack to the indentation of the ');'.  Since we
 4943                     # should not normally outdent any of the other tokens more
 4944                     # than the indentation of the lines that contained them, we
 4945                     # will only do this if all of the corresponding opening
 4946                     # tokens were on the same line.  This can happen with -sot
 4947                     # and -sct.
 4948 
 4949                     # For example, it is ok here:
 4950                     #   __PACKAGE__->load_components( qw(
 4951                     #         PK::Auto
 4952                     #         Core
 4953                     #   ));
 4954                     #
 4955                     # But, for example, we do not outdent in this example
 4956                     # because that would put the closing sub brace out farther
 4957                     # than the opening sub brace:
 4958                     #
 4959                     #   perltidy -sot -sct
 4960                     #   $c->Tk::bind(
 4961                     #       '<Control-f>' => sub {
 4962                     #           my ($c) = @_;
 4963                     #           my $e = $c->XEvent;
 4964                     #           itemsUnderArea $c;
 4965                     #       } );
 4966                     #
 4967                     if (   $str =~ /^\);/
 4968                         && $cached_line_text =~ /^[\)\}\]\s]*$/ )
 4969                     {
 4970 
 4971                         # The way to tell this is if the stacked sequence
 4972                         # numbers of this output line are the reverse of the
 4973                         # stacked sequence numbers of the previous non-blank
 4974                         # line of sequence numbers.  So we can join if the
 4975                         # previous nonblank string of tokens is the mirror
 4976                         # image.  For example if stack )}] is 13:8:6 then we
 4977                         # are looking for a leading stack like [{( which
 4978                         # is 6:8:13. We only need to check the two ends,
 4979                         # because the intermediate tokens must fall in order.
 4980                         # Note on speed: having to split on colons and
 4981                         # eliminate multiple colons might appear to be slow,
 4982                         # but it's not an issue because we almost never come
 4983                         # through here.  In a typical file we don't.
 4984 
 4985                         $seqno_string               =~ s/^:+//;
 4986                         $last_nonblank_seqno_string =~ s/^:+//;
 4987                         $seqno_string               =~ s/:+/:/g;
 4988                         $last_nonblank_seqno_string =~ s/:+/:/g;
 4989 
 4990                         # how many spaces can we outdent?
 4991                         my $diff =
 4992                           $cached_line_leading_space_count -
 4993                           $leading_space_count;
 4994                         if (   $diff > 0
 4995                             && length($seqno_string)
 4996                             && length($last_nonblank_seqno_string) ==
 4997                             length($seqno_string) )
 4998                         {
 4999                             my @seqno_last =
 5000                               ( split /:/, $last_nonblank_seqno_string );
 5001                             my @seqno_now = ( split /:/, $seqno_string );
 5002                             if (   @seqno_now
 5003                                 && @seqno_last
 5004                                 && $seqno_now[-1] == $seqno_last[0]
 5005                                 && $seqno_now[0] == $seqno_last[-1] )
 5006                             {
 5007 
 5008                                 # OK to outdent ..
 5009                                 # for absolute safety, be sure we only remove
 5010                                 # whitespace
 5011                                 my $ws = substr( $test_line, 0, $diff );
 5012                                 if ( ( length($ws) == $diff )
 5013                                     && $ws =~ /^\s+$/ )
 5014                                 {
 5015 
 5016                                     $test_line = substr( $test_line, $diff );
 5017                                     $cached_line_leading_space_count -= $diff;
 5018                                     $last_level_written =
 5019                                       $self->level_change(
 5020                                         $cached_line_leading_space_count,
 5021                                         $diff, $last_level_written );
 5022                                     $self->reduce_valign_buffer_indentation(
 5023                                         $diff);
 5024                                 }
 5025 
 5026                                 # shouldn't happen, but not critical:
 5027                                 ##else {
 5028                                 ## ERROR transferring indentation here
 5029                                 ##}
 5030                             }
 5031                         }
 5032                     }
 5033 
 5034                     $str                   = $test_line;
 5035                     $str_length            = $test_line_length;
 5036                     $leading_string        = "";
 5037                     $leading_string_length = 0;
 5038                     $leading_space_count   = $cached_line_leading_space_count;
 5039                     $level                 = $last_level_written;
 5040                 }
 5041                 else {
 5042                     $self->valign_output_step_C(
 5043                         $cached_line_text,   $cached_line_leading_space_count,
 5044                         $last_level_written, $cached_line_Kend
 5045                     );
 5046                 }
 5047             }
 5048         }
 5049         $cached_line_type        = 0;
 5050         $cached_line_text        = "";
 5051         $cached_line_text_length = 0;
 5052         $cached_line_Kend        = undef;
 5053 
 5054         # make the line to be written
 5055         my $line        = $leading_string . $str;
 5056         my $line_length = $leading_string_length + $str_length;
 5057 
 5058         # Safety check: be sure that a line to be cached as a stacked block
 5059         # brace line ends in the appropriate opening or closing block brace.
 5060         # This should always be the case if the caller set flags correctly.
 5061         # Code '3' is for -sobb, code '4' is for -scbb.
 5062         if ($open_or_close) {
 5063             if (   $open_or_close == 3 && $line !~ /\{\s*$/
 5064                 || $open_or_close == 4 && $line !~ /\}\s*$/ )
 5065             {
 5066                 $open_or_close = 0;
 5067             }
 5068         }
 5069 
 5070         # write or cache this line ...
 5071         # fix for case b999: do not cache an outdented line
 5072         if ( !$open_or_close || $side_comment_length > 0 || $is_outdented_line )
 5073         {
 5074             $self->valign_output_step_C( $line, $leading_space_count, $level,
 5075                 $Kend );
 5076         }
 5077         else {
 5078             $cached_line_text                = $line;
 5079             $cached_line_text_length         = $line_length;
 5080             $cached_line_type                = $open_or_close;
 5081             $cached_line_flag                = $tightness_flag;
 5082             $cached_seqno                    = $seqno;
 5083             $cached_line_valid               = $valid;
 5084             $cached_line_leading_space_count = $leading_space_count;
 5085             $cached_seqno_string             = $seqno_string;
 5086             $cached_line_Kend                = $Kend;
 5087         }
 5088 
 5089         $self->[_last_level_written_]       = $level;
 5090         $self->[_last_side_comment_length_] = $side_comment_length;
 5091         return;
 5092     }
 5093 }
 5094 
 5095 ###############################
 5096 # CODE SECTION 8: Output Step C
 5097 ###############################
 5098 
 5099 {    ## closure for sub valign_output_step_C
 5100 
 5101     # Vertical alignment buffer used by valign_output_step_C
 5102     my $valign_buffer_filling;
 5103     my @valign_buffer;
 5104 
 5105     sub initialize_valign_buffer {
 5106         @valign_buffer         = ();
 5107         $valign_buffer_filling = "";
 5108         return;
 5109     }
 5110 
 5111     sub dump_valign_buffer {
 5112         my ($self) = @_;
 5113         if (@valign_buffer) {
 5114             foreach (@valign_buffer) {
 5115                 $self->valign_output_step_D( @{$_} );
 5116             }
 5117             @valign_buffer = ();
 5118         }
 5119         $valign_buffer_filling = "";
 5120         return;
 5121     }
 5122 
 5123     sub reduce_valign_buffer_indentation {
 5124 
 5125         my ( $self, $diff ) = @_;
 5126         if ( $valign_buffer_filling && $diff ) {
 5127             my $max_valign_buffer = @valign_buffer;
 5128             foreach my $i ( 0 .. $max_valign_buffer - 1 ) {
 5129                 my ( $line, $leading_space_count, $level, $Kend ) =
 5130                   @{ $valign_buffer[$i] };
 5131                 my $ws = substr( $line, 0, $diff );
 5132                 if ( ( length($ws) == $diff ) && $ws =~ /^\s+$/ ) {
 5133                     $line = substr( $line, $diff );
 5134                 }
 5135                 if ( $leading_space_count >= $diff ) {
 5136                     $leading_space_count -= $diff;
 5137                     $level =
 5138                       $self->level_change( $leading_space_count, $diff,
 5139                         $level );
 5140                 }
 5141                 $valign_buffer[$i] =
 5142                   [ $line, $leading_space_count, $level, $Kend ];
 5143             }
 5144         }
 5145         return;
 5146     }
 5147 
 5148     sub valign_output_step_C {
 5149 
 5150         ###############################################################
 5151         # This is Step C in writing vertically aligned lines.
 5152         # Lines are either stored in a buffer or passed along to the next step.
 5153         # The reason for storing lines is that we may later want to reduce their
 5154         # indentation when -sot and -sct are both used.
 5155         ###############################################################
 5156         my ( $self, @args ) = @_;
 5157 
 5158         my $seqno_string               = get_seqno_string();
 5159         my $last_nonblank_seqno_string = get_last_nonblank_seqno_string();
 5160 
 5161         # Dump any saved lines if we see a line with an unbalanced opening or
 5162         # closing token.
 5163         $self->dump_valign_buffer()
 5164           if ( $seqno_string && $valign_buffer_filling );
 5165 
 5166         # Either store or write this line
 5167         if ($valign_buffer_filling) {
 5168             push @valign_buffer, [@args];
 5169         }
 5170         else {
 5171