"Fossies" - the Fresh Open Source Software Archive

Member "Perl-Tidy-20210717/lib/Perl/Tidy/VerticalAligner.pm" (14 Jul 2021, 203710 Bytes) of package /linux/misc/Perl-Tidy-20210717.tar.gz:


As a special service "Fossies" has tried to format the requested source page into HTML format using (guessed) Perl source code syntax highlighting (style: standard) with prefixed line numbers and code folding option. Alternatively you can here view or download the uninterpreted source code file. For more information about "VerticalAligner.pm" see the Fossies "Dox" file reference documentation and the latest Fossies "Diffs" side-by-side code changes report: 20210402_vs_20210717.

    1 package Perl::Tidy::VerticalAligner;
    2 use strict;
    3 use warnings;
    4 our $VERSION = '20210717';
    5 
    6 use Perl::Tidy::VerticalAligner::Alignment;
    7 use Perl::Tidy::VerticalAligner::Line;
    8 
    9 # The Perl::Tidy::VerticalAligner package collects output lines and
   10 # attempts to line up certain common tokens, such as => and #, which are
   11 # identified by the calling routine.
   12 #
   13 # Usage:
   14 #   - Initiate an object with a call to new().
   15 #   - Write lines one-by-one with calls to valign_input().
   16 #   - Make a final call to flush() to empty the pipeline.
   17 #
   18 # The sub valign_input collects lines into groups.  When a group reaches
   19 # the maximum possible size it is processed for alignment and output.
   20 # The maximum group size is reached whenerver there is a change in indentation
   21 # level, a blank line, a block comment, or an external flush call.  The calling
   22 # routine may also force a break in alignment at any time.
   23 #
   24 # If the calling routine needs to interrupt the output and send other text to
   25 # the output, it must first call flush() to empty the output pipeline.  This
   26 # might occur for example if a block of pod text needs to be sent to the output
   27 # between blocks of code.
   28 
   29 # It is essential that a final call to flush() be made. Otherwise some
   30 # final lines of text will be lost.
   31 
   32 # Index...
   33 # CODE SECTION 1: Preliminary code, global definitions and sub new
   34 #                 sub new
   35 # CODE SECTION 2: Some Basic Utilities
   36 # CODE SECTION 3: Code to accept input and form groups
   37 #                 sub valign_input
   38 # CODE SECTION 4: Code to process comment lines
   39 #                 sub _flush_comment_lines
   40 # CODE SECTION 5: Code to process groups of code lines
   41 #                 sub _flush_group_lines
   42 # CODE SECTION 6: Output Step A
   43 #                 sub valign_output_step_A
   44 # CODE SECTION 7: Output Step B
   45 #                 sub valign_output_step_B
   46 # CODE SECTION 8: Output Step C
   47 #                 sub valign_output_step_C
   48 # CODE SECTION 9: Output Step D
   49 #                 sub valign_output_step_D
   50 # CODE SECTION 10: Summary
   51 #                 sub report_anything_unusual
   52 
   53 ##################################################################
   54 # CODE SECTION 1: Preliminary code, global definitions and sub new
   55 ##################################################################
   56 
   57 sub AUTOLOAD {
   58 
   59     # Catch any undefined sub calls so that we are sure to get
   60     # some diagnostic information.  This sub should never be called
   61     # except for a programming error.
   62     our $AUTOLOAD;
   63     return if ( $AUTOLOAD =~ /\bDESTROY$/ );
   64     my ( $pkg, $fname, $lno ) = caller();
   65     my $my_package = __PACKAGE__;
   66     print STDERR <<EOM;
   67 ======================================================================
   68 Error detected in package '$my_package', version $VERSION
   69 Received unexpected AUTOLOAD call for sub '$AUTOLOAD'
   70 Called from package: '$pkg'  
   71 Called from File '$fname'  at line '$lno'
   72 This error is probably due to a recent programming change
   73 ======================================================================
   74 EOM
   75     exit 1;
   76 }
   77 
   78 sub DESTROY {
   79 
   80     # required to avoid call to AUTOLOAD in some versions of perl
   81 }
   82 
   83 BEGIN {
   84 
   85     # Define the fixed indexes for variables in $self, which is an array
   86     # reference.  Note the convention of leading and trailing underscores to
   87     # keep them unique.
   88     my $i = 0;
   89     use constant {
   90         _file_writer_object_ => $i++,
   91         _logger_object_      => $i++,
   92         _diagnostics_object_ => $i++,
   93         _length_function_    => $i++,
   94 
   95         _rOpts_                              => $i++,
   96         _rOpts_indent_columns_               => $i++,
   97         _rOpts_tabs_                         => $i++,
   98         _rOpts_entab_leading_whitespace_     => $i++,
   99         _rOpts_fixed_position_side_comment_  => $i++,
  100         _rOpts_minimum_space_to_comment_     => $i++,
  101         _rOpts_maximum_line_length_          => $i++,
  102         _rOpts_variable_maximum_line_length_ => $i++,
  103         _rOpts_valign_                       => $i++,
  104 
  105         _last_level_written_            => $i++,
  106         _last_side_comment_column_      => $i++,
  107         _last_side_comment_line_number_ => $i++,
  108         _last_side_comment_length_      => $i++,
  109         _last_side_comment_level_       => $i++,
  110         _outdented_line_count_          => $i++,
  111         _first_outdented_line_at_       => $i++,
  112         _last_outdented_line_at_        => $i++,
  113         _consecutive_block_comments_    => $i++,
  114 
  115         _rgroup_lines_                => $i++,
  116         _group_level_                 => $i++,
  117         _group_type_                  => $i++,
  118         _zero_count_                  => $i++,
  119         _last_leading_space_count_    => $i++,
  120         _comment_leading_space_count_ => $i++,
  121     };
  122 
  123     # Debug flag. This is a relic from the original program development
  124     # looking for problems with tab characters.  Caution: this debug flag can
  125     # produce a lot of output It should be 0 except when debugging small
  126     # scripts.
  127 
  128     use constant DEBUG_TABS => 0;
  129 
  130     my $debug_warning = sub {
  131         print STDOUT "VALIGN_DEBUGGING with key $_[0]\n";
  132         return;
  133     };
  134 
  135     DEBUG_TABS && $debug_warning->('TABS');
  136 
  137 }
  138 
  139 sub new {
  140 
  141     my ( $class, @args ) = @_;
  142 
  143     my %defaults = (
  144         rOpts              => undef,
  145         file_writer_object => undef,
  146         logger_object      => undef,
  147         diagnostics_object => undef,
  148         length_function    => sub { return length( $_[0] ) },
  149     );
  150     my %args = ( %defaults, @args );
  151 
  152     # Initialize other caches and buffers
  153     initialize_step_B_cache();
  154     initialize_valign_buffer();
  155     initialize_leading_string_cache();
  156     initialize_decode();
  157 
  158     # Initialize all variables in $self.
  159     # To add an item to $self, first define a new constant index in the BEGIN
  160     # section.
  161     my $self = [];
  162 
  163     # objects
  164     $self->[_file_writer_object_] = $args{file_writer_object};
  165     $self->[_logger_object_]      = $args{logger_object};
  166     $self->[_diagnostics_object_] = $args{diagnostics_object};
  167     $self->[_length_function_]    = $args{length_function};
  168 
  169     # shortcuts to user options
  170     my $rOpts = $args{rOpts};
  171 
  172     $self->[_rOpts_]                = $rOpts;
  173     $self->[_rOpts_indent_columns_] = $rOpts->{'indent-columns'};
  174     $self->[_rOpts_tabs_]           = $rOpts->{'tabs'};
  175     $self->[_rOpts_entab_leading_whitespace_] =
  176       $rOpts->{'entab-leading-whitespace'};
  177     $self->[_rOpts_fixed_position_side_comment_] =
  178       $rOpts->{'fixed-position-side-comment'};
  179     $self->[_rOpts_minimum_space_to_comment_] =
  180       $rOpts->{'minimum-space-to-comment'};
  181     $self->[_rOpts_maximum_line_length_] = $rOpts->{'maximum-line-length'};
  182     $self->[_rOpts_variable_maximum_line_length_] =
  183       $rOpts->{'variable-maximum-line-length'};
  184     $self->[_rOpts_valign_] = $rOpts->{'valign'};
  185 
  186     # Batch of lines being collected
  187     $self->[_rgroup_lines_]                = [];
  188     $self->[_group_level_]                 = 0;
  189     $self->[_group_type_]                  = "";
  190     $self->[_zero_count_]                  = 0;
  191     $self->[_comment_leading_space_count_] = 0;
  192     $self->[_last_leading_space_count_]    = 0;
  193 
  194     # Memory of what has been processed
  195     $self->[_last_level_written_]            = -1;
  196     $self->[_last_side_comment_column_]      = 0;
  197     $self->[_last_side_comment_line_number_] = 0;
  198     $self->[_last_side_comment_length_]      = 0;
  199     $self->[_last_side_comment_level_]       = -1;
  200     $self->[_outdented_line_count_]          = 0;
  201     $self->[_first_outdented_line_at_]       = 0;
  202     $self->[_last_outdented_line_at_]        = 0;
  203     $self->[_consecutive_block_comments_]    = 0;
  204 
  205     bless $self, $class;
  206     return $self;
  207 }
  208 
  209 #################################
  210 # CODE SECTION 2: Basic Utilities
  211 #################################
  212 
  213 sub flush {
  214 
  215     # flush() is the external call to completely empty the pipeline.
  216     my ($self) = @_;
  217 
  218     # push things out the pipline...
  219 
  220     # push out any current group lines
  221     $self->_flush_group_lines();
  222 
  223     # then anything left in the cache of step_B
  224     $self->_flush_cache();
  225 
  226     # then anything left in the buffer of step_C
  227     $self->dump_valign_buffer();
  228 
  229     return;
  230 }
  231 
  232 sub initialize_for_new_group {
  233     my ($self) = @_;
  234 
  235     $self->[_rgroup_lines_]                = [];
  236     $self->[_group_type_]                  = "";
  237     $self->[_zero_count_]                  = 0;
  238     $self->[_comment_leading_space_count_] = 0;
  239     $self->[_last_leading_space_count_]    = 0;
  240 
  241     # Note that the value for _group_level_ is
  242     # handled separately in sub valign_input
  243     return;
  244 }
  245 
  246 sub group_line_count {
  247     return +@{ $_[0]->[_rgroup_lines_] };
  248 }
  249 
  250 # interface to Perl::Tidy::Diagnostics routines
  251 # For debugging; not currently used
  252 sub write_diagnostics {
  253     my ( $self, $msg ) = @_;
  254     my $diagnostics_object = $self->[_diagnostics_object_];
  255     if ($diagnostics_object) {
  256         $diagnostics_object->write_diagnostics($msg);
  257     }
  258     return;
  259 }
  260 
  261 # interface to Perl::Tidy::Logger routines
  262 sub warning {
  263     my ( $self, $msg ) = @_;
  264     my $logger_object = $self->[_logger_object_];
  265     if ($logger_object) {
  266         $logger_object->warning($msg);
  267     }
  268     return;
  269 }
  270 
  271 sub write_logfile_entry {
  272     my ( $self, $msg ) = @_;
  273     my $logger_object = $self->[_logger_object_];
  274     if ($logger_object) {
  275         $logger_object->write_logfile_entry($msg);
  276     }
  277     return;
  278 }
  279 
  280 sub report_definite_bug {
  281     my ( $self, $msg ) = @_;
  282     my $logger_object = $self->[_logger_object_];
  283     if ($logger_object) {
  284         $logger_object->report_definite_bug();
  285     }
  286     return;
  287 }
  288 
  289 sub get_cached_line_count {
  290     my $self = shift;
  291     return $self->group_line_count() + ( get_cached_line_type() ? 1 : 0 );
  292 }
  293 
  294 sub get_spaces {
  295 
  296     # return the number of leading spaces associated with an indentation
  297     # variable $indentation is either a constant number of spaces or an
  298     # object with a get_spaces method.
  299     my $indentation = shift;
  300     return ref($indentation) ? $indentation->get_spaces() : $indentation;
  301 }
  302 
  303 sub get_recoverable_spaces {
  304 
  305     # return the number of spaces (+ means shift right, - means shift left)
  306     # that we would like to shift a group of lines with the same indentation
  307     # to get them to line up with their opening parens
  308     my $indentation = shift;
  309     return ref($indentation) ? $indentation->get_recoverable_spaces() : 0;
  310 }
  311 
  312 sub maximum_line_length_for_level {
  313 
  314     # return maximum line length for line starting with a given level
  315     my ( $self, $level ) = @_;
  316     my $maximum_line_length = $self->[_rOpts_maximum_line_length_];
  317     if ( $self->[_rOpts_variable_maximum_line_length_] ) {
  318         if ( $level < 0 ) { $level = 0 }
  319         $maximum_line_length += $level * $self->[_rOpts_indent_columns_];
  320     }
  321     return $maximum_line_length;
  322 }
  323 
  324 ######################################################
  325 # CODE SECTION 3: Code to accept input and form groups
  326 ######################################################
  327 
  328 sub push_group_line {
  329 
  330     my ( $self, $new_line ) = @_;
  331     my $rgroup_lines = $self->[_rgroup_lines_];
  332     push @{$rgroup_lines}, $new_line;
  333     return;
  334 }
  335 
  336 use constant DEBUG_VALIGN      => 0;
  337 use constant SC_LONG_LINE_DIFF => 12;
  338 
  339 sub valign_input {
  340 
  341     # Place one line in the current vertical group.
  342     #
  343     # The input parameters are:
  344     #     $level = indentation level of this line
  345     #     $rfields = reference to array of fields
  346     #     $rpatterns = reference to array of patterns, one per field
  347     #     $rtokens   = reference to array of tokens starting fields 1,2,..
  348     #
  349     # Here is an example of what this package does.  In this example,
  350     # we are trying to line up both the '=>' and the '#'.
  351     #
  352     #         '18' => 'grave',    #   \`
  353     #         '19' => 'acute',    #   `'
  354     #         '20' => 'caron',    #   \v
  355     # <-tabs-><f1-><--field 2 ---><-f3->
  356     # |            |              |    |
  357     # |            |              |    |
  358     # col1        col2         col3 col4
  359     #
  360     # The calling routine has already broken the entire line into 3 fields as
  361     # indicated.  (So the work of identifying promising common tokens has
  362     # already been done).
  363     #
  364     # In this example, there will be 2 tokens being matched: '=>' and '#'.
  365     # They are the leading parts of fields 2 and 3, but we do need to know
  366     # what they are so that we can dump a group of lines when these tokens
  367     # change.
  368     #
  369     # The fields contain the actual characters of each field.  The patterns
  370     # are like the fields, but they contain mainly token types instead
  371     # of tokens, so they have fewer characters.  They are used to be
  372     # sure we are matching fields of similar type.
  373     #
  374     # In this example, there will be 4 column indexes being adjusted.  The
  375     # first one is always at zero.  The interior columns are at the start of
  376     # the matching tokens, and the last one tracks the maximum line length.
  377     #
  378     # Each time a new line comes in, it joins the current vertical
  379     # group if possible.  Otherwise it causes the current group to be flushed
  380     # and a new group is started.
  381     #
  382     # For each new group member, the column locations are increased, as
  383     # necessary, to make room for the new fields.  When the group is finally
  384     # output, these column numbers are used to compute the amount of spaces of
  385     # padding needed for each field.
  386     #
  387     # Programming note: the fields are assumed not to have any tab characters.
  388     # Tabs have been previously removed except for tabs in quoted strings and
  389     # side comments.  Tabs in these fields can mess up the column counting.
  390     # The log file warns the user if there are any such tabs.
  391 
  392     my ( $self, $rline_hash ) = @_;
  393 
  394     my $level                     = $rline_hash->{level};
  395     my $level_end                 = $rline_hash->{level_end};
  396     my $level_adj                 = $rline_hash->{level_adj};
  397     my $indentation               = $rline_hash->{indentation};
  398     my $list_seqno                = $rline_hash->{list_seqno};
  399     my $outdent_long_lines        = $rline_hash->{outdent_long_lines};
  400     my $is_terminal_ternary       = $rline_hash->{is_terminal_ternary};
  401     my $rvertical_tightness_flags = $rline_hash->{rvertical_tightness_flags};
  402     my $level_jump                = $rline_hash->{level_jump};
  403     my $rfields                   = $rline_hash->{rfields};
  404     my $rtokens                   = $rline_hash->{rtokens};
  405     my $rpatterns                 = $rline_hash->{rpatterns};
  406     my $rfield_lengths            = $rline_hash->{rfield_lengths};
  407     my $terminal_block_type       = $rline_hash->{terminal_block_type};
  408     my $batch_count               = $rline_hash->{batch_count};
  409     my $break_alignment_before    = $rline_hash->{break_alignment_before};
  410     my $break_alignment_after     = $rline_hash->{break_alignment_after};
  411     my $Kend                      = $rline_hash->{Kend};
  412     my $ci_level                  = $rline_hash->{ci_level};
  413 
  414     # The index '$Kend' is a value which passed along with the line text to sub
  415     # 'write_code_line' for a convergence check.
  416 
  417     # number of fields is $jmax
  418     # number of tokens between fields is $jmax-1
  419     my $jmax = @{$rfields} - 1;
  420 
  421     my $leading_space_count = get_spaces($indentation);
  422 
  423     # set outdented flag to be sure we either align within statements or
  424     # across statement boundaries, but not both.
  425     my $is_outdented =
  426       $self->[_last_leading_space_count_] > $leading_space_count;
  427     $self->[_last_leading_space_count_] = $leading_space_count;
  428 
  429     # Identify a hanging side comment.  Hanging side comments have an empty
  430     # initial field.
  431     my $is_hanging_side_comment =
  432       ( $jmax == 1 && $rtokens->[0] eq '#' && $rfields->[0] =~ /^\s*$/ );
  433 
  434     # Undo outdented flag for a hanging side comment
  435     $is_outdented = 0 if $is_hanging_side_comment;
  436 
  437     # Identify a block comment.
  438     my $is_block_comment = $jmax == 0 && substr( $rfields->[0], 0, 1 ) eq '#';
  439 
  440     # Block comment .. update count
  441     if ($is_block_comment) {
  442         $self->[_consecutive_block_comments_]++;
  443     }
  444 
  445     # Not a block comment ..
  446     # Forget side comment column if we saw 2 or more block comments,
  447     # and reset the count
  448     else {
  449 
  450         if ( $self->[_consecutive_block_comments_] > 1 ) {
  451             $self->forget_side_comment();
  452         }
  453         $self->[_consecutive_block_comments_] = 0;
  454     }
  455 
  456     # Reset side comment location if we are entering a new block from level 0.
  457     # This is intended to keep them from drifting too far to the right.
  458     if ( $terminal_block_type && $level_adj == 0 && $level_end > $level ) {
  459         $self->forget_side_comment();
  460     }
  461 
  462     my $group_level = $self->[_group_level_];
  463 
  464     DEBUG_VALIGN && do {
  465         my $nlines = $self->group_line_count();
  466         print STDOUT
  467 "Entering valign_input: lines=$nlines new #fields= $jmax, leading_count=$leading_space_count, level_jump=$level_jump, level=$level, group_level=$group_level, level_jump=$level_jump\n";
  468     };
  469 
  470     # Validate cached line if necessary: If we can produce a container
  471     # with just 2 lines total by combining an existing cached opening
  472     # token with the closing token to follow, then we will mark both
  473     # cached flags as valid.
  474     my $cached_line_type = get_cached_line_type();
  475     if ($cached_line_type) {
  476         my $cached_line_flag = get_cached_line_flag();
  477         if ($rvertical_tightness_flags) {
  478             my $cached_seqno = get_cached_seqno();
  479             if (   $cached_seqno
  480                 && $self->group_line_count() <= 1
  481                 && $rvertical_tightness_flags->[2]
  482                 && $rvertical_tightness_flags->[2] == $cached_seqno )
  483             {
  484                 $rvertical_tightness_flags->[3] ||= 1;
  485                 set_cached_line_valid(1);
  486             }
  487         }
  488 
  489         # do not join an opening block brace with an unbalanced line
  490         # unless requested with a flag value of 2
  491         if (   $cached_line_type == 3
  492             && !$self->group_line_count()
  493             && $cached_line_flag < 2
  494             && $level_jump != 0 )
  495         {
  496             set_cached_line_valid(0);
  497         }
  498     }
  499 
  500     # shouldn't happen:
  501     if ( $level < 0 ) { $level = 0 }
  502 
  503     # do not align code across indentation level changes
  504     # or if vertical alignment is turned off for debugging
  505     if ( $level != $group_level || $is_outdented || !$self->[_rOpts_valign_] ) {
  506 
  507         $self->_flush_group_lines( $level - $group_level );
  508 
  509         $group_level = $level;
  510         $self->[_group_level_] = $group_level;
  511 
  512         # wait until after the above flush to get the leading space
  513         # count because it may have been changed if the -icp flag is in
  514         # effect
  515         $leading_space_count = get_spaces($indentation);
  516 
  517     }
  518 
  519     # --------------------------------------------------------------------
  520     # Collect outdentable block COMMENTS
  521     # --------------------------------------------------------------------
  522     my $is_blank_line = "";
  523     if ( $self->[_group_type_] eq 'COMMENT' ) {
  524         if (
  525             (
  526                    $is_block_comment
  527                 && $outdent_long_lines
  528                 && $leading_space_count ==
  529                 $self->[_comment_leading_space_count_]
  530             )
  531             || $is_blank_line
  532           )
  533         {
  534 
  535             # Note that for a comment group we are not storing a line
  536             # but rather just the text and its length.
  537             $self->push_group_line(
  538                 [ $rfields->[0], $rfield_lengths->[0], $Kend ] );
  539             return;
  540         }
  541         else {
  542             $self->_flush_group_lines();
  543         }
  544     }
  545 
  546     my $rgroup_lines = $self->[_rgroup_lines_];
  547     if ( $break_alignment_before && @{$rgroup_lines} ) {
  548         $rgroup_lines->[-1]->set_end_group(1);
  549     }
  550 
  551     # --------------------------------------------------------------------
  552     # add dummy fields for terminal ternary
  553     # --------------------------------------------------------------------
  554     my $j_terminal_match;
  555 
  556     if ( $is_terminal_ternary && @{$rgroup_lines} ) {
  557         $j_terminal_match =
  558           fix_terminal_ternary( $rgroup_lines->[-1], $rfields, $rtokens,
  559             $rpatterns, $rfield_lengths, $group_level, );
  560         $jmax = @{$rfields} - 1;
  561     }
  562 
  563     # --------------------------------------------------------------------
  564     # add dummy fields for else statement
  565     # --------------------------------------------------------------------
  566 
  567     # Note the trailing space after 'else' here. If there were no space between
  568     # the else and the next '{' then we would not be able to do vertical
  569     # alignment of the '{'.
  570     if (   $rfields->[0] eq 'else '
  571         && @{$rgroup_lines}
  572         && $level_jump == 0 )
  573     {
  574 
  575         $j_terminal_match =
  576           fix_terminal_else( $rgroup_lines->[-1], $rfields, $rtokens,
  577             $rpatterns, $rfield_lengths );
  578         $jmax = @{$rfields} - 1;
  579     }
  580 
  581     # --------------------------------------------------------------------
  582     # Handle simple line of code with no fields to match.
  583     # --------------------------------------------------------------------
  584     if ( $jmax <= 0 ) {
  585         $self->[_zero_count_]++;
  586 
  587         if ( @{$rgroup_lines}
  588             && !get_recoverable_spaces( $rgroup_lines->[0]->get_indentation() )
  589           )
  590         {
  591 
  592             # flush the current group if it has some aligned columns..
  593             # or we haven't seen a comment lately
  594             if (   $rgroup_lines->[0]->get_jmax() > 1
  595                 || $self->[_zero_count_] > 3 )
  596             {
  597                 $self->_flush_group_lines();
  598             }
  599         }
  600 
  601         # start new COMMENT group if this comment may be outdented
  602         if (   $is_block_comment
  603             && $outdent_long_lines
  604             && !$self->group_line_count() )
  605         {
  606             $self->[_group_type_]                  = 'COMMENT';
  607             $self->[_comment_leading_space_count_] = $leading_space_count;
  608             $self->push_group_line(
  609                 [ $rfields->[0], $rfield_lengths->[0], $Kend ] );
  610             return;
  611         }
  612 
  613         # just write this line directly if no current group, no side comment,
  614         # and no space recovery is needed.
  615         if (   !$self->group_line_count()
  616             && !get_recoverable_spaces($indentation) )
  617         {
  618 
  619             $self->valign_output_step_B(
  620                 {
  621                     leading_space_count       => $leading_space_count,
  622                     line                      => $rfields->[0],
  623                     line_length               => $rfield_lengths->[0],
  624                     side_comment_length       => 0,
  625                     outdent_long_lines        => $outdent_long_lines,
  626                     rvertical_tightness_flags => $rvertical_tightness_flags,
  627                     level                     => $level,
  628                     level_end                 => $level_end,
  629                     Kend                      => $Kend,
  630                 }
  631             );
  632 
  633             return;
  634         }
  635     }
  636     else {
  637         $self->[_zero_count_] = 0;
  638     }
  639 
  640     my $maximum_line_length_for_level =
  641       $self->maximum_line_length_for_level($level);
  642 
  643     # --------------------------------------------------------------------
  644     # It simplifies things to create a zero length side comment
  645     # if none exists.
  646     # --------------------------------------------------------------------
  647     $self->make_side_comment( $rtokens, $rfields, $rpatterns, $rfield_lengths );
  648     $jmax = @{$rfields} - 1;
  649 
  650     # --------------------------------------------------------------------
  651     # create an object to hold this line
  652     # --------------------------------------------------------------------
  653     my $new_line = Perl::Tidy::VerticalAligner::Line->new(
  654         {
  655             jmax                      => $jmax,
  656             rtokens                   => $rtokens,
  657             rfields                   => $rfields,
  658             rpatterns                 => $rpatterns,
  659             rfield_lengths            => $rfield_lengths,
  660             indentation               => $indentation,
  661             leading_space_count       => $leading_space_count,
  662             outdent_long_lines        => $outdent_long_lines,
  663             list_seqno                => $list_seqno,
  664             list_type                 => "",
  665             is_hanging_side_comment   => $is_hanging_side_comment,
  666             maximum_line_length       => $maximum_line_length_for_level,
  667             rvertical_tightness_flags => $rvertical_tightness_flags,
  668             is_terminal_ternary       => $is_terminal_ternary,
  669             j_terminal_match          => $j_terminal_match,
  670             end_group                 => $break_alignment_after,
  671             Kend                      => $Kend,
  672             ci_level                  => $ci_level,
  673             level                     => $level,
  674             level_end                 => $level_end,
  675             imax_pair                 => -1,
  676         }
  677     );
  678 
  679     # --------------------------------------------------------------------
  680     # Decide if this is a simple list of items.
  681     # We use this to be less restrictive in deciding what to align.
  682     # --------------------------------------------------------------------
  683     decide_if_list($new_line) if ($list_seqno);
  684 
  685     # --------------------------------------------------------------------
  686     # Append this line to the current group (or start new group)
  687     # --------------------------------------------------------------------
  688 
  689     $self->push_group_line($new_line);
  690 
  691     # output this group if it ends in a terminal else or ternary line
  692     if ( defined($j_terminal_match) ) {
  693         $self->_flush_group_lines();
  694     }
  695 
  696     # Force break after jump to lower level
  697     if ( $level_jump < 0 ) {
  698         $self->_flush_group_lines($level_jump);
  699     }
  700 
  701     # --------------------------------------------------------------------
  702     # Some old debugging stuff
  703     # --------------------------------------------------------------------
  704     DEBUG_VALIGN && do {
  705         print STDOUT "exiting valign_input fields:";
  706         dump_array( @{$rfields} );
  707         print STDOUT "exiting valign_input tokens:";
  708         dump_array( @{$rtokens} );
  709         print STDOUT "exiting valign_input patterns:";
  710         dump_array( @{$rpatterns} );
  711     };
  712 
  713     return;
  714 }
  715 
  716 sub join_hanging_comment {
  717 
  718     # Add dummy fields to a hanging side comment to make it look
  719     # like the first line in its potential group.  This simplifies
  720     # the coding.
  721     my ( $new_line, $old_line ) = @_;
  722 
  723     my $jmax = $new_line->get_jmax();
  724 
  725     # must be 2 fields
  726     return 0 unless $jmax == 1;
  727     my $rtokens = $new_line->get_rtokens();
  728 
  729     # the second field must be a comment
  730     return 0 unless $rtokens->[0] eq '#';
  731     my $rfields = $new_line->get_rfields();
  732 
  733     # the first field must be empty
  734     return 0 unless $rfields->[0] =~ /^\s*$/;
  735 
  736     # the current line must have fewer fields
  737     my $maximum_field_index = $old_line->get_jmax();
  738     return 0
  739       unless $maximum_field_index > $jmax;
  740 
  741     # looks ok..
  742     my $rpatterns      = $new_line->get_rpatterns();
  743     my $rfield_lengths = $new_line->get_rfield_lengths();
  744 
  745     $new_line->set_is_hanging_side_comment(1);
  746     $jmax = $maximum_field_index;
  747     $new_line->set_jmax($jmax);
  748     $rfields->[$jmax]         = $rfields->[1];
  749     $rfield_lengths->[$jmax]  = $rfield_lengths->[1];
  750     $rtokens->[ $jmax - 1 ]   = $rtokens->[0];
  751     $rpatterns->[ $jmax - 1 ] = $rpatterns->[0];
  752     foreach my $j ( 1 .. $jmax - 1 ) {
  753         $rfields->[$j]         = '';
  754         $rfield_lengths->[$j]  = 0;
  755         $rtokens->[ $j - 1 ]   = "";
  756         $rpatterns->[ $j - 1 ] = "";
  757     }
  758     return 1;
  759 }
  760 
  761 sub make_side_comment {
  762 
  763     # create an empty side comment if none exists
  764 
  765     my ( $self, $rtokens, $rfields, $rpatterns, $rfield_lengths ) = @_;
  766 
  767     my $jmax = @{$rfields} - 1;
  768 
  769     # if line does not have a side comment...
  770     if ( ( $jmax == 0 ) || ( $rtokens->[ $jmax - 1 ] ne '#' ) ) {
  771         $jmax += 1;
  772         $rtokens->[ $jmax - 1 ]  = '#';
  773         $rfields->[$jmax]        = '';
  774         $rfield_lengths->[$jmax] = 0;
  775         $rpatterns->[$jmax]      = '#';
  776     }
  777     return;
  778 }
  779 
  780 {    ## closure for sub decide_if_list
  781 
  782     my %is_comma_token;
  783 
  784     BEGIN {
  785 
  786         my @q = qw( => );
  787         push @q, ',';
  788         @is_comma_token{@q} = (1) x scalar(@q);
  789     }
  790 
  791     sub decide_if_list {
  792 
  793         my $line = shift;
  794 
  795         # A list will be taken to be a line with a forced break in which all
  796         # of the field separators are commas or comma-arrows (except for the
  797         # trailing #)
  798 
  799         my $rtokens    = $line->get_rtokens();
  800         my $test_token = $rtokens->[0];
  801         my ( $raw_tok, $lev, $tag, $tok_count ) =
  802           decode_alignment_token($test_token);
  803         if ( $is_comma_token{$raw_tok} ) {
  804             my $list_type = $test_token;
  805             my $jmax      = $line->get_jmax();
  806 
  807             foreach ( 1 .. $jmax - 2 ) {
  808                 ( $raw_tok, $lev, $tag, $tok_count ) =
  809                   decode_alignment_token( $rtokens->[$_] );
  810                 if ( !$is_comma_token{$raw_tok} ) {
  811                     $list_type = "";
  812                     last;
  813                 }
  814             }
  815             $line->set_list_type($list_type);
  816         }
  817         return;
  818     }
  819 }
  820 
  821 sub fix_terminal_ternary {
  822 
  823     # Add empty fields as necessary to align a ternary term
  824     # like this:
  825     #
  826     #  my $leapyear =
  827     #      $year % 4   ? 0
  828     #    : $year % 100 ? 1
  829     #    : $year % 400 ? 0
  830     #    :               1;
  831     #
  832     # returns the index of the terminal question token, if any
  833 
  834     my ( $old_line, $rfields, $rtokens, $rpatterns, $rfield_lengths,
  835         $group_level )
  836       = @_;
  837 
  838     return unless ($old_line);
  839     use constant EXPLAIN_TERNARY => 0;
  840 
  841     my $jmax        = @{$rfields} - 1;
  842     my $rfields_old = $old_line->get_rfields();
  843 
  844     my $rpatterns_old       = $old_line->get_rpatterns();
  845     my $rtokens_old         = $old_line->get_rtokens();
  846     my $maximum_field_index = $old_line->get_jmax();
  847 
  848     # look for the question mark after the :
  849     my ($jquestion);
  850     my $depth_question;
  851     my $pad        = "";
  852     my $pad_length = 0;
  853     foreach my $j ( 0 .. $maximum_field_index - 1 ) {
  854         my $tok = $rtokens_old->[$j];
  855         my ( $raw_tok, $lev, $tag, $tok_count ) = decode_alignment_token($tok);
  856         if ( $raw_tok eq '?' ) {
  857             $depth_question = $lev;
  858 
  859             # depth must be correct
  860             next unless ( $depth_question eq $group_level );
  861 
  862             $jquestion = $j;
  863             if ( $rfields_old->[ $j + 1 ] =~ /^(\?\s*)/ ) {
  864                 $pad_length = length($1);
  865                 $pad        = " " x $pad_length;
  866             }
  867             else {
  868                 return;    # shouldn't happen
  869             }
  870             last;
  871         }
  872     }
  873     return unless ( defined($jquestion) );    # shouldn't happen
  874 
  875     # Now splice the tokens and patterns of the previous line
  876     # into the else line to insure a match.  Add empty fields
  877     # as necessary.
  878     my $jadd = $jquestion;
  879 
  880     # Work on copies of the actual arrays in case we have
  881     # to return due to an error
  882     my @fields        = @{$rfields};
  883     my @patterns      = @{$rpatterns};
  884     my @tokens        = @{$rtokens};
  885     my @field_lengths = @{$rfield_lengths};
  886 
  887     EXPLAIN_TERNARY && do {
  888         local $" = '><';
  889         print STDOUT "CURRENT FIELDS=<@{$rfields_old}>\n";
  890         print STDOUT "CURRENT TOKENS=<@{$rtokens_old}>\n";
  891         print STDOUT "CURRENT PATTERNS=<@{$rpatterns_old}>\n";
  892         print STDOUT "UNMODIFIED FIELDS=<@{$rfields}>\n";
  893         print STDOUT "UNMODIFIED TOKENS=<@{$rtokens}>\n";
  894         print STDOUT "UNMODIFIED PATTERNS=<@{$rpatterns}>\n";
  895     };
  896 
  897     # handle cases of leading colon on this line
  898     if ( $fields[0] =~ /^(:\s*)(.*)$/ ) {
  899 
  900         my ( $colon, $therest ) = ( $1, $2 );
  901 
  902         # Handle sub-case of first field with leading colon plus additional code
  903         # This is the usual situation as at the '1' below:
  904         #  ...
  905         #  : $year % 400 ? 0
  906         #  :               1;
  907         if ($therest) {
  908 
  909             # Split the first field after the leading colon and insert padding.
  910             # Note that this padding will remain even if the terminal value goes
  911             # out on a separate line.  This does not seem to look to bad, so no
  912             # mechanism has been included to undo it.
  913             my $field1        = shift @fields;
  914             my $field_length1 = shift @field_lengths;
  915             my $len_colon     = length($colon);
  916             unshift @fields, ( $colon, $pad . $therest );
  917             unshift @field_lengths,
  918               ( $len_colon, $pad_length + $field_length1 - $len_colon );
  919 
  920             # change the leading pattern from : to ?
  921             return unless ( $patterns[0] =~ s/^\:/?/ );
  922 
  923             # install leading tokens and patterns of existing line
  924             unshift( @tokens,   @{$rtokens_old}[ 0 .. $jquestion ] );
  925             unshift( @patterns, @{$rpatterns_old}[ 0 .. $jquestion ] );
  926 
  927             # insert appropriate number of empty fields
  928             splice( @fields,        1, 0, ('') x $jadd ) if $jadd;
  929             splice( @field_lengths, 1, 0, (0) x $jadd )  if $jadd;
  930         }
  931 
  932         # handle sub-case of first field just equal to leading colon.
  933         # This can happen for example in the example below where
  934         # the leading '(' would create a new alignment token
  935         # : ( $name =~ /[]}]$/ ) ? ( $mname = $name )
  936         # :                        ( $mname = $name . '->' );
  937         else {
  938 
  939             return unless ( $jmax > 0 && $tokens[0] ne '#' ); # shouldn't happen
  940 
  941             # prepend a leading ? onto the second pattern
  942             $patterns[1] = "?b" . $patterns[1];
  943 
  944             # pad the second field
  945             $fields[1]        = $pad . $fields[1];
  946             $field_lengths[1] = $pad_length + $field_lengths[1];
  947 
  948             # install leading tokens and patterns of existing line, replacing
  949             # leading token and inserting appropriate number of empty fields
  950             splice( @tokens,   0, 1, @{$rtokens_old}[ 0 .. $jquestion ] );
  951             splice( @patterns, 1, 0, @{$rpatterns_old}[ 1 .. $jquestion ] );
  952             splice( @fields,        1, 0, ('') x $jadd ) if $jadd;
  953             splice( @field_lengths, 1, 0, (0) x $jadd )  if $jadd;
  954         }
  955     }
  956 
  957     # Handle case of no leading colon on this line.  This will
  958     # be the case when -wba=':' is used.  For example,
  959     #  $year % 400 ? 0 :
  960     #                1;
  961     else {
  962 
  963         # install leading tokens and patterns of existing line
  964         $patterns[0] = '?' . 'b' . $patterns[0];
  965         unshift( @tokens,   @{$rtokens_old}[ 0 .. $jquestion ] );
  966         unshift( @patterns, @{$rpatterns_old}[ 0 .. $jquestion ] );
  967 
  968         # insert appropriate number of empty fields
  969         $jadd             = $jquestion + 1;
  970         $fields[0]        = $pad . $fields[0];
  971         $field_lengths[0] = $pad_length + $field_lengths[0];
  972         splice( @fields,        0, 0, ('') x $jadd ) if $jadd;
  973         splice( @field_lengths, 0, 0, (0) x $jadd )  if $jadd;
  974     }
  975 
  976     EXPLAIN_TERNARY && do {
  977         local $" = '><';
  978         print STDOUT "MODIFIED TOKENS=<@tokens>\n";
  979         print STDOUT "MODIFIED PATTERNS=<@patterns>\n";
  980         print STDOUT "MODIFIED FIELDS=<@fields>\n";
  981     };
  982 
  983     # all ok .. update the arrays
  984     @{$rfields}        = @fields;
  985     @{$rtokens}        = @tokens;
  986     @{$rpatterns}      = @patterns;
  987     @{$rfield_lengths} = @field_lengths;
  988 
  989     # force a flush after this line
  990     return $jquestion;
  991 }
  992 
  993 sub fix_terminal_else {
  994 
  995     # Add empty fields as necessary to align a balanced terminal
  996     # else block to a previous if/elsif/unless block,
  997     # like this:
  998     #
  999     #  if   ( 1 || $x ) { print "ok 13\n"; }
 1000     #  else             { print "not ok 13\n"; }
 1001     #
 1002     # returns a positive value if the else block should be indented
 1003     #
 1004     my ( $old_line, $rfields, $rtokens, $rpatterns, $rfield_lengths ) = @_;
 1005 
 1006     return unless ($old_line);
 1007     my $jmax = @{$rfields} - 1;
 1008     return unless ( $jmax > 0 );
 1009 
 1010     # check for balanced else block following if/elsif/unless
 1011     my $rfields_old = $old_line->get_rfields();
 1012 
 1013     # TBD: add handling for 'case'
 1014     return unless ( $rfields_old->[0] =~ /^(if|elsif|unless)\s*$/ );
 1015 
 1016     # look for the opening brace after the else, and extract the depth
 1017     my $tok_brace = $rtokens->[0];
 1018     my $depth_brace;
 1019     if ( $tok_brace =~ /^\{(\d+)/ ) { $depth_brace = $1; }
 1020 
 1021     # probably:  "else # side_comment"
 1022     else { return }
 1023 
 1024     my $rpatterns_old       = $old_line->get_rpatterns();
 1025     my $rtokens_old         = $old_line->get_rtokens();
 1026     my $maximum_field_index = $old_line->get_jmax();
 1027 
 1028     # be sure the previous if/elsif is followed by an opening paren
 1029     my $jparen    = 0;
 1030     my $tok_paren = '(' . $depth_brace;
 1031     my $tok_test  = $rtokens_old->[$jparen];
 1032     return unless ( $tok_test eq $tok_paren );    # shouldn't happen
 1033 
 1034     # Now find the opening block brace
 1035     my ($jbrace);
 1036     foreach my $j ( 1 .. $maximum_field_index - 1 ) {
 1037         my $tok = $rtokens_old->[$j];
 1038         if ( $tok eq $tok_brace ) {
 1039             $jbrace = $j;
 1040             last;
 1041         }
 1042     }
 1043     return unless ( defined($jbrace) );           # shouldn't happen
 1044 
 1045     # Now splice the tokens and patterns of the previous line
 1046     # into the else line to insure a match.  Add empty fields
 1047     # as necessary.
 1048     my $jadd = $jbrace - $jparen;
 1049     splice( @{$rtokens},   0, 0, @{$rtokens_old}[ $jparen .. $jbrace - 1 ] );
 1050     splice( @{$rpatterns}, 1, 0, @{$rpatterns_old}[ $jparen + 1 .. $jbrace ] );
 1051     splice( @{$rfields},        1, 0, ('') x $jadd );
 1052     splice( @{$rfield_lengths}, 1, 0, (0) x $jadd );
 1053 
 1054     # force a flush after this line if it does not follow a case
 1055     if   ( $rfields_old->[0] =~ /^case\s*$/ ) { return }
 1056     else                                      { return $jbrace }
 1057 }
 1058 
 1059 my %is_closing_block_type;
 1060 
 1061 BEGIN {
 1062     @_ = qw< } ] >;
 1063     @is_closing_block_type{@_} = (1) x scalar(@_);
 1064 }
 1065 
 1066 sub check_match {
 1067 
 1068     # See if the current line matches the current vertical alignment group.
 1069 
 1070     my ( $self, $new_line, $base_line, $prev_line ) = @_;
 1071 
 1072     # Given:
 1073     #  $new_line  = the line being considered for group inclusion
 1074     #  $base_line = the first line of the current group
 1075     #  $prev_line = the line just before $new_line
 1076 
 1077     # returns a flag and a value as follows:
 1078     #    return (0, $imax_align)     if the line does not match
 1079     #    return (1, $imax_align)     if the line matches but does not fit
 1080     #    return (2, $imax_align)     if the line matches and fits
 1081 
 1082     # Returns '$imax_align' which is the index of the maximum matching token.
 1083     # It will be used in the subsequent left-to-right sweep to align as many
 1084     # tokens as possible for lines which partially match.
 1085     my $imax_align = -1;
 1086 
 1087     # variable $GoToMsg explains reason for no match, for debugging
 1088     my $GoToMsg = "";
 1089     use constant EXPLAIN_CHECK_MATCH => 0;
 1090 
 1091     # This is a flag for testing alignment by sub sweep_left_to_right only.
 1092     # This test can help find problems with the alignment logic.
 1093     # This flag should normally be zero.
 1094     use constant TEST_SWEEP_ONLY => 0;
 1095 
 1096     my $jmax                = $new_line->get_jmax();
 1097     my $maximum_field_index = $base_line->get_jmax();
 1098 
 1099     my $jlimit = $jmax - 2;
 1100     if ( $jmax > $maximum_field_index ) {
 1101         $jlimit = $maximum_field_index - 2;
 1102     }
 1103 
 1104     if ( $new_line->get_is_hanging_side_comment() ) {
 1105 
 1106         # HSC's can join the group if they fit
 1107     }
 1108 
 1109     # Everything else
 1110     else {
 1111 
 1112         # A group with hanging side comments ends with the first non hanging
 1113         # side comment.
 1114         if ( $base_line->get_is_hanging_side_comment() ) {
 1115             $GoToMsg = "end of hanging side comments";
 1116             goto NO_MATCH;
 1117         }
 1118 
 1119         # The number of tokens that this line shares with the previous line
 1120         # has been stored with the previous line.  This value was calculated
 1121         # and stored by sub 'match_line_pair'.
 1122         $imax_align = $prev_line->get_imax_pair();
 1123 
 1124         if ( $imax_align != $jlimit ) {
 1125             $GoToMsg = "Not all tokens match: $imax_align != $jlimit\n";
 1126             goto NO_MATCH;
 1127         }
 1128 
 1129     }
 1130 
 1131     # The tokens match, but the lines must have identical number of
 1132     # tokens to join the group.
 1133     if ( $maximum_field_index != $jmax ) {
 1134         $GoToMsg = "token count differs";
 1135         goto NO_MATCH;
 1136     }
 1137 
 1138     # The tokens match. Now See if there is space for this line in the
 1139     # current group.
 1140     if ( $self->check_fit( $new_line, $base_line ) && !TEST_SWEEP_ONLY ) {
 1141 
 1142         EXPLAIN_CHECK_MATCH
 1143           && print "match and fit, imax_align=$imax_align, jmax=$jmax\n";
 1144         return ( 2, $jlimit );
 1145     }
 1146     else {
 1147 
 1148         EXPLAIN_CHECK_MATCH
 1149           && print "match but no fit, imax_align=$imax_align, jmax=$jmax\n";
 1150         return ( 1, $jlimit );
 1151     }
 1152 
 1153   NO_MATCH:
 1154 
 1155     EXPLAIN_CHECK_MATCH
 1156       && print
 1157       "no match because $GoToMsg, max match index =i $imax_align, jmax=$jmax\n";
 1158 
 1159     return ( 0, $imax_align );
 1160 }
 1161 
 1162 sub check_fit {
 1163 
 1164     my ( $self, $new_line, $old_line ) = @_;
 1165 
 1166     # The new line has alignments identical to the current group. Now we have
 1167     # to fit the new line into the group without causing a field to exceed the
 1168     # line length limit.
 1169     #   return true if successful
 1170     #   return false if not successful
 1171 
 1172     my $jmax                = $new_line->get_jmax();
 1173     my $leading_space_count = $new_line->get_leading_space_count();
 1174     my $rfield_lengths      = $new_line->get_rfield_lengths();
 1175     my $padding_available   = $old_line->get_available_space_on_right();
 1176     my $jmax_old            = $old_line->get_jmax();
 1177 
 1178     # Safety check ... only lines with equal array sizes should arrive here
 1179     # from sub check_match.  So if this error occurs, look at recent changes in
 1180     # sub check_match.  It is only supposed to check the fit of lines with
 1181     # identical numbers of alignment tokens.
 1182     if ( $jmax_old ne $jmax ) {
 1183 
 1184         $self->warning(<<EOM);
 1185 Program bug detected in Perl::Tidy::VerticalAligner sub check_fit 
 1186 unexpected difference in array lengths: $jmax != $jmax_old
 1187 EOM
 1188         return;
 1189     }
 1190 
 1191     # Save current columns in case this line does not fit.
 1192     my @alignments = $old_line->get_alignments();
 1193     foreach my $alignment (@alignments) {
 1194         $alignment->save_column();
 1195     }
 1196 
 1197     my $is_hanging_side_comment = $new_line->get_is_hanging_side_comment();
 1198 
 1199     # Loop over all alignments ...
 1200     my $maximum_field_index = $old_line->get_jmax();
 1201     for my $j ( 0 .. $jmax ) {
 1202 
 1203         my $pad = $rfield_lengths->[$j] - $old_line->current_field_width($j);
 1204 
 1205         if ( $j == 0 ) {
 1206             $pad += $leading_space_count;
 1207         }
 1208 
 1209         # Keep going if this field does not need any space.
 1210         next if $pad < 0;
 1211 
 1212         # See if it needs too much space.
 1213         if ( $pad > $padding_available ) {
 1214 
 1215             ################################################
 1216             # Line does not fit -- revert to starting state
 1217             ################################################
 1218             foreach my $alignment (@alignments) {
 1219                 $alignment->restore_column();
 1220             }
 1221             return;
 1222         }
 1223 
 1224         # make room for this field
 1225         $old_line->increase_field_width( $j, $pad );
 1226         $padding_available -= $pad;
 1227     }
 1228 
 1229     ######################################
 1230     # The line fits, the match is accepted
 1231     ######################################
 1232     return 1;
 1233 
 1234 }
 1235 
 1236 sub install_new_alignments {
 1237 
 1238     my ($new_line) = @_;
 1239 
 1240     my $jmax           = $new_line->get_jmax();
 1241     my $rfield_lengths = $new_line->get_rfield_lengths();
 1242     my $col            = $new_line->get_leading_space_count();
 1243 
 1244     for my $j ( 0 .. $jmax ) {
 1245         $col += $rfield_lengths->[$j];
 1246 
 1247         # create initial alignments for the new group
 1248         my $alignment =
 1249           Perl::Tidy::VerticalAligner::Alignment->new( { column => $col } );
 1250         $new_line->set_alignment( $j, $alignment );
 1251     }
 1252     return;
 1253 }
 1254 
 1255 sub copy_old_alignments {
 1256     my ( $new_line, $old_line ) = @_;
 1257     my @new_alignments = $old_line->get_alignments();
 1258     $new_line->set_alignments(@new_alignments);
 1259     return;
 1260 }
 1261 
 1262 sub dump_array {
 1263 
 1264     # debug routine to dump array contents
 1265     local $" = ')(';
 1266     print STDOUT "(@_)\n";
 1267     return;
 1268 }
 1269 
 1270 sub level_change {
 1271 
 1272     # compute decrease in level when we remove $diff spaces from the
 1273     # leading spaces
 1274     my ( $self, $leading_space_count, $diff, $level ) = @_;
 1275 
 1276     my $rOpts_indent_columns = $self->[_rOpts_indent_columns_];
 1277     if ($rOpts_indent_columns) {
 1278         my $olev =
 1279           int( ( $leading_space_count + $diff ) / $rOpts_indent_columns );
 1280         my $nlev = int( $leading_space_count / $rOpts_indent_columns );
 1281         $level -= ( $olev - $nlev );
 1282         if ( $level < 0 ) { $level = 0 }
 1283     }
 1284     return $level;
 1285 }
 1286 
 1287 ###############################################
 1288 # CODE SECTION 4: Code to process comment lines
 1289 ###############################################
 1290 
 1291 sub _flush_comment_lines {
 1292 
 1293     # Output a group consisting of COMMENT lines
 1294 
 1295     my ($self) = @_;
 1296     my $rgroup_lines = $self->[_rgroup_lines_];
 1297     return unless ( @{$rgroup_lines} );
 1298     my $group_level         = $self->[_group_level_];
 1299     my $leading_space_count = $self->[_comment_leading_space_count_];
 1300     my $leading_string =
 1301       $self->get_leading_string( $leading_space_count, $group_level );
 1302 
 1303     # look for excessively long lines
 1304     my $max_excess = 0;
 1305     foreach my $item ( @{$rgroup_lines} ) {
 1306         my ( $str, $str_len ) = @{$item};
 1307         my $excess =
 1308           $str_len +
 1309           $leading_space_count -
 1310           $self->maximum_line_length_for_level($group_level);
 1311         if ( $excess > $max_excess ) {
 1312             $max_excess = $excess;
 1313         }
 1314     }
 1315 
 1316     # zero leading space count if any lines are too long
 1317     if ( $max_excess > 0 ) {
 1318         $leading_space_count -= $max_excess;
 1319         if ( $leading_space_count < 0 ) { $leading_space_count = 0 }
 1320         my $file_writer_object = $self->[_file_writer_object_];
 1321         my $last_outdented_line_at =
 1322           $file_writer_object->get_output_line_number();
 1323         $self->[_last_outdented_line_at_] = $last_outdented_line_at;
 1324         my $outdented_line_count = $self->[_outdented_line_count_];
 1325         unless ($outdented_line_count) {
 1326             $self->[_first_outdented_line_at_] = $last_outdented_line_at;
 1327         }
 1328         my $nlines = @{$rgroup_lines};
 1329         $outdented_line_count += $nlines;
 1330         $self->[_outdented_line_count_] = $outdented_line_count;
 1331     }
 1332 
 1333     # write the lines
 1334     my $outdent_long_lines = 0;
 1335 
 1336     foreach my $item ( @{$rgroup_lines} ) {
 1337         my ( $str, $str_len, $Kend ) = @{$item};
 1338         $self->valign_output_step_B(
 1339             {
 1340                 leading_space_count       => $leading_space_count,
 1341                 line                      => $str,
 1342                 line_length               => $str_len,
 1343                 side_comment_length       => 0,
 1344                 outdent_long_lines        => $outdent_long_lines,
 1345                 rvertical_tightness_flags => "",
 1346                 level                     => $group_level,
 1347                 level_end                 => $group_level,
 1348                 Kend                      => $Kend,
 1349             }
 1350         );
 1351     }
 1352 
 1353     $self->initialize_for_new_group();
 1354     return;
 1355 }
 1356 
 1357 ######################################################
 1358 # CODE SECTION 5: Code to process groups of code lines
 1359 ######################################################
 1360 
 1361 sub _flush_group_lines {
 1362 
 1363     # This is the vertical aligner internal flush, which leaves the cache
 1364     # intact
 1365     my ( $self, $level_jump ) = @_;
 1366 
 1367     # $level_jump = $next_level-$group_level, if known
 1368     #             = undef if not known
 1369 
 1370     my $rgroup_lines = $self->[_rgroup_lines_];
 1371     return unless ( @{$rgroup_lines} );
 1372     my $group_type  = $self->[_group_type_];
 1373     my $group_level = $self->[_group_level_];
 1374 
 1375     # Debug
 1376     0 && do {
 1377         my ( $a, $b, $c ) = caller();
 1378         my $nlines = @{$rgroup_lines};
 1379         print STDOUT
 1380 "APPEND0: _flush_group_lines called from $a $b $c lines=$nlines, type=$group_type \n";
 1381     };
 1382 
 1383     ############################################
 1384     # Section 1: Handle a group of COMMENT lines
 1385     ############################################
 1386     if ( $group_type eq 'COMMENT' ) {
 1387         $self->_flush_comment_lines();
 1388         return;
 1389     }
 1390 
 1391     #########################################################################
 1392     # Section 2: Handle line(s) of CODE.  Most of the actual work of vertical
 1393     # aligning happens here in the following steps:
 1394     #########################################################################
 1395 
 1396     # STEP 1: Remove most unmatched tokens. They block good alignments.
 1397     my ( $max_lev_diff, $saw_side_comment ) =
 1398       delete_unmatched_tokens( $rgroup_lines, $group_level );
 1399 
 1400     # STEP 2: Sweep top to bottom, forming subgroups of lines with exactly
 1401     # matching common alignments.  The indexes of these subgroups are in the
 1402     # return variable.
 1403     my $rgroups = $self->sweep_top_down( $rgroup_lines, $group_level );
 1404 
 1405     # STEP 3: Sweep left to right through the lines, looking for leading
 1406     # alignment tokens shared by groups.
 1407     sweep_left_to_right( $rgroup_lines, $rgroups, $group_level )
 1408       if ( @{$rgroups} > 1 );
 1409 
 1410     # STEP 4: Move side comments to a common column if possible.
 1411     if ($saw_side_comment) {
 1412         $self->align_side_comments( $rgroup_lines, $rgroups );
 1413     }
 1414 
 1415     # STEP 5: For the -lp option, increase the indentation of lists
 1416     # to the desired amount, but do not exceed the line length limit.
 1417 
 1418     # We are allowed to shift a group of lines to the right if:
 1419     #  (1) its level is greater than the level of the previous group, and
 1420     #  (2) its level is greater than the level of the next line to be written.
 1421 
 1422     my $extra_indent_ok;
 1423     if ( $group_level > $self->[_last_level_written_] ) {
 1424 
 1425         # Use the level jump to next line to come, if given
 1426         if ( defined($level_jump) ) {
 1427             $extra_indent_ok = $level_jump < 0;
 1428         }
 1429 
 1430         # Otherwise, assume the next line has the level of the end of last line.
 1431         # This fixes case c008.
 1432         else {
 1433             my $level_end = $rgroup_lines->[-1]->get_level_end();
 1434             $extra_indent_ok = $group_level > $level_end;
 1435         }
 1436     }
 1437 
 1438     my $extra_leading_spaces =
 1439       $extra_indent_ok
 1440       ? get_extra_leading_spaces( $rgroup_lines, $rgroups )
 1441       : 0;
 1442 
 1443     # STEP 6: Output the lines.
 1444     # All lines in this batch have the same basic leading spacing:
 1445     my $group_leader_length = $rgroup_lines->[0]->get_leading_space_count();
 1446 
 1447     foreach my $line ( @{$rgroup_lines} ) {
 1448         $self->valign_output_step_A(
 1449             {
 1450                 line                 => $line,
 1451                 min_ci_gap           => 0,
 1452                 do_not_align         => 0,
 1453                 group_leader_length  => $group_leader_length,
 1454                 extra_leading_spaces => $extra_leading_spaces,
 1455                 level                => $group_level,
 1456             }
 1457         );
 1458     }
 1459 
 1460     $self->initialize_for_new_group();
 1461     return;
 1462 }
 1463 
 1464 {    ## closure for sub sweep_top_down
 1465 
 1466     my $rall_lines;         # all of the lines
 1467     my $grp_level;          # level of all lines
 1468     my $rgroups;            # describes the partition of lines we will make here
 1469     my $group_line_count;   # number of lines in current partition
 1470 
 1471     BEGIN { $rgroups = [] }
 1472 
 1473     sub initialize_for_new_rgroup {
 1474         $group_line_count = 0;
 1475         return;
 1476     }
 1477 
 1478     sub add_to_rgroup {
 1479 
 1480         my ($jend) = @_;
 1481         my $rline = $rall_lines->[$jend];
 1482 
 1483         my $jbeg = $jend;
 1484         if ( $group_line_count == 0 ) {
 1485             install_new_alignments($rline);
 1486         }
 1487         else {
 1488             my $rvals = pop @{$rgroups};
 1489             $jbeg = $rvals->[0];
 1490             copy_old_alignments( $rline, $rall_lines->[$jbeg] );
 1491         }
 1492         push @{$rgroups}, [ $jbeg, $jend, undef ];
 1493         $group_line_count++;
 1494         return;
 1495     }
 1496 
 1497     sub get_rgroup_jrange {
 1498 
 1499         return unless @{$rgroups};
 1500         return unless ( $group_line_count > 0 );
 1501         my ( $jbeg, $jend ) = @{ $rgroups->[-1] };
 1502         return ( $jbeg, $jend );
 1503     }
 1504 
 1505     sub end_rgroup {
 1506 
 1507         my ($imax_align) = @_;
 1508         return unless @{$rgroups};
 1509         return unless ( $group_line_count > 0 );
 1510 
 1511         my ( $jbeg, $jend ) = @{ pop @{$rgroups} };
 1512         push @{$rgroups}, [ $jbeg, $jend, $imax_align ];
 1513 
 1514         # Undo some alignments of poor two-line combinations.
 1515         # We had to wait until now to know the line count.
 1516         if ( $jend - $jbeg == 1 ) {
 1517             my $line_0 = $rall_lines->[$jbeg];
 1518             my $line_1 = $rall_lines->[$jend];
 1519 
 1520             my $imax_pair = $line_1->get_imax_pair();
 1521             if ( $imax_pair > $imax_align ) { $imax_align = $imax_pair }
 1522 
 1523             ## flag for possible future use:
 1524             ## my $is_isolated_pair = $imax_pair < 0
 1525             ##  && ( $jbeg == 0
 1526             ##    || $rall_lines->[ $jbeg - 1 ]->get_imax_pair() < 0 );
 1527 
 1528             my $imax_prev =
 1529               $jbeg > 0 ? $rall_lines->[ $jbeg - 1 ]->get_imax_pair() : -1;
 1530 
 1531             my ( $is_marginal, $imax_align_fix ) =
 1532               is_marginal_match( $line_0, $line_1, $grp_level, $imax_align,
 1533                 $imax_prev );
 1534             if ($is_marginal) {
 1535                 combine_fields( $line_0, $line_1, $imax_align_fix );
 1536             }
 1537         }
 1538 
 1539         initialize_for_new_rgroup();
 1540         return;
 1541     }
 1542 
 1543     sub block_penultimate_match {
 1544 
 1545         # emergency reset to prevent sweep_left_to_right from trying to match a
 1546         # failed terminal else match
 1547         return unless @{$rgroups} > 1;
 1548         $rgroups->[-2]->[2] = -1;
 1549         return;
 1550     }
 1551 
 1552     sub sweep_top_down {
 1553         my ( $self, $rlines, $group_level ) = @_;
 1554 
 1555         # Partition the set of lines into final alignment subgroups
 1556         # and store the alignments with the lines.
 1557 
 1558         # The alignment subgroups we are making here are groups of consecutive
 1559         # lines which have (1) identical alignment tokens and (2) do not
 1560         # exceed the allowable maximum line length.  A later sweep from
 1561         # left-to-right ('sweep_lr') will handle additional alignments.
 1562 
 1563         # transfer args to closure variables
 1564         $rall_lines = $rlines;
 1565         $grp_level  = $group_level;
 1566         $rgroups    = [];
 1567         initialize_for_new_rgroup();
 1568         return unless @{$rlines};    # shouldn't happen
 1569 
 1570         # Unset the _end_group flag for the last line if it it set because it
 1571         # is not needed and can causes problems for -lp formatting
 1572         $rall_lines->[-1]->set_end_group(0);
 1573 
 1574         # Loop over all lines ...
 1575         my $jline = -1;
 1576         foreach my $new_line ( @{$rall_lines} ) {
 1577             $jline++;
 1578 
 1579             # Start a new subgroup if necessary
 1580             if ( !$group_line_count ) {
 1581                 add_to_rgroup($jline);
 1582                 if ( $new_line->get_end_group() ) {
 1583                     end_rgroup(-1);
 1584                 }
 1585                 next;
 1586             }
 1587 
 1588             my $j_terminal_match = $new_line->get_j_terminal_match();
 1589             my ( $jbeg, $jend ) = get_rgroup_jrange();
 1590             if ( !defined($jbeg) ) {
 1591 
 1592                 # safety check, shouldn't happen
 1593                 $self->warning(<<EOM);
 1594 Program bug detected in Perl::Tidy::VerticalAligner sub sweep_top_down 
 1595 undefined index for group line count $group_line_count
 1596 EOM
 1597                 $jbeg = $jline;
 1598             }
 1599             my $base_line = $rall_lines->[$jbeg];
 1600 
 1601             # Initialize a global flag saying if the last line of the group
 1602             # should match end of group and also terminate the group.  There
 1603             # should be no returns between here and where the flag is handled
 1604             # at the bottom.
 1605             my $col_matching_terminal = 0;
 1606             if ( defined($j_terminal_match) ) {
 1607 
 1608                 # remember the column of the terminal ? or { to match with
 1609                 $col_matching_terminal =
 1610                   $base_line->get_column($j_terminal_match);
 1611 
 1612                 # Ignore an undefined value as a defensive step; shouldn't
 1613                 # normally happen.
 1614                 $col_matching_terminal = 0
 1615                   unless defined($col_matching_terminal);
 1616             }
 1617 
 1618             # -------------------------------------------------------------
 1619             # Allow hanging side comment to join current group, if any.  The
 1620             # only advantage is to keep the other tokens in the same group. For
 1621             # example, this would make the '=' align here:
 1622             #  $ax         = 1;           # side comment
 1623             #                             # hanging side comment
 1624             #  $boondoggle = 5;           # side comment
 1625             #  $beetle     = 5;           # side comment
 1626 
 1627             # here is another example..
 1628 
 1629             #  _rtoc_name_count   => {},                   # hash to track ..
 1630             #  _rpackage_stack    => [],                   # stack to check ..
 1631             #                                              # name changes
 1632             #  _rlast_level       => \$last_level,         # brace indentation
 1633             #
 1634             #
 1635             # If this were not desired, the next step could be skipped.
 1636             # -------------------------------------------------------------
 1637             if ( $new_line->get_is_hanging_side_comment() ) {
 1638                 join_hanging_comment( $new_line, $base_line );
 1639             }
 1640 
 1641             # If this line has no matching tokens, then flush out the lines
 1642             # BEFORE this line unless both it and the previous line have side
 1643             # comments.  This prevents this line from pushing side coments out
 1644             # to the right.
 1645             elsif ( $new_line->get_jmax() == 1 ) {
 1646 
 1647                 # There are no matching tokens, so now check side comments.
 1648                 # Programming note: accessing arrays with index -1 is
 1649                 # risky in Perl, but we have verified there is at least one
 1650                 # line in the group and that there is at least one field.
 1651                 my $prev_comment =
 1652                   $rall_lines->[ $jline - 1 ]->get_rfields()->[-1];
 1653                 my $side_comment = $new_line->get_rfields()->[-1];
 1654                 end_rgroup(-1) unless ( $side_comment && $prev_comment );
 1655             }
 1656 
 1657             # See if the new line matches and fits the current group,
 1658             # if it still exists. Flush the current group if not.
 1659             my $match_code;
 1660             if ($group_line_count) {
 1661                 ( $match_code, my $imax_align ) =
 1662                   $self->check_match( $new_line, $base_line,
 1663                     $rall_lines->[ $jline - 1 ] );
 1664                 if ( $match_code != 2 ) { end_rgroup($imax_align) }
 1665             }
 1666 
 1667             # Store the new line
 1668             add_to_rgroup($jline);
 1669 
 1670             if ( defined($j_terminal_match) ) {
 1671 
 1672                 # Decide if we should fix a terminal match. We can either:
 1673                 # 1. fix it and prevent the sweep_lr from changing it, or
 1674                 # 2. leave it alone and let sweep_lr try to fix it.
 1675 
 1676                 # The current logic is to fix it if:
 1677                 # -it has not joined to previous lines,
 1678                 # -and either the previous subgroup has just 1 line, or
 1679                 # -this line matched but did not fit (so sweep won't work)
 1680                 my $fixit;
 1681                 if ( $group_line_count == 1 ) {
 1682                     $fixit ||= $match_code;
 1683                     if ( !$fixit ) {
 1684                         if ( @{$rgroups} > 1 ) {
 1685                             my ( $jbegx, $jendx ) = @{ $rgroups->[-2] };
 1686                             my $nlines = $jendx - $jbegx + 1;
 1687                             $fixit ||= $nlines <= 1;
 1688                         }
 1689                     }
 1690                 }
 1691 
 1692                 if ($fixit) {
 1693                     $base_line = $new_line;
 1694                     my $col_now = $base_line->get_column($j_terminal_match);
 1695 
 1696                     # Ignore an undefined value as a defensive step; shouldn't
 1697                     # normally happen.
 1698                     $col_now = 0 unless defined($col_now);
 1699 
 1700                     my $pad = $col_matching_terminal - $col_now;
 1701                     my $padding_available =
 1702                       $base_line->get_available_space_on_right();
 1703                     if ( $col_now && $pad > 0 && $pad <= $padding_available ) {
 1704                         $base_line->increase_field_width( $j_terminal_match,
 1705                             $pad );
 1706                     }
 1707 
 1708                     # do not let sweep_left_to_right change an isolated 'else'
 1709                     if ( !$new_line->get_is_terminal_ternary() ) {
 1710                         block_penultimate_match();
 1711                     }
 1712                 }
 1713                 end_rgroup(-1);
 1714             }
 1715 
 1716             # end the group if we know we cannot match next line.
 1717             elsif ( $new_line->get_end_group() ) {
 1718                 end_rgroup(-1);
 1719             }
 1720         } ## end loop over lines
 1721 
 1722         end_rgroup(-1);
 1723         return ($rgroups);
 1724     }
 1725 }
 1726 
 1727 sub two_line_pad {
 1728 
 1729     my ( $line_m, $line, $imax_min ) = @_;
 1730 
 1731     # Given:
 1732     #  two isolated (list) lines
 1733     #  imax_min = number of common alignment tokens
 1734     # Return:
 1735     #  $pad_max = maximum suggested pad distnce
 1736     #           = 0 if alignment not recommended
 1737     # Note that this is only for two lines which do not have alignment tokens
 1738     # in common with any other lines.  It is intended for lists, but it might
 1739     # also be used for two non-list lines with a common leading '='.
 1740 
 1741     # Allow alignment if the difference in the two unpadded line lengths
 1742     # is not more than either line length.  The idea is to avoid
 1743     # aligning lines with very different field lengths, like these two:
 1744 
 1745     #   [
 1746     #       'VARCHAR', DBI::SQL_VARCHAR, undef, "'", "'", undef, 0, 1,
 1747     #       1, 0, 0, 0, undef, 0, 0
 1748     #   ];
 1749     my $rfield_lengths   = $line->get_rfield_lengths();
 1750     my $rfield_lengths_m = $line_m->get_rfield_lengths();
 1751 
 1752     # Safety check - shouldn't happen
 1753     return 0
 1754       unless $imax_min < @{$rfield_lengths} && $imax_min < @{$rfield_lengths_m};
 1755 
 1756     my $lensum_m = 0;
 1757     my $lensum   = 0;
 1758     for ( my $i = 0 ; $i <= $imax_min ; $i++ ) {
 1759         $lensum_m += $rfield_lengths_m->[$i];
 1760         $lensum   += $rfield_lengths->[$i];
 1761     }
 1762 
 1763     my ( $lenmin, $lenmax ) =
 1764       $lensum >= $lensum_m ? ( $lensum_m, $lensum ) : ( $lensum, $lensum_m );
 1765 
 1766     my $patterns_match;
 1767     if ( $line_m->get_list_type() && $line->get_list_type() ) {
 1768         $patterns_match = 1;
 1769         my $rpatterns_m = $line_m->get_rpatterns();
 1770         my $rpatterns   = $line->get_rpatterns();
 1771         for ( my $i = 0 ; $i <= $imax_min ; $i++ ) {
 1772             my $pat   = $rpatterns->[$i];
 1773             my $pat_m = $rpatterns_m->[$i];
 1774             if ( $pat ne $pat_m ) { $patterns_match = 0; last }
 1775         }
 1776     }
 1777 
 1778     my $pad_max = $lenmax;
 1779     if ( !$patterns_match && $lenmax > 2 * $lenmin ) { $pad_max = 0 }
 1780 
 1781     return $pad_max;
 1782 }
 1783 
 1784 sub sweep_left_to_right {
 1785 
 1786     my ( $rlines, $rgroups, $group_level ) = @_;
 1787 
 1788     # So far we have divided the lines into groups having an equal number of
 1789     # identical alignments.  Here we are going to look for common leading
 1790     # alignments between the different groups and align them when possible.
 1791     # For example, the three lines below are in three groups because each line
 1792     # has a different number of commas.  In this routine we will sweep from
 1793     # left to right, aligning the leading commas as we go, but stopping if we
 1794     # hit the line length limit.
 1795 
 1796     #  my ( $num, $numi, $numj,  $xyza, $ka,   $xyzb, $kb, $aff, $error );
 1797     #  my ( $i,   $j,    $error, $aff,  $asum, $avec );
 1798     #  my ( $km,  $area, $varea );
 1799 
 1800     # nothing to do if just one group
 1801     my $ng_max = @{$rgroups} - 1;
 1802     return unless ( $ng_max > 0 );
 1803 
 1804     ############################################################################
 1805     # Step 1: Loop over groups to find all common leading alignment tokens
 1806     ############################################################################
 1807 
 1808     my $line;
 1809     my $rtokens;
 1810     my $imax;     # index of maximum non-side-comment alignment token
 1811     my $istop;    # an optional stopping index
 1812     my $jbeg;     # starting line index
 1813     my $jend;     # ending line index
 1814 
 1815     my $line_m;
 1816     my $rtokens_m;
 1817     my $imax_m;
 1818     my $istop_m;
 1819     my $jbeg_m;
 1820     my $jend_m;
 1821 
 1822     my $istop_mm;
 1823 
 1824     # Look at neighboring pairs of groups and form a simple list
 1825     # of all common leading alignment tokens. Foreach such match we
 1826     # store [$i, $ng], where
 1827     #  $i = index of the token in the line (0,1,...)
 1828     #  $ng is the second of the two groups with this common token
 1829     my @icommon;
 1830 
 1831     # Hash to hold the maximum alignment change for any group
 1832     my %max_move;
 1833 
 1834     # a small number of columns
 1835     my $short_pad = 4;
 1836 
 1837     my $ng = -1;
 1838     foreach my $item ( @{$rgroups} ) {
 1839         $ng++;
 1840 
 1841         $istop_mm = $istop_m;
 1842 
 1843         # save _m values of previous group
 1844         $line_m    = $line;
 1845         $rtokens_m = $rtokens;
 1846         $imax_m    = $imax;
 1847         $istop_m   = $istop;
 1848         $jbeg_m    = $jbeg;
 1849         $jend_m    = $jend;
 1850 
 1851         # Get values for this group. Note that we just have to use values for
 1852         # one of the lines of the group since all members have the same
 1853         # alignments.
 1854         ( $jbeg, $jend, $istop ) = @{$item};
 1855 
 1856         $line    = $rlines->[$jbeg];
 1857         $rtokens = $line->get_rtokens();
 1858         $imax    = $line->get_jmax() - 2;
 1859         $istop   = -1 unless ( defined($istop) );
 1860         $istop   = $imax if ( $istop > $imax );
 1861 
 1862         # Initialize on first group
 1863         next if ( $ng == 0 );
 1864 
 1865         # Use the minimum index limit of the two groups
 1866         my $imax_min = $imax > $imax_m ? $imax_m : $imax;
 1867 
 1868         # Also impose a limit if given.
 1869         if ( $istop_m < $imax_min ) {
 1870             $imax_min = $istop_m;
 1871         }
 1872 
 1873         # Special treatment of two one-line groups isolated from other lines,
 1874         # unless they form a simple list or a terminal match.  Otherwise the
 1875         # alignment can look strange in some cases.
 1876         my $list_type = $rlines->[$jbeg]->get_list_type();
 1877         if (
 1878                $jend == $jbeg
 1879             && $jend_m == $jbeg_m
 1880             && ( $ng == 1 || $istop_mm < 0 )
 1881             && ( $ng == $ng_max || $istop < 0 )
 1882             && !$line->get_j_terminal_match()
 1883 
 1884             # Only do this for imperfect matches. This is normally true except
 1885             # when two perfect matches cannot form a group because the line
 1886             # length limit would be exceeded. In that case we can still try
 1887             # to match as many alignments as possible.
 1888             && ( $imax != $imax_m || $istop_m != $imax_m )
 1889           )
 1890         {
 1891 
 1892             # We will just align assignments and simple lists
 1893             next unless ( $imax_min >= 0 );
 1894             next
 1895               unless ( $rtokens->[0] =~ /^=\d/
 1896                 || $list_type );
 1897 
 1898             # In this case we will limit padding to a short distance.  This
 1899             # is a compromise to keep some vertical alignment but prevent large
 1900             # gaps, which do not look good for just two lines.
 1901             my $pad_max =
 1902               two_line_pad( $rlines->[$jbeg], $rlines->[$jbeg_m], $imax_min );
 1903             next unless ($pad_max);
 1904             my $ng_m = $ng - 1;
 1905             $max_move{"$ng_m"} = $pad_max;
 1906             $max_move{"$ng"}   = $pad_max;
 1907         }
 1908 
 1909         # Loop to find all common leading tokens.
 1910         if ( $imax_min >= 0 ) {
 1911             foreach my $i ( 0 .. $imax_min ) {
 1912                 my $tok   = $rtokens->[$i];
 1913                 my $tok_m = $rtokens_m->[$i];
 1914                 last if ( $tok ne $tok_m );
 1915                 push @icommon, [ $i, $ng, $tok ];
 1916             }
 1917         }
 1918     }
 1919     return unless @icommon;
 1920 
 1921     ###########################################################
 1922     # Step 2: Reorder and consolidate the list into a task list
 1923     ###########################################################
 1924 
 1925     # We have to work first from lowest token index to highest, then by group,
 1926     # sort our list first on token index then group number
 1927     @icommon = sort { $a->[0] <=> $b->[0] || $a->[1] <=> $b->[1] } @icommon;
 1928 
 1929     # Make a task list of the form
 1930     #   [$i, ng_beg, $ng_end, $tok], ..
 1931     # where
 1932     #   $i is the index of the token to be aligned
 1933     #   $ng_beg..$ng_end is the group range for this action
 1934     my @todo;
 1935     my ( $i, $ng_end, $tok );
 1936     foreach my $item (@icommon) {
 1937         my $ng_last = $ng_end;
 1938         my $i_last  = $i;
 1939         ( $i, $ng_end, $tok ) = @{$item};
 1940         my $ng_beg = $ng_end - 1;
 1941         if ( defined($ng_last) && $ng_beg == $ng_last && $i == $i_last ) {
 1942             my $var = pop(@todo);
 1943             $ng_beg = $var->[1];
 1944         }
 1945         my ( $raw_tok, $lev, $tag, $tok_count ) = decode_alignment_token($tok);
 1946         push @todo, [ $i, $ng_beg, $ng_end, $raw_tok, $lev ];
 1947     }
 1948 
 1949     ###############################
 1950     # Step 3: Execute the task list
 1951     ###############################
 1952     do_left_to_right_sweep( $rlines, $rgroups, \@todo, \%max_move, $short_pad,
 1953         $group_level );
 1954     return;
 1955 }
 1956 
 1957 {    ## closure for sub do_left_to_right_sweep
 1958 
 1959     my %is_good_alignment_token;
 1960 
 1961     BEGIN {
 1962 
 1963         # One of the most difficult aspects of vertical alignment is knowing
 1964         # when not to align.  Alignment can go from looking very nice to very
 1965         # bad when overdone.  In the sweep algorithm there are two special
 1966         # cases where we may need to limit padding to a '$short_pad' distance
 1967         # to avoid some very ugly formatting:
 1968 
 1969         # 1. Two isolated lines with partial alignment
 1970         # 2. A 'tail-wag-dog' situation, in which a single terminal
 1971         #    line with partial alignment could cause a significant pad
 1972         #    increase in many previous lines if allowed to join the alignment.
 1973 
 1974         # For most alignment tokens, we will allow only a small pad to be
 1975         # introduced (the hardwired $short_pad variable) . But for some 'good'
 1976         # alignments we can be less restrictive.
 1977 
 1978         # These are 'good' alignments, which are allowed more padding:
 1979         my @q = qw(
 1980           => = ? if unless or || {
 1981         );
 1982         push @q, ',';
 1983         @is_good_alignment_token{@q} = (0) x scalar(@q);
 1984 
 1985         # Promote a few of these to 'best', with essentially no pad limit:
 1986         $is_good_alignment_token{'='}      = 1;
 1987         $is_good_alignment_token{'if'}     = 1;
 1988         $is_good_alignment_token{'unless'} = 1;
 1989         $is_good_alignment_token{'=>'}     = 1
 1990 
 1991           # Note the hash values are set so that:
 1992           #         if ($is_good_alignment_token{$raw_tok}) => best
 1993           # if defined ($is_good_alignment_token{$raw_tok}) => good or best
 1994 
 1995     }
 1996 
 1997     sub do_left_to_right_sweep {
 1998         my ( $rlines, $rgroups, $rtodo, $rmax_move, $short_pad, $group_level )
 1999           = @_;
 2000 
 2001         # $blocking_level[$nj is the level at a match failure between groups
 2002         # $ng-1 and $ng
 2003         my @blocking_level;
 2004         my $group_list_type = $rlines->[0]->get_list_type();
 2005 
 2006         my $move_to_common_column = sub {
 2007 
 2008             # Move the alignment column of token $itok to $col_want for a
 2009             # sequence of groups.
 2010             my ( $ngb, $nge, $itok, $col_want, $raw_tok ) = @_;
 2011             return unless ( defined($ngb) && $nge > $ngb );
 2012             foreach my $ng ( $ngb .. $nge ) {
 2013 
 2014                 my ( $jbeg, $jend ) = @{ $rgroups->[$ng] };
 2015                 my $line  = $rlines->[$jbeg];
 2016                 my $col   = $line->get_column($itok);
 2017                 my $avail = $line->get_available_space_on_right();
 2018                 my $move  = $col_want - $col;
 2019                 if ( $move > 0 ) {
 2020 
 2021                     # limit padding increase in isolated two lines
 2022                     next
 2023                       if ( defined( $rmax_move->{$ng} )
 2024                         && $move > $rmax_move->{$ng}
 2025                         && !$is_good_alignment_token{$raw_tok} );
 2026 
 2027                     $line->increase_field_width( $itok, $move );
 2028                 }
 2029                 elsif ( $move < 0 ) {
 2030 
 2031                     # spot to take special action on failure to move
 2032                 }
 2033             }
 2034         };
 2035 
 2036         foreach my $task ( @{$rtodo} ) {
 2037             my ( $itok, $ng_beg, $ng_end, $raw_tok, $lev ) = @{$task};
 2038 
 2039             # Nothing to do for a single group
 2040             next unless ( $ng_end > $ng_beg );
 2041 
 2042             my $ng_first;  # index of the first group of a continuous sequence
 2043             my $col_want;  # the common alignment column of a sequence of groups
 2044             my $col_limit; # maximum column before bumping into max line length
 2045             my $line_count_ng_m = 0;
 2046             my $jmax_m;
 2047             my $it_stop_m;
 2048 
 2049             # Loop over the groups
 2050             # 'ix_' = index in the array of lines
 2051             # 'ng_' = index in the array of groups
 2052             # 'it_' = index in the array of tokens
 2053             my $ix_min      = $rgroups->[$ng_beg]->[0];
 2054             my $ix_max      = $rgroups->[$ng_end]->[1];
 2055             my $lines_total = $ix_max - $ix_min + 1;
 2056             foreach my $ng ( $ng_beg .. $ng_end ) {
 2057                 my ( $ix_beg, $ix_end, $it_stop ) = @{ $rgroups->[$ng] };
 2058                 my $line_count_ng = $ix_end - $ix_beg + 1;
 2059 
 2060                 # Important: note that since all lines in a group have a common
 2061                 # alignments object, we just have to work on one of the lines
 2062                 # (the first line).  All of the rest will be changed
 2063                 # automatically.
 2064                 my $line = $rlines->[$ix_beg];
 2065                 my $jmax = $line->get_jmax();
 2066 
 2067                 # the maximum space without exceeding the line length:
 2068                 my $avail   = $line->get_available_space_on_right();
 2069                 my $col     = $line->get_column($itok);
 2070                 my $col_max = $col + $avail;
 2071 
 2072                 # Initialize on first group
 2073                 if ( !defined($col_want) ) {
 2074                     $ng_first        = $ng;
 2075                     $col_want        = $col;
 2076                     $col_limit       = $col_max;
 2077                     $line_count_ng_m = $line_count_ng;
 2078                     $jmax_m          = $jmax;
 2079                     $it_stop_m       = $it_stop;
 2080                     next;
 2081                 }
 2082 
 2083                 # RULE: Throw a blocking flag upon encountering a token level
 2084                 # different from the level of the first blocking token.  For
 2085                 # example, in the following example, if the = matches get
 2086                 # blocked between two groups as shown, then we want to start
 2087                 # blocking matches at the commas, which are at deeper level, so
 2088                 # that we do not get the big gaps shown here:
 2089 
 2090                 #  my $unknown3 = pack( "v",          -2 );
 2091                 #  my $unknown4 = pack( "v",          0x09 );
 2092                 #  my $unknown5 = pack( "VVV",        0x06, 0x00, 0x00 );
 2093                 #  my $num_bbd_blocks  = pack( "V",   $num_lists );
 2094                 #  my $root_startblock = pack( "V",   $root_start );
 2095                 #  my $unknown6        = pack( "VV",  0x00, 0x1000 );
 2096 
 2097                 # On the other hand, it is okay to keep matching at the same
 2098                 # level such as in a simple list of commas and/or fat arrors.
 2099 
 2100                 my $is_blocked = defined( $blocking_level[$ng] )
 2101                   && $lev > $blocking_level[$ng];
 2102 
 2103                 # TAIL-WAG-DOG RULE: prevent a 'tail-wag-dog' syndrom, meaning:
 2104                 # Do not let one or two lines with a **different number of
 2105                 # alignments** open up a big gap in a large block.  For
 2106                 # example, we will prevent something like this, where the first
 2107                 # line prys open the rest:
 2108 
 2109             #  $worksheet->write( "B7", "http://www.perl.com", undef, $format );
 2110             #  $worksheet->write( "C7", "",                    $format );
 2111             #  $worksheet->write( "D7", "",                    $format );
 2112             #  $worksheet->write( "D8", "",                    $format );
 2113             #  $worksheet->write( "D8", "",                    $format );
 2114 
 2115                 # We should exclude from consideration two groups which are
 2116                 # effectively the same but separated because one does not
 2117                 # fit in the maximum allowed line length.
 2118                 my $is_same_group =
 2119                   $jmax == $jmax_m && $it_stop_m == $jmax_m - 2;
 2120 
 2121                 my $lines_above = $ix_beg - $ix_min;
 2122                 my $lines_below = $lines_total - $lines_above;
 2123 
 2124                 # Increase the tolerable gap for certain favorable factors
 2125                 my $factor    = 1;
 2126                 my $top_level = $lev == $group_level;
 2127 
 2128                 # Align best top level alignment tokens like '=', 'if', ...
 2129                 # A factor of 10 allows a gap of up to 40 spaces
 2130                 if ( $top_level && $is_good_alignment_token{$raw_tok} ) {
 2131                     $factor = 10;
 2132                 }
 2133 
 2134                 # Otherwise allow some minimal padding of good alignments
 2135                 elsif (
 2136 
 2137                     defined( $is_good_alignment_token{$raw_tok} )
 2138 
 2139                     # We have to be careful if there are just 2 lines.  This
 2140                     # two-line factor allows large gaps only for 2 lines which
 2141                     # are simple lists with fewer items on the second line. It
 2142                     # gives results similar to previous versions of perltidy.
 2143                     && (   $lines_total > 2
 2144                         || $group_list_type && $jmax < $jmax_m && $top_level )
 2145                   )
 2146                 {
 2147                     $factor += 1;
 2148                     if ($top_level) {
 2149                         $factor += 1;
 2150                     }
 2151                 }
 2152 
 2153                 my $is_big_gap;
 2154                 if ( !$is_same_group ) {
 2155                     $is_big_gap ||=
 2156                       (      $lines_above == 1
 2157                           || $lines_above == 2 && $lines_below >= 4 )
 2158                       && $col_want > $col + $short_pad * $factor;
 2159                     $is_big_gap ||=
 2160                       (      $lines_below == 1
 2161                           || $lines_below == 2 && $lines_above >= 4 )
 2162                       && $col > $col_want + $short_pad * $factor;
 2163                 }
 2164 
 2165                 # if match is limited by gap size, stop aligning at this level
 2166                 if ($is_big_gap) {
 2167                     $blocking_level[$ng] = $lev - 1;
 2168                 }
 2169 
 2170                 # quit and restart if it cannot join this batch
 2171                 if (   $col_want > $col_max
 2172                     || $col > $col_limit
 2173                     || $is_big_gap
 2174                     || $is_blocked )
 2175                 {
 2176 
 2177                     # remember the level of the first blocking token
 2178                     if ( !defined( $blocking_level[$ng] ) ) {
 2179                         $blocking_level[$ng] = $lev;
 2180                     }
 2181 
 2182                     $move_to_common_column->(
 2183                         $ng_first, $ng - 1, $itok, $col_want, $raw_tok
 2184                     );
 2185                     $ng_first        = $ng;
 2186                     $col_want        = $col;
 2187                     $col_limit       = $col_max;
 2188                     $line_count_ng_m = $line_count_ng;
 2189                     $jmax_m          = $jmax;
 2190                     $it_stop_m       = $it_stop;
 2191                     next;
 2192                 }
 2193 
 2194                 $line_count_ng_m += $line_count_ng;
 2195 
 2196                 # update the common column and limit
 2197                 if ( $col > $col_want )      { $col_want  = $col }
 2198                 if ( $col_max < $col_limit ) { $col_limit = $col_max }
 2199 
 2200             } ## end loop over groups
 2201 
 2202             if ( $ng_end > $ng_first ) {
 2203                 $move_to_common_column->(
 2204                     $ng_first, $ng_end, $itok, $col_want, $raw_tok
 2205                 );
 2206             } ## end loop over groups for one task
 2207         } ## end loop over tasks
 2208 
 2209         return;
 2210     }
 2211 }
 2212 
 2213 sub delete_selected_tokens {
 2214 
 2215     my ( $line_obj, $ridel ) = @_;
 2216 
 2217     # $line_obj    is the line to be modified
 2218     # $ridel       is a ref to list of indexes to be deleted
 2219 
 2220     # remove an unused alignment token(s) to improve alignment chances
 2221 
 2222     return unless ( defined($line_obj) && defined($ridel) && @{$ridel} );
 2223 
 2224     my $jmax_old           = $line_obj->get_jmax();
 2225     my $rfields_old        = $line_obj->get_rfields();
 2226     my $rfield_lengths_old = $line_obj->get_rfield_lengths();
 2227     my $rpatterns_old      = $line_obj->get_rpatterns();
 2228     my $rtokens_old        = $line_obj->get_rtokens();
 2229     my $j_terminal_match   = $line_obj->get_j_terminal_match();
 2230 
 2231     use constant EXPLAIN_DELETE_SELECTED => 0;
 2232 
 2233     local $" = '> <';
 2234     EXPLAIN_DELETE_SELECTED && print <<EOM;
 2235 delete indexes: <@{$ridel}>
 2236 old jmax: $jmax_old
 2237 old tokens: <@{$rtokens_old}>
 2238 old patterns: <@{$rpatterns_old}>
 2239 old fields: <@{$rfields_old}>
 2240 old field_lengths: <@{$rfield_lengths_old}>
 2241 EOM
 2242 
 2243     my $rfields_new        = [];
 2244     my $rpatterns_new      = [];
 2245     my $rtokens_new        = [];
 2246     my $rfield_lengths_new = [];
 2247 
 2248     # Convert deletion list to a hash to allow any order, multiple entries,
 2249     # and avoid problems with index values out of range
 2250     my %delete_me;
 2251     @delete_me{ @{$ridel} } = (1) x scalar( @{$ridel} );
 2252 
 2253     my $pattern      = $rpatterns_old->[0];
 2254     my $field        = $rfields_old->[0];
 2255     my $field_length = $rfield_lengths_old->[0];
 2256     push @{$rfields_new},        $field;
 2257     push @{$rfield_lengths_new}, $field_length;
 2258     push @{$rpatterns_new},      $pattern;
 2259 
 2260     # Loop to either copy items or concatenate fields and patterns
 2261     my $jmin_del;
 2262     for ( my $j = 0 ; $j < $jmax_old ; $j++ ) {
 2263         my $token        = $rtokens_old->[$j];
 2264         my $field        = $rfields_old->[ $j + 1 ];
 2265         my $field_length = $rfield_lengths_old->[ $j + 1 ];
 2266         my $pattern      = $rpatterns_old->[ $j + 1 ];
 2267         if ( !$delete_me{$j} ) {
 2268             push @{$rtokens_new},        $token;
 2269             push @{$rfields_new},        $field;
 2270             push @{$rpatterns_new},      $pattern;
 2271             push @{$rfield_lengths_new}, $field_length;
 2272         }
 2273         else {
 2274             if ( !defined($jmin_del) ) { $jmin_del = $j }
 2275             $rfields_new->[-1] .= $field;
 2276             $rfield_lengths_new->[-1] += $field_length;
 2277             $rpatterns_new->[-1] .= $pattern;
 2278         }
 2279     }
 2280 
 2281     # ----- x ------ x ------ x ------
 2282     #t      0        1        2        <- token indexing
 2283     #f   0      1        2        3    <- field and pattern
 2284 
 2285     my $jmax_new = @{$rfields_new} - 1;
 2286     $line_obj->set_rtokens($rtokens_new);
 2287     $line_obj->set_rpatterns($rpatterns_new);
 2288     $line_obj->set_rfields($rfields_new);
 2289     $line_obj->set_rfield_lengths($rfield_lengths_new);
 2290     $line_obj->set_jmax($jmax_new);
 2291 
 2292     # The value of j_terminal_match will be incorrect if we delete tokens prior
 2293     # to it. We will have to give up on aligning the terminal tokens if this
 2294     # happens.
 2295     if ( defined($j_terminal_match) && $jmin_del <= $j_terminal_match ) {
 2296         $line_obj->set_j_terminal_match(undef);
 2297     }
 2298 
 2299     # update list type -
 2300     if ( $line_obj->get_list_seqno() ) {
 2301 
 2302         ## This works, but for efficiency see if we need to make a change:
 2303         ## decide_if_list($line_obj);
 2304 
 2305         # An existing list will still be a list but with possibly different
 2306         # leading token
 2307         my $old_list_type = $line_obj->get_list_type();
 2308         my $new_list_type = "";
 2309         if ( $rtokens_new->[0] =~ /^(=>|,)/ ) {
 2310             $new_list_type = $rtokens_new->[0];
 2311         }
 2312         if ( !$old_list_type || $old_list_type ne $new_list_type ) {
 2313             decide_if_list($line_obj);
 2314         }
 2315     }
 2316 
 2317     EXPLAIN_DELETE_SELECTED && print <<EOM;
 2318 
 2319 new jmax: $jmax_new
 2320 new tokens: <@{$rtokens_new}>
 2321 new patterns: <@{$rpatterns_new}>
 2322 new fields: <@{$rfields_new}>
 2323 EOM
 2324     return;
 2325 }
 2326 
 2327 {    ## closure for sub decode_alignment_token
 2328 
 2329     # This routine is called repeatedly for each token, so it needs to be
 2330     # efficient.  We can speed things up by remembering the inputs and outputs
 2331     # in a hash.
 2332     my %decoded_token;
 2333 
 2334     sub initialize_decode {
 2335 
 2336         # We will re-initialize the hash for each file. Otherwise, there is
 2337         # a danger that the hash can become arbitrarily large if a very large
 2338         # number of files is processed at once.
 2339         %decoded_token = ();
 2340         return;
 2341     }
 2342 
 2343     sub decode_alignment_token {
 2344 
 2345         # Unpack the values packed in an alignment token
 2346         #
 2347         # Usage:
 2348         #        my ( $raw_tok, $lev, $tag, $tok_count ) =
 2349         #          decode_alignment_token($token);
 2350 
 2351         # Alignment tokens have a trailing decimal level and optional tag (for
 2352         # commas):
 2353         # For example, the first comma in the following line
 2354         #     sub banner  { crlf; report( shift, '/', shift ); crlf }
 2355         # is decorated as follows:
 2356         #    ,2+report-6  => (tok,lev,tag) =qw( ,   2   +report-6)
 2357 
 2358         # An optional token count may be appended with a leading dot.
 2359         # Currently this is only done for '=' tokens but this could change.
 2360         # For example, consider the following line:
 2361         #   $nport   = $port = shift || $name;
 2362         # The first '=' may either be '=0' or '=0.1' [level 0, first equals]
 2363         # The second '=' will be '=0.2' [level 0, second equals]
 2364         my ($tok) = @_;
 2365 
 2366         if ( defined( $decoded_token{$tok} ) ) {
 2367             return @{ $decoded_token{$tok} };
 2368         }
 2369 
 2370         my ( $raw_tok, $lev, $tag, $tok_count ) = ( $tok, 0, "", 1 );
 2371         if ( $tok =~ /^(\D+)(\d+)([^\.]*)(\.(\d+))?$/ ) {
 2372             $raw_tok   = $1;
 2373             $lev       = $2;
 2374             $tag       = $3 if ($3);
 2375             $tok_count = $5 if ($5);
 2376         }
 2377         my @vals = ( $raw_tok, $lev, $tag, $tok_count );
 2378         $decoded_token{$tok} = \@vals;
 2379         return @vals;
 2380     }
 2381 }
 2382 
 2383 {    ## closure for sub delete_unmatched_tokens
 2384 
 2385     my %is_assignment;
 2386     my %keep_after_deleted_assignment;
 2387 
 2388     BEGIN {
 2389         my @q;
 2390 
 2391         @q = qw(
 2392           = **= += *= &= <<= &&=
 2393           -= /= |= >>= ||= //=
 2394           .= %= ^=
 2395           x=
 2396         );
 2397         @is_assignment{@q} = (1) x scalar(@q);
 2398 
 2399         # These tokens may be kept following an = deletion
 2400         @q = qw(
 2401           if unless or ||
 2402         );
 2403         @keep_after_deleted_assignment{@q} = (1) x scalar(@q);
 2404 
 2405     }
 2406 
 2407     # This flag is for testing only and should normally be zero.
 2408     use constant TEST_DELETE_NULL => 0;
 2409 
 2410     sub delete_unmatched_tokens {
 2411         my ( $rlines, $group_level ) = @_;
 2412 
 2413         # This is a preliminary step in vertical alignment in which we remove
 2414         # as many obviously un-needed alignment tokens as possible.  This will
 2415         # prevent them from interfering with the final alignment.
 2416 
 2417         # These are the return values
 2418         my $max_lev_diff     = 0;    # used to avoid a call to prune_tree
 2419         my $saw_side_comment = 0;    # used to avoid a call for side comments
 2420 
 2421         # Handle no lines -- shouldn't happen
 2422         return unless @{$rlines};
 2423 
 2424         # Handle a single line
 2425         if ( @{$rlines} == 1 ) {
 2426             my $line   = $rlines->[0];
 2427             my $jmax   = $line->get_jmax();
 2428             my $length = $line->get_rfield_lengths()->[$jmax];
 2429             $saw_side_comment = $length > 0;
 2430             return ( $max_lev_diff, $saw_side_comment );
 2431         }
 2432 
 2433         my $has_terminal_match = $rlines->[-1]->get_j_terminal_match();
 2434 
 2435         # ignore hanging side comments in these operations
 2436         my @filtered   = grep { !$_->get_is_hanging_side_comment() } @{$rlines};
 2437         my $rnew_lines = \@filtered;
 2438 
 2439         $saw_side_comment = @filtered != @{$rlines};
 2440         $max_lev_diff     = 0;
 2441 
 2442         # nothing to do if all lines were hanging side comments
 2443         my $jmax = @{$rnew_lines} - 1;
 2444         return ( $max_lev_diff, $saw_side_comment ) unless ( $jmax >= 0 );
 2445 
 2446         my @equals_info;
 2447         my @line_info;
 2448         my %is_good_tok;
 2449 
 2450         # create a hash of tokens for each line
 2451         my $rline_hashes = [];
 2452         foreach my $line ( @{$rnew_lines} ) {
 2453             my $rhash     = {};
 2454             my $rtokens   = $line->get_rtokens();
 2455             my $rpatterns = $line->get_rpatterns();
 2456             my $i         = 0;
 2457             my ( $i_eq, $tok_eq, $pat_eq );
 2458             my ( $lev_min, $lev_max );
 2459             foreach my $tok ( @{$rtokens} ) {
 2460                 my ( $raw_tok, $lev, $tag, $tok_count ) =
 2461                   decode_alignment_token($tok);
 2462 
 2463                 if ( $tok ne '#' ) {
 2464                     if ( !defined($lev_min) ) {
 2465                         $lev_min = $lev;
 2466                         $lev_max = $lev;
 2467                     }
 2468                     else {
 2469                         if ( $lev < $lev_min ) { $lev_min = $lev }
 2470                         if ( $lev > $lev_max ) { $lev_max = $lev }
 2471                     }
 2472                 }
 2473                 else {
 2474                     if ( !$saw_side_comment ) {
 2475                         my $length = $line->get_rfield_lengths()->[ $i + 1 ];
 2476                         $saw_side_comment ||= $length;
 2477                     }
 2478                 }
 2479 
 2480                 # Possible future upgrade: for multiple matches,
 2481                 # record [$i1, $i2, ..] instead of $i
 2482                 $rhash->{$tok} =
 2483                   [ $i, undef, undef, $raw_tok, $lev, $tag, $tok_count ];
 2484 
 2485                 # remember the first equals at line level
 2486                 if ( !defined($i_eq) && $raw_tok eq '=' ) {
 2487 
 2488                     if ( $lev eq $group_level ) {
 2489                         $i_eq   = $i;
 2490                         $tok_eq = $tok;
 2491                         $pat_eq = $rpatterns->[$i];
 2492                     }
 2493                 }
 2494                 $i++;
 2495             }
 2496             push @{$rline_hashes}, $rhash;
 2497             push @equals_info, [ $i_eq, $tok_eq, $pat_eq ];
 2498             push @line_info, [ $lev_min, $lev_max ];
 2499             if ( defined($lev_min) ) {
 2500                 my $lev_diff = $lev_max - $lev_min;
 2501                 if ( $lev_diff > $max_lev_diff ) { $max_lev_diff = $lev_diff }
 2502             }
 2503         }
 2504 
 2505         # compare each line pair and record matches
 2506         my $rtok_hash = {};
 2507         my $nr        = 0;
 2508         for ( my $jl = 0 ; $jl < $jmax ; $jl++ ) {
 2509             my $nl = $nr;
 2510             $nr = 0;
 2511             my $jr      = $jl + 1;
 2512             my $rhash_l = $rline_hashes->[$jl];
 2513             my $rhash_r = $rline_hashes->[$jr];
 2514             my $count   = 0;                      # UNUSED NOW?
 2515             my $ntoks   = 0;
 2516             foreach my $tok ( keys %{$rhash_l} ) {
 2517                 $ntoks++;
 2518                 if ( defined( $rhash_r->{$tok} ) ) {
 2519                     if ( $tok ne '#' ) { $count++; }
 2520                     my $il = $rhash_l->{$tok}->[0];
 2521                     my $ir = $rhash_r->{$tok}->[0];
 2522                     $rhash_l->{$tok}->[2] = $ir;
 2523                     $rhash_r->{$tok}->[1] = $il;
 2524                     if ( $tok ne '#' ) {
 2525                         push @{ $rtok_hash->{$tok} }, ( $jl, $jr );
 2526                         $nr++;
 2527                     }
 2528                 }
 2529             }
 2530 
 2531             # Set a line break if no matching tokens between these lines
 2532             # (this is not strictly necessary now but does not hurt)
 2533             if ( $nr == 0 && $nl > 0 ) {
 2534                 $rnew_lines->[$jl]->set_end_group(1);
 2535             }
 2536 
 2537             # Also set a line break if both lines have simple equals but with
 2538             # different leading characters in patterns.  This check is similar
 2539             # to one in sub check_match, and will prevent sub
 2540             # prune_alignment_tree from removing alignments which otherwise
 2541             # should be kept. This fix is rarely needed, but it can
 2542             # occasionally improve formatting.
 2543             # For example:
 2544             #     my $name = $this->{Name};
 2545             #     $type = $this->ctype($genlooptype) if defined $genlooptype;
 2546             #     my $declini = ( $asgnonly ? ""          : "\t$type *" );
 2547             #     my $cast    = ( $type     ? "($type *)" : "" );
 2548             # The last two lines start with 'my' and will not match the
 2549             # previous line starting with $type, so we do not want
 2550             # prune_alignment tree to delete their ? : alignments at a deeper
 2551             # level.
 2552             my ( $i_eq_l, $tok_eq_l, $pat_eq_l ) = @{ $equals_info[$jl] };
 2553             my ( $i_eq_r, $tok_eq_r, $pat_eq_r ) = @{ $equals_info[$jr] };
 2554             if ( defined($i_eq_l) && defined($i_eq_r) ) {
 2555 
 2556                 # Also, do not align equals across a change in ci level
 2557                 my $ci_jump = $rnew_lines->[$jl]->get_ci_level() !=
 2558                   $rnew_lines->[$jr]->get_ci_level();
 2559 
 2560                 if (
 2561                        $tok_eq_l eq $tok_eq_r
 2562                     && $i_eq_l == 0
 2563                     && $i_eq_r == 0
 2564                     && ( substr( $pat_eq_l, 0, 1 ) ne substr( $pat_eq_r, 0, 1 )
 2565                         || $ci_jump )
 2566                   )
 2567                 {
 2568                     $rnew_lines->[$jl]->set_end_group(1);
 2569                 }
 2570             }
 2571         }
 2572 
 2573         # find subgroups
 2574         my @subgroups;
 2575         push @subgroups, [ 0, $jmax ];
 2576         for ( my $jl = 0 ; $jl < $jmax ; $jl++ ) {
 2577             if ( $rnew_lines->[$jl]->get_end_group() ) {
 2578                 $subgroups[-1]->[1] = $jl;
 2579                 push @subgroups, [ $jl + 1, $jmax ];
 2580             }
 2581         }
 2582 
 2583         # flag to allow skipping pass 2
 2584         my $saw_large_group;
 2585 
 2586         ############################################################
 2587         # PASS 1 over subgroups to remove unmatched alignment tokens
 2588         ############################################################
 2589         foreach my $item (@subgroups) {
 2590             my ( $jbeg, $jend ) = @{$item};
 2591 
 2592             my $nlines = $jend - $jbeg + 1;
 2593 
 2594             ####################################################
 2595             # Look for complete if/elsif/else and ternary blocks
 2596             ####################################################
 2597 
 2598             # We are looking for a common '$dividing_token' like these:
 2599 
 2600             #    if    ( $b and $s ) { $p->{'type'} = 'a'; }
 2601             #    elsif ($b)          { $p->{'type'} = 'b'; }
 2602             #    elsif ($s)          { $p->{'type'} = 's'; }
 2603             #    else                { $p->{'type'} = ''; }
 2604             #                        ^----------- dividing_token
 2605 
 2606             #   my $severity =
 2607             #      !$routine                     ? '[PFX]'
 2608             #     : $routine =~ /warn.*_d\z/     ? '[DS]'
 2609             #     : $routine =~ /ck_warn/        ? 'W'
 2610             #     : $routine =~ /ckWARN\d*reg_d/ ? 'S'
 2611             #     : $routine =~ /ckWARN\d*reg/   ? 'W'
 2612             #     : $routine =~ /vWARN\d/        ? '[WDS]'
 2613             #     :                                '[PFX]';
 2614             #                                    ^----------- dividing_token
 2615 
 2616             # Only look for groups which are more than 2 lines long.  Two lines
 2617             # can get messed up doing this, probably due to the various
 2618             # two-line rules.
 2619 
 2620             my $dividing_token;
 2621             my %token_line_count;
 2622             if ( $nlines > 2 ) {
 2623 
 2624                 for ( my $jj = $jbeg ; $jj <= $jend ; $jj++ ) {
 2625                     my %seen;
 2626                     my $line    = $rnew_lines->[$jj];
 2627                     my $rtokens = $line->get_rtokens();
 2628                     foreach my $tok ( @{$rtokens} ) {
 2629                         if ( !$seen{$tok} ) {
 2630                             $seen{$tok}++;
 2631                             $token_line_count{$tok}++;
 2632                         }
 2633                     }
 2634                 }
 2635 
 2636                 foreach my $tok ( keys %token_line_count ) {
 2637                     if ( $token_line_count{$tok} == $nlines ) {
 2638                         if (   substr( $tok, 0, 1 ) eq '?'
 2639                             || substr( $tok, 0, 1 ) eq '{'
 2640                             && $tok =~ /^\{\d+if/ )
 2641                         {
 2642                             $dividing_token = $tok;
 2643                             last;
 2644                         }
 2645                     }
 2646                 }
 2647             }
 2648 
 2649             #####################################################
 2650             # Loop over lines to remove unwanted alignment tokens
 2651             #####################################################
 2652             for ( my $jj = $jbeg ; $jj <= $jend ; $jj++ ) {
 2653                 my $line    = $rnew_lines->[$jj];
 2654                 my $rtokens = $line->get_rtokens();
 2655                 my $rhash   = $rline_hashes->[$jj];
 2656                 my $i_eq    = $equals_info[$jj]->[0];
 2657                 my @idel;
 2658                 my $imax = @{$rtokens} - 2;
 2659                 my $delete_above_level;
 2660                 my $deleted_assignment_token;
 2661 
 2662                 my $saw_dividing_token = "";
 2663                 $saw_large_group ||= $nlines > 2 && $imax > 1;
 2664 
 2665                 # Loop over all alignment tokens
 2666                 for ( my $i = 0 ; $i <= $imax ; $i++ ) {
 2667                     my $tok = $rtokens->[$i];
 2668                     next if ( $tok eq '#' );    # shouldn't happen
 2669                     my ( $iii, $il, $ir, $raw_tok, $lev, $tag, $tok_count ) =
 2670                       @{ $rhash->{$tok} };
 2671 
 2672                     #######################################################
 2673                     # Here is the basic RULE: remove an unmatched alignment
 2674                     # which does not occur in the surrounding lines.
 2675                     #######################################################
 2676                     my $delete_me = !defined($il) && !defined($ir);
 2677 
 2678                     # But now we modify this with exceptions...
 2679 
 2680                     # EXCEPTION 1: If we are in a complete ternary or
 2681                     # if/elsif/else group, and this token is not on every line
 2682                     # of the group, should we delete it to preserve overall
 2683                     # alignment?
 2684                     if ($dividing_token) {
 2685                         if ( $token_line_count{$tok} >= $nlines ) {
 2686                             $saw_dividing_token ||= $tok eq $dividing_token;
 2687                         }
 2688                         else {
 2689 
 2690                             # For shorter runs, delete toks to save alignment.
 2691                             # For longer runs, keep toks after the '{' or '?'
 2692                             # to allow sub-alignments within braces.  The
 2693                             # number 5 lines is arbitrary but seems to work ok.
 2694                             $delete_me ||=
 2695                               ( $nlines < 5 || !$saw_dividing_token );
 2696                         }
 2697                     }
 2698 
 2699                     # EXCEPTION 2: Remove all tokens above a certain level
 2700                     # following a previous deletion.  For example, we have to
 2701                     # remove tagged higher level alignment tokens following a
 2702                     # '=>' deletion because the tags of higher level tokens
 2703                     # will now be incorrect. For example, this will prevent
 2704                     # aligning commas as follows after deleting the second '=>'
 2705                     #    $w->insert(
 2706                     #   ListBox => origin => [ 270, 160 ],
 2707                     #   size    => [ 200,           55 ],
 2708                     #    );
 2709                     if ( defined($delete_above_level) ) {
 2710                         if ( $lev > $delete_above_level ) {
 2711                             $delete_me ||= 1;    #$tag;
 2712                         }
 2713                         else { $delete_above_level = undef }
 2714                     }
 2715 
 2716                     # EXCEPTION 3: Remove all but certain tokens after an
 2717                     # assignment deletion.
 2718                     if (
 2719                         $deleted_assignment_token
 2720                         && ( $lev > $group_level
 2721                             || !$keep_after_deleted_assignment{$raw_tok} )
 2722                       )
 2723                     {
 2724                         $delete_me ||= 1;
 2725                     }
 2726 
 2727                     # EXCEPTION 4: Do not touch the first line of a 2 line
 2728                     # terminal match, such as below, because j_terminal has
 2729                     # already been set.
 2730                     #    if ($tag) { $tago = "<$tag>"; $tagc = "</$tag>"; }
 2731                     #    else      { $tago = $tagc = ''; }
 2732                     # But see snippets 'else1.t' and 'else2.t'
 2733                     $delete_me = 0
 2734                       if ( $jj == $jbeg
 2735                         && $has_terminal_match
 2736                         && $nlines == 2 );
 2737 
 2738                     # EXCEPTION 5: misc additional rules for commas and equals
 2739                     if ($delete_me) {
 2740 
 2741                         # okay to delete second and higher copies of a token
 2742                         if ( $tok_count == 1 ) {
 2743 
 2744                             # for a comma...
 2745                             if ( $raw_tok eq ',' ) {
 2746 
 2747                                 # Do not delete commas before an equals
 2748                                 $delete_me = 0
 2749                                   if ( defined($i_eq) && $i < $i_eq );
 2750 
 2751                                 # Do not delete line-level commas
 2752                                 $delete_me = 0 if ( $lev <= $group_level );
 2753                             }
 2754 
 2755                             # For an assignment at group level..
 2756                             if (   $is_assignment{$raw_tok}
 2757                                 && $lev == $group_level )
 2758                             {
 2759 
 2760                                 # Do not delete if it is the last alignment of
 2761                                 # multiple tokens; this will prevent some
 2762                                 # undesirable alignments
 2763                                 if ( $imax > 0 && $i == $imax ) {
 2764                                     $delete_me = 0;
 2765                                 }
 2766 
 2767                                 # Otherwise, set a flag to delete most
 2768                                 # remaining tokens
 2769                                 else { $deleted_assignment_token = $raw_tok }
 2770                             }
 2771                         }
 2772                     }
 2773 
 2774                     #####################################
 2775                     # Add this token to the deletion list
 2776                     #####################################
 2777                     if ($delete_me) {
 2778                         push @idel, $i;
 2779 
 2780                         # update deletion propagation flags
 2781                         if ( !defined($delete_above_level)
 2782                             || $lev < $delete_above_level )
 2783                         {
 2784 
 2785                             # delete all following higher level alignments
 2786                             $delete_above_level = $lev;
 2787 
 2788                             # but keep deleting after => to next lower level
 2789                             # to avoid some bizarre alignments
 2790                             if ( $raw_tok eq '=>' ) {
 2791                                 $delete_above_level = $lev - 1;
 2792                             }
 2793                         }
 2794                     }
 2795                 }    # End loop over alignment tokens
 2796 
 2797                 # Process all deletion requests for this line
 2798                 if (@idel) {
 2799                     delete_selected_tokens( $line, \@idel );
 2800                 }
 2801             }    # End loopover lines
 2802         }    # End loop over subgroups
 2803 
 2804         #################################################
 2805         # PASS 2 over subgroups to remove null alignments
 2806         #################################################
 2807 
 2808         # This pass is only used for testing. It is helping to identify
 2809         # alignment situations which might be improved with a future more
 2810         # general algorithm which adds a tail matching capability.
 2811         if (TEST_DELETE_NULL) {
 2812             delete_null_alignments( $rnew_lines, $rline_hashes, \@subgroups )
 2813               if ($saw_large_group);
 2814         }
 2815 
 2816         # PASS 3: Construct a tree of matched lines and delete some small deeper
 2817         # levels of tokens.  They also block good alignments.
 2818         prune_alignment_tree($rnew_lines) if ($max_lev_diff);
 2819 
 2820         # PASS 4: compare all lines for common tokens
 2821         match_line_pairs( $rlines, $rnew_lines, \@subgroups, $group_level );
 2822 
 2823         return ( $max_lev_diff, $saw_side_comment );
 2824     }
 2825 }
 2826 
 2827 sub delete_null_alignments {
 2828     my ( $rnew_lines, $rline_hashes, $rsubgroups ) = @_;
 2829 
 2830     # This is an optional second pass for deleting alignment tokens which can
 2831     # occasionally improve alignment.  We look for and remove 'null
 2832     # alignments', which are alignments that require no padding.  So we can
 2833     # 'cheat' and delete them. For example, notice the '=~' alignment in the
 2834     # first two lines of the following code:
 2835 
 2836     #    $sysname .= 'del' if $self->label =~ /deletion/;
 2837     #    $sysname .= 'ins' if $self->label =~ /insertion/;
 2838     #    $sysname .= uc $self->allele_ori->seq if $self->allele_ori->seq;
 2839 
 2840     # These '=~' tokens are already aligned because they are both the same
 2841     # distance from the previous alignment token, the 'if'.  So we can
 2842     # eliminate them as alignments.  The advantage is that in some cases, such
 2843     # as this one, this will allow other tokens to be aligned. In this case we
 2844     # then get the 'if' tokens to align:
 2845 
 2846     #   $sysname .= 'del'                     if $self->label =~ /deletion/;
 2847     #   $sysname .= 'ins'                     if $self->label =~ /insertion/;
 2848     #   $sysname .= uc $self->allele_ori->seq if $self->allele_ori->seq;
 2849 
 2850     # The following rules for limiting this operation have been found to
 2851     # work well and avoid problems:
 2852 
 2853     # Rule 1. We only consider a sequence of lines which have the same
 2854     # sequence of alignment tokens.
 2855 
 2856     # Rule 2. We never eliminate the first alignment token.  One reason is that
 2857     # lines may have different leading indentation spaces, so keeping the
 2858     # first alignment token insures that our length measurements start at
 2859     # a well-defined point.  Another reason is that nothing is gained because
 2860     # the left-to-right sweep can always handle alignment of this token.
 2861 
 2862     # Rule 3. We require that the first alignment token exist in either
 2863     # a previous line or a subsequent line.  The reason is that this avoids
 2864     # changing two-line matches which go through special logic.
 2865 
 2866     # Rule 4. Do not delete a token which occurs in a previous or subsequent
 2867     # line. For example, in the above example, it was ok to eliminate the '=~'
 2868     # token from two lines because it did not occur in a surrounding line.
 2869     # If it did occur in a surrounding line, the result could be confusing
 2870     # or even incorrectly aligned.
 2871 
 2872     # A consequence of these rules is that we only need to consider subgroups
 2873     # with at least 3 lines and 2 alignment tokens.
 2874 
 2875     # The subgroup line index range
 2876     my ( $jbeg, $jend );
 2877 
 2878     # Vars to keep track of the start of a current sequence of matching
 2879     # lines.
 2880     my $rtokens_match;
 2881     my $rfield_lengths_match;
 2882     my $j_match_beg;
 2883     my $j_match_end;
 2884     my $imax_match;
 2885     my $rneed_pad;
 2886 
 2887     # Vars for a line being tested
 2888     my $rtokens;
 2889     my $rfield_lengths;
 2890     my $imax;
 2891 
 2892     my $start_match = sub {
 2893         my ($jj) = @_;
 2894         $rtokens_match        = $rtokens;
 2895         $rfield_lengths_match = $rfield_lengths;
 2896         $j_match_beg          = $jj;
 2897         $j_match_end          = $jj;
 2898         $imax_match           = $imax;
 2899         $rneed_pad            = [];
 2900         return;
 2901     };
 2902 
 2903     my $add_to_match = sub {
 2904         my ($jj) = @_;
 2905         $j_match_end = $jj;
 2906 
 2907         # Keep track of any padding that would be needed for each token
 2908         for ( my $i = 0 ; $i <= $imax ; $i++ ) {
 2909             next if ( $rneed_pad->[$i] );
 2910             my $length       = $rfield_lengths->[$i];
 2911             my $length_match = $rfield_lengths_match->[$i];
 2912             if ( $length ne $length_match ) { $rneed_pad->[$i] = 1 }
 2913         }
 2914     };
 2915 
 2916     my $end_match = sub {
 2917         return unless ( $j_match_end > $j_match_beg );
 2918         my $nlines    = $j_match_end - $j_match_beg + 1;
 2919         my $rhash_beg = $rline_hashes->[$j_match_beg];
 2920         my $rhash_end = $rline_hashes->[$j_match_end];
 2921         my @idel;
 2922 
 2923         # Do not delete unless the first token also occurs in a surrounding line
 2924         my $tok0 = $rtokens_match->[0];
 2925         return
 2926           unless (
 2927             (
 2928                    $j_match_beg > $jbeg
 2929                 && $rnew_lines->[ $j_match_beg - 1 ]->get_rtokens()->[0] eq
 2930                 $tok0
 2931             )
 2932             || (   $j_match_end < $jend
 2933                 && $rnew_lines->[ $j_match_end + 1 ]->get_rtokens()->[0] eq
 2934                 $tok0 )
 2935           );
 2936 
 2937         # Note that we are skipping the token at i=0
 2938         for ( my $i = 1 ; $i <= $imax_match ; $i++ ) {
 2939 
 2940             # do not delete a token which requires padding to align
 2941             next if ( $rneed_pad->[$i] );
 2942 
 2943             my $tok = $rtokens_match->[$i];
 2944 
 2945             # Do not delete a token which occurs in a surrounding line
 2946             next
 2947               if ( $j_match_beg > $jbeg
 2948                 && defined( $rline_hashes->[ $j_match_beg - 1 ]->{$tok} ) );
 2949             next
 2950               if ( $j_match_end < $jend
 2951                 && defined( $rline_hashes->[ $j_match_end + 1 ]->{$tok} ) );
 2952 
 2953             # ok to delete
 2954             push @idel, $i;
 2955             ##print "ok to delete tok=$tok\n";
 2956         }
 2957         if (@idel) {
 2958             foreach my $j ( $j_match_beg .. $j_match_end ) {
 2959                 delete_selected_tokens( $rnew_lines->[$j], \@idel );
 2960             }
 2961         }
 2962     };
 2963 
 2964     foreach my $item ( @{$rsubgroups} ) {
 2965         ( $jbeg, $jend ) = @{$item};
 2966         my $nlines = $jend - $jbeg + 1;
 2967         next unless ( $nlines > 2 );
 2968 
 2969         for ( my $jj = $jbeg ; $jj <= $jend ; $jj++ ) {
 2970             my $line = $rnew_lines->[$jj];
 2971             $rtokens        = $line->get_rtokens();
 2972             $rfield_lengths = $line->get_rfield_lengths();
 2973             $imax           = @{$rtokens} - 2;
 2974 
 2975             # start a new match group
 2976             if ( $jj == $jbeg ) {
 2977                 $start_match->($jj);
 2978                 next;
 2979             }
 2980 
 2981             # see if all tokens of this line match the current group
 2982             my $match;
 2983             if ( $imax == $imax_match ) {
 2984                 for ( my $i = 0 ; $i <= $imax ; $i++ ) {
 2985                     my $tok       = $rtokens->[$i];
 2986                     my $tok_match = $rtokens_match->[$i];
 2987                     last if ( $tok ne $tok_match );
 2988                 }
 2989                 $match = 1;
 2990             }
 2991 
 2992             # yes, they all match
 2993             if ($match) {
 2994                 $add_to_match->($jj);
 2995             }
 2996 
 2997             # now, this line does not match
 2998             else {
 2999                 $end_match->();
 3000                 $start_match->($jj);
 3001             }
 3002         }    # End loopover lines
 3003         $end_match->();
 3004     }    # End loop over subgroups
 3005     return;
 3006 } ## end sub delete_null_alignments
 3007 
 3008 sub match_line_pairs {
 3009     my ( $rlines, $rnew_lines, $rsubgroups, $group_level ) = @_;
 3010 
 3011     # Compare each pair of lines and save information about common matches
 3012     # $rlines     = list of lines including hanging side comments
 3013     # $rnew_lines = list of lines without any hanging side comments
 3014     # $rsubgroups = list of subgroups of the new lines
 3015 
 3016     # TODO:
 3017     # Maybe change: imax_pair => pair_match_info = ref to array
 3018     #  = [$imax_align, $rMsg, ... ]
 3019     #  This may eventually have multi-level match info
 3020 
 3021     # Previous line vars
 3022     my ( $line_m, $rtokens_m, $rpatterns_m, $rfield_lengths_m, $imax_m,
 3023         $list_type_m, $ci_level_m );
 3024 
 3025     # Current line vars
 3026     my ( $line, $rtokens, $rpatterns, $rfield_lengths, $imax, $list_type,
 3027         $ci_level );
 3028 
 3029     use constant EXPLAIN_COMPARE_PATTERNS => 0;
 3030 
 3031     my $compare_patterns = sub {
 3032 
 3033         # helper routine to decide if patterns match well enough..
 3034         # return code:
 3035         #   0 = patterns match, continue
 3036         #   1 = no match
 3037         #   2 = no match, and lines do not match at all
 3038 
 3039         my ( $tok, $tok_m, $pat, $pat_m, $pad ) = @_;
 3040         my $GoToMsg     = "";
 3041         my $return_code = 1;
 3042 
 3043         my ( $alignment_token, $lev, $tag, $tok_count ) =
 3044           decode_alignment_token($tok);
 3045 
 3046         # We have to be very careful about aligning commas
 3047         # when the pattern's don't match, because it can be
 3048         # worse to create an alignment where none is needed
 3049         # than to omit one.  Here's an example where the ','s
 3050         # are not in named containers.  The first line below
 3051         # should not match the next two:
 3052         #   ( $a, $b ) = ( $b, $r );
 3053         #   ( $x1, $x2 ) = ( $x2 - $q * $x1, $x1 );
 3054         #   ( $y1, $y2 ) = ( $y2 - $q * $y1, $y1 );
 3055         if ( $alignment_token eq ',' ) {
 3056 
 3057             # do not align commas unless they are in named
 3058             # containers
 3059             $GoToMsg = "do not align commas in unnamed containers";
 3060             goto NO_MATCH unless ( $tok =~ /[A-Za-z]/ );
 3061         }
 3062 
 3063         # do not align parens unless patterns match;
 3064         # large ugly spaces can occur in math expressions.
 3065         elsif ( $alignment_token eq '(' ) {
 3066 
 3067             # But we can allow a match if the parens don't
 3068             # require any padding.
 3069             $GoToMsg = "do not align '(' unless patterns match or pad=0";
 3070             if ( $pad != 0 ) { goto NO_MATCH }
 3071         }
 3072 
 3073         # Handle an '=' alignment with different patterns to
 3074         # the left.
 3075         elsif ( $alignment_token eq '=' ) {
 3076 
 3077             # It is best to be a little restrictive when
 3078             # aligning '=' tokens.  Here is an example of
 3079             # two lines that we will not align:
 3080             #       my $variable=6;
 3081             #       $bb=4;
 3082             # The problem is that one is a 'my' declaration,
 3083             # and the other isn't, so they're not very similar.
 3084             # We will filter these out by comparing the first
 3085             # letter of the pattern.  This is crude, but works
 3086             # well enough.
 3087             if ( substr( $pat_m, 0, 1 ) ne substr( $pat, 0, 1 ) ) {
 3088                 $GoToMsg = "first character before equals differ";
 3089                 goto NO_MATCH;
 3090             }
 3091 
 3092             # The introduction of sub 'prune_alignment_tree'
 3093             # enabled alignment of lists left of the equals with
 3094             # other scalar variables. For example:
 3095             # my ( $D, $s, $e ) = @_;
 3096             # my $d             = length $D;
 3097             # my $c             = $e - $s - $d;
 3098 
 3099             # But this would change formatting of a lot of scripts,
 3100             # so for now we prevent alignment of comma lists on the
 3101             # left with scalars on the left.  We will also prevent
 3102             # any partial alignments.
 3103 
 3104           # set return code 2 if the = is at line level, but
 3105           # set return code 1 if the = is below line level, i.e.
 3106           #    sub new { my ( $p, $v ) = @_; bless \$v, $p }
 3107           #    sub iter { my ($x) = @_; return undef if $$x < 0; return $$x--; }
 3108 
 3109             elsif (
 3110                 ( index( $pat_m, ',' ) >= 0 ) ne ( index( $pat, ',' ) >= 0 ) )
 3111             {
 3112                 $GoToMsg = "mixed commas/no-commas before equals";
 3113                 if ( $lev eq $group_level ) {
 3114                     $return_code = 2;
 3115                 }
 3116                 goto NO_MATCH;
 3117             }
 3118         }
 3119 
 3120       MATCH:
 3121         return ( 0, \$GoToMsg );
 3122 
 3123       NO_MATCH:
 3124 
 3125         EXPLAIN_COMPARE_PATTERNS
 3126           && print STDERR "no match because $GoToMsg\n";
 3127 
 3128         return ( $return_code, \$GoToMsg );
 3129 
 3130     };    ## end of $compare_patterns->()
 3131 
 3132     # loop over subgroups
 3133     foreach my $item ( @{$rsubgroups} ) {
 3134         my ( $jbeg, $jend ) = @{$item};
 3135         my $nlines = $jend - $jbeg + 1;
 3136         next unless ( $nlines > 1 );
 3137 
 3138         # loop over lines in a subgroup
 3139         for ( my $jj = $jbeg ; $jj <= $jend ; $jj++ ) {
 3140 
 3141             $line_m           = $line;
 3142             $rtokens_m        = $rtokens;
 3143             $rpatterns_m      = $rpatterns;
 3144             $rfield_lengths_m = $rfield_lengths;
 3145             $imax_m           = $imax;
 3146             $list_type_m      = $list_type;
 3147             $ci_level_m       = $ci_level;
 3148 
 3149             $line           = $rnew_lines->[$jj];
 3150             $rtokens        = $line->get_rtokens();
 3151             $rpatterns      = $line->get_rpatterns();
 3152             $rfield_lengths = $line->get_rfield_lengths();
 3153             $imax           = @{$rtokens} - 2;
 3154             $list_type      = $line->get_list_type();
 3155             $ci_level       = $line->get_ci_level();
 3156 
 3157             # nothing to do for first line
 3158             next if ( $jj == $jbeg );
 3159 
 3160             my $ci_jump = $ci_level - $ci_level_m;
 3161 
 3162             my $imax_min = $imax_m < $imax ? $imax_m : $imax;
 3163 
 3164             my $imax_align = -1;
 3165 
 3166             # find number of leading common tokens
 3167 
 3168             #################################
 3169             # No match to hanging side comment
 3170             #################################
 3171             if ( $line->get_is_hanging_side_comment() ) {
 3172 
 3173                 # Should not get here; HSC's have been filtered out
 3174                 $imax_align = -1;
 3175             }
 3176 
 3177             ##############################
 3178             # Handle comma-separated lists
 3179             ##############################
 3180             elsif ( $list_type && $list_type eq $list_type_m ) {
 3181 
 3182                 # do not align lists across a ci jump with new list method
 3183                 if ($ci_jump) { $imax_min = -1 }
 3184 
 3185                 my $i_nomatch = $imax_min + 1;
 3186                 for ( my $i = 0 ; $i <= $imax_min ; $i++ ) {
 3187                     my $tok   = $rtokens->[$i];
 3188                     my $tok_m = $rtokens_m->[$i];
 3189                     if ( $tok ne $tok_m ) {
 3190                         $i_nomatch = $i;
 3191                         last;
 3192                     }
 3193                 }
 3194 
 3195                 $imax_align = $i_nomatch - 1;
 3196             }
 3197 
 3198             ##################
 3199             # Handle non-lists
 3200             ##################
 3201             else {
 3202                 my $i_nomatch = $imax_min + 1;
 3203                 for ( my $i = 0 ; $i <= $imax_min ; $i++ ) {
 3204                     my $tok   = $rtokens->[$i];
 3205                     my $tok_m = $rtokens_m->[$i];
 3206                     if ( $tok ne $tok_m ) {
 3207                         $i_nomatch = $i;
 3208                         last;
 3209                     }
 3210 
 3211                     my $pat   = $rpatterns->[$i];
 3212                     my $pat_m = $rpatterns_m->[$i];
 3213 
 3214                     # If patterns don't match, we have to be careful...
 3215                     if ( $pat_m ne $pat ) {
 3216                         my $pad =
 3217                           $rfield_lengths->[$i] - $rfield_lengths_m->[$i];
 3218                         my ( $match_code, $rmsg ) = $compare_patterns->(
 3219                             $tok, $tok_m, $pat, $pat_m, $pad
 3220                         );
 3221                         if ($match_code) {
 3222                             if    ( $match_code eq 1 ) { $i_nomatch = $i }
 3223                             elsif ( $match_code eq 2 ) { $i_nomatch = 0 }
 3224                             last;
 3225                         }
 3226                     }
 3227                 }
 3228                 $imax_align = $i_nomatch - 1;
 3229             }
 3230 
 3231             $line_m->set_imax_pair($imax_align);
 3232 
 3233         } ## end loop over lines
 3234 
 3235         # Put fence at end of subgroup
 3236         $line->set_imax_pair(-1);
 3237 
 3238     } ## end loop over subgroups
 3239 
 3240     # if there are hanging side comments, propagate the pair info down to them
 3241     # so that lines can just look back one line for their pair info.
 3242     if ( @{$rlines} > @{$rnew_lines} ) {
 3243         my $last_pair_info = -1;
 3244         foreach my $line ( @{$rlines} ) {
 3245             if ( $line->get_is_hanging_side_comment() ) {
 3246                 $line->set_imax_pair($last_pair_info);
 3247             }
 3248             else {
 3249                 $last_pair_info = $line->get_imax_pair();
 3250             }
 3251         }
 3252     }
 3253     return;
 3254 }
 3255 
 3256 sub fat_comma_to_comma {
 3257     my ($str) = @_;
 3258 
 3259     # We are changing '=>' to ',' and removing any trailing decimal count
 3260     # because currently fat commas have a count and commas do not.
 3261     # For example, we will change '=>2+{-3.2' into ',2+{-3'
 3262     if ( $str =~ /^=>([^\.]*)/ ) { $str = ',' . $1 }
 3263     return $str;
 3264 }
 3265 
 3266 sub get_line_token_info {
 3267 
 3268     # scan lines of tokens and return summary information about the range of
 3269     # levels and patterns.
 3270     my ($rlines) = @_;
 3271 
 3272     # First scan to check monotonicity. Here is an example of several
 3273     # lines which are monotonic. The = is the lowest level, and
 3274     # the commas are all one level deeper. So this is not nonmonotonic.
 3275     #  $$d{"weeks"}   = [ "w",  "wk",  "wks", "week", "weeks" ];
 3276     #  $$d{"days"}    = [ "d",  "day", "days" ];
 3277     #  $$d{"hours"}   = [ "h",  "hr",  "hrs", "hour", "hours" ];
 3278     my @all_token_info;
 3279     my $all_monotonic = 1;
 3280     for ( my $jj = 0 ; $jj < @{$rlines} ; $jj++ ) {
 3281         my ($line) = $rlines->[$jj];
 3282         my $rtokens = $line->get_rtokens();
 3283         my $last_lev;
 3284         my $is_monotonic = 1;
 3285         my $i            = -1;
 3286         foreach my $tok ( @{$rtokens} ) {
 3287             $i++;
 3288             my ( $raw_tok, $lev, $tag, $tok_count ) =
 3289               decode_alignment_token($tok);
 3290             push @{ $all_token_info[$jj] },
 3291               [ $raw_tok, $lev, $tag, $tok_count ];
 3292             last if ( $tok eq '#' );
 3293             if ( $i > 0 && $lev < $last_lev ) { $is_monotonic = 0 }
 3294             $last_lev = $lev;
 3295         }
 3296         if ( !$is_monotonic ) { $all_monotonic = 0 }
 3297     }
 3298 
 3299     my $rline_values = [];
 3300     for ( my $jj = 0 ; $jj < @{$rlines} ; $jj++ ) {
 3301         my ($line) = $rlines->[$jj];
 3302 
 3303         my $rtokens = $line->get_rtokens();
 3304         my $i       = -1;
 3305         my ( $lev_min, $lev_max );
 3306         my $token_pattern_max = "";
 3307         my %saw_level;
 3308         my @token_info;
 3309         my $is_monotonic = 1;
 3310 
 3311         # find the index of the last token before the side comment
 3312         my $imax      = @{$rtokens} - 2;
 3313         my $imax_true = $imax;
 3314 
 3315         # If the entire group is monotonic, and the line ends in a comma list,
 3316         # walk it back to the first such comma. this will have the effect of
 3317         # making all trailing ragged comma lists match in the prune tree
 3318         # routine.  these trailing comma lists can better be handled by later
 3319         # alignment rules.
 3320 
 3321         # Treat fat commas the same as commas here by converting them to
 3322         # commas.  This will improve the chance of aligning the leading parts
 3323         # of ragged lists.
 3324 
 3325         my $tok_end = fat_comma_to_comma( $rtokens->[$imax] );
 3326         if ( $all_monotonic && $tok_end =~ /^,/ ) {
 3327             my $i = $imax - 1;
 3328             while ( $i >= 0
 3329                 && fat_comma_to_comma( $rtokens->[$i] ) eq $tok_end )
 3330             {
 3331                 $imax = $i;
 3332                 $i--;
 3333             }
 3334         }
 3335 
 3336         # make a first pass to find level range
 3337         my $last_lev;
 3338         foreach my $tok ( @{$rtokens} ) {
 3339             $i++;
 3340             last if ( $i > $imax );
 3341             last if ( $tok eq '#' );
 3342             my ( $raw_tok, $lev, $tag, $tok_count ) =
 3343               @{ $all_token_info[$jj]->[$i] };
 3344 
 3345             last if ( $tok eq '#' );
 3346             $token_pattern_max .= $tok;
 3347             $saw_level{$lev}++;
 3348             if ( !defined($lev_min) ) {
 3349                 $lev_min = $lev;
 3350                 $lev_max = $lev;
 3351             }
 3352             else {
 3353                 if ( $lev < $lev_min )  { $lev_min      = $lev; }
 3354                 if ( $lev > $lev_max )  { $lev_max      = $lev; }
 3355                 if ( $lev < $last_lev ) { $is_monotonic = 0 }
 3356             }
 3357             $last_lev = $lev;
 3358         }
 3359 
 3360         # handle no levels
 3361         my $rtoken_patterns = {};
 3362         my $rtoken_indexes  = {};
 3363         my @levs            = sort keys %saw_level;
 3364         if ( !defined($lev_min) ) {
 3365             $lev_min                     = -1;
 3366             $lev_max                     = -1;
 3367             $levs[0]                     = -1;
 3368             $rtoken_patterns->{$lev_min} = "";
 3369             $rtoken_indexes->{$lev_min}  = [];
 3370         }
 3371 
 3372         # handle one level
 3373         elsif ( $lev_max == $lev_min ) {
 3374             $rtoken_patterns->{$lev_max} = $token_pattern_max;
 3375             $rtoken_indexes->{$lev_max}  = [ ( 0 .. $imax ) ];
 3376         }
 3377 
 3378         # handle multiple levels
 3379         else {
 3380             $rtoken_patterns->{$lev_max} = $token_pattern_max;
 3381             $rtoken_indexes->{$lev_max}  = [ ( 0 .. $imax ) ];
 3382 
 3383             my $debug   = 0;
 3384             my $lev_top = pop @levs;    # alread did max level
 3385             my $itok    = -1;
 3386             foreach my $tok ( @{$rtokens} ) {
 3387                 $itok++;
 3388                 last if ( $itok > $imax );
 3389                 my ( $raw_tok, $lev, $tag, $tok_count ) =
 3390                   @{ $all_token_info[$jj]->[$itok] };
 3391                 last if ( $raw_tok eq '#' );
 3392                 foreach my $lev_test (@levs) {
 3393                     next if ( $lev > $lev_test );
 3394                     $rtoken_patterns->{$lev_test} .= $tok;
 3395                     push @{ $rtoken_indexes->{$lev_test} }, $itok;
 3396                 }
 3397             }
 3398             push @levs, $lev_top;
 3399         }
 3400 
 3401         push @{$rline_values},
 3402           [
 3403             $lev_min,        $lev_max,      $rtoken_patterns, \@levs,
 3404             $rtoken_indexes, $is_monotonic, $imax_true,       $imax,
 3405           ];
 3406 
 3407         # debug
 3408         0 && do {
 3409             local $" = ')(';
 3410             print "lev_min=$lev_min, lev_max=$lev_max, levels=(@levs)\n";
 3411             foreach my $key ( sort keys %{$rtoken_patterns} ) {
 3412                 print "$key => $rtoken_patterns->{$key}\n";
 3413                 print "$key => @{$rtoken_indexes->{$key}}\n";
 3414             }
 3415         };
 3416     } ## end loop over lines
 3417     return ( $rline_values, $all_monotonic );
 3418 }
 3419 
 3420 sub prune_alignment_tree {
 3421     my ($rlines) = @_;
 3422     my $jmax = @{$rlines} - 1;
 3423     return unless $jmax > 0;
 3424 
 3425     # Vertical alignment in perltidy is done as an iterative process.  The
 3426     # starting point is to mark all possible alignment tokens ('=', ',', '=>',
 3427     # etc) for vertical alignment.  Then we have to delete all alignments
 3428     # which, if actually made, would detract from overall alignment.  This
 3429     # is done in several phases of which this is one.
 3430 
 3431     # In this routine we look at the alignments of a group of lines as a
 3432     # hierarchical tree.  We will 'prune' the tree to limited depths if that
 3433     # will improve overall alignment at the lower depths.
 3434     # For each line we will be looking at its alignment patterns down to
 3435     # different fixed depths. For each depth, we include all lower depths and
 3436     # ignore all higher depths.  We want to see if we can get alignment of a
 3437     # larger group of lines if we ignore alignments at some lower depth.
 3438     # Here is an # example:
 3439 
 3440     # for (
 3441     #     [ '$var',     sub { join $_, "bar" },            0, "bar" ],
 3442     #     [ 'CONSTANT', sub { join "foo", "bar" },         0, "bar" ],
 3443     #     [ 'CONSTANT', sub { join "foo", "bar", 3 },      1, "barfoo3" ],
 3444     #     [ '$myvar',   sub { my $var; join $var, "bar" }, 0, "bar" ],
 3445     # );
 3446 
 3447     # In the above example, all lines have three commas at the lowest depth
 3448     # (zero), so if there were no other alignements, these lines would all
 3449     # align considering only the zero depth alignment token.  But some lines
 3450     # have additional comma alignments at the next depth, so we need to decide
 3451     # if we should drop those to keep the top level alignments, or keep those
 3452     # for some additional low level alignments at the expense losing some top
 3453     # level alignments.  In this case we will drop the deeper level commas to
 3454     # keep the entire collection aligned.  But in some cases the decision could
 3455     # go the other way.
 3456 
 3457     # The tree for this example at the zero depth has one node containing
 3458     # all four lines, since they are identical at zero level (three commas).
 3459     # At depth one, there are three 'children' nodes, namely:
 3460     # - lines 1 and 2, which have a single comma in the 'sub' at depth 1
 3461     # - line 3, which has 2 commas at depth 1
 3462     # - line4, which has a ';' and a ',' at depth 1
 3463     # There are no deeper alignments in this example.
 3464     # so the tree structure for this example is:
 3465     #
 3466     #    depth 0         depth 1      depth 2
 3467     #    [lines 1-4] --  [line 1-2] -  (empty)
 3468     #                 |  [line 3]   -  (empty)
 3469     #                 |  [line 4]   -  (empty)
 3470 
 3471     # We can carry this to any depth, but it is not really useful to go below
 3472     # depth 2. To cleanly stop there, we will consider depth 2 to contain all
 3473     # alignments at depth >=2.
 3474 
 3475     use constant EXPLAIN_PRUNE => 0;
 3476 
 3477     ####################################################################
 3478     # Prune Tree Step 1. Start by scanning the lines and collecting info
 3479     ####################################################################
 3480 
 3481     # Note that the caller had this info but we have to redo this now because
 3482     # alignment tokens may have been deleted.
 3483     my ( $rline_values, $all_monotonic ) = get_line_token_info($rlines);
 3484 
 3485     # If all the lines have levels which increase monotonically from left to
 3486     # right, then the sweep-left-to-right pass can do a better job of alignment
 3487     # than pruning, and without deleting alignments.
 3488     return if ($all_monotonic);
 3489 
 3490     # Contents of $rline_values
 3491     #   [
 3492     #     $lev_min,        $lev_max,      $rtoken_patterns, \@levs,
 3493     #     $rtoken_indexes, $is_monotonic, $imax_true,       $imax,
 3494     #   ];
 3495 
 3496     # We can work to any depth, but there is little advantage to working
 3497     # to a a depth greater than 2
 3498     my $MAX_DEPTH = 2;
 3499 
 3500     # This arrays will hold the tree of alignment tokens at different depths
 3501     # for these lines.
 3502     my @match_tree;
 3503 
 3504     # Tree nodes contain these values:
 3505     # $match_tree[$depth] = [$jbeg, $jend, $n_parent, $level, $pattern,
 3506     #                        $nc_beg_p, $nc_end_p, $rindexes];
 3507     # where
 3508     #      $depth = 0,1,2 = index of depth of the match
 3509 
 3510     #  $jbeg beginning index j of the range of lines in this match
 3511     #  $jend ending index j of the range of lines in this match
 3512     #  $n_parent = index of the containing group at $depth-1, if it exists
 3513     #  $level = actual level of code being matched in this group
 3514     #  $pattern = alignment pattern being matched
 3515     #  $nc_beg_p = first child
 3516     #  $nc_end_p = last child
 3517     #  $rindexes = ref to token indexes
 3518 
 3519     # the patterns and levels of the current group being formed at each depth
 3520     my ( @token_patterns_current, @levels_current, @token_indexes_current );
 3521 
 3522     # the patterns and levels of the next line being tested at each depth
 3523     my ( @token_patterns_next, @levels_next, @token_indexes_next );
 3524 
 3525     #########################################################
 3526     # define a recursive worker subroutine for tree construction
 3527     #########################################################
 3528 
 3529     # This is a recursive routine which is called if a match condition changes
 3530     # at any depth when a new line is encountered.  It ends the match node
 3531     # which changed plus all deeper nodes attached to it.
 3532     my $end_node;
 3533     $end_node = sub {
 3534         my ( $depth, $jl, $n_parent ) = @_;
 3535 
 3536         # $depth is the tree depth
 3537         # $jl is the  index of the line
 3538         # $n_parent is index of the parent node of this node
 3539 
 3540         return if ( $depth > $MAX_DEPTH );
 3541 
 3542         # end any current group at this depth
 3543         if (   $jl >= 0
 3544             && defined( $match_tree[$depth] )
 3545             && @{ $match_tree[$depth] }
 3546             && defined( $levels_current[$depth] ) )
 3547         {
 3548             $match_tree[$depth]->[-1]->[1] = $jl;
 3549         }
 3550 
 3551         # Define the index of the node we will create below
 3552         my $ng_self = 0;
 3553         if ( defined( $match_tree[$depth] ) ) {
 3554             $ng_self = @{ $match_tree[$depth] };
 3555         }
 3556 
 3557         # end any next deeper child node(s)
 3558         $end_node->( $depth + 1, $jl, $ng_self );
 3559 
 3560         # update the levels being matched
 3561         $token_patterns_current[$depth] = $token_patterns_next[$depth];
 3562         $token_indexes_current[$depth]  = $token_indexes_next[$depth];
 3563         $levels_current[$depth]         = $levels_next[$depth];
 3564 
 3565         # Do not start a new group at this level if it is not being used
 3566         if ( !defined( $levels_next[$depth] )
 3567             || $depth > 0
 3568             && $levels_next[$depth] <= $levels_next[ $depth - 1 ] )
 3569         {
 3570             return;
 3571         }
 3572 
 3573         # Create a node for the next group at this depth. We initially assume
 3574         # that it will continue to $jmax, and correct that later if the node
 3575         # ends earlier.
 3576         push @{ $match_tree[$depth] },
 3577           [
 3578             $jl + 1, $jmax, $n_parent, $levels_current[$depth],
 3579             $token_patterns_current[$depth],
 3580             undef, undef, $token_indexes_current[$depth],
 3581           ];
 3582 
 3583         return;
 3584     };    ## end sub end_node
 3585 
 3586     ######################################################
 3587     # Prune Tree Step 2. Loop to form the tree of matches.
 3588     ######################################################
 3589     for ( my $jp = 0 ; $jp <= $jmax ; $jp++ ) {
 3590 
 3591         # working with two adjacent line indexes, 'm'=minus, 'p'=plus
 3592         my $jm = $jp - 1;
 3593 
 3594         # Pull out needed values for the next line
 3595         my ( $lev_min, $lev_max, $rtoken_patterns, $rlevs, $rtoken_indexes,
 3596             $is_monotonic, $imax_true, $imax )
 3597           = @{ $rline_values->[$jp] };
 3598 
 3599         # Transfer levels and patterns for this line to the working arrays.
 3600         # If the number of levels differs from our chosen MAX_DEPTH ...
 3601         # if fewer than MAX_DEPTH: leave levels at missing depths undefined
 3602         # if more than MAX_DEPTH: set the MAX_DEPTH level to be the maximum
 3603         @levels_next = @{$rlevs}[ 0 .. $MAX_DEPTH ];
 3604         if ( @{$rlevs} > $MAX_DEPTH ) {
 3605             $levels_next[$MAX_DEPTH] = $rlevs->[-1];
 3606         }
 3607         my $depth = 0;
 3608         foreach (@levels_next) {
 3609             $token_patterns_next[$depth] =
 3610               defined($_) ? $rtoken_patterns->{$_} : undef;
 3611             $token_indexes_next[$depth] =
 3612               defined($_) ? $rtoken_indexes->{$_} : undef;
 3613             $depth++;
 3614         }
 3615 
 3616         # Look for a change in match groups...
 3617 
 3618         # Initialize on the first line
 3619         if ( $jp == 0 ) {
 3620             my $n_parent;
 3621             $end_node->( 0, $jm, $n_parent );
 3622         }
 3623 
 3624         # End groups if a hard flag has been set
 3625         elsif ( $rlines->[$jm]->get_end_group() ) {
 3626             my $n_parent;
 3627             $end_node->( 0, $jm, $n_parent );
 3628         }
 3629 
 3630         # Continue at hanging side comment
 3631         elsif ( $rlines->[$jp]->get_is_hanging_side_comment() ) {
 3632             next;
 3633         }
 3634 
 3635         # Otherwise see if anything changed and update the tree if so
 3636         else {
 3637             foreach my $depth ( 0 .. $MAX_DEPTH ) {
 3638 
 3639                 my $def_current = defined( $token_patterns_current[$depth] );
 3640                 my $def_next    = defined( $token_patterns_next[$depth] );
 3641                 last unless ( $def_current || $def_next );
 3642                 if (   !$def_current
 3643                     || !$def_next
 3644                     || $token_patterns_current[$depth] ne
 3645                     $token_patterns_next[$depth] )
 3646                 {
 3647                     my $n_parent;
 3648                     if ( $depth > 0 && defined( $match_tree[ $depth - 1 ] ) ) {
 3649                         $n_parent = @{ $match_tree[ $depth - 1 ] } - 1;
 3650                     }
 3651                     $end_node->( $depth, $jm, $n_parent );
 3652                     last;
 3653                 }
 3654             }
 3655         }
 3656     } ## end loop to form tree of matches
 3657 
 3658     ##########################################################
 3659     # Prune Tree Step 3. Make links from parent to child nodes
 3660     ##########################################################
 3661 
 3662     # It seemed cleaner to do this as a separate step rather than during tree
 3663     # construction.  The children nodes have links up to the parent node which
 3664     # created them.  Now make links in the opposite direction, so the parents
 3665     # can find the children.  We store the range of children nodes ($nc_beg,
 3666     # $nc_end) of each parent with two additional indexes in the orignal array.
 3667     # These will be undef if no children.
 3668     for ( my $depth = $MAX_DEPTH ; $depth > 0 ; $depth-- ) {
 3669         next unless defined( $match_tree[$depth] );
 3670         my $nc_max = @{ $match_tree[$depth] } - 1;
 3671         my $np_now;
 3672         foreach my $nc ( 0 .. $nc_max ) {
 3673             my $np = $match_tree[$depth]->[$nc]->[2];
 3674             if ( !defined($np) ) {
 3675 
 3676                 # shouldn't happen
 3677                 #print STDERR "lost child $np at depth $depth\n";
 3678                 next;
 3679             }
 3680             if ( !defined($np_now) || $np != $np_now ) {
 3681                 $np_now = $np;
 3682                 $match_tree[ $depth - 1 ]->[$np]->[5] = $nc;
 3683             }
 3684             $match_tree[ $depth - 1 ]->[$np]->[6] = $nc;
 3685         }
 3686     } ## end loop to make links down to the child nodes
 3687 
 3688     EXPLAIN_PRUNE > 0 && do {
 3689         print "Tree complete. Found these groups:\n";
 3690         foreach my $depth ( 0 .. $MAX_DEPTH ) {
 3691             Dump_tree_groups( \@{ $match_tree[$depth] }, "depth=$depth" );
 3692         }
 3693     };
 3694 
 3695     #######################################################
 3696     # Prune Tree Step 4. Make a list of nodes to be deleted
 3697     #######################################################
 3698 
 3699     #  list of lines with tokens to be deleted:
 3700     #  [$jbeg, $jend, $level_keep]
 3701     #  $jbeg..$jend is the range of line indexes,
 3702     #  $level_keep is the minimum level to keep
 3703     my @delete_list;
 3704 
 3705     #  Groups with ending comma lists and their range of sizes:
 3706     #  $ragged_comma_group{$id} = [ imax_group_min, imax_group_max ]
 3707     my %ragged_comma_group;
 3708 
 3709     # Define a threshold line count for forcing a break
 3710     my $nlines_break = 3;
 3711 
 3712     # We work with a list of nodes to visit at the next deeper depth.
 3713     my @todo_list;
 3714     if ( defined( $match_tree[0] ) ) {
 3715         @todo_list = ( 0 .. @{ $match_tree[0] } - 1 );
 3716     }
 3717 
 3718     for ( my $depth = 0 ; $depth <= $MAX_DEPTH ; $depth++ ) {
 3719         last unless (@todo_list);
 3720         my @todo_next;
 3721         foreach my $np (@todo_list) {
 3722             my ( $jbeg_p, $jend_p, $np_p, $lev_p, $pat_p, $nc_beg_p, $nc_end_p,
 3723                 $rindexes_p )
 3724               = @{ $match_tree[$depth]->[$np] };
 3725             my $nlines_p = $jend_p - $jbeg_p + 1;
 3726 
 3727             # nothing to do if no children
 3728             next unless defined($nc_beg_p);
 3729 
 3730             # Define the number of lines to either keep or delete a child node.
 3731             # This is the key decision we have to make.  We want to delete
 3732             # short runs of matched lines, and keep long runs.  It seems easier
 3733             # for the eye to follow breaks in monotonic level changes than
 3734             # non-monotonic level changes.  For example, the following looks
 3735             # best if we delete the lower level alignments:
 3736 
 3737             #  [1]                  ~~ [];
 3738             #  [ ["foo"], ["bar"] ] ~~ [ qr/o/, qr/a/ ];
 3739             #  [ qr/o/, qr/a/ ]     ~~ [ ["foo"], ["bar"] ];
 3740             #  [ "foo", "bar" ]     ~~ [ qr/o/, qr/a/ ];
 3741             #  [ qr/o/, qr/a/ ]     ~~ [ "foo", "bar" ];
 3742             #  $deep1               ~~ $deep1;
 3743 
 3744             # So we will use two thresholds.
 3745             my $nmin_mono     = $depth + 2;
 3746             my $nmin_non_mono = $depth + 6;
 3747             if ( $nmin_mono > $nlines_p - 1 ) {
 3748                 $nmin_mono = $nlines_p - 1;
 3749             }
 3750             if ( $nmin_non_mono > $nlines_p - 1 ) {
 3751                 $nmin_non_mono = $nlines_p - 1;
 3752             }
 3753 
 3754             # loop to keep or delete each child node
 3755             foreach my $nc ( $nc_beg_p .. $nc_end_p ) {
 3756                 my ( $jbeg_c, $jend_c, $np_c, $lev_c, $pat_c, $nc_beg_c,
 3757                     $nc_end_c )
 3758                   = @{ $match_tree[ $depth + 1 ]->[$nc] };
 3759                 my $nlines_c     = $jend_c - $jbeg_c + 1;
 3760                 my $is_monotonic = $rline_values->[$jbeg_c]->[5];
 3761                 my $nmin         = $is_monotonic ? $nmin_mono : $nmin_non_mono;
 3762                 if ( $nlines_c < $nmin ) {
 3763 ##print "deleting child, nlines=$nlines_c, nmin=$nmin\n";
 3764                     push @delete_list, [ $jbeg_c, $jend_c, $lev_p ];
 3765                 }
 3766                 else {
 3767 ##print "keeping child, nlines=$nlines_c, nmin=$nmin\n";
 3768                     push @todo_next, $nc;
 3769                 }
 3770             }
 3771         }
 3772         @todo_list = @todo_next;
 3773     } ## end loop to mark nodes to delete
 3774 
 3775     #############################################################
 3776     # Prune Tree Step 5. Loop to delete selected alignment tokens
 3777     #############################################################
 3778     foreach my $item (@delete_list) {
 3779         my ( $jbeg, $jend, $level_keep ) = @{$item};
 3780         foreach my $jj ( $jbeg .. $jend ) {
 3781             my $line = $rlines->[$jj];
 3782             my @idel;
 3783             my $rtokens = $line->get_rtokens();
 3784             my $imax    = @{$rtokens} - 2;
 3785             for ( my $i = 0 ; $i <= $imax ; $i++ ) {
 3786                 my $tok = $rtokens->[$i];
 3787                 my ( $raw_tok, $lev, $tag, $tok_count ) =
 3788                   decode_alignment_token($tok);
 3789                 if ( $lev > $level_keep ) {
 3790                     push @idel, $i;
 3791                 }
 3792             }
 3793             if (@idel) {
 3794                 delete_selected_tokens( $line, \@idel );
 3795             }
 3796         }
 3797     } ## end loop to delete selected alignment tokens
 3798 
 3799     return;
 3800 } ## end sub prune_alignment_tree
 3801 
 3802 sub Dump_tree_groups {
 3803     my ( $rgroup, $msg ) = @_;
 3804     print "$msg\n";
 3805     local $" = ')(';
 3806     foreach my $item ( @{$rgroup} ) {
 3807         my @fix = @{$item};
 3808         foreach (@fix) { $_ = "undef" unless defined $_; }
 3809         $fix[4] = "...";
 3810         print "(@fix)\n";
 3811     }
 3812     return;
 3813 }
 3814 
 3815 {    ## closure for sub is_marginal_match
 3816 
 3817     my %is_if_or;
 3818     my %is_assignment;
 3819     my %is_good_alignment;
 3820 
 3821     # This test did not give sufficiently better results to use as an update,
 3822     # but the flag is worth keeping as a starting point for future testing.
 3823     use constant TEST_MARGINAL_EQ_ALIGNMENT => 0;
 3824 
 3825     BEGIN {
 3826 
 3827         my @q = qw(
 3828           if unless or ||
 3829         );
 3830         @is_if_or{@q} = (1) x scalar(@q);
 3831 
 3832         @q = qw(
 3833           = **= += *= &= <<= &&=
 3834           -= /= |= >>= ||= //=
 3835           .= %= ^=
 3836           x=
 3837         );
 3838         @is_assignment{@q} = (1) x scalar(@q);
 3839 
 3840         # Vertically aligning on certain "good" tokens is usually okay
 3841         # so we can be less restrictive in marginal cases.
 3842         @q = qw( { ? => = );
 3843         push @q, (',');
 3844         @is_good_alignment{@q} = (1) x scalar(@q);
 3845     }
 3846 
 3847     sub is_marginal_match {
 3848 
 3849         my ( $line_0, $line_1, $group_level, $imax_align, $imax_prev ) = @_;
 3850 
 3851         # Decide if we should undo some or all of the common alignments of a
 3852         # group of just two lines.
 3853 
 3854         # Given:
 3855         #   $line_0 and $line_1 - the two lines
 3856         #   $group_level = the indentation level of the group being processed
 3857         #   $imax_align = the maximum index of the common alignment tokens
 3858         #                 of the two lines
 3859         #   $imax_prev  = the maximum index of the common alignment tokens
 3860         #                 with the line before $line_0 (=-1 of does not exist)
 3861 
 3862         # Return:
 3863         #   $is_marginal = true if the two lines should NOT be fully aligned
 3864         #                = false if the two lines can remain fully aligned
 3865         #   $imax_align  = the index of the highest alignment token shared by
 3866         #                  these two lines to keep if the match is marginal.
 3867 
 3868         # When we have an alignment group of just two lines like this, we are
 3869         # working in the twilight zone of what looks good and what looks bad.
 3870         # This routine is a collection of rules which work have been found to
 3871         # work fairly well, but it will need to be updated from time to time.
 3872 
 3873         my $is_marginal = 0;
 3874 
 3875         # always keep alignments of a terminal else or ternary
 3876         goto RETURN if ( defined( $line_1->get_j_terminal_match() ) );
 3877 
 3878         # always align lists
 3879         my $group_list_type = $line_0->get_list_type();
 3880         goto RETURN if ($group_list_type);
 3881 
 3882         # always align hanging side comments
 3883         my $is_hanging_side_comment = $line_1->get_is_hanging_side_comment();
 3884         goto RETURN if ($is_hanging_side_comment);
 3885 
 3886         my $jmax_0           = $line_0->get_jmax();
 3887         my $jmax_1           = $line_1->get_jmax();
 3888         my $rtokens_1        = $line_1->get_rtokens();
 3889         my $rtokens_0        = $line_0->get_rtokens();
 3890         my $rfield_lengths_0 = $line_0->get_rfield_lengths();
 3891         my $rfield_lengths_1 = $line_1->get_rfield_lengths();
 3892         my $rpatterns_0      = $line_0->get_rpatterns();
 3893         my $rpatterns_1      = $line_1->get_rpatterns();
 3894         my $imax_next        = $line_1->get_imax_pair();
 3895 
 3896         # We will scan the alignment tokens and set a flag '$is_marginal' if
 3897         # it seems that the an alignment would look bad.
 3898         my $max_pad            = 0;
 3899         my $saw_good_alignment = 0;
 3900         my $saw_if_or;        # if we saw an 'if' or 'or' at group level
 3901         my $raw_tokb = "";    # first token seen at group level
 3902         my $jfirst_bad;
 3903         my $line_ending_fat_comma;    # is last token just a '=>' ?
 3904         my $j0_eq_pad;
 3905         my $j0_max_pad = 0;
 3906 
 3907         for ( my $j = 0 ; $j < $jmax_1 - 1 ; $j++ ) {
 3908             my ( $raw_tok, $lev, $tag, $tok_count ) =
 3909               decode_alignment_token( $rtokens_1->[$j] );
 3910             if ( $raw_tok && $lev == $group_level ) {
 3911                 if ( !$raw_tokb ) { $raw_tokb = $raw_tok }
 3912                 $saw_if_or ||= $is_if_or{$raw_tok};
 3913             }
 3914 
 3915             # When the first of the two lines ends in a bare '=>' this will
 3916             # probably be marginal match.  (For a bare =>, the next field length
 3917             # will be 2 or 3, depending on side comment)
 3918             $line_ending_fat_comma =
 3919                  $j == $jmax_1 - 2
 3920               && $raw_tok eq '=>'
 3921               && $rfield_lengths_0->[ $j + 1 ] <= 3;
 3922 
 3923             my $pad = $rfield_lengths_1->[$j] - $rfield_lengths_0->[$j];
 3924             if ( $j == 0 ) {
 3925                 $pad += $line_1->get_leading_space_count() -
 3926                   $line_0->get_leading_space_count();
 3927 
 3928                 # Remember the pad at a leading equals
 3929                 if ( $raw_tok eq '=' && $lev == $group_level ) {
 3930                     $j0_eq_pad = $pad;
 3931                     $j0_max_pad =
 3932                       0.5 * ( $rfield_lengths_1->[0] + $rfield_lengths_0->[0] );
 3933                     $j0_max_pad = 4 if ( $j0_max_pad < 4 );
 3934                 }
 3935             }
 3936 
 3937             if ( $pad < 0 )        { $pad     = -$pad }
 3938             if ( $pad > $max_pad ) { $max_pad = $pad }
 3939             if ( $is_good_alignment{$raw_tok} && !$line_ending_fat_comma ) {
 3940                 $saw_good_alignment = 1;
 3941             }
 3942             else {
 3943                 $jfirst_bad = $j unless defined($jfirst_bad);
 3944             }
 3945             if ( $rpatterns_0->[$j] ne $rpatterns_1->[$j] ) {
 3946 
 3947                 # Flag this as a marginal match since patterns differ.
 3948                 # Normally, we will not allow just two lines to match if
 3949                 # marginal. But we can allow matching in some specific cases.
 3950 
 3951                 $jfirst_bad  = $j if ( !defined($jfirst_bad) );
 3952                 $is_marginal = 1  if ( $is_marginal == 0 );
 3953                 if ( $raw_tok eq '=' ) {
 3954 
 3955                     # Here is an example of a marginal match:
 3956                     #       $done{$$op} = 1;
 3957                     #       $op         = compile_bblock($op);
 3958                     # The left tokens are both identifiers, but
 3959                     # one accesses a hash and the other doesn't.
 3960                     # We'll let this be a tentative match and undo
 3961                     # it later if we don't find more than 2 lines
 3962                     # in the group.
 3963                     $is_marginal = 2;
 3964                 }
 3965             }
 3966         }
 3967 
 3968         $is_marginal = 1 if ( $is_marginal == 0 && $line_ending_fat_comma );
 3969 
 3970         # Turn off the "marginal match" flag in some cases...
 3971         # A "marginal match" occurs when the alignment tokens agree
 3972         # but there are differences in the other tokens (patterns).
 3973         # If we leave the marginal match flag set, then the rule is that we
 3974         # will align only if there are more than two lines in the group.
 3975         # We will turn of the flag if we almost have a match
 3976         # and either we have seen a good alignment token or we
 3977         # just need a small pad (2 spaces) to fit.  These rules are
 3978         # the result of experimentation.  Tokens which misaligned by just
 3979         # one or two characters are annoying.  On the other hand,
 3980         # large gaps to less important alignment tokens are also annoying.
 3981         if ( $is_marginal == 1
 3982             && ( $saw_good_alignment || $max_pad < 3 ) )
 3983         {
 3984             $is_marginal = 0;
 3985         }
 3986 
 3987         # We will use the line endings to help decide on alignments...
 3988         # See if the lines end with semicolons...
 3989         my $sc_term0;
 3990         my $sc_term1;
 3991         if ( $jmax_0 < 1 || $jmax_1 < 1 ) {
 3992 
 3993             # shouldn't happen
 3994         }
 3995         else {
 3996             my $pat0 = $rpatterns_0->[ $jmax_0 - 1 ];
 3997             my $pat1 = $rpatterns_1->[ $jmax_1 - 1 ];
 3998             $sc_term0 = $pat0 =~ /;b?$/;
 3999             $sc_term1 = $pat1 =~ /;b?$/;
 4000         }
 4001 
 4002         if ( !$is_marginal && !$sc_term0 ) {
 4003 
 4004             # First line of assignment should be semicolon terminated.
 4005             # For example, do not align here:
 4006             #  $$href{-NUM_TEXT_FILES} = $$href{-NUM_BINARY_FILES} =
 4007             #    $$href{-NUM_DIRS} = 0;
 4008             if ( $is_assignment{$raw_tokb} ) {
 4009                 $is_marginal = 1;
 4010             }
 4011         }
 4012 
 4013         # Try to avoid some undesirable alignments of opening tokens
 4014         # for example, the space between grep and { here:
 4015         #  return map { ( $_ => $_ ) }
 4016         #    grep     { /$handles/ } $self->_get_delegate_method_list;
 4017         $is_marginal ||=
 4018              ( $raw_tokb eq '(' || $raw_tokb eq '{' )
 4019           && $jmax_1 == 2
 4020           && $sc_term0 ne $sc_term1;
 4021 
 4022         ########################################
 4023         # return unless this is a marginal match
 4024         ########################################
 4025         goto RETURN if ( !$is_marginal );
 4026 
 4027         # Undo the marginal match flag in certain cases,
 4028 
 4029         # Two lines with a leading equals-like operator are allowed to
 4030         # align if the patterns to the left of the equals are the same.
 4031         # For example the following two lines are a marginal match but have
 4032         # the same left side patterns, so we will align the equals.
 4033         #     my $orig = my $format = "^<<<<< ~~\n";
 4034         #     my $abc  = "abc";
 4035         # But these have a different left pattern so they will not be
 4036         # aligned
 4037         #     $xmldoc .= $`;
 4038         #     $self->{'leftovers'} .= "<bx-seq:seq" . $';
 4039 
 4040         # First line semicolon terminated but second not, usually ok:
 4041         #               my $want = "'ab', 'a', 'b'";
 4042         #               my $got  = join( ", ",
 4043         #                    map { defined($_) ? "'$_'" : "undef" }
 4044         #                          @got );
 4045         #  First line not semicolon terminated, Not OK to match:
 4046         #   $$href{-NUM_TEXT_FILES} = $$href{-NUM_BINARY_FILES} =
 4047         #      $$href{-NUM_DIRS} = 0;
 4048         my $pat0 = $rpatterns_0->[0];
 4049         my $pat1 = $rpatterns_1->[0];
 4050 
 4051         ##########################################################
 4052         # Turn off the marginal flag for some types of assignments
 4053         ##########################################################
 4054         if ( $is_assignment{$raw_tokb} ) {
 4055 
 4056             # undo marginal flag if first line is semicolon terminated
 4057             # and leading patters match
 4058             if ($sc_term0) {    # && $sc_term1) {
 4059                 $is_marginal = $pat0 ne $pat1;
 4060             }
 4061         }
 4062         elsif ( $raw_tokb eq '=>' ) {
 4063 
 4064             # undo marginal flag if patterns match
 4065             $is_marginal = $pat0 ne $pat1 || $line_ending_fat_comma;
 4066         }
 4067         elsif ( $raw_tokb eq '=~' ) {
 4068 
 4069             # undo marginal flag if both lines are semicolon terminated
 4070             # and leading patters match
 4071             if ( $sc_term1 && $sc_term0 ) {
 4072                 $is_marginal = $pat0 ne $pat1;
 4073             }
 4074         }
 4075 
 4076         ######################################################
 4077         # Turn off the marginal flag if we saw an 'if' or 'or'
 4078         ######################################################
 4079 
 4080         # A trailing 'if' and 'or' often gives a good alignment
 4081         # For example, we can align these:
 4082         #  return -1     if $_[0] =~ m/^CHAPT|APPENDIX/;
 4083         #  return $1 + 0 if $_[0] =~ m/^SECT(\d*)$/;
 4084 
 4085         # or
 4086         #  $d_in_m[2] = 29          if ( &Date_LeapYear($y) );
 4087         #  $d         = $d_in_m[$m] if ( $d > $d_in_m[$m] );
 4088 
 4089         if ($saw_if_or) {
 4090 
 4091             # undo marginal flag if both lines are semicolon terminated
 4092             if ( $sc_term0 && $sc_term1 ) {
 4093                 $is_marginal = 0;
 4094             }
 4095         }
 4096 
 4097         # For a marginal match, only keep matches before the first 'bad' match
 4098         if (   $is_marginal
 4099             && defined($jfirst_bad)
 4100             && $imax_align > $jfirst_bad - 1 )
 4101         {
 4102             $imax_align = $jfirst_bad - 1;
 4103         }
 4104 
 4105         ###########################################################
 4106         # Allow sweep to match lines with leading '=' in some cases
 4107         ###########################################################
 4108         if ( $imax_align < 0 && defined($j0_eq_pad) ) {
 4109 
 4110             if (
 4111 
 4112                 # If there is a following line with leading equals, or
 4113                 # preceding line with leading equals, then let the sweep align
 4114                 # them without restriction.  For example, the first two lines
 4115                 # here are a marginal match, but they are followed by a line
 4116                 # with leading equals, so the sweep-lr logic can align all of
 4117                 # the lines:
 4118 
 4119                 #  $date[1] = $month_to_num{ $date[1] };            # <--line_0
 4120                 #  @xdate   = split( /[:\/\s]/, $log->field('t') ); # <--line_1
 4121                 #  $day     = sprintf( "%04d/%02d/%02d", @date[ 2, 1, 0 ] );
 4122                 #  $time    = sprintf( "%02d:%02d:%02d", @date[ 3 .. 5 ] );
 4123 
 4124                 # Likewise, if we reverse the two pairs we want the same result
 4125 
 4126                 #  $day     = sprintf( "%04d/%02d/%02d", @date[ 2, 1, 0 ] );
 4127                 #  $time    = sprintf( "%02d:%02d:%02d", @date[ 3 .. 5 ] );
 4128                 #  $date[1] = $month_to_num{ $date[1] };            # <--line_0
 4129                 #  @xdate   = split( /[:\/\s]/, $log->field('t') ); # <--line_1
 4130 
 4131                 (
 4132                        $imax_next >= 0
 4133                     || $imax_prev >= 0
 4134                     || TEST_MARGINAL_EQ_ALIGNMENT
 4135                 )
 4136                 && $j0_eq_pad >= -$j0_max_pad
 4137                 && $j0_eq_pad <= $j0_max_pad
 4138               )
 4139             {
 4140 
 4141                 # But do not do this if there is a comma before the '='.
 4142                 # For example, the first two lines below have commas and
 4143                 # therefore are not allowed to align with lines 3 & 4:
 4144 
 4145                 # my ( $x, $y ) = $self->Size();                      #<--line_0
 4146                 # my ( $left, $top, $right, $bottom ) = $self->Window(); #<--l_1
 4147                 # my $vx = $right - $left;
 4148                 # my $vy = $bottom - $top;
 4149 
 4150                 if ( $rpatterns_0->[0] !~ /,/ && $rpatterns_1->[0] !~ /,/ ) {
 4151                     $imax_align = 0;
 4152                 }
 4153             }
 4154         }
 4155 
 4156       RETURN:
 4157         return ( $is_marginal, $imax_align );
 4158     }
 4159 }
 4160 
 4161 sub get_extra_leading_spaces {
 4162 
 4163     my ( $rlines, $rgroups ) = @_;
 4164 
 4165     #----------------------------------------------------------
 4166     # Define any extra indentation space (for the -lp option).
 4167     # Here is why:
 4168     # If a list has side comments, sub scan_list must dump the
 4169     # list before it sees everything.  When this happens, it sets
 4170     # the indentation to the standard scheme, but notes how
 4171     # many spaces it would have liked to use.  We may be able
 4172     # to recover that space here in the event that all of the
 4173     # lines of a list are back together again.
 4174     #----------------------------------------------------------
 4175 
 4176     return 0 unless ( @{$rlines} && @{$rgroups} );
 4177 
 4178     my $object = $rlines->[0]->get_indentation();
 4179     return 0 unless ( ref($object) );
 4180     my $extra_leading_spaces            = 0;
 4181     my $extra_indentation_spaces_wanted = get_recoverable_spaces($object);
 4182     return ($extra_leading_spaces) unless ($extra_indentation_spaces_wanted);
 4183 
 4184     my $min_spaces = $extra_indentation_spaces_wanted;
 4185     if ( $min_spaces > 0 ) { $min_spaces = 0 }
 4186 
 4187     # loop over all groups
 4188     my $ng      = -1;
 4189     my $ngroups = @{$rgroups};
 4190     foreach my $item ( @{$rgroups} ) {
 4191         $ng++;
 4192         my ( $jbeg, $jend ) = @{$item};
 4193         foreach my $j ( $jbeg .. $jend ) {
 4194             next if ( $j == 0 );
 4195 
 4196             # all indentation objects must be the same
 4197             if ( $object != $rlines->[$j]->get_indentation() ) {
 4198                 return 0;
 4199             }
 4200         }
 4201 
 4202        # find the maximum space without exceeding the line length for this group
 4203         my $avail = $rlines->[$jbeg]->get_available_space_on_right();
 4204         my $spaces =
 4205           ( $avail > $extra_indentation_spaces_wanted )
 4206           ? $extra_indentation_spaces_wanted
 4207           : $avail;
 4208 
 4209         #########################################################
 4210         # Note: min spaces can be negative; for example with -gnu
 4211         # f(
 4212         #   do { 1; !!(my $x = bless []); }
 4213         #  );
 4214         #########################################################
 4215         # The following rule is needed to match older formatting:
 4216         # For multiple groups, we will keep spaces non-negative.
 4217         # For a single group, we will allow a negative space.
 4218         if ( $ngroups > 1 && $spaces < 0 ) { $spaces = 0 }
 4219 
 4220         # update the minimum spacing
 4221         if ( $ng == 0 || $spaces < $extra_leading_spaces ) {
 4222             $extra_leading_spaces = $spaces;
 4223         }
 4224     }
 4225 
 4226     # update the indentation object because with -icp the terminal
 4227     # ');' will use the same adjustment.
 4228     $object->permanently_decrease_available_spaces( -$extra_leading_spaces );
 4229     return $extra_leading_spaces;
 4230 }
 4231 
 4232 sub forget_side_comment {
 4233     my ($self) = @_;
 4234     $self->[_last_side_comment_column_] = 0;
 4235     return;
 4236 }
 4237 
 4238 sub is_good_side_comment_column {
 4239     my ( $self, $line, $line_number, $level, $num5 ) = @_;
 4240 
 4241     # Upon encountering the first side comment of a group, decide if
 4242     # a previous side comment should be forgotten.  This involves
 4243     # checking several rules.
 4244 
 4245     # Return true to keep old comment location
 4246     # Return false to forget old comment location
 4247 
 4248     my $rfields                 = $line->get_rfields();
 4249     my $is_hanging_side_comment = $line->get_is_hanging_side_comment();
 4250 
 4251     # RULE1: Never forget comment before a hanging side comment
 4252     goto KEEP if ($is_hanging_side_comment);
 4253 
 4254     # RULE2: Forget a side comment after a short line difference,
 4255     # where 'short line difference' is computed from a formula.
 4256     # Using a smooth formula helps minimize sudden large changes.
 4257     my $line_diff = $line_number - $self->[_last_side_comment_line_number_];
 4258     my $alev_diff = abs( $level - $self->[_last_side_comment_level_] );
 4259 
 4260     # '$num5' is the number of comments in the first 5 lines after the first
 4261     # comment.  It is needed to keep a compact group of side comments from
 4262     # being influenced by a more distant side comment.
 4263     $num5 = 1 unless ($num5);
 4264 
 4265     # Some values:
 4266 
 4267     #        $adiff  $num5   $short_diff
 4268     #        0       *       12
 4269     #        1       1       6
 4270     #        1       2       4
 4271     #        1       3       3
 4272     #        1       4       2
 4273     #        2       1       4
 4274     #        2       2       2
 4275     #        2       3       1
 4276     #        3       1       3
 4277     #        3       2       1
 4278 
 4279     my $short_diff = SC_LONG_LINE_DIFF / ( 1 + $alev_diff * $num5 );
 4280 
 4281     goto FORGET
 4282       if ( $line_diff > $short_diff );
 4283 
 4284     # RULE3: Forget a side comment if this line is at lower level and
 4285     # ends a block
 4286     my $last_sc_level = $self->[_last_side_comment_level_];
 4287     goto FORGET
 4288       if ( $level < $last_sc_level
 4289         && $is_closing_block_type{ substr( $rfields->[0], 0, 1 ) } );
 4290 
 4291     # RULE 4: Forget the last side comment if this comment might join a cached
 4292     # line ...
 4293     if ( my $cached_line_type = get_cached_line_type() ) {
 4294 
 4295         # ... otherwise side comment alignment will get messed up.
 4296         # For example, in the following test script
 4297         # with using 'perltidy -sct -act=2', the last comment would try to
 4298         # align with the previous and then be in the wrong column when
 4299         # the lines are combined:
 4300 
 4301         # foreach $line (
 4302         #    [0, 1, 2], [3, 4, 5], [6, 7, 8],    # rows
 4303         #    [0, 3, 6], [1, 4, 7], [2, 5, 8],    # columns
 4304         #    [0, 4, 8], [2, 4, 6]
 4305         #  )                                     # diagonals
 4306         goto FORGET
 4307           if ( $cached_line_type == 2 || $cached_line_type == 4 );
 4308     }
 4309 
 4310     # Otherwise, keep it alive
 4311     goto KEEP;
 4312 
 4313   FORGET:
 4314     return 0;
 4315 
 4316   KEEP:
 4317     return 1;
 4318 }
 4319 
 4320 sub align_side_comments {
 4321 
 4322     my ( $self, $rlines, $rgroups ) = @_;
 4323 
 4324     # Align any side comments in this batch of lines
 4325 
 4326     # Given:
 4327     #  $rlines  - the lines
 4328     #  $rgroups - the partition of the lines into groups
 4329     #
 4330     # We will be working group-by-group because all side comments
 4331     # (real or fake) in each group are already aligned. So we just have
 4332     # to make alignments between groups wherever possible.
 4333 
 4334     # An unusual aspect is that within each group we have aligned both real
 4335     # and fake side comments.  This has the consequence that the lengths of
 4336     # long lines without real side comments can cause 'push' all side comments
 4337     # to the right.  This seems unusual, but testing with and without this
 4338     # feature shows that it is usually better this way.  Othewise, side
 4339     # comments can be hidden between long lines without side comments and
 4340     # thus be harder to read.
 4341 
 4342     my $group_level        = $self->[_group_level_];
 4343     my $continuing_sc_flow = $self->[_last_side_comment_length_] > 0
 4344       && $group_level == $self->[_last_level_written_];
 4345 
 4346     # Find groups with side comments, and remember the first nonblank comment
 4347     my $j_sc_beg;
 4348     my @todo;
 4349     my $ng = -1;
 4350     foreach my $item ( @{$rgroups} ) {
 4351         $ng++;
 4352         my ( $jbeg, $jend ) = @{$item};
 4353         foreach my $j ( $jbeg .. $jend ) {
 4354             my $line = $rlines->[$j];
 4355             my $jmax = $line->get_jmax();
 4356             if ( $line->get_rfield_lengths()->[$jmax] ) {
 4357 
 4358                 # this group has a line with a side comment
 4359                 push @todo, $ng;
 4360                 if ( !defined($j_sc_beg) ) {
 4361                     $j_sc_beg = $j;
 4362                 }
 4363                 last;
 4364             }
 4365         }
 4366     }
 4367 
 4368     # done if no groups with side comments
 4369     return unless @todo;
 4370 
 4371     # Count $num5 = number of comments in the 5 lines after the first comment
 4372     # This is an important factor in a decision formula
 4373     my $num5 = 1;
 4374     for ( my $jj = $j_sc_beg + 1 ; $jj < @{$rlines} ; $jj++ ) {
 4375         my $ldiff = $jj - $j_sc_beg;
 4376         last if ( $ldiff > 5 );
 4377         my $line   = $rlines->[$jj];
 4378         my $jmax   = $line->get_jmax();
 4379         my $sc_len = $line->get_rfield_lengths()->[$jmax];
 4380         next unless ($sc_len);
 4381         $num5++;
 4382     }
 4383 
 4384     # Forget the old side comment location if necessary
 4385     my $line = $rlines->[$j_sc_beg];
 4386     my $lnum =
 4387       $j_sc_beg + $self->[_file_writer_object_]->get_output_line_number();
 4388     my $keep_it =
 4389       $self->is_good_side_comment_column( $line, $lnum, $group_level, $num5 );
 4390     my $last_side_comment_column =
 4391       $keep_it ? $self->[_last_side_comment_column_] : 0;
 4392 
 4393     # If there are multiple groups we will do two passes
 4394     # so that we can find a common alignment for all groups.
 4395     my $MAX_PASS = @todo > 1 ? 2 : 1;
 4396 
 4397     # Loop over passes
 4398     my $max_comment_column = $last_side_comment_column;
 4399     for ( my $PASS = 1 ; $PASS <= $MAX_PASS ; $PASS++ ) {
 4400 
 4401         # If there are two passes, then on the last pass make the old column
 4402         # equal to the largest of the group.  This will result in the comments
 4403         # being aligned if possible.
 4404         if ( $PASS == $MAX_PASS ) {
 4405             $last_side_comment_column = $max_comment_column;
 4406         }
 4407 
 4408         # Loop over the groups with side comments
 4409         my $column_limit;
 4410         foreach my $ng (@todo) {
 4411             my ( $jbeg, $jend ) = @{ $rgroups->[$ng] };
 4412 
 4413             # Note that since all lines in a group have common alignments, we
 4414             # just have to work on one of the lines (the first line).
 4415             my $line                    = $rlines->[$jbeg];
 4416             my $jmax                    = $line->get_jmax();
 4417             my $is_hanging_side_comment = $line->get_is_hanging_side_comment();
 4418             last
 4419               if ( $PASS < $MAX_PASS && $is_hanging_side_comment );
 4420 
 4421             # the maximum space without exceeding the line length:
 4422             my $avail = $line->get_available_space_on_right();
 4423 
 4424             # try to use the previous comment column
 4425             my $side_comment_column = $line->get_column( $jmax - 1 );
 4426             my $move = $last_side_comment_column - $side_comment_column;
 4427 
 4428             # Remember the maximum possible column of the first line with
 4429             # side comment
 4430             if ( !defined($column_limit) ) {
 4431                 $column_limit = $side_comment_column + $avail;
 4432             }
 4433 
 4434             next if ( $jmax <= 0 );
 4435 
 4436             # but if this doesn't work, give up and use the minimum space
 4437             my $min_move = $self->[_rOpts_minimum_space_to_comment_] - 1;
 4438             if ( $move > $avail ) {
 4439                 $move = $min_move;
 4440             }
 4441 
 4442             # but we want some minimum space to the comment
 4443             if (   $move >= 0
 4444                 && $j_sc_beg == 0
 4445                 && $continuing_sc_flow )
 4446             {
 4447                 $min_move = 0;
 4448             }
 4449 
 4450             # remove constraints on hanging side comments
 4451             if ($is_hanging_side_comment) { $min_move = 0 }
 4452 
 4453             if ( $move < $min_move ) {
 4454                 $move = $min_move;
 4455             }
 4456 
 4457             # don't exceed the available space
 4458             if ( $move > $avail ) { $move = $avail }
 4459 
 4460             # We can only increase space, never decrease.
 4461             if ( $move < 0 ) { $move = 0 }
 4462 
 4463             # Discover the largest column on the preliminary  pass
 4464             if ( $PASS < $MAX_PASS ) {
 4465                 my $col = $line->get_column( $jmax - 1 ) + $move;
 4466 
 4467                 # but ignore columns too large for the starting line
 4468                 if ( $col > $max_comment_column && $col < $column_limit ) {
 4469                     $max_comment_column = $col;
 4470                 }
 4471             }
 4472 
 4473             # Make the changes on the final pass
 4474             else {
 4475                 $line->increase_field_width( $jmax - 1, $move );
 4476 
 4477                 # remember this column for the next group
 4478                 $last_side_comment_column = $line->get_column( $jmax - 1 );
 4479             }
 4480         } ## end loop over groups
 4481     } ## end loop over passes
 4482 
 4483     # Find the last side comment
 4484     my $j_sc_last;
 4485     my $ng_last = $todo[-1];
 4486     my ( $jbeg, $jend ) = @{ $rgroups->[$ng_last] };
 4487     for ( my $jj = $jend ; $jj >= $jbeg ; $jj-- ) {
 4488         my $line = $rlines->[$jj];
 4489         my $jmax = $line->get_jmax();
 4490         if ( $line->get_rfield_lengths()->[$jmax] ) {
 4491             $j_sc_last = $jj;
 4492             last;
 4493         }
 4494     }
 4495 
 4496     # Save final side comment info for possible use by the next batch
 4497     if ( defined($j_sc_last) ) {
 4498         my $line_number =
 4499           $self->[_file_writer_object_]->get_output_line_number() + $j_sc_last;
 4500         $self->[_last_side_comment_column_]      = $last_side_comment_column;
 4501         $self->[_last_side_comment_line_number_] = $line_number;
 4502         $self->[_last_side_comment_level_]       = $group_level;
 4503     }
 4504     return;
 4505 }
 4506 
 4507 ###############################
 4508 # CODE SECTION 6: Output Step A
 4509 ###############################
 4510 
 4511 sub valign_output_step_A {
 4512 
 4513     ###############################################################
 4514     # This is Step A in writing vertically aligned lines.
 4515     # The line is prepared according to the alignments which have
 4516     # been found. Then it is shipped to the next step.
 4517     ###############################################################
 4518 
 4519     my ( $self, $rinput_hash ) = @_;
 4520 
 4521     my $line                 = $rinput_hash->{line};
 4522     my $min_ci_gap           = $rinput_hash->{min_ci_gap};
 4523     my $do_not_align         = $rinput_hash->{do_not_align};
 4524     my $group_leader_length  = $rinput_hash->{group_leader_length};
 4525     my $extra_leading_spaces = $rinput_hash->{extra_leading_spaces};
 4526     my $level                = $rinput_hash->{level};
 4527 
 4528     my $rfields                   = $line->get_rfields();
 4529     my $rfield_lengths            = $line->get_rfield_lengths();
 4530     my $leading_space_count       = $line->get_leading_space_count();
 4531     my $outdent_long_lines        = $line->get_outdent_long_lines();
 4532     my $maximum_field_index       = $line->get_jmax();
 4533     my $rvertical_tightness_flags = $line->get_rvertical_tightness_flags();
 4534     my $Kend                      = $line->get_Kend();
 4535     my $level_end                 = $line->get_level_end();
 4536 
 4537     # add any extra spaces
 4538     if ( $leading_space_count > $group_leader_length ) {
 4539         $leading_space_count += $min_ci_gap;
 4540     }
 4541 
 4542     my $str     = $rfields->[0];
 4543     my $str_len = $rfield_lengths->[0];
 4544 
 4545     # loop to concatenate all fields of this line and needed padding
 4546     my $total_pad_count = 0;
 4547     for my $j ( 1 .. $maximum_field_index ) {
 4548 
 4549         # skip zero-length side comments
 4550         last
 4551           if (
 4552             ( $j == $maximum_field_index )
 4553             && ( !defined( $rfields->[$j] )
 4554                 || ( $rfield_lengths->[$j] == 0 ) )
 4555           );
 4556 
 4557         # compute spaces of padding before this field
 4558         my $col = $line->get_column( $j - 1 );
 4559         my $pad = $col - ( $str_len + $leading_space_count );
 4560 
 4561         if ($do_not_align) {
 4562             $pad =
 4563               ( $j < $maximum_field_index )
 4564               ? 0
 4565               : $self->[_rOpts_minimum_space_to_comment_] - 1;
 4566         }
 4567 
 4568         # if the -fpsc flag is set, move the side comment to the selected
 4569         # column if and only if it is possible, ignoring constraints on
 4570         # line length and minimum space to comment
 4571         if (   $self->[_rOpts_fixed_position_side_comment_]
 4572             && $j == $maximum_field_index )
 4573         {
 4574             my $newpad =
 4575               $pad + $self->[_rOpts_fixed_position_side_comment_] - $col - 1;
 4576             if ( $newpad >= 0 ) { $pad = $newpad; }
 4577         }
 4578 
 4579         # accumulate the padding
 4580         if ( $pad > 0 ) { $total_pad_count += $pad; }
 4581 
 4582         # only add padding when we have a finite field;
 4583         # this avoids extra terminal spaces if we have empty fields
 4584         if ( $rfield_lengths->[$j] > 0 ) {
 4585             $str .= ' ' x $total_pad_count;
 4586             $str_len += $total_pad_count;
 4587             $total_pad_count = 0;
 4588             $str .= $rfields->[$j];
 4589             $str_len += $rfield_lengths->[$j];
 4590         }
 4591         else {
 4592             $total_pad_count = 0;
 4593         }
 4594     }
 4595 
 4596     my $side_comment_length = $rfield_lengths->[$maximum_field_index];
 4597 
 4598     # ship this line off
 4599     $self->valign_output_step_B(
 4600         {
 4601             leading_space_count => $leading_space_count + $extra_leading_spaces,
 4602             line                => $str,
 4603             line_length         => $str_len,
 4604             side_comment_length => $side_comment_length,
 4605             outdent_long_lines  => $outdent_long_lines,
 4606             rvertical_tightness_flags => $rvertical_tightness_flags,
 4607             level                     => $level,
 4608             level_end                 => $level_end,
 4609             Kend                      => $Kend,
 4610         }
 4611     );
 4612     return;
 4613 }
 4614 
 4615 sub combine_fields {
 4616 
 4617     # We have a group of two lines for which we do not want to align tokens
 4618     # between index $imax_align and the side comment.  So we will delete fields
 4619     # between $imax_align and the side comment.  Alignments have already
 4620     # been set so we have to adjust them.
 4621 
 4622     my ( $line_0, $line_1, $imax_align ) = @_;
 4623 
 4624     if ( !defined($imax_align) ) { $imax_align = -1 }
 4625 
 4626     # First delete the unwanted tokens
 4627     my $jmax_old       = $line_0->get_jmax();
 4628     my @old_alignments = $line_0->get_alignments();
 4629     my @idel           = ( $imax_align + 1 .. $jmax_old - 2 );
 4630 
 4631     return unless (@idel);
 4632 
 4633     foreach my $line ( $line_0, $line_1 ) {
 4634         delete_selected_tokens( $line, \@idel );
 4635     }
 4636 
 4637     # Now adjust the alignments.  Note that the side comment alignment
 4638     # is always at jmax-1, and there is an ending alignment at jmax.
 4639     my @new_alignments;
 4640     if ( $imax_align >= 0 ) {
 4641         @new_alignments[ 0 .. $imax_align ] =
 4642           @old_alignments[ 0 .. $imax_align ];
 4643     }
 4644 
 4645     my $jmax_new = $line_0->get_jmax();
 4646 
 4647     $new_alignments[ $jmax_new - 1 ] = $old_alignments[ $jmax_old - 1 ];
 4648     $new_alignments[$jmax_new] = $old_alignments[$jmax_old];
 4649     $line_0->set_alignments(@new_alignments);
 4650     $line_1->set_alignments(@new_alignments);
 4651     return;
 4652 }
 4653 
 4654 sub get_output_line_number {
 4655 
 4656     # The output line number reported to a caller =
 4657     # the number of items still in the buffer +
 4658     # the number of items written.
 4659     return $_[0]->group_line_count() +
 4660       $_[0]->[_file_writer_object_]->get_output_line_number();
 4661 }
 4662 
 4663 ###############################
 4664 # CODE SECTION 7: Output Step B
 4665 ###############################
 4666 
 4667 {    ## closure for sub valign_output_step_B
 4668 
 4669     # These are values for a cache used by valign_output_step_B.
 4670     my $cached_line_text;
 4671     my $cached_line_text_length;
 4672     my $cached_line_type;
 4673     my $cached_line_flag;
 4674     my $cached_seqno;
 4675     my $cached_line_valid;
 4676     my $cached_line_leading_space_count;
 4677     my $cached_seqno_string;
 4678     my $cached_line_Kend;
 4679     my $seqno_string;
 4680     my $last_nonblank_seqno_string;
 4681 
 4682     sub get_seqno_string {
 4683         return $seqno_string;
 4684     }
 4685 
 4686     sub get_last_nonblank_seqno_string {
 4687         return $last_nonblank_seqno_string;
 4688     }
 4689 
 4690     sub set_last_nonblank_seqno_string {
 4691         my ($val) = @_;
 4692         $last_nonblank_seqno_string = $val;
 4693         return;
 4694     }
 4695 
 4696     sub get_cached_line_flag {
 4697         return $cached_line_flag;
 4698     }
 4699 
 4700     sub get_cached_line_type {
 4701         return $cached_line_type;
 4702     }
 4703 
 4704     sub set_cached_line_valid {
 4705         my ($val) = @_;
 4706         $cached_line_valid = $val;
 4707         return;
 4708     }
 4709 
 4710     sub get_cached_seqno {
 4711         return $cached_seqno;
 4712     }
 4713 
 4714     sub initialize_step_B_cache {
 4715 
 4716         # valign_output_step_B cache:
 4717         $cached_line_text                = "";
 4718         $cached_line_text_length         = 0;
 4719         $cached_line_type                = 0;
 4720         $cached_line_flag                = 0;
 4721         $cached_seqno                    = 0;
 4722         $cached_line_valid               = 0;
 4723         $cached_line_leading_space_count = 0;
 4724         $cached_seqno_string             = "";
 4725         $cached_line_Kend                = undef;
 4726 
 4727         # These vars hold a string of sequence numbers joined together used by
 4728         # the cache
 4729         $seqno_string               = "";
 4730         $last_nonblank_seqno_string = "";
 4731         return;
 4732     }
 4733 
 4734     sub _flush_cache {
 4735         my ($self) = @_;
 4736         if ($cached_line_type) {
 4737             $seqno_string = $cached_seqno_string;
 4738             $self->valign_output_step_C(
 4739                 $cached_line_text,
 4740                 $cached_line_leading_space_count,
 4741                 $self->[_last_level_written_],
 4742                 $cached_line_Kend,
 4743             );
 4744             $cached_line_type        = 0;
 4745             $cached_line_text        = "";
 4746             $cached_line_text_length = 0;
 4747             $cached_seqno_string     = "";
 4748             $cached_line_Kend        = undef;
 4749         }
 4750         return;
 4751     }
 4752 
 4753     sub valign_output_step_B {
 4754 
 4755         ###############################################################
 4756         # This is Step B in writing vertically aligned lines.
 4757         # Vertical tightness is applied according to preset flags.
 4758         # In particular this routine handles stacking of opening
 4759         # and closing tokens.
 4760         ###############################################################
 4761 
 4762         my ( $self, $rinput ) = @_;
 4763 
 4764         my $leading_space_count       = $rinput->{leading_space_count};
 4765         my $str                       = $rinput->{line};
 4766         my $str_length                = $rinput->{line_length};
 4767         my $side_comment_length       = $rinput->{side_comment_length};
 4768         my $outdent_long_lines        = $rinput->{outdent_long_lines};
 4769         my $rvertical_tightness_flags = $rinput->{rvertical_tightness_flags};
 4770         my $level                     = $rinput->{level};
 4771         my $level_end                 = $rinput->{level_end};
 4772         my $Kend                      = $rinput->{Kend};
 4773 
 4774         my $last_level_written = $self->[_last_level_written_];
 4775 
 4776         # Useful -gcs test cases for wide characters are
 4777         # perl527/(method.t.2, reg_mesg.t, mime-header.t)
 4778 
 4779         # handle outdenting of long lines:
 4780         my $is_outdented_line;
 4781         if ($outdent_long_lines) {
 4782             my $excess =
 4783               $str_length -
 4784               $side_comment_length +
 4785               $leading_space_count -
 4786               $self->maximum_line_length_for_level($level);
 4787             if ( $excess > 0 ) {
 4788                 $leading_space_count = 0;
 4789                 my $file_writer_object = $self->[_file_writer_object_];
 4790                 my $last_outdented_line_at =
 4791                   $file_writer_object->get_output_line_number();
 4792                 $self->[_last_outdented_line_at_] = $last_outdented_line_at;
 4793 
 4794                 my $outdented_line_count = $self->[_outdented_line_count_];
 4795                 unless ($outdented_line_count) {
 4796                     $self->[_first_outdented_line_at_] =
 4797                       $last_outdented_line_at;
 4798                 }
 4799                 $outdented_line_count++;
 4800                 $self->[_outdented_line_count_] = $outdented_line_count;
 4801                 $is_outdented_line = 1;
 4802             }
 4803         }
 4804 
 4805         # Make preliminary leading whitespace.  It could get changed
 4806         # later by entabbing, so we have to keep track of any changes
 4807         # to the leading_space_count from here on.
 4808         my $leading_string =
 4809           $leading_space_count > 0 ? ( ' ' x $leading_space_count ) : "";
 4810         my $leading_string_length = length($leading_string);
 4811 
 4812         # Unpack any recombination data; it was packed by
 4813         # sub send_lines_to_vertical_aligner. Contents:
 4814         #
 4815         #   [0] type: 1=opening non-block    2=closing non-block
 4816         #             3=opening block brace  4=closing block brace
 4817         #   [1] flag: if opening: 1=no multiple steps, 2=multiple steps ok
 4818         #             if closing: spaces of padding to use
 4819         #   [2] sequence number of container
 4820         #   [3] valid flag: do not append if this flag is false
 4821         #
 4822         my ( $open_or_close, $tightness_flag, $seqno, $valid, $seqno_beg,
 4823             $seqno_end );
 4824         if ($rvertical_tightness_flags) {
 4825             (
 4826                 $open_or_close, $tightness_flag, $seqno, $valid, $seqno_beg,
 4827                 $seqno_end
 4828             ) = @{$rvertical_tightness_flags};
 4829         }
 4830 
 4831         $seqno_string = $seqno_end;
 4832 
 4833         # handle any cached line ..
 4834         # either append this line to it or write it out
 4835         # Note: the function length() is used in this next test out of caution.
 4836         # All testing has shown that the variable $cached_line_text_length is
 4837         # correct, but its calculation is complex and a loss of cached text
 4838         # would be a disaster.
 4839         if ( length($cached_line_text) ) {
 4840 
 4841             # Dump an invalid cached line
 4842             if ( !$cached_line_valid ) {
 4843                 $self->valign_output_step_C(
 4844                     $cached_line_text,   $cached_line_leading_space_count,
 4845                     $last_level_written, $cached_line_Kend
 4846                 );
 4847             }
 4848 
 4849             # Handle cached line ending in OPENING tokens
 4850             elsif ( $cached_line_type == 1 || $cached_line_type == 3 ) {
 4851 
 4852                 my $gap = $leading_space_count - $cached_line_text_length;
 4853 
 4854                 # handle option of just one tight opening per line:
 4855                 if ( $cached_line_flag == 1 ) {
 4856                     if ( defined($open_or_close) && $open_or_close == 1 ) {
 4857                         $gap = -1;
 4858                     }
 4859                 }
 4860 
 4861                 # Do not join the lines if this might produce a one-line
 4862                 # container which exceeds the maximum line length.  This is
 4863                 # necessary prevent blinking, particularly with the combination
 4864                 # -xci -pvt=2.  In that case a one-line block alternately forms
 4865                 # and breaks, causing -xci to alternately turn on and off (case
 4866                 # b765).
 4867                 # Patched to fix cases b656 b862 b971 b972: always do the check
 4868                 # if -vmll is set.  The reason is that the -vmll option can
 4869                 # cause changes in the maximum line length, leading to blinkers
 4870                 # if not checked.
 4871                 if (
 4872                     $gap >= 0
 4873                     && ( $self->[_rOpts_variable_maximum_line_length_]
 4874                         || ( defined($level_end) && $level > $level_end ) )
 4875                   )
 4876                 {
 4877                     my $test_line_length =
 4878                       $cached_line_text_length + $gap + $str_length;
 4879                     my $maximum_line_length =
 4880                       $self->maximum_line_length_for_level($last_level_written);
 4881 
 4882                     # Add a small tolerance in the length test (fixes case b862)
 4883                     if ( $test_line_length > $maximum_line_length - 2 ) {
 4884                         $gap = -1;
 4885                     }
 4886                 }
 4887 
 4888                 if ( $gap >= 0 && defined($seqno_beg) ) {
 4889                     $leading_string        = $cached_line_text . ' ' x $gap;
 4890                     $leading_string_length = $cached_line_text_length + $gap;
 4891                     $leading_space_count   = $cached_line_leading_space_count;
 4892                     $seqno_string = $cached_seqno_string . ':' . $seqno_beg;
 4893                     $level        = $last_level_written;
 4894                 }
 4895                 else {
 4896                     $self->valign_output_step_C(
 4897                         $cached_line_text,   $cached_line_leading_space_count,
 4898                         $last_level_written, $cached_line_Kend
 4899                     );
 4900                 }
 4901             }
 4902 
 4903             # Handle cached line ending in CLOSING tokens
 4904             else {
 4905                 my $test_line =
 4906                   $cached_line_text . ' ' x $cached_line_flag . $str;
 4907                 my $test_line_length =
 4908                   $cached_line_text_length + $cached_line_flag + $str_length;
 4909                 if (
 4910 
 4911                     # The new line must start with container
 4912                     $seqno_beg
 4913 
 4914                     # The container combination must be okay..
 4915                     && (
 4916 
 4917                         # okay to combine like types
 4918                         ( $open_or_close == $cached_line_type )
 4919 
 4920                         # closing block brace may append to non-block
 4921                         || ( $cached_line_type == 2 && $open_or_close == 4 )
 4922 
 4923                         # something like ');'
 4924                         || ( !$open_or_close && $cached_line_type == 2 )
 4925 
 4926                     )
 4927 
 4928                     # The combined line must fit
 4929                     && (
 4930                         $test_line_length <=
 4931                         $self->maximum_line_length_for_level(
 4932                             $last_level_written)
 4933                     )
 4934                   )
 4935                 {
 4936 
 4937                     $seqno_string = $cached_seqno_string . ':' . $seqno_beg;
 4938 
 4939                     # Patch to outdent closing tokens ending # in ');' If we
 4940                     # are joining a line like ');' to a previous stacked set of
 4941                     # closing tokens, then decide if we may outdent the
 4942                     # combined stack to the indentation of the ');'.  Since we
 4943                     # should not normally outdent any of the other tokens more
 4944                     # than the indentation of the lines that contained them, we
 4945                     # will only do this if all of the corresponding opening
 4946                     # tokens were on the same line.  This can happen with -sot
 4947                     # and -sct.
 4948 
 4949                     # For example, it is ok here:
 4950                     #   __PACKAGE__->load_components( qw(
 4951                     #         PK::Auto
 4952                     #         Core
 4953                     #   ));
 4954                     #
 4955                     # But, for example, we do not outdent in this example
 4956                     # because that would put the closing sub brace out farther
 4957                     # than the opening sub brace:
 4958                     #
 4959                     #   perltidy -sot -sct
 4960                     #   $c->Tk::bind(
 4961                     #       '<Control-f>' => sub {
 4962                     #           my ($c) = @_;
 4963                     #           my $e = $c->XEvent;
 4964                     #           itemsUnderArea $c;
 4965                     #       } );
 4966                     #
 4967                     if (   $str =~ /^\);/
 4968                         && $cached_line_text =~ /^[\)\}\]\s]*$/ )
 4969                     {
 4970 
 4971                         # The way to tell this is if the stacked sequence
 4972                         # numbers of this output line are the reverse of the
 4973                         # stacked sequence numbers of the previous non-blank
 4974                         # line of sequence numbers.  So we can join if the
 4975                         # previous nonblank string of tokens is the mirror
 4976                         # image.  For example if stack )}] is 13:8:6 then we
 4977                         # are looking for a leading stack like [{( which
 4978                         # is 6:8:13. We only need to check the two ends,
 4979                         # because the intermediate tokens must fall in order.
 4980                         # Note on speed: having to split on colons and
 4981                         # eliminate multiple colons might appear to be slow,
 4982                         # but it's not an issue because we almost never come
 4983                         # through here.  In a typical file we don't.
 4984 
 4985                         $seqno_string               =~ s/^:+//;
 4986                         $last_nonblank_seqno_string =~ s/^:+//;
 4987                         $seqno_string               =~ s/:+/:/g;
 4988                         $last_nonblank_seqno_string =~ s/:+/:/g;
 4989 
 4990                         # how many spaces can we outdent?
 4991                         my $diff =
 4992                           $cached_line_leading_space_count -
 4993                           $leading_space_count;
 4994                         if (   $diff > 0
 4995                             && length($seqno_string)
 4996                             && length($last_nonblank_seqno_string) ==
 4997                             length($seqno_string) )
 4998                         {
 4999                             my @seqno_last =
 5000                               ( split /:/, $last_nonblank_seqno_string );
 5001                             my @seqno_now = ( split /:/, $seqno_string );
 5002                             if (   @seqno_now
 5003                                 && @seqno_last
 5004                                 && $seqno_now[-1] == $seqno_last[0]
 5005                                 && $seqno_now[0] == $seqno_last[-1] )
 5006                             {
 5007 
 5008                                 # OK to outdent ..
 5009                                 # for absolute safety, be sure we only remove
 5010                                 # whitespace
 5011                                 my $ws = substr( $test_line, 0, $diff );
 5012                                 if ( ( length($ws) == $diff )
 5013                                     && $ws =~ /^\s+$/ )
 5014                                 {
 5015 
 5016                                     $test_line = substr( $test_line, $diff );
 5017                                     $cached_line_leading_space_count -= $diff;
 5018                                     $last_level_written =
 5019                                       $self->level_change(
 5020                                         $cached_line_leading_space_count,
 5021                                         $diff, $last_level_written );
 5022                                     $self->reduce_valign_buffer_indentation(
 5023                                         $diff);
 5024                                 }
 5025 
 5026                                 # shouldn't happen, but not critical:
 5027                                 ##else {
 5028                                 ## ERROR transferring indentation here
 5029                                 ##}
 5030                             }
 5031                         }
 5032                     }
 5033 
 5034                     $str                   = $test_line;
 5035                     $str_length            = $test_line_length;
 5036                     $leading_string        = "";
 5037                     $leading_string_length = 0;
 5038                     $leading_space_count   = $cached_line_leading_space_count;
 5039                     $level                 = $last_level_written;
 5040                 }
 5041                 else {
 5042                     $self->valign_output_step_C(
 5043                         $cached_line_text,   $cached_line_leading_space_count,
 5044                         $last_level_written, $cached_line_Kend
 5045                     );
 5046                 }
 5047             }
 5048         }
 5049         $cached_line_type        = 0;
 5050         $cached_line_text        = "";
 5051         $cached_line_text_length = 0;
 5052         $cached_line_Kend        = undef;
 5053 
 5054         # make the line to be written
 5055         my $line        = $leading_string . $str;
 5056         my $line_length = $leading_string_length + $str_length;
 5057 
 5058         # Safety check: be sure that a line to be cached as a stacked block
 5059         # brace line ends in the appropriate opening or closing block brace.
 5060         # This should always be the case if the caller set flags correctly.
 5061         # Code '3' is for -sobb, code '4' is for -scbb.
 5062         if ($open_or_close) {
 5063             if (   $open_or_close == 3 && $line !~ /\{\s*$/
 5064                 || $open_or_close == 4 && $line !~ /\}\s*$/ )
 5065             {
 5066                 $open_or_close = 0;
 5067             }
 5068         }
 5069 
 5070         # write or cache this line ...
 5071         # fix for case b999: do not cache an outdented line
 5072         if ( !$open_or_close || $side_comment_length > 0 || $is_outdented_line )
 5073         {
 5074             $self->valign_output_step_C( $line, $leading_space_count, $level,
 5075                 $Kend );
 5076         }
 5077         else {
 5078             $cached_line_text                = $line;
 5079             $cached_line_text_length         = $line_length;
 5080             $cached_line_type                = $open_or_close;
 5081             $cached_line_flag                = $tightness_flag;
 5082             $cached_seqno                    = $seqno;
 5083             $cached_line_valid               = $valid;
 5084             $cached_line_leading_space_count = $leading_space_count;
 5085             $cached_seqno_string             = $seqno_string;
 5086             $cached_line_Kend                = $Kend;
 5087         }
 5088 
 5089         $self->[_last_level_written_]       = $level;
 5090         $self->[_last_side_comment_length_] = $side_comment_length;
 5091         return;
 5092     }
 5093 }
 5094 
 5095 ###############################
 5096 # CODE SECTION 8: Output Step C
 5097 ###############################
 5098 
 5099 {    ## closure for sub valign_output_step_C
 5100 
 5101     # Vertical alignment buffer used by valign_output_step_C
 5102     my $valign_buffer_filling;
 5103     my @valign_buffer;
 5104 
 5105     sub initialize_valign_buffer {
 5106         @valign_buffer         = ();
 5107         $valign_buffer_filling = "";
 5108         return;
 5109     }
 5110 
 5111     sub dump_valign_buffer {
 5112         my ($self) = @_;
 5113         if (@valign_buffer) {
 5114             foreach (@valign_buffer) {
 5115                 $self->valign_output_step_D( @{$_} );
 5116             }
 5117             @valign_buffer = ();
 5118         }
 5119         $valign_buffer_filling = "";
 5120         return;
 5121     }
 5122 
 5123     sub reduce_valign_buffer_indentation {
 5124 
 5125         my ( $self, $diff ) = @_;
 5126         if ( $valign_buffer_filling && $diff ) {
 5127             my $max_valign_buffer = @valign_buffer;
 5128             foreach my $i ( 0 .. $max_valign_buffer - 1 ) {
 5129                 my ( $line, $leading_space_count, $level, $Kend ) =
 5130                   @{ $valign_buffer[$i] };
 5131                 my $ws = substr( $line, 0, $diff );
 5132                 if ( ( length($ws) == $diff ) && $ws =~ /^\s+$/ ) {
 5133                     $line = substr( $line, $diff );
 5134                 }
 5135                 if ( $leading_space_count >= $diff ) {
 5136                     $leading_space_count -= $diff;
 5137                     $level =
 5138                       $self->level_change( $leading_space_count, $diff,
 5139                         $level );
 5140                 }
 5141                 $valign_buffer[$i] =
 5142                   [ $line, $leading_space_count, $level, $Kend ];
 5143             }
 5144         }
 5145         return;
 5146     }
 5147 
 5148     sub valign_output_step_C {
 5149 
 5150         ###############################################################
 5151         # This is Step C in writing vertically aligned lines.
 5152         # Lines are either stored in a buffer or passed along to the next step.
 5153         # The reason for storing lines is that we may later want to reduce their
 5154         # indentation when -sot and -sct are both used.
 5155         ###############################################################
 5156         my ( $self, @args ) = @_;
 5157 
 5158         my $seqno_string               = get_seqno_string();
 5159         my $last_nonblank_seqno_string = get_last_nonblank_seqno_string();
 5160 
 5161         # Dump any saved lines if we see a line with an unbalanced opening or
 5162         # closing token.
 5163         $self->dump_valign_buffer()
 5164           if ( $seqno_string && $valign_buffer_filling );
 5165 
 5166         # Either store or write this line