"Fossies" - the Fresh Open Source Software Archive

Member "Perl-Tidy-20210717/lib/Perl/Tidy/Formatter.pm" (17 Jul 2021, 880316 Bytes) of package /linux/misc/Perl-Tidy-20210717.tar.gz:


As a special service "Fossies" has tried to format the requested source page into HTML format using (guessed) Perl source code syntax highlighting (style: standard) with prefixed line numbers and code folding option. Alternatively you can here view or download the uninterpreted source code file. For more information about "Formatter.pm" see the Fossies "Dox" file reference documentation and the latest Fossies "Diffs" side-by-side code changes report: 20210402_vs_20210717.

    1 #####################################################################
    2 #
    3 # The Perl::Tidy::Formatter package adds indentation, whitespace, and
    4 # line breaks to the token stream
    5 #
    6 #####################################################################
    7 
    8 # Index...
    9 # CODE SECTION 1: Preliminary code, global definitions and sub new
   10 #                 sub new
   11 # CODE SECTION 2: Some Basic Utilities
   12 # CODE SECTION 3: Check and process options
   13 #                 sub check_options
   14 # CODE SECTION 4: Receive lines from the tokenizer
   15 #                 sub write_line
   16 # CODE SECTION 5: Pre-process the entire file
   17 #                 sub finish_formatting
   18 # CODE SECTION 6: Process line-by-line
   19 #                 sub process_all_lines
   20 # CODE SECTION 7: Process lines of code
   21 #                 process_line_of_CODE
   22 # CODE SECTION 8: Utilities for setting breakpoints
   23 #                 sub set_forced_breakpoint
   24 # CODE SECTION 9: Process batches of code
   25 #                 sub grind_batch_of_CODE
   26 # CODE SECTION 10: Code to break long statments
   27 #                  sub set_continuation_breaks
   28 # CODE SECTION 11: Code to break long lists
   29 #                  sub scan_list
   30 # CODE SECTION 12: Code for setting indentation
   31 # CODE SECTION 13: Preparing batches for vertical alignment
   32 #                  sub send_lines_to_vertical_aligner
   33 # CODE SECTION 14: Code for creating closing side comments
   34 #                  sub add_closing_side_comment
   35 # CODE SECTION 15: Summarize
   36 #                  sub wrapup
   37 
   38 #######################################################################
   39 # CODE SECTION 1: Preliminary code and global definitions up to sub new
   40 #######################################################################
   41 
   42 package Perl::Tidy::Formatter;
   43 use strict;
   44 use warnings;
   45 
   46 # this can be turned on for extra checking during development
   47 use constant DEVEL_MODE => 0;
   48 
   49 { #<<< A non-indenting brace to contain all lexical variables
   50 
   51 use Carp;
   52 our $VERSION = '20210717';
   53 
   54 # The Tokenizer will be loaded with the Formatter
   55 ##use Perl::Tidy::Tokenizer;    # for is_keyword()
   56 
   57 sub AUTOLOAD {
   58 
   59     # Catch any undefined sub calls so that we are sure to get
   60     # some diagnostic information.  This sub should never be called
   61     # except for a programming error.
   62     our $AUTOLOAD;
   63     return if ( $AUTOLOAD =~ /\bDESTROY$/ );
   64     my ( $pkg, $fname, $lno ) = caller();
   65     my $my_package = __PACKAGE__;
   66     print STDERR <<EOM;
   67 ======================================================================
   68 Error detected in package '$my_package', version $VERSION
   69 Received unexpected AUTOLOAD call for sub '$AUTOLOAD'
   70 Called from package: '$pkg'  
   71 Called from File '$fname'  at line '$lno'
   72 This error is probably due to a recent programming change
   73 ======================================================================
   74 EOM
   75     exit 1;
   76 }
   77 
   78 sub DESTROY {
   79     my $self = shift;
   80     $self->_decrement_count();
   81     return;
   82 }
   83 
   84 sub Die {
   85     my ($msg) = @_;
   86     Perl::Tidy::Die($msg);
   87     croak "unexpected return from Perl::Tidy::Die";
   88 }
   89 
   90 sub Warn {
   91     my ($msg) = @_;
   92     Perl::Tidy::Warn($msg);
   93     return;
   94 }
   95 
   96 sub Fault {
   97     my ($msg) = @_;
   98 
   99     # This routine is called for errors that really should not occur
  100     # except if there has been a bug introduced by a recent program change.
  101     # Please add comments at calls to Fault to explain why the call
  102     # should not occur, and where to look to fix it.
  103     my ( $package0, $filename0, $line0, $subroutine0 ) = caller(0);
  104     my ( $package1, $filename1, $line1, $subroutine1 ) = caller(1);
  105     my ( $package2, $filename2, $line2, $subroutine2 ) = caller(2);
  106     my $input_stream_name = get_input_stream_name();
  107 
  108     Die(<<EOM);
  109 ==============================================================================
  110 While operating on input stream with name: '$input_stream_name'
  111 A fault was detected at line $line0 of sub '$subroutine1'
  112 in file '$filename1'
  113 which was called from line $line1 of sub '$subroutine2'
  114 Message: '$msg'
  115 This is probably an error introduced by a recent programming change. 
  116 Perl::Tidy::Formatter.pm reports VERSION='$VERSION'.
  117 ==============================================================================
  118 EOM
  119 
  120     # We shouldn't get here, but this return is to keep Perl-Critic from
  121     # complaining.
  122     return;
  123 }
  124 
  125 sub Exit {
  126     my ($msg) = @_;
  127     Perl::Tidy::Exit($msg);
  128     croak "unexpected return from Perl::Tidy::Exit";
  129 }
  130 
  131 # Global variables ...
  132 my (
  133 
  134     ##################################################################
  135     # Section 1: Global variables which are either always constant or
  136     # are constant after being configured by user-supplied
  137     # parameters.  They remain constant as a file is being processed.
  138     ##################################################################
  139 
  140     # user parameters and shortcuts
  141     $rOpts,
  142     $rOpts_closing_side_comment_maximum_text,
  143     $rOpts_continuation_indentation,
  144     $rOpts_indent_columns,
  145     $rOpts_line_up_parentheses,
  146     $rOpts_maximum_line_length,
  147     $rOpts_variable_maximum_line_length,
  148     $rOpts_block_brace_tightness,
  149     $rOpts_block_brace_vertical_tightness,
  150     $rOpts_stack_closing_block_brace,
  151     $rOpts_maximum_consecutive_blank_lines,
  152 
  153     $rOpts_recombine,
  154     $rOpts_add_newlines,
  155     $rOpts_break_at_old_comma_breakpoints,
  156     $rOpts_ignore_old_breakpoints,
  157 
  158     $rOpts_keep_interior_semicolons,
  159     $rOpts_comma_arrow_breakpoints,
  160     $rOpts_maximum_fields_per_table,
  161     $rOpts_one_line_block_semicolons,
  162     $rOpts_break_at_old_semicolon_breakpoints,
  163 
  164     $rOpts_tee_side_comments,
  165     $rOpts_tee_block_comments,
  166     $rOpts_tee_pod,
  167     $rOpts_delete_side_comments,
  168     $rOpts_delete_closing_side_comments,
  169     $rOpts_format_skipping,
  170     $rOpts_indent_only,
  171     $rOpts_static_block_comments,
  172 
  173     $rOpts_add_whitespace,
  174     $rOpts_delete_old_whitespace,
  175     $rOpts_freeze_whitespace,
  176     $rOpts_function_paren_vertical_alignment,
  177     $rOpts_whitespace_cycle,
  178     $rOpts_ignore_side_comment_lengths,
  179 
  180     $rOpts_break_at_old_attribute_breakpoints,
  181     $rOpts_break_at_old_keyword_breakpoints,
  182     $rOpts_break_at_old_logical_breakpoints,
  183     $rOpts_break_at_old_ternary_breakpoints,
  184     $rOpts_short_concatenation_item_length,
  185     $rOpts_closing_side_comment_else_flag,
  186     $rOpts_fuzzy_line_length,
  187 
  188     # Static hashes initialized in a BEGIN block
  189     %is_assignment,
  190     %is_keyword_returning_list,
  191     %is_if_unless_and_or_last_next_redo_return,
  192     %is_if_elsif_else_unless_while_until_for_foreach,
  193     %is_if_unless_while_until_for,
  194     %is_last_next_redo_return,
  195     %is_sort_map_grep,
  196     %is_sort_map_grep_eval,
  197     %is_if_unless,
  198     %is_and_or,
  199     %is_chain_operator,
  200     %is_block_without_semicolon,
  201     %ok_to_add_semicolon_for_block_type,
  202     %is_opening_type,
  203     %is_closing_type,
  204     %is_opening_token,
  205     %is_closing_token,
  206     %is_equal_or_fat_comma,
  207     %is_block_with_ci,
  208     %is_counted_type,
  209     %is_opening_sequence_token,
  210     %is_closing_sequence_token,
  211     %is_container_label_type,
  212 
  213     @all_operators,
  214 
  215     # Initialized in check_options. These are constants and could
  216     # just as well be initialized in a BEGIN block.
  217     %is_do_follower,
  218     %is_if_brace_follower,
  219     %is_else_brace_follower,
  220     %is_anon_sub_brace_follower,
  221     %is_anon_sub_1_brace_follower,
  222     %is_other_brace_follower,
  223 
  224     # Initialized in sub initialize_whitespace_hashes;
  225     # Some can be modified according to user parameters.
  226     %binary_ws_rules,
  227     %want_left_space,
  228     %want_right_space,
  229 
  230     # Configured in sub initialize_bond_strength_hashes
  231     %right_bond_strength,
  232     %left_bond_strength,
  233 
  234     # Hashes for -kbb=s and -kba=s
  235     %keep_break_before_type,
  236     %keep_break_after_type,
  237 
  238     # Initialized in check_options, modified by prepare_cuddled_block_types:
  239     %want_one_line_block,
  240     %is_braces_left_exclude_block,
  241 
  242     # Initialized in sub prepare_cuddled_block_types
  243     $rcuddled_block_types,
  244 
  245     # Initialized and configured in check_optioms
  246     %outdent_keyword,
  247     %keyword_paren_inner_tightness,
  248 
  249     %want_break_before,
  250 
  251     %break_before_container_types,
  252     %container_indentation_options,
  253 
  254     %space_after_keyword,
  255 
  256     %tightness,
  257     %matching_token,
  258 
  259     %opening_vertical_tightness,
  260     %closing_vertical_tightness,
  261     %closing_token_indentation,
  262     $some_closing_token_indentation,
  263 
  264     %opening_token_right,
  265     %stack_opening_token,
  266     %stack_closing_token,
  267 
  268     %weld_nested_exclusion_rules,
  269     %line_up_parentheses_exclusion_rules,
  270 
  271     # regex patterns for text identification.
  272     # Most are initialized in a sub make_**_pattern during configuration.
  273     # Most can be configured by user parameters.
  274     $SUB_PATTERN,
  275     $ASUB_PATTERN,
  276     $ANYSUB_PATTERN,
  277     $static_block_comment_pattern,
  278     $static_side_comment_pattern,
  279     $format_skipping_pattern_begin,
  280     $format_skipping_pattern_end,
  281     $non_indenting_brace_pattern,
  282     $bli_pattern,
  283     $block_brace_vertical_tightness_pattern,
  284     $blank_lines_after_opening_block_pattern,
  285     $blank_lines_before_closing_block_pattern,
  286     $keyword_group_list_pattern,
  287     $keyword_group_list_comment_pattern,
  288     $closing_side_comment_prefix_pattern,
  289     $closing_side_comment_list_pattern,
  290 
  291     # Table to efficiently find indentation and max line length
  292     # from level.
  293     @maximum_line_length_at_level,
  294     @maximum_text_length_at_level,
  295 
  296     # Total number of sequence items in a weld, for quick checks
  297     $total_weld_count,
  298 
  299     #########################################################
  300     # Section 2: Work arrays for the current batch of tokens.
  301     #########################################################
  302 
  303     # These are re-initialized for each batch of code
  304     # in sub initialize_batch_variables.
  305     $max_index_to_go,
  306     @block_type_to_go,
  307     @type_sequence_to_go,
  308     @bond_strength_to_go,
  309     @forced_breakpoint_to_go,
  310     @token_lengths_to_go,
  311     @summed_lengths_to_go,
  312     @levels_to_go,
  313     @leading_spaces_to_go,
  314     @reduced_spaces_to_go,
  315     @mate_index_to_go,
  316     @ci_levels_to_go,
  317     @nesting_depth_to_go,
  318     @nobreak_to_go,
  319     @old_breakpoint_to_go,
  320     @tokens_to_go,
  321     @K_to_go,
  322     @types_to_go,
  323     @inext_to_go,
  324     @iprev_to_go,
  325     @parent_seqno_to_go,
  326 
  327 );
  328 
  329 BEGIN {
  330 
  331     # Initialize constants...
  332 
  333     # Array index names for token variables
  334     my $i = 0;
  335     use constant {
  336         _BLOCK_TYPE_        => $i++,
  337         _CI_LEVEL_          => $i++,
  338         _CUMULATIVE_LENGTH_ => $i++,
  339         _LINE_INDEX_        => $i++,
  340         _KNEXT_SEQ_ITEM_    => $i++,
  341         _LEVEL_             => $i++,
  342         _SLEVEL_            => $i++,
  343         _TOKEN_             => $i++,
  344         _TOKEN_LENGTH_      => $i++,
  345         _TYPE_              => $i++,
  346         _TYPE_SEQUENCE_     => $i++,
  347 
  348         # Number of token variables; must be last in list:
  349         _NVARS => $i++,
  350     };
  351 
  352     # Array index names for $self (which is an array ref)
  353     $i = 0;
  354     use constant {
  355         _rlines_                    => $i++,
  356         _rlines_new_                => $i++,
  357         _rLL_                       => $i++,
  358         _Klimit_                    => $i++,
  359         _K_opening_container_       => $i++,
  360         _K_closing_container_       => $i++,
  361         _K_opening_ternary_         => $i++,
  362         _K_closing_ternary_         => $i++,
  363         _K_first_seq_item_          => $i++,
  364         _rK_phantom_semicolons_     => $i++,
  365         _rtype_count_by_seqno_      => $i++,
  366         _ris_function_call_paren_   => $i++,
  367         _rlec_count_by_seqno_       => $i++,
  368         _ris_broken_container_      => $i++,
  369         _ris_permanently_broken_    => $i++,
  370         _rhas_list_                 => $i++,
  371         _rhas_broken_list_          => $i++,
  372         _rhas_broken_list_with_lec_ => $i++,
  373         _rhas_code_block_           => $i++,
  374         _rhas_broken_code_block_    => $i++,
  375         _rhas_ternary_              => $i++,
  376         _ris_excluded_lp_container_ => $i++,
  377         _rwant_reduced_ci_          => $i++,
  378         _rno_xci_by_seqno_          => $i++,
  379         _ris_bli_container_         => $i++,
  380         _rparent_of_seqno_          => $i++,
  381         _rchildren_of_seqno_        => $i++,
  382         _ris_list_by_seqno_         => $i++,
  383         _rbreak_container_          => $i++,
  384         _rshort_nested_             => $i++,
  385         _length_function_           => $i++,
  386         _is_encoded_data_           => $i++,
  387         _fh_tee_                    => $i++,
  388         _sink_object_               => $i++,
  389         _file_writer_object_        => $i++,
  390         _vertical_aligner_object_   => $i++,
  391         _logger_object_             => $i++,
  392         _radjusted_levels_          => $i++,
  393         _this_batch_                => $i++,
  394 
  395         _last_output_short_opening_token_ => $i++,
  396 
  397         _last_line_leading_type_       => $i++,
  398         _last_line_leading_level_      => $i++,
  399         _last_last_line_leading_level_ => $i++,
  400 
  401         _added_semicolon_count_    => $i++,
  402         _first_added_semicolon_at_ => $i++,
  403         _last_added_semicolon_at_  => $i++,
  404 
  405         _deleted_semicolon_count_    => $i++,
  406         _first_deleted_semicolon_at_ => $i++,
  407         _last_deleted_semicolon_at_  => $i++,
  408 
  409         _embedded_tab_count_    => $i++,
  410         _first_embedded_tab_at_ => $i++,
  411         _last_embedded_tab_at_  => $i++,
  412 
  413         _first_tabbing_disagreement_       => $i++,
  414         _last_tabbing_disagreement_        => $i++,
  415         _tabbing_disagreement_count_       => $i++,
  416         _in_tabbing_disagreement_          => $i++,
  417         _first_brace_tabbing_disagreement_ => $i++,
  418         _in_brace_tabbing_disagreement_    => $i++,
  419 
  420         _saw_VERSION_in_this_file_ => $i++,
  421         _saw_END_or_DATA_          => $i++,
  422 
  423         _rK_weld_left_         => $i++,
  424         _rK_weld_right_        => $i++,
  425         _rweld_len_right_at_K_ => $i++,
  426 
  427         _rspecial_side_comment_type_ => $i++,
  428 
  429         _rseqno_controlling_my_ci_ => $i++,
  430         _ris_seqno_controlling_ci_ => $i++,
  431         _save_logfile_             => $i++,
  432         _maximum_level_            => $i++,
  433 
  434         _rKrange_code_without_comments_ => $i++,
  435         _rbreak_before_Kfirst_          => $i++,
  436         _rbreak_after_Klast_            => $i++,
  437         _rwant_container_open_          => $i++,
  438         _converged_                     => $i++,
  439 
  440         _rstarting_multiline_qw_seqno_by_K_ => $i++,
  441         _rending_multiline_qw_seqno_by_K_   => $i++,
  442         _rKrange_multiline_qw_by_seqno_     => $i++,
  443         _rmultiline_qw_has_extra_level_     => $i++,
  444         _rbreak_before_container_by_seqno_  => $i++,
  445         _ris_essential_old_breakpoint_      => $i++,
  446         _roverride_cab3_                    => $i++,
  447         _ris_assigned_structure_            => $i++,
  448     };
  449 
  450     # Array index names for _this_batch_ (in above list)
  451     # So _this_batch_ is a sub-array of $self for
  452     # holding the batches of tokens being processed.
  453     $i = 0;
  454     use constant {
  455         _starting_in_quote_        => $i++,
  456         _ending_in_quote_          => $i++,
  457         _is_static_block_comment_  => $i++,
  458         _rlines_K_                 => $i++,
  459         _do_not_pad_               => $i++,
  460         _ibeg0_                    => $i++,
  461         _peak_batch_size_          => $i++,
  462         _max_index_to_go_          => $i++,
  463         _rK_to_go_                 => $i++,
  464         _batch_count_              => $i++,
  465         _rix_seqno_controlling_ci_ => $i++,
  466         _batch_CODE_type_          => $i++,
  467     };
  468 
  469     # Sequence number assigned to the root of sequence tree.
  470     # The minimum of the actual sequences numbers is 4, so we can use 1
  471     use constant SEQ_ROOT => 1;
  472 
  473     # Codes for insertion and deletion of blanks
  474     use constant DELETE => 0;
  475     use constant STABLE => 1;
  476     use constant INSERT => 2;
  477 
  478     # whitespace codes
  479     use constant WS_YES      => 1;
  480     use constant WS_OPTIONAL => 0;
  481     use constant WS_NO       => -1;
  482 
  483     # Token bond strengths.
  484     use constant NO_BREAK    => 10000;
  485     use constant VERY_STRONG => 100;
  486     use constant STRONG      => 2.1;
  487     use constant NOMINAL     => 1.1;
  488     use constant WEAK        => 0.8;
  489     use constant VERY_WEAK   => 0.55;
  490 
  491     # values for testing indexes in output array
  492     use constant UNDEFINED_INDEX => -1;
  493 
  494     # Maximum number of little messages; probably need not be changed.
  495     use constant MAX_NAG_MESSAGES => 6;
  496 
  497     # increment between sequence numbers for each type
  498     # For example, ?: pairs might have numbers 7,11,15,...
  499     use constant TYPE_SEQUENCE_INCREMENT => 4;
  500 
  501     # Initialize constant hashes ...
  502     my @q;
  503 
  504     @q = qw(
  505       = **= += *= &= <<= &&=
  506       -= /= |= >>= ||= //=
  507       .= %= ^=
  508       x=
  509     );
  510     @is_assignment{@q} = (1) x scalar(@q);
  511 
  512     @q = qw(
  513       grep
  514       keys
  515       map
  516       reverse
  517       sort
  518       split
  519     );
  520     @is_keyword_returning_list{@q} = (1) x scalar(@q);
  521 
  522     @q = qw(is if unless and or err last next redo return);
  523     @is_if_unless_and_or_last_next_redo_return{@q} = (1) x scalar(@q);
  524 
  525     # These block types may have text between the keyword and opening
  526     # curly.  Note: 'else' does not, but must be included to allow trailing
  527     # if/elsif text to be appended.
  528     # patch for SWITCH/CASE: added 'case' and 'when'
  529     @q = qw(if elsif else unless while until for foreach case when catch);
  530     @is_if_elsif_else_unless_while_until_for_foreach{@q} =
  531       (1) x scalar(@q);
  532 
  533     @q = qw(if unless while until for);
  534     @is_if_unless_while_until_for{@q} =
  535       (1) x scalar(@q);
  536 
  537     @q = qw(last next redo return);
  538     @is_last_next_redo_return{@q} = (1) x scalar(@q);
  539 
  540     @q = qw(sort map grep);
  541     @is_sort_map_grep{@q} = (1) x scalar(@q);
  542 
  543     @q = qw(sort map grep eval);
  544     @is_sort_map_grep_eval{@q} = (1) x scalar(@q);
  545 
  546     @q = qw(if unless);
  547     @is_if_unless{@q} = (1) x scalar(@q);
  548 
  549     @q = qw(and or err);
  550     @is_and_or{@q} = (1) x scalar(@q);
  551 
  552     # Identify certain operators which often occur in chains.
  553     # Note: the minus (-) causes a side effect of padding of the first line in
  554     # something like this (by sub set_logical_padding):
  555     #    Checkbutton => 'Transmission checked',
  556     #   -variable    => \$TRANS
  557     # This usually improves appearance so it seems ok.
  558     @q = qw(&& || and or : ? . + - * /);
  559     @is_chain_operator{@q} = (1) x scalar(@q);
  560 
  561     # Operators that the user can request break before or after.
  562     # Note that some are keywords
  563     @all_operators = qw(% + - * / x != == >= <= =~ !~ < > | &
  564       = **= += *= &= <<= &&= -= /= |= >>= ||= //= .= %= ^= x=
  565       . : ? && || and or err xor
  566     );
  567 
  568     # We can remove semicolons after blocks preceded by these keywords
  569     @q =
  570       qw(BEGIN END CHECK INIT AUTOLOAD DESTROY UNITCHECK continue if elsif else
  571       unless while until for foreach given when default);
  572     @is_block_without_semicolon{@q} = (1) x scalar(@q);
  573 
  574     # We will allow semicolons to be added within these block types
  575     # as well as sub and package blocks.
  576     # NOTES:
  577     # 1. Note that these keywords are omitted:
  578     #     switch case given when default sort map grep
  579     # 2. It is also ok to add for sub and package blocks and a labeled block
  580     # 3. But not okay for other perltidy types including:
  581     #     { } ; G t
  582     # 4. Test files: blktype.t, blktype1.t, semicolon.t
  583     @q =
  584       qw( BEGIN END CHECK INIT AUTOLOAD DESTROY UNITCHECK continue if elsif else
  585       unless do while until eval for foreach );
  586     @ok_to_add_semicolon_for_block_type{@q} = (1) x scalar(@q);
  587 
  588     # 'L' is token for opening { at hash key
  589     @q = qw< L { ( [ >;
  590     @is_opening_type{@q} = (1) x scalar(@q);
  591 
  592     # 'R' is token for closing } at hash key
  593     @q = qw< R } ) ] >;
  594     @is_closing_type{@q} = (1) x scalar(@q);
  595 
  596     @q = qw< { ( [ >;
  597     @is_opening_token{@q} = (1) x scalar(@q);
  598 
  599     @q = qw< } ) ] >;
  600     @is_closing_token{@q} = (1) x scalar(@q);
  601 
  602     @q = qw< { ( [ ? >;
  603     @is_opening_sequence_token{@q} = (1) x scalar(@q);
  604 
  605     @q = qw< } ) ] : >;
  606     @is_closing_sequence_token{@q} = (1) x scalar(@q);
  607 
  608     # a hash needed by sub scan_list for labeling containers
  609     @q = qw( k => && || ? : . );
  610     @is_container_label_type{@q} = (1) x scalar(@q);
  611 
  612     # Braces -bbht etc must follow these. Note: experimentation with
  613     # including a simple comma shows that it adds little and can lead
  614     # to poor formatting in complex lists.
  615     @q = qw( = => );
  616     @is_equal_or_fat_comma{@q} = (1) x scalar(@q);
  617 
  618     @q = qw( => ; h f );
  619     push @q, ',';
  620     @is_counted_type{@q} = (1) x scalar(@q);
  621 
  622     # These block types can take ci.  This is used by the -xci option.
  623     # Note that the 'sub' in this list is an anonymous sub.  To be more correct
  624     # we could remove sub and use ASUB pattern to also handle a
  625     # prototype/signature.  But that would slow things down and would probably
  626     # never be useful.
  627     @q = qw( do sub eval sort map grep );
  628     @is_block_with_ci{@q} = (1) x scalar(@q);
  629 
  630 }
  631 
  632 {    ## begin closure to count instanes
  633 
  634     # methods to count instances
  635     my $_count = 0;
  636     sub get_count        { return $_count; }
  637     sub _increment_count { return ++$_count }
  638     sub _decrement_count { return --$_count }
  639 } ## end closure to count instanes
  640 
  641 sub new {
  642 
  643     my ( $class, @args ) = @_;
  644 
  645     # we are given an object with a write_line() method to take lines
  646     my %defaults = (
  647         sink_object        => undef,
  648         diagnostics_object => undef,
  649         logger_object      => undef,
  650         length_function    => sub { return length( $_[0] ) },
  651         is_encoded_data    => "",
  652         fh_tee             => undef,
  653     );
  654     my %args = ( %defaults, @args );
  655 
  656     my $length_function    = $args{length_function};
  657     my $is_encoded_data    = $args{is_encoded_data};
  658     my $fh_tee             = $args{fh_tee};
  659     my $logger_object      = $args{logger_object};
  660     my $diagnostics_object = $args{diagnostics_object};
  661 
  662     # we create another object with a get_line() and peek_ahead() method
  663     my $sink_object = $args{sink_object};
  664     my $file_writer_object =
  665       Perl::Tidy::FileWriter->new( $sink_object, $rOpts, $logger_object );
  666 
  667     # initialize closure variables...
  668     set_logger_object($logger_object);
  669     set_diagnostics_object($diagnostics_object);
  670     initialize_gnu_vars();
  671     initialize_csc_vars();
  672     initialize_scan_list();
  673     initialize_saved_opening_indentation();
  674     initialize_undo_ci();
  675     initialize_process_line_of_CODE();
  676     initialize_grind_batch_of_CODE();
  677     initialize_adjusted_indentation();
  678     initialize_postponed_breakpoint();
  679     initialize_batch_variables();
  680     initialize_forced_breakpoint_vars();
  681     initialize_gnu_batch_vars();
  682     initialize_write_line();
  683 
  684     my $vertical_aligner_object = Perl::Tidy::VerticalAligner->new(
  685         rOpts              => $rOpts,
  686         file_writer_object => $file_writer_object,
  687         logger_object      => $logger_object,
  688         diagnostics_object => $diagnostics_object,
  689         length_function    => $length_function
  690     );
  691 
  692     write_logfile_entry("\nStarting tokenization pass...\n");
  693 
  694     if ( $rOpts->{'entab-leading-whitespace'} ) {
  695         write_logfile_entry(
  696 "Leading whitespace will be entabbed with $rOpts->{'entab-leading-whitespace'} spaces per tab\n"
  697         );
  698     }
  699     elsif ( $rOpts->{'tabs'} ) {
  700         write_logfile_entry("Indentation will be with a tab character\n");
  701     }
  702     else {
  703         write_logfile_entry(
  704             "Indentation will be with $rOpts->{'indent-columns'} spaces\n");
  705     }
  706 
  707     # Initialize the $self array reference.
  708     # To add an item, first add a constant index in the BEGIN block above.
  709     my $self = [];
  710 
  711     # Basic data structures...
  712     $self->[_rlines_]     = [];    # = ref to array of lines of the file
  713     $self->[_rlines_new_] = [];    # = ref to array of output lines
  714 
  715     # 'rLL' = reference to the liner array of all tokens in the file.
  716     # 'LL' stands for 'Linked List'. Using a linked list was a disaster, but
  717     # 'LL' stuck because it is easy to type.
  718     $self->[_rLL_]    = [];
  719     $self->[_Klimit_] = undef;    # = maximum K index for rLL.
  720 
  721     # Arrays for quickly traversing the structures
  722     $self->[_K_opening_container_] = {};
  723     $self->[_K_closing_container_] = {};
  724     $self->[_K_opening_ternary_]   = {};
  725     $self->[_K_closing_ternary_]   = {};
  726     $self->[_K_first_seq_item_]    = undef; # K of first token with a sequence #
  727 
  728     # Array of phantom semicolons, in case we ever need to undo them
  729     $self->[_rK_phantom_semicolons_] = undef;
  730 
  731     # Mostly list characteristics and processing flags
  732     $self->[_rtype_count_by_seqno_]      = {};
  733     $self->[_ris_function_call_paren_]   = {};
  734     $self->[_rlec_count_by_seqno_]       = {};
  735     $self->[_ris_broken_container_]      = {};
  736     $self->[_ris_permanently_broken_]    = {};
  737     $self->[_rhas_list_]                 = {};
  738     $self->[_rhas_broken_list_]          = {};
  739     $self->[_rhas_broken_list_with_lec_] = {};
  740     $self->[_rhas_code_block_]           = {};
  741     $self->[_rhas_broken_code_block_]    = {};
  742     $self->[_rhas_ternary_]              = {};
  743     $self->[_ris_excluded_lp_container_] = {};
  744     $self->[_rwant_reduced_ci_]          = {};
  745     $self->[_rno_xci_by_seqno_]          = {};
  746     $self->[_ris_bli_container_]         = {};
  747     $self->[_rparent_of_seqno_]          = {};
  748     $self->[_rchildren_of_seqno_]        = {};
  749     $self->[_ris_list_by_seqno_]         = {};
  750 
  751     $self->[_rbreak_container_] = {};                 # prevent one-line blocks
  752     $self->[_rshort_nested_]    = {};                 # blocks not forced open
  753     $self->[_length_function_]  = $length_function;
  754     $self->[_is_encoded_data_]  = $is_encoded_data;
  755 
  756     # Some objects...
  757     $self->[_fh_tee_]                  = $fh_tee;
  758     $self->[_sink_object_]             = $sink_object;
  759     $self->[_file_writer_object_]      = $file_writer_object;
  760     $self->[_vertical_aligner_object_] = $vertical_aligner_object;
  761     $self->[_logger_object_]           = $logger_object;
  762 
  763     # Reference to the batch being processed
  764     $self->[_this_batch_] = [];
  765 
  766     # Memory of processed text...
  767     $self->[_last_last_line_leading_level_]    = 0;
  768     $self->[_last_line_leading_level_]         = 0;
  769     $self->[_last_line_leading_type_]          = '#';
  770     $self->[_last_output_short_opening_token_] = 0;
  771     $self->[_added_semicolon_count_]           = 0;
  772     $self->[_first_added_semicolon_at_]        = 0;
  773     $self->[_last_added_semicolon_at_]         = 0;
  774     $self->[_deleted_semicolon_count_]         = 0;
  775     $self->[_first_deleted_semicolon_at_]      = 0;
  776     $self->[_last_deleted_semicolon_at_]       = 0;
  777     $self->[_embedded_tab_count_]              = 0;
  778     $self->[_first_embedded_tab_at_]           = 0;
  779     $self->[_last_embedded_tab_at_]            = 0;
  780     $self->[_first_tabbing_disagreement_]      = 0;
  781     $self->[_last_tabbing_disagreement_]       = 0;
  782     $self->[_tabbing_disagreement_count_]      = 0;
  783     $self->[_in_tabbing_disagreement_]         = 0;
  784     $self->[_saw_VERSION_in_this_file_]        = !$rOpts->{'pass-version-line'};
  785     $self->[_saw_END_or_DATA_]                 = 0;
  786 
  787     # Hashes related to container welding...
  788     $self->[_radjusted_levels_] = [];
  789 
  790     # Weld data structures
  791     $self->[_rK_weld_left_]         = {};
  792     $self->[_rK_weld_right_]        = {};
  793     $self->[_rweld_len_right_at_K_] = {};
  794 
  795     # -xci stuff
  796     $self->[_rseqno_controlling_my_ci_] = {};
  797     $self->[_ris_seqno_controlling_ci_] = {};
  798 
  799     $self->[_rspecial_side_comment_type_] = {};
  800     $self->[_maximum_level_]              = 0;
  801 
  802     $self->[_rKrange_code_without_comments_] = [];
  803     $self->[_rbreak_before_Kfirst_]          = {};
  804     $self->[_rbreak_after_Klast_]            = {};
  805     $self->[_rwant_container_open_]          = {};
  806     $self->[_converged_]                     = 0;
  807 
  808     # qw stuff
  809     $self->[_rstarting_multiline_qw_seqno_by_K_] = {};
  810     $self->[_rending_multiline_qw_seqno_by_K_]   = {};
  811     $self->[_rKrange_multiline_qw_by_seqno_]     = {};
  812     $self->[_rmultiline_qw_has_extra_level_]     = {};
  813 
  814     $self->[_rbreak_before_container_by_seqno_] = {};
  815     $self->[_ris_essential_old_breakpoint_]     = {};
  816     $self->[_roverride_cab3_]                   = {};
  817     $self->[_ris_assigned_structure_]           = {};
  818 
  819     # This flag will be updated later by a call to get_save_logfile()
  820     $self->[_save_logfile_] = defined($logger_object);
  821 
  822     bless $self, $class;
  823 
  824     # Safety check..this is not a class yet
  825     if ( _increment_count() > 1 ) {
  826         confess
  827 "Attempt to create more than 1 object in $class, which is not a true class yet\n";
  828     }
  829     return $self;
  830 }
  831 
  832 ######################################
  833 # CODE SECTION 2: Some Basic Utilities
  834 ######################################
  835 
  836 {    ## begin closure for logger routines
  837     my $logger_object;
  838 
  839     # Called once per file to initialize the logger object
  840     sub set_logger_object {
  841         $logger_object = shift;
  842         return;
  843     }
  844 
  845     sub get_logger_object {
  846         return $logger_object;
  847     }
  848 
  849     sub get_input_stream_name {
  850         my $input_stream_name = "";
  851         if ($logger_object) {
  852             $input_stream_name = $logger_object->get_input_stream_name();
  853         }
  854         return $input_stream_name;
  855     }
  856 
  857     # interface to Perl::Tidy::Logger routines
  858     sub warning {
  859         my ($msg) = @_;
  860         if ($logger_object) { $logger_object->warning($msg); }
  861         return;
  862     }
  863 
  864     sub complain {
  865         my ($msg) = @_;
  866         if ($logger_object) {
  867             $logger_object->complain($msg);
  868         }
  869         return;
  870     }
  871 
  872     sub write_logfile_entry {
  873         my @msg = @_;
  874         if ($logger_object) {
  875             $logger_object->write_logfile_entry(@msg);
  876         }
  877         return;
  878     }
  879 
  880     sub report_definite_bug {
  881         if ($logger_object) {
  882             $logger_object->report_definite_bug();
  883         }
  884         return;
  885     }
  886 
  887     sub get_saw_brace_error {
  888         if ($logger_object) {
  889             return $logger_object->get_saw_brace_error();
  890         }
  891         return;
  892     }
  893 
  894     sub we_are_at_the_last_line {
  895         if ($logger_object) {
  896             $logger_object->we_are_at_the_last_line();
  897         }
  898         return;
  899     }
  900 
  901 } ## end closure for logger routines
  902 
  903 {    ## begin closure for diagnostics routines
  904     my $diagnostics_object;
  905 
  906     # Called once per file to initialize the diagnostics object
  907     sub set_diagnostics_object {
  908         $diagnostics_object = shift;
  909         return;
  910     }
  911 
  912     sub write_diagnostics {
  913         my ($msg) = @_;
  914         if ($diagnostics_object) {
  915             $diagnostics_object->write_diagnostics($msg);
  916         }
  917         return;
  918     }
  919 } ## end closure for diagnostics routines
  920 
  921 sub get_convergence_check {
  922     my ($self) = @_;
  923     return $self->[_converged_];
  924 }
  925 
  926 sub get_added_semicolon_count {
  927     my $self = shift;
  928     return $self->[_added_semicolon_count_];
  929 }
  930 
  931 sub get_output_line_number {
  932     my ($self) = @_;
  933     my $vao = $self->[_vertical_aligner_object_];
  934     return $vao->get_output_line_number();
  935 }
  936 
  937 sub check_token_array {
  938     my $self = shift;
  939 
  940     # Check for errors in the array of tokens. This is only called now
  941     # when the DEVEL_MODE flag is set, so this Fault will only occur
  942     # during code development.
  943     my $rLL = $self->[_rLL_];
  944     for ( my $KK = 0 ; $KK < @{$rLL} ; $KK++ ) {
  945         my $nvars = @{ $rLL->[$KK] };
  946         if ( $nvars != _NVARS ) {
  947             my $NVARS = _NVARS;
  948             my $type  = $rLL->[$KK]->[_TYPE_];
  949             $type = '*' unless defined($type);
  950 
  951             # The number of variables per token node is _NVARS and was set when
  952             # the array indexes were generated. So if the number of variables
  953             # is different we have done something wrong, like not store all of
  954             # them in sub 'write_line' when they were received from the
  955             # tokenizer.
  956             Fault(
  957 "number of vars for node $KK, type '$type', is $nvars but should be $NVARS"
  958             );
  959         }
  960         foreach my $var ( _TOKEN_, _TYPE_ ) {
  961             if ( !defined( $rLL->[$KK]->[$var] ) ) {
  962                 my $iline = $rLL->[$KK]->[_LINE_INDEX_];
  963 
  964                 # This is a simple check that each token has some basic
  965                 # variables.  In other words, that there are no holes in the
  966                 # array of tokens.  Sub 'write_line' pushes tokens into the
  967                 # $rLL array, so this should guarantee no gaps.
  968                 Fault("Undefined variable $var for K=$KK, line=$iline\n");
  969             }
  970         }
  971     }
  972     return;
  973 }
  974 
  975 sub want_blank_line {
  976     my $self = shift;
  977     $self->flush();
  978     my $file_writer_object = $self->[_file_writer_object_];
  979     $file_writer_object->want_blank_line();
  980     return;
  981 }
  982 
  983 sub write_unindented_line {
  984     my ( $self, $line ) = @_;
  985     $self->flush();
  986     my $file_writer_object = $self->[_file_writer_object_];
  987     $file_writer_object->write_line($line);
  988     return;
  989 }
  990 
  991 sub consecutive_nonblank_lines {
  992     my ($self)             = @_;
  993     my $file_writer_object = $self->[_file_writer_object_];
  994     my $vao                = $self->[_vertical_aligner_object_];
  995     return $file_writer_object->get_consecutive_nonblank_lines() +
  996       $vao->get_cached_line_count();
  997 }
  998 
  999 sub trim {
 1000 
 1001     # trim leading and trailing whitespace from a string
 1002     my $str = shift;
 1003     $str =~ s/\s+$//;
 1004     $str =~ s/^\s+//;
 1005     return $str;
 1006 }
 1007 
 1008 sub max {
 1009     my (@vals) = @_;
 1010     my $max = shift @vals;
 1011     for (@vals) { $max = $_ > $max ? $_ : $max }
 1012     return $max;
 1013 }
 1014 
 1015 sub min {
 1016     my (@vals) = @_;
 1017     my $min = shift @vals;
 1018     for (@vals) { $min = $_ < $min ? $_ : $min }
 1019     return $min;
 1020 }
 1021 
 1022 sub split_words {
 1023 
 1024     # given a string containing words separated by whitespace,
 1025     # return the list of words
 1026     my ($str) = @_;
 1027     return unless $str;
 1028     $str =~ s/\s+$//;
 1029     $str =~ s/^\s+//;
 1030     return split( /\s+/, $str );
 1031 }
 1032 
 1033 ###########################################
 1034 # CODE SECTION 3: Check and process options
 1035 ###########################################
 1036 
 1037 sub check_options {
 1038 
 1039     # This routine is called to check the user-supplied run parameters
 1040     # and to configure the control hashes to them.
 1041     $rOpts = shift;
 1042 
 1043     initialize_whitespace_hashes();
 1044     initialize_bond_strength_hashes();
 1045 
 1046     # Make needed regex patterns for matching text.
 1047     # NOTE: sub_matching_patterns must be made first because later patterns use
 1048     # them; see RT #133130.
 1049     make_sub_matching_pattern();
 1050     make_static_block_comment_pattern();
 1051     make_static_side_comment_pattern();
 1052     make_closing_side_comment_prefix();
 1053     make_closing_side_comment_list_pattern();
 1054     $format_skipping_pattern_begin =
 1055       make_format_skipping_pattern( 'format-skipping-begin', '#<<<' );
 1056     $format_skipping_pattern_end =
 1057       make_format_skipping_pattern( 'format-skipping-end', '#>>>' );
 1058     make_non_indenting_brace_pattern();
 1059 
 1060     # If closing side comments ARE selected, then we can safely
 1061     # delete old closing side comments unless closing side comment
 1062     # warnings are requested.  This is a good idea because it will
 1063     # eliminate any old csc's which fall below the line count threshold.
 1064     # We cannot do this if warnings are turned on, though, because we
 1065     # might delete some text which has been added.  So that must
 1066     # be handled when comments are created.  And we cannot do this
 1067     # with -io because -csc will be skipped altogether.
 1068     if ( $rOpts->{'closing-side-comments'} ) {
 1069         if (   !$rOpts->{'closing-side-comment-warnings'}
 1070             && !$rOpts->{'indent-only'} )
 1071         {
 1072             $rOpts->{'delete-closing-side-comments'} = 1;
 1073         }
 1074     }
 1075 
 1076     # If closing side comments ARE NOT selected, but warnings ARE
 1077     # selected and we ARE DELETING csc's, then we will pretend to be
 1078     # adding with a huge interval.  This will force the comments to be
 1079     # generated for comparison with the old comments, but not added.
 1080     elsif ( $rOpts->{'closing-side-comment-warnings'} ) {
 1081         if ( $rOpts->{'delete-closing-side-comments'} ) {
 1082             $rOpts->{'delete-closing-side-comments'}  = 0;
 1083             $rOpts->{'closing-side-comments'}         = 1;
 1084             $rOpts->{'closing-side-comment-interval'} = 100000000;
 1085         }
 1086     }
 1087 
 1088     make_bli_pattern();
 1089     make_block_brace_vertical_tightness_pattern();
 1090     make_blank_line_pattern();
 1091     make_keyword_group_list_pattern();
 1092 
 1093     # Make initial list of desired one line block types
 1094     # They will be modified by 'prepare_cuddled_block_types'
 1095     %want_one_line_block = %is_sort_map_grep_eval;
 1096 
 1097     # Default is to exclude one-line block types from -bl formatting
 1098     # FIXME: Eventually a flag should be added to modify this.
 1099     %is_braces_left_exclude_block = %is_sort_map_grep_eval;
 1100 
 1101     prepare_cuddled_block_types();
 1102     if ( $rOpts->{'dump-cuddled-block-list'} ) {
 1103         dump_cuddled_block_list(*STDOUT);
 1104         Exit(0);
 1105     }
 1106 
 1107     if ( $rOpts->{'line-up-parentheses'} ) {
 1108 
 1109         if (   $rOpts->{'indent-only'}
 1110             || !$rOpts->{'add-newlines'}
 1111             || !$rOpts->{'delete-old-newlines'} )
 1112         {
 1113             Warn(<<EOM);
 1114 -----------------------------------------------------------------------
 1115 Conflict: -lp  conflicts with -io, -fnl, -nanl, or -ndnl; ignoring -lp
 1116     
 1117 The -lp indentation logic requires that perltidy be able to coordinate
 1118 arbitrarily large numbers of line breakpoints.  This isn't possible
 1119 with these flags.
 1120 -----------------------------------------------------------------------
 1121 EOM
 1122             $rOpts->{'line-up-parentheses'} = 0;
 1123         }
 1124 
 1125         if ( $rOpts->{'whitespace-cycle'} ) {
 1126             Warn(<<EOM);
 1127 Conflict: -wc cannot currently be used with the -lp option; ignoring -wc
 1128 EOM
 1129             $rOpts->{'whitespace-cycle'} = 0;
 1130         }
 1131     }
 1132 
 1133     # At present, tabs are not compatible with the line-up-parentheses style
 1134     # (it would be possible to entab the total leading whitespace
 1135     # just prior to writing the line, if desired).
 1136     if ( $rOpts->{'line-up-parentheses'} && $rOpts->{'tabs'} ) {
 1137         Warn(<<EOM);
 1138 Conflict: -t (tabs) cannot be used with the -lp  option; ignoring -t; see -et.
 1139 EOM
 1140         $rOpts->{'tabs'} = 0;
 1141     }
 1142 
 1143     # Likewise, tabs are not compatible with outdenting..
 1144     if ( $rOpts->{'outdent-keywords'} && $rOpts->{'tabs'} ) {
 1145         Warn(<<EOM);
 1146 Conflict: -t (tabs) cannot be used with the -okw options; ignoring -t; see -et.
 1147 EOM
 1148         $rOpts->{'tabs'} = 0;
 1149     }
 1150 
 1151     if ( $rOpts->{'outdent-labels'} && $rOpts->{'tabs'} ) {
 1152         Warn(<<EOM);
 1153 Conflict: -t (tabs) cannot be used with the -ola  option; ignoring -t; see -et.
 1154 EOM
 1155         $rOpts->{'tabs'} = 0;
 1156     }
 1157 
 1158     if ( !$rOpts->{'space-for-semicolon'} ) {
 1159         $want_left_space{'f'} = -1;
 1160     }
 1161 
 1162     if ( $rOpts->{'space-terminal-semicolon'} ) {
 1163         $want_left_space{';'} = 1;
 1164     }
 1165 
 1166     # We should put an upper bound on any -sil=n value. Otherwise enormous
 1167     # files could be created by mistake.
 1168     for ( $rOpts->{'starting-indentation-level'} ) {
 1169         if ( $_ && $_ > 100 ) {
 1170             Warn(<<EOM);
 1171 The value --starting-indentation-level=$_ is very large; a mistake? resetting to 0;
 1172 EOM
 1173             $_ = 0;
 1174         }
 1175     }
 1176 
 1177     # implement outdenting preferences for keywords
 1178     %outdent_keyword = ();
 1179     my @okw = split_words( $rOpts->{'outdent-keyword-list'} );
 1180     unless (@okw) {
 1181         @okw = qw(next last redo goto return);    # defaults
 1182     }
 1183 
 1184     # FUTURE: if not a keyword, assume that it is an identifier
 1185     foreach (@okw) {
 1186         if ( $Perl::Tidy::Tokenizer::is_keyword{$_} ) {
 1187             $outdent_keyword{$_} = 1;
 1188         }
 1189         else {
 1190             Warn("ignoring '$_' in -okwl list; not a perl keyword");
 1191         }
 1192     }
 1193 
 1194     # setup hash for -kpit option
 1195     %keyword_paren_inner_tightness = ();
 1196     my $kpit_value = $rOpts->{'keyword-paren-inner-tightness'};
 1197     if ( defined($kpit_value) && $kpit_value != 1 ) {
 1198         my @kpit =
 1199           split_words( $rOpts->{'keyword-paren-inner-tightness-list'} );
 1200         unless (@kpit) {
 1201             @kpit = qw(if elsif unless while until for foreach);    # defaults
 1202         }
 1203 
 1204         # we will allow keywords and user-defined identifiers
 1205         foreach (@kpit) {
 1206             $keyword_paren_inner_tightness{$_} = $kpit_value;
 1207         }
 1208     }
 1209 
 1210     # implement user whitespace preferences
 1211     if ( my @q = split_words( $rOpts->{'want-left-space'} ) ) {
 1212         @want_left_space{@q} = (1) x scalar(@q);
 1213     }
 1214 
 1215     if ( my @q = split_words( $rOpts->{'want-right-space'} ) ) {
 1216         @want_right_space{@q} = (1) x scalar(@q);
 1217     }
 1218 
 1219     if ( my @q = split_words( $rOpts->{'nowant-left-space'} ) ) {
 1220         @want_left_space{@q} = (-1) x scalar(@q);
 1221     }
 1222 
 1223     if ( my @q = split_words( $rOpts->{'nowant-right-space'} ) ) {
 1224         @want_right_space{@q} = (-1) x scalar(@q);
 1225     }
 1226     if ( $rOpts->{'dump-want-left-space'} ) {
 1227         dump_want_left_space(*STDOUT);
 1228         Exit(0);
 1229     }
 1230 
 1231     if ( $rOpts->{'dump-want-right-space'} ) {
 1232         dump_want_right_space(*STDOUT);
 1233         Exit(0);
 1234     }
 1235 
 1236     # default keywords for which space is introduced before an opening paren
 1237     # (at present, including them messes up vertical alignment)
 1238     my @sak = qw(my local our and or xor err eq ne if else elsif until
 1239       unless while for foreach return switch case given when catch);
 1240     %space_after_keyword = map { $_ => 1 } @sak;
 1241 
 1242     # first remove any or all of these if desired
 1243     if ( my @q = split_words( $rOpts->{'nospace-after-keyword'} ) ) {
 1244 
 1245         # -nsak='*' selects all the above keywords
 1246         if ( @q == 1 && $q[0] eq '*' ) { @q = keys(%space_after_keyword) }
 1247         @space_after_keyword{@q} = (0) x scalar(@q);
 1248     }
 1249 
 1250     # then allow user to add to these defaults
 1251     if ( my @q = split_words( $rOpts->{'space-after-keyword'} ) ) {
 1252         @space_after_keyword{@q} = (1) x scalar(@q);
 1253     }
 1254 
 1255     # implement user break preferences
 1256     my $break_after = sub {
 1257         my @toks = @_;
 1258         foreach my $tok (@toks) {
 1259             if ( $tok eq '?' ) { $tok = ':' }    # patch to coordinate ?/:
 1260             my $lbs = $left_bond_strength{$tok};
 1261             my $rbs = $right_bond_strength{$tok};
 1262             if ( defined($lbs) && defined($rbs) && $lbs < $rbs ) {
 1263                 ( $right_bond_strength{$tok}, $left_bond_strength{$tok} ) =
 1264                   ( $lbs, $rbs );
 1265             }
 1266         }
 1267     };
 1268 
 1269     my $break_before = sub {
 1270         my @toks = @_;
 1271         foreach my $tok (@toks) {
 1272             my $lbs = $left_bond_strength{$tok};
 1273             my $rbs = $right_bond_strength{$tok};
 1274             if ( defined($lbs) && defined($rbs) && $rbs < $lbs ) {
 1275                 ( $right_bond_strength{$tok}, $left_bond_strength{$tok} ) =
 1276                   ( $lbs, $rbs );
 1277             }
 1278         }
 1279     };
 1280 
 1281     $break_after->(@all_operators) if ( $rOpts->{'break-after-all-operators'} );
 1282     $break_before->(@all_operators)
 1283       if ( $rOpts->{'break-before-all-operators'} );
 1284 
 1285     $break_after->( split_words( $rOpts->{'want-break-after'} ) );
 1286     $break_before->( split_words( $rOpts->{'want-break-before'} ) );
 1287 
 1288     # make note if breaks are before certain key types
 1289     %want_break_before = ();
 1290     foreach my $tok ( @all_operators, ',' ) {
 1291         $want_break_before{$tok} =
 1292           $left_bond_strength{$tok} < $right_bond_strength{$tok};
 1293     }
 1294 
 1295     # Coordinate ?/: breaks, which must be similar
 1296     if ( !$want_break_before{':'} ) {
 1297         $want_break_before{'?'}   = $want_break_before{':'};
 1298         $right_bond_strength{'?'} = $right_bond_strength{':'} + 0.01;
 1299         $left_bond_strength{'?'}  = NO_BREAK;
 1300     }
 1301 
 1302     # Only make a hash entry for the next parameters if values are defined.
 1303     # That allows a quick check to be made later.
 1304     %break_before_container_types = ();
 1305     for ( $rOpts->{'break-before-hash-brace'} ) {
 1306         $break_before_container_types{'{'} = $_ if $_ && $_ > 0;
 1307     }
 1308     for ( $rOpts->{'break-before-square-bracket'} ) {
 1309         $break_before_container_types{'['} = $_ if $_ && $_ > 0;
 1310     }
 1311     for ( $rOpts->{'break-before-paren'} ) {
 1312         $break_before_container_types{'('} = $_ if $_ && $_ > 0;
 1313     }
 1314 
 1315     %container_indentation_options = ();
 1316     foreach my $pair (
 1317         [ 'break-before-hash-brace-and-indent',     '{' ],
 1318         [ 'break-before-square-bracket-and-indent', '[' ],
 1319         [ 'break-before-paren-and-indent',          '(' ],
 1320       )
 1321     {
 1322         my ( $key, $tok ) = @{$pair};
 1323         my $opt = $rOpts->{$key};
 1324         if ( defined($opt) && $opt > 0 && $break_before_container_types{$tok} )
 1325         {
 1326 
 1327             # (1) -lp is not compatable with opt=2, silently set to opt=0
 1328             # (2) opt=0 and 2 give same result if -i=-ci; but opt=0 is faster
 1329             if ( $opt == 2 ) {
 1330                 if (   $rOpts->{'line-up-parentheses'}
 1331                     || $rOpts->{'indent-columns'} ==
 1332                     $rOpts->{'continuation-indentation'} )
 1333                 {
 1334                     $opt = 0;
 1335                 }
 1336             }
 1337             $container_indentation_options{$tok} = $opt;
 1338         }
 1339     }
 1340 
 1341     # Define here tokens which may follow the closing brace of a do statement
 1342     # on the same line, as in:
 1343     #   } while ( $something);
 1344     my @dof = qw(until while unless if ; : );
 1345     push @dof, ',';
 1346     @is_do_follower{@dof} = (1) x scalar(@dof);
 1347 
 1348     # What tokens may follow the closing brace of an if or elsif block?
 1349     # Not used. Previously used for cuddled else, but no longer needed.
 1350     %is_if_brace_follower = ();
 1351 
 1352     # nothing can follow the closing curly of an else { } block:
 1353     %is_else_brace_follower = ();
 1354 
 1355     # what can follow a multi-line anonymous sub definition closing curly:
 1356     my @asf = qw# ; : => or and  && || ~~ !~~ ) #;
 1357     push @asf, ',';
 1358     @is_anon_sub_brace_follower{@asf} = (1) x scalar(@asf);
 1359 
 1360     # what can follow a one-line anonymous sub closing curly:
 1361     # one-line anonymous subs also have ']' here...
 1362     # see tk3.t and PP.pm
 1363     my @asf1 = qw#  ; : => or and  && || ) ] ~~ !~~ #;
 1364     push @asf1, ',';
 1365     @is_anon_sub_1_brace_follower{@asf1} = (1) x scalar(@asf1);
 1366 
 1367     # What can follow a closing curly of a block
 1368     # which is not an if/elsif/else/do/sort/map/grep/eval/sub
 1369     # Testfiles: 'Toolbar.pm', 'Menubar.pm', bless.t, '3rules.pl'
 1370     my @obf = qw#  ; : => or and  && || ) #;
 1371     push @obf, ',';
 1372     @is_other_brace_follower{@obf} = (1) x scalar(@obf);
 1373 
 1374     $right_bond_strength{'{'} = WEAK;
 1375     $left_bond_strength{'{'}  = VERY_STRONG;
 1376 
 1377     # make -l=0  equal to -l=infinite
 1378     if ( !$rOpts->{'maximum-line-length'} ) {
 1379         $rOpts->{'maximum-line-length'} = 1000000;
 1380     }
 1381 
 1382     # make -lbl=0  equal to -lbl=infinite
 1383     if ( !$rOpts->{'long-block-line-count'} ) {
 1384         $rOpts->{'long-block-line-count'} = 1000000;
 1385     }
 1386 
 1387     my $ole = $rOpts->{'output-line-ending'};
 1388     if ($ole) {
 1389         my %endings = (
 1390             dos  => "\015\012",
 1391             win  => "\015\012",
 1392             mac  => "\015",
 1393             unix => "\012",
 1394         );
 1395 
 1396         # Patch for RT #99514, a memoization issue.
 1397         # Normally, the user enters one of 'dos', 'win', etc, and we change the
 1398         # value in the options parameter to be the corresponding line ending
 1399         # character.  But, if we are using memoization, on later passes through
 1400         # here the option parameter will already have the desired ending
 1401         # character rather than the keyword 'dos', 'win', etc.  So
 1402         # we must check to see if conversion has already been done and, if so,
 1403         # bypass the conversion step.
 1404         my %endings_inverted = (
 1405             "\015\012" => 'dos',
 1406             "\015\012" => 'win',
 1407             "\015"     => 'mac',
 1408             "\012"     => 'unix',
 1409         );
 1410 
 1411         if ( defined( $endings_inverted{$ole} ) ) {
 1412 
 1413             # we already have valid line ending, nothing more to do
 1414         }
 1415         else {
 1416             $ole = lc $ole;
 1417             unless ( $rOpts->{'output-line-ending'} = $endings{$ole} ) {
 1418                 my $str = join " ", keys %endings;
 1419                 Die(<<EOM);
 1420 Unrecognized line ending '$ole'; expecting one of: $str
 1421 EOM
 1422             }
 1423             if ( $rOpts->{'preserve-line-endings'} ) {
 1424                 Warn("Ignoring -ple; conflicts with -ole\n");
 1425                 $rOpts->{'preserve-line-endings'} = undef;
 1426             }
 1427         }
 1428     }
 1429 
 1430     # hashes used to simplify setting whitespace
 1431     %tightness = (
 1432         '{' => $rOpts->{'brace-tightness'},
 1433         '}' => $rOpts->{'brace-tightness'},
 1434         '(' => $rOpts->{'paren-tightness'},
 1435         ')' => $rOpts->{'paren-tightness'},
 1436         '[' => $rOpts->{'square-bracket-tightness'},
 1437         ']' => $rOpts->{'square-bracket-tightness'},
 1438     );
 1439     %matching_token = (
 1440         '{' => '}',
 1441         '(' => ')',
 1442         '[' => ']',
 1443         '?' => ':',
 1444     );
 1445 
 1446     # note any requested old line breaks to keep
 1447     %keep_break_before_type = ();
 1448     %keep_break_after_type  = ();
 1449     if ( !$rOpts->{'ignore-old-breakpoints'} ) {
 1450 
 1451         # FIXME: could check for valid types here.
 1452         # Invalid types are harmless but probably not intended.
 1453         my @types;
 1454         @types = ( split_words( $rOpts->{'keep-old-breakpoints-before'} ) );
 1455         @keep_break_before_type{@types} = (1) x scalar(@types);
 1456         @types = ( split_words( $rOpts->{'keep-old-breakpoints-after'} ) );
 1457         @keep_break_after_type{@types} = (1) x scalar(@types);
 1458     }
 1459     else {
 1460         if ( $rOpts->{'break-at-old-method-breakpoints'} ) {
 1461             Warn("Conflicting parameters: -iob and -bom; -bom will be ignored\n"
 1462             );
 1463             $rOpts->{'break-at-old-method-breakpoints'} = 0;
 1464         }
 1465         if ( $rOpts->{'break-at-old-comma-breakpoints'} ) {
 1466             Warn("Conflicting parameters: -iob and -boc; -boc will be ignored\n"
 1467             );
 1468             $rOpts->{'break-at-old-comma-breakpoints'} = 0;
 1469         }
 1470         if ( $rOpts->{'break-at-old-semicolon-breakpoints'} ) {
 1471             Warn("Conflicting parameters: -iob and -bos; -bos will be ignored\n"
 1472             );
 1473             $rOpts->{'break-at-old-semicolon-breakpoints'} = 0;
 1474         }
 1475         if ( $rOpts->{'keep-old-breakpoints-before'} ) {
 1476             Warn("Conflicting parameters: -iob and -kbb; -kbb will be ignored\n"
 1477             );
 1478             $rOpts->{'keep-old-breakpoints-before'} = "";
 1479         }
 1480         if ( $rOpts->{'keep-old-breakpoints-after'} ) {
 1481             Warn("Conflicting parameters: -iob and -kba; -kba will be ignored\n"
 1482             );
 1483             $rOpts->{'keep-old-breakpoints-after'} = "";
 1484         }
 1485 
 1486         # Note: These additional parameters are made inactive by -iob.
 1487         # They are silently turned off here because they are on by default.
 1488         # We would generate unexpected warnings if we issued a warning.
 1489         $rOpts->{'break-at-old-keyword-breakpoints'}   = 0;
 1490         $rOpts->{'break-at-old-logical-breakpoints'}   = 0;
 1491         $rOpts->{'break-at-old-ternary-breakpoints'}   = 0;
 1492         $rOpts->{'break-at-old-attribute-breakpoints'} = 0;
 1493     }
 1494 
 1495     #############################################################
 1496     # Make global vars for frequently used options for efficiency
 1497     #############################################################
 1498 
 1499     $rOpts_closing_side_comment_maximum_text =
 1500       $rOpts->{'closing-side-comment-maximum-text'};
 1501     $rOpts_continuation_indentation = $rOpts->{'continuation-indentation'};
 1502     $rOpts_indent_columns           = $rOpts->{'indent-columns'};
 1503     $rOpts_line_up_parentheses      = $rOpts->{'line-up-parentheses'};
 1504     $rOpts_maximum_line_length      = $rOpts->{'maximum-line-length'};
 1505     $rOpts_variable_maximum_line_length =
 1506       $rOpts->{'variable-maximum-line-length'};
 1507     $rOpts_block_brace_tightness = $rOpts->{'block-brace-tightness'};
 1508     $rOpts_block_brace_vertical_tightness =
 1509       $rOpts->{'block-brace-vertical-tightness'};
 1510     $rOpts_stack_closing_block_brace = $rOpts->{'stack-closing-block-brace'};
 1511     $rOpts_maximum_consecutive_blank_lines =
 1512       $rOpts->{'maximum-consecutive-blank-lines'};
 1513     $rOpts_recombine    = $rOpts->{'recombine'};
 1514     $rOpts_add_newlines = $rOpts->{'add-newlines'};
 1515     $rOpts_break_at_old_comma_breakpoints =
 1516       $rOpts->{'break-at-old-comma-breakpoints'};
 1517     $rOpts_ignore_old_breakpoints    = $rOpts->{'ignore-old-breakpoints'};
 1518     $rOpts_keep_interior_semicolons  = $rOpts->{'keep-interior-semicolons'};
 1519     $rOpts_comma_arrow_breakpoints   = $rOpts->{'comma-arrow-breakpoints'};
 1520     $rOpts_maximum_fields_per_table  = $rOpts->{'maximum-fields-per-table'};
 1521     $rOpts_one_line_block_semicolons = $rOpts->{'one-line-block-semicolons'};
 1522     $rOpts_break_at_old_semicolon_breakpoints =
 1523       $rOpts->{'break-at-old-semicolon-breakpoints'};
 1524 
 1525     $rOpts_tee_side_comments    = $rOpts->{'tee-side-comments'};
 1526     $rOpts_tee_block_comments   = $rOpts->{'tee-block-comments'};
 1527     $rOpts_tee_pod              = $rOpts->{'tee-pod'};
 1528     $rOpts_delete_side_comments = $rOpts->{'delete-side-comments'};
 1529     $rOpts_delete_closing_side_comments =
 1530       $rOpts->{'delete-closing-side-comments'};
 1531     $rOpts_format_skipping       = $rOpts->{'format-skipping'};
 1532     $rOpts_indent_only           = $rOpts->{'indent-only'};
 1533     $rOpts_static_block_comments = $rOpts->{'static-block-comments'};
 1534 
 1535     $rOpts_add_whitespace        = $rOpts->{'add-whitespace'};
 1536     $rOpts_delete_old_whitespace = $rOpts->{'delete-old-whitespace'};
 1537     $rOpts_freeze_whitespace     = $rOpts->{'freeze-whitespace'};
 1538 
 1539     $rOpts_function_paren_vertical_alignment =
 1540       $rOpts->{'function-paren-vertical-alignment'};
 1541     $rOpts_ignore_side_comment_lengths =
 1542       $rOpts->{'ignore-side-comment-lengths'};
 1543 
 1544     $rOpts_break_at_old_attribute_breakpoints =
 1545       $rOpts->{'break-at-old-attribute-breakpoints'};
 1546     $rOpts_break_at_old_keyword_breakpoints =
 1547       $rOpts->{'break-at-old-keyword-breakpoints'};
 1548     $rOpts_break_at_old_logical_breakpoints =
 1549       $rOpts->{'break-at-old-logical-breakpoints'};
 1550     $rOpts_break_at_old_ternary_breakpoints =
 1551       $rOpts->{'break-at-old-ternary-breakpoints'};
 1552     $rOpts_short_concatenation_item_length =
 1553       $rOpts->{'short-concatenation-item-length'};
 1554     $rOpts_closing_side_comment_else_flag =
 1555       $rOpts->{'closing-side-comment-else-flag'};
 1556     $rOpts_fuzzy_line_length = $rOpts->{'fuzzy-line-length'};
 1557 
 1558     # Note that both opening and closing tokens can access the opening
 1559     # and closing flags of their container types.
 1560     %opening_vertical_tightness = (
 1561         '(' => $rOpts->{'paren-vertical-tightness'},
 1562         '{' => $rOpts->{'brace-vertical-tightness'},
 1563         '[' => $rOpts->{'square-bracket-vertical-tightness'},
 1564         ')' => $rOpts->{'paren-vertical-tightness'},
 1565         '}' => $rOpts->{'brace-vertical-tightness'},
 1566         ']' => $rOpts->{'square-bracket-vertical-tightness'},
 1567     );
 1568 
 1569     %closing_vertical_tightness = (
 1570         '(' => $rOpts->{'paren-vertical-tightness-closing'},
 1571         '{' => $rOpts->{'brace-vertical-tightness-closing'},
 1572         '[' => $rOpts->{'square-bracket-vertical-tightness-closing'},
 1573         ')' => $rOpts->{'paren-vertical-tightness-closing'},
 1574         '}' => $rOpts->{'brace-vertical-tightness-closing'},
 1575         ']' => $rOpts->{'square-bracket-vertical-tightness-closing'},
 1576     );
 1577 
 1578     # assume flag for '>' same as ')' for closing qw quotes
 1579     %closing_token_indentation = (
 1580         ')' => $rOpts->{'closing-paren-indentation'},
 1581         '}' => $rOpts->{'closing-brace-indentation'},
 1582         ']' => $rOpts->{'closing-square-bracket-indentation'},
 1583         '>' => $rOpts->{'closing-paren-indentation'},
 1584     );
 1585 
 1586     # flag indicating if any closing tokens are indented
 1587     $some_closing_token_indentation =
 1588          $rOpts->{'closing-paren-indentation'}
 1589       || $rOpts->{'closing-brace-indentation'}
 1590       || $rOpts->{'closing-square-bracket-indentation'}
 1591       || $rOpts->{'indent-closing-brace'};
 1592 
 1593     %opening_token_right = (
 1594         '(' => $rOpts->{'opening-paren-right'},
 1595         '{' => $rOpts->{'opening-hash-brace-right'},
 1596         '[' => $rOpts->{'opening-square-bracket-right'},
 1597     );
 1598 
 1599     %stack_opening_token = (
 1600         '(' => $rOpts->{'stack-opening-paren'},
 1601         '{' => $rOpts->{'stack-opening-hash-brace'},
 1602         '[' => $rOpts->{'stack-opening-square-bracket'},
 1603     );
 1604 
 1605     %stack_closing_token = (
 1606         ')' => $rOpts->{'stack-closing-paren'},
 1607         '}' => $rOpts->{'stack-closing-hash-brace'},
 1608         ']' => $rOpts->{'stack-closing-square-bracket'},
 1609     );
 1610 
 1611     # Create a table of maximum line length vs level for later efficient use.
 1612     # We will make the tables very long to be sure it will not be exceeded.
 1613     # But we have to choose a fixed length.  A check will be made at the start
 1614     # of sub 'finish_formatting' to be sure it is not exceeded.  Note, some of
 1615     # my standard test problems have indentation levels of about 150, so this
 1616     # should be fairly large.  If the choice of a maximum level ever becomes
 1617     # an issue then these table values could be returned in a sub with a simple
 1618     # memoization scheme.
 1619 
 1620     # Also create a table of the maximum spaces available for text due to the
 1621     # level only.  If a line has continuation indentation, then that space must
 1622     # be subtracted from the table value.  This table is used for preliminary
 1623     # estimates in welding, extended_ci, BBX, and marking short blocks.
 1624     my $level_max = 1000;
 1625 
 1626     # The basic scheme:
 1627     foreach my $level ( 0 .. $level_max ) {
 1628         my $indent = $level * $rOpts_indent_columns;
 1629         $maximum_line_length_at_level[$level] = $rOpts_maximum_line_length;
 1630         $maximum_text_length_at_level[$level] =
 1631           $rOpts_maximum_line_length - $indent;
 1632     }
 1633 
 1634     # Correct the maximum_text_length table if the -wc=n flag is used
 1635     $rOpts_whitespace_cycle = $rOpts->{'whitespace-cycle'};
 1636     if ($rOpts_whitespace_cycle) {
 1637         if ( $rOpts_whitespace_cycle > 0 ) {
 1638             foreach my $level ( 0 .. $level_max ) {
 1639                 my $level_mod = $level % $rOpts_whitespace_cycle;
 1640                 my $indent    = $level_mod * $rOpts_indent_columns;
 1641                 $maximum_text_length_at_level[$level] =
 1642                   $rOpts_maximum_line_length - $indent;
 1643             }
 1644         }
 1645         else {
 1646             $rOpts_whitespace_cycle = $rOpts->{'whitespace-cycle'} = 0;
 1647         }
 1648     }
 1649 
 1650     # Correct the tables if the -vmll flag is used.  These values override the
 1651     # previous values.
 1652     if ($rOpts_variable_maximum_line_length) {
 1653         foreach my $level ( 0 .. $level_max ) {
 1654             $maximum_text_length_at_level[$level] = $rOpts_maximum_line_length;
 1655             $maximum_line_length_at_level[$level] =
 1656               $rOpts_maximum_line_length + $level * $rOpts_indent_columns;
 1657         }
 1658     }
 1659 
 1660     initialize_weld_nested_exclusion_rules($rOpts);
 1661     initialize_line_up_parentheses_exclusion_rules($rOpts);
 1662     return;
 1663 }
 1664 
 1665 sub initialize_weld_nested_exclusion_rules {
 1666     my ($rOpts) = @_;
 1667     %weld_nested_exclusion_rules = ();
 1668 
 1669     my $opt_name = 'weld-nested-exclusion-list';
 1670     my $str      = $rOpts->{$opt_name};
 1671     return unless ($str);
 1672     $str =~ s/^\s+//;
 1673     $str =~ s/\s+$//;
 1674     return unless ($str);
 1675 
 1676     # There are four container tokens.
 1677     my %token_keys = (
 1678         '(' => '(',
 1679         '[' => '[',
 1680         '{' => '{',
 1681         'q' => 'q',
 1682     );
 1683 
 1684     # We are parsing an exclusion list for nested welds. The list is a string
 1685     # with spaces separating any number of items.  Each item consists of three
 1686     # pieces of information:
 1687     # <optional position> <optional type> <type of container>
 1688     # <     ^ or .      > <    k or K   > <     ( [ {       >
 1689 
 1690     # The last character is the required container type and must be one of:
 1691     # ( = paren
 1692     # [ = square bracket
 1693     # { = brace
 1694 
 1695     # An optional leading position indicator:
 1696     # ^ means the leading token position in the weld
 1697     # . means a secondary token position in the weld
 1698     #   no position indicator means all positions match
 1699 
 1700     # An optional alphanumeric character between the position and container
 1701     # token selects to which the rule applies:
 1702     # k = any keyword
 1703     # K = any non-keyword
 1704     # f = function call
 1705     # F = not a function call
 1706     # w = function or keyword
 1707     # W = not a function or keyword
 1708     #     no letter means any preceding type matches
 1709 
 1710     # Examples:
 1711     # ^(  - the weld must not start with a paren
 1712     # .(  - the second and later tokens may not be parens
 1713     # (   - no parens in weld
 1714     # ^K(  - exclude a leading paren not preceded by a keyword
 1715     # .k(  - exclude a secondary paren preceded by a keyword
 1716     # [ {  - exclude all brackets and braces
 1717 
 1718     my @items = split /\s+/, $str;
 1719     my $msg1;
 1720     my $msg2;
 1721     foreach my $item (@items) {
 1722         my $item_save = $item;
 1723         my $tok       = chop($item);
 1724         my $key       = $token_keys{$tok};
 1725         if ( !defined($key) ) {
 1726             $msg1 .= " '$item_save'";
 1727             next;
 1728         }
 1729         if ( !defined( $weld_nested_exclusion_rules{$key} ) ) {
 1730             $weld_nested_exclusion_rules{$key} = [];
 1731         }
 1732         my $rflags = $weld_nested_exclusion_rules{$key};
 1733 
 1734         # A 'q' means do not weld quotes
 1735         if ( $tok eq 'q' ) {
 1736             $rflags->[0] = '*';
 1737             $rflags->[1] = '*';
 1738             next;
 1739         }
 1740 
 1741         my $pos    = '*';
 1742         my $select = '*';
 1743         if ($item) {
 1744             if ( $item =~ /^([\^\.])?([kKfFwW])?$/ ) {
 1745                 $pos    = $1 if ($1);
 1746                 $select = $2 if ($2);
 1747             }
 1748             else {
 1749                 $msg1 .= " '$item_save'";
 1750                 next;
 1751             }
 1752         }
 1753 
 1754         my $err;
 1755         if ( $pos eq '^' || $pos eq '*' ) {
 1756             if ( defined( $rflags->[0] ) && $rflags->[0] ne $select ) {
 1757                 $err = 1;
 1758             }
 1759             $rflags->[0] = $select;
 1760         }
 1761         if ( $pos eq '.' || $pos eq '*' ) {
 1762             if ( defined( $rflags->[1] ) && $rflags->[1] ne $select ) {
 1763                 $err = 1;
 1764             }
 1765             $rflags->[1] = $select;
 1766         }
 1767         if ($err) { $msg2 .= " '$item_save'"; }
 1768     }
 1769     if ($msg1) {
 1770         Warn(<<EOM);
 1771 Unexpecting symbol(s) encountered in --$opt_name will be ignored:
 1772 $msg1
 1773 EOM
 1774     }
 1775     if ($msg2) {
 1776         Warn(<<EOM);
 1777 Multiple specifications were encountered in the --weld-nested-exclusion-list for:
 1778 $msg2
 1779 Only the last will be used.
 1780 EOM
 1781     }
 1782     return;
 1783 }
 1784 
 1785 sub initialize_line_up_parentheses_exclusion_rules {
 1786     my ($rOpts) = @_;
 1787     %line_up_parentheses_exclusion_rules = ();
 1788     my $opt_name = 'line-up-parentheses-exclusion-list';
 1789     my $str      = $rOpts->{$opt_name};
 1790     return unless ($str);
 1791     $str =~ s/^\s+//;
 1792     $str =~ s/\s+$//;
 1793     return unless ($str);
 1794 
 1795     # The format is space separated items, where each item must consist of a
 1796     # string with a token type preceded by an optional text token and followed
 1797     # by an integer:
 1798     # For example:
 1799     #    W(1
 1800     #  = (flag1)(key)(flag2), where
 1801     #    flag1 = 'W'
 1802     #    key = '('
 1803     #    flag2 = '1'
 1804 
 1805     my @items = split /\s+/, $str;
 1806     my $msg1;
 1807     my $msg2;
 1808     foreach my $item (@items) {
 1809         my $item_save = $item;
 1810         my ( $flag1, $key, $flag2 );
 1811         if ( $item =~ /^([^\(\]\{]*)?([\(\{\[])(\d)?$/ ) {
 1812             $flag1 = $1 if $1;
 1813             $key   = $2 if $2;
 1814             $flag2 = $3 if $3;
 1815         }
 1816         else {
 1817             $msg1 .= " '$item_save'";
 1818             next;
 1819         }
 1820 
 1821         if ( !defined($key) ) {
 1822             $msg1 .= " '$item_save'";
 1823             next;
 1824         }
 1825 
 1826         # Check for valid flag1
 1827         if    ( !defined($flag1) ) { $flag1 = '*' }
 1828         elsif ( $flag1 !~ /^[kKfFwW\*]$/ ) {
 1829             $msg1 .= " '$item_save'";
 1830             next;
 1831         }
 1832 
 1833         # Check for valid flag2
 1834         # 0 or blank: ignore container contents
 1835         # 1 all containers with sublists match
 1836         # 2 all containers with sublists, code blocks or ternary operators match
 1837         # ... this could be extended in the future
 1838         if    ( !defined($flag2) ) { $flag2 = 0 }
 1839         elsif ( $flag2 !~ /^[012]$/ ) {
 1840             $msg1 .= " '$item_save'";
 1841             next;
 1842         }
 1843 
 1844         if ( !defined( $line_up_parentheses_exclusion_rules{$key} ) ) {
 1845             $line_up_parentheses_exclusion_rules{$key} = [ $flag1, $flag2 ];
 1846             next;
 1847         }
 1848 
 1849         # check for multiple conflicting specifications
 1850         my $rflags = $line_up_parentheses_exclusion_rules{$key};
 1851         my $err;
 1852         if ( defined( $rflags->[0] ) && $rflags->[0] ne $flag1 ) {
 1853             $err = 1;
 1854             $rflags->[0] = $flag1;
 1855         }
 1856         if ( defined( $rflags->[1] ) && $rflags->[1] ne $flag2 ) {
 1857             $err = 1;
 1858             $rflags->[1] = $flag2;
 1859         }
 1860         $msg2 .= " '$item_save'" if ($err);
 1861         next;
 1862     }
 1863     if ($msg1) {
 1864         Warn(<<EOM);
 1865 Unexpecting symbol(s) encountered in --$opt_name will be ignored:
 1866 $msg1
 1867 EOM
 1868     }
 1869     if ($msg2) {
 1870         Warn(<<EOM);
 1871 Multiple specifications were encountered in the $opt_name at:
 1872 $msg2
 1873 Only the last will be used.
 1874 EOM
 1875     }
 1876 
 1877     # Possible speedup: we could turn off -lp if it is not actually used
 1878     my $all_off = 1;
 1879     foreach my $key (qw# ( { [ #) {
 1880         my $rflags = $line_up_parentheses_exclusion_rules{$key};
 1881         if ( defined($rflags) ) {
 1882             my ( $flag1, $flag2 ) = @{$rflags};
 1883             if ( $flag1 && $flag1 ne '*' ) { $all_off = 0; last }
 1884             if ($flag2)                    { $all_off = 0; last }
 1885         }
 1886     }
 1887     if ($all_off) {
 1888 
 1889         # FIXME: This speedup works but is currently deactivated because at
 1890         # present users of -lp could see some discontinuities in formatting,
 1891         # such as those involving the choice of breaks at '='.  Only if/when
 1892         # these issues have been checked and resolved it should be reactivated
 1893         # as a speedup.
 1894         ## $rOpts->{'line-up-parentheses'} = "";
 1895     }
 1896 
 1897     return;
 1898 }
 1899 
 1900 sub initialize_whitespace_hashes {
 1901 
 1902     # This is called once before formatting begins to initialize these global
 1903     # hashes, which control the use of whitespace around tokens:
 1904     #
 1905     # %binary_ws_rules
 1906     # %want_left_space
 1907     # %want_right_space
 1908     # %space_after_keyword
 1909     #
 1910     # Many token types are identical to the tokens themselves.
 1911     # See the tokenizer for a complete list. Here are some special types:
 1912     #   k = perl keyword
 1913     #   f = semicolon in for statement
 1914     #   m = unary minus
 1915     #   p = unary plus
 1916     # Note that :: is excluded since it should be contained in an identifier
 1917     # Note that '->' is excluded because it never gets space
 1918     # parentheses and brackets are excluded since they are handled specially
 1919     # curly braces are included but may be overridden by logic, such as
 1920     # newline logic.
 1921 
 1922     # NEW_TOKENS: create a whitespace rule here.  This can be as
 1923     # simple as adding your new letter to @spaces_both_sides, for
 1924     # example.
 1925 
 1926     my @opening_type = qw< L { ( [ >;
 1927     @is_opening_type{@opening_type} = (1) x scalar(@opening_type);
 1928 
 1929     my @closing_type = qw< R } ) ] >;
 1930     @is_closing_type{@closing_type} = (1) x scalar(@closing_type);
 1931 
 1932     my @spaces_both_sides = qw#
 1933       + - * / % ? = . : x < > | & ^ .. << >> ** && .. || // => += -=
 1934       .= %= x= &= |= ^= *= <> <= >= == =~ !~ /= != ... <<= >>= ~~ !~~
 1935       &&= ||= //= <=> A k f w F n C Y U G v
 1936       #;
 1937 
 1938     my @spaces_left_side = qw<
 1939       t ! ~ m p { \ h pp mm Z j
 1940     >;
 1941     push( @spaces_left_side, '#' );    # avoids warning message
 1942 
 1943     my @spaces_right_side = qw<
 1944       ; } ) ] R J ++ -- **=
 1945     >;
 1946     push( @spaces_right_side, ',' );    # avoids warning message
 1947 
 1948     %want_left_space  = ();
 1949     %want_right_space = ();
 1950     %binary_ws_rules  = ();
 1951 
 1952     # Note that we setting defaults here.  Later in processing
 1953     # the values of %want_left_space and  %want_right_space
 1954     # may be overridden by any user settings specified by the
 1955     # -wls and -wrs parameters.  However the binary_whitespace_rules
 1956     # are hardwired and have priority.
 1957     @want_left_space{@spaces_both_sides} =
 1958       (1) x scalar(@spaces_both_sides);
 1959     @want_right_space{@spaces_both_sides} =
 1960       (1) x scalar(@spaces_both_sides);
 1961     @want_left_space{@spaces_left_side} =
 1962       (1) x scalar(@spaces_left_side);
 1963     @want_right_space{@spaces_left_side} =
 1964       (-1) x scalar(@spaces_left_side);
 1965     @want_left_space{@spaces_right_side} =
 1966       (-1) x scalar(@spaces_right_side);
 1967     @want_right_space{@spaces_right_side} =
 1968       (1) x scalar(@spaces_right_side);
 1969     $want_left_space{'->'}      = WS_NO;
 1970     $want_right_space{'->'}     = WS_NO;
 1971     $want_left_space{'**'}      = WS_NO;
 1972     $want_right_space{'**'}     = WS_NO;
 1973     $want_right_space{'CORE::'} = WS_NO;
 1974 
 1975     # These binary_ws_rules are hardwired and have priority over the above
 1976     # settings.  It would be nice to allow adjustment by the user,
 1977     # but it would be complicated to specify.
 1978     #
 1979     # hash type information must stay tightly bound
 1980     # as in :  ${xxxx}
 1981     $binary_ws_rules{'i'}{'L'} = WS_NO;
 1982     $binary_ws_rules{'i'}{'{'} = WS_YES;
 1983     $binary_ws_rules{'k'}{'{'} = WS_YES;
 1984     $binary_ws_rules{'U'}{'{'} = WS_YES;
 1985     $binary_ws_rules{'i'}{'['} = WS_NO;
 1986     $binary_ws_rules{'R'}{'L'} = WS_NO;
 1987     $binary_ws_rules{'R'}{'{'} = WS_NO;
 1988     $binary_ws_rules{'t'}{'L'} = WS_NO;
 1989     $binary_ws_rules{'t'}{'{'} = WS_NO;
 1990     $binary_ws_rules{'t'}{'='} = WS_OPTIONAL;    # for signatures; fixes b1123
 1991     $binary_ws_rules{'}'}{'L'} = WS_NO;
 1992     $binary_ws_rules{'}'}{'{'} = WS_OPTIONAL;    # RT#129850; was WS_NO
 1993     $binary_ws_rules{'$'}{'L'} = WS_NO;
 1994     $binary_ws_rules{'$'}{'{'} = WS_NO;
 1995     $binary_ws_rules{'@'}{'L'} = WS_NO;
 1996     $binary_ws_rules{'@'}{'{'} = WS_NO;
 1997     $binary_ws_rules{'='}{'L'} = WS_YES;
 1998     $binary_ws_rules{'J'}{'J'} = WS_YES;
 1999 
 2000     # the following includes ') {'
 2001     # as in :    if ( xxx ) { yyy }
 2002     $binary_ws_rules{']'}{'L'} = WS_NO;
 2003     $binary_ws_rules{']'}{'{'} = WS_NO;
 2004     $binary_ws_rules{')'}{'{'} = WS_YES;
 2005     $binary_ws_rules{')'}{'['} = WS_NO;
 2006     $binary_ws_rules{']'}{'['} = WS_NO;
 2007     $binary_ws_rules{']'}{'{'} = WS_NO;
 2008     $binary_ws_rules{'}'}{'['} = WS_NO;
 2009     $binary_ws_rules{'R'}{'['} = WS_NO;
 2010 
 2011     $binary_ws_rules{']'}{'++'} = WS_NO;
 2012     $binary_ws_rules{']'}{'--'} = WS_NO;
 2013     $binary_ws_rules{')'}{'++'} = WS_NO;
 2014     $binary_ws_rules{')'}{'--'} = WS_NO;
 2015 
 2016     $binary_ws_rules{'R'}{'++'} = WS_NO;
 2017     $binary_ws_rules{'R'}{'--'} = WS_NO;
 2018 
 2019     $binary_ws_rules{'i'}{'Q'} = WS_YES;
 2020     $binary_ws_rules{'n'}{'('} = WS_YES;    # occurs in 'use package n ()'
 2021 
 2022     $binary_ws_rules{'i'}{'('} = WS_NO;
 2023 
 2024     $binary_ws_rules{'w'}{'('} = WS_NO;
 2025     $binary_ws_rules{'w'}{'{'} = WS_YES;
 2026     return;
 2027 
 2028 } ## end initialize_whitespace_hashes
 2029 
 2030 sub set_whitespace_flags {
 2031 
 2032     # This routine is called once per file to set whitespace flags for that
 2033     # file.  This routine examines each pair of nonblank tokens and sets a flag
 2034     # indicating if white space is needed.
 2035     #
 2036     # $rwhitespace_flags->[$j] is a flag indicating whether a white space
 2037     # BEFORE token $j is needed, with the following values:
 2038     #
 2039     #             WS_NO      = -1 do not want a space BEFORE token $j
 2040     #             WS_OPTIONAL=  0 optional space or $j is a whitespace
 2041     #             WS_YES     =  1 want a space BEFORE token $j
 2042     #
 2043 
 2044     my $self = shift;
 2045     my $rLL  = $self->[_rLL_];
 2046     use constant DEBUG_WHITE => 0;
 2047 
 2048     my $rOpts_space_keyword_paren   = $rOpts->{'space-keyword-paren'};
 2049     my $rOpts_space_backslash_quote = $rOpts->{'space-backslash-quote'};
 2050     my $rOpts_space_function_paren  = $rOpts->{'space-function-paren'};
 2051 
 2052     my $rwhitespace_flags       = [];
 2053     my $ris_function_call_paren = {};
 2054 
 2055     my %is_for_foreach = ( 'for' => 1, 'foreach' => 1 );
 2056 
 2057     my ( $token, $type, $block_type, $seqno, $input_line_no );
 2058     my (
 2059         $last_token, $last_type, $last_block_type,
 2060         $last_seqno, $last_input_line_no
 2061     );
 2062 
 2063     my $j_tight_closing_paren = -1;
 2064 
 2065     $token              = ' ';
 2066     $type               = 'b';
 2067     $block_type         = '';
 2068     $seqno              = '';
 2069     $input_line_no      = 0;
 2070     $last_token         = ' ';
 2071     $last_type          = 'b';
 2072     $last_block_type    = '';
 2073     $last_seqno         = '';
 2074     $last_input_line_no = 0;
 2075 
 2076     my $jmax = @{$rLL} - 1;
 2077 
 2078     my ($ws);
 2079 
 2080     # This is some logic moved to a sub to avoid deep nesting of if stmts
 2081     my $ws_in_container = sub {
 2082 
 2083         my ($j) = @_;
 2084         my $ws = WS_YES;
 2085         if ( $j + 1 > $jmax ) { return (WS_NO) }
 2086 
 2087         # Patch to count '-foo' as single token so that
 2088         # each of  $a{-foo} and $a{foo} and $a{'foo'} do
 2089         # not get spaces with default formatting.
 2090         my $j_here = $j;
 2091         ++$j_here
 2092           if ( $token eq '-'
 2093             && $last_token eq '{'
 2094             && $rLL->[ $j + 1 ]->[_TYPE_] eq 'w' );
 2095 
 2096         # Patch to count a sign separated from a number as a single token, as
 2097         # in the following line. Otherwise, it takes two steps to converge:
 2098         #    deg2rad(-  0.5)
 2099         if (   ( $type eq 'm' || $type eq 'p' )
 2100             && $j < $jmax + 1
 2101             && $rLL->[ $j + 1 ]->[_TYPE_] eq 'b'
 2102             && $rLL->[ $j + 2 ]->[_TYPE_] eq 'n'
 2103             && $rLL->[ $j + 2 ]->[_TOKEN_] =~ /^\d/ )
 2104         {
 2105             $j_here = $j + 2;
 2106         }
 2107 
 2108         # $j_next is where a closing token should be if
 2109         # the container has a single token
 2110         if ( $j_here + 1 > $jmax ) { return (WS_NO) }
 2111         my $j_next =
 2112           ( $rLL->[ $j_here + 1 ]->[_TYPE_] eq 'b' )
 2113           ? $j_here + 2
 2114           : $j_here + 1;
 2115 
 2116         if ( $j_next > $jmax ) { return WS_NO }
 2117         my $tok_next  = $rLL->[$j_next]->[_TOKEN_];
 2118         my $type_next = $rLL->[$j_next]->[_TYPE_];
 2119 
 2120         # for tightness = 1, if there is just one token
 2121         # within the matching pair, we will keep it tight
 2122         if (
 2123             $tok_next eq $matching_token{$last_token}
 2124 
 2125             # but watch out for this: [ [ ]    (misc.t)
 2126             && $last_token ne $token
 2127 
 2128             # double diamond is usually spaced
 2129             && $token ne '<<>>'
 2130 
 2131           )
 2132         {
 2133 
 2134             # remember where to put the space for the closing paren
 2135             $j_tight_closing_paren = $j_next;
 2136             return (WS_NO);
 2137         }
 2138         return (WS_YES);
 2139     };
 2140 
 2141     # Local hashes to set spaces around container tokens according to their
 2142     # sequence numbers.  These are set as keywords are examined.
 2143     # They are controlled by the -kpit and -kpitl flags.
 2144     my %opening_container_inside_ws;
 2145     my %closing_container_inside_ws;
 2146     my $set_container_ws_by_keyword = sub {
 2147 
 2148         return unless (%keyword_paren_inner_tightness);
 2149 
 2150         my ( $word, $sequence_number ) = @_;
 2151 
 2152         # We just saw a keyword (or other function name) followed by an opening
 2153         # paren. Now check to see if the following paren should have special
 2154         # treatment for its inside space.  If so we set a hash value using the
 2155         # sequence number as key.
 2156         if ( $word && $sequence_number ) {
 2157             my $tightness = $keyword_paren_inner_tightness{$word};
 2158             if ( defined($tightness) && $tightness != 1 ) {
 2159                 my $ws_flag = $tightness == 0 ? WS_YES : WS_NO;
 2160                 $opening_container_inside_ws{$sequence_number} = $ws_flag;
 2161                 $closing_container_inside_ws{$sequence_number} = $ws_flag;
 2162             }
 2163         }
 2164     };
 2165 
 2166     my $ws_opening_container_override = sub {
 2167         my ( $ws, $sequence_number ) = @_;
 2168         return $ws unless (%opening_container_inside_ws);
 2169         if ($sequence_number) {
 2170             my $ws_override = $opening_container_inside_ws{$sequence_number};
 2171             if ($ws_override) { $ws = $ws_override }
 2172         }
 2173         return $ws;
 2174     };
 2175 
 2176     my $ws_closing_container_override = sub {
 2177         my ( $ws, $sequence_number ) = @_;
 2178         return $ws unless (%closing_container_inside_ws);
 2179         if ($sequence_number) {
 2180             my $ws_override = $closing_container_inside_ws{$sequence_number};
 2181             if ($ws_override) { $ws = $ws_override }
 2182         }
 2183         return $ws;
 2184     };
 2185 
 2186     # main loop over all tokens to define the whitespace flags
 2187     for ( my $j = 0 ; $j <= $jmax ; $j++ ) {
 2188 
 2189         my $rtokh = $rLL->[$j];
 2190 
 2191         # Set a default
 2192         $rwhitespace_flags->[$j] = WS_OPTIONAL;
 2193 
 2194         if ( $rtokh->[_TYPE_] eq 'b' ) {
 2195             next;
 2196         }
 2197 
 2198         # set a default value, to be changed as needed
 2199         $ws                 = undef;
 2200         $last_token         = $token;
 2201         $last_type          = $type;
 2202         $last_block_type    = $block_type;
 2203         $last_seqno         = $seqno;
 2204         $last_input_line_no = $input_line_no;
 2205         $token              = $rtokh->[_TOKEN_];
 2206         $type               = $rtokh->[_TYPE_];
 2207         $block_type         = $rtokh->[_BLOCK_TYPE_];
 2208         $seqno              = $rtokh->[_TYPE_SEQUENCE_];
 2209         $input_line_no      = $rtokh->[_LINE_INDEX_];
 2210 
 2211         #---------------------------------------------------------------
 2212         # Whitespace Rules Section 1:
 2213         # Handle space on the inside of opening braces.
 2214         #---------------------------------------------------------------
 2215 
 2216         #    /^[L\{\(\[]$/
 2217         if ( $is_opening_type{$last_type} ) {
 2218 
 2219             $j_tight_closing_paren = -1;
 2220 
 2221             # let us keep empty matched braces together: () {} []
 2222             # except for BLOCKS
 2223             if ( $token eq $matching_token{$last_token} ) {
 2224                 if ($block_type) {
 2225                     $ws = WS_YES;
 2226                 }
 2227                 else {
 2228                     $ws = WS_NO;
 2229                 }
 2230             }
 2231             else {
 2232 
 2233                 # we're considering the right of an opening brace
 2234                 # tightness = 0 means always pad inside with space
 2235                 # tightness = 1 means pad inside if "complex"
 2236                 # tightness = 2 means never pad inside with space
 2237 
 2238                 my $tightness;
 2239                 if (   $last_type eq '{'
 2240                     && $last_token eq '{'
 2241                     && $last_block_type )
 2242                 {
 2243                     $tightness = $rOpts_block_brace_tightness;
 2244                 }
 2245                 else { $tightness = $tightness{$last_token} }
 2246 
 2247                #=============================================================
 2248                # Patch for test problem <<snippets/fabrice_bug.in>>
 2249                # We must always avoid spaces around a bare word beginning
 2250                # with ^ as in:
 2251                #    my $before = ${^PREMATCH};
 2252                # Because all of the following cause an error in perl:
 2253                #    my $before = ${ ^PREMATCH };
 2254                #    my $before = ${ ^PREMATCH};
 2255                #    my $before = ${^PREMATCH };
 2256                # So if brace tightness flag is -bt=0 we must temporarily reset
 2257                # to bt=1.  Note that here we must set tightness=1 and not 2 so
 2258                # that the closing space
 2259                # is also avoided (via the $j_tight_closing_paren flag in coding)
 2260                 if ( $type eq 'w' && $token =~ /^\^/ ) { $tightness = 1 }
 2261 
 2262                 #=============================================================
 2263 
 2264                 if ( $tightness <= 0 ) {
 2265                     $ws = WS_YES;
 2266                 }
 2267                 elsif ( $tightness > 1 ) {
 2268                     $ws = WS_NO;
 2269                 }
 2270                 else {
 2271                     $ws = $ws_in_container->($j);
 2272                 }
 2273             }
 2274 
 2275             # check for special cases which override the above rules
 2276             $ws = $ws_opening_container_override->( $ws, $last_seqno );
 2277 
 2278         }    # end setting space flag inside opening tokens
 2279         my $ws_1;
 2280         $ws_1 = $ws
 2281           if DEBUG_WHITE;
 2282 
 2283         #---------------------------------------------------------------
 2284         # Whitespace Rules Section 2:
 2285         # Handle space on inside of closing brace pairs.
 2286         #---------------------------------------------------------------
 2287 
 2288         #   /[\}\)\]R]/
 2289         if ( $is_closing_type{$type} ) {
 2290 
 2291             if ( $j == $j_tight_closing_paren ) {
 2292 
 2293                 $j_tight_closing_paren = -1;
 2294                 $ws                    = WS_NO;
 2295             }
 2296             else {
 2297 
 2298                 if ( !defined($ws) ) {
 2299 
 2300                     my $tightness;
 2301                     if ( $type eq '}' && $token eq '}' && $block_type ) {
 2302                         $tightness = $rOpts_block_brace_tightness;
 2303                     }
 2304                     else { $tightness = $tightness{$token} }
 2305 
 2306                     $ws = ( $tightness > 1 ) ? WS_NO : WS_YES;
 2307                 }
 2308             }
 2309 
 2310             # check for special cases which override the above rules
 2311             $ws = $ws_closing_container_override->( $ws, $seqno );
 2312 
 2313         }    # end setting space flag inside closing tokens
 2314 
 2315         my $ws_2;
 2316         $ws_2 = $ws
 2317           if DEBUG_WHITE;
 2318 
 2319         #---------------------------------------------------------------
 2320         # Whitespace Rules Section 3:
 2321         # Use the binary rule table.
 2322         #---------------------------------------------------------------
 2323         if ( !defined($ws) ) {
 2324             $ws = $binary_ws_rules{$last_type}{$type};
 2325         }
 2326         my $ws_3;
 2327         $ws_3 = $ws
 2328           if DEBUG_WHITE;
 2329 
 2330         #---------------------------------------------------------------
 2331         # Whitespace Rules Section 4:
 2332         # Handle some special cases.
 2333         #---------------------------------------------------------------
 2334         if ( $token eq '(' ) {
 2335 
 2336             # This will have to be tweaked as tokenization changes.
 2337             # We usually want a space at '} (', for example:
 2338             # <<snippets/space1.in>>
 2339             #     map { 1 * $_; } ( $y, $M, $w, $d, $h, $m, $s );
 2340             #
 2341             # But not others:
 2342             #     &{ $_->[1] }( delete $_[$#_]{ $_->[0] } );
 2343             # At present, the above & block is marked as type L/R so this case
 2344             # won't go through here.
 2345             if ( $last_type eq '}' && $last_token ne ')' ) { $ws = WS_YES }
 2346 
 2347             # NOTE: some older versions of Perl had occasional problems if
 2348             # spaces are introduced between keywords or functions and opening
 2349             # parens.  So the default is not to do this except is certain
 2350             # cases.  The current Perl seems to tolerate spaces.
 2351 
 2352             # Space between keyword and '('
 2353             elsif ( $last_type eq 'k' ) {
 2354                 $ws = WS_NO
 2355                   unless ( $rOpts_space_keyword_paren
 2356                     || $space_after_keyword{$last_token} );
 2357 
 2358                 # Set inside space flag if requested
 2359                 $set_container_ws_by_keyword->( $last_token, $seqno );
 2360             }
 2361 
 2362             # Space between function and '('
 2363             # -----------------------------------------------------
 2364             # 'w' and 'i' checks for something like:
 2365             #   myfun(    &myfun(   ->myfun(
 2366             # -----------------------------------------------------
 2367 
 2368             # Note that at this point an identifier may still have a leading
 2369             # arrow, but the arrow will be split off during token respacing.
 2370             # After that, the token may become a bare word without leading
 2371             # arrow.  The point is, it is best to mark function call parens
 2372             # right here before that happens.
 2373             # Patch: added 'C' to prevent blinker, case b934, i.e. 'pi()'
 2374             # NOTE: this would be the place to allow spaces between repeated
 2375             # parens, like () () (), as in case c017, but I decided that would
 2376             # not be a good idea.
 2377             elsif (( $last_type =~ /^[wCUG]$/ )
 2378                 || ( $last_type =~ /^[wi]$/ && $last_token =~ /^([\&]|->)/ ) )
 2379             {
 2380                 $ws = $rOpts_space_function_paren ? WS_YES : WS_NO;
 2381                 $set_container_ws_by_keyword->( $last_token, $seqno );
 2382                 $ris_function_call_paren->{$seqno} = 1;
 2383             }
 2384 
 2385             # space between something like $i and ( in <<snippets/space2.in>>
 2386             # for $i ( 0 .. 20 ) {
 2387             # FIXME: eventually, type 'i' could be split into multiple
 2388             # token types so this can be a hardwired rule.
 2389             elsif ( $last_type eq 'i' && $last_token =~ /^[\$\%\@]/ ) {
 2390                 $ws = WS_YES;
 2391             }
 2392 
 2393             # allow constant function followed by '()' to retain no space
 2394             elsif ($last_type eq 'C'
 2395                 && $rLL->[ $j + 1 ]->[_TOKEN_] eq ')' )
 2396             {
 2397                 $ws = WS_NO;
 2398             }
 2399         }
 2400 
 2401         # patch for SWITCH/CASE: make space at ']{' optional
 2402         # since the '{' might begin a case or when block
 2403         elsif ( ( $token eq '{' && $type ne 'L' ) && $last_token eq ']' ) {
 2404             $ws = WS_OPTIONAL;
 2405         }
 2406 
 2407         # keep space between 'sub' and '{' for anonymous sub definition
 2408         if ( $type eq '{' ) {
 2409             if ( $last_token eq 'sub' ) {
 2410                 $ws = WS_YES;
 2411             }
 2412 
 2413             # this is needed to avoid no space in '){'
 2414             if ( $last_token eq ')' && $token eq '{' ) { $ws = WS_YES }
 2415 
 2416             # avoid any space before the brace or bracket in something like
 2417             #  @opts{'a','b',...}
 2418             if ( $last_type eq 'i' && $last_token =~ /^\@/ ) {
 2419                 $ws = WS_NO;
 2420             }
 2421         }
 2422 
 2423         elsif ( $type eq 'i' ) {
 2424 
 2425             # never a space before ->
 2426             if ( substr( $token, 0, 2 ) eq '->' ) {
 2427                 $ws = WS_NO;
 2428             }
 2429         }
 2430 
 2431         # retain any space between '-' and bare word
 2432         elsif ( $type eq 'w' || $type eq 'C' ) {
 2433             $ws = WS_OPTIONAL if $last_type eq '-';
 2434 
 2435             # never a space before ->
 2436             if ( substr( $token, 0, 2 ) eq '->' ) {
 2437                 $ws = WS_NO;
 2438             }
 2439         }
 2440 
 2441         # retain any space between '-' and bare word; for example
 2442         # avoid space between 'USER' and '-' here: <<snippets/space2.in>>
 2443         #   $myhash{USER-NAME}='steve';
 2444         elsif ( $type eq 'm' || $type eq '-' ) {
 2445             $ws = WS_OPTIONAL if ( $last_type eq 'w' );
 2446         }
 2447 
 2448         # always space before side comment
 2449         elsif ( $type eq '#' ) { $ws = WS_YES if $j > 0 }
 2450 
 2451         # always preserver whatever space was used after a possible
 2452         # filehandle (except _) or here doc operator
 2453         if (
 2454             $type ne '#'
 2455             && ( ( $last_type eq 'Z' && $last_token ne '_' )
 2456                 || $last_type eq 'h' )
 2457           )
 2458         {
 2459             $ws = WS_OPTIONAL;
 2460         }
 2461 
 2462         # space_backslash_quote; RT #123774  <<snippets/rt123774.in>>
 2463         # allow a space between a backslash and single or double quote
 2464         # to avoid fooling html formatters
 2465         elsif ( $last_type eq '\\' && $type eq 'Q' && $token =~ /^[\"\']/ ) {
 2466             if ($rOpts_space_backslash_quote) {
 2467                 if ( $rOpts_space_backslash_quote == 1 ) {
 2468                     $ws = WS_OPTIONAL;
 2469                 }
 2470                 elsif ( $rOpts_space_backslash_quote == 2 ) { $ws = WS_YES }
 2471                 else { }    # shouldnt happen
 2472             }
 2473             else {
 2474                 $ws = WS_NO;
 2475             }
 2476         }
 2477         elsif ( $type eq 'k' ) {
 2478 
 2479             # Keywords 'for', 'foreach' are special cases for -kpit since the
 2480             # opening paren does not always immediately follow the keyword. So
 2481             # we have to search forward for the paren in this case.  I have
 2482             # limited the search to 10 tokens ahead, just in case somebody
 2483             # has a big file and no opening paren.  This should be enough for
 2484             # all normal code.
 2485             if (   $is_for_foreach{$token}
 2486                 && %keyword_paren_inner_tightness
 2487                 && defined( $keyword_paren_inner_tightness{$token} )
 2488                 && $j < $jmax )
 2489             {
 2490                 my $jp = $j;
 2491                 for ( my $inc = 1 ; $inc < 10 ; $inc++ ) {
 2492                     $jp++;
 2493                     last if ( $jp > $jmax );
 2494                     next unless ( $rLL->[$jp]->[_TOKEN_] eq '(' );
 2495                     my $seqno = $rLL->[$jp]->[_TYPE_SEQUENCE_];
 2496                     $set_container_ws_by_keyword->( $token, $seqno );
 2497                     last;
 2498                 }
 2499             }
 2500         }
 2501 
 2502         my $ws_4;
 2503         $ws_4 = $ws
 2504           if DEBUG_WHITE;
 2505 
 2506         #---------------------------------------------------------------
 2507         # Whitespace Rules Section 5:
 2508         # Apply default rules not covered above.
 2509         #---------------------------------------------------------------
 2510 
 2511         # If we fall through to here, look at the pre-defined hash tables for
 2512         # the two tokens, and:
 2513         #  if (they are equal) use the common value
 2514         #  if (either is zero or undef) use the other
 2515         #  if (either is -1) use it
 2516         # That is,
 2517         # left  vs right
 2518         #  1    vs    1     -->  1
 2519         #  0    vs    0     -->  0
 2520         # -1    vs   -1     --> -1
 2521         #
 2522         #  0    vs   -1     --> -1
 2523         #  0    vs    1     -->  1
 2524         #  1    vs    0     -->  1
 2525         # -1    vs    0     --> -1
 2526         #
 2527         # -1    vs    1     --> -1
 2528         #  1    vs   -1     --> -1
 2529         if ( !defined($ws) ) {
 2530             my $wl = $want_left_space{$type};
 2531             my $wr = $want_right_space{$last_type};
 2532             if ( !defined($wl) ) { $wl = 0 }
 2533             if ( !defined($wr) ) { $wr = 0 }
 2534             $ws = ( ( $wl == $wr ) || ( $wl == -1 ) || !$wr ) ? $wl : $wr;
 2535         }
 2536 
 2537         if ( !defined($ws) ) {
 2538             $ws = 0;
 2539             write_diagnostics(
 2540                 "WS flag is undefined for tokens $last_token $token\n");
 2541         }
 2542 
 2543         # Treat newline as a whitespace. Otherwise, we might combine
 2544         # 'Send' and '-recipients' here according to the above rules:
 2545         # <<snippets/space3.in>>
 2546         #    my $msg = new Fax::Send
 2547         #      -recipients => $to,
 2548         #      -data => $data;
 2549         if ( $ws == 0 && $input_line_no != $last_input_line_no ) { $ws = 1 }
 2550 
 2551         $rwhitespace_flags->[$j] = $ws;
 2552 
 2553         DEBUG_WHITE && do {
 2554             my $str = substr( $last_token, 0, 15 );
 2555             $str .= ' ' x ( 16 - length($str) );
 2556             if ( !defined($ws_1) ) { $ws_1 = "*" }
 2557             if ( !defined($ws_2) ) { $ws_2 = "*" }
 2558             if ( !defined($ws_3) ) { $ws_3 = "*" }
 2559             if ( !defined($ws_4) ) { $ws_4 = "*" }
 2560             print STDOUT
 2561 "NEW WHITE:  i=$j $str $last_type $type $ws_1 : $ws_2 : $ws_3 : $ws_4 : $ws \n";
 2562         };
 2563     } ## end main loop
 2564 
 2565     if ( $rOpts->{'tight-secret-operators'} ) {
 2566         new_secret_operator_whitespace( $rLL, $rwhitespace_flags );
 2567     }
 2568     $self->[_ris_function_call_paren_] = $ris_function_call_paren;
 2569     return $rwhitespace_flags;
 2570 
 2571 } ## end sub set_whitespace_flags
 2572 
 2573 sub dump_want_left_space {
 2574     my $fh = shift;
 2575     local $" = "\n";
 2576     $fh->print(<<EOM);
 2577 These values are the main control of whitespace to the left of a token type;
 2578 They may be altered with the -wls parameter.
 2579 For a list of token types, use perltidy --dump-token-types (-dtt)
 2580  1 means the token wants a space to its left
 2581 -1 means the token does not want a space to its left
 2582 ------------------------------------------------------------------------
 2583 EOM
 2584     foreach my $key ( sort keys %want_left_space ) {
 2585         $fh->print("$key\t$want_left_space{$key}\n");
 2586     }
 2587     return;
 2588 }
 2589 
 2590 sub dump_want_right_space {
 2591     my $fh = shift;
 2592     local $" = "\n";
 2593     $fh->print(<<EOM);
 2594 These values are the main control of whitespace to the right of a token type;
 2595 They may be altered with the -wrs parameter.
 2596 For a list of token types, use perltidy --dump-token-types (-dtt)
 2597  1 means the token wants a space to its right
 2598 -1 means the token does not want a space to its right
 2599 ------------------------------------------------------------------------
 2600 EOM
 2601     foreach my $key ( sort keys %want_right_space ) {
 2602         $fh->print("$key\t$want_right_space{$key}\n");
 2603     }
 2604     return;
 2605 }
 2606 
 2607 {    ## begin closure is_essential_whitespace
 2608 
 2609     my %is_sort_grep_map;
 2610     my %is_for_foreach;
 2611     my %is_digraph;
 2612     my %is_trigraph;
 2613     my %essential_whitespace_filter_l1;
 2614     my %essential_whitespace_filter_r1;
 2615     my %essential_whitespace_filter_l2;
 2616     my %essential_whitespace_filter_r2;
 2617     my %is_type_with_space_before_bareword;
 2618 
 2619     BEGIN {
 2620 
 2621         my @q;
 2622         @q = qw(sort grep map);
 2623         @is_sort_grep_map{@q} = (1) x scalar(@q);
 2624 
 2625         @q = qw(for foreach);
 2626         @is_for_foreach{@q} = (1) x scalar(@q);
 2627 
 2628         @q = qw(
 2629           .. :: << >> ** && .. || // -> => += -= .= %= &= |= ^= *= <>
 2630           <= >= == =~ !~ != ++ -- /= x= ~~ ~. |. &. ^.
 2631         );
 2632         @is_digraph{@q} = (1) x scalar(@q);
 2633 
 2634         @q = qw( ... **= <<= >>= &&= ||= //= <=> !~~ &.= |.= ^.= <<~);
 2635         @is_trigraph{@q} = (1) x scalar(@q);
 2636 
 2637         # These are used as a speedup filters for sub is_essential_whitespace.
 2638 
 2639         # Filter 1:
 2640         # These left side token types USUALLY do not require a space:
 2641         @q = qw( ; { } [ ] L R );
 2642         push @q, ',';
 2643         push @q, ')';
 2644         push @q, '(';
 2645         @essential_whitespace_filter_l1{@q} = (1) x scalar(@q);
 2646 
 2647         # BUT some might if followed by these right token types
 2648         @q = qw( pp mm << <<= h );
 2649         @essential_whitespace_filter_r1{@q} = (1) x scalar(@q);
 2650 
 2651         # Filter 2:
 2652         # These right side filters usually do not require a space
 2653         @q = qw( ; ] R } );
 2654         push @q, ',';
 2655         push @q, ')';
 2656         @essential_whitespace_filter_r2{@q} = (1) x scalar(@q);
 2657 
 2658         # BUT some might if followed by these left token types
 2659         @q = qw( h Z );
 2660         @essential_whitespace_filter_l2{@q} = (1) x scalar(@q);
 2661 
 2662         # Keep a space between certain types and any bareword:
 2663         # Q: keep a space between a quote and a bareword to prevent the
 2664         #    bareword from becoming a quote modifier.
 2665         # &: do not remove space between an '&' and a bare word because
 2666         #    it may turn into a function evaluation, like here
 2667         #    between '&' and 'O_ACCMODE', producing a syntax error [File.pm]
 2668         #      $opts{rdonly} = (($opts{mode} & O_ACCMODE) == O_RDONLY);
 2669         @q = qw( Q & );
 2670         @is_type_with_space_before_bareword{@q} = (1) x scalar(@q);
 2671 
 2672     }
 2673 
 2674     sub is_essential_whitespace {
 2675 
 2676         # Essential whitespace means whitespace which cannot be safely deleted
 2677         # without risking the introduction of a syntax error.
 2678         # We are given three tokens and their types:
 2679         # ($tokenl, $typel) is the token to the left of the space in question
 2680         # ($tokenr, $typer) is the token to the right of the space in question
 2681         # ($tokenll, $typell) is previous nonblank token to the left of $tokenl
 2682         #
 2683         # Note1: This routine should almost never need to be changed.  It is
 2684         # for avoiding syntax problems rather than for formatting.
 2685 
 2686         # Note2: The -mangle option causes large numbers of calls to this
 2687         # routine and therefore is a good test. So if a change is made, be sure
 2688         # to run a large number of files with the -mangle option and check for
 2689         # differences.
 2690 
 2691         my ( $tokenll, $typell, $tokenl, $typel, $tokenr, $typer ) = @_;
 2692 
 2693         # This is potentially a very slow routine but the following quick
 2694         # filters typically catch and handle over 90% of the calls.
 2695 
 2696         # Filter 1: usually no space required after common types ; , [ ] { } ( )
 2697         return
 2698           if ( $essential_whitespace_filter_l1{$typel}
 2699             && !$essential_whitespace_filter_r1{$typer} );
 2700 
 2701         # Filter 2: usually no space before common types ; ,
 2702         return
 2703           if ( $essential_whitespace_filter_r2{$typer}
 2704             && !$essential_whitespace_filter_l2{$typel} );
 2705 
 2706         # Filter 3: Handle side comments: a space is only essential if the left
 2707         # token ends in '$' For example, we do not want to create $#foo below:
 2708 
 2709         #   sub t086
 2710         #       ( #foo)))
 2711         #       $ #foo)))
 2712         #       a #foo)))
 2713         #       ) #foo)))
 2714         #       { ... }
 2715 
 2716         # Also, I prefer not to put a ? and # together because ? used to be
 2717         # a pattern delmiter and spacing was used if guessing was needed.
 2718 
 2719         if ( $typer eq '#' ) {
 2720 
 2721             return 1
 2722               if ( $tokenl
 2723                 && ( $typel eq '?' || substr( $tokenl, -1 ) eq '$' ) );
 2724             return;
 2725         }
 2726 
 2727         my $tokenr_is_bareword   = $tokenr =~ /^\w/ && $tokenr !~ /^\d/;
 2728         my $tokenr_is_open_paren = $tokenr eq '(';
 2729         my $token_joined         = $tokenl . $tokenr;
 2730         my $tokenl_is_dash       = $tokenl eq '-';
 2731 
 2732         my $result =
 2733 
 2734           # never combine two bare words or numbers
 2735           # examples:  and ::ok(1)
 2736           #            return ::spw(...)
 2737           #            for bla::bla:: abc
 2738           # example is "%overload:: and" in files Dumpvalue.pm or colonbug.pl
 2739           #            $input eq"quit" to make $inputeq"quit"
 2740           #            my $size=-s::SINK if $file;  <==OK but we won't do it
 2741           # don't join something like: for bla::bla:: abc
 2742           # example is "%overload:: and" in files Dumpvalue.pm or colonbug.pl
 2743           (      ( $tokenl =~ /([\'\w]|\:\:)$/ && $typel ne 'CORE::' )
 2744               && ( $tokenr =~ /^([\'\w]|\:\:)/ ) )
 2745 
 2746           # do not combine a number with a concatenation dot
 2747           # example: pom.caputo:
 2748           # $vt100_compatible ? "\e[0;0H" : ('-' x 78 . "\n");
 2749           || $typel eq 'n' && $tokenr eq '.'
 2750           || $typer eq 'n'
 2751           && $tokenl eq '.'
 2752 
 2753           # cases of a space before a bareword...
 2754           || (
 2755             $tokenr_is_bareword && (
 2756 
 2757                 # do not join a minus with a bare word, because you might form
 2758                 # a file test operator.  Example from Complex.pm:
 2759                 # if (CORE::abs($z - i) < $eps);
 2760                 # "z-i" would be taken as a file test.
 2761                 $tokenl_is_dash && length($tokenr) == 1
 2762 
 2763                 # and something like this could become ambiguous without space
 2764                 # after the '-':
 2765                 #   use constant III=>1;
 2766                 #   $a = $b - III;
 2767                 # and even this:
 2768                 #   $a = - III;
 2769                 || $tokenl_is_dash && $typer =~ /^[wC]$/
 2770 
 2771                 # keep space between types Q & and a bareword
 2772                 || $is_type_with_space_before_bareword{$typel}
 2773 
 2774                 # +-: binary plus and minus before a bareword could get
 2775                 # converted into unary plus and minus on next pass through the
 2776                 # tokenizer. This can lead to blinkers: cases b660 b670 b780
 2777                 # b781 b787 b788 b790 So we keep a space unless the +/- clearly
 2778                 # follows an operator
 2779                 || ( ( $typel eq '+' || $typel eq '-' )
 2780                     && $typell !~ /^[niC\)\}\]R]$/ )
 2781 
 2782                 # keep a space between a token ending in '$' and any word;
 2783                 # this caused trouble:  "die @$ if $@"
 2784                 || $typel eq 'i' && $tokenl =~ /\$$/
 2785 
 2786                 # don't combine $$ or $# with any alphanumeric
 2787                 # (testfile mangle.t with --mangle)
 2788                 || $tokenl =~ /^\$[\$\#]$/
 2789 
 2790             )
 2791           )    ## end $tokenr_is_bareword
 2792 
 2793           # OLD, not used
 2794           # '= -' should not become =- or you will get a warning
 2795           # about reversed -=
 2796           # || ($tokenr eq '-')
 2797 
 2798           # do not join a bare word with a minus, like between 'Send' and
 2799           # '-recipients' here <<snippets/space3.in>>
 2800           #   my $msg = new Fax::Send
 2801           #     -recipients => $to,
 2802           #     -data => $data;
 2803           # This is the safest thing to do. If we had the token to the right of
 2804           # the minus we could do a better check.
 2805           #
 2806           # And do not combine a bareword and a quote, like this:
 2807           #    oops "Your login, $Bad_Login, is not valid";
 2808           # It can cause a syntax error if oops is a sub
 2809           || $typel eq 'w' && ( $tokenr eq '-' || $typer eq 'Q' )
 2810 
 2811           # perl is very fussy about spaces before <<
 2812           || $tokenr =~ /^\<\</
 2813 
 2814           # avoid combining tokens to create new meanings. Example:
 2815           #     $a+ +$b must not become $a++$b
 2816           || ( $is_digraph{$token_joined} )
 2817           || $is_trigraph{$token_joined}
 2818 
 2819           # another example: do not combine these two &'s:
 2820           #     allow_options & &OPT_EXECCGI
 2821           || $is_digraph{ $tokenl . substr( $tokenr, 0, 1 ) }
 2822 
 2823           # retain any space after possible filehandle
 2824           # (testfiles prnterr1.t with --extrude and mangle.t with --mangle)
 2825           || $typel eq 'Z'
 2826 
 2827           # Added 'Y' here 16 Jan 2021 to prevent -mangle option from removing
 2828           # space after type Y. Otherwise, it will get parsed as type 'Z' later
 2829           # and any space would have to be added back manually if desired.
 2830           || $typel eq 'Y'
 2831 
 2832           # Perl is sensitive to whitespace after the + here:
 2833           #  $b = xvals $a + 0.1 * yvals $a;
 2834           || $typell eq 'Z' && $typel =~ /^[\/\?\+\-\*]$/
 2835 
 2836           || (
 2837             $tokenr_is_open_paren && (
 2838 
 2839                 # keep paren separate in 'use Foo::Bar ()'
 2840                 ( $typel eq 'w' && $typell eq 'k' && $tokenll eq 'use' )
 2841 
 2842                 # OLD: keep any space between filehandle and paren:
 2843                 # file mangle.t with --mangle:
 2844                 # NEW: this test is no longer necessary here (moved above)
 2845                 ## || $typel eq 'Y'
 2846 
 2847                 # must have space between grep and left paren; "grep(" will fail
 2848                 || $is_sort_grep_map{$tokenl}
 2849 
 2850                 # don't stick numbers next to left parens, as in:
 2851                 #use Mail::Internet 1.28 (); (see Entity.pm, Head.pm, Test.pm)
 2852                 || $typel eq 'n'
 2853             )
 2854           )    ## end $tokenr_is_open_paren
 2855 
 2856           # retain any space after here doc operator ( hereerr.t)
 2857           || $typel eq 'h'
 2858 
 2859           # be careful with a space around ++ and --, to avoid ambiguity as to
 2860           # which token it applies
 2861           || $typer  =~ /^(pp|mm)$/ && $tokenl !~ /^[\;\{\(\[]/
 2862           || $typel  =~ /^(\+\+|\-\-)$/
 2863           && $tokenr !~ /^[\;\}\)\]]/
 2864 
 2865           # need space after foreach my; for example, this will fail in
 2866           # older versions of Perl:
 2867           # foreach my$ft(@filetypes)...
 2868           || (
 2869             $tokenl eq 'my'
 2870 
 2871             #  /^(for|foreach)$/
 2872             && $is_for_foreach{$tokenll}
 2873             && $tokenr =~ /^\$/
 2874           )
 2875 
 2876           # We must be sure that a space between a ? and a quoted string
 2877           # remains if the space before the ? remains.  [Loca.pm, lockarea]
 2878           # ie,
 2879           #    $b=join $comma ? ',' : ':', @_;  # ok
 2880           #    $b=join $comma?',' : ':', @_;    # ok!
 2881           #    $b=join $comma ?',' : ':', @_;   # error!
 2882           # Not really required:
 2883           ## || ( ( $typel eq '?' ) && ( $typer eq 'Q' ) )
 2884 
 2885           # Space stacked labels...
 2886           # Not really required: Perl seems to accept non-spaced labels.
 2887           ## || $typel eq 'J' && $typer eq 'J'
 2888 
 2889           ;    # the value of this long logic sequence is the result we want
 2890         return $result;
 2891     }
 2892 } ## end closure is_essential_whitespace
 2893 
 2894 {    ## begin closure new_secret_operator_whitespace
 2895 
 2896     my %secret_operators;
 2897     my %is_leading_secret_token;
 2898 
 2899     BEGIN {
 2900 
 2901         # token lists for perl secret operators as compiled by Philippe Bruhat
 2902         # at: https://metacpan.org/module/perlsecret
 2903         %secret_operators = (
 2904             'Goatse'             => [qw#= ( ) =#],        #=( )=
 2905             'Venus1'             => [qw#0 +#],            # 0+
 2906             'Venus2'             => [qw#+ 0#],            # +0
 2907             'Enterprise'         => [qw#) x ! !#],        # ()x!!
 2908             'Kite1'              => [qw#~ ~ <>#],         # ~~<>
 2909             'Kite2'              => [qw#~~ <>#],          # ~~<>
 2910             'Winking Fat Comma'  => [ ( ',', '=>' ) ],    # ,=>
 2911             'Bang bang         ' => [qw#! !#],            # !!
 2912         );
 2913 
 2914         # The following operators and constants are not included because they
 2915         # are normally kept tight by perltidy:
 2916         # ~~ <~>
 2917         #
 2918 
 2919         # Make a lookup table indexed by the first token of each operator:
 2920         # first token => [list, list, ...]
 2921         foreach my $value ( values(%secret_operators) ) {
 2922             my $tok = $value->[0];
 2923             push @{ $is_leading_secret_token{$tok} }, $value;
 2924         }
 2925     }
 2926 
 2927     sub new_secret_operator_whitespace {
 2928 
 2929         my ( $rlong_array, $rwhitespace_flags ) = @_;
 2930 
 2931         # Loop over all tokens in this line
 2932         my ( $token, $type );
 2933         my $jmax = @{$rlong_array} - 1;
 2934         foreach my $j ( 0 .. $jmax ) {
 2935 
 2936             $token = $rlong_array->[$j]->[_TOKEN_];
 2937             $type  = $rlong_array->[$j]->[_TYPE_];
 2938 
 2939             # Skip unless this token might start a secret operator
 2940             next if ( $type eq 'b' );
 2941             next unless ( $is_leading_secret_token{$token} );
 2942 
 2943             #      Loop over all secret operators with this leading token
 2944             foreach my $rpattern ( @{ $is_leading_secret_token{$token} } ) {
 2945                 my $jend = $j - 1;
 2946                 foreach my $tok ( @{$rpattern} ) {
 2947                     $jend++;
 2948                     $jend++
 2949 
 2950                       if ( $jend <= $jmax
 2951                         && $rlong_array->[$jend]->[_TYPE_] eq 'b' );
 2952                     if (   $jend > $jmax
 2953                         || $tok ne $rlong_array->[$jend]->[_TOKEN_] )
 2954                     {
 2955                         $jend = undef;
 2956                         last;
 2957                     }
 2958                 }
 2959 
 2960                 if ($jend) {
 2961 
 2962                     # set flags to prevent spaces within this operator
 2963                     foreach my $jj ( $j + 1 .. $jend ) {
 2964                         $rwhitespace_flags->[$jj] = WS_NO;
 2965                     }
 2966                     $j = $jend;
 2967                     last;
 2968                 }
 2969             }    ##      End Loop over all operators
 2970         }    ## End loop over all tokens
 2971         return;
 2972     }    # End sub
 2973 } ## end closure new_secret_operator_whitespace
 2974 
 2975 {    ## begin closure set_bond_strengths
 2976 
 2977     # These routines and variables are involved in deciding where to break very
 2978     # long lines.
 2979 
 2980     my %is_good_keyword_breakpoint;
 2981     my %is_lt_gt_le_ge;
 2982     my %is_container_token;
 2983 
 2984     my %binary_bond_strength_nospace;
 2985     my %binary_bond_strength;
 2986     my %nobreak_lhs;
 2987     my %nobreak_rhs;
 2988 
 2989     my @bias_tokens;
 2990     my %bias_hash;
 2991     my %bias;
 2992     my $delta_bias;
 2993 
 2994     sub initialize_bond_strength_hashes {
 2995 
 2996         my @q;
 2997         @q = qw(if unless while until for foreach);
 2998         @is_good_keyword_breakpoint{@q} = (1) x scalar(@q);
 2999 
 3000         @q = qw(lt gt le ge);
 3001         @is_lt_gt_le_ge{@q} = (1) x scalar(@q);
 3002 
 3003         @q = qw/ ( [ { } ] ) /;
 3004         @is_container_token{@q} = (1) x scalar(@q);
 3005 
 3006         # The decision about where to break a line depends upon a "bond
 3007         # strength" between tokens.  The LOWER the bond strength, the MORE
 3008         # likely a break.  A bond strength may be any value but to simplify
 3009         # things there are several pre-defined strength levels:
 3010 
 3011         #    NO_BREAK    => 10000;
 3012         #    VERY_STRONG => 100;
 3013         #    STRONG      => 2.1;
 3014         #    NOMINAL     => 1.1;
 3015         #    WEAK        => 0.8;
 3016         #    VERY_WEAK   => 0.55;
 3017 
 3018         # The strength values are based on trial-and-error, and need to be
 3019         # tweaked occasionally to get desired results.  Some comments:
 3020         #
 3021         #   1. Only relative strengths are important.  small differences
 3022         #      in strengths can make big formatting differences.
 3023         #   2. Each indentation level adds one unit of bond strength.
 3024         #   3. A value of NO_BREAK makes an unbreakable bond
 3025         #   4. A value of VERY_WEAK is the strength of a ','
 3026         #   5. Values below NOMINAL are considered ok break points.
 3027         #   6. Values above NOMINAL are considered poor break points.
 3028         #
 3029         # The bond strengths should roughly follow precedence order where
 3030         # possible.  If you make changes, please check the results very
 3031         # carefully on a variety of scripts.  Testing with the -extrude
 3032         # options is particularly helpful in exercising all of the rules.
 3033 
 3034         # Wherever possible, bond strengths are defined in the following
 3035         # tables.  There are two main stages to setting bond strengths and
 3036         # two types of tables:
 3037         #
 3038         # The first stage involves looking at each token individually and
 3039         # defining left and right bond strengths, according to if we want
 3040         # to break to the left or right side, and how good a break point it
 3041         # is.  For example tokens like =, ||, && make good break points and
 3042         # will have low strengths, but one might want to break on either
 3043         # side to put them at the end of one line or beginning of the next.
 3044         #
 3045         # The second stage involves looking at certain pairs of tokens and
 3046         # defining a bond strength for that particular pair.  This second
 3047         # stage has priority.
 3048 
 3049         #---------------------------------------------------------------
 3050         # Bond Strength BEGIN Section 1.
 3051         # Set left and right bond strengths of individual tokens.
 3052         #---------------------------------------------------------------
 3053 
 3054         # NOTE: NO_BREAK's set in this section first are HINTS which will
 3055         # probably not be honored. Essential NO_BREAKS's should be set in
 3056         # BEGIN Section 2 or hardwired in the NO_BREAK coding near the end
 3057         # of this subroutine.
 3058 
 3059         # Note that we are setting defaults in this section.  The user
 3060         # cannot change bond strengths but can cause the left and right
 3061         # bond strengths of any token type to be swapped through the use of
 3062         # the -wba and -wbb flags. In this way the user can determine if a
 3063         # breakpoint token should appear at the end of one line or the
 3064         # beginning of the next line.
 3065 
 3066         %right_bond_strength          = ();
 3067         %left_bond_strength           = ();
 3068         %binary_bond_strength_nospace = ();
 3069         %binary_bond_strength         = ();
 3070         %nobreak_lhs                  = ();
 3071         %nobreak_rhs                  = ();
 3072 
 3073         # The hash keys in this section are token types, plus the text of
 3074         # certain keywords like 'or', 'and'.
 3075 
 3076         # no break around possible filehandle
 3077         $left_bond_strength{'Z'}  = NO_BREAK;
 3078         $right_bond_strength{'Z'} = NO_BREAK;
 3079 
 3080         # never put a bare word on a new line:
 3081         # example print (STDERR, "bla"); will fail with break after (
 3082         $left_bond_strength{'w'} = NO_BREAK;
 3083 
 3084         # blanks always have infinite strength to force breaks after
 3085         # real tokens
 3086         $right_bond_strength{'b'} = NO_BREAK;
 3087 
 3088         # try not to break on exponentation
 3089         @q                       = qw# ** .. ... <=> #;
 3090         @left_bond_strength{@q}  = (STRONG) x scalar(@q);
 3091         @right_bond_strength{@q} = (STRONG) x scalar(@q);
 3092 
 3093         # The comma-arrow has very low precedence but not a good break point
 3094         $left_bond_strength{'=>'}  = NO_BREAK;
 3095         $right_bond_strength{'=>'} = NOMINAL;
 3096 
 3097         # ok to break after label
 3098         $left_bond_strength{'J'}  = NO_BREAK;
 3099         $right_bond_strength{'J'} = NOMINAL;
 3100         $left_bond_strength{'j'}  = STRONG;
 3101         $right_bond_strength{'j'} = STRONG;
 3102         $left_bond_strength{'A'}  = STRONG;
 3103         $right_bond_strength{'A'} = STRONG;
 3104 
 3105         $left_bond_strength{'->'}  = STRONG;
 3106         $right_bond_strength{'->'} = VERY_STRONG;
 3107 
 3108         $left_bond_strength{'CORE::'}  = NOMINAL;
 3109         $right_bond_strength{'CORE::'} = NO_BREAK;
 3110 
 3111         # breaking AFTER modulus operator is ok:
 3112         @q = qw< % >;
 3113         @left_bond_strength{@q} = (STRONG) x scalar(@q);
 3114         @right_bond_strength{@q} =
 3115           ( 0.1 * NOMINAL + 0.9 * STRONG ) x scalar(@q);
 3116 
 3117         # Break AFTER math operators * and /
 3118         @q                       = qw< * / x  >;
 3119         @left_bond_strength{@q}  = (STRONG) x scalar(@q);
 3120         @right_bond_strength{@q} = (NOMINAL) x scalar(@q);
 3121 
 3122         # Break AFTER weakest math operators + and -
 3123         # Make them weaker than * but a bit stronger than '.'
 3124         @q = qw< + - >;
 3125         @left_bond_strength{@q} = (STRONG) x scalar(@q);
 3126         @right_bond_strength{@q} =
 3127           ( 0.91 * NOMINAL + 0.09 * WEAK ) x scalar(@q);
 3128 
 3129         # Define left strength of unary plus and minus (fixes case b511)
 3130         $left_bond_strength{p} = $left_bond_strength{'+'};
 3131         $left_bond_strength{m} = $left_bond_strength{'-'};
 3132 
 3133         # And make right strength of unary plus and minus very high.
 3134         # Fixes cases b670 b790
 3135         $right_bond_strength{p} = NO_BREAK;
 3136         $right_bond_strength{m} = NO_BREAK;
 3137 
 3138         # breaking BEFORE these is just ok:
 3139         @q                       = qw# >> << #;
 3140         @right_bond_strength{@q} = (STRONG) x scalar(@q);
 3141         @left_bond_strength{@q}  = (NOMINAL) x scalar(@q);
 3142 
 3143         # breaking before the string concatenation operator seems best
 3144         # because it can be hard to see at the end of a line
 3145         $right_bond_strength{'.'} = STRONG;
 3146         $left_bond_strength{'.'}  = 0.9 * NOMINAL + 0.1 * WEAK;
 3147 
 3148         @q                       = qw< } ] ) R >;
 3149         @left_bond_strength{@q}  = (STRONG) x scalar(@q);
 3150         @right_bond_strength{@q} = (NOMINAL) x scalar(@q);
 3151 
 3152         # make these a little weaker than nominal so that they get
 3153         # favored for end-of-line characters
 3154         @q = qw< != == =~ !~ ~~ !~~ >;
 3155         @left_bond_strength{@q} = (STRONG) x scalar(@q);
 3156         @right_bond_strength{@q} =
 3157           ( 0.9 * NOMINAL + 0.1 * WEAK ) x scalar(@q);
 3158 
 3159         # break AFTER these
 3160         @q = qw# < >  | & >= <= #;
 3161         @left_bond_strength{@q} = (VERY_STRONG) x scalar(@q);
 3162         @right_bond_strength{@q} =
 3163           ( 0.8 * NOMINAL + 0.2 * WEAK ) x scalar(@q);
 3164 
 3165         # breaking either before or after a quote is ok
 3166         # but bias for breaking before a quote
 3167         $left_bond_strength{'Q'}  = NOMINAL;
 3168         $right_bond_strength{'Q'} = NOMINAL + 0.02;
 3169         $left_bond_strength{'q'}  = NOMINAL;
 3170         $right_bond_strength{'q'} = NOMINAL;
 3171 
 3172         # starting a line with a keyword is usually ok
 3173         $left_bond_strength{'k'} = NOMINAL;
 3174 
 3175         # we usually want to bond a keyword strongly to what immediately
 3176         # follows, rather than leaving it stranded at the end of a line
 3177         $right_bond_strength{'k'} = STRONG;
 3178 
 3179         $left_bond_strength{'G'}  = NOMINAL;
 3180         $right_bond_strength{'G'} = STRONG;
 3181 
 3182         # assignment operators
 3183         @q = qw(
 3184           = **= += *= &= <<= &&=
 3185           -= /= |= >>= ||= //=
 3186           .= %= ^=
 3187           x=
 3188         );
 3189 
 3190         # Default is to break AFTER various assignment operators
 3191         @left_bond_strength{@q} = (STRONG) x scalar(@q);
 3192         @right_bond_strength{@q} =
 3193           ( 0.4 * WEAK + 0.6 * VERY_WEAK ) x scalar(@q);
 3194 
 3195         # Default is to break BEFORE '&&' and '||' and '//'
 3196         # set strength of '||' to same as '=' so that chains like
 3197         # $a = $b || $c || $d   will break before the first '||'
 3198         $right_bond_strength{'||'} = NOMINAL;
 3199         $left_bond_strength{'||'}  = $right_bond_strength{'='};
 3200 
 3201         # same thing for '//'
 3202         $right_bond_strength{'//'} = NOMINAL;
 3203         $left_bond_strength{'//'}  = $right_bond_strength{'='};
 3204 
 3205         # set strength of && a little higher than ||
 3206         $right_bond_strength{'&&'} = NOMINAL;
 3207         $left_bond_strength{'&&'}  = $left_bond_strength{'||'} + 0.1;
 3208 
 3209         $left_bond_strength{';'}  = VERY_STRONG;
 3210         $right_bond_strength{';'} = VERY_WEAK;
 3211         $left_bond_strength{'f'}  = VERY_STRONG;
 3212 
 3213         # make right strength of for ';' a little less than '='
 3214         # to make for contents break after the ';' to avoid this:
 3215         #   for ( $j = $number_of_fields - 1 ; $j < $item_count ; $j +=
 3216         #     $number_of_fields )
 3217         # and make it weaker than ',' and 'and' too
 3218         $right_bond_strength{'f'} = VERY_WEAK - 0.03;
 3219 
 3220         # The strengths of ?/: should be somewhere between
 3221         # an '=' and a quote (NOMINAL),
 3222         # make strength of ':' slightly less than '?' to help
 3223         # break long chains of ? : after the colons
 3224         $left_bond_strength{':'}  = 0.4 * WEAK + 0.6 * NOMINAL;
 3225         $right_bond_strength{':'} = NO_BREAK;
 3226         $left_bond_strength{'?'}  = $left_bond_strength{':'} + 0.01;
 3227         $right_bond_strength{'?'} = NO_BREAK;
 3228 
 3229         $left_bond_strength{','}  = VERY_STRONG;
 3230         $right_bond_strength{','} = VERY_WEAK;
 3231 
 3232         # remaining digraphs and trigraphs not defined above
 3233         @q                       = qw( :: <> ++ --);
 3234         @left_bond_strength{@q}  = (WEAK) x scalar(@q);
 3235         @right_bond_strength{@q} = (STRONG) x scalar(@q);
 3236 
 3237         # Set bond strengths of certain keywords
 3238         # make 'or', 'err', 'and' slightly weaker than a ','
 3239         $left_bond_strength{'and'}  = VERY_WEAK - 0.01;
 3240         $left_bond_strength{'or'}   = VERY_WEAK - 0.02;
 3241         $left_bond_strength{'err'}  = VERY_WEAK - 0.02;
 3242         $left_bond_strength{'xor'}  = VERY_WEAK - 0.01;
 3243         $right_bond_strength{'and'} = NOMINAL;
 3244         $right_bond_strength{'or'}  = NOMINAL;
 3245         $right_bond_strength{'err'} = NOMINAL;
 3246         $right_bond_strength{'xor'} = NOMINAL;
 3247 
 3248         #---------------------------------------------------------------
 3249         # Bond Strength BEGIN Section 2.
 3250         # Set binary rules for bond strengths between certain token types.
 3251         #---------------------------------------------------------------
 3252 
 3253         #  We have a little problem making tables which apply to the
 3254         #  container tokens.  Here is a list of container tokens and
 3255         #  their types:
 3256         #
 3257         #   type    tokens // meaning
 3258         #      {    {, [, ( // indent
 3259         #      }    }, ], ) // outdent
 3260         #      [    [ // left non-structural [ (enclosing an array index)
 3261         #      ]    ] // right non-structural square bracket
 3262         #      (    ( // left non-structural paren
 3263         #      )    ) // right non-structural paren
 3264         #      L    { // left non-structural curly brace (enclosing a key)
 3265         #      R    } // right non-structural curly brace
 3266         #
 3267         #  Some rules apply to token types and some to just the token
 3268         #  itself.  We solve the problem by combining type and token into a
 3269         #  new hash key for the container types.
 3270         #
 3271         #  If a rule applies to a token 'type' then we need to make rules
 3272         #  for each of these 'type.token' combinations:
 3273         #  Type    Type.Token
 3274         #  {       {{, {[, {(
 3275         #  [       [[
 3276         #  (       ((
 3277         #  L       L{
 3278         #  }       }}, }], })
 3279         #  ]       ]]
 3280         #  )       ))
 3281         #  R       R}
 3282         #
 3283         #  If a rule applies to a token then we need to make rules for
 3284         #  these 'type.token' combinations:
 3285         #  Token   Type.Token
 3286         #  {       {{, L{
 3287         #  [       {[, [[
 3288         #  (       {(, ((
 3289         #  }       }}, R}
 3290         #  ]       }], ]]
 3291         #  )       }), ))
 3292 
 3293         # allow long lines before final { in an if statement, as in:
 3294         #    if (..........
 3295         #      ..........)
 3296         #    {
 3297         #
 3298         # Otherwise, the line before the { tends to be too short.
 3299 
 3300         $binary_bond_strength{'))'}{'{{'} = VERY_WEAK + 0.03;
 3301         $binary_bond_strength{'(('}{'{{'} = NOMINAL;
 3302 
 3303         # break on something like '} (', but keep this stronger than a ','
 3304         # example is in 'howe.pl'
 3305         $binary_bond_strength{'R}'}{'(('} = 0.8 * VERY_WEAK + 0.2 * WEAK;
 3306         $binary_bond_strength{'}}'}{'(('} = 0.8 * VERY_WEAK + 0.2 * WEAK;
 3307 
 3308         # keep matrix and hash indices together
 3309         # but make them a little below STRONG to allow breaking open
 3310         # something like {'some-word'}{'some-very-long-word'} at the }{
 3311         # (bracebrk.t)
 3312         $binary_bond_strength{']]'}{'[['} = 0.9 * STRONG + 0.1 * NOMINAL;
 3313         $binary_bond_strength{']]'}{'L{'} = 0.9 * STRONG + 0.1 * NOMINAL;
 3314         $binary_bond_strength{'R}'}{'[['} = 0.9 * STRONG + 0.1 * NOMINAL;
 3315         $binary_bond_strength{'R}'}{'L{'} = 0.9 * STRONG + 0.1 * NOMINAL;
 3316 
 3317         # increase strength to the point where a break in the following
 3318         # will be after the opening paren rather than at the arrow:
 3319         #    $a->$b($c);
 3320         $binary_bond_strength{'i'}{'->'} = 1.45 * STRONG;
 3321 
 3322     # Note that the following alternative strength would make the break at the
 3323     # '->' rather than opening the '('.  Both have advantages and disadvantages.
 3324     # $binary_bond_strength{'i'}{'->'} = 0.5*STRONG + 0.5 * NOMINAL; #
 3325 
 3326         $binary_bond_strength{'))'}{'->'} = 0.1 * STRONG + 0.9 * NOMINAL;
 3327         $binary_bond_strength{']]'}{'->'} = 0.1 * STRONG + 0.9 * NOMINAL;
 3328         $binary_bond_strength{'})'}{'->'} = 0.1 * STRONG + 0.9 * NOMINAL;
 3329         $binary_bond_strength{'}]'}{'->'} = 0.1 * STRONG + 0.9 * NOMINAL;
 3330         $binary_bond_strength{'}}'}{'->'} = 0.1 * STRONG + 0.9 * NOMINAL;
 3331         $binary_bond_strength{'R}'}{'->'} = 0.1 * STRONG + 0.9 * NOMINAL;
 3332 
 3333         $binary_bond_strength{'))'}{'[['} = 0.2 * STRONG + 0.8 * NOMINAL;
 3334         $binary_bond_strength{'})'}{'[['} = 0.2 * STRONG + 0.8 * NOMINAL;
 3335         $binary_bond_strength{'))'}{'{['} = 0.2 * STRONG + 0.8 * NOMINAL;
 3336         $binary_bond_strength{'})'}{'{['} = 0.2 * STRONG + 0.8 * NOMINAL;
 3337 
 3338         #---------------------------------------------------------------
 3339         # Binary NO_BREAK rules
 3340         #---------------------------------------------------------------
 3341 
 3342         # use strict requires that bare word and => not be separated
 3343         $binary_bond_strength{'C'}{'=>'} = NO_BREAK;
 3344         $binary_bond_strength{'U'}{'=>'} = NO_BREAK;
 3345 
 3346         # Never break between a bareword and a following paren because
 3347         # perl may give an error.  For example, if a break is placed
 3348         # between 'to_filehandle' and its '(' the following line will
 3349         # give a syntax error [Carp.pm]: my( $no) =fileno(
 3350         # to_filehandle( $in)) ;
 3351         $binary_bond_strength{'C'}{'(('} = NO_BREAK;
 3352         $binary_bond_strength{'C'}{'{('} = NO_BREAK;
 3353         $binary_bond_strength{'U'}{'(('} = NO_BREAK;
 3354         $binary_bond_strength{'U'}{'{('} = NO_BREAK;
 3355 
 3356         # use strict requires that bare word within braces not start new
 3357         # line
 3358         $binary_bond_strength{'L{'}{'w'} = NO_BREAK;
 3359 
 3360         $binary_bond_strength{'w'}{'R}'} = NO_BREAK;
 3361 
 3362         # The following two rules prevent a syntax error caused by breaking up
 3363         # a construction like '{-y}'.  The '-' quotes the 'y' and prevents
 3364         # it from being taken as a transliteration. We have to keep
 3365         # token types 'L m w' together to prevent this error.
 3366         $binary_bond_strength{'L{'}{'m'}        = NO_BREAK;
 3367         $binary_bond_strength_nospace{'m'}{'w'} = NO_BREAK;
 3368 
 3369         # keep 'bareword-' together, but only if there is no space between
 3370         # the word and dash. Do not keep together if there is a space.
 3371         # example 'use perl6-alpha'
 3372         $binary_bond_strength_nospace{'w'}{'m'} = NO_BREAK;
 3373 
 3374         # use strict requires that bare word and => not be separated
 3375         $binary_bond_strength{'w'}{'=>'} = NO_BREAK;
 3376 
 3377         # use strict does not allow separating type info from trailing { }
 3378         # testfile is readmail.pl
 3379         $binary_bond_strength{'t'}{'L{'} = NO_BREAK;
 3380         $binary_bond_strength{'i'}{'L{'} = NO_BREAK;
 3381 
 3382         # As a defensive measure, do not break between a '(' and a
 3383         # filehandle.  In some cases, this can cause an error.  For
 3384         # example, the following program works:
 3385         #    my $msg="hi!\n";
 3386         #    print
 3387         #    ( STDOUT
 3388         #    $msg
 3389         #    );
 3390         #
 3391         # But this program fails:
 3392         #    my $msg="hi!\n";
 3393         #    print
 3394         #    (
 3395         #    STDOUT
 3396         #    $msg
 3397         #    );
 3398         #
 3399         # This is normally only a problem with the 'extrude' option
 3400         $binary_bond_strength{'(('}{'Y'} = NO_BREAK;
 3401         $binary_bond_strength{'{('}{'Y'} = NO_BREAK;
 3402 
 3403         # never break between sub name and opening paren
 3404         $binary_bond_strength{'w'}{'(('} = NO_BREAK;
 3405         $binary_bond_strength{'w'}{'{('} = NO_BREAK;
 3406 
 3407         # keep '}' together with ';'
 3408         $binary_bond_strength{'}}'}{';'} = NO_BREAK;
 3409 
 3410         # Breaking before a ++ can cause perl to guess wrong. For
 3411         # example the following line will cause a syntax error
 3412         # with -extrude if we break between '$i' and '++' [fixstyle2]
 3413         #   print( ( $i++ & 1 ) ? $_ : ( $change{$_} || $_ ) );
 3414         $nobreak_lhs{'++'} = NO_BREAK;
 3415 
 3416         # Do not break before a possible file handle
 3417         $nobreak_lhs{'Z'} = NO_BREAK;
 3418 
 3419         # use strict hates bare words on any new line.  For
 3420         # example, a break before the underscore here provokes the
 3421         # wrath of use strict:
 3422         # if ( -r $fn && ( -s _ || $AllowZeroFilesize)) {
 3423         $nobreak_rhs{'F'}      = NO_BREAK;
 3424         $nobreak_rhs{'CORE::'} = NO_BREAK;
 3425 
 3426         # To prevent the tokenizer from switching between types 'w' and 'G' we
 3427         # need to avoid breaking between type 'G' and the following code block
 3428         # brace. Fixes case b929.
 3429         $nobreak_rhs{G} = NO_BREAK;
 3430 
 3431         #---------------------------------------------------------------
 3432         # Bond Strength BEGIN Section 3.
 3433         # Define tables and values for applying a small bias to the above
 3434         # values.
 3435         #---------------------------------------------------------------
 3436         # Adding a small 'bias' to strengths is a simple way to make a line
 3437         # break at the first of a sequence of identical terms.  For
 3438         # example, to force long string of conditional operators to break
 3439         # with each line ending in a ':', we can add a small number to the
 3440         # bond strength of each ':' (colon.t)
 3441         @bias_tokens = qw( : && || f and or . );       # tokens which get bias
 3442         %bias_hash   = map { $_ => 0 } @bias_tokens;
 3443         $delta_bias  = 0.0001;    # a very small strength level
 3444         return;
 3445 
 3446     } ## end sub initialize_bond_strength_hashes
 3447 
 3448     use constant DEBUG_BOND => 0;
 3449 
 3450     sub set_bond_strengths {
 3451 
 3452         my ($self) = @_;
 3453 
 3454         my $rK_weld_right = $self->[_rK_weld_right_];
 3455         my $rK_weld_left  = $self->[_rK_weld_left_];
 3456 
 3457         # patch-its always ok to break at end of line
 3458         $nobreak_to_go[$max_index_to_go] = 0;
 3459 
 3460         # we start a new set of bias values for each line
 3461         %bias = %bias_hash;
 3462 
 3463         my $code_bias = -.01;    # bias for closing block braces
 3464 
 3465         my $type         = 'b';
 3466         my $token        = ' ';
 3467         my $token_length = 1;
 3468         my $last_type;
 3469         my $last_nonblank_type  = $type;
 3470         my $last_nonblank_token = $token;
 3471         my $list_str            = $left_bond_strength{'?'};
 3472 
 3473         my ( $block_type, $i_next, $i_next_nonblank, $next_nonblank_token,
 3474             $next_nonblank_type, $next_token, $next_type,
 3475             $total_nesting_depth, );
 3476 
 3477         # main loop to compute bond strengths between each pair of tokens
 3478         foreach my $i ( 0 .. $max_index_to_go ) {
 3479             $last_type = $type;
 3480             if ( $type ne 'b' ) {
 3481                 $last_nonblank_type  = $type;
 3482                 $last_nonblank_token = $token;
 3483             }
 3484             $type = $types_to_go[$i];
 3485 
 3486             # strength on both sides of a blank is the same
 3487             if ( $type eq 'b' && $last_type ne 'b' ) {
 3488                 $bond_strength_to_go[$i] = $bond_strength_to_go[ $i - 1 ];
 3489                 next;
 3490             }
 3491 
 3492             $token               = $tokens_to_go[$i];
 3493             $token_length        = $token_lengths_to_go[$i];
 3494             $block_type          = $block_type_to_go[$i];
 3495             $i_next              = $i + 1;
 3496             $next_type           = $types_to_go[$i_next];
 3497             $next_token          = $tokens_to_go[$i_next];
 3498             $total_nesting_depth = $nesting_depth_to_go[$i_next];
 3499             $i_next_nonblank     = ( ( $next_type eq 'b' ) ? $i + 2 : $i + 1 );
 3500             $next_nonblank_type  = $types_to_go[$i_next_nonblank];
 3501             $next_nonblank_token = $tokens_to_go[$i_next_nonblank];
 3502 
 3503             my $seqno               = $type_sequence_to_go[$i];
 3504             my $next_nonblank_seqno = $type_sequence_to_go[$i_next_nonblank];
 3505 
 3506             # We are computing the strength of the bond between the current
 3507             # token and the NEXT token.
 3508 
 3509             #---------------------------------------------------------------
 3510             # Bond Strength Section 1:
 3511             # First Approximation.
 3512             # Use minimum of individual left and right tabulated bond
 3513             # strengths.
 3514             #---------------------------------------------------------------
 3515             my $bsr = $right_bond_strength{$type};
 3516             my $bsl = $left_bond_strength{$next_nonblank_type};
 3517 
 3518             # define right bond strengths of certain keywords
 3519             if ( $type eq 'k' && defined( $right_bond_strength{$token} ) ) {
 3520                 $bsr = $right_bond_strength{$token};
 3521             }
 3522             elsif ( $token eq 'ne' or $token eq 'eq' ) {
 3523                 $bsr = NOMINAL;
 3524             }
 3525 
 3526             # set terminal bond strength to the nominal value
 3527             # this will cause good preceding breaks to be retained
 3528             if ( $i_next_nonblank > $max_index_to_go ) {
 3529                 $bsl = NOMINAL;
 3530             }
 3531 
 3532             # define right bond strengths of certain keywords
 3533             if ( $next_nonblank_type eq 'k'
 3534                 && defined( $left_bond_strength{$next_nonblank_token} ) )
 3535             {
 3536                 $bsl = $left_bond_strength{$next_nonblank_token};
 3537             }
 3538             elsif ($next_nonblank_token eq 'ne'
 3539                 or $next_nonblank_token eq 'eq' )
 3540             {
 3541                 $bsl = NOMINAL;
 3542             }
 3543             elsif ( $is_lt_gt_le_ge{$next_nonblank_token} ) {
 3544                 $bsl = 0.9 * NOMINAL + 0.1 * STRONG;
 3545             }
 3546 
 3547             # Use the minimum of the left and right strengths.  Note: it might
 3548             # seem that we would want to keep a NO_BREAK if either token has
 3549             # this value.  This didn't work, for example because in an arrow
 3550             # list, it prevents the comma from separating from the following
 3551             # bare word (which is probably quoted by its arrow).  So necessary
 3552             # NO_BREAK's have to be handled as special cases in the final
 3553             # section.
 3554             if ( !defined($bsr) ) { $bsr = VERY_STRONG }
 3555             if ( !defined($bsl) ) { $bsl = VERY_STRONG }
 3556             my $bond_str   = ( $bsr < $bsl ) ? $bsr : $bsl;
 3557             my $bond_str_1 = $bond_str;
 3558 
 3559             #---------------------------------------------------------------
 3560             # Bond Strength Section 2:
 3561             # Apply hardwired rules..
 3562             #---------------------------------------------------------------
 3563 
 3564             # Patch to put terminal or clauses on a new line: Weaken the bond
 3565             # at an || followed by die or similar keyword to make the terminal
 3566             # or clause fall on a new line, like this:
 3567             #
 3568             #   my $class = shift
 3569             #     || die "Cannot add broadcast:  No class identifier found";
 3570             #
 3571             # Otherwise the break will be at the previous '=' since the || and
 3572             # = have the same starting strength and the or is biased, like
 3573             # this:
 3574             #
 3575             # my $class =
 3576             #   shift || die "Cannot add broadcast:  No class identifier found";
 3577             #
 3578             # In any case if the user places a break at either the = or the ||
 3579             # it should remain there.
 3580             if ( $type eq '||' || $type eq 'k' && $token eq 'or' ) {
 3581                 if ( $next_nonblank_token =~ /^(die|confess|croak|warn)$/ ) {
 3582                     if ( $want_break_before{$token} && $i > 0 ) {
 3583                         $bond_strength_to_go[ $i - 1 ] -= $delta_bias;
 3584 
 3585                         # keep bond strength of a token and its following blank
 3586                         # the same
 3587                         if ( $types_to_go[ $i - 1 ] eq 'b' && $i > 2 ) {
 3588                             $bond_strength_to_go[ $i - 2 ] -= $delta_bias;
 3589                         }
 3590                     }
 3591                     else {
 3592                         $bond_str -= $delta_bias;
 3593                     }
 3594                 }
 3595             }
 3596 
 3597             # good to break after end of code blocks
 3598             if ( $type eq '}' && $block_type && $next_nonblank_type ne ';' ) {
 3599 
 3600                 $bond_str = 0.5 * WEAK + 0.5 * VERY_WEAK + $code_bias;
 3601                 $code_bias += $delta_bias;
 3602             }
 3603 
 3604             if ( $type eq 'k' ) {
 3605 
 3606                 # allow certain control keywords to stand out
 3607                 if (   $next_nonblank_type eq 'k'
 3608                     && $is_last_next_redo_return{$token} )
 3609                 {
 3610                     $bond_str = 0.45 * WEAK + 0.55 * VERY_WEAK;
 3611                 }
 3612 
 3613                 # Don't break after keyword my.  This is a quick fix for a
 3614                 # rare problem with perl. An example is this line from file
 3615                 # Container.pm:
 3616 
 3617                 # foreach my $question( Debian::DebConf::ConfigDb::gettree(
 3618                 # $this->{'question'} ) )
 3619 
 3620                 if ( $token eq 'my' ) {
 3621                     $bond_str = NO_BREAK;
 3622                 }
 3623 
 3624             }
 3625 
 3626             # good to break before 'if', 'unless', etc
 3627             if ( $is_if_brace_follower{$next_nonblank_token} ) {
 3628                 $bond_str = VERY_WEAK;
 3629             }
 3630 
 3631             if ( $next_nonblank_type eq 'k' && $type ne 'CORE::' ) {
 3632 
 3633                 if ( $is_keyword_returning_list{$next_nonblank_token} ) {
 3634                     $bond_str = $list_str if ( $bond_str > $list_str );
 3635                 }
 3636 
 3637                 # keywords like 'unless', 'if', etc, within statements
 3638                 # make good breaks
 3639                 if ( $is_good_keyword_breakpoint{$next_nonblank_token} ) {
 3640                     $bond_str = VERY_WEAK / 1.05;
 3641                 }
 3642             }
 3643 
 3644             # try not to break before a comma-arrow
 3645             elsif ( $next_nonblank_type eq '=>' ) {
 3646                 if ( $bond_str < STRONG ) { $bond_str = STRONG }
 3647             }
 3648 
 3649             #---------------------------------------------------------------
 3650             # Additional hardwired NOBREAK rules
 3651             #---------------------------------------------------------------
 3652 
 3653             # map1.t -- correct for a quirk in perl
 3654             if (   $token eq '('
 3655                 && $next_nonblank_type eq 'i'
 3656                 && $last_nonblank_type eq 'k'
 3657                 && $is_sort_map_grep{$last_nonblank_token} )
 3658 
 3659               #     /^(sort|map|grep)$/ )
 3660             {
 3661                 $bond_str = NO_BREAK;
 3662             }
 3663 
 3664             # extrude.t: do not break before paren at:
 3665             #    -l pid_filename(
 3666             if ( $last_nonblank_type eq 'F' && $next_nonblank_token eq '(' ) {
 3667                 $bond_str = NO_BREAK;
 3668             }
 3669 
 3670             # in older version of perl, use strict can cause problems with
 3671             # breaks before bare words following opening parens.  For example,
 3672             # this will fail under older versions if a break is made between
 3673             # '(' and 'MAIL': use strict; open( MAIL, "a long filename or
 3674             # command"); close MAIL;
 3675             if ( $type eq '{' ) {
 3676 
 3677                 if ( $token eq '(' && $next_nonblank_type eq 'w' ) {
 3678 
 3679                     # but it's fine to break if the word is followed by a '=>'
 3680                     # or if it is obviously a sub call
 3681                     my $i_next_next_nonblank = $i_next_nonblank + 1;
 3682                     my $next_next_type = $types_to_go[$i_next_next_nonblank];
 3683                     if (   $next_next_type eq 'b'
 3684                         && $i_next_nonblank < $max_index_to_go )
 3685                     {
 3686                         $i_next_next_nonblank++;
 3687                         $next_next_type = $types_to_go[$i_next_next_nonblank];
 3688                     }
 3689 
 3690                     # We'll check for an old breakpoint and keep a leading
 3691                     # bareword if it was that way in the input file.
 3692                     # Presumably it was ok that way.  For example, the
 3693                     # following would remain unchanged:
 3694                     #
 3695                     # @months = (
 3696                     #   January,   February, March,    April,
 3697                     #   May,       June,     July,     August,
 3698                     #   September, October,  November, December,
 3699                     # );
 3700                     #
 3701                     # This should be sufficient:
 3702                     if (
 3703                         !$old_breakpoint_to_go[$i]
 3704                         && (   $next_next_type eq ','
 3705                             || $next_next_type eq '}' )
 3706                       )
 3707                     {
 3708                         $bond_str = NO_BREAK;
 3709                     }
 3710                 }
 3711             }
 3712 
 3713             # Do not break between a possible filehandle and a ? or / and do
 3714             # not introduce a break after it if there is no blank
 3715             # (extrude.t)
 3716             elsif ( $type eq 'Z' ) {
 3717 
 3718                 # don't break..
 3719                 if (
 3720 
 3721                     # if there is no blank and we do not want one. Examples:
 3722                     #    print $x++    # do not break after $x
 3723                     #    print HTML"HELLO"   # break ok after HTML
 3724                     (
 3725                            $next_type ne 'b'
 3726                         && defined( $want_left_space{$next_type} )
 3727                         && $want_left_space{$next_type} == WS_NO
 3728                     )
 3729 
 3730                     # or we might be followed by the start of a quote,
 3731                     # and this is not an existing breakpoint; fixes c039.
 3732                     || !$old_breakpoint_to_go[$i]
 3733                     && substr( $next_nonblank_token, 0, 1 ) eq '/'
 3734 
 3735                   )
 3736                 {
 3737                     $bond_str = NO_BREAK;
 3738                 }
 3739             }
 3740 
 3741             # Breaking before a ? before a quote can cause trouble if
 3742             # they are not separated by a blank.
 3743             # Example: a syntax error occurs if you break before the ? here
 3744             #  my$logic=join$all?' && ':' || ',@regexps;
 3745             # From: Professional_Perl_Programming_Code/multifind.pl
 3746             if ( $next_nonblank_type eq '?' ) {
 3747                 $bond_str = NO_BREAK
 3748                   if ( $types_to_go[ $i_next_nonblank + 1 ] eq 'Q' );
 3749             }
 3750 
 3751             # Breaking before a . followed by a number
 3752             # can cause trouble if there is no intervening space
 3753             # Example: a syntax error occurs if you break before the .2 here
 3754             #  $str .= pack($endian.2, ensurrogate($ord));
 3755             # From: perl58/Unicode.pm
 3756             elsif ( $next_nonblank_type eq '.' ) {
 3757                 $bond_str = NO_BREAK
 3758                   if ( $types_to_go[ $i_next_nonblank + 1 ] eq 'n' );
 3759             }
 3760 
 3761             # Fix for c039
 3762             elsif ( $type eq 'w' ) {
 3763                 $bond_str = NO_BREAK
 3764                   if ( !$old_breakpoint_to_go[$i]
 3765                     && substr( $next_nonblank_token, 0, 1 ) eq '/' );
 3766             }
 3767 
 3768             my $bond_str_2 = $bond_str;
 3769 
 3770             #---------------------------------------------------------------
 3771             # End of hardwired rules
 3772             #---------------------------------------------------------------
 3773 
 3774             #---------------------------------------------------------------
 3775             # Bond Strength Section 3:
 3776             # Apply table rules. These have priority over the above
 3777             # hardwired rules.
 3778             #---------------------------------------------------------------
 3779 
 3780             my $tabulated_bond_str;
 3781             my $ltype = $type;
 3782             my $rtype = $next_nonblank_type;
 3783             if ( $seqno && $is_container_token{$token} ) {
 3784                 $ltype = $type . $token;
 3785             }
 3786 
 3787             if (   $next_nonblank_seqno
 3788                 && $is_container_token{$next_nonblank_token} )
 3789             {
 3790                 $rtype = $next_nonblank_type . $next_nonblank_token;
 3791             }
 3792 
 3793             # apply binary rules which apply regardless of space between tokens
 3794             if ( $binary_bond_strength{$ltype}{$rtype} ) {
 3795                 $bond_str           = $binary_bond_strength{$ltype}{$rtype};
 3796                 $tabulated_bond_str = $bond_str;
 3797             }
 3798 
 3799             # apply binary rules which apply only if no space between tokens
 3800             if ( $binary_bond_strength_nospace{$ltype}{$next_type} ) {
 3801                 $bond_str           = $binary_bond_strength{$ltype}{$next_type};
 3802                 $tabulated_bond_str = $bond_str;
 3803             }
 3804 
 3805             if ( $nobreak_rhs{$ltype} || $nobreak_lhs{$rtype} ) {
 3806                 $bond_str           = NO_BREAK;
 3807                 $tabulated_bond_str = $bond_str;
 3808             }
 3809             my $bond_str_3 = $bond_str;
 3810 
 3811             # If the hardwired rules conflict with the tabulated bond
 3812             # strength then there is an inconsistency that should be fixed
 3813             DEBUG_BOND
 3814               && $tabulated_bond_str
 3815               && $bond_str_1
 3816               && $bond_str_1 != $bond_str_2
 3817               && $bond_str_2 != $tabulated_bond_str
 3818               && do {
 3819                 print STDERR
 3820 "BOND_TABLES: ltype=$ltype rtype=$rtype $bond_str_1->$bond_str_2->$bond_str_3\n";
 3821               };
 3822 
 3823            #-----------------------------------------------------------------
 3824            # Bond Strength Section 4:
 3825            # Modify strengths of certain tokens which often occur in sequence
 3826            # by adding a small bias to each one in turn so that the breaks
 3827            # occur from left to right.
 3828            #
 3829            # Note that we only changing strengths by small amounts here,
 3830            # and usually increasing, so we should not be altering any NO_BREAKs.
 3831            # Other routines which check for NO_BREAKs will use a tolerance
 3832            # of one to avoid any problem.
 3833            #-----------------------------------------------------------------
 3834 
 3835             # The bias tables use special keys:
 3836             #   $type - if not keyword
 3837             #   $token - if keyword, but map some keywords together
 3838             my $left_key =
 3839               $type eq 'k' ? $token eq 'err' ? 'or' : $token : $type;
 3840             my $right_key =
 3841                 $next_nonblank_type eq 'k'
 3842               ? $next_nonblank_token eq 'err'
 3843                   ? 'or'
 3844                   : $next_nonblank_token
 3845               : $next_nonblank_type;
 3846 
 3847             if ( $type eq ',' ) {
 3848 
 3849                 # add any bias set by sub scan_list at old comma break points
 3850                 $bond_str += $bond_strength_to_go[$i];
 3851 
 3852             }
 3853 
 3854             # bias left token
 3855             elsif ( defined( $bias{$left_key} ) ) {
 3856                 if ( !$want_break_before{$left_key} ) {
 3857                     $bias{$left_key} += $delta_bias;
 3858                     $bond_str += $bias{$left_key};
 3859                 }
 3860             }
 3861 
 3862             # bias right token
 3863             if ( defined( $bias{$right_key} ) ) {
 3864                 if ( $want_break_before{$right_key} ) {
 3865 
 3866                     # for leading '.' align all but 'short' quotes; the idea
 3867                     # is to not place something like "\n" on a single line.
 3868                     if ( $right_key eq '.' ) {
 3869                         unless (
 3870                             $last_nonblank_type eq '.'
 3871                             && ( $token_length <=
 3872                                 $rOpts_short_concatenation_item_length )
 3873                             && ( !$is_closing_token{$token} )
 3874                           )
 3875                         {
 3876                             $bias{$right_key} += $delta_bias;
 3877                         }
 3878                     }
 3879                     else {
 3880                         $bias{$right_key} += $delta_bias;
 3881                     }
 3882                     $bond_str += $bias{$right_key};
 3883                 }
 3884             }
 3885             my $bond_str_4 = $bond_str;
 3886 
 3887             #---------------------------------------------------------------
 3888             # Bond Strength Section 5:
 3889             # Fifth Approximation.
 3890             # Take nesting depth into account by adding the nesting depth
 3891             # to the bond strength.
 3892             #---------------------------------------------------------------
 3893             my $strength;
 3894 
 3895             if ( defined($bond_str) && !$nobreak_to_go[$i] ) {
 3896                 if ( $total_nesting_depth > 0 ) {
 3897                     $strength = $bond_str + $total_nesting_depth;
 3898                 }
 3899                 else {
 3900                     $strength = $bond_str;
 3901                 }
 3902             }
 3903             else {
 3904                 $strength = NO_BREAK;
 3905 
 3906                 # For critical code such as lines with here targets we must
 3907                 # be absolutely sure that we do not allow a break.  So for
 3908                 # these the nobreak flag exceeds 1 as a signal. Otherwise we
 3909                 # can run into trouble when small tolerances are added.
 3910                 $strength += 1 if ( $nobreak_to_go[$i] > 1 );
 3911             }
 3912 
 3913             #---------------------------------------------------------------
 3914             # Bond Strength Section 6:
 3915             # Sixth Approximation. Welds.
 3916             #---------------------------------------------------------------
 3917 
 3918             # Do not allow a break within welds
 3919             if ( $total_weld_count && $seqno ) {
 3920                 my $KK = $K_to_go[$i];
 3921                 if ( $rK_weld_right->{$KK} ) {
 3922                     $strength = NO_BREAK;
 3923                 }
 3924 
 3925                 # But encourage breaking after opening welded tokens
 3926                 elsif ($rK_weld_left->{$KK}
 3927                     && $is_opening_token{$token} )
 3928                 {
 3929                     $strength -= 1;
 3930                 }
 3931             }
 3932 
 3933             # always break after side comment
 3934             if ( $type eq '#' ) { $strength = 0 }
 3935 
 3936             $bond_strength_to_go[$i] = $strength;
 3937 
 3938             # Fix for case c001: be sure NO_BREAK's are enforced by later
 3939             # routines, except at a '?' because '?' as quote delimiter is
 3940             # deprecated.
 3941             if ( $strength >= NO_BREAK && $next_nonblank_type ne '?' ) {
 3942                 $nobreak_to_go[$i] ||= 1;
 3943             }
 3944 
 3945             DEBUG_BOND && do {
 3946                 my $str = substr( $token, 0, 15 );
 3947                 $str .= ' ' x ( 16 - length($str) );
 3948                 print STDOUT
 3949 "BOND:  i=$i $str $type $next_nonblank_type depth=$total_nesting_depth strength=$bond_str_1 -> $bond_str_2 -> $bond_str_3 -> $bond_str_4 $bond_str -> $strength \n";
 3950             };
 3951         } ## end main loop
 3952         return;
 3953     } ## end sub set_bond_strengths
 3954 } ## end closure set_bond_strengths
 3955 
 3956 sub bad_pattern {
 3957 
 3958     # See if a pattern will compile. We have to use a string eval here,
 3959     # but it should be safe because the pattern has been constructed
 3960     # by this program.
 3961     my ($pattern) = @_;
 3962     eval "'##'=~/$pattern/";
 3963     return $@;
 3964 }
 3965 
 3966 {    ## begin closure prepare_cuddled_block_types
 3967 
 3968     my %no_cuddle;
 3969 
 3970     # Add keywords here which really should not be cuddled
 3971     BEGIN {
 3972         my @q = qw(if unless for foreach while);
 3973         @no_cuddle{@q} = (1) x scalar(@q);
 3974     }
 3975 
 3976     sub prepare_cuddled_block_types {
 3977 
 3978         # the cuddled-else style, if used, is controlled by a hash that
 3979         # we construct here
 3980 
 3981         # Include keywords here which should not be cuddled
 3982 
 3983         my $cuddled_string = "";
 3984         if ( $rOpts->{'cuddled-else'} ) {
 3985 
 3986             # set the default
 3987             $cuddled_string = 'elsif else continue catch finally'
 3988               unless ( $rOpts->{'cuddled-block-list-exclusive'} );
 3989 
 3990             # This is the old equivalent but more complex version
 3991             # $cuddled_string = 'if-elsif-else unless-elsif-else -continue ';
 3992 
 3993             # Add users other blocks to be cuddled
 3994             my $cuddled_block_list = $rOpts->{'cuddled-block-list'};
 3995             if ($cuddled_block_list) {
 3996                 $cuddled_string .= " " . $cuddled_block_list;
 3997             }
 3998 
 3999         }
 4000 
 4001         # If we have a cuddled string of the form
 4002         #  'try-catch-finally'
 4003 
 4004         # we want to prepare a hash of the form
 4005 
 4006         # $rcuddled_block_types = {
 4007         #    'try' => {
 4008         #        'catch'   => 1,
 4009         #        'finally' => 1
 4010         #    },
 4011         # };
 4012 
 4013         # use -dcbl to dump this hash
 4014 
 4015         # Multiple such strings are input as a space or comma separated list
 4016 
 4017         # If we get two lists with the same leading type, such as
 4018         #   -cbl = "-try-catch-finally  -try-catch-otherwise"
 4019         # then they will get merged as follows:
 4020         # $rcuddled_block_types = {
 4021         #    'try' => {
 4022         #        'catch'     => 1,
 4023         #        'finally'   => 2,
 4024         #        'otherwise' => 1,
 4025         #    },
 4026         # };
 4027         # This will allow either type of chain to be followed.
 4028 
 4029         $cuddled_string =~ s/,/ /g;    # allow space or comma separated lists
 4030         my @cuddled_strings = split /\s+/, $cuddled_string;
 4031 
 4032         $rcuddled_block_types = {};
 4033 
 4034         # process each dash-separated string...
 4035         my $string_count = 0;
 4036         foreach my $string (@cuddled_strings) {
 4037             next unless $string;
 4038             my @words = split /-+/, $string;    # allow multiple dashes
 4039 
 4040             # we could look for and report possible errors here...
 4041             next unless ( @words > 0 );
 4042 
 4043            # allow either '-continue' or *-continue' for arbitrary starting type
 4044             my $start = '*';
 4045 
 4046             # a single word without dashes is a secondary block type
 4047             if ( @words > 1 ) {
 4048                 $start = shift @words;
 4049             }
 4050 
 4051             # always make an entry for the leading word. If none follow, this
 4052             # will still prevent a wildcard from matching this word.
 4053             if ( !defined( $rcuddled_block_types->{$start} ) ) {
 4054                 $rcuddled_block_types->{$start} = {};
 4055             }
 4056 
 4057             # The count gives the original word order in case we ever want it.
 4058             $string_count++;
 4059             my $word_count = 0;
 4060             foreach my $word (@words) {
 4061                 next unless $word;
 4062                 if ( $no_cuddle{$word} ) {
 4063                     Warn(
 4064 "## Ignoring keyword '$word' in -cbl; does not seem right\n"
 4065                     );
 4066                     next;
 4067                 }
 4068                 $word_count++;
 4069                 $rcuddled_block_types->{$start}->{$word} =
 4070                   1;    #"$string_count.$word_count";
 4071 
 4072                 # git#9: Remove this word from the list of desired one-line
 4073                 # blocks
 4074                 $want_one_line_block{$word} = 0;
 4075             }
 4076         }
 4077         return;
 4078     }
 4079 }    ## begin closure prepare_cuddled_block_types
 4080 
 4081 sub dump_cuddled_block_list {
 4082     my ($fh) = @_;
 4083 
 4084     # ORIGINAL METHOD: Here is the format of the cuddled block type hash
 4085     # which controls this routine
 4086     #    my $rcuddled_block_types = {
 4087     #        'if' => {
 4088     #            'else'  => 1,
 4089     #            'elsif' => 1
 4090     #        },
 4091     #        'try' => {
 4092     #            'catch'   => 1,
 4093     #            'finally' => 1
 4094     #        },
 4095     #    };
 4096 
 4097     # SIMPLFIED METHOD: the simplified method uses a wildcard for
 4098     # the starting block type and puts all cuddled blocks together:
 4099     #    my $rcuddled_block_types = {
 4100     #        '*' => {
 4101     #            'else'  => 1,
 4102     #            'elsif' => 1
 4103     #            'catch'   => 1,
 4104     #            'finally' => 1
 4105     #        },
 4106     #    };
 4107 
 4108     # Both methods work, but the simplified method has proven to be adequate and
 4109     # easier to manage.
 4110 
 4111     my $cuddled_string = $rOpts->{'cuddled-block-list'};
 4112     $cuddled_string = '' unless $cuddled_string;
 4113 
 4114     my $flags = "";
 4115     $flags .= "-ce" if ( $rOpts->{'cuddled-else'} );
 4116     $flags .= " -cbl='$cuddled_string'";
 4117 
 4118     unless ( $rOpts->{'cuddled-else'} ) {
 4119         $flags .= "\nNote: You must specify -ce to generate a cuddled hash";
 4120     }
 4121 
 4122     $fh->print(<<EOM);
 4123 ------------------------------------------------------------------------
 4124 Hash of cuddled block types prepared for a run with these parameters:
 4125   $flags
 4126 ------------------------------------------------------------------------
 4127 EOM
 4128 
 4129     use Data::Dumper;
 4130     $fh->print( Dumper($rcuddled_block_types) );
 4131 
 4132     $fh->print(<<EOM);
 4133 ------------------------------------------------------------------------
 4134 EOM
 4135     return;
 4136 }
 4137 
 4138 sub make_static_block_comment_pattern {
 4139 
 4140     # create the pattern used to identify static block comments
 4141     $static_block_comment_pattern = '^\s*##';
 4142 
 4143     # allow the user to change it
 4144     if ( $rOpts->{'static-block-comment-prefix'} ) {
 4145         my $prefix = $rOpts->{'static-block-comment-prefix'};
 4146         $prefix =~ s/^\s*//;
 4147         my $pattern = $prefix;
 4148 
 4149         # user may give leading caret to force matching left comments only
 4150         if ( $prefix !~ /^\^#/ ) {
 4151             if ( $prefix !~ /^#/ ) {
 4152                 Die(
 4153 "ERROR: the -sbcp prefix is '$prefix' but must begin with '#' or '^#'\n"
 4154                 );
 4155             }
 4156             $pattern = '^\s*' . $prefix;
 4157         }
 4158         if ( bad_pattern($pattern) ) {
 4159             Die(
 4160 "ERROR: the -sbc prefix '$prefix' causes the invalid regex '$pattern'\n"
 4161             );
 4162         }
 4163         $static_block_comment_pattern = $pattern;
 4164     }
 4165     return;
 4166 }
 4167 
 4168 sub make_format_skipping_pattern {
 4169     my ( $opt_name, $default ) = @_;
 4170     my $param = $rOpts->{$opt_name};
 4171     unless ($param) { $param = $default }
 4172     $param =~ s/^\s*//;
 4173     if ( $param !~ /^#/ ) {
 4174         Die("ERROR: the $opt_name parameter '$param' must begin with '#'\n");
 4175     }
 4176     my $pattern = '^' . $param . '\s';
 4177     if ( bad_pattern($pattern) ) {
 4178         Die(
 4179 "ERROR: the $opt_name parameter '$param' causes the invalid regex '$pattern'\n"
 4180         );
 4181     }
 4182     return $pattern;
 4183 }
 4184 
 4185 sub make_non_indenting_brace_pattern {
 4186 
 4187     # Create the pattern used to identify static side comments.
 4188     # Note that we are ending the pattern in a \s. This will allow
 4189     # the pattern to be followed by a space and some text, or a newline.
 4190     # The pattern is used in sub 'non_indenting_braces'
 4191     $non_indenting_brace_pattern = '^#<<<\s';
 4192 
 4193     # allow the user to change it
 4194     if ( $rOpts->{'non-indenting-brace-prefix'} ) {
 4195         my $prefix = $rOpts->{'non-indenting-brace-prefix'};
 4196         $prefix =~ s/^\s*//;
 4197         if ( $prefix !~ /^#/ ) {
 4198             Die("ERROR: the -nibp parameter '$prefix' must begin with '#'\n");
 4199         }
 4200         my $pattern = '^' . $prefix . '\s';
 4201         if ( bad_pattern($pattern) ) {
 4202             Die(
 4203 "ERROR: the -nibp prefix '$prefix' causes the invalid regex '$pattern'\n"
 4204             );
 4205         }
 4206         $non_indenting_brace_pattern = $pattern;
 4207     }
 4208     return;
 4209 }
 4210 
 4211 sub make_closing_side_comment_list_pattern {
 4212 
 4213     # turn any input list into a regex for recognizing selected block types
 4214     $closing_side_comment_list_pattern = '^\w+';
 4215     if ( defined( $rOpts->{'closing-side-comment-list'} )
 4216         && $rOpts->{'closing-side-comment-list'} )
 4217     {
 4218         $closing_side_comment_list_pattern =
 4219           make_block_pattern( '-cscl', $rOpts->{'closing-side-comment-list'} );
 4220     }
 4221     return;
 4222 }
 4223 
 4224 sub make_sub_matching_pattern {
 4225 
 4226     # Patterns for standardizing matches to block types for regular subs and
 4227     # anonymous subs. Examples
 4228     #  'sub process' is a named sub
 4229     #  'sub ::m' is a named sub
 4230     #  'sub' is an anonymous sub
 4231     #  'sub:' is a label, not a sub
 4232     #  'substr' is a keyword
 4233     $SUB_PATTERN    = '^sub\s+(::|\w)';    # match normal sub
 4234     $ASUB_PATTERN   = '^sub$';             # match anonymous sub
 4235     $ANYSUB_PATTERN = '^sub\b';            # match either type of sub
 4236 
 4237     # Note (see also RT #133130): These patterns are used by
 4238     # sub make_block_pattern, which is used for making most patterns.
 4239     # So this sub needs to be called before other pattern-making routines.
 4240 
 4241     if ( $rOpts->{'sub-alias-list'} ) {
 4242 
 4243         # Note that any 'sub-alias-list' has been preprocessed to
 4244         # be a trimmed, space-separated list which includes 'sub'
 4245         # for example, it might be 'sub method fun'
 4246         my $sub_alias_list = $rOpts->{'sub-alias-list'};
 4247         $sub_alias_list =~ s/\s+/\|/g;
 4248         $SUB_PATTERN    =~ s/sub/\($sub_alias_list\)/;
 4249         $ASUB_PATTERN   =~ s/sub/\($sub_alias_list\)/;
 4250         $ANYSUB_PATTERN =~ s/sub/\($sub_alias_list\)/;
 4251     }
 4252     return;
 4253 }
 4254 
 4255 sub make_bli_pattern {
 4256 
 4257     # default list of block types for which -bli would apply
 4258     my $bli_list_string = 'if else elsif unless while for foreach do : sub';
 4259 
 4260     if ( defined( $rOpts->{'brace-left-and-indent-list'} )
 4261         && $rOpts->{'brace-left-and-indent-list'} )
 4262     {
 4263         $bli_list_string = $rOpts->{'brace-left-and-indent-list'};
 4264     }
 4265 
 4266     $bli_pattern = make_block_pattern( '-blil', $bli_list_string );
 4267     return;
 4268 }
 4269 
 4270 sub make_keyword_group_list_pattern {
 4271 
 4272     # turn any input list into a regex for recognizing selected block types.
 4273     # Here are the defaults:
 4274     $keyword_group_list_pattern         = '^(our|local|my|use|require|)$';
 4275     $keyword_group_list_comment_pattern = '';
 4276     if ( defined( $rOpts->{'keyword-group-blanks-list'} )
 4277         && $rOpts->{'keyword-group-blanks-list'} )
 4278     {
 4279         my @words = split /\s+/, $rOpts->{'keyword-group-blanks-list'};
 4280         my @keyword_list;
 4281         my @comment_list;
 4282         foreach my $word (@words) {
 4283             if ( $word =~ /^(BC|SBC)$/ ) {
 4284                 push @comment_list, $word;
 4285                 if ( $word eq 'SBC' ) { push @comment_list, 'SBCX' }
 4286             }
 4287             else {
 4288                 push @keyword_list, $word;
 4289             }
 4290         }
 4291         $keyword_group_list_pattern =
 4292           make_block_pattern( '-kgbl', $rOpts->{'keyword-group-blanks-list'} );
 4293         $keyword_group_list_comment_pattern =
 4294           make_block_pattern( '-kgbl', join( ' ', @comment_list ) );
 4295     }
 4296     return;
 4297 }
 4298 
 4299 sub make_block_brace_vertical_tightness_pattern {
 4300 
 4301     # turn any input list into a regex for recognizing selected block types
 4302     $block_brace_vertical_tightness_pattern =
 4303       '^((if|else|elsif|unless|while|for|foreach|do|\w+:)$|sub)';
 4304     if ( defined( $rOpts->{'block-brace-vertical-tightness-list'} )
 4305         && $rOpts->{'block-brace-vertical-tightness-list'} )
 4306     {
 4307         $block_brace_vertical_tightness_pattern =
 4308           make_block_pattern( '-bbvtl',
 4309             $rOpts->{'block-brace-vertical-tightness-list'} );
 4310     }
 4311     return;
 4312 }
 4313 
 4314 sub make_blank_line_pattern {
 4315 
 4316     $blank_lines_before_closing_block_pattern = $SUB_PATTERN;
 4317     my $key = 'blank-lines-before-closing-block-list';
 4318     if ( defined( $rOpts->{$key} ) && $rOpts->{$key} ) {
 4319         $blank_lines_before_closing_block_pattern =
 4320           make_block_pattern( '-blbcl', $rOpts->{$key} );
 4321     }
 4322 
 4323     $blank_lines_after_opening_block_pattern = $SUB_PATTERN;
 4324     $key = 'blank-lines-after-opening-block-list';
 4325     if ( defined( $rOpts->{$key} ) && $rOpts->{$key} ) {
 4326         $blank_lines_after_opening_block_pattern =
 4327           make_block_pattern( '-blaol', $rOpts->{$key} );
 4328     }
 4329     return;
 4330 }
 4331 
 4332 sub make_block_pattern {
 4333 
 4334     #  given a string of block-type keywords, return a regex to match them
 4335     #  The only tricky part is that labels are indicated with a single ':'
 4336     #  and the 'sub' token text may have additional text after it (name of
 4337     #  sub).
 4338     #
 4339     #  Example:
 4340     #
 4341     #   input string: "if else elsif unless while for foreach do : sub";
 4342     #   pattern:  '^((if|else|elsif|unless|while|for|foreach|do|\w+:)$|sub)';
 4343 
 4344     #  Minor Update:
 4345     #
 4346     #  To distinguish between anonymous subs and named subs, use 'sub' to
 4347     #   indicate a named sub, and 'asub' to indicate an anonymous sub
 4348 
 4349     my ( $abbrev, $string ) = @_;
 4350     my @list  = split_words($string);
 4351     my @words = ();
 4352     my %seen;
 4353     for my $i (@list) {
 4354         if ( $i eq '*' ) { my $pattern = '^.*'; return $pattern }
 4355         next if $seen{$i};
 4356         $seen{$i} = 1;
 4357         if ( $i eq 'sub' ) {
 4358         }
 4359         elsif ( $i eq 'asub' ) {
 4360         }
 4361         elsif ( $i eq ';' ) {
 4362             push @words, ';';
 4363         }
 4364         elsif ( $i eq '{' ) {
 4365             push @words, '\{';
 4366         }
 4367         elsif ( $i eq ':' ) {
 4368             push @words, '\w+:';
 4369         }
 4370         elsif ( $i =~ /^\w/ ) {
 4371             push @words, $i;
 4372         }
 4373         else {
 4374             Warn("unrecognized block type $i after $abbrev, ignoring\n");
 4375         }
 4376     }
 4377     my $pattern      = '(' . join( '|', @words ) . ')$';
 4378     my $sub_patterns = "";
 4379     if ( $seen{'sub'} ) {
 4380         $sub_patterns .= '|' . $SUB_PATTERN;
 4381     }
 4382     if ( $seen{'asub'} ) {
 4383         $sub_patterns .= '|' . $ASUB_PATTERN;
 4384     }
 4385     if ($sub_patterns) {
 4386         $pattern = '(' . $pattern . $sub_patterns . ')';
 4387     }
 4388     $pattern = '^' . $pattern;
 4389     return $pattern;
 4390 }
 4391 
 4392 sub make_static_side_comment_pattern {
 4393 
 4394     # create the pattern used to identify static side comments
 4395     $static_side_comment_pattern = '^##';
 4396 
 4397     # allow the user to change it
 4398     if ( $rOpts->{'static-side-comment-prefix'} ) {
 4399         my $prefix = $rOpts->{'static-side-comment-prefix'};
 4400         $prefix =~ s/^\s*//;
 4401         my $pattern = '^' . $prefix;
 4402         if ( bad_pattern($pattern) ) {
 4403             Die(
 4404 "ERROR: the -sscp prefix '$prefix' causes the invalid regex '$pattern'\n"
 4405             );
 4406         }
 4407         $static_side_comment_pattern = $pattern;
 4408     }
 4409     return;
 4410 }
 4411 
 4412 sub make_closing_side_comment_prefix {
 4413 
 4414     # Be sure we have a valid closing side comment prefix
 4415     my $csc_prefix = $rOpts->{'closing-side-comment-prefix'};
 4416     my $csc_prefix_pattern;
 4417     if ( !defined($csc_prefix) ) {
 4418         $csc_prefix         = '## end';
 4419         $csc_prefix_pattern = '^##\s+end';
 4420     }
 4421     else {
 4422         my $test_csc_prefix = $csc_prefix;
 4423         if ( $test_csc_prefix !~ /^#/ ) {
 4424             $test_csc_prefix = '#' . $test_csc_prefix;
 4425         }
 4426 
 4427         # make a regex to recognize the prefix
 4428         my $test_csc_prefix_pattern = $test_csc_prefix;
 4429 
 4430         # escape any special characters
 4431         $test_csc_prefix_pattern =~ s/([^#\s\w])/\\$1/g;
 4432 
 4433         $test_csc_prefix_pattern = '^' . $test_csc_prefix_pattern;
 4434 
 4435         # allow exact number of intermediate spaces to vary
 4436         $test_csc_prefix_pattern =~ s/\s+/\\s\+/g;
 4437 
 4438         # make sure we have a good pattern
 4439         # if we fail this we probably have an error in escaping
 4440         # characters.
 4441 
 4442         if ( bad_pattern($test_csc_prefix_pattern) ) {
 4443 
 4444             # shouldn't happen..must have screwed up escaping, above
 4445             report_definite_bug();
 4446             Warn(
 4447 "Program Error: the -cscp prefix '$csc_prefix' caused the invalid regex '$csc_prefix_pattern'\n"
 4448             );
 4449 
 4450             # just warn and keep going with defaults
 4451             Warn("Please consider using a simpler -cscp prefix\n");
 4452             Warn("Using default -cscp instead; please check output\n");
 4453         }
 4454         else {
 4455             $csc_prefix         = $test_csc_prefix;
 4456             $csc_prefix_pattern = $test_csc_prefix_pattern;
 4457         }
 4458     }
 4459     $rOpts->{'closing-side-comment-prefix'} = $csc_prefix;
 4460     $closing_side_comment_prefix_pattern = $csc_prefix_pattern;
 4461     return;
 4462 }
 4463 
 4464 ##################################################
 4465 # CODE SECTION 4: receive lines from the tokenizer
 4466 ##################################################
 4467 
 4468 {    ## begin closure write_line
 4469 
 4470     my $Last_line_had_side_comment;
 4471     my $In_format_skipping_section;
 4472     my $Saw_VERSION_in_this_file;
 4473 
 4474     sub initialize_write_line {
 4475 
 4476         $Last_line_had_side_comment = 0;
 4477         $In_format_skipping_section = 0;
 4478         $Saw_VERSION_in_this_file   = 0;
 4479 
 4480         return;
 4481     }
 4482 
 4483     sub write_line {
 4484 
 4485       # This routine originally received lines of code and immediately processed
 4486       # them.  That was efficient when memory was limited, but now it just saves
 4487       # the lines it receives.  They get processed all together after the last
 4488       # line is received.
 4489 
 4490        # As tokenized lines are received they are converted to the format needed
 4491        # for the final formatting.
 4492         my ( $self, $line_of_tokens_old ) = @_;
 4493         my $rLL           = $self->[_rLL_];
 4494         my $Klimit        = $self->[_Klimit_];
 4495         my $rlines_new    = $self->[_rlines_];
 4496         my $maximum_level = $self->[_maximum_level_];
 4497 
 4498         my $Kfirst;
 4499         my $line_of_tokens = {};
 4500         foreach my $key (
 4501             qw(
 4502             _curly_brace_depth
 4503             _ending_in_quote
 4504             _guessed_indentation_level
 4505             _line_number
 4506             _line_text
 4507             _line_type
 4508             _paren_depth
 4509             _quote_character
 4510             _square_bracket_depth
 4511             _starting_in_quote
 4512             )
 4513           )
 4514         {
 4515             $line_of_tokens->{$key} = $line_of_tokens_old->{$key};
 4516         }
 4517 
 4518         # Data needed by Logger
 4519         $line_of_tokens->{_level_0}          = 0;
 4520         $line_of_tokens->{_ci_level_0}       = 0;
 4521         $line_of_tokens->{_nesting_blocks_0} = "";
 4522         $line_of_tokens->{_nesting_tokens_0} = "";
 4523 
 4524         # Needed to avoid trimming quotes
 4525         $line_of_tokens->{_ended_in_blank_token} = undef;
 4526 
 4527         my $line_type     = $line_of_tokens_old->{_line_type};
 4528         my $input_line_no = $line_of_tokens_old->{_line_number};
 4529         my $CODE_type     = "";
 4530         my $tee_output;
 4531 
 4532         # Handle line of non-code
 4533         if ( $line_type ne 'CODE' ) {
 4534             $tee_output ||= $rOpts_tee_pod
 4535               && substr( $line_type, 0, 3 ) eq 'POD';
 4536         }
 4537 
 4538         # Handle line of code
 4539         else {
 4540 
 4541             my $rtokens         = $line_of_tokens_old->{_rtokens};
 4542             my $rtoken_type     = $line_of_tokens_old->{_rtoken_type};
 4543             my $rblock_type     = $line_of_tokens_old->{_rblock_type};
 4544             my $rcontainer_type = $line_of_tokens_old->{_rcontainer_type};
 4545             my $rcontainer_environment =
 4546               $line_of_tokens_old->{_rcontainer_environment};
 4547             my $rtype_sequence  = $line_of_tokens_old->{_rtype_sequence};
 4548             my $rlevels         = $line_of_tokens_old->{_rlevels};
 4549             my $rslevels        = $line_of_tokens_old->{_rslevels};
 4550             my $rci_levels      = $line_of_tokens_old->{_rci_levels};
 4551             my $rnesting_blocks = $line_of_tokens_old->{_rnesting_blocks};
 4552             my $rnesting_tokens = $line_of_tokens_old->{_rnesting_tokens};
 4553 
 4554             my $jmax = @{$rtokens} - 1;
 4555             if ( $jmax >= 0 ) {
 4556                 $Kfirst = defined($Klimit) ? $Klimit + 1 : 0;
 4557                 foreach my $j ( 0 .. $jmax ) {
 4558 
 4559                  # Clip negative nesting depths to zero to avoid problems.
 4560                  # Negative values can occur in files with unbalanced containers
 4561                     my $slevel = $rslevels->[$j];
 4562                     if ( $slevel < 0 ) { $slevel = 0 }
 4563 
 4564                     if ( $rlevels->[$j] > $maximum_level ) {
 4565                         $maximum_level = $rlevels->[$j];
 4566                     }
 4567 
 4568                     # But do not clip the 'level' variable yet. We will do this
 4569                     # later, in sub 'store_token_to_go'. The reason is that in
 4570                     # files with level errors, the logic in 'weld_cuddled_else'
 4571                     # uses a stack logic that will give bad welds if we clip
 4572                     # levels here.
 4573                     ## if ( $rlevels->[$j] < 0 ) { $rlevels->[$j] = 0 }
 4574 
 4575                     my @tokary;
 4576                     @tokary[
 4577                       _TOKEN_,         _TYPE_,  _BLOCK_TYPE_,
 4578                       _TYPE_SEQUENCE_, _LEVEL_, _SLEVEL_,
 4579                       _CI_LEVEL_,      _LINE_INDEX_,
 4580                       ]
 4581                       = (
 4582                         $rtokens->[$j],     $rtoken_type->[$j],
 4583                         $rblock_type->[$j], $rtype_sequence->[$j],
 4584                         $rlevels->[$j],     $slevel,
 4585                         $rci_levels->[$j],  $input_line_no - 1,
 4586                       );
 4587                     push @{$rLL}, \@tokary;
 4588                 } ## end foreach my $j ( 0 .. $jmax )
 4589 
 4590                 $Klimit = @{$rLL} - 1;
 4591 
 4592                 # Need to remember if we can trim the input line
 4593                 $line_of_tokens->{_ended_in_blank_token} =
 4594                   $rtoken_type->[$jmax] eq 'b';
 4595 
 4596                 $line_of_tokens->{_level_0}          = $rlevels->[0];
 4597                 $line_of_tokens->{_ci_level_0}       = $rci_levels->[0];
 4598                 $line_of_tokens->{_nesting_blocks_0} = $rnesting_blocks->[0];
 4599                 $line_of_tokens->{_nesting_tokens_0} = $rnesting_tokens->[0];
 4600             } ## end if ( $jmax >= 0 )
 4601 
 4602             $CODE_type =
 4603               $self->get_CODE_type( $line_of_tokens, $Kfirst, $Klimit,
 4604                 $input_line_no );
 4605 
 4606             $tee_output ||=
 4607                  $rOpts_tee_block_comments
 4608               && $jmax == 0
 4609               && $rLL->[$Kfirst]->[_TYPE_] eq '#';
 4610 
 4611             $tee_output ||=
 4612                  $rOpts_tee_side_comments
 4613               && defined($Kfirst)
 4614               && $Klimit > $Kfirst
 4615               && $rLL->[$Klimit]->[_TYPE_] eq '#';
 4616 
 4617             # Handle any requested side comment deletions. It is easier to get
 4618             # this done here rather than farther down the pipeline because IO
 4619             # lines take a different route, and because lines with deleted HSC
 4620             # become BL lines.  An since we are deleting now, we have to also
 4621             # handle any tee- requests before the side comments vanish.
 4622             my $delete_side_comment =
 4623                  $rOpts_delete_side_comments
 4624               && defined($Kfirst)
 4625               && $rLL->[$Klimit]->[_TYPE_] eq '#'
 4626               && ( $Klimit > $Kfirst || $CODE_type eq 'HSC' )
 4627               && (!$CODE_type
 4628                 || $CODE_type eq 'HSC'
 4629                 || $CODE_type eq 'IO'
 4630                 || $CODE_type eq 'NIN' );
 4631 
 4632             if (
 4633                    $rOpts_delete_closing_side_comments
 4634                 && !$delete_side_comment
 4635                 && defined($Kfirst)
 4636                 && $Klimit > $Kfirst
 4637                 && $rLL->[$Klimit]->[_TYPE_] eq '#'
 4638                 && (  !$CODE_type
 4639                     || $CODE_type eq 'HSC'
 4640                     || $CODE_type eq 'IO'
 4641                     || $CODE_type eq 'NIN' )
 4642               )
 4643             {
 4644                 my $token  = $rLL->[$Klimit]->[_TOKEN_];
 4645                 my $K_m    = $Klimit - 1;
 4646                 my $type_m = $rLL->[$K_m]->[_TYPE_];
 4647                 if ( $type_m eq 'b' && $K_m > $Kfirst ) { $K_m-- }
 4648                 my $last_nonblank_block_type = $rLL->[$K_m]->[_BLOCK_TYPE_];
 4649                 if (   $token =~ /$closing_side_comment_prefix_pattern/
 4650                     && $last_nonblank_block_type =~
 4651                     /$closing_side_comment_list_pattern/ )
 4652                 {
 4653                     $delete_side_comment = 1;
 4654                 }
 4655             } ## end if ( $rOpts_delete_closing_side_comments...)
 4656 
 4657             if ($delete_side_comment) {
 4658                 pop @{$rLL};
 4659                 $Klimit -= 1;
 4660                 if (   $Klimit > $Kfirst
 4661                     && $rLL->[$Klimit]->[_TYPE_] eq 'b' )
 4662                 {
 4663                     pop @{$rLL};
 4664                     $Klimit -= 1;
 4665                 }
 4666 
 4667                 # The -io option outputs the line text, so we have to update
 4668                 # the line text so that the comment does not reappear.
 4669                 if ( $CODE_type eq 'IO' ) {
 4670                     my $line = "";
 4671                     foreach my $KK ( $Kfirst .. $Klimit ) {
 4672                         $line .= $rLL->[$KK]->[_TOKEN_];
 4673                     }
 4674                     $line_of_tokens->{_line_text} = $line . "\n";
 4675                 }
 4676 
 4677                 # If we delete a hanging side comment the line becomes blank.
 4678                 if ( $CODE_type eq 'HSC' ) { $CODE_type = 'BL' }
 4679             }
 4680 
 4681         } ## end if ( $line_type eq 'CODE')
 4682 
 4683         # Finish storing line variables
 4684         if ($tee_output) {
 4685             my $fh_tee    = $self->[_fh_tee_];
 4686             my $line_text = $line_of_tokens_old->{_line_text};
 4687             $fh_tee->print($line_text) if ($fh_tee);
 4688         }
 4689 
 4690         $line_of_tokens->{_rK_range}  = [ $Kfirst, $Klimit ];
 4691         $line_of_tokens->{_code_type} = $CODE_type;
 4692         $self->[_Klimit_]             = $Klimit;
 4693         $self->[_maximum_level_]      = $maximum_level;
 4694 
 4695         push @{$rlines_new}, $line_of_tokens;
 4696         return;
 4697     }
 4698 
 4699     sub get_CODE_type {
 4700         my ( $self, $line_of_tokens, $Kfirst, $Klast, $input_line_no ) = @_;
 4701 
 4702         # We are looking at a line of code and setting a flag to
 4703         # describe any special processing that it requires
 4704 
 4705         # Possible CODE_types
 4706         # 'VB'  = Verbatim - line goes out verbatim (a quote)
 4707         # 'FS'  = Format Skipping - line goes out verbatim
 4708         # 'BL'  = Blank Line
 4709         # 'HSC' = Hanging Side Comment - fix this hanging side comment
 4710         # 'SBCX'= Static Block Comment Without Leading Space
 4711         # 'SBC' = Static Block Comment
 4712         # 'BC'  = Block Comment - an ordinary full line comment
 4713         # 'IO'  = Indent Only - line goes out unchanged except for indentation
 4714         # 'NIN' = No Internal Newlines - line does not get broken
 4715         # 'VER' = VERSION statement
 4716         # ''    = ordinary line of code with no restructions
 4717 
 4718         my $rLL = $self->[_rLL_];
 4719 
 4720         my $CODE_type  = "";
 4721         my $input_line = $line_of_tokens->{_line_text};
 4722         my $jmax       = defined($Kfirst) ? $Klast - $Kfirst : -1;
 4723 
 4724         my $is_block_comment = 0;
 4725         my $has_side_comment = 0;
 4726 
 4727         if ( $jmax >= 0 && $rLL->[$Klast]->[_TYPE_] eq '#' ) {
 4728             if   ( $jmax == 0 ) { $is_block_comment = 1; }
 4729             else                { $has_side_comment = 1 }
 4730         }
 4731 
 4732         # Write line verbatim if we are in a formatting skip section
 4733         if ($In_format_skipping_section) {
 4734 
 4735             # Note: extra space appended to comment simplifies pattern matching
 4736             if ( $is_block_comment
 4737                 && ( $rLL->[$Kfirst]->[_TOKEN_] . " " ) =~
 4738                 /$format_skipping_pattern_end/ )
 4739             {
 4740                 $In_format_skipping_section = 0;
 4741                 write_logfile_entry(
 4742                     "Line $input_line_no: Exiting format-skipping section\n");
 4743             }
 4744             $CODE_type = 'FS';
 4745             goto RETURN;
 4746         }
 4747 
 4748         # Check for a continued quote..
 4749         if ( $line_of_tokens->{_starting_in_quote} ) {
 4750 
 4751             # A line which is entirely a quote or pattern must go out
 4752             # verbatim.  Note: the \n is contained in $input_line.
 4753             if ( $jmax <= 0 ) {
 4754                 if ( ( $input_line =~ "\t" ) ) {
 4755                     my $input_line_number = $line_of_tokens->{_line_number};
 4756                     $self->note_embedded_tab($input_line_number);
 4757                 }
 4758                 $CODE_type = 'VB';
 4759                 goto RETURN;
 4760             }
 4761         }
 4762 
 4763         # See if we are entering a formatting skip section
 4764         if (   $rOpts_format_skipping
 4765             && $is_block_comment
 4766             && ( $rLL->[$Kfirst]->[_TOKEN_] . " " ) =~
 4767             /$format_skipping_pattern_begin/ )
 4768         {
 4769             $In_format_skipping_section = 1;
 4770             write_logfile_entry(
 4771                 "Line $input_line_no: Entering format-skipping section\n");
 4772             $CODE_type = 'FS';
 4773             goto RETURN;
 4774         }
 4775 
 4776         # ignore trailing blank tokens (they will get deleted later)
 4777         if ( $jmax > 0 && $rLL->[$Klast]->[_TYPE_] eq 'b' ) {
 4778             $jmax--;
 4779         }
 4780 
 4781         # blank line..
 4782         if ( $jmax < 0 ) {
 4783             $CODE_type = 'BL';
 4784             goto RETURN;
 4785         }
 4786 
 4787         # see if this is a static block comment (starts with ## by default)
 4788         my $is_static_block_comment                       = 0;
 4789         my $is_static_block_comment_without_leading_space = 0;
 4790         if (   $is_block_comment
 4791             && $rOpts->{'static-block-comments'}
 4792             && $input_line =~ /$static_block_comment_pattern/ )
 4793         {
 4794             $is_static_block_comment = 1;
 4795             $is_static_block_comment_without_leading_space =
 4796               substr( $input_line, 0, 1 ) eq '#';
 4797         }
 4798 
 4799         # Check for comments which are line directives
 4800         # Treat exactly as static block comments without leading space
 4801         # reference: perlsyn, near end, section Plain Old Comments (Not!)
 4802         # example: '# line 42 "new_filename.plx"'
 4803         if (
 4804                $is_block_comment
 4805             && $input_line =~ /^\#   \s*
 4806                                line \s+ (\d+)   \s*
 4807                                (?:\s("?)([^"]+)\2)? \s*
 4808                                $/x
 4809           )
 4810         {
 4811             $is_static_block_comment                       = 1;
 4812             $is_static_block_comment_without_leading_space = 1;
 4813         }
 4814 
 4815         # look for hanging side comment
 4816         if (
 4817                $is_block_comment
 4818             && $Last_line_had_side_comment  # last line had side comment
 4819             && $input_line =~ /^\s/         # there is some leading space
 4820             && !$is_static_block_comment    # do not make static comment hanging
 4821             && $rOpts->{'hanging-side-comments'}    # user is allowing
 4822                                                     # hanging side comments
 4823                                                     # like this
 4824           )
 4825         {
 4826             $has_side_comment = 1;
 4827             $CODE_type        = 'HSC';
 4828             goto RETURN;
 4829         }
 4830 
 4831         # Handle a block (full-line) comment..
 4832         if ($is_block_comment) {
 4833 
 4834             if ($is_static_block_comment_without_leading_space) {
 4835                 $CODE_type = 'SBCX';
 4836                 goto RETURN;
 4837             }
 4838             elsif ($is_static_block_comment) {
 4839                 $CODE_type = 'SBC';
 4840                 goto RETURN;
 4841             }
 4842             elsif ($Last_line_had_side_comment
 4843                 && !$rOpts_maximum_consecutive_blank_lines
 4844                 && $rLL->[$Kfirst]->[_LEVEL_] > 0 )
 4845             {
 4846                 # Emergency fix to keep a block comment from becoming a hanging
 4847                 # side comment.  This fix is for the case that blank lines
 4848                 # cannot be inserted.  There is related code in sub
 4849                 # 'process_line_of_CODE'
 4850                 $CODE_type = 'SBCX';
 4851                 goto RETURN;
 4852             }
 4853             else {
 4854                 $CODE_type = 'BC';
 4855                 goto RETURN;
 4856             }
 4857         }
 4858 
 4859         # End of comments. Handle a line of normal code:
 4860 
 4861         if ($rOpts_indent_only) {
 4862             $CODE_type = 'IO';
 4863             goto RETURN;
 4864         }
 4865 
 4866         if ( !$rOpts_add_newlines ) {
 4867             $CODE_type = 'NIN';
 4868             goto RETURN;
 4869         }
 4870 
 4871         #   Patch needed for MakeMaker.  Do not break a statement
 4872         #   in which $VERSION may be calculated.  See MakeMaker.pm;
 4873         #   this is based on the coding in it.
 4874         #   The first line of a file that matches this will be eval'd:
 4875         #       /([\$*])(([\w\:\']*)\bVERSION)\b.*\=/
 4876         #   Examples:
 4877         #     *VERSION = \'1.01';
 4878         #     ( $VERSION ) = '$Revision: 1.74 $ ' =~ /\$Revision:\s+([^\s]+)/;
 4879         #   We will pass such a line straight through without breaking
 4880         #   it unless -npvl is used.
 4881 
 4882         #   Patch for problem reported in RT #81866, where files
 4883         #   had been flattened into a single line and couldn't be
 4884         #   tidied without -npvl.  There are two parts to this patch:
 4885         #   First, it is not done for a really long line (80 tokens for now).
 4886         #   Second, we will only allow up to one semicolon
 4887         #   before the VERSION.  We need to allow at least one semicolon
 4888         #   for statements like this:
 4889         #      require Exporter;  our $VERSION = $Exporter::VERSION;
 4890         #   where both statements must be on a single line for MakeMaker
 4891 
 4892         my $is_VERSION_statement = 0;
 4893         if (  !$Saw_VERSION_in_this_file
 4894             && $jmax < 80
 4895             && $input_line =~
 4896             /^[^;]*;?[^;]*([\$*])(([\w\:\']*)\bVERSION)\b.*\=/ )
 4897         {
 4898             $Saw_VERSION_in_this_file = 1;
 4899             write_logfile_entry("passing VERSION line; -npvl deactivates\n");
 4900 
 4901             # This code type has lower priority than others
 4902             $CODE_type = 'VER';
 4903             goto RETURN;
 4904         }
 4905 
 4906       RETURN:
 4907         $Last_line_had_side_comment = $has_side_comment;
 4908         return $CODE_type;
 4909     }
 4910 
 4911 } ## end closure write_line
 4912 
 4913 #############################################
 4914 # CODE SECTION 5: Pre-process the entire file
 4915 #############################################
 4916 
 4917 sub finish_formatting {
 4918 
 4919     my ( $self, $severe_error ) = @_;
 4920 
 4921     # The file has been tokenized and is ready to be formatted.
 4922     # All of the relevant data is stored in $self, ready to go.
 4923 
 4924     # Check the maximum level. If it is extremely large we will
 4925     # give up and output the file verbatim.
 4926     my $maximum_level       = $self->[_maximum_level_];
 4927     my $maximum_table_index = $#maximum_line_length_at_level;
 4928     if ( !$severe_error && $maximum_level > $maximum_table_index ) {
 4929         $severe_error ||= 1;
 4930         Warn(<<EOM);
 4931 The maximum indentation level, $maximum_level, exceeds the builtin limit of $maximum_table_index.
 4932 Something may be wrong; formatting will be skipped. 
 4933 EOM
 4934     }
 4935 
 4936     # output file verbatim if severe error or no formatting requested
 4937     if ( $severe_error || $rOpts->{notidy} ) {
 4938         $self->dump_verbatim();
 4939         $self->wrapup();
 4940         return;
 4941     }
 4942 
 4943     # Update the 'save_logfile' flag based to include any tokenization errors.
 4944     # We can save time by skipping logfile calls if it is not going to be saved.
 4945     my $logger_object = $self->[_logger_object_];
 4946     if ($logger_object) {
 4947         $self->[_save_logfile_] = $logger_object->get_save_logfile();
 4948     }
 4949 
 4950     # Make a pass through all tokens, adding or deleting any whitespace as
 4951     # required.  Also make any other changes, such as adding semicolons.
 4952     # All token changes must be made here so that the token data structure
 4953     # remains fixed for the rest of this iteration.
 4954     $self->respace_tokens();
 4955 
 4956     $self->find_multiline_qw();
 4957 
 4958     $self->keep_old_line_breaks();
 4959 
 4960     # Implement any welding needed for the -wn or -cb options
 4961     $self->weld_containers();
 4962 
 4963     # Locate small nested blocks which should not be broken
 4964     $self->mark_short_nested_blocks();
 4965 
 4966     $self->adjust_indentation_levels();
 4967 
 4968     $self->set_excluded_lp_containers();
 4969 
 4970     # Finishes formatting and write the result to the line sink.
 4971     # Eventually this call should just change the 'rlines' data according to the
 4972     # new line breaks and then return so that we can do an internal iteration
 4973     # before continuing with the next stages of formatting.
 4974     $self->process_all_lines();
 4975 
 4976     # A final routine to tie up any loose ends
 4977     $self->wrapup();
 4978     return;
 4979 }
 4980 
 4981 sub dump_verbatim {
 4982     my $self   = shift;
 4983     my $rlines = $self->[_rlines_];
 4984     foreach my $line ( @{$rlines} ) {
 4985         my $input_line = $line->{_line_text};
 4986         $self->write_unindented_line($input_line);
 4987     }
 4988     return;
 4989 }
 4990 
 4991 my %wU;
 4992 my %wiq;
 4993 my %is_nonlist_keyword;
 4994 my %is_nonlist_type;
 4995 
 4996 BEGIN {
 4997 
 4998     # added 'U' to fix cases b1125 b1126 b1127
 4999     my @q = qw(w U);
 5000     @{wU}{@q} = (1) x scalar(@q);
 5001 
 5002     @q = qw(w i q Q G C Z);
 5003     @{wiq}{@q} = (1) x scalar(@q);
 5004 
 5005     # Parens following these keywords will not be marked as lists. Note that
 5006     # 'for' is not included and is handled separately, by including 'f' in the
 5007     # hash %is_counted_type, since it may or may not be a c-style for loop.
 5008     @q = qw( if elsif unless and or );
 5009     @is_nonlist_keyword{@q} = (1) x scalar(@q);
 5010 
 5011     # Parens following these types will not be marked as lists
 5012     @q = qw( && || );
 5013     @is_nonlist_type{@q} = (1) x scalar(@q);
 5014 
 5015 }
 5016 
 5017 sub respace_tokens {
 5018 
 5019     my $self = shift;
 5020     return if $rOpts->{'indent-only'};
 5021 
 5022     # This routine is called once per file to do as much formatting as possible
 5023     # before new line breaks are set.
 5024 
 5025     # This routine makes all necessary and possible changes to the tokenization
 5026     # after the initial tokenization of the file. This is a tedious routine,
 5027     # but basically it consists of inserting and deleting whitespace between
 5028     # nonblank tokens according to the selected parameters. In a few cases
 5029     # non-space characters are added, deleted or modified.
 5030 
 5031     # The goal of this routine is to create a new token array which only needs
 5032     # the definition of new line breaks and padding to complete formatting.  In
 5033     # a few cases we have to cheat a little to achieve this goal.  In
 5034     # particular, we may not know if a semicolon will be needed, because it
 5035     # depends on how the line breaks go.  To handle this, we include the
 5036     # semicolon as a 'phantom' which can be displayed as normal or as an empty
 5037     # string.
 5038 
 5039     # Method: The old tokens are copied one-by-one, with changes, from the old
 5040     # linear storage array $rLL to a new array $rLL_new.
 5041 
 5042     my $rLL             = $self->[_rLL_];
 5043     my $Klimit_old      = $self->[_Klimit_];
 5044     my $rlines          = $self->[_rlines_];
 5045     my $length_function = $self->[_length_function_];
 5046     my $is_encoded_data = $self->[_is_encoded_data_];
 5047 
 5048     my $rLL_new = [];    # This is the new array
 5049     my $rtoken_vars;
 5050     my $Ktoken_vars;                   # the old K value of $rtoken_vars
 5051     my ( $Kfirst_old, $Klast_old );    # Range of old line
 5052     my $Klast_old_code;                # K of last token if side comment
 5053     my $Kmax = @{$rLL} - 1;
 5054 
 5055     my $CODE_type = "