"Fossies" - the Fresh Open Source Software Archive

Member "install-tl-20200916/tlpkg/tlperl/lib/Pod/Simple.pm" (8 Mar 2018, 53927 Bytes) of package /windows/misc/install-tl.zip:


As a special service "Fossies" has tried to format the requested source page into HTML format using (guessed) Perl source code syntax highlighting (style: standard) with prefixed line numbers and code folding option. Alternatively you can here view or download the uninterpreted source code file.

    1 
    2 require 5;
    3 package Pod::Simple;
    4 use strict;
    5 use Carp ();
    6 BEGIN           { *DEBUG = sub () {0} unless defined &DEBUG }
    7 use integer;
    8 use Pod::Escapes 1.04 ();
    9 use Pod::Simple::LinkSection ();
   10 use Pod::Simple::BlackBox ();
   11 #use utf8;
   12 
   13 use vars qw(
   14   $VERSION @ISA
   15   @Known_formatting_codes  @Known_directives
   16   %Known_formatting_codes  %Known_directives
   17   $NL
   18 );
   19 
   20 @ISA = ('Pod::Simple::BlackBox');
   21 $VERSION = '3.35';
   22 
   23 @Known_formatting_codes = qw(I B C L E F S X Z); 
   24 %Known_formatting_codes = map(($_=>1), @Known_formatting_codes);
   25 @Known_directives       = qw(head1 head2 head3 head4 item over back); 
   26 %Known_directives       = map(($_=>'Plain'), @Known_directives);
   27 $NL = $/ unless defined $NL;
   28 
   29 #-----------------------------------------------------------------------------
   30 # Set up some constants:
   31 
   32 BEGIN {
   33   if(defined &ASCII)    { }
   34   elsif(chr(65) eq 'A') { *ASCII = sub () {1}  }
   35   else                  { *ASCII = sub () {''} }
   36 
   37   unless(defined &MANY_LINES) { *MANY_LINES = sub () {20} }
   38   DEBUG > 4 and print STDERR "MANY_LINES is ", MANY_LINES(), "\n";
   39   unless(MANY_LINES() >= 1) {
   40     die "MANY_LINES is too small (", MANY_LINES(), ")!\nAborting";
   41   }
   42   if(defined &UNICODE) { }
   43   elsif($] >= 5.008)   { *UNICODE = sub() {1}  }
   44   else                 { *UNICODE = sub() {''} }
   45 }
   46 if(DEBUG > 2) {
   47   print STDERR "# We are ", ASCII ? '' : 'not ', "in ASCII-land\n";
   48   print STDERR "# We are under a Unicode-safe Perl.\n";
   49 }
   50 
   51 # The NO BREAK SPACE and SOFT HYHPEN are used in several submodules.
   52 if ($] ge 5.007_003) {  # On sufficiently modern Perls we can handle any
   53                         # character set
   54   $Pod::Simple::nbsp = chr utf8::unicode_to_native(0xA0);
   55   $Pod::Simple::shy  = chr utf8::unicode_to_native(0xAD);
   56 }
   57 elsif (Pod::Simple::ASCII) {  # Hard code ASCII early Perl
   58   $Pod::Simple::nbsp = "\xA0";
   59   $Pod::Simple::shy  = "\xAD";
   60 }
   61 else { # EBCDIC on early Perl.  We know what the values are for the code
   62         # pages supported then.
   63   $Pod::Simple::nbsp = "\x41";
   64   $Pod::Simple::shy  = "\xCA";
   65 }
   66 
   67 # Design note:
   68 # This is a parser for Pod.  It is not a parser for the set of Pod-like
   69 #  languages which happens to contain Pod -- it is just for Pod, plus possibly
   70 #  some extensions.
   71 
   72 # @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @
   73 #@ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @ @
   74 #@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@
   75 
   76 __PACKAGE__->_accessorize(
   77   'nbsp_for_S',        # Whether to map S<...>'s to \xA0 characters
   78   'source_filename',   # Filename of the source, for use in warnings
   79   'source_dead',       # Whether to consider this parser's source dead
   80 
   81   'output_fh',         # The filehandle we're writing to, if applicable.
   82                        # Used only in some derived classes.
   83 
   84   'hide_line_numbers', # For some dumping subclasses: whether to pointedly
   85                        # suppress the start_line attribute
   86 
   87   'line_count',        # the current line number
   88   'pod_para_count',    # count of pod paragraphs seen so far
   89 
   90   'no_whining',        # whether to suppress whining
   91   'no_errata_section', # whether to suppress the errata section
   92   'complain_stderr',   # whether to complain to stderr
   93 
   94   'doc_has_started',   # whether we've fired the open-Document event yet
   95 
   96   'bare_output',       # For some subclasses: whether to prepend
   97                        #  header-code and postpend footer-code
   98 
   99   'keep_encoding_directive',  # whether to emit =encoding
  100   'nix_X_codes',       # whether to ignore X<...> codes
  101   'merge_text',        # whether to avoid breaking a single piece of
  102                        #  text up into several events
  103 
  104   'preserve_whitespace', # whether to try to keep whitespace as-is
  105   'strip_verbatim_indent', # What indent to strip from verbatim
  106 
  107   'parse_characters',  # Whether parser should expect chars rather than octets
  108 
  109  'content_seen',      # whether we've seen any real Pod content
  110  'errors_seen',       # TODO: document.  whether we've seen any errors (fatal or not)
  111 
  112  'codes_in_verbatim', # for PseudoPod extensions
  113 
  114  'code_handler',      # coderef to call when a code (non-pod) line is seen
  115  'cut_handler',       # ... when a =cut line is seen
  116  'pod_handler',       # ... when a =pod line is seen
  117  'whiteline_handler', # ... when a line with only whitespace is seen
  118  #Called like:
  119  # $code_handler->($line, $self->{'line_count'}, $self) if $code_handler;
  120  #  $cut_handler->($line, $self->{'line_count'}, $self) if $cut_handler;
  121  #  $pod_handler->($line, $self->{'line_count'}, $self) if $pod_handler;
  122  #   $wl_handler->($line, $self->{'line_count'}, $self) if $wl_handler;
  123  'parse_empty_lists', # whether to acknowledge empty =over/=back blocks
  124  'raw_mode',          # to report entire raw lines instead of Pod elements
  125 );
  126 
  127 #@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@
  128 
  129 sub any_errata_seen {  # good for using as an exit() value...
  130   return shift->{'errors_seen'} || 0;
  131 }
  132 
  133 sub errata_seen {
  134   return shift->{'all_errata'} || {};
  135 }
  136 
  137 # Returns the encoding only if it was recognized as being handled and set
  138 sub detected_encoding {
  139   return shift->{'detected_encoding'};
  140 }
  141 
  142 sub encoding {
  143   my $this = shift;
  144   return $this->{'encoding'} unless @_;  # GET.
  145 
  146   $this->_handle_encoding_line("=encoding $_[0]");
  147   if ($this->{'_processed_encoding'}) {
  148     delete $this->{'_processed_encoding'};
  149     if(! $this->{'encoding_command_statuses'} ) {
  150       DEBUG > 2 and print STDERR " CRAZY ERROR: encoding wasn't really handled?!\n";
  151     } elsif( $this->{'encoding_command_statuses'}[-1] ) {
  152       $this->scream( "=encoding $_[0]",
  153          sprintf "Couldn't do %s: %s",
  154          $this->{'encoding_command_reqs'  }[-1],
  155          $this->{'encoding_command_statuses'}[-1],
  156       );
  157     } else {
  158       DEBUG > 2 and print STDERR " (encoding successfully handled.)\n";
  159     }
  160     return $this->{'encoding'};
  161   } else {
  162     return undef;
  163   }
  164 }
  165 
  166 #@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@
  167 # Pull in some functions that, for some reason, I expect to see here too:
  168 BEGIN {
  169   *pretty        = \&Pod::Simple::BlackBox::pretty;
  170   *stringify_lol = \&Pod::Simple::BlackBox::stringify_lol;
  171 }
  172 
  173 #@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@
  174 
  175 sub version_report {
  176   my $class = ref($_[0]) || $_[0];
  177   if($class eq __PACKAGE__) {
  178     return "$class $VERSION";
  179   } else {
  180     my $v = $class->VERSION;
  181     return "$class $v (" . __PACKAGE__ . " $VERSION)";
  182   }
  183 }
  184 
  185 #@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@
  186 
  187 #sub curr_open { # read-only list accessor
  188 #  return @{ $_[0]{'curr_open'} || return() };
  189 #}
  190 #sub _curr_open_listref { $_[0]{'curr_open'} ||= [] }
  191 
  192 
  193 sub output_string {
  194   # Works by faking out output_fh.  Simplifies our code.
  195   #
  196   my $this = shift;
  197   return $this->{'output_string'} unless @_;  # GET.
  198   
  199   require Pod::Simple::TiedOutFH;
  200   my $x = (defined($_[0]) and ref($_[0])) ? $_[0] : \( $_[0] );
  201   $$x = '' unless defined $$x;
  202   DEBUG > 4 and print STDERR "# Output string set to $x ($$x)\n";
  203   $this->{'output_fh'} = Pod::Simple::TiedOutFH->handle_on($_[0]);
  204   return
  205     $this->{'output_string'} = $_[0];
  206     #${ ${ $this->{'output_fh'} } };
  207 }
  208 
  209 sub abandon_output_string { $_[0]->abandon_output_fh; delete $_[0]{'output_string'} }
  210 sub abandon_output_fh     { $_[0]->output_fh(undef) }
  211 # These don't delete the string or close the FH -- they just delete our
  212 #  references to it/them.
  213 # TODO: document these
  214 
  215 #@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@
  216 
  217 sub new {
  218   # takes no parameters
  219   my $class = ref($_[0]) || $_[0];
  220   #Carp::croak(__PACKAGE__ . " is a virtual base class -- see perldoc "
  221   #  . __PACKAGE__ );
  222   return bless {
  223     'accept_codes'      => { map( ($_=>$_), @Known_formatting_codes ) },
  224     'accept_directives' => { %Known_directives },
  225     'accept_targets'    => {},
  226   }, $class;
  227 }
  228 
  229 
  230 
  231 # TODO: an option for whether to interpolate E<...>'s, or just resolve to codes.
  232 
  233 #@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@
  234 
  235 sub _handle_element_start {     # OVERRIDE IN DERIVED CLASS
  236   my($self, $element_name, $attr_hash_r) = @_;
  237   return;
  238 }
  239 
  240 sub _handle_element_end {       # OVERRIDE IN DERIVED CLASS
  241   my($self, $element_name) = @_;
  242   return;
  243 }
  244 
  245 sub _handle_text          {     # OVERRIDE IN DERIVED CLASS
  246   my($self, $text) = @_;
  247   return;
  248 }
  249 
  250 #@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@
  251 #
  252 # And now directives (not targets)
  253 
  254 sub accept_directive_as_verbatim  { shift->_accept_directives('Verbatim', @_) }
  255 sub accept_directive_as_data      { shift->_accept_directives('Data',     @_) }
  256 sub accept_directive_as_processed { shift->_accept_directives('Plain',    @_) }
  257 
  258 sub _accept_directives {
  259   my($this, $type) = splice @_,0,2;
  260   foreach my $d (@_) {
  261     next unless defined $d and length $d;
  262     Carp::croak "\"$d\" isn't a valid directive name"
  263      unless $d =~ m/^[a-zA-Z][a-zA-Z0-9]*$/s;
  264     Carp::croak "\"$d\" is already a reserved Pod directive name"
  265      if exists $Known_directives{$d};
  266     $this->{'accept_directives'}{$d} = $type;
  267     DEBUG > 2 and print STDERR "Learning to accept \"=$d\" as directive of type $type\n";
  268   }
  269   DEBUG > 6 and print STDERR "$this\'s accept_directives : ",
  270    pretty($this->{'accept_directives'}), "\n";
  271   
  272   return sort keys %{ $this->{'accept_directives'} } if wantarray;
  273   return;
  274 }
  275 
  276 #--------------------------------------------------------------------------
  277 # TODO: document these:
  278 
  279 sub unaccept_directive { shift->unaccept_directives(@_) };
  280 
  281 sub unaccept_directives {
  282   my $this = shift;
  283   foreach my $d (@_) {
  284     next unless defined $d and length $d;
  285     Carp::croak "\"$d\" isn't a valid directive name"
  286      unless $d =~ m/^[a-zA-Z][a-zA-Z0-9]*$/s;
  287     Carp::croak "But you must accept \"$d\" directives -- it's a builtin!"
  288      if exists $Known_directives{$d};
  289     delete $this->{'accept_directives'}{$d};
  290     DEBUG > 2 and print STDERR "OK, won't accept \"=$d\" as directive.\n";
  291   }
  292   return sort keys %{ $this->{'accept_directives'} } if wantarray;
  293   return
  294 }
  295 
  296 #@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@
  297 #
  298 # And now targets (not directives)
  299 
  300 sub accept_target         { shift->accept_targets(@_)         } # alias
  301 sub accept_target_as_text { shift->accept_targets_as_text(@_) } # alias
  302 
  303 
  304 sub accept_targets         { shift->_accept_targets('1', @_) }
  305 
  306 sub accept_targets_as_text { shift->_accept_targets('force_resolve', @_) }
  307  # forces them to be processed, even when there's no ":".
  308 
  309 sub _accept_targets {
  310   my($this, $type) = splice @_,0,2;
  311   foreach my $t (@_) {
  312     next unless defined $t and length $t;
  313     # TODO: enforce some limitations on what a target name can be?
  314     $this->{'accept_targets'}{$t} = $type;
  315     DEBUG > 2 and print STDERR "Learning to accept \"$t\" as target of type $type\n";
  316   }    
  317   return sort keys %{ $this->{'accept_targets'} } if wantarray;
  318   return;
  319 }
  320 
  321 #--------------------------------------------------------------------------
  322 sub unaccept_target         { shift->unaccept_targets(@_) }
  323 
  324 sub unaccept_targets {
  325   my $this = shift;
  326   foreach my $t (@_) {
  327     next unless defined $t and length $t;
  328     # TODO: enforce some limitations on what a target name can be?
  329     delete $this->{'accept_targets'}{$t};
  330     DEBUG > 2 and print STDERR "OK, won't accept \"$t\" as target.\n";
  331   }    
  332   return sort keys %{ $this->{'accept_targets'} } if wantarray;
  333   return;
  334 }
  335 
  336 #@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@
  337 #
  338 # And now codes (not targets or directives)
  339 
  340 # XXX Probably it is an error that the digit '9' is excluded from these re's.
  341 # Broken for early Perls on EBCDIC
  342 my $xml_name_re = eval "qr/[^-.0-8:A-Z_a-z[:^ascii:]]/";
  343 if (! defined $xml_name_re) {
  344     $xml_name_re = qr/[\x00-\x2C\x2F\x39\x3B-\x40\x5B-\x5E\x60\x7B-\x7F]/;
  345 }
  346 
  347 sub accept_code { shift->accept_codes(@_) } # alias
  348 
  349 sub accept_codes {  # Add some codes
  350   my $this = shift;
  351   
  352   foreach my $new_code (@_) {
  353     next unless defined $new_code and length $new_code;
  354     # A good-enough check that it's good as an XML Name symbol:
  355     Carp::croak "\"$new_code\" isn't a valid element name"
  356       if $new_code =~ $xml_name_re
  357           # Characters under 0x80 that aren't legal in an XML Name.
  358       or $new_code =~ m/^[-\.0-9]/s
  359       or $new_code =~ m/:[-\.0-9]/s;
  360           # The legal under-0x80 Name characters that
  361           #  an XML Name still can't start with.
  362 
  363     $this->{'accept_codes'}{$new_code} = $new_code;
  364 
  365     # Yes, map to itself -- just so that when we
  366     #  see "=extend W [whatever] thatelementname", we say that W maps
  367     #  to whatever $this->{accept_codes}{thatelementname} is,
  368     #  i.e., "thatelementname".  Then when we go re-mapping,
  369     #  a "W" in the treelet turns into "thatelementname".  We only
  370     #  remap once.
  371     # If we say we accept "W", then a "W" in the treelet simply turns
  372     #  into "W".
  373   }
  374   
  375   return;
  376 }
  377 
  378 #--------------------------------------------------------------------------
  379 sub unaccept_code { shift->unaccept_codes(@_) }
  380 
  381 sub unaccept_codes { # remove some codes
  382   my $this = shift;
  383   
  384   foreach my $new_code (@_) {
  385     next unless defined $new_code and length $new_code;
  386     # A good-enough check that it's good as an XML Name symbol:
  387     Carp::croak "\"$new_code\" isn't a valid element name"
  388       if $new_code =~ $xml_name_re
  389           # Characters under 0x80 that aren't legal in an XML Name.
  390       or $new_code =~ m/^[-\.0-9]/s
  391       or $new_code =~ m/:[-\.0-9]/s;
  392           # The legal under-0x80 Name characters that
  393           #  an XML Name still can't start with.
  394 
  395     Carp::croak "But you must accept \"$new_code\" codes -- it's a builtin!"
  396      if grep $new_code eq $_, @Known_formatting_codes;
  397 
  398     delete $this->{'accept_codes'}{$new_code};
  399 
  400     DEBUG > 2 and print STDERR "OK, won't accept the code $new_code<...>.\n";
  401   }
  402   
  403   return;
  404 }
  405 
  406 
  407 #@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@
  408 #@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@
  409 
  410 sub parse_string_document {
  411   my $self = shift;
  412   my @lines;
  413   foreach my $line_group (@_) {
  414     next unless defined $line_group and length $line_group;
  415     pos($line_group) = 0;
  416     while($line_group =~
  417       m/([^\n\r]*)(\r?\n?)/g # supports \r, \n ,\r\n
  418       #m/([^\n\r]*)((?:\r?\n)?)/g
  419     ) {
  420       #print(">> $1\n"),
  421       $self->parse_lines($1)
  422        if length($1) or length($2)
  423         or pos($line_group) != length($line_group);
  424        # I.e., unless it's a zero-length "empty line" at the very
  425        #  end of "foo\nbar\n" (i.e., between the \n and the EOS).
  426     }
  427   }
  428   $self->parse_lines(undef); # to signal EOF
  429   return $self;
  430 }
  431 
  432 sub _init_fh_source {
  433   my($self, $source) = @_;
  434 
  435   #DEBUG > 1 and print STDERR "Declaring $source as :raw for starters\n";
  436   #$self->_apply_binmode($source, ':raw');
  437   #binmode($source, ":raw");
  438 
  439   return;
  440 }
  441 
  442 #:.:.:.:.:.:.:.:.:.:.:.:.:.:.:.:.:.:.:.:.:.:.:.:.:.:.:.:.:.:.:.:.:.:.:.:.:.:.
  443 #
  444 
  445 sub parse_file {
  446   my($self, $source) = (@_);
  447 
  448   if(!defined $source) {
  449     Carp::croak("Can't use empty-string as a source for parse_file");
  450   } elsif(ref(\$source) eq 'GLOB') {
  451     $self->{'source_filename'} = '' . ($source);
  452   } elsif(ref $source) {
  453     $self->{'source_filename'} = '' . ($source);
  454   } elsif(!length $source) {
  455     Carp::croak("Can't use empty-string as a source for parse_file");
  456   } else {
  457     {
  458       local *PODSOURCE;
  459       open(PODSOURCE, "<$source") || Carp::croak("Can't open $source: $!");
  460       $self->{'source_filename'} = $source;
  461       $source = *PODSOURCE{IO};
  462     }
  463     $self->_init_fh_source($source);
  464   }
  465   # By here, $source is a FH.
  466 
  467   $self->{'source_fh'} = $source;
  468 
  469   my($i, @lines);
  470   until( $self->{'source_dead'} ) {
  471     splice @lines;
  472 
  473     for($i = MANY_LINES; $i--;) {  # read those many lines at a time
  474       local $/ = $NL;
  475       push @lines, scalar(<$source>);  # readline
  476       last unless defined $lines[-1];
  477        # but pass thru the undef, which will set source_dead to true
  478     }
  479 
  480     my $at_eof = ! $lines[-1]; # keep track of the undef
  481     pop @lines if $at_eof; # silence warnings
  482 
  483     # be eol agnostic
  484     s/\r\n?/\n/g for @lines;
  485  
  486     # make sure there are only one line elements for parse_lines
  487     @lines = split(/(?<=\n)/, join('', @lines));
  488 
  489     # push the undef back after popping it to set source_dead to true
  490     push @lines, undef if $at_eof;
  491 
  492     $self->parse_lines(@lines);
  493   }
  494   delete($self->{'source_fh'}); # so it can be GC'd
  495   return $self;
  496 }
  497 
  498 #:.:.:.:.:.:.:.:.:.:.:.:.:.:.:.:.:.:.:.:.:.:.:.:.:.:.:.:.:.:.:.:.:.:.:.:.:.:.
  499 
  500 sub parse_from_file {
  501   # An emulation of Pod::Parser's interface, for the sake of Perldoc.
  502   # Basically just a wrapper around parse_file.
  503 
  504   my($self, $source, $to) = @_;
  505   $self = $self->new unless ref($self); # so we tolerate being a class method
  506   
  507   if(!defined $source)             { $source = *STDIN{IO}
  508   } elsif(ref(\$source) eq 'GLOB') { # stet
  509   } elsif(ref($source)           ) { # stet
  510   } elsif(!length $source
  511      or $source eq '-' or $source =~ m/^<&(?:STDIN|0)$/i
  512   ) { 
  513     $source = *STDIN{IO};
  514   }
  515 
  516   if(!defined $to) {             $self->output_fh( *STDOUT{IO}   );
  517   } elsif(ref(\$to) eq 'GLOB') { $self->output_fh( $to );
  518   } elsif(ref($to)) {            $self->output_fh( $to );
  519   } elsif(!length $to
  520      or $to eq '-' or $to =~ m/^>&?(?:STDOUT|1)$/i
  521   ) {
  522     $self->output_fh( *STDOUT{IO} );
  523   } elsif($to =~ m/^>&(?:STDERR|2)$/i) {
  524     $self->output_fh( *STDERR{IO} );
  525   } else {
  526     require Symbol;
  527     my $out_fh = Symbol::gensym();
  528     DEBUG and print STDERR "Write-opening to $to\n";
  529     open($out_fh, ">$to")  or  Carp::croak "Can't write-open $to: $!";
  530     binmode($out_fh)
  531      if $self->can('write_with_binmode') and $self->write_with_binmode;
  532     $self->output_fh($out_fh);
  533   }
  534 
  535   return $self->parse_file($source);
  536 }
  537 
  538 #-----------------------------------------------------------------------------
  539 
  540 sub whine {
  541   #my($self,$line,$complaint) = @_;
  542   my $self = shift(@_);
  543   ++$self->{'errors_seen'};
  544   if($self->{'no_whining'}) {
  545     DEBUG > 9 and print STDERR "Discarding complaint (at line $_[0]) $_[1]\n because no_whining is on.\n";
  546     return;
  547   }
  548   push @{$self->{'all_errata'}{$_[0]}}, $_[1];
  549   return $self->_complain_warn(@_) if $self->{'complain_stderr'};
  550   return $self->_complain_errata(@_);
  551 }
  552 
  553 sub scream {    # like whine, but not suppressible
  554   #my($self,$line,$complaint) = @_;
  555   my $self = shift(@_);
  556   ++$self->{'errors_seen'};
  557   push @{$self->{'all_errata'}{$_[0]}}, $_[1];
  558   return $self->_complain_warn(@_) if $self->{'complain_stderr'};
  559   return $self->_complain_errata(@_);
  560 }
  561 
  562 sub _complain_warn {
  563   my($self,$line,$complaint) = @_;
  564   return printf STDERR "%s around line %s: %s\n",
  565     $self->{'source_filename'} || 'Pod input', $line, $complaint;
  566 }
  567 
  568 sub _complain_errata {
  569   my($self,$line,$complaint) = @_;
  570   if( $self->{'no_errata_section'} ) {
  571     DEBUG > 9 and print STDERR "Discarding erratum (at line $line) $complaint\n because no_errata_section is on.\n";
  572   } else {
  573     DEBUG > 9 and print STDERR "Queuing erratum (at line $line) $complaint\n";
  574     push @{$self->{'errata'}{$line}}, $complaint
  575       # for a report to be generated later!
  576   }
  577   return 1;
  578 }
  579 
  580 #@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@
  581 
  582 sub _get_initial_item_type {
  583   # A hack-wrapper here for when you have like "=over\n\n=item 456\n\n"
  584   my($self, $para) = @_;
  585   return $para->[1]{'~type'}  if $para->[1]{'~type'};
  586 
  587   return $para->[1]{'~type'} = 'text'
  588    if join("\n", @{$para}[2 .. $#$para]) =~ m/^\s*(\d+)\.?\s*$/s and $1 ne '1';
  589   # Else fall thru to the general case:
  590   return $self->_get_item_type($para);
  591 }
  592 
  593 
  594 
  595 sub _get_item_type {       # mutates the item!!
  596   my($self, $para) = @_;
  597   return $para->[1]{'~type'} if $para->[1]{'~type'};
  598 
  599 
  600   # Otherwise we haven't yet been to this node.  Maybe alter it...
  601   
  602   my $content = join "\n", @{$para}[2 .. $#$para];
  603 
  604   if($content =~ m/^\s*\*\s*$/s or $content =~ m/^\s*$/s) {
  605     # Like: "=item *", "=item   *   ", "=item"
  606     splice @$para, 2; # so it ends up just being ['=item', { attrhash } ]
  607     $para->[1]{'~orig_content'} = $content;
  608     return $para->[1]{'~type'} = 'bullet';
  609 
  610   } elsif($content =~ m/^\s*\*\s+(.+)/s) {  # tolerance
  611   
  612     # Like: "=item * Foo bar baz";
  613     $para->[1]{'~orig_content'}      = $content;
  614     $para->[1]{'~_freaky_para_hack'} = $1;
  615     DEBUG > 2 and print STDERR " Tolerating $$para[2] as =item *\\n\\n$1\n";
  616     splice @$para, 2; # so it ends up just being ['=item', { attrhash } ]
  617     return $para->[1]{'~type'} = 'bullet';
  618 
  619   } elsif($content =~ m/^\s*(\d+)\.?\s*$/s) {
  620     # Like: "=item 1.", "=item    123412"
  621     
  622     $para->[1]{'~orig_content'} = $content;
  623     $para->[1]{'number'} = $1;  # Yes, stores the number there!
  624 
  625     splice @$para, 2; # so it ends up just being ['=item', { attrhash } ]
  626     return $para->[1]{'~type'} = 'number';
  627     
  628   } else {
  629     # It's anything else.
  630     return $para->[1]{'~type'} = 'text';
  631 
  632   }
  633 }
  634 
  635 #-----------------------------------------------------------------------------
  636 
  637 sub _make_treelet {
  638   my $self = shift;  # and ($para, $start_line)
  639   my $treelet;
  640   if(!@_) {
  641     return [''];
  642   } if(ref $_[0] and ref $_[0][0] and $_[0][0][0] eq '~Top') {
  643     # Hack so we can pass in fake-o pre-cooked paragraphs:
  644     #  just have the first line be a reference to a ['~Top', {}, ...]
  645     # We use this feechure in gen_errata and stuff.
  646 
  647     DEBUG and print STDERR "Applying precooked treelet hack to $_[0][0]\n";
  648     $treelet = $_[0][0];
  649     splice @$treelet, 0, 2;  # lop the top off
  650     return $treelet;
  651   } else {
  652     $treelet = $self->_treelet_from_formatting_codes(@_);
  653   }
  654   
  655   if( $self->_remap_sequences($treelet) ) {
  656     $self->_treat_Zs($treelet);  # Might as well nix these first
  657     $self->_treat_Ls($treelet);  # L has to precede E and S
  658     $self->_treat_Es($treelet);
  659     $self->_treat_Ss($treelet);  # S has to come after E
  660 
  661     $self->_wrap_up($treelet); # Nix X's and merge texties
  662     
  663   } else {
  664     DEBUG and print STDERR "Formatless treelet gets fast-tracked.\n";
  665      # Very common case!
  666   }
  667   
  668   splice @$treelet, 0, 2;  # lop the top off
  669 
  670   return $treelet;
  671 }
  672 
  673 #:.:.:.:.:.:.:.:.:.:.:.:.:.:.:.:.:.:.:.:.:.:.:.:.:.:.:.:.:.:.:.:.:.:.:.:.:.:.
  674 
  675 sub _wrap_up {
  676   my($self, @stack) = @_;
  677   my $nixx  = $self->{'nix_X_codes'};
  678   my $merge = $self->{'merge_text' };
  679   return unless $nixx or $merge;
  680 
  681   DEBUG > 2 and print STDERR "\nStarting _wrap_up traversal.\n",
  682    $merge ? (" Merge mode on\n") : (),
  683    $nixx  ? (" Nix-X mode on\n") : (),
  684   ;    
  685   
  686 
  687   my($i, $treelet);
  688   while($treelet = shift @stack) {
  689     DEBUG > 3 and print STDERR " Considering children of this $treelet->[0] node...\n";
  690     for($i = 2; $i < @$treelet; ++$i) { # iterate over children
  691       DEBUG > 3 and print STDERR " Considering child at $i ", pretty($treelet->[$i]), "\n";
  692       if($nixx and ref $treelet->[$i] and $treelet->[$i][0] eq 'X') {
  693         DEBUG > 3 and print STDERR "   Nixing X node at $i\n";
  694         splice(@$treelet, $i, 1); # just nix this node (and its descendants)
  695         # no need to back-update the counter just yet
  696         redo;
  697 
  698       } elsif($merge and $i != 2 and  # non-initial
  699          !ref $treelet->[$i] and !ref $treelet->[$i - 1]
  700       ) {
  701         DEBUG > 3 and print STDERR "   Merging ", $i-1,
  702          ":[$treelet->[$i-1]] and $i\:[$treelet->[$i]]\n";
  703         $treelet->[$i-1] .= ( splice(@$treelet, $i, 1) )[0];
  704         DEBUG > 4 and print STDERR "    Now: ", $i-1, ":[$treelet->[$i-1]]\n";
  705         --$i;
  706         next; 
  707         # since we just pulled the possibly last node out from under
  708         #  ourselves, we can't just redo()
  709 
  710       } elsif( ref $treelet->[$i] ) {
  711         DEBUG > 4 and print STDERR "  Enqueuing ", pretty($treelet->[$i]), " for traversal.\n";
  712         push @stack, $treelet->[$i];
  713 
  714         if($treelet->[$i][0] eq 'L') {
  715           my $thing;
  716           foreach my $attrname ('section', 'to') {        
  717             if(defined($thing = $treelet->[$i][1]{$attrname}) and ref $thing) {
  718               unshift @stack, $thing;
  719               DEBUG > 4 and print STDERR "  +Enqueuing ",
  720                pretty( $treelet->[$i][1]{$attrname} ),
  721                " as an attribute value to tweak.\n";
  722             }
  723           }
  724         }
  725       }
  726     }
  727   }
  728   DEBUG > 2 and print STDERR "End of _wrap_up traversal.\n\n";
  729 
  730   return;
  731 }
  732 
  733 #:.:.:.:.:.:.:.:.:.:.:.:.:.:.:.:.:.:.:.:.:.:.:.:.:.:.:.:.:.:.:.:.:.:.:.:.:.:.
  734 
  735 sub _remap_sequences {
  736   my($self,@stack) = @_;
  737   
  738   if(@stack == 1 and @{ $stack[0] } == 3 and !ref $stack[0][2]) {
  739     # VERY common case: abort it.
  740     DEBUG and print STDERR "Skipping _remap_sequences: formatless treelet.\n";
  741     return 0;
  742   }
  743   
  744   my $map = ($self->{'accept_codes'} || die "NO accept_codes in $self?!?");
  745 
  746   my $start_line = $stack[0][1]{'start_line'};
  747   DEBUG > 2 and printf
  748    "\nAbout to start _remap_sequences on treelet from line %s.\n",
  749    $start_line || '[?]'
  750   ;
  751   DEBUG > 3 and print STDERR " Map: ",
  752     join('; ', map "$_=" . (
  753         ref($map->{$_}) ? join(",", @{$map->{$_}}) : $map->{$_}
  754       ),
  755       sort keys %$map ),
  756     ("B~C~E~F~I~L~S~X~Z" eq join '~', sort keys %$map)
  757      ? "  (all normal)\n" : "\n"
  758   ;
  759 
  760   # A recursive algorithm implemented iteratively!  Whee!
  761   
  762   my($is, $was, $i, $treelet); # scratch
  763   while($treelet = shift @stack) {
  764     DEBUG > 3 and print STDERR " Considering children of this $treelet->[0] node...\n";
  765     for($i = 2; $i < @$treelet; ++$i) { # iterate over children
  766       next unless ref $treelet->[$i];  # text nodes are uninteresting
  767       
  768       DEBUG > 4 and print STDERR "  Noting child $i : $treelet->[$i][0]<...>\n";
  769       
  770       $is = $treelet->[$i][0] = $map->{ $was = $treelet->[$i][0] };
  771       if( DEBUG > 3 ) {
  772         if(!defined $is) {
  773           print STDERR "   Code $was<> is UNKNOWN!\n";
  774         } elsif($is eq $was) {
  775           DEBUG > 4 and print STDERR "   Code $was<> stays the same.\n";
  776         } else  {
  777           print STDERR "   Code $was<> maps to ",
  778            ref($is)
  779             ? ( "tags ", map("$_<", @$is), '...', map('>', @$is), "\n" )
  780             : "tag $is<...>.\n";
  781         }
  782       }
  783       
  784       if(!defined $is) {
  785         $self->whine($start_line, "Deleting unknown formatting code $was<>");
  786         $is = $treelet->[$i][0] = '1';  # But saving the children!
  787         # I could also insert a leading "$was<" and tailing ">" as
  788         # children of this node, but something about that seems icky.
  789       }
  790       if(ref $is) {
  791         my @dynasty = @$is;
  792         DEBUG > 4 and print STDERR "    Renaming $was node to $dynasty[-1]\n";
  793         $treelet->[$i][0] = pop @dynasty;
  794         my $nugget;
  795         while(@dynasty) {
  796           DEBUG > 4 and printf
  797            "    Grafting a new %s node between %s and %s\n",
  798            $dynasty[-1], $treelet->[0], $treelet->[$i][0], 
  799           ;
  800           
  801           #$nugget = ;
  802           splice @$treelet, $i, 1, [pop(@dynasty), {}, $treelet->[$i]];
  803             # relace node with a new parent
  804         }
  805       } elsif($is eq '0') {
  806         splice(@$treelet, $i, 1); # just nix this node (and its descendants)
  807         --$i;  # back-update the counter
  808       } elsif($is eq '1') {
  809         splice(@$treelet, $i, 1 # replace this node with its children!
  810           => splice @{ $treelet->[$i] },2
  811               # (not catching its first two (non-child) items)
  812         );
  813         --$i;  # back up for new stuff
  814       } else {
  815         # otherwise it's unremarkable
  816         unshift @stack, $treelet->[$i];  # just recurse
  817       }
  818     }
  819   }
  820   
  821   DEBUG > 2 and print STDERR "End of _remap_sequences traversal.\n\n";
  822 
  823   if(@_ == 2 and @{ $_[1] } == 3 and !ref $_[1][2]) {
  824     DEBUG and print STDERR "Noting that the treelet is now formatless.\n";
  825     return 0;
  826   }
  827   return 1;
  828 }
  829 
  830 # . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . .
  831 
  832 sub _ponder_extend {
  833 
  834   # "Go to an extreme, move back to a more comfortable place"
  835   #  -- /Oblique Strategies/,  Brian Eno and Peter Schmidt
  836   
  837   my($self, $para) = @_;
  838   my $content = join ' ', splice @$para, 2;
  839   $content =~ s/^\s+//s;
  840   $content =~ s/\s+$//s;
  841 
  842   DEBUG > 2 and print STDERR "Ogling extensor: =extend $content\n";
  843 
  844   if($content =~
  845     m/^
  846       (\S+)         # 1 : new item
  847       \s+
  848       (\S+)         # 2 : fallback(s)
  849       (?:\s+(\S+))? # 3 : element name(s)
  850       \s*
  851       $
  852     /xs
  853   ) {
  854     my $new_letter = $1;
  855     my $fallbacks_one = $2;
  856     my $elements_one;
  857     $elements_one = defined($3) ? $3 : $1;
  858 
  859     DEBUG > 2 and print STDERR "Extensor has good syntax.\n";
  860 
  861     unless($new_letter =~ m/^[A-Z]$/s or $new_letter) {
  862       DEBUG > 2 and print STDERR " $new_letter isn't a valid thing to entend.\n";
  863       $self->whine(
  864         $para->[1]{'start_line'},
  865         "You can extend only formatting codes A-Z, not like \"$new_letter\""
  866       );
  867       return;
  868     }
  869     
  870     if(grep $new_letter eq $_, @Known_formatting_codes) {
  871       DEBUG > 2 and print STDERR " $new_letter isn't a good thing to extend, because known.\n";
  872       $self->whine(
  873         $para->[1]{'start_line'},
  874         "You can't extend an established code like \"$new_letter\""
  875       );
  876       
  877       #TODO: or allow if last bit is same?
  878       
  879       return;
  880     }
  881 
  882     unless($fallbacks_one =~ m/^[A-Z](,[A-Z])*$/s  # like "B", "M,I", etc.
  883       or $fallbacks_one eq '0' or $fallbacks_one eq '1'
  884     ) {
  885       $self->whine(
  886         $para->[1]{'start_line'},
  887         "Format for second =extend parameter must be like"
  888         . " M or 1 or 0 or M,N or M,N,O but you have it like "
  889         . $fallbacks_one
  890       );
  891       return;
  892     }
  893     
  894     unless($elements_one =~ m/^[^ ,]+(,[^ ,]+)*$/s) { # like "B", "M,I", etc.
  895       $self->whine(
  896         $para->[1]{'start_line'},
  897         "Format for third =extend parameter: like foo or bar,Baz,qu:ux but not like "
  898         . $elements_one
  899       );
  900       return;
  901     }
  902 
  903     my @fallbacks  = split ',', $fallbacks_one,  -1;
  904     my @elements   = split ',', $elements_one, -1;
  905 
  906     foreach my $f (@fallbacks) {
  907       next if exists $Known_formatting_codes{$f} or $f eq '0' or $f eq '1';
  908       DEBUG > 2 and print STDERR "  Can't fall back on unknown code $f\n";
  909       $self->whine(
  910         $para->[1]{'start_line'},
  911         "Can't use unknown formatting code '$f' as a fallback for '$new_letter'"
  912       );
  913       return;
  914     }
  915 
  916     DEBUG > 3 and printf STDERR "Extensor: Fallbacks <%s> Elements <%s>.\n",
  917      @fallbacks, @elements;
  918 
  919     my $canonical_form;
  920     foreach my $e (@elements) {
  921       if(exists $self->{'accept_codes'}{$e}) {
  922         DEBUG > 1 and print STDERR " Mapping '$new_letter' to known extension '$e'\n";
  923         $canonical_form = $e;
  924         last; # first acceptable elementname wins!
  925       } else {
  926         DEBUG > 1 and print STDERR " Can't map '$new_letter' to unknown extension '$e'\n";
  927       }
  928     }
  929 
  930 
  931     if( defined $canonical_form ) {
  932       # We found a good N => elementname mapping
  933       $self->{'accept_codes'}{$new_letter} = $canonical_form;
  934       DEBUG > 2 and print
  935        "Extensor maps $new_letter => known element $canonical_form.\n";
  936     } else {
  937       # We have to use the fallback(s), which might be '0', or '1'.
  938       $self->{'accept_codes'}{$new_letter}
  939         = (@fallbacks == 1) ? $fallbacks[0] : \@fallbacks;
  940       DEBUG > 2 and print
  941        "Extensor maps $new_letter => fallbacks @fallbacks.\n";
  942     }
  943 
  944   } else {
  945     DEBUG > 2 and print STDERR "Extensor has bad syntax.\n";
  946     $self->whine(
  947       $para->[1]{'start_line'},
  948       "Unknown =extend syntax: $content"
  949     )
  950   }
  951   return;
  952 }
  953 
  954 
  955 #:.:.:.:.:.:.:.:.:.:.:.:.:.:.:.:.:.:.:.:.:.:.:.:.:.:.:.:.:.:.:.:.:.:.:.:.:.:.
  956 
  957 sub _treat_Zs {  # Nix Z<...>'s
  958   my($self,@stack) = @_;
  959 
  960   my($i, $treelet);
  961   my $start_line = $stack[0][1]{'start_line'};
  962 
  963   # A recursive algorithm implemented iteratively!  Whee!
  964 
  965   while($treelet = shift @stack) {
  966     for($i = 2; $i < @$treelet; ++$i) { # iterate over children
  967       next unless ref $treelet->[$i];  # text nodes are uninteresting
  968       unless($treelet->[$i][0] eq 'Z') {
  969         unshift @stack, $treelet->[$i]; # recurse
  970         next;
  971       }
  972         
  973       DEBUG > 1 and print STDERR "Nixing Z node @{$treelet->[$i]}\n";
  974         
  975       # bitch UNLESS it's empty
  976       unless(  @{$treelet->[$i]} == 2
  977            or (@{$treelet->[$i]} == 3 and $treelet->[$i][2] eq '')
  978       ) {
  979         $self->whine( $start_line, "A non-empty Z<>" );
  980       }      # but kill it anyway
  981         
  982       splice(@$treelet, $i, 1); # thereby just nix this node.
  983       --$i;
  984         
  985     }
  986   }
  987   
  988   return;
  989 }
  990 
  991 # . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . .
  992 
  993 # Quoting perlpodspec:
  994 
  995 # In parsing an L<...> code, Pod parsers must distinguish at least four
  996 # attributes:
  997 
  998 ############# Not used.  Expressed via the element children plus
  999 #############  the value of the "content-implicit" flag.
 1000 # First:
 1001 # The link-text. If there is none, this must be undef. (E.g., in "L<Perl
 1002 # Functions|perlfunc>", the link-text is "Perl Functions". In
 1003 # "L<Time::HiRes>" and even "L<|Time::HiRes>", there is no link text. Note
 1004 # that link text may contain formatting.)
 1005 # 
 1006 
 1007 ############# The element children
 1008 # Second:
 1009 # The possibly inferred link-text -- i.e., if there was no real link text,
 1010 # then this is the text that we'll infer in its place. (E.g., for
 1011 # "L<Getopt::Std>", the inferred link text is "Getopt::Std".)
 1012 #
 1013 
 1014 ############# The "to" attribute (which might be text, or a treelet)
 1015 # Third:
 1016 # The name or URL, or undef if none. (E.g., in "L<Perl
 1017 # Functions|perlfunc>", the name -- also sometimes called the page -- is
 1018 # "perlfunc". In "L</CAVEATS>", the name is undef.)
 1019 # 
 1020 
 1021 ############# The "section" attribute (which might be next, or a treelet)
 1022 # Fourth:
 1023 # The section (AKA "item" in older perlpods), or undef if none. E.g., in
 1024 # Getopt::Std/DESCRIPTION, "DESCRIPTION" is the section. (Note that this
 1025 # is not the same as a manpage section like the "5" in "man 5 crontab".
 1026 # "Section Foo" in the Pod sense means the part of the text that's
 1027 # introduced by the heading or item whose text is "Foo".)
 1028 # 
 1029 # Pod parsers may also note additional attributes including:
 1030 #
 1031 
 1032 ############# The "type" attribute.
 1033 # Fifth:
 1034 # A flag for whether item 3 (if present) is a URL (like
 1035 # "http://lists.perl.org" is), in which case there should be no section
 1036 # attribute; a Pod name (like "perldoc" and "Getopt::Std" are); or
 1037 # possibly a man page name (like "crontab(5)" is).
 1038 #
 1039 
 1040 ############# The "raw" attribute that is already there.
 1041 # Sixth:
 1042 # The raw original L<...> content, before text is split on "|", "/", etc,
 1043 # and before E<...> codes are expanded.
 1044 
 1045 
 1046 # For L<...> codes without a "name|" part, only E<...> and Z<> codes may
 1047 # occur -- no other formatting codes. That is, authors should not use
 1048 # "L<B<Foo::Bar>>".
 1049 #
 1050 # Note, however, that formatting codes and Z<>'s can occur in any and all
 1051 # parts of an L<...> (i.e., in name, section, text, and url).
 1052 
 1053 sub _treat_Ls {  # Process our dear dear friends, the L<...> sequences
 1054 
 1055   # L<name>
 1056   # L<name/"sec"> or L<name/sec>
 1057   # L</"sec"> or L</sec> or L<"sec">
 1058   # L<text|name>
 1059   # L<text|name/"sec"> or L<text|name/sec>
 1060   # L<text|/"sec"> or L<text|/sec> or L<text|"sec">
 1061   # L<scheme:...>
 1062   # L<text|scheme:...>
 1063 
 1064   my($self,@stack) = @_;
 1065 
 1066   my($i, $treelet);
 1067   my $start_line = $stack[0][1]{'start_line'};
 1068 
 1069   # A recursive algorithm implemented iteratively!  Whee!
 1070 
 1071   while($treelet = shift @stack) {
 1072     for(my $i = 2; $i < @$treelet; ++$i) {
 1073       # iterate over children of current tree node
 1074       next unless ref $treelet->[$i];  # text nodes are uninteresting
 1075       unless($treelet->[$i][0] eq 'L') {
 1076         unshift @stack, $treelet->[$i]; # recurse
 1077         next;
 1078       }
 1079       
 1080       
 1081       # By here, $treelet->[$i] is definitely an L node
 1082       my $ell = $treelet->[$i];
 1083       DEBUG > 1 and print STDERR "Ogling L node $ell\n";
 1084         
 1085       # bitch if it's empty
 1086       if(  @{$ell} == 2
 1087        or (@{$ell} == 3 and $ell->[2] eq '')
 1088       ) {
 1089         $self->whine( $start_line, "An empty L<>" );
 1090         $treelet->[$i] = 'L<>';  # just make it a text node
 1091         next;  # and move on
 1092       }
 1093 
 1094       if( (! ref $ell->[2]  && $ell->[2] =~ /\A\s/)
 1095         ||(! ref $ell->[-1] && $ell->[-1] =~ /\s\z/)
 1096       ) {
 1097         $self->whine( $start_line, "L<> starts or ends with whitespace" );
 1098       }
 1099      
 1100       # Catch URLs:
 1101 
 1102       # there are a number of possible cases:
 1103       # 1) text node containing url: http://foo.com
 1104       #   -> [ 'http://foo.com' ]
 1105       # 2) text node containing url and text: foo|http://foo.com
 1106       #   -> [ 'foo|http://foo.com' ]
 1107       # 3) text node containing url start: mailto:xE<at>foo.com
 1108       #   -> [ 'mailto:x', [ E ... ], 'foo.com' ]
 1109       # 4) text node containing url start and text: foo|mailto:xE<at>foo.com
 1110       #   -> [ 'foo|mailto:x', [ E ... ], 'foo.com' ]
 1111       # 5) other nodes containing text and url start: OE<39>Malley|http://foo.com
 1112       #   -> [ 'O', [ E ... ], 'Malley', '|http://foo.com' ]
 1113       # ... etc.
 1114 
 1115       # anything before the url is part of the text.
 1116       # anything after it is part of the url.
 1117       # the url text node itself may contain parts of both.
 1118 
 1119       if (my ($url_index, $text_part, $url_part) =
 1120         # grep is no good here; we want to bail out immediately so that we can
 1121         # use $1, $2, etc. without having to do the match twice.
 1122         sub {
 1123           for (2..$#$ell) {
 1124             next if ref $ell->[$_];
 1125             next unless $ell->[$_] =~ m/^(?:([^|]*)\|)?(\w+:[^:\s]\S*)$/s;
 1126             return ($_, $1, $2);
 1127           }
 1128           return;
 1129         }->()
 1130       ) {
 1131         $ell->[1]{'type'} = 'url';
 1132 
 1133         my @text = @{$ell}[2..$url_index-1];
 1134         push @text, $text_part if defined $text_part;
 1135 
 1136         my @url  = @{$ell}[$url_index+1..$#$ell];
 1137         unshift @url, $url_part;
 1138 
 1139         unless (@text) {
 1140           $ell->[1]{'content-implicit'} = 'yes';
 1141           @text = @url;
 1142         }
 1143 
 1144         $ell->[1]{to} = Pod::Simple::LinkSection->new(
 1145           @url == 1
 1146           ? $url[0]
 1147           : [ '', {}, @url ],
 1148         );
 1149 
 1150         splice @$ell, 2, $#$ell, @text;
 1151 
 1152         next;
 1153       }
 1154       
 1155       # Catch some very simple and/or common cases
 1156       if(@{$ell} == 3 and ! ref $ell->[2]) {
 1157         my $it = $ell->[2];
 1158         if($it =~ m{^[^/|]+[(][-a-zA-Z0-9]+[)]$}s) { # man sections
 1159           # Hopefully neither too broad nor too restrictive a RE
 1160           DEBUG > 1 and print STDERR "Catching \"$it\" as manpage link.\n";
 1161           $ell->[1]{'type'} = 'man';
 1162           # This's the only place where man links can get made.
 1163           $ell->[1]{'content-implicit'} = 'yes';
 1164           $ell->[1]{'to'  } =
 1165             Pod::Simple::LinkSection->new( $it ); # treelet!
 1166 
 1167           next;
 1168         }
 1169         if($it =~ m/^[^\/\|,\$\%\@\ \"\<\>\:\#\&\*\{\}\[\]\(\)]+(\:\:[^\/\|,\$\%\@\ \"\<\>\:\#\&\*\{\}\[\]\(\)]+)*$/s) {
 1170           # Extremely forgiving idea of what constitutes a bare
 1171           #  modulename link like L<Foo::Bar> or even L<Thing::1.0::Docs::Tralala>
 1172           DEBUG > 1 and print STDERR "Catching \"$it\" as ho-hum L<Modulename> link.\n";
 1173           $ell->[1]{'type'} = 'pod';
 1174           $ell->[1]{'content-implicit'} = 'yes';
 1175           $ell->[1]{'to'  } =
 1176             Pod::Simple::LinkSection->new( $it ); # treelet!
 1177           next;
 1178         }
 1179         # else fall thru...
 1180       }
 1181       
 1182       
 1183 
 1184       # ...Uhoh, here's the real L<...> parsing stuff...
 1185       # "With the ill behavior, with the ill behavior, with the ill behavior..."
 1186 
 1187       DEBUG > 1 and print STDERR "Running a real parse on this non-trivial L\n";
 1188       
 1189       
 1190       my $link_text; # set to an arrayref if found
 1191       my @ell_content = @$ell;
 1192       splice @ell_content,0,2; # Knock off the 'L' and {} bits
 1193 
 1194       DEBUG > 3 and print STDERR " Ell content to start: ",
 1195        pretty(@ell_content), "\n";
 1196 
 1197 
 1198       # Look for the "|" -- only in CHILDREN (not all underlings!)
 1199       # Like L<I like the strictness|strict>
 1200       DEBUG > 3 and
 1201          print STDERR "  Peering at L content for a '|' ...\n";
 1202       for(my $j = 0; $j < @ell_content; ++$j) {
 1203         next if ref $ell_content[$j];
 1204         DEBUG > 3 and
 1205          print STDERR "    Peering at L-content text bit \"$ell_content[$j]\" for a '|'.\n";
 1206 
 1207         if($ell_content[$j] =~ m/^([^\|]*)\|(.*)$/s) {
 1208           my @link_text = ($1);   # might be 0-length
 1209           $ell_content[$j] = $2;  # might be 0-length
 1210 
 1211           DEBUG > 3 and
 1212            print STDERR "     FOUND a '|' in it.  Splitting into [$1] + [$2]\n";
 1213 
 1214           if ($link_text[0] =~ m{[|/]}) {
 1215             $self->whine(
 1216               $start_line,
 1217               "alternative text '$link_text[0]' contains non-escaped | or /"
 1218             );
 1219           }
 1220 
 1221           unshift @link_text, splice @ell_content, 0, $j;
 1222             # leaving only things at J and after
 1223           @ell_content =  grep ref($_)||length($_), @ell_content ;
 1224           $link_text   = [grep ref($_)||length($_), @link_text  ];
 1225           DEBUG > 3 and printf
 1226            "  So link text is %s\n  and remaining ell content is %s\n",
 1227             pretty($link_text), pretty(@ell_content);
 1228           last;
 1229         }
 1230       }
 1231       
 1232       
 1233       # Now look for the "/" -- only in CHILDREN (not all underlings!)
 1234       # And afterward, anything left in @ell_content will be the raw name
 1235       # Like L<Foo::Bar/Object Methods>
 1236       my $section_name;  # set to arrayref if found
 1237       DEBUG > 3 and print STDERR "  Peering at L-content for a '/' ...\n";
 1238       for(my $j = 0; $j < @ell_content; ++$j) {
 1239         next if ref $ell_content[$j];
 1240         DEBUG > 3 and
 1241          print STDERR "    Peering at L-content text bit \"$ell_content[$j]\" for a '/'.\n";
 1242 
 1243         if($ell_content[$j] =~ m/^([^\/]*)\/(.*)$/s) {
 1244           my @section_name = ($2); # might be 0-length
 1245           $ell_content[$j] =  $1;  # might be 0-length
 1246 
 1247           DEBUG > 3 and
 1248            print STDERR "     FOUND a '/' in it.",
 1249              "  Splitting to page [...$1] + section [$2...]\n";
 1250 
 1251           push @section_name, splice @ell_content, 1+$j;
 1252             # leaving only things before and including J
 1253           
 1254           @ell_content  = grep ref($_)||length($_), @ell_content  ;
 1255           @section_name = grep ref($_)||length($_), @section_name ;
 1256 
 1257           # Turn L<.../"foo"> into L<.../foo>
 1258           if(@section_name
 1259             and !ref($section_name[0]) and !ref($section_name[-1])
 1260             and $section_name[ 0] =~ m/^\"/s
 1261             and $section_name[-1] =~ m/\"$/s
 1262             and !( # catch weird degenerate case of L<"> !
 1263               @section_name == 1 and $section_name[0] eq '"'
 1264             )
 1265           ) {
 1266             $section_name[ 0] =~ s/^\"//s;
 1267             $section_name[-1] =~ s/\"$//s;
 1268             DEBUG > 3 and
 1269              print STDERR "     Quotes removed: ", pretty(@section_name), "\n";
 1270           } else {
 1271             DEBUG > 3 and
 1272              print STDERR "     No need to remove quotes in ", pretty(@section_name), "\n";
 1273           }
 1274 
 1275           $section_name = \@section_name;
 1276           last;
 1277         }
 1278       }
 1279 
 1280       # Turn L<"Foo Bar"> into L</Foo Bar>
 1281       if(!$section_name and @ell_content
 1282          and !ref($ell_content[0]) and !ref($ell_content[-1])
 1283          and $ell_content[ 0] =~ m/^\"/s
 1284          and $ell_content[-1] =~ m/\"$/s
 1285          and !( # catch weird degenerate case of L<"> !
 1286            @ell_content == 1 and $ell_content[0] eq '"'
 1287          )
 1288       ) {
 1289         $section_name = [splice @ell_content];
 1290         $section_name->[ 0] =~ s/^\"//s;
 1291         $section_name->[-1] =~ s/\"$//s;
 1292       }
 1293 
 1294       # Turn L<Foo Bar> into L</Foo Bar>.
 1295       if(!$section_name and !$link_text and @ell_content
 1296          and grep !ref($_) && m/ /s, @ell_content
 1297       ) {
 1298         $section_name = [splice @ell_content];
 1299         # That's support for the now-deprecated syntax.
 1300         # (Maybe generate a warning eventually?)
 1301         # Note that it deliberately won't work on L<...|Foo Bar>
 1302       }
 1303 
 1304 
 1305       # Now make up the link_text
 1306       # L<Foo>     -> L<Foo|Foo>
 1307       # L</Bar>    -> L<"Bar"|Bar>
 1308       # L<Foo/Bar> -> L<"Bar" in Foo/Foo>
 1309       unless($link_text) {
 1310         $ell->[1]{'content-implicit'} = 'yes';
 1311         $link_text = [];
 1312         push @$link_text, '"', @$section_name, '"' if $section_name;
 1313 
 1314         if(@ell_content) {
 1315           $link_text->[-1] .= ' in ' if $section_name;
 1316           push @$link_text, @ell_content;
 1317         }
 1318       }
 1319 
 1320 
 1321       # And the E resolver will have to deal with all our treeletty things:
 1322 
 1323       if(@ell_content == 1 and !ref($ell_content[0])
 1324          and $ell_content[0] =~ m{^[^/]+[(][-a-zA-Z0-9]+[)]$}s
 1325       ) {
 1326         $ell->[1]{'type'}    = 'man';
 1327         DEBUG > 3 and print STDERR "Considering this ($ell_content[0]) a man link.\n";
 1328       } else {
 1329         $ell->[1]{'type'}    = 'pod';
 1330         DEBUG > 3 and print STDERR "Considering this a pod link (not man or url).\n";
 1331       }
 1332 
 1333       if( defined $section_name ) {
 1334         $ell->[1]{'section'} = Pod::Simple::LinkSection->new(
 1335           ['', {}, @$section_name]
 1336         );
 1337         DEBUG > 3 and print STDERR "L-section content: ", pretty($ell->[1]{'section'}), "\n";
 1338       }
 1339 
 1340       if( @ell_content ) {
 1341         $ell->[1]{'to'} = Pod::Simple::LinkSection->new(
 1342           ['', {}, @ell_content]
 1343         );
 1344         DEBUG > 3 and print STDERR "L-to content: ", pretty($ell->[1]{'to'}), "\n";
 1345       }
 1346       
 1347       # And update children to be the link-text:
 1348       @$ell = (@$ell[0,1], defined($link_text) ? splice(@$link_text) : '');
 1349       
 1350       DEBUG > 2 and print STDERR "End of L-parsing for this node $treelet->[$i]\n";
 1351 
 1352       unshift @stack, $treelet->[$i]; # might as well recurse
 1353     }
 1354   }
 1355 
 1356   return;
 1357 }
 1358 
 1359 # . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . .
 1360 
 1361 sub _treat_Es {
 1362   my($self,@stack) = @_;
 1363 
 1364   my($i, $treelet, $content, $replacer, $charnum);
 1365   my $start_line = $stack[0][1]{'start_line'};
 1366 
 1367   # A recursive algorithm implemented iteratively!  Whee!
 1368 
 1369 
 1370   # Has frightening side effects on L nodes' attributes.
 1371 
 1372   #my @ells_to_tweak;
 1373 
 1374   while($treelet = shift @stack) {
 1375     for(my $i = 2; $i < @$treelet; ++$i) { # iterate over children
 1376       next unless ref $treelet->[$i];  # text nodes are uninteresting
 1377       if($treelet->[$i][0] eq 'L') {
 1378         # SPECIAL STUFF for semi-processed L<>'s
 1379         
 1380         my $thing;
 1381         foreach my $attrname ('section', 'to') {        
 1382           if(defined($thing = $treelet->[$i][1]{$attrname}) and ref $thing) {
 1383             unshift @stack, $thing;
 1384             DEBUG > 2 and print STDERR "  Enqueuing ",
 1385              pretty( $treelet->[$i][1]{$attrname} ),
 1386              " as an attribute value to tweak.\n";
 1387           }
 1388         }
 1389         
 1390         unshift @stack, $treelet->[$i]; # recurse
 1391         next;
 1392       } elsif($treelet->[$i][0] ne 'E') {
 1393         unshift @stack, $treelet->[$i]; # recurse
 1394         next;
 1395       }
 1396       
 1397       DEBUG > 1 and print STDERR "Ogling E node ", pretty($treelet->[$i]), "\n";
 1398 
 1399       # bitch if it's empty
 1400       if(  @{$treelet->[$i]} == 2
 1401        or (@{$treelet->[$i]} == 3 and $treelet->[$i][2] eq '')
 1402       ) {
 1403         $self->whine( $start_line, "An empty E<>" );
 1404         $treelet->[$i] = 'E<>'; # splice in a literal
 1405         next;
 1406       }
 1407         
 1408       # bitch if content is weird
 1409       unless(@{$treelet->[$i]} == 3 and !ref($content = $treelet->[$i][2])) {
 1410         $self->whine( $start_line, "An E<...> surrounding strange content" );
 1411         $replacer = $treelet->[$i]; # scratch
 1412         splice(@$treelet, $i, 1,   # fake out a literal
 1413           'E<',
 1414           splice(@$replacer,2), # promote its content
 1415           '>'
 1416         );
 1417         # Don't need to do --$i, as the 'E<' we just added isn't interesting.
 1418         next;
 1419       }
 1420 
 1421       DEBUG > 1 and print STDERR "Ogling E<$content>\n";
 1422 
 1423       # XXX E<>'s contents *should* be a valid char in the scope of the current
 1424       # =encoding directive. Defaults to iso-8859-1, I believe. Fix this in the
 1425       # future sometime.
 1426 
 1427       $charnum  = Pod::Escapes::e2charnum($content);
 1428       DEBUG > 1 and print STDERR " Considering E<$content> with char ",
 1429         defined($charnum) ? $charnum : "undef", ".\n";
 1430 
 1431       if(!defined( $charnum )) {
 1432         DEBUG > 1 and print STDERR "I don't know how to deal with E<$content>.\n";
 1433         $self->whine( $start_line, "Unknown E content in E<$content>" );
 1434         $replacer = "E<$content>"; # better than nothing
 1435       } elsif($charnum >= 255 and !UNICODE) {
 1436         $replacer = ASCII ? "\xA4" : "?";
 1437         DEBUG > 1 and print STDERR "This Perl version can't handle ",
 1438           "E<$content> (chr $charnum), so replacing with $replacer\n";
 1439       } else {
 1440         $replacer = Pod::Escapes::e2char($content);
 1441         DEBUG > 1 and print STDERR " Replacing E<$content> with $replacer\n";
 1442       }
 1443 
 1444       splice(@$treelet, $i, 1, $replacer); # no need to back up $i, tho
 1445     }
 1446   }
 1447 
 1448   return;
 1449 }
 1450 
 1451 
 1452 # . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . .
 1453 
 1454 sub _treat_Ss {
 1455   my($self,$treelet) = @_;
 1456   
 1457   _change_S_to_nbsp($treelet,0) if $self->{'nbsp_for_S'};
 1458 
 1459   # TODO: or a change_nbsp_to_S
 1460   #  Normalizing nbsp's to S is harder: for each text node, make S content
 1461   #  out of anything matching m/([^ \xA0]*(?:\xA0+[^ \xA0]*)+)/
 1462 
 1463 
 1464   return;
 1465 }
 1466 
 1467 sub _change_S_to_nbsp { #  a recursive function
 1468   # Sanely assumes that the top node in the excursion won't be an S node.
 1469   my($treelet, $in_s) = @_;
 1470   
 1471   my $is_s = ('S' eq $treelet->[0]);
 1472   $in_s ||= $is_s; # So in_s is on either by this being an S element,
 1473                    #  or by an ancestor being an S element.
 1474 
 1475   for(my $i = 2; $i < @$treelet; ++$i) {
 1476     if(ref $treelet->[$i]) {
 1477       if( _change_S_to_nbsp( $treelet->[$i], $in_s ) ) {
 1478         my $to_pull_up = $treelet->[$i];
 1479         splice @$to_pull_up,0,2;   # ...leaving just its content
 1480         splice @$treelet, $i, 1, @$to_pull_up;  # Pull up content
 1481         $i +=  @$to_pull_up - 1;   # Make $i skip the pulled-up stuff
 1482       }
 1483     } else {
 1484       $treelet->[$i] =~ s/\s/$Pod::Simple::nbsp/g if $in_s;
 1485        
 1486        # Note that if you apply nbsp_for_S to text, and so turn
 1487        # "foo S<bar baz> quux" into "foo bar&#160;faz quux", you
 1488        # end up with something that fails to say "and don't hyphenate
 1489        # any part of 'bar baz'".  However, hyphenation is such a vexing
 1490        # problem anyway, that most Pod renderers just don't render it
 1491        # at all.  But if you do want to implement hyphenation, I guess
 1492        # that you'd better have nbsp_for_S off.
 1493     }
 1494   }
 1495 
 1496   return $is_s;
 1497 }
 1498 
 1499 #-----------------------------------------------------------------------------
 1500 
 1501 sub _accessorize {  # A simple-minded method-maker
 1502   no strict 'refs';
 1503   foreach my $attrname (@_) {
 1504     next if $attrname =~ m/::/; # a hack
 1505     *{caller() . '::' . $attrname} = sub {
 1506       use strict;
 1507       $Carp::CarpLevel = 1,  Carp::croak(
 1508        "Accessor usage: \$obj->$attrname() or \$obj->$attrname(\$new_value)"
 1509       ) unless (@_ == 1 or @_ == 2) and ref $_[0];
 1510       (@_ == 1) ?  $_[0]->{$attrname}
 1511                 : ($_[0]->{$attrname} = $_[1]);
 1512     };
 1513   }
 1514   # Ya know, they say accessories make the ensemble!
 1515   return;
 1516 }
 1517 
 1518 # . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . .
 1519 # . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . .
 1520 #=============================================================================
 1521 
 1522 sub filter {
 1523   my($class, $source) = @_;
 1524   my $new = $class->new;
 1525   $new->output_fh(*STDOUT{IO});
 1526   
 1527   if(ref($source || '') eq 'SCALAR') {
 1528     $new->parse_string_document( $$source );
 1529   } elsif(ref($source)) {  # it's a file handle
 1530     $new->parse_file($source);
 1531   } else {  # it's a filename
 1532     $new->parse_file($source);
 1533   }
 1534   
 1535   return $new;
 1536 }
 1537 
 1538 
 1539 #-----------------------------------------------------------------------------
 1540 
 1541 sub _out {
 1542   # For use in testing: Class->_out($source)
 1543   #  returns the transformation of $source
 1544   
 1545   my $class = shift(@_);
 1546 
 1547   my $mutor = shift(@_) if @_ and ref($_[0] || '') eq 'CODE';
 1548 
 1549   DEBUG and print STDERR "\n\n", '#' x 76,
 1550    "\nAbout to parse source: {{\n$_[0]\n}}\n\n";
 1551   
 1552   
 1553   my $parser = ref $class && $class->isa(__PACKAGE__) ? $class : $class->new;
 1554   $parser->hide_line_numbers(1);
 1555 
 1556   my $out = '';
 1557   $parser->output_string( \$out );
 1558   DEBUG and print STDERR " _out to ", \$out, "\n";
 1559   
 1560   $mutor->($parser) if $mutor;
 1561 
 1562   $parser->parse_string_document( $_[0] );
 1563   # use Data::Dumper; print STDERR Dumper($parser), "\n";
 1564   return $out;
 1565 }
 1566 
 1567 
 1568 sub _duo {
 1569   # For use in testing: Class->_duo($source1, $source2)
 1570   #  returns the parse trees of $source1 and $source2.
 1571   # Good in things like: &ok( Class->duo(... , ...) );
 1572   
 1573   my $class = shift(@_);
 1574   
 1575   Carp::croak "But $class->_duo is useful only in list context!"
 1576    unless wantarray;
 1577 
 1578   my $mutor = shift(@_) if @_ and ref($_[0] || '') eq 'CODE';
 1579 
 1580   Carp::croak "But $class->_duo takes two parameters, not: @_"
 1581    unless @_ == 2;
 1582 
 1583   my(@out);
 1584   
 1585   while( @_ ) {
 1586     my $parser = $class->new;
 1587 
 1588     push @out, '';
 1589     $parser->output_string( \( $out[-1] ) );
 1590 
 1591     DEBUG and print STDERR " _duo out to ", $parser->output_string(),
 1592       " = $parser->{'output_string'}\n";
 1593 
 1594     $parser->hide_line_numbers(1);
 1595     $mutor->($parser) if $mutor;
 1596     $parser->parse_string_document( shift( @_ ) );
 1597     # use Data::Dumper; print STDERR Dumper($parser), "\n";
 1598   }
 1599 
 1600   return @out;
 1601 }
 1602 
 1603 
 1604 
 1605 #-----------------------------------------------------------------------------
 1606 1;
 1607 __END__
 1608 
 1609 TODO:
 1610 A start_formatting_code and end_formatting_code methods, which in the
 1611 base class call start_L, end_L, start_C, end_C, etc., if they are
 1612 defined.
 1613 
 1614 have the POD FORMATTING ERRORS section note the localtime, and the
 1615 version of Pod::Simple.
 1616 
 1617 option to delete all E<shy>s?
 1618 option to scream if under-0x20 literals are found in the input, or
 1619 under-E<32> E codes are found in the tree. And ditto \x7f-\x9f
 1620 
 1621 Option to turn highbit characters into their compromised form? (applies
 1622 to E parsing too)
 1623 
 1624 TODO: BOM/encoding things.
 1625 
 1626 TODO: ascii-compat things in the XML classes?
 1627