"Fossies" - the Fresh Open Source Software Archive

Member "install-tl-20200916/tlpkg/tlperl/lib/Text/ParseWords.pm" (5 Apr 2016, 8144 Bytes) of package /windows/misc/install-tl.zip:


As a special service "Fossies" has tried to format the requested source page into HTML format using (guessed) Perl source code syntax highlighting (style: standard) with prefixed line numbers and code folding option. Alternatively you can here view or download the uninterpreted source code file.

    1 package Text::ParseWords;
    2 
    3 use strict;
    4 require 5.006;
    5 our $VERSION = "3.30";
    6 
    7 
    8 use Exporter;
    9 our @ISA = qw(Exporter);
   10 our @EXPORT = qw(shellwords quotewords nested_quotewords parse_line);
   11 our @EXPORT_OK = qw(old_shellwords);
   12 our $PERL_SINGLE_QUOTE;
   13 
   14 
   15 sub shellwords {
   16     my (@lines) = @_;
   17     my @allwords;
   18 
   19     foreach my $line (@lines) {
   20     $line =~ s/^\s+//;
   21     my @words = parse_line('\s+', 0, $line);
   22     pop @words if (@words and !defined $words[-1]);
   23     return() unless (@words || !length($line));
   24     push(@allwords, @words);
   25     }
   26     return(@allwords);
   27 }
   28 
   29 
   30 
   31 sub quotewords {
   32     my($delim, $keep, @lines) = @_;
   33     my($line, @words, @allwords);
   34 
   35     foreach $line (@lines) {
   36     @words = parse_line($delim, $keep, $line);
   37     return() unless (@words || !length($line));
   38     push(@allwords, @words);
   39     }
   40     return(@allwords);
   41 }
   42 
   43 
   44 
   45 sub nested_quotewords {
   46     my($delim, $keep, @lines) = @_;
   47     my($i, @allwords);
   48 
   49     for ($i = 0; $i < @lines; $i++) {
   50     @{$allwords[$i]} = parse_line($delim, $keep, $lines[$i]);
   51     return() unless (@{$allwords[$i]} || !length($lines[$i]));
   52     }
   53     return(@allwords);
   54 }
   55 
   56 
   57 
   58 sub parse_line {
   59     my($delimiter, $keep, $line) = @_;
   60     my($word, @pieces);
   61 
   62     no warnings 'uninitialized';    # we will be testing undef strings
   63 
   64     while (length($line)) {
   65         # This pattern is optimised to be stack conservative on older perls.
   66         # Do not refactor without being careful and testing it on very long strings.
   67         # See Perl bug #42980 for an example of a stack busting input.
   68         $line =~ s/^
   69                     (?: 
   70                         # double quoted string
   71                         (")                             # $quote
   72                         ((?>[^\\"]*(?:\\.[^\\"]*)*))"   # $quoted 
   73             |   # --OR--
   74                         # singe quoted string
   75                         (')                             # $quote
   76                         ((?>[^\\']*(?:\\.[^\\']*)*))'   # $quoted
   77                     |   # --OR--
   78                         # unquoted string
   79                 (                               # $unquoted 
   80                             (?:\\.|[^\\"'])*?           
   81                         )       
   82                         # followed by
   83                 (                               # $delim
   84                             \Z(?!\n)                    # EOL
   85                         |   # --OR--
   86                             (?-x:$delimiter)            # delimiter
   87                         |   # --OR--                    
   88                             (?!^)(?=["'])               # a quote
   89                         )  
   90             )//xs or return;        # extended layout                  
   91         my ($quote, $quoted, $unquoted, $delim) = (($1 ? ($1,$2) : ($3,$4)), $5, $6);
   92 
   93 
   94     return() unless( defined($quote) || length($unquoted) || length($delim));
   95 
   96         if ($keep) {
   97         $quoted = "$quote$quoted$quote";
   98     }
   99         else {
  100         $unquoted =~ s/\\(.)/$1/sg;
  101         if (defined $quote) {
  102         $quoted =~ s/\\(.)/$1/sg if ($quote eq '"');
  103         $quoted =~ s/\\([\\'])/$1/g if ( $PERL_SINGLE_QUOTE && $quote eq "'");
  104             }
  105     }
  106         $word .= substr($line, 0, 0);   # leave results tainted
  107         $word .= defined $quote ? $quoted : $unquoted;
  108  
  109         if (length($delim)) {
  110             push(@pieces, $word);
  111             push(@pieces, $delim) if ($keep eq 'delimiters');
  112             undef $word;
  113         }
  114         if (!length($line)) {
  115             push(@pieces, $word);
  116     }
  117     }
  118     return(@pieces);
  119 }
  120 
  121 
  122 
  123 sub old_shellwords {
  124 
  125     # Usage:
  126     #   use ParseWords;
  127     #   @words = old_shellwords($line);
  128     #   or
  129     #   @words = old_shellwords(@lines);
  130     #   or
  131     #   @words = old_shellwords();  # defaults to $_ (and clobbers it)
  132 
  133     no warnings 'uninitialized';    # we will be testing undef strings
  134     local *_ = \join('', @_) if @_;
  135     my (@words, $snippet);
  136 
  137     s/\A\s+//;
  138     while ($_ ne '') {
  139     my $field = substr($_, 0, 0);   # leave results tainted
  140     for (;;) {
  141         if (s/\A"(([^"\\]|\\.)*)"//s) {
  142         ($snippet = $1) =~ s#\\(.)#$1#sg;
  143         }
  144         elsif (/\A"/) {
  145         require Carp;
  146         Carp::carp("Unmatched double quote: $_");
  147         return();
  148         }
  149         elsif (s/\A'(([^'\\]|\\.)*)'//s) {
  150         ($snippet = $1) =~ s#\\(.)#$1#sg;
  151         }
  152         elsif (/\A'/) {
  153         require Carp;
  154         Carp::carp("Unmatched single quote: $_");
  155         return();
  156         }
  157         elsif (s/\A\\(.?)//s) {
  158         $snippet = $1;
  159         }
  160         elsif (s/\A([^\s\\'"]+)//) {
  161         $snippet = $1;
  162         }
  163         else {
  164         s/\A\s+//;
  165         last;
  166         }
  167         $field .= $snippet;
  168     }
  169     push(@words, $field);
  170     }
  171     return @words;
  172 }
  173 
  174 1;
  175 
  176 __END__
  177 
  178 =head1 NAME
  179 
  180 Text::ParseWords - parse text into an array of tokens or array of arrays
  181 
  182 =head1 SYNOPSIS
  183 
  184   use Text::ParseWords;
  185   @lists = nested_quotewords($delim, $keep, @lines);
  186   @words = quotewords($delim, $keep, @lines);
  187   @words = shellwords(@lines);
  188   @words = parse_line($delim, $keep, $line);
  189   @words = old_shellwords(@lines); # DEPRECATED!
  190 
  191 =head1 DESCRIPTION
  192 
  193 The &nested_quotewords() and &quotewords() functions accept a delimiter 
  194 (which can be a regular expression)
  195 and a list of lines and then breaks those lines up into a list of
  196 words ignoring delimiters that appear inside quotes.  &quotewords()
  197 returns all of the tokens in a single long list, while &nested_quotewords()
  198 returns a list of token lists corresponding to the elements of @lines.
  199 &parse_line() does tokenizing on a single string.  The &*quotewords()
  200 functions simply call &parse_line(), so if you're only splitting
  201 one line you can call &parse_line() directly and save a function
  202 call.
  203 
  204 The $keep argument is a boolean flag.  If true, then the tokens are
  205 split on the specified delimiter, but all other characters (including
  206 quotes and backslashes) are kept in the tokens.  If $keep is false then the
  207 &*quotewords() functions remove all quotes and backslashes that are
  208 not themselves backslash-escaped or inside of single quotes (i.e.,
  209 &quotewords() tries to interpret these characters just like the Bourne
  210 shell).  NB: these semantics are significantly different from the
  211 original version of this module shipped with Perl 5.000 through 5.004.
  212 As an additional feature, $keep may be the keyword "delimiters" which
  213 causes the functions to preserve the delimiters in each string as
  214 tokens in the token lists, in addition to preserving quote and
  215 backslash characters.
  216 
  217 &shellwords() is written as a special case of &quotewords(), and it
  218 does token parsing with whitespace as a delimiter-- similar to most
  219 Unix shells.
  220 
  221 =head1 EXAMPLES
  222 
  223 The sample program:
  224 
  225   use Text::ParseWords;
  226   @words = quotewords('\s+', 0, q{this   is "a test" of\ quotewords \"for you});
  227   $i = 0;
  228   foreach (@words) {
  229       print "$i: <$_>\n";
  230       $i++;
  231   }
  232 
  233 produces:
  234 
  235   0: <this>
  236   1: <is>
  237   2: <a test>
  238   3: <of quotewords>
  239   4: <"for>
  240   5: <you>
  241 
  242 demonstrating:
  243 
  244 =over 4
  245 
  246 =item 0Z<>
  247 
  248 a simple word
  249 
  250 =item 1Z<>
  251 
  252 multiple spaces are skipped because of our $delim
  253 
  254 =item 2Z<>
  255 
  256 use of quotes to include a space in a word
  257 
  258 =item 3Z<>
  259 
  260 use of a backslash to include a space in a word
  261 
  262 =item 4Z<>
  263 
  264 use of a backslash to remove the special meaning of a double-quote
  265 
  266 =item 5Z<>
  267 
  268 another simple word (note the lack of effect of the
  269 backslashed double-quote)
  270 
  271 =back
  272 
  273 Replacing C<quotewords('\s+', 0, q{this   is...})>
  274 with C<shellwords(q{this   is...})>
  275 is a simpler way to accomplish the same thing.
  276 
  277 =head1 SEE ALSO
  278 
  279 L<Text::CSV> - for parsing CSV files
  280 
  281 =head1 AUTHORS
  282 
  283 Maintainer: Alexandr Ciornii <alexchornyATgmail.com>.
  284 
  285 Previous maintainer: Hal Pomeranz <pomeranz@netcom.com>, 1994-1997 (Original
  286 author unknown).  Much of the code for &parse_line() (including the
  287 primary regexp) from Joerk Behrends <jbehrends@multimediaproduzenten.de>.
  288 
  289 Examples section another documentation provided by John Heidemann 
  290 <johnh@ISI.EDU>
  291 
  292 Bug reports, patches, and nagging provided by lots of folks-- thanks
  293 everybody!  Special thanks to Michael Schwern <schwern@envirolink.org>
  294 for assuring me that a &nested_quotewords() would be useful, and to 
  295 Jeff Friedl <jfriedl@yahoo-inc.com> for telling me not to worry about
  296 error-checking (sort of-- you had to be there).
  297 
  298 =head1 COPYRIGHT AND LICENSE
  299 
  300 This library is free software; you may redistribute and/or modify it
  301 under the same terms as Perl itself.
  302 
  303 =cut