"Fossies" - the Fresh Open Source Software Archive

Member "install-tl-20200916/tlpkg/tlperl/lib/File/GlobMapper.pm" (7 Mar 2020, 15678 Bytes) of package /windows/misc/install-tl.zip:


As a special service "Fossies" has tried to format the requested source page into HTML format using (guessed) Perl source code syntax highlighting (style: standard) with prefixed line numbers and code folding option. Alternatively you can here view or download the uninterpreted source code file.

    1 package File::GlobMapper;
    2 
    3 use strict;
    4 use warnings;
    5 use Carp;
    6 
    7 our ($CSH_GLOB);
    8 
    9 BEGIN
   10 {
   11     if ($] < 5.006)
   12     {
   13         require File::BSDGlob; import File::BSDGlob qw(:glob) ;
   14         $CSH_GLOB = File::BSDGlob::GLOB_CSH() ;
   15         *globber = \&File::BSDGlob::csh_glob;
   16     }
   17     else
   18     {
   19         require File::Glob; import File::Glob qw(:glob) ;
   20         $CSH_GLOB = File::Glob::GLOB_CSH() ;
   21         #*globber = \&File::Glob::bsd_glob;
   22         *globber = \&File::Glob::csh_glob;
   23     }
   24 }
   25 
   26 our ($Error);
   27 
   28 our ($VERSION, @EXPORT_OK);
   29 $VERSION = '1.001';
   30 @EXPORT_OK = qw( globmap );
   31 
   32 
   33 our ($noPreBS, $metachars, $matchMetaRE, %mapping, %wildCount);
   34 $noPreBS = '(?<!\\\)' ; # no preceding backslash
   35 $metachars = '.*?[](){}';
   36 $matchMetaRE = '[' . quotemeta($metachars) . ']';
   37 
   38 %mapping = (
   39                 '*' => '([^/]*)',
   40                 '?' => '([^/])',
   41                 '.' => '\.',
   42                 '[' => '([',
   43                 '(' => '(',
   44                 ')' => ')',
   45            );
   46 
   47 %wildCount = map { $_ => 1 } qw/ * ? . { ( [ /;
   48 
   49 sub globmap ($$;)
   50 {
   51     my $inputGlob = shift ;
   52     my $outputGlob = shift ;
   53 
   54     my $obj = new File::GlobMapper($inputGlob, $outputGlob, @_)
   55         or croak "globmap: $Error" ;
   56     return $obj->getFileMap();
   57 }
   58 
   59 sub new
   60 {
   61     my $class = shift ;
   62     my $inputGlob = shift ;
   63     my $outputGlob = shift ;
   64     # TODO -- flags needs to default to whatever File::Glob does
   65     my $flags = shift || $CSH_GLOB ;
   66     #my $flags = shift ;
   67 
   68     $inputGlob =~ s/^\s*\<\s*//;
   69     $inputGlob =~ s/\s*\>\s*$//;
   70 
   71     $outputGlob =~ s/^\s*\<\s*//;
   72     $outputGlob =~ s/\s*\>\s*$//;
   73 
   74     my %object =
   75             (   InputGlob   => $inputGlob,
   76                 OutputGlob  => $outputGlob,
   77                 GlobFlags   => $flags,
   78                 Braces      => 0,
   79                 WildCount   => 0,
   80                 Pairs       => [],
   81                 Sigil       => '#',
   82             );
   83 
   84     my $self = bless \%object, ref($class) || $class ;
   85 
   86     $self->_parseInputGlob()
   87         or return undef ;
   88 
   89     $self->_parseOutputGlob()
   90         or return undef ;
   91 
   92     my @inputFiles = globber($self->{InputGlob}, $flags) ;
   93 
   94     if (GLOB_ERROR)
   95     {
   96         $Error = $!;
   97         return undef ;
   98     }
   99 
  100     #if (whatever)
  101     {
  102         my $missing = grep { ! -e $_ } @inputFiles ;
  103 
  104         if ($missing)
  105         {
  106             $Error = "$missing input files do not exist";
  107             return undef ;
  108         }
  109     }
  110 
  111     $self->{InputFiles} = \@inputFiles ;
  112 
  113     $self->_getFiles()
  114         or return undef ;
  115 
  116     return $self;
  117 }
  118 
  119 sub _retError
  120 {
  121     my $string = shift ;
  122     $Error = "$string in input fileglob" ;
  123     return undef ;
  124 }
  125 
  126 sub _unmatched
  127 {
  128     my $delimeter = shift ;
  129 
  130     _retError("Unmatched $delimeter");
  131     return undef ;
  132 }
  133 
  134 sub _parseBit
  135 {
  136     my $self = shift ;
  137 
  138     my $string = shift ;
  139 
  140     my $out = '';
  141     my $depth = 0 ;
  142 
  143     while ($string =~ s/(.*?)$noPreBS(,|$matchMetaRE)//)
  144     {
  145         $out .= quotemeta($1) ;
  146         $out .= $mapping{$2} if defined $mapping{$2};
  147 
  148         ++ $self->{WildCount} if $wildCount{$2} ;
  149 
  150         if ($2 eq ',')
  151         {
  152             return _unmatched("(")
  153                 if $depth ;
  154 
  155             $out .= '|';
  156         }
  157         elsif ($2 eq '(')
  158         {
  159             ++ $depth ;
  160         }
  161         elsif ($2 eq ')')
  162         {
  163             return _unmatched(")")
  164                 if ! $depth ;
  165 
  166             -- $depth ;
  167         }
  168         elsif ($2 eq '[')
  169         {
  170             # TODO -- quotemeta & check no '/'
  171             # TODO -- check for \]  & other \ within the []
  172             $string =~ s#(.*?\])##
  173                 or return _unmatched("[");
  174             $out .= "$1)" ;
  175         }
  176         elsif ($2 eq ']')
  177         {
  178             return _unmatched("]");
  179         }
  180         elsif ($2 eq '{' || $2 eq '}')
  181         {
  182             return _retError("Nested {} not allowed");
  183         }
  184     }
  185 
  186     $out .= quotemeta $string;
  187 
  188     return _unmatched("(")
  189         if $depth ;
  190 
  191     return $out ;
  192 }
  193 
  194 sub _parseInputGlob
  195 {
  196     my $self = shift ;
  197 
  198     my $string = $self->{InputGlob} ;
  199     my $inGlob = '';
  200 
  201     # Multiple concatenated *'s don't make sense
  202     #$string =~ s#\*\*+#*# ;
  203 
  204     # TODO -- Allow space to delimit patterns?
  205     #my @strings = split /\s+/, $string ;
  206     #for my $str (@strings)
  207     my $out = '';
  208     my $depth = 0 ;
  209 
  210     while ($string =~ s/(.*?)$noPreBS($matchMetaRE)//)
  211     {
  212         $out .= quotemeta($1) ;
  213         $out .= $mapping{$2} if defined $mapping{$2};
  214         ++ $self->{WildCount} if $wildCount{$2} ;
  215 
  216         if ($2 eq '(')
  217         {
  218             ++ $depth ;
  219         }
  220         elsif ($2 eq ')')
  221         {
  222             return _unmatched(")")
  223                 if ! $depth ;
  224 
  225             -- $depth ;
  226         }
  227         elsif ($2 eq '[')
  228         {
  229             # TODO -- quotemeta & check no '/' or '(' or ')'
  230             # TODO -- check for \]  & other \ within the []
  231             $string =~ s#(.*?\])##
  232                 or return _unmatched("[");
  233             $out .= "$1)" ;
  234         }
  235         elsif ($2 eq ']')
  236         {
  237             return _unmatched("]");
  238         }
  239         elsif ($2 eq '}')
  240         {
  241             return _unmatched("}");
  242         }
  243         elsif ($2 eq '{')
  244         {
  245             # TODO -- check no '/' within the {}
  246             # TODO -- check for \}  & other \ within the {}
  247 
  248             my $tmp ;
  249             unless ( $string =~ s/(.*?)$noPreBS\}//)
  250             {
  251                 return _unmatched("{");
  252             }
  253             #$string =~ s#(.*?)\}##;
  254 
  255             #my $alt = join '|',
  256             #          map { quotemeta $_ }
  257             #          split "$noPreBS,", $1 ;
  258             my $alt = $self->_parseBit($1);
  259             defined $alt or return 0 ;
  260             $out .= "($alt)" ;
  261 
  262             ++ $self->{Braces} ;
  263         }
  264     }
  265 
  266     return _unmatched("(")
  267         if $depth ;
  268 
  269     $out .= quotemeta $string ;
  270 
  271 
  272     $self->{InputGlob} =~ s/$noPreBS[\(\)]//g;
  273     $self->{InputPattern} = $out ;
  274 
  275     #print "# INPUT '$self->{InputGlob}' => '$out'\n";
  276 
  277     return 1 ;
  278 
  279 }
  280 
  281 sub _parseOutputGlob
  282 {
  283     my $self = shift ;
  284 
  285     my $string = $self->{OutputGlob} ;
  286     my $maxwild = $self->{WildCount};
  287 
  288     if ($self->{GlobFlags} & GLOB_TILDE)
  289     #if (1)
  290     {
  291         $string =~ s{
  292               ^ ~             # find a leading tilde
  293               (               # save this in $1
  294                   [^/]        # a non-slash character
  295                         *     # repeated 0 or more times (0 means me)
  296               )
  297             }{
  298               $1
  299                   ? (getpwnam($1))[7]
  300                   : ( $ENV{HOME} || $ENV{LOGDIR} )
  301             }ex;
  302 
  303     }
  304 
  305     # max #1 must be == to max no of '*' in input
  306     while ( $string =~ m/#(\d)/g )
  307     {
  308         croak "Max wild is #$maxwild, you tried #$1"
  309             if $1 > $maxwild ;
  310     }
  311 
  312     my $noPreBS = '(?<!\\\)' ; # no preceding backslash
  313     #warn "noPreBS = '$noPreBS'\n";
  314 
  315     #$string =~ s/${noPreBS}\$(\d)/\${$1}/g;
  316     $string =~ s/${noPreBS}#(\d)/\${$1}/g;
  317     $string =~ s#${noPreBS}\*#\${inFile}#g;
  318     $string = '"' . $string . '"';
  319 
  320     #print "OUTPUT '$self->{OutputGlob}' => '$string'\n";
  321     $self->{OutputPattern} = $string ;
  322 
  323     return 1 ;
  324 }
  325 
  326 sub _getFiles
  327 {
  328     my $self = shift ;
  329 
  330     my %outInMapping = ();
  331     my %inFiles = () ;
  332 
  333     foreach my $inFile (@{ $self->{InputFiles} })
  334     {
  335         next if $inFiles{$inFile} ++ ;
  336 
  337         my $outFile = $inFile ;
  338 
  339         if ( $inFile =~ m/$self->{InputPattern}/ )
  340         {
  341             no warnings 'uninitialized';
  342             eval "\$outFile = $self->{OutputPattern};" ;
  343 
  344             if (defined $outInMapping{$outFile})
  345             {
  346                 $Error =  "multiple input files map to one output file";
  347                 return undef ;
  348             }
  349             $outInMapping{$outFile} = $inFile;
  350             push @{ $self->{Pairs} }, [$inFile, $outFile];
  351         }
  352     }
  353 
  354     return 1 ;
  355 }
  356 
  357 sub getFileMap
  358 {
  359     my $self = shift ;
  360 
  361     return $self->{Pairs} ;
  362 }
  363 
  364 sub getHash
  365 {
  366     my $self = shift ;
  367 
  368     return { map { $_->[0] => $_->[1] } @{ $self->{Pairs} } } ;
  369 }
  370 
  371 1;
  372 
  373 __END__
  374 
  375 =head1 NAME
  376 
  377 File::GlobMapper - Extend File Glob to Allow Input and Output Files
  378 
  379 =head1 SYNOPSIS
  380 
  381     use File::GlobMapper qw( globmap );
  382 
  383     my $aref = globmap $input => $output
  384         or die $File::GlobMapper::Error ;
  385 
  386     my $gm = new File::GlobMapper $input => $output
  387         or die $File::GlobMapper::Error ;
  388 
  389 
  390 =head1 DESCRIPTION
  391 
  392 This module needs Perl5.005 or better.
  393 
  394 This module takes the existing C<File::Glob> module as a starting point and
  395 extends it to allow new filenames to be derived from the files matched by
  396 C<File::Glob>.
  397 
  398 This can be useful when carrying out batch operations on multiple files that
  399 have both an input filename and output filename and the output file can be
  400 derived from the input filename. Examples of operations where this can be
  401 useful include, file renaming, file copying and file compression.
  402 
  403 
  404 =head2 Behind The Scenes
  405 
  406 To help explain what C<File::GlobMapper> does, consider what code you
  407 would write if you wanted to rename all files in the current directory
  408 that ended in C<.tar.gz> to C<.tgz>. So say these files are in the
  409 current directory
  410 
  411     alpha.tar.gz
  412     beta.tar.gz
  413     gamma.tar.gz
  414 
  415 and they need renamed to this
  416 
  417     alpha.tgz
  418     beta.tgz
  419     gamma.tgz
  420 
  421 Below is a possible implementation of a script to carry out the rename
  422 (error cases have been omitted)
  423 
  424     foreach my $old ( glob "*.tar.gz" )
  425     {
  426         my $new = $old;
  427         $new =~ s#(.*)\.tar\.gz$#$1.tgz# ;
  428 
  429         rename $old => $new
  430             or die "Cannot rename '$old' to '$new': $!\n;
  431     }
  432 
  433 Notice that a file glob pattern C<*.tar.gz> was used to match the
  434 C<.tar.gz> files, then a fairly similar regular expression was used in
  435 the substitute to allow the new filename to be created.
  436 
  437 Given that the file glob is just a cut-down regular expression and that it
  438 has already done a lot of the hard work in pattern matching the filenames,
  439 wouldn't it be handy to be able to use the patterns in the fileglob to
  440 drive the new filename?
  441 
  442 Well, that's I<exactly> what C<File::GlobMapper> does.
  443 
  444 Here is same snippet of code rewritten using C<globmap>
  445 
  446     for my $pair (globmap '<*.tar.gz>' => '<#1.tgz>' )
  447     {
  448         my ($from, $to) = @$pair;
  449         rename $from => $to
  450             or die "Cannot rename '$old' to '$new': $!\n;
  451     }
  452 
  453 So how does it work?
  454 
  455 Behind the scenes the C<globmap> function does a combination of a
  456 file glob to match existing filenames followed by a substitute
  457 to create the new filenames.
  458 
  459 Notice how both parameters to C<globmap> are strings that are delimited by <>.
  460 This is done to make them look more like file globs - it is just syntactic
  461 sugar, but it can be handy when you want the strings to be visually
  462 distinctive. The enclosing <> are optional, so you don't have to use them - in
  463 fact the first thing globmap will do is remove these delimiters if they are
  464 present.
  465 
  466 The first parameter to C<globmap>, C<*.tar.gz>, is an I<Input File Glob>.
  467 Once the enclosing "< ... >" is removed, this is passed (more or
  468 less) unchanged to C<File::Glob> to carry out a file match.
  469 
  470 Next the fileglob C<*.tar.gz> is transformed behind the scenes into a
  471 full Perl regular expression, with the additional step of wrapping each
  472 transformed wildcard metacharacter sequence in parenthesis.
  473 
  474 In this case the input fileglob C<*.tar.gz> will be transformed into
  475 this Perl regular expression
  476 
  477     ([^/]*)\.tar\.gz
  478 
  479 Wrapping with parenthesis allows the wildcard parts of the Input File
  480 Glob to be referenced by the second parameter to C<globmap>, C<#1.tgz>,
  481 the I<Output File Glob>. This parameter operates just like the replacement
  482 part of a substitute command. The difference is that the C<#1> syntax
  483 is used to reference sub-patterns matched in the input fileglob, rather
  484 than the C<$1> syntax that is used with perl regular expressions. In
  485 this case C<#1> is used to refer to the text matched by the C<*> in the
  486 Input File Glob. This makes it easier to use this module where the
  487 parameters to C<globmap> are typed at the command line.
  488 
  489 The final step involves passing each filename matched by the C<*.tar.gz>
  490 file glob through the derived Perl regular expression in turn and
  491 expanding the output fileglob using it.
  492 
  493 The end result of all this is a list of pairs of filenames. By default
  494 that is what is returned by C<globmap>. In this example the data structure
  495 returned will look like this
  496 
  497      ( ['alpha.tar.gz' => 'alpha.tgz'],
  498        ['beta.tar.gz'  => 'beta.tgz' ],
  499        ['gamma.tar.gz' => 'gamma.tgz']
  500      )
  501 
  502 
  503 Each pair is an array reference with two elements - namely the I<from>
  504 filename, that C<File::Glob> has matched, and a I<to> filename that is
  505 derived from the I<from> filename.
  506 
  507 
  508 
  509 =head2 Limitations
  510 
  511 C<File::GlobMapper> has been kept simple deliberately, so it isn't intended to
  512 solve all filename mapping operations. Under the hood C<File::Glob> (or for
  513 older versions of Perl, C<File::BSDGlob>) is used to match the files, so you
  514 will never have the flexibility of full Perl regular expression.
  515 
  516 =head2 Input File Glob
  517 
  518 The syntax for an Input FileGlob is identical to C<File::Glob>, except
  519 for the following
  520 
  521 =over 5
  522 
  523 =item 1.
  524 
  525 No nested {}
  526 
  527 =item 2.
  528 
  529 Whitespace does not delimit fileglobs.
  530 
  531 =item 3.
  532 
  533 The use of parenthesis can be used to capture parts of the input filename.
  534 
  535 =item 4.
  536 
  537 If an Input glob matches the same file more than once, only the first
  538 will be used.
  539 
  540 =back
  541 
  542 The syntax
  543 
  544 =over 5
  545 
  546 =item B<~>
  547 
  548 =item B<~user>
  549 
  550 
  551 =item B<.>
  552 
  553 Matches a literal '.'.
  554 Equivalent to the Perl regular expression
  555 
  556     \.
  557 
  558 =item B<*>
  559 
  560 Matches zero or more characters, except '/'. Equivalent to the Perl
  561 regular expression
  562 
  563     [^/]*
  564 
  565 =item B<?>
  566 
  567 Matches zero or one character, except '/'. Equivalent to the Perl
  568 regular expression
  569 
  570     [^/]?
  571 
  572 =item B<\>
  573 
  574 Backslash is used, as usual, to escape the next character.
  575 
  576 =item  B<[]>
  577 
  578 Character class.
  579 
  580 =item  B<{,}>
  581 
  582 Alternation
  583 
  584 =item  B<()>
  585 
  586 Capturing parenthesis that work just like perl
  587 
  588 =back
  589 
  590 Any other character it taken literally.
  591 
  592 =head2 Output File Glob
  593 
  594 The Output File Glob is a normal string, with 2 glob-like features.
  595 
  596 The first is the '*' metacharacter. This will be replaced by the complete
  597 filename matched by the input file glob. So
  598 
  599     *.c *.Z
  600 
  601 The second is
  602 
  603 Output FileGlobs take the
  604 
  605 =over 5
  606 
  607 =item "*"
  608 
  609 The "*" character will be replaced with the complete input filename.
  610 
  611 =item #1
  612 
  613 Patterns of the form /#\d/ will be replaced with the
  614 
  615 =back
  616 
  617 =head2 Returned Data
  618 
  619 
  620 =head1 EXAMPLES
  621 
  622 =head2 A Rename script
  623 
  624 Below is a simple "rename" script that uses C<globmap> to determine the
  625 source and destination filenames.
  626 
  627     use File::GlobMapper qw(globmap) ;
  628     use File::Copy;
  629 
  630     die "rename: Usage rename 'from' 'to'\n"
  631         unless @ARGV == 2 ;
  632 
  633     my $fromGlob = shift @ARGV;
  634     my $toGlob   = shift @ARGV;
  635 
  636     my $pairs = globmap($fromGlob, $toGlob)
  637         or die $File::GlobMapper::Error;
  638 
  639     for my $pair (@$pairs)
  640     {
  641         my ($from, $to) = @$pair;
  642         move $from => $to ;
  643     }
  644 
  645 
  646 
  647 Here is an example that renames all c files to cpp.
  648 
  649     $ rename '*.c' '#1.cpp'
  650 
  651 =head2 A few example globmaps
  652 
  653 Below are a few examples of globmaps
  654 
  655 To copy all your .c file to a backup directory
  656 
  657     '</my/home/*.c>'    '</my/backup/#1.c>'
  658 
  659 If you want to compress all
  660 
  661     '</my/home/*.[ch]>'    '<*.gz>'
  662 
  663 To uncompress
  664 
  665     '</my/home/*.[ch].gz>'    '</my/home/#1.#2>'
  666 
  667 =head1 SEE ALSO
  668 
  669 L<File::Glob|File::Glob>
  670 
  671 =head1 AUTHOR
  672 
  673 The I<File::GlobMapper> module was written by Paul Marquess, F<pmqs@cpan.org>.
  674 
  675 =head1 COPYRIGHT AND LICENSE
  676 
  677 Copyright (c) 2005 Paul Marquess. All rights reserved.
  678 This program is free software; you can redistribute it and/or
  679 modify it under the same terms as Perl itself.