"Fossies" - the Fresh Open Source Software Archive

Member "sitecrunch_1.0/site_crunch.pl" (26 Sep 2010, 11037 Bytes) of package /linux/www/sitecrunch_1.0.tar.gz:


As a special service, "Fossies" has tried to format the requested source page into HTML using (guessed) Perl syntax highlighting (style: standard), with prefixed line numbers and a code-folding option. Alternatively, you can view or download the uninterpreted source code file here. For more information about "site_crunch.pl", see the Fossies "Dox" file reference documentation.

    1 #!/usr/bin/perl
    2 
    3 ################################################################################
    4 # Copyright 2010 CIRT, Inc.
    5 #
    6 # Author: Chris Sullo / sullo@cirt.net
    7 #
    8 # http://cirt.net/SiteCrunch
    9 #
   10 # License: http://www.opensource.org/licenses/rpl1.5.txt
   11 # 
   12 # Unless explicitly acquired and licensed from Licensor under another
   13 # license, the contents of this file are subject to the Reciprocal Public
   14 # License ("RPL") Version 1.5, or subsequent versions as allowed by the RPL,
   15 # and You may not copy or use this file in either source code or executable
   16 # form, except in compliance with the terms and conditions of the RPL.
   17 # 
   18 # All software distributed under the RPL is provided strictly on an "AS
   19 # IS" basis, WITHOUT WARRANTY OF ANY KIND, EITHER EXPRESS OR IMPLIED, AND
   20 # LICENSOR HEREBY DISCLAIMS ALL SUCH WARRANTIES, INCLUDING WITHOUT
   21 # LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR
   22 # PURPOSE, QUIET ENJOYMENT, OR NON-INFRINGEMENT. See the RPL for specific
   23 # language governing rights and limitations under the RPL.
   24 ################################################################################
   25 
   26 use strict;
   27 use File::Find;
   28 use Getopt::Long;
   29 use File::Copy::Recursive qw(dircopy);
   30 use JavaScript::Minifier;
   31 use CSS::Minifier;
   32 use HTML::Clean;
   33 
   34 use vars qw/%OPTIONS %PROG/;
   # Keep the raw command line so it can be echoed into the log at startup.
   $OPTIONS{'options'} = join(" ", @ARGV);

   ################################################################################
   ## Program locations
   ##   exe     = full path of the external optimizer for that file type
   ##   options = arguments placed between the program and the file name
   ## variable subs:
   # *INFILE* = name of input file (helpful for over-write)

   %PROG = (
       'png' => {
           'exe'     => '/usr/local/bin/pngout',
           'options' => "",
           },
       'jpg' => {
           'exe'     => '/usr/local/bin/jpegtran',
           'options' => "-copy none -progressive -outfile *INFILE*",
           },
       );

   # Read the command line, check the setup and validate the directories.
   parse_options();
   49 
   50 ################################################################################
   51 
   # Open the log file (append mode) before anything else is reported, so
   # that every later logentry() call also lands in the file.
   if ($OPTIONS{'logfile'} ne '') {
       open(LOG, '>>', $OPTIONS{'logfile'})
           or die "Error opening '$OPTIONS{'logfile'}': $!\n";
       logentry("*****************************************");
       logentry("Starting $0 with options: $OPTIONS{'options'}");
       }

   # Mirror?  When a mirror directory is given, the start directory is copied
   # there first and all processing happens on the copy.
   if ($OPTIONS{'mirror'} ne '') {
       if (copy_tree($OPTIONS{'startdir'}, $OPTIONS{'mirror'})) {
           $OPTIONS{'sourcedir'} = $OPTIONS{'mirror'};
           }
       else {
           logentry("ERROR: Unable to mirror directory.");
           exit 1;
           }
       }
   else {
       $OPTIONS{'sourcedir'} = $OPTIONS{'startdir'};
       }

   my $old_size = dir_size_kb($OPTIONS{'sourcedir'});

   # Counters shared with process(): entries visited and files optimized.
   # Both must start at 0 so the summary never prints an empty value.
   my ($total, $processed) = (0, 0);

   # Now process directory
   find(\&process, $OPTIONS{'sourcedir'});

   my $new_size = dir_size_kb($OPTIONS{'sourcedir'});
   my $diff     = $old_size - $new_size;

   # Percentage of the original size that was saved; guarded so an empty or
   # unmeasurable tree cannot cause a division by zero.
   my $perc = ($old_size > 0)
       ? sprintf("%.2f", ($diff / $old_size) * 100)
       : "0.00";

   logentry("\nFiles:\t\t$processed/$total");
   logentry("Old size:\t$old_size kb");
   logentry("New size:\t$new_size kb");
   logentry("Diff:\t-$diff kb ($perc\%)");

   logentry("$0 ending");
   logentry("*****************************************");
   if ($OPTIONS{'logfile'} ne '') {
       close(LOG);
       }
   exit;

   # Return the disk usage of $dir in kilobytes as reported by "du -s -k",
   # or 0 when du cannot be run or prints nothing usable.  The command is
   # started without a shell, so directory names containing spaces or shell
   # metacharacters are passed through untouched.
   sub dir_size_kb {
       my ($dir) = @_;
       return 0 if !defined $dir || $dir eq '';

       open(my $du, '-|', 'du', '-s', '-k', '--', $dir) or return 0;
       my $summary = <$du>;
       close($du);

       # du prints "<kilobytes><TAB><path>"; only the leading number matters.
       if (defined $summary && $summary =~ /^(\d+)/) {
           return $1;
           }
       return 0;
       }
   95 ################################################################################
   96 
   # File::Find callback, invoked once per entry with the base name in $_
   # and the containing directory in $File::Find::dir.  Counts the entry,
   # then either skips it (skipregex match, dot-file, unknown or deselected
   # type) or hands it to the matching minifier / external optimizer.
   sub process {
       my $file = $_;
       $total++;

       # Skip anything matching the user's skip regex.  "return" (not "next")
       # is the correct way to leave a File::Find callback.
       my $fullpath = $File::Find::dir . "/" . $file;
       if (($OPTIONS{'skipregex'} ne '') && ($fullpath =~ /$OPTIONS{'skipregex'}/)) {
           logentry("Skipping:\t$fullpath (regex match)");
           return;
           }

       my $type = get_file_type($file);
       if (($type eq '') || ($file =~ /^\./) || ($OPTIONS{'process'}->{$type} ne 1)) {
           logentry("Skipping:\t$fullpath");
           return;
           }

       #process the file
       $processed++;
       logentry("Processing:\t$fullpath");
       if ($type eq 'js') {
           jsminify($file);
           }
       elsif ($type eq 'css') {
           cssminify($file);
           }
       elsif ($type eq 'htm') {
           htmminify($file);
           }
       else {
           # External optimizer: build an argument list and run the program
           # without a shell, so no character in the file name needs (or can
           # defeat) escaping.
           my @args = split(' ', $PROG{$type}->{'options'});
           foreach my $arg (@args) {
               $arg =~ s/\*INFILE\*/$file/g;
               }

           if (open(my $prog, '-|', $PROG{$type}->{'exe'}, @args, $file)) {
               # The program's standard output is read and discarded.
               my @ignored = <$prog>;
               if (!close($prog)) {
                   logentry("ERROR: $PROG{$type}->{'exe'} failed on $fullpath (status $?)");
                   }
               }
           else {
               logentry("ERROR: Unable to run $PROG{$type}->{'exe'}: $!");
               }
           }
       return;
       }
  134 
  # Minify an HTML file in place.
  #   $infile  - file to read; nothing happens when it is empty/undefined
  #   $outfile - optional base name for the temporary file; defaults to $infile
  # The result is written to "$outfile.min" and then renamed over $infile.
  # When the htmltidy option is set, the markup is first run through
  # HTML::Tidy.  Dies if a file cannot be read, written or renamed.
  sub htmminify {
      my ($infile, $outfile) = @_;
      return if !defined $infile || $infile eq '';
      $outfile = $infile if !defined $outfile || $outfile eq '';
      $outfile .= ".min";

      # Read the whole input before creating any output, so an unreadable
      # input never leaves a stray ".min" file behind.
      open(my $in_fh, '<', $infile) or die "Unable to open '$infile': $!\n";
      my $in = do { local $/; <$in_fh> };
      close($in_fh);
      $in = '' unless defined $in;

      if ($OPTIONS{'htmltidy'}) {
          my $tidy = HTML::Tidy->new({ 'bare' => 1,
                                       'output-html' => 1,
                                       'drop-empty-paras' => 0,
                                       'hide-endtags' => 1,
                                       'join-classes' => 1,
                                       'join-styles'  => 1,
                                       'literal-attributes' => 1,
                                       'hide-comments' => 1,
                                       'wrap' => 0,
                                       'quote-ampersand' => 0,
                                       'indent-spaces' => 0,
                                       'doctype'    => 'omit',
                                       'clean'      => 1,
                                       'word-2000' => 1
                                       }
                                     );
                                      # 'preserve-entities' => 1, # disabled due to HTML::Tidy bug
          $in = $tidy->clean($in);
          }

      my $h = HTML::Clean->new(\$in);
      $h->strip({ 'whitespace'    => 1,
                  'comments'      => 1,
                  'dequote'       => 1,
                  'defcolor'      => 1,
                  'javascript'    => 1,
                  'htmldefaults'  => 1,
                  'lowercasetags' => 1,
                  'meta'          => "GENERATOR FORMATTER",
                  'emptytags'     => "b i font center",
                  }
                );
      my $data = $h->data();

      open(my $out_fh, '>', $outfile) or die "Unable to open '$outfile': $!\n";
      print {$out_fh} $$data;
      # Buffered write errors only surface at close time.
      close($out_fh) or die "Unable to write '$outfile': $!\n";

      rename($outfile, $infile)
          or die "Unable to rename '$outfile' to '$infile': $!\n";
      return 1;
      }
  186 
  # Minify a CSS file in place using CSS::Minifier.
  #   $infile  - file to read; nothing happens when it is empty/undefined
  #   $outfile - optional base name for the temporary file; defaults to $infile
  # The result is written to "$outfile.min" and then renamed over $infile.
  # Dies if a file cannot be opened, written or renamed.
  sub cssminify {
      my ($infile, $outfile) = @_;
      return if !defined $infile || $infile eq '';
      $outfile = $infile if !defined $outfile || $outfile eq '';
      $outfile .= ".min";

      open(my $in_fh,  '<', $infile)  or die "Unable to open '$infile': $!\n";
      open(my $out_fh, '>', $outfile) or die "Unable to open '$outfile': $!\n";

      # The minifier is handed globs, exactly as with bareword handles.
      CSS::Minifier::minify(input => *{$in_fh}, outfile => *{$out_fh});

      close($in_fh);
      # Buffered write errors only surface at close time.
      close($out_fh) or die "Unable to write '$outfile': $!\n";

      rename($outfile, $infile)
          or die "Unable to rename '$outfile' to '$infile': $!\n";
      return 1;
      }
  199 
  # Minify a JavaScript file in place using JavaScript::Minifier.
  #   $infile  - file to read; nothing happens when it is empty/undefined
  #   $outfile - optional base name for the temporary file; defaults to $infile
  # The result is written to "$outfile.min" and then renamed over $infile.
  # Dies if a file cannot be opened, written or renamed.
  sub jsminify {
      my ($infile, $outfile) = @_;
      return if !defined $infile || $infile eq '';
      $outfile = $infile if !defined $outfile || $outfile eq '';
      $outfile .= ".min";

      open(my $in_fh,  '<', $infile)  or die "Unable to open '$infile': $!\n";
      open(my $out_fh, '>', $outfile) or die "Unable to open '$outfile': $!\n";

      # The minifier is handed globs, exactly as with bareword handles.
      JavaScript::Minifier::minify(input => *{$in_fh}, outfile => *{$out_fh});

      close($in_fh);
      # Buffered write errors only surface at close time.
      close($out_fh) or die "Unable to write '$outfile': $!\n";

      rename($outfile, $infile)
          or die "Unable to rename '$outfile' to '$infile': $!\n";
      return 1;
      }
  212 
  # Report whether a program path is usable: returns 1 when the path exists
  # and is both readable and executable, 0 otherwise (including when no
  # path, or a false value, is given).
  sub is_exec {
      my $exe = $_[0] || return 0;
      return (-e $exe && -r $exe && -x $exe) ? 1 : 0;
      }
  220 
  # Map a file name to its type key by extension (case-insensitive).
  # Returns one of "png", "gif", "jpg", "js", "css", "htm", or "" when the
  # name is empty/false or the extension is not recognised.
  sub get_file_type {
      my $file = $_[0] || return "";

      # Checked in order; the first matching pattern decides the type.
      my @suffix_map = (
          [ qr/\.png$/i,   "png" ],
          [ qr/\.gif$/i,   "gif" ],
          [ qr/\.jpe?g$/i, "jpg" ],
          [ qr/\.js$/i,    "js"  ],
          [ qr/\.css$/i,   "css" ],
          [ qr/\.html?$/i, "htm" ],
          );

      foreach my $entry (@suffix_map) {
          my ($pattern, $type) = @{$entry};
          return $type if $file =~ $pattern;
          }
      return "";
      }
  231 
  # Parse the command line into %OPTIONS, run the setup check, and validate
  # the result.  Shows the usage text (and exits) on a malformed command
  # line or a missing -dir; exits with status 1 when the skip regex does not
  # compile, the start directory is not a readable directory, or the mirror
  # target already exists.
  sub parse_options {
      my $parsed_ok = GetOptions("dir=s"       => \$OPTIONS{'startdir'},
                                 "mirror=s"    => \$OPTIONS{'mirror'},
                                 "type=s"      => \$OPTIONS{'type'},
                                 "checksetup"  => \$OPTIONS{'checksetup'},
                                 "skipregex=s" => \$OPTIONS{'skipregex'},
                                 "htmltidy"    => \$OPTIONS{'htmltidy'},
                                 "logfile=s"   => \$OPTIONS{'logfile'}
                                 );

      # GetOptions has already warned about the offending option.
      if (!$parsed_ok) { usage(); }

      checksetup();
      if ($OPTIONS{'startdir'} eq '') { usage(); }
      if ($OPTIONS{'type'} eq '') { $OPTIONS{'type'} = "all"; }
      set_type_regex($OPTIONS{'type'});

      # Compile skipregex once up front so a bad pattern is reported clearly
      # here instead of aborting in the middle of the directory walk.
      if ($OPTIONS{'skipregex'} ne '') {
          my $compiled = eval { qr/$OPTIONS{'skipregex'}/ };
          if (!defined $compiled) {
              my $why = $@;
              chomp($why);
              logentry("ERROR: Invalid skipregex '$OPTIONS{'skipregex'}': $why");
              exit 1;
              }
          }

      if (!-d $OPTIONS{'startdir'}) {
          logentry("ERROR: $OPTIONS{'startdir'} is not a directory.");
          exit 1;
          }
      if (!-r $OPTIONS{'startdir'}) {
          logentry("ERROR: $OPTIONS{'startdir'} is not readable.");
          exit 1;
          }

      # Never overwrite an existing file or directory with the mirror copy.
      if (($OPTIONS{'mirror'} ne '') && (-e $OPTIONS{'mirror'})) {
          logentry("ERROR: Mirror dir $OPTIONS{'mirror'} exists.");
          exit 1;
          }
      return;
      }
  266 
  # Verify that the external programs listed in %PROG are usable and, when
  # -htmltidy or -checksetup was given, that HTML::Tidy can be loaded.
  # Exits when something required is missing (status 1) or when the user
  # only asked for the check (-checksetup; status 0 if all was fine).
  sub checksetup {
      my $errors = 0;
      print "Checking required software...\n" if $OPTIONS{'checksetup'};

      # Check requirements
      foreach my $type (keys %PROG) {
          if (!is_exec($PROG{$type}->{'exe'})) {
              logentry("$PROG{$type}->{'exe'} is not ok (check that it exists and is executable)");
              $errors = 1;
              }
          }

      if ($errors) {
          print "**Not all requirement components are set up properly.**\n";
          }

      if ($OPTIONS{'htmltidy'} || $OPTIONS{'checksetup'}) {
          print "Checking optional software... \n" if $OPTIONS{'checksetup'};

          # Load at run time: a "use" here would execute at compile time
          # regardless of the surrounding condition and make the optional
          # module mandatory.
          if (!eval { require HTML::Tidy; 1 }) {
              logentry("HTML::Tidy is not ok (module could not be loaded)");
              # Only fatal when the user actually asked for Tidy.
              $errors = 1 if $OPTIONS{'htmltidy'};
              }
          }

      if ($errors || $OPTIONS{'checksetup'}) {
          exit($errors ? 1 : 0);
          }
      return;
      }
  292 
  # Recursively copy directory $src to $dst with dircopy().
  # Returns 1 on success, 0 when an argument is missing or the copy fails,
  # so the caller's "unable to mirror" branch can actually be reached.
  sub copy_tree {
      my $src = $_[0] || return 0;
      my $dst = $_[1] || return 0;
      logentry("Attempting to copy $src to $dst");

      # In scalar context dircopy yields the number of entries copied; a
      # false value means the copy did not succeed.
      my $copied = dircopy($src, $dst);
      if (!$copied) {
          logentry("ERROR: Copy of $src to $dst failed: $!");
          return 0;
          }

      logentry("Copied $copied files from $src to $dst");
      return 1;
      }
  301 
  # Emit one log line built by concatenating all arguments.  The line goes
  # to the currently selected output handle unless $OPTIONS{'quiet'} is
  # true, and additionally to the LOG handle whenever a logfile is configured.
  sub logentry {
      my $line = join("", @_) . "\n";

      if (!$OPTIONS{'quiet'}) {
          print($line);
          }
      print LOG $line if $OPTIONS{'logfile'} ne '';
      return;
      }
  310 
  # Turn the -type value into per-type switches in $OPTIONS{'process'}.
  # A type is enabled when the value is exactly 'all' or contains that
  # type's name (case-insensitive); nothing happens for an empty value.
  sub set_type_regex {
      my $types = $_[0] || return;
      my $wants_all = ($types eq 'all');

      # Type key => pattern that selects it inside the -type value.
      my @selectors = (
          [ 'jpg', qr/jpe?g/i ],
          [ 'png', qr/png/i   ],
          [ 'js',  qr/js/i    ],
          [ 'css', qr/css/i   ],
          [ 'htm', qr/html?/i ],
          );

      foreach my $selector (@selectors) {
          my ($key, $pattern) = @{$selector};
          next unless $wants_all || ($types =~ $pattern);
          $OPTIONS{'process'}->{$key} = 1;
          }
      }
  319 
  # Print the command-line help to standard output and terminate the
  # program.  ("flog file" in the -logfile line was a typo for "log file".)
  sub usage {
      print "$0\n",
            " -checksetup Check for required components\n",
            " -dir+*      directory to start in\n",
            " -htmltidy   run Tidy against HTML files\n",
            " -logfile+   log file\n",
            " -mirror+    mirror start directory to here (*all* files copied)\n",
            " -skipregex+ don't process files/dirs that match this regex\n",
            " -type+      type of file to process (or csv list) (jpg/png/css/htm/js or all)\n",
            "\n",
            " + requires value\n",
            " * required option\n";
      exit;
      }