"Fossies" - the Fresh Open Source Software Archive

Member "install-tl-20200916/tlpkg/tlperl/site/lib/WWW/RobotRules/AnyDBM_File.pm" (5 Apr 2016, 3617 Bytes) of package /windows/misc/install-tl.zip:


As a special service "Fossies" has tried to format the requested source page into HTML format using (guessed) Perl source code syntax highlighting (style: standard) with prefixed line numbers and code folding option. Alternatively you can here view or download the uninterpreted source code file.

    1 package WWW::RobotRules::AnyDBM_File;
    2 
    3 require  WWW::RobotRules;
    4 @ISA = qw(WWW::RobotRules);
    5 $VERSION = "6.00";
    6 
    7 use Carp ();
    8 use AnyDBM_File;
    9 use Fcntl;
   10 use strict;
   11 
   12 =head1 NAME
   13 
   14 WWW::RobotRules::AnyDBM_File - Persistent RobotRules
   15 
   16 =head1 SYNOPSIS
   17 
   18  require WWW::RobotRules::AnyDBM_File;
   19  require LWP::RobotUA;
   20 
   21  # Create a robot useragent that uses a diskcaching RobotRules
   22  my $rules = WWW::RobotRules::AnyDBM_File->new( 'my-robot/1.0', 'cachefile' );
   23  my $ua = WWW::RobotUA->new( 'my-robot/1.0', 'me@foo.com', $rules );
   24 
   25  # Then just use $ua as usual
   26  $res = $ua->request($req);
   27 
   28 =head1 DESCRIPTION
   29 
   30 This is a subclass of I<WWW::RobotRules> that uses the AnyDBM_File
   31 package to implement persistent diskcaching of F<robots.txt> and host
   32 visit information.
   33 
   34 The constructor (the new() method) takes an extra argument specifying
   35 the name of the DBM file to use.  If the DBM file already exists, then
   36 you can specify undef as agent name as the name can be obtained from
   37 the DBM database.
   38 
   39 =cut
   40 
   41 sub new 
   42 { 
   43   my ($class, $ua, $file) = @_;
   44   Carp::croak('WWW::RobotRules::AnyDBM_File filename required') unless $file;
   45 
   46   my $self = bless { }, $class;
   47   $self->{'filename'} = $file;
   48   tie %{$self->{'dbm'}}, 'AnyDBM_File', $file, O_CREAT|O_RDWR, 0640
   49     or Carp::croak("Can't open $file: $!");
   50   
   51   if ($ua) {
   52       $self->agent($ua);
   53   }
   54   else {
   55       # Try to obtain name from DBM file
   56       $ua = $self->{'dbm'}{"|ua-name|"};
   57       Carp::croak("No agent name specified") unless $ua;
   58   }
   59 
   60   $self;
   61 }
   62 
   63 sub agent {
   64     my($self, $newname) = @_;
   65     my $old = $self->{'dbm'}{"|ua-name|"};
   66     if (defined $newname) {
   67     $newname =~ s!/?\s*\d+.\d+\s*$!!;  # loose version
   68     unless ($old && $old eq $newname) {
   69     # Old info is now stale.
   70         my $file = $self->{'filename'};
   71         untie %{$self->{'dbm'}};
   72         tie %{$self->{'dbm'}}, 'AnyDBM_File', $file, O_TRUNC|O_RDWR, 0640;
   73         %{$self->{'dbm'}} = ();
   74         $self->{'dbm'}{"|ua-name|"} = $newname;
   75     }
   76     }
   77     $old;
   78 }
   79 
   80 sub no_visits {
   81     my ($self, $netloc) = @_;
   82     my $t = $self->{'dbm'}{"$netloc|vis"};
   83     return 0 unless $t;
   84     (split(/;\s*/, $t))[0];
   85 }
   86 
   87 sub last_visit {
   88     my ($self, $netloc) = @_;
   89     my $t = $self->{'dbm'}{"$netloc|vis"};
   90     return undef unless $t;
   91     (split(/;\s*/, $t))[1];
   92 }
   93 
   94 sub fresh_until {
   95     my ($self, $netloc, $fresh) = @_;
   96     my $old = $self->{'dbm'}{"$netloc|exp"};
   97     if ($old) {
   98     $old =~ s/;.*//;  # remove cleartext
   99     }
  100     if (defined $fresh) {
  101     $fresh .= "; " . localtime($fresh);
  102     $self->{'dbm'}{"$netloc|exp"} = $fresh;
  103     }
  104     $old;
  105 }
  106 
  107 sub visit {
  108     my($self, $netloc, $time) = @_;
  109     $time ||= time;
  110 
  111     my $count = 0;
  112     my $old = $self->{'dbm'}{"$netloc|vis"};
  113     if ($old) {
  114     my $last;
  115     ($count,$last) = split(/;\s*/, $old);
  116     $time = $last if $last > $time;
  117     }
  118     $count++;
  119     $self->{'dbm'}{"$netloc|vis"} = "$count; $time; " . localtime($time);
  120 }
  121 
  122 sub push_rules {
  123     my($self, $netloc, @rules) = @_;
  124     my $cnt = 1;
  125     $cnt++ while $self->{'dbm'}{"$netloc|r$cnt"};
  126 
  127     foreach (@rules) {
  128     $self->{'dbm'}{"$netloc|r$cnt"} = $_;
  129     $cnt++;
  130     }
  131 }
  132 
  133 sub clear_rules {
  134     my($self, $netloc) = @_;
  135     my $cnt = 1;
  136     while ($self->{'dbm'}{"$netloc|r$cnt"}) {
  137     delete $self->{'dbm'}{"$netloc|r$cnt"};
  138     $cnt++;
  139     }
  140 }
  141 
  142 sub rules {
  143     my($self, $netloc) = @_;
  144     my @rules = ();
  145     my $cnt = 1;
  146     while (1) {
  147     my $rule = $self->{'dbm'}{"$netloc|r$cnt"};
  148     last unless $rule;
  149     push(@rules, $rule);
  150     $cnt++;
  151     }
  152     @rules;
  153 }
  154 
  155 sub dump
  156 {
  157 }
  158 
  159 1;
  160 
  161 =head1 SEE ALSO
  162 
  163 L<WWW::RobotRules>, L<LWP::RobotUA>
  164 
  165 =head1 AUTHORS
  166 
  167 Hakan Ardo E<lt>hakan@munin.ub2.lu.se>, Gisle Aas E<lt>aas@sn.no>
  168 
  169 =cut
  170