You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
170 lines
3.8 KiB
170 lines
3.8 KiB
# $Id: AnyDBM_File.pm,v 1.9 1998/01/06 10:07:00 aas Exp $
|
|
|
|
package WWW::RobotRules::AnyDBM_File;
|
|
|
|
require WWW::RobotRules;
|
|
@ISA = qw(WWW::RobotRules);
|
|
$VERSION = sprintf("%d.%02d", q$Revision: 1.9 $ =~ /(\d+)\.(\d+)/);
|
|
|
|
use Carp ();
|
|
use AnyDBM_File;
|
|
use Fcntl;
|
|
use strict;
|
|
|
|
=head1 NAME
|
|
|
|
WWW::RobotRules::AnyDBM_File - Persistent RobotRules
|
|
|
|
=head1 SYNOPSIS
|
|
|
|
require WWW::RobotRules::AnyDBM_File;
|
|
require LWP::RobotUA;
|
|
|
|
# Create a robot useragent that uses a diskcaching RobotRules
|
|
my $rules = new WWW::RobotRules::AnyDBM_File 'my-robot/1.0', 'cachefile';
|
|
my $ua = new WWW::RobotUA 'my-robot/1.0', '[email protected]', $rules;
|
|
|
|
# Then just use $ua as usual
|
|
$res = $ua->request($req);
|
|
|
|
=head1 DESCRIPTION
|
|
|
|
This is a subclass of I<WWW::RobotRules> that uses the AnyDBM_File
|
|
package to implement persistent diskcaching of F<robots.txt> and host
|
|
visit information.
|
|
|
|
The constructor (the new() method) takes an extra argument specifying
|
|
the name of the DBM file to use. If the DBM file already exists, then
|
|
you can specify undef as agent name as the name can be obtained from
|
|
the DBM database.
|
|
|
|
=cut
|
|
|
|
sub new
|
|
{
|
|
my ($class, $ua, $file) = @_;
|
|
Carp::croak('WWW::RobotRules::AnyDBM_File filename required') unless $file;
|
|
|
|
my $self = bless { }, $class;
|
|
$self->{'filename'} = $file;
|
|
tie %{$self->{'dbm'}}, 'AnyDBM_File', $file, O_CREAT|O_RDWR, 0640
|
|
or Carp::croak("Can't open $file: $!");
|
|
|
|
if ($ua) {
|
|
$self->agent($ua);
|
|
} else {
|
|
# Try to obtain name from DBM file
|
|
$ua = $self->{'dbm'}{"|ua-name|"};
|
|
Carp::croak("No agent name specified") unless $ua;
|
|
}
|
|
|
|
$self;
|
|
}
|
|
|
|
sub agent {
|
|
my($self, $newname) = @_;
|
|
my $old = $self->{'dbm'}{"|ua-name|"};
|
|
if (defined $newname) {
|
|
$newname =~ s!/?\s*\d+.\d+\s*$!!; # loose version
|
|
unless ($old && $old eq $newname) {
|
|
# Old info is now stale.
|
|
my $file = $self->{'filename'};
|
|
untie %{$self->{'dbm'}};
|
|
tie %{$self->{'dbm'}}, 'AnyDBM_File', $file, O_TRUNC|O_RDWR, 0640;
|
|
$self->{'dbm'}{"|ua-name|"} = $newname;
|
|
}
|
|
}
|
|
$old;
|
|
}
|
|
|
|
sub no_visits {
|
|
my ($self, $netloc) = @_;
|
|
my $t = $self->{'dbm'}{"$netloc|vis"};
|
|
return 0 unless $t;
|
|
(split(/;\s*/, $t))[0];
|
|
}
|
|
|
|
sub last_visit {
|
|
my ($self, $netloc) = @_;
|
|
my $t = $self->{'dbm'}{"$netloc|vis"};
|
|
return undef unless $t;
|
|
(split(/;\s*/, $t))[1];
|
|
}
|
|
|
|
sub fresh_until {
|
|
my ($self, $netloc, $fresh) = @_;
|
|
my $old = $self->{'dbm'}{"$netloc|exp"};
|
|
if ($old) {
|
|
$old =~ s/;.*//; # remove cleartext
|
|
}
|
|
if (defined $fresh) {
|
|
$fresh .= "; " . localtime($fresh);
|
|
$self->{'dbm'}{"$netloc|exp"} = $fresh;
|
|
}
|
|
$old;
|
|
}
|
|
|
|
sub visit {
|
|
my($self, $netloc, $time) = @_;
|
|
$time ||= time;
|
|
|
|
my $count = 0;
|
|
my $old = $self->{'dbm'}{"$netloc|vis"};
|
|
if ($old) {
|
|
my $last;
|
|
($count,$last) = split(/;\s*/, $old);
|
|
$time = $last if $last > $time;
|
|
}
|
|
$count++;
|
|
$self->{'dbm'}{"$netloc|vis"} = "$count; $time; " . localtime($time);
|
|
}
|
|
|
|
sub push_rules {
|
|
my($self, $netloc, @rules) = @_;
|
|
my $cnt = 1;
|
|
$cnt++ while $self->{'dbm'}{"$netloc|r$cnt"};
|
|
|
|
foreach (@rules) {
|
|
$self->{'dbm'}{"$netloc|r$cnt"} = $_;
|
|
$cnt++;
|
|
}
|
|
}
|
|
|
|
sub clear_rules {
|
|
my($self, $netloc) = @_;
|
|
my $cnt = 1;
|
|
while ($self->{'dbm'}{"$netloc|r$cnt"}) {
|
|
delete $self->{'dbm'}{"$netloc|r$cnt"};
|
|
$cnt++;
|
|
}
|
|
}
|
|
|
|
sub rules {
|
|
my($self, $netloc) = @_;
|
|
my @rules = ();
|
|
my $cnt = 1;
|
|
while (1) {
|
|
my $rule = $self->{'dbm'}{"$netloc|r$cnt"};
|
|
last unless $rule;
|
|
push(@rules, $rule);
|
|
$cnt++;
|
|
}
|
|
@rules;
|
|
}
|
|
|
|
sub dump
|
|
{
|
|
}
|
|
|
|
1;
|
|
|
|
=head1 SEE ALSO
|
|
|
|
L<WWW::RobotRules>, L<LWP::RobotUA>
|
|
|
|
=head1 AUTHORS
|
|
|
|
Hakan Ardo E<lt>[email protected]>, Gisle Aas E<lt>[email protected]>
|
|
|
|
=cut
|
|
|