Leaked source code of windows server 2003
You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

170 lines
3.8 KiB

  1. # $Id: AnyDBM_File.pm,v 1.9 1998/01/06 10:07:00 aas Exp $
  2. package WWW::RobotRules::AnyDBM_File;
  3. require WWW::RobotRules;
  4. @ISA = qw(WWW::RobotRules);
  5. $VERSION = sprintf("%d.%02d", q$Revision: 1.9 $ =~ /(\d+)\.(\d+)/);
  6. use Carp ();
  7. use AnyDBM_File;
  8. use Fcntl;
  9. use strict;
  10. =head1 NAME
  11. WWW::RobotRules::AnyDBM_File - Persistent RobotRules
  12. =head1 SYNOPSIS
  13. require WWW::RobotRules::AnyDBM_File;
  14. require LWP::RobotUA;
  15. # Create a robot useragent that uses a diskcaching RobotRules
  16. my $rules = new WWW::RobotRules::AnyDBM_File 'my-robot/1.0', 'cachefile';
  17. my $ua = new LWP::RobotUA 'my-robot/1.0', 'me@foo.com', $rules;
  18. # Then just use $ua as usual
  19. $res = $ua->request($req);
  20. =head1 DESCRIPTION
  21. This is a subclass of I<WWW::RobotRules> that uses the AnyDBM_File
  22. package to implement persistent diskcaching of F<robots.txt> and host
  23. visit information.
  24. The constructor (the new() method) takes an extra argument specifying
  25. the name of the DBM file to use. If the DBM file already exists, then
  26. you can specify undef as agent name as the name can be obtained from
  27. the DBM database.
  28. =cut
  # Constructor: new(CLASS, AGENT_NAME, DBM_FILENAME)
  #
  # Ties a hash stored under the object's 'dbm' key to DBM_FILENAME through
  # AnyDBM_File, creating the file if needed (O_CREAT|O_RDWR, mode 0640).
  # When AGENT_NAME is true it is handed to agent(); otherwise a name must
  # already be stored in the DBM file under the "|ua-name|" key.
  #
  # Croaks when no filename is given, when the tie fails, or when no agent
  # name is available from either source.  Returns the new object.
  sub new
  {
      my $class      = shift;
      my $agent_name = shift;
      my $file       = shift;

      $file or Carp::croak('WWW::RobotRules::AnyDBM_File filename required');

      my $self = bless { 'filename' => $file }, $class;

      tie %{$self->{'dbm'}}, 'AnyDBM_File', $file, O_CREAT|O_RDWR, 0640
          or Carp::croak("Can't open $file: $!");

      if (!$agent_name) {
          # No name supplied: it has to be recorded in the DBM file already.
          Carp::croak("No agent name specified")
              unless $self->{'dbm'}{"|ua-name|"};
      }
      else {
          $self->agent($agent_name);
      }

      return $self;
  }
  # Get or set the robot's agent name: agent() / agent(NEWNAME)
  #
  # The name is stored in the DBM file under the "|ua-name|" key.  Always
  # returns the name that was stored before the call (undef if none).
  #
  # When NEWNAME is defined, a trailing version such as "/1.0" or " 1.0" is
  # stripped first.  If the resulting name differs from the stored one, all
  # cached information is considered stale: the DBM file is reopened with
  # O_TRUNC, emptied, and the new name is recorded.
  #
  # Croaks if the DBM file cannot be reopened.
  sub agent {
      my($self, $newname) = @_;
      my $old = $self->{'dbm'}{"|ua-name|"};
      if (defined $newname) {
          # Loose version match.  The dot is escaped so that only a real
          # "digits.digits" suffix is removed; an unescaped "." would also
          # eat the tail of a name such as "crawler2000".
          $newname =~ s!/?\s*\d+\.\d+\s*$!!;
          unless ($old && $old eq $newname) {
              # Old info is now stale.
              my $file = $self->{'filename'};
              untie %{$self->{'dbm'}};
              # Check the re-tie: on failure we would otherwise keep writing
              # to a plain in-memory hash without anyone noticing.
              tie %{$self->{'dbm'}}, 'AnyDBM_File', $file, O_TRUNC|O_RDWR, 0640
                  or Carp::croak("Can't open $file: $!");
              # Empty the hash explicitly as well, so that no stale entry
              # survives in case the DBM backend did not act on O_TRUNC.
              %{$self->{'dbm'}} = ();
              $self->{'dbm'}{"|ua-name|"} = $newname;
          }
      }
      $old;
  }
  # no_visits(NETLOC)
  #
  # Returns the visit counter for NETLOC: the first ";"-separated field of
  # the "NETLOC|vis" record, or 0 when there is no (true) record.
  sub no_visits {
      my $self   = shift;
      my $netloc = shift;

      my $record = $self->{'dbm'}{"$netloc|vis"};
      if ($record) {
          return (split(/;\s*/, $record))[0];
      }
      return 0;
  }
  # last_visit(NETLOC)
  #
  # Returns the time of the last visit to NETLOC: the second ";"-separated
  # field of the "NETLOC|vis" record.  Returns an explicit undef when there
  # is no (true) record, so list-context callers get a one-element list.
  sub last_visit {
      my $self   = shift;
      my $netloc = shift;

      my $record = $self->{'dbm'}{"$netloc|vis"};
      return (split(/;\s*/, $record))[1] if $record;
      return undef;
  }
  # fresh_until(NETLOC [, FRESH])
  #
  # Reads and optionally replaces the "NETLOC|exp" record.  The record is
  # stored as "<time>; <localtime text>"; only the part before the first ";"
  # is returned.  When FRESH is defined, it becomes the new record (with its
  # localtime() rendering appended).  Always returns the previous value.
  sub fresh_until {
      my $self   = shift;
      my $netloc = shift;
      my $fresh  = shift;

      my $key      = "$netloc|exp";
      my $previous = $self->{'dbm'}{$key};

      # Drop the human-readable timestamp that follows the number.
      $previous =~ s/;.*// if $previous;

      if (defined $fresh) {
          $self->{'dbm'}{$key} = $fresh . "; " . localtime($fresh);
      }

      return $previous;
  }
  # visit(NETLOC [, TIME])
  #
  # Records a visit to NETLOC.  TIME defaults to the current time when it is
  # false.  The "NETLOC|vis" record is rewritten as
  # "<count>; <time>; <localtime text>", where count is the previous count
  # plus one and time never moves backwards relative to the stored value.
  sub visit {
      my $self   = shift;
      my $netloc = shift;
      my $time   = shift;

      $time = time() unless $time;

      my $key   = "$netloc|vis";
      my $count = 0;

      if (my $record = $self->{'dbm'}{$key}) {
          my $last;
          ($count, $last) = split(/;\s*/, $record);
          # Keep the most recent timestamp already on record.
          $time = $last if $last > $time;
      }

      $count++;
      $self->{'dbm'}{$key} = join("; ", $count, $time, scalar localtime($time));
  }
  # push_rules(NETLOC, RULE, ...)
  #
  # Appends rules for NETLOC.  Rules live under the keys "NETLOC|r1",
  # "NETLOC|r2", ...; new ones go in starting at the first index whose
  # stored value is false.
  sub push_rules {
      my ($self, $netloc, @rules) = @_;

      # Find the first free slot.
      my $next = 1;
      while ($self->{'dbm'}{"$netloc|r$next"}) {
          $next++;
      }

      for my $rule (@rules) {
          $self->{'dbm'}{"$netloc|r" . $next++} = $rule;
      }
  }
  # clear_rules(NETLOC)
  #
  # Deletes the consecutive rule entries "NETLOC|r1", "NETLOC|r2", ... and
  # stops at the first index whose stored value is false.
  sub clear_rules {
      my $self   = shift;
      my $netloc = shift;

      for (my $n = 1; $self->{'dbm'}{"$netloc|r$n"}; $n++) {
          delete $self->{'dbm'}{"$netloc|r$n"};
      }
  }
  # rules(NETLOC)
  #
  # Returns the rules stored for NETLOC, read in order from "NETLOC|r1",
  # "NETLOC|r2", ... up to (not including) the first false value.
  sub rules {
      my $self   = shift;
      my $netloc = shift;

      my @found;
      my $n = 1;
      while (my $rule = $self->{'dbm'}{"$netloc|r$n"}) {
          push @found, $rule;
          $n++;
      }

      return @found;
  }
  # dump()
  #
  # Intentionally does nothing and returns nothing.
  # NOTE(review): presumably this overrides a dump() inherited through @ISA
  # from WWW::RobotRules -- confirm against the base class.
  sub dump
  {
      return;
  }
  130. 1;
  131. =head1 SEE ALSO
  132. L<WWW::RobotRules>, L<LWP::RobotUA>
  133. =head1 AUTHORS
  134. Hakan Ardo E<lt>hakan@munin.ub2.lu.se>, Gisle Aas E<lt>aas@sn.no>
  135. =cut