# $Id: Protocol.pm,v 1.36 2000/04/09 11:20:48 gisle Exp $ package LWP::Protocol; =head1 NAME LWP::Protocol - Base class for LWP protocols =head1 SYNOPSIS package LWP::Protocol::foo; require LWP::Protocol; @ISA=qw(LWP::Protocol); =head1 DESCRIPTION This class is used a the base class for all protocol implementations supported by the LWP library. When creating an instance of this class using C, and you get an initialised subclass appropriate for that access method. In other words, the LWP::Protocol::create() function calls the constructor for one of its subclasses. All derived LWP::Protocol classes need to override the request() method which is used to service a request. The overridden method can make use of the collect() function to collect together chunks of data as it is received. The following methods and functions are provided: =over 4 =cut ##################################################################### require LWP::MemberMixin; @ISA = qw(LWP::MemberMixin); $VERSION = sprintf("%d.%02d", q$Revision: 1.36 $ =~ /(\d+)\.(\d+)/); use strict; use Carp (); use HTTP::Status (); use HTTP::Response; require HTML::HeadParser; my %ImplementedBy = (); # scheme => classname =item $prot = LWP::Protocol->new() The LWP::Protocol constructor is inherited by subclasses. As this is a virtual base class this method should B be called directly. =cut sub new { my($class) = @_; my $self = bless { 'timeout' => 0, 'parse_head' => 1, }, $class; $self; } =item $prot = LWP::Protocol::create($url) Create an object of the class implementing the protocol to handle the given scheme. This is a function, not a method. It is more an object factory than a constructor. This is the function user agents should use to access protocols. =cut sub create { my $scheme = shift; my $impclass = LWP::Protocol::implementor($scheme) or Carp::croak("Protocol scheme '$scheme' is not supported"); # hand-off to scheme specific implementation sub-class return $impclass->new($scheme); } =item $class = LWP::Protocol::implementor($scheme, [$class]) Get and/or set implementor class for a scheme. Returns '' if the specified scheme is not supported. =cut sub implementor { my($scheme, $impclass) = @_; if ($impclass) { $ImplementedBy{$scheme} = $impclass; } my $ic = $ImplementedBy{$scheme}; return $ic if $ic; return '' unless $scheme =~ /^([.+\-\w]+)$/; # check valid URL schemes $scheme = $1; # untaint $scheme =~ s/[.+\-]/_/g; # make it a legal module name # scheme not yet known, look for a 'use'd implementation $ic = "LWP::Protocol::$scheme"; # default location $ic = "LWP::Protocol::nntp" if $scheme eq 'news'; #XXX ugly hack no strict 'refs'; # check we actually have one for the scheme: unless (@{"${ic}::ISA"}) { # try to autoload it eval "require $ic"; if ($@) { if ($@ =~ /Can't locate/) { #' #emacs get confused by ' $ic = ''; } else { die "$@\n"; } } } $ImplementedBy{$scheme} = $ic if $ic; $ic; } =item $prot->request(...) $response = $protocol->request($request, $proxy, undef); $response = $protocol->request($request, $proxy, '/tmp/sss'); $response = $protocol->request($request, $proxy, \&callback, 1024); Dispactches a request over the protocol, and returns a response object. This method needs to be overridden in subclasses. Referer to L for description of the arguments. =cut sub request { my($self, $request, $proxy, $arg, $size, $timeout) = @_; Carp::croak('LWP::Protocol::request() needs to be overridden in subclasses'); } =item $prot->timeout($seconds) Get and set the timeout value in seconds =item $prot->parse_head($yesno) Should we initialize response headers from the section of HTML documents. =cut sub timeout { shift->_elem('timeout', @_); } sub parse_head { shift->_elem('parse_head', @_); } sub max_size { shift->_elem('max_size', @_); } =item $prot->collect($arg, $response, $collector) Called to collect the content of a request, and process it appropriately into a scalar, file, or by calling a callback. If $arg is undefined, then the content is stored within the $response. If $arg is a simple scalar, then $arg is interpreted as a file name and the content is written to this file. If $arg is a reference to a routine, then content is passed to this routine. The $collector is a routine that will be called and which is reponsible for returning pieces (as ref to scalar) of the content to process. The $collector signals EOF by returning a reference to an empty sting. The return value from collect() is the $response object reference. B We will only use the callback or file argument if $response->is_success(). This avoids sendig content data for redirects and authentization responses to the callback which would be confusing. =cut sub collect { my ($self, $arg, $response, $collector) = @_; my $content; my($parse_head, $timeout, $max_size) = @{$self}{qw(parse_head timeout max_size)}; my $parser; if ($parse_head && $response->content_type eq 'text/html') { $parser = HTML::HeadParser->new($response->{'_headers'}); } my $content_size = 0; if (!defined($arg) || !$response->is_success) { # scalar while ($content = &$collector, length $$content) { if ($parser) { $parser->parse($$content) or undef($parser); } LWP::Debug::debug("read " . length($$content) . " bytes"); $response->add_content($$content); $content_size += length($$content); if ($max_size && $content_size > $max_size) { LWP::Debug::debug("Aborting because size limit exceeded"); my $tot = $response->header("Content-Length") || 0; $response->header("X-Content-Range", "bytes 0-$content_size/$tot"); last; } } } elsif (!ref($arg)) { # filename open(OUT, ">$arg") or return HTTP::Response->new(&HTTP::Status::RC_INTERNAL_SERVER_ERROR, "Cannot write to '$arg': $!"); binmode(OUT); local($\) = ""; # ensure standard $OUTPUT_RECORD_SEPARATOR while ($content = &$collector, length $$content) { if ($parser) { $parser->parse($$content) or undef($parser); } LWP::Debug::debug("read " . length($$content) . " bytes"); print OUT $$content; $content_size += length($$content); if ($max_size && $content_size > $max_size) { LWP::Debug::debug("Aborting because size limit exceeded"); my $tot = $response->header("Content-Length") || 0; $response->header("X-Content-Range", "bytes 0-$content_size/$tot"); last; } } close(OUT); } elsif (ref($arg) eq 'CODE') { # read into callback while ($content = &$collector, length $$content) { if ($parser) { $parser->parse($$content) or undef($parser); } LWP::Debug::debug("read " . length($$content) . " bytes"); eval { &$arg($$content, $response, $self); }; if ($@) { chomp($@); $response->header('X-Died' => $@); last; } } } else { return HTTP::Response->new(&HTTP::Status::RC_INTERNAL_SERVER_ERROR, "Unexpected collect argument '$arg'"); } $response; } =item $prot->collect_once($arg, $response, $content) Can be called when the whole response content is available as $content. This will invoke collect() with a collector callback that returns a reference to $content the first time and an empty string the next. =cut sub collect_once { my($self, $arg, $response) = @_; my $content = \ $_[3]; my $first = 1; $self->collect($arg, $response, sub { return $content if $first--; return \ ""; }); } 1; =head1 SEE ALSO Inspect the F and F files for examples of usage. =head1 COPYRIGHT Copyright 1995-2000 Gisle Aas. This library is free software; you can redistribute it and/or modify it under the same terms as Perl itself. =cut