Leaked source code of windows server 2003
You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

333 lines
8.2 KiB

  1. #
  2. # $Id: Simple.pm,v 1.33 2000/05/24 09:40:43 gisle Exp $
  3. =head1 NAME
  4. get, head, getprint, getstore, mirror - Procedural LWP interface
  5. =head1 SYNOPSIS
  6. perl -MLWP::Simple -e 'getprint "http://www.sn.no"'
  7. use LWP::Simple;
  8. $content = get("http://www.sn.no/");
  9. if (mirror("http://www.sn.no/", "foo") == RC_NOT_MODIFIED) {
  10. ...
  11. }
  12. if (is_success(getprint("http://www.sn.no/"))) {
  13. ...
  14. }
  15. =head1 DESCRIPTION
  16. This interface is intended for those who want a simplified view of the
  17. libwww-perl library. It should also be suitable for one-liners. If
  18. you need more control or access to the header fields in the requests
  19. sent and responses received you should use the full object oriented
  20. interface provided by the C<LWP::UserAgent> module.
  21. The following functions are provided (and exported) by this module:
  22. =over 3
  23. =item get($url)
  24. The get() function will fetch the document identified by the given URL
  25. and return it. It returns C<undef> if it fails. The $url argument can
  26. be either a simple string or a reference to a URI object.
  27. You will not be able to examine the response code or response headers
  28. (like 'Content-Type') when you are accessing the web using this
  29. function. If you need that information you should use the full OO
  30. interface (see L<LWP::UserAgent>).
  31. =item head($url)
  32. Get document headers. Returns the following 5 values if successful:
  33. ($content_type, $document_length, $modified_time, $expires, $server)
  34. Returns an empty list if it fails. In scalar context returns TRUE if
  35. successful.
  36. =item getprint($url)
  37. Get and print a document identified by a URL. The document is printed
  38. to STDOUT as data is received from the network. If the request fails,
  39. then the status code and message are printed on STDERR. The return
  40. value is the HTTP response code.
  41. =item getstore($url, $file)
  42. Gets a document identified by a URL and stores it in the file. The
  43. return value is the HTTP response code.
  44. =item mirror($url, $file)
  45. Get and store a document identified by a URL, using
  46. I<If-modified-since>, and checking the I<Content-Length>. Returns
  47. the HTTP response code.
  48. =back
  49. This module also exports the HTTP::Status constants and procedures.
  50. These can be used when you check the response code from getprint(),
  51. getstore() and mirror(). The constants are:
  52. RC_CONTINUE
  53. RC_SWITCHING_PROTOCOLS
  54. RC_OK
  55. RC_CREATED
  56. RC_ACCEPTED
  57. RC_NON_AUTHORITATIVE_INFORMATION
  58. RC_NO_CONTENT
  59. RC_RESET_CONTENT
  60. RC_PARTIAL_CONTENT
  61. RC_MULTIPLE_CHOICES
  62. RC_MOVED_PERMANENTLY
  63. RC_MOVED_TEMPORARILY
  64. RC_SEE_OTHER
  65. RC_NOT_MODIFIED
  66. RC_USE_PROXY
  67. RC_BAD_REQUEST
  68. RC_UNAUTHORIZED
  69. RC_PAYMENT_REQUIRED
  70. RC_FORBIDDEN
  71. RC_NOT_FOUND
  72. RC_METHOD_NOT_ALLOWED
  73. RC_NOT_ACCEPTABLE
  74. RC_PROXY_AUTHENTICATION_REQUIRED
  75. RC_REQUEST_TIMEOUT
  76. RC_CONFLICT
  77. RC_GONE
  78. RC_LENGTH_REQUIRED
  79. RC_PRECONDITION_FAILED
  80. RC_REQUEST_ENTITY_TOO_LARGE
  81. RC_REQUEST_URI_TOO_LARGE
  82. RC_UNSUPPORTED_MEDIA_TYPE
  83. RC_INTERNAL_SERVER_ERROR
  84. RC_NOT_IMPLEMENTED
  85. RC_BAD_GATEWAY
  86. RC_SERVICE_UNAVAILABLE
  87. RC_GATEWAY_TIMEOUT
  88. RC_HTTP_VERSION_NOT_SUPPORTED
  89. The HTTP::Status classification functions are:
  90. =over 3
  91. =item is_success($rc)
  92. True if response code indicated a successful request.
  93. =item is_error($rc)
  94. True if response code indicated that an error occurred.
  95. =back
  96. The module will also export the LWP::UserAgent object as C<$ua> if you
  97. ask for it explicitly.
  98. The user agent created by this module will identify itself as
  99. "LWP::Simple/#.##" (where "#.##" is the libwww-perl version number)
  100. and will initialize its proxy defaults from the environment (by
  101. calling $ua->env_proxy).
  102. =head1 SEE ALSO
  103. L<LWP>, L<LWP::UserAgent>, L<HTTP::Status>, L<lwp-request>,
  104. L<lwp-mirror>
  105. =cut
  package LWP::Simple;

  use strict;
  use vars qw($ua %loop_check $FULL_LWP @EXPORT @EXPORT_OK $VERSION);

  require Exporter;

  # The procedural interface is exported by default; the shared user agent
  # object is only exported when explicitly requested.
  @EXPORT    = qw(get head getprint getstore mirror);
  @EXPORT_OK = qw($ua);

  # I really hate this.  It was a bad idea to do it in the first place.
  # Wonder how to get rid of it???  (It even makes LWP::Simple 7% slower
  # for trivial tests)
  use HTTP::Status;
  push @EXPORT, @HTTP::Status::EXPORT;

  $VERSION = sprintf("%d.%02d", q$Revision: 1.33 $ =~ /(\d+)\.(\d+)/);

  # An http_proxy variable in the environment (in any letter case) sets
  # $FULL_LWP, the flag that keeps _get() off its trivial fast path.
  $FULL_LWP++ if grep { lc($_) eq 'http_proxy' } keys %ENV;
  # Import hook.  A request for '$ua' sets $FULL_LWP and creates the shared
  # user agent immediately, so the exported variable is usable by the
  # caller.  The symbol export itself is delegated to Exporter.
  sub import
  {
      my($class, @wanted) = @_;
      my $target = caller;

      if (grep { $_ eq '$ua' } @wanted) {
          $FULL_LWP++;
          _init_ua();
      }

      Exporter::export($class, $target, @wanted);
  }
  # Load the full LWP stack on demand and create the shared, package-global
  # user agent object ($ua).  The agent identifies itself as
  # "LWP::Simple/<libwww-perl version>" and takes its proxy settings from
  # the environment.
  sub _init_ua
  {
      require LWP;
      require LWP::UserAgent;
      require HTTP::Status;
      require HTTP::Date;

      # Direct method call instead of the indirect-object form
      # "new LWP::UserAgent", whose parsing depends on what subs happen to
      # be visible in this package.
      $ua = LWP::UserAgent->new;  # we create a global UserAgent object
      $LWP::VERSION = $LWP::VERSION;  # avoid warning
      $ua->agent("LWP::Simple/$LWP::VERSION");
      $ua->env_proxy;
  }
  # Fetch the document at $url and return its content (undef on failure).
  # Each top-level call starts with an empty redirect-tracking table and
  # then hands its argument list over to _get().
  sub get ($)
  {
      %loop_check = ();   # forget redirect targets seen by earlier calls
      goto &_get;         # _get() takes over with the current @_
  }
  # Older implementation of get(): always goes through the full user agent.
  # Returns the response content on success, undef otherwise.
  sub get_old ($)
  {
      my($url) = @_;
      _init_ua() unless $ua;

      my $response = $ua->request(HTTP::Request->new(GET => $url));
      return $response->is_success ? $response->content : undef;
  }
  # Issue a HEAD request for $url.
  #
  # On failure: returns an empty list (undef in scalar context).
  # On success in scalar context: returns the response object.
  # On success in list context: returns
  #   ($content_type, $document_length, $modified_time, $expires, $server)
  # with the two time values converted by HTTP::Date::str2time().
  sub head ($)
  {
      my($url) = @_;
      _init_ua() unless $ua;

      my $response = $ua->request(HTTP::Request->new(HEAD => $url));
      return unless $response->is_success;
      return $response unless wantarray;

      return (scalar $response->header('Content-Type'),
              scalar $response->header('Content-Length'),
              HTTP::Date::str2time($response->header('Last-Modified')),
              HTTP::Date::str2time($response->header('Expires')),
              scalar $response->header('Server'),
             );
  }
  # Fetch $url and print the document to the currently selected output
  # handle as the data arrives.  If the request is not successful, the
  # status line and URL are printed on STDERR.  Returns the response code.
  sub getprint ($)
  {
      my($url) = @_;
      _init_ua() unless $ua;

      my $request = HTTP::Request->new(GET => $url);
      local($\) = ""; # ensure standard $OUTPUT_RECORD_SEPARATOR

      # On MacOS network line endings are rewritten to "\n" before printing;
      # everywhere else each chunk is printed untouched.
      my $callback = $^O eq "MacOS"
          ? sub { $_[0] =~ s/\015?\012/\n/g; print $_[0] }
          : sub { print $_[0] };

      my $response = $ua->request($request, $callback);
      print STDERR $response->status_line, " <URL:$url>\n"
          unless $response->is_success;

      return $response->code;
  }
  # Fetch $url, passing $file to the user agent as the destination for the
  # content.  Returns the response code.
  sub getstore ($$)
  {
      my($url, $file) = @_;
      _init_ua() unless $ua;

      my $response = $ua->request(HTTP::Request->new(GET => $url), $file);
      return $response->code;
  }
  # Mirror $url into $file through the user agent's own mirror() method.
  # Returns the response code.
  sub mirror ($$)
  {
      my($url, $file) = @_;
      _init_ua() unless $ua;

      return $ua->mirror($url, $file)->code;
  }
  # Worker behind get().  Plain "http://host[:port][/path]" URLs are served
  # by the lightweight _trivial_http_get() as long as $FULL_LWP is not set;
  # everything else goes through the full user agent.  Returns the document
  # content, or undef on failure.
  sub _get
  {
      my $url = shift;

      unless ($FULL_LWP) {
          # The list assignment is true only when the pattern matched.
          if (my($host, $port, $path) =
                  $url =~ m,^http://([^/:\@]+)(?::(\d+))?(/\S*)?$,)
          {
              $port ||= 80;
              $path = "/" unless defined($path);
              return _trivial_http_get($host, $port, $path);
          }
      }

      _init_ua() unless $ua;
      my $response = $ua->request(HTTP::Request->new(GET => $url));
      return $response->is_success ? $response->content : undef;
  }
  # Minimal HTTP/1.0 GET over a raw socket, used to avoid loading the full
  # LWP stack for simple requests.
  #
  # Arguments: $host, $port, $path (path as it goes on the request line).
  # Returns:   the response body for a 2xx answer; the raw buffer if no
  #            HTTP status line is recognised; the result of _get() on the
  #            redirect target for 301/302/303/307; undef on read error,
  #            redirect loop or any other status; an empty list (undef in
  #            scalar context) if the connection cannot be established.
  sub _trivial_http_get
  {
      my($host, $port, $path) = @_;
      #print "HOST=$host, PORT=$port, PATH=$path\n";

      require IO::Socket;
      local($^W) = 0;
      my $sock = IO::Socket::INET->new(PeerAddr => $host,
                                       PeerPort => $port,
                                       Proto    => 'tcp',
                                       Timeout  => 60) || return;
      $sock->autoflush;

      # The Host header carries the port only when it is not the default.
      my $netloc = $host;
      $netloc .= ":$port" if $port != 80;
      print $sock join("\015\012" =>
                       "GET $path HTTP/1.0",
                       "Host: $netloc",
                       "User-Agent: lwp-trivial/$VERSION",
                       "", "");

      # Slurp the whole response; sysread yields 0 at EOF and undef on error.
      my $buf = "";
      my $n;
      1 while $n = sysread($sock, $buf, 8*1024, length($buf));
      return undef unless defined($n);

      if ($buf =~ m,^HTTP/\d+\.\d+\s+(\d+)[^\012]*\012,) {
          my $code = $1;
          #print "CODE=$code\n$buf\n";

          # Look for the redirect target in the header section only, so that
          # a "Location:" line inside the body is never mistaken for one.
          # Fall back to the whole buffer if no blank line was received.
          my($header) = $buf =~ /\A(.*?\015?\012)\015?\012/s;
          $header = $buf unless defined($header);

          # Header field names are case-insensitive, hence the /i.
          if ($code =~ /^30[1237]/ && $header =~ /\012Location:\s*(\S+)/i) {
              # redirect
              my $url = $1;
              return undef if $loop_check{$url}++;
              return _get($url);
          }
          return undef unless $code =~ /^2/;
          $buf =~ s/.+?\015?\012\015?\012//s;  # zap header
      }

      return $buf;
  }
  255. 1;