You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
211 lines
5.6 KiB
211 lines
5.6 KiB
package URI::Heuristic;
|
|
|
|
# $Id: Heuristic.pm,v 4.12 2001/01/09 20:44:54 gisle Exp $
|
|
|
|
=head1 NAME
|
|
|
|
uf_uristr - Expand URI using heuristics
|
|
|
|
=head1 SYNOPSIS
|
|
|
|
use URI::Heuristic qw(uf_uristr);
|
|
$u = uf_uristr("perl"); # http://www.perl.com
|
|
$u = uf_uristr("www.sol.no/sol"); # http://www.sol.no/sol
|
|
$u = uf_uristr("aas"); # http://www.aas.no
|
|
$u = uf_uristr("ftp.funet.fi"); # ftp://ftp.funet.fi
|
|
$u = uf_uristr("/etc/passwd"); # file:/etc/passwd
|
|
|
|
=head1 DESCRIPTION
|
|
|
|
This module provides functions that expand strings into real absolute
|
|
URIs using some builtin heuristics. Strings that already represent
|
|
absolute URIs (i.e. start with a C<scheme:> part) are never modified
|
|
and are returned unchanged. The main use of these functions are to
|
|
allow abbreviated URIs similar to what many web browsers allow for URIs
|
|
typed in by the user.
|
|
|
|
The following functions are provided:
|
|
|
|
=over 4
|
|
|
|
=item uf_uristr($str)
|
|
|
|
The uf_uristr() function will try to make the string passed as argument
|
|
into a proper absolute URI string. The "uf_" prefix stands for "User
|
|
Friendly". Under MacOS, it assumes that any string with a common URL
|
|
scheme (http, ftp, etc.) is a URL rather than a local path. So don't name
|
|
your volumes after common URL schemes and expect uf_uristr() to construct
|
|
valid file: URL's on those volumes for you, because it won't.
|
|
|
|
=item uf_uri($str)
|
|
|
|
This functions work the same way as uf_uristr() but it will
|
|
return a C<URI> object.
|
|
|
|
=back
|
|
|
|
=head1 ENVIRONMENT
|
|
|
|
If the hostname portion of a URI does not contain any dots, then
|
|
certain qualified guesses will be made. These guesses are governed be
|
|
the following two environment variables.
|
|
|
|
=over 10
|
|
|
|
=item COUNTRY
|
|
|
|
This is the two letter country code (ISO 3166) for your location. If
|
|
the domain name of your host ends with two letters, then it is taken
|
|
to be the default country. See also L<Locale::Country>.
|
|
|
|
=item URL_GUESS_PATTERN
|
|
|
|
Contain a space separated list of URL patterns to try. The string
|
|
"ACME" is for some reason used as a placeholder for the host name in
|
|
the URL provided. Example:
|
|
|
|
URL_GUESS_PATTERN="www.ACME.no www.ACME.se www.ACME.com"
|
|
export URL_GUESS_PATTERN
|
|
|
|
Specifying URL_GUESS_PATTERN disables any guessing rules based on
|
|
country. An empty URL_GUESS_PATTERN disables any guessing that
|
|
involves host name lookups.
|
|
|
|
=back
|
|
|
|
=head1 COPYRIGHT
|
|
|
|
Copyright 1997-1998, Gisle Aas
|
|
|
|
This library is free software; you can redistribute it and/or
|
|
modify it under the same terms as Perl itself.
|
|
|
|
=cut
|
|
|
|
use strict;
|
|
|
|
use vars qw(@EXPORT_OK $VERSION $MY_COUNTRY %LOCAL_GUESSING $DEBUG);
|
|
|
|
require Exporter;
|
|
*import = \&Exporter::import;
|
|
@EXPORT_OK = qw(uf_uri uf_uristr uf_url uf_urlstr);
|
|
$VERSION = sprintf("%d.%02d", q$Revision: 4.12 $ =~ /(\d+)\.(\d+)/);
|
|
|
|
eval {
|
|
require Net::Domain;
|
|
my $fqdn = Net::Domain::hostfqdn();
|
|
$MY_COUNTRY = lc($1) if $fqdn =~ /\.([a-zA-Z]{2})$/;
|
|
|
|
# Some other heuristics to guess country? Perhaps looking
|
|
# at some environment variable (LANG, LC_ALL, ???)
|
|
$MY_COUNTRY = $ENV{COUNTRY} if exists $ENV{COUNTRY};
|
|
};
|
|
|
|
%LOCAL_GUESSING =
|
|
(
|
|
'us' => [qw(www.ACME.gov www.ACME.mil)],
|
|
'uk' => [qw(www.ACME.co.uk www.ACME.org.uk www.ACME.ac.uk)],
|
|
'au' => [qw(www.ACME.com.au www.ACME.org.au www.ACME.edu.au)],
|
|
'il' => [qw(www.ACME.co.il www.ACME.org.il www.ACME.net.il)],
|
|
# send corrections and new entries to <[email protected]>
|
|
);
|
|
|
|
|
|
sub uf_uristr ($)
|
|
{
|
|
local($_) = @_;
|
|
print STDERR "uf_uristr: resolving $_\n" if $DEBUG;
|
|
return unless defined;
|
|
|
|
s/^\s+//;
|
|
s/\s+$//;
|
|
|
|
if (/^(www|web|home)\./) {
|
|
$_ = "http://$_";
|
|
|
|
} elsif (/^(ftp|gopher|news|wais|http|https)\./) {
|
|
$_ = "$1://$_";
|
|
|
|
} elsif ($^O ne "MacOS" &&
|
|
(m,^/, || # absolute file name
|
|
m,^\.\.?/, || # relative file name
|
|
m,^[a-zA-Z]:[/\\],) # dosish file name
|
|
)
|
|
{
|
|
$_ = "file:$_";
|
|
|
|
} elsif ($^O eq "MacOS" && m/:/) {
|
|
# potential MacOS file name
|
|
unless (m/^(ftp|gopher|news|wais|http|https|mailto):/) {
|
|
require URI::file;
|
|
my $a = URI::file->new($_)->as_string;
|
|
$_ = ($a =~ m/^file:/) ? $a : "file:$a";
|
|
}
|
|
} elsif (/^\w+([\.\-]\w+)*\@(\w+\.)+\w{2,3}$/) {
|
|
$_ = "mailto:$_";
|
|
|
|
} elsif (!/^[.+\-\w]+:/) { # no scheme specified
|
|
if (s/^([-\w]+(?:\.[-\w]+)*)([\/:\?\#]|$)/$2/) {
|
|
my $host = $1;
|
|
|
|
if ($host !~ /\./ && $host ne "localhost") {
|
|
my @guess;
|
|
if (exists $ENV{URL_GUESS_PATTERN}) {
|
|
@guess = map { s/\bACME\b/$host/; $_ }
|
|
split(' ', $ENV{URL_GUESS_PATTERN});
|
|
} else {
|
|
if ($MY_COUNTRY) {
|
|
my $special = $LOCAL_GUESSING{$MY_COUNTRY};
|
|
if ($special) {
|
|
my @special = @$special;
|
|
push(@guess, map { s/\bACME\b/$host/; $_ }
|
|
@special);
|
|
} else {
|
|
push(@guess, "www.$host.$MY_COUNTRY");
|
|
}
|
|
}
|
|
push(@guess, map "www.$host.$_",
|
|
"com", "org", "net", "edu", "int");
|
|
}
|
|
|
|
|
|
my $guess;
|
|
for $guess (@guess) {
|
|
print STDERR "uf_uristr: gethostbyname('$guess')..."
|
|
if $DEBUG;
|
|
if (gethostbyname($guess)) {
|
|
print STDERR "yes\n" if $DEBUG;
|
|
$host = $guess;
|
|
last;
|
|
}
|
|
print STDERR "no\n" if $DEBUG;
|
|
}
|
|
}
|
|
$_ = "http://$host$_";
|
|
|
|
} else {
|
|
# pure junk, just return it unchanged...
|
|
|
|
}
|
|
}
|
|
print STDERR "uf_uristr: ==> $_\n" if $DEBUG;
|
|
|
|
$_;
|
|
}
|
|
|
|
sub uf_uri ($)
|
|
{
|
|
require URI;
|
|
URI->new(uf_uristr($_[0]));
|
|
}
|
|
|
|
# legacy
|
|
*uf_urlstr = \*uf_uristr;
|
|
|
|
sub uf_url ($)
|
|
{
|
|
require URI::URL;
|
|
URI::URL->new(uf_uristr($_[0]));
|
|
}
|
|
|
|
1;
|