"Fossies" - the Fresh Open Source Software Archive

Member "install-tl-20200916/tlpkg/tlperl/site/lib/URI/Heuristic.pm" (5 Apr 2016, 6524 Bytes) of package /windows/misc/install-tl.zip:


As a special service "Fossies" has tried to format the requested source page into HTML format using (guessed) Perl source code syntax highlighting (style: standard) with prefixed line numbers and code folding option. Alternatively you can here view or download the uninterpreted source code file.

    1 package URI::Heuristic;
    2 
    3 =head1 NAME
    4 
    5 URI::Heuristic - Expand URI using heuristics
    6 
    7 =head1 SYNOPSIS
    8 
    9  use URI::Heuristic qw(uf_uristr);
   10  $u = uf_uristr("perl");             # http://www.perl.com
   11  $u = uf_uristr("www.sol.no/sol");   # http://www.sol.no/sol
   12  $u = uf_uristr("aas");              # http://www.aas.no
   13  $u = uf_uristr("ftp.funet.fi");     # ftp://ftp.funet.fi
   14  $u = uf_uristr("/etc/passwd");      # file:/etc/passwd
   15 
   16 =head1 DESCRIPTION
   17 
   18 This module provides functions that expand strings into real absolute
   19 URIs using some built-in heuristics.  Strings that already represent
   20 absolute URIs (i.e. that start with a C<scheme:> part) are never modified
   21 and are returned unchanged.  The main use of these functions is to
   22 allow abbreviated URIs similar to what many web browsers allow for URIs
   23 typed in by the user.
   24 
   25 The following functions are provided:
   26 
   27 =over 4
   28 
   29 =item uf_uristr($str)
   30 
   31 Tries to make the argument string
   32 into a proper absolute URI string.  The "uf_" prefix stands for "User 
   33 Friendly".  Under MacOS, it assumes that any string with a common URL 
   34 scheme (http, ftp, etc.) is a URL rather than a local path.  So don't name 
   35 your volumes after common URL schemes and expect uf_uristr() to construct 
   36 valid file: URL's on those volumes for you, because it won't.
   37 
   38 =item uf_uri($str)
   39 
   40 Works the same way as uf_uristr() but
   41 returns a C<URI> object.
   42 
   43 =back
   44 
   45 =head1 ENVIRONMENT
   46 
   47 If the hostname portion of a URI does not contain any dots, then
   48 certain qualified guesses are made.  These guesses are governed by
   49 the following environment variables:
   50 
   51 =over 10
   52 
   53 =item COUNTRY
   54 
   55 The two-letter country code (ISO 3166) for your location.  If
   56 the domain name of your host ends with two letters, then it is taken
   57 to be the default country. See also L<Locale::Country>.
   58 
   59 =item HTTP_ACCEPT_LANGUAGE, LC_ALL, LANG
   60 
   61 If COUNTRY is not set, these standard environment variables are
   62 examined and country (not language) information possibly found in them
   63 is used as the default country.
   64 
   65 =item URL_GUESS_PATTERN
   66 
   67 Contains a space-separated list of URL patterns to try.  The string
   68 "ACME" is for some reason used as a placeholder for the host name in
   69 the URL provided.  Example:
   70 
   71  URL_GUESS_PATTERN="www.ACME.no www.ACME.se www.ACME.com"
   72  export URL_GUESS_PATTERN
   73 
   74 Specifying URL_GUESS_PATTERN disables any guessing rules based on
   75 country.  An empty URL_GUESS_PATTERN disables any guessing that
   76 involves host name lookups.
   77 
   78 =back
   79 
   80 =head1 COPYRIGHT
   81 
   82 Copyright 1997-1998, Gisle Aas
   83 
   84 This library is free software; you can redistribute it and/or
   85 modify it under the same terms as Perl itself.
   86 
   87 =cut
   88 
   89 use strict;
   90 use warnings;
   91 
   92 use Exporter 5.57 'import';
   93 our @EXPORT_OK = qw(uf_uri uf_uristr uf_url uf_urlstr);
   94 our $VERSION = "4.20";
   95 
   96 our ($MY_COUNTRY, $DEBUG);
   97 
   98 sub MY_COUNTRY() {
   99     for ($MY_COUNTRY) {
  100     return $_ if defined;
  101 
  102     # First try the environment.
  103     $_ = $ENV{COUNTRY};
  104     return $_ if defined;
  105 
  106     # Try the country part of LC_ALL and LANG from environment
  107     my @srcs = ($ENV{LC_ALL}, $ENV{LANG});
  108     # ...and HTTP_ACCEPT_LANGUAGE before those if present
  109     if (my $httplang = $ENV{HTTP_ACCEPT_LANGUAGE}) {
  110         # TODO: q-value processing/ordering
  111         for $httplang (split(/\s*,\s*/, $httplang)) {
  112         if ($httplang =~ /^\s*([a-zA-Z]+)[_-]([a-zA-Z]{2})\s*$/) {
  113             unshift(@srcs, "${1}_${2}");
  114             last;
  115         }
  116         }
  117     }
  118     for (@srcs) {
  119         next unless defined;
  120         return lc($1) if /^[a-zA-Z]+_([a-zA-Z]{2})(?:[.@]|$)/;
  121     }
  122 
  123     # Last bit of domain name.  This may access the network.
  124     require Net::Domain;
  125     my $fqdn = Net::Domain::hostfqdn();
  126     $_ = lc($1) if $fqdn =~ /\.([a-zA-Z]{2})$/;
  127     return $_ if defined;
  128 
  129     # Give up.  Defined but false.
  130     return ($_ = 0);
  131     }
  132 }
  133 
  134 our %LOCAL_GUESSING =
  135 (
  136  'us' => [qw(www.ACME.gov www.ACME.mil)],
  137  'gb' => [qw(www.ACME.co.uk www.ACME.org.uk www.ACME.ac.uk)],
  138  'au' => [qw(www.ACME.com.au www.ACME.org.au www.ACME.edu.au)],
  139  'il' => [qw(www.ACME.co.il www.ACME.org.il www.ACME.net.il)],
  140  # send corrections and new entries to <gisle@aas.no>
  141 );
  142 # Backwards compatibility; uk != United Kingdom in ISO 3166
  143 $LOCAL_GUESSING{uk} = $LOCAL_GUESSING{gb};
  144 
  145 
  146 sub uf_uristr ($)
  147 {
  148     local($_) = @_;
  149     print STDERR "uf_uristr: resolving $_\n" if $DEBUG;
  150     return unless defined;
  151 
  152     s/^\s+//;
  153     s/\s+$//;
  154 
  155     if (/^(www|web|home)[a-z0-9-]*(?:\.|$)/i) {
  156     $_ = "http://$_";
  157 
  158     } elsif (/^(ftp|gopher|news|wais|https|http)[a-z0-9-]*(?:\.|$)/i) {
  159     $_ = lc($1) . "://$_";
  160 
  161     } elsif ($^O ne "MacOS" && 
  162         (m,^/,      ||          # absolute file name
  163          m,^\.\.?/, ||          # relative file name
  164          m,^[a-zA-Z]:[/\\],)    # dosish file name
  165         )
  166     {
  167     $_ = "file:$_";
  168 
  169     } elsif ($^O eq "MacOS" && m/:/) {
  170         # potential MacOS file name
  171     unless (m/^(ftp|gopher|news|wais|http|https|mailto):/) {
  172         require URI::file;
  173         my $a = URI::file->new($_)->as_string;
  174         $_ = ($a =~ m/^file:/) ? $a : "file:$a";
  175     }
  176     } elsif (/^\w+([\.\-]\w+)*\@(\w+\.)+\w{2,3}$/) {
  177     $_ = "mailto:$_";
  178 
  179     } elsif (!/^[a-zA-Z][a-zA-Z0-9.+\-]*:/) {      # no scheme specified
  180     if (s/^([-\w]+(?:\.[-\w]+)*)([\/:\?\#]|$)/$2/) {
  181         my $host = $1;
  182 
  183         my $scheme = "http";
  184         if (/^:(\d+)\b/) {
  185         # Some more or less well known ports
  186         if ($1 =~ /^[56789]?443$/) {
  187             $scheme = "https";
  188         } elsif ($1 eq "21") {
  189             $scheme = "ftp";
  190         }
  191         }
  192 
  193         if ($host !~ /\./ && $host ne "localhost") {
  194         my @guess;
  195         if (exists $ENV{URL_GUESS_PATTERN}) {
  196             @guess = map { s/\bACME\b/$host/; $_ }
  197                      split(' ', $ENV{URL_GUESS_PATTERN});
  198         } else {
  199             if (MY_COUNTRY()) {
  200             my $special = $LOCAL_GUESSING{MY_COUNTRY()};
  201             if ($special) {
  202                 my @special = @$special;
  203                 push(@guess, map { s/\bACME\b/$host/; $_ }
  204                                                @special);
  205             } else {
  206                 push(@guess, "www.$host." . MY_COUNTRY());
  207             }
  208             }
  209             push(@guess, map "www.$host.$_",
  210                          "com", "org", "net", "edu", "int");
  211         }
  212 
  213 
  214         my $guess;
  215         for $guess (@guess) {
  216             print STDERR "uf_uristr: gethostbyname('$guess.')..."
  217               if $DEBUG;
  218             if (gethostbyname("$guess.")) {
  219             print STDERR "yes\n" if $DEBUG;
  220             $host = $guess;
  221             last;
  222             }
  223             print STDERR "no\n" if $DEBUG;
  224         }
  225         }
  226         $_ = "$scheme://$host$_";
  227 
  228     } else {
  229         # pure junk, just return it unchanged...
  230 
  231     }
  232     }
  233     print STDERR "uf_uristr: ==> $_\n" if $DEBUG;
  234 
  235     $_;
  236 }
  237 
  238 sub uf_uri ($)
  239 {
  240     require URI;
  241     URI->new(uf_uristr($_[0]));
  242 }
  243 
  244 # legacy
  245 *uf_urlstr = \*uf_uristr;
  246 
  247 sub uf_url ($)
  248 {
  249     require URI::URL;
  250     URI::URL->new(uf_uristr($_[0]));
  251 }
  252 
  253 1;