-# Loads the given URL with one HTTP/1.0 GET; redirects are not followed.
-#
-# Arguments:
-#   $url      an "http://" URL (a leading "feed:" is rewritten to "http:").
-#   $referer  optional; sent as the "Referer" header when true.
-#
-# Returns the list ($http, $head, $body):
-#   $http   the status line as read from the server, terminator included;
-#   $head   the header lines as read, including the blank line ending them;
-#   $body   the rest of the response, with every CRLF converted to LF
-#           (so this is only suitable for textual content).
-#
-# Reads the file-level $http_proxy, $verbose, $progname and $version, and
-# reports failures through error(), which is defined elsewhere in the file
-# (presumably it does not return -- the code below relies on that).
-#
-sub get_url_1($;$) {
-  my ($url, $referer) = @_;
-
-  $url =~ s@^feed:@http:@si;
-  if (! ($url =~ m@^http://@i)) {
-    error ("not an HTTP URL: $url");
-  }
-
-  # "http:", "", "host[:port]", "rest/of/path".
-  my (undef, undef, $serverstring, $path) = split(/\//, $url, 4);
-
-  # Test definedness rather than truth so that the path "0" survives.
-  $path = "" unless defined $path;
-
-  my ($them, $port) = split(/:/, $serverstring);
-  $port = 80 unless $port;
-
-  # The Host header names the origin server, and must carry the port when
-  # it is not the default one.
-  my $host_hdr = $them;
-  $host_hdr .= ":$port" if ($port =~ m/^\d+$/ && $port != 80);
-
-  # Where the TCP connection actually goes: the proxy, if there is one.
-  my ($them2, $port2) = ($them, $port);
-  if ($http_proxy) {
-    $serverstring = $http_proxy;
-    $serverstring =~ s@^[a-z]+://@@i;
-    $serverstring =~ s@/.*$@@s;          # tolerate "http://proxy:8080/"
-    ($them2, $port2) = split(/:/, $serverstring);
-    $port2 = 80 unless $port2;
-  }
-
-  # A non-numeric port is taken to be a service name.
-  if ($port2 =~ /\D/) { $port2 = getservbyname($port2, 'tcp') }
-  if (!$port2) {
-    error ("unrecognised port in $url");
-  }
-
-  my $iaddr = inet_aton($them2);
-  error ("host not found: $them2") unless ($iaddr);
-
-  my $paddr = sockaddr_in($port2, $iaddr);
-  my $proto = getprotobyname('tcp');
-
-  # A lexical handle, so that nothing else in the program can collide
-  # with (or be left holding) a global bareword handle.
-  my $sock;
-  if (!socket($sock, PF_INET, SOCK_STREAM, $proto)) {
-    error ("socket: $!");
-  }
-  if (!connect($sock, $paddr)) {
-    error ("connect($serverstring): $!");
-  }
-
-  # Unbuffer the socket, then put back whichever handle was selected
-  # before (which is not necessarily STDOUT).
-  my $old_fh = select($sock);
-  $| = 1;
-  select($old_fh);
-
-  # Through a proxy the request line carries the absolute URL.
-  my $hdrs = ("GET " . ($http_proxy ? $url : "/$path") . " HTTP/1.0\r\n" .
-              "Host: $host_hdr\r\n" .
-              "User-Agent: $progname/$version\r\n");
-  $hdrs .= "Referer: $referer\r\n" if ($referer);
-  $hdrs .= "\r\n";
-
-  if ($verbose > 3) {
-    foreach my $h (split(/\r?\n/, $hdrs)) {
-      print STDERR " ==> $h\n";
-    }
-  }
-  print $sock $hdrs;
-
-  # Lexical line variables throughout: the caller's $_ is left alone.
-  my $http = <$sock>;
-  $http = "" unless defined $http;
-
-  if ($verbose > 3) {
-    (my $status = $http) =~ s/[\r\n]+$//s;
-    print STDERR " <== $status\n";
-  }
-
-  # Headers run up to and including the first blank line.
-  my $head = "";
-  while (defined (my $line = <$sock>)) {
-    $head .= $line;
-    $line =~ s/[\r\n]+$//s;
-    last if ($line eq "");
-    print STDERR " <== $line\n" if ($verbose > 3);
-  }
-
-  print STDERR " <== \n" if ($verbose > 4);
-
-  my $body = "";
-  my $lines = 0;
-  while (defined (my $line = <$sock>)) {
-    $line =~ s/\r\n/\n/gs;
-    print STDERR " <== $line" if ($verbose > 4);
-    $body .= $line;
-    $lines++;
-  }
-
-  print STDERR " <== [ body ]: $lines lines, " . length($body) . " bytes\n"
-    if ($verbose == 4);
-
-  close $sock;
-
-  if (!$http) {
-    error ("null response: $url");
-  }
-
-  return ( $http, $head, $body );
-}
-
-
-# Loads the given URL, following redirects; returns (content-type, body).
-#
-# Arguments:
-#   $url      the URL to fetch ("http:", or "feed:" which is treated as
-#             "http:").
-#   $referer  optional; passed to get_url_1 for the first request.  On
-#             each redirect the URL just fetched becomes the referer.
-#
-# Returns the list ($content_type, $body).  The content type is the first
-# whitespace-free token of the Content-Type header, or 'text/plain' when
-# the response has none.
-#
-# Calls error() (defined elsewhere in the file) when a redirect has no
-# Location, when more than $max_loop_count redirects are chained, or when
-# the status code is 400 or above.
-#
-sub get_url($;$) {
-  my ($url, $referer) = @_;
-
-  print STDERR "$progname: loading $url\n" if ($verbose > 2);
-
-  my $orig_url = $url;
-  my $loop_count = 0;
-  my $max_loop_count = 10;
-
-  while (1) {
-
-    # get_url_1 makes this rewrite on its own copy only.  Doing it here as
-    # well means the "http://" matches used below to resolve a relative
-    # Location also work when the caller passed a "feed:" URL.
-    $url =~ s@^feed:@http:@si;
-
-    my ( $http, $head, $body ) = get_url_1 ($url, $referer);
-
-    $http =~ s/[\r\n]+$//s;
-
-    if ( $http =~ m@^HTTP/[0-9.]+ 30[12378]@ ) {
-
-      # Matched against $head directly, so the global $_ is untouched.
-      my ( $location ) = ($head =~ m@^location:[ \t]*(.*)$@im);
-      $location = "" unless defined $location;
-      $location =~ s/\s+$//;       # the capture still ends in "\r"
-
-      if ( $location ne "" ) {
-
-        print STDERR "$progname: redirect from $url to $location\n"
-          if ($verbose > 3);
-
-        $referer = $url;
-
-        if ($location =~ m@^//@) {
-          # Protocol-relative: a new host, same scheme.
-          $url = "http:" . $location;
-
-        } elsif ($location =~ m@^/@) {
-          # Host-relative.  get_url_1 has already accepted $referer as an
-          # "http://" URL, so this match is expected to succeed.
-          my ( $origin ) = ($referer =~ m@^(http://[^/]+)@i);
-          $url = $origin . $location;
-
-        } elsif (! ($location =~ m@^[a-z]+:@i)) {
-          # Path-relative: resolve against the referer's directory,
-          # ignoring any query string or fragment it carries.
-          my $base = $referer;
-          $base =~ s@[?#].*$@@s;
-          $base =~ s@[^/]+$@@ if ($base =~ m@^http://[^/]+/@i);
-          $base .= "/" if ($base =~ m@^http://[^/]+$@i);
-          $url = $base . $location;
-
-        } else {
-          $url = $location;
-        }
-
-      } else {
-        error ("no Location with \"$http\"");
-      }
-
-      # Pre-increment, so that exactly $max_loop_count redirects are
-      # followed and the next one is the error.
-      if (++$loop_count > $max_loop_count) {
-        error ("too many redirects ($max_loop_count) from $orig_url");
-      }
-
-    } elsif ( $http =~ m@^HTTP/[0-9.]+ ([4-9][0-9][0-9].*)$@ ) {
-      error ("failed: $1 ($url)");
-
-    } else {
-      my $ct = 'text/plain';
-      # [ \t]* rather than \s*: an empty header value must not let the
-      # match run on into the following line.
-      $ct = $1 if ($head =~ m/^content-type:[ \t]*([^\s]+)/mi);
-      return ($ct, $body);
-    }
-  }
-}
-
-