#!/usr/bin/perl
# -*- perl -*-

sub rebase_url {
    local($tag, $uri, $rest) = @_;

    if ($uri =~ /^http:/) {
        $u = $uri;
    } elsif ($uri =~ /^[a-z]+:/) {
        if ($tag) {
            return "$tag=\"$uri\"$rest";
        } else {
            return $uri;
        }
    } elsif ($uri =~ m@^/@) {
        $u = $urlhead . $uri;
    } else {
        $u = $urlpath . $uri;
        0 while $u =~ s@/\./@/@g;
        0 while $u =~ s@/[^/]+/\.\./@/@g;
        0 while $u =~ s@/[^/]+/\.\.$@/@g;
        $u = $urlhead . $u;
    }

    $u =~ s/([^a-zA-Z0-9\._\#-])/sprintf("%%%2x", ord($1));/ge;
    $u = $me . $u;

    if ($tag) {
        return "$tag=\"$u\"$rest";
    } else {
        return $u;
    }
}

sub canonical_url {
    local($base, $uri) = @_;
    local($urlhead, $urlpath) = $base =~ m@([^/]+/+[^/]+)(.*)@;
    $urlpath =~ s@/[^/]+$@/@;
    $urlpath = "/" unless $urlpath;

    if ($uri =~ /^[a-z]+:/) {
        return $uri;
    } elsif ($uri =~ m@^/@) {
        $u = $urlhead . $uri;
    } else {
        $u = $urlpath . $uri;
        0 while $u =~ s@/\./@/@g;
        0 while $u =~ s@/[^/]+/\.\./@/@g;
        0 while $u =~ s@/[^/]+/\.\.$@/@g;
        $u = $urlhead . $u;
    }
    return $u;
}

#-----------------------------------------------------------------------------

sub webget {
    local($url) = @_;

    if ($times_webgot++ > 5) {
        return "<h1>Error</h1>too many levels of HTTP redirection\n";
    }

    ($host,$port,$uri) = $url =~ m@http://([^/:]+)(:\d+)?(.*)@;
#    $port =~ s/://;
#    $port = 80 unless $port;
#    $uri = "/" unless $uri;

    ($name,$aliases,$type,$len,$addr) = gethostbyname("delorie.com");
    if (! $addr) {
        return "<h1>Error</h1>Host $host unknown";
    }
    $them = pack($sockaddr, &AF_INET, 9000, $addr);

    unless (socket(S, &AF_INET, &SOCK_STREAM, $proto)) {
        return "<h1>Error</h1>Unable to create socket";
    }

    unless (connect(S, $them)) {
        return "<h1>Error</h1>Unable to connect: $!";
    }

    select(S); $| = 1; select(STDOUT);

    print "webgot: $url\n" unless $ENV{'SERVER_NAME'};
    $webgot_url = $url;
    print S "GET $url HTTP/1.0\n";
    print S "Host: $host\n";
    print S "User-Agent: $agent\n" if $agent;
    if ($ENV{"HTTP_PRAGMA"} =~ /no-cache/) {
        print S "Pragma: no-cache\n";
    }
    print S "\n";

    $line = scalar(<S>);
    ($webstatus) = $line =~ m@ (\d+)@;
    if ($webstatus ne "200") {
        $resp = "\n<h1>Error</h1>Server returned error code $webstatus<pre>";
        $resp .= $line;
        while (<S>) {
            $resp .= $_;
        }
        close(S);
        if ($webstatus eq '302' || $webstatus eq '301') {
            ($loc) = $resp =~ m@Location:\s*(.*\S)@;
            $loc = &canonical_url($url, $loc);
            return &webget($loc);
        }
        return $resp;
    }

    # Read the response header
    while (<S>) {
        $web_header .= $_ unless /Content-length/i;
        last unless /\S/;
    }

    $resp = "";
    # Read the data
    while (<S>) {
        $resp .= $_;
    }

    close(S);
    return $resp;
}

1;

