#!/usr/public/bin/perl
# $Id: testlinks,v 1.2 1994/09/21 01:23:18 fielding Exp $
# ---------------------------------------------------------------------------
# GET and extract the links from the URLs passed as arguments, test them
# using HEAD requests, and output an HTML index fragment describing the
# results. Relative links are resolved relative to the URL $base.
#
# Note that this is a non-recursive, completely inefficient version
# of MOMspider's index without the visual cues for problem links. See
# for more information.
#
# 21 Apr 1994 (RTF): Initial Version
# 12 Jul 1994 (RTF): Rewritten to work with libwww-perl
# 20 Jul 1994 (RTF): The default From header is now set by www.pl
# and &www'set_def_header() is called to set User-Agent.
# Added to libwww-perl distribution.
# 20 Sep 1994 (RTF): Added initialization of $headers
#
# Created by Roy Fielding to test MOMspider and the libwww-perl system
#-----------------------------------------------------------------
if ($libloc = $ENV{'LIBWWW_PERL'}) { unshift(@INC, $libloc); }
require "www.pl";
require "wwwurl.pl";
require "wwwhtml.pl";
require "wwwerror.pl";
require "wwwdates.pl";
$pname = $0; # Method = program name
$pname =~ s#^.*/([^/]+)$#$1#; # lose the path
&www'set_def_header('http', 'User-Agent', "$pname/0.3");
# Set up User-Agent: header
$pwd = ( $ENV{'PWD'} || $ENV{'cwd'} || '' );
$base = "file://localhost$pwd/"; # Set up initial Base URL
$vidx = 'tl0001';
#-----------------------------------------------------------------
while ($ARGV[0])
{
$rel = shift;
$url = &wwwurl'absolute($base, $rel);
$content = '';
$headers = '';
%headers = ();
$response = &www'request('GET', $url, *headers, *content, 30);
@TestLinks = ();
@TestAbs = ();
@TestOrig = ();
@TestType = ();
&wwwhtml'extract_links($url, *headers, *content,
*TestLinks, *TestAbs, *TestOrig, *TestType);
# Now print out the index entry for this URL
$nextbit = ($headers{'title'} || $url);
print "
\n";
$vidx++;
print "$response $wwwerror'RespMessage{$response}\n",
"\n$url";
if ($nextbit = ($headers{'uri'} || $headers{'location'}))
{
print "
\nURI: $nextbit";
}
if ($nextbit = $headers{'last-modified'})
{
print "
\nLast-modified: $nextbit";
}
if ($nextbit = $headers{'expires'})
{
print "
\nExpires: $nextbit";
}
if ($nextbit = $headers{'reply-to'})
{
print "
\nReply-to: $nextbit";
}
print "\n";
undef $content;
undef $headers;
undef %headers;
if ($TestLinks[0])
{
print "\n";
foreach $idx (0 .. $#TestLinks)
{
$nextbit = (($TestType[$idx] eq 'L') && 'Link') ||
(($TestType[$idx] eq 'I') && 'Image') ||
(($TestType[$idx] eq 'Q') && 'Query');
print "- $nextbit\n";
$vidx++;
&test_child($url, $TestLinks[$idx], $TestAbs[$idx],
$TestOrig[$idx]);
}
print "
\n";
}
print "\n";
undef @TestLinks;
undef @TestAbs;
undef @TestOrig;
undef @TestType;
}
exit(0);
sub test_child
{
local($parent, $link, $labs, $lorig) = @_;
local($response, $nextbit) = 0;
local($content) = '';
local($headers) = '';
local(%headers) = ();
if ($parent) { $headers{'Referer'} = $parent; }
if ($link =~ /^http/) { sleep(20); }
$response = &www'request('HEAD', $link, *headers, *content, 30);
print " $response $wwwerror'RespMessage{$response}\n",
" \n $lorig";
if ($nextbit = ($headers{'uri'} || $headers{'location'}))
{
print "
\n URI: $nextbit";
}
if ($nextbit = $headers{'last-modified'})
{
print "
\n Last-modified: $nextbit";
}
if ($nextbit = $headers{'expires'})
{
print "
\n Expires: $nextbit";
}
if ($nextbit = $headers{'reply-to'})
{
print "
\n Reply-To: $nextbit";
}
print "\n";
}