#!/usr/bin/perl
use HTTP::Request;
use LWP::UserAgent;
use HTML::LinkExtor;
use URI::URL;
use File::Find;
use strict;
my $reposit = "/tmp/test" . $$ . ".txt"; # This is only a temporary file.
my $base_url = "http://www.brie.com;
my $base_dir = "/home/www/htdocs";
if (! -f "$base_dir/testlinks.html") {
open (OUTFILE , ">$base_dir/testlinks.html");
} else {
die "$base_dir/testlinks.html already exists.\n Remove or move it before running this\n";
}
open (REPOSIT,">$reposit");
finddepth(\&wanted, $base_dir);
close (REPOSIT);
open (INREPOSIT,"<$reposit"); # Pages to evaluate
my $oldhandle = select(OUTFILE);
$~ = "BEGIN_HTML";
select ($oldhandle);
write OUTFILE;
my $i=0;
while (my $test_page = <INREPOSIT>) {
chomp $test_page;
my $request = new HTTP::Request("GET","$test_page");
my $ua = new LWP::UserAgent;
my $response = $ua -> request($request);
if ($i % 5 == 0) { print OUTFILE "\n"};
$i++;
printf OUTFILE ("<DT>%5d",$i);
if ($response->is_success) {
print OUTFILE qq{ <A HREF="},$test_page, qq{">},$test_page,"</A>\n";
my $parser = HTML::LinkExtor->new(\&test_urls_cb,"$test_page");
$parser->parse($response->content);
} else {
print OUTFILE "--NOT--$test_page\n";
}
}
$oldhandle = select(OUTFILE);
$~ = "END_HTML";
select ($oldhandle);
write OUTFILE;
unlink ($reposit) or warn "Could not unlink $reposit: $!\n";
sub get_test_pages_cb {
my ($tag, %attributes) = @_;
return unless ($tag =~ m/a/i);
foreach my $name (sort keys %attributes) {
my $url = $attributes{$name};
next unless ($url =~ m/^http:/i);
print REPOSIT $url,"\n";
}
}
sub test_urls_cb {
my ($tag, %attributes) = @_;
return unless ($tag =~ m/a/i);
foreach my $name (sort keys %attributes) {
my $url = $attributes{$name};
next unless ($url =~ m/^http:/i);
# print $url,"\n";
my $request = new HTTP::Request("GET","$url");
my $ua = new LWP::UserAgent;
my $response = $ua -> request($request);
if ($response->is_success) {
# Add commands here is link is successful
# print OUTFILE "<DD>$url\n";
} else {
print OUTFILE "<DD> --BAD--$url\n";
}
}
}
sub wanted {
if (/\.html$/) {
my $temp = $File::Find::name;
$temp =~ s/\Q$base_dir//;
print REPOSIT $base_url , $temp , "\n";
}
}
format BEGIN_HTML =
<html>
<head>
<title>Dead Links</title>
<body>
<h1>@<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< Fix Your Links Map</h1>
$base_url
Lynx Rules! <BR>
<PRE>
.
format END_HTML =
</PRE>
<P>
Originally developed by: <br>
<address>
<a href="mailto:brian@brie.com">Brian Lavender</a>
"@"
</address>
</html>
.