#!/usr/bin/perl

use HTTP::Request;
use LWP::UserAgent;
use HTML::LinkExtor;
use URI::URL;
use File::Find;
use strict;

my $reposit = "/tmp/test" . $$ . ".txt"; # This is only a temporary file.
my $base_url = "http://www.brie.com;
my $base_dir = "/home/www/htdocs";

if (! -f "$base_dir/testlinks.html") {
open (OUTFILE , ">$base_dir/testlinks.html");
} else {
die "$base_dir/testlinks.html already exists.\n Remove or move it before running this\n";
}

open (REPOSIT,">$reposit");

finddepth(\&wanted, $base_dir);

close (REPOSIT);

open (INREPOSIT,"<$reposit"); # Pages to evaluate



my $oldhandle = select(OUTFILE);
$~ = "BEGIN_HTML";
select ($oldhandle);
write OUTFILE;

my $i=0;

while (my $test_page = <INREPOSIT>) {
chomp $test_page;

my $request = new HTTP::Request("GET","$test_page");
my $ua = new LWP::UserAgent;
my $response = $ua -> request($request);

if ($i % 5 == 0) { print OUTFILE "\n"};
$i++;
printf OUTFILE ("<DT>%5d",$i);

if ($response->is_success) {
print OUTFILE qq{ <A HREF="},$test_page, qq{">},$test_page,"</A>\n";
my $parser = HTML::LinkExtor->new(\&test_urls_cb,"$test_page");
$parser->parse($response->content);
} else {
print OUTFILE "--NOT--$test_page\n";
}
}

$oldhandle = select(OUTFILE);
$~ = "END_HTML";
select ($oldhandle);
write OUTFILE;

unlink ($reposit) or warn "Could not unlink $reposit: $!\n";



sub get_test_pages_cb {
my ($tag, %attributes) = @_;
return unless ($tag =~ m/a/i);
foreach my $name (sort keys %attributes) {
my $url = $attributes{$name};
next unless ($url =~ m/^http:/i);
print REPOSIT $url,"\n";
}
}


sub test_urls_cb {
my ($tag, %attributes) = @_;
return unless ($tag =~ m/a/i);
foreach my $name (sort keys %attributes) {
my $url = $attributes{$name};
next unless ($url =~ m/^http:/i);
# print $url,"\n";
my $request = new HTTP::Request("GET","$url");
my $ua = new LWP::UserAgent;
my $response = $ua -> request($request);

if ($response->is_success) {
# Add commands here is link is successful
# print OUTFILE "<DD>$url\n";
} else {
print OUTFILE "<DD> --BAD--$url\n";
}
}
}


sub wanted {

if (/\.html$/) {
my $temp = $File::Find::name;
$temp =~ s/\Q$base_dir//;
print REPOSIT $base_url , $temp , "\n";
}

}

format BEGIN_HTML =
<html>
<head>
<title>Dead Links</title>
<body>
<h1>@<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<< Fix Your Links Map</h1>
$base_url
Lynx Rules! <BR>
<PRE>
.

format END_HTML =
</PRE>
<P>
Originally developed by: <br>
<address>
<a href="mailto:brian@brie.com">Brian Lavender</a>
"@"
</address>
</html>
.