#!/usr/bin/perl

# Site-specific settings: locations of the HTML fragments printed before
# and after the report. Point these at your own header and footer files.
$headerpath = '../../header.htm';
$footerpath = '../../footer.htm';

#~ DO NOT EDIT BELOW THIS LINE ======================================

# Get FORM data #

# parse_form_data($raw)
#   Decode an application/x-www-form-urlencoded string ("a=1&b=x+y") into a
#   flat list of name/value pairs suitable for assigning to a hash.
#   - $raw may be undef or empty; an empty list is returned in that case.
#   - Each pair is split on the FIRST '=' only, so values that themselves
#     contain '=' (e.g. an un-encoded URL with a query string) survive intact.
#   - A pair without '=' yields an empty-string value instead of undef.
#   - Both name and value get '+' -> space and %XX decoding.
#   - When a name occurs more than once, the last occurrence wins.
sub parse_form_data {
    my ($raw) = @_;
    my %field;
    return %field unless defined $raw;

    foreach my $pair (split /&/, $raw) {
        next unless length $pair;
        my ($key, $val) = split /=/, $pair, 2;
        $val = '' unless defined $val;
        foreach my $part ($key, $val) {    # $part aliases $key / $val
            $part =~ tr/+/ /;
            $part =~ s/%([a-fA-F0-9][a-fA-F0-9])/pack("C", hex($1))/eg;
        }
        $field{$key} = $val;
    }
    return %field;
}

# GET requests carry the data in QUERY_STRING; anything else is read from
# the request body, whose size the server passes in CONTENT_LENGTH.
if (defined $ENV{'REQUEST_METHOD'} && $ENV{'REQUEST_METHOD'} eq 'GET') {
    $buffer = $ENV{'QUERY_STRING'};
} else {
    read(STDIN, $buffer, $ENV{'CONTENT_LENGTH'} || 0);
}
%FORM = parse_form_data($buffer);

# These stay package globals because the rest of the script reads them.
$url = defined $FORM{'url'} ? $FORM{'url'} : '';

# 'min' is used as a numeric length threshold and echoed back into the page,
# so anything that is not a plain non-negative integer is treated as 0.
$min = (defined $FORM{'min'} && $FORM{'min'} =~ /\A\s*(\d+)\s*\z/) ? $1 + 0 : 0;

use LWP::Simple;
# is_fetchable_url($candidate)
#   Returns 1 when $candidate is an absolute http:// or https:// URL with a
#   host part, 0 otherwise (including undef). The URL comes straight from the
#   visitor, and LWP would otherwise also honour schemes such as file://,
#   which would let a visitor read files from this server.
#   NOTE(review): this does not stop requests to internal hosts
#   (e.g. http://localhost/); add a host allow/deny list if that matters.
sub is_fetchable_url {
    my ($candidate) = @_;
    return 0 unless defined $candidate;
    return $candidate =~ m{\Ahttps?://[^\s/]}i ? 1 : 0;
}

# Fetch the page; get() returns undef on failure, so fall back to an empty
# document and let the report simply show zero words.
$page = is_fetchable_url($url) ? get($url) : undef;
$page = '' unless defined $page;

# clean_page_text($html)
#   Reduce an HTML document to a lower-case string of words separated by
#   single spaces, with no leading or trailing space. undef yields ''.
sub clean_page_text {
    my ($text) = @_;
    return '' unless defined $text;

    # Line breaks and tabs become spaces (not nothing), otherwise the last
    # word of one line is glued to the first word of the next.
    $text =~ s/[\n\r\t]/ /g;
    $text =~ s/<script.+?<\/script>//isg;    # drop inline scripts entirely
    $text =~ s/<.+?>/ /sg;                   # every remaining tag -> space
    $text =~ s/&nbp;|&nbsp;|&copy;/ /ig;     # a few common entities
    # Punctuation -> space. '!' and '?' are listed individually; tr maps
    # every listed character to the single replacement character.
    $text =~ tr{./:|!@,?()%}{ };
    $text =~ s/\s+/ /g;
    $text =~ s/\A\s+|\s+\z//g;
    return lc $text;
}

# count_words($text)
#   Returns a word => occurrences list for whitespace-separated $text.
#   split ' ' skips leading whitespace, so no empty "word" is ever counted.
sub count_words {
    my ($text) = @_;
    my %count;
    return %count unless defined $text;
    $count{$_}++ for split ' ', $text;
    return %count;
}

print "Content-type: text/html\n\n";

# %seen and $uniq_words are package globals read by the report code below.
$page       = clean_page_text($page);
%seen       = count_words($page);
$uniq_words = keys %seen;

# read_template($path)
#   Returns the full contents of the file at $path as one string.
#   The header and footer are optional decoration, so a missing or unreadable
#   file is not fatal: a warning goes to STDERR (the server error log) and
#   an empty string is returned, which keeps the report itself working.
sub read_template {
    my ($path) = @_;
    return '' unless defined $path && length $path;

    # Three-argument open with a lexical handle: the path can never be
    # interpreted as a mode or a pipe, and the handle closes with its scope.
    open(my $fh, '<', $path) or do {
        warn "Cannot open template '$path': $!\n";
        return '';
    };
    local $/;                       # slurp mode: read the whole file at once
    my $content = <$fh>;
    close($fh);
    return defined $content ? $content : '';
}

$header = read_template($headerpath);
$footer = read_template($footerpath);

# escape_html($text)
#   Returns $text with the HTML-significant characters & < > " ' replaced by
#   entities, so visitor- or page-supplied text cannot inject markup.
#   undef yields ''.
sub escape_html {
    my ($text) = @_;
    return '' unless defined $text;
    $text =~ s/&/&amp;/g;           # must run first
    $text =~ s/</&lt;/g;
    $text =~ s/>/&gt;/g;
    $text =~ s/"/&quot;/g;
    $text =~ s/'/&#39;/g;
    return $text;
}

# density_percent($count, $total)
#   Share of $count in $total as a percentage truncated to two decimals.
#   Returns 0 when $total is zero or undefined.
sub density_percent {
    my ($count, $total) = @_;
    return 0 unless $total;
    return int(($count / $total) * 100 * 100) / 100;
}

print $header;

# Percentages are relative to the total number of words on the page, not
# the number of distinct words; otherwise the column can exceed 100%.
my $total_words = 0;
$total_words += $_ for values %seen;

# $url and $min come from the request; escape them before echoing.
my $safe_url = escape_html($url);
my $safe_min = escape_html($min);

print "There are <b>$uniq_words</b> unique words in <b>$safe_url</b><br>Showing words with more then $safe_min characters in length<br><br>\n";
print "<table border=0 align=center cellpadding=0 cellspacing=0 bgcolor=\"\#CCCCCC\"><tr><td><table width=300 border=0 cellspacing=1 cellpadding=2><tr bgcolor=\"\#FFFFFF\"><td align=center><b>keyword</b></td><td align=center><b>word count</b></td><td align=center><b>%</b></td></tr>\n";

# Most frequent first; ties are ordered alphabetically so the output is stable.
foreach my $word (sort { $seen{$b} <=> $seen{$a} || $a cmp $b } keys %seen) {
    next unless length($word) >= $min;
    my $procent   = density_percent($seen{$word}, $total_words);
    my $safe_word = escape_html($word);    # words come from the fetched page
    print "<tr bgcolor=\"\#FFFFFF\"><td>$safe_word</td><td align=center>$seen{$word}</td><td align=center>$procent \%</td></tr>\n";
}
print "</table></td></tr></table><br><br>Script by <a href=http://www.web4future.com target=_blank>Web4Future.com</a>";
print $footer;
