#!/usr/bin/perl -w

# spam-stat.pl -- process the mail.log file created by Spamassassin and
# figure out some simple stats.
#
# Licensed under the GPL.  Copyright 2004 by John R. Ackermann (jra@febo.com)
# Version 0.9 -- 30 June 2004

use strict;
use warnings;

# Make sure we've got a file list
@ARGV or die "Usage: spam-stat.pl mail1.log [...]\n";

# Running totals, accumulated over every file named on the command line.
my $clean          = 0;    # messages logged as clean
my $spam           = 0;    # spam messages scoring below 10
my $big_spam       = 0;    # spam messages scoring 10 or higher
my $near_spam      = 0;    # clean messages scoring 4.0 or higher
my $near_clean     = 0;    # spam messages scoring 6.0 or lower
my $clean_score    = 0;    # sum of clean scores
my $spam_score     = 0;    # sum of regular spam scores
my $big_spam_score = 0;    # sum of "big spam" scores

# Extremes stay undefined until a score is actually seen, so a sentinel
# starting value can never be mistaken for a real observation.
my $max_score;
my $min_score;

# extract_score($line)
#
# Returns the score found in the eighth whitespace-separated field of
# $line: the text before the first "/" with its leading character
# removed (a field such as "(1.2/5.0)" yields "1.2").  Returns undef
# when that field is missing or the extracted text is not a plain
# decimal number, so unrelated lines cannot pollute the totals.
sub extract_score {
    my ($line) = @_;

    my @fields = split ' ', $line;
    return unless defined $fields[7];

    my ($lead) = split m{/}, $fields[7];
    return unless defined $lead && length($lead) > 1;

    my $score = substr $lead, 1;
    return unless $score =~ /\A[-+]?(?:\d+(?:\.\d*)?|\.\d+)\z/;
    return $score;
}

# format_average($sum, $count)
#
# Returns $sum/$count formatted with two decimals, or "n/a" when $count
# is zero (avoids an "Illegal division by zero" abort).
sub format_average {
    my ($sum, $count) = @_;
    return $count ? sprintf('%.2f', $sum / $count) : 'n/a';
}

# Loop through the files
foreach my $file (@ARGV) {
    # Three-argument open with a lexical handle: the file name can never
    # be interpreted as an open mode or a pipe.
    open my $fh, '<', $file or die "Can't open $file : $!";

    while (my $reading = <$fh>) {
        my $is_clean = $reading =~ /clean/;
        my $is_spam  = $reading =~ /identified/;
        next unless $is_clean || $is_spam;

        my $score = extract_score($reading);
        next unless defined $score;

        if ($is_clean) {
            $clean_score += $score;
            if (!defined $min_score || $score < $min_score) {
                $min_score = $score;
            }
            $near_spam++ if $score >= 4.0;
            $clean++;
        }

        if ($is_spam) {
            $near_clean++ if $score <= 6.0;
            if ($score >= 10) {
                $big_spam_score += $score;
                $big_spam++;
            }
            else {
                $spam++;
                $spam_score += $score;
            }
            if (!defined $max_score || $score > $max_score) {
                $max_score = $score;
            }
        }
    }

    close $fh;
}

# The counters are cumulative across all input files, so the report is
# printed once, after every file has been read.
my $spam_total = $big_spam + $spam;
my $total      = $spam_total + $clean;

if ($total == 0) {
    # Nothing to average; say so instead of dividing by zero.
    print "\n";
    print "Processed 0 incoming messages.\n";
    print "\n";
    exit 0;
}

my $spam_percent = sprintf '%.2f', ($spam_total / $total) * 100;
my $clean_avg    = format_average($clean_score,    $clean);
my $spam_avg     = format_average($spam_score,     $spam);
my $big_spam_avg = format_average($big_spam_score, $big_spam);
my $max_shown    = defined $max_score ? $max_score : 'n/a';
my $min_shown    = defined $min_score ? $min_score : 'n/a';

print "\n";
print "Processed ", $total, " incoming messages.\n";
print "\n";
print "Of these, ", $spam_total, " (", $spam_percent;
print "%) were spam, while ", $clean, " were not.\n";
print "\n";
print "Of the spam, ", $big_spam, " had a score of 10 or higher, while ";
print $spam, " did not.\n";
print "\n";
print "The maximum spam score was ", $max_shown, "\n";
print "The average 'big spam' score was ", $big_spam_avg, " ";
print "while the average\nregular spam score was ", $spam_avg, ".\n";
print "\n";
print "The minimum clean score was ", $min_shown;
print " while the average clean score was ", $clean_avg, ".\n";
print "\n";
# Report the number of spam messages here, not the grand total.
print "Of the ", $spam_total, " spam messages, ", $near_clean, " had ";
print "a score of 6.0 or lower.\n";
print "Of the ", $clean, " clean messages, ", $near_spam;
print " had a score of 4.0 or higher.\n";
print "\n";

exit 0;
