#!/usr/bin/perl -w

# ---------------- OSBF-Lua email feedback program --------------------
#
# The spamfilter.lua program requires feedback when it
# incorrectly classifies an email. The "normal" way is to reply to
# the email to yourself with embedded re-classification commands, but
# this isn't very easy to use for non-technophiles.
#
# This program makes the process easier, by scanning two Mbox folders for
# messages to train as spam and as ham, respectively. (It should also work
# for Maildir folders, but I haven't tested this.) The user drags messages
# that have been mis-classified or require reinforcement into the
# appropriate folder, and this program is run by a cron entry periodically.
#
# This program is loosely based on a program written by John Johnston for
# CRM114 which can be found here:
# http://www.johnjohnston.org/fix-spam-classification.txt
#
# It is in turn based on a program written by Michael J. Chudobiak
# which works on Maildir instead of MBox folders, available at
# http://www.avtechpulse.com/opensource/fix-spam-classification.txt
#
# This version was created by Christian Siefkes.
#
# v1.0   - 2006-08-26 - Original Version
# v1.0.1 - 2006-09-09 - Adapted to OSBF-Lua v2.0.1
# v1.0.2 - 2007-10-09 - Small documentation fixes
#
# Feel free to use/modify this program any way you like.
# -------------------------------------------------------------------

use strict;               # to catch stupid errors
use warnings;
use POSIX ();             # for _exit() in the forked mail child
use Mail::Box::Manager;   # to manipulate mail folders

# ---------- Configuration ------------------------------------------

# User's home directory (convenience variable, no need to change it)
my $home = $ENV{"HOME"};

# The directory where the mail folders exist.
# Frequently it's "$home/mail" but in my installation just the $home dir.
my $maildir = "$home";

# The names of the mail folders containing ham-to-train and spam-to-train
my $hamFolder  = "TrainAsHam";
my $spamFolder = "TrainAsSpam";

# Your e-mail address (to send training messages to) -- the local part
# (without @...) should usually be sufficient
my $mailaddr = "";

# The password defined for training osbf
my $passwd = "";

# File to write log messages to (typically in your local OSBF directory)
my $logfile = "$home/.osbf-lua/train.log";

# chmod permission (there is no need to change this)
my $filemode = 0600;

# ---------- End of Configuration -----------------------------------

# Check configuration
die "Configure your mail address and password"
    unless ($mailaddr && $passwd);

# Open log file + train from both folders.
# If the log file cannot be opened we only warn; log_message() then
# falls back to STDERR so the training results are not lost.
my $logfh;
unless (open($logfh, '>>', $logfile)) {
    warn "Can't open logfile: $!";
    undef $logfh;
}

train($hamFolder,  "nonspam", "+", "H");
train($spamFolder, "spam",    "-", "S");

if ($logfh) {
    close($logfh) or warn "Can't close logfile: $!";
}

# Write one log line, prefixed by the current date.
# Arguments:
#   the text to log (without trailing newline)
sub log_message {
    my ($text) = @_;

    my $out   = $logfh ? $logfh : \*STDERR;
    my $stamp = localtime(time());    # scalar context: readable date string
    print {$out} "$stamp: $text\n";
    return;
}

# Send a training command to the spamfilter by piping a short message
# into "mail -s <subject> <address>".
# Arguments:
#   the subject line carrying the training command
# Returns true if the mail command ran and exited successfully, false
# otherwise (then $? holds its exit status). Dies if the process cannot
# be forked.
#
# The subject contains the SFID, which is taken from a header of a
# possibly hostile message. The command is therefore started with a
# list-form exec, so no shell ever sees the subject.
sub send_training_mail {
    my ($subject) = @_;

    my $pid = open(my $mail, "|-");
    die "Can't invoke mail command: $!" unless defined $pid;

    if ($pid == 0) {
        # Child: STDIN is the pipe from the parent. Discard the error
        # output of the mail command (best effort).
        open(STDERR, '>', '/dev/null');
        # If exec fails, leave via _exit so that no END blocks or
        # destructors run in the child -- they could otherwise touch the
        # mail folder or its lock, which belong to the parent.
        exec('mail', '-s', $subject, $mailaddr) or POSIX::_exit(127);
    }

    print {$mail} "This is an automatic training message.\n";
    return close($mail);
}

# Training subroutine
# Arguments:
#   name of the folder to retrain
#   the correct class of the messages (nonspam or spam)
#   the labels used by OSBF-Lua for unsure and sure members of this class
# Every message in the folder is trained if necessary, logged and then
# deleted. Nothing happens if the folder does not exist.
sub train {
    my ($foldername, $class, $unsureLabel, $sureLabel) = @_;
    my $full = "$maildir/$foldername";

    # See if the train folder exists. If not, there is nothing to do.
    return unless -e $full;

    my $mgr    = Mail::Box::Manager->new;
    my $folder = $mgr->open(folder => $full, access => 'rw',
                            lock_method => 'DOTLOCK');
    unless ($folder) {
        # e.g. the folder is locked by another program or is unreadable
        warn "Can't open mail folder $full -- skipped\n";
        return;
    }

    # loop through the messages in the folder
    for my $msg ($folder->messages) {
        my $subject = $msg->subject;

        # skip pseudo-messages generated by some mailservers
        next if ($subject eq
            "DON'T DELETE THIS MESSAGE -- FOLDER INTERNAL DATA");

        # If there are multiple message IDs (this can happen in bad spam),
        # we'll join them in a space-separated string
        my $msgid       = join " ", $msg->head()->get("Message-ID");
        my $scoreHeader = $msg->get("X-OSBF-Lua-Score");

        # sfid is a comment (enclosed in parentheses) to the message ID
        my ($sfid) = $msgid =~ /\((sfid-[^)]+)\)/;

        # Original classification is a letter enclosed by [ ]
        my ($orgLabel) = $scoreHeader ? $scoreHeader =~ /\[(.)\]/ : ();

        # use start of subject for logging
        my $subjectStart = length($subject) > 33
            ? substr($subject, 0, 30) . "..."
            : $subject;

        my $logMsg;
        if ($sfid && $orgLabel) {
            # check how the message was originally classified
            my ($needsTraining, $reinforcement);
            if ($orgLabel eq $unsureLabel) {
                # reinforcement training
                ($needsTraining, $reinforcement) = (1, 1);
            }
            elsif ($orgLabel eq $sureLabel) {
                # don't train: classification was correct
                ($needsTraining, $reinforcement) = (0, 0);
            }
            else {
                # message was misclassified
                ($needsTraining, $reinforcement) = (1, 0);
            }

            if ($needsTraining) {
                # Send training command to the spamfilter
                send_training_mail("learn $passwd $class $sfid")
                    or die "Couldn't send training mail for $sfid: $?";
                $logMsg = ($reinforcement ? "Reinforced" : "Trained")
                    . " as $class: $subjectStart";
            }
            else {
                $logMsg = "No need to train correctly classified $class: "
                    . "$subjectStart";
            }
        }
        else {
            # Can't train, since either sfid or orgLabel or both are empty
            my @missing;
            push @missing, "SFID"           unless $sfid;
            push @missing, "original label" unless $orgLabel;
            $logMsg = "Missing " . join(" and ", @missing)
                . " -- cannot train: $subjectStart";
        }

        # write log message, prefixed by date
        log_message($logMsg);

        # delete the message
        $msg->delete;
    }

    # write the changes to the mail folder
    $folder->write;

    # For some reason the combination of deleting the message and writing
    # the folder might change the permissions to 644.
    # This is to set the permissions back to user-readable only.
    # Only plain (Mbox) files are changed: applying this mode to a Maildir
    # directory would remove its search permission.
    if (-f $full) {
        chmod($filemode, $full)
            or warn "Can't restore permissions of $full: $!";
    }

    # remove the file lock
    $folder->locker->unlock;
    return;
}