#!/usr/bin/procmail
#
# Spam-filter stage of the mail filter.
#
# Runs the message through crm114 and SpamAssassin (unless a skip condition
# applies), compares both verdicts and records the outcome in an "X-Spam:"
# header plus a set of variables for later stages:
#
#   IS_SPAM        set when the message is considered spam
#   SPAM_UNKNOWN   set when the checks were bypassed (value names the reason)
#   SPAM_UNSURE    set when crm114 is unsure and SA is not convincing either
#   SPAM_DISAGREE  set when crm114 and SA disagree and SA is not convincing
#   RETRAIN        "ham" or "spam" when crm114 should be (re)trained
#
# Variables such as FORMAIL, CRM114, SPAMC, EGREP, CONF, NL, MSG_DEJAVU and
# the *_LIMIT_* thresholds are not defined here; presumably they come from
# $PMDIR/defines -- verify against that file.
#
#TODO: rewrite to use SPAM variable, and do not autotrain spam here, only ham

PMDIR=${PMDIR:-$HOME/.etc/mailfilter/procmail}

:0
* !PMVAR ?? .
{
  # PMVAR is not defined, so we are being called as filter
  # thus source the standard defines
  INCLUDERC=$PMDIR/defines

  # prevent feeding back to procmail and delete the leading From line
  PROCMAIL='/bin/cat'

  # and tell procmail to keep acting as a filter
  # (single quotes: $PROCMAIL is deliberately not expanded at this point)
  DEFAULT='|$PROCMAIL'
}

#VERBOSE=yes

# no need to reprocess messages that went into a spamtrap
# UPDATE: retrain them only if diagnosed as non-spam, see below
# Note: add E flag to next recipe when uncommenting
#:0
#* SPAMTRAPPED ?? .
#{
#  LOG="spamfilter: skipping checks for spamtrapped message$NL"
#  :0 fw
#  |$FORMAIL -I"X-Spam: spamtrapped"
#}

# check whether this message is being resubmitted
:0
*$ $MSG_DEJAVU
{
  :0
  * TRAINED_AS ?? .
  {
    LOG="spamfilter: skipping already trained $TRAINED_AS$NL"

    :0 fw
    |$FORMAIL -I"X-Spam: $TRAINED_AS (already trained)"

    :0
    * TRAINED_AS ?? spam
    { IS_SPAM=already-trained }
  }

  :0 E
  {
    LOG="spamfilter: skipping resubmitted message$NL"

    :0 fw
    |$FORMAIL -I"X-Spam: unknown (resubmitted)"
  }
}

# do not run spamfilters if the message destination is already set
:0 E
* DEST ?? .
{
  LOG="spamfilter: message already routed to '$DEST'$NL"

  :0 fw
  |$FORMAIL -I"X-Spam: unknown (already routed)"

  SPAM_UNKNOWN=already-destined
}

# let earlier parts of the mailfilter cause bypassing the checks
:0 E
* SKIP_SPAMCHECKS ?? .
{
  LOG="spamfilter: skipping checks as requested: $SKIP_SPAMCHECKS$NL"

  :0 fw
  |$FORMAIL -I"X-Spam: unknown (skip requested)"

  SPAM_UNKNOWN=skip-requested
}

# honour skip-spamchecks to exclude certain messages from spam checks
# altogether (patterns are matched against both header and body)
:0 EBH
* ? $EGREP -qif $CONF/skip-spamchecks
{
  LOG="spamfilter: skipping checks as per skip-spamchecks$NL"

  :0 fw
  |$FORMAIL -I"X-Spam: unknown (check skipped)"

  SPAM_UNKNOWN=skip-match
  SKIP_SPAMCHECKS=match
}

# sanity check on message size
:0 E
* > $SPAMCHECK_MAX_MESSAGE_SIZE
{
  LOG="spamfilter: skipping check because message size exceeds $SPAMCHECK_MAX_MESSAGE_SIZE bytes$NL"

  :0 fw
  |$FORMAIL -I"X-Spam: unknown (message larger than $SPAMCHECK_MAX_MESSAGE_SIZE bytes)"

  SPAM_UNKNOWN=too-large
}

# now run the spamfilters (only reached when none of the bypasses above hit)
:0 E
{
  INCLUDERC=$PMDIR/spamtraps
  INCLUDERC=$PMDIR/spammers
  INCLUDERC=$PMDIR/spampat
  INCLUDERC=$PMDIR/pre-spam-cleanup

  # crm114: defaults used when crm114 is skipped or its header is missing
  CRM_SPAM=UNKNOWN
  CRM_SCORE=0

  :0
  * !SKIP_CRM ?? .
  {
    #TODO: somehow filter out headers we added
    :0 fw
    |$CRM114

    # verdict word from the X-CRM114-Status header
    :0
    * ^X-CRM114-Status: \/[A-Z]+
    { CRM_SPAM=$MATCH }

    # numeric score following the opening parenthesis
    :0
    * ^X-CRM114-Status: .+\([ ]*\/-?[.0-9]+
    { CRM_SCORE=$MATCH }

    LOG="crm114: $CRM_SPAM/$CRM_SCORE$NL"
  }

  # spamassassin: defaults used when SA is skipped or its header is missing.
  # SA_SPAM is the variable every test below reads, so it must be reset here;
  # otherwise a missing X-Spam-Status header leaves it unset or stale.
  # SA_STATUS is kept only in case something outside this file reads it.
  SA_STATUS=Unknown
  SA_SPAM=Unknown
  SA_SCORE=0
  SA_TESTS=none

  :0
  * !SKIP_SA ?? .
  {
    :0 fw
    |$SPAMC

    # verdict word (Yes/No) from the X-Spam-Status header
    :0
    * ^X-Spam-Status: \/[A-Za-z]+
    { SA_SPAM=$MATCH }

    :0
    * ^X-Spam-Status: .+score=\/-?[.0-9]+
    { SA_SCORE=$MATCH }

    :0
    * ^X-Spam-Status: .+tests=\/[^ ]+
    { SA_TESTS=$MATCH }

    LOG="SA: $SA_SPAM/$SA_SCORE/$SA_TESTS$NL"
  }

  # NOTE: the SA_SPAM tests below are anchored with ^^...^^ because procmail
  # matches case-insensitively and unanchored by default, so a plain "No"
  # would also match the default value "Unknown".

  ## CASE 0: crm114 is unsure/untrained
  :0
  * CRM_SPAM ?? UNSURE
  {
    # retrain spamtrapped message
    :0
    * SPAMTRAPPED ?? .
    {
      LOG="spamfilter: scheduling retraining with SPAM due to spamtrap$NL"

      :0 fw
      |$FORMAIL -A "X-CRM114-Autotrain: spam, due to spamtrap"

      RETRAIN=spam
    }

    # retrain as ham
    # (perl only does the numeric comparison; exit status 1 = condition false)
    :0 E
    * ? perl -e "$SA_SCORE <= $CRM_UNSURE_SA_AUTOTRAIN_LIMIT_HAM || exit 1"
    {
      LOG="spamfilter: scheduling retraining with HAM (score $SA_SCORE <= $CRM_UNSURE_SA_AUTOTRAIN_LIMIT_HAM)$NL"

      :0 fw
      |$FORMAIL -A "X-CRM114-Autotrain: ham, according to SA (score $SA_SCORE <= $CRM_UNSURE_SA_AUTOTRAIN_LIMIT_HAM)"

      RETRAIN=ham
    }

    # retrain as spam
    # (weighted form of the exit-code test, kept exactly as it was)
    :0 E
    * 1^0 ? perl -e "$SA_SCORE > $CRM_UNSURE_SA_AUTOTRAIN_LIMIT_SPAM || exit 1"
    {
      LOG="spamfilter: scheduling retraining with SPAM (score $SA_SCORE > $CRM_UNSURE_SA_AUTOTRAIN_LIMIT_SPAM)$NL"

      :0 fw
      |$FORMAIL -A "X-CRM114-Autotrain: spam, according to SA (score $SA_SCORE > $CRM_UNSURE_SA_AUTOTRAIN_LIMIT_SPAM)"

      RETRAIN=spam
    }

    # skip retraining if SA is not convinced
    :0 E
    {
      LOG="spamfilter: will not autotrain crm114 because SA is not convinced ($CRM_UNSURE_SA_AUTOTRAIN_LIMIT_HAM <= $SA_SCORE < $CRM_UNSURE_SA_AUTOTRAIN_LIMIT_SPAM)$NL"

      :0 fw
      |$FORMAIL -A "X-CRM114-Autotrain: SA is unsure ($CRM_UNSURE_SA_AUTOTRAIN_LIMIT_HAM <= $SA_SCORE < $CRM_UNSURE_SA_AUTOTRAIN_LIMIT_SPAM)"

      SPAM_UNSURE=sa-unsure
    }
  }

  ## CASE 1: disagreement, SA sees ham
  :0 E
  * CRM_SPAM ?? SPAM
  * SA_SPAM ?? ^^No^^
  {
    # message was spamtrapped anyway
    :0
    * SPAMTRAPPED ?? .
    {
      LOG="spamfilter: resolving crm114/SA disagreement due to spamtrap ($CRM_SCORE/$SA_SCORE)$NL"
      RETRAIN=spam

      :0 fw
      |$FORMAIL -A "X-CRM114-Retrain: spam, due to spamtrap"
    }

    # SA is convincing, so retrain crm114
    :0 E
    * ? perl -e "$SA_SCORE <= $CRM_MISCLASSIFY_SA_AUTOTRAIN_LIMIT_HAM || exit 1"
    {
      LOG="spamfilter: crm114 found spam ($CRM_SCORE), but SA is more convincing ($SA_SCORE <= $CRM_MISCLASSIFY_SA_AUTOTRAIN_LIMIT_HAM)$NL"
      RETRAIN=ham

      :0 fw
      |$FORMAIL -A "X-CRM114-Retrain: ham, according to SA (score $SA_SCORE <= $CRM_MISCLASSIFY_SA_AUTOTRAIN_LIMIT_HAM)"
    }

    # SA is not convincing, mark as disagreement
    :0 E
    {
      LOG="spamfilter: crm114 found spam ($CRM_SCORE), but SA thinks it's ham ($SA_SCORE)$NL"
      SPAM_DISAGREE=sa-ham

      :0 fw
      |$FORMAIL -I "X-Spam: disagree (crm114:spam/$CRM_SCORE SA:ham/$SA_SCORE)"
    }
  }

  ## CASE 2: disagreement, SA sees spam
  :0 E
  * CRM_SPAM ?? GOOD
  * SA_SPAM ?? ^^Yes^^
  {
    # message was spamtrapped anyway
    :0
    * SPAMTRAPPED ?? .
    {
      LOG="spamfilter: resolving crm114/SA disagreement due to spamtrap ($CRM_SCORE/$SA_SCORE)$NL"
      RETRAIN=spam

      :0 fw
      |$FORMAIL -A "X-CRM114-Retrain: spam, due to spamtrap"
    }

    # SA is convincing, so retrain crm114
    # (E flag: a disagreement already resolved by the spamtrap must not be
    # re-evaluated and possibly flagged as unresolved below, mirroring CASE 1)
    :0 E
    * ? perl -e "$SA_SCORE > $CRM_MISCLASSIFY_SA_AUTOTRAIN_LIMIT_SPAM || exit 1"
    {
      LOG="spamfilter: crm114 found ham ($CRM_SCORE), but SA is more convincing ($SA_SCORE > $CRM_MISCLASSIFY_SA_AUTOTRAIN_LIMIT_SPAM)$NL"
      RETRAIN=spam

      :0 fw
      |$FORMAIL -A "X-CRM114-Retrain: spam, according to SA (score $SA_SCORE > $CRM_MISCLASSIFY_SA_AUTOTRAIN_LIMIT_SPAM)"
    }

    # SA is not convincing, mark as disagreement
    :0 E
    {
      LOG="spamfilter: crm114 found ham ($CRM_SCORE), but SA thinks it's spam ($SA_SCORE)$NL"
      SPAM_DISAGREE=sa-spam

      :0 fw
      |$FORMAIL -I "X-Spam: disagree (crm114:ham/$CRM_SCORE SA:spam/$SA_SCORE)"
    }
  }

  ## agreement: both filters see spam
  :0 E
  * CRM_SPAM ?? SPAM
  * SA_SPAM ?? ^^Yes^^
  {
    IS_SPAM=sa+crm

    :0 fw
    |$FORMAIL -I"X-Spam: yes (crm114:$CRM_SCORE SA:$SA_SCORE)"
  }

  ## everything else is tagged as not spam
  :0 Efw
  |$FORMAIL -I"X-Spam: no (crm114:$CRM_SCORE SA:$SA_SCORE)"
}

# schedule spamtrapped ham for retraining as spam
:0
* SPAMTRAPPED ?? .
* ! SKIP_SPAMCHECKS ?? .
* ! IS_SPAM ?? .
{
  LOG="spamfilter: found spamtrapped ham, retraining...$NL"

  :0 fw
  |$FORMAIL -I"X-Spam: spamtrapped ham"

  IS_SPAM=spamtrapped-ham
  RETRAIN=spam

  # a bare variable name unsets it: the spamtrap settles any "unsure" verdict
  SPAM_UNSURE
}

#VERBOSE=no