X-Git-Url: https://git.madduck.net/etc/mailfilter.git/blobdiff_plain/80cfa738b4c61d211347ac9038d8048a3a9b8799..2e7884b9fa8f9c3961a4acb15d58e6ec534ecd5a:/procmail/spamfilter diff --git a/procmail/spamfilter b/procmail/spamfilter index 0fab788..9e6d787 100755 --- a/procmail/spamfilter +++ b/procmail/spamfilter @@ -18,10 +18,9 @@ PMDIR=${PMDIR:-$HOME/.etc/mailfilter/procmail} #VERBOSE=yes -INCLUDERC=$PMDIR/pre-spam-cleanup - # no need to reprocess messages that went into a spamtrap # UPDATE: retrain them only if diagnosed as non-spam, see below +# Note: add E flag to next recipe when uncommenting #:0 #* SPAMTRAPPED ?? . #{ @@ -30,6 +29,18 @@ INCLUDERC=$PMDIR/pre-spam-cleanup # |$FORMAIL -I"X-Spam: spamtrapped" #} +# check whether this message is being reinjected +TRAINED_AS +:0 +*$ $REPROC_MESSAGE +* ^X-Trained-As: \/(h|sp)am +{ + LOG="spamfilter: skipping already trained $MATCH$NL" + :0 + * MATCH ?? spam + { IS_SPAM=already-trained } +} + # let earlier parts of the mailfilter cause bypassing the checks :0 E * SKIP_SPAMCHECKS ?? . @@ -64,6 +75,8 @@ INCLUDERC=$PMDIR/pre-spam-cleanup # now run the spamfilters :0 E { + INCLUDERC=$PMDIR/pre-spam-cleanup + # crm114 CRM_SPAM=UNKNOWN CRM_SCORE=0 @@ -113,11 +126,21 @@ INCLUDERC=$PMDIR/pre-spam-cleanup :0 * CRM_SPAM ?? UNSURE { - # retrain as ham + # retrain spamtrapped message :0 + * SPAMTRAPPED ?? . + { + LOG="spamfilter: scheduling retraining with SPAM due to spamtrap$NL" + :0 fw + |$FORMAIL -A "X-CRM114-Autotrain: spam, due to spamtrap" + RETRAIN=spam + } + + # retrain as ham + :0 E * ? perl -e "$SA_SCORE <= $CRM_UNSURE_SA_AUTOTRAIN_LIMIT_HAM || exit 1" { - LOG="spamfilter: scheduling crm114 retraining with HAM (score $SA_SCORE <= $CRM_UNSURE_SA_AUTOTRAIN_LIMIT_HAM)$NL" + LOG="spamfilter: scheduling retraining with HAM (score $SA_SCORE <= $CRM_UNSURE_SA_AUTOTRAIN_LIMIT_HAM)$NL" :0 fw |$FORMAIL -A "X-CRM114-Autotrain: ham, according to SA (score $SA_SCORE <= $CRM_UNSURE_SA_AUTOTRAIN_LIMIT_HAM)" RETRAIN=ham @@ -125,9 +148,9 @@ INCLUDERC=$PMDIR/pre-spam-cleanup # retrain as spam :0 E - * ? perl -e "$SA_SCORE > $CRM_UNSURE_SA_AUTOTRAIN_LIMIT_SPAM || exit 1" + * 1^0 ? perl -e "$SA_SCORE > $CRM_UNSURE_SA_AUTOTRAIN_LIMIT_SPAM || exit 1" { - LOG="spamfilter: scheduling crm114 retraining with SPAM (score $SA_SCORE > $CRM_UNSURE_SA_AUTOTRAIN_LIMIT_SPAM)$NL" + LOG="spamfilter: scheduling retraining with SPAM (score $SA_SCORE > $CRM_UNSURE_SA_AUTOTRAIN_LIMIT_SPAM)$NL" :0 fw |$FORMAIL -A "X-CRM114-Autotrain: spam, according to SA (score $SA_SCORE > $CRM_UNSURE_SA_AUTOTRAIN_LIMIT_SPAM)" RETRAIN=spam @@ -148,8 +171,18 @@ INCLUDERC=$PMDIR/pre-spam-cleanup * CRM_SPAM ?? SPAM * SA_SPAM ?? No { - # SA is convincing, so retrain crm114 + # message was spamtrapped anyway :0 + * SPAMTRAPPED ?? . + { + LOG="spamfilter: resolving crm114/SA disagreement due to spamtrap ($CRM_SCORE/$SA_SCORE)$NL" + RETRAIN=spam + :0 fw + |$FORMAIL -A "X-CRM114-Retrain: spam, due to spamtrap" + } + + # SA is convincing, so retrain crm114 + :0 E * ? perl -e "$SA_SCORE <= $CRM_MISCLASSIFY_SA_AUTOTRAIN_LIMIT_HAM || exit 1" { LOG="spamfilter: crm114 found spam ($CRM_SCORE), but SA is more convincing ($SA_SCORE <= $CRM_MISCLASSIFY_SA_AUTOTRAIN_LIMIT_HAM)$NL" @@ -173,6 +206,16 @@ INCLUDERC=$PMDIR/pre-spam-cleanup * CRM_SPAM ?? GOOD * SA_SPAM ?? Yes { + # message was spamtrapped anyway + :0 + * SPAMTRAPPED ?? . + { + LOG="spamfilter: resolving crm114/SA disagreement due to spamtrap ($CRM_SCORE/$SA_SCORE)$NL" + RETRAIN=spam + :0 fw + |$FORMAIL -A "X-CRM114-Retrain: spam, due to spamtrap" + } + # SA is convincing, so retrain crm114 :0 * ? perl -e "$SA_SCORE > $CRM_MISCLASSIFY_SA_AUTOTRAIN_LIMIT_SPAM || exit 1" @@ -210,7 +253,13 @@ INCLUDERC=$PMDIR/pre-spam-cleanup :0 * SPAMTRAPPED ?? . * ! IS_SPAM ?? . -{ RETRAIN=spam } +{ + LOG="spamfilter: found spamtrapped ham, retraining...$NL" + :0 fw + |$FORMAIL -I"X-Spam: spamtrapped ham" + IS_SPAM=spamtrapped-ham + RETRAIN=spam +} INCLUDERC=$PMDIR/handlespam #VERBOSE=no