X-Git-Url: https://git.madduck.net/etc/mailfilter.git/blobdiff_plain/5af333ca3b83425e5cb3aa704b37654d576856c1..5838a8589ee04815b122cbab71bba7c7bd21fd9f:/procmail/spamfilter

diff --git a/procmail/spamfilter b/procmail/spamfilter
index a882087..491f157 100755
--- a/procmail/spamfilter
+++ b/procmail/spamfilter
@@ -18,22 +18,55 @@ PMDIR=${PMDIR:-$HOME/.etc/mailfilter/procmail}
 
 #VERBOSE=yes
 
-INCLUDERC=$PMDIR/pre-spam-cleanup
-
 # no need to reprocess messages that went into a spamtrap
+# UPDATE: retrain them only if diagnosed as non-spam, see below
+# Note: add E flag to next recipe when uncommenting
+#:0
+#* SPAMTRAPPED ?? .
+#{
+#  LOG="spamfilter: skipping checks for spamtrapped message$NL"
+#  :0 fw
+#  |$FORMAIL -I"X-Spam: spamtrapped"
+#}
+
+# check whether this message is being resubmitted
 :0
-* SPAMTRAPPED ?? .
-{
-  LOG="spamfilter: skipping checks for spamtrapped message$NL"
+*$ $MSG_DEJAVU
+{
+  :0
+  * TRAINED_AS ?? .
+  {
+    LOG="spamfilter: skipping already trained $TRAINED_AS$NL"
+    :0 fw
+    |$FORMAIL -I"X-Spam: $TRAINED_AS (already trained)"
+    :0
+    * TRAINED_AS ?? spam
+    { IS_SPAM=already-trained }
+  }
+
+  :0 E
+  {
+    LOG="spamfilter: skipping resubmitted message$NL"
+    :0 fw
+    |$FORMAIL -I"X-Spam: unknown (resubmitted)"
+  }
+}
+
+# do not run spamfilters if the message destination is already set
+:0 E
+* DEST ?? .
+{
+  LOG="spamfilter: message already routed to '$DEST'$NL"
   :0 fw
-  |$FORMAIL -I"X-Spam: spamtrapped"
+  |$FORMAIL -I"X-Spam: unknown (already routed)"
+  SPAM_UNKNOWN=already-destined
 }
 
 # let earlier parts of the mailfilter cause bypassing the checks
 :0 E
 * SKIP_SPAMCHECKS ?? .
-{
-  LOG="spamfilter: skipping checks as requested: $SKIP_SPAMCHECKS$NL"
+{
+  LOG="spamfilter: skipping checks as requested: $SKIP_SPAMCHECKS$NL"
   :0 fw
   |$FORMAIL -I"X-Spam: unknown (skip requested)"
   SPAM_UNKNOWN=skip-requested
@@ -48,6 +81,7 @@ INCLUDERC=$PMDIR/pre-spam-cleanup
   :0 fw
   |$FORMAIL -I"X-Spam: unknown (check skipped)"
   SPAM_UNKNOWN=skip-match
+  SKIP_SPAMCHECKS=match
 }
 
 # sanity check on message size
@@ -63,12 +97,18 @@ INCLUDERC=$PMDIR/pre-spam-cleanup
 # now run the spamfilters
 :0 E
 {
+  INCLUDERC=$PMDIR/spamtraps
+  INCLUDERC=$PMDIR/spammers
+  INCLUDERC=$PMDIR/spampat
+  INCLUDERC=$PMDIR/pre-spam-cleanup
+
   # crm114
   CRM_SPAM=UNKNOWN
   CRM_SCORE=0
   :0
   * !SKIP_CRM ?? .
   {
+    #TODO: somehow filter out headers we added
     :0 fw
     |$CRM114
 
@@ -112,24 +152,34 @@ INCLUDERC=$PMDIR/pre-spam-cleanup
   :0
   * CRM_SPAM ?? UNSURE
   {
-    # retrain as ham
+    # retrain spamtrapped message
     :0
+    * SPAMTRAPPED ?? .
+    {
+      LOG="spamfilter: scheduling retraining with SPAM due to spamtrap$NL"
+      :0 fw
+      |$FORMAIL -A "X-CRM114-Autotrain: spam, due to spamtrap"
+      RETRAIN=spam
+    }
+
+    # retrain as ham
+    :0 E
     * ? perl -e "$SA_SCORE <= $CRM_UNSURE_SA_AUTOTRAIN_LIMIT_HAM || exit 1"
     {
-      LOG="spamfilter: scheduling crm114 retraining with HAM (score $SA_SCORE <= $CRM_UNSURE_SA_AUTOTRAIN_LIMIT_HAM)$NL"
+      LOG="spamfilter: scheduling retraining with HAM (score $SA_SCORE <= $CRM_UNSURE_SA_AUTOTRAIN_LIMIT_HAM)$NL"
       :0 fw
       |$FORMAIL -A "X-CRM114-Autotrain: ham, according to SA (score $SA_SCORE <= $CRM_UNSURE_SA_AUTOTRAIN_LIMIT_HAM)"
-      CRM_RETRAIN=ham
+      RETRAIN=ham
     }
 
     # retrain as spam
     :0 E
-    * ? perl -e "$SA_SCORE > $CRM_UNSURE_SA_AUTOTRAIN_LIMIT_SPAM || exit 1"
+    * 1^0 ? perl -e "$SA_SCORE > $CRM_UNSURE_SA_AUTOTRAIN_LIMIT_SPAM || exit 1"
     {
-      LOG="spamfilter: scheduling crm114 retraining with SPAM (score $SA_SCORE > $CRM_UNSURE_SA_AUTOTRAIN_LIMIT_SPAM)$NL"
+      LOG="spamfilter: scheduling retraining with SPAM (score $SA_SCORE > $CRM_UNSURE_SA_AUTOTRAIN_LIMIT_SPAM)$NL"
       :0 fw
       |$FORMAIL -A "X-CRM114-Autotrain: spam, according to SA (score $SA_SCORE > $CRM_UNSURE_SA_AUTOTRAIN_LIMIT_SPAM)"
-      CRM_RETRAIN=spam
+      RETRAIN=spam
     }
 
     # skip retraining if SA is not convinced
@@ -147,12 +197,22 @@ INCLUDERC=$PMDIR/pre-spam-cleanup
   * CRM_SPAM ?? SPAM
   * SA_SPAM ?? No
   {
-    # SA is convincing, so retrain crm114
+    # message was spamtrapped anyway
     :0
+    * SPAMTRAPPED ?? .
+    {
+      LOG="spamfilter: resolving crm114/SA disagreement due to spamtrap ($CRM_SCORE/$SA_SCORE)$NL"
+      RETRAIN=spam
+      :0 fw
+      |$FORMAIL -A "X-CRM114-Retrain: spam, due to spamtrap"
+    }
+
+    # SA is convincing, so retrain crm114
+    :0 E
     * ? perl -e "$SA_SCORE <= $CRM_MISCLASSIFY_SA_AUTOTRAIN_LIMIT_HAM || exit 1"
     {
       LOG="spamfilter: crm114 found spam ($CRM_SCORE), but SA is more convincing ($SA_SCORE <= $CRM_MISCLASSIFY_SA_AUTOTRAIN_LIMIT_HAM)$NL"
-      CRM_RETRAIN=ham
+      RETRAIN=ham
       :0 fw
       |$FORMAIL -A "X-CRM114-Retrain: ham, according to SA (score $SA_SCORE <= $CRM_MISCLASSIFY_SA_AUTOTRAIN_LIMIT_HAM)"
     }
@@ -172,12 +232,22 @@ INCLUDERC=$PMDIR/pre-spam-cleanup
   * CRM_SPAM ?? GOOD
   * SA_SPAM ?? Yes
   {
+    # message was spamtrapped anyway
+    :0
+    * SPAMTRAPPED ?? .
+    {
+      LOG="spamfilter: resolving crm114/SA disagreement due to spamtrap ($CRM_SCORE/$SA_SCORE)$NL"
+      RETRAIN=spam
+      :0 fw
+      |$FORMAIL -A "X-CRM114-Retrain: spam, due to spamtrap"
+    }
+
     # SA is convincing, so retrain crm114
     :0
     * ? perl -e "$SA_SCORE > $CRM_MISCLASSIFY_SA_AUTOTRAIN_LIMIT_SPAM || exit 1"
     {
       LOG="spamfilter: crm114 found ham ($CRM_SCORE), but SA is more convincing ($SA_SCORE > $CRM_MISCLASSIFY_SA_AUTOTRAIN_LIMIT_SPAM)$NL"
-      CRM_RETRAIN=spam
+      RETRAIN=spam
       :0 fw
       |$FORMAIL -A "X-CRM114-Retrain: spam, according to SA (score $SA_SCORE > $CRM_MISCLASSIFY_SA_AUTOTRAIN_LIMIT_SPAM)"
     }
@@ -195,7 +265,7 @@ INCLUDERC=$PMDIR/pre-spam-cleanup
   :0 E
   * CRM_SPAM ?? SPAM
   * SA_SPAM ?? Yes
-  {
+  {
     IS_SPAM=sa+crm
     :0 fw
     |$FORMAIL -I"X-Spam: yes (crm114:$CRM_SCORE SA:$SA_SCORE)"
@@ -205,5 +275,18 @@ INCLUDERC=$PMDIR/pre-spam-cleanup
   |$FORMAIL -I"X-Spam: no (crm114:$CRM_SCORE SA:$SA_SCORE)"
 }
 
-INCLUDERC=$PMDIR/handlespam
+# schedule spamtrapped ham for retraining as spam
+:0
+* SPAMTRAPPED ?? .
+* ! SKIP_SPAMCHECKS ?? .
+* ! IS_SPAM ?? .
+{
+  LOG="spamfilter: found spamtrapped ham, retraining...$NL"
+  :0 fw
+  |$FORMAIL -I"X-Spam: spamtrapped ham"
+  IS_SPAM=spamtrapped-ham
+  RETRAIN=spam
+  SPAM_UNSURE
+}
+
 #VERBOSE=no