]>
git.madduck.net Git - etc/mailfilter.git/blobdiff - procmail/spamfilter
madduck's git repository
Every one of the projects in this repository is available at the canonical
URL git://git.madduck.net/madduck/pub/<projectpath> — see
each project's metadata for the exact URL.
All patches and comments are welcome. Please squash your changes to logical
commits before using git-format-patch and git-send-email to
patches@git.madduck.net.
If you read over the Git project's submission guidelines and adhere to them,
I'd be especially grateful.
SSH access, as well as push access, can be individually
arranged.
If you use my repositories frequently, consider adding the following
snippet to ~/.gitconfig and using the third clone URL listed for each
project:
[url "git://git.madduck.net/madduck/"]
insteadOf = madduck:
-INCLUDERC=$PMDIR/pre-spam-cleanup
-
# no need to reprocess messages that went into a spamtrap
# no need to reprocess messages that went into a spamtrap
+# UPDATE: retrain them only if diagnosed as non-spam, see below
+# Note: add E flag to next recipe when uncommenting
+#:0
+#* SPAMTRAPPED ?? .
+#{
+# LOG="spamfilter: skipping checks for spamtrapped message$NL"
+# :0 fw
+# |$FORMAIL -I"X-Spam: spamtrapped"
+#}
+
+# check whether this message is being resubmitted
-* SPAMTRAPPED ?? .
-{
- LOG="spamfilter: skipping checks for spamtrapped message$NL"
+*$ $MSG_DEJAVU
+{
+ :0
+ * TRAINED_AS ?? .
+ {
+ LOG="spamfilter: skipping already trained $TRAINED_AS$NL"
+ :0 fw
+ |$FORMAIL -I"X-Spam: $TRAINED_AS (already trained)"
+ :0
+ * TRAINED_AS ?? spam
+ { IS_SPAM=already-trained }
+ }
+
+ :0 E
+ {
+ LOG="spamfilter: skipping resubmitted message$NL"
+ :0 fw
+ |$FORMAIL -I"X-Spam: unknown (resubmitted)"
+ }
+}
+
+# do not run spamfilters if the message destination is already set
+:0 E
+* DEST ?? .
+{
+ LOG="spamfilter: message already routed to '$DEST'$NL"
- |$FORMAIL -I"X-Spam: spamtrapped"
+ |$FORMAIL -I"X-Spam: unknown (already routed)"
+ SPAM_UNKNOWN=already-destined
}
# let earlier parts of the mailfilter cause bypassing the checks
:0 E
* SKIP_SPAMCHECKS ?? .
}
# let earlier parts of the mailfilter cause bypassing the checks
:0 E
* SKIP_SPAMCHECKS ?? .
-{
- LOG="spamfilter: skipping checks as requested: $SKIP_SPAMCHECKS$NL"
+{
+ LOG="spamfilter: skipping checks as requested: $SKIP_SPAMCHECKS$NL"
:0 fw
|$FORMAIL -I"X-Spam: unknown (skip requested)"
SPAM_UNKNOWN=skip-requested
:0 fw
|$FORMAIL -I"X-Spam: unknown (skip requested)"
SPAM_UNKNOWN=skip-requested
:0 fw
|$FORMAIL -I"X-Spam: unknown (check skipped)"
SPAM_UNKNOWN=skip-match
:0 fw
|$FORMAIL -I"X-Spam: unknown (check skipped)"
SPAM_UNKNOWN=skip-match
}
# sanity check on message size
}
# sanity check on message size
# now run the spamfilters
:0 E
{
# now run the spamfilters
:0 E
{
+ INCLUDERC=$PMDIR/spamtraps
+ INCLUDERC=$PMDIR/spammers
+ INCLUDERC=$PMDIR/spampat
+ INCLUDERC=$PMDIR/pre-spam-cleanup
+
# crm114
CRM_SPAM=UNKNOWN
CRM_SCORE=0
:0
* !SKIP_CRM ?? .
{
# crm114
CRM_SPAM=UNKNOWN
CRM_SCORE=0
:0
* !SKIP_CRM ?? .
{
+ #TODO: somehow filter out headers we added
:0
* CRM_SPAM ?? UNSURE
{
:0
* CRM_SPAM ?? UNSURE
{
+ # retrain spamtrapped message
+ * SPAMTRAPPED ?? .
+ {
+ LOG="spamfilter: scheduling retraining with SPAM due to spamtrap$NL"
+ :0 fw
+ |$FORMAIL -A "X-CRM114-Autotrain: spam, due to spamtrap"
+ RETRAIN=spam
+ }
+
+ # retrain as ham
+ :0 E
* ? perl -e "$SA_SCORE <= $CRM_UNSURE_SA_AUTOTRAIN_LIMIT_HAM || exit 1"
{
* ? perl -e "$SA_SCORE <= $CRM_UNSURE_SA_AUTOTRAIN_LIMIT_HAM || exit 1"
{
- LOG="spamfilter: scheduling crm114 retraining with HAM (score $SA_SCORE <= $CRM_UNSURE_SA_AUTOTRAIN_LIMIT_HAM)$NL"
+ LOG="spamfilter: scheduling retraining with HAM (score $SA_SCORE <= $CRM_UNSURE_SA_AUTOTRAIN_LIMIT_HAM)$NL"
:0 fw
|$FORMAIL -A "X-CRM114-Autotrain: ham, according to SA (score $SA_SCORE <= $CRM_UNSURE_SA_AUTOTRAIN_LIMIT_HAM)"
:0 fw
|$FORMAIL -A "X-CRM114-Autotrain: ham, according to SA (score $SA_SCORE <= $CRM_UNSURE_SA_AUTOTRAIN_LIMIT_HAM)"
- * ? perl -e "$SA_SCORE > $CRM_UNSURE_SA_AUTOTRAIN_LIMIT_SPAM || exit 1"
+ * 1^0 ? perl -e "$SA_SCORE > $CRM_UNSURE_SA_AUTOTRAIN_LIMIT_SPAM || exit 1"
- LOG="spamfilter: scheduling crm114 retraining with SPAM (score $SA_SCORE > $CRM_UNSURE_SA_AUTOTRAIN_LIMIT_SPAM)$NL"
+ LOG="spamfilter: scheduling retraining with SPAM (score $SA_SCORE > $CRM_UNSURE_SA_AUTOTRAIN_LIMIT_SPAM)$NL"
:0 fw
|$FORMAIL -A "X-CRM114-Autotrain: spam, according to SA (score $SA_SCORE > $CRM_UNSURE_SA_AUTOTRAIN_LIMIT_SPAM)"
:0 fw
|$FORMAIL -A "X-CRM114-Autotrain: spam, according to SA (score $SA_SCORE > $CRM_UNSURE_SA_AUTOTRAIN_LIMIT_SPAM)"
}
# skip retraining if SA is not convinced
}
# skip retraining if SA is not convinced
* CRM_SPAM ?? SPAM
* SA_SPAM ?? No
{
* CRM_SPAM ?? SPAM
* SA_SPAM ?? No
{
- # SA is convincing, so retrain crm114
+ # message was spamtrapped anyway
+ * SPAMTRAPPED ?? .
+ {
+ LOG="spamfilter: resolving crm114/SA disagreement due to spamtrap ($CRM_SCORE/$SA_SCORE)$NL"
+ RETRAIN=spam
+ :0 fw
+ |$FORMAIL -A "X-CRM114-Retrain: spam, due to spamtrap"
+ }
+
+ # SA is convincing, so retrain crm114
+ :0 E
* ? perl -e "$SA_SCORE <= $CRM_MISCLASSIFY_SA_AUTOTRAIN_LIMIT_HAM || exit 1"
{
LOG="spamfilter: crm114 found spam ($CRM_SCORE), but SA is more convincing ($SA_SCORE <= $CRM_MISCLASSIFY_SA_AUTOTRAIN_LIMIT_HAM)$NL"
* ? perl -e "$SA_SCORE <= $CRM_MISCLASSIFY_SA_AUTOTRAIN_LIMIT_HAM || exit 1"
{
LOG="spamfilter: crm114 found spam ($CRM_SCORE), but SA is more convincing ($SA_SCORE <= $CRM_MISCLASSIFY_SA_AUTOTRAIN_LIMIT_HAM)$NL"
:0 fw
|$FORMAIL -A "X-CRM114-Retrain: ham, according to SA (score $SA_SCORE <= $CRM_MISCLASSIFY_SA_AUTOTRAIN_LIMIT_HAM)"
}
:0 fw
|$FORMAIL -A "X-CRM114-Retrain: ham, according to SA (score $SA_SCORE <= $CRM_MISCLASSIFY_SA_AUTOTRAIN_LIMIT_HAM)"
}
* CRM_SPAM ?? GOOD
* SA_SPAM ?? Yes
{
* CRM_SPAM ?? GOOD
* SA_SPAM ?? Yes
{
+ # message was spamtrapped anyway
+ :0
+ * SPAMTRAPPED ?? .
+ {
+ LOG="spamfilter: resolving crm114/SA disagreement due to spamtrap ($CRM_SCORE/$SA_SCORE)$NL"
+ RETRAIN=spam
+ :0 fw
+ |$FORMAIL -A "X-CRM114-Retrain: spam, due to spamtrap"
+ }
+
# SA is convincing, so retrain crm114
:0
* ? perl -e "$SA_SCORE > $CRM_MISCLASSIFY_SA_AUTOTRAIN_LIMIT_SPAM || exit 1"
{
LOG="spamfilter: crm114 found ham ($CRM_SCORE), but SA is more convincing ($SA_SCORE > $CRM_MISCLASSIFY_SA_AUTOTRAIN_LIMIT_SPAM)$NL"
# SA is convincing, so retrain crm114
:0
* ? perl -e "$SA_SCORE > $CRM_MISCLASSIFY_SA_AUTOTRAIN_LIMIT_SPAM || exit 1"
{
LOG="spamfilter: crm114 found ham ($CRM_SCORE), but SA is more convincing ($SA_SCORE > $CRM_MISCLASSIFY_SA_AUTOTRAIN_LIMIT_SPAM)$NL"
:0 fw
|$FORMAIL -A "X-CRM114-Retrain: spam, according to SA (score $SA_SCORE > $CRM_MISCLASSIFY_SA_AUTOTRAIN_LIMIT_SPAM)"
}
:0 fw
|$FORMAIL -A "X-CRM114-Retrain: spam, according to SA (score $SA_SCORE > $CRM_MISCLASSIFY_SA_AUTOTRAIN_LIMIT_SPAM)"
}
:0 E
* CRM_SPAM ?? SPAM
* SA_SPAM ?? Yes
:0 E
* CRM_SPAM ?? SPAM
* SA_SPAM ?? Yes
IS_SPAM=sa+crm
:0 fw
|$FORMAIL -I"X-Spam: yes (crm114:$CRM_SCORE SA:$SA_SCORE)"
IS_SPAM=sa+crm
:0 fw
|$FORMAIL -I"X-Spam: yes (crm114:$CRM_SCORE SA:$SA_SCORE)"
|$FORMAIL -I"X-Spam: no (crm114:$CRM_SCORE SA:$SA_SCORE)"
}
|$FORMAIL -I"X-Spam: no (crm114:$CRM_SCORE SA:$SA_SCORE)"
}
-INCLUDERC=$PMDIR/handlespam
+# schedule spamtrapped ham for retraining as spam
+:0
+* SPAMTRAPPED ?? .
+* ! SKIP_SPAMCHECKS ?? .
+* ! IS_SPAM ?? .
+{
+ LOG="spamfilter: found spamtrapped ham, retraining...$NL"
+ :0 fw
+ |$FORMAIL -I"X-Spam: spamtrapped ham"
+ IS_SPAM=spamtrapped-ham
+ RETRAIN=spam
+ SPAM_UNSURE
+}
+