git.madduck.net Git - etc/mailfilter.git/blobdiff - procmail/spamfilter

madduck's git repository

Every one of the projects in this repository is available at the canonical URL git://git.madduck.net/madduck/pub/<projectpath> — see each project's metadata for the exact URL.

All patches and comments are welcome. Please squash your changes into logical commits before using git-format-patch and git-send-email to send them to patches@git.madduck.net. I'd be especially grateful if you read the Git project's submission guidelines and adhered to them.

SSH access, as well as push access, can be individually arranged.

If you use my repositories frequently, consider adding the following snippet to ~/.gitconfig and using the third clone URL listed for each project:

[url "git://git.madduck.net/madduck/"]
  insteadOf = madduck:

filter riss@ul
[etc/mailfilter.git] / procmail / spamfilter
index 0fab78829bc4b9ded784a405f259ab3377c082a8..2ffec514a5310a4093cb13f7355eb5a1b22c6921 100755 (executable)
@@ -18,23 +18,34 @@ PMDIR=${PMDIR:-$HOME/.etc/mailfilter/procmail}
 
 #VERBOSE=yes
 
-INCLUDERC=$PMDIR/pre-spam-cleanup
-
 # no need to reprocess messages that went into a spamtrap
 # UPDATE: retrain them only if diagnosed as non-spam, see below
+# Note: add E flag to next recipe when uncommenting
 #:0
 #* SPAMTRAPPED ?? .
-#{ 
+#{
 #  LOG="spamfilter:  skipping checks for spamtrapped message$NL"
 #  :0 fw
 #  |$FORMAIL -I"X-Spam: spamtrapped"
 #}
 
+# check whether this message is being reinjected
+TRAINED_AS
+:0
+*$ $MSG_DEJAVU
+* ^X-Trained-As: \/(h|sp)am
+{
+  LOG="spamfilter:  skipping already trained $MATCH$NL"
+  :0
+  * MATCH ?? spam
+  { IS_SPAM=already-trained }
+}
+
 # let earlier parts of the mailfilter cause bypassing the checks
 :0 E
 * SKIP_SPAMCHECKS ?? .
-{ 
-  LOG="spamfilter:  skipping checks as requested: $SKIP_SPAMCHECKS$NL" 
+{
+  LOG="spamfilter:  skipping checks as requested: $SKIP_SPAMCHECKS$NL"
   :0 fw
   |$FORMAIL -I"X-Spam: unknown (skip requested)"
   SPAM_UNKNOWN=skip-requested
@@ -49,6 +60,7 @@ INCLUDERC=$PMDIR/pre-spam-cleanup
   :0 fw
   |$FORMAIL -I"X-Spam: unknown (check skipped)"
   SPAM_UNKNOWN=skip-match
+  SKIP_SPAMCHECKS=match
 }
 
 # sanity check on message size
@@ -64,6 +76,8 @@ INCLUDERC=$PMDIR/pre-spam-cleanup
 # now run the spamfilters
 :0 E
 {
+  INCLUDERC=$PMDIR/pre-spam-cleanup
+
   # crm114
   CRM_SPAM=UNKNOWN
   CRM_SCORE=0
@@ -113,11 +127,21 @@ INCLUDERC=$PMDIR/pre-spam-cleanup
   :0
   * CRM_SPAM ?? UNSURE
   {
-    # retrain as ham
+    # retrain spamtrapped message
     :0
+    * SPAMTRAPPED ?? .
+    {
+      LOG="spamfilter:  scheduling retraining with SPAM due to spamtrap$NL"
+      :0 fw
+      |$FORMAIL -A "X-CRM114-Autotrain: spam, due to spamtrap"
+      RETRAIN=spam
+    }
+
+    # retrain as ham
+    :0 E
     * ? perl -e "$SA_SCORE <= $CRM_UNSURE_SA_AUTOTRAIN_LIMIT_HAM || exit 1"
     {
-      LOG="spamfilter:  scheduling crm114 retraining with HAM (score $SA_SCORE <= $CRM_UNSURE_SA_AUTOTRAIN_LIMIT_HAM)$NL"
+      LOG="spamfilter:  scheduling retraining with HAM (score $SA_SCORE <= $CRM_UNSURE_SA_AUTOTRAIN_LIMIT_HAM)$NL"
       :0 fw
       |$FORMAIL -A "X-CRM114-Autotrain: ham, according to SA (score $SA_SCORE <= $CRM_UNSURE_SA_AUTOTRAIN_LIMIT_HAM)"
       RETRAIN=ham
@@ -125,9 +149,9 @@ INCLUDERC=$PMDIR/pre-spam-cleanup
 
     # retrain as spam
     :0 E
-    * ? perl -e "$SA_SCORE > $CRM_UNSURE_SA_AUTOTRAIN_LIMIT_SPAM || exit 1"
+    * 1^0 ? perl -e "$SA_SCORE > $CRM_UNSURE_SA_AUTOTRAIN_LIMIT_SPAM || exit 1"
     {
-      LOG="spamfilter:  scheduling crm114 retraining with SPAM (score $SA_SCORE > $CRM_UNSURE_SA_AUTOTRAIN_LIMIT_SPAM)$NL"
+      LOG="spamfilter:  scheduling retraining with SPAM (score $SA_SCORE > $CRM_UNSURE_SA_AUTOTRAIN_LIMIT_SPAM)$NL"
       :0 fw
       |$FORMAIL -A "X-CRM114-Autotrain: spam, according to SA (score $SA_SCORE > $CRM_UNSURE_SA_AUTOTRAIN_LIMIT_SPAM)"
       RETRAIN=spam
@@ -148,8 +172,18 @@ INCLUDERC=$PMDIR/pre-spam-cleanup
   * CRM_SPAM ?? SPAM
   * SA_SPAM ?? No
   {
-    # SA is convincing, so retrain crm114
+    # message was spamtrapped anyway
     :0
+    * SPAMTRAPPED ?? .
+    {
+      LOG="spamfilter:  resolving crm114/SA disagreement due to spamtrap ($CRM_SCORE/$SA_SCORE)$NL"
+      RETRAIN=spam
+      :0 fw
+      |$FORMAIL -A "X-CRM114-Retrain: spam, due to spamtrap"
+    }
+
+    # SA is convincing, so retrain crm114
+    :0 E
     * ? perl -e "$SA_SCORE <= $CRM_MISCLASSIFY_SA_AUTOTRAIN_LIMIT_HAM || exit 1"
     {
       LOG="spamfilter:  crm114 found spam ($CRM_SCORE), but SA is more convincing ($SA_SCORE <= $CRM_MISCLASSIFY_SA_AUTOTRAIN_LIMIT_HAM)$NL"
@@ -173,6 +207,16 @@ INCLUDERC=$PMDIR/pre-spam-cleanup
   * CRM_SPAM ?? GOOD
   * SA_SPAM ?? Yes
   {
+    # message was spamtrapped anyway
+    :0
+    * SPAMTRAPPED ?? .
+    {
+      LOG="spamfilter:  resolving crm114/SA disagreement due to spamtrap ($CRM_SCORE/$SA_SCORE)$NL"
+      RETRAIN=spam
+      :0 fw
+      |$FORMAIL -A "X-CRM114-Retrain: spam, due to spamtrap"
+    }
+
     # SA is convincing, so retrain crm114
     :0
     * ? perl -e "$SA_SCORE > $CRM_MISCLASSIFY_SA_AUTOTRAIN_LIMIT_SPAM || exit 1"
@@ -196,7 +240,7 @@ INCLUDERC=$PMDIR/pre-spam-cleanup
   :0 E
   * CRM_SPAM ?? SPAM
   * SA_SPAM ?? Yes
-  { 
+  {
     IS_SPAM=sa+crm
     :0 fw
     |$FORMAIL -I"X-Spam: yes (crm114:$CRM_SCORE SA:$SA_SCORE)"
@@ -209,8 +253,15 @@ INCLUDERC=$PMDIR/pre-spam-cleanup
 # schedule spamtrapped ham for retraining as spam
 :0
 * SPAMTRAPPED ?? .
+* ! SKIP_SPAMCHECKS ?? .
 * ! IS_SPAM ?? .
-{ RETRAIN=spam }
+{
+  LOG="spamfilter:  found spamtrapped ham, retraining...$NL"
+  :0 fw
+  |$FORMAIL -I"X-Spam: spamtrapped ham"
+  IS_SPAM=spamtrapped-ham
+  RETRAIN=spam
+  SPAM_UNSURE
+}
 
-INCLUDERC=$PMDIR/handlespam
 #VERBOSE=no