git.madduck.net Git - etc/mailfilter.git/blobdiff - procmail/spamfilter

madduck's git repository

Every one of the projects in this repository is available at the canonical URL git://git.madduck.net/madduck/pub/<projectpath> — see each project's metadata for the exact URL.

All patches and comments are welcome. Please squash your changes into logical commits before using git-format-patch and git-send-email to send them to patches@git.madduck.net. If you read over the Git project's submission guidelines and adhere to them, I'd be especially grateful.

SSH access, as well as push access, can be individually arranged.

If you use my repositories frequently, consider adding the following snippet to ~/.gitconfig and using the third clone URL listed for each project:

[url "git://git.madduck.net/madduck/"]
  insteadOf = madduck:

remove all references to albatross, which we do not use anymore
[etc/mailfilter.git] / procmail / spamfilter
index a882087f3a732d052e0501f46452d00530bed70e..66a725e37e5cb406e3ff76bef1887d2c5697c275 100755 (executable)
@@ -18,15 +18,27 @@ PMDIR=${PMDIR:-$HOME/.etc/mailfilter/procmail}
 
 #VERBOSE=yes
 
-INCLUDERC=$PMDIR/pre-spam-cleanup
-
 # no need to reprocess messages that went into a spamtrap
+# UPDATE: retrain them only if diagnosed as non-spam, see below
+# Note: add E flag to next recipe when uncommenting
+#:0
+#* SPAMTRAPPED ?? .
+#{ 
+#  LOG="spamfilter:  skipping checks for spamtrapped message$NL"
+#  :0 fw
+#  |$FORMAIL -I"X-Spam: spamtrapped"
+#}
+
+# check whether this message is being reinjected
+TRAINED_AS
 :0
-* SPAMTRAPPED ?? .
-{ 
-  LOG="spamfilter:  skipping checks for spamtrapped message$NL"
-  :0 fw
-  |$FORMAIL -I"X-Spam: spamtrapped"
+*$ $REPROC_MESSAGE
+* ^X-Trained-As: \/(h|sp)am
+{
+  LOG="spamfilter:  skipping already trained $MATCH$NL"
+  :0
+  * MATCH ?? spam
+  { IS_SPAM=already-trained }
 }
 
 # let earlier parts of the mailfilter cause bypassing the checks
@@ -48,6 +60,7 @@ INCLUDERC=$PMDIR/pre-spam-cleanup
   :0 fw
   |$FORMAIL -I"X-Spam: unknown (check skipped)"
   SPAM_UNKNOWN=skip-match
+  SKIP_SPAMCHECKS=match
 }
 
 # sanity check on message size
@@ -63,6 +76,8 @@ INCLUDERC=$PMDIR/pre-spam-cleanup
 # now run the spamfilters
 :0 E
 {
+  INCLUDERC=$PMDIR/pre-spam-cleanup
+
   # crm114
   CRM_SPAM=UNKNOWN
   CRM_SCORE=0
@@ -112,24 +127,34 @@ INCLUDERC=$PMDIR/pre-spam-cleanup
   :0
   * CRM_SPAM ?? UNSURE
   {
-    # retrain as ham
+    # retrain spamtrapped message
     :0
+    * SPAMTRAPPED ?? .
+    {
+      LOG="spamfilter:  scheduling retraining with SPAM due to spamtrap$NL"
+      :0 fw
+      |$FORMAIL -A "X-CRM114-Autotrain: spam, due to spamtrap"
+      RETRAIN=spam
+    }
+
+    # retrain as ham
+    :0 E
     * ? perl -e "$SA_SCORE <= $CRM_UNSURE_SA_AUTOTRAIN_LIMIT_HAM || exit 1"
     {
-      LOG="spamfilter:  scheduling crm114 retraining with HAM (score $SA_SCORE <= $CRM_UNSURE_SA_AUTOTRAIN_LIMIT_HAM)$NL"
+      LOG="spamfilter:  scheduling retraining with HAM (score $SA_SCORE <= $CRM_UNSURE_SA_AUTOTRAIN_LIMIT_HAM)$NL"
       :0 fw
       |$FORMAIL -A "X-CRM114-Autotrain: ham, according to SA (score $SA_SCORE <= $CRM_UNSURE_SA_AUTOTRAIN_LIMIT_HAM)"
-      CRM_RETRAIN=ham
+      RETRAIN=ham
     }
 
     # retrain as spam
     :0 E
-    * ? perl -e "$SA_SCORE > $CRM_UNSURE_SA_AUTOTRAIN_LIMIT_SPAM || exit 1"
+    * 1^0 ? perl -e "$SA_SCORE > $CRM_UNSURE_SA_AUTOTRAIN_LIMIT_SPAM || exit 1"
     {
-      LOG="spamfilter:  scheduling crm114 retraining with SPAM (score $SA_SCORE > $CRM_UNSURE_SA_AUTOTRAIN_LIMIT_SPAM)$NL"
+      LOG="spamfilter:  scheduling retraining with SPAM (score $SA_SCORE > $CRM_UNSURE_SA_AUTOTRAIN_LIMIT_SPAM)$NL"
       :0 fw
       |$FORMAIL -A "X-CRM114-Autotrain: spam, according to SA (score $SA_SCORE > $CRM_UNSURE_SA_AUTOTRAIN_LIMIT_SPAM)"
-      CRM_RETRAIN=spam
+      RETRAIN=spam
     }
 
     # skip retraining if SA is not convinced
@@ -147,12 +172,22 @@ INCLUDERC=$PMDIR/pre-spam-cleanup
   * CRM_SPAM ?? SPAM
   * SA_SPAM ?? No
   {
-    # SA is convincing, so retrain crm114
+    # message was spamtrapped anyway
     :0
+    * SPAMTRAPPED ?? .
+    {
+      LOG="spamfilter:  resolving crm114/SA disagreement due to spamtrap ($CRM_SCORE/$SA_SCORE)$NL"
+      RETRAIN=spam
+      :0 fw
+      |$FORMAIL -A "X-CRM114-Retrain: spam, due to spamtrap"
+    }
+
+    # SA is convincing, so retrain crm114
+    :0 E
     * ? perl -e "$SA_SCORE <= $CRM_MISCLASSIFY_SA_AUTOTRAIN_LIMIT_HAM || exit 1"
     {
       LOG="spamfilter:  crm114 found spam ($CRM_SCORE), but SA is more convincing ($SA_SCORE <= $CRM_MISCLASSIFY_SA_AUTOTRAIN_LIMIT_HAM)$NL"
-      CRM_RETRAIN=ham
+      RETRAIN=ham
       :0 fw
       |$FORMAIL -A "X-CRM114-Retrain: ham, according to SA (score $SA_SCORE <= $CRM_MISCLASSIFY_SA_AUTOTRAIN_LIMIT_HAM)"
     }
@@ -172,12 +207,22 @@ INCLUDERC=$PMDIR/pre-spam-cleanup
   * CRM_SPAM ?? GOOD
   * SA_SPAM ?? Yes
   {
+    # message was spamtrapped anyway
+    :0
+    * SPAMTRAPPED ?? .
+    {
+      LOG="spamfilter:  resolving crm114/SA disagreement due to spamtrap ($CRM_SCORE/$SA_SCORE)$NL"
+      RETRAIN=spam
+      :0 fw
+      |$FORMAIL -A "X-CRM114-Retrain: spam, due to spamtrap"
+    }
+
     # SA is convincing, so retrain crm114
     :0
     * ? perl -e "$SA_SCORE > $CRM_MISCLASSIFY_SA_AUTOTRAIN_LIMIT_SPAM || exit 1"
     {
       LOG="spamfilter:  crm114 found ham ($CRM_SCORE), but SA is more convincing ($SA_SCORE > $CRM_MISCLASSIFY_SA_AUTOTRAIN_LIMIT_SPAM)$NL"
-      CRM_RETRAIN=spam
+      RETRAIN=spam
       :0 fw
       |$FORMAIL -A "X-CRM114-Retrain: spam, according to SA (score $SA_SCORE > $CRM_MISCLASSIFY_SA_AUTOTRAIN_LIMIT_SPAM)"
     }
@@ -205,5 +250,18 @@ INCLUDERC=$PMDIR/pre-spam-cleanup
   |$FORMAIL -I"X-Spam: no (crm114:$CRM_SCORE SA:$SA_SCORE)"
 }
 
+# schedule spamtrapped ham for retraining as spam
+:0
+* SPAMTRAPPED ?? .
+* ! SKIP_SPAMCHECKS ?? .
+* ! IS_SPAM ?? .
+{ 
+  LOG="spamfilter:  found spamtrapped ham, retraining...$NL"
+  :0 fw
+  |$FORMAIL -I"X-Spam: spamtrapped ham"
+  IS_SPAM=spamtrapped-ham
+  RETRAIN=spam
+}
+
 INCLUDERC=$PMDIR/handlespam
 #VERBOSE=no