X-Git-Url: https://git.madduck.net/etc/mailfilter.git/blobdiff_plain/5af333ca3b83425e5cb3aa704b37654d576856c1..935de264355e2d9ece92897148936d22c014e68e:/procmail/spamfilter

diff --git a/procmail/spamfilter b/procmail/spamfilter
index a882087..491f157 100755
--- a/procmail/spamfilter
+++ b/procmail/spamfilter
@@ -18,22 +18,55 @@ PMDIR=${PMDIR:-$HOME/.etc/mailfilter/procmail}
 
 #VERBOSE=yes
 
-INCLUDERC=$PMDIR/pre-spam-cleanup
-
 # no need to reprocess messages that went into a spamtrap
+# UPDATE: spamtrapped messages are now checked too, and retrained as spam only if diagnosed as non-spam, see below
+# Note: add E flag to next recipe when uncommenting
+#:0
+#* SPAMTRAPPED ?? .
+#{
+#  LOG="spamfilter:  skipping checks for spamtrapped message$NL"
+#  :0 fw
+#  |$FORMAIL -I"X-Spam: spamtrapped"
+#}
+
+# check whether this message is being resubmitted
 :0
-* SPAMTRAPPED ?? .
-{ 
-  LOG="spamfilter:  skipping checks for spamtrapped message$NL"
+*$ $MSG_DEJAVU
+{
+  :0
+  * TRAINED_AS ?? .
+  {
+    LOG="spamfilter:  skipping already trained $TRAINED_AS$NL"
+    :0 fw
+    |$FORMAIL -I"X-Spam: $TRAINED_AS (already trained)"
+    :0
+    * TRAINED_AS ?? spam
+    { IS_SPAM=already-trained }
+  }
+
+  :0 E
+  {
+    LOG="spamfilter:  skipping resubmitted message$NL"
+    :0 fw
+    |$FORMAIL -I"X-Spam: unknown (resubmitted)"
+  }
+}
+
+# do not run spamfilters if the message destination is already set
+:0 E
+* DEST ?? .
+{
+  LOG="spamfilter:  message already routed to '$DEST'$NL"
   :0 fw
-  |$FORMAIL -I"X-Spam: spamtrapped"
+  |$FORMAIL -I"X-Spam: unknown (already routed)"
+  SPAM_UNKNOWN=already-destined
 }
 
 # let earlier parts of the mailfilter cause bypassing the checks
 :0 E
 * SKIP_SPAMCHECKS ?? .
-{ 
-  LOG="spamfilter:  skipping checks as requested: $SKIP_SPAMCHECKS$NL" 
+{
+  LOG="spamfilter:  skipping checks as requested: $SKIP_SPAMCHECKS$NL"
   :0 fw
   |$FORMAIL -I"X-Spam: unknown (skip requested)"
   SPAM_UNKNOWN=skip-requested
@@ -48,6 +81,7 @@ INCLUDERC=$PMDIR/pre-spam-cleanup
   :0 fw
   |$FORMAIL -I"X-Spam: unknown (check skipped)"
   SPAM_UNKNOWN=skip-match
+  SKIP_SPAMCHECKS=match
 }
 
 # sanity check on message size
@@ -63,12 +97,18 @@ INCLUDERC=$PMDIR/pre-spam-cleanup
 # now run the spamfilters
 :0 E
 {
+  INCLUDERC=$PMDIR/spamtraps
+  INCLUDERC=$PMDIR/spammers
+  INCLUDERC=$PMDIR/spampat
+  INCLUDERC=$PMDIR/pre-spam-cleanup
+
   # crm114
   CRM_SPAM=UNKNOWN
   CRM_SCORE=0
   :0
   * !SKIP_CRM ?? .
   {
+    #TODO: somehow filter out headers we added
     :0 fw
     |$CRM114
 
@@ -112,24 +152,34 @@ INCLUDERC=$PMDIR/pre-spam-cleanup
   :0
   * CRM_SPAM ?? UNSURE
   {
-    # retrain as ham
+    # retrain spamtrapped message
     :0
+    * SPAMTRAPPED ?? .
+    {
+      LOG="spamfilter:  scheduling retraining with SPAM due to spamtrap$NL"
+      :0 fw
+      |$FORMAIL -A "X-CRM114-Autotrain: spam, due to spamtrap"
+      RETRAIN=spam
+    }
+
+    # retrain as ham
+    :0 E
     * ? perl -e "$SA_SCORE <= $CRM_UNSURE_SA_AUTOTRAIN_LIMIT_HAM || exit 1"
     {
-      LOG="spamfilter:  scheduling crm114 retraining with HAM (score $SA_SCORE <= $CRM_UNSURE_SA_AUTOTRAIN_LIMIT_HAM)$NL"
+      LOG="spamfilter:  scheduling retraining with HAM (score $SA_SCORE <= $CRM_UNSURE_SA_AUTOTRAIN_LIMIT_HAM)$NL"
       :0 fw
       |$FORMAIL -A "X-CRM114-Autotrain: ham, according to SA (score $SA_SCORE <= $CRM_UNSURE_SA_AUTOTRAIN_LIMIT_HAM)"
-      CRM_RETRAIN=ham
+      RETRAIN=ham
     }
 
     # retrain as spam
     :0 E
-    * ? perl -e "$SA_SCORE > $CRM_UNSURE_SA_AUTOTRAIN_LIMIT_SPAM || exit 1"
+    * 1^0 ? perl -e "$SA_SCORE > $CRM_UNSURE_SA_AUTOTRAIN_LIMIT_SPAM || exit 1"
     {
-      LOG="spamfilter:  scheduling crm114 retraining with SPAM (score $SA_SCORE > $CRM_UNSURE_SA_AUTOTRAIN_LIMIT_SPAM)$NL"
+      LOG="spamfilter:  scheduling retraining with SPAM (score $SA_SCORE > $CRM_UNSURE_SA_AUTOTRAIN_LIMIT_SPAM)$NL"
       :0 fw
       |$FORMAIL -A "X-CRM114-Autotrain: spam, according to SA (score $SA_SCORE > $CRM_UNSURE_SA_AUTOTRAIN_LIMIT_SPAM)"
-      CRM_RETRAIN=spam
+      RETRAIN=spam
     }
 
     # skip retraining if SA is not convinced
@@ -147,12 +197,22 @@ INCLUDERC=$PMDIR/pre-spam-cleanup
   * CRM_SPAM ?? SPAM
   * SA_SPAM ?? No
   {
-    # SA is convincing, so retrain crm114
+    # message was spamtrapped anyway
     :0
+    * SPAMTRAPPED ?? .
+    {
+      LOG="spamfilter:  resolving crm114/SA disagreement due to spamtrap ($CRM_SCORE/$SA_SCORE)$NL"
+      RETRAIN=spam
+      :0 fw
+      |$FORMAIL -A "X-CRM114-Retrain: spam, due to spamtrap"
+    }
+
+    # SA is convincing, so retrain crm114
+    :0 E
     * ? perl -e "$SA_SCORE <= $CRM_MISCLASSIFY_SA_AUTOTRAIN_LIMIT_HAM || exit 1"
     {
       LOG="spamfilter:  crm114 found spam ($CRM_SCORE), but SA is more convincing ($SA_SCORE <= $CRM_MISCLASSIFY_SA_AUTOTRAIN_LIMIT_HAM)$NL"
-      CRM_RETRAIN=ham
+      RETRAIN=ham
       :0 fw
       |$FORMAIL -A "X-CRM114-Retrain: ham, according to SA (score $SA_SCORE <= $CRM_MISCLASSIFY_SA_AUTOTRAIN_LIMIT_HAM)"
     }
@@ -172,12 +232,22 @@ INCLUDERC=$PMDIR/pre-spam-cleanup
   * CRM_SPAM ?? GOOD
   * SA_SPAM ?? Yes
   {
+    # message was spamtrapped anyway
+    :0
+    * SPAMTRAPPED ?? .
+    {
+      LOG="spamfilter:  resolving crm114/SA disagreement due to spamtrap ($CRM_SCORE/$SA_SCORE)$NL"
+      RETRAIN=spam
+      :0 fw
+      |$FORMAIL -A "X-CRM114-Retrain: spam, due to spamtrap"
+    }
+
-    # SA is convincing, so retrain crm114
-    :0
+    # not spamtrapped (hence the E flag): SA is convincing, so retrain crm114
+    :0 E
     * ? perl -e "$SA_SCORE > $CRM_MISCLASSIFY_SA_AUTOTRAIN_LIMIT_SPAM || exit 1"
     {
       LOG="spamfilter:  crm114 found ham ($CRM_SCORE), but SA is more convincing ($SA_SCORE > $CRM_MISCLASSIFY_SA_AUTOTRAIN_LIMIT_SPAM)$NL"
-      CRM_RETRAIN=spam
+      RETRAIN=spam
       :0 fw
       |$FORMAIL -A "X-CRM114-Retrain: spam, according to SA (score $SA_SCORE > $CRM_MISCLASSIFY_SA_AUTOTRAIN_LIMIT_SPAM)"
     }
@@ -195,7 +265,7 @@ INCLUDERC=$PMDIR/pre-spam-cleanup
   :0 E
   * CRM_SPAM ?? SPAM
   * SA_SPAM ?? Yes
-  { 
+  {
     IS_SPAM=sa+crm
     :0 fw
     |$FORMAIL -I"X-Spam: yes (crm114:$CRM_SCORE SA:$SA_SCORE)"
@@ -205,5 +275,18 @@ INCLUDERC=$PMDIR/pre-spam-cleanup
   |$FORMAIL -I"X-Spam: no (crm114:$CRM_SCORE SA:$SA_SCORE)"
 }
 
-INCLUDERC=$PMDIR/handlespam
+# schedule spamtrapped ham for retraining as spam
+:0
+* SPAMTRAPPED ?? .
+* ! SKIP_SPAMCHECKS ?? .
+* ! IS_SPAM ?? .
+{
+  LOG="spamfilter:  found spamtrapped ham, retraining...$NL"
+  :0 fw
+  |$FORMAIL -I"X-Spam: spamtrapped ham"
+  IS_SPAM=spamtrapped-ham
+  RETRAIN=spam
+  SPAM_UNSURE
+}
+
 #VERBOSE=no