git.madduck.net Git - etc/mailfilter.git/blobdiff - procmail/spamfilter

madduck's git repository

Every one of the projects in this repository is available at the canonical URL git://git.madduck.net/madduck/pub/<projectpath> — see each project's metadata for the exact URL.

All patches and comments are welcome. Please squash your changes into logical commits before using git-format-patch and git-send-email to send them to patches@git.madduck.net. If you read over the Git project's submission guidelines and adhere to them, I'd be especially grateful.

SSH access, as well as push access, can be individually arranged.

If you use my repositories frequently, consider adding the following snippet to ~/.gitconfig and using the third clone URL listed for each project:

[url "git://git.madduck.net/madduck/"]
  insteadOf = madduck:

write justme headers at the end
[etc/mailfilter.git] / procmail / spamfilter
index 0fab78829bc4b9ded784a405f259ab3377c082a8..1b0d0d1b83a5e339190e98d1a3d39d84a033887e 100755 (executable)
@@ -18,10 +18,9 @@ PMDIR=${PMDIR:-$HOME/.etc/mailfilter/procmail}
 
 #VERBOSE=yes
 
 
 #VERBOSE=yes
 
-INCLUDERC=$PMDIR/pre-spam-cleanup
-
 # no need to reprocess messages that went into a spamtrap
 # UPDATE: retrain them only if diagnosed as non-spam, see below
 # no need to reprocess messages that went into a spamtrap
 # UPDATE: retrain them only if diagnosed as non-spam, see below
+# Note: add E flag to next recipe when uncommenting
 #:0
 #* SPAMTRAPPED ?? .
 #{ 
 #:0
 #* SPAMTRAPPED ?? .
 #{ 
@@ -30,6 +29,18 @@ INCLUDERC=$PMDIR/pre-spam-cleanup
 #  |$FORMAIL -I"X-Spam: spamtrapped"
 #}
 
 #  |$FORMAIL -I"X-Spam: spamtrapped"
 #}
 
+# check whether this message is being reinjected
+TRAINED_AS
+:0
+*$ $MSG_DEJAVU
+* ^X-Trained-As: \/(h|sp)am
+{
+  LOG="spamfilter:  skipping already trained $MATCH$NL"
+  :0
+  * MATCH ?? spam
+  { IS_SPAM=already-trained }
+}
+
 # let earlier parts of the mailfilter cause bypassing the checks
 :0 E
 * SKIP_SPAMCHECKS ?? .
 # let earlier parts of the mailfilter cause bypassing the checks
 :0 E
 * SKIP_SPAMCHECKS ?? .
@@ -49,6 +60,7 @@ INCLUDERC=$PMDIR/pre-spam-cleanup
   :0 fw
   |$FORMAIL -I"X-Spam: unknown (check skipped)"
   SPAM_UNKNOWN=skip-match
   :0 fw
   |$FORMAIL -I"X-Spam: unknown (check skipped)"
   SPAM_UNKNOWN=skip-match
+  SKIP_SPAMCHECKS=match
 }
 
 # sanity check on message size
 }
 
 # sanity check on message size
@@ -64,6 +76,8 @@ INCLUDERC=$PMDIR/pre-spam-cleanup
 # now run the spamfilters
 :0 E
 {
 # now run the spamfilters
 :0 E
 {
+  INCLUDERC=$PMDIR/pre-spam-cleanup
+
   # crm114
   CRM_SPAM=UNKNOWN
   CRM_SCORE=0
   # crm114
   CRM_SPAM=UNKNOWN
   CRM_SCORE=0
@@ -113,11 +127,21 @@ INCLUDERC=$PMDIR/pre-spam-cleanup
   :0
   * CRM_SPAM ?? UNSURE
   {
   :0
   * CRM_SPAM ?? UNSURE
   {
-    # retrain as ham
+    # retrain spamtrapped message
     :0
     :0
+    * SPAMTRAPPED ?? .
+    {
+      LOG="spamfilter:  scheduling retraining with SPAM due to spamtrap$NL"
+      :0 fw
+      |$FORMAIL -A "X-CRM114-Autotrain: spam, due to spamtrap"
+      RETRAIN=spam
+    }
+
+    # retrain as ham
+    :0 E
     * ? perl -e "$SA_SCORE <= $CRM_UNSURE_SA_AUTOTRAIN_LIMIT_HAM || exit 1"
     {
     * ? perl -e "$SA_SCORE <= $CRM_UNSURE_SA_AUTOTRAIN_LIMIT_HAM || exit 1"
     {
-      LOG="spamfilter:  scheduling crm114 retraining with HAM (score $SA_SCORE <= $CRM_UNSURE_SA_AUTOTRAIN_LIMIT_HAM)$NL"
+      LOG="spamfilter:  scheduling retraining with HAM (score $SA_SCORE <= $CRM_UNSURE_SA_AUTOTRAIN_LIMIT_HAM)$NL"
       :0 fw
       |$FORMAIL -A "X-CRM114-Autotrain: ham, according to SA (score $SA_SCORE <= $CRM_UNSURE_SA_AUTOTRAIN_LIMIT_HAM)"
       RETRAIN=ham
       :0 fw
       |$FORMAIL -A "X-CRM114-Autotrain: ham, according to SA (score $SA_SCORE <= $CRM_UNSURE_SA_AUTOTRAIN_LIMIT_HAM)"
       RETRAIN=ham
@@ -125,9 +149,9 @@ INCLUDERC=$PMDIR/pre-spam-cleanup
 
     # retrain as spam
     :0 E
 
     # retrain as spam
     :0 E
-    * ? perl -e "$SA_SCORE > $CRM_UNSURE_SA_AUTOTRAIN_LIMIT_SPAM || exit 1"
+    * 1^0 ? perl -e "$SA_SCORE > $CRM_UNSURE_SA_AUTOTRAIN_LIMIT_SPAM || exit 1"
     {
     {
-      LOG="spamfilter:  scheduling crm114 retraining with SPAM (score $SA_SCORE > $CRM_UNSURE_SA_AUTOTRAIN_LIMIT_SPAM)$NL"
+      LOG="spamfilter:  scheduling retraining with SPAM (score $SA_SCORE > $CRM_UNSURE_SA_AUTOTRAIN_LIMIT_SPAM)$NL"
       :0 fw
       |$FORMAIL -A "X-CRM114-Autotrain: spam, according to SA (score $SA_SCORE > $CRM_UNSURE_SA_AUTOTRAIN_LIMIT_SPAM)"
       RETRAIN=spam
       :0 fw
       |$FORMAIL -A "X-CRM114-Autotrain: spam, according to SA (score $SA_SCORE > $CRM_UNSURE_SA_AUTOTRAIN_LIMIT_SPAM)"
       RETRAIN=spam
@@ -148,8 +172,18 @@ INCLUDERC=$PMDIR/pre-spam-cleanup
   * CRM_SPAM ?? SPAM
   * SA_SPAM ?? No
   {
   * CRM_SPAM ?? SPAM
   * SA_SPAM ?? No
   {
-    # SA is convincing, so retrain crm114
+    # message was spamtrapped anyway
     :0
     :0
+    * SPAMTRAPPED ?? .
+    {
+      LOG="spamfilter:  resolving crm114/SA disagreement due to spamtrap ($CRM_SCORE/$SA_SCORE)$NL"
+      RETRAIN=spam
+      :0 fw
+      |$FORMAIL -A "X-CRM114-Retrain: spam, due to spamtrap"
+    }
+
+    # SA is convincing, so retrain crm114
+    :0 E
     * ? perl -e "$SA_SCORE <= $CRM_MISCLASSIFY_SA_AUTOTRAIN_LIMIT_HAM || exit 1"
     {
       LOG="spamfilter:  crm114 found spam ($CRM_SCORE), but SA is more convincing ($SA_SCORE <= $CRM_MISCLASSIFY_SA_AUTOTRAIN_LIMIT_HAM)$NL"
     * ? perl -e "$SA_SCORE <= $CRM_MISCLASSIFY_SA_AUTOTRAIN_LIMIT_HAM || exit 1"
     {
       LOG="spamfilter:  crm114 found spam ($CRM_SCORE), but SA is more convincing ($SA_SCORE <= $CRM_MISCLASSIFY_SA_AUTOTRAIN_LIMIT_HAM)$NL"
@@ -173,6 +207,16 @@ INCLUDERC=$PMDIR/pre-spam-cleanup
   * CRM_SPAM ?? GOOD
   * SA_SPAM ?? Yes
   {
   * CRM_SPAM ?? GOOD
   * SA_SPAM ?? Yes
   {
+    # message was spamtrapped anyway
+    :0
+    * SPAMTRAPPED ?? .
+    {
+      LOG="spamfilter:  resolving crm114/SA disagreement due to spamtrap ($CRM_SCORE/$SA_SCORE)$NL"
+      RETRAIN=spam
+      :0 fw
+      |$FORMAIL -A "X-CRM114-Retrain: spam, due to spamtrap"
+    }
+
     # SA is convincing, so retrain crm114
     :0
     * ? perl -e "$SA_SCORE > $CRM_MISCLASSIFY_SA_AUTOTRAIN_LIMIT_SPAM || exit 1"
     # SA is convincing, so retrain crm114
     :0
     * ? perl -e "$SA_SCORE > $CRM_MISCLASSIFY_SA_AUTOTRAIN_LIMIT_SPAM || exit 1"
@@ -209,8 +253,15 @@ INCLUDERC=$PMDIR/pre-spam-cleanup
 # schedule spamtrapped ham for retraining as spam
 :0
 * SPAMTRAPPED ?? .
 # schedule spamtrapped ham for retraining as spam
 :0
 * SPAMTRAPPED ?? .
+* ! SKIP_SPAMCHECKS ?? .
 * ! IS_SPAM ?? .
 * ! IS_SPAM ?? .
-{ RETRAIN=spam }
+{ 
+  LOG="spamfilter:  found spamtrapped ham, retraining...$NL"
+  :0 fw
+  |$FORMAIL -I"X-Spam: spamtrapped ham"
+  IS_SPAM=spamtrapped-ham
+  RETRAIN=spam
+}
 
 INCLUDERC=$PMDIR/handlespam
 #VERBOSE=no
 
 INCLUDERC=$PMDIR/handlespam
 #VERBOSE=no