]> git.madduck.net Git - etc/mailfilter.git/blobdiff - procmail/spamfilter

madduck's git repository

Every one of the projects in this repository is available at the canonical URL git://git.madduck.net/madduck/pub/<projectpath> — see each project's metadata for the exact URL.

All patches and comments are welcome. Please squash your changes to logical commits before using git-format-patch and git-send-email to patches@git.madduck.net. If you'd read over the Git project's submission guidelines and adhered to them, I'd be especially grateful.

SSH access, as well as push access can be individually arranged.

If you use my repositories frequently, consider adding the following snippet to ~/.gitconfig and using the third clone URL listed for each project:

[url "git://git.madduck.net/madduck/"]
  insteadOf = madduck:

ignore a couple of feeds for now
[etc/mailfilter.git] / procmail / spamfilter
index a882087f3a732d052e0501f46452d00530bed70e..491f157939f95f423f179921b24fd06e7430c42e 100755 (executable)
@@ -18,22 +18,55 @@ PMDIR=${PMDIR:-$HOME/.etc/mailfilter/procmail}
 
 #VERBOSE=yes
 
 
 #VERBOSE=yes
 
-INCLUDERC=$PMDIR/pre-spam-cleanup
-
 # no need to reprocess messages that went into a spamtrap
 # no need to reprocess messages that went into a spamtrap
+# UPDATE: retrain them only if diagnosed as non-spam, see below
+# Note: add E flag to next recipe when uncommenting
+#:0
+#* SPAMTRAPPED ?? .
+#{
+#  LOG="spamfilter:  skipping checks for spamtrapped message$NL"
+#  :0 fw
+#  |$FORMAIL -I"X-Spam: spamtrapped"
+#}
+
+# check whether this message is being resubmitted
 :0
 :0
-* SPAMTRAPPED ?? .
-{ 
-  LOG="spamfilter:  skipping checks for spamtrapped message$NL"
+*$ $MSG_DEJAVU
+{
+  :0
+  * TRAINED_AS ?? .
+  {
+    LOG="spamfilter:  skipping already trained $TRAINED_AS$NL"
+    :0 fw
+    |$FORMAIL -I"X-Spam: $TRAINED_AS (already trained)"
+    :0
+    * TRAINED_AS ?? spam
+    { IS_SPAM=already-trained }
+  }
+
+  :0 E
+  {
+    LOG="spamfilter:  skipping resubmitted message$NL"
+    :0 fw
+    |$FORMAIL -I"X-Spam: unknown (resubmitted)"
+  }
+}
+
+# do not run spamfilters if the message destination is already set
+:0 E
+* DEST ?? .
+{
+  LOG="spamfilter:  message already routed to '$DEST'$NL"
   :0 fw
   :0 fw
-  |$FORMAIL -I"X-Spam: spamtrapped"
+  |$FORMAIL -I"X-Spam: unknown (already routed)"
+  SPAM_UNKNOWN=already-destined
 }
 
 # let earlier parts of the mailfilter cause bypassing the checks
 :0 E
 * SKIP_SPAMCHECKS ?? .
 }
 
 # let earlier parts of the mailfilter cause bypassing the checks
 :0 E
 * SKIP_SPAMCHECKS ?? .
-{ 
-  LOG="spamfilter:  skipping checks as requested: $SKIP_SPAMCHECKS$NL" 
+{
+  LOG="spamfilter:  skipping checks as requested: $SKIP_SPAMCHECKS$NL"
   :0 fw
   |$FORMAIL -I"X-Spam: unknown (skip requested)"
   SPAM_UNKNOWN=skip-requested
   :0 fw
   |$FORMAIL -I"X-Spam: unknown (skip requested)"
   SPAM_UNKNOWN=skip-requested
@@ -48,6 +81,7 @@ INCLUDERC=$PMDIR/pre-spam-cleanup
   :0 fw
   |$FORMAIL -I"X-Spam: unknown (check skipped)"
   SPAM_UNKNOWN=skip-match
   :0 fw
   |$FORMAIL -I"X-Spam: unknown (check skipped)"
   SPAM_UNKNOWN=skip-match
+  SKIP_SPAMCHECKS=match
 }
 
 # sanity check on message size
 }
 
 # sanity check on message size
@@ -63,12 +97,18 @@ INCLUDERC=$PMDIR/pre-spam-cleanup
 # now run the spamfilters
 :0 E
 {
 # now run the spamfilters
 :0 E
 {
+  INCLUDERC=$PMDIR/spamtraps
+  INCLUDERC=$PMDIR/spammers
+  INCLUDERC=$PMDIR/spampat
+  INCLUDERC=$PMDIR/pre-spam-cleanup
+
   # crm114
   CRM_SPAM=UNKNOWN
   CRM_SCORE=0
   :0
   * !SKIP_CRM ?? .
   {
   # crm114
   CRM_SPAM=UNKNOWN
   CRM_SCORE=0
   :0
   * !SKIP_CRM ?? .
   {
+    #TODO: somehow filter out headers we added
     :0 fw
     |$CRM114
 
     :0 fw
     |$CRM114
 
@@ -112,24 +152,34 @@ INCLUDERC=$PMDIR/pre-spam-cleanup
   :0
   * CRM_SPAM ?? UNSURE
   {
   :0
   * CRM_SPAM ?? UNSURE
   {
-    # retrain as ham
+    # retrain spamtrapped message
     :0
     :0
+    * SPAMTRAPPED ?? .
+    {
+      LOG="spamfilter:  scheduling retraining with SPAM due to spamtrap$NL"
+      :0 fw
+      |$FORMAIL -A "X-CRM114-Autotrain: spam, due to spamtrap"
+      RETRAIN=spam
+    }
+
+    # retrain as ham
+    :0 E
     * ? perl -e "$SA_SCORE <= $CRM_UNSURE_SA_AUTOTRAIN_LIMIT_HAM || exit 1"
     {
     * ? perl -e "$SA_SCORE <= $CRM_UNSURE_SA_AUTOTRAIN_LIMIT_HAM || exit 1"
     {
-      LOG="spamfilter:  scheduling crm114 retraining with HAM (score $SA_SCORE <= $CRM_UNSURE_SA_AUTOTRAIN_LIMIT_HAM)$NL"
+      LOG="spamfilter:  scheduling retraining with HAM (score $SA_SCORE <= $CRM_UNSURE_SA_AUTOTRAIN_LIMIT_HAM)$NL"
       :0 fw
       |$FORMAIL -A "X-CRM114-Autotrain: ham, according to SA (score $SA_SCORE <= $CRM_UNSURE_SA_AUTOTRAIN_LIMIT_HAM)"
       :0 fw
       |$FORMAIL -A "X-CRM114-Autotrain: ham, according to SA (score $SA_SCORE <= $CRM_UNSURE_SA_AUTOTRAIN_LIMIT_HAM)"
-      CRM_RETRAIN=ham
+      RETRAIN=ham
     }
 
     # retrain as spam
     :0 E
     }
 
     # retrain as spam
     :0 E
-    * ? perl -e "$SA_SCORE > $CRM_UNSURE_SA_AUTOTRAIN_LIMIT_SPAM || exit 1"
+    * 1^0 ? perl -e "$SA_SCORE > $CRM_UNSURE_SA_AUTOTRAIN_LIMIT_SPAM || exit 1"
     {
     {
-      LOG="spamfilter:  scheduling crm114 retraining with SPAM (score $SA_SCORE > $CRM_UNSURE_SA_AUTOTRAIN_LIMIT_SPAM)$NL"
+      LOG="spamfilter:  scheduling retraining with SPAM (score $SA_SCORE > $CRM_UNSURE_SA_AUTOTRAIN_LIMIT_SPAM)$NL"
       :0 fw
       |$FORMAIL -A "X-CRM114-Autotrain: spam, according to SA (score $SA_SCORE > $CRM_UNSURE_SA_AUTOTRAIN_LIMIT_SPAM)"
       :0 fw
       |$FORMAIL -A "X-CRM114-Autotrain: spam, according to SA (score $SA_SCORE > $CRM_UNSURE_SA_AUTOTRAIN_LIMIT_SPAM)"
-      CRM_RETRAIN=spam
+      RETRAIN=spam
     }
 
     # skip retraining if SA is not convinced
     }
 
     # skip retraining if SA is not convinced
@@ -147,12 +197,22 @@ INCLUDERC=$PMDIR/pre-spam-cleanup
   * CRM_SPAM ?? SPAM
   * SA_SPAM ?? No
   {
   * CRM_SPAM ?? SPAM
   * SA_SPAM ?? No
   {
-    # SA is convincing, so retrain crm114
+    # message was spamtrapped anyway
     :0
     :0
+    * SPAMTRAPPED ?? .
+    {
+      LOG="spamfilter:  resolving crm114/SA disagreement due to spamtrap ($CRM_SCORE/$SA_SCORE)$NL"
+      RETRAIN=spam
+      :0 fw
+      |$FORMAIL -A "X-CRM114-Retrain: spam, due to spamtrap"
+    }
+
+    # SA is convincing, so retrain crm114
+    :0 E
     * ? perl -e "$SA_SCORE <= $CRM_MISCLASSIFY_SA_AUTOTRAIN_LIMIT_HAM || exit 1"
     {
       LOG="spamfilter:  crm114 found spam ($CRM_SCORE), but SA is more convincing ($SA_SCORE <= $CRM_MISCLASSIFY_SA_AUTOTRAIN_LIMIT_HAM)$NL"
     * ? perl -e "$SA_SCORE <= $CRM_MISCLASSIFY_SA_AUTOTRAIN_LIMIT_HAM || exit 1"
     {
       LOG="spamfilter:  crm114 found spam ($CRM_SCORE), but SA is more convincing ($SA_SCORE <= $CRM_MISCLASSIFY_SA_AUTOTRAIN_LIMIT_HAM)$NL"
-      CRM_RETRAIN=ham
+      RETRAIN=ham
       :0 fw
       |$FORMAIL -A "X-CRM114-Retrain: ham, according to SA (score $SA_SCORE <= $CRM_MISCLASSIFY_SA_AUTOTRAIN_LIMIT_HAM)"
     }
       :0 fw
       |$FORMAIL -A "X-CRM114-Retrain: ham, according to SA (score $SA_SCORE <= $CRM_MISCLASSIFY_SA_AUTOTRAIN_LIMIT_HAM)"
     }
@@ -172,12 +232,22 @@ INCLUDERC=$PMDIR/pre-spam-cleanup
   * CRM_SPAM ?? GOOD
   * SA_SPAM ?? Yes
   {
   * CRM_SPAM ?? GOOD
   * SA_SPAM ?? Yes
   {
+    # message was spamtrapped anyway
+    :0
+    * SPAMTRAPPED ?? .
+    {
+      LOG="spamfilter:  resolving crm114/SA disagreement due to spamtrap ($CRM_SCORE/$SA_SCORE)$NL"
+      RETRAIN=spam
+      :0 fw
+      |$FORMAIL -A "X-CRM114-Retrain: spam, due to spamtrap"
+    }
+
     # SA is convincing, so retrain crm114
     :0
     * ? perl -e "$SA_SCORE > $CRM_MISCLASSIFY_SA_AUTOTRAIN_LIMIT_SPAM || exit 1"
     {
       LOG="spamfilter:  crm114 found ham ($CRM_SCORE), but SA is more convincing ($SA_SCORE > $CRM_MISCLASSIFY_SA_AUTOTRAIN_LIMIT_SPAM)$NL"
     # SA is convincing, so retrain crm114
     :0
     * ? perl -e "$SA_SCORE > $CRM_MISCLASSIFY_SA_AUTOTRAIN_LIMIT_SPAM || exit 1"
     {
       LOG="spamfilter:  crm114 found ham ($CRM_SCORE), but SA is more convincing ($SA_SCORE > $CRM_MISCLASSIFY_SA_AUTOTRAIN_LIMIT_SPAM)$NL"
-      CRM_RETRAIN=spam
+      RETRAIN=spam
       :0 fw
       |$FORMAIL -A "X-CRM114-Retrain: spam, according to SA (score $SA_SCORE > $CRM_MISCLASSIFY_SA_AUTOTRAIN_LIMIT_SPAM)"
     }
       :0 fw
       |$FORMAIL -A "X-CRM114-Retrain: spam, according to SA (score $SA_SCORE > $CRM_MISCLASSIFY_SA_AUTOTRAIN_LIMIT_SPAM)"
     }
@@ -195,7 +265,7 @@ INCLUDERC=$PMDIR/pre-spam-cleanup
   :0 E
   * CRM_SPAM ?? SPAM
   * SA_SPAM ?? Yes
   :0 E
   * CRM_SPAM ?? SPAM
   * SA_SPAM ?? Yes
-  { 
+  {
     IS_SPAM=sa+crm
     :0 fw
     |$FORMAIL -I"X-Spam: yes (crm114:$CRM_SCORE SA:$SA_SCORE)"
     IS_SPAM=sa+crm
     :0 fw
     |$FORMAIL -I"X-Spam: yes (crm114:$CRM_SCORE SA:$SA_SCORE)"
@@ -205,5 +275,18 @@ INCLUDERC=$PMDIR/pre-spam-cleanup
   |$FORMAIL -I"X-Spam: no (crm114:$CRM_SCORE SA:$SA_SCORE)"
 }
 
   |$FORMAIL -I"X-Spam: no (crm114:$CRM_SCORE SA:$SA_SCORE)"
 }
 
-INCLUDERC=$PMDIR/handlespam
+# schedule spamtrapped ham for retraining as spam
+:0
+* SPAMTRAPPED ?? .
+* ! SKIP_SPAMCHECKS ?? .
+* ! IS_SPAM ?? .
+{
+  LOG="spamfilter:  found spamtrapped ham, retraining...$NL"
+  :0 fw
+  |$FORMAIL -I"X-Spam: spamtrapped ham"
+  IS_SPAM=spamtrapped-ham
+  RETRAIN=spam
+  SPAM_UNSURE
+}
+
 #VERBOSE=no
 #VERBOSE=no