git.madduck.net Git - etc/mailfilter.git/blobdiff - procmail/spamfilter

madduck's git repository

Every one of the projects in this repository is available at the canonical URL git://git.madduck.net/madduck/pub/<projectpath> — see each project's metadata for the exact URL.

All patches and comments are welcome. Please squash your changes into logical commits before using git-format-patch and git-send-email to send them to patches@git.madduck.net. If you read over the Git project's submission guidelines and adhere to them, I'd be especially grateful.

SSH access, as well as push access, can be individually arranged.

If you use my repositories frequently, consider adding the following snippet to ~/.gitconfig and using the third clone URL listed for each project:

[url "git://git.madduck.net/madduck/"]
  insteadOf = madduck:

add word boundaries to generated regexps
[etc/mailfilter.git] / procmail / spamfilter
index 0fab78829bc4b9ded784a405f259ab3377c082a8..80d59cf6b62d564bff6bae7ef9b6a7f5c304112b 100755 (executable)
@@ -18,23 +18,47 @@ PMDIR=${PMDIR:-$HOME/.etc/mailfilter/procmail}
 
 #VERBOSE=yes
 
 
 #VERBOSE=yes
 
-INCLUDERC=$PMDIR/pre-spam-cleanup
-
 # no need to reprocess messages that went into a spamtrap
 # UPDATE: retrain them only if diagnosed as non-spam, see below
 # no need to reprocess messages that went into a spamtrap
 # UPDATE: retrain them only if diagnosed as non-spam, see below
+# Note: add E flag to next recipe when uncommenting
 #:0
 #* SPAMTRAPPED ?? .
 #:0
 #* SPAMTRAPPED ?? .
-#{ 
+#{
 #  LOG="spamfilter:  skipping checks for spamtrapped message$NL"
 #  :0 fw
 #  |$FORMAIL -I"X-Spam: spamtrapped"
 #}
 
 #  LOG="spamfilter:  skipping checks for spamtrapped message$NL"
 #  :0 fw
 #  |$FORMAIL -I"X-Spam: spamtrapped"
 #}
 
+# check whether this message is being resubmitted
+:0
+*$ $MSG_DEJAVU
+{
+  :0
+  * TRAINED_AS ?? .
+  {
+    LOG="spamfilter:  skipping already trained $TRAINED_AS$NL"
+    :0
+    * TRAINED_AS ?? spam
+    { IS_SPAM=already-trained }
+  }
+
+  :0 E
+  { LOG="spamfilter:  skipping resubmitted message$NL" }
+}
+
+# do not run spamfilters if the message destination is already set
+:0 E
+* DEST ?? .
+{
+  LOG="spamfilter:  message already routed to '$DEST'$NL"
+  SPAM_UNKNOWN=already-destined
+}
+
 # let earlier parts of the mailfilter cause bypassing the checks
 :0 E
 * SKIP_SPAMCHECKS ?? .
 # let earlier parts of the mailfilter cause bypassing the checks
 :0 E
 * SKIP_SPAMCHECKS ?? .
-{ 
-  LOG="spamfilter:  skipping checks as requested: $SKIP_SPAMCHECKS$NL" 
+{
+  LOG="spamfilter:  skipping checks as requested: $SKIP_SPAMCHECKS$NL"
   :0 fw
   |$FORMAIL -I"X-Spam: unknown (skip requested)"
   SPAM_UNKNOWN=skip-requested
   :0 fw
   |$FORMAIL -I"X-Spam: unknown (skip requested)"
   SPAM_UNKNOWN=skip-requested
@@ -49,6 +73,7 @@ INCLUDERC=$PMDIR/pre-spam-cleanup
   :0 fw
   |$FORMAIL -I"X-Spam: unknown (check skipped)"
   SPAM_UNKNOWN=skip-match
   :0 fw
   |$FORMAIL -I"X-Spam: unknown (check skipped)"
   SPAM_UNKNOWN=skip-match
+  SKIP_SPAMCHECKS=match
 }
 
 # sanity check on message size
 }
 
 # sanity check on message size
@@ -64,12 +89,17 @@ INCLUDERC=$PMDIR/pre-spam-cleanup
 # now run the spamfilters
 :0 E
 {
 # now run the spamfilters
 :0 E
 {
+  INCLUDERC=$PMDIR/spamtraps
+  INCLUDERC=$PMDIR/spammers
+  INCLUDERC=$PMDIR/pre-spam-cleanup
+
   # crm114
   CRM_SPAM=UNKNOWN
   CRM_SCORE=0
   :0
   * !SKIP_CRM ?? .
   {
   # crm114
   CRM_SPAM=UNKNOWN
   CRM_SCORE=0
   :0
   * !SKIP_CRM ?? .
   {
+    #TODO: somehow filter out headers we added
     :0 fw
     |$CRM114
 
     :0 fw
     |$CRM114
 
@@ -113,11 +143,21 @@ INCLUDERC=$PMDIR/pre-spam-cleanup
   :0
   * CRM_SPAM ?? UNSURE
   {
   :0
   * CRM_SPAM ?? UNSURE
   {
-    # retrain as ham
+    # retrain spamtrapped message
     :0
     :0
+    * SPAMTRAPPED ?? .
+    {
+      LOG="spamfilter:  scheduling retraining with SPAM due to spamtrap$NL"
+      :0 fw
+      |$FORMAIL -A "X-CRM114-Autotrain: spam, due to spamtrap"
+      RETRAIN=spam
+    }
+
+    # retrain as ham
+    :0 E
     * ? perl -e "$SA_SCORE <= $CRM_UNSURE_SA_AUTOTRAIN_LIMIT_HAM || exit 1"
     {
     * ? perl -e "$SA_SCORE <= $CRM_UNSURE_SA_AUTOTRAIN_LIMIT_HAM || exit 1"
     {
-      LOG="spamfilter:  scheduling crm114 retraining with HAM (score $SA_SCORE <= $CRM_UNSURE_SA_AUTOTRAIN_LIMIT_HAM)$NL"
+      LOG="spamfilter:  scheduling retraining with HAM (score $SA_SCORE <= $CRM_UNSURE_SA_AUTOTRAIN_LIMIT_HAM)$NL"
       :0 fw
       |$FORMAIL -A "X-CRM114-Autotrain: ham, according to SA (score $SA_SCORE <= $CRM_UNSURE_SA_AUTOTRAIN_LIMIT_HAM)"
       RETRAIN=ham
       :0 fw
       |$FORMAIL -A "X-CRM114-Autotrain: ham, according to SA (score $SA_SCORE <= $CRM_UNSURE_SA_AUTOTRAIN_LIMIT_HAM)"
       RETRAIN=ham
@@ -125,9 +165,9 @@ INCLUDERC=$PMDIR/pre-spam-cleanup
 
     # retrain as spam
     :0 E
 
     # retrain as spam
     :0 E
-    * ? perl -e "$SA_SCORE > $CRM_UNSURE_SA_AUTOTRAIN_LIMIT_SPAM || exit 1"
+    * 1^0 ? perl -e "$SA_SCORE > $CRM_UNSURE_SA_AUTOTRAIN_LIMIT_SPAM || exit 1"
     {
     {
-      LOG="spamfilter:  scheduling crm114 retraining with SPAM (score $SA_SCORE > $CRM_UNSURE_SA_AUTOTRAIN_LIMIT_SPAM)$NL"
+      LOG="spamfilter:  scheduling retraining with SPAM (score $SA_SCORE > $CRM_UNSURE_SA_AUTOTRAIN_LIMIT_SPAM)$NL"
       :0 fw
       |$FORMAIL -A "X-CRM114-Autotrain: spam, according to SA (score $SA_SCORE > $CRM_UNSURE_SA_AUTOTRAIN_LIMIT_SPAM)"
       RETRAIN=spam
       :0 fw
       |$FORMAIL -A "X-CRM114-Autotrain: spam, according to SA (score $SA_SCORE > $CRM_UNSURE_SA_AUTOTRAIN_LIMIT_SPAM)"
       RETRAIN=spam
@@ -148,8 +188,18 @@ INCLUDERC=$PMDIR/pre-spam-cleanup
   * CRM_SPAM ?? SPAM
   * SA_SPAM ?? No
   {
   * CRM_SPAM ?? SPAM
   * SA_SPAM ?? No
   {
-    # SA is convincing, so retrain crm114
+    # message was spamtrapped anyway
     :0
     :0
+    * SPAMTRAPPED ?? .
+    {
+      LOG="spamfilter:  resolving crm114/SA disagreement due to spamtrap ($CRM_SCORE/$SA_SCORE)$NL"
+      RETRAIN=spam
+      :0 fw
+      |$FORMAIL -A "X-CRM114-Retrain: spam, due to spamtrap"
+    }
+
+    # SA is convincing, so retrain crm114
+    :0 E
     * ? perl -e "$SA_SCORE <= $CRM_MISCLASSIFY_SA_AUTOTRAIN_LIMIT_HAM || exit 1"
     {
       LOG="spamfilter:  crm114 found spam ($CRM_SCORE), but SA is more convincing ($SA_SCORE <= $CRM_MISCLASSIFY_SA_AUTOTRAIN_LIMIT_HAM)$NL"
     * ? perl -e "$SA_SCORE <= $CRM_MISCLASSIFY_SA_AUTOTRAIN_LIMIT_HAM || exit 1"
     {
       LOG="spamfilter:  crm114 found spam ($CRM_SCORE), but SA is more convincing ($SA_SCORE <= $CRM_MISCLASSIFY_SA_AUTOTRAIN_LIMIT_HAM)$NL"
@@ -173,6 +223,16 @@ INCLUDERC=$PMDIR/pre-spam-cleanup
   * CRM_SPAM ?? GOOD
   * SA_SPAM ?? Yes
   {
   * CRM_SPAM ?? GOOD
   * SA_SPAM ?? Yes
   {
+    # message was spamtrapped anyway
+    :0
+    * SPAMTRAPPED ?? .
+    {
+      LOG="spamfilter:  resolving crm114/SA disagreement due to spamtrap ($CRM_SCORE/$SA_SCORE)$NL"
+      RETRAIN=spam
+      :0 fw
+      |$FORMAIL -A "X-CRM114-Retrain: spam, due to spamtrap"
+    }
+
     # SA is convincing, so retrain crm114
     :0
     * ? perl -e "$SA_SCORE > $CRM_MISCLASSIFY_SA_AUTOTRAIN_LIMIT_SPAM || exit 1"
     # SA is convincing, so retrain crm114
     :0
     * ? perl -e "$SA_SCORE > $CRM_MISCLASSIFY_SA_AUTOTRAIN_LIMIT_SPAM || exit 1"
@@ -196,7 +256,7 @@ INCLUDERC=$PMDIR/pre-spam-cleanup
   :0 E
   * CRM_SPAM ?? SPAM
   * SA_SPAM ?? Yes
   :0 E
   * CRM_SPAM ?? SPAM
   * SA_SPAM ?? Yes
-  { 
+  {
     IS_SPAM=sa+crm
     :0 fw
     |$FORMAIL -I"X-Spam: yes (crm114:$CRM_SCORE SA:$SA_SCORE)"
     IS_SPAM=sa+crm
     :0 fw
     |$FORMAIL -I"X-Spam: yes (crm114:$CRM_SCORE SA:$SA_SCORE)"
@@ -209,8 +269,15 @@ INCLUDERC=$PMDIR/pre-spam-cleanup
 # schedule spamtrapped ham for retraining as spam
 :0
 * SPAMTRAPPED ?? .
 # schedule spamtrapped ham for retraining as spam
 :0
 * SPAMTRAPPED ?? .
+* ! SKIP_SPAMCHECKS ?? .
 * ! IS_SPAM ?? .
 * ! IS_SPAM ?? .
-{ RETRAIN=spam }
+{
+  LOG="spamfilter:  found spamtrapped ham, retraining...$NL"
+  :0 fw
+  |$FORMAIL -I"X-Spam: spamtrapped ham"
+  IS_SPAM=spamtrapped-ham
+  RETRAIN=spam
+  SPAM_UNSURE
+}
 
 
-INCLUDERC=$PMDIR/handlespam
 #VERBOSE=no
 #VERBOSE=no