All patches and comments are welcome. Please squash your changes into logical
commits before using git-format-patch and git-send-email to send them to
patches@git.madduck.net.
If you'd read over the Git project's submission guidelines and adhered to them,
I'd be especially grateful.
   3 #TODO: rewrite to use SPAM variable, and do not autotrain spam here, only ham
 
   5 PMDIR=${PMDIR:-$HOME/.etc/mailfilter/procmail}
 
  10   # PMVAR is not defined, so we are being called as a filter
 
  11   # thus source the standard defines
 
  12   INCLUDERC=$PMDIR/defines
 
  13   # prevent feeding back to procmail and delete the leading From line
 
  15   # and tell procmail to continue to act as a filter
 
  21 # no need to reprocess messages that went into a spamtrap
 
  22 # UPDATE: retrain them only if diagnosed as non-spam, see below
 
  23 # Note: add E flag to next recipe when uncommenting
 
  27 #  LOG="spamfilter:  skipping checks for spamtrapped message$NL"
 
  29 #  |$FORMAIL -I"X-Spam: spamtrapped"
 
  32 # check whether this message is being resubmitted
 
  39     LOG="spamfilter:  skipping already trained $TRAINED_AS$NL"
 
  42     { IS_SPAM=already-trained }
 
  46   { LOG="spamfilter:  skipping resubmitted message$NL" }
 
  49 # let earlier parts of the mailfilter request that the checks be bypassed
 
  51 * SKIP_SPAMCHECKS ?? .
 
  53   LOG="spamfilter:  skipping checks as requested: $SKIP_SPAMCHECKS$NL"
 
  55   |$FORMAIL -I"X-Spam: unknown (skip requested)"
 
  56   SPAM_UNKNOWN=skip-requested
 
  59 # honour skip-spamchecks to exclude certain messages from spam checks
 
  62 * ? $EGREP -qif $CONF/skip-spamchecks
 
  64   LOG="spamfilter:  skipping checks as per skip-spamchecks$NL"
 
  66   |$FORMAIL -I"X-Spam: unknown (check skipped)"
 
  67   SPAM_UNKNOWN=skip-match
 
  71 # sanity check on message size
 
  73 * > $SPAMCHECK_MAX_MESSAGE_SIZE
 
  75   LOG="spamfilter:  skipping check because message size exceeds $SPAMCHECK_MAX_MESSAGE_SIZE bytes$NL"
 
  77   |$FORMAIL -I"X-Spam: unknown (message larger than $SPAMCHECK_MAX_MESSAGE_SIZE bytes)"
 
  78   SPAM_UNKNOWN=too-large
 
  81 # now run the spamfilters
 
  84   INCLUDERC=$PMDIR/pre-spam-cleanup
 
  96     * ^X-CRM114-Status: \/[A-Z]+
 
 100     * ^X-CRM114-Status: .+\([ ]*\/-?[.0-9]+
 
 103     LOG="crm114:      $CRM_SPAM/$CRM_SCORE$NL"
 
 117     * ^X-Spam-Status: \/[A-Za-z]+
 
 121     * ^X-Spam-Status: .+score=\/-?[.0-9]+
 
 125     * ^X-Spam-Status: .+tests=\/[^ ]+
 
 128     LOG="SA:          $SA_SPAM/$SA_SCORE/$SA_TESTS$NL"
 
 131   ## CASE 0: crm114 is unsure/untrained
 
 135     # retrain spamtrapped message
 
 139       LOG="spamfilter:  scheduling retraining with SPAM due to spamtrap$NL"
 
 141       |$FORMAIL -A "X-CRM114-Autotrain: spam, due to spamtrap"
 
 147     * ? perl -e "$SA_SCORE <= $CRM_UNSURE_SA_AUTOTRAIN_LIMIT_HAM || exit 1"
 
 149       LOG="spamfilter:  scheduling retraining with HAM (score $SA_SCORE <= $CRM_UNSURE_SA_AUTOTRAIN_LIMIT_HAM)$NL"
 
 151       |$FORMAIL -A "X-CRM114-Autotrain: ham, according to SA (score $SA_SCORE <= $CRM_UNSURE_SA_AUTOTRAIN_LIMIT_HAM)"
 
 157     * 1^0 ? perl -e "$SA_SCORE > $CRM_UNSURE_SA_AUTOTRAIN_LIMIT_SPAM || exit 1"
 
 159       LOG="spamfilter:  scheduling retraining with SPAM (score $SA_SCORE > $CRM_UNSURE_SA_AUTOTRAIN_LIMIT_SPAM)$NL"
 
 161       |$FORMAIL -A "X-CRM114-Autotrain: spam, according to SA (score $SA_SCORE > $CRM_UNSURE_SA_AUTOTRAIN_LIMIT_SPAM)"
 
 165     # skip retraining if SA is not convinced
 
 168       LOG="spamfilter:  will not autotrain crm114 because SA is not convinced ($CRM_UNSURE_SA_AUTOTRAIN_LIMIT_HAM <= $SA_SCORE < $CRM_UNSURE_SA_AUTOTRAIN_LIMIT_SPAM)$NL"
 
 170       |$FORMAIL -A "X-CRM114-Autotrain: SA is unsure ($CRM_UNSURE_SA_AUTOTRAIN_LIMIT_HAM <= $SA_SCORE < $CRM_UNSURE_SA_AUTOTRAIN_LIMIT_SPAM)"
 
 171       SPAM_UNSURE=sa-unsure
 
 175   ## CASE 1: disagreement, SA sees ham
 
 180     # message was spamtrapped anyway
 
 184       LOG="spamfilter:  resolving crm114/SA disagreement due to spamtrap ($CRM_SCORE/$SA_SCORE)$NL"
 
 187       |$FORMAIL -A "X-CRM114-Retrain: spam, due to spamtrap"
 
 190     # SA is convincing, so retrain crm114
 
 192     * ? perl -e "$SA_SCORE <= $CRM_MISCLASSIFY_SA_AUTOTRAIN_LIMIT_HAM || exit 1"
 
 194       LOG="spamfilter:  crm114 found spam ($CRM_SCORE), but SA is more convincing ($SA_SCORE <= $CRM_MISCLASSIFY_SA_AUTOTRAIN_LIMIT_HAM)$NL"
 
 197       |$FORMAIL -A "X-CRM114-Retrain: ham, according to SA (score $SA_SCORE <= $CRM_MISCLASSIFY_SA_AUTOTRAIN_LIMIT_HAM)"
 
 200     # SA is not convincing, mark as disagreement
 
 203       LOG="spamfilter:  crm114 found spam ($CRM_SCORE), but SA thinks it's ham ($SA_SCORE)$NL"
 
 206       |$FORMAIL -I "X-Spam: disagree (crm114:spam/$CRM_SCORE SA:ham/$SA_SCORE)"
 
 210   ## CASE 1: disagreement, SA sees spam
 
 215     # message was spamtrapped anyway
 
 219       LOG="spamfilter:  resolving crm114/SA disagreement due to spamtrap ($CRM_SCORE/$SA_SCORE)$NL"
 
 222       |$FORMAIL -A "X-CRM114-Retrain: spam, due to spamtrap"
 
 225     # SA is convincing, so retrain crm114
 
 227     * ? perl -e "$SA_SCORE > $CRM_MISCLASSIFY_SA_AUTOTRAIN_LIMIT_SPAM || exit 1"
 
 229       LOG="spamfilter:  crm114 found ham ($CRM_SCORE), but SA is more convincing ($SA_SCORE > $CRM_MISCLASSIFY_SA_AUTOTRAIN_LIMIT_SPAM)$NL"
 
 232       |$FORMAIL -A "X-CRM114-Retrain: spam, according to SA (score $SA_SCORE > $CRM_MISCLASSIFY_SA_AUTOTRAIN_LIMIT_SPAM)"
 
 235     # SA is not convincing, mark as disagreement
 
 238       LOG="spamfilter:  crm114 found ham ($CRM_SCORE), but SA thinks it's spam ($SA_SCORE)$NL"
 
 239       SPAM_DISAGREE=sa-spam
 
 241       |$FORMAIL -I "X-Spam: disagree (crm114:ham/$CRM_SCORE SA:spam/$SA_SCORE)"
 
 251     |$FORMAIL -I"X-Spam: yes (crm114:$CRM_SCORE SA:$SA_SCORE)"
 
 255   |$FORMAIL -I"X-Spam: no (crm114:$CRM_SCORE SA:$SA_SCORE)"
 
 258 # schedule spamtrapped ham for retraining as spam
 
 261 * ! SKIP_SPAMCHECKS ?? .
 
 264   LOG="spamfilter:  found spamtrapped ham, retraining...$NL"
 
 266   |$FORMAIL -I"X-Spam: spamtrapped ham"
 
 267   IS_SPAM=spamtrapped-ham