All patches and comments are welcome. Please squash your changes into logical
commits before using git-format-patch and git-send-email to send them to
patches@git.madduck.net.
If you read over the Git project's submission guidelines and adhere to them,
I'd be especially grateful.
3 #TODO: rewrite to use SPAM variable, and do not autotrain spam here, only ham
# Directory holding the rc files included below. A PMDIR value supplied by the
# caller is kept; otherwise it falls back to $HOME/.etc/mailfilter/procmail.
5 PMDIR=${PMDIR:-$HOME/.etc/mailfilter/procmail}
10 # PMVAR is not defined, so we are being called as filter
11 # thus source the standard defines
12 INCLUDERC=$PMDIR/defines
13 # prevent feeding back to procmail and delete the leading From line
15 # and tell procmail to continue to be a filter
# Disabled recipe: the commented-out lines below would log the skip and set an
# "X-Spam: spamtrapped" header. Spamtrapped messages are now handled further
# down (see the UPDATE note).
21 # no need to reprocess messages that went into a spamtrap
22 # UPDATE: retrain them only if diagnosed as non-spam, see below
23 # Note: add E flag to next recipe when uncommenting
27 # LOG="spamfilter: skipping checks for spamtrapped message$NL"
29 # |$FORMAIL -I"X-Spam: spamtrapped"
32 # check whether this message is being resubmitted
# Two outcomes are visible here:
#  - the message was already trained: log it, set X-Spam to
#    "$TRAINED_AS (already trained)" and set IS_SPAM=already-trained;
#  - the message was merely resubmitted: log it and set X-Spam to
#    "unknown (resubmitted)".
# NOTE(review): the conditions selecting each outcome, and where TRAINED_AS is
# set, are not visible in this section.
39 LOG="spamfilter: skipping already trained $TRAINED_AS$NL"
41 |$FORMAIL -I"X-Spam: $TRAINED_AS (already trained)"
44 { IS_SPAM=already-trained }
49 LOG="spamfilter: skipping resubmitted message$NL"
51 |$FORMAIL -I"X-Spam: unknown (resubmitted)"
55 # do not run spamfilters if the message destination is already set
# Logs the destination held in $DEST, sets X-Spam to "unknown (already routed)"
# and records the reason in SPAM_UNKNOWN.
59 LOG="spamfilter: message already routed to '$DEST'$NL"
61 |$FORMAIL -I"X-Spam: unknown (already routed)"
62 SPAM_UNKNOWN=already-destined
65 # let earlier parts of the mailfilter cause bypassing the checks
# "VAR ?? ." tests the variable's value against the regex "." (any character),
# so the condition holds whenever SKIP_SPAMCHECKS is non-empty. Its value is
# logged as the reason for the skip.
67 * SKIP_SPAMCHECKS ?? .
69 LOG="spamfilter: skipping checks as requested: $SKIP_SPAMCHECKS$NL"
71 |$FORMAIL -I"X-Spam: unknown (skip requested)"
72 SPAM_UNKNOWN=skip-requested
75 # honour skip-spamchecks to exclude certain messages from spam checks
# "* ?" makes the condition depend on the command's exit status: $EGREP runs
# quietly (-q), case-insensitively (-i), with patterns read from
# $CONF/skip-spamchecks (-f). NOTE(review): which part of the message is fed to
# it depends on recipe flags that are not visible here.
78 * ? $EGREP -qif $CONF/skip-spamchecks
80 LOG="spamfilter: skipping checks as per skip-spamchecks$NL"
82 |$FORMAIL -I"X-Spam: unknown (check skipped)"
83 SPAM_UNKNOWN=skip-match
87 # sanity check on message size
# "* > N" holds when the message is larger than N bytes. Oversized messages are
# not scanned; they get an "unknown" X-Spam header naming the limit, and
# SPAM_UNKNOWN records the reason.
89 * > $SPAMCHECK_MAX_MESSAGE_SIZE
91 LOG="spamfilter: skipping check because message size exceeds $SPAMCHECK_MAX_MESSAGE_SIZE bytes$NL"
93 |$FORMAIL -I"X-Spam: unknown (message larger than $SPAMCHECK_MAX_MESSAGE_SIZE bytes)"
94 SPAM_UNKNOWN=too-large
97 # now run the spamfilters
# Each INCLUDERC assignment makes procmail read and run the named rc file at
# this point, in the order listed.
100 INCLUDERC=$PMDIR/spamtraps
101 INCLUDERC=$PMDIR/spammers
102 INCLUDERC=$PMDIR/spampat
103 INCLUDERC=$PMDIR/pre-spam-cleanup
111 #TODO: somehow filter out headers we added
# Extract the crm114 verdict word and numeric score from X-CRM114-Status.
# "\/" marks where procmail starts capturing into $MATCH. NOTE(review): the
# assignments that presumably copy $MATCH into CRM_SPAM / CRM_SCORE are not
# visible in this section.
116 * ^X-CRM114-Status: \/[A-Z]+
120 * ^X-CRM114-Status: .+\([ ]*\/-?[.0-9]+
123 LOG="crm114: $CRM_SPAM/$CRM_SCORE$NL"
# Same approach for SpamAssassin's X-Spam-Status header: the verdict word, the
# value after "score=" and the list after "tests=" (presumably stored in
# SA_SPAM / SA_SCORE / SA_TESTS; confirm against the hidden assignments).
137 * ^X-Spam-Status: \/[A-Za-z]+
141 * ^X-Spam-Status: .+score=\/-?[.0-9]+
145 * ^X-Spam-Status: .+tests=\/[^ ]+
148 LOG="SA: $SA_SPAM/$SA_SCORE/$SA_TESTS$NL"
151 ## CASE 0: crm114 is unsure/untrained
# Each branch below logs its decision and appends (-A) an X-CRM114-Autotrain
# header stating the class and the reason. NOTE(review): whatever consumes
# that header is not visible in this section.
155 # retrain spamtrapped message
159 LOG="spamfilter: scheduling retraining with SPAM due to spamtrap$NL"
161 |$FORMAIL -A "X-CRM114-Autotrain: spam, due to spamtrap"
# The numeric comparison is delegated to perl, which exits 1 (condition fails)
# unless SA_SCORE <= CRM_UNSURE_SA_AUTOTRAIN_LIMIT_HAM.
167 * ? perl -e "$SA_SCORE <= $CRM_UNSURE_SA_AUTOTRAIN_LIMIT_HAM || exit 1"
169 LOG="spamfilter: scheduling retraining with HAM (score $SA_SCORE <= $CRM_UNSURE_SA_AUTOTRAIN_LIMIT_HAM)$NL"
171 |$FORMAIL -A "X-CRM114-Autotrain: ham, according to SA (score $SA_SCORE <= $CRM_UNSURE_SA_AUTOTRAIN_LIMIT_HAM)"
# Spam side: perl exits 1 unless SA_SCORE > CRM_UNSURE_SA_AUTOTRAIN_LIMIT_SPAM.
# NOTE(review): unlike the ham test, this condition carries a "1^0" weight
# prefix (procmail weighted scoring); confirm that is intentional.
177 * 1^0 ? perl -e "$SA_SCORE > $CRM_UNSURE_SA_AUTOTRAIN_LIMIT_SPAM || exit 1"
179 LOG="spamfilter: scheduling retraining with SPAM (score $SA_SCORE > $CRM_UNSURE_SA_AUTOTRAIN_LIMIT_SPAM)$NL"
181 |$FORMAIL -A "X-CRM114-Autotrain: spam, according to SA (score $SA_SCORE > $CRM_UNSURE_SA_AUTOTRAIN_LIMIT_SPAM)"
185 # skip retraining if SA is not convinced
# The ham test above fires on score <= LIMIT_HAM and the spam test on
# score > LIMIT_SPAM, so the undecided band reported here is
# LIMIT_HAM < score <= LIMIT_SPAM (assuming this recipe is the fall-through of
# those two tests). The message and header text state exactly that band.
188 LOG="spamfilter: will not autotrain crm114 because SA is not convinced ($CRM_UNSURE_SA_AUTOTRAIN_LIMIT_HAM < $SA_SCORE <= $CRM_UNSURE_SA_AUTOTRAIN_LIMIT_SPAM)$NL"
190 |$FORMAIL -A "X-CRM114-Autotrain: SA is unsure ($CRM_UNSURE_SA_AUTOTRAIN_LIMIT_HAM < $SA_SCORE <= $CRM_UNSURE_SA_AUTOTRAIN_LIMIT_SPAM)"
191 SPAM_UNSURE=sa-unsure
195 ## CASE 1: disagreement, SA sees ham
# crm114 classified the message as spam while SpamAssassin classified it as
# ham. Resolution order as visible here: spamtrap hit -> retrain as spam;
# SA score at or below the misclassification ham limit -> retrain as ham;
# otherwise mark the message as a disagreement in X-Spam.
200 # message was spamtrapped anyway
204 LOG="spamfilter: resolving crm114/SA disagreement due to spamtrap ($CRM_SCORE/$SA_SCORE)$NL"
207 |$FORMAIL -A "X-CRM114-Retrain: spam, due to spamtrap"
210 # SA is convincing, so retrain crm114
212 * ? perl -e "$SA_SCORE <= $CRM_MISCLASSIFY_SA_AUTOTRAIN_LIMIT_HAM || exit 1"
214 LOG="spamfilter: crm114 found spam ($CRM_SCORE), but SA is more convincing ($SA_SCORE <= $CRM_MISCLASSIFY_SA_AUTOTRAIN_LIMIT_HAM)$NL"
217 |$FORMAIL -A "X-CRM114-Retrain: ham, according to SA (score $SA_SCORE <= $CRM_MISCLASSIFY_SA_AUTOTRAIN_LIMIT_HAM)"
220 # SA is not convincing, mark as disagreement
223 LOG="spamfilter: crm114 found spam ($CRM_SCORE), but SA thinks it's ham ($SA_SCORE)$NL"
226 |$FORMAIL -I "X-Spam: disagree (crm114:spam/$CRM_SCORE SA:ham/$SA_SCORE)"
230 ## CASE 2: disagreement, SA sees spam
# Mirror of the "SA sees ham" case: crm114 classified the message as ham while
# SpamAssassin classified it as spam. Resolution order as visible here:
# spamtrap hit -> retrain as spam; SA score above the misclassification spam
# limit -> retrain as spam; otherwise set SPAM_DISAGREE and mark the message
# as a disagreement in X-Spam.
235 # message was spamtrapped anyway
239 LOG="spamfilter: resolving crm114/SA disagreement due to spamtrap ($CRM_SCORE/$SA_SCORE)$NL"
242 |$FORMAIL -A "X-CRM114-Retrain: spam, due to spamtrap"
245 # SA is convincing, so retrain crm114
247 * ? perl -e "$SA_SCORE > $CRM_MISCLASSIFY_SA_AUTOTRAIN_LIMIT_SPAM || exit 1"
249 LOG="spamfilter: crm114 found ham ($CRM_SCORE), but SA is more convincing ($SA_SCORE > $CRM_MISCLASSIFY_SA_AUTOTRAIN_LIMIT_SPAM)$NL"
252 |$FORMAIL -A "X-CRM114-Retrain: spam, according to SA (score $SA_SCORE > $CRM_MISCLASSIFY_SA_AUTOTRAIN_LIMIT_SPAM)"
255 # SA is not convincing, mark as disagreement
258 LOG="spamfilter: crm114 found ham ($CRM_SCORE), but SA thinks it's spam ($SA_SCORE)$NL"
259 SPAM_DISAGREE=sa-spam
261 |$FORMAIL -I "X-Spam: disagree (crm114:ham/$CRM_SCORE SA:spam/$SA_SCORE)"
# Final verdict headers: X-Spam is set to "yes" or "no" together with both
# filter scores. NOTE(review): the conditions choosing between the two are not
# visible in this section.
271 |$FORMAIL -I"X-Spam: yes (crm114:$CRM_SCORE SA:$SA_SCORE)"
275 |$FORMAIL -I"X-Spam: no (crm114:$CRM_SCORE SA:$SA_SCORE)"
278 # schedule spamtrapped ham for retraining as spam
# The negated "?? ." test holds only when SKIP_SPAMCHECKS is empty or unset,
# so messages whose checks were skipped on request are left alone.
281 * ! SKIP_SPAMCHECKS ?? .
284 LOG="spamfilter: found spamtrapped ham, retraining...$NL"
286 |$FORMAIL -I"X-Spam: spamtrapped ham"
287 IS_SPAM=spamtrapped-ham