From: martin f. krafft Date: Mon, 25 Feb 2008 15:58:35 +0000 (+0100) Subject: add threshold values X-Git-Url: https://git.madduck.net/etc/mailfilter.git/commitdiff_plain/45b624514190876194da3dcb2c0efe9ee3639407 add threshold values --- diff --git a/crm114/mailfilter.cf b/crm114/mailfilter.cf index 95006bd..f6e5bba 100644 --- a/crm114/mailfilter.cf +++ b/crm114/mailfilter.cf @@ -88,6 +88,11 @@ # ########################################################################## +# ----------- define an optional target for where to send spam (that is, +# ------------ emails that we want to "fail", or reject to another +# ------------ address. Note that this is NOT a "program fault" address, +# ------------ but where to send "bad" email to in the general case. +# ------------ You can specify tightly controlled conditions too, # ----------- define an optional target for where to send spam, # ----------- To NOT forward this to another account, just leave the # ----------- address as the empty string, which is '//'. @@ -103,7 +108,8 @@ # -------- If you would prefer to send specific kinds of spam to # -------- different mailboxes, here's where to do it. -# ----------(be sure to uncomment the line!) +# -------- (be sure to uncomment the line!). Again, these are +# --------- not "program fault" conditions, just different filter results. # # :fail_priority_mail_to: /where_priority_fails_go/ # :fail_blacklist_mail_to: /where_blacklist_fails_go/ @@ -216,6 +222,10 @@ :log_all_mail_to_file: // #:log_all_mail_to_file: /my_personal_mail_log_file_name.txt/ +# --------- When we log messages, should we use a mail separator? +# --------- And, if so, what? +:mail_separator: /\n-=-=-=-=-=-=- cut here -=-=-=-=-=-=-\n/ + # # ---------- Message Cacheing for retraining - do we keep a cache of # ---------- messages we've classified recently "in the wild" as retrain @@ -259,8 +269,8 @@ # ------ options to shuffle.crm will work fine. Alternatively, # ------ you can use the "sort --key 1.2" on date-named files to # ----- achieve chronological training -:trainer_randomizer_command: / .\/shuffle.crm / -#:trainer_randomizer_command: / .\/crm114_tre shuffle.crm / +:trainer_randomizer_command: /.\/shuffle.crm/ +#:trainer_randomizer_command: /.\/crm114 shuffle.crm/ #:trainer_randomizer_command: /sort --key 1.2/ @@ -366,24 +376,30 @@ # # # - -# --------- If you are using thick-threshold training of any sort, -# ---------- (for OSBF or otherwise) put the threshold here. -# ----------- Remember, the polarity of the thick threshold value -# ------------ is that anything scoring less than this value -# ------------- should be considered a trainable error, and it "flips over" -# -------------- so that both good mail and spam have the right symmetrical -# ---------------- polarity. - -# ------ a very small thick threshold (or zero!) works for Markovian. -#:thick_threshold: /0/ -#:thick_threshold: /.001/ -# ----- a thick threshold of 5 to 20 seems good for OSB, OSBF, -# Hyperspace, Bit-Entropy, and Winnow -#:thick_threshold: /5.0/ -:thick_threshold: /10.0/ -#:thick_threshold: /20.0/ +# --------Thresholds for GOOD/UNSURE/SPAM thick-threshold training +# ------- +# ------ A very small thick threshold (or zero!) works for Markovian. +# ----- A thick threshold of 5 to 20 seems good for OSB, OSBF, +# ---- Hyperspace, Bit-Entropy, and Winnow. If you want an asymmetric +# --- threshold system, you can do that by having :good_threshold: +# -- be different from :spam_threshold:. The defaults are +/- 10.0 # +# +# ---- Things rated equal to or better than this are GOOD email +#:good_threshold: /0.01/ +#:good_threshold: /5.0/ +:good_threshold: /10.0/ +#:good_threshold: /20.0/ +# +# ---- Things rated less than or equal to this are SPAM +#:spam_threshold: /-0.01/ +#:spam_threshold: /-5.0/ +:spam_threshold: /-10.0/ +#:spam_threshold: /-20.0/ + +# ---- mailfilter uses a single threshold and operates symmetrically. +# --- (this is only to provide backward compatibility) +:thick_threshold: /10.0/ # ---- What regex do we use for LEARN/CLASSIFY? the first is the # ---- "old standard". Other ones are handy for different spam @@ -391,7 +407,7 @@ # ---- packed HTML spam, which is almost everybody in 2003, so it # ---- used to be the default. But since spammers have shifted away # ---- from this, it isn't the default any longer. IF you change -# ---- this, you MUST rebuild your .css files with roughly equal +# ---- this, you MUST rebuild your .css files with decent # ---- amounts of locally-grown spam and nonspam ( if you've been # ---- following instructions and using the "reaver" cache, this is # ---- easily done! )