/usr/share/doc/spambayes/utilities/mkreversemap.py is in spambayes 1.1b1-4.
This file is owned by root:root, with mode 0o644.
The actual contents of the file can be viewed below.
#!/usr/bin/env python
"""
Create mapping from features to message ids
usage %(prog)s [ options ] mailbox ...
-d mapfile - identify file which will hold mapping information (required)
-t ham|spam - identify the type of messages in the input mailbox(es)
-h - print this documentation and exit
One of '-t ham' or '-t spam' must be given, as must one or more message
sources.
"""
import sys
import getopt
from spambayes.mboxutils import getmbox
from spambayes.tokenizer import tokenize
from spambayes.Options import options
from spambayes.classifier import Classifier
from spambayes.safepickle import pickle_read, pickle_write
# Script name as invoked; usage() interpolates it into the module docstring
# through the %(prog)s placeholder.
prog = sys.argv[0]
def usage(msg=None):
    """Write an optional message and the module help text to stderr.

    msg -- when not None, written first on its own line.  It is formatted
           with %s, so it may be a string or an exception object such as
           getopt.GetoptError.

    The module docstring is stripped of surrounding whitespace and
    interpolated with the module globals, so %(prog)s in the help text is
    replaced by the module-level ``prog`` value.
    """
    # sys.stderr.write() replaces the Python 2-only "print >> sys.stderr"
    # statement: it emits the same text and also parses on Python 3.
    if msg is not None:
        # The 1-tuple keeps a tuple-valued msg from being unpacked by %.
        sys.stderr.write("%s\n" % (msg,))
    sys.stderr.write("%s\n" % (__doc__.strip() % globals(),))
def _remember_token(mapdb, token, mboxtype, source, msgid):
    """Record in mapdb that message msgid from source produced token.

    Each mapdb value is a (ham, spam) pair of dicts; each dict maps a
    source name to the set of message ids seen for the token.
    """
    ham_sources, spam_sources = mapdb.get(token, ({}, {}))
    if mboxtype == "ham":
        bucket = ham_sources
    else:
        bucket = spam_sources
    bucket.setdefault(source, set()).add(msgid)
    # Store the pair back so tokens seen for the first time are kept.
    mapdb[token] = (ham_sources, spam_sources)


def mapmessages(f, mboxtype, mapdb):
    """Add every message found in source f to the reverse map mapdb.

    f        -- message source handed to getmbox(); also used as the key
                under which message ids are grouped.
    mboxtype -- "ham" selects the ham side of each entry; any other value
                selects the spam side.
    mapdb    -- mapping of token -> (ham, spam), updated in place.

    A running "source: count" progress line is written to stdout.
    Messages with no message-id header are counted but not mapped.
    """
    seen = 0
    for message in getmbox(f):
        seen += 1
        # '\r' rewrites the same terminal line on every message.
        sys.stdout.write('\r%s: %d' % (f, seen))
        sys.stdout.flush()

        message_id = message.get("message-id")
        if message_id is None:
            continue

        for token in tokenize(message):
            _remember_token(mapdb, token, mboxtype, f, message_id)

        if options["Classifier", "x-use_bigrams"]:
            # Second pass over a fresh token stream; _enhance_wordstream
            # presumably adds bigram features -- confirm in Classifier.
            enhanced = Classifier()._enhance_wordstream(tokenize(message))
            for token in enhanced:
                _remember_token(mapdb, token, mboxtype, f, message_id)

    # Finish the progress line.
    sys.stdout.write("\n")
def main(args):
    """Parse the command line and update the reverse map file.

    args -- command-line arguments without the program name.

    Recognised options: -h/--help, -d/--database MAPFILE and
    -t/--type ham|spam.  The remaining arguments are the message sources.

    Returns 0 after printing help and 1 after a usage error.  On success
    the function falls off the end (returns None), which sys.exit() treats
    as a successful exit.
    """
    try:
        opts, args = getopt.getopt(args, "hd:t:",
                                   ["type=", "help", "database="])
    except getopt.GetoptError as msg:
        # "as" replaces the Python 2-only "except E, name" spelling.
        usage(msg)
        return 1

    mapfile = None
    mboxtype = None
    for opt, arg in opts:
        if opt in ("-h", "--help"):
            usage()
            return 0
        elif opt in ("-d", "--database"):
            mapfile = arg
        elif opt in ("-t", "--type"):
            mboxtype = arg

    if mapfile is None:
        usage("'-d mapfile' is required")
        return 1
    if mboxtype is None:
        usage("'-t ham|spam' is required")
        return 1
    if mboxtype not in ("ham", "spam"):
        usage("mboxtype must be 'ham' or 'spam'")
        return 1
    if not args:
        # The help text requires at least one message source; without this
        # check the map file was silently rewritten with nothing added.
        usage("one or more message sources are required")
        return 1

    # NOTE(review): pickle_read presumably unpickles mapfile; only point -d
    # at a file from a trusted source.
    try:
        mapd = pickle_read(mapfile)
    except IOError:
        # Presumably the map file does not exist yet -- start empty.
        mapd = {}

    for f in args:
        mapmessages(f, mboxtype, mapd)

    pickle_write(mapfile, mapd)
# Script entry point: hand the arguments after the program name to main()
# and use its return value as the process exit status.
if __name__ == "__main__":
    exit_status = main(sys.argv[1:])
    sys.exit(exit_status)
|