This file is indexed.

/usr/share/doc/spambayes/utilities/mkreversemap.py is in spambayes 1.1b1-4.

This file is owned by root:root, with mode 0o644.

The actual contents of the file can be viewed below.

  1
  2
  3
  4
  5
  6
  7
  8
  9
 10
 11
 12
 13
 14
 15
 16
 17
 18
 19
 20
 21
 22
 23
 24
 25
 26
 27
 28
 29
 30
 31
 32
 33
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
#!/usr/bin/env python

"""
Create mapping from features to message ids

usage %(prog)s [ options ] mailbox ...

-d mapfile - identify file which will hold mapping information (required)

-t ham|spam - identify the type of messages in the input mailbox(es)

-h - print this documentation and exit

One of '-t ham' or '-t spam' must be given, as must one or more message
sources.
"""

import sys
import getopt

from spambayes.mboxutils import getmbox
from spambayes.tokenizer import tokenize
from spambayes.Options import options
from spambayes.classifier import Classifier
from spambayes.safepickle import pickle_read, pickle_write

# Name the script was invoked with; usage() substitutes it for %(prog)s
# when it formats the module docstring against globals().
prog = sys.argv[0]

def usage(msg=None):
    """Write the command-line help to standard error.

    If msg is not None it is printed first, on a line of its own.  The
    help text is the module docstring with surrounding whitespace
    stripped, %-formatted against the module globals.
    """
    stream = sys.stderr
    if msg is not None:
        print >> stream, msg
    help_template = __doc__.strip()
    print >> stream, help_template % globals()

def _record_token(mapdb, token, f, mboxtype, msgid):
    """Note in mapdb that message msgid from source f contains token."""
    ham, spam = mapdb.get(token, ({}, {}))
    # Anything other than "ham" is filed under spam.
    if mboxtype == "ham":
        bucket = ham
    else:
        bucket = spam
    bucket.setdefault(f, set()).add(msgid)
    # Store the pair back explicitly: for a token seen for the first time
    # the two dicts are fresh defaults that mapdb does not hold yet.
    mapdb[token] = (ham, spam)

def mapmessages(f, mboxtype, mapdb):
    """Add every message from source f to the token -> message-id map.

    f        -- message source handed to getmbox(); also used as the key
                under which the message ids are grouped
    mboxtype -- "ham" to record the messages as ham; any other value
                records them as spam
    mapdb    -- mapping updated in place; each token maps to a
                (ham, spam) pair of dicts, and each dict maps a source
                to the set of message ids containing the token

    A running "source: count" progress line is written to standard
    output.  Messages without a message-id header are counted in that
    display but are not recorded.
    """
    i = 0
    for msg in getmbox(f):
        i += 1
        # '\r' rewinds to the start of the line so the counter is
        # overwritten in place rather than scrolling.
        sys.stdout.write('\r%s: %d' % (f, i))
        sys.stdout.flush()
        msgid = msg.get("message-id")
        if msgid is None:
            continue
        for t in tokenize(msg):
            _record_token(mapdb, t, f, mboxtype, msgid)
        if options["Classifier", "x-use_bigrams"]:
            # With the bigram option on, also record whatever the
            # classifier's enhanced word stream yields for this message.
            for t in Classifier()._enhance_wordstream(tokenize(msg)):
                _record_token(mapdb, t, f, mboxtype, msgid)
    sys.stdout.write("\n")

def main(args):
    try:
        opts, args = getopt.getopt(args, "hd:t:",
                                   ["type=", "help", "database="])
    except getopt.GetoptError, msg:
        usage(msg)
        return 1

    mapfile = None
    mboxtype = None
    for opt, arg in opts:
        if opt in ("-h", "--help"):
            usage()
            return 0
        elif opt in ("-d", "--database"):
            mapfile = arg

        elif opt in ("-t", "--type"):
            mboxtype = arg

    if mapfile is None:
        usage("'-d mapfile' is required")
        return 1

    if mboxtype is None:
        usage("'-t ham|spam' is required")
        return 1

    if mboxtype not in ("ham", "spam"):
        usage("mboxtype must be 'ham' or 'spam'")
        return 1

    try:
        mapd = pickle_read(mapfile)
    except IOError:
        mapd = {}

    for f in args:
        mapmessages(f, mboxtype, mapd)
    pickle_write(mapfile, mapd)

# When executed as a script, run main() on the command-line arguments
# (without the program name) and use its return value as the exit status.
if __name__ == "__main__":
    sys.exit(main(sys.argv[1:]))