This file is indexed.

/usr/share/pyshared/chemfp/pattern_fingerprinter.py is in python-chemfp 1.1p1-2.

This file is owned by root:root, with mode 0o644.

The actual contents of the file can be viewed below.

  1
  2
  3
  4
  5
  6
  7
  8
  9
 10
 11
 12
 13
 14
 15
 16
 17
 18
 19
 20
 21
 22
 23
 24
 25
 26
 27
 28
 29
 30
 31
 32
 33
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
import os

class UnsupportedPatternError(KeyError):
    """Signals that a pattern definition could not be interpreted.

    The pattern is passed to KeyError as its only argument and is also
    available as ``pattern``.  ``reason`` is the leading text of the
    message; a generic reason is used when none is given.

    ``filename`` and ``lineno`` both start as None.  When they are set
    afterwards, str() of the exception includes them.
    """
    def __init__(self, pattern, reason=None):
        KeyError.__init__(self, pattern)
        self.pattern = pattern
        self.reason = ("Cannot interpret pattern definition"
                       if reason is None else reason)
        # The location is not known at construction time
        self.filename = None
        self.lineno = None

    def __str__(self):
        # Build the message piece by piece; the location parts are optional
        pieces = [self.reason + " " + repr(self.pattern)]
        if self.lineno is not None:
            pieces.append(" at line %d" % (self.lineno,))
        if self.filename is not None:
            pieces.append(" in file %r" % (self.filename,))
        return "".join(pieces)

class PatternFile(object):
    """The bit definitions which came from one pattern file.

    Attributes:
      filename - the name passed to the constructor
      max_bit - the largest bit number (must not be negative)
      bit_definitions - the definitions, exactly as passed in

    Indexing with a bit number returns the definition for that bit
    (KeyError if there is none).  Iterating yields the bit numbers
    which have a definition.
    """
    def __init__(self, filename, max_bit, bit_definitions):
        assert max_bit >= 0, max_bit
        self.filename = filename
        self.max_bit = max_bit
        self.bit_definitions = bit_definitions

        # Index the definitions by bit number for __getitem__/__iter__
        by_bit = {}
        for definition in bit_definitions:
            by_bit[definition.bit] = definition
        self._bit_to_bit_definition = by_bit

    def __getitem__(self, bit):
        return self._bit_to_bit_definition[bit]

    def __iter__(self):
        return iter(self._bit_to_bit_definition)

    
class BitDefinition(object):
    """A simple record holding the five values which define one bit.

    Instances use __slots__, so only the five named attributes exist.
    """
    __slots__ = ("bit", "count", "pattern", "description", "lineno")

    def __init__(self, bit, count, pattern, description, lineno):
        # Store every constructor argument under the attribute of the same name
        (self.bit, self.count, self.pattern,
         self.description, self.lineno) = (bit, count, pattern,
                                           description, lineno)
    

def load_patterns(infile):
    """Read the bit definitions from `infile` and return a PatternFile.

    `infile` is either a filename or an object which can be iterated
    over to get lines (such as an open file).  A filename is opened
    here and closed again once the definitions have been read; an
    object supplied by the caller is left open.

    The PatternFile's filename is taken from the ``name`` attribute of
    the file object, or is "<unknown>" if there is no such attribute.

    Raises ValueError if there are no bit definitions at all, and
    passes through any exception raised by read_patterns().
    """
    opened_here = isinstance(infile, basestring)
    if opened_here:
        infile = open(infile, "rU")
    try:
        filename = getattr(infile, "name", "<unknown>")
        # read_patterns() is a generator; consume it fully before closing
        bit_definitions = list(read_patterns(infile))
    finally:
        # Only close the file if this function was the one to open it
        if opened_here:
            infile.close()

    if not bit_definitions:
        # max() of an empty sequence also raises ValueError, but with a
        # message which says nothing about the pattern file.
        raise ValueError("No bit definitions found in %r" % (filename,))
    max_bit = max(bitdef.bit for bitdef in bit_definitions)

    return PatternFile(filename, max_bit, bit_definitions)

def read_patterns(infile):
    """Parse pattern definition lines, yielding a BitDefinition for each.

    `infile` is any iterable of lines.  Blank lines and lines whose
    first non-whitespace character is "#" are skipped.  Every other
    line must have four whitespace-separated fields:

      bit  count  pattern  description

    where `bit` is a non-negative integer, `count` is a positive
    integer, and the description is the rest of the line with runs of
    whitespace collapsed to a single space.  Line numbers start at 1.

    Raises TypeError if a line has fewer than four fields, if the bit
    or count field is not valid, or if a bit is defined more than once.
    """
    # Maps each bit to the line number which defined it
    seen_bits = {}

    for lineno, line in enumerate(infile):
        lineno += 1

        # Leading and trailing whitespace is ignored
        line = line.strip()

        # Ignore blank lines or those with a leading "#"
        if not line or line.startswith("#"):
            continue

        # The first three columns, plus everything else for the description
        fields = line.split(None, 3)
        if len(fields) != 4:
            raise TypeError("Not enough fields on line %d: %r" % (lineno, line))

        # Normalize whitespace for the description
        fields[3] = " ".join(fields[3].split())

        # Do some type checking and error reporting
        bit, count, pattern, description = fields
        if not bit.isdigit():
            raise TypeError(
                "First field of line %d must be a non-negative bit position, not %r" %
                            (lineno, bit))
        bit = int(bit)

        if not count.isdigit() or int(count) == 0:
            # Report the bad count field itself (not the bit number)
            raise TypeError(
                "Second field of line %d must be a positive minimum match count, not %r" %
                            (lineno, count))
        count = int(count)

        if bit in seen_bits:
            raise TypeError("Line %d redefines bit %d, already set by line %d" %
                            (lineno, bit, seen_bits[bit]))
        seen_bits[bit] = lineno

        yield BitDefinition(bit, count, pattern, description, lineno)

class CountInfo(object):
    """Pairs a minimum match count with the bit it turns on.

    Besides the two constructor values, the byte index and the
    single-bit mask for `bit` are precomputed (8 bits per byte, with
    bit 0 as the lowest bit of byte 0).
    """
    __slots__ = ("count", "bit", "byteno", "bitmask")

    def __init__(self, count, bit):
        # Split the bit position into a byte index and an offset in that byte
        byte_index, offset = divmod(bit, 8)

        self.count = count  # minimum count needed to enable this bit
        self.bit = bit  # used to set not_implemented, and useful for debugging

        # Precomputed so the fingerprint generation code stays simple
        self.byteno = byte_index
        self.bitmask = 1 << offset


def _bit_definition_to_pattern_definition(bit_definitions):
    "Helper function to organize the bit defintions based on pattern instead of bit"
    
    # A pattern definition is of the form:
    #  (pattern string, count_info_list)
    #     where the count_info list elements are sorted by count

    # I want to preserve the pattern order so that patterns which
    # are defined first are evaluated first
    ordered_patterns = []
    pattern_info = {}

    # Find all of the bit definitions for a given pattern
    for bitdef in bit_definitions:
        if bitdef.pattern not in pattern_info:
            pattern_info[bitdef.pattern] = []
            ordered_patterns.append(bitdef.pattern)
        pattern_info[bitdef.pattern].append( CountInfo(bitdef.count, bitdef.bit) )

    # Put them into a slighly more useful form
    #  - sorted now makes it easier to test when done
    #  - knowing the max match count lets some matchers optmize how to match
    for pattern in ordered_patterns:
        count_info_list = pattern_info[pattern]
        count_info_list.sort(key=lambda count_info: count_info.count)
        yield (pattern,
               count_info_list[-1].count,  # the largest count
               tuple(count_info_list)
               )

class LimitedMatcher(object):
    """Wraps a matcher together with the largest count it supports.

    _build_matchers() unwraps these: bit definitions whose count is
    greater than `max_supported` are marked as not implemented, and
    `matcher` is used for the remaining ones.
    """
    def __init__(self, max_supported, matcher):
        self.max_supported, self.matcher = max_supported, matcher
                 
def _build_matchers(patterns, pattern_definitions, compile_pattern):
    not_implemented = set()
    matcher_definitions = []
    for (pattern, largest_count, count_info_tuple) in pattern_definitions:
        if pattern == "<0>":
            # Special case support for setting (or rather, ignoring) the 0 bit
            continue

        matcher = compile_pattern(pattern, largest_count)
        if isinstance(matcher, LimitedMatcher):
            max_supported = matcher.max_supported
            new_count_info = []
            for count_info in count_info_tuple:
                if count_info.count <= max_supported:
                    new_count_info.append(count_info)
                else:
                    not_implemented.add(count_info.bit)
            
            matcher = matcher.matcher
            count_info_tuple = tuple(new_count_info)
            if not count_info_tuple:
                continue

        if matcher is None:
            # During development I sometimes forgot to return a matcher
            # This catches those cases
            raise UnsupportedPatternError(pattern)
        
        matcher_definitions.append( (matcher, largest_count, count_info_tuple) )

    return not_implemented, tuple(matcher_definitions)

def make_matchers(patterns, compile_pattern):
    """Build the matchers for every pattern in `patterns`.

    `patterns` supplies ``bit_definitions`` and ``filename``;
    `compile_pattern` is passed through to _build_matchers().

    Returns the (not_implemented, matcher_definitions) pair from
    _build_matchers().

    If an UnsupportedPatternError is raised, its ``filename`` is set
    from `patterns` and its ``lineno`` is set to the line of the first
    bit definition which uses the failing pattern (it is left alone if
    no definition uses that pattern).  The same exception is then
    re-raised.
    """
    pattern_definitions = _bit_definition_to_pattern_definition(patterns.bit_definitions)
    try:
        return _build_matchers(patterns, pattern_definitions, compile_pattern)
    except UnsupportedPatternError as err:
        # "except ... as" works in Python 2.6 and later; the older
        # "except X, err" spelling is a syntax error in Python 3.
        err.filename = patterns.filename

        # The pattern is the exception's first (and only) argument
        pattern = err.args[0]
        for bitdef in patterns.bit_definitions:
            if bitdef.pattern == pattern:
                err.lineno = bitdef.lineno
                break
        raise
        

class PatternFingerprinter(object):
    """Base class for fingerprinters driven by a set of pattern definitions.

    `patterns` provides ``max_bit`` and is indexable by bit number to
    get an object with a ``description``.  `compile_pattern` is passed
    to make_matchers() to build the matchers.

    Attributes:
      patterns - the patterns passed to the constructor
      num_bytes - the number of bytes needed to hold bit ``max_bit``
      not_implemented - the set of bits which could not be implemented
      matcher_definitions - the matcher definitions from make_matchers()

    Derived classes must implement fingerprint().
    """
    def __init__(self, patterns, compile_pattern):
        self.patterns = patterns

        # 8 bits per byte; bits are numbered from 0
        self.num_bytes = (patterns.max_bit // 8) + 1
        self.not_implemented, self.matcher_definitions = (
            make_matchers(patterns, compile_pattern)   )

    def describe(self, bit):
        """Return the description for `bit`

        The text " (NOT IMPLEMENTED)" is added to the end when the bit
        is in ``not_implemented``.  Raises whatever ``patterns[bit]``
        raises when the bit has no definition.
        """
        description = self.patterns[bit].description
        if bit in self.not_implemented:
            # This must rebind the name; the concatenation by itself
            # is thrown away.
            description = description + " (NOT IMPLEMENTED)"
        return description

    def fingerprint(self, mol):
        """Compute the fingerprint for `mol`; must be overridden

        Raises NotImplementedError in this base class.
        """
        # NotImplemented is a constant for rich comparisons and cannot
        # be called; NotImplementedError is the exception.
        raise NotImplementedError("Must be implemented by a derived class")


def _load_named_patterns(name):
    """Load "<name>.patterns" from the directory which contains this module

    Returns whatever load_patterns() returns for that filename.
    """
    module_dir = os.path.dirname(__file__)
    patterns_path = os.path.join(module_dir, name + ".patterns")
    return load_patterns(patterns_path)