/usr/share/pyshared/chemfp/pattern_fingerprinter.py is in python-chemfp 1.1p1-2.
This file is owned by root:root, with mode 0o644.
The actual contents of the file can be viewed below.
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 | import os
class UnsupportedPatternError(KeyError):
    """Error describing a pattern definition that could not be used.

    The pattern is passed to KeyError as its only argument and is also
    kept as the `pattern` attribute.  `reason` is the leading text of the
    message.  `filename` and `lineno` start out as None; whoever catches
    the error may fill them in so the message can say where the pattern
    came from.
    """

    def __init__(self, pattern, reason=None):
        KeyError.__init__(self, pattern)
        self.pattern = pattern
        if reason is None:
            self.reason = "Cannot interpret pattern definition"
        else:
            self.reason = reason
        # Location details are unknown at construction time
        self.filename = None
        self.lineno = None

    def __str__(self):
        # Build the message from its pieces; the location parts are
        # only included when they have been filled in.
        pieces = [self.reason + " " + repr(self.pattern)]
        if self.lineno is not None:
            pieces.append(" at line %d" % (self.lineno,))
        if self.filename is not None:
            pieces.append(" in file %r" % (self.filename,))
        return "".join(pieces)
class PatternFile(object):
    """The bit definitions read from one pattern file.

    Indexing by bit number returns the matching bit definition, and
    iterating yields the bit numbers which have a definition.
    """

    def __init__(self, filename, max_bit, bit_definitions):
        assert max_bit >= 0, max_bit
        self.filename = filename
        self.max_bit = max_bit
        self.bit_definitions = bit_definitions
        # Index the definitions by bit number; if a bit occurs more
        # than once then the later definition replaces the earlier one.
        by_bit = {}
        for definition in bit_definitions:
            by_bit[definition.bit] = definition
        self._bit_to_bit_definition = by_bit

    def __getitem__(self, bit):
        lookup = self._bit_to_bit_definition
        return lookup[bit]

    def __iter__(self):
        lookup = self._bit_to_bit_definition
        return iter(lookup)
class BitDefinition(object):
    """One definition line from a pattern file.

    Holds the bit number, the minimum match count, the pattern string,
    the description text, and the line number the definition came from.
    """
    __slots__ = ("bit", "count", "pattern", "description", "lineno")

    def __init__(self, bit, count, pattern, description, lineno):
        self.bit, self.count = bit, count
        self.pattern, self.description = pattern, description
        self.lineno = lineno
def load_patterns(infile):
    """Read all of the bit definitions from `infile` into a PatternFile.

    `infile` is either a filename, which is opened here in "rU" mode, or
    an object that yields the lines of a pattern file.  The filename
    stored in the result comes from the `name` attribute of the input,
    or is "<unknown>" if the input has no such attribute.

    Raises ValueError if the input contains no bit definitions, and
    passes along any TypeError raised by read_patterns() for a
    malformed line.
    """
    opened_here = isinstance(infile, basestring)
    if opened_here:
        infile = open(infile, "rU")
    try:
        filename = getattr(infile, "name", "<unknown>")
        bit_definitions = list(read_patterns(infile))
    finally:
        # Only close what was opened here; a file object passed in by
        # the caller stays under the caller's control.
        if opened_here:
            infile.close()
    if not bit_definitions:
        # max() of an empty sequence also raises ValueError, but with a
        # message that says nothing about the pattern file.
        raise ValueError("No bit definitions found in %r" % (filename,))
    max_bit = max(bitdef.bit for bitdef in bit_definitions)
    return PatternFile(filename, max_bit, bit_definitions)
def read_patterns(infile):
    """Iterate over the lines of `infile`, yielding a BitDefinition for each definition.

    Each definition line has four whitespace-separated parts: the bit
    position, the minimum match count, the pattern, and a description
    which takes up the rest of the line.  Blank lines and lines starting
    with "#" are skipped.  Line numbers start at 1.

    Raises TypeError if a line has fewer than four fields, if the bit
    position is not a non-negative integer, if the count is not a
    positive integer, or if a bit position is defined more than once.
    """
    seen_bits = {}  # bit number -> line number of its first definition
    for index, line in enumerate(infile):
        lineno = index + 1

        # Leading and trailing whitespace is ignored
        line = line.strip()

        # Ignore blank lines or those with a leading "#"
        if not line or line.startswith("#"):
            continue

        # The first three columns, plus everything else for the description
        fields = line.split(None, 3)
        if len(fields) != 4:
            raise TypeError("Not enough fields on line %d: %r" % (lineno, line))

        # Normalize whitespace for the description
        fields[3] = " ".join(fields[3].split())

        # Do some type checking and error reporting
        bit, count, pattern, description = fields
        if not bit.isdigit():
            raise TypeError(
                "First field of line %d must be a non-negative bit position, not %r" %
                (lineno, bit))
        bit = int(bit)

        if not count.isdigit() or int(count) == 0:
            # Report the offending count field, not the bit number
            raise TypeError(
                "Second field of line %d must be a positive minimum match count, not %r" %
                (lineno, count))
        count = int(count)

        if bit in seen_bits:
            raise TypeError("Line %d redefines bit %d, already set by line %d" %
                            (lineno, bit, seen_bits[bit]))
        seen_bits[bit] = lineno

        yield BitDefinition(bit, count, pattern, description, lineno)
class CountInfo(object):
    """Pairs a minimum match count with the bit it controls.

    `byteno` and `bitmask` are derived from `bit` up front: the bit
    number divided by 8, and a mask with only bit (bit % 8) set.
    """
    __slots__ = ("count", "bit", "byteno", "bitmask")

    def __init__(self, count, bit):
        # Minimum count needed to enable this bit
        self.count = count
        # Used to set not_implemented, and useful for debugging
        self.bit = bit

        # Precomputed to simplify the fingerprint generation code
        byte_index, bit_offset = divmod(bit, 8)
        self.byteno = byte_index
        self.bitmask = 1 << bit_offset
def _bit_definition_to_pattern_definition(bit_definitions):
"Helper function to organize the bit defintions based on pattern instead of bit"
# A pattern definition is of the form:
# (pattern string, count_info_list)
# where the count_info list elements are sorted by count
# I want to preserve the pattern order so that patterns which
# are defined first are evaluated first
ordered_patterns = []
pattern_info = {}
# Find all of the bit definitions for a given pattern
for bitdef in bit_definitions:
if bitdef.pattern not in pattern_info:
pattern_info[bitdef.pattern] = []
ordered_patterns.append(bitdef.pattern)
pattern_info[bitdef.pattern].append( CountInfo(bitdef.count, bitdef.bit) )
# Put them into a slighly more useful form
# - sorted now makes it easier to test when done
# - knowing the max match count lets some matchers optmize how to match
for pattern in ordered_patterns:
count_info_list = pattern_info[pattern]
count_info_list.sort(key=lambda count_info: count_info.count)
yield (pattern,
count_info_list[-1].count, # the largest count
tuple(count_info_list)
)
class LimitedMatcher(object):
    """Wraps a matcher together with the largest match count it supports.

    When a pattern compiles to one of these, count requirements above
    `max_supported` are treated as not implemented and `matcher` is
    used for the remaining ones.
    """

    def __init__(self, max_supported, matcher):
        self.max_supported, self.matcher = max_supported, matcher
def _build_matchers(patterns, pattern_definitions, compile_pattern):
not_implemented = set()
matcher_definitions = []
for (pattern, largest_count, count_info_tuple) in pattern_definitions:
if pattern == "<0>":
# Special case support for setting (or rather, ignoring) the 0 bit
continue
matcher = compile_pattern(pattern, largest_count)
if isinstance(matcher, LimitedMatcher):
max_supported = matcher.max_supported
new_count_info = []
for count_info in count_info_tuple:
if count_info.count <= max_supported:
new_count_info.append(count_info)
else:
not_implemented.add(count_info.bit)
matcher = matcher.matcher
count_info_tuple = tuple(new_count_info)
if not count_info_tuple:
continue
if matcher is None:
# During development I sometimes forgot to return a matcher
# This catches those cases
raise UnsupportedPatternError(pattern)
matcher_definitions.append( (matcher, largest_count, count_info_tuple) )
return not_implemented, tuple(matcher_definitions)
def make_matchers(patterns, compile_pattern):
    """Build the matchers for every bit definition in `patterns`.

    Returns whatever _build_matchers() returns.  If it raises an
    UnsupportedPatternError then the error's filename is set from
    `patterns`, and its line number is set from the first bit definition
    using the failing pattern, before the error is re-raised.
    """
    definitions = _bit_definition_to_pattern_definition(patterns.bit_definitions)
    try:
        result = _build_matchers(patterns, definitions, compile_pattern)
    except UnsupportedPatternError as err:
        err.filename = patterns.filename
        failed_pattern = err.args[0]
        # Report the line of the first definition using the pattern;
        # the line number is left alone if no definition uses it.
        for bitdef in patterns.bit_definitions:
            if bitdef.pattern == failed_pattern:
                err.lineno = bitdef.lineno
                break
        raise
    return result
class PatternFingerprinter(object):
    """Base class for fingerprinters driven by a set of pattern definitions.

    `patterns` supplies `max_bit`, the bit definitions, and lookup of a
    definition by bit number.  `compile_pattern` is handed to
    make_matchers() to turn each pattern into a matcher.  Derived
    classes must implement fingerprint().
    """

    def __init__(self, patterns, compile_pattern):
        self.patterns = patterns
        # Enough bytes to hold bits 0 through max_bit
        self.num_bytes = (patterns.max_bit // 8) + 1
        self.not_implemented, self.matcher_definitions = (
            make_matchers(patterns, compile_pattern))

    def describe(self, bit):
        """Return the description for `bit`, flagged if the bit is not implemented."""
        description = self.patterns[bit].description
        if bit in self.not_implemented:
            # The result of the concatenation must be kept; a bare
            # expression statement would throw it away.
            description = description + " (NOT IMPLEMENTED)"
        return description

    def fingerprint(self, mol):
        """Compute the fingerprint for `mol`; must be overridden."""
        # NotImplemented is a constant, not an exception class, so the
        # exception to raise here is NotImplementedError.
        raise NotImplementedError("Must be implemented by a derived class")
def _load_named_patterns(name):
    """Load the "<name>.patterns" file located in this module's directory."""
    module_dir = os.path.dirname(__file__)
    pattern_path = os.path.join(module_dir, name + ".patterns")
    return load_patterns(pattern_path)
|