/usr/share/pyshared/chemfp/rdkit_patterns.py is in python-chemfp 1.1p1-2.
This file is owned by root:root, with mode 0o644.
The actual contents of the file can be viewed below.
from __future__ import absolute_import
from rdkit import Chem
from . import pattern_fingerprinter
from . import rdkit
from . import types
from . import __version__ as chemfp_version
# Software identifier reported for these fingerprints: the SOFTWARE string
# of the chemfp rdkit module followed by " chemfp/<chemfp version>".
SOFTWARE = rdkit.SOFTWARE + " chemfp/%s" % (chemfp_version,)
class HydrogenMatcher(object):
    """Matcher that looks for hydrogens in a molecule.

    A hydrogen is either an atom whose atomic number is 1 or a hydrogen
    reported by an atom's ``GetTotalNumHs()``.
    """

    def has_match(self, mol):
        """Return 1 if *mol* contains at least one hydrogen, otherwise 0."""
        for atom in mol.GetAtoms():
            # The atom is itself a hydrogen, or it carries hydrogens.
            if atom.GetAtomicNum() == 1 or atom.GetTotalNumHs():
                return 1
        return 0

    def num_matches(self, mol, largest_count):
        """Count the hydrogens in *mol*.

        Counting stops after the first atom that brings the running total
        to *largest_count* or more, so the result may exceed
        *largest_count* but is never an undercount below it.
        """
        total = 0
        for atom in mol.GetAtoms():
            is_hydrogen_atom = atom.GetAtomicNum() == 1
            total += int(is_hydrogen_atom) + atom.GetTotalNumHs()
            if total >= largest_count:
                # Enough hydrogens seen; no need to visit the other atoms.
                break
        return total
class AromaticRings(object):
    """Matcher that detects and counts aromatic rings."""

    def __init__(self):
        # Presence test only: any aromatic atom that is in a ring implies
        # that an aromatic ring exists, so one SMARTS atom query suffices.
        self._single_matcher = Chem.MolFromSmarts("[aR]")

    def has_match(self, mol):
        """Return whether *mol* matches the aromatic ring-atom query."""
        return mol.HasSubstructMatch(self._single_matcher)

    def num_matches(self, mol, largest_count):
        """Count the rings of *mol* whose bonds are all aromatic.

        The count stops as soon as it equals *largest_count*.
        """
        aromatic_ring_count = 0
        for bond_ring in mol.GetRingInfo().BondRings():
            for bond_index in bond_ring:
                if not mol.GetBondWithIdx(bond_index).GetIsAromatic():
                    # One non-aromatic bond disqualifies the whole ring.
                    break
            else:
                # Every bond in this ring was aromatic.
                aromatic_ring_count += 1
                if aromatic_ring_count == largest_count:
                    break
        return aromatic_ring_count
def _is_hetereo_aromatic_atom(atom):
    """Tell whether *atom* is aromatic and neither hydrogen nor carbon."""
    aromatic = atom.GetIsAromatic()
    if not aromatic:
        # Hand back the falsy aromaticity flag itself.
        return aromatic
    # Atomic numbers 1 (H) and 6 (C) do not count as hetero atoms.
    return atom.GetAtomicNum() not in (1, 6)
class HeteroAromaticRings(object):
    """Matcher that detects and counts rings containing an aromatic hetero atom."""

    def __init__(self):
        # Presence test only: an aromatic ring atom that is not carbon
        # means there is a hetero-aromatic ring.
        self._single_matcher = Chem.MolFromSmarts("[aR;!#6]")

    def has_match(self, mol):
        """Return whether *mol* matches the aromatic non-carbon ring-atom query."""
        return mol.HasSubstructMatch(self._single_matcher)

    def num_matches(self, mol, largest_count):
        """Count the rings of *mol* holding at least one hetero-aromatic atom.

        The count stops as soon as it equals *largest_count*.
        """
        hetero_ring_count = 0
        for atom_ring in mol.GetRingInfo().AtomRings():
            # Lazily fetch atoms so the scan stops at the first hit.
            ring_atoms = (mol.GetAtomWithIdx(atom_index) for atom_index in atom_ring)
            if not any(_is_hetereo_aromatic_atom(atom) for atom in ring_atoms):
                continue
            hetero_ring_count += 1
            if hetero_ring_count == largest_count:
                break
        return hetero_ring_count
class NumFragments(object):
    """Matcher that detects and counts the fragments of a molecule."""

    def has_match(self, mol):
        """Return True when *mol* has at least one atom."""
        atom_count = mol.GetNumAtoms()
        return atom_count > 0

    def num_matches(self, mol, largest_count):
        """Return the number of fragments reported by ``Chem.GetMolFrags``.

        *largest_count* is accepted for interface compatibility with the
        other matchers and is not used.
        """
        fragments = Chem.GetMolFrags(mol)
        return len(fragments)
# RDKit matches "molecule.HasSubstructMatch(match_pattern)"
# while every other toolkit does something like "match_pattern.HasSubstructMatch(molecule)"
# Since SMARTS doesn't handle all the pattern cases, I prefer the second ordering.
# This class inverts the order so I can do that.
class InvertedMatcher(object):
    """Adapter giving a query the ``has_match(mol)`` / ``num_matches(mol, n)`` API.

    The wrapped query is handed to the molecule's own substructure-search
    methods, so callers can write ``matcher.has_match(mol)``.
    """

    def __init__(self, matcher):
        # The query object passed to the molecule's substructure methods.
        self.matcher = matcher

    def has_match(self, mol):
        """Return the result of ``mol.HasSubstructMatch`` for the wrapped query."""
        query = self.matcher
        return mol.HasSubstructMatch(query)

    def num_matches(self, mol, max_count):
        """Return how many matches ``mol.GetSubstructMatches`` reports.

        *max_count* is not used; the result is the length of whatever the
        molecule returns for the wrapped query.
        """
        matches = mol.GetSubstructMatches(self.matcher)
        return len(matches)
# Special pattern names, written as "<...>", mapped to the matcher class
# that implements each of them.
_pattern_classes = {
    "<H>": HydrogenMatcher,
    "<aromatic-rings>": AromaticRings,
    "<hetero-aromatic-rings>": HeteroAromaticRings,
    "<fragments>": NumFragments,
}


def rdkit_compile_pattern(pattern, max_count):
    """Turn a pattern string into a matcher object.

    A name listed in ``_pattern_classes`` yields a new instance of the
    registered class. Any other pattern starting with "<" is rejected.
    Everything else is parsed as SMARTS and wrapped in an
    ``InvertedMatcher``. *max_count* is not used here.

    Raises:
        NotImplementedError: for an unregistered "<...>" pattern.
        pattern_fingerprinter.UnsupportedPatternError: when
            ``Chem.MolFromSmarts`` returns None for the pattern.
    """
    special_class = _pattern_classes.get(pattern)
    if special_class is not None:
        return special_class()

    if pattern.startswith("<"):
        # Looks like a special pattern, but nothing is registered for it.
        raise NotImplementedError(pattern)

    # Everything else must be a SMARTS pattern.
    smarts_query = Chem.MolFromSmarts(pattern)
    if smarts_query is not None:
        return InvertedMatcher(smarts_query)
    raise pattern_fingerprinter.UnsupportedPatternError(
        pattern, "Can not interpret SMARTS pattern")
class RDKitPatternFingerprinter(pattern_fingerprinter.PatternFingerprinter):
    """Pattern fingerprinter whose patterns are compiled by rdkit_compile_pattern."""

    def __init__(self, patterns):
        assert patterns is not None
        super(RDKitPatternFingerprinter, self).__init__(
            patterns, rdkit_compile_pattern)

    def fingerprint(self, mol):
        """Compute the fingerprint of *mol*.

        Returns a string with ``self.num_bytes`` characters, one ``chr()``
        per byte value, with a bit set for each satisfied pattern/count.
        """
        fp_bytes = [0] * self.num_bytes
        for matcher, largest_count, count_info_tuple in self.matcher_definitions:
            if largest_count == 1:
                # Only presence matters, so the cheaper test is enough.
                if matcher.has_match(mol):
                    info = count_info_tuple[0]
                    fp_bytes[info.byteno] |= info.bitmask
                continue

            actual_count = matcher.num_matches(mol, largest_count)
            if not actual_count:
                continue
            for info in count_info_tuple:
                if actual_count < info.count:
                    # Stop at the first threshold that is not reached.
                    # NOTE(review): presumably the entries are ordered by
                    # increasing count -- confirm against PatternFingerprinter.
                    break
                fp_bytes[info.byteno] |= info.bitmask

        return "".join([chr(value) for value in fp_bytes])
class _CachedFingerprinters(dict):
    """Dictionary that builds a fingerprinter the first time a name is looked up."""

    def __missing__(self, name):
        """Load the named patterns, build and store a fingerprinter, and return it."""
        named_patterns = pattern_fingerprinter._load_named_patterns(name)
        # Store the new instance so later lookups of *name* reuse it.
        new_fingerprinter = self[name] = RDKitPatternFingerprinter(named_patterns)
        return new_fingerprinter
# Module-wide cache of pattern fingerprinters, keyed by pattern-set name;
# an entry is created the first time its name is looked up.
_cached_fingerprinters = _CachedFingerprinters()

# Shared starting point for the fingerprint types defined below: a clone
# of the chemfp rdkit module's base with this module's SOFTWARE string.
_base = rdkit._base.clone(software=SOFTWARE)

# The lambdas defer the cache lookup until a fingerprinter is requested.
SubstructRDKitFingerprinter_v1 = _base.clone(
    name="ChemFP-Substruct-RDKit/1",
    num_bits=881,
    make_fingerprinter=(
        lambda: _cached_fingerprinters["substruct"].fingerprint))

# NOTE: a describe(bitno) hook delegating to the fingerprinter was sketched
# here in the original source but is disabled.

RDMACCSRDKitFingerprinter_v1 = _base.clone(
    name="RDMACCS-RDKit/1",
    num_bits=166,
    make_fingerprinter=(
        lambda: _cached_fingerprinters["rdmaccs"].fingerprint))
|