/usr/share/pyshared/chemfp/openeye_patterns.py is in python-chemfp 1.1p1-2.
This file is owned by root:root, with mode 0o644.
The actual contents of the file can be viewed below.
from __future__ import absolute_import
from openeye.oechem import (
OESubSearch, OEChemGetRelease, OEChemGetVersion, OEGraphMol, OEAndAtom,
OENotAtom, OEIsAromaticAtom, OEIsCarbon, OEIsAromaticBond, OEAtomIsInRing, OEHasBondIdx,
OEFindRingAtomsAndBonds, OEDetermineAromaticRingSystems, OEDetermineComponents)
from . import openeye
from . import pattern_fingerprinter
from . import types
from . import __version__ as chemfp_version
class HydrogenMatcher(object):
    """Matcher that counts hydrogens (explicit atoms plus implicit counts).

    It exposes the same ``SingleMatch`` / ``Match`` method names that the
    fingerprinter calls on every matcher.
    """

    def __init__(self, max_count):
        # Largest hydrogen count any caller cares about; lets Match() stop early.
        self.max_count = max_count

    def SingleMatch(self, mol):
        """Return 1 if `mol` has at least one explicit or implicit hydrogen, else 0."""
        for atom in mol.GetAtoms():
            if atom.GetAtomicNum() == 1 or atom.GetImplicitHCount():
                return 1
        return 0

    def Match(self, mol, flg=True):
        """Return a list with one placeholder element per hydrogen found.

        Only the length of the result is meaningful.  Counting stops as soon
        as the running total exceeds ``max_count``, so the length may
        overshoot ``max_count`` by the hydrogens on the last atom visited.
        `flg` is accepted for signature compatibility and is ignored.
        """
        max_count = self.max_count
        count = 0
        for atom in mol.GetAtoms():
            if atom.GetAtomicNum() == 1:
                count += 1
            count += atom.GetImplicitHCount()
            if count > max_count:
                # Enough hydrogens to satisfy every threshold; stop scanning.
                break
        # Always return an iterable, including when the threshold is never hit.
        return [0] * count
# OpenEye famously does not include SSSR functionality in OEChem.
# Search for "Smallest Set of Smallest Rings (SSSR) Considered Harmful"
# After much thought, I agree. But it makes this sort of code harder.
# That's why I only support up to max_count = 2. Then again, I know
# that this code does the right thing, while I'm not sure about the
# SSSR-based implementations.
class AromaticRings(object):
    """Matcher that reports whether a molecule has zero, one, or two-plus aromatic rings.

    Only ``max_count`` values up to 2 are supported; larger values raise
    NotImplementedError.
    """

    def __init__(self, max_count):
        if max_count > 2:
            raise NotImplementedError("No support for >=3 aromatic rings")
        self.max_count = max_count
        # Any aromatic ring atom at all.
        self._single_aromatic = OESubSearch("[aR]")
        # Original author's note: in OpenEye SMARTS, [a;!R2] finds aromatic
        # atoms in at least two rings, so this pattern finds atoms which are
        # members of at least two aromatic rings.
        self._multiring_aromatic = OESubSearch("[a;!R2](:a)(:a):a")

    def SingleMatch(self, mol):
        """Return the result of the single-aromatic-atom search on `mol`.

        One aromatic atom is enough to know there is one aromatic ring.
        """
        return self._single_aromatic.SingleMatch(mol)

    def Match(self, mol, flg=True):
        """Return an iterable whose length is the (saturating) aromatic ring count.

        `flg` is accepted for signature compatibility and is ignored.
        """
        has_aromatic_atom = self._single_aromatic.SingleMatch(mol)
        if not has_aromatic_atom:
            return ()

        if self._multiring_aromatic.SingleMatch(mol):
            # An atom shared by two aromatic rings: at least two rings exist.
            return (1, 2)

        # No aromatic atom sits in two aromatic rings, so the aromatic ring
        # systems are disjoint and the system count stands in for the ring count.
        system_count, _membership = OEDetermineAromaticRingSystems(mol)
        if system_count >= self.max_count:
            return [0] * self.max_count

        assert system_count != 0, "there is supposed to be an aromatic ring"
        if system_count != 1:
            raise AssertionError("Should not get here")
        return (1,)
# Atom predicate: aromatic AND not carbon AND flagged as being in a ring.
_is_hetereo_aromatic = OEAndAtom(
    OEAndAtom(OEIsAromaticAtom(), OENotAtom(OEIsCarbon())),
    OEAtomIsInRing())
class HeteroAromaticRings(object):
    """Matcher that reports whether a molecule has zero, one, or two-plus hetero-aromatic rings.

    Only ``max_count`` values up to 2 are supported; larger values raise
    NotImplementedError.
    """

    def __init__(self, max_count):
        if max_count > 2:
            raise NotImplementedError("No support for >=3 hetero-aromatic rings")
        self.max_count = max_count

    def SingleMatch(self, mol):
        """Return True if `mol` has at least one hetero-aromatic ring atom."""
        for atom in mol.GetAtoms(_is_hetereo_aromatic):
            return True
        return False

    def Match(self, mol, flg=True):
        """Return an iterable whose length is the (saturating) hetero-aromatic ring count.

        `flg` is accepted for signature compatibility and is ignored.
        """
        # Find all the hetero-aromatic atoms.
        hetero_atoms = [atom for atom in mol.GetAtoms(_is_hetereo_aromatic)]
        if len(hetero_atoms) < 2:
            # Zero or one atom means zero or one ring; the caller just needs
            # an iterable of that length.
            return hetero_atoms

        # There are at least two hetero-aromatic atoms.
        # Are there multiple ring systems?
        num_aromatic_systems, parts = OEDetermineAromaticRingSystems(mol)
        assert num_aromatic_systems >= 1

        # Hetero-atoms in different aromatic systems imply two distinct rings.
        atom_components = set(parts[atom.GetIdx()] for atom in hetero_atoms)
        if len(atom_components) > 1:
            return (1, 2)

        # All hetero-aromatic atoms are in the same ring system, so the answer
        # is "at least one".  To test for a second ring: on a copy of the
        # molecule, remove one aromatic bond of the first hetero-atom, re-find
        # the rings, and see if a hetero-aromatic ring atom still exists.
        hetero_atom = hetero_atoms[0]
        for bond in hetero_atom.GetBonds(OEIsAromaticBond()):
            newmol = OEGraphMol(mol)
            newmol_bond = newmol.GetBond(OEHasBondIdx(bond.GetIdx()))
            # Open the ring on the copy, then refresh ring membership so the
            # in-ring part of the predicate reflects the modified graph.
            newmol.DeleteBond(newmol_bond)
            OEFindRingAtomsAndBonds(newmol)

            for atom in newmol.GetAtoms(_is_hetereo_aromatic):
                return (1, 2)

        return (1,)
class NumFragments(object):
    """Matcher that counts the connected components (fragments) of a molecule."""

    def __init__(self, max_count):
        # Not consulted by this matcher; stored so every matcher exposes the
        # same attribute.
        self.max_count = max_count

    def SingleMatch(self, mol):
        """Return True if `mol` has at least one atom (hence at least one fragment)."""
        return mol.NumAtoms() > 0

    def Match(self, mol, flg=True):
        """Return the set of distinct component numbers found in `mol`.

        `flg` is accepted for signature compatibility and is ignored.
        """
        _count, parts = OEDetermineComponents(mol)
        # parts is a list of component numbers; the set of unique numbers is
        # iterable, which is all the caller needs.
        return set(parts)
# Grrr. The substructure keys want up to 4 aromatic rings. The above
# code only works for up to 2. The API doesn't let me say "I can
# handle up to 2; please set the remainder to 0."
# XXX Well, I can change that.
def aromatic_rings(max_count):
    """Build an aromatic-ring matcher for `max_count`.

    AromaticRings only handles counts up to 2, so larger requests get an
    AromaticRings(2) wrapped in pattern_fingerprinter.LimitedMatcher.
    """
    exceeds_supported = max_count > 2
    if not exceeds_supported:
        return AromaticRings(max_count)
    capped = AromaticRings(2)
    return pattern_fingerprinter.LimitedMatcher(2, capped)
def hetero_aromatic_rings(max_count):
    """Build a hetero-aromatic-ring matcher for `max_count`.

    HeteroAromaticRings only handles counts up to 2, so larger requests get a
    HeteroAromaticRings(2) wrapped in pattern_fingerprinter.LimitedMatcher.
    """
    exceeds_supported = max_count > 2
    if not exceeds_supported:
        return HeteroAromaticRings(max_count)
    capped = HeteroAromaticRings(2)
    return pattern_fingerprinter.LimitedMatcher(2, capped)
# Special (non-SMARTS) pattern names mapped to a callable that takes
# max_count and returns a matcher object.
_pattern_classes = {
    "<H>": HydrogenMatcher,
    "<aromatic-rings>": aromatic_rings,
    "<hetero-aromatic-rings>": hetero_aromatic_rings,
    "<fragments>": NumFragments,
}
def oechem_compile_pattern(pattern, max_count):
    """Turn a pattern string into a matcher object.

    Names registered in _pattern_classes are built by their factory with
    `max_count`.  Any other name starting with "<" raises NotImplementedError.
    Everything else is compiled as SMARTS with OESubSearch; a pattern that
    fails to initialise raises pattern_fingerprinter.UnsupportedPatternError.
    """
    special_factory = _pattern_classes.get(pattern)
    if special_factory is not None:
        return special_factory(max_count)

    if pattern.startswith("<"):
        # No other special patterns are supported
        raise NotImplementedError(pattern)

    search = OESubSearch()
    if search.Init(pattern):
        return search
    raise pattern_fingerprinter.UnsupportedPatternError(
        pattern, "Uninterpretable SMARTS pattern")
class OEChemPatternFingerprinter(pattern_fingerprinter.PatternFingerprinter):
    """Pattern fingerprinter whose patterns are compiled by oechem_compile_pattern."""

    def __init__(self, patterns):
        assert patterns is not None
        super(OEChemPatternFingerprinter, self).__init__(patterns, oechem_compile_pattern)

    def fingerprint(self, mol):
        """Return the fingerprint of `mol` as a string with one character per byte.

        Each entry of ``self.matcher_definitions`` is a
        (matcher, largest_count, count_info_tuple) triple.  A bit is set by
        OR-ing ``count_info.bitmask`` into byte ``count_info.byteno``.
        """
        fp_bytes = [0] * self.num_bytes
        for matcher, largest_count, count_info_tuple in self.matcher_definitions:
            if matcher is NotImplemented:
                # No usable matcher for this pattern; leave its bits unset.
                continue

            if largest_count == 1:
                # Presence/absence only: the cheaper single-match test suffices.
                if matcher.SingleMatch(mol):
                    count_info = count_info_tuple[0]
                    fp_bytes[count_info.byteno] |= count_info.bitmask
            else:
                # Count the (unique) matches and set every bit whose threshold is met.
                actual_count = sum(1 for ignore in matcher.Match(mol, True))
                if actual_count:
                    for count_info in count_info_tuple:
                        if actual_count >= count_info.count:
                            fp_bytes[count_info.byteno] |= count_info.bitmask

        return "".join(map(chr, fp_bytes))
class _CachedFingerprinters(dict):
    """Dict that builds and remembers a fingerprinter the first time a name is looked up."""

    def __missing__(self, name):
        # Called by dict on a failed lookup: load the named pattern set, build
        # its fingerprinter, and store it so later lookups reuse it.
        built = OEChemPatternFingerprinter(
            pattern_fingerprinter._load_named_patterns(name))
        self[name] = built
        return built
# Module-wide cache of fingerprinters, keyed by pattern-set name.
_cached_fingerprinters = _CachedFingerprinters()

# Identifies the toolkit release/version and the chemfp version in use.
SOFTWARE = "OEChem/%(release)s (%(version)s) chemfp/%(chemfp)s" % {
    "release": OEChemGetRelease(),
    "version": OEChemGetVersion(),
    "chemfp": chemfp_version,
}

# XXX Why are there two "Fingerprinter" classes?
# XX Shouldn't they be merged?

_base = openeye._base.clone(software=SOFTWARE)

# The lambdas below look the fingerprinter up in the cache only when called,
# so a pattern set is not loaded until it is first requested.
SubstructOpenEyeFingerprinter_v1 = _base.clone(
    name="ChemFP-Substruct-OpenEye/1",
    num_bits=881,
    make_fingerprinter=lambda: _cached_fingerprinters["substruct"].fingerprint)

# Disabled in the original source:
# def describe(self, bitno):
# return self._fingerprinter.describe(bitno)

RDMACCSOpenEyeFingerprinter_v1 = _base.clone(
    name="RDMACCS-OpenEye/1",
    num_bits=166,
    make_fingerprinter=lambda: _cached_fingerprinters["rdmaccs"].fingerprint)