"""
Conversions between Schrodinger structure objects (mmct) and RDKit mol objects.
There are some structural/philosophic differences between these two formats,
stemming from their distinct origins (RDKit being originally used for
chemiformatics, and schrodinger/mmct being originally used for molecular
modeling.)
Notably:
Schrodinger wants all atoms to have positions in space. RDKit allows
unspecified position, or multiple conformers.
Schrodinger wants all Hydrogens to be fully specified (position and bonding).
My understanding is that RDKit has three types of hydrogens::
* Implicit - calculated based on valence. These are not shown in SMILES.
* Explicit - as a property of the associated heavy atom. These are shown in
SMILES like [cH]
* Included in connectivity graph - (only these can have coordinates or
other properties). These are show in SMILES like c([H]).
There are other distinctions, for instance Schrodinger is aware of dative, or
zero-order bonds, whereas RDKit is aware of aromatic and conjugated bonds.
"""
import contextlib
import re
from rdkit import Chem
from rdkit import RDLogger
from schrodinger import adapter
from schrodinger import structure
from schrodinger.infra import structure as infrastructure
from schrodinger.infra import mm
CT_PROP_FORMAT = re.compile(r'[birs]_[^_ ]+_\w*')
BOND_TYPES_S2R = {
structure.BondType.Single: Chem.BondType.SINGLE,
structure.BondType.Double: Chem.BondType.DOUBLE,
structure.BondType.Triple: Chem.BondType.TRIPLE,
structure.BondType.Zero: Chem.BondType.ZERO,
structure.BondType.Dative: Chem.BondType.DATIVE
}
ANNOTATION_PROP = adapter.RDK_INDEX #: i_rdk_index
[docs]class InconsistentStructureError(ValueError):
pass
[docs]class UnsupportedStructureError(NotImplementedError):
"""For structures that can't be translated between RDKit and Schrodinger yet"""
[docs]def to_rdkit(st,
implicitH=False,
include_properties=True,
include_coordinates=True,
sanitize=True,
include_stereo=True):
"""
Create a RdKit molecule from a Schrodinger structure (aka mmct).
:param st: The schrodinger structure to be translated to RDKit. The input
structure remains unmodified.
:type st: schrodinger.structure.Structure
:param implicitH: Should hydrogens be listed implicitly? If False, hydrogens
will be included in the connectivity graph, and 3D coordinates and
properties of the hydrogens will be translated. Some pattern matching
in RDKit requires implicit hydrogens, however.
:type implicitH: bool
:param include_properties: Should atom and structure level properties
be copied from the schrodinger structure to the RDKit mol?
:type include_properties: bool
:param include_coordinates: Should the coordinates of the structure be
copied to a conformer associated with the RDKit mol?
:param sanitize: Should the molecule be sanitized? Sanitization discerns
aromaticity, for example. But it rejects invalid molecules.
:type sanitize: bool
:param include_stereo: Whether the stereochemistry of the structure should
be translated into the RDKit mol.
:type include_stereo: bool
:return: An rdkit mol representing the same structure as the input st
:rtype: rdkit.Mol
:raises InconsistentStructureError: if the input structure has inconsistent or
incorrect stereochemical labels.
"""
if not isinstance(st, (structure.Structure, infrastructure.Structure)):
raise TypeError(
'Input structure to to_rdkit() should be a Schrodinger Structure')
options = adapter.RDKitOptions()
options.hydrogens = adapter.Hydrogens.MakeImplicit if implicitH else adapter.Hydrogens.Retain
options.label_atoms = adapter.LabelAtoms.Enable
options.properties = adapter.Properties.Copy if include_properties else adapter.Properties.Ignore
options.coordinates = adapter.Coordinates.Copy if include_coordinates else adapter.Coordinates.Ignore
options.sanitize = adapter.Sanitize.Enable if sanitize else adapter.Sanitize.Disable
options.stereochemistry = adapter.StereoChemistry.Copy if include_stereo else adapter.StereoChemistry.Ignore
try:
mol = adapter.to_rdkit(st, options)
except adapter.InconsistentStructureError as err:
raise InconsistentStructureError(*err.args)
except adapter.UnsupportedStructureError as err:
raise UnsupportedStructureError(*err.args)
return mol
[docs]def from_rdkit(mol,
include_properties=True,
generate_coordinates=False,
conformer=None,
include_stereo=True):
"""
Create a Schrodinger structure from an RdKit molecule.
For correct behavoir, requires that the RdKit molecule be sanitized
beforehand.
If the RDKit molecule does not have 3d structure, one can be generated
using fast3d.
:param mol: RDKit mol to be converted to Schrodinger space. It will not be
modified.
:type mol: rdkit.Mol
:param include_properties: Should atom and molecule properties be copied
from the RDKit mol?
:type include_properties: bool
:param generate_coordinates: Should 3D coordinates be generated if the
RDKit mol does not have associated coordinates? Uses fast3d.
:type generate_coordinates: bool
:param conformer: If the RDKit mol has more than one associated conformer,
choose one to turn into a Schrodinger structure.
:type conformer: NoneType or int
:return: A schrodinger.Structure representing the same molecule as the
input mol
:rtype: schrodinger.Structure
:raises ValueError: If there is more than one conformer associated with the
structure or if a specific conformer is requested and is unavailable.
"""
if not isinstance(mol, Chem.Mol):
raise TypeError('Input mol to from_rdkit() should be an RdKit Mol')
if conformer is None and mol.GetNumConformers() > 1:
msg = ("There are {} conformers associated with this molecule and no "
"conformer number was requested.".format(mol.GetNumConformers()))
raise ValueError(msg)
if conformer is not None and mol.GetNumConformers() < conformer + 1:
msg = ("Requested conformer #{}, but there are only {} conformers "
"available.".format(conformer, mol.GetNumConformers()))
raise ValueError(msg)
has_coords = mol.GetNumConformers() > 0
properties = adapter.Properties.Copy if include_properties else adapter.Properties.Ignore
conformer = None
try:
conformer = mol.GetConformer(conformer or 0)
except ValueError:
# No geometry associated with this structure.
# Coordinates are generated using fast3d below (if generate_coordinates=True)
pass
stereo = adapter.StereoChemistry.Copy if include_stereo else adapter.StereoChemistry.Ignore
try:
st = adapter.to_structure(mol, adapter.LabelAtoms.Enable, properties,
conformer, stereo)
except adapter.InconsistentStructureError as err:
raise InconsistentStructureError(*err.args)
except adapter.UnsupportedStructureError as err:
raise UnsupportedStructureError(*err.args)
if generate_coordinates and not has_coords:
st.generate3dConformation()
return st
[docs]def translate_rdkit_props_dict(props):
"""
Make a copy of a property dict like the one returned by mol.GetPropsAsDict,
in which property names that don't look like mmct properties are prefixed
with <typechar>_rdkit_.
:param props: property dictionary
:type props: dict
:return: new property dictionary
:rtype: dict
"""
new_props = {}
for name, value in props.items():
is_schrodinger_property = bool(CT_PROP_FORMAT.match(name))
if is_schrodinger_property:
new_name = name
if name.startswith('s_st_Chirality') or name == 's_m_title':
continue
elif name.startswith('s_'):
# Workaround for quirk of GetPropsAsDict(), which returns strings
# that look like a number as floats, breaking the call to
# mmct_ct_property_set_string().
value = str(value)
elif name in ('origNoImplicit', 'isImplicit'):
# Generated hydrogens. Not an important property
continue
elif isinstance(value, bool):
new_name = 'b_rdkit_' + name
elif isinstance(value, int):
new_name = 'i_rdkit_' + name
elif isinstance(value, float):
new_name = 'r_rdkit_' + name
elif isinstance(value, str):
new_name = 's_rdkit_' + name
else:
# Ignore weird properties such as RDKit internals.
continue
new_props[new_name] = value
return new_props
[docs]@contextlib.contextmanager
def annotate(mol):
"""
Deprecated.
Use the RDK_INDEX or SCHRODINGER_INDEX property added in the adapter by default.
"""
yield
def _find_matches(mol, st):
"""Which atoms match?"""
# it's possible that atoms were added or deleted. We hope that this is only
# hydrogens
rdk_non_hydrogens = sum(1 for a in mol.GetAtoms() if a.GetAtomicNum() != 1)
schro_non_hydrogens = sum(1 for a in st.atom if a.atomic_number != 1)
if rdk_non_hydrogens != schro_non_hydrogens:
msg = (f"Schrodinger structure doesn't match RDKit mol, they have "
f"different numbers of non-hydrogen atoms (RDKit: "
f"{rdk_non_hydrogens} != Schrodinger: {schro_non_hydrogens})")
raise InconsistentStructureError(msg)
matches = {}
for a in st.atom:
try:
idx = a.property[adapter.RDK_INDEX]
except KeyError:
continue
rdkit_atom = mol.GetAtomWithIdx(idx)
if rdkit_atom:
matches[a] = rdkit_atom
return matches
[docs]def copy_lewis_structure_and_hydrogens(st, mol, kekulize=True):
"""
Applies bond orders and charges from st to mol. Updates #implicitH to match
Assumes st includes all hydrogens. Adds implicit and explicit hydrogens to the
mol, but does not add any graph hydrogens. May remove graph hydrogens.
"""
editmol = Chem.RWMol(mol)
matches = _find_matches(editmol, st)
if not kekulize:
aromatic_bonds = set()
rings = (r for r in st.ring if r.isAromatic())
for r in rings:
r = r.getAtomIndices()
for a0, a1 in zip(r, r[1:]):
aromatic_bonds.add(frozenset((a0, a1)))
aromatic_bonds.add(frozenset((r[0], r[-1])))
# Adjust bond orders, retaining aromicity if requested
for bond in st.bond:
a1, a2 = bond.atom1, bond.atom2
rdk_a1 = matches.get(a1)
rdk_a2 = matches.get(a2)
if not rdk_a1 or not rdk_a2:
continue
rdbond = editmol.GetBondBetweenAtoms(rdk_a1.GetIdx(), rdk_a2.GetIdx())
if not kekulize and {a1.index, a2.index} in aromatic_bonds:
new_type = Chem.BondType.AROMATIC
rdk_a1.SetIsAromatic(True)
rdk_a2.SetIsAromatic(True)
else:
new_type = BOND_TYPES_S2R[bond.type]
if new_type != rdbond.GetBondType():
rdbond.SetBondType(new_type)
# Adjust charges - assumes that the mmct charge is good.
for mmct_atom, rdkit_atom in matches.items():
charge = mmct_atom.formal_charge
if charge != rdkit_atom.GetFormalCharge():
rdkit_atom.SetFormalCharge(charge)
Chem.SanitizeMol(editmol, sanitizeOps=Chem.SANITIZE_ADJUSTHS)
graph_hydrogens_to_delete = []
for mmct_atom, rdkit_atom in matches.items():
mmct_hydrogens = sum(
(1 for a in mmct_atom.bonded_atoms if a.atomic_number == 1))
mmct_hydrogens += mm.mmat_get_nhua(mmct_atom.atom_type)
# hydrogen count is implicit + explicit (shown in smiles) + graph hydrogens
rdk_hydrogens = rdkit_atom.GetTotalNumHs(includeNeighbors=True)
# A hydrogen was added or removed on the Schrodinger side
if rdk_hydrogens != mmct_hydrogens:
new_explicit_h_count = rdkit_atom.GetNumExplicitHs(
) + mmct_hydrogens - rdk_hydrogens
if new_explicit_h_count >= 0:
rdkit_atom.SetNumExplicitHs(new_explicit_h_count)
else:
# Need to delete graph hydrogens
hydrogens = [
a.GetIdx()
for a in rdkit_atom.GetNeighbors()
if a.GetAtomicNum() == 1
]
remove_count = rdk_hydrogens - mmct_hydrogens
assert len(hydrogens) >= remove_count
for h_index in hydrogens[:remove_count]:
graph_hydrogens_to_delete.append(h_index)
graph_hydrogens_to_delete.sort(reverse=True)
for atom in graph_hydrogens_to_delete:
editmol.RemoveAtom(atom)
Chem.SanitizeMol(editmol, sanitizeOps=Chem.SANITIZE_ADJUSTHS)
return editmol.GetMol()
[docs]@contextlib.contextmanager
def suppress_rdkit_log():
"""
Disable all RDKIT logging.
"""
RDLogger.DisableLog('rdApp.*')
try:
yield
finally:
RDLogger.EnableLog('rdApp.*')