Source code for schrodinger.thirdparty.rdkit_adapter

"""
Conversions between Schrodinger structure objects (mmct) and RDKit mol objects.

There are some structural/philosophic differences between these two formats,
stemming from their distinct origins (RDKit being originally used for
chemiformatics, and schrodinger/mmct being originally used for molecular
modeling.)

Notably:
Schrodinger wants all atoms to have positions in space. RDKit allows
unspecified position, or multiple conformers.

Schrodinger wants all Hydrogens to be fully specified (position and bonding).
My understanding is that RDKit has three types of hydrogens::

    * Implicit - calculated based on valence. These are not shown in SMILES.
    * Explicit - as a property of the associated heavy atom. These are shown in
    SMILES like [cH]
    * Included in connectivity graph - (only these can have coordinates or
    other properties). These are show in SMILES like c([H]).

There are other distinctions, for instance Schrodinger is aware of dative, or
zero-order bonds, whereas RDKit is aware of aromatic and conjugated bonds.

"""

import contextlib
import re

from rdkit import Chem
from rdkit import RDLogger

from schrodinger import adapter
from schrodinger import structure
from schrodinger.infra import structure as infrastructure
from schrodinger.infra import mm

CT_PROP_FORMAT = re.compile(r'[birs]_[^_ ]+_\w*')
BOND_TYPES_S2R = {
    structure.BondType.Single: Chem.BondType.SINGLE,
    structure.BondType.Double: Chem.BondType.DOUBLE,
    structure.BondType.Triple: Chem.BondType.TRIPLE,
    structure.BondType.Zero: Chem.BondType.ZERO,
    structure.BondType.Dative: Chem.BondType.DATIVE
}

ANNOTATION_PROP = adapter.RDK_INDEX  #: i_rdk_index


[docs]class InconsistentStructureError(ValueError): pass
[docs]class UnsupportedStructureError(NotImplementedError): """For structures that can't be translated between RDKit and Schrodinger yet"""
[docs]def to_rdkit(st, implicitH=False, include_properties=True, include_coordinates=True, sanitize=True, include_stereo=True): """ Create a RdKit molecule from a Schrodinger structure (aka mmct). :param st: The schrodinger structure to be translated to RDKit. The input structure remains unmodified. :type st: schrodinger.structure.Structure :param implicitH: Should hydrogens be listed implicitly? If False, hydrogens will be included in the connectivity graph, and 3D coordinates and properties of the hydrogens will be translated. Some pattern matching in RDKit requires implicit hydrogens, however. :type implicitH: bool :param include_properties: Should atom and structure level properties be copied from the schrodinger structure to the RDKit mol? :type include_properties: bool :param include_coordinates: Should the coordinates of the structure be copied to a conformer associated with the RDKit mol? :param sanitize: Should the molecule be sanitized? Sanitization discerns aromaticity, for example. But it rejects invalid molecules. :type sanitize: bool :param include_stereo: Whether the stereochemistry of the structure should be translated into the RDKit mol. :type include_stereo: bool :return: An rdkit mol representing the same structure as the input st :rtype: rdkit.Mol :raises InconsistentStructureError: if the input structure has inconsistent or incorrect stereochemical labels. """ if not isinstance(st, (structure.Structure, infrastructure.Structure)): raise TypeError( 'Input structure to to_rdkit() should be a Schrodinger Structure') options = adapter.RDKitOptions() options.hydrogens = adapter.Hydrogens.MakeImplicit if implicitH else adapter.Hydrogens.Retain options.label_atoms = adapter.LabelAtoms.Enable options.properties = adapter.Properties.Copy if include_properties else adapter.Properties.Ignore options.coordinates = adapter.Coordinates.Copy if include_coordinates else adapter.Coordinates.Ignore options.sanitize = adapter.Sanitize.Enable if sanitize else adapter.Sanitize.Disable options.stereochemistry = adapter.StereoChemistry.Copy if include_stereo else adapter.StereoChemistry.Ignore try: mol = adapter.to_rdkit(st, options) except adapter.InconsistentStructureError as err: raise InconsistentStructureError(*err.args) except adapter.UnsupportedStructureError as err: raise UnsupportedStructureError(*err.args) return mol
[docs]def from_rdkit(mol, include_properties=True, generate_coordinates=False, conformer=None, include_stereo=True): """ Create a Schrodinger structure from an RdKit molecule. For correct behavoir, requires that the RdKit molecule be sanitized beforehand. If the RDKit molecule does not have 3d structure, one can be generated using fast3d. :param mol: RDKit mol to be converted to Schrodinger space. It will not be modified. :type mol: rdkit.Mol :param include_properties: Should atom and molecule properties be copied from the RDKit mol? :type include_properties: bool :param generate_coordinates: Should 3D coordinates be generated if the RDKit mol does not have associated coordinates? Uses fast3d. :type generate_coordinates: bool :param conformer: If the RDKit mol has more than one associated conformer, choose one to turn into a Schrodinger structure. :type conformer: NoneType or int :return: A schrodinger.Structure representing the same molecule as the input mol :rtype: schrodinger.Structure :raises ValueError: If there is more than one conformer associated with the structure or if a specific conformer is requested and is unavailable. """ if not isinstance(mol, Chem.Mol): raise TypeError('Input mol to from_rdkit() should be an RdKit Mol') if conformer is None and mol.GetNumConformers() > 1: msg = ("There are {} conformers associated with this molecule and no " "conformer number was requested.".format(mol.GetNumConformers())) raise ValueError(msg) if conformer is not None and mol.GetNumConformers() < conformer + 1: msg = ("Requested conformer #{}, but there are only {} conformers " "available.".format(conformer, mol.GetNumConformers())) raise ValueError(msg) has_coords = mol.GetNumConformers() > 0 properties = adapter.Properties.Copy if include_properties else adapter.Properties.Ignore conformer = None try: conformer = mol.GetConformer(conformer or 0) except ValueError: # No geometry associated with this structure. # Coordinates are generated using fast3d below (if generate_coordinates=True) pass stereo = adapter.StereoChemistry.Copy if include_stereo else adapter.StereoChemistry.Ignore try: st = adapter.to_structure(mol, adapter.LabelAtoms.Enable, properties, conformer, stereo) except adapter.InconsistentStructureError as err: raise InconsistentStructureError(*err.args) except adapter.UnsupportedStructureError as err: raise UnsupportedStructureError(*err.args) if generate_coordinates and not has_coords: st.generate3dConformation() return st
[docs]def translate_rdkit_props_dict(props): """ Make a copy of a property dict like the one returned by mol.GetPropsAsDict, in which property names that don't look like mmct properties are prefixed with <typechar>_rdkit_. :param props: property dictionary :type props: dict :return: new property dictionary :rtype: dict """ new_props = {} for name, value in props.items(): is_schrodinger_property = bool(CT_PROP_FORMAT.match(name)) if is_schrodinger_property: new_name = name if name.startswith('s_st_Chirality') or name == 's_m_title': continue elif name.startswith('s_'): # Workaround for quirk of GetPropsAsDict(), which returns strings # that look like a number as floats, breaking the call to # mmct_ct_property_set_string(). value = str(value) elif name in ('origNoImplicit', 'isImplicit'): # Generated hydrogens. Not an important property continue elif isinstance(value, bool): new_name = 'b_rdkit_' + name elif isinstance(value, int): new_name = 'i_rdkit_' + name elif isinstance(value, float): new_name = 'r_rdkit_' + name elif isinstance(value, str): new_name = 's_rdkit_' + name else: # Ignore weird properties such as RDKit internals. continue new_props[new_name] = value return new_props
[docs]@contextlib.contextmanager def annotate(mol): """ Deprecated. Use the RDK_INDEX or SCHRODINGER_INDEX property added in the adapter by default. """ yield
def _find_matches(mol, st): """Which atoms match?""" # it's possible that atoms were added or deleted. We hope that this is only # hydrogens rdk_non_hydrogens = sum(1 for a in mol.GetAtoms() if a.GetAtomicNum() != 1) schro_non_hydrogens = sum(1 for a in st.atom if a.atomic_number != 1) if rdk_non_hydrogens != schro_non_hydrogens: msg = (f"Schrodinger structure doesn't match RDKit mol, they have " f"different numbers of non-hydrogen atoms (RDKit: " f"{rdk_non_hydrogens} != Schrodinger: {schro_non_hydrogens})") raise InconsistentStructureError(msg) matches = {} for a in st.atom: try: idx = a.property[adapter.RDK_INDEX] except KeyError: continue rdkit_atom = mol.GetAtomWithIdx(idx) if rdkit_atom: matches[a] = rdkit_atom return matches
[docs]def copy_lewis_structure_and_hydrogens(st, mol, kekulize=True): """ Applies bond orders and charges from st to mol. Updates #implicitH to match Assumes st includes all hydrogens. Adds implicit and explicit hydrogens to the mol, but does not add any graph hydrogens. May remove graph hydrogens. """ editmol = Chem.RWMol(mol) matches = _find_matches(editmol, st) if not kekulize: aromatic_bonds = set() rings = (r for r in st.ring if r.isAromatic()) for r in rings: r = r.getAtomIndices() for a0, a1 in zip(r, r[1:]): aromatic_bonds.add(frozenset((a0, a1))) aromatic_bonds.add(frozenset((r[0], r[-1]))) # Adjust bond orders, retaining aromicity if requested for bond in st.bond: a1, a2 = bond.atom1, bond.atom2 rdk_a1 = matches.get(a1) rdk_a2 = matches.get(a2) if not rdk_a1 or not rdk_a2: continue rdbond = editmol.GetBondBetweenAtoms(rdk_a1.GetIdx(), rdk_a2.GetIdx()) if not kekulize and {a1.index, a2.index} in aromatic_bonds: new_type = Chem.BondType.AROMATIC rdk_a1.SetIsAromatic(True) rdk_a2.SetIsAromatic(True) else: new_type = BOND_TYPES_S2R[bond.type] if new_type != rdbond.GetBondType(): rdbond.SetBondType(new_type) # Adjust charges - assumes that the mmct charge is good. for mmct_atom, rdkit_atom in matches.items(): charge = mmct_atom.formal_charge if charge != rdkit_atom.GetFormalCharge(): rdkit_atom.SetFormalCharge(charge) Chem.SanitizeMol(editmol, sanitizeOps=Chem.SANITIZE_ADJUSTHS) graph_hydrogens_to_delete = [] for mmct_atom, rdkit_atom in matches.items(): mmct_hydrogens = sum( (1 for a in mmct_atom.bonded_atoms if a.atomic_number == 1)) mmct_hydrogens += mm.mmat_get_nhua(mmct_atom.atom_type) # hydrogen count is implicit + explicit (shown in smiles) + graph hydrogens rdk_hydrogens = rdkit_atom.GetTotalNumHs(includeNeighbors=True) # A hydrogen was added or removed on the Schrodinger side if rdk_hydrogens != mmct_hydrogens: new_explicit_h_count = rdkit_atom.GetNumExplicitHs( ) + mmct_hydrogens - rdk_hydrogens if new_explicit_h_count >= 0: rdkit_atom.SetNumExplicitHs(new_explicit_h_count) else: # Need to delete graph hydrogens hydrogens = [ a.GetIdx() for a in rdkit_atom.GetNeighbors() if a.GetAtomicNum() == 1 ] remove_count = rdk_hydrogens - mmct_hydrogens assert len(hydrogens) >= remove_count for h_index in hydrogens[:remove_count]: graph_hydrogens_to_delete.append(h_index) graph_hydrogens_to_delete.sort(reverse=True) for atom in graph_hydrogens_to_delete: editmol.RemoveAtom(atom) Chem.SanitizeMol(editmol, sanitizeOps=Chem.SANITIZE_ADJUSTHS) return editmol.GetMol()
[docs]@contextlib.contextmanager def suppress_rdkit_log(): """ Disable all RDKIT logging. """ RDLogger.DisableLog('rdApp.*') try: yield finally: RDLogger.EnableLog('rdApp.*')