"""
Collection of functions intended as bbchem web endpoints.
Copyright Schrodinger, LLC. All rights reserved.
"""
from typing import Iterator
from typing import List
from typing import NamedTuple
from typing import Optional
from typing import Union
from rdkit import Chem
from rdkit.Chem import EnumerateStereoisomers
from rdkit.Chem import rdChemReactions
from rdkit.Chem import rdmolops
from rdkit.Chem import rdRGroupDecomposition
from schrodinger.livedesign import convert
from schrodinger.livedesign import draw
from schrodinger.livedesign import molhash
from schrodinger.livedesign import preprocessor
from schrodinger.livedesign import substructure
from schrodinger.livedesign.molhash import ATOM_PROP_MAP_NUMBER
from schrodinger.livedesign.preprocessor import ATOM_PROP_DUMMY_LABEL
from schrodinger.livedesign.preprocessor import MOL_PROP_R_LABEL
# Default upper bound on the number of stereoisomers generated by
# enumerate_stereoisomers when the caller does not supply one.
DEFAULT_MAX_STEREOISOMERS = 512
class RegistrationData(NamedTuple):
    """
    Per-compound record yielded by `registration_process`.
    """
    # structure serialized by preprocessor.convert_to_molblock
    sdf: Union[str, bytes]
    # hash computed with the hash scheme requested by the caller
    requested_hash: str
    # hash computed with molhash.HashScheme.NO_STEREO_LAYERS
    no_stereo_hash: str
    # canonical SMILES layer taken from the molecule's hash layers
    display_smiles: str
    # value of Descriptors.MolWt for the preprocessed molecule
    molecular_weight: float
    # value of rdMolDescriptors.CalcMolFormula for the preprocessed molecule
    molecular_formula: str
    # True when at least one atom of the molecule has the symbol "*"
    has_attachment_point: bool
def registration_process(
    data: Union[str, bytes],
    options: Optional[preprocessor.PreprocessorOptions] = None,
    hash_scheme: molhash.HashScheme = molhash.HashScheme.ALL_LAYERS
) -> Iterator[RegistrationData]:
    """
    Runs through the registration pipeline for each compound provided in the
    input data, which includes the preprocessor and canonicalization.

    :param data: input text string to be deserialized into RDKit mols
    :param options: preprocessor options
    :param hash_scheme: hash scheme used to compute the `requested_hash` of
        each yielded record
    :return: iterator yielding one RegistrationData per compound read from
        the input
    """
    # Import the descriptor submodules explicitly instead of reaching them as
    # attributes of `Chem`: `from rdkit import Chem` is not guaranteed to load
    # them, so attribute access only works if some other module happened to
    # import them first.
    from rdkit.Chem import Descriptors
    from rdkit.Chem import rdMolDescriptors

    for mol in convert.get_sd_reader(data):
        mol = preprocessor.preprocess(mol, options)
        # The layers are computed once and reused for both hashes and for the
        # display SMILES.
        hash_layers = molhash.get_mol_layers(mol)
        yield RegistrationData(
            sdf=preprocessor.convert_to_molblock(mol, options),
            requested_hash=molhash.get_molhash(hash_layers, hash_scheme),
            no_stereo_hash=molhash.get_molhash(
                hash_layers, molhash.HashScheme.NO_STEREO_LAYERS),
            display_smiles=hash_layers[molhash.HashLayer.CANONICAL_SMILES],
            molecular_weight=Descriptors.MolWt(mol),
            molecular_formula=rdMolDescriptors.CalcMolFormula(mol),
            # an atom with the symbol "*" marks an attachment point
            has_attachment_point=any(
                atom.GetSymbol() == "*" for atom in mol.GetAtoms()))
def generate_image(
    mol: Chem.rdchem.Mol,
    alignment_mol: Optional[Chem.rdchem.Mol] = None,
    substructure_options: Optional[substructure.QueryOptions] = None,
    highlight_mol: Optional[Chem.rdchem.Mol] = None,
    draw_options: Optional[draw.ImageGenOptions] = None
) -> Union[str, bytes]:
    """
    Draws a LiveDesign image of a compound, optionally aligning it to another
    molecule and/or highlighting a core beforehand.

    :param mol: compound to generate an image of
    :param alignment_mol: molecule to align to prior to image generation
    :param substructure_options: substructure matching options, used for both
        the alignment and the highlighting
    :param highlight_mol: core to highlight in generated image
    :param draw_options: image generation options
    :return: generated image
    """
    if alignment_mol:
        # The return value is not used; presumably the coordinates are
        # applied to `mol` in place.
        substructure.apply_substructure_coordinates(
            mol, alignment_mol, substructure_options)

    image_options = draw_options
    if highlight_mol:
        # Highlighting is expressed through the options handed to the drawer.
        image_options = draw.set_highlight(
            mol, highlight_mol, substructure_options, draw_options)

    return draw.draw_image(mol, image_options)
def generate_sar_analysis_image(
        mol: Chem.rdchem.Mol,
        core_mol: Chem.rdchem.Mol,
        rgroup_mols: List[Chem.rdchem.Mol],
        options: Optional[draw.ImageGenOptions] = None) -> Union[str, bytes]:
    """
    Draws a LiveDesign image for SAR analysis output, with the core and every
    r-group of the decomposition highlighted.

    :param mol: compound to generate an image of
    :param core_mol: core to highlight in generated image
    :param rgroup_mols: rgroups to highlight in generated image
    :param options: image generation options
    :return: generated image
    """
    # The highlighting is carried by the options passed on to the drawer.
    highlight_options = draw.set_rgroup_highlight(mol, core_mol, rgroup_mols,
                                                  options)
    return draw.draw_image(mol, highlight_options)
def pop_properties(mol: Chem.rdchem.Mol) -> dict:
    """
    Removes all properties from a molecule and hands them back to the caller.

    :param mol: molecule to extract, then clear all properties from
    :return: map of all removed properties, with values converted to strings
    """
    # Snapshot the properties before they are removed from the molecule.
    removed = {}
    for name, value in mol.GetPropsAsDict().items():
        removed[name] = str(value)
    preprocessor.remove_properties(mol)
    return removed
def set_properties(mol: Chem.rdchem.Mol, new_props: dict):
    """
    Replaces the properties of a molecule with the given ones.

    :param mol: molecule to clear, then set given properties on
    :param new_props: map of properties to add onto the molecule; values are
        converted to strings before being set
    """
    # Drop the existing properties first so only `new_props` remain.
    preprocessor.remove_properties(mol)
    for name, value in new_props.items():
        text = str(value)
        mol.SetProp(name, text)
def num_substructure_matches(
        match_mol: Chem.rdchem.Mol,
        query_mol: Chem.rdchem.Mol,
        options: Optional[substructure.QueryOptions] = None) -> int:
    """
    Counts the substructure matches of a query within a molecule.

    :param match_mol: molecule to find substructure matches in
    :param query_mol: substructure molecule on which to find matches
    :param options: substructure query options
    :return: number of substructure matches found
    """
    # Only the number of matches matters, so they are consumed one by one
    # without being stored.
    count = 0
    for _ in substructure.substructure_matches(match_mol, query_mol, options):
        count += 1
    return count
def enumerate_stereoisomers(
    mol: Chem.rdchem.Mol,
    max_stereoisomers: int = DEFAULT_MAX_STEREOISOMERS
) -> Iterator[Chem.rdchem.Mol]:
    """
    Generates stereoisomers of the given molecule.

    :param mol: molecule from which to generate stereoisomers
    :param max_stereoisomers: maximum number of stereoisomers to generate
    :return: generated stereoisomers
    """
    # `rand` is given a constant value; presumably this seeds the enumeration
    # so that repeated calls produce the same isomers (see the RDKit
    # StereoEnumerationOptions documentation).
    options = EnumerateStereoisomers.StereoEnumerationOptions(
        unique=True, maxIsomers=max_stereoisomers, rand=0xF00D)
    return EnumerateStereoisomers.EnumerateStereoisomers(mol, options)
def _replace_rgroups_with_dummy_atoms(mol: Chem.rdchem.Mol) -> Chem.rdchem.Mol:
    """
    Returns a copy of an rgroup molecule in which the atoms marking the
    attachment to the scaffold are turned into dummy atoms.

    :param mol: rgroup molecule; it is copied, not modified
    :return: adjusted copy of the rgroup molecule
    """
    # An atom is only converted when it carries all three of these properties.
    marker_props = (ATOM_PROP_MAP_NUMBER, ATOM_PROP_DUMMY_LABEL,
                    MOL_PROP_R_LABEL)

    rgroup_copy = Chem.Mol(mol)
    for atom in rgroup_copy.GetAtoms():
        if not all(atom.HasProp(prop) for prop in marker_props):
            continue
        atom.ClearProp(ATOM_PROP_MAP_NUMBER)
        atom.ClearProp(ATOM_PROP_DUMMY_LABEL)
        atom.SetAtomicNum(0)

    # dummy atoms should be represented as * in sdfs
    adjust_params = rdmolops.AdjustQueryParameters.NoAdjustments()
    adjust_params.makeDummiesQueries = True
    return rdmolops.AdjustQueryProperties(rgroup_copy, adjust_params)
def rgroup_decompose(scaffold_mol: Chem.rdchem.Mol,
                     match_mol: Chem.rdchem.Mol,
                     stereospecific: bool = True) -> Optional[dict]:
    """
    Decomposes a single molecule into the R-groups of a scaffold.

    :param scaffold_mol: scaffold molecule on which to find R-groups; it is
        copied, not modified
    :param match_mol: source molecule for R-group decomposition
    :param stereospecific: whether to consider bond stereochemistry and atom
        chirality of scaffold
    :return: dict of the decomposition for `match_mol`, keyed by label, or
        None if `match_mol` could not be decomposed against the scaffold
    """
    scaffold_copy = Chem.Mol(scaffold_mol)
    if not stereospecific:
        # remove bond stereochemistry and chiral tags from the scaffold copy
        Chem.RemoveStereochemistry(scaffold_copy)

    decomp_params = rdRGroupDecomposition.RGroupDecompositionParameters()
    decomp_params.onlyMatchAtRGroups = True
    decomp_params.substructMatchParams.useEnhancedStereo = True
    decomp_params.removeAllHydrogenRGroups = False
    decomp_params.removeAllHydrogenRGroupsAndLabels = False

    matches, no_match = rdRGroupDecomposition.RGroupDecompose(
        [scaffold_copy], [match_mol], options=decomp_params)
    if no_match:
        return None

    # Exactly one molecule was submitted, so there is a single result.
    match = matches[0]

    # RDKit's RGroup decomposition returns rgroup mols that use rgroups
    # (instead of dummy atoms or attachment points) to represent where the
    # rgroup attaches to the scaffold. We want to replace these rgroups with
    # dummy atoms to be consistent with rgroups run through the preprocessor
    # and allow rgroups to render correctly.
    # Only values of existing keys are reassigned, which is safe while
    # iterating over the dict.
    for rlabel, mol in match.items():
        if rlabel.startswith("R"):
            match[rlabel] = _replace_rgroups_with_dummy_atoms(mol)
    return match
def get_rgroup_labels(scaffold_mol: Chem.rdchem.Mol) -> List[str]:
    """
    Collects the R-group labels of a scaffold.

    :param scaffold_mol: scaffold molecule
    :return: R-group labels present on the scaffold, formatted as "R<n>" and
        ordered by their number (so "R2" comes before "R10")
    """
    rlabels = set()
    for atm in scaffold_mol.GetAtoms():
        if atm.GetAtomicNum() == 0 and atm.HasProp(MOL_PROP_R_LABEL):
            rlabel = atm.GetIntProp(MOL_PROP_R_LABEL)
            # If the number is already taken, move on to the next free one so
            # that every labelled atom contributes a distinct label.
            while rlabel in rlabels:
                rlabel += 1
            rlabels.add(rlabel)
    # Sort the integers before formatting: sorting the formatted strings
    # would order them lexicographically ("R1", "R10", "R2", ...).
    return [f"R{x}" for x in sorted(rlabels)]
def setup_reaction(rxn_input: str) -> str:
    """
    Cleans up a user sketched reaction and converts it into a format that can
    be used for reaction enumeration.

    :param rxn_input: text describing the user's reaction; parsed as reaction
        SMARTS first, and as an MDL RXN block if that raises ValueError
    :return: a SMARTS string describing the cleaned up reaction
    """
    # The setup_reaction module has some heavy dependencies which we
    # prefer to avoid importing unless required.
    from schrodinger.livedesign import setup_reaction as reaction_setup

    try:
        parsed_rxn = rdChemReactions.ReactionFromSmarts(rxn_input)
    except ValueError:  # FIXME: LiveDesign currently uses MDL RXN input
        parsed_rxn = rdChemReactions.ReactionFromRxnBlock(rxn_input)

    return rdChemReactions.ReactionToSmarts(
        reaction_setup.setup_reaction(parsed_rxn))