# Source code for schrodinger.livedesign.bbchem_endpoints
"""
Collection of functions intended as bbchem web endpoints.
Copyright Schrodinger, LLC. All rights reserved.
"""
from typing import Iterator
from typing import List
from typing import NamedTuple
from typing import Optional
from typing import Union
from rdkit import Chem
from rdkit.Chem import EnumerateStereoisomers
from rdkit.Chem import rdRGroupDecomposition
from rdkit.Chem.MolStandardize import rdMolStandardize
from schrodinger.livedesign import convert
from schrodinger.livedesign import molhash
from schrodinger.livedesign import preprocessor
# Default cap on the number of stereoisomers generated by
# enumerate_stereoisomers when the caller does not pass max_stereoisomers.
DEFAULT_MAX_STEREOISOMERS = 512
class RegistrationData(NamedTuple):
    """
    Per-compound record yielded by `registration_process`.
    """

    # Output of preprocessor.convert_to_molblock for the preprocessed mol
    sdf: Union[str, bytes]

    # Output of molhash.get_mol_layers for the preprocessed mol
    hash_layers: molhash.Layers

    # Value of RDKit's Descriptors.MolWt for the preprocessed mol
    molecular_weight: float

    # Value of RDKit's rdMolDescriptors.CalcMolFormula for the preprocessed mol
    molecular_formula: str

    # True when at least one atom carries preprocessor.MOL_PROP_ATTACHPT
    has_attachment_point: bool
def registration_process(
    data: Union[str, bytes],
    options: Optional[preprocessor.PreprocessorOptions] = None
) -> Iterator[RegistrationData]:
    """
    Runs through the registration pipeline for each compound provided in the
    input data, which includes the preprocessor and canonicalization.

    This is a generator: nothing is read or processed until it is iterated,
    and compounds are handled one at a time.

    :param data: input text string to be deserialized into RDKit mols
    :param options: preprocessor options
    :return: one RegistrationData per compound read from the input data
    """
    # `from rdkit import Chem` alone is not guaranteed to expose the
    # `Descriptors` / `rdMolDescriptors` submodules as attributes of `Chem`;
    # that only works if some other module happened to import them first.
    # Import them explicitly so this function does not depend on import order.
    from rdkit.Chem import Descriptors
    from rdkit.Chem import rdMolDescriptors

    # TODO: Find out what this information is used for in LiveDesign
    def has_attachment_point(mol):
        return any(
            atom.HasProp(preprocessor.MOL_PROP_ATTACHPT)
            for atom in mol.GetAtoms())

    for mol in convert.get_sd_reader(data):
        # All derived fields below are computed from the preprocessed mol,
        # not from the raw input structure.
        mol = preprocessor.preprocess(mol, options)
        yield RegistrationData(
            sdf=preprocessor.convert_to_molblock(mol, options),
            hash_layers=molhash.get_mol_layers(mol),
            molecular_weight=Descriptors.MolWt(mol),
            molecular_formula=rdMolDescriptors.CalcMolFormula(mol),
            has_attachment_point=has_attachment_point(mol))
def pop_properties(mol: Chem.rdchem.Mol) -> dict:
    """
    Read a molecule's properties as strings, then strip them from it.

    :param mol: molecule to extract, then clear all properties from
    :return: map of property name to the stringified value, built from
        `mol.GetPropsAsDict()` before the properties are removed
    """
    removed = {}
    for name, value in mol.GetPropsAsDict().items():
        removed[name] = str(value)

    # Snapshot is complete; now clear the properties on the molecule itself.
    preprocessor.remove_properties(mol)
    return removed
def set_properties(mol: Chem.rdchem.Mol, new_props: dict):
    """
    Replace a molecule's properties with the given ones.

    Existing properties are removed first; every new value is written with
    `SetProp` after being converted with `str()`.

    :param mol: molecule to clear, then set given properties on
    :param new_props: map of properties to add onto the molecule
    """
    # Start from a clean slate so only the entries of new_props remain.
    preprocessor.remove_properties(mol)

    for prop_name, prop_value in new_props.items():
        text = str(prop_value)
        mol.SetProp(prop_name, text)
def num_substructure_matches(match_mol: Chem.rdchem.Mol,
                             query_mol: Chem.rdchem.Mol) -> int:
    """
    Counts how many times a query substructure is found in a molecule.

    :param match_mol: molecule to find substructure matches in
    :param query_mol: substructure molecule on which to find matches
    :return: number of substructure matches found
    """
    # NOTE(review): GetSubstructMatches is called with RDKit's default
    # arguments; confirm whether its default match limit can cap this count.
    matches = match_mol.GetSubstructMatches(query_mol)
    return len(matches)
def enumerate_stereoisomers(mol: Chem.rdchem.Mol,
                            max_stereoisomers: int = DEFAULT_MAX_STEREOISOMERS
                           ) -> Iterator[Chem.rdchem.Mol]:
    """
    Generates stereoisomers of the given molecule.

    :param mol: molecule from which to generate stereoisomers
    :param max_stereoisomers: maximum number of stereoisomers to generate
    :return: generated stereoisomers
    """
    # A constant value is passed as `rand` -- presumably so that the
    # enumeration is reproducible between calls; confirm against RDKit docs.
    enumeration_options = EnumerateStereoisomers.StereoEnumerationOptions(
        maxIsomers=max_stereoisomers,
        rand=0xF00D,
        unique=True,
    )
    isomers = EnumerateStereoisomers.EnumerateStereoisomers(
        mol, enumeration_options)
    return isomers
def rgroup_decompose(scaffold_mol: Chem.rdchem.Mol,
                     match_mols: List[Chem.rdchem.Mol],
                     stereospecific: bool = True) -> List[dict]:
    """
    Decomposes molecules into R-groups around a single scaffold.

    The scaffold is copied before use, so the stereochemistry stripping done
    for non-stereospecific matching does not touch the caller's molecule.

    :param scaffold_mol: scaffold molecule on which to find R-groups
    :param match_mols: source molecules for R-group decomposition
    :param stereospecific: whether to consider bond stereochemistry and atom
        chirality of scaffold
    :return: list of dicts of R-group matches
    """
    params = rdRGroupDecomposition.RGroupDecompositionParameters()
    params.onlyMatchAtRGroups = True

    core = Chem.Mol(scaffold_mol)
    if not stereospecific:
        # Remove bond stereochemistry and chiral tags from the copy only.
        Chem.RemoveStereochemistry(core)

    # RGroupDecompose returns two values; only the first is returned here.
    rows, _unused = rdRGroupDecomposition.RGroupDecompose(
        [core], match_mols, options=params)
    return rows