Source code for schrodinger.livedesign.convert
import enum
from rdkit import Chem
from schrodinger.thirdparty import rdkit_adapter
def convert(data: str, input_format: Format, output_format: Format) -> str:
    """
    Translate a structure string from one text format into another, going
    through an intermediate RDKit mol.

    :param data: input text string
    :param input_format: expected format for input string
    :param output_format: desired format for output string
    :return: converted text string
    :raises KeyError: if a format has no entry in the parser/serializer
        tables defined in this function
    """
    # Format -> callable that parses text into an RDKit mol
    parsers = {
        Format.SDF: sdf_to_rdkit,
        Format.SMILES: Chem.MolFromSmiles,
        Format.CXSMILES: _cxsmiles_to_rdkit,
    }
    # Format -> callable that serializes an RDKit mol back to text
    serializers = {
        Format.SDF: rdkit_to_sdf,
        Format.SMILES: Chem.MolToSmiles,
        Format.CXSMILES: _rdkit_to_cxsmiles,
    }
    # NOTE(review): going by its name, this context manager surfaces RDKit
    # log messages as exceptions -- confirm against rdkit_adapter.
    with rdkit_adapter.convert_log_to_exception():
        # Parse before looking up the serializer so that a parsing failure is
        # reported ahead of an unsupported output format.
        parsed = parsers[input_format](data)
        serialize = serializers[output_format]
        return serialize(parsed)
def get_sd_reader(molblock: str) -> Chem.SDMolSupplier:
    """
    Create an SD supplier that reads from in-memory SDF text.

    The data is loaded with sanitization, hydrogen removal and strict
    parsing all switched off.

    :param molblock: given SDF molblock
    :return: iterator through each mol in the given SDF data
    """
    supplier = Chem.SDMolSupplier()
    supplier.SetData(
        molblock,
        sanitize=False,
        removeHs=False,
        strictParsing=False,
    )
    return supplier
def sdf_to_rdkit(molblock: str) -> Chem.rdchem.Mol:
    """
    Read the one and only structure contained in the given SDF text.

    :param molblock: given SDF molblock
    :return: corresponding RDKit mol
    :raises RuntimeError: if the SDF data does not hold exactly one molblock
    """
    supplier = get_sd_reader(molblock)
    entry_count = len(supplier)
    if not entry_count == 1:
        message = f"Single molblock required; {entry_count} present"
        raise RuntimeError(message)
    # NOTE(review): going by its name, this context manager surfaces RDKit
    # log messages as exceptions -- confirm against rdkit_adapter.
    with rdkit_adapter.convert_log_to_exception():
        only_mol = next(supplier)
        return only_mol
def rdkit_to_sdf(mol: Chem.rdchem.Mol) -> str:
    """
    Serialize an RDKit mol as SDF text, always requesting the V3000 flavor.

    :param mol: given RDKit mol
    :return: corresponding SDF molblock
    """
    sdf_text = Chem.SDWriter.GetText(mol, force_v3000=True)
    return sdf_text
def _cxsmiles_to_rdkit(cxsmiles_str):
    """
    Parse a CXSMILES string into an editable RDKit mol, replacing attachment
    point dummy atoms with ``molAttchpt`` properties on their neighbors.

    The string is parsed without sanitization and then kekulized. Every
    dummy atom (atomic number 0) with exactly one neighbor and an
    ``atomLabel`` starting with ``_AP`` is deleted; its neighbor receives the
    integer that follows ``_AP`` as the ``molAttchpt`` property.

    :param cxsmiles_str: CXSMILES text to parse
    :return: resulting RDKit RWMol
    """
    rwmol = Chem.RWMol(Chem.MolFromSmiles(cxsmiles_str, sanitize=False))
    # kekulizing this now saves us a bunch of work when we remove atoms
    Chem.Kekulize(rwmol)

    dummy_indices = []
    for candidate in list(rwmol.GetAtoms()):
        # Only dummy atoms marked as attachment points qualify; the label is
        # read only after HasProp confirms that it exists.
        is_attachment_dummy = (
            not candidate.GetAtomicNum()
            and candidate.GetDegree() == 1
            and candidate.HasProp("atomLabel")
            and candidate.GetProp("atomLabel").startswith("_AP"))
        if not is_attachment_dummy:
            continue

        # Flag the neighbor so that ATTCHPT markers end up in the output
        anchor = candidate.GetNeighbors()[0]
        attachment_number = int(
            candidate.GetProp("atomLabel").replace("_AP", ""))
        anchor.SetIntProp("molAttchpt", attachment_number)
        dummy_indices.append(candidate.GetIdx())

    # Delete from the highest index down so that the indices still waiting
    # to be removed stay valid.
    dummy_indices.sort(reverse=True)
    for dummy_idx in dummy_indices:
        rwmol.RemoveAtom(dummy_idx)

    # TODO(novak): remove this after RDKit fix https://crucible.bb.schrodinger.com/cru/SB-19069#c268131
    if rwmol.HasProp("_CXSMILES_Data"):
        rwmol.ClearProp("_CXSMILES_Data")
    return rwmol
def _rdkit_to_cxsmiles(molecule):
    """
    Serialize an RDKit mol to CXSMILES, writing every ``molAttchpt`` atom
    property as an explicit dummy atom labeled ``_AP<n>``.

    All edits (dropping conformers, adding dummy atoms, clearing
    ``molAttchpt``) are applied to a private copy, so the mol passed in by
    the caller is not modified.

    :param molecule: given RDKit mol
    :return: corresponding CXSMILES string
    """
    # Copy first: stripping conformers from the caller's mol would silently
    # throw away its coordinates.
    editable = Chem.RWMol(molecule)
    # remove all conformers so that we don't get coordinates in CXSMILES
    editable.RemoveAllConformers()

    for atom in list(editable.GetAtoms()):
        if not atom.HasProp("molAttchpt"):
            continue
        # Explicitly adding attachment points so they are not lost on
        # conversion to CXSMILES
        label = f"_AP{atom.GetIntProp('molAttchpt')}"
        dummy_idx = editable.AddAtom(Chem.Atom(0))
        editable.GetAtomWithIdx(dummy_idx).SetProp("atomLabel", label)
        atom.ClearProp("molAttchpt")
        editable.AddBond(atom.GetIdx(), dummy_idx, Chem.BondType.SINGLE)
    return Chem.MolToCXSmiles(editable)