"""Script to setup cosolvent system"""
import os
import tempfile
from pathlib import Path
from typing import List
from typing import Optional
from typing import Tuple
import numpy as np
import schrodinger.application.desmond.system_builder_inp as sb_inp
from schrodinger import structure
from schrodinger.application.desmond import constants
from schrodinger.application.desmond import envir
from schrodinger.application.desmond import struc
from schrodinger.application.desmond.cms import get_box
from schrodinger.job import jobcontrol
from schrodinger.structure import Structure
from schrodinger.structutils.analyze import evaluate_asl
from schrodinger.utils import fileutils
from schrodinger.utils import log
from schrodinger.infra.util import cached_property
logger = log.get_output_logger(name="mixed_solvent")
WATER_MOLECULAR_MASS = 18.01528
BIG_BOX_BUFFER = 15.0
SHRINK_BOX_BY = 0.01
BOX_DATA = Path(envir.CONST.MMSHARE_SYSTEM_BUILDER_DATA_DIR) / 'mxmd'
DEFAULT_PROBES_STR = "acetonitrile,isopropanol,pyrimidine"
SYS_BUILDER = os.path.join(envir.CONST.SCHRODINGER_UTILITIES, "system_builder")
BUILTIN_PROBE_NAMES = [
box_fname.replace('.box.mae', '') for box_fname in os.listdir(BOX_DATA)
]
[docs]class Cosolvent:
[docs] def __init__(self, filename: str):
"""
`filename` should follow the pattern `<probe_name>.box.mae`
"""
self._filename = filename
self.name = Path(filename).name[:-8] # strip off .box.mae
@cached_property
def box_strucs(self) -> List[Structure]:
return list(structure.StructureReader(self._filename))
@property
def _ct0(self):
return self.box_strucs[0]
@cached_property
def probe_pdbres(self):
res = next(iter(self._ct0.residue))
return res.pdbres
@cached_property
def density(self) -> float:
"""
Return average density of the box in g/cm^3
"""
# FIXME: AU_TO_KG is a misleading name
# but the return value has the correct units of g/cm^3.
mass_kg = self._ct0.total_weight * constants.Conversion.AU_TO_KG
all_boxes = [
np.array(get_box(ct)).reshape((3, 3)) for ct in self.box_strucs
]
vol = np.array([np.linalg.det(box) for box in all_boxes])
return np.mean(mass_kg / vol)
@cached_property
def probe_nheavy(self) -> int:
"""
Return number of heavy atoms per probe molecule
"""
return len(
[a for a in self._ct0.molecule[1].atom if a.atomic_number > 1])
@cached_property
def probe_mass(self) -> float:
"""
Return molecular weight of the probe
"""
mol = self._ct0.molecule[1]
return sum([a.atomic_weight for a in mol.atom])
@property
def filename(self):
return self._filename
[docs]def get_probe_paths(probe_names: List[str],
custom_probe_dir: Optional[str]=None) \
-> Tuple[List[str], List[str], List[str]]:
"""
Process the given probe names and return paths to their .box.mae file.
Additionally return any missing probes.
:return: A 3-tuple of two lists containing probe paths for custom probe
and builtin probes and a list containing probe names with missing files.
"""
custom = []
builtin = []
missing_probes = []
for probe_name in probe_names:
if custom_probe_dir:
probe_path = _get_probe_path(probe_name, custom_probe_dir)
if Path(probe_path).exists():
custom.append(probe_path)
continue
builtin_probe_name = _get_probe_path(probe_name)
if Path(builtin_probe_name).exists():
builtin.append(builtin_probe_name)
continue
missing_probes.append(probe_name)
return custom, builtin, missing_probes
def _get_probe_path(probe_name: str, probe_dir: str = BOX_DATA):
"""
Get a path to the given probe's solvent box structure file
"""
return os.path.join(probe_dir, f"{probe_name}.box.mae")
[docs]def convert_vv2mol(probe: Cosolvent, target_vv_ratio: float = 5.0) -> int:
'''
This function reports the number of water molecules required to maintain
the required volume/volume ratio for each molecules of the probe
Assumes that density is provided in gm/cm3 molecular mass also should be
specified in gm/mol
:param probe_name: Name of the cosolvent used to build mixed-solvent box
:param target_vv_ratio: Volume over volume ratio in percent
'''
return int((100.0 - target_vv_ratio) / WATER_MOLECULAR_MASS) / ((
target_vv_ratio * probe.density) / probe.probe_mass)
[docs]class CosolventBoxGenerator:
"""
Example:
gen = CosolventBoxGenerator(
inp_fname,
'probe_directory/acetonitrile.box.mae',
1,
cosolvent_layer_size=5.0,
cosolvent_volume_ratio=2.0)
gen.generate(out_fname)
"""
[docs] def __init__(self,
solute_fname: str,
probe_path: str,
box_number: int,
init_water_buffer: float = BIG_BOX_BUFFER,
cosolvent_layer_size: float = 7.0,
cosolvent_volume_ratio: float = 5.0,
cosolvent_vdw_scaling: Optional[float] = None,
water='SPC'):
self._solute_fname = Path(solute_fname).absolute()
self._box_number = box_number
self._init_water_buffer = init_water_buffer
self._water_model = water
self._vdw_scaling = cosolvent_vdw_scaling
self._layer_size = cosolvent_layer_size
self._volume_ratio = cosolvent_volume_ratio
self._probe = Cosolvent(probe_path)
self._probe_name = self._probe.name
[docs] def generate(self, fname: str):
mx_strucs = self._generate_cosolvent_box()
if not mx_strucs:
return
self._shrink_water_box(mx_strucs, self._volume_ratio)
with structure.StructureWriter(fname) as ct_writer:
for ct in mx_strucs:
ct_writer.append(ct)
def _generate_cosolvent_box(self) -> List[Structure]:
'''
Creates solvated mixed solvent system in three steps:
1) Solvate the protein with cosolvent, and extract a layer of cosolvents
around the protein
2) Solvate the protein-cosolvent system with specified water. Intentionally
use a larger box so that it can laber be shrunk to match the targeted
volume over volume (v/v) ratio.
3) Extract cosolvent from solute CT into a separate 'cosolvent' CT.
'''
with fileutils.in_temporary_directory():
logger.info(f'Temp dir set to: {os.getcwd()}')
layered_box_fname = self._make_layered_box()
layered_shell_fname = self._make_layered_shell(layered_box_fname)
mx_strucs = solvate_cosolvent(layered_shell_fname,
self._water_model.lower(),
self._init_water_buffer)
self._split_cosolvent_from_solute(mx_strucs)
return mx_strucs
def _make_layered_box(self) -> Path:
layered_box_fname = Path('layered_box.mae')
csb_fn = 'create_cosolvent_box.csb'
cosolvent_struc = self._probe.box_strucs[self._box_number]
cosolvent_box_fname = 'cosolvent_box.mae'
cosolvent_struc.write(cosolvent_box_fname)
# backup reference coordinates for solute
solute_ct = Structure.read(self._solute_fname)
struc.set_ct_reference_coordinates(solute_ct)
out_solute_fname = 'out_solute.mae'
solute_ct.write(out_solute_fname)
system = sb_inp.SystemBuilderInput()
system.setSolute(out_solute_fname)
# move non zero-ordered water molecules from solute to solvent CT
system.treatSolventFromSolute()
cosolvent_vdw_scaling = (
self._vdw_scaling or
1.0 - (self._probe.probe_nheavy * 0.07)) # yapf: disabled
system.setVdwScalingFactor(cosolvent_vdw_scaling)
system.setSolvent(cosolvent_box_fname)
system.setNeutralize(0)
system.setSoluteAlignment('rezero')
system.setWriteMaeFile(str(layered_box_fname))
system.setBoundaryCondition(
use_buffer=1, boundary_condition='cubic', a=self._layer_size)
system.write(csb_fn)
cmd = [SYS_BUILDER, csb_fn]
logger.info(
f'Creating a box of {self._probe_name} molecules around the '
f'protein, with vdw scaling: {cosolvent_vdw_scaling:.3f}')
job = jobcontrol.launch_job(cmd, print_output=True, launch_dir='.')
job.wait()
return layered_box_fname
def _make_layered_shell(self, layered_box_fname: Path) -> Path:
full_system_ct = structure.Structure.read(layered_box_fname, index=1)
layered_shell_fn = Path('layered_shell_raw.mae')
logger.info(
f"Converting the cosolvent box "
f"({self._probe.filename}, "
f"index={self._box_number}) to a layer of cosolvent molecules around "
f"the protein.")
# Keep cosolvents within a radius and delete xtal waters
full_system_ct.deleteAtoms(
evaluate_asl(
full_system_ct,
f'(fillmol beyond {self._layer_size} (protein or nucleic_acids)) '
'or atom.i_desmond_crystal_water 1'))
full_system_ct.title = structure.Structure.read(
self._solute_fname).title
full_system_ct.write(layered_shell_fn)
logger.info(f'Written {layered_shell_fn}')
return layered_shell_fn
def _split_cosolvent_from_solute(self, mx_strucs: List[Structure]) -> None:
'''
Split cosolvent molecules from solute CT into a seaparate CT. This CT is
then inserted before solvent CT. Input `mx_strucs` will be mutated.
:param mx_strucs: List of structures that assumes the same order as in Cms
file structure (fullsystem, solute, solvent CTs)
:param probe_name: name of the cosolvent used to build mixed-solvent box
'''
probe_name = self._probe_name
SOLUTE_CT_IDX = 1
cosol_atom_indices = evaluate_asl(
mx_strucs[1], f'res.ptype "{self._probe.probe_pdbres}"')
if not cosol_atom_indices:
mx_strucs.clear()
return
cosolvent_ct = mx_strucs[1].extract(cosol_atom_indices)
mx_strucs[SOLUTE_CT_IDX].deleteAtoms(cosol_atom_indices)
cosolvent_ct.property[
constants.CT_TYPE] = constants.CT_TYPE.VAL.COSOLVENT
cosolvent_ct.property[constants.NUM_COMPONENT] = 1
cosolvent_ct.title = f'Cosolvent - {probe_name}'
cosolvent_ct.property[constants.MXMD_COSOLVENT_PROBE] = probe_name
for prop in constants.SIM_BOX:
cosolvent_ct.property[prop] = mx_strucs[SOLUTE_CT_IDX].property[
prop]
for ires, res in enumerate(cosolvent_ct.residue, start=1):
res.resnum = ires
# Insert cosolvent CT to be before the solvent CT
mx_strucs.insert(-1, cosolvent_ct)
def _shrink_water_box(self, mx_strucs: List[Structure],
target_vv_ratio: float) -> None:
"""
Shrink the water box to match volume-over-volume ratio
:param mx_strucs: List of structures that assumes the same order as in Cms
file structure (fullsystem, solute, solvent CTs)
:param target_vv_ratio: Volume over volume ratio in percent
"""
cosol_ct_idx, wat_ct_idx = -2, -1
num_cosolvents = mx_strucs[cosol_ct_idx].mol_total
nwat = mx_strucs[wat_ct_idx].mol_total
required_nwat = int(
num_cosolvents * convert_vv2mol(self._probe, target_vv_ratio))
logger.info(
'Shrinking the water box to match volume over volume ratio of '
f'{target_vv_ratio}%. Current box size is {self._init_water_buffer}'
)
shrink_cosolvent_box(mx_strucs, required_nwat)
logger.info(
'Done generating mixed-solvent box with targeted volume/volume ratio.'
)
[docs]def solvate_cosolvent(inp_mae: Path,
water_model: str = 'spc',
init_water_buffer=BIG_BOX_BUFFER
) -> List[structure.Structure]:
"""
Solvate the cosolvent ct with water
:param inp_mae: The path to the input mae. This is a system containing the
cosolvent.
:param job_dir: A path to the directory to run the job in
:param water_model: The water type to use as the solvent
:param init_water_buffer: The initial size of water box
:return: Three structures from system_builder output: the fullsystem,
solute, and solvent
"""
# Solvate Protein-cosolvent CT with water
solvated_fn = 'solvated.mae'
csb_fn = 'create_water_box.csb'
inp = sb_inp.SystemBuilderInput()
inp.setSolute(str(inp_mae))
inp.setSolvent(f"{water_model}.box.mae")
inp.setNeutralize(0)
inp.setWriteMaeFile(solvated_fn)
inp.setBoundaryCondition(
use_buffer=1, boundary_condition='cubic', a=init_water_buffer)
inp.write(csb_fn)
cmd = [SYS_BUILDER, csb_fn]
logger.info("Creating a solvated mixed-solvent system")
job = jobcontrol.launch_job(cmd, print_output=True)
job.wait()
logger.info(f"Mixed-solvent ({solvated_fn}) system created.")
return list(structure.StructureReader(solvated_fn))
[docs]def shrink_cosolvent_box(cts: List[structure.Structure], required_nwat: int):
"""
Shrink the system box so that it contains a given number of water molecules.
:param cts: The input structures in the order of:
[full system, non-solvent ct..., cosolvent ct, water ct]. These cts
are modified in place.
:param required_nwat: How many waters should remain after shrinking system
"""
fsys_ct, *non_water_cts, water_ct = cts
# delete excess waters, to match 50% weight/weight ratio
nwat = water_ct.mol_total
logger.info(f'Required number of water molecules is {required_nwat} for '
f'{non_water_cts[-1].mol_total} cosolvent molecules. '
f'Current system contains {nwat} waters.')
if required_nwat > nwat:
raise ValueError("Number of water molecules smaller than required. "
"Cannot shrink system")
half_box = water_ct.property[constants.SIM_BOX[0]] / 2.0
noh_wat_aid = evaluate_asl(water_ct, "water and not a.e H")
noh_wat_idx = [i - 1 for i in noh_wat_aid]
xyz = np.abs(water_ct.getXYZ()[noh_wat_idx])
while nwat > required_nwat:
nwat = len(xyz[np.max(xyz, axis=1) <= half_box])
half_box -= SHRINK_BOX_BY
# add half of SHRINK_BOX_BY for a guess closer to the target
half_box += SHRINK_BOX_BY / 2.0
logger.info('Cubic box length will be shrunk from '
f'{water_ct.property[constants.SIM_BOX[0]]:.3f} to '
f'{(half_box * 2):.3f} Angstroms, with {nwat} water molecules.')
# remove water molecues from solvent and full_system CTs
wat_atom_idx_to_delete = []
for mol in water_ct.molecule:
if np.max(np.abs(mol.atom[1].xyz)) > half_box:
wat_atom_idx_to_delete += mol.getAtomIndices()
water_ct.deleteAtoms(wat_atom_idx_to_delete)
# offset atom indices number of atoms prior to solvent block
offset = sum([ct.atom_total for ct in non_water_cts])
fsys_ct.deleteAtoms([idx + offset for idx in wat_atom_idx_to_delete])
# update box values for ax, by, cz, of a cubic box
for ct in cts:
for prop in constants.SIM_BOX_DIAGONAL:
ct.property[prop] = half_box * 2
if __name__ == '__main__': # pragma: no cover
# For quick test.
import sys
in_fname, out_fname, cosolvent_layer_size = sys.argv[1:4]
cosolvent_layer_size = float(cosolvent_layer_size)
gen = CosolventBoxGenerator(
in_fname,
'acetonitrile',
1,
cosolvent_layer_size=cosolvent_layer_size,
cosolvent_vdw_scaling=1.0)
gen.generate(out_fname)