Source code for schrodinger.application.desmond.mxmd.mxmd_system_builder

"""Script to setup cosolvent system"""

import os
import tempfile
from pathlib import Path
from typing import List
from typing import Optional
from typing import Tuple

import numpy as np

import schrodinger.application.desmond.system_builder_inp as sb_inp
from schrodinger import structure
from schrodinger.application.desmond import constants
from schrodinger.application.desmond import envir
from schrodinger.application.desmond import struc
from schrodinger.application.desmond.cms import get_box
from schrodinger.job import jobcontrol
from schrodinger.structure import Structure
from schrodinger.structutils.analyze import evaluate_asl
from schrodinger.utils import fileutils
from schrodinger.utils import log
from schrodinger.infra.util import cached_property

logger = log.get_output_logger(name="mixed_solvent")

WATER_MOLECULAR_MASS = 18.01528
BIG_BOX_BUFFER = 15.0
SHRINK_BOX_BY = 0.01
BOX_DATA = Path(envir.CONST.MMSHARE_SYSTEM_BUILDER_DATA_DIR) / 'mxmd'
DEFAULT_PROBES_STR = "acetonitrile,isopropanol,pyrimidine"
SYS_BUILDER = os.path.join(envir.CONST.SCHRODINGER_UTILITIES, "system_builder")
BUILTIN_PROBE_NAMES = [
    box_fname.replace('.box.mae', '') for box_fname in os.listdir(BOX_DATA)
]


[docs]class Cosolvent:
[docs] def __init__(self, filename: str): """ `filename` should follow the pattern `<probe_name>.box.mae` """ self._filename = filename self.name = Path(filename).name[:-8] # strip off .box.mae
@cached_property def box_strucs(self) -> List[Structure]: return list(structure.StructureReader(self._filename)) @property def _ct0(self): return self.box_strucs[0] @cached_property def probe_pdbres(self): res = next(iter(self._ct0.residue)) return res.pdbres @cached_property def density(self) -> float: """ Return average density of the box in g/cm^3 """ # FIXME: AU_TO_KG is a misleading name # but the return value has the correct units of g/cm^3. mass_kg = self._ct0.total_weight * constants.Conversion.AU_TO_KG all_boxes = [ np.array(get_box(ct)).reshape((3, 3)) for ct in self.box_strucs ] vol = np.array([np.linalg.det(box) for box in all_boxes]) return np.mean(mass_kg / vol) @cached_property def probe_nheavy(self) -> int: """ Return number of heavy atoms per probe molecule """ return len( [a for a in self._ct0.molecule[1].atom if a.atomic_number > 1]) @cached_property def probe_mass(self) -> float: """ Return molecular weight of the probe """ mol = self._ct0.molecule[1] return sum([a.atomic_weight for a in mol.atom]) @property def filename(self): return self._filename
[docs]def get_probe_paths(probe_names: List[str], custom_probe_dir: Optional[str]=None) \ -> Tuple[List[str], List[str], List[str]]: """ Process the given probe names and return paths to their .box.mae file. Additionally return any missing probes. :return: A 3-tuple of two lists containing probe paths for custom probe and builtin probes and a list containing probe names with missing files. """ custom = [] builtin = [] missing_probes = [] for probe_name in probe_names: if custom_probe_dir: probe_path = _get_probe_path(probe_name, custom_probe_dir) if Path(probe_path).exists(): custom.append(probe_path) continue builtin_probe_name = _get_probe_path(probe_name) if Path(builtin_probe_name).exists(): builtin.append(builtin_probe_name) continue missing_probes.append(probe_name) return custom, builtin, missing_probes
def _get_probe_path(probe_name: str, probe_dir: str = BOX_DATA): """ Get a path to the given probe's solvent box structure file """ return os.path.join(probe_dir, f"{probe_name}.box.mae")
[docs]def convert_vv2mol(probe: Cosolvent, target_vv_ratio: float = 5.0) -> int: ''' This function reports the number of water molecules required to maintain the required volume/volume ratio for each molecules of the probe Assumes that density is provided in gm/cm3 molecular mass also should be specified in gm/mol :param probe_name: Name of the cosolvent used to build mixed-solvent box :param target_vv_ratio: Volume over volume ratio in percent ''' return int((100.0 - target_vv_ratio) / WATER_MOLECULAR_MASS) / (( target_vv_ratio * probe.density) / probe.probe_mass)
[docs]class CosolventBoxGenerator: """ Example: gen = CosolventBoxGenerator( inp_fname, 'probe_directory/acetonitrile.box.mae', 1, cosolvent_layer_size=5.0, cosolvent_volume_ratio=2.0) gen.generate(out_fname) """
[docs] def __init__(self, solute_fname: str, probe_path: str, box_number: int, init_water_buffer: float = BIG_BOX_BUFFER, cosolvent_layer_size: float = 7.0, cosolvent_volume_ratio: float = 5.0, cosolvent_vdw_scaling: Optional[float] = None, water='SPC'): self._solute_fname = Path(solute_fname).absolute() self._box_number = box_number self._init_water_buffer = init_water_buffer self._water_model = water self._vdw_scaling = cosolvent_vdw_scaling self._layer_size = cosolvent_layer_size self._volume_ratio = cosolvent_volume_ratio self._probe = Cosolvent(probe_path) self._probe_name = self._probe.name
[docs] def generate(self, fname: str): mx_strucs = self._generate_cosolvent_box() if not mx_strucs: return self._shrink_water_box(mx_strucs, self._volume_ratio) with structure.StructureWriter(fname) as ct_writer: for ct in mx_strucs: ct_writer.append(ct)
def _generate_cosolvent_box(self) -> List[Structure]: ''' Creates solvated mixed solvent system in three steps: 1) Solvate the protein with cosolvent, and extract a layer of cosolvents around the protein 2) Solvate the protein-cosolvent system with specified water. Intentionally use a larger box so that it can laber be shrunk to match the targeted volume over volume (v/v) ratio. 3) Extract cosolvent from solute CT into a separate 'cosolvent' CT. ''' with fileutils.in_temporary_directory(): logger.info(f'Temp dir set to: {os.getcwd()}') layered_box_fname = self._make_layered_box() layered_shell_fname = self._make_layered_shell(layered_box_fname) mx_strucs = solvate_cosolvent(layered_shell_fname, self._water_model.lower(), self._init_water_buffer) self._split_cosolvent_from_solute(mx_strucs) return mx_strucs def _make_layered_box(self) -> Path: layered_box_fname = Path('layered_box.mae') csb_fn = 'create_cosolvent_box.csb' cosolvent_struc = self._probe.box_strucs[self._box_number] cosolvent_box_fname = 'cosolvent_box.mae' cosolvent_struc.write(cosolvent_box_fname) # backup reference coordinates for solute solute_ct = Structure.read(self._solute_fname) struc.set_ct_reference_coordinates(solute_ct) out_solute_fname = 'out_solute.mae' solute_ct.write(out_solute_fname) system = sb_inp.SystemBuilderInput() system.setSolute(out_solute_fname) # move non zero-ordered water molecules from solute to solvent CT system.treatSolventFromSolute() cosolvent_vdw_scaling = ( self._vdw_scaling or 1.0 - (self._probe.probe_nheavy * 0.07)) # yapf: disabled system.setVdwScalingFactor(cosolvent_vdw_scaling) system.setSolvent(cosolvent_box_fname) system.setNeutralize(0) system.setSoluteAlignment('rezero') system.setWriteMaeFile(str(layered_box_fname)) system.setBoundaryCondition( use_buffer=1, boundary_condition='cubic', a=self._layer_size) system.write(csb_fn) cmd = [SYS_BUILDER, csb_fn] logger.info( f'Creating a box of {self._probe_name} molecules around the ' f'protein, with vdw scaling: {cosolvent_vdw_scaling:.3f}') job = jobcontrol.launch_job(cmd, print_output=True, launch_dir='.') job.wait() return layered_box_fname def _make_layered_shell(self, layered_box_fname: Path) -> Path: full_system_ct = structure.Structure.read(layered_box_fname, index=1) layered_shell_fn = Path('layered_shell_raw.mae') logger.info( f"Converting the cosolvent box " f"({self._probe.filename}, " f"index={self._box_number}) to a layer of cosolvent molecules around " f"the protein.") # Keep cosolvents within a radius and delete xtal waters full_system_ct.deleteAtoms( evaluate_asl( full_system_ct, f'(fillmol beyond {self._layer_size} (protein or nucleic_acids)) ' 'or atom.i_desmond_crystal_water 1')) full_system_ct.title = structure.Structure.read( self._solute_fname).title full_system_ct.write(layered_shell_fn) logger.info(f'Written {layered_shell_fn}') return layered_shell_fn def _split_cosolvent_from_solute(self, mx_strucs: List[Structure]) -> None: ''' Split cosolvent molecules from solute CT into a seaparate CT. This CT is then inserted before solvent CT. Input `mx_strucs` will be mutated. :param mx_strucs: List of structures that assumes the same order as in Cms file structure (fullsystem, solute, solvent CTs) :param probe_name: name of the cosolvent used to build mixed-solvent box ''' probe_name = self._probe_name SOLUTE_CT_IDX = 1 cosol_atom_indices = evaluate_asl( mx_strucs[1], f'res.ptype "{self._probe.probe_pdbres}"') if not cosol_atom_indices: mx_strucs.clear() return cosolvent_ct = mx_strucs[1].extract(cosol_atom_indices) mx_strucs[SOLUTE_CT_IDX].deleteAtoms(cosol_atom_indices) cosolvent_ct.property[ constants.CT_TYPE] = constants.CT_TYPE.VAL.COSOLVENT cosolvent_ct.property[constants.NUM_COMPONENT] = 1 cosolvent_ct.title = f'Cosolvent - {probe_name}' cosolvent_ct.property[constants.MXMD_COSOLVENT_PROBE] = probe_name for prop in constants.SIM_BOX: cosolvent_ct.property[prop] = mx_strucs[SOLUTE_CT_IDX].property[ prop] for ires, res in enumerate(cosolvent_ct.residue, start=1): res.resnum = ires # Insert cosolvent CT to be before the solvent CT mx_strucs.insert(-1, cosolvent_ct) def _shrink_water_box(self, mx_strucs: List[Structure], target_vv_ratio: float) -> None: """ Shrink the water box to match volume-over-volume ratio :param mx_strucs: List of structures that assumes the same order as in Cms file structure (fullsystem, solute, solvent CTs) :param target_vv_ratio: Volume over volume ratio in percent """ cosol_ct_idx, wat_ct_idx = -2, -1 num_cosolvents = mx_strucs[cosol_ct_idx].mol_total nwat = mx_strucs[wat_ct_idx].mol_total required_nwat = int( num_cosolvents * convert_vv2mol(self._probe, target_vv_ratio)) logger.info( 'Shrinking the water box to match volume over volume ratio of ' f'{target_vv_ratio}%. Current box size is {self._init_water_buffer}' ) shrink_cosolvent_box(mx_strucs, required_nwat) logger.info( 'Done generating mixed-solvent box with targeted volume/volume ratio.' )
[docs]def solvate_cosolvent(inp_mae: Path, water_model: str = 'spc', init_water_buffer=BIG_BOX_BUFFER ) -> List[structure.Structure]: """ Solvate the cosolvent ct with water :param inp_mae: The path to the input mae. This is a system containing the cosolvent. :param job_dir: A path to the directory to run the job in :param water_model: The water type to use as the solvent :param init_water_buffer: The initial size of water box :return: Three structures from system_builder output: the fullsystem, solute, and solvent """ # Solvate Protein-cosolvent CT with water solvated_fn = 'solvated.mae' csb_fn = 'create_water_box.csb' inp = sb_inp.SystemBuilderInput() inp.setSolute(str(inp_mae)) inp.setSolvent(f"{water_model}.box.mae") inp.setNeutralize(0) inp.setWriteMaeFile(solvated_fn) inp.setBoundaryCondition( use_buffer=1, boundary_condition='cubic', a=init_water_buffer) inp.write(csb_fn) cmd = [SYS_BUILDER, csb_fn] logger.info("Creating a solvated mixed-solvent system") job = jobcontrol.launch_job(cmd, print_output=True) job.wait() logger.info(f"Mixed-solvent ({solvated_fn}) system created.") return list(structure.StructureReader(solvated_fn))
[docs]def shrink_cosolvent_box(cts: List[structure.Structure], required_nwat: int): """ Shrink the system box so that it contains a given number of water molecules. :param cts: The input structures in the order of: [full system, non-solvent ct..., cosolvent ct, water ct]. These cts are modified in place. :param required_nwat: How many waters should remain after shrinking system """ fsys_ct, *non_water_cts, water_ct = cts # delete excess waters, to match 50% weight/weight ratio nwat = water_ct.mol_total logger.info(f'Required number of water molecules is {required_nwat} for ' f'{non_water_cts[-1].mol_total} cosolvent molecules. ' f'Current system contains {nwat} waters.') if required_nwat > nwat: raise ValueError("Number of water molecules smaller than required. " "Cannot shrink system") half_box = water_ct.property[constants.SIM_BOX[0]] / 2.0 noh_wat_aid = evaluate_asl(water_ct, "water and not a.e H") noh_wat_idx = [i - 1 for i in noh_wat_aid] xyz = np.abs(water_ct.getXYZ()[noh_wat_idx]) while nwat > required_nwat: nwat = len(xyz[np.max(xyz, axis=1) <= half_box]) half_box -= SHRINK_BOX_BY # add half of SHRINK_BOX_BY for a guess closer to the target half_box += SHRINK_BOX_BY / 2.0 logger.info('Cubic box length will be shrunk from ' f'{water_ct.property[constants.SIM_BOX[0]]:.3f} to ' f'{(half_box * 2):.3f} Angstroms, with {nwat} water molecules.') # remove water molecues from solvent and full_system CTs wat_atom_idx_to_delete = [] for mol in water_ct.molecule: if np.max(np.abs(mol.atom[1].xyz)) > half_box: wat_atom_idx_to_delete += mol.getAtomIndices() water_ct.deleteAtoms(wat_atom_idx_to_delete) # offset atom indices number of atoms prior to solvent block offset = sum([ct.atom_total for ct in non_water_cts]) fsys_ct.deleteAtoms([idx + offset for idx in wat_atom_idx_to_delete]) # update box values for ax, by, cz, of a cubic box for ct in cts: for prop in constants.SIM_BOX_DIAGONAL: ct.property[prop] = half_box * 2
if __name__ == '__main__': # pragma: no cover # For quick test. import sys in_fname, out_fname, cosolvent_layer_size = sys.argv[1:4] cosolvent_layer_size = float(cosolvent_layer_size) gen = CosolventBoxGenerator( in_fname, 'acetonitrile', 1, cosolvent_layer_size=cosolvent_layer_size, cosolvent_vdw_scaling=1.0) gen.generate(out_fname)