Source code for schrodinger.application.matsci.msutils

"""
Utility functions and classes for MatSci workflows.

Copyright Schrodinger, LLC. All rights reserved.
"""

import os
import re

import contextlib
from distutils import util as dutil
import numpy
from math import log10, floor
from rdkit import Chem
import warnings
from requests.packages.urllib3.exceptions import InsecureRequestWarning

import schrodinger
from schrodinger import structure
from schrodinger.infra import mm
from schrodinger.structutils import analyze
from schrodinger.utils import fileutils
from schrodinger.thirdparty import rdkit_adapter

# Importing matsci modules here should be avoided to prevent circular importing

# desmond.constants and property_names don't import much
from schrodinger.application.desmond import cms
from schrodinger.application.desmond import constants as dconst
from schrodinger.application.matsci import property_names as pnames
from schrodinger.application.matsci import coarsegrain

maestro = schrodinger.get_maestro()

APP_DIR = 'matsci_templates'

# -2 is the official dummy atomic number, but some sources use 0, such as
# Maestro builder ligands (MATSCI-6328, SHARED-6160)
DUMMY_ATOMIC_NUMBERS = {0, -2}
DUMMY_ELEMENT_SYMBOLS = {'Du', 'DU'}
DUMMY_ATOM_NAMES = {'Xpi'}  # Jaguar mmjag uses this

PROP_TYPE_GET_DICT = {
    'r': mm.mmct_atom_property_get_real,
    'b': mm.mmct_atom_property_get_bool,
    'i': mm.mmct_atom_property_get_int,
    's': mm.mmct_atom_property_get_string,
}


[docs]def remove_properties(struct, props=None, matches=None, atom_props=None, atom_matches=None): """ Remove all the matching structure and atom properties. No error is thrown if the given properties do not actually exist on the structure. :type struct: `structure.Structure` or `cms.Cms` :param struct: The structure to remove properties from :type props: list :param props: A list of structure properties to delete :type matches: list :param matches: Remove all structure properties whose name contains any of these strings :type atom_props: list :param atom_props: A list of atom properties to delete :type atom_matches: list :param atom_matches: Remove all atom properties whose name contains any of these strings """ def _get_proplist(prop_names, given, strings): if given is None: proplist = [] else: proplist = given[:] if strings is not None: for name in prop_names: for astring in strings: if astring in name: proplist.append(name) return proplist is_cms = isinstance(struct, cms.Cms) for prop in _get_proplist(struct.getPropertyNames(), props, matches): if is_cms: remove_cms_property(struct, prop) else: struct.property.pop(prop, None) for prop in _get_proplist(struct.getAtomPropertyNames(), atom_props, atom_matches): remove_atom_property(struct, prop)
[docs]def remove_atom_property(struct, prop): """ Delete atom property from all atoms in a structure (structure will be modified). :type struct: `structure.Structure` or `cms.Cms` :param struct: Structure object to be modified :type prop: str :param prop: Atom property to be removed """ if isinstance(struct, cms.Cms): remove_cms_atom_property(struct, prop) try: # The below call is vastly faster than iterating over all atoms mm.mmct_atom_property_delete(struct, prop, mm.MMCT_ATOMS_ALL) except mm.MmException as mmexc: if mmexc.rc != mm.MMCT_ATOM_PROPERTY_NOT_DEFINED_IN_CT: # Should not happen, but just to be sure we don't silently # ignore important errors... raise
[docs]def remove_cms_property(cms_model, propname): """ Delete a property from a cms model :param cms_model: cms model :type cms_model: `cms.Cms` :param propname: property name :type propname: str """ for struct in [cms_model._raw_fsys_ct, cms_model.fsys_ct ] + cms_model.comp_ct: struct.property.pop(propname, None)
[docs]def remove_cms_atom_property(cms_model, propname): """ Delete an atom property from a cms model :param cms_model: cms model :type cms_model: `cms.Cms` :param propname: property name :type propname: str """ for struct in [cms_model._raw_fsys_ct, cms_model.fsys_ct ] + cms_model.comp_ct: remove_atom_property(struct, propname)
[docs]def has_atom_property(struct, prop): """ Check if structure has any atom with the property set. :param structure.Structure: Input structure :param str: Property name :raise KeyError: If property name doesn't start with: s_, r_, i_, b_ :raise mm.MmException: If unexpected error occurred :return bool: True of property is present, False otherwise """ if struct.atom_total == 0: return False # get function based on the property first char funct = PROP_TYPE_GET_DICT[prop[0]] try: funct(struct, prop, 1) except mm.MmException as mmexc: if mmexc.rc == mm.MMCT_ATOM_PROPERTY_NOT_DEFINED_IN_CT: # Property does not exist on any atom return False elif mmexc.rc == mm.MMCT_ATOM_PROPERTY_UNDEFINED_ATOM: # Property exists on at least one atom, but not this one return True else: # An unexpected error occurred, don't be silent about it raise return True
[docs]def roundup(inp): """ Round away from zero (to +/-infinity). :type inp: float or numpy.array :param inp: value to be rounded :rtype: float or numpy.array :return: Rounded value """ # This is NOT default behavior of python3, see also: # mail.python.org/pipermail/numpy-discussion/2014-October/071302.html return numpy.trunc(inp + numpy.copysign(.5, inp))
[docs]def getstr(ret): """ Convert binary string (or other data) to str. :type ret: binary_type or any other type convertable to str :param ret: Value to be converted to str :rtype: str :return: Value converted to str """ if isinstance(ret, bytes): ret = ret.decode() return str(ret)
[docs]def sig_fig_round(value, significant_figure=5): """ :type value: float :param value: change the significant figures of this value :type significant_figure: int :param significant_figure: significant figure of the displayed value :rtype: str :return: str representation of a number with proper significant figures """ sig_fig_fmt = '%.{}g'.format(significant_figure) return sig_fig_fmt % value
[docs]def get_significant_figures(number): """ Get significant digits of a number :type number: float or int or str :param number: number to find significant digits :rtype: int :return: number of significant digits """ sig_fig = 0 check_first = True decimal = False track_zero = 0 for digit in str(number): if digit == '.': decimal = True continue if digit == '0' and check_first: continue else: check_first = False sig_fig += 1 if digit == '0': track_zero += 1 else: track_zero = 0 if digit == 'E' or digit == 'e' or digit == 'x': sig_fig -= 1 break if not decimal: sig_fig -= track_zero return sig_fig
[docs]def set_significant_figures(number, sig_fig): """ Set significant digits of a number :type number: float or int :param number: number to set significant digits :type sig_fig: int :param sig_fig: number of significant digits :rtype: float :return: number with desired significant digits """ round_to = sig_fig - int(floor(log10(abs(number)))) - 1 return numpy.round(number, round_to)
[docs]def get_project_group_hierarchy(st=None, row=None): """ Return the project group hierarchy for the given structure or row. :type st: `schrodinger.structure.Structure` :param st: the structure :type row: `schrodinger.project.ProjectRow` :param row: the project row :raise ValueError: if there is an issue :rtype: list :return: the hierarchy (outermost to innermost) """ if (not st and not row) or (st and row): msg = ('Either a structure or a project row must be given.') raise ValueError(msg) if st: hierarchy = st.property.get(mm.M2IO_DATA_SUBGROUP_TITLE) if hierarchy: return hierarchy.split(mm.M2IO_SUBGROUP_SEPARATOR) eid = st.property.get('s_m_entry_id') if eid is None: msg = ('Structure has no entry ID.') raise ValueError(msg) if not maestro: msg = ('No Maestro session is active.') raise ValueError(msg) pt = maestro.project_table_get() row = pt.getRow(eid) if not row: msg = ('Structure is not in the project.') raise ValueError(msg) if row: hierarchy = row.property.get(mm.M2IO_DATA_SUBGROUP_TITLE) if hierarchy: return hierarchy.split(mm.M2IO_SUBGROUP_SEPARATOR) hierarchy = [] group = row.group while group: hierarchy.append(group.name) group = group.getParentGroup() hierarchy.reverse() return hierarchy
[docs]def set_project_group_hierarchy(st, hierarchy, collapsed=False): """ Set the project group hierarchy for the given structure. :type st: `schrodinger.structure.Structure` :param st: the structure :type hierarchy: list :param hierarchy: the hierarchy (outermost to innermost) :param bool collapsed: Whether the group should initially be collapsed """ if hierarchy: hierarchy = mm.M2IO_SUBGROUP_SEPARATOR.join(hierarchy) st.property[mm.M2IO_DATA_SUBGROUP_TITLE] = hierarchy st.property[mm.M2IO_DATA_SUBGROUPID] = hierarchy st.property[mm.M2IO_DATA_SUBGROUP_COLLAPSED] = collapsed
[docs]def get_matsci_user_data_dir(): """ Get the absolute path to the user's local MatSci data directory for storing custom templates, protocols, etc. Directory is created if it doesn't exist. :rtype: str :return: The absolute path the Materials Science data parent directory """ data_dir = os.path.join( fileutils.get_directory_path(fileutils.LOCAL_APPDATA), APP_DIR) fileutils.mkdir_p(data_dir) return data_dir
[docs]def structure_reader(filename, log=None): """ Read structures from a file until the end or the first structure with an error. :param str filename: filename :param function log: Log function, if None, nothing is called :yield schrodinger.structure.Structure: Next structure in the file """ # Send to /dev/null if log is None log = log if log else lambda x: None with structure.StructureReader(filename) as reader: try: yield from reader except Exception: log('Failed to read a structure from %s.' % filename)
[docs]def is_dummy_atom(atom): """ Return True if the given atom is a dummy atom. :type atom: schrodinger.structure._StructureAtom :param atom: the atom :rtype: bool :return: return True if the given atom is a dummy atom """ return (atom.atomic_number in DUMMY_ATOMIC_NUMBERS or atom.element in DUMMY_ELEMENT_SYMBOLS or atom.atom_name in DUMMY_ATOM_NAMES)
[docs]def validate_no_dummy_atoms(structs): """ Validate that the passed structures don't have dummy atoms :param iterable structs: Structures to check :rtype: bool or (bool, str) :rtype: True if no structures has dummy atoms, False and error message if at least one structure does """ for struct in structs: if any(is_dummy_atom(atom) for atom in struct.atom): return False, (f'At least one structure ({struct.title}) has dummy ' 'atoms and cannot be used with this workflow.') return True
[docs]def add_or_update_bond_order(ct, atom1, atom2, bond_order): """ Create a new bond, or update the existing bond order of this bond. :type ct: schrodinger.structure.Structure :type atom1: schrodinger.structure._StructureAtom :type atom2: schrodinger.structure._StructureAtom :type bond_type: int (0-3) :rtype: schrodinger.structure._Bond """ bond = ct.getBond(atom1, atom2) if not bond: return ct.addBond(atom1, atom2, bond_order) bond.order = bond_order return bond
[docs]def add_or_update_bond_type(ct, atom1, atom2, bond_type): """ Create a new bond, or update the existing bond type of this bond. :type ct: schrodinger.structure.Structure :type atom1: schrodinger.structure._StructureAtom :type atom2: schrodinger.structure._StructureAtom :type bond_type: schrodinger.structure.BondType :rtype: schrodinger.structure._Bond """ bond = ct.getBond(atom1, atom2) if not bond: return ct.addBond(atom1, atom2, bond_type) bond.type = bond_type return bond
[docs]def polyfit(xdata, ydata, degree): """ Fit a polynomial of rang degree to data. :param numpy.array xdata: X data :param numpy.array ydata: Y data :param int degree: Degree of the fitting polynomial :rtype: numpy.array, float :return: Fitting coefficients, coefficient of determination (R^2) """ coeffs = numpy.polyfit(xdata, ydata, degree) # Get polynomial polynomial = numpy.poly1d(coeffs) # fit values, and mean yhat = polynomial(xdata) ybar = numpy.sum(ydata) / len(ydata) ssreg = numpy.sum((yhat - ybar)**2) sstot = numpy.sum((ydata - ybar)**2) return coeffs, ssreg / sstot
[docs]def trim_str(text, max_len, suffix='...'): """ Trim the string to approximately max_len. Add a suffix if the string is longer than max_len. :param text: String to trim :param int max_len: Max length of the string :param str suffix: Suffix to add if the string is to be trimmed :return str: Trimmed string """ # For usage see msutils_test.py :: test_trim_str trim_len = len(suffix) if max_len <= len(suffix): trim_len = 0 if len(text) > max_len: return text[:max_len - trim_len] + suffix return text
[docs]@contextlib.contextmanager def mmlewis_apply(): """ Context manager that initializes mm and returns mm.mmlews_apply method. :yield: mm.mmlewis_apply method. Example usage: with msutils.mmlewis_apply() as lewis_apply: assert lewis_apply(struct) is None """ mm.mmerr_initialize() mm.mmlewis_initialize(mm.MMERR_DEFAULT_HANDLER) yield mm.mmlewis_apply mm.mmlewis_terminate() mm.mmerr_terminate()
[docs]def get_disorder_groups(struct): """ Get atoms that have s_cif_disorder_group set and occupancies are not 0 or 1. :param structure.Structure: Input structure :rtype: dict or None :return: Dict with keys disorder group labels, values list of atom indices or None if nothing is present in the structure """ if not has_atom_property(struct, mm.CIF_DISORDER_GROUP): return groups = {} occupancies = {} # asl "?*" search for non-empty string for idx in analyze.evaluate_asl(struct, 'atom.%s ?*' % mm.CIF_DISORDER_GROUP): atom = struct.atom[idx] group = atom.property[mm.CIF_DISORDER_GROUP] groups.setdefault(group, []).append(idx) # Keep only one non-zero occupancy (if present) for each group occupancies[group] = (occupancies.get(group) or atom.property[mm.M2IO_DATA_PDB_OCCUPANCY]) # Delete groups with occupancy 0 or 1 for group, occupancy in occupancies.items(): if occupancy in [0, 1]: groups.pop(group) return groups if groups else None
[docs]def get_atom_ffio_velocity(atom): """ Get FFIO atom velocities. :param structure._StructureAtom atom: Input atom :return numpy.array: Array of velocities """ vel = [ atom.property.get(pnames.FFIO_ATOM_VEL(axis), 0.) for axis in ['x', 'y', 'z'] ] return numpy.array(vel)
[docs]def set_atom_ffio_velocity(atom, velocity): """ Set FFIO atom velocities. :param structure._StructureAtom atom: Atom to modify :param list velocity: List of velocities (x, y, z components) """ for val, axis in zip(velocity, ['x', 'y', 'z']): atom.property[pnames.FFIO_ATOM_VEL(axis)] = val
[docs]def get_unique_name(new_name, existing_names): """ Add a suffix to new_name to make a unique name if it already exists in existing_names. :param str new_name: The new name :param list existing_names: Existing names :rtype: str :return: The unique version of new_name """ names_set = set(existing_names) unique_name = new_name index = 1 while unique_name in names_set: unique_name = new_name + f"_{index}" index += 1 return unique_name
[docs]def setting_to_bool(string, empty_is_false=True): """ Convert a yes/no/true/false/1/0/on/off type string to a Python boolean :param str string: The string to convert :param empty_is_false: If the string is empty or None, return False :rtype: bool :return: True if the string is a "true"-y word (TRUE, true, t, yes, on, 1, etc), False if it is a "false"-y word (FALSE, false, f, no, off, 0). :raise ValueError: If the string cannot be interpreted in a True/False manner :raise AttributeError: If something other than a string is passed in """ if empty_is_false and (string is None or string == ""): return False return bool(dutil.strtobool(string))
[docs]def flatten(alist, afunc=None): """ Flatten the given list into a set. :type alist: list :param alist: elements contain iterable data :type afunc: function or None :param afunc: function used to extract iterable data from the given list elements or None if there isn't one :rtype: set :return: a flattened set of data from the given list """ if afunc is None: afunc = lambda x: x return set([j for i in alist for j in afunc(i)])
[docs]def remove_pbc_props(struct): """ Remove the pbc properties of the given structure. :param schrodinger.structure.Structure struct: crystal structure from which all the pbc properties are to be removed """ DESMOND_BOX_SHAPE = 's_m_BC_Box_Shape' DESMOND_BOX_A = 'r_m_BC_Box_A' DESMOND_BOX_B = 'r_m_BC_Box_B' DESMOND_BOX_C = 'r_m_BC_Box_C' DESMOND_BOX_ALPHA = 'r_m_BC_Box_Alpha' DESMOND_BOX_BETA = 'r_m_BC_Box_Beta' DESMOND_BOX_GAMMA = 'r_m_BC_Box_Gamma' LATTICE_A = 's_matsci_Lattice_Vector_A/(3*Ang.)' LATTICE_B = 's_matsci_Lattice_Vector_B/(3*Ang.)' LATTICE_C = 's_matsci_Lattice_Vector_C/(3*Ang.)' props_to_remove = [ mm.M2IO_PDB_CRYSTAL_SPACE_GROUP, mm.M2IO_PDB_CRYSTAL_Z, pnames.UNIT_CELL_VOLUME_KEY, pnames.UNIT_CELL_DENSITY_KEY, pnames.UNIT_CELL_FORMULA_KEY, pnames.SPACE_GROUP_ID_KEY, pnames.PBC_POSITION_KEY, DESMOND_BOX_SHAPE, DESMOND_BOX_A, DESMOND_BOX_B, DESMOND_BOX_C, DESMOND_BOX_ALPHA, DESMOND_BOX_BETA, DESMOND_BOX_GAMMA, LATTICE_A, LATTICE_B, LATTICE_C ] + list(dconst.SIM_BOX) + list(pnames.PDB_BOX_PROPS) remove_properties(struct, props=props_to_remove)
[docs]def get_trajectory_frames(options, frames=None): """ Return a generator for trajectory frames in the range that the user specified, as well as the number of frames Requires that trajectory flags in parserutils are used in the parser. :param `argparse.Namespace` options: Parsed commandline options :type frames: iterable or None :param frames: Trajectory frames. If not provided, they will be read from the path in options :rtype: generator, int :return: The trajectory frame generator, and the number of frames """ from schrodinger.application.desmond.packages import traj from schrodinger.application.matsci import desmondutils if frames is None: frames = traj.read_traj(options.trj, return_iter=True) start = options.trj_min or 0 end = options.trj_max if options.trj_max is not None else float('inf') target_frames = [ frame for index, frame in enumerate(frames) if start <= index <= end ] n_frames = len(target_frames) generator = desmondutils.get_desmond_trj_frames(traj_frames=target_frames) return generator, n_frames
[docs]def get_atomic_element(atomic_number): """Given atomic number return chemical element. :param int atomic_number: Atomic number :rtype: str :return: Chemical element """ # TODO: Remove this when SHARED-7629 is implemented return mm.mmat_get_element_by_atomic_number(atomic_number).rstrip()
[docs]@contextlib.contextmanager def ignore_ssl_warnings(): """ Context manager to temporarily ignore InsecureRequestWarning warning. """ # Disable InsecureRequestWarning - currently our ssl distribution is platform # dependent, and our clients are not uniform in their ssl configuration, # so we cannot use ssl verification. with warnings.catch_warnings(): warnings.simplefilter('ignore', InsecureRequestWarning) yield
[docs]def get_index_from_default_name(atom_name): """ Find the atom index from string of element name and atom index :type atom_name: str :param atom_name: concatenated string of element symbol with the atom index :rtype index: int or None :para atom_index: Atom index """ index = re.findall(r'\d+', atom_name) if not index: return return int(index[0])
[docs]def title_case(original, exceptions=('an', 'of', 'the', 'for'), skip_single_letters=True): """ Convert the string to title case, optionally ignoring articles and single letters Examples with default kwargs: "Number of molecules": "Number of Molecules" "axis b": "Axis b" :param str original: The string to make title case :param tuple exceptions: The words to not capitalize :param bool skip_single_letters: Whether single letters should not be capitalized :rtype: str :return: The string in title case """ parts = re.split(' ', original) title_parts = [parts[0].capitalize()] for part in parts[1:]: if part in exceptions or part.isupper() or \ (skip_single_letters and len(part) == 1): title_parts.append(part) else: title_parts.append(part.capitalize()) return ' '.join(title_parts)
[docs]def generate_smiles(struct): """ Return a SMILES string for `st`. For more options, see the `schrodinger.structutils.smiles.SmilesGenerator` class. :type struct: `Structure` :param struct: Structure for which SMILES string is desired. :rtype: str :return: SMILES string representing `st`. """ # TODO: investigate RKDIT for generating smiles return analyze.generate_smiles(struct)