Source code for schrodinger.protein.assignment

"""
Module for optimizing hydroxyl, thiol and water orientiations, Chi-flips of
asparagine, glutamine and histidine, and protonation states of aspartic acid,
glutamic acid, and histidine.

Usage: ProtAssign(st)

Copyright Schrodinger, LLC. All rights reserved.

"""
import copy
import itertools
import math
import operator
import random
import re
import sys
from collections import defaultdict
from dataclasses import dataclass
from dataclasses import field
from past.utils import old_div
from typing import List
from typing import Tuple
from typing import Union

import numpy as np
from scipy.sparse import dok_matrix

from schrodinger import structure
from schrodinger.application.bioluminate import protein
from schrodinger.infra import mm
from schrodinger.structutils import analyze
from schrodinger.structutils import build
from schrodinger.structutils import minimize
from schrodinger.structutils import transform
from schrodinger.structutils.measure import create_distance_cell

LOG_NONE = 0
LOG_BASIC = 1
LOG_EXTRA = 2
LOG_DEBUG = 3
LOG_FULL_DEBUG = 4
LOG_SCORE_DEBUG = 5
DEFAULT_LOG_LEVEL = LOG_BASIC
log_level = DEFAULT_LOG_LEVEL
label_color = 13

pH_vlow = 'very_low'
pH_low = 'low'
pH_neutral = 'neutral'
pH_high = 'high'

pka_property = 'r_pa_PropKa_pKa'
ACCEPTOR_PROPERTY = 'b_pa_acceptor'
DONOR_PROPERTY = 'b_pa_donor'
CLASHER_PROPERTY = 'b_pa_clasher'
ION_PROPERTY = 'b_pa_ion'
# Flag whether a donor or acceptor is static
STATIC_PROPERTY = 'b_pa_static'
APROPNAME_PA_STATE = 's_pa_state'

# Max distance between changeables to be clustered together
CLUSTERING_DISTANCE = 4.0
# Distance between the heavies of changeables after which the interaction
# energy drops to 0
MAX_HEAVY_HEAVY_INTERACTION_DISTANCE = 4.5
# Maximum distance between certain changeables to orient its hydrogen for
# adding states.
MAX_ORIENT_HYDROGEN_DISTANCE = 5.0
# H-H clash cutoff involving two polar H
POLAR_CLASH_CUTOFF = 2.0
# H-H clash cutoff involving at least one non-polar H
NONPOLAR_CLASH_CUTOFF = 1.77

BOND_MSG_TEMPLATE = ('Cannot determine flip state of {resstr} due to unexpected'
                     'covalent bond.')

FLIP = 'Flip'
NO_FLIP = 'No Flip'

RESNAME_FLIPSTATES_MAP = {'ASN': (NO_FLIP, FLIP), 'GLN': (NO_FLIP, FLIP)}
for resname in ('HID', 'HIE', 'HIP'):
    RESNAME_FLIPSTATES_MAP[resname] = (resname, f'{FLIP} {resname}')
RESNAME_FLIPSTATES_MAP['HIS'] = RESNAME_FLIPSTATES_MAP['HID']

ND1 = 'ND1'
ND2 = 'ND2'
NE2 = 'NE2'
CD = 'CD'
CD2 = 'CD2'
CE1 = 'CE1'
CG = 'CG'
HIS_PDBNAMES = {CD2, CE1, CG, ND1, NE2}
AMD_PDBNAME_GROUPS = ({CG, 'OD1', ND2}, {CD, 'OE1', NE2})
HIS_NEIGHBOR_MAP = {
    ND1: {CG, CE1},
    NE2: {CE1, CD2},
    CE1: {ND1, NE2},
    CD2: {NE2, CG}
}


[docs]class PropKaException(Exception):
[docs] def __init__(self, value): self.parameter = value
[docs]def report(message_level=1, message=''): if message_level <= log_level: print(message) sys.stdout.flush() return
[docs]def measure(ct, atom1=None, atom2=None, atom3=None, atom4=None, use_xtal=False, max_dist=10.0): # A replacement for ct.measure() which also supports xtal symmetry, and can return the coordinates of # the nearest xtal copies of each atom when requested. def find_nearest_atom_image(ct, atom1, atom2): lowest_distance = None best_match = None atoms = analyze.evaluate_asl(ct, 'atom.i_pa_atomindex %d' % atom2) for atom in atoms: distance = ct.measure(atom1, atom) if lowest_distance is None or distance < lowest_distance: lowest_distance = distance best_match = int(atom) return best_match def extract(ct, atoms): new_ct = structure.create_new_structure(len(atoms)) for i, atom in enumerate(atoms): new_atom = new_ct.atom[i + 1] old_atom = ct.atom[atom] new_atom.element = old_atom.element new_atom.xyz = old_atom.xyz new_atom.color = old_atom.color new_atom.atom_type = old_atom.atom_type new_atom.property['i_pa_atomindex'] = old_atom.property[ 'i_pa_atomindex'] for property in [ 's_pdb_PDB_CRYST1_Space_Group', 'r_pdb_PDB_CRYST1_a', 'r_pdb_PDB_CRYST1_b', 'r_pdb_PDB_CRYST1_c', 'r_pdb_PDB_CRYST1_alpha', 'r_pdb_PDB_CRYST1_beta', 'r_pdb_PDB_CRYST1_gamma' ]: new_ct.property[property] = ct.property[property] return new_ct def merge_mates(atom_ct, atom_ct_mates): atom_ct_mates.insert(0, atom_ct) for i, mate in enumerate(atom_ct_mates): for atom in mate.atom: atom.property['i_pa_xtalatom'] = i for i in range(1, len(atom_ct_mates)): atom_ct_mates[0].extend(atom_ct_mates[i]) return atom_ct_mates def find_atom1(atom_ct_mates, atom1): for atom in atom_ct_mates[0].atom: if atom.property['i_pa_xtalatom'] == 0 and atom.property['i_pa_atomindex'] == atom1: xtal_atom1 = int(atom) break return xtal_atom1 def create_list(atom1, atom2, atom3, atom4): if atom4 is not None: atoms = [atom1, atom2, atom3, atom4] elif atom3 is not None: atoms = [atom1, atom2, atom3] else: atoms = [atom1, atom2] return atoms def find_other_atoms(atom_ct_mates, xtal_atom1, atom2, atom3, atom4): xtal_atom2 = find_nearest_atom_image(atom_ct_mates[0], xtal_atom1, atom2) if atom3 is not None: xtal_atom3 = find_nearest_atom_image(atom_ct_mates[0], xtal_atom2, atom3) else: xtal_atom3 = None if atom4 is not None: xtal_atom4 = find_nearest_atom_image(atom_ct_mates[0], xtal_atom3, atom4) else: xtal_atom4 = None return (xtal_atom2, xtal_atom3, xtal_atom4) # Only do the expensive xtal version of measure if it's a surface atom (or if not sure if it's a surface atom). if use_xtal and ('i_pa_surfaceatom' not in ct.atom[atom1].property or ct.atom[atom1].property['i_pa_surfaceatom'] == 1): atoms = create_list(atom1, atom2, atom3, atom4) atom_ct = extract(ct, atoms) atom_ct_mates = analyze.generate_crystal_mates(atom_ct, radius=max_dist) atom_ct_mates = merge_mates(atom_ct, atom_ct_mates) # Get atom1 (in the primary cell). xtal_atom1 = find_atom1(atom_ct_mates, atom1) # Get atom2 (the closest to atom1), and so on. (xtal_atom2, xtal_atom3, xtal_atom4) = find_other_atoms( atom_ct_mates, xtal_atom1, atom2, atom3, atom4) return atom_ct_mates[0].measure(xtal_atom1, xtal_atom2, xtal_atom3, xtal_atom4) else: value = ct.measure(atom1, atom2, atom3, atom4) return value
[docs]def calculate_interaction_matrix(ct, iatoms, distance, use_xtal=False): """ Create an interaction matrix based on the changeable_index atom property :param ct: Structure with annotated atoms having set the i_pa_changeable_index corresponding the the index of the changeable :type ct: structure.Structure :param iatoms: List of atom indices which take part in interaction :type iatoms: List[int] :param distance: Max distance between interacting atoms :type distance: float :param use_xtal: Take into account crystal symmetry mates :param use_xtal: bool :return: interaction matrix allowing double indexing: interact[i][j] :rtype: defaultdict(lambda: defaultdict(bool)) """ # Interacting matrix implemented as a nested dict so we can do # interact[i][j] and be consistent with original implementation while # saving on memory. interact = defaultdict(lambda: defaultdict(bool)) # Create a distance cell to query containing only the atoms of interest if not iatoms: return interact ct_part = ct.extract(iatoms, copy_props=True) if use_xtal: mates = analyze.generate_crystal_mates(ct_part, distance) for mate in mates[1:]: ct_part.extend(mate) dcell = create_distance_cell(ct_part, distance, honor_pbc=False) for iatom1 in iatoms: atom1 = ct.atom[iatom1] index1 = atom1.property['i_pa_changeable_index'] for iatom2 in dcell.query(*atom1.xyz): index2 = ct_part.atom[iatom2].property['i_pa_changeable_index'] # Changeables can currently only interact with other # changeables. It is possible that a changeable has # self-interaction with its crystal mate, but this is not # implemented down-stream, so we ignore that for now. if index1 == index2: continue interact[index1][index2] = True interact[index2][index1] = True return interact
[docs]def annotate_structure_interactors(ct, acceptors, donors, clashers): """ Set atom property for each interactor class :param ct: Structure to annotate :type ct: Structure :param acceptors: List of acceptor atom indices :type acceptors: List[int] :param donors: List of donor pair atom indices :type donors: List[tuple[int, int]] :param clashers: List of clasher atom indices :type clashers: List[int] :return: None but sets atom properties :rtype: NoneType """ for acceptor in acceptors: ct.atom[acceptor].property[ACCEPTOR_PROPERTY] = 1 for heavy, hydrogen in donors: if heavy == hydrogen: ct.atom[heavy].property[ION_PROPERTY] = 1 else: ct.atom[heavy].property[DONOR_PROPERTY] = 1 ct.atom[hydrogen].property[DONOR_PROPERTY] = 1 for clasher in clashers: ct.atom[clasher].property[CLASHER_PROPERTY] = 1
[docs]def check_residue_flip_state(res: structure._Residue) -> tuple: """ Determine whether a residue cannot be flipped, is, or is not flipped. :param res: a protein residue :return: a tuple of `(state, msg)`, where `state` describes whether the residue is flipped (`True`), is not flipped (`False`), or cannot be flipped (`None`); if `None`, `msg` will contain an explanation :rtype: tuple[bool or NoneType, str] """ pdbname_atom_map = { atom.pdbname.strip(): atom for atom in res.atom if atom.atomic_number != 1 } is_amd = any( all(n in pdbname_atom_map for n in pdbnames) for pdbnames in AMD_PDBNAME_GROUPS) is_his = all(name in pdbname_atom_map for name in HIS_PDBNAMES) is_flipped, msg = None, '' if not is_amd and not is_his: msg = (f'Cannot determine flip state of {get_residue_string(res)}: only' f' {RESNAME_FLIPSTATES_MAP.keys()} can be flipped.') elif is_amd: # Residue is an amide; verify that there are no unexpected bonds nitrogen = pdbname_atom_map.get(ND2) or pdbname_atom_map.get(NE2) carbon = pdbname_atom_map.get(CD) or pdbname_atom_map.get(CG) bound_atoms = [a for a in get_heavy_neighbors(nitrogen) if a != carbon] if bound_atoms: msg = BOND_MSG_TEMPLATE.format(resstr=get_residue_string(res)) else: is_flipped = carbon.property.get(APROPNAME_PA_STATE) == FLIP else: # Residue is a histidine; verify that there are no unexpected bonds for pdbname, neighbor_pdbnames in HIS_NEIGHBOR_MAP.items(): atom = pdbname_atom_map[pdbname] exp_neighbors = [ pdbname_atom_map[name] for name in neighbor_pdbnames ] bound_atoms = [ a for a in get_heavy_neighbors(atom) if a not in exp_neighbors ] if bound_atoms: msg = BOND_MSG_TEMPLATE.format(resstr=get_residue_string(res)) break else: carbon = pdbname_atom_map.get(CG) flip_tag = carbon.property.get(APROPNAME_PA_STATE) is_flipped = flip_tag is not None and flip_tag.startswith(FLIP) return is_flipped, msg
[docs]def get_residue_flip_state(res: structure._Residue) -> Union[bool, type(None)]: """ Return the flip state of a protein residue. A truncated version of `check_residue_flip_state()`. :param res: a protein residue :return: the flip state of a residue """ flip_state, _ = check_residue_flip_state(res) return flip_state
[docs]def get_residue_string(residue: structure._Residue) -> str: """ Return a string describing a residue. The string will match the format <chain>:<residue PDB code> <residue number>[<insertion code>] :param residue: a protein residue :return: a string describing the residue """ chain = '_' if residue.chain == ' ' else residue.chain pdbres = residue.pdbres.strip() inscode = residue.inscode.strip() return f'{chain}:{pdbres} {residue.resnum}{inscode}'
[docs]def get_heavy_neighbors(atom: structure._StructureAtom) -> list: """ :param atom: an atom :return: a list of heavy (non-H) atoms covalently bound to `atom` :rtype: list[structure._StructureAtom] """ return [ bond.atom2 for bond in atom.bond if bond.atom2.atomic_number != 1 and bond.order != 0 ]
@dataclass class _WaterState: """ Container for water states :param coordinates: Hydrogen coordinates of water state :type coordinates: Tuple[List[float], List[float]] :param donating_to: List of atom indices to which water is donating :type donating_to: List[int] :param accepting_from: List of atom indices from water is accepting a hydrogen :type accepting_from: List[int] """ # hydrogen1 and hydrogen2 coordinates coordinates: Tuple[List[float], List[float]] donating_to: List[int] = field(default_factory=list) accepting_from: List[int] = field(default_factory=list)
[docs]class WaterStateEnumerator: """ Enumerate discrete water states that are hydrogen bonding with nearby acceptors and donors The goal is to sample likely states while also limiting the number of states as solving the combinatorial problem gets harder with more and more states. """ # OH bond lenght of water in angstrom OH_LENGTH = 1.000 # HOH angle of water in degrees HOH_ANGLE = 109.5 # Minimum required hydrogen to non-static heavy donor atom distance for it # to be not considered clashing. MIN_HYDROGEN_NONSTATIC_DONOR_DISTANCE = 2.5
[docs] def __init__(self, ct, oxygen, acceptors, donors): """ :param ct: Annotated structure with donor/acceptor and static flags. :type ct: Structure :param oxygen: Oxygen atom of water :type oxygen: _StructureAtom :param acceptors: List of acceptor atom indices :type acceptors: List[int] :param donors: List of donor atom indices :type donors: List[Tuple[int, int]] """ self.ct = ct self.oxygen = oxygen self.hydrogen1, self.hydrogen2 = list(oxygen.bonded_atoms) self.acceptors = acceptors self.donors = donors # These attributes will be filled by precalculate and will contain a # mapping of atom index to distance or unit vector from oxygen to atom self._vector_lookup = {} self._distance_lookup = {} self._precalculate() self._filter_acceptor_donors() self._separate_donors()
def _precalculate(self): """ Pre-calculate all unit vectors and distances from water oxygen to acceptor and heavy donor atoms """ oxygen_xyz = np.asarray(self.oxygen.xyz, float) for iheavy in itertools.chain(self.acceptors, *self.donors): heavy = self.ct.atom[iheavy] if heavy.atomic_number == 1: continue if iheavy == self.oxygen.index: continue vector = heavy.xyz - oxygen_xyz distance = np.linalg.norm(vector) if distance < 0.01: report( LOG_BASIC, f"Water is severily clashing with {heavy.pdbres} {heavy.resnum}{heavy.inscode} {heavy.chain}" ) continue vector /= distance self._vector_lookup[iheavy] = vector self._distance_lookup[iheavy] = distance def _filter_acceptor_donors(self): """ Filter acceptor and donor atoms up to a certain distance and remove water from donor list. This is to limit the number of states that are enumerated. Resets the acceptors and donors attributes """ distance_lookup = self._distance_lookup if distance_lookup: # The typical max distance between heavy atoms is 4.0 of the # acceptors and donors. This is probably too far and we can reduce # the distance and filter the acceptors and donors for more # efficient state generation. # Look for interactors within a certain distance. To limit the number # of states generated, stop after at least 2 interactors are found. We # start at a distance of 3.5 which is pretty generous. distances = np.asarray(list(distance_lookup.values())) for max_distance in np.linspace(3.5, 4.0, num=11, endpoint=True): ninteractors = (distances <= max_distance).sum() if ninteractors > 1: break # Filter acceptors filtered_acceptors = [] for iheavy in self.acceptors: distance = distance_lookup.get(iheavy) if distance is None: continue if distance <= max_distance: filtered_acceptors.append(iheavy) # Filter donors filtered_donors = [] for iheavy, ihydrogen in self.donors: distance = distance_lookup.get(iheavy) if distance is None: continue if distance <= max_distance: filtered_donors.append((iheavy, ihydrogen)) self.acceptors = filtered_acceptors self.donors = filtered_donors def _separate_donors(self): """Separate the donors into metals and a list of heavy atom""" # The donor list also contains metals. Separate these out. self.donor_heavies = [] for heavy, hydrogen in self.donors: # Its a metal if heavy == hydrogen if heavy != hydrogen: self.donor_heavies.append(heavy) def _is_clashing_with_static_donor(self, donor_heavy): """ Check if any of the water hydogens is clashing with a static hydrogen donor :param donor_heavy: Donor heavy atom :type donor_heavy: _StructureAtom :return: True if clashing, False if not :rtype: bool """ p = donor_heavy.property if not (p.get(DONOR_PROPERTY) and p.get(STATIC_PROPERTY)): return False for iatom in donor_heavy.bonded_atoms: atom = self.ct.atom[iatom] if atom.atomic_number != 1: continue for hydrogen in (self.hydrogen1, self.hydrogen2): distance = self.ct.measure(iatom, hydrogen) if distance < POLAR_CLASH_CUTOFF: return True return False def _might_clash_with_nonstatic_donor(self, hydrogen, donor_heavy): """ Check if hydrogen might be clashing with hydrogen of a nonstatic donor :param hydrogen: Water hydrogen to check :type hydrogen: _StructureAtom :param donor_heavy: Donor heavy atom :type donor_heavy: _StructureAtom :return: True if clashing, False if not :rtype: bool """ p = donor_heavy.property if not p.get(DONOR_PROPERTY) or p.get(STATIC_PROPERTY): return False distance = self.ct.measure(hydrogen, donor_heavy) return distance < self.MIN_HYDROGEN_NONSTATIC_DONOR_DISTANCE
[docs] def enumerate_acceptor_acceptor_states(self): """ Enumerate states where water is donating to two acceptors at the same time :return: List of water states :rtype: List[_WaterState] """ # Align waters so it is hydrogen bonded with 2 acceptors. # Rename so we don't have self everywhere oxygen = self.oxygen hydrogen1 = self.hydrogen1 hydrogen2 = self.hydrogen2 ct = self.ct states = [] heavies = itertools.chain(self.acceptors, self.donor_heavies) for iheavy1, iheavy2 in itertools.combinations(heavies, 2): angle = ct.measure(iheavy1, oxygen, iheavy2) # Check if angle falls within a reasonable interval so the # water can actually hydrogen bond to both at the same time if not (90 < angle <= 150): continue vector1 = self._vector_lookup[iheavy1] vector2 = self._vector_lookup[iheavy2] hydrogen1.xyz = oxygen.xyz + vector1 * self.OH_LENGTH hydrogen2.xyz = oxygen.xyz + vector2 * self.OH_LENGTH # Since the scoring function used during optimization is so # dependent on distance, align perfectly with the closest # acceptor heavy1 = ct.atom[iheavy1] heavy2 = ct.atom[iheavy2] distance1 = self._distance_lookup[iheavy1] distance2 = self._distance_lookup[iheavy2] if distance1 <= distance2: ct.adjust(self.HOH_ANGLE, hydrogen1, oxygen, hydrogen2) else: ct.adjust(self.HOH_ANGLE, hydrogen2, oxygen, hydrogen1) # Add the state if the water is not clashing with a static donor clashing = any( self._is_clashing_with_static_donor(heavy) for heavy in (heavy1, heavy2)) if clashing: continue state = _WaterState( coordinates=(hydrogen1.xyz, hydrogen2.xyz), donating_to=[iheavy1, iheavy2]) states.append(state) return states
[docs] def enumerate_donor_donor_states(self): """ Enumerate states where water is accepting from two donors at the same time :return: List of water states :rtype: List[_WaterState] """ # Generate states where water is accepting from 2 donors oxygen = self.oxygen hydrogen1 = self.hydrogen1 hydrogen2 = self.hydrogen2 states = [] angle = 180 - self.HOH_ANGLE / 2.0 for donor1, donor2 in itertools.combinations(self.donors, 2): heavy1 = self.ct.atom[donor1[0]] heavy2 = self.ct.atom[donor2[0]] average_xyz = (np.asarray(heavy1.xyz) + heavy2.xyz) / 2.0 vector = -(average_xyz - oxygen.xyz) distance = np.linalg.norm(vector) if distance < 0.01: continue vector /= distance hydrogen1.xyz = np.asarray(oxygen.xyz) + vector * self.OH_LENGTH hydrogen2.xyz = hydrogen1.xyz # Temporarily set the heavy coordinate to the average to place # the hydrogens heavy1_xyz = heavy1.xyz heavy1.xyz = average_xyz self.ct.adjust(angle, heavy1, oxygen, hydrogen1) self.ct.adjust(-angle, heavy1, oxygen, hydrogen2) heavy1.xyz = heavy1_xyz state = _WaterState( coordinates=(hydrogen1.xyz, hydrogen2.xyz), accepting_from=[int(heavy1), int(heavy2)]) states.append(state) return states
[docs] def enumerate_acceptor_states(self): """ Enumerate states where water is donating to a single acceptor :return: List of water states :rtype: List[_WaterState] """ # Generate states where water is donating to a single acceptor, # while letting other hydrogen not point towards static donors. oxygen = self.oxygen hydrogen1 = self.hydrogen1 hydrogen2 = self.hydrogen2 states = [] for iheavy in itertools.chain(self.acceptors, self.donor_heavies): vector = self._vector_lookup[iheavy] heavy = self.ct.atom[iheavy] hydrogen1.xyz = oxygen.xyz + vector * self.OH_LENGTH # Move hydrogen2 so it makes the correct angle with hydrogen1 hydrogen2.xyz = hydrogen1.xyz if self._is_clashing_with_static_donor(heavy): continue self.ct.adjust(self.HOH_ANGLE, hydrogen1, oxygen, hydrogen2) # Keep a list of the other heavy donors to calculate clashes other_donor_heavies = list(self.donor_heavies) try: other_donor_heavies.remove(iheavy) except ValueError: pass # Check if hydrogen2 is interacting with a static donor. If # so, generate a state where it is not clashing axis = self._vector_lookup[iheavy] for angle in [None, 120, 120]: if angle is not None: self.rotate_hydrogens_along_axis(axis, angle) clashing = any( self._is_clashing_with_static_donor(self.ct.atom[iheavy2]) for iheavy2 in other_donor_heavies) if clashing: continue state = _WaterState( coordinates=(hydrogen1.xyz, hydrogen2.xyz), donating_to=[iheavy]) states.append(state) # Check if the current configuration is interacting with a # non-static donor. If so, move on to the next dihedral angle iteration # to add another state so the donor could donate to the # water in some of its configurations. # If not, hydrogen2 is on a stable non-clashing position # and we are done. clashing = any( self._might_clash_with_nonstatic_donor( hydrogen2, self.ct.atom[iheavy2]) for iheavy2 in other_donor_heavies) if not clashing: break return states
[docs] def enumerate_donor_states(self): """ Enumerate states where water is accepting from a single donor :return: List of water states :rtype: List[_WaterState] """ oxygen = self.oxygen hydrogen1 = self.hydrogen1 hydrogen2 = self.hydrogen2 ct = self.ct states = [] hydrogen_angle = 180 - self.HOH_ANGLE / 2.0 oxygen_xyz = np.asarray(oxygen.xyz) for iheavy, ihydrogen in self.donors: ct.adjust(hydrogen_angle, ihydrogen, oxygen, hydrogen1) hydrogen2.xyz = hydrogen1.xyz ct.adjust(-hydrogen_angle, ihydrogen, oxygen, hydrogen2) axis = -self._vector_lookup[iheavy] # Keep a list of the other heavy donors to calculate clashes other_donor_heavies = list(self.donor_heavies) try: other_donor_heavies.remove(iheavy) except ValueError: pass for angle in [None, 120, 120]: if angle is not None: self.rotate_hydrogens_along_axis(axis, angle) # Check if hydrogen is clashing with static donor clashing = any( self._is_clashing_with_static_donor(ct.atom[iheavy2]) for iheavy2 in other_donor_heavies) if clashing: continue # State is valid. Add it to the pool state = _WaterState( coordinates=(hydrogen1.xyz, hydrogen2.xyz), accepting_from=[iheavy]) states.append(state) # Check if this state might be clashing with the hydrogen of a # non-static donor. If so, generate another state. clashing = any( self._might_clash_with_nonstatic_donor( hydrogen2, ct.atom[iheavy2]) for iheavy2 in other_donor_heavies) if not clashing: break return states
[docs] def rotate_hydrogens_along_axis(self, axis, angle): """ Rotate the water hydrogens along an axis by an angle. Does not return anything but moves hydrogens in place. :param axis: Axis along to rotate to :type axis: 3 floats :param angle: Angle to rotate in degrees :type angle: float """ # Rotate water along fixed oxygen - donor axis. ct = self.ct x, y, z = self.oxygen.xyz hydrogens = [int(self.hydrogen1), int(self.hydrogen2)] # Move to origin transform.translate_structure(ct, -x, -y, -z, atom_index_list=hydrogens) # Perform the rotation rotation = transform.get_rotation_matrix(axis, angle) transform.transform_structure(ct, rotation, atom_index_list=hydrogens) # Move back to original position transform.translate_structure(ct, x, y, z, atom_index_list=hydrogens)
[docs]class ProtAssign:
[docs] class changeable: asl = 'none' max_hbond_distance = 3.5 hbond_min_angle = 150.0 hbond_heavy_min_angle = 80.0 hbond_heavy_max_angle = 140.0
[docs] def __init__(self, ct, iatom): self.index = 0 self.current_state = 0 #determine self.initial_state = 0 #determine self.treated = False self.locked = False self.valid = True return
[docs] def pre_treat_1(self, ct): return
[docs] def pre_treat_2(self, ct): return
[docs] def pre_treat(self, ct): self.pre_treat_1(ct) build.add_hydrogens(ct, atom_list=self.get_heavies()) self.pre_treat_2(ct) self.treated = True return
[docs] def enumerate_states(self, ct, acceptors, donors, pH, do_flips=True, include_initial=False): return
[docs] def lock_protonation(self): return
[docs] def add_current_to_states(self, ct): return
[docs] def assign_state(self, ct, istate, add_labels=True, label_pkas=False, state_gap=None, verbose=False): if not self.treated: self.pre_treat(ct) self.treated = False return []
[docs] def assign_state_gap(self, atom, state_gaps, report_gaps=True): """ Write the Gap in energy between the lowest energy state and the state with different protonation states or heavy atom positions to the output ct :param atom: The atom that should have properties written to it :type atom:structure.StructureAtom :param state_gaps: The energy gaps between states for a given changeable position. :type state_gaps: A dictionary where the keys are strings ( state names) and the values are floats ( energy in kcals of the lowest energy combination that has that state) :param report_gaps: Whether to report the gaps to the log file as well :type report_gaps: Boolean """ for p in "r_pa_state_gap", : if p in atom.property: del atom.property[p] if state_gaps is None or len(state_gaps) <= 1: return min_gap = sorted(state_gaps[state] for state in state_gaps)[1] atom.property["r_pa_state_gap"] = min_gap if log_level >= LOG_EXTRA: cutoff = 100 else: cutoff = 2.5 if report_gaps and min_gap <= cutoff: report(LOG_BASIC, ( " Alternative States for {0:10s} Gap {1:5.2f} kcals/mol". format(self.name, min_gap))) for state in sorted(state_gaps, key=lambda k: state_gaps[k]): if state_gaps[state] <= cutoff: report(LOG_BASIC, " {0:8s} {1:5.2f} kcals/mol".format( state, state_gaps[state]))
[docs] def update_atom_indices(self, ct, new_indices): return
[docs] def get_new_index(self, ct, atom_index, new_indices): if atom_index in new_indices: return new_indices[atom_index] else: return None
[docs] def get_view_atoms(self): return []
[docs] def get_residue_name(self, ct, iatom): resnum = ct.atom[iatom].resnum inscode = ct.atom[iatom].inscode chain = ct.atom[iatom].chain if chain == " ": chain = "_" pdbres = ct.atom[iatom].pdbres residue = chain + ":" + pdbres + str(resnum) + inscode residue.rstrip() return residue
[docs] def get_atom_name(self, ct, iatom): return self.get_residue_name(ct, iatom) + ':' + ct.atom[iatom].pdbname
[docs] def swap_atoms(self, ct, atom1, atom2): (temp_x, temp_y, temp_z) = ct.atom[atom1].xyz ct.atom[atom1].xyz = ct.atom[atom2].xyz ct.atom[atom2].xyz = (temp_x, temp_y, temp_z) return
[docs] def get_penalty(self, istate): return 0.0
[docs] def get_adjustable_atoms(self): return []
[docs] def change_pka(self, pka, propka_pH): return False
[docs] def get_dihedral_atoms(self, ct, h): # Borrowed from rotH.py. ret_list = [] # The first atom will be H itself: ret_list.append(h) # Now find the O or S attached to the H. Note that bonds (like everything # associated with structures) are indexed from "1" so we are actually # looking for the first bond: Xatom = ct.atom[h].bond[1].atom2 ret_list.append(int(Xatom)) # Now find a suitable non-H atom bonded to the X: C1atom = -1 for b in Xatom.bond: conn_atom = b.atom2 # This is probably the easiest way to find a non-H atom: if not conn_atom.atomic_number == 1: C1atom = int(conn_atom) ret_list.append(C1atom) # Leave the loop: break # Check to see that we did find a C1 atom: if C1atom < 0: return ret_list # Now look for the final atom we need: for b in ct.atom[C1atom].bond: conn_atom = b.atom2 if int(conn_atom) != ret_list[1]: # This is probably the easiest way to find a non-H atom: #if not conn_atom.atomic_number == 1 : C2atom = int(conn_atom) ret_list.append(C2atom) # Leave the loop: break return ret_list
[docs] def get_close_interactors(self, ct, dcell): """ Return acceptors, donors and clashers that are close to this changeable heavy atoms. :param ct: Structure with annotated atoms signfying interaction class :type ct: Structure :param dcell: Distance cell to query for neighboring atoms :type dcell: DistanceCell :returns: List of acceptors, donor pairs, and clashers atom indices :rtype: tuple[list[int], list[tuple[int, int]], list[int]] """ acceptors = set() donors = set() clashers = set() for iatom1 in self.get_heavies(): for iatom2 in dcell.query(*ct.atom[iatom1].xyz): p = ct.atom[iatom2].property if p.get(ACCEPTOR_PROPERTY): acceptors.add(iatom2) if p.get(DONOR_PROPERTY): atom2 = ct.atom[iatom2] if atom2.atomic_number == 1: heavy = int(next(iter(atom2.bonded_atoms))) donor = (heavy, iatom2) donors.add(donor) if p.get(ION_PROPERTY): donors.add((iatom2, iatom2)) if p.get(CLASHER_PROPERTY): clashers.add(iatom2) return sorted(acceptors), sorted(donors), sorted(clashers)
[docs] class amide_changeable(changeable): """ This is the primary amide -NH2 group of ASN and GLN residues. """ asl = '((res.ptype \"ASN \" AND atom.ptype \" CG \") OR (res.ptype \"GLN \" AND atom.ptype \" CD \"))'
[docs] def __init__(self, ct, iatom): self.pka = None self.index = 0 self.name = '' self.nstates = 0 self.num_user_states = 0 self.state_names = [] self.carbon = None self.oxygen = None self.nitrogen = None self.hydrogen1 = None self.hydrogen1alt = None self.hydrogen2 = None self.hydrogen2alt = None self.treated = False self.locked = False self.valid = True self.name = self.get_residue_name(ct, iatom) self.type = 'AMIDE' residue_atoms = ct.getResidueAtoms(iatom) for atom in residue_atoms: name = atom.pdbname.strip() if name in ["OD1", "OE1"]: self.oxygen = int(atom) elif name in ["ND2", "NE2"]: self.nitrogen = int(atom) elif name in ["CG", "CD"]: self.carbon = int(atom) if self.carbon is None or self.oxygen is None or self.nitrogen is None: report(LOG_EXTRA, 'Rejecting ' + self.name + ' due to missing atoms.') self.valid = False return # Make sure the nitrogen is covalently bound ONLY to the carbon # CG (for ASN) or CD (for GLN). bound_atoms = [] for bond in ct.atom[self.nitrogen].bond: if bond.atom2.atomic_number != 1 and bond.order != 0: bound_atoms.append(bond.atom2.pdbname) if len(bound_atoms) > 1 or bound_atoms[0] not in [' CG ', ' CD ']: report(LOG_EXTRA, 'Rejecting ' + self.name + ' due to covalent bond.') self.valid = False return if 's_pa_state' in ct.atom[self.carbon].property \ and ct.atom[self.carbon].property['s_pa_state'] == 'Flip': self.current_state = 1 self.initial_state = 1 else: self.current_state = 0 self.initial_state = 0 return
[docs] def pre_treat_1(self, ct): ct.atom[self.oxygen].atomic_number = 7 ct.atom[self.oxygen].formal_charge = 1 ct.atom[self.oxygen].atom_type = 25 return
[docs] def pre_treat_2(self, ct): residue_atoms = ct.getResidueAtoms(self.oxygen) self.hydrogen1 = None self.hydrogen1alt = None self.hydrogen2 = None self.hydrogen2alt = None for hyd in residue_atoms: if hyd.atomic_number != 1: continue for heavy in hyd.bonded_atoms: bond = ct.getBond(heavy, hyd) if bond.order == 0: continue if int(heavy) == self.nitrogen: angle = ct.measure(hyd, self.nitrogen, self.carbon, self.oxygen) if abs(angle) > 90.0 and self.hydrogen1 is None: self.hydrogen1 = int(hyd) elif self.hydrogen2 is None: self.hydrogen2 = int(hyd) else: self.hydrogen1 = int(hyd) hyd.property['i_pa_atomindex'] = int(hyd) elif int(heavy) == self.oxygen: angle = ct.measure(hyd, self.oxygen, self.carbon, self.nitrogen) if abs(angle) > 90.0 and self.hydrogen1alt is None: self.hydrogen1alt = int(hyd) elif self.hydrogen2alt is None: self.hydrogen2alt = int(hyd) else: self.hydrogen1alt = int(hyd) hyd.property['i_pa_atomindex'] = int(hyd) if self.current_state == 1 and not self.locked: # Needs to be flipped back, for bookkeeping to be consistent. atoms_to_swap = [(self.oxygen, self.nitrogen), (self.hydrogen1, self.hydrogen1alt), (self.hydrogen2, self.hydrogen2alt)] for atoms in atoms_to_swap: self.swap_atoms(ct, atoms[0], atoms[1]) self.current_state = 0 self.treated = True return
[docs] def enumerate_states(self, ct, acceptors, donors, pH, do_flips=True, include_initial=False): if do_flips: self.nstates = 2 self.state_names = ['No Flip', 'Flip'] else: self.nstates = 1 self.state_names = ['No Flip'] return
[docs] def assign_state(self, ct, istate, add_labels=True, label_pkas=False, state_gaps=None, verbose=False): if not self.treated: self.pre_treat(ct) if istate != self.current_state: atoms_to_swap = [(self.oxygen, self.nitrogen), (self.hydrogen1, self.hydrogen1alt), (self.hydrogen2, self.hydrogen2alt)] for atoms in atoms_to_swap: self.swap_atoms(ct, atoms[0], atoms[1]) ct.atom[self.oxygen].formal_charge = 0 ct.atom[self.oxygen].atomic_number = 8 ct.atom[self.oxygen].atom_type = 15 ct.atom[self.carbon].label_format = "%UT" ct.atom[self.carbon].label_color = label_color self.assign_state_gap( ct.atom[self.carbon], state_gaps, report_gaps=verbose) if istate == 1: if add_labels: ct.atom[self.carbon].label_user_text = 'Flip' ct.atom[self.carbon].property['s_pa_state'] = 'Flip' else: if add_labels: ct.atom[self.carbon].label_user_text = '' ct.atom[self.carbon].property['s_pa_state'] = 'No Flip' self.current_state = istate self.treated = False return ([self.hydrogen1alt, self.hydrogen2alt])
[docs] def update_atom_indices(self, ct, new_indices): # Gotta be a better way... self.carbon = self.get_new_index(ct, self.carbon, new_indices) self.oxygen = self.get_new_index(ct, self.oxygen, new_indices) self.nitrogen = self.get_new_index(ct, self.nitrogen, new_indices) self.hydrogen1 = self.get_new_index(ct, self.hydrogen1, new_indices) self.hydrogen1alt = self.get_new_index(ct, self.hydrogen1alt, new_indices) self.hydrogen2 = self.get_new_index(ct, self.hydrogen2, new_indices) self.hydrogen2alt = self.get_new_index(ct, self.hydrogen2alt, new_indices) return
[docs] def get_heavies(self): return [self.oxygen, self.nitrogen]
[docs] def get_state_sites(self, ct, istate): if istate == 0: return ([self.oxygen], [(self.nitrogen, self.hydrogen1), (self.nitrogen, self.hydrogen2)], [], 0.0) elif istate == 1: return ([self.nitrogen], [(self.oxygen, self.hydrogen1alt), (self.oxygen, self.hydrogen2alt)], [], 0.0) return
[docs] def get_view_atoms(self): return [self.carbon, self.oxygen, self.nitrogen]
[docs] def get_penalty(self, istate): return 0.0
[docs] def get_adjustable_atoms(self): return [self.oxygen, self.nitrogen, self.hydrogen1, self.hydrogen2]
[docs] class histidine_changeable(changeable): """ Imidazole group of Histidine residues. """ asl = '((res.ptype \"HIS \",\"HID \",\"HIE \",\"HIP \")) AND ((atom.ptype \" CG \"))'
[docs] def __init__(self, ct, iatom): self.pka = None self.index = 0 self.name = '' self.nstates = 0 self.num_user_states = 0 self.state_names = [] self.penalties = [] self.CG = None self.ND1 = None self.HD1 = None self.NE2 = None self.HE2 = None self.CD2 = None self.HD2 = None self.CE1 = None self.HE1 = None self.treated = False self.locked = False self.valid = True self.name = self.get_residue_name(ct, iatom) self.type = 'HISTIDINE' # Find hydrogen atoms: residue_atoms = ct.getResidueAtoms(iatom) for atom in residue_atoms: name = atom.pdbname.strip() if name in self.__dict__: self.__dict__[name] = int(atom) # Catch non-standard hydrogen names. elif atom.atomic_number == 1: for heavy in atom.bonded_atoms: bond = ct.getBond(heavy, atom) if bond.order == 0: continue if int(heavy) == self.ND1: self.HD1 = int(atom) elif int(heavy) == self.NE2: self.HE2 = int(atom) elif int(heavy) == self.CD2: self.HD2 = int(atom) elif int(heavy) == self.CE1: self.HE1 = int(atom) if self.CG is None or self.ND1 is None or self.NE2 is None or self.CD2 is None or self.CE1 is None: report(LOG_EXTRA, 'Rejecting ' + self.name + ' due to missing atoms.') self.valid = False return # Check for something covalently bound (outside of the ring): allowed_bonds = { self.ND1: {' CG ', ' CE1'}, self.NE2: {' CE1', ' CD2'}, self.CE1: {' ND1', ' NE2'}, self.CD2: {' NE2', ' CG '}, } for heavy, expected_names in allowed_bonds.items(): bound_names = [] for bond in ct.atom[heavy].bond: if bond.atom2.atomic_number != 1 and bond.order != 0: bound_names.append(bond.atom2.pdbname) if len(bound_names) > 2 or set(bound_names) != expected_names: report(LOG_EXTRA, 'Rejecting ' + self.name + ' due to covalent bond.') self.valid = False return flip = 0 protonate = 0 flip_tag = re.compile('Flip') if 's_pa_state' in ct.atom[self.CG].property and \ flip_tag.match(ct.atom[self.CG].property['s_pa_state']): flip = 1 else: flip = 0 if self.HD1 is not None: if self.HE2 is not None: protonation = 2 # HIP else: protonation = 0 # HID else: protonation = 1 # HIE self.current_state = protonation + flip * 3 self.initial_state = protonation + flip * 3 return
[docs] def pre_treat_1(self, ct): ct.atom[self.ND1].formal_charge = 1 ct.atom[self.NE2].formal_charge = 0 ct.getBond(self.ND1, self.CE1).type = structure.BondType.Double ct.getBond(self.CD2, self.NE2).type = structure.BondType.Single ct.getBond(self.CE1, self.NE2).type = structure.BondType.Single ct.getBond(self.CG, self.CD2).type = structure.BondType.Double
[docs] def pre_treat_2(self, ct): residue_atoms = ct.getResidueAtoms(self.CG) for hyd in residue_atoms: if hyd.atomic_number != 1: continue for heavy in hyd.bonded_atoms: bond = ct.getBond(heavy, hyd) if bond.order == 0: continue if int(heavy) == self.ND1: self.HD1 = int(hyd) hyd.property['i_pa_atomindex'] = int(hyd) elif int(heavy) == self.NE2: self.HE2 = int(hyd) hyd.property['i_pa_atomindex'] = int(hyd) if self.current_state > 2 and not self.locked: # Needs to be flipped back, for bookkeeping to be consistent. atoms_to_swap = [(self.ND1, self.CD2), (self.HD1, self.HD2), (self.NE2, self.CE1), (self.HE2, self.HE1)] for atoms in atoms_to_swap: self.swap_atoms(ct, atoms[0], atoms[1]) self.current_state -= 3 self.treated = True return
[docs] def enumerate_states(self, ct, acceptors, donors, pH, do_flips=True, include_initial=False): if do_flips: self.nstates = 6 self.state_names = [ 'HID', 'HIE', 'HIP', 'Flip HID', 'Flip HIE', 'Flip HIP' ] else: self.nstates = 3 self.state_names = ['HID', 'HIE', 'HIP'] if pH is None: self.pH_penalties = [0.0, 0.0, 0.0, 0.0, 0.0, 0.0] elif pH in [pH_vlow, pH_low]: self.pH_penalties = [4.0, 4.0, 0.0, 4.0, 4.0, 0.0] else: self.pH_penalties = [0.0, 0.0, 4.0, 0.0, 0.0, 4.0] self.contact_penalties = [0.0, 0.0, 0.0, 0.0, 0.0, 0.0] for iatom in [self.HD1, self.HD2, self.HE1, self.HE2]: assert iatom is not None for iacceptor in acceptors: assert iacceptor is not None if ct.measure(iatom, iacceptor) < 2.1: formal_charge = 0.0 if ct.atom[iacceptor].formal_charge < 0.0: formal_charge = ct.atom[iacceptor].formal_charge # Check if it's a carboxylate. elif ct.atom[iacceptor].atomic_number == 8: connected_atoms = analyze.evaluate_asl( ct, 'not (atom.num %d) and withinbonds 2 (atom.num %d)' % (iacceptor, iacceptor)) for jatom in connected_atoms: if ct.atom[jatom].atomic_number == 8 and ct.atom[jatom].formal_charge < 0.0: formal_charge = ct.atom[jatom].formal_charge if iatom == self.HD1: report(LOG_DEBUG, self.name + ' : HD1 is near ' + str(ct.atom[iacceptor].resnum) + ':' + ct.atom[iacceptor].pdbname) self.contact_penalties[1] += 10.0 self.contact_penalties[3] += 10.0 self.contact_penalties[4] += 10.0 self.contact_penalties[5] += 10.0 if iatom == self.HD2: report(LOG_DEBUG, self.name + ' : HD2 is near ' + str(ct.atom[iacceptor].resnum) + ':' + ct.atom[iacceptor].pdbname) self.contact_penalties[0] += 10.0 self.contact_penalties[1] += 10.0 self.contact_penalties[2] += 10.0 self.contact_penalties[4] += 10.0 if iatom == self.HE1: report(LOG_DEBUG, self.name + ' : HE1 is near ' + str(ct.atom[iacceptor].resnum) + ':' + ct.atom[iacceptor].pdbname) self.contact_penalties[0] += 10.0 self.contact_penalties[1] += 10.0 self.contact_penalties[2] += 10.0 self.contact_penalties[3] += 10.0 if iatom == self.HE2: report(LOG_DEBUG, self.name + ' : HE2 is near ' + str(ct.atom[iacceptor].resnum) + ':' + ct.atom[iacceptor].pdbname) self.contact_penalties[0] += 10.0 self.contact_penalties[3] += 10.0 self.contact_penalties[4] += 10.0 self.contact_penalties[5] += 10.0 # Only want to enforce this rule if not using PropKa. if (formal_charge < 0.0 or ct.atom[iacceptor].pdbname in [ ' OD1', ' OD2', ' OE1', ' OE2' ]) and (pH is not None): report(LOG_DEBUG, self.name + ' salt-bridged to ' + ct.atom[iacceptor].pdbres + str(ct.atom[iacceptor].resnum)) self.contact_penalties[0] += 10.0 self.contact_penalties[1] += 10.0 self.contact_penalties[3] += 10.0 self.contact_penalties[4] += 10.0 break for donor in donors: # ASP and GLU can also be in donors, if they're being predicted. if ct.atom[donor[0]].pdbres in [ "ASP ", "GLU " ] and ct.atom[donor[0]].pdbname in [ ' OD1', ' OD2', ' OE1', ' OE2' ]: # Must be in contact, and we aren't using PropKa. if ct.measure(iatom, donor[0]) < 2.0 and pH is not None: report(LOG_DEBUG, self.name + ' salt-bridged to ' + ct.atom[donor[0]].pdbres + str(ct.atom[donor[0]].resnum)) self.contact_penalties[0] += 10.0 self.contact_penalties[1] += 10.0 self.contact_penalties[3] += 10.0 self.contact_penalties[4] += 10.0 # Metal will force a particular state, which overrides all else. if donor[0] == donor[1]: if (ct.atom[donor[0]].atomic_number not in [ 7, 8 ]) and (ct.measure(iatom, donor[0]) < 2.5): if iatom == self.HD1: self.contact_penalties = [ 10.0, 0.0, 10.0, 10.0, 10.0, 10.0 ] elif iatom == self.HD2: self.contact_penalties = [ 10.0, 10.0, 10.0, 10.0, 0.0, 10.0 ] elif iatom == self.HE1: self.contact_penalties = [ 10.0, 10.0, 10.0, 0.0, 10.0, 10.0 ] elif iatom == self.HE2: self.contact_penalties = [ 0.0, 10.0, 10.0, 10.0, 10.0, 10.0 ] report(LOG_DEBUG, self.name + ' Metal ion detected, restricting state.') break self.penalties = [] for i in range(6): self.penalties.append( self.pH_penalties[i] + self.contact_penalties[i]) report(LOG_DEBUG, self.name + ' Penalties = ' + str(self.penalties)) return
[docs] def lock_protonation(self): if self.initial_state in [2, 5]: self.contact_penalties = [ 1000.0, 1000.0, 0.0, 1000.0, 1000.0, 0.0 ] else: self.contact_penalties = [0.0, 0.0, 1000.0, 0.0, 0.0, 1000.0] self.penalties = [ self.pH_penalties[i] + self.contact_penalties[i] for i in range(6) ] return
[docs] def assign_state(self, ct, istate, add_labels=True, label_pkas=False, state_gaps=None, verbose=False): if not self.treated: self.pre_treat(ct) if (istate > 2 and self.current_state <= 2) or \ (istate <= 2 and self.current_state > 2): atoms_to_swap = [(self.ND1, self.CD2), (self.HD1, self.HD2), (self.NE2, self.CE1), (self.HE2, self.HE1)] for atoms in atoms_to_swap: self.swap_atoms(ct, atoms[0], atoms[1]) protonation = istate - (old_div(istate, 3)) * 3 atoms_to_delete = [] protonation_name = '' if protonation == 0: ct.atom[self.ND1].formal_charge = 0 ct.getBond(self.ND1, self.CE1).type = structure.BondType.Single ct.getBond(self.NE2, self.CE1).type = structure.BondType.Double protonation_name = 'HIS' atoms_to_delete = [self.HE2] elif protonation == 1: ct.atom[self.ND1].formal_charge = 0 protonation_name = 'HIE' atoms_to_delete = [self.HD1] elif protonation == 2: protonation_name = 'HIP' ct.atom[self.CG].label_format = "%UT" ct.atom[self.CG].label_color = label_color if istate > 2: prot_label = 'Flip ' + protonation_name ct.atom[self.CG].property[ 's_pa_state'] = 'Flip ' + protonation_name else: prot_label = protonation_name ct.atom[self.CG].property['s_pa_state'] = protonation_name self.assign_state_gap( ct.atom[self.CG], state_gaps, report_gaps=verbose) if add_labels: label = prot_label else: label = '' if self.pka is not None and label_pkas: label += ' %.2f' % self.pka ct.atom[self.CG].property[pka_property] = self.pka ct.atom[self.CG].label_user_text = label residue_atoms = ct.getResidueAtoms(self.CG) for atom in residue_atoms: atom.pdbres = protonation_name self.current_state = istate self.treated = False return (atoms_to_delete)
[docs] def update_atom_indices(self, ct, new_indices): # Gotta be a better way... self.CG = self.get_new_index(ct, self.CG, new_indices) self.ND1 = self.get_new_index(ct, self.ND1, new_indices) self.HD1 = self.get_new_index(ct, self.HD1, new_indices) self.NE2 = self.get_new_index(ct, self.NE2, new_indices) self.HE2 = self.get_new_index(ct, self.HE2, new_indices) self.CD2 = self.get_new_index(ct, self.CD2, new_indices) self.HD2 = self.get_new_index(ct, self.HD2, new_indices) self.CE1 = self.get_new_index(ct, self.CE1, new_indices) self.HE1 = self.get_new_index(ct, self.HE1, new_indices) return
[docs] def get_heavies(self): return [self.ND1, self.NE2, self.CD2, self.CE1]
[docs] def get_state_sites(self, ct, istate): if istate == 0: return ([self.NE2], [(self.ND1, self.HD1)], [self.HD2, self.HE1], 0.0) elif istate == 1: return ([self.ND1], [(self.NE2, self.HE2)], [self.HD2, self.HE1], 0.0) elif istate == 2: return ([], [(self.ND1, self.HD1), (self.NE2, self.HE2)], [self.HD2, self.HE1], 1) elif istate == 3: return ([self.CE1], [(self.CD2, self.HD2)], [self.HD1, self.HE2], 0.0) elif istate == 4: return ([self.CD2], [(self.CE1, self.HE1)], [self.HD1, self.HE2], 0.0) elif istate == 5: return ([], [(self.CD2, self.HD2), (self.CE1, self.HE1)], [self.HD1, self.HE2], 1) return
[docs] def get_view_atoms(self): return [self.CG, self.ND1, self.NE2, self.CD2, self.CE1]
[docs] def get_penalty(self, istate): return self.penalties[istate]
[docs] def get_adjustable_atoms(self): return [ self.ND1, self.HD1, self.NE2, self.HE2, self.CD2, self.HD2, self.CE1, self.HE1 ]
[docs] def change_pka(self, pka, propka_pH): if pka is None: reference_pka = 6.10 else: reference_pka = pka self.pka = pka # If the pKa is within 0.5 of the pH, let the scoring function # determine the protonation state if math.fabs(propka_pH - reference_pka) <= 0.5: self.pH_penalties = [0.0, 0.0, 0.0, 0.0, 0.0, 0.0] elif propka_pH < reference_pka: self.pH_penalties = [4.0, 4.0, 0.0, 4.0, 4.0, 0.0] else: self.pH_penalties = [0.0, 0.0, 4.0, 0.0, 0.0, 4.0] new_penalties = [] something_changed = False for i in range(6): new_penalties.append( self.pH_penalties[i] + self.contact_penalties[i]) if self.penalties[i] != new_penalties[i]: something_changed = True self.penalties = new_penalties return something_changed
[docs] class carboxyl_changeable(changeable): asl = '(res.ptype \"ASP \",\"ASH \" AND atom.ptype \" CG \") OR (res.ptype \"GLU \",\"GLH \" AND atom.ptype \" CD \")'
[docs] def __init__(self, ct, iatom): self.pka = None self.index = 0 self.name = '' self.nstates = 0 self.num_user_states = 0 self.state_names = [] self.carbon = None self.oxygen1 = None self.oxygen2 = None self.hydrogen1 = None self.hydrogen2 = None self.treated = False self.locked = False self.valid = True self.name = self.get_residue_name(ct, iatom) self.type = 'CARBOXYL' residue_atoms = ct.getResidueAtoms(iatom) for atom in residue_atoms: name = atom.pdbname if (atom.pdbres in ["ASP ", "ASH "] and name == " CG ") or (atom.pdbres in ["GLU ", "GLH "] and name == " CD "): self.carbon = int(atom) elif name in [' OD1', ' OE1']: self.oxygen1 = int(atom) for bonded_atom in atom.bonded_atoms: if bonded_atom.atomic_number == 1: self.hydrogen1 = int(bonded_atom) elif name in [' OD2', ' OE2']: self.oxygen2 = int(atom) for bonded_atom in atom.bonded_atoms: if bonded_atom.atomic_number == 1: self.hydrogen2 = int(bonded_atom) if self.carbon is None or self.oxygen1 is None or self.oxygen2 is None: report(LOG_EXTRA, 'Rejecting ' + self.name + ' due to missing atoms.') self.valid = False return # Check for something covalently bound. for oxygen in [self.oxygen1, self.oxygen2]: for bond in ct.atom[oxygen].bond: if bond.atom2.atomic_number != 1 and bond.atom2.pdbname not in [ ' CG ', ' CD ' ] and bond.order != 0: report( LOG_EXTRA, 'Rejecting ' + self.name + ' due to covalent bond.') self.valid = False return if self.hydrogen1 is not None: self.current_state = 1 self.initial_state = 1 elif self.hydrogen2 is not None: self.current_state = 2 self.initial_state = 2 else: self.current_state = 0 self.initial_state = 0 return
[docs] def pre_treat_1(self, ct): ct.atom[self.oxygen1].formal_charge = 1 ct.atom[self.oxygen2].formal_charge = 0 ct.getBond(self.carbon, self.oxygen1).type = structure.BondType.Double ct.getBond(self.carbon, self.oxygen2).type = structure.BondType.Single
[docs] def pre_treat_2(self, ct): residue_atoms = ct.getResidueAtoms(self.carbon) for hyd in residue_atoms: if hyd.atomic_number != 1: continue for heavy in hyd.bonded_atoms: bond = ct.getBond(heavy, hyd) if bond.order == 0: continue if int(heavy) == self.oxygen1: self.hydrogen1 = int(hyd) hyd.property['i_pa_atomindex'] = int(hyd) elif int(heavy) == self.oxygen2: self.hydrogen2 = int(hyd) hyd.property['i_pa_atomindex'] = int(hyd) self.treated = True return
[docs] def enumerate_states(self, ct, acceptors, donors, pH, do_flips=True, include_initial=False): self.contact_penalties = [0.0, 0.0, 0.0] self.nstates = 3 self.state_names = ['Charged', 'Neutral 1', 'Neutral 2'] if pH is None: self.pH_penalties = [0.0, 0.0, 0.0] elif pH in [pH_vlow]: self.pH_penalties = [5.0, 0.0, 0.0] else: self.pH_penalties = [0.0, 5.0, 5.0] for iatom in [self.hydrogen1, self.hydrogen2]: # If paired with another ASP/GLU, remove protonation penalties (but don't force protonation) for donor in donors: if ct.atom[donor[0]].pdbname in [ ' OD1', ' OD2', ' OE1', ' OE2' ] and ct.atom[donor[0]].resnum != ct.atom[iatom].resnum: if ct.measure(iatom, donor[0]) < 2.0: if iatom == self.hydrogen1: report(LOG_DEBUG, self.name + ' : hydrogen1 is near ' + str(ct.atom[donor[0]].resnum) + ':' + ct.atom[donor[0]].pdbname) self.contact_penalties = [0.0, 0.0, 0.0] elif iatom == self.hydrogen2: report(LOG_DEBUG, self.name + ' : hydrogen2 is near ' + str(ct.atom[donor[0]].resnum) + ':' + ct.atom[donor[0]].pdbname) self.contact_penalties = [0.0, 0.0, 0.0] # Check for a metal elif donor[0] == donor[1]: if (ct.atom[donor[0]].atomic_number not in [7, 8]) and ( (ct.measure(self.oxygen1, donor[0]) < 3.0) or (ct.measure(self.oxygen2, donor[0]) < 3.0)): self.contact_penalties = [0.0, 10.0, 10.0] report(LOG_DEBUG, self.name + ' Metal ion detected, restricting state.') break for iacceptor in acceptors: if ct.measure(iatom, iacceptor) < 2.0: if iatom == self.hydrogen1: report(LOG_DEBUG, self.name + ' : hydrogen1 is near ' + str(ct.atom[iacceptor].resnum) + ':' + ct.atom[iacceptor].pdbname) self.contact_penalties[2] += 10.0 elif iatom == self.hydrogen2: report(LOG_DEBUG, self.name + ' : hydrogen2 is near ' + str(ct.atom[iacceptor].resnum) + ':' + ct.atom[iacceptor].pdbname) self.contact_penalties[1] += 10.0 break self.penalties = [] for i in range(3): self.penalties.append( self.pH_penalties[i] + self.contact_penalties[i]) report(LOG_DEBUG, self.name + ' Penalties = ' + str(self.penalties)) return
[docs] def lock_protonation(self): if self.initial_state == 0: self.contact_penalties = [0.0, 1000.0, 1000.0] else: self.contact_penalties = [1000.0, 0.0, 0.0] self.penalties = [ self.pH_penalties[i] + self.contact_penalties[i] for i in range(3) ] return
[docs] def assign_state(self, ct, istate, add_labels=True, label_pkas=False, state_gaps=None, verbose=False): if not self.treated: self.pre_treat(ct) atoms_to_delete = [] ct.atom[self.carbon].label_format = "%UT" ct.atom[self.carbon].label_color = label_color if (istate == 1 and self.current_state != 1) or (istate != 1 and self.current_state == 1): atoms_to_swap = [(self.oxygen1, self.oxygen2), (self.hydrogen1, self.hydrogen2)] for atoms in atoms_to_swap: self.swap_atoms(ct, atoms[0], atoms[1]) # Unlike His/Asn/Gln, we aren't preserving the flip info, so update these. self.oxygen1, self.oxygen2 = self.oxygen2, self.oxygen1 self.hydrogen1, self.hydrogen2 = self.hydrogen2, self.hydrogen1 if istate == 0: protonated = False ct.atom[self.oxygen1].formal_charge = 0 ct.atom[self.oxygen2].formal_charge = -1 ct.getBond(self.carbon, self.oxygen1).type = structure.BondType.Double ct.getBond(self.carbon, self.oxygen2).type = structure.BondType.Single atoms_to_delete = [self.hydrogen1, self.hydrogen2] prot_label = '' elif istate == 1: protonated = True ct.atom[self.oxygen1].formal_charge = 0 ct.atom[self.oxygen2].formal_charge = 0 ct.atom[self.carbon].addBond(self.oxygen1, 1) ct.atom[self.carbon].addBond(self.oxygen2, 2) atoms_to_delete = [self.hydrogen2] prot_label = 'Neutral' elif istate == 2: protonated = True ct.atom[self.oxygen1].formal_charge = 0 ct.atom[self.oxygen2].formal_charge = 0 ct.atom[self.carbon].addBond(self.oxygen1, 2) ct.atom[self.carbon].addBond(self.oxygen2, 1) atoms_to_delete = [self.hydrogen1] prot_label = 'Neutral' if add_labels: label = prot_label else: label = '' if self.pka is not None and label_pkas: label += ' %.2f' % self.pka ct.atom[self.carbon].property[pka_property] = self.pka ct.atom[self.carbon].label_user_text = label # Set the residue name. if ct.atom[self.carbon].pdbres[0:2] == 'AS': if protonated: protonation_name = 'ASH ' else: protonation_name = 'ASP ' if ct.atom[self.carbon].pdbres[0:2] == 'GL': if protonated: protonation_name = 'GLH ' else: protonation_name = 'GLU ' self.assign_state_gap( ct.atom[self.carbon], state_gaps, report_gaps=verbose) residue_atoms = ct.getResidueAtoms(self.carbon) for atom in residue_atoms: atom.pdbres = protonation_name self.current_state = istate self.treated = False return atoms_to_delete
[docs] def update_atom_indices(self, ct, new_indices): # Gotta be a better way... self.carbon = self.get_new_index(ct, self.carbon, new_indices) self.oxygen1 = self.get_new_index(ct, self.oxygen1, new_indices) self.oxygen2 = self.get_new_index(ct, self.oxygen2, new_indices) self.hydrogen1 = self.get_new_index(ct, self.hydrogen1, new_indices) self.hydrogen2 = self.get_new_index(ct, self.hydrogen2, new_indices) return
[docs] def get_heavies(self): return [self.oxygen1, self.oxygen2]
[docs] def get_state_sites(self, ct, istate): if istate == 0: return ([self.oxygen1, self.oxygen2], [], [], 0.0) elif istate == 1: return ([self.oxygen2], [(self.oxygen1, self.hydrogen1)], [], 0.0) elif istate == 2: return ([self.oxygen1], [(self.oxygen2, self.hydrogen2)], [], 0.0) return
[docs] def get_view_atoms(self): return [self.carbon, self.oxygen1, self.oxygen2]
[docs] def get_penalty(self, istate): return self.penalties[istate]
[docs] def get_adjustable_atoms(self): return [self.oxygen1, self.oxygen2]
[docs] def change_pka(self, pka, propka_pH): if pka is None: reference_pka = 4.00 else: reference_pka = pka self.pka = pka # propka sometimes has some very large pKas for ASP and GLU # so we never want to force carboxlyate to be protonated. If the # propKa is lower than the reference then we should (almost) # always deprotonate. If the pKa is larger than the reference # then we should allow the hbonding pattern to determine whether # the system should be protonated. Unlike other residues, a # very large pKa will never (almost) force a protonated state. if (reference_pka >= propka_pH): self.pH_penalties = [0.0, 0.0, 0.0] else: self.pH_penalties = [0.0, 4.0, 4.0] new_penalties = [] something_changed = False for i in range(3): new_penalties.append( self.pH_penalties[i] + self.contact_penalties[i]) if self.penalties[i] != new_penalties[i]: something_changed = True self.penalties = new_penalties return something_changed
[docs] class rotatable_changeable(changeable): asl = '((res.ptype \"CYS \",\"CYT \") AND (atom.ptype \" SG \") AND (atom.formal -1)) OR ((res.ptype \"TYR \") AND (atom.ptype \" OH \") AND (atom.formal -1)) OR (( atom.ele H AND not /C0-H0/ AND not /N0-H0/ ) AND NOT (res.ptype \"HOH\",\"DOD\",\"SPC\",\"ASH\",\"GLH\",\"ASP\",\"GLU\" ))'
[docs] def __init__(self, ct, iatom): self.pka = None self.index = 0 self.nstates = 0 self.num_user_states = 0 self.state_names = [] self.dihedrals = [] self.valid = True self.name = self.get_atom_name(ct, iatom) self.type = 'ROTATABLE' self.ionizable = False self.current_state = 0 self.initial_state = 0 # If it's a deprotonated CYS/TYR if ct.atom[iatom].pdbname == " SG " or ( ct.atom[iatom].pdbres == "TYR " and ct.atom[iatom].pdbname == " OH "): # Abort if it's coordinated to a metal for heavy_bond in ct.atom[iatom].bond: if heavy_bond.order == 0: report(LOG_EXTRA, 'Rejecting ' + self.name + ' - coordinated to a metal.') self.valid = False return # Set state to -1 now, and map it to the last state later in enumerate_states. self.current_state = -1 self.initial_state = -1 ct.atom[iatom].formal_charge = 0 build.add_hydrogens(ct, atom_list=[iatom]) for bonded_atom in ct.atom[iatom].bonded_atoms: if bonded_atom.atomic_number == 1: self.hydrogen = int(bonded_atom) break else: self.hydrogen = iatom try: dihedral_atoms = self.get_dihedral_atoms(ct, self.hydrogen) except: self.valid = False return if len(dihedral_atoms) < 4: report( LOG_EXTRA, 'Rejecting ' + self.name + ' due to lack of rotatability.') self.valid = False return if ct.atom[dihedral_atoms[1]].bond_total != 2: report( LOG_EXTRA, 'Rejecting ' + self.name + ' - parent has too many bonds.') self.valid = False return self.parent = dihedral_atoms[1] self.parentparent = dihedral_atoms[2] self.parentparentparent = dihedral_atoms[3] self.treated = True self.locked = False nbonds = ct.atom[self.parentparent].bond_total if nbonds == 3: self.periodicity = 2 elif nbonds == 4: self.periodicity = 3 # Whether or not the atom at a given dihedral position is heavy. self.eclipse_is_heavy = [] # Find out which are heavy/hydrogen. for atom in ct.atom[self.parentparent].bonded_atoms: if atom.atomic_number == 1: heavy = False else: heavy = True if int(atom) == self.parentparentparent: # 0 position. self.eclipse_is_heavy.append((0.0, heavy)) elif int(atom) != self.parent: dihedral = ct.measure(self.parentparentparent, self.parentparent, self.parent, atom) self.eclipse_is_heavy.append((dihedral, heavy)) else: self.periodicity = 0 if ct.atom[self.parent].pdbres in ['CYS ', 'CYT ', 'TYR ']: self.ionizable = True return
[docs] def pre_treat_1(self, ct): ct.atom[self.parent].formal_charge = 0 return
[docs] def pre_treat_2(self, ct): residue_atoms = ct.getResidueAtoms(self.parent) for hyd in residue_atoms: if hyd.atomic_number != 1: continue for heavy in hyd.bonded_atoms: bond = ct.getBond(heavy, hyd) if bond.order == 0: continue if int(heavy) == self.parent: self.hydrogen = int(hyd) hyd.property['i_pa_atomindex'] = int(hyd) self.treated = True return
[docs] def enumerate_states(self, ct, acceptors, donors, pH, do_flips=True, include_initial=False): distance_check = self.max_hbond_distance if include_initial: # Include initial state. dihedral = ct.measure(self.parentparentparent, self.parentparent, self.parent, self.hydrogen) self.dihedrals.append(dihedral) self.state_names.append('Initial') self.nstates = 1 initial_nstates = self.nstates # Keep trying until at least one state is found, increasing the distance each time. while self.nstates == initial_nstates: for (atom1, atom2) in [ (acceptor, None) for acceptor in acceptors ] + donors: #for atom in (acceptors + [donor[0] for donor in donors]): if atom1 == self.parent or atom1 == self.parentparent: continue # Trim any states pointing head-to-head with an N-H. #if (ct.atom[atom1].atomic_number == 7) and (atom2 != None): # angle = ct.measure( self.parent, atom1, atom2 ) # if angle < 90: # continue distance = ct.measure(self.parent, atom1) if distance <= distance_check: angle = ct.measure(self.parentparent, self.parent, atom1) if (angle <= self.hbond_heavy_max_angle) and ( angle >= self.hbond_heavy_min_angle): # Measure what the angle to the atom is. dihedral = ct.measure(self.parentparentparent, self.parentparent, self.parent, atom1) #if ct.atom[self.hydrogen].pdbres != "TYR " or math.fabs(dihedral) <= 40.0 or math.fabs(dihedral) >= 140.0: if True: self.nstates += 1 self.dihedrals.append(dihedral) #self.state_names.append( "To " + self.get_atom_name(ct, atom1) + " " + str(self.nstates) ) self.state_names.append("Orientation") used_donor_heavies = set() for pair in donors: if pair[0] == self.parent or pair[0] == self.parentparent: continue if pair[0] in used_donor_heavies: continue used_donor_heavies.add(pair[0]) distance = ct.measure(self.parent, pair[0]) if distance <= distance_check: # Measure what the angle to the atom is (should it be from the heavy or the H?). dihedral = ct.measure(self.parentparentparent, self.parentparent, self.parent, pair[0]) # Add +/-120 to it. dihedral += 120.0 if dihedral > 180.0: dihedral -= 360.0 elif dihedral < -180.0: dihedral += 360.0 #if ct.atom[self.hydrogen].pdbres != "TYR " or math.fabs(dihedral) <= 40.0 or math.fabs(dihedral) >= 140.0: if True: self.nstates += 1 self.dihedrals.append(dihedral) #self.state_names.append( "From " + self.get_atom_name(ct, pair[0]) + " " + str(self.nstates) ) self.state_names.append("Orientation") dihedral -= 240.0 if dihedral > 180.0: dihedral -= 360.0 elif dihedral < -180.0: dihedral += 360.0 #if ct.atom[self.hydrogen].pdbres != "TYR " or math.fabs(dihedral) <= 40.0 or math.fabs(dihedral) >= 140.0: if True: self.nstates += 1 self.dihedrals.append(dihedral) #self.state_names.append( "From " + self.get_atom_name(ct, pair[0]) + " " + str(self.nstates) ) self.state_names.append("Orientation") distance_check += 0.25 if distance_check >= 50.0: self.nstates += 1 self.dihedrals.append(0.0) #self.state_names.append( "Default" ) self.state_names.append("Orientation") break # Add one state that doesn't touch anything, just in case. if ct.atom[self.hydrogen].pdbres != "TYR ": dihedral_save = ct.measure(self.parentparentparent, self.parentparent, self.parent, self.hydrogen) for idihedral in range(36): ct.adjust( float(idihedral) * 10.0, self.parentparentparent, self.parentparent, self.parent, self.hydrogen) nclashes = analyze.evaluate_asl( ct, '(not (atom.num %d) and within 2 (atom.num %d)) and not (withinbonds 2 (atom.num %d))' % (self.hydrogen, self.hydrogen, self.hydrogen)) if len(nclashes) == 0: self.nstates += 1 self.dihedrals.append(float(idihedral) * 10.0) self.state_names.append("Default") break ct.adjust(dihedral_save, self.parentparentparent, self.parentparent, self.parent, self.hydrogen) # Trim any disallowed states. for istate in range(len(self.dihedrals) - 1, -1, -1): idihedral = self.dihedrals[istate] iname = self.state_names[istate] # Don't trim original state if (istate == 0) and include_initial: continue if ct.atom[self.hydrogen].pdbres == "TYR " and math.fabs( idihedral) > 30.0 and math.fabs(idihedral) < 150.0: if math.fabs(idihedral) < 40.0: report(LOG_FULL_DEBUG, 'Adjusting state ' + iname + ' (' + str(idihedral) + ') to be more planar.') if idihedral < 0.0: self.dihedrals[istate] = -30.0 report(LOG_FULL_DEBUG, ' Adjusted to -30.0') else: self.dihedrals[istate] = 30.0 report(LOG_FULL_DEBUG, ' Adjusted to 30.0') elif math.fabs(idihedral) > 140.0: report(LOG_FULL_DEBUG, 'Adjusting state ' + iname + ' (' + str(idihedral) + ') to be more planar.') if idihedral < 0.0: self.dihedrals[istate] = -150.0 report(LOG_FULL_DEBUG, ' Adjusted to -150.0') else: self.dihedrals[istate] = 150.0 report(LOG_FULL_DEBUG, ' Adjusted to 150.0') else: report(LOG_FULL_DEBUG, 'Removing state ' + iname + ' (' + str(idihedral) + ') - too far out of plane.') self.dihedrals.remove(idihedral) self.state_names.remove(iname) self.nstates -= 1 elif self.periodicity == 3: for (angle, heavy) in self.eclipse_is_heavy: if heavy: cutoff = 30.0 else: cutoff = 10.0 if math.fabs(idihedral - angle) < cutoff: report( LOG_FULL_DEBUG, 'Removing state ' + iname + ' (' + str(idihedral) + ') - too close to eclipsed.') try: self.dihedrals.remove(idihedral) self.state_names.remove(iname) self.nstates -= 1 except: report( LOG_NONE, 'ERROR: Failure removing state for ' + self.name + ' - Please check this residue for structural inconsistencies.' ) if ct.atom[self.hydrogen].pdbres == "TYR ": self.nstates += 1 self.dihedrals.append(0.0) self.state_names.append("Default 1") self.nstates += 1 self.dihedrals.append(180.0) self.state_names.append("Default 2") # Add standard staggered states. elif self.periodicity == 3: self.nstates += 1 self.dihedrals.append(60.0) self.state_names.append("Stagger 1") self.nstates += 1 self.dihedrals.append(180.0) self.state_names.append("Stagger 2") self.nstates += 1 self.dihedrals.append(300.0) self.state_names.append("Stagger 3") # Trim any redundant states. for istate in range(len(self.dihedrals) - 1, 0, -1): idihedral = self.dihedrals[istate] iname = self.state_names[istate] for jstate in range(istate - 1, -1, -1): jdihedral = self.dihedrals[jstate] jname = self.state_names[jstate] if abs(idihedral - jdihedral) < 10.0: report(LOG_FULL_DEBUG, 'Removing state ' + iname + ' (' + str(idihedral) + ') - too similar to ' + jname + '(' + str(idihedral) + ')') self.dihedrals.remove(idihedral) self.state_names.remove(iname) self.nstates -= 1 break # Make sure at least one orientation exists. #if self.nstates == 0: # if ct.atom[self.hydrogen].pdbres == "TYR ": # self.dihedrals = [0.0,180.0] # self.state_names = ['Default','Default'] # self.nstates = 2 # else: # self.dihedrals = [180.0] # self.state_names = ['Default'] # self.nstates = 1 # Name the states if include_initial: for istate in range(1, len(self.state_names)): self.state_names[istate] += ' %d/%d' % (istate, self.nstates - 1) else: for istate in range(0, len(self.state_names)): self.state_names[istate] += ' %d/%d' % (istate + 1, self.nstates) if ct.atom[self.parent].pdbres in ['CYS ', 'CYT ']: self.state_names.append('CYT ') self.dihedrals.append(None) self.nstates += 1 elif ct.atom[self.parent].pdbres in ['TYR ']: self.state_names.append('Charged') self.dihedrals.append(None) self.nstates += 1 self.pH_penalties = [] for istate in range(self.nstates): if self.ionizable: # Penalties will come later if using PropKa. if pH is None: self.pH_penalties.append(0.0) continue # Not using PropKa. if self.dihedrals[istate] is None: if pH is None: self.pH_penalties.append(0.0) elif pH == pH_high: self.pH_penalties.append(0.0) else: # Really don't want to deprotonate at normal pH self.pH_penalties.append(200.0) else: if pH is None: self.pH_penalties.append(0.0) elif pH == pH_high: # Really don't want to deprotonate at normal pH self.pH_penalties.append(200.0) else: self.pH_penalties.append(0.0) else: self.pH_penalties.append(0.0) if self.current_state == -1: self.current_state = self.nstates - 1 if self.initial_state == -1: self.initial_state = self.nstates - 1 self.contact_penalties = [0.0 for i in range(self.nstates)] self.penalties = [ self.pH_penalties[i] + self.contact_penalties[i] for i in range(self.nstates) ] return
[docs] def lock_protonation(self): if not self.ionizable: return if self.initial_state == (self.nstates - 1): self.contact_penalties = [1000.0 for i in range(self.nstates)] self.contact_penalties[-1] = 0.0 else: self.contact_penalties = [0.0 for i in range(self.nstates)] self.contact_penalties[-1] = 1000.0 self.penalties = [ self.pH_penalties[i] + self.contact_penalties[i] for i in range(self.nstates) ] return
[docs] def add_current_to_states(self, ct): dihedral = ct.measure(self.parentparentparent, self.parentparent, self.parent, self.hydrogen) self.dihedrals.append(dihedral) self.nstates += 1 self.num_user_states += 1 self.state_names.append("User %d" % self.num_user_states) self.penalties.append(0.0)
[docs] def assign_state(self, ct, istate, add_labels=True, label_pkas=False, state_gaps=None, verbose=False): if not self.treated: self.pre_treat(ct) atoms_to_delete = [] ct.atom[self.parent].label_format = "%UT" ct.atom[self.parent].label_color = label_color if self.dihedrals[istate] is None: ct.atom[self.parent].formal_charge = -1 atoms_to_delete = [self.hydrogen] if ct.atom[self.parent].pdbres in ['CYS ', 'CYT ']: residue_name = 'CYT ' prot_label = 'CYT' elif ct.atom[self.parent].pdbres == 'TYR ': residue_name = 'TYR ' prot_label = 'Charged' else: ct.adjust(self.dihedrals[istate], self.parentparentparent, self.parentparent, self.parent, self.hydrogen) if ct.atom[self.parent].pdbres in ['CYS ', 'CYT ']: residue_name = 'CYS ' elif ct.atom[self.parent].pdbres == 'TYR ': residue_name = 'TYR ' prot_label = '' if add_labels: label = prot_label else: label = '' if self.pka is not None and label_pkas and self.ionizable: label += ' %.2f' % self.pka ct.atom[self.parent].property[pka_property] = self.pka ct.atom[self.parent].label_user_text = label self.assign_state_gap( ct.atom[self.parent], state_gaps, report_gaps=verbose) if self.ionizable: residue_atoms = ct.getResidueAtoms(self.parent) for atom in residue_atoms: atom.pdbres = residue_name self.current_state = istate self.treated = False return atoms_to_delete
[docs] def update_atom_indices(self, ct, new_indices): # Gotta be a better way... self.hydrogen = self.get_new_index(ct, self.hydrogen, new_indices) self.parent = self.get_new_index(ct, self.parent, new_indices) self.parentparent = self.get_new_index(ct, self.parentparent, new_indices) self.parentparentparent = self.get_new_index( ct, self.parentparentparent, new_indices) return
[docs] def get_heavies(self): return [self.parent]
[docs] def get_state_sites(self, ct, istate): if self.dihedrals[istate] is None: return ([self.parent], [], [], 0.0) else: ct.adjust(self.dihedrals[istate], self.parentparentparent, self.parentparent, self.parent, self.hydrogen) return ([], [(self.parent, self.hydrogen)], [], 0.0)
[docs] def get_view_atoms(self): return [self.parent]
[docs] def get_penalty(self, istate): #if self.periodicity == 2: # print self.name + ' ' + self.state_names[istate] + ' ' + str(0.2*(math.sin(math.radians(self.dihedrals[istate])))**4) #elif self.periodicity == 3: # print self.name + ' ' + self.state_names[istate] + ' ' + str(0.2*(math.cos(math.radians(3.0*self.dihedrals[istate]/2.0)))**4) #else: # print self.name + ' ' + self.state_names[istate] + ' ' + str(0.0) #if self.periodicity == 2: # return 0.2*(math.sin(math.radians(self.dihedrals[istate])))**4 #elif self.periodicity == 3: # return 0.2*(math.cos(math.radians(3.0*self.dihedrals[istate]/2.0)))**4 #else: # return 0.0 return self.penalties[istate]
[docs] def get_adjustable_atoms(self): return [self.hydrogen, self.parent]
[docs] def change_pka(self, pka, propka_pH): if pka is None: reference_pka = 15.00 else: reference_pka = pka self.pka = pka if not self.ionizable: return False if reference_pka < propka_pH: ionized = True else: ionized = False new_penalties = [] changed = False for istate, dihedral in enumerate(self.dihedrals): if dihedral is None: if ionized: new_penalties.append( 0.0 + self.contact_penalties[istate]) else: # Really don't want to deprotonate at normal pH new_penalties.append( 200.0 + self.contact_penalties[istate]) else: if ionized: new_penalties.append( 200.0 + self.contact_penalties[istate]) else: new_penalties.append( 0.0 + self.contact_penalties[istate]) if self.penalties[istate] != new_penalties[istate]: changed = True self.penalties = new_penalties return changed
[docs] class amine_changeable(changeable): # Could be used for all primary amines, but for now let's stick to LYS sidechains. asl = '((res.ptype \"LYS \",\"LYN \") AND (atom.ptype \" NZ \"))'
[docs] def __init__(self, ct, iatom): self.pka = None self.index = 0 self.nstates = 0 self.num_user_states = 0 self.state_names = [] self.nitrogen = None self.carbon = None self.hyds = [] self.hyd_coords = [] self.initial_coords = [] self.locked = False self.valid = True self.name = self.get_residue_name(ct, iatom) self.type = 'AMINE' self.initial_coords = [] for atom in ct.atom[iatom].bonded_atoms: if atom.atomic_number == 1: self.hyds.append(int(atom)) elif self.carbon is None: self.carbon = int(atom) else: report(LOG_EXTRA, 'Rejecting ' + self.name + ' due to covalent bond.') self.valid = False return if len(self.hyds) not in [2, 3]: report( LOG_EXTRA, 'Rejecting ' + self.name + ' due to incorrect bound count.') self.valid = False return self.nitrogen = iatom for hyd in self.hyds: self.initial_coords.append(ct.atom[hyd].xyz) self.current_state = 0 self.initial_state = 0 return
[docs] def pre_treat_1(self, ct): ct.atom[self.nitrogen].formal_charge = 1 return
[docs] def pre_treat_2(self, ct): self.hyds = [] for hyd in ct.atom[self.nitrogen].bonded_atoms: if hyd.atomic_number == 1: self.hyds.append(int(hyd)) hyd.property['i_pa_atomindex'] = int(hyd) self.treated = True return
[docs] def enumerate_states(self, ct, acceptors, donors, pH, do_flips=True, sample_neutral_states=False, include_initial=False): """ Generate states for lysines. States are generated by rotating hydrogens for acceptor/donor interactions and by optionally including the neutral state. :param ct: Structure to generate states for :type ct: Structure :param acceptors: List of acceptor atom indices :type acceptors: List[int] :param donors: List of donor atom indices :type donors: List[(int, int)] :param pH: pH of system :type pH: float :param do_flips: Does nothing :type do_flips: bool :param sample_neutral_states: Include neutral states. Since PROPKA's pKa prediction is unreliable for Lys, currently we have no method of confidently assess whether it is neutral. So it's turned off by default. :type sample_neutral_states: bool :param include_initial: Include the initial state of the Lys :type include_initial: bool """ dihedral_atoms = self.get_dihedral_atoms(ct, self.hyds[0]) distance_check = self.max_hbond_distance if include_initial: self.hyd_coords.append(self.initial_coords) self.state_names.append('Initial') self.nstates += 1 initial_nstates = self.nstates while self.nstates == initial_nstates: for (atom1, atom2) in [ (acceptor, None) for acceptor in acceptors ] + donors: if atom1 == self.nitrogen: continue distance = ct.measure(self.nitrogen, atom1) if distance <= distance_check: angle = ct.measure(self.carbon, self.nitrogen, atom1) if (angle <= self.hbond_heavy_max_angle) and ( angle >= self.hbond_heavy_min_angle): dihedral = ct.measure(atom1, dihedral_atoms[1], dihedral_atoms[2], dihedral_atoms[3]) eclipsed = False for eclipse_ang in [0.0, 120.0, 240.0]: delta = math.fabs( math.fabs(dihedral) - eclipse_ang) if delta >= 360.0: delta -= 360.0 if delta < 30.0: eclipsed = True break if not eclipsed: ct.adjust(dihedral, dihedral_atoms[3], dihedral_atoms[2], dihedral_atoms[1], dihedral_atoms[0]) coords = [] for hyd in self.hyds: coords.append(ct.atom[hyd].xyz) self.hyd_coords.append(coords) self.state_names.append("Orientation") self.nstates += 1 distance_check += 0.25 if distance_check > MAX_ORIENT_HYDROGEN_DISTANCE: break # Add indexes to the names if include_initial: for istate in range(1, len(self.state_names)): self.state_names[istate] += ' %d/%d' % (istate, self.nstates - 1) else: for istate in range(len(self.state_names)): self.state_names[istate] += ' %d/%d' % (istate + 1, self.nstates) # Add a standard staggered state. ct.adjust(60.0, dihedral_atoms[3], dihedral_atoms[2], dihedral_atoms[1], dihedral_atoms[0]) coords = [] for hyd in self.hyds: coords.append(ct.atom[hyd].xyz) self.hyd_coords.append(coords) self.state_names.append("Stagger") self.nstates += 1 self.pH_penalties = [] # Protonation penalties for charged forms. for istate in range(self.nstates): self.pH_penalties.append(0.0) # After generating the charged states, now generate 3 neutral # states for each charged state. if sample_neutral_states: first_to_expand = 1 if include_initial else 0 for istate in range(first_to_expand, self.nstates): coords = self.hyd_coords[istate] self.hyd_coords.append([coords[0], coords[1]]) self.state_names.append(self.state_names[istate] + ' LYN1') self.hyd_coords.append([coords[0], coords[2]]) self.state_names.append(self.state_names[istate] + ' LYN2') self.hyd_coords.append([coords[1], coords[2]]) self.state_names.append(self.state_names[istate] + ' LYN3') self.nstates += 3 if pH is None: self.pH_penalties.extend([0.0, 0.0, 0.0]) else: self.pH_penalties.extend([100.0, 100.0, 100.0]) self.contact_penalties = [0.0 for i in range(self.nstates)] self.penalties = [ self.pH_penalties[i] + self.contact_penalties[i] for i in range(self.nstates) ]
[docs] def lock_protonation(self): nhyd_desired = len(self.initial_coords) for istate in range(self.nstates): if len(self.hyd_coords[istate]) == nhyd_desired: self.contact_penalties[istate] = 0.0 else: self.contact_penalties[istate] = 1000.0 self.penalties = [ self.pH_penalties[i] + self.contact_penalties[i] for i in range(self.nstates) ] return
[docs] def assign_state(self, ct, istate, add_labels=True, label_pkas=False, state_gaps=None, verbose=False): if not self.treated: self.pre_treat(ct) atoms_to_delete = [] ct.atom[self.nitrogen].label_format = "%UT" ct.atom[self.nitrogen].label_color = label_color residue_name = 'LYS ' prot_label = '' if len(self.hyd_coords[istate]) == 2: residue_name = 'LYN ' prot_label = 'Neutral' atoms_to_delete.append(self.hyds[2]) self.hyds[2] = None ct.atom[self.nitrogen].formal_charge = 0 else: ct.atom[self.nitrogen].formal_charge = 1 for ihyd, hyd_coords in enumerate(self.hyd_coords[istate]): ct.atom[self.hyds[ihyd]].xyz = hyd_coords if add_labels: label = prot_label else: label = '' if self.pka is not None and label_pkas: label += ' %.2f' % self.pka ct.atom[self.nitrogen].property[pka_property] = self.pka ct.atom[self.nitrogen].label_user_text = label self.assign_state_gap( ct.atom[self.nitrogen], state_gaps, report_gaps=verbose) residue_atoms = ct.getResidueAtoms(self.nitrogen) for atom in residue_atoms: atom.pdbres = residue_name self.current_state = istate self.treated = False return atoms_to_delete
[docs] def update_atom_indices(self, ct, new_indices): # Gotta be a better way... self.nitrogen = self.get_new_index(ct, self.nitrogen, new_indices) self.carbon = self.get_new_index(ct, self.carbon, new_indices) for i in range(len(self.hyds)): if self.hyds[i] is not None: self.hyds[i] = self.get_new_index(ct, self.hyds[i], new_indices) return
[docs] def get_heavies(self): return [self.nitrogen]
[docs] def get_state_sites(self, ct, istate): donors = [] for ihyd, hyd_coords in enumerate(self.hyd_coords[istate]): ct.atom[self.hyds[ihyd]].xyz = hyd_coords donors.append((self.nitrogen, self.hyds[ihyd])) return ([], donors, [], 0.0)
[docs] def get_view_atoms(self): return [self.nitrogen]
[docs] def get_penalty(self, istate): return self.penalties[istate]
[docs] def change_pka(self, pka, propka_pH): if pka is None: reference_pka = 15.00 else: reference_pka = pka self.pka = pka neutral = reference_pka < propka_pH new_penalties = [] changed = False for istate, hyd_coords in enumerate(self.hyd_coords): nhyds = len(hyd_coords) pH_penalty = 0 if (nhyds == 2 and not neutral) or (nhyds == 3 and neutral): pH_penalty = 100 new_penalty = pH_penalty + self.contact_penalties[istate] new_penalties.append(new_penalty) if self.penalties[istate] != new_penalty: changed = True self.penalties = new_penalties return changed
[docs] class water_changeable(changeable): asl = "(water) AND (atom.ele O)" redundancy_tolerance = 0.5
[docs] def __init__(self, ct, iatom): self.pka = None self.index = 0 self.name = '' self.num_user_states = 0 self.state_names = [] self.state_coordinates = [] self.oxygen = iatom self.hydrogen1 = None self.hydrogen2 = None self.valid = True self.name = self.get_residue_name(ct, iatom) self.type = 'WATER' self.interacts_with_nonwater = False for atom in ct.atom[iatom].bonded_atoms: if atom.atomic_number != 1: continue if self.hydrogen1 is None: self.hydrogen1 = int(atom) elif self.hydrogen2 is None: self.hydrogen2 = int(atom) else: report( LOG_EXTRA, 'Rejecting ' + self.name + ' due to excess hydrogens.') self.valid = False return if self.hydrogen1 is None or self.hydrogen2 is None: report(LOG_EXTRA, 'Rejecting ' + self.name + ' due to lack of hydrogens.') self.valid = False return self.treated = True self.locked = False self.current_state = 0 self.initial_state = 0
@property def nstates(self): """Return number of enumerated states""" return len(self.state_coordinates)
[docs] def enumerate_states(self, ct, acceptors, donors, pH, do_flips=True, include_initial=False): """ Generate discrete states for water, where a state is defined by the coordinates of its two hydrogens. :param ct: Structure :type ct: Structure :param acceptors: Acceptor atom indices :type acceptors: List[int] :param donors: Donor heavy-hydrogen atom indices :type donors: List[Tuple(int, int)] :param pH: Does nothing here :type pH: float :param do_flips: Does nothing here :type do_flips: bool :param include_initial: Include the current water orientation in the state list :type include_initial: bool """ # Get the atom objects for easy access to attributes oxygen = ct.atom[self.oxygen] hydrogen1 = ct.atom[self.hydrogen1] hydrogen2 = ct.atom[self.hydrogen2] # Include initial state. if include_initial: self.state_coordinates.append((hydrogen1.xyz, hydrogen2.xyz)) self.state_names.append("Initial") enumerator = WaterStateEnumerator(ct, ct.atom[self.oxygen], acceptors, donors) states = enumerator.enumerate_donor_donor_states() states += enumerator.enumerate_acceptor_acceptor_states() # Only enumerate additional less favorable states if the current # pool is not too large. if len(states) < 30: states += enumerator.enumerate_donor_states() states += enumerator.enumerate_acceptor_states() for state in states: self.state_coordinates.append(state.coordinates) self.state_names.append("Orientation") # In case no acceptors or donors are nearby and no states are # generated, just include the initial state. if self.nstates == 0: self.state_coordinates.append((hydrogen1.xyz, hydrogen2.xyz)) self.state_names.append("Initial") # Name the states if include_initial: for istate in range(1, len(self.state_names)): self.state_names[istate] += ' %d/%d' % (istate, self.nstates - 1) else: for istate in range(0, len(self.state_names)): self.state_names[istate] += ' %d/%d' % (istate + 1, self.nstates)
[docs] def add_current_to_states(self, ct): xyz1 = ct.atom[self.hydrogen1].xyz xyz2 = ct.atom[self.hydrogen2].xyz self.state_coordinates.append((xyz1, xyz2)) self.num_user_states += 1 self.state_names.append("User %d" % self.num_user_states)
[docs] def assign_state(self, ct, istate, add_labels=True, label_pkas=False, state_gaps=None, verbose=False): xyz1, xyz2 = self.state_coordinates[istate] ct.atom[self.hydrogen1].xyz = xyz1 ct.atom[self.hydrogen2].xyz = xyz2 self.current_state = istate return []
[docs] def update_atom_indices(self, ct, new_indices): # Gotta be a better way... self.oxygen = self.get_new_index(ct, self.oxygen, new_indices) self.hydrogen1 = self.get_new_index(ct, self.hydrogen1, new_indices) self.hydrogen2 = self.get_new_index(ct, self.hydrogen2, new_indices) return
[docs] def get_heavies(self): return [self.oxygen]
[docs] def get_state_sites(self, ct, istate): xyz1, xyz2 = self.state_coordinates[istate] ct.atom[self.hydrogen1].xyz = xyz1 ct.atom[self.hydrogen2].xyz = xyz2 return ([], [(self.oxygen, self.hydrogen1), (self.oxygen, self.hydrogen2)], [], 0.0)
[docs] def get_view_atoms(self): return [self.oxygen, self.hydrogen1, self.hydrogen2]
[docs] def get_penalty(self, istate): return 0.0
[docs] def get_adjustable_atoms(self): return [self.oxygen, self.hydrogen1, self.hydrogen2]
[docs] class hbond_cluster:
[docs] def get_residue_name(self, ct, iatom): resnum = ct.atom[iatom].resnum inscode = ct.atom[iatom].inscode chain = ct.atom[iatom].chain if chain == " ": chain = "_" pdbres = ct.atom[iatom].pdbres residue = chain + ":" + pdbres + str(resnum) + inscode residue.rstrip() return residue
[docs] def get_atom_name(self, ct, iatom): return self.get_residue_name(ct, iatom) + ':' + ct.atom[iatom].pdbname
[docs] def __init__(self): self.changeables = [] self.self_scores = [] self.pair_scores = None self.pair_scores_calculated = None self.self_charges = [] self.combinations = [] self.static_acceptors = [] self.static_donors = [] self.static_clashers = [] self.max_keep = 10 self.abort_sampling = False self.num_scored = 0 self.max_score = 100000 self.mem_cutoff = 10**1500 self.low_memory = False self.use_xtal = False self.torsion_penalty = False self.trouble = False self.failed = False # Save the cutoff values as an attribute as these can be changed # during optimization. self.polar_clash_cutoff = POLAR_CLASH_CUTOFF self.nonpolar_clash_cutoff = NONPOLAR_CLASH_CUTOFF
[docs] def setup_xtal(self, ct, interact, clustering_distance): list = [] self.need_xtal = [] for i in range(len(self.changeables)): list.append(False) for i in range(len(self.changeables)): self.need_xtal.append(list[:]) if self.use_xtal: for ichangeable in range(len(self.changeables)): for jchangeable in range(ichangeable + 1, len(self.changeables)): if interact[self.changeables[ichangeable].index][ self.changeables[jchangeable].index]: # See if they interact without xtal self.need_xtal[ichangeable][jchangeable] = True self.need_xtal[jchangeable][ichangeable] = True for iatom in self.changeables[ ichangeable].get_heavies(): for jatom in self.changeables[ jchangeable].get_heavies(): if ct.measure(iatom, jatom) <= clustering_distance: self.need_xtal[ichangeable][ jchangeable] = False self.need_xtal[jchangeable][ ichangeable] = False if self.need_xtal[ichangeable][jchangeable]: report( LOG_EXTRA, self.changeables[ichangeable].name + ' and ' + self.changeables[jchangeable].name + ' interact via crystal symmetry only') return
[docs] def optimize(self, ct, interact, static_donors, static_acceptors, static_clashers, max_comb, num_sequential_cycles, use_propka, propka_pH=7.0, xtal_ct=None): for changeable in self.changeables: if not changeable.treated: changeable.pre_treat(ct) report(LOG_FULL_DEBUG, ' Setting up local static donors and acceptors...') if xtal_ct is None: self.setup_local_static(ct, static_acceptors, static_donors, static_clashers) else: self.setup_local_static_alt(xtal_ct, static_acceptors, static_donors, static_clashers) report(LOG_FULL_DEBUG, ' Done.') potential_combinations = 1 for changeable in self.changeables: potential_combinations *= changeable.nstates if potential_combinations > self.mem_cutoff: self.low_memory = True report(LOG_BASIC, ' Running this cluster in low memory footprint mode.') self.combinations = [] if potential_combinations <= max_comb: report(LOG_BASIC) report(LOG_BASIC, ' Scoring exhaustively (%d potential combinations)...' % potential_combinations) for i in range(3): self.initialize_score_storage() self.pre_score_self(ct) if not self.low_memory: self.pre_score_pairs(ct, interact) self.score_exhaustively( ct, interact, find_all_solutions=True, tolerate_clashes=False) if len(self.combinations) == 0 and i != 2: report( LOG_BASIC, ' No valid conformations found. Trying again with less strict requirements.' ) self.trouble = True self.polar_clash_cutoff -= 0.5 self.nonpolar_clash_cutoff -= 0.5 else: break if len(self.combinations) == 0: report( LOG_BASIC, ' Still failed to find any valid conformations. Scoring again while tolerating some clashes.' ) self.score_exhaustively( ct, interact, find_all_solutions=True, tolerate_clashes=True) self.trouble = False self.failed = True else: report(LOG_BASIC) report( LOG_BASIC, ' Scoring via sequential iteration and hybridization (%d potential combinations)...' % potential_combinations) report(LOG_BASIC, ' Beginning sequential iteration...') self.initialize_score_storage() self.pre_score_self(ct) if not self.low_memory: self.pre_score_pairs(ct, interact) self.score_sequentially(ct, interact, num_sequential_cycles) self.expand_solutions(ct, interact) self.recombine_solutions(ct, interact) self.trouble = False self.failed = False self.trim_redundant_combinations() report(LOG_BASIC) report(LOG_BASIC, ' Cluster results:') for i, changeable in enumerate(self.changeables): if changeable.pka is not None: report(LOG_BASIC, ' ' + changeable.name + ' : ' + changeable.state_names[self.combinations[0][0][i]] + ' (pKa = ' + str(changeable.pka) + ')') else: report(LOG_BASIC, ' ' + changeable.name + ' : ' + changeable.state_names[self.combinations[0][0][i]]) report(LOG_EXTRA, ' Top %d results:' % len(self.combinations)) for combination in self.combinations: report(LOG_EXTRA, ' ' + str(combination)) #for i,changeable in enumerate(self.changeables): # report( LOG_EXTRA, ' ' + changeable.name + ' : ' + changeable.state_names[combination[0][i]] ) report(LOG_BASIC) return
[docs] def score_combination(self, ct, interact, states): # This is for calculating the energy of a single set of states, and assumes prescoring of self terms. energy = 0.0 for ichangeable, istate in enumerate(states): i_index = self.mapping[(ichangeable, istate)] energy += self.self_scores[i_index] (iacceptors, idonors, iclashers, icharge) = self.changeables[ichangeable].get_state_sites( ct, istate) for jchangeable in range(ichangeable + 1, len(self.changeables)): if not interact[self.changeables[ichangeable] .index][self.changeables[jchangeable] .index]: continue jstate = states[jchangeable] j_index = self.mapping[(jchangeable, jstate)] if self.low_memory: (jacceptors, jdonors, jclashers, jcharge ) = self.changeables[jchangeable].get_state_sites( ct, jstate) energy += self.score_pair( ct, iacceptors, idonors, iclashers, icharge, jacceptors, jdonors, jclashers, jcharge, use_xtal=self.need_xtal[ichangeable][jchangeable]) else: energy += self.pair_scores[i_index, j_index] return energy
[docs] def single_point(self, ct, interact, static_donors, static_acceptors, static_clashers, xtal_ct=None): # This is for calling from the GUI, and assumes nothing is precalculated. combination = [] for changeable in self.changeables: combination.append(changeable.current_state) if not changeable.treated: changeable.pre_treat(ct) if xtal_ct is None: self.setup_local_static(ct, static_acceptors, static_donors, static_clashers) else: self.setup_local_static_alt(xtal_ct, static_acceptors, static_donors, static_clashers) self.single_point_score = 0.0 for ichangeable in range(len(self.changeables)): #istate = self.changeables[ichangeable].current_state istate = combination[ichangeable] (iacceptors, idonors, iclashers, icharge) = self.changeables[ichangeable].get_state_sites( ct, istate) report(LOG_FULL_DEBUG, ' Calculating self score for ' + self.changeables[ichangeable].name + ' : ' + self.changeables[ichangeable].state_names[istate]) self.single_point_score += self.score_pair( ct, iacceptors, idonors, iclashers, icharge, self.static_acceptors, self.static_donors, self.static_clashers, 0.0, use_xtal=self.use_xtal) self.single_point_score += self.changeables[ ichangeable].get_penalty(istate) for jchangeable in range(ichangeable + 1, len(self.changeables)): #jstate = self.changeables[jchangeable].current_state jstate = combination[jchangeable] report(LOG_FULL_DEBUG, ' Calculating pair score for ' + self.changeables[ichangeable].name + ' : ' + self.changeables[ichangeable].state_names[istate] \ + ' and ' + self.changeables[jchangeable].name + ' : ' + self.changeables[jchangeable].state_names[jstate]) (jacceptors, jdonors, jclashers, jcharge) = self.changeables[jchangeable].get_state_sites( ct, jstate) self.single_point_score += self.score_pair( ct, iacceptors, idonors, iclashers, icharge, jacceptors, jdonors, jclashers, jcharge, use_xtal=self.need_xtal[ichangeable][jchangeable]) # Restore to proper state atoms_to_delete = [] for i, changeable in enumerate(self.changeables): atoms = changeable.assign_state(ct, combination[i], False) atoms_to_delete.extend(atoms) return atoms_to_delete
[docs] def setup_local_static_alt(self, ct, static_acceptors, static_donors, static_clashers): self.static_acceptors = [] self.static_donors = [] self.static_clashers = [] base_distance = 4.0 heavies = [] for changeable in self.changeables: heavies.extend(changeable.get_heavies()) max_interaction_distance = base_distance while static_acceptors and static_donors: atoms = analyze.evaluate_asl(ct, 'within %d atom.i_pa_atomindex %s' % \ (max_interaction_distance, ','.join(str(i) for i in heavies))) for acceptor in static_acceptors: if acceptor in atoms and acceptor not in self.static_acceptors: self.static_acceptors.append(acceptor) for donor in static_donors: if donor[0] in atoms and donor not in self.static_donors: self.static_donors.append(donor) for clasher in static_clashers: if clasher in atoms and clasher not in self.static_clashers: self.static_clashers.append(clasher) if len(self.static_acceptors) > 0 and len( self.static_donors) > 0: break max_interaction_distance += 0.5 return
[docs] def setup_local_static(self, ct, static_acceptors, static_donors, static_clashers): self.static_acceptors = [] self.static_donors = [] self.static_clashers = [] base_distance = 4.0 heavies = [] for changeable in self.changeables: for atom in changeable.get_heavies(): heavies.append(atom) if len(static_acceptors) > 0: max_interaction_distance = base_distance while len(self.static_acceptors) == 0: for acceptor in static_acceptors: for atom in heavies: if measure( ct, atom1=atom, atom2=acceptor, use_xtal=self.use_xtal ) <= max_interaction_distance: self.static_acceptors.append(acceptor) break max_interaction_distance += 0.5 if len(static_donors) > 0: max_interaction_distance = base_distance while len(self.static_donors) == 0: for donor in static_donors: for atom in heavies: if measure( ct, atom1=atom, atom2=donor[0], use_xtal=self.use_xtal ) <= max_interaction_distance: self.static_donors.append(donor) break max_interaction_distance += 0.5 if len(static_clashers) > 0: max_interaction_distance = base_distance for clasher in static_clashers: for atom in heavies: if measure( ct, atom1=atom, atom2=clasher, use_xtal=self.use_xtal ) <= max_interaction_distance: self.static_clashers.append(clasher) break max_interaction_distance += 0.5 return
[docs] def initialize_score_storage(self): # A mapping from the 2D array of changeable x state to a 1D array. self.mapping = {} i = 0 for ispecies in range(len(self.changeables)): for istate in range(self.changeables[ispecies].nstates): self.mapping[(ispecies, istate)] = i i += 1 self.self_scores = [] self.self_charges = [] for i in range(len(self.mapping)): self.self_scores.append(0.0) self.self_charges.append(0) if self.low_memory: self.pair_scores = None self.pair_scores_calculated = None else: # Initialize an nxn sparse matrix of pair interactions. float_list = [] boolean_list = [] self.pair_scores = dok_matrix((len(self.mapping), len(self.mapping))) self.pair_scores_calculated = dok_matrix((len(self.mapping), len(self.mapping))) return
[docs] def pre_score_self(self, ct): # Setup a crystal mate ct if necessary. if self.use_xtal: # Record all of the acceptors, donors and clashers. working_ct = copy.copy(ct) annotate_structure_interactors( working_ct, self.static_acceptors, self.static_donors, self.static_clashers) # Only want to bother generating crystal mates for atoms that will be used. needed_atoms = analyze.evaluate_asl( working_ct, f'within 5 ((atom.{ACCEPTOR_PROPERTY}) OR (atom.{CLASHER_PROPERTY}) OR (atom.{DONOR_PROPERTY}))' ) if needed_atoms: needed_atoms_ct = working_ct.extract( needed_atoms, copy_props=True) mates = analyze.generate_crystal_mates( needed_atoms_ct, radius=4.0) # The first one is just a copy of the original mates.pop(0) for mate in mates: working_ct.extend(mate) # Find the new indices. working_static_acceptors = analyze.evaluate_asl( working_ct, f'(atom.{ACCEPTOR_PROPERTY})') working_static_clashers = analyze.evaluate_asl( working_ct, f'(atom.{CLASHER_PROPERTY})') working_static_donors = [] donor_atoms = analyze.evaluate_asl(working_ct, f'(atom.{DONOR_PROPERTY})') for H in donor_atoms: if working_ct.atom[H].atomic_number != 1: continue for heavy in working_ct.atom[H].bonded_atoms: working_static_donors.append((int(heavy), H)) # Recover the ions metal_ions = analyze.evaluate_asl(working_ct, f'(atom.{ION_PROPERTY})') for ion in metal_ions: working_static_donors.append((ion, ion)) else: working_ct = ct working_static_acceptors = self.static_acceptors working_static_donors = self.static_donors working_static_clashers = self.static_clashers # Want to iterate in sorted order. items = sorted(self.mapping.items(), key=operator.itemgetter(1)) for item in items: (ichangeable, istate) = item[0] if self.changeables[ichangeable].locked and self.changeables[ichangeable].current_state != istate: report(LOG_FULL_DEBUG, self.changeables[ichangeable].name + ' not calculating state ' + str(istate)) continue # Should be okay to use working_ct here, even though get_state_sites alters it # for rotatables. The alteration only need to persist through score_pair(). (iacceptors, idonors, iclashers, icharge) = self.changeables[ichangeable].get_state_sites( working_ct, istate) if not self.changeables[ichangeable].locked: report(LOG_FULL_DEBUG) report(LOG_FULL_DEBUG, ' Calculating self score for ' + self.changeables[ichangeable].name + ' : ' + self.changeables[ichangeable].state_names[istate]) # Get the score (don't need use_xtal even if xtal is on, because we have explicit crystalmates. score = self.score_pair( working_ct, iacceptors, idonors, iclashers, icharge, working_static_acceptors, working_static_donors, working_static_clashers, 0.0, use_xtal=False) penalty = self.changeables[ichangeable].get_penalty(istate) i = self.mapping[(ichangeable, istate)] self.self_scores[i] = score + penalty self.self_charges[i] = icharge report(LOG_FULL_DEBUG, ' Score = ' + str(score)) report(LOG_FULL_DEBUG, ' Penalty = ' + str(penalty)) report(LOG_FULL_DEBUG, ' Total Self Score = ' + str(self.self_scores[i])) return
[docs] def pre_score_pairs(self, ct, interact): # Want to iterate in sorted order. items = sorted(self.mapping.items(), key=operator.itemgetter(1)) for item in items: (ichangeable, istate) = item[0] i = self.mapping[(ichangeable, istate)] (iacceptors, idonors, iclashers, icharge) = self.changeables[ichangeable].get_state_sites( ct, istate) # Want to iterate in sorted order. items = sorted(self.mapping.items(), key=operator.itemgetter(1)) for item in items: (jchangeable, jstate) = item[0] if jchangeable == ichangeable: continue if not interact[self.changeables[ichangeable] .index][self.changeables[jchangeable] .index]: continue if self.changeables[jchangeable].locked and self.changeables[jchangeable].current_state != jstate: report(LOG_FULL_DEBUG, self.changeables[jchangeable].name + ' not calculating state ' + str(jstate)) continue j = self.mapping[(jchangeable, jstate)] if j < i: continue (jacceptors, jdonors, jclashers, jcharge) = self.changeables[jchangeable].get_state_sites( ct, jstate) report(LOG_FULL_DEBUG) report(LOG_FULL_DEBUG, ' Calculating pair score for ' + self.changeables[ichangeable].name + ' : ' + self.changeables[ichangeable].state_names[istate] \ + ' and ' + self.changeables[jchangeable].name + ' : ' + self.changeables[jchangeable].state_names[jstate]) score = self.score_pair( ct, iacceptors, idonors, iclashers, icharge, jacceptors, jdonors, jclashers, jcharge, use_xtal=self.need_xtal[ichangeable][jchangeable]) if score != 0.0: self.pair_scores[i, j] = score self.pair_scores[j, i] = score self.pair_scores_calculated[i, j] = True self.pair_scores_calculated[j, i] = True report( LOG_FULL_DEBUG, ' Total pair score = ' + str(self.pair_scores[i, j])) report(LOG_FULL_DEBUG) return
[docs] def score_pair(self, ct, iacceptors, idonors, iclashers, icharge, jacceptors, jdonors, jclashers, jcharge, use_xtal=False): score = 0.0 # Donor-Clasher. clasher_list1 = iclashers + [donor[1] for donor in idonors] clasher_list2 = jclashers + [donor[1] for donor in jdonors] for iclash, clash1 in enumerate(clasher_list1): assert clash1 is not None for jclash, clash2 in enumerate(clasher_list2): assert clash2 is not None distance = measure( ct, atom1=clash1, atom2=clash2, use_xtal=use_xtal) # If either clasher is part of the static, use a less stringent cutoff. if iclash < len(iclashers) or jclash < len(jclashers): cutoff_to_use = self.nonpolar_clash_cutoff else: cutoff_to_use = self.polar_clash_cutoff clashing_term = self.calculate_clash_term( distance, cutoff_to_use) if clashing_term > 0: if log_level == LOG_SCORE_DEBUG: name1 = self.get_atom_name(ct, clash1) name2 = self.get_atom_name(ct, clash2) msg = f" Clash: {name1} {name2} with energy {clashing_term:.1f}" report(LOG_SCORE_DEBUG, msg) score += clashing_term # idonors, jdonors for idonor in idonors: for jdonor in jdonors: score += self.score_donor_donor( ct, idonor[0], idonor[1], jdonor[0], jdonor[1], use_xtal=use_xtal) #if score > 10.0: # return score # idonors, jacceptors for donor in idonors: for acceptor in jacceptors: score += self.score_donor_acceptor( ct, donor[0], donor[1], acceptor, use_xtal=use_xtal) # iacceptors, jdonors for acceptor in iacceptors: for jdonor in jdonors: score += self.score_donor_acceptor( ct, jdonor[0], jdonor[1], acceptor, use_xtal=use_xtal) return score
[docs] def score_donor_acceptor(self, ct, donor_heavy, donor_hydrogen, acceptor_heavy, use_xtal=False): heavy_heavy_distance = measure( ct, atom1=donor_heavy, atom2=acceptor_heavy, use_xtal=use_xtal) if heavy_heavy_distance > MAX_HEAVY_HEAVY_INTERACTION_DISTANCE: return 0.0 if heavy_heavy_distance == 0.0: name1 = self.get_atom_name(ct, acceptor_heavy) name2 = self.get_atom_name(ct, donor_heavy) report(LOG_BASIC, f'WARNING: Overlapping atoms found: {name1} - {name2}') return 0.0 # Acceptor-metal interaction score is only based on distance if donor_heavy == donor_hydrogen: return self.calculate_distance_term(heavy_heavy_distance) angle = measure( ct, atom1=donor_hydrogen, atom2=donor_heavy, atom3=acceptor_heavy, use_xtal=use_xtal) angle_term = self.calculate_angle_term(angle) if angle_term == 0: return 0.0 distance_term = self.calculate_distance_term(heavy_heavy_distance) score = distance_term * angle_term return score
[docs] def score_donor_donor(self, ct, donor1_heavy, donor1_hydrogen, donor2_heavy, donor2_hydrogen, use_xtal=False): heavy1_heavy2_distance = measure( ct, atom1=donor1_heavy, atom2=donor2_heavy, use_xtal=use_xtal) if heavy1_heavy2_distance > MAX_HEAVY_HEAVY_INTERACTION_DISTANCE: return 0.0 if heavy1_heavy2_distance == 0.0: name1 = self.get_atom_name(ct, donor1_heavy) name2 = self.get_atom_name(ct, donor2_heavy) report(LOG_BASIC, f'WARNING: Overlapping atoms found: {name1} - {name2}') return 0.0 theta1 = measure( ct, atom1=donor1_hydrogen, atom2=donor1_heavy, atom3=donor2_heavy, use_xtal=use_xtal) theta2 = measure( ct, atom1=donor2_hydrogen, atom2=donor2_heavy, atom3=donor1_heavy, use_xtal=use_xtal) hydrogen_distance = measure( ct, atom1=donor1_hydrogen, atom2=donor2_hydrogen, use_xtal=use_xtal) clashing_term = self.calculate_clash_term(hydrogen_distance, self.polar_clash_cutoff) # Choose the angle with the best hydrogen bond interaction angle = min(abs(theta1), abs(theta2)) angle_term = self.calculate_angle_term(angle) if angle_term == 0: return clashing_term distance_term = self.calculate_distance_term(heavy1_heavy2_distance) score = angle_term * distance_term + clashing_term return score
[docs] @staticmethod def calculate_distance_term(distance): """Return distance dependent part of the hydrogen-bond potential functions.""" return -(3.0 / distance)**3
[docs] @staticmethod def calculate_angle_term(angle): """ Return angle dependent part of the hydrogen bond potential. :param angle: Angle in degrees formed by H-D-A, with Hydrogen, Donor and Acceptor :param angle: float :return: Score :rtype: float """ if angle >= 90.0: return 0 return math.cos(math.radians(angle))**2
[docs] @staticmethod def calculate_clash_term(distance, cutoff): """Return clash term""" if distance < cutoff: # Distinghuish between bad and very bad clashes return 50.0 * (1 + (cutoff - distance)) return 0.0
[docs] def score_exhaustively(self, ct, interact, find_all_solutions=True, tolerate_clashes=False): def recurse(changeables, combination, energy, charge, ct, interact, find_all_solutions, tolerate_clashes): if len(combination) > 0: # Abort this branch if we've picked up any clashes with the static. ichangeable = len(combination) - 1 istate = combination[-1] i_index = self.mapping[(ichangeable, istate)] (iacceptors, idonors, iclashers, icharge) = self.changeables[ichangeable].get_state_sites( ct, istate) self_energy = self.self_scores[i_index] if self_energy > 10.0 and not tolerate_clashes: report(LOG_DEBUG, ' Self clash.') return -1 else: energy += self_energy charge += self.self_charges[i_index] # Abort this branch if we've picked up any clashes with an existing changeable. for jchangeable in range(len(combination) - 2, -1, -1): jstate = combination[jchangeable] j_index = self.mapping[(jchangeable, jstate)] if not interact[self.changeables[ichangeable] .index][self.changeables[jchangeable] .index]: continue if self.low_memory: report(LOG_FULL_DEBUG) report(LOG_FULL_DEBUG, 'Calculating pair score for ' + self.changeables[ichangeable].name + ' : ' + self.changeables[ichangeable].state_names[istate] \ + ' and ' + self.changeables[jchangeable].name + ' : ' + self.changeables[jchangeable].state_names[jstate]) (jacceptors, jdonors, jclashers, jcharge ) = self.changeables[jchangeable].get_state_sites( ct, jstate) pair_energy = self.score_pair( ct, iacceptors, idonors, iclashers, icharge, jacceptors, jdonors, jclashers, jcharge, use_xtal=self.need_xtal[ichangeable][ jchangeable]) else: if not self.pair_scores_calculated[i_index, j_index]: report(LOG_FULL_DEBUG) report(LOG_FULL_DEBUG, 'Calculating pair score for ' + self.changeables[ichangeable].name + ' : ' + self.changeables[ichangeable].state_names[istate] \ + ' and ' + self.changeables[jchangeable].name + ' : ' + self.changeables[jchangeable].state_names[jstate]) (jacceptors, jdonors, jclashers, jcharge) = self.changeables[ jchangeable].get_state_sites(ct, jstate) score = self.score_pair( ct, iacceptors, idonors, iclashers, icharge, jacceptors, jdonors, jclashers, jcharge, use_xtal=self.need_xtal[ichangeable][ jchangeable]) if score != 0.0: self.pair_scores[i_index, j_index] = score self.pair_scores[j_index, i_index] = score self.pair_scores_calculated[i_index, j_index] = True self.pair_scores_calculated[j_index, i_index] = True pair_energy = self.pair_scores[i_index, j_index] if pair_energy > 10.0 and not tolerate_clashes: report(LOG_DEBUG, ' Clash with changeable %d - %s.' % (jchangeable, self.changeables[jchangeable].name)) return jchangeable else: energy += pair_energy if not find_all_solutions: report(LOG_DEBUG, ' Current combination: %s' % str(combination)) if changeables: # Delve another level down. clashes = [] if changeables[0].locked: new_combination = combination + [ changeables[0].current_state ] self.num_scored += 1 if not find_all_solutions and self.num_scored > self.max_score: report( LOG_BASIC, ' Failed to find valid initial combination in alotted time.' ) self.abort_sampling = True return clash = recurse(changeables[1:], new_combination, energy, charge, ct, interact, find_all_solutions, tolerate_clashes) clashes.append(clash) if self.abort_sampling: return None else: for state in range(changeables[0].nstates): new_combination = combination + [state] self.num_scored += 1 if not find_all_solutions and self.num_scored > self.max_score: report( LOG_DEBUG, ' Failed to find valid initial combination in alotted time.' ) self.abort_sampling = True return report( LOG_DEBUG, ' Trying state %d (%s) for changeable %d (%s).' % (state, changeables[0].state_names[state], len(combination), changeables[0].name)) clash = recurse(changeables[1:], new_combination, energy, charge, ct, interact, find_all_solutions, tolerate_clashes) clashes.append(clash) if self.abort_sampling: return None if self.abort_to_level is not None: if self.abort_to_level == len(combination): report(LOG_DEBUG, ' Ceasing abort.') self.abort_to_level = None else: return if None not in clashes: highest_clash = 0 for clash in clashes: if clash > highest_clash: highest_clash = clash self.abort_to_level = highest_clash report( LOG_DEBUG, ' Aborting to level %d' % self.abort_to_level) else: if energy > 10.0 and not tolerate_clashes: return #if self.torsion_penalty: # penalty = 0.0 # for i,changeable in enumerate(self.changeables): # penalty += changeable.get_penalty( combination[i] ) # energy += penalty # Finished. report(LOG_DEBUG, ' Final score = ' + str(energy) + ' (' + str(combination) + ')') self.combinations.append((combination, charge, energy)) if not find_all_solutions: # Only wanted one. self.abort_sampling = True return None if len(self.combinations) > self.max_keep: self.combinations.sort(key=operator.itemgetter(-1)) self.combinations = self.combinations[0:self.max_keep] self.abort_sampling = False self.abort_to_level = None self.num_scored = 0 energy = 0.0 charge = 0.0 recurse(self.changeables, [], energy, charge, ct, interact, find_all_solutions, tolerate_clashes) # Final sort. self.combinations.sort(key=lambda item: item[-1])
[docs] def score_sequentially(self, ct, interact, num_sequential_cycles): """ This routine uses an algorithm similar to Prime's iteration to convergence. Starting from a random configuration, each species is optimized in turn, keeping the others fixed in their current state. This continues until the system reaches convergence (no more changes in the most optimal state for all residues). :param ct: input/output structure, will be modified :type ct: schrodigner.Structure :param interact: ?? :type interact: ?? :param num_sequential_cycles: Number of cycles of randomization and optimization to conduct :type num_sequential_cycles: int """ report(LOG_BASIC, ' Stage 1: Sequential iteration with hybridization') local_combinations = [] randomize = True num_failed_hybrids = 0 # Passes with different random starting points for iter in range(num_sequential_cycles): # Whether or not to create a random starting point, or just work with the current one if randomize: comb = [ random.randint(0, self.changeables[i].nstates - 1) for i in range(len(self.changeables)) ] # Main iteration routine. (total_energy, problem_children) = self.iterate_to_convergence( ct, interact, comb) # Append the converged solution local_combinations.append(((comb, 0.0, total_energy), problem_children)) report( LOG_EXTRA, ' Energy for this pass: ' + str(total_energy) + ' (' + str(len(problem_children)) + ' problematic members)') for problem in problem_children: report(LOG_EXTRA, ' ' + self.changeables[problem].name) #for combi in local_combinations: # print str(combi) # Successfully found a good solution. Append it to the list and start again. if len(problem_children) == 0: report( LOG_EXTRA, ' No problematic regions for this structure, storing it and starting again.' ) self.combinations.append((comb, 0.0, total_energy)) local_combinations = [] randomize = True num_failed_hybrids = 0 continue # Try to fix the best solution with pieces of the other solutions. if (iter >= 3) and (iter % 2 == 1): comb = self.create_hybrid(local_combinations, interact) if comb is None: num_failed_hybrids += 1 if num_failed_hybrids == 5: report( LOG_EXTRA, ' Too many failed attempts at hybrid. Storing this solution and starting again.' ) local_combinations.sort(key=lambda x: x[0][-1]) self.combinations.append(local_combinations[0][0]) local_combinations = [] randomize = True num_failed_hybrids = 0 report(LOG_EXTRA, ' Randomizing...') randomize = True else: num_failed_hybrids = 0 randomize = False else: randomize = True # If none were found, at least add the current ones. if len(self.combinations) == 0: for combination in local_combinations: self.combinations.append(combination[0]) self.combinations.sort(key=operator.itemgetter(-1)) report(LOG_BASIC, ' Best solution after Stage 1: ' + str(self.combinations[0][2])) report(LOG_EXTRA, ' ' + str(self.combinations[0][0])) return
[docs] def expand_solutions(self, ct, interact): """ This takes an existing set of good solutions and generates more by deconverging them and then iterating them back to convergence. Generates at least 10 new solutions. """ report(LOG_BASIC, ' Stage 2: Diversification of existing solutions') new_combinations = [] while len(new_combinations) < 10: for comb in self.combinations: (local_comb, charge, initial_energy) = copy.deepcopy(comb) report(LOG_EXTRA, ' Diverging solution with initial energy of ' + str(initial_energy)) self.deconverge(ct, interact, local_comb) report(LOG_EXTRA, ' Now reconverging...') (final_energy, problem_children) = self.iterate_to_convergence( ct, interact, local_comb) report(LOG_EXTRA, ' Energy for reconverged solution: ' + str(final_energy)) new_combinations.append((local_comb, 0.0, final_energy)) self.combinations.extend(new_combinations) self.combinations.sort(key=operator.itemgetter(-1)) report(LOG_BASIC, ' Best solution after Stage 2: ' + str(self.combinations[0][2])) report(LOG_EXTRA, ' ' + str(self.combinations[0][0])) return
[docs] def recombine_solutions(self, ct, interact): """ This is similar to score_sequentially, but begins with some pre-existing good solutions in self.combinations, and then creates hybrids to try to improve on them. """ report(LOG_BASIC, ' Stage 3: Recombination of existing solutions') # Set up the list of combinations, plus their problem children. # Iterations should do nothing. local_combinations = [] for (comb, charge, energy) in self.combinations: (total_energy, problem_children) = self.iterate_to_convergence( ct, interact, comb, problem_cutoff=0.0) local_combinations.append(((comb, 0.0, total_energy), problem_children)) for iter in range(30): comb = self.create_hybrid( local_combinations, interact, random_scaffold=True) if comb is None: continue (total_energy, problem_children) = self.iterate_to_convergence( ct, interact, comb, problem_cutoff=-0.5) report( LOG_EXTRA, ' Energy for this pass: ' + str(total_energy) + ' (' + str(len(problem_children)) + ' problematic members)') local_combinations.append(((comb, 0.0, total_energy), problem_children)) for comb in local_combinations: self.combinations.append(comb[0]) self.combinations.sort(key=operator.itemgetter(-1)) report(LOG_BASIC, ' Best solution after Stage 3: ' + str(self.combinations[0][2])) report(LOG_EXTRA, ' ' + str(self.combinations[0][0])) return
[docs] def deconverge(self, ct, interact, comb, problem_cutoff=50.0): """ This starts with what is assumed to be a good solution, and then randomizes the states, but not to anything that produces a problem. """ for (ichangeable, changeable) in enumerate(self.changeables): states = [i for i in range(changeable.nstates)] random.shuffle(states) for istate in states: state_energy = 0.0 i_index = self.mapping[(ichangeable, istate)] (iacceptors, idonors, iclashers, icharge) = self.changeables[ichangeable].get_state_sites( ct, istate) state_energy += self.self_scores[i_index] for jchangeable in range(len(comb) - 1, -1, -1): if ichangeable == jchangeable: continue if not interact[self.changeables[ichangeable] .index][self.changeables[jchangeable] .index]: continue jstate = comb[jchangeable] j_index = self.mapping[(jchangeable, jstate)] if self.low_memory: (jacceptors, jdonors, jclashers, jcharge ) = self.changeables[jchangeable].get_state_sites( ct, jstate) state_energy += self.score_pair( ct, iacceptors, idonors, iclashers, icharge, jacceptors, jdonors, jclashers, jcharge, use_xtal=self.need_xtal[ichangeable][ jchangeable]) else: state_energy += self.pair_scores[i_index, j_index] if state_energy < problem_cutoff: comb[ichangeable] = istate break return
[docs] def iterate_to_convergence(self, ct, interact, comb, problem_cutoff=50.0): """ This iterates the combination 'comb' to convergence. Maximum of 10 cycles. """ # Create a list of indices for shuffling later. Using a different # order each time may help avoid a few local minima. changeable_list = [i for i in range(len(self.changeables))] for ipass in range(10): random.shuffle(changeable_list) problem_children = [] nchanges = 0 #for ichangeable,changeable in enumerate(self.changeables): for ichangeable in changeable_list: changeable = self.changeables[ichangeable] if changeable.locked: comb[ichangeable] = changeable.current_state else: best_state = -1 best_state_energy = 1000000 for istate in range(changeable.nstates): state_energy = 0.0 i_index = self.mapping[(ichangeable, istate)] (iacceptors, idonors, iclashers, icharge ) = self.changeables[ichangeable].get_state_sites( ct, istate) state_energy += self.self_scores[i_index] for jchangeable in range(len(comb) - 1, -1, -1): if ichangeable == jchangeable: continue if not interact[self.changeables[ichangeable] .index][self. changeables[jchangeable] .index]: continue jstate = comb[jchangeable] j_index = self.mapping[(jchangeable, jstate)] if self.low_memory: (jacceptors, jdonors, jclashers, jcharge) = self.changeables[ jchangeable].get_state_sites( ct, jstate) state_energy += self.score_pair( ct, iacceptors, idonors, iclashers, icharge, jacceptors, jdonors, jclashers, jcharge, use_xtal=self.need_xtal[ichangeable][ jchangeable]) else: state_energy += self.pair_scores[i_index, j_index] if state_energy < best_state_energy: best_state = istate best_state_energy = state_energy if best_state != comb[ichangeable]: nchanges += 1 comb[ichangeable] = best_state report(LOG_DEBUG, ' Best score for ' + changeable.name + ' is ' + str(best_state_energy) + ' for state ' + changeable.state_names[best_state]) if best_state_energy > problem_cutoff: problem_children.append(ichangeable) report(LOG_EXTRA, ' Number of state flips: ' + str(nchanges)) if nchanges == 0: break total_energy = self.score_combination(ct, interact, comb) return (total_energy, problem_children)
[docs] def create_hybrid(self, local_combinations, interact, random_scaffold=False): """ This takes the lowest energy solution, and for each problematic region it searches other solutions (in random order) for any which may have had better luck for just that part of the overall cluster. It then splices those solutions into the lowest energy one. If random_scaffold, then it selects a random solution as the basis in stead of the lowest energy one. """ # Try to fix the best solution with pieces of the other solutions report(LOG_EXTRA, ' Creating hybrid solution...') local_combinations.sort(key=lambda x: x[0][-1]) fixes = {} neighbours = {} if random_scaffold: iscaffold = random.randint(0, len(local_combinations) - 1) else: iscaffold = 0 for problem_child in local_combinations[iscaffold][1]: # Randomize the order in which solutions are searched randomized_combinations = copy.deepcopy(local_combinations) random.shuffle(randomized_combinations) for solution in randomized_combinations: if problem_child not in solution[1]: fixes[problem_child] = solution[0][0][problem_child] for ichangeable in range(len(self.changeables)): if interact[self.changeables[problem_child].index][ self.changeables[ichangeable].index]: neighbours[ichangeable] = solution[0][0][ ichangeable] break fixes.update(neighbours) if len(fixes) != 0: comb = copy.deepcopy(local_combinations[0][0][0]) for (problem_child, istate) in fixes.items(): comb[problem_child] = istate randomize = False return comb else: report(LOG_EXTRA, ' No valid hybrid found.') return None
[docs] def trim_redundant_combinations(self): nonredund_combs = [] existing_scores = set() for comb in self.combinations: if comb[2] not in existing_scores: nonredund_combs.append(comb) existing_scores.add(comb[2]) self.combinations = nonredund_combs return
[docs] def assign_combination(self, ct, icombination, add_labels, label_pkas, verbose=False): """ Assign a given combination to this cluster :param ct: The structure to operate on :type ct:schrodinger.Structure :param icombination: The index of the combination to assign or if this number is larger then the stored combinations, just keep the current state :type icombinations: integer :param add_labels:Whether to add labels to atoms to be seen in maestro with the current protonation state :type add_labels:Boolean :param label_pka:Whether to add labels for the pKa of each residue :type label_pka:Boolean :param verbose:Whether to report additional information to the log file about the combination chosen :type verbose:Boolean """ atoms_to_delete = [] if icombination < len(self.combinations): for ichangeable in range(len(self.changeables)): state_gaps = self.determine_gap(icombination, ichangeable) atoms = self.changeables[ichangeable].assign_state( ct, self.combinations[icombination][0][ichangeable], add_labels, label_pkas, state_gaps=state_gaps, verbose=verbose) atoms_to_delete.extend(atoms) else: for ichangeable in range(len(self.changeables)): atoms = self.changeables[ichangeable].assign_state( ct, self.changeables[ichangeable].current_state, add_labels, label_pkas) atoms_to_delete.extend(atoms) return atoms_to_delete
[docs] def determine_gap(self, icombination, ichangeable): """ Create a dictionary with the energy gaps to each of the various states. States that differ by only a hydrogen rotation are not considered unique :type icombination: integer :param icombination: the combination to use as the zero point. In most situations this will be the lowest energy combination ( 0 when sorted) :type ichangeable: integer :param ichangeable: The residue number ( or position number) within the cluster which will be analyzed :rparam: dictionary where the key is the name of the state or "Default" when the state is one of the staggers :rtype: dictionary with a key of string and value of a float """ def dedup_state(state): if (re.match(r"Stagger \d+ \d+/\d+", state) or re.match("Stagger", state) or re.match(r"Orientation \d+/\d+", state) or re.match(r"Default \d+ \d+/\d+", state) or re.match(r"Default \d+/\d+", state)): return "Default" return state changeable = self.changeables[ichangeable] state_gaps = {} min_score = self.combinations[icombination][2] for combination in self.combinations: state_name = dedup_state( changeable.state_names[combination[0][ichangeable]]) if state_name not in state_gaps: state_gaps[state_name] = combination[2] - min_score else: state_gaps[state_name] = min(combination[2] - min_score, state_gaps[state_name]) return state_gaps
# ProtAssign class start
[docs] def __init__( self, ct, interactive=False, do_flips=True, asl='', noprot_asl='', atoms=[], # noqa: M511 use_xtal=False, torsion_penalty=False, sample_waters=True, sample_acids=True, freeze_existing=False, include_initial=False, max_comb=10000, num_sequential_cycles=30, max_cluster_size=None, # The maximum number of residues # to include in each cluster or None # if no such maximum should be used logging_level=DEFAULT_LOG_LEVEL, quiet_flag=False, debug_flag=False, add_labels=True, label_pkas=False, pH=pH_neutral, use_propka=True, propka_pH=7.0, user_states=[], # noqa: M511 minimize=False): global log_level log_level = logging_level # Alternate (backwards compatible) way of specifying output level. if quiet_flag: log_level = LOG_NONE elif debug_flag: log_level = LOG_DEBUG self.species = [ self.amide_changeable, self.amine_changeable, self.histidine_changeable, self.rotatable_changeable ] self.interactive = interactive self.do_flips = do_flips self.noprot_asl = noprot_asl self.use_xtal = use_xtal self.torsion_penalty = torsion_penalty self.freeze_existing = freeze_existing self.include_initial = include_initial self.max_comb = max_comb self.num_sequential_cycles = num_sequential_cycles self.max_cluster_size = max_cluster_size self.add_labels = add_labels self.label_pkas = label_pkas self.changeables = [] self.clusters = [] self.interact = [] self.donors = [] self.acceptors = [] self.clashers = [] self.xtal_ct = None self.pH = pH self.use_propka = use_propka self.propka_pH = propka_pH self.propka_failed = False self.user_states = user_states self.minimize = minimize if sample_acids: self.species.append(self.carboxyl_changeable) if sample_waters: self.species.append(self.water_changeable) # If using PropKa, don't want to use the coarse pH rules if self.use_propka: self.pH = None # Record it though self.pH_backup = pH # Fix some element types, in case they're wrong. self.fix_elements(ct) # Set up the atoms to process. asl_atoms = [] if asl == '' and len(atoms) == 0: asl = 'all' if asl: asl_atoms = analyze.evaluate_asl(ct, asl) self.target_atoms = set(atoms + asl_atoms) H_atoms = analyze.evaluate_asl(ct, 'atom.ele H') self.target_hydrogens = set(H_atoms).intersection(self.target_atoms) if self.freeze_existing: self.frozen_hydrogens = self.target_hydrogens.copy() self.freeze_existing_hydrogens(ct) else: self.frozen_hydrogens = set() self.setup(ct) self.set_user_states(ct) if not self.interactive: self.optimize(ct) self.cleanup(ct) self.summarize_pkas() return
[docs] def fix_elements(self, ct): for atom in ct.atom: if atom.pdbname in [' OD1', ' OE1', ' OD2', ' OE2']: atom.atomic_number = 8 elif atom.pdbname in [' ND1', ' ND2', ' NE2']: atom.atomic_number = 7 elif atom.pdbname in [' CG ', ' CD ', ' CD2', ' CE1']: atom.atomic_number = 6 ct.retype() return
[docs] def freeze_existing_hydrogens(self, ct): atoms_to_remove = set() labile_H_atoms = set( analyze.evaluate_asl( ct, 'atom.ptype \" HD1\" OR atom.ptype \" HE2\" OR atom.ptype \"1HD2\" OR atom.ptype \"2HD2\" OR atom.ptype \"1HE2\" OR atom.ptype \"2HE2\" OR atom.ptype \"HD21\" OR atom.ptype \"HD22\" OR atom.ptype \"HE21\" OR atom.ptype \"HE22\"' )) for hydrogen in self.target_hydrogens.intersection(labile_H_atoms): if hydrogen in atoms_to_remove: continue residue_atoms = ct.getResidueAtoms(hydrogen) for residue_atom in residue_atoms: atom = int(residue_atom) atoms_to_remove.add(atom) for hydrogen in self.target_hydrogens.difference(labile_H_atoms): if hydrogen in atoms_to_remove: continue residue_atoms = ct.getResidueAtoms(hydrogen) for residue_atom in residue_atoms: atom = int(residue_atom) if ct.areBound(hydrogen, atom): atoms_to_remove.add(atom) self.target_atoms.difference_update(atoms_to_remove | self.frozen_hydrogens) return
[docs] def setup(self, ct): build.add_hydrogens(ct) self.extend_targeted_to_hyds(ct) self.identify_species(ct) if self.use_propka: try: pkas = self.run_propka( self.changeables, ct, use_xtal=self.use_xtal) except PropKaException: report( LOG_BASIC, 'WARNING: PropKa failed on this structure - disabling use of PropKa.' ) self.use_propka = False self.pH = self.pH_backup self.propka_failed = True ct.property['b_pa_PROPKA_failed'] = 1 self.remove_zero_order_bonds(ct) for item in self.changeables: item.pre_treat_1(ct) build.add_hydrogens(ct) for item in self.changeables: item.pre_treat_2(ct) self.record_current_indices(ct) self.enumerate_changeable_states(ct) self.lock_protonation_states(ct) self.cluster(ct) if self.use_propka: self.apply_pkas(self.changeables, pkas, self.propka_pH) if self.interactive: # Reset back to initial states. atoms_to_delete = [] for changeable in self.changeables: atoms = changeable.assign_state( ct, changeable.initial_state, add_labels=False, label_pkas=self.label_pkas) if atoms is not None: atoms_to_delete.extend(atoms) self.delete_atoms(ct, atoms_to_delete) return
[docs] def remove_zero_order_bonds(self, ct): num_zobs = 0 for item in self.changeables: atoms = item.get_adjustable_atoms() for iatom in atoms: if iatom is None: continue deleting = True while deleting: deleting = False for bond in ct.atom[iatom].bond: if bond.order == 0: bond.atom1.deleteBond(bond.atom2) bond.atom1.property['i_pa_zob%d' % num_zobs] = 1 bond.atom2.property['i_pa_zob%d' % num_zobs] = 1 num_zobs += 1 deleting = True break ct.property['i_pa_num_zobs'] = num_zobs return
[docs] def extend_targeted_to_hyds(self, ct): # If a heavy atom is targets, so are its attached hyds. H_atoms = set(analyze.evaluate_asl( ct, 'atom.ele H')).difference(self.frozen_hydrogens & self.target_atoms) for H_atom in H_atoms: for atom in ct.atom[H_atom].bonded_atoms: if int(atom) in self.target_atoms: self.target_atoms.add(H_atom) return
[docs] def delete_atoms(self, ct, atoms): ct.deleteAtoms(atoms) new_indices = {} for atom in ct.atom: new_indices[atom.property['i_pa_atomindex']] = int(atom) for changeable in self.changeables: changeable.update_atom_indices(ct, new_indices) new_acceptors = [] for acceptor in self.acceptors: new_acceptors.append(new_indices[acceptor]) self.acceptors = new_acceptors new_donors = [] for donor in self.donors: new_donors.append((new_indices[donor[0]], new_indices[donor[1]])) self.donors = new_donors new_clashers = [] for clasher in self.clashers: new_clashers.append(new_indices[clasher]) self.clashers = new_clashers self.record_current_indices(ct) return
[docs] def run_propka(self, changeables, ct, use_xtal=False): # See if we even need to bother. # If there's just a single carboxylate with an # already assigned pKa, it can't have changed. if ((len(changeables) == 1) and (changeables[0].type == 'CARBOXYL') and (changeables[0].pka is not None)): return {} # If there's nothing that depends on pH. proceed = False for changeable in changeables: if (changeable.type in ['HISTIDINE', 'CARBOXYL', 'AMINE'] or (changeable.type == 'ROTATABLE' and changeable.ionizable)): proceed = True break if not proceed: return {} report(LOG_BASIC, ' Running PropKa to get pKas') if use_xtal: propka_ct = self.generate_mates(ct) else: propka_ct = ct pkas = {} try: calculations = ['pka'] calculator = protein.PropertyCalculator(propka_ct, 'hbondopt_propkajob') results = {} for res, data in calculator.calculateOverResidues(*calculations): if data['pka'] is not None: results[str(res)] = float(data['pka']) except: raise PropKaException('PropKa failed') for changeable in changeables: # Need to convert to same format as PropKa uses. name = changeable.name.split(':')[0] + ':' + changeable.name.split( ':')[1][4:] name = name.rstrip() if name in results: pkas[name] = (changeable.pka, results[name]) return pkas
[docs] def generate_mates(self, ct): chains = [ 'a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l', 'm', 'n', 'o', 'p', 'q', 'r', 's', 't', 'u', 'v', 'w', 'x', 'y', 'z', 'A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'J', 'K', 'L', 'M', 'N', 'O', 'P', 'Q', 'R', 'S', 'T', 'U', 'V', 'W', 'X', 'Y', 'Z', '1', '2', '3', '4', '5', '6', '7', '8', '9', '0' ] for chain in ct.chain: if chain.name in chains: chains.remove(chain.name) try: mates = analyze.generate_crystal_mates(ct) except: report( LOG_NONE, 'Failure in generating crystal mates. Please check crystal properties.' ) sys.exit() new_ct = copy.copy(ct) for atom in new_ct.atom: atom.property['i_pa_propka_maincell'] = 1 # Need to renumber, rechain resnum = 1 chain = chains.pop() for i, mate in enumerate(mates): if i == 0: continue for res in mate.residue: res.chain = chain res.resnum = resnum resnum += 1 if resnum > 9999: chain = chains.pop() resnum = 1 new_ct.extend(mate) important_atoms = analyze.evaluate_asl( new_ct, 'fillres within 20 (atom.i_pa_propka_maincell 1)') truncated_new_ct = new_ct.extract(important_atoms) build.add_hydrogens(truncated_new_ct) return truncated_new_ct
[docs] def apply_pkas(self, changeables, changes, propka_pH): something_changed = False for changeable in changeables: # Need to convert to same format as PropKa uses. name = changeable.name.split(':')[0] + ':' + changeable.name.split( ':')[1][4:] name = name.rstrip() if name in changes: old_pka = changeable.pka changed = changeable.change_pka(changes[name][1], propka_pH) if changed: something_changed = True if old_pka is not None: report( LOG_BASIC, ' Updating protonation penalties for %s: pKa %.2f' % (changeable.name, changeable.pka)) else: changed = changeable.change_pka(None, propka_pH) return something_changed
[docs] def find_protonation_state_changes(self, ct, clusters='all'): clusters_to_redo = [] pkas = self.run_propka(self.changeables, ct, use_xtal=self.use_xtal) for i, cluster in enumerate(self.clusters): if clusters != 'all' and i not in clusters: continue something_changed = self.apply_pkas(cluster.changeables, pkas, self.propka_pH) if something_changed: clusters_to_redo.append(i) return clusters_to_redo
[docs] def identify_species(self, ct): report(LOG_EXTRA) for species in self.species: atoms = analyze.evaluate_asl(ct, species.asl) for atom in atoms: if atom not in self.target_atoms: continue new_changeable = species(ct, atom) if new_changeable.valid: self.changeables.append(new_changeable) self.changeables[-1].index = len(self.changeables) - 1 return
[docs] def identify_all_hbonders(self, ct): self.donors = [] donor_Hs = analyze.evaluate_asl(ct, "(atom.ele H AND not /C0-H0/)") donor_Hs += analyze.evaluate_asl( ct, "(res.ptype \"HIS \",\"HID \",\"HIE \",\"HIP \") AND (atom.ptype \" HD2\" or atom.ptype \" HE1\")" ) for H in donor_Hs: H_atom = ct.atom[H] if H_atom.bond_total == 1: for atom in H_atom.bonded_atoms: self.donors.append((int(atom), H)) self.acceptors = analyze.evaluate_asl( ct, "not atom.ele H AND not atom.ele C") # Move metal ions from acceptor list to donor list (ion is both heavy atom and "hydrogen"). atoms_to_remove = [] for acceptor in self.acceptors: if ct.atom[acceptor].formal_charge > 0 and ct.atom[acceptor].atomic_number not in [ 7, 8 ]: self.donors.append((acceptor, acceptor)) atoms_to_remove.append(acceptor) for atom in atoms_to_remove: self.acceptors.remove(atom) # Remove any atoms accounted for in donors from list of acceptors. for donor in self.donors: if donor[0] in self.acceptors: self.acceptors.remove(donor[0]) # All of the non-polar hydrogens self.clashers = analyze.evaluate_asl(ct, "atom.ele H AND /C0-H0/") return
[docs] def annotate_structure(self, ct): """ Annotate atoms in structure by their interaction class and whether or not they are static Internally updates the acceptors, donors and clashers attributes """ self.identify_all_hbonders(ct) annotate_structure_interactors(ct, self.acceptors, self.donors, self.clashers) # Indicate if interactor is static. This is important to know during # generation of states. self.remove_changeables_from_hbonders() atom_iterator = itertools.chain(self.acceptors, self.clashers, *self.donors) for iatom in atom_iterator: ct.atom[iatom].property[STATIC_PROPERTY] = True return ct
[docs] def enumerate_changeable_states(self, ct): """ Enumerate all states for each changeable. Crystal symmetry mates are taken into account if requested. """ working_ct = ct.copy() self.annotate_structure(working_ct) if self.use_xtal: try: mates = analyze.generate_crystal_mates(working_ct, radius=4.0) except: report( LOG_NONE, 'Failure in generating crystal mates. Please check crystal properties.' ) sys.exit() for i, mate in enumerate(mates): for atom in mate.atom: atom.property['i_pa_xtalatom'] = i if i > 0: mates[0].extend(mate) self.xtal_ct = mates[0] # Atoms in primary image which are close to a crystal image (assume atom indices in ct and xtal_ct are the same). surface_atoms = set( analyze.evaluate_asl( self.xtal_ct, "(within 5.0 (atom.i_pa_xtalatom > 0)) and not (atom.i_pa_xtal_atom > 0)" )) for atom in ct.atom: if int(atom) in surface_atoms: atom.property['i_pa_surfaceatom'] = 1 else: atom.property['i_pa_surfaceatom'] = 0 # Get rid of extra atoms. atoms_to_delete = analyze.evaluate_asl( self.xtal_ct, "not (fillres within 4 (atom.i_pa_xtalatom 0 ) )") self.xtal_ct.deleteAtoms(atoms_to_delete) working_ct = self.xtal_ct dcell = create_distance_cell( working_ct, MAX_ORIENT_HYDROGEN_DISTANCE, honor_pbc=False) for changeable in self.changeables: acceptors, donors, _ = changeable.get_close_interactors( working_ct, dcell) changeable.enumerate_states( working_ct, acceptors, donors, self.pH, do_flips=self.do_flips, include_initial=self.include_initial)
[docs] def lock_protonation_states(self, ct): if self.noprot_asl == '': return locked_atoms = analyze.evaluate_asl(ct, self.noprot_asl) for changeable in self.changeables: atoms = changeable.get_heavies() for atom in atoms: if atom in locked_atoms: changeable.lock_protonation() return
[docs] def remove_changeables_from_hbonders(self): for changeable in self.changeables: heavy_atoms = changeable.get_heavies() for atom in heavy_atoms: while atom in self.acceptors: self.acceptors.remove(atom) for donor in reversed(self.donors): if donor[0] == atom: self.donors.remove(donor) adjustable_atoms = changeable.get_adjustable_atoms() for atom in adjustable_atoms: if atom in self.clashers: self.clashers.remove(atom)
@staticmethod def _get_index_clusters(interaction_matrix, max_cluster_size): """ Cluster interactors based on an interaction matrix :param interaction_matrix: Interaction lookup table :type interaction_matrix: list of list of bool :param max_cluster_size: Maximum cluster size. None is no maximum. :type max_cluster_size: int or NoneType :return: Clustered interactors :rtype: list of lists of ints """ ninteractors = len(interaction_matrix) # Initially each residue is in its own cluster. index_clusters = [[i] for i in range(ninteractors)] if max_cluster_size is None: max_cluster_size = ninteractors # Climb up backwards, merging things up to what they interact with. for i in reversed(range(ninteractors)): # For each item currently in this cluster. icluster = index_clusters[i] cluster_size = len(icluster) for j in icluster: # Find out if it interacts with anything higher up. j_interacts_with = interaction_matrix[j] merged_cluster = False for k in range(min(j, i)): if j_interacts_with[k]: merged_cluster_size = len( index_clusters[k]) + cluster_size if merged_cluster_size > max_cluster_size: continue index_clusters[k] += icluster index_clusters.pop(i) merged_cluster = True break if merged_cluster: break return index_clusters
[docs] def cluster(self, ct): """Cluster changeables based on their heavies.""" # Extract all heavies from the changeables as these determine whether # they interact with other changeables. Annotate to which changeable a # heavy belongs. heavies = [] for changeable in self.changeables: for heavy in changeable.get_heavies(): ct.atom[heavy].property[ 'i_pa_changeable_index'] = changeable.index heavies.append(heavy) self.interact = calculate_interaction_matrix( ct, heavies, CLUSTERING_DISTANCE, self.use_xtal) # Clean up ct for heavy in heavies: del ct.atom[heavy].property['i_pa_changeable_index'] local_interact = copy.deepcopy(self.interact) # Flesh out the interactions if i interacts with j and k, then j and k interact too. for i in range(len(self.changeables)): for j in range(len(self.changeables)): if local_interact[i][j]: for k in range(len(self.changeables)): if local_interact[i][k]: if j != k: local_interact[j][k] = True local_interact[k][j] = True # Create clusters based on interaction index_clusters = self._get_index_clusters(local_interact, self.max_cluster_size) self.clusters = [] # Create the hbond_cluster instances from the index clusters for icluster in index_clusters: new_cluster = self.hbond_cluster() new_cluster.use_xtal = self.use_xtal new_cluster.torsion_penalty = self.torsion_penalty for index in icluster: new_cluster.changeables.append(self.changeables[index]) # Setup record of which species need xtal symmetry to interact. new_cluster.setup_xtal(ct, self.interact, CLUSTERING_DISTANCE) self.clusters.append(new_cluster) # Report. report(LOG_BASIC) report(LOG_BASIC, 'Clusters to optimize:') for i, cluster in enumerate(self.clusters): report(LOG_BASIC, (" Cluster %d:" % (i + 1))) for item in cluster.changeables: report(LOG_BASIC, (" " + item.name)) report(LOG_BASIC)
[docs] def set_user_states(self, ct): for choice in self.user_states: (user_name, user_state) = choice found_name = False for ichangeable, changeable in enumerate(self.changeables): # Remove the pdbres name = changeable.name[0] + ':' + changeable.name[6:].rstrip() if name == user_name: found_name = True found_state = False for istate, state in enumerate(changeable.state_names): if state == user_state: self.assign_state_of_changeable( ct, ichangeable, istate) self.changeables[ichangeable].locked = True found_state = True if not found_name: print('ERROR: did not find species %s' % user_name) sys.exit() elif not found_state: print('ERROR: did not find state %s for species %s' % (user_state, user_name)) sys.exit() return
[docs] def assign_state_of_changeable(self, ct, ichangeable, istate): atoms_to_delete = self.changeables[ichangeable].assign_state( ct, istate, self.add_labels, self.label_pkas) if atoms_to_delete is not None: self.delete_atoms(ct, atoms_to_delete) return
[docs] def increment_state_of_changeable(self, ct, ichangeable): if (self.changeables[ichangeable].current_state + 1) == self.changeables[ichangeable].nstates: self.assign_state_of_changeable(ct, ichangeable, 0) else: self.assign_state_of_changeable( ct, ichangeable, self.changeables[ichangeable].current_state + 1) return
[docs] def decrement_state_of_changeable(self, ct, ichangeable): if self.changeables[ichangeable].current_state == 0: self.assign_state_of_changeable( ct, ichangeable, self.changeables[ichangeable].nstates - 1) else: self.assign_state_of_changeable( ct, ichangeable, self.changeables[ichangeable].current_state - 1) return
[docs] def record_current_indices(self, ct): for index in range(len(ct.atom)): ct.atom[index + 1].property['i_pa_atomindex'] = index + 1 return
[docs] def assign_best_combinations(self, ct, last_time=False): """ Assign the best combinations to the ct and report output :param ct:The structure to operate on :type ct: schrodinger.Structure :param last_time: Whether or not this is the last time through when we should be extra verbose :type last_time: Boolean """ atoms_to_delete = [] for cluster in self.clusters: atoms = cluster.assign_combination( ct, 0, self.add_labels, self.label_pkas, verbose=last_time) atoms_to_delete.extend(atoms) self.delete_atoms(ct, atoms_to_delete) return
[docs] def assign_cluster_combination(self, ct, icluster, icombination): atoms = self.clusters[icluster].assign_combination( ct, icombination, self.add_labels, self.label_pkas) self.delete_atoms(ct, atoms) return
[docs] def single_point_cluster(self, ct, icluster): atoms = self.clusters[icluster].single_point( ct, self.interact, self.donors, self.acceptors, self.clashers) self.delete_atoms(ct, atoms) atoms = self.clusters[icluster].assign_combination( ct, 1000000000, self.add_labels, self.label_pkas) self.delete_atoms(ct, atoms) return self.clusters[icluster].single_point_score
[docs] def optimize_cluster(self, ct, icluster, assign=True): self.clusters[icluster].optimize( ct, self.interact, self.donors, self.acceptors, self.clashers, self.max_comb, self.num_sequential_cycles, self.use_propka, propka_pH=self.propka_pH) if assign: self.assign_cluster_combination(ct, icluster, 0) return
[docs] def optimize(self, ct): clusters_to_do = [i for i in range(len(self.clusters))] niter = 0 niter_max = 2 total_score = 0.0 while (len(clusters_to_do) > 0) and (niter < niter_max): niter += 1 total_score = 0.0 for i, cluster in enumerate(self.clusters): if i not in clusters_to_do: total_score += cluster.combinations[0][-1] continue report(LOG_BASIC, 'Working on cluster #%d...' % (i + 1)) for changeable in cluster.changeables: report(LOG_BASIC, ' ' + changeable.name + ' : ' + str(changeable.nstates) + ' states') cluster.optimize( ct, self.interact, self.donors, self.acceptors, self.clashers, self.max_comb, self.num_sequential_cycles, self.use_propka, propka_pH=self.propka_pH, xtal_ct=self.xtal_ct) # Just in case something went wrong. try: total_score += cluster.combinations[0][-1] except: pass self.assign_best_combinations(ct) # Now see if any protonation states need to be redone. if self.use_propka and niter < niter_max: report(LOG_BASIC, '') report( LOG_BASIC, 'Recalculating pKas to identify changes in protonation penalties...' ) self.assign_best_combinations(ct) try: clusters_to_do = self.find_protonation_state_changes(ct) except PropKaException: report( LOG_NONE, 'WARNING: PropKa failed when looking for changes in protonation state penalties.' ) clusters_to_do = [] if len(clusters_to_do) > 0: report( LOG_BASIC, ' The following clusters have new protonation state penalties, and will be re-run:' ) report(LOG_BASIC, ' ' + ','.join([str(i + 1) for i in clusters_to_do])) else: report(LOG_BASIC, ' No changes in protonation penalties found.') report(LOG_BASIC, '') else: clusters_to_do = [] self.assign_best_combinations(ct, last_time=True) if self.minimize: report(LOG_BASIC, 'Minimizing hydrogens...\n') self.minimize_hydrogens(ct) report(LOG_BASIC, 'Total score for structure: ' + str(total_score)) return
[docs] def minimize_hydrogens(self, ct): hydrogens = [] for cluster in self.clusters: for changeable in cluster.changeables: if changeable.type in ['ROTATABLE', 'WATER']: changeable_sites = changeable.get_state_sites( ct, changeable.current_state) hydrogens += [i[1] for i in changeable_sites[1]] minimizer = minimize.Minimizer(min_method=minimize.MinCG) minimizer.setStructure(ct) for atom in ct.atom: if int(atom) not in hydrogens: minimizer.addPosFrozen(int(atom)) minimizer.minimize() return
[docs] def restore_zobs(self, ct): num_zobs = ct.property.get('i_pa_num_zobs') if num_zobs is None: return for i in range(num_zobs): atoms = analyze.evaluate_asl(ct, '(atom.i_pa_zob%d = 1)' % i) if len(atoms) == 2: ct.addBond(atoms[0], atoms[1], 0) for iatom in atoms: del ct.atom[iatom].property['i_pa_zob%d' % i] del ct.property['i_pa_num_zobs'] return
[docs] def cleanup(self, ct): self.restore_zobs(ct) atoms_to_delete = analyze.evaluate_asl( ct, "withinbonds 1 (atom.i_pa_xtalatom 1)") if len(atoms_to_delete) == 0: ct.retype() return ct.deleteAtoms(atoms_to_delete) for atom in ct.atom: if 'i_pa_atomindex' in atom.property: del atom.property['i_pa_atomindex'] if 'i_pa_xtalatom' in atom.property: del atom.property['i_pa_xtalatom'] if 'i_pa_surfaceatom' in atom.property: del atom.property['i_pa_surfaceatom'] ct.retype() return
[docs] def summarize_pkas(self): first = True for changeable in self.changeables: if changeable.pka is not None: if first: report(LOG_BASIC, '\nFinal pKas:') first = False report(LOG_BASIC, ' %s : %.2f' % (changeable.name, changeable.pka))
if __name__ == '__main__': if len(sys.argv) != 3: print("Please specify 2 arguments: input and output file names.") sys.stdout.flush() sys.exit(1) inputfile_name = sys.argv[1] outputfile_name = sys.argv[2] try: structure_ct = structure.Structure.read(inputfile_name) except: print("ERROR: unable to open file " + inputfile_name) sys.stdout.flush() sys.exit(1) print("Structure extracted from file: " + inputfile_name) print("Output structure will be written to: " + outputfile_name) sys.stdout.flush() instance = ProtAssign( structure_ct, interactive=False, asl='', noprot_asl='', use_xtal=False, do_flips=True, freeze_existing=False, include_initial=False, max_comb=500000, num_sequential_cycles=30, max_cluster_size=None, add_labels=True, label_pkas=True, pH=pH_neutral, use_propka=True, propka_pH=7.0, user_states=[], minimize=False) structure_ct.write(outputfile_name) print("\nComplete. Output file: %s" % outputfile_name) sys.stdout.flush()