Source code for schrodinger.application.prime.wrapper

"""
Classes to assist in preparing prime input files
"""
import os

from schrodinger import structure
from schrodinger.application.inputconfig import InputConfig
from schrodinger.structutils import analyze

ALL_LIGANDS = "all"
NO_LIGANDS = "none"


class PrimeConfig(InputConfig):
    """
    Class for reading and writing simplified Prime homology modeling
    input files.
    """

    specs = [
        "JOB_TYPE                   = string(default='MODEL')",
        "DIR_NAME                   = string(default='.')",
        "QUERY_OFFSET               = integer(default=0)",
        "TAILS                      = integer(default=0)",
        "SIDE_OPT                   = string(default='false')",
        "MINIMIZE                   = string(default='true')",
        "BUILD_INSERTIONS           = string(default='true')",
        "MAX_INSERTION_SIZE         = integer(default=20)",
        "BUILD_TRANSITIONS          = string(default='true')",
        "BUILD_DELETIONS            = string(default='true')",
        "KEEP_ROTAMERS              = string(default='true')",
        "KNOWLEDGE_BASED            = string(default='true')",
        "NUM_OUTPUT_STRUCT          = integer(default=1)",
        "TEMPLATE_RESIDUE_NUMBERS   = string(default='false')"
    ]

    def __init__(self, kw=None):
        """
        Accepts one argument which is either a path or a keyword dictionary.
        """
        InputConfig.__init__(self, kw, self.specs)
        self.validateValues(preserve_errors=False, copy=True)

    def setAlignment(self,
                     alignment,
                     ligands=(ALL_LIGANDS,),
                     include_water=False,
                     template_number=1):
        """
        :type alignment: `BasePrimeAlignment`
        :param alignment: Alignment.

        :type ligands: tuple of str
        :param ligands: Names of ligands to be included in the model.

        :type include_water: bool
        :param include_water: Whether to include water molecules in the built
            model.

        :type template_number: int
        :param template_number: Template number
        """
        template = alignment.template

        short_name = basename_no_ext(template.name)
        full_template_name = short_name + '_' + template.chain
        self['TEMPLATE_NAME'] = full_template_name
        self['%s_NUMBER' % full_template_name] = template_number
        self['%s_STRUCT_FILE' % full_template_name] = template.file_name
        self['%s_ALIGN_FILE' % full_template_name] = '%s.aln' % alignment.name

        # Save template ligands.
        # Currently, this assumes every ligand consists of a single residue.
        index = 0
        if ligands and NO_LIGANDS not in ligands:
            for lig in template.ligands:
                if ALL_LIGANDS in ligands or lig.pdbres in ligands:
                    het_name = '%s_HETERO_%d' % (full_template_name, index)
                    lig_id = 'LIG%s%c:%d' % (lig.pdbres, template.chain,
                                             lig.st.atom[1].resnum)
                    self[het_name] = lig_id
                    index += 1

        # Save water molecules.
        if include_water:
            for water in template.waters:
                het_name = '%s_HETERO_%d' % (full_template_name, index)
                atom = water.atom[1]
                lig_id = 'LIG%s%c:%d' % (atom.pdbres, template.chain,
                                         atom.resnum)
                self[het_name] = lig_id
                index += 1

        self.validateValues(preserve_errors=False, copy=False)

    def writeConfig(self, filename):
        """
        Writes a simplified input file to filename.

        This input file needs to be run via `$SCHRODINGER/prime`.

        :type filename: str
        :param filename: Name of output file.
        """
        if not filename.endswith('.inp'):
            raise ValueError("Prime input file should end with *.inp")

        self.writeInputFile(filename, ignore_none=True, yesno=False)
        return filename

    def _quote(self, value, multiline=True):
        """
        Overwrite 'InputConfig._quote' to not quote any values
        """
        if str(value).startswith('LIG'):
            return value[3:]
        return value


class ConsensusConfig(InputConfig):
    """
    Class for reading and writing simplified Prime consensus modeling
    input files.
    """
    specs = [
        "ALIGN_FILE                 = string()"
        "SC_OPT                     = string(default='yes')"
    ]

    def __init__(self, kw=None):
        """
        Accepts one argument which is either a path or a keyword dictionary.
        """

        InputConfig.__init__(self, kw, self.specs)

    def setAlignment(self, alignment):
        """
        """
        self['ALIGN_FILE'] = alignment.name + '.aln'
        self['SC_OPT'] = 'yes'

        self.validateValues(preserve_errors=False, copy=False)

    def writeConfig(self, filename):
        """
        Writes a simplified input file to filename.

        This input file needs to be run via `$SCHRODINGER/consensus_homology`.

        :type filename: str
        :param filename: Name of output file.
        """
        if not filename.endswith('.inp'):
            raise ValueError(
                "Consensus homology input file should end with *.inp")
        self.writeInputFile(filename, ignore_none=True, yesno=False)
        return filename


class _WorkingDirectoryMixin:

    def __init__(self, *args, **kwargs):
        super().__init__(*args, **kwargs)
        self._working_directory = None

    def setWorkingDirectory(self, wd):
        """
        Set directory used to write files

        :param wd: Path of the working directory
        :type wd: str
        """
        self._working_directory = wd

    def prependWorkingDirectory(self, path):
        """
        If it's set, prepend the working directory to the given path.

        :param path: Path to modify
        :type path: str

        :return: Modified path
        :rtype: str
        """
        if self._working_directory is not None:
            path = os.path.join(self._working_directory, path)
        return path


class PrimeTemplate(_WorkingDirectoryMixin):
    """
    This class encapsulates Prime template structure. A template includes
    a single chain.
    """

    WATER_NAMES = {'HOH', 'WAT'}
    FIND_LIGANDS_KWARGS = {
        'allow_ion_only_molecules': True,
        'min_heavy_atom_count': 1,
        'excluded_residue_names': WATER_NAMES,
    }

    def __init__(self, pdb_id=''):
        super().__init__()
        self._st = None
        self._ligands = []
        self._waters = []
        self._file_name = pdb_id + '.pdb'
        self.sequence = ''
        self.chain = ''
        self.name = pdb_id

    @property
    def file_name(self):
        return self._file_name

    @property
    def st(self):
        return self._st

    @st.setter
    def st(self, st):
        self._st = st
        self.chain = st.atom[1].chain.strip()
        self._file_name = ''.join(
            [basename_no_ext(self.name), '_', self.chain, '.pdb'])

    @property
    def ligands(self):
        """
        Returns a list of ligands found in the template structure.

        :rtype: list of `schrodinger.structuils.analyze.Ligand` objects
        :return: List of ligands in current template structure.
        """
        if not self._ligands:
            self._ligands = analyze.find_ligands(self._st,
                                                 **self.FIND_LIGANDS_KWARGS)

        return self._ligands

    @property
    def waters(self):
        if not self._waters:
            self._waters = self._getWaters()

        return self._waters

    def _getWaters(self):
        """
        Returns a list of `schrodinger.structure._Molecule` objects
        corresponding to water molecules present in the template structure.

        :rtype: list of `schrodinger.structure._Molecule`
        :return: List of water molecules in the template.
        """
        waters = []
        for mol in self._st.molecule:
            if mol.atom[1].pdbres[:3] in self.WATER_NAMES:
                waters.append(mol)

        return waters

    def writePDB(self):
        """
        Writes template structure to PDB file.

        :rtype: bool
        :return: True on successful write.
        """
        file_name = self.prependWorkingDirectory(self._file_name)
        try:
            self._st.write(file_name, format=structure.PDB)
        except IOError:
            print('Cannot write template structure ', file_name)
            return False

        return True


class BasePrimeAlignment(_WorkingDirectoryMixin):
    """
    Encapsulates pairwise query-template alignment. Allows writing alignment
    files in Prime format.
    """

    def __init__(self, name, query_seq):
        """
        :type name: str
        :param name: Alignment name.

        :type query_seq: str
        :param query_seq: Text of prealigned query sequence.
        """
        super().__init__()
        self._name = name

        self._query_sequence = query_seq
        self._query_sequence = convert_gaps_to_dots(self.query_sequence)
        self._query_sequence = self.query_sequence.upper()

        self._template = None
        self._template_sequence = ''

    @property
    def name(self):
        return self._name

    @property
    def query_sequence(self):
        return self._query_sequence

    @property
    def template(self):
        return self._template

    @property
    def template_sequence(self):
        return self._template_sequence

    def setTemplate(self, template, sequence=None):
        """
        Adds a pre-aligned template sequence to the alignment.

        :type template: `PrimeTemplate`
        :param template: Template.

        :type sequence: str
        :param sequence: Template sequence aligned to query (i.e. including gaps).
        """
        self._template = template
        self._template_sequence = sequence if sequence else template.sequence

    def setWorkingDirectory(self, wd):
        """
        Propagate change in working directory to template
        """
        super().setWorkingDirectory(wd)
        if self._template is not None:
            self._template.setWorkingDirectory(wd)

    def getAlnFilename(self):
        """
        Get the filename of the aln written by `writeInputForPrime`.
        """
        return self.prependWorkingDirectory(f"{self.name}.aln")

    def writeInputForPrime(self):
        """
        Writes Prime alignment to external file.
        """
        file_name = self.getAlnFilename()
        try:
            out_file = open(file_name, "w")
        except IOError:
            return False

        out_file.write(
            "ProbeAA:" + convert_gaps_to_dots(self._query_sequence) + "\n")
        out_file.write(
            "Fold AA:" + convert_gaps_to_dots(self._template_sequence) + "\n")
        out_file.close()

        self._template.writePDB()

        return True

    def writeInputForConsensus(self, file_name, append=False):
        """
        Writes out FASTA alignment for consensus homology modeling.
        The alignment needs to include chain name and secondary structure
        definition for each template sequence.

        :type append: bool
        :param append: Whether to append to existing file.

        :type file_name: str
        :param file_name: Name of output file.
        """
        file_name = self.prependWorkingDirectory(file_name)
        try:
            if append:
                out_file = open(file_name, 'a')
            else:
                out_file = open(file_name, 'w')
                out_file.write(">Query\n")
                out_file.write(self.query_sequence.replace('.', '~') + "\n")
        except IOError:
            return False

        chain_id = self._template.st.atom[1].chain
        name = '>' + basename_no_ext(self._template.name) + '_' + chain_id
        # write ssa string
        fasta_name = name + ' ssa,\n'
        out_file.write(fasta_name)
        ssa = convert_gaps_to_tildes(self.getSSA(self._template.st,
                                                 chain_id)).replace(' ', '-')
        out_file.write(ssa + '\n')
        # write sequence string
        fasta_name = name + ' Chain,\n'
        out_file.write(fasta_name)
        out_file.write(convert_gaps_to_tildes(self._template_sequence) + '\n')
        # write template structure
        self._template.writePDB()

        out_file.close()

        return True

    def getSSA(self, st, chain_id=''):
        """
        Gets a secondary structure annotation string for structure st
        chain chain_id.

        :type st: `schrodinger.structure.Structure`
        :param st: Input structure.

        :type chain_id: str
        :param chain_id: Chain ID to extract SSA from.
        """
        secondary_types = {
            structure.SS_NONE: ' ',
            structure.SS_HELIX: 'H',
            structure.SS_STRAND: 'E',
            structure.SS_LOOP: ' ',
            structure.SS_TURN: ' '
        }

        ssa = []
        for chain in st.chain:
            if chain.name == chain_id:
                prev_res_id = (None, None, None)
                for atom in chain.atom:
                    res_id = (atom.resnum, atom.chain, atom.inscode)
                    if res_id != prev_res_id:
                        ssa.append(secondary_types[atom.secondary_structure])
                        prev_res_id = res_id

        return ''.join(ssa)


def basename_no_ext(file_name):
    """
    Returns file basename without extension.
    """
    return os.path.splitext(os.path.basename(file_name))[0]


def convert_gaps_to_dots(sequence):
    """
    Replaces gap symbols with dots to make a sequence compatible with Prime.

    :type sequence: str
    :param sequence: Input gapped sequence.

    :rtype: str
    :return: Converted sequence.
    """
    return sequence.replace(' ', '.').replace('-', '.').replace('~', '.')


def convert_gaps_to_tildes(sequence):
    """
    Replaces gap symbols with tildes to make a sequence compatible with
    consensus modeling.

    :type sequence: str
    :param sequence: Input gapped sequence.

    :rtype: str
    :return: Converted sequence.
    """
    return sequence.replace('.', '~').replace('-', '~')