Source code for schrodinger.application.matsci.spectra

"""
Copyright Schrodinger, LLC. All rights reserved.
"""

from past.utils import old_div

import numpy
from collections import OrderedDict
from scipy import sparse
from scipy.sparse.linalg import spsolve
from scipy.signal import savgol_filter
from scipy.spatial import ConvexHull

from schrodinger.application.matsci import xrd
from schrodinger.infra import mm
from schrodinger.infra import table
from schrodinger.utils import fileutils
from schrodinger.application.matsci.nano.xtal import get_pymatgen_structure

UVVIS = 'UV/Vis'
VIB_SPECTRUM_MIN = 0
VIB_SPECTRUM_MAX = 4000
UV_SPECTRUM_MIN = 13333  # 750 nm
UV_SPECTRUM_MAX = 100000  # 350 nm
GAUSSIAN = 'Gaussian'
LORENTZIAN = 'Lorentzian'

PDP_DEFAULT_JOB_NAME = 'powder_diffraction_pattern'
PDP_FILE_EXT = '.pdp.spm'

POLYORDER = 'polyorder'
WINDOW_WIDTH = 'win_width'
LAMBDA = 'lambda_val'
WTFACTOR = 'wt_factor'

DEFAULT_LAMBDA = 5.0
DEFAULT_WT_FACTOR = 1e-5


[docs]def rubberband_method(x_data, y_data, parameters):
    """
    Apply a rubberband to the data

    :type x_data: list
    :param x_data: list of 2 theta values

    :type y_data: list
    :param y_data: list of intensity values

    :type parameters: SimpleNamespace
    :param parameters: all the tunable parameters for the method

    :rtype: list
    :return: list of intensity values
    """

    # Find rough base line curve
    vert = ConvexHull(numpy.array(list(zip(x_data, y_data)))).vertices

    # Rotate the base line to make it flat
    vert = numpy.roll(vert, -vert.argmin())
    vert = vert[:vert.argmax()]

    # Interpret the y values for the rotate curve for the original x data
    return numpy.interp(x_data, x_data[vert], y_data[vert])


[docs]def least_square_method(x_data, y_data, parameters):
    """
    Smooth data using least sqaure method

    :type x_data: list
    :param x_data: list of 2 theta values

    :type y_data: list
    :param y_data: list of intensity values

    :type parameters: SimpleNamespace
    :param parameters: all the tunable parameters for the method

    :rtype: list
    :return: list of intensity values
    """

    lamda_val = (10**getattr(parameters, LAMBDA))
    wt_factor = getattr(parameters, WTFACTOR)
    data_len = len(y_data)
    diag_mat = sparse.diags(
        [1, -2, 1], [0, -1, -2], shape=(data_len, data_len - 2))
    wts = numpy.ones(data_len)
    # Iterate multiple times to smooth data
    for index in range(10):
        spdiags_mat = sparse.spdiags(wts, 0, data_len, data_len)
        back_mat = spdiags_mat + lamda_val * diag_mat.dot(diag_mat.transpose())
        back_sub = spsolve(back_mat, wts * y_data)
        wts = wt_factor * (y_data > back_sub) + (1 - wt_factor) * (
            y_data < back_sub)

    return back_sub


[docs]def savgol_method(x_data, y_data, parameters):
    """
    Apply a Savitzky-Golay filter to the data

    :type x_data: list
    :param x_data: list of 2 theta values

    :type y_data: list
    :param y_data: list of intensity values

    :type parameters: SimpleNamespace
    :param parameters: all the tunable parameters for the method

    :rtype: list
    :return: list of intensity values
    """

    window_size = getattr(parameters, WINDOW_WIDTH)
    polyorder = getattr(parameters, POLYORDER)
    return savgol_filter(y_data, window_size, polyorder, mode='mirror')


[docs]def convert_nm_and_wavenumber(value):
    """
    Convert value in wavenumbers to nanometers or vice versa, the conversion
    is the same.

    :type value: int or float
    :param value: Number to convert

    :rtype: float
    :return: value converted from wavenumbers to nanometers or vice versa
    """

    return old_div(10000000.0, float(value))


[docs]def get_file_data(filename):
    """
    Read the data from the file

    :type name: str
    :param name: The name for the spectrum.  If none, it will be derived
        from the file name.

    :rtype: tuple of (str, str, schrodinger.infra.table.Table)
    :return: First two members of the tuple are the s_j_x_lable and s_j_y_label
        properties.  The third member of the tuple is a Table object from the
        schrodinger.infra.table module.  This object holds the actual data.
    """

    # Grab the X and Y axis labels
    mm.m2io_initialize(mm.error_handler)
    file_handle = mm.m2io_open_file(filename, mm.M2IO_READ_FORWARD)
    mm.m2io_goto_next_block(file_handle, 'f_m_table')
    xaxis_prop = mm.m2io_get_string(file_handle, ['s_j_x_label'])[0]
    intensity_prop = mm.m2io_get_string(file_handle, ['s_j_y_label'])[0]
    mm.m2io_close_file(file_handle)

    mytable = table.Table(filename)
    return xaxis_prop, intensity_prop, mytable


[docs]def generate_curve(mytable,
                   line_width,
                   xprop,
                   intensity,
                   x_scale=1.0,
                   uvvis=False,
                   smin=None,
                   smax=None,
                   stride=None,
                   function=LORENTZIAN,
                   line_width_nm=False):
    """
    Generate a full curve for a series of frequency/intensity lines.  The
    curve is generated by broadening the lines with Gaussian or Lorentzian
    curves centered on each line, and summing the curves together.

    :type mytable: table.Table object
    :param mytable: table of raw data

    :type line_width: float
    :param line_width: the half-bandwidth to use when generating spectrum

    :type x_scale: float
    :param x_scale: the x-axis scale factor

    :type intensity: str
    :param intensity: The label of the intensity property in mytable

    :type xprop: str
    :param xprop: The label of the x-axis property in mytable

    :type uvvis: bool
    :param uvvis: True if this is a uv/vis spectrum, False if not (default
        is False, vibrational spectrum)

    :type smin: int
    :param smin: The minimum x-value in wavenumbers.

    :type smax: int
    :param smax: The maximum x-value in wavenumbers.

    :type stride: int
    :param stride: Compute the intensity every stride values of x

    :type function: str
    :param function: The function used to broaden singular intensity values
        to a full spectrum curve.  Default is Lorentzian, other option is
        Gaussian.  Use the LORENTZIAN or GAUSSIAN module constants.

    :type line_width_nm: bool
    :param line_width_nm: Linewidth is given in nm. Default is
        wavenumbers. Not used for non-UV/Vis spectra.

    :rtype: tuple of 1-D numpy arrays
    :return: (xvalues, yvalues) with xvalues running from smin to smax with Xn =
        X(n-1) + stride.  Note that the X unit will be wavenumbers for all spectra.
    """

    if uvvis:
        if not stride:
            stride = 10
        if not smin:
            smin = UV_SPECTRUM_MIN
        if not smax:
            smax = UV_SPECTRUM_MAX
    else:
        if not stride:
            stride = 5
        if not smin:
            smin = VIB_SPECTRUM_MIN
        if not smax:
            smax = VIB_SPECTRUM_MAX
    xvals = numpy.arange(smin, smax, stride)
    yvals = numpy.zeros(len(xvals))

    half_original_linewidth = line_width * 0.5
    # Simulate the spectrum:
    is_gaussian = function == GAUSSIAN
    for irow in range(1, len(mytable) + 1):
        pos = mytable[irow][xprop] * x_scale
        if uvvis:
            pos = old_div(pos, 0.00012398)
            if line_width_nm:
                # We compute the spectrum in wavenumber space, but a
                # bandwidth in NM is not linear (or constant) in this space
                # Figure out what the bandwidth should be by converting the
                # peak to nanometers, compute the long and short sides of
                # the bandwidth, then convert those back to wavenumbers and
                # take half the difference.
                nm_pos = convert_nm_and_wavenumber(pos)
                nm_short = nm_pos - half_original_linewidth
                nm_long = nm_pos + half_original_linewidth
                wave_short = convert_nm_and_wavenumber(nm_short)
                wave_long = convert_nm_and_wavenumber(nm_long)
                line_width = old_div((wave_short - wave_long), 2.0)
        try:
            height = mytable[irow][intensity]
        except KeyError:
            return None, None

        if line_width == 0:
            # No line-broadening
            # The formula below finds the index of the closest x value
            spot = int(round(old_div((pos - xvals[0]), float(stride))))
            if spot >= 0:
                try:
                    yvals[spot] = height
                except IndexError:
                    pass
        elif is_gaussian:
            yvals += height * numpy.exp(
                old_div(-(xvals - pos)**2, (2 * line_width**2)))
        else:
            # Lorentzian:
            yvals += old_div(height, (1 + (old_div(
                (xvals - pos), line_width))**2))
    return xvals, yvals


[docs]class PowderDiffractionException(Exception):
    pass


[docs]def get_powder_diffraction_pattern(st,
                                   wave_length=None,
                                   debye_waller_factors=None,
                                   two_theta_range=(0, 90),
                                   compute_intensities=True):
    """
    Get a pymatgen powder diffraction pattern.

    :type st: `schrodinger.structure.Structure`
    :param st: the structure

    :type wave_length: float
    :param wave_length: the wave length in Ang.

    :type debye_waller_factors: dict
    :param debye_waller_factors: the temperature dependent Debye-Waller factors,
        keys are elemental symbols, values are factors
        in Ang.^2

    :type two_theta_range: tuple or None
    :param two_theta_range: (min, max) pair tuple specifying the x-axis two theta
        range in degrees over which to calculate the powder diffraction pattern
        or None if it is to be calculated at all diffracted beams within the
        limiting sphere of 2 * radius / wave_length

    :type compute_intensities: bool
    :param compute_intensities: If True, compute peaks intensities
        (requires atoms in the structure), otherwise only peak locations

    :raise: PowderDiffractionException if there is an issue

    :rtype: pymatgen.analysis.diffraction.core.DiffractionPattern
    :return: the pymatgen powder diffraction pattern
    """

    if compute_intensities:
        assert st.atom_total

    pymatgen_st = get_pymatgen_structure(st)

    if wave_length is None:
        wave_length = xrd.WAVELENGTHS['CuKa']

    obj = xrd.XRDCalculator(
        wavelength=wave_length,
        symprec=0,
        debye_waller_factors=debye_waller_factors)

    # see MATSCI-7468 where the input parameters result in zero peaks
    try:
        pattern = obj.get_pattern(
            pymatgen_st,
            scaled=False,
            two_theta_range=two_theta_range,
            compute_intensities=compute_intensities)
    except ValueError as err:
        raise PowderDiffractionException(str(err))

    return pattern


[docs]class SpectrumFile(object):
    """
    Manage a spectrum file.
    """

    # the following need to be defined in subclasses

    HEADERS = OrderedDict()
    COLUMNS = OrderedDict()

[docs]    def __init__(self, data_file_name, spm_file_name, spectrum_data=None):
        """
        Create an instance.

        :type data_file_name: str
        :param data_file_name: the text file containing the data

        :type spm_file_name: str
        :param spm_file_name: the name of the *spm file to create

        :type spectrum_data: list of lists
        :param spectrum_data: Spectrum data. If provided, used to fill table/write file. Otherwise, data obtained from data_file_name
        """

        self.data_file_name = data_file_name
        self.spm_file_name = spm_file_name
        self.table_obj = None
        self.spectrum_data = spectrum_data

[docs]    @staticmethod
    def getData(data_file_name, separator=None, types=None):
        """
        Return the data from the given data file.

        :type data_file_name: str
        :param data_file_name: the text file containing the data

        :type separator: str
        :param separator: the data separator

        :type types: list
        :param types: a list of types used to type cast the data

        :rtype: list
        :return: contains data tuples
        """

        with open(data_file_name, 'r') as afile:
            lines = afile.readlines()
        data = []
        for aline in lines:
            values = []
            for idx, value in enumerate(aline.split(separator)):
                if types:
                    value = types[idx](value)
                else:
                    value = float(value)
                values.append(value)
            data.append(tuple(values))
        return data

    def _setUp(self):
        """
        Set up.
        """

        mm.mmtable_initialize(mm.error_handler)
        mm.mmerr_level(mm.error_handler, mm.MMERR_OFF)
        handle = mm.mmtable_new()
        self.table_obj = table.Table(table_handle=handle)

    def _buildTable(self):
        """
        Build the table. If spectrum_data provided in init(), fill table with that data. Otherwise, read data from self.data_file_name.
        """

        # emulate the *spm files obtained by running Jaguar

        for key, value in self.HEADERS.items():
            mm.mmtable_set_string(self.table_obj, key, value)
        for acol, aname in self.COLUMNS.items():
            self.table_obj.insertColumn(mm.MMTABLE_END, acol)
            idx = self.table_obj.getColumnIndex(acol)
            self.table_obj.columnSetName(idx, aname)

        #grab data for filling table
        if self.spectrum_data:
            data = self.spectrum_data
        else:
            data = self.getData(self.data_file_name)

        # table columns and rows are indexed from 1
        for idx, values in enumerate(data, 1):
            self.table_obj.insertRow(mm.MMTABLE_END, 0)
            for jdx, value in enumerate(values, 1):
                self.table_obj[idx][jdx] = value

    def _createFile(self):
        """
        Create the *spm file.
        """

        mm.mmtable_m2io_write(self.spm_file_name, self.table_obj, True)

    def _tearDown(self):
        """
        Tear down.
        """

        mm.mmerr_level(mm.error_handler, mm.MMERR_WARNING)
        mm.mmtable_terminate()

[docs]    def write(self):
        """
        Write the *spm file.
        """

        self._setUp()
        self._buildTable()
        self._createFile()
        self._tearDown()


[docs]class PowderDiffractionFile(SpectrumFile):
    """
    Manage a powder diffraction pattern file.
    """

    TWO_THETA_KEY = 'r_matsci_Two_Theta_(degrees)'
    TWO_THETA_TITLE = '2*Theta/deg.'
    INTENSITY_KEY = 'r_matsci_Intensity'
    INTENSITY_TITLE = 'Intensity'
    HKLS_KEY = 's_matsci_HKLs'
    HKLS_TITLE = 'HKLs'
    MULTIPLICITIES_KEY = 's_matsci_HKL_Multiplicities'
    MULTIPLICITIES_TITLE = 'Mults'
    INTERPLANAR_SPACING_KEY = 'r_matsci_Interplanar_Spacing_(Ang.)'
    INTERPLANAR_SPACING_TITLE = 'd_HKL/Ang.'

    # use _j_ properties so that spectrum plot automatically
    # recognizes the file
    SPECTRUM_KEY = 's_j_spectrum_type'
    SPECTRUM_TITLE = 'Powder Diffraction Pattern'
    X_KEY = 's_j_x_label'
    X_ALIAS = TWO_THETA_KEY
    Y_KEY = 's_j_y_label'
    Y_ALIAS = INTENSITY_KEY

    HEADERS = OrderedDict([
        (SPECTRUM_KEY, SPECTRUM_TITLE),
        (X_KEY, X_ALIAS),
        (Y_KEY, Y_ALIAS)
    ]) # yapf: disable

    COLUMNS = OrderedDict([
        (TWO_THETA_KEY, TWO_THETA_TITLE),
        (INTENSITY_KEY, INTENSITY_TITLE),
        (HKLS_KEY, HKLS_TITLE),
        (MULTIPLICITIES_KEY, MULTIPLICITIES_TITLE),
        (INTERPLANAR_SPACING_KEY, INTERPLANAR_SPACING_TITLE)
    ]) # yapf: disable

    SEPARATOR = '; '

    # column types
    TYPES = [float, float, str, str, float]

[docs]    @staticmethod
    def getData(data_file_name, separator=None, types=None):
        """
        Return the data from the given data file.

        :type data_file_name: str
        :param data_file_name: the text file containing the data

        :type separator: str
        :param separator: the data separator

        :type types: list
        :param types: a list of types used to type cast the data

        :rtype: list
        :return: contains data tuples
        """

        separator = PowderDiffractionFile.SEPARATOR
        types = PowderDiffractionFile.TYPES

        return SpectrumFile.getData(
            data_file_name, separator=separator, types=types)


[docs]def write_powder_diffraction_pattern(st,
                                     wave_length=None,
                                     debye_waller_factors=None,
                                     two_theta_range=(0, 90),
                                     file_name=None):
    """
    Write a powder diffraction pattern file.

    :type st: `schrodinger.structure.Structure`
    :param st: the structure

    :type wave_length: float
    :param wave_length: the wave length in Ang.

    :type debye_waller_factors: dict
    :param debye_waller_factors: the temperature dependent Debye-Waller factors,
        keys are elemental symbols, values are factors
        in Ang.^2

    :type two_theta_range: tuple or None
    :param two_theta_range: (min, max) pair tuple specifying the x-axis two theta
        range in degrees over which to calculate the powder diffraction pattern
        or None if it is to be calculated at all diffracted beams within the
        limiting sphere of 2 * radius / wave_length

    :type file_name: str
    :param file_name: the file name to which the pattern will be written

    :rtype: str
    :return: the file name to which the pattern was written
    """

    pdp_obj = get_powder_diffraction_pattern(
        st,
        wave_length=wave_length,
        debye_waller_factors=debye_waller_factors,
        two_theta_range=two_theta_range)
    data = zip(pdp_obj.x, pdp_obj.y, pdp_obj.hkls, pdp_obj.d_hkls)

    if not file_name:
        file_name = PDP_DEFAULT_JOB_NAME + PDP_FILE_EXT

    line = ['{two_theta}', '{intensity}', '{hkls}', '{mults}', '{d_hkl}\n']
    line = PowderDiffractionFile.SEPARATOR.join(line)

    with fileutils.tempfilename(prefix='data', suffix='.txt') as tmp_path:
        with open(tmp_path, 'w') as afile:
            for two_theta, intensity, hkl_dicts, d_hkl in data:
                hkls, mults = [], []
                for hkl_dict in hkl_dicts:
                    # the following 'hkl' and 'multiplicity' keys are
                    # from pymatgen
                    hkls.append(str(hkl_dict['hkl']))
                    mults.append(str(hkl_dict['multiplicity']))
                hkls, mults = ', '.join(hkls), ', '.join(mults)
                afile.write(
                    line.format(
                        two_theta=two_theta,
                        intensity=intensity,
                        hkls=hkls,
                        mults=mults,
                        d_hkl=d_hkl))
        obj = PowderDiffractionFile(tmp_path, file_name)
        obj.write()

    return file_name


[docs]class VCD_Spectrum(SpectrumFile):
    """
    Manage a VCD (Vibrational Circular Dichroism) file.

    Note that this class expects it's data to be provided upon initialization via SpectrumFile's init() fxn's spectrum_data argument,
    as opposed to filled after initialization via reading a file.
    
    In the future, if we want to initialize via reading a file, we'll have to implement a getData() as in PowderDiffractionFile.
    """

    FREQ_KEY = 'r_j_Frequency_(cm-1)'
    FREQ_TITLE = 'Frequency (cm-1)'
    ROT_STR_KEY = 'r_j_Rotational_Strength_(10**-40_esu**2_cm**2)'
    ROT_STR_TITLE = 'Rotational Strength (10**-40 esu**2 cm**2)'

    # use _j_ properties so that spectrum plot automatically
    # recognizes the file
    SPECTRUM_KEY = 's_j_spectrum_type'
    SPECTRUM_TITLE = 'Vibrational Circular Dichroism'
    X_KEY = 's_j_x_label'
    X_ALIAS = FREQ_KEY
    Y_KEY = 's_j_y_label'
    Y_ALIAS = ROT_STR_KEY

    HEADERS = OrderedDict([
        (SPECTRUM_KEY, SPECTRUM_TITLE),
        (X_KEY, X_ALIAS),
        (Y_KEY, Y_ALIAS)
    ]) # yapf: disable

    COLUMNS = OrderedDict([
        (FREQ_KEY, FREQ_TITLE),
        (ROT_STR_KEY, ROT_STR_TITLE),
        ('s_j_Symmetry', 'Symmetry'),
        ('r_j_edtm_x', 'edtm x'),
        ('r_j_edtm_y', 'edtm y'),
        ('r_j_edtm_z', 'edtm z'),
        ('r_j_mdtm_x', 'mdtm x'),
        ('r_j_mdtm_y', 'mdtm y'),
        ('r_j_mdtm_z', 'mdtm z'),
    ]) # yapf: disable

    # column types
    TYPES = [float, float, str, float, float, float, float, float, float]


[docs]class ECD_Spectrum(SpectrumFile):
    """
    Manage a ECD (Electronic Circular Dichroism) file.

    Note that this class expects it's data to be provided upon initialization via SpectrumFile's init() fxn's spectrum_data argument,
    as opposed to filled after initialization via reading a file.
    In the future, if we want to initialize via reading a file, we'll have to implement a getData() as in PowderDiffractionFile.
    """

    ENERGY_KEY = 'r_j_Electronic_Circular_Dichroism_Energy_(eV)'
    ENERGY_TITLE = 'ECD Energy (eV)'
    INTENSITY_KEY = 'r_j_Molar_Circular_Dichroism_(L_mol-1_cm-1)'
    INTENSITY_TITLE = 'Molar Circular Dichroism (L mol-1 cm-1)'

    # use _j_ properties so that spectrum plot automatically
    # recognizes the file
    SPECTRUM_KEY = 's_j_spectrum_type'
    SPECTRUM_TITLE = 'Electronic Circular Dichroism'
    X_KEY = 's_j_x_label'
    X_ALIAS = ENERGY_KEY
    Y_KEY = 's_j_y_label'
    Y_ALIAS = INTENSITY_KEY

    HEADERS = OrderedDict([
        (SPECTRUM_KEY, SPECTRUM_TITLE),
        (X_KEY, X_ALIAS),
        (Y_KEY, Y_ALIAS)
    ]) # yapf: disable

    COLUMNS = OrderedDict([
        (ENERGY_KEY, ENERGY_TITLE),
        (INTENSITY_KEY, INTENSITY_TITLE),
        ('s_j_Symmetry', 'Symmetry'),
    ]) # yapf: disable

    # column types
    TYPES = [float, float, str]


[docs]class IR_Spectrum(SpectrumFile):
    """
    Manage an IR (Infrared/Vibrational) file.

    Note that this class expects it's data to be provided upon initialization via SpectrumFile's init() fxn's spectrum_data argument,
    as opposed to filled after initialization via reading a file.
    In the future, if we want to initialize via reading a file, we'll have to implement a getData() as in PowderDiffractionFile.
    """

    FREQ_KEY = 'r_j_Frequency_(cm-1)'
    FREQ_TITLE = 'Frequency (cm-1)'
    INTENSITY_KEY = 'r_j_Intensity_(km/mol)'
    INTENSITY_TITLE = 'Intensity (km/mol)'

    # use _j_ properties so that spectrum plot automatically
    # recognizes the file
    SPECTRUM_KEY = 's_j_spectrum_type'
    SPECTRUM_TITLE = 'Infrared Vibrational Frequencies'
    X_KEY = 's_j_x_label'
    X_ALIAS = FREQ_KEY
    Y_KEY = 's_j_y_label'
    Y_ALIAS = INTENSITY_KEY

    HEADERS = OrderedDict([
        (SPECTRUM_KEY, SPECTRUM_TITLE),
        (X_KEY, X_ALIAS),
        (Y_KEY, Y_ALIAS)
    ]) # yapf: disable

    COLUMNS = OrderedDict([
        (FREQ_KEY, FREQ_TITLE),
        (INTENSITY_KEY, INTENSITY_TITLE),
        ('s_j_Symmetry', 'Symmetry'),
    ]) # yapf: disable

    # column types
    TYPES = [float, float, str]