"""
Copyright Schrodinger, LLC. All rights reserved.
"""
from past.utils import old_div
import numpy
from collections import OrderedDict
from scipy import sparse
from scipy.sparse.linalg import spsolve
from scipy.signal import savgol_filter
from scipy.spatial import ConvexHull
from schrodinger.application.matsci import xrd
from schrodinger.infra import mm
from schrodinger.infra import table
from schrodinger.utils import fileutils
from schrodinger.application.matsci.nano.xtal import get_pymatgen_structure
UVVIS = 'UV/Vis'
VIB_SPECTRUM_MIN = 0
VIB_SPECTRUM_MAX = 4000
UV_SPECTRUM_MIN = 13333 # 750 nm
UV_SPECTRUM_MAX = 100000 # 350 nm
GAUSSIAN = 'Gaussian'
LORENTZIAN = 'Lorentzian'
PDP_DEFAULT_JOB_NAME = 'powder_diffraction_pattern'
PDP_FILE_EXT = '.pdp.spm'
POLYORDER = 'polyorder'
WINDOW_WIDTH = 'win_width'
LAMBDA = 'lambda_val'
WTFACTOR = 'wt_factor'
DEFAULT_LAMBDA = 5.0
DEFAULT_WT_FACTOR = 1e-5
[docs]def rubberband_method(x_data, y_data, parameters):
"""
Apply a rubberband to the data
:type x_data: list
:param x_data: list of 2 theta values
:type y_data: list
:param y_data: list of intensity values
:type parameters: SimpleNamespace
:param parameters: all the tunable parameters for the method
:rtype: list
:return: list of intensity values
"""
# Find rough base line curve
vert = ConvexHull(numpy.array(list(zip(x_data, y_data)))).vertices
# Rotate the base line to make it flat
vert = numpy.roll(vert, -vert.argmin())
vert = vert[:vert.argmax()]
# Interpret the y values for the rotate curve for the original x data
return numpy.interp(x_data, x_data[vert], y_data[vert])
[docs]def least_square_method(x_data, y_data, parameters):
"""
Smooth data using least sqaure method
:type x_data: list
:param x_data: list of 2 theta values
:type y_data: list
:param y_data: list of intensity values
:type parameters: SimpleNamespace
:param parameters: all the tunable parameters for the method
:rtype: list
:return: list of intensity values
"""
lamda_val = (10**getattr(parameters, LAMBDA))
wt_factor = getattr(parameters, WTFACTOR)
data_len = len(y_data)
diag_mat = sparse.diags(
[1, -2, 1], [0, -1, -2], shape=(data_len, data_len - 2))
wts = numpy.ones(data_len)
# Iterate multiple times to smooth data
for index in range(10):
spdiags_mat = sparse.spdiags(wts, 0, data_len, data_len)
back_mat = spdiags_mat + lamda_val * diag_mat.dot(diag_mat.transpose())
back_sub = spsolve(back_mat, wts * y_data)
wts = wt_factor * (y_data > back_sub) + (1 - wt_factor) * (
y_data < back_sub)
return back_sub
[docs]def savgol_method(x_data, y_data, parameters):
"""
Apply a Savitzky-Golay filter to the data
:type x_data: list
:param x_data: list of 2 theta values
:type y_data: list
:param y_data: list of intensity values
:type parameters: SimpleNamespace
:param parameters: all the tunable parameters for the method
:rtype: list
:return: list of intensity values
"""
window_size = getattr(parameters, WINDOW_WIDTH)
polyorder = getattr(parameters, POLYORDER)
return savgol_filter(y_data, window_size, polyorder, mode='mirror')
[docs]def convert_nm_and_wavenumber(value):
"""
Convert value in wavenumbers to nanometers or vice versa, the conversion
is the same.
:type value: int or float
:param value: Number to convert
:rtype: float
:return: value converted from wavenumbers to nanometers or vice versa
"""
return old_div(10000000.0, float(value))
[docs]def get_file_data(filename):
"""
Read the data from the file
:type name: str
:param name: The name for the spectrum. If none, it will be derived
from the file name.
:rtype: tuple of (str, str, schrodinger.infra.table.Table)
:return: First two members of the tuple are the s_j_x_lable and s_j_y_label
properties. The third member of the tuple is a Table object from the
schrodinger.infra.table module. This object holds the actual data.
"""
# Grab the X and Y axis labels
mm.m2io_initialize(mm.error_handler)
file_handle = mm.m2io_open_file(filename, mm.M2IO_READ_FORWARD)
mm.m2io_goto_next_block(file_handle, 'f_m_table')
xaxis_prop = mm.m2io_get_string(file_handle, ['s_j_x_label'])[0]
intensity_prop = mm.m2io_get_string(file_handle, ['s_j_y_label'])[0]
mm.m2io_close_file(file_handle)
mytable = table.Table(filename)
return xaxis_prop, intensity_prop, mytable
[docs]def generate_curve(mytable,
line_width,
xprop,
intensity,
x_scale=1.0,
uvvis=False,
smin=None,
smax=None,
stride=None,
function=LORENTZIAN,
line_width_nm=False):
"""
Generate a full curve for a series of frequency/intensity lines. The
curve is generated by broadening the lines with Gaussian or Lorentzian
curves centered on each line, and summing the curves together.
:type mytable: table.Table object
:param mytable: table of raw data
:type line_width: float
:param line_width: the half-bandwidth to use when generating spectrum
:type x_scale: float
:param x_scale: the x-axis scale factor
:type intensity: str
:param intensity: The label of the intensity property in mytable
:type xprop: str
:param xprop: The label of the x-axis property in mytable
:type uvvis: bool
:param uvvis: True if this is a uv/vis spectrum, False if not (default
is False, vibrational spectrum)
:type smin: int
:param smin: The minimum x-value in wavenumbers.
:type smax: int
:param smax: The maximum x-value in wavenumbers.
:type stride: int
:param stride: Compute the intensity every stride values of x
:type function: str
:param function: The function used to broaden singular intensity values
to a full spectrum curve. Default is Lorentzian, other option is
Gaussian. Use the LORENTZIAN or GAUSSIAN module constants.
:type line_width_nm: bool
:param line_width_nm: Linewidth is given in nm. Default is
wavenumbers. Not used for non-UV/Vis spectra.
:rtype: tuple of 1-D numpy arrays
:return: (xvalues, yvalues) with xvalues running from smin to smax with Xn =
X(n-1) + stride. Note that the X unit will be wavenumbers for all spectra.
"""
if uvvis:
if not stride:
stride = 10
if not smin:
smin = UV_SPECTRUM_MIN
if not smax:
smax = UV_SPECTRUM_MAX
else:
if not stride:
stride = 5
if not smin:
smin = VIB_SPECTRUM_MIN
if not smax:
smax = VIB_SPECTRUM_MAX
xvals = numpy.arange(smin, smax, stride)
yvals = numpy.zeros(len(xvals))
half_original_linewidth = line_width * 0.5
# Simulate the spectrum:
is_gaussian = function == GAUSSIAN
for irow in range(1, len(mytable) + 1):
pos = mytable[irow][xprop] * x_scale
if uvvis:
pos = old_div(pos, 0.00012398)
if line_width_nm:
# We compute the spectrum in wavenumber space, but a
# bandwidth in NM is not linear (or constant) in this space
# Figure out what the bandwidth should be by converting the
# peak to nanometers, compute the long and short sides of
# the bandwidth, then convert those back to wavenumbers and
# take half the difference.
nm_pos = convert_nm_and_wavenumber(pos)
nm_short = nm_pos - half_original_linewidth
nm_long = nm_pos + half_original_linewidth
wave_short = convert_nm_and_wavenumber(nm_short)
wave_long = convert_nm_and_wavenumber(nm_long)
line_width = old_div((wave_short - wave_long), 2.0)
try:
height = mytable[irow][intensity]
except KeyError:
return None, None
if line_width == 0:
# No line-broadening
# The formula below finds the index of the closest x value
spot = int(round(old_div((pos - xvals[0]), float(stride))))
if spot >= 0:
try:
yvals[spot] = height
except IndexError:
pass
elif is_gaussian:
yvals += height * numpy.exp(
old_div(-(xvals - pos)**2, (2 * line_width**2)))
else:
# Lorentzian:
yvals += old_div(height, (1 + (old_div(
(xvals - pos), line_width))**2))
return xvals, yvals
[docs]class PowderDiffractionException(Exception):
pass
[docs]def get_powder_diffraction_pattern(st,
wave_length=None,
debye_waller_factors=None,
two_theta_range=(0, 90),
compute_intensities=True):
"""
Get a pymatgen powder diffraction pattern.
:type st: `schrodinger.structure.Structure`
:param st: the structure
:type wave_length: float
:param wave_length: the wave length in Ang.
:type debye_waller_factors: dict
:param debye_waller_factors: the temperature dependent Debye-Waller factors,
keys are elemental symbols, values are factors
in Ang.^2
:type two_theta_range: tuple or None
:param two_theta_range: (min, max) pair tuple specifying the x-axis two theta
range in degrees over which to calculate the powder diffraction pattern
or None if it is to be calculated at all diffracted beams within the
limiting sphere of 2 * radius / wave_length
:type compute_intensities: bool
:param compute_intensities: If True, compute peaks intensities
(requires atoms in the structure), otherwise only peak locations
:raise: PowderDiffractionException if there is an issue
:rtype: pymatgen.analysis.diffraction.core.DiffractionPattern
:return: the pymatgen powder diffraction pattern
"""
if compute_intensities:
assert st.atom_total
pymatgen_st = get_pymatgen_structure(st)
if wave_length is None:
wave_length = xrd.WAVELENGTHS['CuKa']
obj = xrd.XRDCalculator(
wavelength=wave_length,
symprec=0,
debye_waller_factors=debye_waller_factors)
# see MATSCI-7468 where the input parameters result in zero peaks
try:
pattern = obj.get_pattern(
pymatgen_st,
scaled=False,
two_theta_range=two_theta_range,
compute_intensities=compute_intensities)
except ValueError as err:
raise PowderDiffractionException(str(err))
return pattern
[docs]class SpectrumFile(object):
"""
Manage a spectrum file.
"""
# the following need to be defined in subclasses
HEADERS = OrderedDict()
COLUMNS = OrderedDict()
[docs] def __init__(self, data_file_name, spm_file_name, spectrum_data=None):
"""
Create an instance.
:type data_file_name: str
:param data_file_name: the text file containing the data
:type spm_file_name: str
:param spm_file_name: the name of the *spm file to create
:type spectrum_data: list of lists
:param spectrum_data: Spectrum data. If provided, used to fill table/write file. Otherwise, data obtained from data_file_name
"""
self.data_file_name = data_file_name
self.spm_file_name = spm_file_name
self.table_obj = None
self.spectrum_data = spectrum_data
[docs] @staticmethod
def getData(data_file_name, separator=None, types=None):
"""
Return the data from the given data file.
:type data_file_name: str
:param data_file_name: the text file containing the data
:type separator: str
:param separator: the data separator
:type types: list
:param types: a list of types used to type cast the data
:rtype: list
:return: contains data tuples
"""
with open(data_file_name, 'r') as afile:
lines = afile.readlines()
data = []
for aline in lines:
values = []
for idx, value in enumerate(aline.split(separator)):
if types:
value = types[idx](value)
else:
value = float(value)
values.append(value)
data.append(tuple(values))
return data
def _setUp(self):
"""
Set up.
"""
mm.mmtable_initialize(mm.error_handler)
mm.mmerr_level(mm.error_handler, mm.MMERR_OFF)
handle = mm.mmtable_new()
self.table_obj = table.Table(table_handle=handle)
def _buildTable(self):
"""
Build the table. If spectrum_data provided in init(), fill table with that data. Otherwise, read data from self.data_file_name.
"""
# emulate the *spm files obtained by running Jaguar
for key, value in self.HEADERS.items():
mm.mmtable_set_string(self.table_obj, key, value)
for acol, aname in self.COLUMNS.items():
self.table_obj.insertColumn(mm.MMTABLE_END, acol)
idx = self.table_obj.getColumnIndex(acol)
self.table_obj.columnSetName(idx, aname)
#grab data for filling table
if self.spectrum_data:
data = self.spectrum_data
else:
data = self.getData(self.data_file_name)
# table columns and rows are indexed from 1
for idx, values in enumerate(data, 1):
self.table_obj.insertRow(mm.MMTABLE_END, 0)
for jdx, value in enumerate(values, 1):
self.table_obj[idx][jdx] = value
def _createFile(self):
"""
Create the *spm file.
"""
mm.mmtable_m2io_write(self.spm_file_name, self.table_obj, True)
def _tearDown(self):
"""
Tear down.
"""
mm.mmerr_level(mm.error_handler, mm.MMERR_WARNING)
mm.mmtable_terminate()
[docs] def write(self):
"""
Write the *spm file.
"""
self._setUp()
self._buildTable()
self._createFile()
self._tearDown()
[docs]class PowderDiffractionFile(SpectrumFile):
"""
Manage a powder diffraction pattern file.
"""
TWO_THETA_KEY = 'r_matsci_Two_Theta_(degrees)'
TWO_THETA_TITLE = '2*Theta/deg.'
INTENSITY_KEY = 'r_matsci_Intensity'
INTENSITY_TITLE = 'Intensity'
HKLS_KEY = 's_matsci_HKLs'
HKLS_TITLE = 'HKLs'
MULTIPLICITIES_KEY = 's_matsci_HKL_Multiplicities'
MULTIPLICITIES_TITLE = 'Mults'
INTERPLANAR_SPACING_KEY = 'r_matsci_Interplanar_Spacing_(Ang.)'
INTERPLANAR_SPACING_TITLE = 'd_HKL/Ang.'
# use _j_ properties so that spectrum plot automatically
# recognizes the file
SPECTRUM_KEY = 's_j_spectrum_type'
SPECTRUM_TITLE = 'Powder Diffraction Pattern'
X_KEY = 's_j_x_label'
X_ALIAS = TWO_THETA_KEY
Y_KEY = 's_j_y_label'
Y_ALIAS = INTENSITY_KEY
HEADERS = OrderedDict([
(SPECTRUM_KEY, SPECTRUM_TITLE),
(X_KEY, X_ALIAS),
(Y_KEY, Y_ALIAS)
]) # yapf: disable
COLUMNS = OrderedDict([
(TWO_THETA_KEY, TWO_THETA_TITLE),
(INTENSITY_KEY, INTENSITY_TITLE),
(HKLS_KEY, HKLS_TITLE),
(MULTIPLICITIES_KEY, MULTIPLICITIES_TITLE),
(INTERPLANAR_SPACING_KEY, INTERPLANAR_SPACING_TITLE)
]) # yapf: disable
SEPARATOR = '; '
# column types
TYPES = [float, float, str, str, float]
[docs] @staticmethod
def getData(data_file_name, separator=None, types=None):
"""
Return the data from the given data file.
:type data_file_name: str
:param data_file_name: the text file containing the data
:type separator: str
:param separator: the data separator
:type types: list
:param types: a list of types used to type cast the data
:rtype: list
:return: contains data tuples
"""
separator = PowderDiffractionFile.SEPARATOR
types = PowderDiffractionFile.TYPES
return SpectrumFile.getData(
data_file_name, separator=separator, types=types)
[docs]def write_powder_diffraction_pattern(st,
wave_length=None,
debye_waller_factors=None,
two_theta_range=(0, 90),
file_name=None):
"""
Write a powder diffraction pattern file.
:type st: `schrodinger.structure.Structure`
:param st: the structure
:type wave_length: float
:param wave_length: the wave length in Ang.
:type debye_waller_factors: dict
:param debye_waller_factors: the temperature dependent Debye-Waller factors,
keys are elemental symbols, values are factors
in Ang.^2
:type two_theta_range: tuple or None
:param two_theta_range: (min, max) pair tuple specifying the x-axis two theta
range in degrees over which to calculate the powder diffraction pattern
or None if it is to be calculated at all diffracted beams within the
limiting sphere of 2 * radius / wave_length
:type file_name: str
:param file_name: the file name to which the pattern will be written
:rtype: str
:return: the file name to which the pattern was written
"""
pdp_obj = get_powder_diffraction_pattern(
st,
wave_length=wave_length,
debye_waller_factors=debye_waller_factors,
two_theta_range=two_theta_range)
data = zip(pdp_obj.x, pdp_obj.y, pdp_obj.hkls, pdp_obj.d_hkls)
if not file_name:
file_name = PDP_DEFAULT_JOB_NAME + PDP_FILE_EXT
line = ['{two_theta}', '{intensity}', '{hkls}', '{mults}', '{d_hkl}\n']
line = PowderDiffractionFile.SEPARATOR.join(line)
with fileutils.tempfilename(prefix='data', suffix='.txt') as tmp_path:
with open(tmp_path, 'w') as afile:
for two_theta, intensity, hkl_dicts, d_hkl in data:
hkls, mults = [], []
for hkl_dict in hkl_dicts:
# the following 'hkl' and 'multiplicity' keys are
# from pymatgen
hkls.append(str(hkl_dict['hkl']))
mults.append(str(hkl_dict['multiplicity']))
hkls, mults = ', '.join(hkls), ', '.join(mults)
afile.write(
line.format(
two_theta=two_theta,
intensity=intensity,
hkls=hkls,
mults=mults,
d_hkl=d_hkl))
obj = PowderDiffractionFile(tmp_path, file_name)
obj.write()
return file_name
[docs]class VCD_Spectrum(SpectrumFile):
"""
Manage a VCD (Vibrational Circular Dichroism) file.
Note that this class expects it's data to be provided upon initialization via SpectrumFile's init() fxn's spectrum_data argument,
as opposed to filled after initialization via reading a file.
In the future, if we want to initialize via reading a file, we'll have to implement a getData() as in PowderDiffractionFile.
"""
FREQ_KEY = 'r_j_Frequency_(cm-1)'
FREQ_TITLE = 'Frequency (cm-1)'
ROT_STR_KEY = 'r_j_Rotational_Strength_(10**-40_esu**2_cm**2)'
ROT_STR_TITLE = 'Rotational Strength (10**-40 esu**2 cm**2)'
# use _j_ properties so that spectrum plot automatically
# recognizes the file
SPECTRUM_KEY = 's_j_spectrum_type'
SPECTRUM_TITLE = 'Vibrational Circular Dichroism'
X_KEY = 's_j_x_label'
X_ALIAS = FREQ_KEY
Y_KEY = 's_j_y_label'
Y_ALIAS = ROT_STR_KEY
HEADERS = OrderedDict([
(SPECTRUM_KEY, SPECTRUM_TITLE),
(X_KEY, X_ALIAS),
(Y_KEY, Y_ALIAS)
]) # yapf: disable
COLUMNS = OrderedDict([
(FREQ_KEY, FREQ_TITLE),
(ROT_STR_KEY, ROT_STR_TITLE),
('s_j_Symmetry', 'Symmetry'),
('r_j_edtm_x', 'edtm x'),
('r_j_edtm_y', 'edtm y'),
('r_j_edtm_z', 'edtm z'),
('r_j_mdtm_x', 'mdtm x'),
('r_j_mdtm_y', 'mdtm y'),
('r_j_mdtm_z', 'mdtm z'),
]) # yapf: disable
# column types
TYPES = [float, float, str, float, float, float, float, float, float]
[docs]class ECD_Spectrum(SpectrumFile):
"""
Manage a ECD (Electronic Circular Dichroism) file.
Note that this class expects it's data to be provided upon initialization via SpectrumFile's init() fxn's spectrum_data argument,
as opposed to filled after initialization via reading a file.
In the future, if we want to initialize via reading a file, we'll have to implement a getData() as in PowderDiffractionFile.
"""
ENERGY_KEY = 'r_j_Electronic_Circular_Dichroism_Energy_(eV)'
ENERGY_TITLE = 'ECD Energy (eV)'
INTENSITY_KEY = 'r_j_Molar_Circular_Dichroism_(L_mol-1_cm-1)'
INTENSITY_TITLE = 'Molar Circular Dichroism (L mol-1 cm-1)'
# use _j_ properties so that spectrum plot automatically
# recognizes the file
SPECTRUM_KEY = 's_j_spectrum_type'
SPECTRUM_TITLE = 'Electronic Circular Dichroism'
X_KEY = 's_j_x_label'
X_ALIAS = ENERGY_KEY
Y_KEY = 's_j_y_label'
Y_ALIAS = INTENSITY_KEY
HEADERS = OrderedDict([
(SPECTRUM_KEY, SPECTRUM_TITLE),
(X_KEY, X_ALIAS),
(Y_KEY, Y_ALIAS)
]) # yapf: disable
COLUMNS = OrderedDict([
(ENERGY_KEY, ENERGY_TITLE),
(INTENSITY_KEY, INTENSITY_TITLE),
('s_j_Symmetry', 'Symmetry'),
]) # yapf: disable
# column types
TYPES = [float, float, str]
[docs]class IR_Spectrum(SpectrumFile):
"""
Manage an IR (Infrared/Vibrational) file.
Note that this class expects it's data to be provided upon initialization via SpectrumFile's init() fxn's spectrum_data argument,
as opposed to filled after initialization via reading a file.
In the future, if we want to initialize via reading a file, we'll have to implement a getData() as in PowderDiffractionFile.
"""
FREQ_KEY = 'r_j_Frequency_(cm-1)'
FREQ_TITLE = 'Frequency (cm-1)'
INTENSITY_KEY = 'r_j_Intensity_(km/mol)'
INTENSITY_TITLE = 'Intensity (km/mol)'
# use _j_ properties so that spectrum plot automatically
# recognizes the file
SPECTRUM_KEY = 's_j_spectrum_type'
SPECTRUM_TITLE = 'Infrared Vibrational Frequencies'
X_KEY = 's_j_x_label'
X_ALIAS = FREQ_KEY
Y_KEY = 's_j_y_label'
Y_ALIAS = INTENSITY_KEY
HEADERS = OrderedDict([
(SPECTRUM_KEY, SPECTRUM_TITLE),
(X_KEY, X_ALIAS),
(Y_KEY, Y_ALIAS)
]) # yapf: disable
COLUMNS = OrderedDict([
(FREQ_KEY, FREQ_TITLE),
(INTENSITY_KEY, INTENSITY_TITLE),
('s_j_Symmetry', 'Symmetry'),
]) # yapf: disable
# column types
TYPES = [float, float, str]