Source code for schrodinger.application.jaguar.textparser

"""
Classes for parsing Jaguar output files and accessing output properties
programmatically.

Copyright Schrodinger, LLC. All rights reserved.

"""

# Automatically have all classes inherit from object.

import datetime
import locale
import logging
import re
import time
from past.utils import old_div

import schrodinger.infra.mm as mm
import schrodinger.structure as structure
import schrodinger.utils.log
from schrodinger.application.jaguar import constants
from schrodinger.application.jaguar import jaguar_diff
from schrodinger.application.jaguar import results
from schrodinger.application.jaguar.mmjag import MmJag
from schrodinger.application.jaguar.scan import Scan
from schrodinger.job.jobcontrol import jobid_re

blank_line_re = re.compile(r"^\s*$")
zvar_tags_re = re.compile(r"[\*#]")
input_geometry_re = re.compile(r"^\s+Input geometry:")

_log = logging.getLogger("schrodinger.application.jaguar.output")
schrodinger.utils.log.default_logging_config()

###
# .out text file parsing
#

#
# Set textparser_debug to True to have the textparser_trace decorator wrap
# the callback functions.
#
textparser_debug = False

#-----------------------------------------------------------------------------


class JaguarParseError(Exception):
    pass


def textparser_trace(func):
    """
    A decorator that will print the callback function name when it is called.

    """

    def wrapper(tp, jo, m, it):
        if (m.groups()):
            print("Calling textparser function %s (match groups = %s)..." % \
                (func.__name__, m.groups()))
        else:
            print("Calling textparser function %s..." % (func.__name__,))
        return func(tp, jo, m, it)

    return wrapper


class TextParser(object):
    """
    A parser to create a JaguarOutput object from a Jaguar output file.

    The basic organization of this parser is that of a number of line
    processing callback functions triggered by regular expressions.

    """
    # A dictionary to hold functions, indexed by regular expressions.
    callback = {}
    first_line_re = re.compile(r"^Job .+ started on (\S+) at (\w.*)$")

    def __init__(self, jaguar_output, file_iter=None):
        """
        Parameters

        file_iter (iterator returning lines of Jaguar output file)

        jaguar_output (JaguarOutput instance)

        """
        self.irc_direction = None
        self.on_last_geopt_step = False
        self.file_iter = file_iter
        self.jag_out = jaguar_output
        # Useful for nops_opt_switch and NOFAIL restarts
        self.recomputing_energy = False
        self.nofail_geopt = None

        # atom_list and mmjag_handle are for storing a geometry temporarily.
        # The geometry specification of a geometry step comes just before
        # the end of a geometry step. Rather than mess with the definitive
        # end of a geopt step, I'll just store things on the side for a bit.
        self.atom_list = []
        self.mmjag_handle = None

        # The irc_summary is
        self.irc_summary = []

        self.general_search_items = list(self.callback[None].items())
        self.search_items = list(
            self.callback["before pre"].items()) + self.general_search_items

    def _newResults(self, jo):
        """
        Add a new results object for a new geometry or scan step.

        Caller is responsible for storing jo._results before calling this
        if it needs to be kept. For example, storing results from a geometry
        in a geopt sequence of results.

        """

        # See JAGUAR-7003
        if self.nofail_geopt:
            jo.last_results = jo.geopt_step[self.nofail_geopt - 1]
            self.nofail_geopt = None
        else:
            jo.last_results = jo._results

        # Initialize new JaguarResults instance
        jo._results = results.JaguarResults()
        if self.atom_list:
            jo._results.atom = self.atom_list
            self.atom_list = []
            jo._results._mmjag_handle = self.mmjag_handle
            self.mmjag_handle = None

        else:
            # Initial gas phase geometry doesn't get printed out, so use the
            # last_results mmjag_handle and atom names.
            jo._results.atom = [
                results.JaguarAtomicResults(a.index, a.atom_name)
                for a in jo.last_results.atom
            ]
            if jo.last_results._mmjag_handle is not None:
                jo._results._mmjag_handle = jo.last_results._mmjag_handle

    def endGeopt(self, jo):
        """
        Clean up at the end of a geopt step.

        Adds the current results to the geopt list and creates a new current
        results object if appropriate.

        """
        jo.geopt_step.append(jo._results)

        # Allows us to store the gas phase part of a solution phase optimization
        if jo.opts.solvation:
            if jo._results.solution_phase_energy:
                jo.solution_phase_geopt_step.append(jo._results)
            else:
                jo.gas_phase_geopt_step.append(jo._results)

        # Many property calculations take place after the last geopt step
        # and need to be added to the same JaguarResults instance.
        create_new_results = not self.on_last_geopt_step

        # Special cases for making a new JaguarResults object
        if jo.opts.solvation and jo._results.solution_phase_energy is None:
            create_new_results = True
        if self.irc_direction is not None:
            create_new_results = True
        if self.recomputing_energy:
            self.recomputing_energy = False
            create_new_results = True

        # Create a new JaguarResults object if appropriate.
        if create_new_results:
            self._newResults(jo)
        else:
            jo.last_results = jo._results

        # Reset the last geopt step indicator.
        self.on_last_geopt_step = False

        if self.irc_direction == 'Forward' or self.irc_direction == 'Downhill':
            if len(jo.irc_geopt_step) == 0:
                # If this is the first forward step, break out the first
                # geometry as a separate irc_geopt_step element to be the
                # rxn coord 0.00 step.
                jo.irc_geopt_step.append(jo.geopt_step[:1])
                jo.irc_geopt_step.append(jo.geopt_step[1:])
            else:
                jo.irc_geopt_step.append(jo.geopt_step)
            jo.geopt_step = []
        elif self.irc_direction == 'Reverse':
            jo.irc_geopt_step.insert(0, jo.geopt_step)
            jo.geopt_step = []
        elif self.irc_direction is not None:
            raise JaguarParseError("Bad IRC direction.")

        self.irc_direction = None

    def endScan(self, jo):
        """
        Clean up at the end of a scan step.

        Adds the current results to the scan list and creates a new current
        results object. Or, if this a relaxed scan, archives the geopt steps
        to the scan list and creates an empty geopt_step list.

        """
        if jo.geopt_step:
            jo.scan_geopt_step.append(jo.geopt_step)
            jo.geopt_step = []
            self._newResults(jo)
        else:
            jo.scan_geopt_step.append([jo._results])
            self._newResults(jo)

    def endIRC(self, direction):
        """
        Set state indicating the end of an IRC step and its direction.

        direction (str)
            Must be 'Forward' or 'Reverse'.

        """
        self.irc_direction = direction

    def parse(self, jaguar_output=None):
        """
        Parse the provided file iterator.

        Return a JaguarOutput instance populated with properties parsed from
        the output file.

        Parameters

        jaguar_output (JaguarOutput instance)
            If jaguar_output is provided, that instance will be populated
            with the properties parsed from the output file. Otherwise,
            the object provided to the TextParser constructor will be used.

        :raise StopIteration: In cases of unexpected file termination
        :raise JaguarParseError: For parsing errors
        """
        if jaguar_output is not None:
            self.jag_out = jaguar_output
        elif self.jag_out is None:
            raise JaguarParseError
        jaguar_output = self.jag_out

        # The following try/except is needed for an empty file
        try:
            line = next(self.file_iter)
        except StopIteration:
            line = ''

        m = self.first_line_re.match(line)
        if m:
            jaguar_output.host = m.group(1)
            lc = locale.getlocale(locale.LC_TIME)
            try:
                locale.setlocale(locale.LC_TIME, "C")
                ts = time.mktime(time.strptime(m.group(2)))
                jaguar_output.start_time = datetime.datetime.fromtimestamp(ts)
            finally:
                locale.setlocale(locale.LC_TIME, lc)

        for line in self.file_iter:
            for pattern, func in self.search_items:
                m = pattern.search(line)
                if m:
                    # Note - the func call below may try to read another line
                    # from the file. This can raise StopIteration in the case of
                    # unexpected file termination.
                    func(self, jaguar_output, m, self.file_iter)
                    break

        self._finalize(jaguar_output)

        self.jag_out = None
        return jaguar_output

    def _finalize(self, jo):
        """
        Actions to be taken at the end of the file.

        """
        if jo.geopt_step:
            jo.last_results = jo._results

        # There's no indicator on the last scan step
        if jo.scan_geopt_step:
            if jo.geopt_step:
                jo.scan_geopt_step.append(jo.geopt_step)
                jo.geopt_step = []
            else:
                jo.scan_geopt_step.append([jo._results])
            jo.last_results = jo._results

        if self.irc_summary:
            for line, step in zip(self.irc_summary, jo.irc_geopt_step):
                field = line.split()

                # Jaguar-6649 the reaction coordinate field can have a '*' indicating
                #             an approximate value.  Remove it if it exists before
                #             converting to a float.
                step[-1].reaction_coord = float(field[1].strip('*'))
                energy = float(field[2])
                # Make a very loose comparison here - it's just a sanity
                # check.
                if abs(energy - step[-1].energy) > 2.0e-6:
                    raise JaguarParseError("Bad IRC summary correspondence.")
                step[-1].transition_state_components = \
                    [float(c) for c in field[3:]]

            self.irc_summary = []


#-----------------------------------------------------------------------------
#
# See PEP 318 or the 2.4 "What's New" document for discussion of
# decorators.
#
def callback(prog, regexp=None, debug=False, parser_class=TextParser):
    """
    A decorator to add a function to the TextParser callback dictionary.

    The 'callback' dictionary consists of dictionaries for individual
    programs, each one indexed by the regular expression that needs to be
    satisfied to invoke the function.

    Arguments

    prog (str)
        A Jaguar subprogram to which searching will be restricted.
        Set to None if the whole file needs to be searched.

    regexp (str)
        The regular expression that needs to be matched. This can be None if
        multiple decorators are being applied (to restrict to multiple
        subprograms) and the inner decorator specified a regexp.

    """
    global textparser_debug
    if textparser_debug:
        debug = True

    def deco(func):
        progdict = parser_class.callback.setdefault(prog, {})
        if regexp is not None:
            func.regexp = regexp
        if not debug:
            progdict[re.compile(func.regexp)] = func
        else:
            progdict[re.compile(func.regexp)] = textparser_trace(func)
        return func

    return deco


#-----------------------------------------------------------------------------
#
# Text parsing functions; all must take the TextParser object, the
# JaguarOutput object to operate on, the regular expression match, and the
# file iterator.
#
# The docstrings are the regular expressions that must be satisfied to
# invoke the function.
#


@callback("scf", r"^\s*number of electrons\.+\s+(\d+)")
def nelectron(tp, jo, m, it):
    jo.nelectron = int(m.group(1))


@callback("scf", r"^\s*Sz\*\(Sz\+1\)[\s\.]+(\S+)")
def sz2(tp, jo, m, it):
    jo._results.sz2 = float(m.group(1))


@callback("scf", r"^\s*<S\*\*2>[\s\.]+(\S+)")
def s2(tp, jo, m, it):
    jo._results.s2 = float(m.group(1))


@callback("scf", r"^\s*GVB:\s+(\S+)")
def npair(tp, jo, m, it):
    jo.npair = int(m.group(1))


@callback("pre", r"net molecular charge:\s+(-?\d+)")
def molchg(tp, jo, m, it):
    jo.charge = int(m.group(1))


@callback("pre", r"multiplicity:\s+(\d+)")
def multip(tp, jo, m, it):
    jo.multiplicity = int(m.group(1))


@callback("pre", r"number of basis functions\.\.\.\.\s+(\d+)")
def nbasis(tp, jo, m, it):
    jo.nbasis = int(m.group(1))


@callback("pre", r"Molecular weight:\s+([0-9.]+)")
def mol_weight(tp, jo, m, it):
    jo.mol_weight = float(m.group(1))


@callback("pre", r"Stoichiometry:\s+(\w+)")
def stoichiometry(tp, jo, m, it):
    jo.stoichiometry = m.group(1)


@callback("pre", r"basis set:\s+(\S+)")
def basis_set(tp, jo, m, it):
    jo.basis = jaguar_diff.CaseInsensitiveString(m.group(1))


@callback("pre", r"Number of optimization coordinates:\s+(\d+)")
def coords_opt(tp, jo, m, it):
    jo.coords_opt = int(m.group(1))


@callback("pre", r"Number of independent coordinates:\s+(\d+)")
def coords_ind(tp, jo, m, it):
    jo.coords_ind = int(m.group(1))


@callback("pre", r"Number of non-redundant coordinates:\s+(\d+)")
def coords_nred(tp, jo, m, it):
    jo.coords_nred = int(m.group(1))


# In v79001, "constrained coordinates" was replaced with "frozen
# coordinates", and "harmonic constraints" was also added.
@callback("pre", r"Number of constrained coordinates:\s+(\d+)")
def coords_frozen1(tp, jo, m, it):
    jo.coords_frozen = int(m.group(1))


@callback("pre", r"Number of frozen coordinates:\s+(\d+)")
def coords_frozen2(tp, jo, m, it):
    jo.coords_frozen = int(m.group(1))


@callback("pre", r"Number of harmonic constraints:\s+(\d+)")
def coords_harmonic(tp, jo, m, it):
    jo.coords_harmonic = int(m.group(1))


@callback("pre", r"Solvation energy will be computed")
def solvation_job(tp, jo, m, it):
    jo.opts.solvation = True


@callback("pre", "Numerical 2nd derivatives will be computed")
def numerical_freqs(tp, jo, m, it):
    jo.opts.analytic_frequencies = False


@callback("pre",
          "Electrostatic potential fit to point charges on atomic centers")
def esp_fit_atoms(tp, jo, m, it):
    jo.opts.esp_fit = jo.opts.ESP_ATOMS


@callback("pre", "and bond midpoints")
def esp_fit_atoms_and_bonds(tp, jo, m, it):
    if jo.opts.esp_fit == jo.opts.ESP_ATOMS:
        jo.opts.esp_fit = jo.opts.ESP_ATOMS_AND_BOND_MIDPOINTS
    else:
        raise JaguarParseError("Unexpected occurrence of the phrase "
                               "'and bond midpoints'.")


@callback("read_external_gradient",
          r"Energy From External Program \(a\.u\.\)\s+(-?[\.\d]+)")
def external_program_energy(tp, jo, m, it):
    jo._results.external_program_energy = float(m.group(1))


@callback("ani_energy", r"ANI Energy \(a\.u\.\)\s+(-?[\.\d]+)")
def external_program_ani_energy(tp, jo, m, it):
    jo._results.ani_energy = float(m.group(1))


@callback("ani_energy",
          r"ANI Committee Standard Deviation \(a\.u\.\)\s+(-?[\.\d]+)")
def external_program_ani_energy_stddev(tp, jo, m, it):
    jo._results.ani_stddev = float(m.group(1))


@callback("scf", r"(^etot.*$)")
def etot(tp, jo, m, it):
    line = m.group(1)
    while line.startswith("etot"):
        jo._results.scf_iter.append(results.ScfIteration.fromEtotString(line))
        line = next(it)


# LDJ: negative lookbehind required so that we do not parse
#      the total internal energy for atoms, which is printed
#      from program scf.
@callback("scf", r"(?<!\()SCFE.*\s+(-?[\.\d]+)\s+hartrees")
def scfe(tp, jo, m, it):
    jo._results.scf_energy = float(m.group(1))


@callback("scf", r"\(B\)\s+Nuclear-nuclear\.+\s+(-?[\.\d]+)")
def nucrep1(tp, jo, m, it):
    jo._results.nuclear_repulsion = float(m.group(1))


@callback("scf", r"\(A\)\s+Nuclear repulsion\.+\s+(-?[\.\d]+)")
def nucrep2(tp, jo, m, it):
    jo._results.nuclear_repulsion = float(m.group(1))


@callback("scf", r"\(E\)\s+Total one-electron terms\.+\s+(-?[\.\d]+)")
def one_e_terms(tp, jo, m, it):
    jo._results.energy_one_electron = float(m.group(1))


@callback("scf", r"\(I\)\s+Total two-electron terms\.+\s+(-?[\.\d]+)")
def two_e_terms(tp, jo, m, it):
    jo._results.energy_two_electron = float(m.group(1))


@callback("scf", r"\(L\)\s+Electronic energy\.+\s+(-?[\.\d]+)")
def electronic_e(tp, jo, m, it):
    jo._results.energy_electronic = float(m.group(1))


@callback("scf", r"\(N0\).*correction\.+\s+(-?[\.\d]+)")
def aposteri_e(tp, jo, m, it):
    jo._results.energy_aposteri = float(m.group(1))


@callback("scf", r"(Alpha|Beta)? HOMO energy:\s+(\S+)")
def homo(tp, jo, m, it):
    type_ = m.group(1)
    if type_ == "Alpha":
        jo._results.homo_alpha = float(m.group(2))
    elif type_ == "Beta":
        jo._results.homo_beta = float(m.group(2))
    else:
        jo._results.homo = float(m.group(2))


@callback("scf", r"(Alpha|Beta)? LUMO energy:\s+(\S+)")
def lumo(tp, jo, m, it):
    type_ = m.group(1)
    if type_ == "Alpha":
        jo._results.lumo_alpha = float(m.group(2))
    elif type_ == "Beta":
        jo._results.lumo_beta = float(m.group(2))
    else:
        jo._results.lumo = float(m.group(2))


@callback("scf", r"(?P<type>Alpha|Beta)? Orbital energies"
          r"( \(hartrees\))?(?P<label>/symmetry label)?:")
def orbital_energies(tp, jo, m, it):
    orbs = []
    has_symmetry = m.group('label')
    type_ = m.group('type')
    while 1:
        line = next(it)
        fields = line.split()
        if not fields:
            break
        if has_symmetry:
            for ix in range(0, len(fields), 2):
                orbs.append(
                    results.Orbital(type_, len(orbs), float(fields[ix]),
                                    fields[ix + 1]))
        else:
            orbs.extend([
                results.Orbital(type_, index + len(orbs), float(f))
                for index, f in enumerate(fields)
            ])

    if type_ == "Alpha":
        jo._results.orbital_alpha = orbs
    elif type_ == "Beta":
        jo._results.orbital_beta = orbs
    else:
        jo._results.orbital = orbs


@callback("rolmp2")
@callback("gvblmp2")
@callback("lmp2", r"Total LMP2.*\s+(-?[\.\d]+)")
def mp2(tp, jo, m, it):
    jo._results.lmp2_energy = float(m.group(1))
    jo._results.gas_phase_energy = float(m.group(1))


# Note that rolmp2 and gvblmp2 energies are both produced by the gvblmp2
# subprogram.
#DSL: WHAT DOES THIS MEAN? THERE ARE TWO VALUES IN THIS PROGRAM TRYING TO
# POPULATE THE SAME RESULT
@callback("gvblmp2", r"Total GVB-LMP2.*\s+(-?[\.\d]+)")
def gvblmp2(tp, jo, m, it):
    jo._results.lmp2_energy = float(m.group(1))


@callback("pre")
@callback("scanner", r"(Input|new) geometry:")
def start_geometry(tp, jo, m, it):
    # "Starting geometries" are present in pre for simple geopts as "Input
    # geometry" and also in scanner steps for scan jobs as "new geometry".
    jo._results.atom, jo._results._mmjag_handle = geometry_read(it, jo.charge)


# geopt properties


@callback("geopt", r"new geometry:")
def geopt_geometry(tp, jo, m, it):
    tp.atom_list, tp.mmjag_handle = geometry_read(it, jo.charge)


@callback("pre", r"Path geometry: \(interpolated\, X\=.*\)")
def path_geometry(tp, jo, m, it):

    result = results.JaguarResults()
    result.atom, result._mmjag_handle = geometry_read(it, jo.charge)
    jo._path_step.append(result)


@callback("geopt", r"String geometry: \(iteration=.* point=.*energy=.*\)")
def sm_geometry(tp, jo, m, it):

    # extract string energy, point number and iteration number
    data = m.group(0).split()
    if len(data) == 9:
        iter = int(data[3])
        pt = int(data[5])
        enrgy = float(data[7])
    else:
        raise ValueError('Malformed string geometry identifier')

    result = results.JaguarResults()
    result.atom, result._mmjag_handle = geometry_read(it, jo.charge)

    # add data to JaguarResults instance
    if jo.opts.solvation:
        result.solution_phase_energy = enrgy
    else:
        result.scf_energy = enrgy

    # these attributes
    result.sm_point = pt
    result.sm_iter = iter

    jo._sm_n_points = max(pt, jo._sm_n_points)

    jo._sm_geopt_step.append(result)


def geometry_read(it, charge=None):
    """
    A utility function to read an input cartesian geometry from the output
    file.

    Return a tuple of (JaguarAtomicResults list, mmjag_handle). The
    mmjag_handle containing the parsed geometry in MMJAG_ZMAT1 if charge is
    provided. If not, the mmjag handle is None.

    """
    geometry_array = []
    atom_list = []
    mmjag_handle = None

    # Skip two lines
    line = next(it)
    if "angstroms" in line:
        units = mm.MMJAG_ANGSTROM_DEGREE
    else:
        units = mm.MMJAG_BOHR_DEGREE
    line = next(it)
    index = 0
    while 1:
        line = next(it)
        _log.debug("geometry_read: " + line)
        if blank_line_re.match(line):
            break
        geometry_array.append(line)
        index += 1
        fields = line.split()
        atom_list.append(results.JaguarAtomicResults(index, fields[0]))

    if charge is not None:
        mmjag_handle = MmJag(mm.mmjag_new())
        mm.mmjag_zmat_from_geostring(mmjag_handle, mm.MMJAG_ZMAT1, charge,
                                     units, "".join(geometry_array))

    return atom_list, mmjag_handle


@callback("geopt", r"end of geometry optimization iteration")
def end_geometry(tp, jo, m, it):
    tp.endGeopt(jo)


@callback(
    "geopt",
    r"(stopping optimization: maximum number of iterations reached|Geometry optimization complete)"
)
def stopping_optimization(tp, jo, m, it):
    tp.on_last_geopt_step = True


@callback("geopt", r"optimization seems to be stuck")
def geopt_stuck1(tp, jo, m, it):
    jo.geopt_stuck = True


@callback("geopt", r"Convergence category (\d+)")
def convergence_category(tp, jo, m, it):
    jo.convergence_category.append(int(m.group(1)))


@callback("geopt",
          r"IRC point found -\s+(Forward|Reverse|Downhill)\s+#\s+(\d+)")
def irc_point(tp, jo, m, it):
    tp.endIRC(m.group(1))


@callback("geopt", r"Summary of IRC Reaction Path:")
def irc_summary(tp, jo, m, it):
    for i in range(5):
        next(it)

    tp.irc_summary = []
    while 1:
        line = next(it)
        if blank_line_re.match(line):
            break
        tp.irc_summary.append(line)

    # Make sure the number of points is one less than the current number of
    # irc steps (still have to collect the last one).
    # However, if there is only 1 IRC pt in addition to the TS (for a total of
    # 2), then neither will have been collected yet - so check for such a case.
    pts_not_agree = (len(tp.irc_summary) != len(jo.irc_geopt_step) + 1)
    pts_not_agree_if_only_2 = \
        (len(tp.irc_summary) != 2 or len(jo.irc_geopt_step) != 0)

    if pts_not_agree and pts_not_agree_if_only_2:
        raise JaguarParseError(
            "%s%s: %d vs %d" %
            ("Summary of IRC reaction path does not agree with ",
             "number of IRC steps", len(tp.irc_summary),
             len(jo.irc_geopt_step)))

    coord_description_re = re.compile(r"\s*Coord\s+\d+\s+=\s+(\S+)\s*")
    jo.ts_component_descriptions = []
    while 1:
        line = next(it)
        m = coord_description_re.match(line)
        if not m:
            break
        jo.ts_component_descriptions.append(m.group(1))


@callback("geopt", r"restarting optimization from step")
def doubted_geom(tp, jo, m, it):
    jo._results.doubted_geom = True


#-----------------------------------------------------------------------------
# nops_opt_switch callbacks
@callback("scf", r'Energy computed with NOPS on.')
def nops_on(tp, jo, m, it):
    jo._results.nops_on = True


@callback("scf", r'Energy computed with NOPS off.')
def nops_off(tp, jo, m, it):
    jo._results.nops_on = False


@callback("geopt", r'Setting nops=0, recomputing energy')
def geopt_nops_on(tp, jo, m, it):
    tp.recomputing_energy = True


#-----------------------------------------------------------------------------
# NOFAIL geopt callbacks
@callback("geopt", r"to be a stuck geometry")
def geopt_stuck2(tp, jo, m, it):
    jo.geopt_stuck = True


@callback("geopt", r'SCF will be redone to get proper energy & wavefunction.')
def nofail_geopt_restart(tp, jo, m, it):
    tp.recomputing_energy = True


@callback("geopt",
          r'Taking the geometry with the lowest energy \(iteration (\d+)\)')
def nofail_geopt(tp, jo, m, it):
    tp.nofail_geopt = int(m.group(1))


#-----------------------------------------------------------------------------


@callback("pre")
@callback("geopt", r"Z-variables: (.*)$")
def z_variables(tp, jo, m, it):
    units = m.group(1)
    if "angstrom" in units:
        length_unit = constants.unAngstrom
    elif "bohr" in units:
        length_unit = constants.unBohr
    else:
        raise JaguarParseError(
            "Z-variables length units have been changed; update the z_variables function."
        )

    if "degree" in units:
        angle_unit = constants.unDegree
    elif "radian" in units:
        angle_unit = constants.unRadian
    else:
        raise JaguarParseError(
            "Z-variables angle units have been changed; update the z_variables function."
        )

    jo._results.zvar = results.ZVariables(length_unit, angle_unit)

    while 1:
        line = next(it)
        if blank_line_re.match(line):
            break
        line = zvar_tags_re.sub("", line)
        m = re.match(r"\s*(\S+)\s*=\s*(\S+)", line)
        name, value = m.groups()
        jo._results.zvar[name] = float(value)


@callback("nude")
@callback("lmp2gdb")
@callback("lmp2gb")
@callback("der1b")
@callback("tddft_g", r"forces \(hartrees/bohr\) : (total|numerical)")
def forces(tp, jo, m, it):

    if m.group(1) == "numerical":
        jo.opts.analytic_gradients = False

    while 1:
        line = next(it)
        if line.startswith(" ----"):
            break

    ix_atom = 0
    while 1:
        line = next(it)
        if line.startswith(" ----"):
            break
        fields = line.split()
        jo._results.atom[ix_atom].forces = [float(i) for i in fields[2:]]
        ix_atom += 1


@callback("lmp2")
@callback("scf", r"\(V\).*Solvation energy\.+\s*(-?[\.\d]+)")
@callback("scf", r"^\sPCM solvation energy (electrostatic)\.+\s*(-?[\.\d]+)")
@callback("sole", r"\(V\).*Solvation energy\.+\s*(-?[\.\d]+).*\(P-O\)")
def solvation(tp, jo, m, it):
    jo._results.solvation_energy = float(m.group(1))


@callback("lmp2")
@callback("scf", r"\(P\).*Solution phase energy\.+\s*(-?[\.\d]+)")
@callback("scf", r"^\sSolution phase energy\.+\s*(-?[\.\d]+)")
@callback("sole", r"\(P\).*Solution phase energy\.+\s*(-?[\.\d]+)")
def solution_phase(tp, jo, m, it):
    jo._results.solution_phase_energy = float(m.group(1))


@callback("lmp2")
@callback("scf", r"\(O\).*Gas phase energy\.+\s*(-?[\.\d]+)")
@callback("scf", r"^\sGas phase energy\.+\s*(-?[\.\d]+)")
def gas_phase(tp, jo, m, it):
    jo._results.gas_phase_energy = float(m.group(1))


@callback("onee", r"number of canonical orbitals\.\.\.\.\.\s+(\d+)")
def canorb(tp, jo, m, it):
    jo._results.canonical_orbitals = int(m.group(1))


@callback("onee", r"smallest eigenvalue of S:\s+(\S+)")
def s_min_eval(tp, jo, m, it):
    jo._results.S_min_eval = float(m.group(1))


@callback("scanner", r"end of geometry scan step")
def end_scan(tp, jo, m, it):
    tp.endScan(jo)


# pre properties


@callback("pre", r"Maestro file \(output\):\s+(\S.*)")
def mae(tp, jo, m, it):
    jo.mae_out = m.group(1)


@callback("pre", r"Maestro file \(input\):\s+(\S.*)")
def mae_in(tp, jo, m, it):
    jo.mae_in = m.group(1)


@callback("pre", r"Molecular Point Group:\s+(\w+)")
def point_group(tp, jo, m, it):
    jo.point_group = m.group(1)


@callback("pre", r"Point Group used:\s+(\w+)")
def point_group_used(tp, jo, m, it):
    jo.point_group_used = m.group(1)


@callback("pre", r"Number of atoms treated by QM:\s+(\w+)")
def qm_atoms(tp, jo, m, it):
    jo.qm_atoms = int(m.group(1))


@callback("pre", r"SCF calculation type: ([A-Z/_-]+)")
def calc_type(tp, jo, m, it):
    if not m.group(1) in constants._calc_types:
        raise JaguarParseError("Unrecognized SCF type: %s" % m.group(1))
    jo.method = m.group(1)


@callback("pre", r"Post-SCF correlation type: (\w+)")
def correlation_type(tp, jo, m, it):
    #FIXME: this function is too complex.
    correlation = m.group(1)
    if correlation == constants.ctLMP2:
        if jo.method == constants.ctHF:
            jo.method = constants.ctLMP2
        elif jo.method == constants.ctROHF:
            jo.method = constants.ctROHFLMP2
        elif jo.method == constants.ctGVB:
            jo.method = constants.ctGVBLMP2
        else:
            raise JaguarParseError(
                "Unrecognized SCF method for LMP2: %s" % jo.method)
    elif correlation == "RCI":
        if jo.method == constants.ctGVB:
            jo.method = constants.ctGVBRCI
        else:
            raise JaguarParseError(
                "Unrecognized SCF method for RCI: %s" % jo.method)
    elif correlation == constants.ctDFT:
        if jo.method == constants.ctHF:
            jo.method = constants.ctDFT
        else:
            raise JaguarParseError(
                "Unrecognized SCF method for DFT: %s" % jo.method)
    elif correlation == constants.ctRODFT:
        if jo.method == constants.ctROHF:
            jo.method = constants.ctRODFT
        else:
            raise JaguarParseError(
                "Unrecognized SCF method for RODFT: %s" % jo.method)
    elif correlation == constants.ctUDFT:
        if jo.method == constants.ctUHF:
            jo.method = constants.ctUDFT
        else:
            raise JaguarParseError(
                "Unrecognized SCF method for UDFT: %s" % jo.method)
    else:
        raise JaguarParseError(
            "Unrecognized correlation method: %s" % correlation)


@callback("pre", r"DFT=(\S+)\s*(\(.+\))?")
def functional(tp, jo, m, it):
    if m.group(2):
        jo.functional = m.group(2)[1:-1]
    else:
        jo.functional = m.group(1)


@callback("pre", r"^ *User Defined Functional:")
def custom_functional(tp, jo, m, it):
    jo.functional = "User-defined"


@callback("pre", r"Geometry from &zmat(2|3), &zvar(2|3)")
def qst_geometries(tp, jo, m, it):
    # This routine is to skip "Input geometry:" geometries that are
    # associated with zmat2 or zmat3.

    # Skip lines until we hit "Input geometry:"
    while 1:
        line = next(it)
        if input_geometry_re.match(line):
            break

    atoms, mmjag_handle = geometry_read(it, jo.charge)

    st = structure.Structure(
        mm.mmjag_ct_from_zmat(mmjag_handle, mm.MMJAG_ZMAT1))

    if m.group(1) == "2":
        jo.input_geometry2 = st
    elif m.group(1) == "3":
        jo.input_geometry3 = st


def _store_input_geometry(jo, it):
    """
    Store a Structure object at jo.input_geometry for the input geometry,
    then replace the initial geometry with the new one.

    """
    if jo._results._mmjag_handle is not None:
        jo.input_geometry = jo._results.getStructure()
        jo._results._mmjag_handle = None
    jo._results.atom, jo._results._mmjag_handle = geometry_read(it, jo.charge)


@callback("pre", r"Symmetrized geometry:")
def symmetrized_geometry(tp, jo, m, it):
    jo.symmetrized = True
    _store_input_geometry(jo, it)


@callback("pre", r"Initial geometry: \(interpolated\)")
def qst_initial_geometry(tp, jo, m, it):
    _store_input_geometry(jo, it)


@callback("pre", r"Geometry scan coordinates:(?:\s*\((angstroms|bohr) and " +
          r"(degrees|radians)\))?")
def scan_coordinates(tp, jo, m, it):
    while 1:
        line = next(it)
        if blank_line_re.match(line):
            break
        jo.scan_coords.append(Scan.parse(line))


@callback("scanner", r"Geometry scan step\s+\d+\s*:")
def geometry_scan_step(tp, jo, m, it):
    found_energy = False
    while 1:
        line = next(it)
        fields = line.split()
        while fields:
            name = fields.pop(0)
            if name == "scan:":
                continue
            elif name == "energy":
                found_energy = True
                break
            equals = fields.pop(0)
            if equals != '=':
                raise JaguarParseError("Expected '=' in scan: position line")
            jo._results.scan_value[name] = float(fields.pop(0))
        if found_energy:
            break
    return


@callback("pre", r"Non-default print settings:")
def non_default_print_options(tp, jo, m, it):
    int_set_re = re.compile(r"\s*(\d+)\s*=\s*(\d+)\s*")
    while 1:
        line = next(it)
        if blank_line_re.match(line):
            break
        m = int_set_re.match(line)
        pflag = int(m.group(1))
        setting = int(m.group(2))
        jo.opts.ip[pflag] = setting


@callback("pre",
          r"Fully analytic SCF calculation: pseudospectral method not used")
def pseudospectral(tp, jo, m, it):
    jo.opts.pseudospectral = False


# ch properties
def gen_charges_spins(attr, tp, jo, m, it):
    # FIXME: this function is too complex.
    charges = []
    names = []

    # Skip until we hit a line containing "Atom" (note: QSite prints
    # a warning message after the header if there are cuts or hcaps)
    while 1:
        line = next(it)
        if "Atom" in line:
            break

    while 1:
        if "Atom" in line:
            names.extend(line.split()[1:])
            charge_line = next(it)
            charges.extend([float(c) for c in charge_line.split()[1:]])
        elif "sum of atomic" in line:
            break
        elif blank_line_re.match(line):
            pass
        else:
            raise JaguarParseError("Error parsing %s" % attr)
        line = next(it)

    atom_count = len(jo._results.atom)
    for ix in range(atom_count):
        setattr(jo._results.atom[ix], attr, charges[ix])

    if len(charges) > atom_count:
        jo._results.charge_bond_midpoint = []
        if jo.opts.esp_fit != results.JaguarOptions.ESP_ATOMS_AND_BOND_MIDPOINTS:
            raise JaguarParseError("More than %d charges, but not from "
                                   "bond midpoints.", atom_count)
        for ix in range(atom_count, len(charges)):
            jo._results.charge_bond_midpoint.append(
                results.BondCharge(names[ix], charges[ix]))


@callback("ch", r"Atomic charges from electrostatic potential")
def esp_charges(tp, jo, m, it):
    return gen_charges_spins("charge_esp", tp, jo, m, it)


@callback("ch", r"Atomic charges from Lowdin population analysis")
def lowdin_charges(tp, jo, m, it):
    return gen_charges_spins("charge_lowdin", tp, jo, m, it)


@callback("ch", r"Atomic Spin Densities from Lowdin analysis")
def lowdin_spins(tp, jo, m, it):
    return gen_charges_spins("spin_lowdin", tp, jo, m, it)


@callback("ch", r"Atomic charges from Mulliken population analysis")
def mulliken_charges(tp, jo, m, it):
    return gen_charges_spins("charge_mulliken", tp, jo, m, it)


@callback("ch", r"Atomic Spin Densities from Mulliken analysis")
def mulliken_spins(tp, jo, m, it):
    return gen_charges_spins("spin_mulliken", tp, jo, m, it)


@callback("ch", r"Stockholder charges from Hirshfeld partitioning")
def stockholder_charges(tp, jo, m, it):

    data_line_re = re.compile(r"\s+\w+\s+(-*\d+.\d+)")

    stockholder_charges = []
    #find first data line
    while 1:
        line = next(it)
        if data_line_re.match(line):
            break
    #parse first line and following data lines
    while 1:
        m = data_line_re.match(line)
        if m:
            stockholder_charges.append(float(m.group(1)))
        else:
            break
        line = next(it)

    atom_count = len(jo._results.atom)
    if atom_count != len(stockholder_charges):
        raise JaguarParseError(
            f"Found different number of Stockholder charges ({len(stockholder_charges)}) than number of atoms ({len(jo._results.atom)})"
        )

    for ix in range(atom_count):
        setattr(jo._results.atom[ix], 'charge_stockholder',
                stockholder_charges[ix])


def multipole_moments(type_, tp, jo, m, it):
    while 1:
        line = next(it)
        if re.search(r"Dipole Moments.*\(Debye\)", line):
            line = next(it)
            fields = line.split()
            if len(fields) != 8:
                raise JaguarParseError(
                    "Dipole output format has changed; modify multipole_moments routine."
                )
            setattr(jo._results, "dipole_" + type_,
                    results.Dipole(type_, float(fields[1]), float(fields[3]),
                                   float(fields[5]), float(fields[7])))
        elif blank_line_re.match(line):
            break


@callback("ch", r"Moments from quantum mechanical wavefunction")
def multipole_qm(tp, jo, m, it):
    multipole_moments("qm", tp, jo, m, it)


@callback("ch", r"Moments from electrostatic potential charges")
def multipole_esp(tp, jo, m, it):
    multipole_moments("esp", tp, jo, m, it)


@callback("ch", r"Moments from Mulliken charges")
def multipole_mulliken(tp, jo, m, it):
    multipole_moments("mulliken", tp, jo, m, it)


@callback("ch", r"Atomic Fukui indices")
def fukui_indices(tp, jo, m, it):

    degeneracy_re = re.compile(r"Atom\s+f_NN")
    while True:
        line = next(it)
        if degeneracy_re.search(line):
            next(it)  # discard the line of dashes
            break

    ix = 0
    while True:
        line = next(it)
        if blank_line_re.match(line):
            break
        else:
            fields = line.split()
            name = fields[0]
            charges = [float(c) for c in fields[1:]]
            if name != jo._results.atom[ix].atom_name:
                raise JaguarParseError(
                    "Fukui indices are not in the same "
                    "order as atoms in the JaguarResults instance.")
            jo._results.atom[ix].fukui_indices = results.FukuiIndices(
                ix + 1, name, *charges)

        ix += 1


# Electron transfer
# The old ET code (ROHF jobs) produces different output than the new
# code, so we need a very generic pattern for determining
# when to start parsing the lines.
@callback("etit", r"^   Reading ")
def electron_transfer(tp, jo, m, it):
    found = 0
    while 1:
        line = next(it)
        m = re.search(r"S_if =", line)
        if m:
            found = 1
            fields = line.split()
            jo._results.et_S_if = float(fields[2])
            while 1:
                line = next(it)
                fields = line.split()
                if re.search(r"H_if =", line):
                    jo._results.et_H_if = float(fields[2])
                elif re.search(r"H_ii =", line):
                    jo._results.et_H_ii = float(fields[2])
                elif re.search(r"T_i->f =", line):
                    jo._results.et_T_if = float(fields[2])
                    break
        elif found:
            break


# Polarizabilities
@callback("fdpol",
          r"polarizability \(in AU\) alpha\(\s+([0-9.-]+) eV;\s+([0-9.-]+) eV")
def alpha_fdpolar(tp, jo, m, it):
    tmp_freq = float(m.group(2))
    tmp_alpha = None
    while 1:
        line = next(it)
        m = re.search(r"alpha=\s+(\S+)", line)
        if m:
            tmp_alpha = float(m.group(1))
            break

    if jo._results.fdpolar_freq1 is None:
        jo._results.fdpolar_freq1 = tmp_freq
        jo._results.fdpolar_alpha1 = tmp_alpha
    elif jo._results.fdpolar_freq2 is None:
        jo._results.fdpolar_freq2 = tmp_freq
        jo._results.fdpolar_alpha2 = tmp_alpha
    else:
        raise JaguarParseError(
            "There can be only two fdpol alpha calculations, but textparser has found at least 3"
        )


@callback("fdpol", r"hyperpolarizability \(in AU\) beta\(\s+([0-9.-]+) eV;")
def beta_fdpolar(tp, jo, m, it):
    jo._results.fdpolar_freq3 = float(m.group(1))
    while 1:
        line = next(it)
        m = re.search(r"mean of beta-tensor orientations:\s+(\S+)", line)
        if m:
            jo._results.fdpolar_beta = float(m.group(1))
            break


@callback("cpolar", r"^  polarizability \(in AU\):")
@callback("polar", r"^  polarizability \(in AU\):")
def alpha_polar(tp, jo, m, it):
    while 1:
        line = next(it)
        m = re.search(r"alpha=\s+(\S+)", line)
        if m:
            jo._results.polar_alpha = float(m.group(1))
            break
        elif re.search(r"Dalpha=", line):
            break


@callback("cpolar", r"^  first hyperpolarizability \(in AU\):")
def beta_polar(tp, jo, m, it):
    while 1:
        line = next(it)
        m = re.search(r"beta=\s+(\S+)", line)
        if m:
            jo._results.polar_beta = float(m.group(1))
            break
        elif blank_line_re.match(line):
            break


@callback("cpolar", r"^  second hyperpolarizability \(in AU\):")
def gamma_polar(tp, jo, m, it):
    while 1:
        line = next(it)
        m = re.search(r"gamma=\s+(\S+)", line)
        if m:
            jo._results.polar_gamma = float(m.group(1))
            break
        elif blank_line_re.match(line):
            break


# Electrostatic potential at the nuclei (EPN)
@callback("elden", r"^  Electrostatic Potential at the Nuclei")
def epn(tp, jo, m, it):
    epn = []
    atom_label = []

    epn_re = re.compile(r"^\s+(\w+)\s+(\S+)\s+\S+")

    next(it)
    next(it)
    next(it)
    while 1:
        line = next(it)
        m = epn_re.search(line)
        if m:
            atom_label.append(m.group(1))
            epn.append(float(m.group(2)))
        elif blank_line_re.match(line):
            break

    atom_count = len(jo._results.atom)

    if jo.qm_atoms is not None:  # QSite job
        for ia in range(jo.qm_atoms):
            for ix in range(atom_count):
                if atom_label[ia] == jo._results.atom[ix].atom_name:
                    setattr(jo._results.atom[ix], "epn", epn[ia])
    else:
        for ix in range(atom_count):
            setattr(jo._results.atom[ix], "epn", epn[ix])


# ESP analysis on molecular surface
@callback("elden", r"^  Analysis of ESP on isodensity surface:")
def esp_analysis(tp, jo, m, it):
    # FIXME: this function is too complex.
    maxat_esp = []
    minat_esp = []
    atom_label = []

    next(it)
    next(it)
    while 1:
        line = next(it)

        # This matches the old format, prior to EV 112810
        if re.search(r"Density isovalue:", line):
            next(it)
            next(it)
            next(it)
            line = next(it)  # gets the line "Minimum ESP value:"

        # This matches the informational messages, if present, about
        # missing positive or negative data values
        if re.search(r"NOTE:", line):
            next(it)
            next(it)
            next(it)
            line = next(it)  # gets the line "Minimum ESP value:"

        fields = line.split()
        if re.search(r"Minimum ESP value:", line):
            jo._results.min_esp = float(fields[3])
        elif re.search(r"Maximum ESP value:", line):
            jo._results.max_esp = float(fields[3])
        elif re.search(r"Mean ESP value:", line):
            jo._results.mean_esp = float(fields[3])
        elif re.search(r"Mean positive ESP value:", line):
            jo._results.mean_pos_esp = float(fields[4])
        elif re.search(r"Mean negative ESP value:", line):
            jo._results.mean_neg_esp = float(fields[4])
        elif re.search(r"Variance of positive ESP", line):
            jo._results.sig_pos_esp = float(fields[5])
        elif re.search(r"Variance of negative ESP", line):
            jo._results.sig_neg_esp = float(fields[5])
        elif re.search(r"Total ESP variance", line):
            jo._results.sig_tot_esp = float(fields[3])
        elif re.search(r"ESP balance:", line):
            jo._results.balance_esp = float(fields[2])
        elif re.search(r"Local polarity:", line):
            jo._results.local_pol_esp = float(fields[2])
        elif blank_line_re.match(line):
            break

    esp_re = re.compile(r"^\s+(\w+)\s+(\S+)\s+(\S+)")

    next(it)
    next(it)
    while 1:
        line = next(it)
        m = esp_re.search(line)
        if m:
            atom_label.append(m.group(1))
            maxat_esp.append(float(m.group(2)))
            minat_esp.append(float(m.group(3)))
        elif blank_line_re.match(line):
            break

    atom_count = len(jo._results.atom)

    if jo.qm_atoms is not None:  # QSite job
        for ia in range(jo.qm_atoms):
            for ix in range(atom_count):
                if atom_label[ia] == jo._results.atom[ix].atom_name:
                    setattr(jo._results.atom[ix], "maxat_esp", maxat_esp[ia])
                    setattr(jo._results.atom[ix], "minat_esp", minat_esp[ia])
    else:
        for ix in range(atom_count):
            setattr(jo._results.atom[ix], "maxat_esp", maxat_esp[ix])
            setattr(jo._results.atom[ix], "minat_esp", minat_esp[ix])


# ALIE analysis on molecular surface
@callback("elden", r"^  Analysis of ALIE on isodensity surface:")
def alie_analysis(tp, jo, m, it):
    # FIXME: this function is too complex.
    maxat_alie = []
    minat_alie = []
    atom_label = []

    next(it)
    next(it)
    while 1:
        line = next(it)
        fields = line.split()
        if re.search(r"Minimum ALIE value:", line):
            jo._results.min_alie = float(fields[3])
        elif re.search(r"Maximum ALIE value:", line):
            jo._results.max_alie = float(fields[3])
        elif re.search(r"Mean ALIE value:", line):
            jo._results.mean_alie = float(fields[3])
        elif re.search(r"Mean positive ALIE value:", line):
            jo._results.mean_pos_alie = float(fields[4])
        elif re.search(r"Mean negative ALIE value:", line):
            jo._results.mean_neg_alie = float(fields[4])
        elif re.search(r"Variance of positive ALIE", line):
            jo._results.sig_pos_alie = float(fields[5])
        elif re.search(r"Variance of negative ALIE", line):
            jo._results.sig_neg_alie = float(fields[5])
        elif re.search(r"Total ALIE variance", line):
            jo._results.sig_tot_alie = float(fields[3])
        elif re.search(r"ALIE balance:", line):
            jo._results.balance_alie = float(fields[2])
        elif re.search(r"Avg deviation from mean:", line):
            jo._results.local_pol_alie = float(fields[4])
        elif blank_line_re.match(line):
            break

    alie_re = re.compile(r"^\s+(\w+)\s+(\S+)\s+(\S+)")

    next(it)
    next(it)
    while 1:
        line = next(it)
        m = alie_re.search(line)
        if m:
            atom_label.append(m.group(1))
            maxat_alie.append(float(m.group(2)))
            minat_alie.append(float(m.group(3)))
        elif blank_line_re.match(line):
            break

    atom_count = len(jo._results.atom)

    if jo.qm_atoms is not None:  # QSite job
        for ia in range(jo.qm_atoms):
            for ix in range(atom_count):
                if atom_label[ia] == jo._results.atom[ix].atom_name:
                    setattr(jo._results.atom[ix], "maxat_alie", maxat_alie[ia])
                    setattr(jo._results.atom[ix], "minat_alie", minat_alie[ia])
    else:
        for ix in range(atom_count):
            setattr(jo._results.atom[ix], "maxat_alie", maxat_alie[ix])
            setattr(jo._results.atom[ix], "minat_alie", minat_alie[ix])


# NMR
def get_nmr_shifts(jo, it):
    """
    Auxiliary fxn used to parse NMR chemical shifts, called by get_nmr()
    """
    absolute = []
    relative = []
    avg_h = []
    avg_2d = []

    #get to first line of data
    first_line_re = re.compile(r"\s+\d+\s+\w+")
    while 1:
        line = next(it)
        if first_line_re.match(line):
            break
    #parse data of line and advance iterator until end is found
    while 1:
        data = line.split()
        absolute.append(float(data[2]))
        for datum, shift_array in zip((data[3], data[4], data[5]),
                                      (relative, avg_h, avg_2d)):
            if datum == 'N/A':
                shift_array.append(None)
            else:
                shift_array.append(float(datum))
        line = next(it)
        if 'End Shifts' in line:
            break

    atom_count = len(jo._results.atom)
    if len(absolute) != atom_count:
        raise JaguarParseError(
            "Found different number of NMR shifts ({len(absolute)}) and atoms ({atom_count})!"
        )
    for ix in range(atom_count):
        setattr(jo._results.atom[ix], "nmr_abs_shift", absolute[ix])
        setattr(jo._results.atom[ix], "nmr_rel_shift", relative[ix])
        setattr(jo._results.atom[ix], "nmr_h_avg_shift", avg_h[ix])
        setattr(jo._results.atom[ix], "nmr_2d_avg_shift", avg_2d[ix])


@callback("nmrcphf", r"NMR Properties for atom\s+(\S+)")
def get_nmr(tp, jo, m, it):
    shielding = []
    atom_label = []
    #add very first atom, contained in this fxn's callback regex
    atom_label.append(m.group(1))

    # Get the isotropic shielding values and atom labels from the
    # nmrcphf output.  For QSite with hydrogen caps or frozen orbital
    # cuts, the number of shielding values will be less than the
    # number of atoms passed to Jaguar.  In this case we check the the
    # atom labels to make sure we associate the shielding value with
    # the right atom index.

    iso_re = re.compile(r"Isotropic shielding:\s+(\S+)")
    atom_re = re.compile(r"NMR Properties for atom\s+(\S+)")
    shift_re = re.compile(r"NMR Chemical Shifts Relative to TMS")
    while 1:
        line = next(it)
        m = iso_re.search(line)
        m2 = atom_re.search(line)
        m3 = shift_re.search(line)
        if m:
            shielding.append(float(m.group(1)))
        elif m2:
            atom_label.append(m2.group(1))
        elif m3:
            get_nmr_shifts(jo, it)
        elif "end of program nmrcphf" in line:
            break

    atom_count = len(jo._results.atom)

    if jo.qm_atoms is not None:  # QSite job
        for ia in range(jo.qm_atoms):
            for ix in range(atom_count):
                if atom_label[ia] == jo._results.atom[ix].atom_name:
                    setattr(jo._results.atom[ix], "nmr_shielding",
                            shielding[ia])
    else:
        for ix in range(atom_count):
            setattr(jo._results.atom[ix], "nmr_shielding", shielding[ix])


# CIS/TDDFT
@callback("cis", r"CI size =")
def cis_excitation_energies(tp, jo, m, it):
    exc_re = re.compile(r"Excited State\s+\d+:\s+(\S+) eV")
    osc_re = re.compile(r"Oscillator strength, f=\s+(\S+)")
    while 1:
        line = next(it)
        m = exc_re.search(line)
        os = osc_re.search(line)
        if m:
            jo._results.excitation_energies.append(float(m.group(1)))
        if os:
            jo._results.oscillator_strengths.append(float(os.group(1)))
        if "end of program cis" in line:
            break


@callback("stdener", r"Ground State Dipole Moments")
@callback("tdener", r"Ground State Dipole Moments")
def reset_tddft_excitation_energies(tp, jo, m, it):
    # Entering a new TDDFT section for a given SCF.
    # For solvent calculations etc, it is useful to reset the lists of
    # excitation energies from previous SCF steps at this geometry.
    jo._results.excitation_energies = []
    jo._results.singlet_excitation_energies = []
    jo._results.triplet_excitation_energies = []


@callback("stdener", r"(.*)Excited State\s+\d+:\s+$")
@callback("tdener", r"(.*)Excited State\s+\d+:\s+$")
def tddft_excitation_energies(tp, jo, m, it):
    exc_re = re.compile(
        r"Excitation energy =\s+(\S+) hartrees\s+(\S+) eV\s+(\S+) nm")
    osc_re = re.compile(r"Oscillator strength, f=\s+(\S+)")
    while 1:
        line = next(it)
        # Parse excitation energy
        ex = exc_re.search(line)
        if ex:
            val = float(ex.group(2))
            jo._results.excitation_energies.append(val)
            if 'Restricted Singlet' in m.group(1):
                jo._results.singlet_excitation_energies.append(val)
            if 'Restricted Triplet' in m.group(1):
                jo._results.triplet_excitation_energies.append(val)
        # Parse oscillator strength
        os = osc_re.search(line)
        if os:
            val = float(os.group(1))
            jo._results.oscillator_strengths.append(val)
            if 'Restricted Singlet' in m.group(1):
                jo._results.singlet_oscillator_strengths.append(val)
            if 'Restricted Triplet' in m.group(1):
                jo._results.triplet_oscillator_strengths.append(val)
            break


# This callback has the same functionality as the one above and is only needed
# to support parsing of legacy .out files.  It should be deleted eventually.
# (See JAGUAR-6906).
@callback("tdener", r"(.*)Excited State\s+\d+:\s+(\S+) eV")
def tddft_excitation_energies_old(tp, jo, m, it):
    # Parse excitation energy
    val = float(m.group(2))
    jo._results.excitation_energies.append(val)
    if 'Restricted Singlet' in m.group(1):
        jo._results.singlet_excitation_energies.append(val)
    if 'Restricted Triplet' in m.group(1):
        jo._results.triplet_excitation_energies.append(val)
    osc_re = re.compile(r"Oscillator strength, f=\s+(\S+)")
    # Parse oscillator strength
    while 1:
        line = next(it)
        os = osc_re.search(line)
        if os:
            val = float(os.group(1))
            jo._results.oscillator_strengths.append(val)
            if 'Restricted Singlet' in m.group(1):
                jo._results.singlet_oscillator_strengths.append(val)
            if 'Restricted Triplet' in m.group(1):
                jo._results.triplet_oscillator_strengths.append(val)
            break


@callback("geopt", r"First excited state energy:\s+(\S+) hartrees")
def tddft_geopt_energy(tp, jo, m, it):
    # Parse energy of first excited state geometry optimization
    jo._results.opt_excited_state_energy_1 = float(m.group(1))


# ZPE/thermochemistry may be part of the freq program results (multi-atom
# systems) or they may be part of the scf program results (single-atom systems)
@callback("scf")
@callback("freq", r"The zero point energy \(ZPE\):\s+(\S+) k(\w+)/mol")
def zpe(tp, jo, m, it):
    jo._results.zero_point_energy = float(m.group(1))
    if (m.group(2) == 'J'):
        units = 'joules'
    elif (m.group(2) == 'cal'):
        units = 'calories'
    else:
        units = None
    end_re = re.compile(r"\s*end of program freq")
    # This is how single-atom thermochemistry blocks end
    end_re2 = re.compile(r"\s*end of program scf")
    temp_pressure_re = re.compile(r"\s*T =\s+(\S+) K, P =\s+(\S+) atm")
    thermo_properties = []
    while 1:
        line = next(it)
        m = temp_pressure_re.match(line)
        end = end_re.match(line)
        end2 = end_re2.match(line)
        if end or end2:
            break
        elif m:
            thermo_properties.append(
                thermo_helper(float(m.group(1)), float(m.group(2)), units, it))
    jo._results.thermo = thermo_properties


def thermo_helper(temp, press, units, it):
    """
    Parse a thermochemical properties section and return a ThermoCollection
    object.

    """
    next(it)
    next(it)
    next(it)

    trans = next(it).split()
    if trans[0] != "trans.":
        raise JaguarParseError("Unexpected format in thermo_helper.")

    rot = next(it).split()
    single_atom = False
    if rot[0] != "rot.":
        if rot[0] == "elec.":
            # This is a single atom system that skips rot and vib output lines
            elec = rot
            rot = [0.0] * len(trans)
            vib = [0.0] * len(trans)
            single_atom = True
        else:
            raise JaguarParseError("Unexpected format in thermo_helper.")

    if not single_atom:
        vib = next(it).split()
        if vib[0] != "vib.":
            raise JaguarParseError("Unexpected format in thermo_helper.")

        # For formatting change ("vib." -> "vib. (tot)") when individual freq
        # components are requested, remove "(tot)" as second element in vib list
        if vib[1] == "(tot)":
            del vib[1]

        # Skip any added formatting (i.e., lines of dots) and individual freq
        # components if selected as optional output (option added w/ JAGUAR-7830)
        for line in it:
            if " ........." not in line and "vib #" not in line:
                break

        elec = line.split()
        if elec[0] != "elec.":
            raise JaguarParseError("Unexpected format in thermo_helper.")

    # Skip line of dashes if present (added to format as part of JAGUAR-7830)
    line = next(it)
    if "----------" in line:
        line = next(it)

    total = line.split()
    if total[0] != "total":
        raise JaguarParseError("Unexpected format in thermo_helper.")

    #cast list members as floats
    for array in [total, trans, rot, vib, elec]:
        del array[0]
        for index, val in enumerate(array):
            # This "not calcd" to 0.0 conversion is a placeholder until
            # JAGUAR-8934 is resolved, at which point it should be removed.
            # Jaguar was already regarding this "not calcd" as a 0.0 in
            # the summary table of the Jaguar output file.
            if val == "not" or val == "calcd":
                val = 0.0
            array[index] = float(val)

    next(it)
    UTotal = float(next(it).split()[-2])
    HTotal = float(next(it).split()[-2])
    GTotal = float(next(it).split()[-2])

    return results.ThermoCollection(temp, press, units, total, trans, rot, vib,
                                    elec, UTotal, HTotal, GTotal)


# Freq properties
@callback("freq", r"Valid transition vector #\s+(\b[0-9]+\b)")
def get_vetted_vec_index(tp, jo, m, it):
    jo._results.vetted_ts_vector_index = int(m.group(1))


@callback("freq", r"normal modes in cartesian coordinates:\s+(\d+)")
@callback("freq",
          r"normal modes in mass-weighted cartesian coordinates:\s+(\d+)")
def frequencies(tp, jo, m, it):
    label_map = {
        "frequencies": ("frequency", float),
        "symmetries": ("symmetry", str),
        "intensities": ("ir_intensity", float),
        "reduc. mass": ("reduced_mass", float),
        "force const": ("force_constant", float),
        "D. strength": ("dipole_strength", float),
        "R. strength": ("rotational_strength", float),
        "Raman Act.": ("raman_activity", float),
        "Raman Int.": ("raman_intensity", float)
    }

    natoms = len(jo._results.atom)
    nfreq = int(m.group(1))
    # Assume 6 normal modes per text block
    nblocks = 1 + old_div((nfreq - 1), 6)
    # Move to next blank line
    for line in it:
        if blank_line_re.match(line):
            break

    # Loop over normal mode text blocks
    modes = []
    imode = 0
    for i in range(nblocks):
        # Skip any vibration # headers (added with JAGUAR-7830)
        for line in it:
            if "vibration #" not in line and " --------" not in line:
                break
        field = line.replace('-', ' -').split()
        if field[0] != "frequencies":
            msg = "Expected 'frequencies' line first in normal mode data block."
            raise JaguarParseError(msg)

        # Add additional NormalMode objects to the list.
        for freq in field[1:]:
            try:
                freq = float(freq)
            except ValueError:
                msg = "Unexpected frequency %s in normal mode output." % freq
                raise JaguarParseError(msg)
            index = len(modes)
            modes.append(results.NormalMode(freq, natoms, index))

        # The number of normal modes per text block
        nbatch = len(field) - 1

        # Parse all of the normal mode data in this text block
        field = next(it).split()
        while field:
            # Expect blank line to end text block
            label = " ".join(field[:-nbatch])
            if label in label_map:
                attr, type_ = label_map[label]
                for ix, val in enumerate(field[-nbatch:]):
                    setattr(modes[imode + ix], attr, type_(val))
                field = next(it).split()
            elif field[0] == jo._results.atom[0].atom_name:
                # Parse X,Y,Z displacements for all atoms
                ix, jx = -1, -1
                for i in range(3 * natoms):
                    jx = (jx + 1) % 3
                    if jx == 0:
                        ix += 1
                    for kx, val in enumerate(field[2:]):
                        modes[imode + kx].displacement[ix, jx] = float(val)
                    field = next(it).split()
            else:
                msg = "Unexpected format in normal mode output."
                raise JaguarParseError(msg)
        imode += nbatch

    if len(modes) != nfreq:
        msg = "Did not find the %d normal modes expected." % nfreq
        raise JaguarParseError(msg)
    jo._results.normal_mode = modes

    # set the vetted ts vector if it exists
    if jo._results.vetted_ts_vector_index is not None:
        indx = jo._results.vetted_ts_vector_index - 1
        if indx >= 0 and indx < nfreq:
            jo._results.vetted_ts_vector = jo._results.normal_mode[indx]
        else:
            raise JaguarParseError(
                "Index of vetted TS vector outside of exceptable range (%d)" %
                indx)


# This callback has the same functionality as the one above and is only needed
# to support parsing of legacy .out files.  It should be deleted eventually.
# (See JAGUAR-6869).
@callback("freq", r"normal modes in cartesian coordinates:\s+$")
def frequencies_old(tp, jo, m, it):
    # FIXME: this function is too complex.
    for line in it:
        if blank_line_re.match(line):
            break

    normal_mode = []
    ix_mode = 0
    t_atoms = len(jo._results.atom)

    label_map = {
        "frequencies": ("frequency", float),
        "symmetries": ("symmetry", str),
        "intensities": ("ir_intensity", float),
        "reduc. mass": ("reduced_mass", float),
        "force const": ("force_constant", float),
        "D. strength": ("dipole_strength", float),
        "R. strength": ("rotational_strength", float),
        "Raman Act.": ("raman_activity", float),
        "Raman Int.": ("raman_intensity", float)
    }

    while True:
        line = next(it)
        field = line.replace('-', ' -').split()
        # The table ends /
        if not field or field[:2] == [
                "Writing", "vibrational"
        ] or field[:2] == ["Writing", "VCD"
                          ] or field[:3] == ["Number", "of", "imaginary"]:
            break

        if field[0] != "frequencies":
            raise JaguarParseError(
                "Expected 'frequencies' line not found in normal mode output.")

        # The number of normal modes in the next chunk of the table.
        t_modes = len(field) - 1

        # Add additional NormalMode objects to the list.
        for count, freq in enumerate(field[1:]):
            try:
                freq = float(freq)
            except ValueError:
                raise JaguarParseError(
                    "Unexpected frequency in normal mode output.")
            normal_mode.append(
                results.NormalMode(freq, t_atoms, ix_mode + count))

        # Parse all of the normal mode property lines.
        while True:
            field = next(it).split()
            if not field:
                break
            label = " ".join(field[:-t_modes])
            label_info = label_map.get(label, None)
            if label_info:
                attr, type_ = label_info
                for ix, value in enumerate(field[-t_modes:]):
                    setattr(normal_mode[ix_mode + ix], attr, type_(value))
            elif field[0] == jo._results.atom[0].atom_name:
                break
            else:
                raise JaguarParseError(
                    "Unexpected format in normal mode output.")

        # Parse the atomic displacements
        ix, jx = -1, -1
        while True:
            if not field:
                break
            jx = (jx + 1) % 3
            if jx == 0:
                ix += 1
            for kx, value in enumerate(field[2:]):
                normal_mode[ix_mode + kx].displacement[ix, jx] = float(value)
            field = next(it).split()

        ix_mode += t_modes

    jo._results.normal_mode = normal_mode


def jobid(tp, jo, m, it):
    jo.job_id = m.group(1)


TextParser.callback.setdefault("before pre", {})[jobid_re] = jobid


@callback(None, r"start of program (\w+)")
def start_of_program(tp, jo, m, it):
    jo.lastexe = m.group(1)
    try:
        tp.search_items = list(
            tp.callback[m.group(1)].items()) + tp.general_search_items
    except KeyError:
        tp.search_items = tp.general_search_items


@callback(None, r"glibc:\s+(\S+)")
def glibc(tp, jo, m, it):
    jo.glibc = m.group(1)


@callback(None, r"\s+Summary of Natural Population Analysis:")
def nbo_charges(tp, jo, m, it):
    #skip 5 lines
    for i in range(5):
        line = next(it)

    nbo_charges = []
    while 1:
        line = next(it)
        if '====' in line:
            break
        nbo_charges.append(float(line.split()[2]))

    atom_count = len(jo._results.atom)
    if atom_count != len(nbo_charges):
        raise JaguarParseError(
            f"Found different number of NBO charges ({len(nbo_charges)}) and atoms ({atom_count})!"
        )

    for ix in range(atom_count):
        setattr(jo._results.atom[ix], "charge_nbo", nbo_charges[ix])


@callback(None, r"Job .+ completed on \S+ at (\w.*)$")
def end_time(tp, jo, m, it):
    lc = locale.getlocale(locale.LC_TIME)
    try:
        locale.setlocale(locale.LC_TIME, "C")
        ts = time.mktime(time.strptime(m.group(1)))
        jo.end_time = datetime.datetime.fromtimestamp(ts)
    finally:
        locale.setlocale(locale.LC_TIME, lc)
    jo.status = jo.OK


@callback(None, r"ERROR *(\d+)?: fatal error( -- debug information follows)?")
def fatal_error(tp, jo, m, it):
    # This line can be at least "Jaguar cannot recover..." and
    # "NBO cannot recover...":
    splat_re = re.compile(r"cannot recover from this error")

    jo.status = jo.SPLAT

    # skip initial blank lines
    for line in it:
        if not blank_line_re.match(line):
            break
    error_msg = [line]

    for line in it:
        if splat_re.search(line):
            # pop off the last line of -'s
            if len(error_msg) > 1:
                error_msg.pop()
            break
        error_msg.append(line)

    while blank_line_re.match(error_msg[-1]):
        error_msg.pop()

    jo.fatal_error = "".join(error_msg)
    jo.fatal_errorno = m.group(1)


# LOC properties
@callback("pre", r"Total LO correction:\s+(\S+)\s+kcal/mole")
def total_lo_correction(tp, jo, m, it):
    jo._results.total_lo_correction = float(m.group(1))


@callback("pre",
          r"Target molecule has a ligand field spin-splitting score of\s+(\S+)")
def spin_splitting_score(tp, jo, m, it):
    jo._results.spin_splitting_score = float(m.group(1))


@callback("pre", r"rotational constants:")
@callback("geopt", r"rotational constants:")
def rotational_constants(tp, jo, m, it):
    line = next(it)
    regex = r"\s*cm\^\(-1\):\s+([0-9]*\.[0-9]*|infinite)\s*([0-9]*\.[0-9]*)*\s*([0-9]*\.[0-9]*)*"
    re_rc = re.compile(regex)
    matches = re_rc.match(line)
    if matches:
        jo._results.rotational_constants = []
        for constant in matches.groups():
            if "infinite" not in constant:
                jo._results.rotational_constants.append(float(constant))


@callback("freq", r"\s*rotational symmetry number:\s+([0-9]+)")
def symmetry_number(tp, jo, m, it):
    if m:
        jo._results.symmetry_number = int(m.groups()[0])