Source code for pymatgen.io.qchem.outputs

# coding: utf-8
# Copyright (c) Pymatgen Development Team.
# Distributed under the terms of the MIT License.

"""
Parsers for Qchem output files.
"""

import re
import logging
import os
import warnings
from typing import List
import math
import copy

import numpy as np

from monty.io import zopen
from monty.json import jsanitize
from monty.json import MSONable
from pymatgen.core import Molecule

from pymatgen.analysis.graphs import MoleculeGraph
from pymatgen.analysis.local_env import OpenBabelNN
import networkx as nx

try:
    from openbabel import openbabel as ob

    have_babel = True
except ImportError:
    ob = None
    have_babel = False

from .utils import read_table_pattern, read_pattern, process_parsed_coords

__author__ = "Samuel Blau, Brandon Wood, Shyam Dwaraknath"
__copyright__ = "Copyright 2018, The Materials Project"
__version__ = "0.1"

logger = logging.getLogger(__name__)


[docs]class QCOutput(MSONable): """ Class to parse QChem output files. """ def __init__(self, filename): """ Args: filename (str): Filename to parse """ self.filename = filename self.data = {} self.data["errors"] = [] self.data["warnings"] = {} self.text = "" with zopen(filename, 'rt') as f: self.text = f.read() # Check if output file contains multiple output files. If so, print an error message and exit self.data["multiple_outputs"] = read_pattern( self.text, { "key": r"Job\s+\d+\s+of\s+(\d+)\s+" }, terminate_on_match=True).get('key') if not (self.data.get('multiple_outputs') is None or self.data.get('multiple_outputs') == [['1']]): print( "ERROR: multiple calculation outputs found in file " + filename + ". Please instead call QCOutput.mulitple_outputs_from_file(QCOutput,'" + filename + "')") print("Exiting...") exit() # Parse the molecular details: charge, multiplicity, # species, and initial geometry. self._read_charge_and_multiplicity() if read_pattern( self.text, { "key": r"Nuclear Repulsion Energy" }, terminate_on_match=True).get('key') == [[]]: self._read_species_and_inital_geometry() # Check if calculation finished self.data["completion"] = read_pattern( self.text, { "key": r"Thank you very much for using Q-Chem.\s+Have a nice day." }, terminate_on_match=True).get('key') # If the calculation finished, parse the job time. if self.data.get('completion', []): temp_timings = read_pattern( self.text, { "key": r"Total job time\:\s*([\d\-\.]+)s\(wall\)\,\s*([\d\-\.]+)s\(cpu\)" }).get('key') if temp_timings is not None: self.data["walltime"] = float(temp_timings[0][0]) self.data["cputime"] = float(temp_timings[0][1]) else: self.data["walltime"] = None self.data["cputime"] = None # Check if calculation is unrestricted self.data["unrestricted"] = read_pattern( self.text, { "key": r"A(?:n)*\sunrestricted[\s\w\-]+SCF\scalculation\swill\sbe" }, terminate_on_match=True).get('key') # Check if calculation uses GEN_SCFMAN, multiple potential output formats self.data["using_GEN_SCFMAN"] = read_pattern( self.text, { "key": r"\s+GEN_SCFMAN: A general SCF calculation manager" }, terminate_on_match=True).get('key') if not self.data["using_GEN_SCFMAN"]: self.data["using_GEN_SCFMAN"] = read_pattern( self.text, { "key": r"\s+General SCF calculation program by" }, terminate_on_match=True).get('key') # Check if the SCF failed to converge if read_pattern( self.text, { "key": r"SCF failed to converge" }, terminate_on_match=True).get('key') == [[]]: self.data["errors"] += ["SCF_failed_to_converge"] # Parse the SCF self._read_SCF() # Parse the Mulliken/ESP/RESP charges self._read_charges() # Check for various warnings self._detect_general_warnings() # Check to see if PCM or SMD are present self.data["solvent_method"] = None self.data["solvent_data"] = None if read_pattern( self.text, { "key": r"solvent_method\s*=?\s*pcm" }, terminate_on_match=True).get('key') == [[]]: self.data["solvent_method"] = "PCM" if read_pattern( self.text, { "key": r"solvent_method\s*=?\s*smd" }, terminate_on_match=True).get('key') == [[]]: self.data["solvent_method"] = "SMD" # Parse information specific to a solvent model if self.data["solvent_method"] == "PCM": self.data["solvent_data"] = {} temp_dielectric = read_pattern( self.text, { "key": r"dielectric\s*([\d\-\.]+)" }, terminate_on_match=True).get('key') self.data["solvent_data"]["PCM_dielectric"] = float(temp_dielectric[0][0]) self._read_pcm_information() elif self.data["solvent_method"] == "SMD": if read_pattern( self.text, { "key": r"Unrecognized solvent" }, terminate_on_match=True).get('key') == [[]]: if not self.data.get('completion', []): self.data["errors"] += ["unrecognized_solvent"] else: self.data["warnings"]["unrecognized_solvent"] = True self.data["solvent_data"] = {} temp_solvent = read_pattern( self.text, { "key": r"\s[Ss]olvent:? ([a-zA-Z]+)" }).get('key') for val in temp_solvent: if val[0] != temp_solvent[0][0]: if val[0] != "for": self.data["warnings"]["SMD_two_solvents"] = str(temp_solvent[0][0]) + " and " + str(val[0]) else: if "unrecognized_solvent" not in self.data["errors"] and "unrecognized_solvent" not in \ self.data["warnings"]: self.data["warnings"]["questionable_SMD_parsing"] = True self.data["solvent_data"]["SMD_solvent"] = temp_solvent[0][0] self._read_smd_information() # Parse the final energy temp_final_energy = read_pattern( self.text, { "key": r"Final\senergy\sis\s+([\d\-\.]+)" }).get('key') if temp_final_energy is None: self.data["final_energy"] = None else: self.data["final_energy"] = float(temp_final_energy[0][0]) # Check if calculation is using dft_d and parse relevant info if so self.data["using_dft_d3"] = read_pattern( self.text, { "key": r"dft_d\s*= d3" }, terminate_on_match=True).get('key') if self.data.get('using_dft_d3', []): temp_d3 = read_pattern(self.text, { "key": r"\-D3 energy without 3body term =\s*([\d\.\-]+) hartrees" }).get('key') real_d3 = np.zeros(len(temp_d3)) if temp_d3 is None: self.data["dft_d3"] = None elif len(temp_d3) == 1: self.data["dft_d3"] = float(temp_d3[0][0]) else: for ii, entry in enumerate(temp_d3): real_d3[ii] = float(entry[0]) self.data["dft_d3"] = real_d3 # Parse the S2 values in the case of an unrestricted calculation if self.data.get('unrestricted', []): correct_s2 = 0.5 * (self.data["multiplicity"] - 1) * (0.5 * (self.data["multiplicity"] - 1) + 1) temp_S2 = read_pattern(self.text, { "key": r"<S\^2>\s=\s+([\d\-\.]+)" }).get('key') if temp_S2 is None: self.data["S2"] = None elif len(temp_S2) == 1: self.data["S2"] = float(temp_S2[0][0]) if abs(correct_s2 - self.data["S2"]) > 0.01: self.data["warnings"]["spin_contamination"] = abs(correct_s2 - self.data["S2"]) else: real_S2 = np.zeros(len(temp_S2)) have_spin_contamination = False for ii, entry in enumerate(temp_S2): real_S2[ii] = float(entry[0]) if abs(correct_s2 - real_S2[ii]) > 0.01: have_spin_contamination = True self.data["S2"] = real_S2 if have_spin_contamination: spin_contamination = np.zeros(len(self.data["S2"])) for ii, entry in enumerate(self.data["S2"]): spin_contamination[ii] = abs(correct_s2 - entry) self.data["warnings"]["spin_contamination"] = spin_contamination # Check if the calculation is a geometry optimization. If so, parse the relevant output self.data["optimization"] = read_pattern( self.text, { "key": r"(?i)\s*job(?:_)*type\s*(?:=)*\s*opt" }).get('key') if self.data.get('optimization', []): self._read_optimization_data() # Check if the calculation contains a constraint in an $opt section. self.data["opt_constraint"] = read_pattern(self.text, { "key": r"\$opt\s+CONSTRAINT" }).get('key') if self.data.get('opt_constraint'): temp_constraint = read_pattern( self.text, { "key": r"Constraints and their Current Values\s+Value\s+Constraint\s+(\w+)\:\s+([\d\-\.]+)\s+" r"([\d\-\.]+)\s+([\d\-\.]+)\s+([\d\-\.]+)\s+([\d\-\.]+)\s+([\d\-\.]+)" }).get('key') if temp_constraint is not None: self.data["opt_constraint"] = temp_constraint[0] if float(self.data.get('opt_constraint')[5]) != float( self.data.get('opt_constraint')[6]): if abs(float(self.data.get('opt_constraint')[5])) != abs( float(self.data.get('opt_constraint')[6])): raise ValueError( "ERROR: Opt section value and constraint should be the same!" ) elif abs(float( self.data.get('opt_constraint')[5])) not in [ 0.0, 180.0 ]: raise ValueError( "ERROR: Opt section value and constraint can only differ by a sign at 0.0 and 180.0!" ) # Check if the calculation is a frequency analysis. If so, parse the relevant output self.data["frequency_job"] = read_pattern( self.text, { "key": r"(?i)\s*job(?:_)*type\s*(?:=)*\s*freq" }, terminate_on_match=True).get('key') if self.data.get('frequency_job', []): self._read_frequency_data() self.data["single_point_job"] = read_pattern( self.text, { "key": r"(?i)\s*job(?:_)*type\s*(?:=)*\s*sp" }, terminate_on_match=True).get("key") if self.data.get("single_point_job", []): self._read_single_point_data() self.data["force_job"] = read_pattern( self.text, { "key": r"(?i)\s*job(?:_)*type\s*(?:=)*\s*force" }, terminate_on_match=True).get("key") if self.data.get("force", []): self._read_force_data() # If the calculation did not finish and no errors have been identified yet, check for other errors if not self.data.get('completion', []) and self.data.get("errors") == []: self._check_completion_errors()
[docs] @staticmethod def multiple_outputs_from_file(cls, filename, keep_sub_files=True): """ Parses a QChem output file with multiple calculations 1.) Seperates the output into sub-files e.g. qcout -> qcout.0, qcout.1, qcout.2 ... qcout.N a.) Find delimeter for multiple calcualtions b.) Make seperate output sub-files 2.) Creates seperate QCCalcs for each one from the sub-files """ to_return = [] with zopen(filename, 'rt') as f: text = re.split(r'\s*(?:Running\s+)*Job\s+\d+\s+of\s+\d+\s+', f.read()) if text[0] == '': text = text[1:] for i, sub_text in enumerate(text): temp = open(filename + '.' + str(i), 'w') temp.write(sub_text) temp.close() tempOutput = cls(filename + '.' + str(i)) to_return.append(tempOutput) if not keep_sub_files: os.remove(filename + '.' + str(i)) return to_return
def _read_charge_and_multiplicity(self): """ Parses charge and multiplicity. """ temp_charge = read_pattern( self.text, { "key": r"\$molecule\s+([\-\d]+)\s+\d" }, terminate_on_match=True).get('key') if temp_charge is not None: self.data["charge"] = int(temp_charge[0][0]) else: temp_charge = read_pattern( self.text, { "key": r"Sum of atomic charges \=\s+([\d\-\.\+]+)" }, terminate_on_match=True).get('key') if temp_charge is None: self.data["charge"] = None else: self.data["charge"] = int(float(temp_charge[0][0])) temp_multiplicity = read_pattern( self.text, { "key": r"\$molecule\s+[\-\d]+\s+(\d)" }, terminate_on_match=True).get('key') if temp_multiplicity is not None: self.data["multiplicity"] = int(temp_multiplicity[0][0]) else: temp_multiplicity = read_pattern( self.text, { "key": r"Sum of spin\s+charges \=\s+([\d\-\.\+]+)" }, terminate_on_match=True).get('key') if temp_multiplicity is None: self.data["multiplicity"] = 1 else: self.data["multiplicity"] = int( float(temp_multiplicity[0][0])) + 1 def _read_species_and_inital_geometry(self): """ Parses species and initial geometry. """ header_pattern = r"Standard Nuclear Orientation \(Angstroms\)\s+I\s+Atom\s+X\s+Y\s+Z\s+-+" table_pattern = r"\s*\d+\s+([a-zA-Z]+)\s*([\d\-\.]+)\s*([\d\-\.]+)\s*([\d\-\.]+)\s*" footer_pattern = r"\s*-+" temp_geom = read_table_pattern(self.text, header_pattern, table_pattern, footer_pattern) if temp_geom is None or len(temp_geom) == 0: self.data["species"] = None self.data["initial_geometry"] = None self.data["initial_molecule"] = None self.data["point_group"] = None else: temp_point_group = read_pattern( self.text, { "key": r"Molecular Point Group\s+([A-Za-z\d\*]+)" }, terminate_on_match=True).get('key') if temp_point_group is not None: self.data["point_group"] = temp_point_group[0][0] else: self.data["point_group"] = None temp_geom = temp_geom[0] species = [] geometry = np.zeros(shape=(len(temp_geom), 3), dtype=float) for ii, entry in enumerate(temp_geom): species += [entry[0]] for jj in range(3): geometry[ii, jj] = float(entry[jj + 1]) self.data["species"] = species self.data["initial_geometry"] = geometry if self.data["charge"] is not None and self.data["multiplicity"] is not None: self.data["initial_molecule"] = Molecule( species=species, coords=geometry, charge=self.data.get('charge'), spin_multiplicity=self.data.get('multiplicity')) else: self.data["initial_molecule"] = None def _read_SCF(self): """ Parses both old and new SCFs. """ if self.data.get('using_GEN_SCFMAN', []): if "SCF_failed_to_converge" in self.data.get("errors"): footer_pattern = r"^\s*gen_scfman_exception: SCF failed to converge" else: footer_pattern = r"^\s*\-+\n\s+SCF time" header_pattern = r"^\s*\-+\s+Cycle\s+Energy\s+(?:(?:DIIS)*\s+[Ee]rror)*(?:RMS Gradient)*\s+\-+" \ r"(?:\s*\-+\s+OpenMP\s+Integral\s+computing\s+Module\s+" \ r"(?:Release:\s+version\s+[\d\-\.]+\,\s+\w+\s+[\d\-\.]+\, " \ r"Q-Chem Inc\. Pittsburgh\s+)*\-+)*\n" table_pattern = r"(?:\n[a-zA-Z_\s/]+\.C::(?:WARNING energy changes are now smaller than effective " \ r"accuracy\.)*(?:\s+calculation will continue, but THRESH should be increased)*" \ r"(?:\s+or SCF_CONVERGENCE decreased\. )*(?:\s+effective_thresh = [\d\-\.]+e[\d\-]+)*)*" \ r"(?:\s*Nonlocal correlation = [\d\-\.]+e[\d\-]+)*" \ r"(?:\s*Inaccurate integrated density:\n\s+Number of electrons\s+=\s+[\d\-\.]+\n\s+" \ r"Numerical integral\s+=\s+[\d\-\.]+\n\s+Relative error\s+=\s+[\d\-\.]+\s+\%\n)*\s*\d+\s+" \ r"([\d\-\.]+)\s+([\d\-\.]+)e([\d\-\.\+]+)(?:\s+Convergence criterion met)*" \ r"(?:\s+Preconditoned Steepest Descent)*(?:\s+Roothaan Step)*(?:\s+" \ r"(?:Normal\s+)*BFGS [Ss]tep)*(?:\s+LineSearch Step)*(?:\s+Line search: overstep)*" \ r"(?:\s+Dog-leg BFGS step)*(?:\s+Line search: understep)*" \ r"(?:\s+Descent step)*(?:\s+Done DIIS. Switching to GDM)*" \ r"(?:\s*\-+\s+Cycle\s+Energy\s+(?:(?:DIIS)*\s+[Ee]rror)*" \ r"(?:RMS Gradient)*\s+\-+(?:\s*\-+\s+OpenMP\s+Integral\s+computing\s+Module\s+" \ r"(?:Release:\s+version\s+[\d\-\.]+\,\s+\w+\s+[\d\-\.]+\, " \ r"Q-Chem Inc\. Pittsburgh\s+)*\-+)*\n)*" else: if "SCF_failed_to_converge" in self.data.get("errors"): footer_pattern = r"^\s*\d+\s*[\d\-\.]+\s+[\d\-\.]+E[\d\-\.]+\s+Convergence\s+failure\n" else: footer_pattern = r"^\s*\-+\n" header_pattern = r"^\s*\-+\s+Cycle\s+Energy\s+DIIS Error\s+\-+\n" table_pattern = r"(?:\s*Inaccurate integrated density:\n\s+Number of electrons\s+=\s+[\d\-\.]+\n\s+" \ r"Numerical integral\s+=\s+[\d\-\.]+\n\s+Relative error\s+=\s+[\d\-\.]+\s+\%\n)*\s*\d+\s*" \ r"([\d\-\.]+)\s+([\d\-\.]+)E([\d\-\.\+]+)(?:\s*\n\s*cpu\s+[\d\-\.]+\swall\s+[\d\-\.]+)*" \ r"(?:\nin dftxc\.C, eleTot sum is:[\d\-\.]+, tauTot is\:[\d\-\.]+)*" \ r"(?:\s+Convergence criterion met)*(?:\s+Done RCA\. Switching to DIIS)*" \ r"(?:\n\s*Warning: not using a symmetric Q)*" \ r"(?:\nRecomputing EXC\s*[\d\-\.]+\s*[\d\-\.]+\s*[\d\-\.]+" \ r"(?:\s*\nRecomputing EXC\s*[\d\-\.]+\s*[\d\-\.]+\s*[\d\-\.]+)*)*" temp_scf = read_table_pattern(self.text, header_pattern, table_pattern, footer_pattern) real_scf = [] for one_scf in temp_scf: temp = np.zeros(shape=(len(one_scf), 2)) for ii, entry in enumerate(one_scf): temp[ii, 0] = float(entry[0]) temp[ii, 1] = float(entry[1]) * 10 ** float(entry[2]) real_scf += [temp] self.data["SCF"] = real_scf temp_thresh_warning = read_pattern(self.text, { "key": r"\n[a-zA-Z_\s/]+\.C::WARNING energy changes are now smaller than effective accuracy" r"\.\n[a-zA-Z_\s/]+\.C::\s+calculation will continue, but THRESH should be increased\n" r"[a-zA-Z_\s/]+\.C::\s+or SCF_CONVERGENCE decreased\. \n" r"[a-zA-Z_\s/]+\.C::\s+effective_thresh = ([\d\-\.]+e[\d\-]+)" }).get('key') if temp_thresh_warning is not None: if len(temp_thresh_warning) == 1: self.data["warnings"]["thresh"] = float(temp_thresh_warning[0][0]) else: thresh_warning = np.zeros(len(temp_thresh_warning)) for ii, entry in enumerate(temp_thresh_warning): thresh_warning[ii] = float(entry[0]) self.data["warnings"]["thresh"] = thresh_warning temp_SCF_energy = read_pattern(self.text, { "key": r"SCF energy in the final basis set =\s*([\d\-\.]+)" }).get('key') if temp_SCF_energy is not None: if len(temp_SCF_energy) == 1: self.data["SCF_energy_in_the_final_basis_set"] = float(temp_SCF_energy[0][0]) else: SCF_energy = np.zeros(len(temp_SCF_energy)) for ii, val in enumerate(temp_SCF_energy): SCF_energy[ii] = float(val[0]) self.data["SCF_energy_in_the_final_basis_set"] = SCF_energy temp_Total_energy = read_pattern(self.text, { "key": r"Total energy in the final basis set =\s*([\d\-\.]+)" }).get('key') if temp_Total_energy is not None: if len(temp_Total_energy) == 1: self.data["Total_energy_in_the_final_basis_set"] = float(temp_Total_energy[0][0]) else: Total_energy = np.zeros(len(temp_Total_energy)) for ii, val in enumerate(temp_Total_energy): Total_energy[ii] = float(val[0]) self.data["Total_energy_in_the_final_basis_set"] = Total_energy def _read_charges(self): """ Parses Mulliken/ESP/RESP charges. Also parses spins given an unrestricted SCF. """ if self.data.get('unrestricted', []): header_pattern = r"\-+\s+Ground-State Mulliken Net Atomic Charges\s+Atom\s+Charge \(a\.u\.\)\s+" \ r"Spin\s\(a\.u\.\)\s+\-+" table_pattern = r"\s+\d+\s\w+\s+([\d\-\.]+)\s+([\d\-\.]+)" footer_pattern = r"\s\s\-+\s+Sum of atomic charges" else: header_pattern = r"\-+\s+Ground-State Mulliken Net Atomic Charges\s+Atom\s+Charge \(a\.u\.\)\s+\-+" table_pattern = r"\s+\d+\s\w+\s+([\d\-\.]+)" footer_pattern = r"\s\s\-+\s+Sum of atomic charges" temp_mulliken = read_table_pattern(self.text, header_pattern, table_pattern, footer_pattern) real_mulliken = [] for one_mulliken in temp_mulliken: if self.data.get('unrestricted', []): temp = np.zeros(shape=(len(one_mulliken), 2)) for ii, entry in enumerate(one_mulliken): temp[ii, 0] = float(entry[0]) temp[ii, 1] = float(entry[1]) else: temp = np.zeros(len(one_mulliken)) for ii, entry in enumerate(one_mulliken): temp[ii] = float(entry[0]) real_mulliken += [temp] self.data["Mulliken"] = real_mulliken # Check for ESP/RESP charges esp_or_resp = read_pattern( self.text, { "key": r"Merz-Kollman (R?ESP) Net Atomic Charges" }).get('key') if esp_or_resp is not None: header_pattern = r"Merz-Kollman (R?ESP) Net Atomic Charges\s+Atom\s+Charge \(a\.u\.\)\s+\-+" table_pattern = r"\s+\d+\s\w+\s+([\d\-\.]+)" footer_pattern = r"\s\s\-+\s+Sum of atomic charges" temp_esp_or_resp = read_table_pattern(self.text, header_pattern, table_pattern, footer_pattern) real_esp_or_resp = [] for one_entry in temp_esp_or_resp: temp = np.zeros(len(one_entry)) for ii, entry in enumerate(one_entry): temp[ii] = float(entry[0]) real_esp_or_resp += [temp] self.data[esp_or_resp[0][0]] = real_esp_or_resp def _detect_general_warnings(self): # Check for inaccurate integrated density temp_inac_integ = read_pattern( self.text, { "key": r"Inaccurate integrated density:\n\s+Number of electrons\s+=\s+([\d\-\.]+)\n\s+" r"Numerical integral\s+=\s+([\d\-\.]+)\n\s+Relative error\s+=\s+([\d\-\.]+)\s+\%\n" }).get('key') if temp_inac_integ is not None: inaccurate_integrated_density = np.zeros(shape=(len(temp_inac_integ), 3)) for ii, entry in enumerate(temp_inac_integ): for jj, val in enumerate(entry): inaccurate_integrated_density[ii][jj] = float(val) self.data["warnings"]["inaccurate_integrated_density"] = inaccurate_integrated_density # Check for an MKL error if read_pattern( self.text, { "key": r"Intel MKL ERROR" }, terminate_on_match=True).get('key') == [[]]: self.data["warnings"]["mkl"] = True # Check if the job is being hindered by a lack of analytical derivatives if read_pattern( self.text, { "key": r"Starting finite difference calculation for IDERIV" }, terminate_on_match=True).get('key') == [[]]: self.data["warnings"]["missing_analytical_derivates"] = True # Check if the job is complaining about MO files of inconsistent size if read_pattern( self.text, { "key": r"Inconsistent size for SCF MO coefficient file" }, terminate_on_match=True).get('key') == [[]]: self.data["warnings"]["inconsistent_size"] = True # Check for AO linear depend if read_pattern( self.text, { "key": r"Linear dependence detected in AO basis" }, terminate_on_match=True).get('key') == [[]]: self.data["warnings"]["linear_dependence"] = True # Check for Hessian without desired local structure if read_pattern( self.text, { "key": r"\*\*WARNING\*\* Hessian does not have the Desired Local Structure" }, terminate_on_match=True).get('key') == [[]]: self.data["warnings"]["hessian_local_structure"] = True # Check if GetCART cycle iterations ever exceeded if read_pattern( self.text, { "key": r"\*\*\*ERROR\*\*\* Exceeded allowed number of iterative cycles in GetCART" }, terminate_on_match=True).get('key') == [[]]: self.data["warnings"]["GetCART_cycles"] = True # Check for problems with internal coordinates if read_pattern( self.text, { "key": r"\*\*WARNING\*\* Problems with Internal Coordinates" }, terminate_on_match=True).get('key') == [[]]: self.data["warnings"]["internal_coordinates"] = True # Check for problem with eigenvalue magnitude if read_pattern( self.text, { "key": r"\*\*WARNING\*\* Magnitude of eigenvalue" }, terminate_on_match=True).get('key') == [[]]: self.data["warnings"]["eigenvalue_magnitude"] = True # Check for problem with hereditary postivive definiteness if read_pattern( self.text, { "key": r"\*\*WARNING\*\* Hereditary positive definiteness endangered" }, terminate_on_match=True).get('key') == [[]]: self.data["warnings"]["positive_definiteness_endangered"] = True # Check if there were problems with a colinear bend if read_pattern( self.text, { "key": r"\*\*\*ERROR\*\*\* Angle[\s\d]+is near\-linear\s+" r"But No atom available to define colinear bend" }, terminate_on_match=True).get('key') == [[]]: self.data["warnings"]["colinear_bend"] = True # Check if there were problems diagonalizing B*B(t) if read_pattern( self.text, { "key": r"\*\*\*ERROR\*\*\* Unable to Diagonalize B\*B\(t\) in <MakeNIC>" }, terminate_on_match=True).get('key') == [[]]: self.data["warnings"]["diagonalizing_BBt"] = True # Check for bad Roothaan step for scf in self.data["SCF"]: if abs(scf[0][0] - scf[1][0]) > 10.0: self.data["warnings"]["bad_roothaan"] = True def _read_geometries(self): """ Parses all geometries from an optimization trajectory. """ geoms = [] header_pattern = r"\s+Optimization\sCycle:\s+\d+\s+Coordinates \(Angstroms\)\s+ATOM\s+X\s+Y\s+Z" table_pattern = r"\s+\d+\s+\w+\s+([\d\-\.]+)\s+([\d\-\.]+)\s+([\d\-\.]+)" footer_pattern = r"\s+Point Group\:\s+[\d\w\*]+\s+Number of degrees of freedom\:\s+\d+" parsed_geometries = read_table_pattern( self.text, header_pattern, table_pattern, footer_pattern) for ii, parsed_geometry in enumerate(parsed_geometries): if parsed_geometry == [] or None: geoms.append(None) else: geoms.append(process_parsed_coords(parsed_geometry)) self.data["geometries"] = geoms self.data["last_geometry"] = geoms[-1] if self.data.get('charge') is not None: self.data["molecule_from_last_geometry"] = Molecule( species=self.data.get('species'), coords=self.data.get('last_geometry'), charge=self.data.get('charge'), spin_multiplicity=self.data.get('multiplicity')) # Parses optimized XYZ coordinates. If not present, parses optimized Z-matrix. header_pattern = r"\*+\s+OPTIMIZATION\s+CONVERGED\s+\*+\s+\*+\s+Coordinates \(Angstroms\)\s+ATOM\s+X\s+Y\s+Z" table_pattern = r"\s+\d+\s+\w+\s+([\d\-\.]+)\s+([\d\-\.]+)\s+([\d\-\.]+)" footer_pattern = r"\s+Z-matrix Print:" parsed_optimized_geometry = read_table_pattern( self.text, header_pattern, table_pattern, footer_pattern) if parsed_optimized_geometry == [] or None: self.data["optimized_geometry"] = None header_pattern = r"^\s+\*+\s+OPTIMIZATION CONVERGED\s+\*+\s+\*+\s+Z-matrix\s+" \ r"Print:\s+\$molecule\s+[\d\-]+\s+[\d\-]+\n" table_pattern = r"\s*(\w+)(?:\s+(\d+)\s+([\d\-\.]+)(?:\s+(\d+)\s+([\d\-\.]+)" \ r"(?:\s+(\d+)\s+([\d\-\.]+))*)*)*(?:\s+0)*" footer_pattern = r"^\$end\n" self.data["optimized_zmat"] = read_table_pattern( self.text, header_pattern, table_pattern, footer_pattern) else: self.data["optimized_geometry"] = process_parsed_coords( parsed_optimized_geometry[0]) if self.data.get('charge') is not None: self.data["molecule_from_optimized_geometry"] = Molecule( species=self.data.get('species'), coords=self.data.get('optimized_geometry'), charge=self.data.get('charge'), spin_multiplicity=self.data.get('multiplicity')) def _get_grad_format_length(self, header): """ Determines the maximum number of gradient entries printed on a line, which changes for different versions of Q-Chem """ found_end = False index = 1 pattern = header while not found_end: if read_pattern( self.text, { "key": pattern }, terminate_on_match=True).get('key') != [[]]: found_end = True else: pattern = pattern + r"\s+" + str(index) index += 1 return index - 2 def _read_gradients(self): """ Parses all gradients obtained during an optimization trajectory """ grad_header_pattern = r"Gradient of (?:SCF)?(?:MP2)? Energy(?: \(in au\.\))?" footer_pattern = r"(?:Max gradient component|Gradient time)" grad_format_length = self._get_grad_format_length(grad_header_pattern) grad_table_pattern = r"(?:\s+\d+(?:\s+\d+)?(?:\s+\d+)?(?:\s+\d+)?(?:\s+\d+)?(?:\s+\d+)?)?\n\s\s\s\s[1-3]\s*" \ r"(\-?[\d\.]{9,12})" if grad_format_length > 1: for ii in range(1, grad_format_length): grad_table_pattern = grad_table_pattern + r"(?:\s*(\-?[\d\.]{9,12}))?" parsed_gradients = read_table_pattern( self.text, grad_header_pattern, grad_table_pattern, footer_pattern) sorted_gradients = np.zeros(shape=(len(parsed_gradients), len(self.data["initial_molecule"]), 3)) for ii, grad in enumerate(parsed_gradients): for jj in range(int(len(grad) / 3)): for kk in range(grad_format_length): if grad[jj * 3][kk] != 'None': sorted_gradients[ii][jj * grad_format_length + kk][0] = grad[jj * 3][kk] sorted_gradients[ii][jj * grad_format_length + kk][1] = grad[jj * 3 + 1][kk] sorted_gradients[ii][jj * grad_format_length + kk][2] = grad[jj * 3 + 2][kk] self.data["gradients"] = sorted_gradients if self.data["solvent_method"] is not None: header_pattern = r"total gradient after adding PCM contribution --\s+-+\s+Atom\s+X\s+Y\s+Z\s+-+" table_pattern = r"\s+\d+\s+([\d\-\.]+)\s+([\d\-\.]+)\s+([\d\-\.]+)\s" footer_pattern = r"-+" parsed_gradients = read_table_pattern( self.text, header_pattern, table_pattern, footer_pattern) pcm_gradients = np.zeros(shape=(len(parsed_gradients), len(self.data["initial_molecule"]), 3)) for ii, grad in enumerate(parsed_gradients): for jj, entry in enumerate(grad): for kk, val in enumerate(entry): pcm_gradients[ii][jj][kk] = float(val) self.data["pcm_gradients"] = pcm_gradients else: self.data["pcm_gradients"] = None if read_pattern(self.text, { "key": r"Gradient of CDS energy" }, terminate_on_match=True).get('key') == [[]]: header_pattern = r"Gradient of CDS energy" parsed_gradients = read_table_pattern( self.text, header_pattern, grad_table_pattern, grad_header_pattern) sorted_gradients = np.zeros(shape=(len(parsed_gradients), len(self.data["initial_molecule"]), 3)) for ii, grad in enumerate(parsed_gradients): for jj in range(int(len(grad) / 3)): for kk in range(grad_format_length): if grad[jj * 3][kk] != 'None': sorted_gradients[ii][jj * grad_format_length + kk][0] = grad[jj * 3][kk] sorted_gradients[ii][jj * grad_format_length + kk][1] = grad[jj * 3 + 1][kk] sorted_gradients[ii][jj * grad_format_length + kk][2] = grad[jj * 3 + 2][kk] self.data["CDS_gradients"] = sorted_gradients else: self.data["CDS_gradients"] = None def _read_optimization_data(self): temp_energy_trajectory = read_pattern( self.text, { "key": r"\sEnergy\sis\s+([\d\-\.]+)" }).get('key') if temp_energy_trajectory is None: self.data["energy_trajectory"] = [] else: real_energy_trajectory = np.zeros(len(temp_energy_trajectory)) for ii, entry in enumerate(temp_energy_trajectory): real_energy_trajectory[ii] = float(entry[0]) self.data["energy_trajectory"] = real_energy_trajectory self._read_geometries() if have_babel: self.data["structure_change"] = check_for_structure_changes( self.data["initial_molecule"], self.data["molecule_from_last_geometry"]) self._read_gradients() # Then, if no optimized geometry or z-matrix is found, and no errors have been previously # idenfied, check to see if the optimization failed to converge or if Lambda wasn't able # to be determined. if len(self.data.get("errors")) == 0 and self.data.get('optimized_geometry') is None \ and len(self.data.get('optimized_zmat')) == 0: if read_pattern( self.text, { "key": r"MAXIMUM OPTIMIZATION CYCLES REACHED" }, terminate_on_match=True).get('key') == [[]]: self.data["errors"] += ["out_of_opt_cycles"] elif read_pattern( self.text, { "key": r"UNABLE TO DETERMINE Lamda IN FormD" }, terminate_on_match=True).get('key') == [[]]: self.data["errors"] += ["unable_to_determine_lamda"] def _read_frequency_data(self): """ Parses frequencies, enthalpy, entropy, and mode vectors. """ temp_dict = read_pattern( self.text, { "frequencies": r"\s*Frequency:\s+(\-?[\d\.\*]+)(?:\s+(\-?[\d\.\*]+)(?:\s+(\-?[\d\.\*]+))*)*", "trans_dip": r"TransDip\s+(\-?[\d\.]{5,7}|\*{5,7})\s*(\-?[\d\.]{5,7}|\*{5,7})\s*(\-?[\d\.]{5,7}|\*{5,7})\s*" r"(?:(\-?[\d\.]{5,7}|\*{5,7})\s*(\-?[\d\.]{5,7}|\*{5,7})\s*(\-?[\d\.]{5,7}|\*{5,7})\s*" r"(?:(\-?[\d\.]{5,7}|\*{5,7})\s*(\-?[\d\.]{5,7}|\*{5,7})\s*(\-?[\d\.]{5,7}|\*{5,7}))*)*", "IR_intens": r"\s*IR Intens:\s*(\-?[\d\.\*]+)(?:\s+(\-?[\d\.\*]+)(?:\s+(\-?[\d\.\*]+))*)*", "IR_active": r"\s*IR Active:\s+([YESNO]+)(?:\s+([YESNO]+)(?:\s+([YESNO]+))*)*", "ZPE": r"\s*Zero point vibrational energy:\s+([\d\-\.]+)\s+kcal/mol", "trans_enthalpy": r"\s*Translational Enthalpy:\s+([\d\-\.]+)\s+kcal/mol", "rot_enthalpy": r"\s*Rotational Enthalpy:\s+([\d\-\.]+)\s+kcal/mol", "vib_enthalpy": r"\s*Vibrational Enthalpy:\s+([\d\-\.]+)\s+kcal/mol", "gas_constant": r"\s*gas constant \(RT\):\s+([\d\-\.]+)\s+kcal/mol", "trans_entropy": r"\s*Translational Entropy:\s+([\d\-\.]+)\s+cal/mol\.K", "rot_entropy": r"\s*Rotational Entropy:\s+([\d\-\.]+)\s+cal/mol\.K", "vib_entropy": r"\s*Vibrational Entropy:\s+([\d\-\.]+)\s+cal/mol\.K", "total_enthalpy": r"\s*Total Enthalpy:\s+([\d\-\.]+)\s+kcal/mol", "total_entropy": r"\s*Total Entropy:\s+([\d\-\.]+)\s+cal/mol\.K" }) keys = ["ZPE", "trans_enthalpy", "rot_enthalpy", "vib_enthalpy", "gas_constant", "trans_entropy", "rot_entropy", "vib_entropy", "total_enthalpy", "total_entropy"] for key in keys: if temp_dict.get(key) is None: self.data[key] = None else: self.data[key] = float(temp_dict.get(key)[0][0]) if temp_dict.get('frequencies') is None: self.data['frequencies'] = None self.data['IR_intens'] = None self.data['IR_active'] = None self.data['trans_dip'] = None else: temp_freqs = [ value for entry in temp_dict.get('frequencies') for value in entry ] temp_intens = [ value for entry in temp_dict.get('IR_intens') for value in entry ] active = [ value for entry in temp_dict.get('IR_active') for value in entry ] temp_trans_dip = [ value for entry in temp_dict.get('trans_dip') for value in entry ] self.data['IR_active'] = active trans_dip = np.zeros(shape=(int((len(temp_trans_dip) - temp_trans_dip.count('None')) / 3), 3)) for ii, entry in enumerate(temp_trans_dip): if entry != 'None': if "*" in entry: trans_dip[int(ii / 3)][ii % 3] = float("inf") else: trans_dip[int(ii / 3)][ii % 3] = float(entry) self.data['trans_dip'] = trans_dip freqs = np.zeros(len(temp_freqs) - temp_freqs.count('None')) for ii, entry in enumerate(temp_freqs): if entry != 'None': if "*" in entry: if ii == 0: freqs[ii] = -float("inf") elif ii == len(freqs) - 1: freqs[ii] = float("inf") elif freqs[ii - 1] == -float("inf"): freqs[ii] = -float("inf") elif "*" in temp_freqs[ii + 1]: freqs[ii] = float("inf") else: raise RuntimeError( "ERROR: Encountered an undefined frequency not at the beginning or end of the " "frequency list, which makes no sense! Exiting...") if not self.data.get('completion', []): if "undefined_frequency" not in self.data["errors"]: self.data["errors"] += ["undefined_frequency"] else: if "undefined_frequency" not in self.data["warnings"]: self.data["warnings"]["undefined_frequency"] = True else: freqs[ii] = float(entry) self.data['frequencies'] = freqs intens = np.zeros(len(temp_intens) - temp_intens.count('None')) for ii, entry in enumerate(temp_intens): if entry != 'None': if "*" in entry: intens[ii] = float("inf") else: intens[ii] = float(entry) self.data['IR_intens'] = intens header_pattern = r"\s*Raman Active:\s+[YESNO]+\s+(?:[YESNO]+\s+)*X\s+Y\s+Z\s+(?:X\s+Y\s+Z\s+)*" table_pattern = r"\s*[a-zA-Z][a-zA-Z\s]\s*([\d\-\.]+)\s*([\d\-\.]+)\s*([\d\-\.]+)\s*(?:([\d\-\.]+)\s*" \ r"([\d\-\.]+)\s*([\d\-\.]+)\s*(?:([\d\-\.]+)\s*([\d\-\.]+)\s*([\d\-\.]+))*)*" footer_pattern = r"TransDip\s+\-?[\d\.\*]+\s*\-?[\d\.\*]+\s*\-?[\d\.\*]+\s*(?:\-?[\d\.\*]+\s*\-?" \ r"[\d\.\*]+\s*\-?[\d\.\*]+\s*)*" temp_freq_mode_vecs = read_table_pattern( self.text, header_pattern, table_pattern, footer_pattern) freq_mode_vecs = np.zeros( shape=(len(freqs), len(temp_freq_mode_vecs[0]), 3)) for ii, triple_FMV in enumerate(temp_freq_mode_vecs): for jj, line in enumerate(triple_FMV): for kk, entry in enumerate(line): if entry != 'None': freq_mode_vecs[int(ii * 3 + math.floor(kk / 3)), jj, kk % 3] = float(entry) self.data["frequency_mode_vectors"] = freq_mode_vecs freq_length = len(self.data["frequencies"]) if len(self.data["frequency_mode_vectors"]) != freq_length or len( self.data["IR_intens"]) != freq_length or len(self.data["IR_active"]) != freq_length: self.data["warnings"]["frequency_length_inconsistency"] = True def _read_single_point_data(self): """ Parses final free energy information from single-point calculations. """ temp_dict = read_pattern( self.text, { "final_energy": r"\s*SCF\s+energy in the final basis set\s+=\s*([\d\-\.]+)" }) if temp_dict.get('final_energy') is None: self.data['final_energy'] = None else: # -1 in case of pcm # Two lines will match the above; we want final calculation self.data['final_energy'] = float(temp_dict.get('final_energy')[-1][0]) def _read_force_data(self): self._read_gradients() def _read_pcm_information(self): """ Parses information from PCM solvent calculations. """ temp_dict = read_pattern( self.text, { "g_electrostatic": r"\s*G_electrostatic\s+=\s+([\d\-\.]+)\s+hartree\s+=\s+([\d\-\.]+)\s+kcal/mol\s*", "g_cavitation": r"\s*G_cavitation\s+=\s+([\d\-\.]+)\s+hartree\s+=\s+([\d\-\.]+)\s+kcal/mol\s*", "g_dispersion": r"\s*G_dispersion\s+=\s+([\d\-\.]+)\s+hartree\s+=\s+([\d\-\.]+)\s+kcal/mol\s*", "g_repulsion": r"\s*G_repulsion\s+=\s+([\d\-\.]+)\s+hartree\s+=\s+([\d\-\.]+)\s+kcal/mol\s*", "total_contribution_pcm": r"\s*Total\s+=\s+([\d\-\.]+)\s+hartree\s+=\s+([\d\-\.]+)\s+kcal/mol\s*", "solute_internal_energy": r"Solute Internal Energy \(H0\)\s*=\s*([\d\-\.]+)" } ) for key in temp_dict: if temp_dict.get(key) is None: self.data["solvent_data"][key] = None elif len(temp_dict.get(key)) == 1: self.data["solvent_data"][key] = float(temp_dict.get(key)[0][0]) else: temp_result = np.zeros(len(temp_dict.get(key))) for ii, entry in enumerate(temp_dict.get(key)): temp_result[ii] = float(entry[0]) self.data["solvent_data"][key] = temp_result smd_keys = ["smd0", "smd3", "smd4", "smd6", "smd9"] for key in smd_keys: self.data["solvent_data"][key] = None def _read_smd_information(self): """ Parses information from SMD solvent calculations. """ temp_dict = read_pattern( self.text, { "smd0": r"E-EN\(g\) gas\-phase elect\-nuc energy\s*([\d\-\.]+) a\.u\.", "smd3": r"G\-ENP\(liq\) elect\-nuc\-pol free energy of system\s*([\d\-\.]+) a\.u\.", "smd4": r"G\-CDS\(liq\) cavity\-dispersion\-solvent structure\s*free energy\s*([\d\-\.]+) kcal\/mol", "smd6": r"G\-S\(liq\) free energy of system\s*([\d\-\.]+) a\.u\.", "smd9": r"DeltaG\-S\(liq\) free energy of\s*solvation\s*\(9\) = \(6\) \- \(0\)\s*([\d\-\.]+) kcal\/mol" } ) for key in temp_dict: if temp_dict.get(key) is None: self.data["solvent_data"][key] = None elif len(temp_dict.get(key)) == 1: self.data["solvent_data"][key] = float(temp_dict.get(key)[0][0]) else: temp_result = np.zeros(len(temp_dict.get(key))) for ii, entry in enumerate(temp_dict.get(key)): temp_result[ii] = float(entry[0]) self.data["solvent_data"][key] = temp_result pcm_keys = ["g_electrostatic", "g_cavitation", "g_dispersion", "g_repulsion", "total_contribution_pcm", "solute_internal_energy"] for key in pcm_keys: self.data["solvent_data"][key] = None def _check_completion_errors(self): """ Parses potential errors that can cause jobs to crash """ if read_pattern( self.text, { "key": r"Coordinates do not transform within specified threshold" }, terminate_on_match=True).get('key') == [[]]: self.data["errors"] += ["failed_to_transform_coords"] elif read_pattern( self.text, { "key": r"The Q\-Chem input file has failed to pass inspection" }, terminate_on_match=True).get('key') == [[]]: self.data["errors"] += ["input_file_error"] elif read_pattern( self.text, { "key": r"Error opening input stream" }, terminate_on_match=True).get('key') == [[]]: self.data["errors"] += ["failed_to_read_input"] elif read_pattern( self.text, { "key": r"FileMan error: End of file reached prematurely" }, terminate_on_match=True).get('key') == [[]]: self.data["errors"] += ["premature_end_FileMan_error"] elif read_pattern( self.text, { "key": r"method not available" }, terminate_on_match=True).get('key') == [[]]: self.data["errors"] += ["method_not_available"] elif read_pattern( self.text, { "key": r"Could not find \$molecule section in ParseQInput" }, terminate_on_match=True).get('key') == [[]]: self.data["errors"] += ["read_molecule_error"] elif read_pattern( self.text, { "key": r"Welcome to Q-Chem" }, terminate_on_match=True).get('key') != [[]]: self.data["errors"] += ["never_called_qchem"] elif read_pattern( self.text, { "key": r"\*\*\*ERROR\*\*\* Hessian Appears to have all zero or negative eigenvalues" }, terminate_on_match=True).get('key') == [[]]: self.data["errors"] += ["hessian_eigenvalue_error"] elif read_pattern( self.text, { "key": r"FlexNet Licensing error" }, terminate_on_match=True).get('key') == [[]]: self.data["errors"] += ["licensing_error"] elif read_pattern( self.text, { "key": r"Could not open driver file in ReadDriverFromDisk" }, terminate_on_match=True).get('key') == [[]]: self.data["errors"] += ["driver_error"] else: tmp_failed_line_searches = read_pattern( self.text, { "key": r"\d+\s+failed line searches\.\s+Resetting" }, terminate_on_match=False).get('key') if tmp_failed_line_searches is not None: if len(tmp_failed_line_searches) > 10: self.data["errors"] += ["SCF_failed_to_converge"] if self.data.get("errors") == []: self.data["errors"] += ["unknown_error"]
[docs] def as_dict(self): """ Returns: MSONAble dict. """ d = {} d["data"] = self.data d["text"] = self.text d["filename"] = self.filename return jsanitize(d, strict=True)
[docs]def check_for_structure_changes(mol1: Molecule, mol2: Molecule) -> str: """ Compares connectivity of two molecules (using MoleculeGraph w/ OpenBabelNN). This function will work with two molecules with different atom orderings, but for proper treatment, atoms should be listed in the same order. Possible outputs include: - no_change: the bonding in the two molecules is identical - unconnected_fragments: the MoleculeGraph of mol1 is connected, but the MoleculeGraph is mol2 is not connected - fewer_bonds: the MoleculeGraph of mol1 has more bonds (edges) than the MoleculeGraph of mol2 - more_bonds: the MoleculeGraph of mol2 has more bonds (edges) than the MoleculeGraph of mol1 - bond_change: this case catches any other non-identical MoleculeGraphs Args: mol1: Pymatgen Molecule object to be compared. mol2: Pymatgen Molecule object to be compared. Returns: One of ["unconnected_fragments", "fewer_bonds", "more_bonds", "bond_change", "no_change"] """ special_elements = ["Li", "Na", "Mg", "Ca", "Zn"] mol_list = [copy.deepcopy(mol1), copy.deepcopy(mol2)] if mol1.composition != mol2.composition: raise RuntimeError("Molecules have different compositions! Exiting...") for ii, site in enumerate(mol1): if site.specie.symbol != mol2[ii].specie.symbol: warnings.warn( "Comparing molecules with different atom ordering! " "Turning off special treatment for coordinating metals." ) special_elements = [] special_sites: List[List] = [[], []] for ii, mol in enumerate(mol_list): for jj, site in enumerate(mol): if site.specie.symbol in special_elements: distances = [[kk, site.distance(other_site)] for kk, other_site in enumerate(mol)] special_sites[ii].append([jj, site, distances]) for jj, site in enumerate(mol): if site.specie.symbol in special_elements: mol.__delitem__(jj) # Can add logic to check the distances in the future if desired initial_mol_graph = MoleculeGraph.with_local_env_strategy(mol_list[0], OpenBabelNN()) initial_graph = initial_mol_graph.graph last_mol_graph = MoleculeGraph.with_local_env_strategy(mol_list[1], OpenBabelNN()) last_graph = last_mol_graph.graph if initial_mol_graph.isomorphic_to(last_mol_graph): return "no_change" else: if (nx.is_connected(initial_graph.to_undirected()) and not nx.is_connected(last_graph.to_undirected())): return "unconnected_fragments" elif last_graph.number_of_edges() < initial_graph.number_of_edges(): return "fewer_bonds" elif last_graph.number_of_edges() > initial_graph.number_of_edges(): return "more_bonds" else: return "bond_change"