Source code for

# coding: utf-8
# Copyright (c) Pymatgen Development Team.
# Distributed under the terms of the MIT License.

Wrapper classes for Cif input and output from Structures.

import math
import re
import os
import textwrap
import warnings
from collections import OrderedDict, deque
from io import StringIO
import numpy as np
from functools import partial
from pathlib import Path
from inspect import getfullargspec as getargspec
from itertools import groupby
from pymatgen.core.periodic_table import Element, Specie, get_el_sp, DummySpecie
from import zopen
from pymatgen.util.coord import in_coord_list_pbc, find_in_coord_list_pbc
from monty.string import remove_non_ascii
from pymatgen.core.lattice import Lattice
from pymatgen.core.structure import Structure
from pymatgen.core.composition import Composition
from pymatgen.core.operations import SymmOp
from pymatgen.symmetry.groups import SpaceGroup, SYMM_DATA
from pymatgen.symmetry.analyzer import SpacegroupAnalyzer
from pymatgen.electronic_structure.core import Magmom
from pymatgen.core.operations import MagSymmOp
from pymatgen.symmetry.maggroups import MagneticSpaceGroup

__author__ = "Shyue Ping Ong, Will Richards, Matthew Horton"
__copyright__ = "Copyright 2011, The Materials Project"
__version__ = "4.0"
__maintainer__ = "Shyue Ping Ong"
__email__ = ""
__status__ = "Production"
__date__ = "Sep 23, 2011"

sub_spgrp = partial(re.sub, r"[\s_]", "")

space_groups = {sub_spgrp(k): k for k in SYMM_DATA['space_group_encoding'].keys()}  # type: ignore

space_groups.update({sub_spgrp(k): k for k in SYMM_DATA['space_group_encoding'].keys()})  # type: ignore

_COD_DATA = None

def _get_cod_data():
    global _COD_DATA
    if _COD_DATA is None:
        import pymatgen
        with open(os.path.join(pymatgen.symmetry.__path__[0],
                               "symm_ops.json")) \
                as f:
            import json
            _COD_DATA = json.load(f)

    return _COD_DATA

[docs]class CifBlock: """ Object for storing cif data. All data is stored in a single dictionary. Data inside loops are stored in lists in the data dictionary, and information on which keys are grouped together are stored in the loops attribute. """ maxlen = 70 # not quite 80 so we can deal with semicolons and things def __init__(self, data, loops, header): """ Args: data: dict or OrderedDict of data to go into the cif. Values should be convertible to string, or lists of these if the key is in a loop loops: list of lists of keys, grouped by which loop they should appear in header: name of the block (appears after the data_ on the first line) """ self.loops = loops = data # AJ says: CIF Block names cannot be more than 75 characters or you # get an Exception self.header = header[:74] def __eq__(self, other): return self.loops == other.loops \ and == \ and self.header == other.header def __getitem__(self, key): return[key] def __str__(self): """ Returns the cif string for the data block """ s = ["data_{}".format(self.header)] keys = written = [] for k in keys: if k in written: continue for l in self.loops: # search for a corresponding loop if k in l: s.append(self._loop_to_string(l)) written.extend(l) break if k not in written: # k didn't belong to a loop v = self._format_field([k]) if len(k) + len(v) + 3 < self.maxlen: s.append("{} {}".format(k, v)) else: s.extend([k, v]) return "\n".join(s) def _loop_to_string(self, loop): s = "loop_" for l in loop: s += '\n ' + l for fields in zip(*[[k] for k in loop]): line = "\n" for val in map(self._format_field, fields): if val[0] == ";": s += line + "\n" + val line = "\n" elif len(line) + len(val) + 2 < self.maxlen: line += " " + val else: s += line line = '\n ' + val s += line return s def _format_field(self, v): v = v.__str__().strip() if len(v) > self.maxlen: return ';\n' + textwrap.fill(v, self.maxlen) + '\n;' # add quotes if necessary if v == '': return '""' if (" " in v or v[0] == "_") \ and not (v[0] == "'" and v[-1] == "'") \ and not (v[0] == '"' and v[-1] == '"'): if "'" in v: q = '"' else: q = "'" v = q + v + q return v @classmethod def _process_string(cls, string): # remove comments string = re.sub(r"(\s|^)#.*$", "", string, flags=re.MULTILINE) # remove empty lines string = re.sub(r"^\s*\n", "", string, flags=re.MULTILINE) # remove non_ascii string = remove_non_ascii(string) # since line breaks in .cif files are mostly meaningless, # break up into a stream of tokens to parse, rejoining multiline # strings (between semicolons) q = deque() multiline = False ml = [] # this regex splits on spaces, except when in quotes. # starting quotes must not be preceded by non-whitespace # (these get eaten by the first expression) # ending quotes must not be followed by non-whitespace p = re.compile(r'''([^'"\s][\S]*)|'(.*?)'(?!\S)|"(.*?)"(?!\S)''') for l in string.splitlines(): if multiline: if l.startswith(";"): multiline = False q.append(('', '', '', ' '.join(ml))) ml = [] l = l[1:].strip() else: ml.append(l) continue if l.startswith(";"): multiline = True ml.append(l[1:].strip()) else: for s in p.findall(l): # s is tuple. location of the data in the tuple # depends on whether it was quoted in the input q.append(s) return q
[docs] @classmethod def from_string(cls, string): """ Reads CifBlock from string. :param string: String representation. :return: CifBlock """ q = cls._process_string(string) header = q.popleft()[0][5:] data = OrderedDict() loops = [] while q: s = q.popleft() # cif keys aren't in quotes, so show up in s[0] if s[0] == "_eof": break if s[0].startswith("_"): try: data[s[0]] = "".join(q.popleft()) except IndexError: data[s[0]] = "" elif s[0].startswith("loop_"): columns = [] items = [] while q: s = q[0] if s[0].startswith("loop_") or not s[0].startswith("_"): break columns.append("".join(q.popleft())) data[columns[-1]] = [] while q: s = q[0] if s[0].startswith("loop_") or s[0].startswith("_"): break items.append("".join(q.popleft())) n = len(items) // len(columns) assert len(items) % n == 0 loops.append(columns) for k, v in zip(columns * n, items): data[k].append(v.strip()) elif "".join(s).strip() != "": warnings.warn("Possible issue in cif file" " at line: {}".format("".join(s).strip())) return cls(data, loops, header)
[docs]class CifFile: """ Reads and parses CifBlocks from a .cif file or string """ def __init__(self, data, orig_string=None, comment=None): """ Args: data (OrderedDict): Of CifBlock objects.å orig_string (str): The original cif string. comment (str): Comment string. """ = data self.orig_string = orig_string self.comment = comment or "# generated using pymatgen" def __str__(self): s = ["%s" % v for v in] return self.comment + "\n" + "\n".join(s) + "\n"
[docs] @classmethod def from_string(cls, string): """ Reads CifFile from a string. :param string: String representation. :return: CifFile """ d = OrderedDict() for x in re.split(r"^\s*data_", "x\n" + string, flags=re.MULTILINE | re.DOTALL)[1:]: # Skip over Cif block that contains powder diffraction data. # Some elements in this block were missing from CIF files in # Springer materials/Pauling file DBs. # This block anyway does not contain any structure information, and # CifParser was also not parsing it. if 'powder_pattern' in re.split(r"\n", x, 1)[0]: continue c = CifBlock.from_string("data_" + x) d[c.header] = c return cls(d, string)
[docs] @classmethod def from_file(cls, filename): """ Reads CifFile from a filename. :param filename: Filename :return: CifFile """ with zopen(str(filename), "rt", errors="replace") as f: return cls.from_string(
[docs]class CifParser: """ Parses a CIF file. Attempts to fix CIFs that are out-of-spec, but will issue warnings if corrections applied. These are also stored in the CifParser's errors attribute. """ def __init__(self, filename, occupancy_tolerance=1., site_tolerance=1e-4): """ Args: filename (str): CIF filename, bzipped or gzipped CIF files are fine too. occupancy_tolerance (float): If total occupancy of a site is between 1 and occupancy_tolerance, the occupancies will be scaled down to 1. site_tolerance (float): This tolerance is used to determine if two sites are sitting in the same position, in which case they will be combined to a single disordered site. Defaults to 1e-4. """ self._occupancy_tolerance = occupancy_tolerance self._site_tolerance = site_tolerance if isinstance(filename, (str, Path)): self._cif = CifFile.from_file(filename) else: self._cif = CifFile.from_string( # store if CIF contains features from non-core CIF dictionaries # e.g. magCIF self.feature_flags = {} self.warnings = [] def is_magcif(): """ Checks to see if file appears to be a magCIF file (heuristic). """ # Doesn't seem to be a canonical way to test if file is magCIF or # not, so instead check for magnetic symmetry datanames prefixes = ['_space_group_magn', '_atom_site_moment', '_space_group_symop_magn'] for d in for k in for prefix in prefixes: if prefix in k: return True return False self.feature_flags['magcif'] = is_magcif() def is_magcif_incommensurate(): """ Checks to see if file contains an incommensurate magnetic structure (heuristic). """ # Doesn't seem to be a canonical way to test if magCIF file # describes incommensurate strucure or not, so instead check # for common datanames if not self.feature_flags["magcif"]: return False prefixes = ['_cell_modulation_dimension', '_cell_wave_vector'] for d in for k in for prefix in prefixes: if prefix in k: return True return False self.feature_flags['magcif_incommensurate'] = is_magcif_incommensurate() for k in # pass individual CifBlocks to _sanitize_data[k] = self._sanitize_data([k])
[docs] @staticmethod def from_string(cif_string, occupancy_tolerance=1.): """ Creates a CifParser from a string. Args: cif_string (str): String representation of a CIF. occupancy_tolerance (float): If total occupancy of a site is between 1 and occupancy_tolerance, the occupancies will be scaled down to 1. Returns: CifParser """ stream = StringIO(cif_string) return CifParser(stream, occupancy_tolerance)
def _sanitize_data(self, data): """ Some CIF files do not conform to spec. This function corrects known issues, particular in regards to Springer materials/ Pauling files. This function is here so that CifParser can assume its input conforms to spec, simplifying its implementation. :param data: CifBlock :return: data CifBlock """ """ This part of the code deals with handling formats of data as found in CIF files extracted from the Springer Materials/Pauling File databases, and that are different from standard ICSD formats. """ # check for implicit hydrogens, warn if any present if "_atom_site_attached_hydrogens" in attached_hydrogens = [str2float(x) for x in['_atom_site_attached_hydrogens'] if str2float(x) != 0] if len(attached_hydrogens) > 0: self.warnings.append("Structure has implicit hydrogens defined, " "parsed structure unlikely to be suitable for use " "in calculations unless hydrogens added.") # Check to see if "_atom_site_type_symbol" exists, as some test CIFs do # not contain this key. if "_atom_site_type_symbol" in # Keep a track of which data row needs to be removed. # Example of a row: Nb,Zr '0.8Nb + 0.2Zr' .2a .m-3m 0 0 0 1 14 # 'rhombic dodecahedron, Nb<sub>14</sub>' # Without this code, the above row in a structure would be parsed # as an ordered site with only Nb (since # CifParser would try to parse the first two characters of the # label "Nb,Zr") and occupancy=1. # However, this site is meant to be a disordered site with 0.8 of # Nb and 0.2 of Zr. idxs_to_remove = [] new_atom_site_label = [] new_atom_site_type_symbol = [] new_atom_site_occupancy = [] new_fract_x = [] new_fract_y = [] new_fract_z = [] for idx, el_row in enumerate(data["_atom_site_label"]): # CIF files from the Springer Materials/Pauling File have # switched the label and symbol. Thus, in the # above shown example row, '0.8Nb + 0.2Zr' is the symbol. # Below, we split the strings on ' + ' to # check if the length (or number of elements) in the label and # symbol are equal. if len(data["_atom_site_type_symbol"][idx].split(' + ')) > \ len(data["_atom_site_label"][idx].split(' + ')): # Dictionary to hold extracted elements and occupancies els_occu = {} # parse symbol to get element names and occupancy and store # in "els_occu" symbol_str = data["_atom_site_type_symbol"][idx] symbol_str_lst = symbol_str.split(' + ') for elocc_idx in range(len(symbol_str_lst)): # Remove any bracketed items in the string symbol_str_lst[elocc_idx] = re.sub( r'\([0-9]*\)', '', symbol_str_lst[elocc_idx].strip()) # Extract element name and its occupancy from the # string, and store it as a # key-value pair in "els_occ". els_occu[str(re.findall(r'\D+', symbol_str_lst[ elocc_idx].strip())[1]).replace('<sup>', '')] = \ float('0' + re.findall(r'\.?\d+', symbol_str_lst[ elocc_idx].strip())[1]) x = str2float(data["_atom_site_fract_x"][idx]) y = str2float(data["_atom_site_fract_y"][idx]) z = str2float(data["_atom_site_fract_z"][idx]) for et, occu in els_occu.items(): # new atom site labels have 'fix' appended new_atom_site_label.append( et + '_fix' + str(len(new_atom_site_label))) new_atom_site_type_symbol.append(et) new_atom_site_occupancy.append(str(occu)) new_fract_x.append(str(x)) new_fract_y.append(str(y)) new_fract_z.append(str(z)) idxs_to_remove.append(idx) # Remove the original row by iterating over all keys in the CIF # data looking for lists, which indicates # multiple data items, one for each row, and remove items from the # list that corresponds to the removed row, # so that it's not processed by the rest of this function (which # would result in an error). for original_key in if isinstance([original_key], list): for id in sorted(idxs_to_remove, reverse=True): del[original_key][id] if len(idxs_to_remove) > 0: self.warnings.append("Pauling file corrections applied.")["_atom_site_label"] += new_atom_site_label["_atom_site_type_symbol"] += new_atom_site_type_symbol["_atom_site_occupancy"] += new_atom_site_occupancy["_atom_site_fract_x"] += new_fract_x["_atom_site_fract_y"] += new_fract_y["_atom_site_fract_z"] += new_fract_z """ This fixes inconsistencies in naming of several magCIF tags as a result of magCIF being in widespread use prior to specification being finalized (on advice of Branton Campbell). """ if self.feature_flags["magcif"]: # CIF-1 style has all underscores, interim standard # had period before magn instead of before the final # component (e.g. xyz) # we want to standardize on a specific key, to simplify # parsing code correct_keys = ["", "", "_space_group_magn.name_BNS", "_space_group_magn.number_BNS", "_atom_site_moment_crystalaxis_x", "_atom_site_moment_crystalaxis_y", "_atom_site_moment_crystalaxis_z", "_atom_site_moment_label"] # cannot mutate OrderedDict during enumeration, # so store changes we want to make changes_to_make = {} for original_key in for correct_key in correct_keys: # convert to all underscore trial_key = "_".join(correct_key.split(".")) test_key = "_".join(original_key.split(".")) if trial_key == test_key: changes_to_make[correct_key] = original_key # make changes for correct_key, original_key in changes_to_make.items():[correct_key] =[original_key] # renamed_keys maps interim_keys to final_keys renamed_keys = { "_magnetic_space_group.transform_to_standard_Pp_abc": "_space_group_magn.transform_BNS_Pp_abc"} changes_to_make = {} for interim_key, final_key in renamed_keys.items(): if changes_to_make[final_key] = interim_key if len(changes_to_make) > 0: self.warnings.append("Keys changed to match new magCIF specification.") for final_key, interim_key in changes_to_make.items():[final_key] =[interim_key] # check for finite precision frac co-ordinates (e.g. 0.6667 instead of 0.6666666...7) # this can sometimes cause serious issues when applying symmetry operations important_fracs = (1 / 3., 2 / 3.) fracs_to_change = {} for label in ('_atom_site_fract_x', '_atom_site_fract_y', '_atom_site_fract_z'): if label in for idx, frac in enumerate([label]): try: frac = str2float(frac) except Exception: # co-ordinate might not be defined e.g. '?' continue for comparison_frac in important_fracs: if abs(1 - frac / comparison_frac) < 1e-4: fracs_to_change[(label, idx)] = str(comparison_frac) if fracs_to_change: self.warnings.append("Some fractional co-ordinates rounded to ideal values to " "avoid issues with finite precision.") for (label, idx), val in fracs_to_change.items():[label][idx] = val return data def _unique_coords(self, coords_in, magmoms_in=None, lattice=None): """ Generate unique coordinates using coord and symmetry positions and also their corresponding magnetic moments, if supplied. """ coords = [] if magmoms_in: magmoms = [] if len(magmoms_in) != len(coords_in): raise ValueError for tmp_coord, tmp_magmom in zip(coords_in, magmoms_in): for op in self.symmetry_operations: coord = op.operate(tmp_coord) coord = np.array([i - math.floor(i) for i in coord]) if isinstance(op, MagSymmOp): # Up to this point, magmoms have been defined relative # to crystal axis. Now convert to Cartesian and into # a Magmom object. magmom = Magmom.from_moment_relative_to_crystal_axes( op.operate_magmom(tmp_magmom), lattice=lattice ) else: magmom = Magmom(tmp_magmom) if not in_coord_list_pbc(coords, coord, atol=self._site_tolerance): coords.append(coord) magmoms.append(magmom) return coords, magmoms else: for tmp_coord in coords_in: for op in self.symmetry_operations: coord = op.operate(tmp_coord) coord = np.array([i - math.floor(i) for i in coord]) if not in_coord_list_pbc(coords, coord, atol=self._site_tolerance): coords.append(coord) return coords, [Magmom(0)] * len(coords) # return dummy magmoms
[docs] def get_lattice(self, data, length_strings=("a", "b", "c"), angle_strings=("alpha", "beta", "gamma"), lattice_type=None): """ Generate the lattice from the provided lattice parameters. In the absence of all six lattice parameters, the crystal system and necessary parameters are parsed """ try: lengths = [str2float(data["_cell_length_" + i]) for i in length_strings] angles = [str2float(data["_cell_angle_" + i]) for i in angle_strings] if not lattice_type: return Lattice.from_parameters(*lengths, *angles) else: return getattr(Lattice, lattice_type)(*(lengths + angles)) except KeyError: # Missing Key search for cell setting for lattice_lable in ["_symmetry_cell_setting", "_space_group_crystal_system"]: if lattice_type = try: required_args = getargspec( getattr(Lattice, lattice_type)).args lengths = (l for l in length_strings if l in required_args) angles = (a for a in angle_strings if a in required_args) return self.get_lattice(data, lengths, angles, lattice_type=lattice_type) except AttributeError as exc: self.warnings.append(str(exc)) warnings.warn(exc) else: return None
[docs] def get_symops(self, data): """ In order to generate symmetry equivalent positions, the symmetry operations are parsed. If the symops are not present, the space group symbol is parsed, and symops are generated. """ symops = [] for symmetry_label in ["_symmetry_equiv_pos_as_xyz", "_symmetry_equiv_pos_as_xyz_", "_space_group_symop_operation_xyz", "_space_group_symop_operation_xyz_"]: if xyz = if isinstance(xyz, str): msg = "A 1-line symmetry op P1 CIF is detected!" warnings.warn(msg) self.warnings.append(msg) xyz = [xyz] try: symops = [SymmOp.from_xyz_string(s) for s in xyz] break except ValueError: continue if not symops: # Try to parse symbol for symmetry_label in ["_symmetry_space_group_name_H-M", "_symmetry_space_group_name_H_M", "_symmetry_space_group_name_H-M_", "_symmetry_space_group_name_H_M_", "_space_group_name_Hall", "_space_group_name_Hall_", "_space_group_name_H-M_alt", "_space_group_name_H-M_alt_", "_symmetry_space_group_name_hall", "_symmetry_space_group_name_hall_", "_symmetry_space_group_name_h-m", "_symmetry_space_group_name_h-m_"]: sg = if sg: sg = sub_spgrp(sg) try: spg = space_groups.get(sg) if spg: symops = SpaceGroup(spg).symmetry_ops msg = "No _symmetry_equiv_pos_as_xyz type key found. " \ "Spacegroup from %s used." % symmetry_label warnings.warn(msg) self.warnings.append(msg) break except ValueError: # Ignore any errors pass try: for d in _get_cod_data(): if sg == re.sub(r"\s+", "", d["hermann_mauguin"]): xyz = d["symops"] symops = [SymmOp.from_xyz_string(s) for s in xyz] msg = "No _symmetry_equiv_pos_as_xyz type key found. " \ "Spacegroup from %s used." % symmetry_label warnings.warn(msg) self.warnings.append(msg) break except Exception: continue if symops: break if not symops: # Try to parse International number for symmetry_label in ["_space_group_IT_number", "_space_group_IT_number_", "_symmetry_Int_Tables_number", "_symmetry_Int_Tables_number_"]: if try: i = int(str2float( symops = SpaceGroup.from_int_number(i).symmetry_ops break except ValueError: continue if not symops: msg = "No _symmetry_equiv_pos_as_xyz type key found. " \ "Defaulting to P1." warnings.warn(msg) self.warnings.append(msg) symops = [SymmOp.from_xyz_string(s) for s in ['x', 'y', 'z']] return symops
[docs] def get_magsymops(self, data): """ Equivalent to get_symops except for magnetic symmetry groups. Separate function since additional operation for time reversal symmetry (which changes magnetic moments on sites) needs to be returned. """ magsymmops = [] # check to see if magCIF file explicitly contains magnetic symmetry operations if""): xyzt ="") if isinstance(xyzt, str): xyzt = [xyzt] magsymmops = [MagSymmOp.from_xyzt_string(s) for s in xyzt] if""): xyzt ="") if isinstance(xyzt, str): xyzt = [xyzt] centering_symops = [MagSymmOp.from_xyzt_string(s) for s in xyzt] all_ops = [] for op in magsymmops: for centering_op in centering_symops: new_translation = [i - np.floor(i) for i in op.translation_vector + centering_op.translation_vector] new_time_reversal = op.time_reversal * centering_op.time_reversal all_ops.append( MagSymmOp.from_rotation_and_translation_and_time_reversal( rotation_matrix=op.rotation_matrix, translation_vec=new_translation, time_reversal=new_time_reversal)) magsymmops = all_ops # else check to see if it specifies a magnetic space group elif"_space_group_magn.name_BNS") or "_space_group_magn.number_BNS"): if"_space_group_magn.name_BNS"): # get BNS label for MagneticSpaceGroup() id ="_space_group_magn.name_BNS") else: # get BNS number for MagneticSpaceGroup() # by converting string to list of ints id = list(map(int, ("_space_group_magn.number_BNS").split(".")))) if"_space_group_magn.transform_BNS_Pp_abc"): if "_space_group_magn.transform_BNS_Pp_abc") != "a,b,c;0,0,0": jf ="_space_group_magn.transform_BNS_Pp_abc") msg = MagneticSpaceGroup(id, jf) elif"_space_group_magn.transform_BNS_Pp"): return NotImplementedError( "Incomplete specification to implement.") else: msg = MagneticSpaceGroup(id) magsymmops = msg.symmetry_ops if not magsymmops: msg = "No magnetic symmetry detected, using primitive symmetry." warnings.warn(msg) self.warnings.append(msg) magsymmops = [MagSymmOp.from_xyzt_string("x, y, z, 1")] return magsymmops
[docs] def parse_oxi_states(self, data): """ Parse oxidation states from data dictionary """ try: oxi_states = { data["_atom_type_symbol"][i]: str2float(data["_atom_type_oxidation_number"][i]) for i in range(len(data["_atom_type_symbol"]))} # attempt to strip oxidation state from _atom_type_symbol # in case the label does not contain an oxidation state for i, symbol in enumerate(data["_atom_type_symbol"]): oxi_states[re.sub(r"\d?[\+,\-]?$", "", symbol)] = \ str2float(data["_atom_type_oxidation_number"][i]) except (ValueError, KeyError): oxi_states = None return oxi_states
[docs] def parse_magmoms(self, data, lattice=None): """ Parse atomic magnetic moments from data dictionary """ if lattice is None: raise Exception( 'Magmoms given in terms of crystal axes in magCIF spec.') try: magmoms = { data["_atom_site_moment_label"][i]: np.array( [str2float(data["_atom_site_moment_crystalaxis_x"][i]), str2float(data["_atom_site_moment_crystalaxis_y"][i]), str2float(data["_atom_site_moment_crystalaxis_z"][i])] ) for i in range(len(data["_atom_site_moment_label"])) } except (ValueError, KeyError): return None return magmoms
def _parse_symbol(self, sym): """ Parse a string with a symbol to extract a string representing an element. Args: sym (str): A symbol to be parsed. Returns: A string with the parsed symbol. None if no parsing was possible. """ # Common representations for elements/water in cif files # TODO: fix inconsistent handling of water special = {"Hw": "H", "Ow": "O", "Wat": "O", "wat": "O", "OH": "", "OH2": "", "NO3": "N"} parsed_sym = None # try with special symbols, otherwise check the first two letters, # then the first letter alone. If everything fails try extracting the # first letters. m_sp = re.match("|".join(special.keys()), sym) if m_sp: parsed_sym = special[] elif Element.is_valid_symbol(sym[:2].title()): parsed_sym = sym[:2].title() elif Element.is_valid_symbol(sym[0].upper()): parsed_sym = sym[0].upper() else: m = re.match(r"w?[A-Z][a-z]*", sym) if m: parsed_sym = if parsed_sym is not None and (m_sp or not re.match(r"{}\d*".format(parsed_sym), sym)): msg = "{} parsed as {}".format(sym, parsed_sym) warnings.warn(msg) self.warnings.append(msg) return parsed_sym def _get_structure(self, data, primitive): """ Generate structure from part of the cif. """ def get_num_implicit_hydrogens(sym): num_h = {"Wat": 2, "wat": 2, "O-H": 1} return num_h.get(sym[:3], 0) lattice = self.get_lattice(data) # if magCIF, get magnetic symmetry moments and magmoms # else standard CIF, and use empty magmom dict if self.feature_flags["magcif_incommensurate"]: raise NotImplementedError( "Incommensurate structures not currently supported.") elif self.feature_flags["magcif"]: self.symmetry_operations = self.get_magsymops(data) magmoms = self.parse_magmoms(data, lattice=lattice) else: self.symmetry_operations = self.get_symops(data) magmoms = {} oxi_states = self.parse_oxi_states(data) coord_to_species = OrderedDict() coord_to_magmoms = OrderedDict() def get_matching_coord(coord): keys = list(coord_to_species.keys()) coords = np.array(keys) for op in self.symmetry_operations: c = op.operate(coord) inds = find_in_coord_list_pbc(coords, c, atol=self._site_tolerance) # cant use if inds, because python is dumb and np.array([0]) evaluates # to False if len(inds): return keys[inds[0]] return False for i in range(len(data["_atom_site_label"])): try: # If site type symbol exists, use it. Otherwise, we use the # label. symbol = self._parse_symbol(data["_atom_site_type_symbol"][i]) num_h = get_num_implicit_hydrogens( data["_atom_site_type_symbol"][i]) except KeyError: symbol = self._parse_symbol(data["_atom_site_label"][i]) num_h = get_num_implicit_hydrogens(data["_atom_site_label"][i]) if not symbol: continue if oxi_states is not None: o_s = oxi_states.get(symbol, 0) # use _atom_site_type_symbol if possible for oxidation state if "_atom_site_type_symbol" in oxi_symbol = data["_atom_site_type_symbol"][i] o_s = oxi_states.get(oxi_symbol, o_s) try: el = Specie(symbol, o_s) except Exception: el = DummySpecie(symbol, o_s) else: el = get_el_sp(symbol) x = str2float(data["_atom_site_fract_x"][i]) y = str2float(data["_atom_site_fract_y"][i]) z = str2float(data["_atom_site_fract_z"][i]) magmom = magmoms.get(data["_atom_site_label"][i], np.array([0, 0, 0])) try: occu = str2float(data["_atom_site_occupancy"][i]) except (KeyError, ValueError): occu = 1 if occu > 0: coord = (x, y, z) match = get_matching_coord(coord) comp_d = {el: occu} if num_h > 0: comp_d["H"] = num_h self.warnings.append("Structure has implicit hydrogens defined, " "parsed structure unlikely to be suitable for use " "in calculations unless hydrogens added.") comp = Composition(comp_d) if not match: coord_to_species[coord] = comp coord_to_magmoms[coord] = magmom else: coord_to_species[match] += comp # disordered magnetic not currently supported coord_to_magmoms[match] = None sum_occu = [sum(c.values()) for c in coord_to_species.values() if not set(c.elements) == {Element("O"), Element("H")}] if any([o > 1 for o in sum_occu]): msg = "Some occupancies ({}) sum to > 1! If they are within " \ "the occupancy_tolerance, they will be rescaled. " \ "The current occupancy_tolerance is set to: {}".format(sum_occu, self._occupancy_tolerance) warnings.warn(msg) self.warnings.append(msg) allspecies = [] allcoords = [] allmagmoms = [] allhydrogens = [] # check to see if magCIF file is disordered if self.feature_flags["magcif"]: for k, v in coord_to_magmoms.items(): if v is None: # Proposed solution to this is to instead store magnetic # moments as Specie 'spin' property, instead of site # property, but this introduces ambiguities for end user # (such as unintended use of `spin` and Specie will have # fictious oxidation state). raise NotImplementedError( 'Disordered magnetic structures not currently supported.') if coord_to_species.items(): for comp, group in groupby( sorted(list(coord_to_species.items()), key=lambda x: x[1]), key=lambda x: x[1]): tmp_coords = [site[0] for site in group] tmp_magmom = [coord_to_magmoms[tmp_coord] for tmp_coord in tmp_coords] if self.feature_flags["magcif"]: coords, magmoms = self._unique_coords(tmp_coords, magmoms_in=tmp_magmom, lattice=lattice) else: coords, magmoms = self._unique_coords(tmp_coords) if set(comp.elements) == {Element("O"), Element("H")}: # O with implicit hydrogens im_h = comp["H"] species = Composition({"O": comp["O"]}) else: im_h = 0 species = comp allhydrogens.extend(len(coords) * [im_h]) allcoords.extend(coords) allspecies.extend(len(coords) * [species]) allmagmoms.extend(magmoms) # rescale occupancies if necessary for i, species in enumerate(allspecies): totaloccu = sum(species.values()) if 1 < totaloccu <= self._occupancy_tolerance: allspecies[i] = species / totaloccu if allspecies and len(allspecies) == len(allcoords) \ and len(allspecies) == len(allmagmoms): site_properties = dict() if any(allhydrogens): assert len(allhydrogens) == len(allcoords) site_properties["implicit_hydrogens"] = allhydrogens if self.feature_flags["magcif"]: site_properties["magmom"] = allmagmoms if len(site_properties) == 0: site_properties = None struct = Structure(lattice, allspecies, allcoords, site_properties=site_properties) struct = struct.get_sorted_structure() if primitive and self.feature_flags['magcif']: struct = struct.get_primitive_structure(use_site_props=True) elif primitive: struct = struct.get_primitive_structure() struct = struct.get_reduced_structure() return struct
[docs] def get_structures(self, primitive=True): """ Return list of structures in CIF file. primitive boolean sets whether a conventional cell structure or primitive cell structure is returned. Args: primitive (bool): Set to False to return conventional unit cells. Defaults to True. With magnetic CIF files, will return primitive magnetic cell which may be larger than nuclear primitive cell. Returns: List of Structures. """ structures = [] for i, d in enumerate( try: s = self._get_structure(d, primitive) if s: structures.append(s) except (KeyError, ValueError) as exc: # Warn the user (Errors should never pass silently) # A user reported a problem with cif files produced by Avogadro # in which the atomic coordinates are in Cartesian coords. self.warnings.append(str(exc)) warnings.warn("No structure parsed for %d structure in CIF. Section of CIF file below." % (i + 1)) warnings.warn(str(d)) warnings.warn("Error is %s." % str(exc)) if self.warnings: warnings.warn("Issues encountered while parsing CIF: %s" % "\n".join(self.warnings)) if len(structures) == 0: raise ValueError("Invalid cif file with no structures!") return structures
[docs] def get_bibtex_string(self): """ Get BibTeX reference from CIF file. :param data: :return: BibTeX string """ try: from pybtex.database import BibliographyData, Entry except ImportError: raise RuntimeError("Bibliographic data extraction requires pybtex.") bibtex_keys = {'author': ('_publ_author_name', '_citation_author_name'), 'title': ('_publ_section_title', '_citation_title'), 'journal': ('_journal_name_full', '_journal_name_abbrev', '_citation_journal_full', '_citation_journal_abbrev'), 'volume': ('_journal_volume', '_citation_journal_volume'), 'year': ('_journal_year', '_citation_year'), 'number': ('_journal_number', '_citation_number'), 'page_first': ('_journal_page_first', '_citation_page_first'), 'page_last': ('_journal_page_last', '_citation_page_last'), 'doi': ('_journal_DOI', '_citation_DOI')} entries = {} # TODO: parse '_publ_section_references' when it exists? # TODO: CIF specification supports multiple citations. for idx, data in enumerate( # convert to lower-case keys, some cif files inconsistent data = {k.lower(): v for k, v in} bibtex_entry = {} for field, tags in bibtex_keys.items(): for tag in tags: if tag in data: if isinstance(data[tag], list): bibtex_entry[field] = data[tag][0] else: bibtex_entry[field] = data[tag] # convert to bibtex author format ('and' delimited) if 'author' in bibtex_entry: # separate out semicolon authors if isinstance(bibtex_entry["author"], str): if ";" in bibtex_entry["author"]: bibtex_entry["author"] = bibtex_entry["author"].split(";") if isinstance(bibtex_entry['author'], list): bibtex_entry['author'] = ' and '.join(bibtex_entry['author']) # convert to bibtex page range format, use empty string if not specified if ('page_first' in bibtex_entry) or ('page_last' in bibtex_entry): bibtex_entry['pages'] = '{0}--{1}'.format(bibtex_entry.get('page_first', ''), bibtex_entry.get('page_last', '')) bibtex_entry.pop('page_first', None) # and remove page_first, page_list if present bibtex_entry.pop('page_last', None) # cite keys are given as cif-reference-idx in order they are found entries['cifref{}'.format(idx)] = Entry('article', list(bibtex_entry.items())) return BibliographyData(entries).to_string(bib_format='bibtex')
[docs] def as_dict(self): """ :return: MSONable dict """ d = OrderedDict() for k, v in d[k] = {} for k2, v2 in d[k][k2] = v2 return d
@property def has_errors(self): """ :return: Whether there are errors/warnings detected in CIF parsing. """ return len(self.warnings) > 0
[docs]class CifWriter: """ A wrapper around CifFile to write CIF files from pymatgen structures. """ def __init__(self, struct, symprec=None, write_magmoms=False, significant_figures=8, angle_tolerance=5.0): """ Args: struct (Structure): structure to write symprec (float): If not none, finds the symmetry of the structure and writes the cif with symmetry information. Passes symprec to the SpacegroupAnalyzer. write_magmoms (bool): If True, will write magCIF file. Incompatible with symprec significant_figures (int): Specifies precision for formatting of floats. Defaults to 8. angle_tolerance (float): Angle tolerance for symmetry finding. Passes angle_tolerance to the SpacegroupAnalyzer. Used only if symprec is not None. """ if write_magmoms and symprec: warnings.warn( "Magnetic symmetry cannot currently be detected by pymatgen," "disabling symmetry detection.") symprec = None format_str = "{:.%df}" % significant_figures block = OrderedDict() loops = [] spacegroup = ("P 1", 1) if symprec is not None: sf = SpacegroupAnalyzer(struct, symprec, angle_tolerance=angle_tolerance) spacegroup = (sf.get_space_group_symbol(), sf.get_space_group_number()) # Needs the refined struture when using symprec. This converts # primitive to conventional structures, the standard for CIF. struct = sf.get_refined_structure() latt = struct.lattice comp = struct.composition no_oxi_comp = comp.element_composition block["_symmetry_space_group_name_H-M"] = spacegroup[0] for cell_attr in ['a', 'b', 'c']: block["_cell_length_" + cell_attr] = format_str.format( getattr(latt, cell_attr)) for cell_attr in ['alpha', 'beta', 'gamma']: block["_cell_angle_" + cell_attr] = format_str.format( getattr(latt, cell_attr)) block["_symmetry_Int_Tables_number"] = spacegroup[1] block["_chemical_formula_structural"] = no_oxi_comp.reduced_formula block["_chemical_formula_sum"] = no_oxi_comp.formula block["_cell_volume"] = format_str.format(latt.volume) reduced_comp, fu = no_oxi_comp.get_reduced_composition_and_factor() block["_cell_formula_units_Z"] = str(int(fu)) if symprec is None: block["_symmetry_equiv_pos_site_id"] = ["1"] block["_symmetry_equiv_pos_as_xyz"] = ["x, y, z"] else: sf = SpacegroupAnalyzer(struct, symprec) symmops = [] for op in sf.get_symmetry_operations(): v = op.translation_vector symmops.append(SymmOp.from_rotation_and_translation( op.rotation_matrix, v)) ops = [op.as_xyz_string() for op in symmops] block["_symmetry_equiv_pos_site_id"] = \ ["%d" % i for i in range(1, len(ops) + 1)] block["_symmetry_equiv_pos_as_xyz"] = ops loops.append(["_symmetry_equiv_pos_site_id", "_symmetry_equiv_pos_as_xyz"]) try: symbol_to_oxinum = OrderedDict([ (el.__str__(), float(el.oxi_state)) for el in sorted(comp.elements)]) block["_atom_type_symbol"] = symbol_to_oxinum.keys() block["_atom_type_oxidation_number"] = symbol_to_oxinum.values() loops.append(["_atom_type_symbol", "_atom_type_oxidation_number"]) except (TypeError, AttributeError): symbol_to_oxinum = OrderedDict([(el.symbol, 0) for el in sorted(comp.elements)]) atom_site_type_symbol = [] atom_site_symmetry_multiplicity = [] atom_site_fract_x = [] atom_site_fract_y = [] atom_site_fract_z = [] atom_site_label = [] atom_site_occupancy = [] atom_site_moment_label = [] atom_site_moment_crystalaxis_x = [] atom_site_moment_crystalaxis_y = [] atom_site_moment_crystalaxis_z = [] count = 0 if symprec is None: for site in struct: for sp, occu in sorted(site.species.items()): atom_site_type_symbol.append(sp.__str__()) atom_site_symmetry_multiplicity.append("1") atom_site_fract_x.append(format_str.format(site.a)) atom_site_fract_y.append(format_str.format(site.b)) atom_site_fract_z.append(format_str.format(site.c)) atom_site_label.append("{}{}".format(sp.symbol, count)) atom_site_occupancy.append(occu.__str__()) magmom = Magmom('magmom', getattr(sp, 'spin', 0))) if write_magmoms and abs(magmom) > 0: moment = Magmom.get_moment_relative_to_crystal_axes( magmom, latt) atom_site_moment_label.append( "{}{}".format(sp.symbol, count)) atom_site_moment_crystalaxis_x.append(format_str.format(moment[0])) atom_site_moment_crystalaxis_y.append(format_str.format(moment[1])) atom_site_moment_crystalaxis_z.append(format_str.format(moment[2])) count += 1 else: # The following just presents a deterministic ordering. unique_sites = [ (sorted(sites, key=lambda s: tuple([abs(x) for x in s.frac_coords]))[0], len(sites)) for sites in sf.get_symmetrized_structure().equivalent_sites ] for site, mult in sorted( unique_sites, key=lambda t: (t[0].species.average_electroneg, -t[1], t[0].a, t[0].b, t[0].c)): for sp, occu in site.species.items(): atom_site_type_symbol.append(sp.__str__()) atom_site_symmetry_multiplicity.append("%d" % mult) atom_site_fract_x.append(format_str.format(site.a)) atom_site_fract_y.append(format_str.format(site.b)) atom_site_fract_z.append(format_str.format(site.c)) atom_site_label.append("{}{}".format(sp.symbol, count)) atom_site_occupancy.append(occu.__str__()) count += 1 block["_atom_site_type_symbol"] = atom_site_type_symbol block["_atom_site_label"] = atom_site_label block["_atom_site_symmetry_multiplicity"] = \ atom_site_symmetry_multiplicity block["_atom_site_fract_x"] = atom_site_fract_x block["_atom_site_fract_y"] = atom_site_fract_y block["_atom_site_fract_z"] = atom_site_fract_z block["_atom_site_occupancy"] = atom_site_occupancy loops.append(["_atom_site_type_symbol", "_atom_site_label", "_atom_site_symmetry_multiplicity", "_atom_site_fract_x", "_atom_site_fract_y", "_atom_site_fract_z", "_atom_site_occupancy"]) if write_magmoms: block["_atom_site_moment_label"] = atom_site_moment_label block[ "_atom_site_moment_crystalaxis_x"] = atom_site_moment_crystalaxis_x block[ "_atom_site_moment_crystalaxis_y"] = atom_site_moment_crystalaxis_y block[ "_atom_site_moment_crystalaxis_z"] = atom_site_moment_crystalaxis_z loops.append(["_atom_site_moment_label", "_atom_site_moment_crystalaxis_x", "_atom_site_moment_crystalaxis_y", "_atom_site_moment_crystalaxis_z"]) d = OrderedDict() d[comp.reduced_formula] = CifBlock(block, loops, comp.reduced_formula) self._cf = CifFile(d) @property def ciffile(self): """ Returns: CifFile associated with the CifWriter. """ return self._cf def __str__(self): """ Returns the cif as a string. """ return self._cf.__str__()
[docs] def write_file(self, filename): """ Write the cif file. """ with zopen(filename, "wt") as f: f.write(self.__str__())
[docs]def str2float(text): """ Remove uncertainty brackets from strings and return the float. """ try: # Note that the ending ) is sometimes missing. That is why the code has # been modified to treat it as optional. Same logic applies to lists. return float(re.sub(r"\(.+\)*", "", text)) except TypeError: if isinstance(text, list) and len(text) == 1: return float(re.sub(r"\(.+\)*", "", text[0])) except ValueError as ex: if text.strip() == ".": return 0 raise ex