Source code for pymatgen.io.cif

# coding: utf-8
# Copyright (c) Pymatgen Development Team.
# Distributed under the terms of the MIT License.

from __future__ import division, unicode_literals, print_function

import math
import re
import os
import textwrap
import warnings
from collections import OrderedDict, deque

import six
from six.moves import zip, cStringIO

import numpy as np
from functools import partial

try:
    from inspect import getfullargspec as getargspec
except ImportError:
    from inspect import getargspec
from itertools import groupby
from pymatgen.core.periodic_table import Element, Specie, get_el_sp, DummySpecie
from monty.io import zopen
from pymatgen.util.coord_utils import in_coord_list_pbc, pbc_diff, \
    find_in_coord_list_pbc
from monty.string import remove_non_ascii
from pymatgen.core.lattice import Lattice
from pymatgen.core.structure import Structure
from pymatgen.core.composition import Composition
from pymatgen.core.operations import SymmOp
from pymatgen.symmetry.groups import SpaceGroup, SYMM_DATA
from pymatgen.symmetry.analyzer import SpacegroupAnalyzer
from pymatgen.electronic_structure.core import Magmom
from pymatgen.core.operations import MagSymmOp
from pymatgen.symmetry.maggroups import MagneticSpaceGroup

"""
Wrapper classes for Cif input and output from Structures.
"""

__author__ = "Shyue Ping Ong, Will Richards, Matthew Horton"
__copyright__ = "Copyright 2011, The Materials Project"
__version__ = "4.0"
__maintainer__ = "Shyue Ping Ong"
__email__ = "shyuep@gmail.com"
__status__ = "Production"
__date__ = "Sep 23, 2011"

sub_spgrp = partial(re.sub, r"[\s_]", "")

space_groups = {sub_spgrp(k): k
                for k in SYMM_DATA['space_group_encoding'].keys()}

space_groups.update({sub_spgrp(k): k
                     for k in SYMM_DATA['space_group_encoding'].keys()})

_COD_DATA = None


def _get_cod_data():
    global _COD_DATA
    if _COD_DATA is None:
        import pymatgen
        with open(os.path.join(pymatgen.symmetry.__path__[0],
                               "symm_ops.json")) \
                as f:
            import json
            _COD_DATA = json.load(f)

    return _COD_DATA


[docs]class CifBlock(object): maxlen = 70 # not quite 80 so we can deal with semicolons and things def __init__(self, data, loops, header): """ Object for storing cif data. All data is stored in a single dictionary. Data inside loops are stored in lists in the data dictionary, and information on which keys are grouped together are stored in the loops attribute. Args: data: dict or OrderedDict of data to go into the cif. Values should be convertible to string, or lists of these if the key is in a loop loops: list of lists of keys, grouped by which loop they should appear in header: name of the block (appears after the data_ on the first line) """ self.loops = loops self.data = data # AJ says: CIF Block names cannot be more than 75 characters or you # get an Exception self.header = header[:74] def __eq__(self, other): return self.loops == other.loops \ and self.data == other.data \ and self.header == other.header def __getitem__(self, key): return self.data[key] def __str__(self): """ Returns the cif string for the data block """ s = ["data_{}".format(self.header)] keys = self.data.keys() written = [] for k in keys: if k in written: continue for l in self.loops: # search for a corresponding loop if k in l: s.append(self._loop_to_string(l)) written.extend(l) break if k not in written: # k didn't belong to a loop v = self._format_field(self.data[k]) if len(k) + len(v) + 3 < self.maxlen: s.append("{} {}".format(k, v)) else: s.extend([k, v]) return "\n".join(s) def _loop_to_string(self, loop): s = "loop_" for l in loop: s += '\n ' + l for fields in zip(*[self.data[k] for k in loop]): line = "\n" for val in map(self._format_field, fields): if val[0] == ";": s += line + "\n" + val line = "\n" elif len(line) + len(val) + 2 < self.maxlen: line += " " + val else: s += line line = '\n ' + val s += line return s def _format_field(self, v): v = v.__str__().strip() if len(v) > self.maxlen: return ';\n' + textwrap.fill(v, self.maxlen) + '\n;' # add quotes if necessary if v == '': return '""' if (" " in v or v[0] == "_") \ and not (v[0] == "'" and v[-1] == "'") \ and not (v[0] == '"' and v[-1] == '"'): if "'" in v: q = '"' else: q = "'" v = q + v + q return v @classmethod def _process_string(cls, string): # remove comments string = re.sub(r"(\s|^)#.*$", "", string, flags=re.MULTILINE) # remove empty lines string = re.sub(r"^\s*\n", "", string, flags=re.MULTILINE) # remove non_ascii string = remove_non_ascii(string) # since line breaks in .cif files are mostly meaningless, # break up into a stream of tokens to parse, rejoining multiline # strings (between semicolons) q = deque() multiline = False ml = [] # this regex splits on spaces, except when in quotes. # starting quotes must not be preceded by non-whitespace # (these get eaten by the first expression) # ending quotes must not be followed by non-whitespace p = re.compile(r'''([^'"\s][\S]*)|'(.*?)'(?!\S)|"(.*?)"(?!\S)''') for l in string.splitlines(): if multiline: if l.startswith(";"): multiline = False q.append(('', '', '', ' '.join(ml))) ml = [] l = l[1:].strip() else: ml.append(l) continue if l.startswith(";"): multiline = True ml.append(l[1:].strip()) else: for s in p.findall(l): # s is tuple. location of the data in the tuple # depends on whether it was quoted in the input q.append(s) return q
[docs] @classmethod def from_string(cls, string): q = cls._process_string(string) header = q.popleft()[0][5:] data = OrderedDict() loops = [] while q: s = q.popleft() # cif keys aren't in quotes, so show up in s[0] if s[0] == "_eof": break if s[0].startswith("_"): data[s[0]] = "".join(q.popleft()) elif s[0].startswith("loop_"): columns = [] items = [] while q: s = q[0] if s[0].startswith("loop_") or not s[0].startswith("_"): break columns.append("".join(q.popleft())) data[columns[-1]] = [] while q: s = q[0] if s[0].startswith("loop_") or s[0].startswith("_"): break items.append("".join(q.popleft())) n = len(items) // len(columns) assert len(items) % n == 0 loops.append(columns) for k, v in zip(columns * n, items): data[k].append(v.strip()) elif "".join(s).strip() != "": warnings.warn("Possible error in cif format" " error at {}".format("".join(s).strip())) return cls(data, loops, header)
[docs]class CifFile(object): """ Reads and parses CifBlocks from a .cif file or string """ def __init__(self, data, orig_string=None, comment=None): """ Args: data (OrderedDict): Of CifBlock objects.å orig_string (str): The original cif string. comment (str): Comment string. """ self.data = data self.orig_string = orig_string self.comment = comment or "# generated using pymatgen" def __str__(self): s = ["%s" % v for v in self.data.values()] return self.comment + "\n" + "\n".join(s) + "\n"
[docs] @classmethod def from_string(cls, string): d = OrderedDict() for x in re.split(r"^\s*data_", "x\n" + string, flags=re.MULTILINE | re.DOTALL)[1:]: # Skip over Cif block that contains powder diffraction data. # Some elements in this block were missing from CIF files in Springer materials/Pauling file DBs. # This block anyway does not contain any structure information, and CifParser was also not parsing it. if 'powder_pattern' in re.split(r"\n", x, 1)[0]: continue c = CifBlock.from_string("data_" + x) d[c.header] = c return cls(d, string)
[docs] @classmethod def from_file(cls, filename): with zopen(filename, "rt", errors="replace") as f: return cls.from_string(f.read())
[docs]class CifParser(object): """ Parses a cif file Args: filename (str): Cif filename. bzipped or gzipped cifs are fine too. occupancy_tolerance (float): If total occupancy of a site is between 1 and occupancy_tolerance, the occupancies will be scaled down to 1. site_tolerance (float): This tolerance is used to determine if two sites are sitting in the same position, in which case they will be combined to a single disordered site. Defaults to 1e-4. """ def __init__(self, filename, occupancy_tolerance=1., site_tolerance=1e-4): self._occupancy_tolerance = occupancy_tolerance self._site_tolerance = site_tolerance if isinstance(filename, six.string_types): self._cif = CifFile.from_file(filename) else: self._cif = CifFile.from_string(filename.read()) # store if CIF contains features from non-core CIF dictionaries # e.g. magCIF self.feature_flags = {} def is_magcif(): """ Checks to see if file appears to be a magCIF file (heuristic). """ # Doesn't seem to be a canonical way to test if file is magCIF or # not, so instead check for magnetic symmetry datanames prefixes = ['_space_group_magn', '_atom_site_moment', '_space_group_symop_magn'] for d in self._cif.data.values(): for k in d.data.keys(): for prefix in prefixes: if prefix in k: return True return False self.feature_flags['magcif'] = is_magcif() def is_magcif_incommensurate(): """ Checks to see if file contains an incommensurate magnetic structure (heuristic). """ # Doesn't seem to be a canonical way to test if magCIF file # describes incommensurate strucure or not, so instead check # for common datanames if not self.feature_flags["magcif"]: return False prefixes = ['_cell_modulation_dimension', '_cell_wave_vector'] for d in self._cif.data.values(): for k in d.data.keys(): for prefix in prefixes: if prefix in k: return True return False self.feature_flags['magcif_incommensurate'] = is_magcif_incommensurate() for k in self._cif.data.keys(): # pass individual CifBlocks to _sanitize_data self._cif.data[k] = self._sanitize_data(self._cif.data[k])
[docs] @staticmethod def from_string(cif_string, occupancy_tolerance=1.): """ Creates a CifParser from a string. Args: cif_string (str): String representation of a CIF. occupancy_tolerance (float): If total occupancy of a site is between 1 and occupancy_tolerance, the occupancies will be scaled down to 1. Returns: CifParser """ stream = cStringIO(cif_string) return CifParser(stream, occupancy_tolerance)
def _sanitize_data(self, data): """ Some CIF files do not conform to spec. This function corrects known issues, particular in regards to Springer materials/ Pauling files. This function is here so that CifParser can assume its input conforms to spec, simplifying its implementation. :param data: CifBlock :return: data CifBlock """ """ This part of the code deals with handling formats of data as found in CIF files extracted from the Springer Materials/Pauling File databases, and that are different from standard ICSD formats. """ # Check to see if "_atom_site_type_symbol" exists, as some test CIFs do # not contain this key. if "_atom_site_type_symbol" in data.data.keys(): # Keep a track of which data row needs to be removed. # Example of a row: Nb,Zr '0.8Nb + 0.2Zr' .2a .m-3m 0 0 0 1 14 # 'rhombic dodecahedron, Nb<sub>14</sub>' # Without this code, the above row in a structure would be parsed # as an ordered site with only Nb (since # CifParser would try to parse the first two characters of the # label "Nb,Zr") and occupancy=1. # However, this site is meant to be a disordered site with 0.8 of # Nb and 0.2 of Zr. idxs_to_remove = [] new_atom_site_label = [] new_atom_site_type_symbol = [] new_atom_site_occupancy = [] new_fract_x = [] new_fract_y = [] new_fract_z = [] for idx, el_row in enumerate(data["_atom_site_label"]): # CIF files from the Springer Materials/Pauling File have # switched the label and symbol. Thus, in the # above shown example row, '0.8Nb + 0.2Zr' is the symbol. # Below, we split the strings on ' + ' to # check if the length (or number of elements) in the label and # symbol are equal. if len(data["_atom_site_type_symbol"][idx].split(' + ')) > \ len(data["_atom_site_label"][idx].split(' + ')): # Dictionary to hold extracted elements and occupancies els_occu = {} # parse symbol to get element names and occupancy and store # in "els_occu" symbol_str = data["_atom_site_type_symbol"][idx] symbol_str_lst = symbol_str.split(' + ') for elocc_idx in range(len(symbol_str_lst)): # Remove any bracketed items in the string symbol_str_lst[elocc_idx] = re.sub(r'\([0-9]*\)', '', symbol_str_lst[elocc_idx].strip()) # Extract element name and its occupancy from the # string, and store it as a # key-value pair in "els_occ". els_occu[str(re.findall(r'\D+', symbol_str_lst[ elocc_idx].strip())[1]).replace('<sup>', '')] = \ float('0' + re.findall(r'\.?\d+', symbol_str_lst[ elocc_idx].strip())[1]) x = str2float(data["_atom_site_fract_x"][idx]) y = str2float(data["_atom_site_fract_y"][idx]) z = str2float(data["_atom_site_fract_z"][idx]) for et, occu in els_occu.items(): # new atom site labels have 'fix' appended new_atom_site_label.append(et + '_fix' + str(len(new_atom_site_label))) new_atom_site_type_symbol.append(et) new_atom_site_occupancy.append(str(occu)) new_fract_x.append(str(x)) new_fract_y.append(str(y)) new_fract_z.append(str(z)) idxs_to_remove.append(idx) # Remove the original row by iterating over all keys in the CIF # data looking for lists, which indicates # multiple data items, one for each row, and remove items from the # list that corresponds to the removed row, # so that it's not processed by the rest of this function (which # would result in an error). for original_key in data.data: if isinstance(data.data[original_key], list): for id in sorted(idxs_to_remove, reverse=True): del data.data[original_key][id] if len(idxs_to_remove) > 0: data.data["_atom_site_label"] += new_atom_site_label data.data["_atom_site_type_symbol"] += new_atom_site_type_symbol data.data["_atom_site_occupancy"] += new_atom_site_occupancy data.data["_atom_site_fract_x"] += new_fract_x data.data["_atom_site_fract_y"] += new_fract_y data.data["_atom_site_fract_z"] += new_fract_z """ This fixes inconsistencies in naming of several magCIF tags as a result of magCIF being in widespread use prior to specification being finalized (on advice of Branton Campbell). """ if self.feature_flags["magcif"]: # CIF-1 style has all underscores, interim standard # had period before magn instead of before the final # component (e.g. xyz) correct_keys = ["_space_group_symop_magn_operation.xyz", "_space_group_symop_magn_centering.xyz", "_space_group_magn.name_BNS", "_space_group_magn.number_BNS"] # cannot mutate OrderedDict during enumeration, # so store changes we want to make changes_to_make = {} for original_key in data.data: for correct_key in correct_keys: # convert to all underscore trial_key = "_".join(correct_key.split(".")) test_key = "_".join(original_key.split(".")) if trial_key == test_key: changes_to_make[correct_key] = original_key # make changes for correct_key, original_key in changes_to_make.items(): data.data[correct_key] = data.data[original_key] # some keys have been renamed, renamed_keys maps interim_keys to final_keys renamed_keys = {"_magnetic_space_group.transform_to_standard_Pp_abc": "_space_group_magn.transform_BNS_Pp_abc"} changes_to_make = {} for interim_key, final_key in renamed_keys.items(): if data.data.get(interim_key): changes_to_make[final_key] = interim_key for final_key, interim_key in changes_to_make.items(): data.data[final_key] = data.data[interim_key] return data def _unique_coords(self, coords_in, magmoms_in=None): """ Generate unique coordinates using coord and symmetry positions and also their corresponding magnetic moments, if supplied. """ coords = [] if magmoms_in: magmoms = [] magmoms_in = [Magmom(magmom) for magmom in magmoms_in] if len(magmoms_in) != len(coords_in): raise ValueError for tmp_coord, tmp_magmom in zip(coords_in, magmoms_in): for op in self.symmetry_operations: coord = op.operate(tmp_coord) coord = np.array([i - math.floor(i) for i in coord]) if isinstance(op, MagSymmOp): magmom = Magmom(op.operate_magmom(tmp_magmom.moment)) else: magmom = tmp_magmom if not in_coord_list_pbc(coords, coord, atol=self._site_tolerance): coords.append(coord) magmoms.append(magmom) return coords, magmoms else: for tmp_coord in coords_in: for op in self.symmetry_operations: coord = op.operate(tmp_coord) coord = np.array([i - math.floor(i) for i in coord]) if not in_coord_list_pbc(coords, coord, atol=self._site_tolerance): coords.append(coord) return coords, [Magmom(0)]*len(coords) # return dummy magmoms
[docs] def get_lattice(self, data, length_strings=("a", "b", "c"), angle_strings=("alpha", "beta", "gamma"), lattice_type=None): """ Generate the lattice from the provided lattice parameters. In the absence of all six lattice parameters, the crystal system and necessary parameters are parsed """ try: lengths = [str2float(data["_cell_length_" + i]) for i in length_strings] angles = [str2float(data["_cell_angle_" + i]) for i in angle_strings] if not lattice_type: return Lattice.from_lengths_and_angles(lengths, angles) else: return getattr(Lattice, lattice_type)(*(lengths + angles)) except KeyError: # Missing Key search for cell setting for lattice_lable in ["_symmetry_cell_setting", "_space_group_crystal_system"]: if data.data.get(lattice_lable): lattice_type = data.data.get(lattice_lable).lower() try: required_args = getargspec( getattr(Lattice, lattice_type)).args lengths = (l for l in length_strings if l in required_args) angles = (a for a in angle_strings if a in required_args) return self.get_lattice(data, lengths, angles, lattice_type=lattice_type) except AttributeError as exc: warnings.warn(exc) else: return None
[docs] def get_symops(self, data): """ In order to generate symmetry equivalent positions, the symmetry operations are parsed. If the symops are not present, the space group symbol is parsed, and symops are generated. """ symops = [] for symmetry_label in ["_symmetry_equiv_pos_as_xyz", "_symmetry_equiv_pos_as_xyz_", "_space_group_symop_operation_xyz", "_space_group_symop_operation_xyz_"]: if data.data.get(symmetry_label): xyz = data.data.get(symmetry_label) if isinstance(xyz, six.string_types): warnings.warn("A 1-line symmetry op P1 CIF is detected!") xyz = [xyz] try: symops = [SymmOp.from_xyz_string(s) for s in xyz] break except ValueError: continue if not symops: # Try to parse symbol for symmetry_label in ["_symmetry_space_group_name_H-M", "_symmetry_space_group_name_H_M", "_symmetry_space_group_name_H-M_", "_symmetry_space_group_name_H_M_", "_space_group_name_Hall", "_space_group_name_Hall_", "_space_group_name_H-M_alt", "_space_group_name_H-M_alt_", "_symmetry_space_group_name_hall", "_symmetry_space_group_name_hall_", "_symmetry_space_group_name_h-m", "_symmetry_space_group_name_h-m_"]: sg = data.data.get(symmetry_label) if sg: sg = sub_spgrp(sg) try: spg = space_groups.get(sg) if spg: symops = SpaceGroup(spg).symmetry_ops warnings.warn( "No _symmetry_equiv_pos_as_xyz type key found. " "Spacegroup from %s used." % symmetry_label) break except ValueError: # Ignore any errors pass try: for d in _get_cod_data(): if sg == re.sub(r"\s+", "", d["hermann_mauguin"]): xyz = d["symops"] symops = [SymmOp.from_xyz_string(s) for s in xyz] warnings.warn( "No _symmetry_equiv_pos_as_xyz type key found. " "Spacegroup from %s used." % symmetry_label) break except Exception as ex: continue if symops: break if not symops: # Try to parse International number for symmetry_label in ["_space_group_IT_number", "_space_group_IT_number_", "_symmetry_Int_Tables_number", "_symmetry_Int_Tables_number_"]: if data.data.get(symmetry_label): try: i = int(str2float(data.data.get(symmetry_label))) symops = SpaceGroup.from_int_number(i).symmetry_ops break except ValueError: continue if not symops: warnings.warn("No _symmetry_equiv_pos_as_xyz type key found. " "Defaulting to P1.") symops = [SymmOp.from_xyz_string(s) for s in ['x', 'y', 'z']] return symops
[docs] def get_magsymops(self, data): """ Equivalent to get_symops except for magnetic symmetry groups. Separate function since additional operation for time reversal symmetry (which changes magnetic moments on sites) needs to be returned. """ magsymmops = [] # check to see if magCIF file explicitly contains magnetic symmetry operations if data.data.get("_space_group_symop_magn_operation.xyz"): xyzt = data.data.get("_space_group_symop_magn_operation.xyz") if isinstance(xyzt, six.string_types): xyzt = [xyzt] magsymmops = [MagSymmOp.from_xyzt_string(s) for s in xyzt] if data.data.get("_space_group_symop_magn_centering.xyz"): xyzt = data.data.get("_space_group_symop_magn_centering.xyz") if isinstance(xyzt, six.string_types): xyzt = [xyzt] centering_symops = [MagSymmOp.from_xyzt_string(s) for s in xyzt] all_ops = [] for op in magsymmops: for centering_op in centering_symops: new_translation = [i - np.floor(i) for i in op.translation_vector + centering_op.translation_vector] new_time_reversal = op.time_reversal * centering_op.time_reversal all_ops.append(MagSymmOp.from_rotation_and_translation_and_time_reversal( rotation_matrix=op.rotation_matrix, translation_vec=new_translation, time_reversal=new_time_reversal)) magsymmops = all_ops # else check to see if it specifies a magnetic space group elif data.data.get("_space_group_magn.name_BNS") or data.data.get("_space_group_magn.number_BNS"): if data.data.get("_space_group_magn.name_BNS"): # get BNS label for MagneticSpaceGroup() id = data.data.get("_space_group_magn.name_BNS") else: # get BNS number for MagneticSpaceGroup() # by converting string to list of ints id = list(map(int, (data.data.get("_space_group_magn.number_BNS").split(".")))) msg = MagneticSpaceGroup(id) if data.data.get("_space_group_magn.transform_BNS_Pp_abc"): if data.data.get("_space_group_magn.transform_BNS_Pp_abc") != "a,b,c;0,0,0": return NotImplementedError("Non-standard settings not currently supported.") elif data.data.get("_space_group_magn.transform_BNS_Pp"): return NotImplementedError("Incomplete specification to implement.") magsymmops = msg.symmetry_ops if not magsymmops: warnings.warn("No magnetic symmetry detected, using primitive symmetry.") magsymmops = [MagSymmOp.from_xyzt_string("x, y, z, 1")] return magsymmops
[docs] def parse_oxi_states(self, data): """ Parse oxidation states from data dictionary """ try: oxi_states = { data["_atom_type_symbol"][i]: str2float(data["_atom_type_oxidation_number"][i]) for i in range(len(data["_atom_type_symbol"]))} # attempt to strip oxidation state from _atom_type_symbol # in case the label does not contain an oxidation state for i, symbol in enumerate(data["_atom_type_symbol"]): oxi_states[re.sub(r"\d?[\+,\-]?$", "", symbol)] = \ str2float(data["_atom_type_oxidation_number"][i]) except (ValueError, KeyError): oxi_states = None return oxi_states
[docs] def parse_magmoms(self, data, lattice=None): """ Parse atomic magnetic moments from data dictionary """ if lattice is None: raise Exception('Magmoms given in terms of crystal axes in magCIF spec.') try: magmoms = { data["_atom_site_moment_label"][i]: Magmom.from_moment_relative_to_crystal_axes([str2float(data["_atom_site_moment_crystalaxis_x"][i]), str2float(data["_atom_site_moment_crystalaxis_y"][i]), str2float(data["_atom_site_moment_crystalaxis_z"][i])], lattice) for i in range(len(data["_atom_site_moment_label"])) } except (ValueError, KeyError): return None return magmoms
def _get_structure(self, data, primitive): """ Generate structure from part of the cif. """ def parse_symbol(sym): # Common representations for elements/water in cif files # TODO: fix inconsistent handling of water special = {"D": "D", "Hw": "H", "Ow": "O", "Wat": "O", "wat": "O", "OH": "", "OH2": ""} m = re.findall(r"w?[A-Z][a-z]*", sym) if m and m != "?": if sym in special: v = special[sym] else: v = special.get(m[0], m[0]) if len(m) > 1 or (m[0] in special): warnings.warn("{} parsed as {}".format(sym, v)) return v def get_num_implicit_hydrogens(sym): num_h = {"Wat": 2, "wat": 2, "O-H": 1} return num_h.get(sym[:3], 0) lattice = self.get_lattice(data) # if magCIF, get magnetic symmetry moments and magmoms # else standard CIF, and use empty magmom dict if self.feature_flags["magcif_incommensurate"]: raise NotImplementedError("Incommensurate structures not currently supported.") elif self.feature_flags["magcif"]: self.symmetry_operations = self.get_magsymops(data) magmoms = self.parse_magmoms(data, lattice=lattice) else: self.symmetry_operations = self.get_symops(data) magmoms = {} oxi_states = self.parse_oxi_states(data) coord_to_species = OrderedDict() coord_to_magmoms = OrderedDict() def get_matching_coord(coord): keys = list(coord_to_species.keys()) coords = np.array(keys) for op in self.symmetry_operations: c = op.operate(coord) inds = find_in_coord_list_pbc(coords, c, atol=self._site_tolerance) # cant use if inds, because python is dumb and np.array([0]) evaluates # to False if len(inds): return keys[inds[0]] return False for i in range(len(data["_atom_site_label"])): try: # If site type symbol exists, use it. Otherwise, we use the # label. symbol = parse_symbol(data["_atom_site_type_symbol"][i]) num_h = get_num_implicit_hydrogens(data["_atom_site_type_symbol"][i]) except KeyError: symbol = parse_symbol(data["_atom_site_label"][i]) num_h = get_num_implicit_hydrogens(data["_atom_site_label"][i]) if not symbol: continue if oxi_states is not None: o_s = oxi_states.get(symbol, 0) # use _atom_site_type_symbol if possible for oxidation state if "_atom_site_type_symbol" in data.data.keys(): oxi_symbol = data["_atom_site_type_symbol"][i] o_s = oxi_states.get(oxi_symbol, o_s) try: el = Specie(symbol, o_s) except: el = DummySpecie(symbol, o_s) else: el = get_el_sp(symbol) x = str2float(data["_atom_site_fract_x"][i]) y = str2float(data["_atom_site_fract_y"][i]) z = str2float(data["_atom_site_fract_z"][i]) magmom = magmoms.get(data["_atom_site_label"][i], Magmom(0)) try: occu = str2float(data["_atom_site_occupancy"][i]) except (KeyError, ValueError): occu = 1 if occu > 0: coord = (x, y, z) match = get_matching_coord(coord) comp_d = {el: occu} if num_h > 0: comp_d["H"] = num_h comp = Composition(comp_d) if not match: coord_to_species[coord] = comp coord_to_magmoms[coord] = magmom else: coord_to_species[match] += comp coord_to_magmoms[match] = None # disordered magnetic not currently supported sum_occu = [sum(c.values()) for c in coord_to_species.values() if not set(c.elements) == {Element("O"), Element("H")}] if any([o > 1 for o in sum_occu]): warnings.warn("Some occupancies (%s) sum to > 1! If they are within " "the tolerance, they will be rescaled." % str(sum_occu)) allspecies = [] allcoords = [] allmagmoms = [] allhydrogens = [] # check to see if magCIF file is disordered if self.feature_flags["magcif"]: for k, v in coord_to_magmoms.items(): if v is None: # Proposed solution to this is to instead store magnetic moments # as Specie 'spin' property, instead of site property, but this # introduces ambiguities for end user (such as unintended use of # `spin` and Specie will have fictious oxidation state). raise NotImplementedError('Disordered magnetic structures not currently supported.') if coord_to_species.items(): for comp, group in groupby( sorted(list(coord_to_species.items()), key=lambda x: x[1]), key=lambda x: x[1]): tmp_coords = [site[0] for site in group] tmp_magmom = [coord_to_magmoms[tmp_coord] for tmp_coord in tmp_coords] if self.feature_flags["magcif"]: coords, magmoms = self._unique_coords(tmp_coords, tmp_magmom) else: coords, magmoms = self._unique_coords(tmp_coords) if set(comp.elements) == {Element("O"), Element("H")}: # O with implicit hydrogens im_h = comp["H"] species = Composition({"O": comp["O"]}) else: im_h = 0 species = comp allhydrogens.extend(len(coords) * [im_h]) allcoords.extend(coords) allspecies.extend(len(coords) * [species]) allmagmoms.extend(magmoms) # rescale occupancies if necessary for i, species in enumerate(allspecies): totaloccu = sum(species.values()) if 1 < totaloccu <= self._occupancy_tolerance: allspecies[i] = species / totaloccu if allspecies and len(allspecies) == len(allcoords) and len(allspecies) == len(allmagmoms): site_properties = dict() if any(allhydrogens): assert len(allhydrogens) == len(allcoords) site_properties["implicit_hydrogens"] = allhydrogens if self.feature_flags["magcif"]: site_properties["magmom"] = allmagmoms if len(site_properties) == 0: site_properties = None struct = Structure(lattice, allspecies, allcoords, site_properties=site_properties) struct = struct.get_sorted_structure() if primitive and self.feature_flags['magcif']: struct = struct.get_primitive_structure(use_site_props=True) elif primitive: struct = struct.get_primitive_structure() struct = struct.get_reduced_structure() return struct
[docs] def get_structures(self, primitive=True): """ Return list of structures in CIF file. primitive boolean sets whether a conventional cell structure or primitive cell structure is returned. Args: primitive (bool): Set to False to return conventional unit cells. Defaults to True. With magnetic CIF files, will return primitive magnetic cell which may be larger than nuclear primitive cell. Returns: List of Structures. """ structures = [] for d in self._cif.data.values(): try: s = self._get_structure(d, primitive) if s: structures.append(s) except (KeyError, ValueError) as exc: # Warn the user (Errors should never pass silently) # A user reported a problem with cif files produced by Avogadro # in which the atomic coordinates are in Cartesian coords. warnings.warn(str(exc)) if len(structures) == 0: raise ValueError("Invalid cif file with no structures!") return structures
[docs] def get_bibtex_strings(self): """ (Beta) Get BibTeX reference from CIF file. :param data: :return: BibTeX string """ # TODO: CIF specification supports multiple citations. bibtex_strs = [] for d in self._cif.data.values(): bibtex_entry = {'authors': '_citation_author_name', 'title': '_citation_title', 'journal': '_citation_journal_abbrev', 'volume': '_citation_journal_volume', 'year': '_citation_year', 'number': '_citation_number', 'page_first': '_citation_page_first', 'page_last': '_citation_page_last', 'doi': '_citation_DOI'} for field, tag in bibtex_entry.items(): try: bibtex_entry[field] = d[tag] except: bibtex_entry[field] = "?" bibtex_entry['key'] = bibtex_entry['authors'][0].split(',')[0]+":"+bibtex_entry['year'] bibtex_entry['key'] = ''.join(bibtex_entry['key'].split()) bibtex_entry['authors'] = " and ".join(bibtex_entry['authors']) bibtex_entry['pages'] = "{0}--{1}".format(bibtex_entry['page_first'], bibtex_entry['page_last']) for field, entry in bibtex_entry.items(): if field is not 'key': bibtex_entry[field] = "{"+entry+"}" bibtex_str = ("""{key}, author = {authors}, title = {title}, journal = {journal}, year = {year}, volume = {volume}, number = {number}, pages = {pages}, doi = {doi}""".format(**bibtex_entry)) bibtex_strs.append("@article{"+bibtex_str+"\n}") return bibtex_strs
[docs] def as_dict(self): d = OrderedDict() for k, v in self._cif.data.items(): d[k] = {} for k2, v2 in v.data.items(): d[k][k2] = v2 return d
[docs]class CifWriter(object): def __init__(self, struct, symprec=None, write_magmoms=False): """ A wrapper around CifFile to write CIF files from pymatgen structures. Args: struct (Structure): structure to write symprec (float): If not none, finds the symmetry of the structure and writes the cif with symmetry information. Passes symprec to the SpacegroupAnalyzer write_magmoms (bool): If True, will write magCIF file. Incompatible with symprec """ if write_magmoms and symprec: warnings.warn("Magnetic symmetry cannot currently be detected by pymatgen," "disabling symmetry detection.") symprec = None format_str = "{:.8f}" block = OrderedDict() loops = [] spacegroup = ("P 1", 1) if symprec is not None: sf = SpacegroupAnalyzer(struct, symprec) spacegroup = (sf.get_space_group_symbol(), sf.get_space_group_number()) # Needs the refined struture when using symprec. This converts # primitive to conventional structures, the standard for CIF. struct = sf.get_refined_structure() latt = struct.lattice comp = struct.composition no_oxi_comp = comp.element_composition block["_symmetry_space_group_name_H-M"] = spacegroup[0] for cell_attr in ['a', 'b', 'c']: block["_cell_length_" + cell_attr] = format_str.format( getattr(latt, cell_attr)) for cell_attr in ['alpha', 'beta', 'gamma']: block["_cell_angle_" + cell_attr] = format_str.format( getattr(latt, cell_attr)) block["_symmetry_Int_Tables_number"] = spacegroup[1] block["_chemical_formula_structural"] = no_oxi_comp.reduced_formula block["_chemical_formula_sum"] = no_oxi_comp.formula block["_cell_volume"] = latt.volume.__str__() reduced_comp, fu = no_oxi_comp.get_reduced_composition_and_factor() block["_cell_formula_units_Z"] = str(int(fu)) if symprec is None: block["_symmetry_equiv_pos_site_id"] = ["1"] block["_symmetry_equiv_pos_as_xyz"] = ["x, y, z"] else: sf = SpacegroupAnalyzer(struct, symprec) symmops = [] for op in sf.get_symmetry_operations(): v = op.translation_vector symmops.append(SymmOp.from_rotation_and_translation( op.rotation_matrix, v)) ops = [op.as_xyz_string() for op in symmops] block["_symmetry_equiv_pos_site_id"] = \ ["%d" % i for i in range(1, len(ops) + 1)] block["_symmetry_equiv_pos_as_xyz"] = ops loops.append(["_symmetry_equiv_pos_site_id", "_symmetry_equiv_pos_as_xyz"]) contains_oxidation = True try: symbol_to_oxinum = OrderedDict([ (el.__str__(), float(el.oxi_state)) for el in sorted(comp.elements)]) except AttributeError: symbol_to_oxinum = OrderedDict([(el.symbol, 0) for el in sorted(comp.elements)]) contains_oxidation = False if contains_oxidation: block["_atom_type_symbol"] = symbol_to_oxinum.keys() block["_atom_type_oxidation_number"] = symbol_to_oxinum.values() loops.append(["_atom_type_symbol", "_atom_type_oxidation_number"]) atom_site_type_symbol = [] atom_site_symmetry_multiplicity = [] atom_site_fract_x = [] atom_site_fract_y = [] atom_site_fract_z = [] atom_site_label = [] atom_site_occupancy = [] atom_site_moment_label = [] atom_site_moment_crystalaxis_x = [] atom_site_moment_crystalaxis_y = [] atom_site_moment_crystalaxis_z = [] count = 1 if symprec is None: for site in struct: for sp, occu in sorted(site.species_and_occu.items()): atom_site_type_symbol.append(sp.__str__()) atom_site_symmetry_multiplicity.append("1") atom_site_fract_x.append("{0:f}".format(site.a)) atom_site_fract_y.append("{0:f}".format(site.b)) atom_site_fract_z.append("{0:f}".format(site.c)) atom_site_label.append("{}{}".format(sp.symbol, count)) atom_site_occupancy.append(occu.__str__()) magmom = Magmom(site.properties.get('magmom', getattr(sp, 'spin', 0))) if write_magmoms and abs(magmom) > 0: moment = Magmom.get_moment_relative_to_crystal_axes(magmom, latt) atom_site_moment_label.append("{}{}".format(sp.symbol, count)) atom_site_moment_crystalaxis_x.append(moment[0]) atom_site_moment_crystalaxis_y.append(moment[1]) atom_site_moment_crystalaxis_z.append(moment[2]) count += 1 else: # The following just presents a deterministic ordering. unique_sites = [ (sorted(sites, key=lambda s: tuple([abs(x) for x in s.frac_coords]))[0], len(sites)) for sites in sf.get_symmetrized_structure().equivalent_sites ] for site, mult in sorted( unique_sites, key=lambda t: (t[0].species_and_occu.average_electroneg, -t[1], t[0].a, t[0].b, t[0].c)): for sp, occu in site.species_and_occu.items(): atom_site_type_symbol.append(sp.__str__()) atom_site_symmetry_multiplicity.append("%d" % mult) atom_site_fract_x.append("{0:f}".format(site.a)) atom_site_fract_y.append("{0:f}".format(site.b)) atom_site_fract_z.append("{0:f}".format(site.c)) atom_site_label.append("{}{}".format(sp.symbol, count)) atom_site_occupancy.append(occu.__str__()) count += 1 block["_atom_site_type_symbol"] = atom_site_type_symbol block["_atom_site_label"] = atom_site_label block["_atom_site_symmetry_multiplicity"] = \ atom_site_symmetry_multiplicity block["_atom_site_fract_x"] = atom_site_fract_x block["_atom_site_fract_y"] = atom_site_fract_y block["_atom_site_fract_z"] = atom_site_fract_z block["_atom_site_occupancy"] = atom_site_occupancy loops.append(["_atom_site_type_symbol", "_atom_site_label", "_atom_site_symmetry_multiplicity", "_atom_site_fract_x", "_atom_site_fract_y", "_atom_site_fract_z", "_atom_site_occupancy"]) if write_magmoms: block["_atom_site_moment_label"] = atom_site_moment_label block["_atom_site_moment_crystalaxis_x"] = atom_site_moment_crystalaxis_x block["_atom_site_moment_crystalaxis_y"] = atom_site_moment_crystalaxis_y block["_atom_site_moment_crystalaxis_z"] = atom_site_moment_crystalaxis_z loops.append(["_atom_site_moment_label", "_atom_site_moment_crystalaxis_x", "_atom_site_moment_crystalaxis_y", "_atom_site_moment_crystalaxis_z"]) d = OrderedDict() d[comp.reduced_formula] = CifBlock(block, loops, comp.reduced_formula) self._cf = CifFile(d) def __str__(self): """ Returns the cif as a string. """ return self._cf.__str__()
[docs] def write_file(self, filename): """ Write the cif file. """ with zopen(filename, "wt") as f: f.write(self.__str__())
[docs]def str2float(text): """ Remove uncertainty brackets from strings and return the float. """ try: return float(re.sub(r"\(.+\)", "", text)) except TypeError: if isinstance(text, list) and len(text) == 1: return float(re.sub(r"\(.+\)", "", text[0])) except ValueError as ex: if text.strip() == ".": return 0 raise ex