Source code for pymatgen.analysis.bond_valence

# coding: utf-8
# Copyright (c) Pymatgen Development Team.
# Distributed under the terms of the MIT License.

"""
This module implements classes to perform bond valence analyses.
"""

import collections
import numpy as np
import operator
import os
import functools
from math import exp, sqrt
from monty.serialization import loadfn

from pymatgen.core.periodic_table import Element, Specie
from pymatgen.symmetry.analyzer import SpacegroupAnalyzer
from pymatgen.core.periodic_table import get_el_sp

__author__ = "Shyue Ping Ong"
__copyright__ = "Copyright 2012, The Materials Project"
__version__ = "0.1"
__maintainer__ = "Shyue Ping Ong"
__email__ = "shyuep@gmail.com"
__date__ = "Oct 26, 2012"


# Let's initialize some module level properties.

# List of electronegative elements specified in M. O'Keefe, & N. Brese,
# JACS, 1991, 113(9), 3226-3229. doi:10.1021/ja00009a002.
ELECTRONEG = [Element(sym) for sym in ["H", "B", "C", "Si",
                                       "N", "P", "As", "Sb",
                                       "O", "S", "Se", "Te",
                                       "F", "Cl", "Br", "I"]]

module_dir = os.path.dirname(os.path.abspath(__file__))

# Read in BV parameters.
BV_PARAMS = {}
for k, v in loadfn(os.path.join(module_dir, "bvparam_1991.yaml")).items():
    BV_PARAMS[Element(k)] = v

# Read in yaml containing data-mined ICSD BV data.
all_data = loadfn(os.path.join(module_dir, "icsd_bv.yaml"))
ICSD_BV_DATA = {Specie.from_string(sp): data
                for sp, data in all_data["bvsum"].items()}
PRIOR_PROB = {Specie.from_string(sp): data
              for sp, data in all_data["occurrence"].items()}


[docs]def calculate_bv_sum(site, nn_list, scale_factor=1.0): """ Calculates the BV sum of a site. Args: site (PeriodicSite): The central site to calculate the bond valence nn_list ([Neighbor]): A list of namedtuple Neighbors having "distance" and "site" attributes scale_factor (float): A scale factor to be applied. This is useful for scaling distance, esp in the case of calculation-relaxed structures which may tend to under (GGA) or over bind (LDA). """ el1 = Element(site.specie.symbol) bvsum = 0 for nn in nn_list: el2 = Element(nn.specie.symbol) if (el1 in ELECTRONEG or el2 in ELECTRONEG) and el1 != el2: r1 = BV_PARAMS[el1]["r"] r2 = BV_PARAMS[el2]["r"] c1 = BV_PARAMS[el1]["c"] c2 = BV_PARAMS[el2]["c"] R = r1 + r2 - r1 * r2 * (sqrt(c1) - sqrt(c2)) ** 2 / \ (c1 * r1 + c2 * r2) vij = exp((R - nn.nn_distance * scale_factor) / 0.31) bvsum += vij * (1 if el1.X < el2.X else -1) return bvsum
[docs]def calculate_bv_sum_unordered(site, nn_list, scale_factor=1): """ Calculates the BV sum of a site for unordered structures. Args: site (PeriodicSite): The central site to calculate the bond valence nn_list ([Neighbor]): A list of namedtuple Neighbors having "distance" and "site" attributes scale_factor (float): A scale factor to be applied. This is useful for scaling distance, esp in the case of calculation-relaxed structures which may tend to under (GGA) or over bind (LDA). """ # If the site "site" has N partial occupations as : f_{site}_0, # f_{site}_1, ... f_{site}_N of elements # X_{site}_0, X_{site}_1, ... X_{site}_N, and each neighbors nn_i in nn # has N_{nn_i} partial occupations as : # f_{nn_i}_0, f_{nn_i}_1, ..., f_{nn_i}_{N_{nn_i}}, then the bv sum of # site "site" is obtained as : # \sum_{nn} \sum_j^N \sum_k^{N_{nn}} f_{site}_j f_{nn_i}_k vij_full # where vij_full is the valence bond of the fully occupied bond bvsum = 0 for specie1, occu1 in site.species.items(): el1 = Element(specie1.symbol) for nn in nn_list: for specie2, occu2 in nn.species.items(): el2 = Element(specie2.symbol) if (el1 in ELECTRONEG or el2 in ELECTRONEG) and el1 != el2: r1 = BV_PARAMS[el1]["r"] r2 = BV_PARAMS[el2]["r"] c1 = BV_PARAMS[el1]["c"] c2 = BV_PARAMS[el2]["c"] R = r1 + r2 - r1 * r2 * (sqrt(c1) - sqrt(c2)) ** 2 / \ (c1 * r1 + c2 * r2) vij = exp((R - nn.nn_distance * scale_factor) / 0.31) bvsum += occu1 * occu2 * vij * (1 if el1.X < el2.X else -1) return bvsum
[docs]class BVAnalyzer: """ This class implements a maximum a posteriori (MAP) estimation method to determine oxidation states in a structure. The algorithm is as follows: 1) The bond valence sum of all symmetrically distinct sites in a structure is calculated using the element-based parameters in M. O'Keefe, & N. Brese, JACS, 1991, 113(9), 3226-3229. doi:10.1021/ja00009a002. 2) The posterior probabilities of all oxidation states is then calculated using: P(oxi_state/BV) = K * P(BV/oxi_state) * P(oxi_state), where K is a constant factor for each element. P(BV/oxi_state) is calculated as a Gaussian with mean and std deviation determined from an analysis of the ICSD. The posterior P(oxi_state) is determined from a frequency analysis of the ICSD. 3) The oxidation states are then ranked in order of decreasing probability and the oxidation state combination that result in a charge neutral cell is selected. """ CHARGE_NEUTRALITY_TOLERANCE = 0.00001 def __init__(self, symm_tol=0.1, max_radius=4, max_permutations=100000, distance_scale_factor=1.015, charge_neutrality_tolerance=CHARGE_NEUTRALITY_TOLERANCE, forbidden_species=None): """ Initializes the BV analyzer, with useful defaults. Args: symm_tol: Symmetry tolerance used to determine which sites are symmetrically equivalent. Set to 0 to turn off symmetry. max_radius: Maximum radius in Angstrom used to find nearest neighbors. max_permutations: The maximum number of permutations of oxidation states to test. distance_scale_factor: A scale factor to be applied. This is useful for scaling distances, esp in the case of calculation-relaxed structures which may tend to under (GGA) or over bind (LDA). The default of 1.015 works for GGA. For experimental structure, set this to 1. charge_neutrality_tolerance: Tolerance on the charge neutrality when unordered structures are at stake. forbidden_species: List of species that are forbidden (example : ["O-"] cannot be used) It is used when e.g. someone knows that some oxidation state cannot occur for some atom in a structure or list of structures. """ self.symm_tol = symm_tol self.max_radius = max_radius self.max_permutations = max_permutations self.dist_scale_factor = distance_scale_factor self.charge_neutrality_tolerance = charge_neutrality_tolerance forbidden_species = [get_el_sp(sp) for sp in forbidden_species] if \ forbidden_species else [] self.icsd_bv_data = {get_el_sp(specie): data for specie, data in ICSD_BV_DATA.items() if specie not in forbidden_species} \ if len(forbidden_species) > 0 else ICSD_BV_DATA def _calc_site_probabilities(self, site, nn): el = site.specie.symbol bv_sum = calculate_bv_sum(site, nn, scale_factor=self.dist_scale_factor) prob = {} for sp, data in self.icsd_bv_data.items(): if sp.symbol == el and sp.oxi_state != 0 and data["std"] > 0: u = data["mean"] sigma = data["std"] # Calculate posterior probability. Note that constant # factors are ignored. They have no effect on the results. prob[sp.oxi_state] = exp(-(bv_sum - u) ** 2 / 2 / (sigma ** 2)) \ / sigma * PRIOR_PROB[sp] # Normalize the probabilities try: prob = {k: v / sum(prob.values()) for k, v in prob.items()} except ZeroDivisionError: prob = {k: 0.0 for k in prob} return prob def _calc_site_probabilities_unordered(self, site, nn): bv_sum = calculate_bv_sum_unordered( site, nn, scale_factor=self.dist_scale_factor) prob = {} for specie, occu in site.species.items(): el = specie.symbol prob[el] = {} for sp, data in self.icsd_bv_data.items(): if sp.symbol == el and sp.oxi_state != 0 and data["std"] > 0: u = data["mean"] sigma = data["std"] # Calculate posterior probability. Note that constant # factors are ignored. They have no effect on the results. prob[el][sp.oxi_state] = exp(-(bv_sum - u) ** 2 / 2 / (sigma ** 2)) \ / sigma * PRIOR_PROB[sp] # Normalize the probabilities try: prob[el] = {k: v / sum(prob[el].values()) for k, v in prob[el].items()} except ZeroDivisionError: prob[el] = {k: 0.0 for k in prob[el]} return prob
[docs] def get_valences(self, structure): """ Returns a list of valences for the structure. This currently works only for ordered structures only. Args: structure: Structure to analyze Returns: A list of valences for each site in the structure (for an ordered structure), e.g., [1, 1, -2] or a list of lists with the valences for each fractional element of each site in the structure (for an unordered structure), e.g., [[2, 4], [3], [-2], [-2], [-2]] Raises: A ValueError if the valences cannot be determined. """ els = [Element(el.symbol) for el in structure.composition.elements] if not set(els).issubset(set(BV_PARAMS.keys())): raise ValueError( "Structure contains elements not in set of BV parameters!" ) # Perform symmetry determination and get sites grouped by symmetry. if self.symm_tol: finder = SpacegroupAnalyzer(structure, self.symm_tol) symm_structure = finder.get_symmetrized_structure() equi_sites = symm_structure.equivalent_sites else: equi_sites = [[site] for site in structure] # Sort the equivalent sites by decreasing electronegativity. equi_sites = sorted(equi_sites, key=lambda sites: -sites[0].species .average_electroneg) # Get a list of valences and probabilities for each symmetrically # distinct site. valences = [] all_prob = [] if structure.is_ordered: for sites in equi_sites: test_site = sites[0] nn = structure.get_neighbors(test_site, self.max_radius) prob = self._calc_site_probabilities(test_site, nn) all_prob.append(prob) val = list(prob.keys()) # Sort valences in order of decreasing probability. val = sorted(val, key=lambda v: -prob[v]) # Retain probabilities that are at least 1/100 of highest prob. valences.append( list(filter(lambda v: prob[v] > 0.01 * prob[val[0]], val))) else: full_all_prob = [] for sites in equi_sites: test_site = sites[0] nn = structure.get_neighbors(test_site, self.max_radius) prob = self._calc_site_probabilities_unordered(test_site, nn) all_prob.append(prob) full_all_prob.extend(prob.values()) vals = [] for (elsp, occ) in get_z_ordered_elmap( test_site.species): val = list(prob[elsp.symbol].keys()) # Sort valences in order of decreasing probability. val = sorted(val, key=lambda v: -prob[elsp.symbol][v]) # Retain probabilities that are at least 1/100 of highest # prob. vals.append( list(filter( lambda v: prob[elsp.symbol][v] > 0.001 * prob[ elsp.symbol][val[0]], val))) valences.append(vals) # make variables needed for recursion if structure.is_ordered: nsites = np.array([len(i) for i in equi_sites]) vmin = np.array([min(i) for i in valences]) vmax = np.array([max(i) for i in valences]) self._n = 0 self._best_score = 0 self._best_vset = None def evaluate_assignment(v_set): el_oxi = collections.defaultdict(list) for i, sites in enumerate(equi_sites): el_oxi[sites[0].specie.symbol].append(v_set[i]) max_diff = max([max(v) - min(v) for v in el_oxi.values()]) if max_diff > 1: return score = functools.reduce( operator.mul, [all_prob[i][v] for i, v in enumerate(v_set)]) if score > self._best_score: self._best_vset = v_set self._best_score = score def _recurse(assigned=[]): # recurses to find permutations of valences based on whether a # charge balanced assignment can still be found if self._n > self.max_permutations: return i = len(assigned) highest = vmax.copy() highest[:i] = assigned highest *= nsites highest = np.sum(highest) lowest = vmin.copy() lowest[:i] = assigned lowest *= nsites lowest = np.sum(lowest) if highest < 0 or lowest > 0: self._n += 1 return if i == len(valences): evaluate_assignment(assigned) self._n += 1 return else: for v in valences[i]: new_assigned = list(assigned) _recurse(new_assigned + [v]) else: nsites = np.array([len(i) for i in equi_sites]) tmp = [] attrib = [] for insite, nsite in enumerate(nsites): for val in valences[insite]: tmp.append(nsite) attrib.append(insite) new_nsites = np.array(tmp) fractions = [] elements = [] for sites in equi_sites: for sp, occu in get_z_ordered_elmap(sites[0].species): elements.append(sp.symbol) fractions.append(occu) fractions = np.array(fractions, np.float) new_valences = [] for vals in valences: for val in vals: new_valences.append(val) vmin = np.array([min(i) for i in new_valences], np.float) vmax = np.array([max(i) for i in new_valences], np.float) self._n = 0 self._best_score = 0 self._best_vset = None def evaluate_assignment(v_set): el_oxi = collections.defaultdict(list) jj = 0 for i, sites in enumerate(equi_sites): for specie, occu in get_z_ordered_elmap( sites[0].species): el_oxi[specie.symbol].append(v_set[jj]) jj += 1 max_diff = max([max(v) - min(v) for v in el_oxi.values()]) if max_diff > 2: return score = functools.reduce(operator.mul, [all_prob[attrib[iv]][elements[iv]][vv] for iv, vv in enumerate(v_set)]) if score > self._best_score: self._best_vset = v_set self._best_score = score def _recurse(assigned=[]): # recurses to find permutations of valences based on whether a # charge balanced assignment can still be found if self._n > self.max_permutations: return i = len(assigned) highest = vmax.copy() highest[:i] = assigned highest *= new_nsites highest *= fractions highest = np.sum(highest) lowest = vmin.copy() lowest[:i] = assigned lowest *= new_nsites lowest *= fractions lowest = np.sum(lowest) if (highest < -self.charge_neutrality_tolerance or lowest > self.charge_neutrality_tolerance): self._n += 1 return if i == len(new_valences): evaluate_assignment(assigned) self._n += 1 return else: for v in new_valences[i]: new_assigned = list(assigned) _recurse(new_assigned + [v]) _recurse() if self._best_vset: if structure.is_ordered: assigned = {} for val, sites in zip(self._best_vset, equi_sites): for site in sites: assigned[site] = val return [int(assigned[site]) for site in structure] else: assigned = {} new_best_vset = [] for ii in range(len(equi_sites)): new_best_vset.append(list()) for ival, val in enumerate(self._best_vset): new_best_vset[attrib[ival]].append(val) for val, sites in zip(new_best_vset, equi_sites): for site in sites: assigned[site] = val return [[int(frac_site) for frac_site in assigned[site]] for site in structure] else: raise ValueError("Valences cannot be assigned!")
[docs] def get_oxi_state_decorated_structure(self, structure): """ Get an oxidation state decorated structure. This currently works only for ordered structures only. Args: structure: Structure to analyze Returns: A modified structure that is oxidation state decorated. Raises: ValueError if the valences cannot be determined. """ s = structure.copy() if s.is_ordered: valences = self.get_valences(s) s.add_oxidation_state_by_site(valences) else: valences = self.get_valences(s) s = add_oxidation_state_by_site_fraction(s, valences) return s
[docs]def get_z_ordered_elmap(comp): """ Arbitrary ordered elmap on the elements/species of a composition of a given site in an unordered structure. Returns a list of tuples ( element_or_specie: occupation) in the arbitrary order. The arbitrary order is based on the Z of the element and the smallest fractional occupations first. Example : {"Ni3+": 0.2, "Ni4+": 0.2, "Cr3+": 0.15, "Zn2+": 0.34, "Cr4+": 0.11} will yield the species in the following order : Cr4+, Cr3+, Ni3+, Ni4+, Zn2+ ... or Cr4+, Cr3+, Ni4+, Ni3+, Zn2+ """ return sorted([(elsp, comp[elsp]) for elsp in comp.keys()])
[docs]def add_oxidation_state_by_site_fraction(structure, oxidation_states): """ Add oxidation states to a structure by fractional site. Args: oxidation_states (list): List of list of oxidation states for each site fraction for each site. E.g., [[2, 4], [3], [-2], [-2], [-2]] """ try: for i, site in enumerate(structure): new_sp = collections.defaultdict(float) for j, (el, occu) in enumerate(get_z_ordered_elmap(site.species)): specie = Specie(el.symbol, oxidation_states[i][j]) new_sp[specie] += occu structure[i] = new_sp return structure except IndexError: raise ValueError("Oxidation state of all sites must be " "specified in the list.")