Source code for pymatgen.analysis.structure_prediction.substitution_probability

# coding: utf-8
# Copyright (c) Pymatgen Development Team.
# Distributed under the terms of the MIT License.

"""
This module provides classes for representing species substitution
probabilities.
"""

from collections import defaultdict
from operator import mul
from pymatgen.core.periodic_table import Specie, get_el_sp
from monty.design_patterns import cached_class

import functools
import itertools
import json
import logging
import math
import os

__author__ = "Will Richards, Geoffroy Hautier"
__copyright__ = "Copyright 2012, The Materials Project"
__version__ = "1.2"
__maintainer__ = "Will Richards"
__email__ = "wrichard@mit.edu"
__date__ = "Aug 31, 2012"


[docs]@cached_class class SubstitutionProbability: """ This class finds substitution probabilities given lists of atoms to substitute. The inputs make more sense if you look through the from_defaults static method. The substitution prediction algorithm is presented in: Hautier, G., Fischer, C., Ehrlacher, V., Jain, A., and Ceder, G. (2011) Data Mined Ionic Substitutions for the Discovery of New Compounds. Inorganic Chemistry, 50(2), 656-663. doi:10.1021/ic102031h """ def __init__(self, lambda_table=None, alpha=-5): """ Args: lambda_table: json table of the weight functions lambda if None, will use the default lambda.json table alpha: weight function for never observed substitutions """ if lambda_table is not None: self._lambda_table = lambda_table else: module_dir = os.path.dirname(__file__) json_file = os.path.join(module_dir, 'data', 'lambda.json') with open(json_file) as f: self._lambda_table = json.load(f) # build map of specie pairs to lambdas self.alpha = alpha self._l = {} self.species = set() for row in self._lambda_table: if 'D1+' not in row: s1 = Specie.from_string(row[0]) s2 = Specie.from_string(row[1]) self.species.add(s1) self.species.add(s2) self._l[frozenset([s1, s2])] = float(row[2]) # create Z and px self.Z = 0 self._px = defaultdict(float) for s1, s2 in itertools.product(self.species, repeat=2): value = math.exp(self.get_lambda(s1, s2)) self._px[s1] += value / 2 self._px[s2] += value / 2 self.Z += value def get_lambda(self, s1, s2): """ Args: s1 (Structure): 1st Structure s2 (Structure): 2nd Structure Returns: Lambda values """ k = frozenset([get_el_sp(s1), get_el_sp(s2)]) return self._l.get(k, self.alpha) def get_px(self, sp): """ Args: sp (Specie/Element): Species Returns: Probability """ return self._px[get_el_sp(sp)] def prob(self, s1, s2): """ Gets the probability of 2 species substitution. Not used by the structure predictor. Returns: Probability of s1 and s2 substitution. """ return math.exp(self.get_lambda(s1, s2)) / self.Z def cond_prob(self, s1, s2): """ Conditional probability of substituting s1 for s2. Args: s1: The *variable* specie s2: The *fixed* specie Returns: Conditional probability used by structure predictor. """ return math.exp(self.get_lambda(s1, s2)) / self.get_px(s2) def pair_corr(self, s1, s2): """ Pair correlation of two species. Returns: The pair correlation of 2 species """ return math.exp(self.get_lambda(s1, s2)) * self.Z / (self.get_px(s1) * self.get_px(s2)) def cond_prob_list(self, l1, l2): """ Find the probabilities of 2 lists. These should include ALL species. This is the probability conditional on l2 Args: l1, l2: lists of species Returns: The conditional probability (assuming these species are in l2) """ assert len(l1) == len(l2) p = 1 for s1, s2 in zip(l1, l2): p *= self.cond_prob(s1, s2) return p def as_dict(self): """ Returns: MSONAble dict """ return {"name": self.__class__.__name__, "version": __version__, "init_args": {"lambda_table": self._lambda_table, "alpha": self._alpha}, "@module": self.__class__.__module__, "@class": self.__class__.__name__} @classmethod def from_dict(cls, d): """ Args: d(dict): Dict representation Returns: Class """ return cls(**d['init_args'])
[docs]class SubstitutionPredictor: """ Predicts likely substitutions either to or from a given composition or species list using the SubstitutionProbability """ def __init__(self, lambda_table=None, alpha=-5, threshold=1e-3): """ Args: lambda_table (): Input lambda table. alpha (float): weight function for never observed substitutions threshold (float): Threshold to use to identify high probability structures. """ self.p = SubstitutionProbability(lambda_table, alpha) self.threshold = threshold
[docs] def list_prediction(self, species, to_this_composition=True): """ Args: species: list of species to_this_composition: If true, substitutions with this as a final composition will be found. If false, substitutions with this as a starting composition will be found (these are slightly different) Returns: List of predictions in the form of dictionaries. If to_this_composition is true, the values of the dictionary will be from the list species. If false, the keys will be from that list. """ for sp in species: if get_el_sp(sp) not in self.p.species: raise ValueError("the species {} is not allowed for the" "probability model you are using".format(sp)) max_probabilities = [] for s1 in species: if to_this_composition: max_p = max([self.p.cond_prob(s2, s1) for s2 in self.p.species]) else: max_p = max([self.p.cond_prob(s1, s2) for s2 in self.p.species]) max_probabilities.append(max_p) output = [] def _recurse(output_prob, output_species): best_case_prob = list(max_probabilities) best_case_prob[:len(output_prob)] = output_prob if functools.reduce(mul, best_case_prob) > self.threshold: if len(output_species) == len(species): odict = { 'probability': functools.reduce(mul, best_case_prob)} if to_this_composition: odict['substitutions'] = dict( zip(output_species, species)) else: odict['substitutions'] = dict( zip(species, output_species)) if len(output_species) == len(set(output_species)): output.append(odict) return for sp in self.p.species: i = len(output_prob) if to_this_composition: prob = self.p.cond_prob(sp, species[i]) else: prob = self.p.cond_prob(species[i], sp) _recurse(output_prob + [prob], output_species + [sp]) _recurse([], []) logging.info('{} substitutions found'.format(len(output))) return output
[docs] def composition_prediction(self, composition, to_this_composition=True): """ Returns charged balanced substitutions from a starting or ending composition. Args: composition: starting or ending composition to_this_composition: If true, substitutions with this as a final composition will be found. If false, substitutions with this as a starting composition will be found (these are slightly different) Returns: List of predictions in the form of dictionaries. If to_this_composition is true, the values of the dictionary will be from the list species. If false, the keys will be from that list. """ preds = self.list_prediction(list(composition.keys()), to_this_composition) output = [] for p in preds: if to_this_composition: subs = {v: k for k, v in p['substitutions'].items()} else: subs = p['substitutions'] charge = 0 for k, v in composition.items(): charge += subs[k].oxi_state * v if abs(charge) < 1e-8: output.append(p) logging.info('{} charge balanced substitutions found' .format(len(output))) return output