Hide keyboard shortcuts

Hot-keys on this page

r m x p   toggle line displays

j k   next/prev highlighted chunk

0   (zero) top of page

1   (one) first highlighted chunk

1

2

3

4

5

6

7

8

9

10

11

12

13

14

15

16

17

18

19

20

21

22

23

24

25

26

27

28

29

30

31

32

33

34

35

36

37

38

39

40

41

42

43

44

45

46

47

48

49

50

51

52

53

54

55

56

57

58

59

60

61

62

63

64

65

66

67

68

69

70

71

72

73

74

75

76

77

78

79

80

81

82

83

84

85

86

87

88

89

90

91

92

93

94

95

96

97

98

99

100

101

102

103

104

105

106

107

108

109

110

111

112

113

114

115

116

117

118

119

120

121

122

123

124

125

126

127

128

129

130

131

132

133

134

135

136

137

138

139

140

141

142

143

144

145

146

147

148

149

150

151

152

153

154

155

156

157

158

159

160

161

162

163

164

165

166

167

168

169

170

171

172

173

174

175

176

177

178

179

180

181

182

183

184

185

186

187

188

189

190

191

192

193

194

195

196

197

198

199

200

201

202

203

204

205

206

207

208

209

210

211

212

213

214

215

216

217

218

219

220

221

222

223

224

225

226

227

228

229

230

231

232

233

234

235

236

237

238

239

240

241

242

243

244

245

246

247

248

249

250

251

252

253

254

255

256

257

258

259

260

261

262

# coding: utf-8 

# Copyright (c) Pymatgen Development Team. 

# Distributed under the terms of the MIT License. 

 

from __future__ import division, unicode_literals 

 

""" 

This module provides classes for representing species substitution 

probabilities. 

""" 

 

from six.moves import zip 

 

__author__ = "Will Richards, Geoffroy Hautier" 

__copyright__ = "Copyright 2012, The Materials Project" 

__version__ = "1.2" 

__maintainer__ = "Will Richards" 

__email__ = "wrichard@mit.edu" 

__date__ = "Aug 31, 2012" 

 

from collections import defaultdict 

from operator import mul 

from pymatgen import get_el_sp 

from pymatgen.core.periodic_table import Specie 

from monty.design_patterns import cached_class 

 

import itertools 

import json 

import logging 

import math 

import os 

 

import six 

 

 

@cached_class 

class SubstitutionProbability(object): 

""" 

This class finds substitution probabilities given lists of atoms 

to substitute. The inputs make more sense if you look through the 

from_defaults static method. 

 

The substitution prediction algorithm is presented in: 

Hautier, G., Fischer, C., Ehrlacher, V., Jain, A., and Ceder, G. (2011) 

Data Mined Ionic Substitutions for the Discovery of New Compounds. 

Inorganic Chemistry, 50(2), 656-663. doi:10.1021/ic102031h 

 

Args: 

lambda_table: 

json table of the weight functions lambda if None, 

will use the default lambda.json table 

alpha: 

weight function for never observed substitutions 

""" 

 

def __init__(self, lambda_table=None, alpha=-5): 

if lambda_table is not None: 

self._lambda_table = lambda_table 

else: 

module_dir = os.path.dirname(__file__) 

json_file = os.path.join(module_dir, 'data', 'lambda.json') 

with open(json_file) as f: 

self._lambda_table = json.load(f) 

 

#build map of specie pairs to lambdas 

self.alpha = alpha 

self._l = {} 

self.species = set() 

for row in self._lambda_table: 

if 'D1+' not in row: 

s1 = Specie.from_string(row[0]) 

s2 = Specie.from_string(row[1]) 

self.species.add(s1) 

self.species.add(s2) 

self._l[frozenset([s1, s2])] = float(row[2]) 

 

#create Z and px 

self.Z = 0 

self._px = defaultdict(float) 

for s1, s2 in itertools.product(self.species, repeat=2): 

value = math.exp(self.get_lambda(s1, s2)) 

self._px[s1] += value / 2 

self._px[s2] += value / 2 

self.Z += value 

 

def get_lambda(self, s1, s2): 

k = frozenset([get_el_sp(s1), 

get_el_sp(s2)]) 

return self._l.get(k, self.alpha) 

 

def get_px(self, sp): 

return self._px[get_el_sp(sp)] 

 

def prob(self, s1, s2): 

""" 

Gets the probability of 2 species substitution. Not used by the 

structure predictor. 

 

Returns: 

Probability of s1 and s2 substitution. 

""" 

return math.exp(self.get_lambda(s1, s2)) / self.Z 

 

def cond_prob(self, s1, s2): 

""" 

Conditional probability of substituting s1 for s2. 

 

Args: 

s1: 

The *variable* specie 

s2: 

The *fixed* specie 

 

Returns: 

Conditional probability used by structure predictor. 

""" 

return math.exp(self.get_lambda(s1, s2)) / self.get_px(s2) 

 

def pair_corr(self, s1, s2): 

""" 

Pair correlation of two species. 

 

Returns: 

The pair correlation of 2 species 

""" 

return math.exp(self.get_lambda(s1, s2)) * \ 

self.Z / (self.get_px(s1) * self.get_px(s2)) 

 

def cond_prob_list(self, l1, l2): 

""" 

Find the probabilities of 2 lists. These should include ALL species. 

This is the probability conditional on l2 

 

Args: 

l1, l2: 

lists of species 

 

Returns: 

The conditional probability (assuming these species are in 

l2) 

""" 

assert len(l1) == len(l2) 

p = 1 

for s1, s2 in zip(l1, l2): 

p *= self.cond_prob(s1, s2) 

return p 

 

def as_dict(self): 

return {"name": self.__class__.__name__, "version": __version__, 

"init_args": {"lambda_table": self._lambda_table, 

"alpha": self._alpha}, 

"@module": self.__class__.__module__, 

"@class": self.__class__.__name__} 

 

@classmethod 

def from_dict(cls, d): 

return cls(**d['init_args']) 

 

 

class SubstitutionPredictor(object): 

""" 

Predicts likely substitutions either to or from a given composition 

or species list using the SubstitutionProbability 

""" 

def __init__(self, lambda_table=None, alpha=-5, threshold=1e-3): 

self.p = SubstitutionProbability(lambda_table, alpha) 

self.threshold = threshold 

 

def list_prediction(self, species, to_this_composition = True): 

""" 

Args: 

species: 

list of species 

to_this_composition: 

If true, substitutions with this as a final composition 

will be found. If false, substitutions with this as a 

starting composition will be found (these are slightly 

different) 

Returns: 

List of predictions in the form of dictionaries. 

If to_this_composition is true, the values of the dictionary 

will be from the list species. If false, the keys will be 

from that list. 

""" 

for sp in species: 

if get_el_sp(sp) not in self.p.species: 

raise ValueError("the species {} is not allowed for the" 

"probability model you are using".format(sp)) 

max_probabilities = [] 

for s1 in species: 

if to_this_composition: 

max_p = max([self.p.cond_prob(s2, s1) for s2 in self.p.species]) 

else: 

max_p = max([self.p.cond_prob(s1, s2) for s2 in self.p.species]) 

max_probabilities.append(max_p) 

 

output = [] 

 

def _recurse(output_prob, output_species): 

best_case_prob = list(max_probabilities) 

best_case_prob[:len(output_prob)] = output_prob 

if six.moves.reduce(mul, best_case_prob) > self.threshold: 

if len(output_species) == len(species): 

odict = { 

'probability': six.moves.reduce(mul, best_case_prob)} 

if to_this_composition: 

odict['substitutions'] = dict( 

zip(output_species, species)) 

else: 

odict['substitutions'] = dict( 

zip(species, output_species)) 

if len(output_species) == len(set(output_species)): 

output.append(odict) 

return 

for sp in self.p.species: 

i = len(output_prob) 

if to_this_composition: 

prob = self.p.cond_prob(sp, species[i]) 

else: 

prob = self.p.cond_prob(species[i], sp) 

_recurse(output_prob + [prob], output_species + [sp]) 

 

_recurse([], []) 

logging.info('{} substitutions found'.format(len(output))) 

return output 

 

def composition_prediction(self, composition, to_this_composition = True): 

""" 

Returns charged balanced substitutions from a starting or ending 

composition. 

 

Args: 

composition: 

starting or ending composition 

to_this_composition: 

If true, substitutions with this as a final composition 

will be found. If false, substitutions with this as a 

starting composition will be found (these are slightly 

different) 

 

Returns: 

List of predictions in the form of dictionaries. 

If to_this_composition is true, the values of the dictionary 

will be from the list species. If false, the keys will be 

from that list. 

""" 

preds = self.list_prediction(list(composition.keys()), 

to_this_composition) 

output = [] 

for p in preds: 

if to_this_composition: 

subs = {v:k for k, v in p['substitutions'].items()} 

else: 

subs = p['substitutions'] 

charge = 0 

for k, v in composition.items(): 

charge += subs[k].oxi_state * v 

if abs(charge) < 1e-8: 

output.append(p) 

logging.info('{} charge balanced substitutions found' 

.format(len(output))) 

return output