"""
This module provides functions to get the dimensionality of a structure.
A number of different algorithms are implemented. These are based on the
following publications:
get_dimensionality_larsen:
- P. M. Larsen, M. Pandey, M. Strange, K. W. Jacobsen. Definition of a
scoring parameter to identify low-dimensional materials components.
Phys. Rev. Materials 3, 034003 (2019).
get_dimensionality_cheon:
- Cheon, G.; Duerloo, K.-A. N.; Sendek, A. D.; Porter, C.; Chen, Y.; Reed,
E. J. Data Mining for New Two- and One-Dimensional Weakly Bonded Solids
and Lattice-Commensurate Heterostructures. Nano Lett. 2017.
get_dimensionality_gorai:
- Gorai, P., Toberer, E. & Stevanovic, V. Computational Identification of
Promising Thermoelectric Materials Among Known Quasi-2D Binary Compounds.
J. Mater. Chem. A 2, 4136 (2016).
"""
import itertools
import copy
import numpy as np
from collections import defaultdict
from networkx.readwrite import json_graph
from pymatgen.analysis.graphs import MoleculeGraph, StructureGraph
from pymatgen.core.lattice import get_integer_index
from pymatgen.core.structure import Structure, Molecule
from pymatgen.core.periodic_table import Specie
from pymatgen.core.surface import SlabGenerator
from pymatgen.analysis.local_env import JmolNN
from pymatgen.analysis.structure_analyzer import get_max_bond_lengths
from pymatgen.symmetry.analyzer import SpacegroupAnalyzer
__author__ = "Alex Ganose, Gowoon Cheon, Prashun Gorai"
def get_dimensionality_larsen(bonded_structure):
    """
    Return the dimensionality of a bonded structure.

    The value returned is the largest dimensionality found among the
    components reported by get_structure_components() for the structure.

    Requires a StructureGraph object as input. This can be generated using one
    of the NearNeighbor classes. For example, using the CrystalNN class::

        bonded_structure = CrystalNN().get_bonded_structure(structure)

    Based on the modified breadth-first-search algorithm described in:

    P. M. Larsen, M. Pandey, M. Strange, K. W. Jacobsen. Definition of a
    scoring parameter to identify low-dimensional materials components.
    Phys. Rev. Materials 3, 034003 (2019).

    Args:
        bonded_structure (StructureGraph): A structure with bonds, represented
            as a pymatgen structure graph. For example, generated using the
            CrystalNN.get_bonded_structure() method.

    Returns:
        (int): The dimensionality of the structure.
    """
    components = get_structure_components(bonded_structure)
    return max(component['dimensionality'] for component in components)
def get_structure_components(bonded_structure, inc_orientation=False,
                             inc_site_ids=False, inc_molecule_graph=False):
    """
    Collect information on each bonded component of a structure.

    The structure graph is split into its weakly connected components and the
    dimensionality of each one is obtained from
    calculate_dimensionality_of_site(), called on the first node of the
    component.

    Requires a StructureGraph object as input. This can be generated using one
    of the NearNeighbor classes. For example, using the CrystalNN class::

        bonded_structure = CrystalNN().get_bonded_structure(structure)

    Based on the modified breadth-first-search algorithm described in:

    P. M. Larsen, M. Pandey, M. Strange, K. W. Jacobsen. Definition of a
    scoring parameter to identify low-dimensional materials components.
    Phys. Rev. Materials 3, 034003 (2019).

    Args:
        bonded_structure (StructureGraph): A structure with bonds, represented
            as a pymatgen structure graph. For example, generated using the
            CrystalNN.get_bonded_structure() method.
        inc_orientation (bool, optional): Whether to include the orientation
            of the structure component. For surfaces, the miller index is
            given, for one-dimensional structures, the direction of the chain
            is given.
        inc_site_ids (bool, optional): Whether to include the site indices
            of the sites in the structure component.
        inc_molecule_graph (bool, optional): Whether to include MoleculeGraph
            objects for zero-dimensional components.

    Returns:
        (list of dict): Information on the components in a structure as a list
        of dictionaries with the keys:

        - "structure_graph": A pymatgen StructureGraph object for the
          component.
        - "dimensionality": The dimensionality of the structure component as
          an int.
        - "orientation": If inc_orientation is `True`, the orientation of the
          component as a tuple, e.g. (1, 1, 1). It is None for components
          that are not one- or two-dimensional.
        - "site_ids": If inc_site_ids is `True`, the site indices of the
          sites in the component as a tuple.
        - "molecule_graph": If inc_molecule_graph is `True`, a MoleculeGraph
          object for zero-dimensional components.
    """
    import networkx as nx  # optional dependency therefore not top level import

    full_graph = bonded_structure.graph
    components = []
    for node_set in nx.weakly_connected_components(full_graph):
        graph = full_graph.subgraph(node_set)
        node_ids = list(graph.nodes())

        dimensionality, vertices = calculate_dimensionality_of_site(
            bonded_structure, node_ids[0], inc_vertices=True)
        component = {'dimensionality': dimensionality}

        if inc_orientation:
            orientation = None
            if dimensionality in [1, 2]:
                vertices = np.array(vertices)
                centroid = vertices.sum(axis=0) / vertices.shape[0]

                # singular value decomposition of the centred site images
                _, _, vh = np.linalg.svd(vertices - centroid)

                # row 0 of vh is the best-fit line (used for chains), row 2 is
                # the unit normal of the best-fit plane (used for layers)
                row = 2 if dimensionality == 2 else 0
                orientation = get_integer_index(vh[row, :])
            component['orientation'] = orientation

        if inc_site_ids:
            component['site_ids'] = tuple(node_ids)

        if inc_molecule_graph and dimensionality == 0:
            component['molecule_graph'] = zero_d_graph_to_molecule_graph(
                bonded_structure, graph)

        # build a standalone StructureGraph for the component, with sites and
        # graph nodes both ordered by their original site index
        component_sites = [bonded_structure.structure[n]
                           for n in sorted(node_ids)]
        component_structure = Structure.from_sites(component_sites)
        relabelled_graph = nx.convert_node_labels_to_integers(
            graph, ordering="sorted")
        component['structure_graph'] = StructureGraph(
            component_structure,
            graph_data=json_graph.adjacency_data(relabelled_graph))

        components.append(component)
    return components
def calculate_dimensionality_of_site(bonded_structure, site_index,
                                     inc_vertices=False):
    """
    Calculates the dimensionality of the component containing the given site.

    Implements directly the modified breadth-first-search algorithm described
    in Algorithm 1 of:

    P. M. Larsen, M. Pandey, M. Strange, K. W. Jacobsen. Definition of a
    scoring parameter to identify low-dimensional materials components.
    Phys. Rev. Materials 3, 034003 (2019).

    Neighbour lists are looked up lazily and cached, so only sites that are
    actually reached from ``site_index`` are queried on the structure graph.

    Args:
        bonded_structure (StructureGraph): A structure with bonds, represented
            as a pymatgen structure graph. For example, generated using the
            CrystalNN.get_bonded_structure() method.
        site_index (int): The index of a site in the component of interest.
        inc_vertices (bool, optional): Whether to return the vertices (site
            images) of the component.

    Returns:
        (int or tuple): If inc_vertices is False, the dimensionality of the
        component will be returned as an int. If inc_vertices is true, the
        function will return a tuple of (dimensionality, vertices), where
        vertices is a list of tuples. E.g. [(0, 0, 0), (1, 1, 1)].
    """
    from collections import deque  # local import: O(1) pops from the left

    neighbour_cache = {}

    def neighbours(comp_index):
        # (site index, image) of every site bonded to comp_index; computed on
        # first use only, since a site can be expanded for several images
        if comp_index not in neighbour_cache:
            neighbour_cache[comp_index] = [
                (s.index, s.jimage)
                for s in bonded_structure.get_connected_sites(comp_index)]
        return neighbour_cache[comp_index]

    def rank(vertices):
        # rank of the set of images relative to one of its members
        if len(vertices) == 0:
            return -1
        elif len(vertices) == 1:
            return 0
        else:
            vertices = np.array(list(vertices))
            return np.linalg.matrix_rank(vertices[1:] - vertices[0])

    def rank_increase(seen, candidate):
        # an image is only ever stored in `seen` if it raised the rank, so
        # the rank of `seen` is always len(seen) - 1
        rank0 = len(seen) - 1
        rank1 = rank(seen.union({candidate}))
        return rank1 > rank0

    seen_vertices = set()
    seen_comp_vertices = defaultdict(set)

    queue = deque([(site_index, (0, 0, 0))])
    while queue:
        comp_i, image_i = queue.popleft()

        if (comp_i, image_i) in seen_vertices:
            continue
        seen_vertices.add((comp_i, image_i))

        # re-check on dequeue: an earlier entry may already have added the
        # rank this image would have contributed
        if not rank_increase(seen_comp_vertices[comp_i], image_i):
            continue

        seen_comp_vertices[comp_i].add(image_i)

        for comp_j, image_j in neighbours(comp_i):
            # bond images are relative to comp_i, so shift by its image
            image_j = tuple(np.add(image_j, image_i))

            if (comp_j, image_j) in seen_vertices:
                continue

            if rank_increase(seen_comp_vertices[comp_j], image_j):
                queue.append((comp_j, image_j))

    if inc_vertices:
        return (rank(seen_comp_vertices[site_index]),
                list(seen_comp_vertices[site_index]))
    else:
        return rank(seen_comp_vertices[site_index])
def zero_d_graph_to_molecule_graph(bonded_structure, graph):
    """
    Converts a zero-dimensional networkx Graph object into a MoleculeGraph.

    Walks the component breadth-first from its first node, following bonds
    through periodic images, in a similar way to
    calculate_dimensionality_of_site().

    Args:
        bonded_structure (StructureGraph): A structure with bonds, represented
            as a pymatgen structure graph. For example, generated using the
            CrystalNN.get_bonded_structure() method.
        graph (nx.Graph): A networkx `Graph` object for the component of
            interest.

    Returns:
        (MoleculeGraph): A MoleculeGraph object of the component.

    Raises:
        ValueError: If the same site index is dequeued a second time during
            the walk, which is taken to mean the component is not 0D.
    """
    import networkx as nx

    visited = []  # (site index, image) pairs, in visiting order
    collected_sites = []  # the site objects, in the same order as `visited`

    first_index = list(graph.nodes())[0]
    pending = [(first_index, (0, 0, 0),
                bonded_structure.structure[first_index])]

    while pending:
        index_i, image_i, site_i = pending.pop(0)

        if any(entry[0] == index_i for entry in visited):
            raise ValueError("Graph component is not 0D")

        visited.append((index_i, image_i))
        collected_sites.append(site_i)

        for neighbour in bonded_structure.get_connected_sites(
                index_i, jimage=image_i):
            candidate = (neighbour.index, neighbour.jimage, neighbour.site)
            # skip images that were already visited or are already queued
            if candidate[:2] in visited or candidate in pending:
                continue
            pending.append(candidate)

    # sort the list of indices and the graph by index to make consistent
    order = np.argsort([entry[0] for entry in visited])
    sorted_sites = np.array(collected_sites, dtype=object)[order]
    sorted_graph = nx.convert_node_labels_to_integers(graph, ordering="sorted")

    species = [site.specie for site in sorted_sites]
    coords = [site.coords for site in sorted_sites]
    mol = Molecule(species, coords)
    return MoleculeGraph.with_edges(mol, nx.Graph(sorted_graph).edges())
def _cheon_dim_from_3x3x3(unit_cell, max_unit, min_ref, max_ref, tolerance,
                          ldict):
    """
    Classify a structure by comparing cluster sizes in a 3x3x3 supercell.

    Args:
        unit_cell (Structure): The cell the reference cluster sizes were
            computed from. It is copied, never modified.
        max_unit (int): Largest cluster size found in ``unit_cell``.
        min_ref (int): Smallest cluster size to compare the supercell's
            smallest cluster against.
        max_ref (int): Largest cluster size to compare the supercell's
            largest cluster against when the smallest clusters match.
        tolerance (float): Passed through to find_connected_atoms().
        ldict (dict): Passed through to find_connected_atoms().

    Returns:
        (str or None): '0D', 'intercalated molecule', '<n>D', or None when
        the growth of the largest cluster is not an integer power of 3.
    """
    supercell = unit_cell.copy()
    supercell.make_supercell([[3, 0, 0], [0, 3, 0], [0, 0, 3]])
    connected_list3 = find_connected_atoms(supercell, tolerance=tolerance,
                                           ldict=ldict)
    max3, min3, _ = find_clusters(supercell, connected_list3)
    if min3 == min_ref:
        if max3 == max_ref:
            return '0D'
        return 'intercalated molecule'
    # the largest cluster grows as 3**dim when the cell is tripled
    dim = np.log2(float(max3) / max_unit) / np.log2(3)
    if dim == int(dim):
        return str(int(dim)) + 'D'
    return None


def get_dimensionality_cheon(structure_raw, tolerance=0.45,
                             ldict=JmolNN().el_radius, standardize=True,
                             larger_cell=False):
    """
    Algorithm for finding the dimensions of connected subunits in a structure.
    This method finds the dimensionality of the material even when the material
    is not layered along low-index planes, or does not have flat
    layers/molecular wires.

    The input structure is never modified: supercells are built on copies.

    Author: "Gowoon Cheon"
    Email: "gcheon@stanford.edu"

    See details at :

    Cheon, G.; Duerloo, K.-A. N.; Sendek, A. D.; Porter, C.; Chen, Y.; Reed,
    E. J. Data Mining for New Two- and One-Dimensional Weakly Bonded Solids and
    Lattice-Commensurate Heterostructures. Nano Lett. 2017.

    Args:
        structure_raw (Structure): A pymatgen Structure object.
        tolerance (float): length in angstroms used in finding bonded atoms.
            Two atoms are considered bonded if (distance between atoms 1 and
            2) < (radius of atom 1) + (radius of atom 2) + (tolerance).
            Default value = 0.45, the value used by JMol and Cheon et al.
        ldict (dict): dictionary of bond lengths used in finding bonded atoms.
            Values from JMol are used as default
        standardize: works with conventional standard structures if True. It is
            recommended to keep this as True.
        larger_cell: tests with 3x3x3 supercell instead of 2x2x2. Testing with
            2x2x2 supercell is faster but misclassifies rare interpenetrated 3D
            structures. Testing with a larger cell circumvents this problem

    Returns:
        (str): dimension of the largest cluster as a string. If there are ions
        or molecules it returns 'intercalated ion/molecule'. None is returned
        when the 3x3x3 test cannot assign an integer dimension.
    """
    if standardize:
        unit_cell = SpacegroupAnalyzer(
            structure_raw).get_conventional_standard_structure()
    else:
        unit_cell = structure_raw

    connected_list1 = find_connected_atoms(unit_cell, tolerance=tolerance,
                                           ldict=ldict)
    max1, min1, _ = find_clusters(unit_cell, connected_list1)

    if larger_cell:
        return _cheon_dim_from_3x3x3(unit_cell, max1, min1, max1, tolerance,
                                     ldict)

    # work on a copy so the caller's structure is not turned into a supercell
    supercell = unit_cell.copy()
    supercell.make_supercell([[2, 0, 0], [0, 2, 0], [0, 0, 2]])
    connected_list2 = find_connected_atoms(supercell, tolerance=tolerance,
                                           ldict=ldict)
    max2, min2, _ = find_clusters(supercell, connected_list2)

    if min2 == 1:
        return 'intercalated ion'
    if min2 == min1:
        if max2 == max1:
            return '0D'
        return 'intercalated molecule'

    # the largest cluster grows as 2**dim when the cell is doubled
    dim = np.log2(float(max2) / max1)
    if dim == int(dim):
        return str(int(dim)) + 'D'

    # inconclusive 2x2x2 result: retry with a 3x3x3 supercell of the SAME
    # unit cell that max1 was measured on, so the size ratio is meaningful
    return _cheon_dim_from_3x3x3(unit_cell, max1, min2, max2, tolerance,
                                 ldict)
def find_connected_atoms(struct, tolerance=0.45, ldict=JmolNN().el_radius):
    """
    Finds bonded atoms and returns an adjacency matrix of bonded atoms.

    Author: "Gowoon Cheon"
    Email: "gcheon@stanford.edu"

    Args:
        struct (Structure): Input structure
        tolerance: length in angstroms used in finding bonded atoms. Two atoms
            are considered bonded if (distance between atoms 1 and 2) <
            (radius of atom 1) + (radius of atom 2) + (tolerance). Default
            value = 0.45, the value used by JMol and Cheon et al.
        ldict: dictionary of bond lengths used in finding bonded atoms. Values
            from JMol are used as default

    Returns:
        (np.ndarray): A numpy array of shape (number of atoms, number of
        atoms); If any image of atom j is bonded to atom i with periodic
        boundary conditions, the matrix element [atom i, atom j] is 1. Only
        pairs of distinct atoms are tested, so the diagonal is always 0.
    """
    n_atoms = len(struct.species)

    # fractional coordinates, repeated once per neighbouring-cell offset
    frac = np.array(struct.frac_coords)
    frac_tiled = np.repeat(frac[:, :, np.newaxis], 27, axis=2)

    # the 27 lattice translations (-1, 0, 1)^3, laid out as shape (1, 3, 27)
    cell_offsets = np.array(
        list(itertools.product([0, 1, -1], repeat=3))).T[np.newaxis, :, :]
    frac_images = frac_tiled - cell_offsets

    # radius lookup keys; in case of charged species fall back to the element
    labels = [str(sp) for sp in struct.species]
    for idx, label in enumerate(labels):
        if label not in ldict:
            labels[idx] = str(Specie.from_string(label).element)

    lattice_t = struct.lattice.matrix.T
    connected_matrix = np.zeros((n_atoms, n_atoms))

    for i in range(n_atoms):
        for j in range(i + 1, n_atoms):
            max_bond_length = ldict[labels[i]] + ldict[labels[j]] + tolerance
            # cartesian vectors from atom i to each of the 27 images of atom j
            cart_diff = np.dot(lattice_t, frac_images[j] - frac_tiled[i])
            image_distances = np.linalg.norm(cart_diff, axis=0)
            if np.any(image_distances < max_bond_length):
                connected_matrix[i, j] = 1
                connected_matrix[j, i] = 1
    return connected_matrix
def find_clusters(struct, connected_matrix):
    """
    Finds bonded clusters of atoms in the structure with periodic boundary
    conditions.

    If there are atoms that are not bonded to anything, returns [0,1,0]. (For
    faster computation time)

    The search is iterative, so cluster size is not limited by Python's
    recursion depth, and ``connected_matrix`` is left unmodified.

    Author: "Gowoon Cheon"
    Email: "gcheon@stanford.edu"

    Args:
        struct (Structure): Input structure
        connected_matrix: Must be made from the same structure with
            find_connected_atoms() function.

    Returns:
        A list ``[max_cluster, min_cluster, clusters]`` where:

        max_cluster: the size of the largest cluster in the crystal structure
        min_cluster: the size of the smallest cluster in the crystal structure
        clusters: list of bonded clusters found here, clusters are formatted as
            sets of indices of atoms

        ``[0, 0, 0]`` is returned for a structure with no atoms.
    """
    n_atoms = len(struct.species)
    if n_atoms == 0:
        return [0, 0, 0]

    connected_matrix = np.asarray(connected_matrix)
    # an all-zero column means an atom with no bonds at all
    if 0 in np.sum(connected_matrix, axis=0):
        return [0, 1, 0]

    visited = [False] * n_atoms
    clusters = []
    cluster_sizes = []

    for start in range(n_atoms):
        if visited[start]:
            continue

        # depth-first search with an explicit stack instead of recursion
        visited[start] = True
        cluster = {start}
        stack = [start]
        while stack:
            atom = stack.pop()
            for neighbour in np.flatnonzero(connected_matrix[atom]):
                neighbour = int(neighbour)
                if not visited[neighbour]:
                    visited[neighbour] = True
                    cluster.add(neighbour)
                    stack.append(neighbour)

        clusters.append(cluster)
        cluster_sizes.append(len(cluster))

    return [max(cluster_sizes), min(cluster_sizes), clusters]
def get_dimensionality_gorai(structure, max_hkl=2, el_radius_updates=None,
                             min_slab_size=5, min_vacuum_size=5,
                             standardize=True, bonds=None):
    """
    This method returns whether a structure is 3D, 2D (layered), or 1D (linear
    chains or molecules) according to the algorithm published in Gorai, P.,
    Toberer, E. & Stevanovic, V. Computational Identification of Promising
    Thermoelectric Materials Among Known Quasi-2D Binary Compounds. J. Mater.
    Chem. A 2, 4136 (2016).

    Slabs are generated for Miller indices (h, k, l) and the slabs returned
    by SlabGenerator.get_slabs(bonds) are counted; the search stops counting
    once two have been found. The result is 3 minus that count (capped at 2).

    Note that a 1D structure detection might indicate problems in the bonding
    algorithm, particularly for ionic crystals (e.g., NaCl)

    Users can change the behavior of bonds detection by passing either
    el_radius_updates to update atomic radii for auto-detection of max bond
    distances, or bonds to explicitly specify max bond distances for atom
    pairs. Note that if you pass both, el_radius_updates are ignored.

    Args:
        structure: (Structure) structure to analyze dimensionality for
        max_hkl: (int) max index of planes to look for layers; h, k and l are
            each drawn from range(max_hkl), skipping (0, 0, 0)
        el_radius_updates: (dict) symbol->float to update atomic radii
        min_slab_size: (float) internal surface construction parameter
        min_vacuum_size: (float) internal surface construction parameter
        standardize (bool): whether to standardize the structure before
            analysis. Set to False only if you already have the structure in a
            convention where layers / chains will be along low <hkl> indexes.
        bonds ({(specie1, specie2): max_bond_dist}: bonds are
            specified as a dict of tuples: float of specie1, specie2
            and the max bonding distance. For example, PO4 groups may be
            defined as {("P", "O"): 3}.

    Returns: (int) the dimensionality of the structure - 1 (molecules/chains),
        2 (layered), or 3 (3D)
    """
    if standardize:
        analyzer = SpacegroupAnalyzer(structure)
        structure = analyzer.get_conventional_standard_structure()

    if not bonds:
        bonds = get_max_bond_lengths(structure, el_radius_updates)

    num_surfaces = 0
    for miller_index in itertools.product(range(max_hkl), repeat=3):
        if num_surfaces >= 2:
            # two surfaces already cap the result; nothing more to learn
            break
        if max(miller_index) == 0:
            # (0, 0, 0) is not a valid plane
            continue
        generator = SlabGenerator(structure, miller_index,
                                  min_slab_size=min_slab_size,
                                  min_vacuum_size=min_vacuum_size)
        num_surfaces += sum(1 for _ in generator.get_slabs(bonds))

    return 3 - min(num_surfaces, 2)