Source code for pymatgen.analysis.graphs

# coding: utf-8
# Copyright (c) Pymatgen Development Team.
# Distributed under the terms of the MIT License.

"""
Module for graph representations of crystals.
"""

import warnings
import subprocess
import numpy as np
import os.path
import copy
from itertools import combinations

from pymatgen.core import Structure, Lattice, PeriodicSite, Molecule
from pymatgen.core.structure import FunctionalGroups
from pymatgen.util.coord import lattice_points_in_supercell
from pymatgen.vis.structure_vtk import EL_COLORS

from monty.json import MSONable
from monty.os.path import which
from operator import itemgetter
from collections import namedtuple, defaultdict
from scipy.spatial import KDTree
from scipy.stats import describe

import networkx as nx
import networkx.algorithms.isomorphism as iso
from networkx.readwrite import json_graph
from networkx.drawing.nx_agraph import write_dot

try:
    import igraph
    IGRAPH_AVAILABLE = True
except ImportError:
    IGRAPH_AVAILABLE = False

import logging

logger = logging.getLogger(__name__)
logger.setLevel(logging.INFO)

__author__ = "Matthew Horton, Evan Spotte-Smith, Samuel Blau"
__version__ = "0.1"
__maintainer__ = "Matthew Horton"
__email__ = "mkhorton@lbl.gov"
__status__ = "Production"
__date__ = "August 2017"

ConnectedSite = namedtuple('ConnectedSite', 'site, jimage, index, weight, dist')


def _compare(g1, g2, i1, i2):
    """
    Helper function called by isomorphic to ensure comparison of node identities.
    """
    return g1.vs[i1]['species'] == g2.vs[i2]['species']


def _igraph_from_nxgraph(graph):
    """
    Helper function that converts a networkx graph object into an igraph graph object.
    """
    nodes = graph.nodes(data=True)
    new_igraph = igraph.Graph()
    for node in nodes:
        new_igraph.add_vertex(name=str(node[0]), species=node[1]["specie"], coords=node[1]["coords"])
    new_igraph.add_edges([(str(edge[0]), str(edge[1])) for edge in graph.edges()])
    return new_igraph


def _isomorphic(frag1, frag2):
    """
    Internal function to check if two graph objects are isomorphic, using igraph if
    if is available and networkx if it is not.
    """
    f1_nodes = frag1.nodes(data=True)
    f2_nodes = frag2.nodes(data=True)
    if len(f1_nodes) != len(f2_nodes):
        return False
    f2_edges = frag2.edges()
    if len(f2_edges) != len(f2_edges):
        return False
    f1_comp_dict = {}
    f2_comp_dict = {}
    for node in f1_nodes:
        if node[1]["specie"] not in f1_comp_dict:
            f1_comp_dict[node[1]["specie"]] = 1
        else:
            f1_comp_dict[node[1]["specie"]] += 1
    for node in f2_nodes:
        if node[1]["specie"] not in f2_comp_dict:
            f2_comp_dict[node[1]["specie"]] = 1
        else:
            f2_comp_dict[node[1]["specie"]] += 1
    if f1_comp_dict != f2_comp_dict:
        return False
    if IGRAPH_AVAILABLE:
        ifrag1 = _igraph_from_nxgraph(frag1)
        ifrag2 = _igraph_from_nxgraph(frag2)
        return ifrag1.isomorphic_vf2(ifrag2, node_compat_fn=_compare)
    else:
        nm = iso.categorical_node_match("specie", "ERROR")
        return nx.is_isomorphic(frag1.to_undirected(), frag2.to_undirected(), node_match=nm)


[docs]class StructureGraph(MSONable): """ This is a class for annotating a Structure with bond information, stored in the form of a graph. A "bond" does not necessarily have to be a chemical bond, but can store any kind of information that connects two Sites. """ def __init__(self, structure, graph_data=None): """ If constructing this class manually, use the `with_empty_graph` method or `with_local_env_strategy` method (using an algorithm provided by the `local_env` module, such as O'Keeffe). This class that contains connection information: relationships between sites represented by a Graph structure, and an associated structure object. This class uses the NetworkX package to store and operate on the graph itself, but contains a lot of helper methods to make associating a graph with a given crystallographic structure easier. Use cases for this include storing bonding information, NMR J-couplings, Heisenberg exchange parameters, etc. For periodic graphs, class stores information on the graph edges of what lattice image the edge belongs to. :param structure: a Structure object :param graph_data: dict containing graph information in dict format (not intended to be constructed manually, see as_dict method for format) """ if isinstance(structure, StructureGraph): # just make a copy from input graph_data = structure.as_dict()['graphs'] self.structure = structure self.graph = nx.readwrite.json_graph.adjacency_graph(graph_data) # tidy up edge attr dicts, reading to/from json duplicates # information for u, v, k, d in self.graph.edges(keys=True, data=True): if 'id' in d: del d['id'] if 'key' in d: del d['key'] # ensure images are tuples (conversion to lists happens # when serializing back from json), it's important images # are hashable/immutable if 'to_jimage' in d: d['to_jimage'] = tuple(d['to_jimage']) if 'from_jimage' in d: d['from_jimage'] = tuple(d['from_jimage'])
[docs] @classmethod def with_empty_graph(cls, structure, name="bonds", edge_weight_name=None, edge_weight_units=None): """ Constructor for StructureGraph, returns a StructureGraph object with an empty graph (no edges, only nodes defined that correspond to Sites in Structure). :param structure (Structure): :param name (str): name of graph, e.g. "bonds" :param edge_weight_name (str): name of edge weights, e.g. "bond_length" or "exchange_constant" :param edge_weight_units (str): name of edge weight units e.g. "Å" or "eV" :return (StructureGraph): """ if edge_weight_name and (edge_weight_units is None): raise ValueError("Please specify units associated " "with your edge weights. Can be " "empty string if arbitrary or " "dimensionless.") # construct graph with one node per site # graph attributes don't change behavior of graph, # they're just for book-keeping graph = nx.MultiDiGraph(edge_weight_name=edge_weight_name, edge_weight_units=edge_weight_units, name=name) graph.add_nodes_from(range(len(structure))) graph_data = json_graph.adjacency_data(graph) return cls(structure, graph_data=graph_data)
[docs] @staticmethod def with_edges(structure, edges): """ Constructor for MoleculeGraph, using pre-existing or pre-defined edges with optional edge parameters. :param molecule: Molecule object :param edges: dict representing the bonds of the functional group (format: {(from_index, to_index, from_image, to_image): props}, where props is a dictionary of properties, including weight. Props should be None if no additional properties are to be specified. :return: sg, a StructureGraph """ sg = StructureGraph.with_empty_graph(structure, name="bonds", edge_weight_name="weight", edge_weight_units="") for edge, props in edges.items(): try: from_index = edge[0] to_index = edge[1] from_image = edge[2] to_image = edge[3] except TypeError: raise ValueError("Edges must be given as (from_index, to_index," " from_image, to_image) tuples") if props is not None: if "weight" in props.keys(): weight = props["weight"] del props["weight"] else: weight = None if len(props.items()) == 0: props = None else: weight = None nodes = sg.graph.nodes if not (from_index in nodes and to_index in nodes): raise ValueError("Edges cannot be added if nodes are not" " present in the graph. Please check your" " indices.") sg.add_edge(from_index, to_index, from_jimage=from_image, to_jimage=to_image, weight=weight, edge_properties=props) sg.set_node_attributes() return sg
[docs] @staticmethod def with_local_env_strategy(structure, strategy, weights=False): """ Constructor for StructureGraph, using a strategy from :Class: `pymatgen.analysis.local_env`. :param structure: Structure object :param strategy: an instance of a :Class: `pymatgen.analysis.local_env.NearNeighbors` object :param weights: if True, use weights from local_env class (consult relevant class for their meaning) :return: """ if not strategy.structures_allowed: raise ValueError("Chosen strategy is not designed for use with structures! " "Please choose another strategy.") sg = StructureGraph.with_empty_graph(structure, name="bonds") for n, neighbors in enumerate(strategy.get_all_nn_info(structure)): for neighbor in neighbors: # local_env will always try to add two edges # for any one bond, one from site u to site v # and another form site v to site u: this is # harmless, so warn_duplicates=False sg.add_edge(from_index=n, from_jimage=(0, 0, 0), to_index=neighbor['site_index'], to_jimage=neighbor['image'], weight=neighbor['weight'] if weights else None, warn_duplicates=False) return sg
@property def name(self): """ :return: Name of graph """ return self.graph.graph['name'] @property def edge_weight_name(self): """ :return: Name of the edge weight property of graph """ return self.graph.graph['edge_weight_name'] @property def edge_weight_unit(self): """ :return: Units of the edge weight property of graph """ return self.graph.graph['edge_weight_units']
[docs] def add_edge(self, from_index, to_index, from_jimage=(0, 0, 0), to_jimage=None, weight=None, warn_duplicates=True, edge_properties=None): """ Add edge to graph. Since physically a 'bond' (or other connection between sites) doesn't have a direction, from_index, from_jimage can be swapped with to_index, to_jimage. However, images will always always be shifted so that from_index < to_index and from_jimage becomes (0, 0, 0). :param from_index: index of site connecting from :param to_index: index of site connecting to :param from_jimage (tuple of ints): lattice vector of periodic image, e.g. (1, 0, 0) for periodic image in +x direction :param to_jimage (tuple of ints): lattice vector of image :param weight (float): e.g. bond length :param warn_duplicates (bool): if True, will warn if trying to add duplicate edges (duplicate edges will not be added in either case) :param edge_properties (dict): any other information to store on graph edges, similar to Structure's site_properties :return: """ # this is not necessary for the class to work, but # just makes it neater if to_index < from_index: to_index, from_index = from_index, to_index to_jimage, from_jimage = from_jimage, to_jimage # constrain all from_jimages to be (0, 0, 0), # initial version of this class worked even if # from_jimage != (0, 0, 0), but making this # assumption simplifies logic later if not np.array_equal(from_jimage, (0, 0, 0)): shift = from_jimage from_jimage = np.subtract(from_jimage, shift) to_jimage = np.subtract(to_jimage, shift) # automatic detection of to_jimage if user doesn't specify # will try and detect all equivalent images and add multiple # edges if appropriate if to_jimage is None: # assume we want the closest site warnings.warn("Please specify to_jimage to be unambiguous, " "trying to automatically detect.") dist, to_jimage = self.structure[from_index] \ .distance_and_image(self.structure[to_index]) if dist == 0: # this will happen when from_index == to_index, # typically in primitive single-atom lattices images = [1, 0, 0], [0, 1, 0], [0, 0, 1] dists = [] for image in images: dists.append(self.structure[from_index] .distance_and_image(self.structure[from_index], jimage=image)[0]) dist = min(dists) equiv_sites = self.structure.get_neighbors_in_shell(self.structure[from_index].coords, dist, dist * 0.01, include_index=True) for site, dist, to_index in equiv_sites: to_jimage = np.subtract(site.frac_coords, self.structure[from_index].frac_coords) to_jimage = np.round(to_jimage).astype(int) self.add_edge(from_index=from_index, from_jimage=(0, 0, 0), to_jimage=to_jimage, to_index=to_index) return # sanitize types from_jimage, to_jimage = tuple(map(int, from_jimage)), tuple(map(int, to_jimage)) from_index, to_index = int(from_index), int(to_index) # check we're not trying to add a duplicate edge # there should only ever be at most one edge # between a given (site, jimage) pair and another # (site, jimage) pair existing_edge_data = self.graph.get_edge_data(from_index, to_index) if existing_edge_data: for key, d in existing_edge_data.items(): if d["to_jimage"] == to_jimage: if warn_duplicates: warnings.warn("Trying to add an edge that already exists from " "site {} to site {} in {}.".format(from_index, to_index, to_jimage)) return # generic container for additional edge properties, # similar to site properties edge_properties = edge_properties or {} if weight: self.graph.add_edge(from_index, to_index, to_jimage=to_jimage, weight=weight, **edge_properties) else: self.graph.add_edge(from_index, to_index, to_jimage=to_jimage, **edge_properties)
[docs] def insert_node(self, i, species, coords, coords_are_cartesian=False, validate_proximity=False, site_properties=None, edges=None): """ A wrapper around Molecule.insert(), which also incorporates the new site into the MoleculeGraph. :param i: Index at which to insert the new site :param species: Species for the new site :param coords: 3x1 array representing coordinates of the new site :param coords_are_cartesian: Whether coordinates are cartesian. Defaults to False. :param validate_proximity: For Molecule.insert(); if True (default False), distance will be checked to ensure that site can be safely added. :param site_properties: Site properties for Molecule :param edges: List of dicts representing edges to be added to the MoleculeGraph. These edges must include the index of the new site i, and all indices used for these edges should reflect the MoleculeGraph AFTER the insertion, NOT before. Each dict should at least have a "to_index" and "from_index" key, and can also have a "weight" and a "properties" key. :return: """ self.structure.insert(i, species, coords, coords_are_cartesian=coords_are_cartesian, validate_proximity=validate_proximity, properties=site_properties) mapping = {} for j in range(len(self.structure) - 1): if j < i: mapping[j] = j else: mapping[j] = j + 1 nx.relabel_nodes(self.graph, mapping, copy=False) self.graph.add_node(i) self.set_node_attributes() if edges is not None: for edge in edges: try: self.add_edge(edge["from_index"], edge["to_index"], from_jimage=(0, 0, 0), to_jimage=edge["to_jimage"], weight=edge.get("weight", None), edge_properties=edge.get("properties", None)) except KeyError: raise RuntimeError("Some edges are invalid.")
[docs] def set_node_attributes(self): """ Gives each node a "specie" and a "coords" attribute, updated with the current species and coordinates. :return: """ species = {} coords = {} properties = {} for node in self.graph.nodes(): species[node] = self.structure[node].specie.symbol coords[node] = self.structure[node].coords properties[node] = self.structure[node].properties nx.set_node_attributes(self.graph, species, "specie") nx.set_node_attributes(self.graph, coords, "coords") nx.set_node_attributes(self.graph, properties, "properties")
[docs] def alter_edge(self, from_index, to_index, to_jimage=None, new_weight=None, new_edge_properties=None): """ Alters either the weight or the edge_properties of an edge in the StructureGraph. :param from_index: int :param to_index: int :param to_jimage: tuple :param new_weight: alter_edge does not require that weight be altered. As such, by default, this is None. If weight is to be changed, it should be a float. :param new_edge_properties: alter_edge does not require that edge_properties be altered. As such, by default, this is None. If any edge properties are to be changed, it should be a dictionary of edge properties to be changed. :return: """ existing_edges = self.graph.get_edge_data(from_index, to_index) # ensure that edge exists before attempting to change it if not existing_edges: raise ValueError("Edge between {} and {} cannot be altered;\ no edge exists between those sites.".format( from_index, to_index )) if to_jimage is None: edge_index = 0 else: for i, properties in existing_edges.items(): if properties["to_jimage"] == to_jimage: edge_index = i if new_weight is not None: self.graph[from_index][to_index][edge_index]['weight'] = new_weight if new_edge_properties is not None: for prop in list(new_edge_properties.keys()): self.graph[from_index][to_index][edge_index][prop] = new_edge_properties[prop]
[docs] def break_edge(self, from_index, to_index, to_jimage=None, allow_reverse=False): """ Remove an edge from the StructureGraph. If no image is given, this method will fail. :param from_index: int :param to_index: int :param to_jimage: tuple :param allow_reverse: If allow_reverse is True, then break_edge will attempt to break both (from_index, to_index) and, failing that, will attempt to break (to_index, from_index). :return: """ # ensure that edge exists before attempting to remove it existing_edges = self.graph.get_edge_data(from_index, to_index) existing_reverse = None if to_jimage is None: raise ValueError("Image must be supplied, to avoid ambiguity.") if existing_edges: for i, properties in existing_edges.items(): if properties["to_jimage"] == to_jimage: edge_index = i self.graph.remove_edge(from_index, to_index, edge_index) else: if allow_reverse: existing_reverse = self.graph.get_edge_data(to_index, from_index) if existing_reverse: for i, properties in existing_reverse.items(): if properties["to_jimage"] == to_jimage: edge_index = i self.graph.remove_edge(to_index, from_index, edge_index) else: raise ValueError("Edge cannot be broken between {} and {};\ no edge exists between those sites.".format( from_index, to_index ))
[docs] def remove_nodes(self, indices): """ A wrapper for Molecule.remove_sites(). :param indices: list of indices in the current Molecule (and graph) to be removed. :return: """ self.structure.remove_sites(indices) self.graph.remove_nodes_from(indices) mapping = {} for correct, current in enumerate(sorted(self.graph.nodes)): mapping[current] = correct nx.relabel_nodes(self.graph, mapping, copy=False) self.set_node_attributes()
[docs] def substitute_group(self, index, func_grp, strategy, bond_order=1, graph_dict=None, strategy_params=None): """ Builds off of Structure.substitute to replace an atom in self.structure with a functional group. This method also amends self.graph to incorporate the new functional group. NOTE: Care must be taken to ensure that the functional group that is substituted will not place atoms to close to each other, or violate the dimensions of the Lattice. :param index: Index of atom to substitute. :param func_grp: Substituent molecule. There are two options: 1. Providing an actual Molecule as the input. The first atom must be a DummySpecie X, indicating the position of nearest neighbor. The second atom must be the next nearest atom. For example, for a methyl group substitution, func_grp should be X-CH3, where X is the first site and C is the second site. What the code will do is to remove the index site, and connect the nearest neighbor to the C atom in CH3. The X-C bond indicates the directionality to connect the atoms. 2. A string name. The molecule will be obtained from the relevant template in func_groups.json. :param strategy: Class from pymatgen.analysis.local_env. :param bond_order: A specified bond order to calculate the bond length between the attached functional group and the nearest neighbor site. Defaults to 1. :param graph_dict: Dictionary representing the bonds of the functional group (format: {(u, v): props}, where props is a dictionary of properties, including weight. If None, then the algorithm will attempt to automatically determine bonds using one of a list of strategies defined in pymatgen.analysis.local_env. :param strategy_params: dictionary of keyword arguments for strategy. If None, default parameters will be used. :return: """ def map_indices(grp): grp_map = {} # Get indices now occupied by functional group # Subtracting 1 because the dummy atom X should not count atoms = len(grp) - 1 offset = len(self.structure) - atoms for i in range(atoms): grp_map[i] = i + offset return grp_map if isinstance(func_grp, Molecule): func_grp = copy.deepcopy(func_grp) else: try: func_grp = copy.deepcopy(FunctionalGroups[func_grp]) except Exception: raise RuntimeError("Can't find functional group in list. " "Provide explicit coordinate instead") self.structure.substitute(index, func_grp, bond_order=bond_order) mapping = map_indices(func_grp) # Remove dummy atom "X" func_grp.remove_species("X") if graph_dict is not None: for (u, v) in graph_dict.keys(): edge_props = graph_dict[(u, v)] if "to_jimage" in edge_props.keys(): to_jimage = edge_props["to_jimage"] del edge_props["to_jimage"] else: # By default, assume that all edges should stay remain # inside the initial image to_jimage = (0, 0, 0) if "weight" in edge_props.keys(): weight = edge_props["weight"] del edge_props["weight"] self.add_edge(mapping[u], mapping[v], to_jimage=to_jimage, weight=weight, edge_properties=edge_props) else: if strategy_params is None: strategy_params = {} strat = strategy(**strategy_params) for site in mapping.values(): neighbors = strat.get_nn_info(self.structure, site) for neighbor in neighbors: self.add_edge(from_index=site, from_jimage=(0, 0, 0), to_index=neighbor['site_index'], to_jimage=neighbor['image'], weight=neighbor['weight'], warn_duplicates=False)
[docs] def get_connected_sites(self, n, jimage=(0, 0, 0)): """ Returns a named tuple of neighbors of site n: periodic_site, jimage, index, weight. Index is the index of the corresponding site in the original structure, weight can be None if not defined. :param n: index of Site in Structure :param jimage: lattice vector of site :return: list of ConnectedSite tuples, sorted by closest first """ connected_sites = set() connected_site_images = set() out_edges = [(u, v, d, 'out') for u, v, d in self.graph.out_edges(n, data=True)] in_edges = [(u, v, d, 'in') for u, v, d in self.graph.in_edges(n, data=True)] for u, v, d, dir in out_edges + in_edges: to_jimage = d['to_jimage'] if dir == 'in': u, v = v, u to_jimage = np.multiply(-1, to_jimage) to_jimage = tuple(map(int, np.add(to_jimage, jimage))) site_d = self.structure[v].as_dict() site_d['abc'] = np.add(site_d['abc'], to_jimage).tolist() site = PeriodicSite.from_dict(site_d) # from_site if jimage arg != (0, 0, 0) relative_jimage = np.subtract(to_jimage, jimage) dist = self.structure[u].distance(self.structure[v], jimage=relative_jimage) weight = d.get('weight', None) if (v, to_jimage) not in connected_site_images: connected_site = ConnectedSite(site=site, jimage=to_jimage, index=v, weight=weight, dist=dist) connected_sites.add(connected_site) connected_site_images.add((v, to_jimage)) # return list sorted by closest sites first connected_sites = list(connected_sites) connected_sites.sort(key=lambda x: x.dist) return connected_sites
[docs] def get_coordination_of_site(self, n): """ Returns the number of neighbors of site n. In graph terms, simply returns degree of node corresponding to site n. :param n: index of site :return (int): """ number_of_self_loops = sum([1 for n, v in self.graph.edges(n) if n == v]) return self.graph.degree(n) - number_of_self_loops
[docs] def draw_graph_to_file(self, filename="graph", diff=None, hide_unconnected_nodes=False, hide_image_edges=True, edge_colors=False, node_labels=False, weight_labels=False, image_labels=False, color_scheme="VESTA", keep_dot=False, algo="fdp"): """ Draws graph using GraphViz. The networkx graph object itself can also be drawn with networkx's in-built graph drawing methods, but note that this might give misleading results for multigraphs (edges are super-imposed on each other). If visualization is difficult to interpret, `hide_image_edges` can help, especially in larger graphs. :param filename: filename to output, will detect filetype from extension (any graphviz filetype supported, such as pdf or png) :param diff (StructureGraph): an additional graph to compare with, will color edges red that do not exist in diff and edges green that are in diff graph but not in the reference graph :param hide_unconnected_nodes: if True, hide unconnected nodes :param hide_image_edges: if True, do not draw edges that go through periodic boundaries :param edge_colors (bool): if True, use node colors to color edges :param node_labels (bool): if True, label nodes with species and site index :param weight_labels (bool): if True, label edges with weights :param image_labels (bool): if True, label edges with their periodic images (usually only used for debugging, edges to periodic images always appear as dashed lines) :param color_scheme (str): "VESTA" or "JMOL" :param keep_dot (bool): keep GraphViz .dot file for later visualization :param algo: any graphviz algo, "neato" (for simple graphs) or "fdp" (for more crowded graphs) usually give good outputs :return: """ if not which(algo): raise RuntimeError("StructureGraph graph drawing requires " "GraphViz binaries to be in the path.") # Developer note: NetworkX also has methods for drawing # graphs using matplotlib, these also work here. However, # a dedicated tool like GraphViz allows for much easier # control over graph appearance and also correctly displays # mutli-graphs (matplotlib can superimpose multiple edges). g = self.graph.copy() g.graph = {'nodesep': 10.0, 'dpi': 300, 'overlap': "false"} # add display options for nodes for n in g.nodes(): # get label by species name label = "{}({})".format(str(self.structure[n].specie), n) if node_labels else "" # use standard color scheme for nodes c = EL_COLORS[color_scheme].get(str(self.structure[n].specie.symbol), [0, 0, 0]) # get contrasting font color # magic numbers account for perceived luminescence # https://stackoverflow.com/questions/1855884/determine-font-color-based-on-background-color fontcolor = '#000000' if 1 - (c[0] * 0.299 + c[1] * 0.587 + c[2] * 0.114) / 255 < 0.5 else '#ffffff' # convert color to hex string color = "#{:02x}{:02x}{:02x}".format(c[0], c[1], c[2]) g.add_node(n, fillcolor=color, fontcolor=fontcolor, label=label, fontname="Helvetica-bold", style="filled", shape="circle") edges_to_delete = [] # add display options for edges for u, v, k, d in g.edges(keys=True, data=True): # retrieve from/to images, set as origin if not defined to_image = d['to_jimage'] # set edge style d['style'] = "solid" if to_image != (0, 0, 0): d['style'] = "dashed" if hide_image_edges: edges_to_delete.append((u, v, k)) # don't show edge directions d['arrowhead'] = "none" # only add labels for images that are not the origin if image_labels: d['headlabel'] = "" if to_image == (0, 0, 0) else "to {}".format((to_image)) d['arrowhead'] = "normal" if d['headlabel'] else "none" # optionally color edges using node colors color_u = g.nodes[u]['fillcolor'] color_v = g.nodes[v]['fillcolor'] d['color_uv'] = "{};0.5:{};0.5".format(color_u, color_v) if edge_colors else "#000000" # optionally add weights to graph if weight_labels: units = g.graph.get('edge_weight_units', "") if d.get('weight'): d['label'] = "{:.2f} {}".format(d['weight'], units) # update edge with our new style attributes g.edges[u, v, k].update(d) # optionally remove periodic image edges, # these can be confusing due to periodic boundaries if hide_image_edges: for edge_to_delete in edges_to_delete: g.remove_edge(*edge_to_delete) # optionally hide unconnected nodes, # these can appear when removing periodic edges if hide_unconnected_nodes: g = g.subgraph([n for n in g.degree() if g.degree()[n] != 0]) # optionally highlight differences with another graph if diff: diff = self.diff(diff, strict=True) green_edges = [] red_edges = [] for u, v, k, d in g.edges(keys=True, data=True): if (u, v, d['to_jimage']) in diff['self']: # edge has been deleted red_edges.append((u, v, k)) elif (u, v, d['to_jimage']) in diff['other']: # edge has been added green_edges.append((u, v, k)) for u, v, k in green_edges: g.edges[u, v, k].update({'color_uv': '#00ff00'}) for u, v, k in red_edges: g.edges[u, v, k].update({'color_uv': '#ff0000'}) basename, extension = os.path.splitext(filename) extension = extension[1:] write_dot(g, basename + ".dot") with open(filename, "w") as f: args = [algo, "-T", extension, basename + ".dot"] rs = subprocess.Popen(args, stdout=f, stdin=subprocess.PIPE, close_fds=True) rs.communicate() if rs.returncode != 0: raise RuntimeError("{} exited with return code {}.".format(algo, rs.returncode)) if not keep_dot: os.remove(basename + ".dot")
@property def types_and_weights_of_connections(self): """ Extract a dictionary summarizing the types and weights of edges in the graph. :return: A dictionary with keys specifying the species involved in a connection in alphabetical order (e.g. string 'Fe-O') and values which are a list of weights for those connections (e.g. bond lengths). """ def get_label(u, v): u_label = self.structure[u].species_string v_label = self.structure[v].species_string return "-".join(sorted((u_label, v_label))) types = defaultdict(list) for u, v, d in self.graph.edges(data=True): label = get_label(u, v) types[label].append(d['weight']) return dict(types) @property def weight_statistics(self): """ Extract a statistical summary of edge weights present in the graph. :return: A dict with an 'all_weights' list, 'minimum', 'maximum', 'median', 'mean', 'std_dev' """ all_weights = [d.get('weight', None) for u, v, d in self.graph.edges(data=True)] stats = describe(all_weights, nan_policy='omit') return { 'all_weights': all_weights, 'min': stats.minmax[0], 'max': stats.minmax[1], 'mean': stats.mean, 'variance': stats.variance }
[docs] def types_of_coordination_environments(self, anonymous=False): """ Extract information on the different co-ordination environments present in the graph. :param anonymous: if anonymous, will replace specie names with A, B, C, etc. :return: a list of co-ordination environments, e.g. ['Mo-S(6)', 'S-Mo(3)'] """ motifs = set() for idx, site in enumerate(self.structure): centre_sp = site.species_string connected_sites = self.get_connected_sites(idx) connected_species = [connected_site.site.species_string for connected_site in connected_sites] labels = [] for sp in set(connected_species): count = connected_species.count(sp) labels.append((count, sp)) labels = sorted(labels, reverse=True) if anonymous: mapping = {centre_sp: 'A'} available_letters = [chr(66 + i) for i in range(25)] for label in labels: sp = label[1] if sp not in mapping: mapping[sp] = available_letters.pop(0) centre_sp = 'A' labels = [(label[0], mapping[label[1]]) for label in labels] labels = ["{}({})".format(label[1], label[0]) for label in labels] motif = '{}-{}'.format(centre_sp, ','.join(labels)) motifs.add(motif) return sorted(list(motifs))
[docs] def as_dict(self): """ As in :Class: `pymatgen.core.Structure` except with using `to_dict_of_dicts` from NetworkX to store graph information. """ d = {"@module": self.__class__.__module__, "@class": self.__class__.__name__, "structure": self.structure.as_dict(), "graphs": json_graph.adjacency_data(self.graph)} return d
[docs] @classmethod def from_dict(cls, d): """ As in :Class: `pymatgen.core.Structure` except restoring graphs using `from_dict_of_dicts` from NetworkX to restore graph information. """ s = Structure.from_dict(d['structure']) return cls(s, d['graphs'])
def __mul__(self, scaling_matrix): """ Replicates the graph, creating a supercell, intelligently joining together edges that lie on periodic boundaries. In principle, any operations on the expanded graph could also be done on the original graph, but a larger graph can be easier to visualize and reason about. :param scaling_matrix: same as Structure.__mul__ :return: """ # Developer note: a different approach was also trialed, using # a simple Graph (instead of MultiDiGraph), with node indices # representing both site index and periodic image. Here, the # number of nodes != number of sites in the Structure. This # approach has many benefits, but made it more difficult to # keep the graph in sync with its corresponding Structure. # Broadly, it would be easier to multiply the Structure # *before* generating the StructureGraph, but this isn't # possible when generating the graph using critic2 from # charge density. # Multiplication works by looking for the expected position # of an image node, and seeing if that node exists in the # supercell. If it does, the edge is updated. This is more # computationally expensive than just keeping track of the # which new lattice images present, but should hopefully be # easier to extend to a general 3x3 scaling matrix. # code adapted from Structure.__mul__ scale_matrix = np.array(scaling_matrix, np.int16) if scale_matrix.shape != (3, 3): scale_matrix = np.array(scale_matrix * np.eye(3), np.int16) else: # TODO: test __mul__ with full 3x3 scaling matrices raise NotImplementedError('Not tested with 3x3 scaling matrices yet.') new_lattice = Lattice(np.dot(scale_matrix, self.structure.lattice.matrix)) f_lat = lattice_points_in_supercell(scale_matrix) c_lat = new_lattice.get_cartesian_coords(f_lat) new_sites = [] new_graphs = [] for v in c_lat: # create a map of nodes from original graph to its image mapping = {n: n + len(new_sites) for n in range(len(self.structure))} for idx, site in enumerate(self.structure): s = PeriodicSite(site.species, site.coords + v, new_lattice, properties=site.properties, coords_are_cartesian=True, to_unit_cell=False) new_sites.append(s) new_graphs.append(nx.relabel_nodes(self.graph, mapping, copy=True)) new_structure = Structure.from_sites(new_sites) # merge all graphs into one big graph new_g = nx.MultiDiGraph() for new_graph in new_graphs: new_g = nx.union(new_g, new_graph) edges_to_remove = [] # tuple of (u, v, k) edges_to_add = [] # tuple of (u, v, attr_dict) # list of new edges inside supercell # for duplicate checking edges_inside_supercell = [{u, v} for u, v, d in new_g.edges(data=True) if d['to_jimage'] == (0, 0, 0)] new_periodic_images = [] orig_lattice = self.structure.lattice # use k-d tree to match given position to an # existing Site in Structure kd_tree = KDTree(new_structure.cart_coords) # tolerance in Å for sites to be considered equal # this could probably be a lot smaller tol = 0.05 for u, v, k, d in new_g.edges(keys=True, data=True): to_jimage = d['to_jimage'] # for node v # reduce unnecessary checking if to_jimage != (0, 0, 0): # get index in original site n_u = u % len(self.structure) n_v = v % len(self.structure) # get fractional co-ordinates of where atoms defined # by edge are expected to be, relative to original # lattice (keeping original lattice has # significant benefits) v_image_frac = np.add(self.structure[n_v].frac_coords, to_jimage) u_frac = self.structure[n_u].frac_coords # using the position of node u as a reference, # get relative Cartesian co-ordinates of where # atoms defined by edge are expected to be v_image_cart = orig_lattice.get_cartesian_coords(v_image_frac) u_cart = orig_lattice.get_cartesian_coords(u_frac) v_rel = np.subtract(v_image_cart, u_cart) # now retrieve position of node v in # new supercell, and get asgolute Cartesian # co-ordinates of where atoms defined by edge # are expected to be v_expec = new_structure[u].coords + v_rel # now search in new structure for these atoms # query returns (distance, index) v_present = kd_tree.query(v_expec) v_present = v_present[1] if v_present[0] <= tol else None # check if image sites now present in supercell # and if so, delete old edge that went through # periodic boundary if v_present is not None: new_u = u new_v = v_present new_d = d.copy() # node now inside supercell new_d['to_jimage'] = (0, 0, 0) edges_to_remove.append((u, v, k)) # make sure we don't try to add duplicate edges # will remove two edges for everyone one we add if {new_u, new_v} not in edges_inside_supercell: # normalize direction if new_v < new_u: new_u, new_v = new_v, new_u edges_inside_supercell.append({new_u, new_v}) edges_to_add.append((new_u, new_v, new_d)) else: # want to find new_v such that we have # full periodic boundary conditions # so that nodes on one side of supercell # are connected to nodes on opposite side v_expec_frac = new_structure.lattice.get_fractional_coords(v_expec) # find new to_jimage # use np.around to fix issues with finite precision leading to incorrect image v_expec_image = np.around(v_expec_frac, decimals=3) v_expec_image = v_expec_image - v_expec_image % 1 v_expec_frac = np.subtract(v_expec_frac, v_expec_image) v_expec = new_structure.lattice.get_cartesian_coords(v_expec_frac) v_present = kd_tree.query(v_expec) v_present = v_present[1] if v_present[0] <= tol else None if v_present is not None: new_u = u new_v = v_present new_d = d.copy() new_to_jimage = tuple(map(int, v_expec_image)) # normalize direction if new_v < new_u: new_u, new_v = new_v, new_u new_to_jimage = tuple(np.multiply(-1, d['to_jimage']).astype(int)) new_d['to_jimage'] = new_to_jimage edges_to_remove.append((u, v, k)) if (new_u, new_v, new_to_jimage) not in new_periodic_images: edges_to_add.append((new_u, new_v, new_d)) new_periodic_images.append((new_u, new_v, new_to_jimage)) logger.debug("Removing {} edges, adding {} new edges.".format(len(edges_to_remove), len(edges_to_add))) # add/delete marked edges for edges_to_remove in edges_to_remove: new_g.remove_edge(*edges_to_remove) for (u, v, d) in edges_to_add: new_g.add_edge(u, v, **d) # return new instance of StructureGraph with supercell d = {"@module": self.__class__.__module__, "@class": self.__class__.__name__, "structure": new_structure.as_dict(), "graphs": json_graph.adjacency_data(new_g)} sg = StructureGraph.from_dict(d) return sg def __rmul__(self, other): return self.__mul__(other) def _edges_to_string(self, g): header = "from to to_image " header_line = "---- ---- ------------" edge_weight_name = g.graph["edge_weight_name"] if edge_weight_name: print_weights = ["weight"] edge_label = g.graph["edge_weight_name"] edge_weight_units = g.graph["edge_weight_units"] if edge_weight_units: edge_label += " ({})".format(edge_weight_units) header += " {}".format(edge_label) header_line += " {}".format("-" * max([18, len(edge_label)])) else: print_weights = False s = header + "\n" + header_line + "\n" edges = list(g.edges(data=True)) # sort edges for consistent ordering edges.sort(key=itemgetter(0, 1)) if print_weights: for u, v, data in edges: s += "{:4} {:4} {:12} {:.3e}\n".format(u, v, str(data.get("to_jimage", (0, 0, 0))), data.get("weight", 0)) else: for u, v, data in edges: s += "{:4} {:4} {:12}\n".format(u, v, str(data.get("to_jimage", (0, 0, 0)))) return s def __str__(self): s = "Structure Graph" s += "\nStructure: \n{}".format(self.structure.__str__()) s += "\nGraph: {}\n".format(self.name) s += self._edges_to_string(self.graph) return s def __repr__(self): s = "Structure Graph" s += "\nStructure: \n{}".format(self.structure.__repr__()) s += "\nGraph: {}\n".format(self.name) s += self._edges_to_string(self.graph) return s def __len__(self): """ :return: length of Structure / number of nodes in graph """ return len(self.structure)
[docs] def sort(self, key=None, reverse=False): """ Same as Structure.sort(), also remaps nodes in graph. :param key: :param reverse: :return: """ old_structure = self.structure.copy() # sort Structure self.structure._sites = sorted(self.structure._sites, key=key, reverse=reverse) # apply Structure ordering to graph mapping = {idx: self.structure.index(site) for idx, site in enumerate(old_structure)} self.graph = nx.relabel_nodes(self.graph, mapping, copy=True) # normalize directions of edges edges_to_remove = [] edges_to_add = [] for u, v, k, d in self.graph.edges(keys=True, data=True): if v < u: new_v, new_u, new_d = u, v, d.copy() new_d['to_jimage'] = tuple(np.multiply(-1, d['to_jimage']).astype(int)) edges_to_remove.append((u, v, k)) edges_to_add.append((new_u, new_v, new_d)) # add/delete marked edges for edges_to_remove in edges_to_remove: self.graph.remove_edge(*edges_to_remove) for (u, v, d) in edges_to_add: self.graph.add_edge(u, v, **d)
def __copy__(self): return StructureGraph.from_dict(self.as_dict()) def __eq__(self, other): """ Two StructureGraphs are equal if they have equal Structures, and have the same edges between Sites. Edge weights can be different and StructureGraphs can still be considered equal. :param other: StructureGraph :return (bool): """ # sort for consistent node indices # PeriodicSite should have a proper __hash__() value, # using its frac_coords as a convenient key mapping = {tuple(site.frac_coords): self.structure.index(site) for site in other.structure} other_sorted = other.__copy__() other_sorted.sort(key=lambda site: mapping[tuple(site.frac_coords)]) edges = {(u, v, d['to_jimage']) for u, v, d in self.graph.edges(keys=False, data=True)} edges_other = {(u, v, d['to_jimage']) for u, v, d in other_sorted.graph.edges(keys=False, data=True)} return (edges == edges_other) and \ (self.structure == other_sorted.structure)
[docs] def diff(self, other, strict=True): """ Compares two StructureGraphs. Returns dict with keys 'self', 'other', 'both' with edges that are present in only one StructureGraph ('self' and 'other'), and edges that are present in both. The Jaccard distance is a simple measure of the dissimilarity between two StructureGraphs (ignoring edge weights), and is defined by 1 - (size of the intersection / size of the union) of the sets of edges. This is returned with key 'dist'. Important note: all node indices are in terms of the StructureGraph this method is called from, not the 'other' StructureGraph: there is no guarantee the node indices will be the same if the underlying Structures are ordered differently. :param other: StructureGraph :param strict: if False, will compare bonds from different Structures, with node indices replaced by Specie strings, will not count number of occurrences of bonds :return: """ if self.structure != other.structure and strict: return ValueError("Meaningless to compare StructureGraphs if " "corresponding Structures are different.") if strict: # sort for consistent node indices # PeriodicSite should have a proper __hash__() value, # using its frac_coords as a convenient key mapping = {tuple(site.frac_coords): self.structure.index(site) for site in other.structure} other_sorted = other.__copy__() other_sorted.sort(key=lambda site: mapping[tuple(site.frac_coords)]) edges = {(u, v, d['to_jimage']) for u, v, d in self.graph.edges(keys=False, data=True)} edges_other = {(u, v, d['to_jimage']) for u, v, d in other_sorted.graph.edges(keys=False, data=True)} else: edges = {(str(self.structure[u].specie), str(self.structure[v].specie)) for u, v, d in self.graph.edges(keys=False, data=True)} edges_other = {(str(other.structure[u].specie), str(other.structure[v].specie)) for u, v, d in other.graph.edges(keys=False, data=True)} if len(edges) == 0 and len(edges_other) == 0: jaccard_dist = 0 # by definition else: jaccard_dist = 1 - len(edges.intersection(edges_other)) / len(edges.union(edges_other)) return { 'self': edges - edges_other, 'other': edges_other - edges, 'both': edges.intersection(edges_other), 'dist': jaccard_dist }
[docs] def get_subgraphs_as_molecules(self, use_weights=False): """ Retrieve subgraphs as molecules, useful for extracting molecules from periodic crystals. Will only return unique molecules, not any duplicates present in the crystal (a duplicate defined as an isomorphic subgraph). :param use_weights (bool): If True, only treat subgraphs as isomorphic if edges have the same weights. Typically, this means molecules will need to have the same bond lengths to be defined as duplicates, otherwise bond lengths can differ. This is a fairly robust approach, but will treat e.g. enantiomers as being duplicates. :return: list of unique Molecules in Structure """ # creating a supercell is an easy way to extract # molecules (and not, e.g., layers of a 2D crystal) # without adding extra logic if getattr(self, '_supercell_sg', None) is None: self._supercell_sg = supercell_sg = self * (3, 3, 3) # make undirected to find connected subgraphs supercell_sg.graph = nx.Graph(supercell_sg.graph) # find subgraphs all_subgraphs = [supercell_sg.graph.subgraph(c) for c in nx.connected_components(supercell_sg.graph)] # discount subgraphs that lie across *supercell* boundaries # these will subgraphs representing crystals molecule_subgraphs = [] for subgraph in all_subgraphs: intersects_boundary = any([d['to_jimage'] != (0, 0, 0) for u, v, d in subgraph.edges(data=True)]) if not intersects_boundary: molecule_subgraphs.append(nx.MultiDiGraph(subgraph)) # add specie names to graph to be able to test for isomorphism for subgraph in molecule_subgraphs: for n in subgraph: subgraph.add_node(n, specie=str(supercell_sg.structure[n].specie)) # now define how we test for isomorphism def node_match(n1, n2): return n1['specie'] == n2['specie'] def edge_match(e1, e2): if use_weights: return e1['weight'] == e2['weight'] else: return True # prune duplicate subgraphs unique_subgraphs = [] for subgraph in molecule_subgraphs: already_present = [nx.is_isomorphic(subgraph, g, node_match=node_match, edge_match=edge_match) for g in unique_subgraphs] if not any(already_present): unique_subgraphs.append(subgraph) # get Molecule objects for each subgraph molecules = [] for subgraph in unique_subgraphs: coords = [supercell_sg.structure[n].coords for n in subgraph.nodes()] species = [supercell_sg.structure[n].specie for n in subgraph.nodes()] molecule = Molecule(species, coords) # shift so origin is at center of mass molecule = molecule.get_centered_molecule() molecules.append(molecule) return molecules
[docs]class MolGraphSplitError(Exception): """ Raised when a molecule graph is failed to split into two disconnected subgraphs """ pass
[docs]class MoleculeGraph(MSONable): """ This is a class for annotating a Molecule with bond information, stored in the form of a graph. A "bond" does not necessarily have to be a chemical bond, but can store any kind of information that connects two Sites. """ def __init__(self, molecule, graph_data=None): """ If constructing this class manually, use the `with_empty_graph` method or `with_local_env_strategy` method (using an algorithm provided by the `local_env` module, such as O'Keeffe). This class that contains connection information: relationships between sites represented by a Graph structure, and an associated structure object. This class uses the NetworkX package to store and operate on the graph itself, but contains a lot of helper methods to make associating a graph with a given molecule easier. Use cases for this include storing bonding information, NMR J-couplings, Heisenberg exchange parameters, etc. :param molecule: Molecule object :param graph_data: dict containing graph information in dict format (not intended to be constructed manually, see as_dict method for format) """ if isinstance(molecule, MoleculeGraph): # just make a copy from input graph_data = molecule.as_dict()['graphs'] self.molecule = molecule self.graph = nx.readwrite.json_graph.adjacency_graph(graph_data) # tidy up edge attr dicts, reading to/from json duplicates # information for u, v, k, d in self.graph.edges(keys=True, data=True): if 'id' in d: del d['id'] if 'key' in d: del d['key'] # ensure images are tuples (conversion to lists happens # when serializing back from json), it's important images # are hashable/immutable if 'to_jimage' in d: d['to_jimage'] = tuple(d['to_jimage']) if 'from_jimage' in d: d['from_jimage'] = tuple(d['from_jimage']) self.set_node_attributes()
[docs] @classmethod def with_empty_graph(cls, molecule, name="bonds", edge_weight_name=None, edge_weight_units=None): """ Constructor for MoleculeGraph, returns a MoleculeGraph object with an empty graph (no edges, only nodes defined that correspond to Sites in Molecule). :param molecule (Molecule): :param name (str): name of graph, e.g. "bonds" :param edge_weight_name (str): name of edge weights, e.g. "bond_length" or "exchange_constant" :param edge_weight_units (str): name of edge weight units e.g. "Å" or "eV" :return (MoleculeGraph): """ if edge_weight_name and (edge_weight_units is None): raise ValueError("Please specify units associated " "with your edge weights. Can be " "empty string if arbitrary or " "dimensionless.") # construct graph with one node per site # graph attributes don't change behavior of graph, # they're just for book-keeping graph = nx.MultiDiGraph(edge_weight_name=edge_weight_name, edge_weight_units=edge_weight_units, name=name) graph.add_nodes_from(range(len(molecule))) graph_data = json_graph.adjacency_data(graph) return cls(molecule, graph_data=graph_data)
[docs] @staticmethod def with_edges(molecule, edges): """ Constructor for MoleculeGraph, using pre-existing or pre-defined edges with optional edge parameters. :param molecule: Molecule object :param edges: dict representing the bonds of the functional group (format: {(u, v): props}, where props is a dictionary of properties, including weight. Props should be None if no additional properties are to be specified. :return: mg, a MoleculeGraph """ mg = MoleculeGraph.with_empty_graph(molecule, name="bonds", edge_weight_name="weight", edge_weight_units="") for edge, props in edges.items(): try: from_index = edge[0] to_index = edge[1] except TypeError: raise ValueError("Edges must be given as (from_index, to_index)" "tuples") if props is not None: if "weight" in props.keys(): weight = props["weight"] del props["weight"] else: weight = None if len(props.items()) == 0: props = None else: weight = None nodes = mg.graph.nodes if not (from_index in nodes and to_index in nodes): raise ValueError("Edges cannot be added if nodes are not" " present in the graph. Please check your" " indices.") mg.add_edge(from_index, to_index, weight=weight, edge_properties=props) mg.set_node_attributes() return mg
[docs] @staticmethod def with_local_env_strategy(molecule, strategy): """ Constructor for MoleculeGraph, using a strategy from :Class: `pymatgen.analysis.local_env`. :param molecule: Molecule object :param strategy: an instance of a :Class: `pymatgen.analysis.local_env.NearNeighbors` object :return: mg, a MoleculeGraph """ if not strategy.molecules_allowed: raise ValueError("Chosen strategy is not designed for use with molecules! " "Please choose another strategy.") if strategy.extend_structure_molecules: extend_structure = True else: extend_structure = False mg = MoleculeGraph.with_empty_graph(molecule, name="bonds", edge_weight_name="weight", edge_weight_units="") # NearNeighbor classes only (generally) work with structures # molecules have to be boxed first coords = molecule.cart_coords if extend_structure: a = max(coords[:, 0]) - min(coords[:, 0]) + 100 b = max(coords[:, 1]) - min(coords[:, 1]) + 100 c = max(coords[:, 2]) - min(coords[:, 2]) + 100 structure = molecule.get_boxed_structure(a, b, c, no_cross=True, reorder=False) else: structure = None for n in range(len(molecule)): if structure is None: neighbors = strategy.get_nn_info(molecule, n) else: neighbors = strategy.get_nn_info(structure, n) for neighbor in neighbors: # all bonds in molecules should not cross # (artificial) periodic boundaries if not np.array_equal(neighbor['image'], [0, 0, 0]): continue if n > neighbor['site_index']: from_index = neighbor['site_index'] to_index = n else: from_index = n to_index = neighbor['site_index'] mg.add_edge(from_index=from_index, to_index=to_index, weight=neighbor['weight'], warn_duplicates=False) duplicates = [] for edge in mg.graph.edges: if edge[2] != 0: duplicates.append(edge) for duplicate in duplicates: mg.graph.remove_edge(duplicate[0], duplicate[1], key=duplicate[2]) mg.set_node_attributes() return mg
@property def name(self): """ :return: Name of graph """ return self.graph.graph['name'] @property def edge_weight_name(self): """ :return: Name of the edge weight property of graph """ return self.graph.graph['edge_weight_name'] @property def edge_weight_unit(self): """ :return: Units of the edge weight property of graph """ return self.graph.graph['edge_weight_units']
[docs] def add_edge(self, from_index, to_index, weight=None, warn_duplicates=True, edge_properties=None): """ Add edge to graph. Since physically a 'bond' (or other connection between sites) doesn't have a direction, from_index, from_jimage can be swapped with to_index, to_jimage. However, images will always always be shifted so that from_index < to_index and from_jimage becomes (0, 0, 0). :param from_index: index of site connecting from :param to_index: index of site connecting to :param weight (float): e.g. bond length :param warn_duplicates (bool): if True, will warn if trying to add duplicate edges (duplicate edges will not be added in either case) :param edge_properties (dict): any other information to store on graph edges, similar to Structure's site_properties :return: """ # this is not necessary for the class to work, but # just makes it neater if to_index < from_index: to_index, from_index = from_index, to_index # sanitize types from_index, to_index = int(from_index), int(to_index) # check we're not trying to add a duplicate edge # there should only ever be at most one edge # between two sites existing_edge_data = self.graph.get_edge_data(from_index, to_index) if existing_edge_data and warn_duplicates: warnings.warn("Trying to add an edge that already exists from " "site {} to site {}.".format(from_index, to_index)) return # generic container for additional edge properties, # similar to site properties edge_properties = edge_properties or {} if weight: self.graph.add_edge(from_index, to_index, weight=weight, **edge_properties) else: self.graph.add_edge(from_index, to_index, **edge_properties)
[docs] def insert_node(self, i, species, coords, validate_proximity=False, site_properties=None, edges=None): """ A wrapper around Molecule.insert(), which also incorporates the new site into the MoleculeGraph. :param i: Index at which to insert the new site :param species: Species for the new site :param coords: 3x1 array representing coordinates of the new site :param validate_proximity: For Molecule.insert(); if True (default False), distance will be checked to ensure that site can be safely added. :param site_properties: Site properties for Molecule :param edges: List of dicts representing edges to be added to the MoleculeGraph. These edges must include the index of the new site i, and all indices used for these edges should reflect the MoleculeGraph AFTER the insertion, NOT before. Each dict should at least have a "to_index" and "from_index" key, and can also have a "weight" and a "properties" key. :return: """ self.molecule.insert(i, species, coords, validate_proximity=validate_proximity, properties=site_properties) mapping = {} for j in range(len(self.molecule) - 1): if j < i: mapping[j] = j else: mapping[j] = j + 1 nx.relabel_nodes(self.graph, mapping, copy=False) self.graph.add_node(i) self.set_node_attributes() if edges is not None: for edge in edges: try: self.add_edge(edge["from_index"], edge["to_index"], weight=edge.get("weight", None), edge_properties=edge.get("properties", None)) except KeyError: raise RuntimeError("Some edges are invalid.")
[docs] def set_node_attributes(self): """ Replicates molecule site properties (specie, coords, etc.) in the MoleculeGraph. :return: """ species = {} coords = {} properties = {} for node in self.graph.nodes(): species[node] = self.molecule[node].specie.symbol coords[node] = self.molecule[node].coords properties[node] = self.molecule[node].properties nx.set_node_attributes(self.graph, species, "specie") nx.set_node_attributes(self.graph, coords, "coords") nx.set_node_attributes(self.graph, properties, "properties")
[docs] def alter_edge(self, from_index, to_index, new_weight=None, new_edge_properties=None): """ Alters either the weight or the edge_properties of an edge in the MoleculeGraph. :param from_index: int :param to_index: int :param new_weight: alter_edge does not require that weight be altered. As such, by default, this is None. If weight is to be changed, it should be a float. :param new_edge_properties: alter_edge does not require that edge_properties be altered. As such, by default, this is None. If any edge properties are to be changed, it should be a dictionary of edge properties to be changed. :return: """ existing_edge = self.graph.get_edge_data(from_index, to_index) # ensure that edge exists before attempting to change it if not existing_edge: raise ValueError("Edge between {} and {} cannot be altered;\ no edge exists between those sites.".format( from_index, to_index )) # Third index should always be 0 because there should only be one edge between any two nodes if new_weight is not None: self.graph[from_index][to_index][0]['weight'] = new_weight if new_edge_properties is not None: for prop in list(new_edge_properties.keys()): self.graph[from_index][to_index][0][prop] = new_edge_properties[prop]
[docs] def break_edge(self, from_index, to_index, allow_reverse=False): """ Remove an edge from the MoleculeGraph :param from_index: int :param to_index: int :param allow_reverse: If allow_reverse is True, then break_edge will attempt to break both (from_index, to_index) and, failing that, will attempt to break (to_index, from_index). :return: """ # ensure that edge exists before attempting to remove it existing_edge = self.graph.get_edge_data(from_index, to_index) existing_reverse = None if existing_edge: self.graph.remove_edge(from_index, to_index) else: if allow_reverse: existing_reverse = self.graph.get_edge_data(to_index, from_index) if existing_reverse: self.graph.remove_edge(to_index, from_index) else: raise ValueError("Edge cannot be broken between {} and {};\ no edge exists between those sites.".format( from_index, to_index ))
[docs] def remove_nodes(self, indices): """ A wrapper for Molecule.remove_sites(). :param indices: list of indices in the current Molecule (and graph) to be removed. :return: """ self.molecule.remove_sites(indices) self.graph.remove_nodes_from(indices) mapping = {} for correct, current in enumerate(sorted(self.graph.nodes)): mapping[current] = correct nx.relabel_nodes(self.graph, mapping, copy=False) self.set_node_attributes()
[docs] def split_molecule_subgraphs(self, bonds, allow_reverse=False, alterations=None): """ Split MoleculeGraph into two or more MoleculeGraphs by breaking a set of bonds. This function uses MoleculeGraph.break_edge repeatedly to create disjoint graphs (two or more separate molecules). This function does not only alter the graph information, but also changes the underlying Moledules. If the bonds parameter does not include sufficient bonds to separate two molecule fragments, then this function will fail. Currently, this function naively assigns the charge of the total molecule to a single submolecule. A later effort will be to actually accurately assign charge. NOTE: This function does not modify the original MoleculeGraph. It creates a copy, modifies that, and returns two or more new MoleculeGraph objects. :param bonds: list of tuples (from_index, to_index) representing bonds to be broken to split the MoleculeGraph. :param alterations: a dict {(from_index, to_index): alt}, where alt is a dictionary including weight and/or edge properties to be changed following the split. :param allow_reverse: If allow_reverse is True, then break_edge will attempt to break both (from_index, to_index) and, failing that, will attempt to break (to_index, from_index). :return: list of MoleculeGraphs """ self.set_node_attributes() original = copy.deepcopy(self) for bond in bonds: original.break_edge(bond[0], bond[1], allow_reverse=allow_reverse) if nx.is_weakly_connected(original.graph): raise MolGraphSplitError("Cannot split molecule; \ MoleculeGraph is still connected.") else: # alter any bonds before partition, to avoid remapping if alterations is not None: for (u, v) in alterations.keys(): if "weight" in alterations[(u, v)]: weight = alterations[(u, v)]["weight"] del alterations[(u, v)]["weight"] edge_properties = alterations[(u, v)] \ if len(alterations[(u, v)]) != 0 else None original.alter_edge(u, v, new_weight=weight, new_edge_properties=edge_properties) else: original.alter_edge(u, v, new_edge_properties=alterations[(u, v)]) sub_mols = [] # Had to use nx.weakly_connected_components because of deprecation # of nx.weakly_connected_component_subgraphs subgraphs = [original.graph.subgraph(c) for c in nx.weakly_connected_components(original.graph)] for subg in subgraphs: nodes = sorted(list(subg.nodes)) # Molecule indices are essentially list-based, so node indices # must be remapped, incrementing from 0 mapping = {} for i in range(len(nodes)): mapping[nodes[i]] = i # just give charge to whatever subgraph has node with index 0 # TODO: actually figure out how to distribute charge if 0 in nodes: charge = self.molecule.charge else: charge = 0 # relabel nodes in graph to match mapping new_graph = nx.relabel_nodes(subg, mapping) species = nx.get_node_attributes(new_graph, "specie") coords = nx.get_node_attributes(new_graph, "coords") raw_props = nx.get_node_attributes(new_graph, "properties") properties = {} for prop_set in raw_props.values(): for prop in prop_set.keys(): if prop in properties: properties[prop].append(prop_set[prop]) else: properties[prop] = [prop_set[prop]] # Site properties must be present for all atoms in the molecule # in order to be used for Molecule instantiation for k, v in properties.items(): if len(v) != len(species): del properties[k] new_mol = Molecule(species, coords, charge=charge, site_properties=properties) graph_data = json_graph.adjacency_data(new_graph) # create new MoleculeGraph sub_mols.append(MoleculeGraph(new_mol, graph_data=graph_data)) return sub_mols
[docs] def build_unique_fragments(self): """ Find all possible fragment combinations of the MoleculeGraphs (in other words, all connected induced subgraphs) :return: """ self.set_node_attributes() graph = self.graph.to_undirected() # find all possible fragments, aka connected induced subgraphs frag_dict = {} for ii in range(1, len(self.molecule)): for combination in combinations(graph.nodes, ii): mycomp = [] for idx in combination: mycomp.append(str(self.molecule[idx].specie)) mycomp = "".join(sorted(mycomp)) subgraph = nx.subgraph(graph, combination) if nx.is_connected(subgraph): mykey = mycomp + str(len(subgraph.edges())) if mykey not in frag_dict: frag_dict[mykey] = [copy.deepcopy(subgraph)] else: frag_dict[mykey].append(copy.deepcopy(subgraph)) # narrow to all unique fragments using graph isomorphism unique_frag_dict = {} for key in frag_dict: unique_frags = [] for frag in frag_dict[key]: found = False for f in unique_frags: if _isomorphic(frag, f): found = True break if not found: unique_frags.append(frag) unique_frag_dict[key] = copy.deepcopy(unique_frags) # convert back to molecule graphs unique_mol_graph_dict = {} for key in unique_frag_dict: unique_mol_graph_list = [] for fragment in unique_frag_dict[key]: mapping = {e: i for i, e in enumerate(sorted(fragment.nodes))} remapped = nx.relabel_nodes(fragment, mapping) species = nx.get_node_attributes(remapped, "specie") coords = nx.get_node_attributes(remapped, "coords") edges = {} for from_index, to_index, key in remapped.edges: edge_props = fragment.get_edge_data(from_index, to_index, key=key) edges[(from_index, to_index)] = edge_props unique_mol_graph_list.append(self.with_edges(Molecule(species=species, coords=coords, charge=self.molecule.charge), edges)) frag_key = str(unique_mol_graph_list[0].molecule.composition.alphabetical_formula) + " E" + str( len(unique_mol_graph_list[0].graph.edges())) unique_mol_graph_dict[frag_key] = copy.deepcopy(unique_mol_graph_list) return unique_mol_graph_dict
[docs] def substitute_group(self, index, func_grp, strategy, bond_order=1, graph_dict=None, strategy_params=None): """ Builds off of Molecule.substitute to replace an atom in self.molecule with a functional group. This method also amends self.graph to incorporate the new functional group. NOTE: using a MoleculeGraph will generally produce a different graph compared with using a Molecule or str (when not using graph_dict). :param index: Index of atom to substitute. :param func_grp: Substituent molecule. There are three options: 1. Providing an actual molecule as the input. The first atom must be a DummySpecie X, indicating the position of nearest neighbor. The second atom must be the next nearest atom. For example, for a methyl group substitution, func_grp should be X-CH3, where X is the first site and C is the second site. What the code will do is to remove the index site, and connect the nearest neighbor to the C atom in CH3. The X-C bond indicates the directionality to connect the atoms. 2. A string name. The molecule will be obtained from the relevant template in func_groups.json. 3. A MoleculeGraph object. :param strategy: Class from pymatgen.analysis.local_env. :param bond_order: A specified bond order to calculate the bond length between the attached functional group and the nearest neighbor site. Defaults to 1. :param graph_dict: Dictionary representing the bonds of the functional group (format: {(u, v): props}, where props is a dictionary of properties, including weight. If None, then the algorithm will attempt to automatically determine bonds using one of a list of strategies defined in pymatgen.analysis.local_env. :param strategy_params: dictionary of keyword arguments for strategy. If None, default parameters will be used. :return: """ def map_indices(grp): grp_map = {} # Get indices now occupied by functional group # Subtracting 1 because the dummy atom X should not count atoms = len(grp) - 1 offset = len(self.molecule) - atoms for i in range(atoms): grp_map[i] = i + offset return grp_map # Work is simplified if a graph is already in place if isinstance(func_grp, MoleculeGraph): self.molecule.substitute(index, func_grp.molecule, bond_order=bond_order) mapping = map_indices(func_grp.molecule) for (u, v) in list(func_grp.graph.edges()): edge_props = func_grp.graph.get_edge_data(u, v)[0] weight = None if "weight" in edge_props.keys(): weight = edge_props["weight"] del edge_props["weight"] self.add_edge(mapping[u], mapping[v], weight=weight, edge_properties=edge_props) else: if isinstance(func_grp, Molecule): func_grp = copy.deepcopy(func_grp) else: try: func_grp = copy.deepcopy(FunctionalGroups[func_grp]) except Exception: raise RuntimeError("Can't find functional group in list. " "Provide explicit coordinate instead") self.molecule.substitute(index, func_grp, bond_order=bond_order) mapping = map_indices(func_grp) # Remove dummy atom "X" func_grp.remove_species("X") if graph_dict is not None: for (u, v) in graph_dict.keys(): edge_props = graph_dict[(u, v)] if "weight" in edge_props.keys(): weight = edge_props["weight"] del edge_props["weight"] self.add_edge(mapping[u], mapping[v], weight=weight, edge_properties=edge_props) else: if strategy_params is None: strategy_params = {} strat = strategy(**strategy_params) graph = self.with_local_env_strategy(func_grp, strat) for (u, v) in list(graph.graph.edges()): edge_props = graph.graph.get_edge_data(u, v)[0] weight = None if "weight" in edge_props.keys(): weight = edge_props["weight"] del edge_props["weight"] if 0 not in list(graph.graph.nodes()): # If graph indices have different indexing u, v = (u - 1), (v - 1) self.add_edge(mapping[u], mapping[v], weight=weight, edge_properties=edge_props)
[docs] def replace_group(self, index, func_grp, strategy, bond_order=1, graph_dict=None, strategy_params=None): """ Builds off of Molecule.substitute and MoleculeGraph.substitute_group to replace a functional group in self.molecule with a functional group. This method also amends self.graph to incorporate the new functional group. TODO: Figure out how to replace into a ring structure. :param index: Index of atom to substitute. :param func_grp: Substituent molecule. There are three options: 1. Providing an actual molecule as the input. The first atom must be a DummySpecie X, indicating the position of nearest neighbor. The second atom must be the next nearest atom. For example, for a methyl group substitution, func_grp should be X-CH3, where X is the first site and C is the second site. What the code will do is to remove the index site, and connect the nearest neighbor to the C atom in CH3. The X-C bond indicates the directionality to connect the atoms. 2. A string name. The molecule will be obtained from the relevant template in func_groups.json. 3. A MoleculeGraph object. :param strategy: Class from pymatgen.analysis.local_env. :param bond_order: A specified bond order to calculate the bond length between the attached functional group and the nearest neighbor site. Defaults to 1. :param graph_dict: Dictionary representing the bonds of the functional group (format: {(u, v): props}, where props is a dictionary of properties, including weight. If None, then the algorithm will attempt to automatically determine bonds using one of a list of strategies defined in pymatgen.analysis.local_env. :param strategy_params: dictionary of keyword arguments for strategy. If None, default parameters will be used. :return: """ self.set_node_attributes() neighbors = self.get_connected_sites(index) # If the atom at index is terminal if len(neighbors) == 1: self.substitute_group(index, func_grp, strategy, bond_order=bond_order, graph_dict=graph_dict, strategy_params=strategy_params) else: rings = self.find_rings(including=[index]) if len(rings) != 0: raise RuntimeError("Currently functional group replacement" "cannot occur at an atom within a ring" "structure.") to_remove = set() sizes = dict() disconnected = self.graph.to_undirected() disconnected.remove_node(index) for neighbor in neighbors: sizes[neighbor[2]] = len(nx.descendants(disconnected, neighbor[2])) keep = max(sizes, key=lambda x: sizes[x]) for i in sizes.keys(): if i != keep: to_remove.add(i) self.remove_nodes(list(to_remove)) self.group = self.substitute_group(index, func_grp, strategy, bond_order=bond_order, graph_dict=graph_dict, strategy_params=strategy_params)
[docs] def find_rings(self, including=None): """ Find ring structures in the MoleculeGraph. :param including: list of site indices. If including is not None, then find_rings will only return those rings including the specified sites. By default, this parameter is None, and all rings will be returned. :return: dict {index:cycle}. Each entry will be a ring (cycle, in graph theory terms) including the index found in the Molecule. If there is no cycle including an index, the value will be an empty list. """ # Copies self.graph such that all edges (u, v) matched by edges (v, u) undirected = self.graph.to_undirected() directed = undirected.to_directed() cycles_nodes = [] cycles_edges = [] # Remove all two-edge cycles all_cycles = [c for c in nx.simple_cycles(directed) if len(c) > 2] # Using to_directed() will mean that each cycle always appears twice # So, we must also remove duplicates unique_sorted = [] unique_cycles = [] for cycle in all_cycles: if sorted(cycle) not in unique_sorted: unique_sorted.append(sorted(cycle)) unique_cycles.append(cycle) if including is None: cycles_nodes = unique_cycles else: for i in including: for cycle in unique_cycles: if i in cycle and cycle not in cycles_nodes: cycles_nodes.append(cycle) for cycle in cycles_nodes: edges = [] for i, e in enumerate(cycle): edges.append((cycle[i - 1], e)) cycles_edges.append(edges) return cycles_edges
[docs] def get_connected_sites(self, n): """ Returns a named tuple of neighbors of site n: periodic_site, jimage, index, weight. Index is the index of the corresponding site in the original structure, weight can be None if not defined. :param n: index of Site in Molecule :param jimage: lattice vector of site :return: list of ConnectedSite tuples, sorted by closest first """ connected_sites = set() out_edges = [(u, v, d) for u, v, d in self.graph.out_edges(n, data=True)] in_edges = [(u, v, d) for u, v, d in self.graph.in_edges(n, data=True)] for u, v, d in out_edges + in_edges: weight = d.get('weight', None) if v == n: site = self.molecule[u] dist = self.molecule[v].distance(self.molecule[u]) connected_site = ConnectedSite(site=site, jimage=(0, 0, 0), index=u, weight=weight, dist=dist) else: site = self.molecule[v] dist = self.molecule[u].distance(self.molecule[v]) connected_site = ConnectedSite(site=site, jimage=(0, 0, 0), index=v, weight=weight, dist=dist) connected_sites.add(connected_site) # return list sorted by closest sites first connected_sites = list(connected_sites) connected_sites.sort(key=lambda x: x.dist) return connected_sites
[docs] def get_coordination_of_site(self, n): """ Returns the number of neighbors of site n. In graph terms, simply returns degree of node corresponding to site n. :param n: index of site :return (int): """ number_of_self_loops = sum([1 for n, v in self.graph.edges(n) if n == v]) return self.graph.degree(n) - number_of_self_loops
[docs] def draw_graph_to_file(self, filename="graph", diff=None, hide_unconnected_nodes=False, hide_image_edges=True, edge_colors=False, node_labels=False, weight_labels=False, image_labels=False, color_scheme="VESTA", keep_dot=False, algo="fdp"): """ Draws graph using GraphViz. The networkx graph object itself can also be drawn with networkx's in-built graph drawing methods, but note that this might give misleading results for multigraphs (edges are super-imposed on each other). If visualization is difficult to interpret, `hide_image_edges` can help, especially in larger graphs. :param filename: filename to output, will detect filetype from extension (any graphviz filetype supported, such as pdf or png) :param diff (StructureGraph): an additional graph to compare with, will color edges red that do not exist in diff and edges green that are in diff graph but not in the reference graph :param hide_unconnected_nodes: if True, hide unconnected nodes :param hide_image_edges: if True, do not draw edges that go through periodic boundaries :param edge_colors (bool): if True, use node colors to color edges :param node_labels (bool): if True, label nodes with species and site index :param weight_labels (bool): if True, label edges with weights :param image_labels (bool): if True, label edges with their periodic images (usually only used for debugging, edges to periodic images always appear as dashed lines) :param color_scheme (str): "VESTA" or "JMOL" :param keep_dot (bool): keep GraphViz .dot file for later visualization :param algo: any graphviz algo, "neato" (for simple graphs) or "fdp" (for more crowded graphs) usually give good outputs :return: """ if not which(algo): raise RuntimeError("StructureGraph graph drawing requires " "GraphViz binaries to be in the path.") # Developer note: NetworkX also has methods for drawing # graphs using matplotlib, these also work here. However, # a dedicated tool like GraphViz allows for much easier # control over graph appearance and also correctly displays # mutli-graphs (matplotlib can superimpose multiple edges). g = self.graph.copy() g.graph = {'nodesep': 10.0, 'dpi': 300, 'overlap': "false"} # add display options for nodes for n in g.nodes(): # get label by species name label = "{}({})".format(str(self.molecule[n].specie), n) if node_labels else "" # use standard color scheme for nodes c = EL_COLORS[color_scheme].get(str(self.molecule[n].specie.symbol), [0, 0, 0]) # get contrasting font color # magic numbers account for perceived luminescence # https://stackoverflow.com/questions/1855884/determine-font-color-based-on-background-color fontcolor = '#000000' if 1 - (c[0] * 0.299 + c[1] * 0.587 + c[2] * 0.114) / 255 < 0.5 else '#ffffff' # convert color to hex string color = "#{:02x}{:02x}{:02x}".format(c[0], c[1], c[2]) g.add_node(n, fillcolor=color, fontcolor=fontcolor, label=label, fontname="Helvetica-bold", style="filled", shape="circle") edges_to_delete = [] # add display options for edges for u, v, k, d in g.edges(keys=True, data=True): # retrieve from/to images, set as origin if not defined if "to_image" in d: to_image = d['to_jimage'] else: to_image = (0, 0, 0) # set edge style d['style'] = "solid" if to_image != (0, 0, 0): d['style'] = "dashed" if hide_image_edges: edges_to_delete.append((u, v, k)) # don't show edge directions d['arrowhead'] = "none" # only add labels for images that are not the origin if image_labels: d['headlabel'] = "" if to_image == (0, 0, 0) else "to {}".format((to_image)) d['arrowhead'] = "normal" if d['headlabel'] else "none" # optionally color edges using node colors color_u = g.node[u]['fillcolor'] color_v = g.node[v]['fillcolor'] d['color_uv'] = "{};0.5:{};0.5".format(color_u, color_v) if edge_colors else "#000000" # optionally add weights to graph if weight_labels: units = g.graph.get('edge_weight_units', "") if d.get('weight'): d['label'] = "{:.2f} {}".format(d['weight'], units) # update edge with our new style attributes g.edges[u, v, k].update(d) # optionally remove periodic image edges, # these can be confusing due to periodic boundaries if hide_image_edges: for edge_to_delete in edges_to_delete: g.remove_edge(*edge_to_delete) # optionally hide unconnected nodes, # these can appear when removing periodic edges if hide_unconnected_nodes: g = g.subgraph([n for n in g.degree() if g.degree()[n] != 0]) # optionally highlight differences with another graph if diff: diff = self.diff(diff, strict=True) green_edges = [] red_edges = [] for u, v, k, d in g.edges(keys=True, data=True): if (u, v, d['to_jimage']) in diff['self']: # edge has been deleted red_edges.append((u, v, k)) elif (u, v, d['to_jimage']) in diff['other']: # edge has been added green_edges.append((u, v, k)) for u, v, k in green_edges: g.edges[u, v, k].update({'color_uv': '#00ff00'}) for u, v, k in red_edges: g.edges[u, v, k].update({'color_uv': '#ff0000'}) basename, extension = os.path.splitext(filename) extension = extension[1:] write_dot(g, basename + ".dot") with open(filename, "w") as f: args = [algo, "-T", extension, basename + ".dot"] rs = subprocess.Popen(args, stdout=f, stdin=subprocess.PIPE, close_fds=True) rs.communicate() if rs.returncode != 0: raise RuntimeError("{} exited with return code {}.".format(algo, rs.returncode)) if not keep_dot: os.remove(basename + ".dot")
[docs] def as_dict(self): """ As in :Class: `pymatgen.core.Molecule` except with using `to_dict_of_dicts` from NetworkX to store graph information. """ d = {"@module": self.__class__.__module__, "@class": self.__class__.__name__, "molecule": self.molecule.as_dict(), "graphs": json_graph.adjacency_data(self.graph)} return d
[docs] @classmethod def from_dict(cls, d): """ As in :Class: `pymatgen.core.Molecule` except restoring graphs using `from_dict_of_dicts` from NetworkX to restore graph information. """ m = Molecule.from_dict(d['molecule']) return cls(m, d['graphs'])
def _edges_to_string(self, g): header = "from to to_image " header_line = "---- ---- ------------" edge_weight_name = g.graph["edge_weight_name"] if edge_weight_name: print_weights = ["weight"] edge_label = g.graph["edge_weight_name"] edge_weight_units = g.graph["edge_weight_units"] if edge_weight_units: edge_label += " ({})".format(edge_weight_units) header += " {}".format(edge_label) header_line += " {}".format("-" * max([18, len(edge_label)])) else: print_weights = False s = header + "\n" + header_line + "\n" edges = list(g.edges(data=True)) # sort edges for consistent ordering edges.sort(key=itemgetter(0, 1)) if print_weights: for u, v, data in edges: s += "{:4} {:4} {:12} {:.3e}\n".format(u, v, str(data.get("to_jimage", (0, 0, 0))), data.get("weight", 0)) else: for u, v, data in edges: s += "{:4} {:4} {:12}\n".format(u, v, str(data.get("to_jimage", (0, 0, 0)))) return s def __str__(self): s = "Molecule Graph" s += "\nMolecule: \n{}".format(self.molecule.__str__()) s += "\nGraph: {}\n".format(self.name) s += self._edges_to_string(self.graph) return s def __repr__(self): s = "Molecule Graph" s += "\nMolecule: \n{}".format(self.molecule.__repr__()) s += "\nGraph: {}\n".format(self.name) s += self._edges_to_string(self.graph) return s def __len__(self): """ :return: length of Molecule / number of nodes in graph """ return len(self.molecule)
[docs] def sort(self, key=None, reverse=False): """ Same as Molecule.sort(), also remaps nodes in graph. :param key: :param reverse: :return: """ old_molecule = self.molecule.copy() # sort Molecule self.molecule._sites = sorted(self.molecule._sites, key=key, reverse=reverse) # apply Molecule ordering to graph mapping = {idx: self.molecule.index(site) for idx, site in enumerate(old_molecule)} self.graph = nx.relabel_nodes(self.graph, mapping, copy=True) # normalize directions of edges edges_to_remove = [] edges_to_add = [] for u, v, k, d in self.graph.edges(keys=True, data=True): if v < u: new_v, new_u, new_d = u, v, d.copy() new_d['to_jimage'] = (0, 0, 0) edges_to_remove.append((u, v, k)) edges_to_add.append((new_u, new_v, new_d)) # add/delete marked edges for edges_to_remove in edges_to_remove: self.graph.remove_edge(*edges_to_remove) for (u, v, d) in edges_to_add: self.graph.add_edge(u, v, **d)
def __copy__(self): return MoleculeGraph.from_dict(self.as_dict()) def __eq__(self, other): """ Two MoleculeGraphs are equal if they have equal Molecules, and have the same edges between Sites. Edge weights can be different and MoleculeGraphs can still be considered equal. :param other: MoleculeGraph :return (bool): """ # sort for consistent node indices # PeriodicSite should have a proper __hash__() value, # using its frac_coords as a convenient key try: mapping = {tuple(site.coords): self.molecule.index(site) for site in other.molecule} except ValueError: return False other_sorted = other.__copy__() other_sorted.sort(key=lambda site: mapping[tuple(site.coords)]) edges = {(u, v) for u, v, d in self.graph.edges(keys=False, data=True)} edges_other = {(u, v) for u, v, d in other_sorted.graph.edges(keys=False, data=True)} return (edges == edges_other) and \ (self.molecule == other_sorted.molecule)
[docs] def isomorphic_to(self, other): """ Checks if the graphs of two MoleculeGraphs are isomorphic to one another. In order to prevent problems with misdirected edges, both graphs are converted into undirected nx.Graph objects. :param other: MoleculeGraph object to be compared. :return: bool """ if len(self.molecule) != len(other.molecule): return False elif self.molecule.composition.alphabetical_formula != other.molecule.composition.alphabetical_formula: return False elif len(self.graph.edges()) != len(other.graph.edges()): return False else: return _isomorphic(self.graph, other.graph)
[docs] def diff(self, other, strict=True): """ Compares two MoleculeGraphs. Returns dict with keys 'self', 'other', 'both' with edges that are present in only one MoleculeGraph ('self' and 'other'), and edges that are present in both. The Jaccard distance is a simple measure of the dissimilarity between two MoleculeGraphs (ignoring edge weights), and is defined by 1 - (size of the intersection / size of the union) of the sets of edges. This is returned with key 'dist'. Important note: all node indices are in terms of the MoleculeGraph this method is called from, not the 'other' MoleculeGraph: there is no guarantee the node indices will be the same if the underlying Molecules are ordered differently. :param other: MoleculeGraph :param strict: if False, will compare bonds from different Molecules, with node indices replaced by Specie strings, will not count number of occurrences of bonds :return: """ if self.molecule != other.molecule and strict: return ValueError("Meaningless to compare MoleculeGraphs if " "corresponding Molecules are different.") if strict: # sort for consistent node indices # PeriodicSite should have a proper __hash__() value, # using its frac_coords as a convenient key mapping = {tuple(site.frac_coords): self.molecule.index(site) for site in other.molecule} other_sorted = other.__copy__() other_sorted.sort(key=lambda site: mapping[tuple(site.frac_coords)]) edges = {(u, v, d.get('to_jimage', (0, 0, 0))) for u, v, d in self.graph.edges(keys=False, data=True)} edges_other = {(u, v, d.get('to_jimage', (0, 0, 0))) for u, v, d in other_sorted.graph.edges(keys=False, data=True)} else: edges = {(str(self.molecule[u].specie), str(self.molecule[v].specie)) for u, v, d in self.graph.edges(keys=False, data=True)} edges_other = {(str(other.structure[u].specie), str(other.structure[v].specie)) for u, v, d in other.graph.edges(keys=False, data=True)} if len(edges) == 0 and len(edges_other) == 0: jaccard_dist = 0 # by definition else: jaccard_dist = 1 - len(edges.intersection(edges_other)) / len(edges.union(edges_other)) return { 'self': edges - edges_other, 'other': edges_other - edges, 'both': edges.intersection(edges_other), 'dist': jaccard_dist }