Source code for schrodinger.application.matsci.graph
"""
Module containing methods applied on networkx graph
Copyright Schrodinger, LLC. All rights reserved.
"""
import networkx
from itertools import combinations
BFS_SEARCH = networkx.algorithms.traversal.breadth_first_search
MAX_CHECKS = 10000
[docs]def get_sub_graphs(graph):
"""
Generator of the disconnect graphs in the passed graph.
:type graph: 'networkx.classes.graph.Graph'
:param graph: The graph to find subgraphs in
:rtype: 'networkx.classes.graph.Graph'
:return: subgraph of the graph
"""
for gindex in networkx.connected_components(graph):
yield graph.subgraph(gindex).copy()
[docs]def get_fused_cycles(graph):
"""
Return cycles (fused and non-fused) in the graph
:type graph: 'networkx.classes.graph.Graph'
:param graph: The graph to find cycles in
:rtype: list(set)
:return: list of sets of nodes that make each cycle
"""
# Get simple cycles
cycles = list(set(x) for x in networkx.cycle_basis(graph))
# No cycles found
if not cycles:
return cycles
# Fuse cycles
while True:
for cid1, cid2 in combinations(range(len(cycles)), 2):
if not cycles[cid1].isdisjoint(cycles[cid2]):
fused_ring = cycles[cid1].union(cycles[cid2])
new_rings = [
cycles[x]
for x in range(len(cycles))
if x not in [cid1, cid2]
]
new_rings.append(fused_ring)
cycles = new_rings
break
else:
break
return cycles
def _fix_ring_path(graph, backbone_path):
"""
Backbone path can have degenerate results if cycle nodes are part of it.
This method fixes the issue by selecting the path with lowest indexes
:type graph: 'networkx.classes.graph.Graph'
:param graph: The graph to fix the ring path in
:type backbone_path: list
:param backbone_path: list of nodes in the longest path in the graph, with
no preference in case of nodes that are path of the cycle
:rtype: list
:return: list of nodes in the longest path in the graph. Between the two end
node, the node with lower index is considered as the first element of
the list and the other as the last. If cycles nodes (rings) are part of
the path then among the degenerate paths, path containing lower indexes
(sorted) is selected.
"""
cycles = get_fused_cycles(graph)
if not cycles:
return backbone_path
# Create cycle associations for faster checks
node_to_cid = {}
for cid, cycle in enumerate(cycles, 1):
for cycle_node in cycle:
node_to_cid[cycle_node] = cid
# Generate new sorted backbone path from current backbone path
sorted_path = []
ring_path = []
last_ring = None
for node in backbone_path:
current_ring = node_to_cid.get(node, None)
if not last_ring and not current_ring:
# Add to the sorted path when last atom and current atom is linear
sorted_path.append(node)
elif last_ring == current_ring or (not last_ring and current_ring):
# Add to ring path when last atom and current atom are part of
# same ring or when a new ring start
ring_path.append(node)
elif last_ring != current_ring and ring_path:
# When last atom and current atom are not of same ring, find the
# local sorted path for the ring path and add to the sorted path
all_ring_paths = networkx.all_shortest_paths(
graph, source=ring_path[0], target=ring_path[-1])
ring_path = sorted(all_ring_paths)[0]
sorted_path.extend(ring_path)
if current_ring:
# This condition should not occur for fused rings
raise ValueError('Cycle nodes not properly fused.')
else:
# Reset ring path and add the current node to the sorted path
sorted_path.append(node)
ring_path = []
else:
# This should never happen, but good to have a check for it
raise ValueError(f'Node {node} does not meet any conditions.')
last_ring = current_ring
return sorted_path
[docs]def find_backbone(graph):
"""
Find the shortest path between atoms that are furthest apart
:type graph: 'networkx.classes.graph.Graph'
:param graph: The graph to find longest path in
:rtype: list
:return: list of nodes in the longest path in the graph. Between the two end
node, the node with lower index is considered as the first element of
the list and the other as the last. In case of degeneracy due to cycles
nodes in the path, the shortest path containing lowest indexes is
selected.
"""
# Get all nodes
nodes = sorted(graph.nodes())
# Return empty path if backbone does not have atleast 2 atoms
if len(nodes) < 2:
return []
# First select lowest index node in the graph and find the
# farthest node from it using bfs. This will be one of the end nodes
# then find the other end node by finding the farthest node from the
# first end node using bfs. The method is described in
# "https://stackoverflow.com/questions/20010472/proof-of-correctness-
# algorithm-for-diameter-of-a-tree-in-graph-theory"
source_node = nodes[0]
end_nodes = []
for _ in range(2):
# Transverse the graph using breadth first search, and get the ordered
# dict where the key is the source node and item is the list of next
# nodes
bfs_nodes_dict = BFS_SEARCH.bfs_successors(graph, source=source_node)
# Convert the bfs nodes dict to list to get the last key source node
# and its values as end_targets
end_source, end_targets = list(bfs_nodes_dict)[-1]
# Get the last node, with largest atom index
source_node = sorted(end_targets)[-1]
end_nodes.append(source_node)
# Sort end atoms indexes to make backbone path always go from
# lower to higher node
end_nodes.sort()
# Get the shortest path with lowest indexes. Networkx does not handle
# degeneracy due to cycles properly, so using all_shortest_paths to compare
# each path
backbone_path = []
for count, path in enumerate(
networkx.all_shortest_paths(
graph, source=end_nodes[0], target=end_nodes[1])):
# The number of paths increase exponentially with number of rings, at
# this point its better to fix the degeneracy locally
if count >= MAX_CHECKS:
backbone_path = _fix_ring_path(graph, backbone_path)
break
# Update if current path is less than backbone path, if yes update
# it as backbone path. The less than operator on path (list type) checks
# for sorting (https://docs.python.org/3/tutorial/datastructures.html
# #comparing-sequences-and-other-types). Also update current path as
# backbone path if backbone path is empty
if not backbone_path or path < backbone_path:
backbone_path = path
return backbone_path
[docs]def find_segments(graph):
"""
Find the segments in the input graph. Segments are path containing nodes
where no node appears in a segment twice. Longest segments are always
created first and then recursively smaller ones.
:type graph: 'networkx.classes.graph.Graph'
:param graph: The graph to find segments in
:rtype: list(list)
:return: list of list of nodes. Each sublist is a list of ordered nodes,
starting from lower index of the two end nodes, forming the longest
path graph not containing the nodes already considered.
"""
segments = []
# Loop over all disconnected graphs
for sub_graph in get_sub_graphs(graph):
# Search for segment only if graph contains more than 1 node
if len(set(sub_graph.nodes())) > 1:
# Find the backbone for the graph, do not add to segment if no
# backbone is found
backbone = find_backbone(sub_graph)
# Check if backbone was found and continue to next graph it was not
if not backbone:
continue
# Add the backbone to segments
segments.append(backbone)
# Create a copy of graph with backbone removed
new_graph = sub_graph.copy()
for backbone_node in backbone:
new_graph.remove_node(backbone_node)
# Find child segments of the backbone removed graph recursively
child_segments = find_segments(new_graph)
for child_chain in child_segments:
# Check if there were any segments (backbones) in the child
if child_chain:
segments.append(child_chain)
return segments