patternpythonMinor
Removing graph nodes of a specific type and with exactly two neighbors
Viewed 0 times
nodesgraphremovingwithtypetwospecificandexactlyneighbors
Problem
I am working with an undirected, weighted graph. It contains two types of nodes, called "A-type" and "B-type". I am starting with a
(string: Name of First Node, string: Name of Second Node, float: Weight)
Example graph:
My goal is to reduce the graph by removing any nodes of B-type that have exactly two neighbors, and their associated edges, and replacing them with a single edge that has a weight equal to the sum of the weights of removed edges. The reduced version of the example graph would look like this:
And printing the set of edges after reduction would produce this:
{('a1', 'a2', 1.1),
('a1', 'a4', 1.0),
('a1', 'a5', 1.1),
('a2', 'a3', 0.4),
('a3', 'a4', 2.3),
('a3', 'b5', 1.6),
('a3', 'b6', 3.0),
('a4', 'a5', 1.2),
('a4', 'b5', 1.7),
('a5', 'b5', 1.8)}
Note that...
```
import re
import pprint
import operator
import collections
from itertools import chain
def series_reduction(edge_set):
"""
Reduces a set of graph edges by eliminating B-type nodes with two neighbors
Nodes of type B are extracted from the nodes in the set of edges, and become
candidates for removal if they have only two neighbors. The set of removal
candidates is iterated over, and each node it contains becomes the target
node. The target node's edges are removed and replaced with a new edge that
connects the target node's neighbors to each other. The weight of the new
edge is equal to the sum of the removed edges' weights.
@param edge_set: graph edges to perform series reduction on
@type edge_set: set
@rtype: set
"""
neighbor_counter = collections.defaultdict(int)
set of tuples of the following form that describe the graph edges:(string: Name of First Node, string: Name of Second Node, float: Weight)
Example graph:
My goal is to reduce the graph by removing any nodes of B-type that have exactly two neighbors, and their associated edges, and replacing them with a single edge that has a weight equal to the sum of the weights of removed edges. The reduced version of the example graph would look like this:
And printing the set of edges after reduction would produce this:
{('a1', 'a2', 1.1),
('a1', 'a4', 1.0),
('a1', 'a5', 1.1),
('a2', 'a3', 0.4),
('a3', 'a4', 2.3),
('a3', 'b5', 1.6),
('a3', 'b6', 3.0),
('a4', 'a5', 1.2),
('a4', 'b5', 1.7),
('a5', 'b5', 1.8)}
Note that...
- Nodes B6 and B5 were not removed, even though they are B-type nodes, because they do not have exactly two neighbors.
- A2 was not removed because it is not a B-type node
- The weights of the new edges are the sum of the weights of the removed edges.
```
import re
import pprint
import operator
import collections
from itertools import chain
def series_reduction(edge_set):
"""
Reduces a set of graph edges by eliminating B-type nodes with two neighbors
Nodes of type B are extracted from the nodes in the set of edges, and become
candidates for removal if they have only two neighbors. The set of removal
candidates is iterated over, and each node it contains becomes the target
node. The target node's edges are removed and replaced with a new edge that
connects the target node's neighbors to each other. The weight of the new
edge is equal to the sum of the removed edges' weights.
@param edge_set: graph edges to perform series reduction on
@type edge_set: set
@rtype: set
"""
neighbor_counter = collections.defaultdict(int)
Solution
- Data structure
If you look at the Wikipedia article on graph representations, you'll see that it describes three common data structures that are used to represent graphs: adjacency list, adjacency matrix, and incidence matrix. Notice that the representation you've chosen (set of edges) does not appear. This is because:
-
The set-of-edges representation can't represent all graphs! If there's an isolated node (a node with no edges) there's no way to represent it.
-
The set-of-edges representation doesn't support efficient implementations of the operations that are needed by graph algorithms. For example, it is a hard problem just to iterate over the nodes. You use:
for node in chain.from_iterable(
(node1, node2)
for node1, node2, w in edge_set):but nodes appear in this iteration multiple times (once for each edge incident to the node), so you have to de-duplicate the nodes by storing them in a set.
So the first priority is to transform your graph representation into a more convenient data structure. We could use a package like python-graph, but it's not hard to write your own. Here's a graph implementation using the adjacency list representation:
from collections import defaultdict
class Graph:
"""An undirected weighted graph."""
def __init__(self):
# Map from node to set of its adjacent nodes.
self._graph = defaultdict(set)
# Map from sorted tuple of two nodes to the weight of the edge
# between them.
self._weight = dict()
def has_node(self, n):
"""Return True iff the graph contains the node n."""
return n in self._graph
def nodes(self):
"""Return an iterator over the nodes of the graph."""
return iter(self._graph)
def node_count(self):
"""Return the number of nodes of the graph."""
return len(self._graph)
def neighbours(self, n):
"""Return an iterator over the neighbours of the node n."""
return iter(self._graph[n])
def neighbour_count(self, n):
"""Return the number of neighbours of the node n."""
return len(self._graph[n])
def add_node(self, n):
"""Add the node n."""
self._graph[n]
def remove_node(self, n):
"""Remove the node n and all incident edges."""
for n1 in self.neighbours(n):
if n1 != n:
self._graph[n1].remove(n)
del self._weight[self._edge(n, n1)]
del self._graph[n]
def _edge(self, n1, n2):
"""Return the representation of the edge between nodes n1, n2."""
return (n1, n2) if n1 <= n2 else (n2, n1)
def has_edge(self, n1, n2):
"""Return True iff the graph contains an edge between nodes n1, n2."""
return self._edge(n1, n2) in self._weight
def edges(self):
"""Return an iterator over the edges of the graph."""
for n, w in self._weight.items():
yield n + (w,)
def edge_count(self):
"""Return the number of edges of the graph."""
return len(self._weight)
def edge_weight(self, n1, n2):
"""Return the weight of the edge between n1 and n2."""
return self._weight[self._edge(n1, n2)]
def add_edge(self, n1, n2, w):
"""Add an edge between nodes n1 and n2 with weight w."""
self._graph[n1].add(n2)
self._graph[n2].add(n1)
self._weight[self._edge(n1, n2)] = w
def remove_edge(self, n1, n2):
"""Remove the edge between nodes n1 and n2."""
self._graph[n1].remove(n2)
if n1 != n2:
self._graph[n2].remove(n1)
del self._weight[self._edge(n1, n2)]Using this data structure it is easy to implement the algorithm:
def series_reduction(edge_set):
# Build graph from edge set.
g = Graph()
for e in edge_set:
g.add_edge(*e)
# Remove B-type nodes with exactly two neighbours.
btype_nodes = [n for n in g.nodes() if n.startswith('b')]
for n in btype_nodes:
if g.neighbour_count(n) == 2:
n1, n2 = g.neighbours(n)
if g.has_edge(n1, n2):
pass # see §2.2 below
else:
g.add_edge(n1, n2, g.weight(n, n1) + g.weight(n, n2))
g.remove_node(n)
# Return modified edge set.
return set(g.edges())The code would be even simpler if you were able to use the adjacency list representation throughout your program: then you'd be able to drop the conversion steps.
- Other review comments
-
Instead of:
re.search('\Ab', node):write:
node.startswith('b')-
series_reduction goes wrong if, when removing a B-type node between two nodes \$a\$ and \$c\$, there was already an edge between \$a\$ and \$c\$:>>> series_reduction({('a', 'b', 1), ('b', 'c', 1), ('a', 'c', 1)})
{('a', 'c', 1), ('a', 'c', 2)}The result is not a valid graph: it has two edges between the same pair of nodes.
You'll see that my revised c
Code Snippets
for node in chain.from_iterable(
(node1, node2)
for node1, node2, w in edge_set):from collections import defaultdict
class Graph:
"""An undirected weighted graph."""
def __init__(self):
# Map from node to set of its adjacent nodes.
self._graph = defaultdict(set)
# Map from sorted tuple of two nodes to the weight of the edge
# between them.
self._weight = dict()
def has_node(self, n):
"""Return True iff the graph contains the node n."""
return n in self._graph
def nodes(self):
"""Return an iterator over the nodes of the graph."""
return iter(self._graph)
def node_count(self):
"""Return the number of nodes of the graph."""
return len(self._graph)
def neighbours(self, n):
"""Return an iterator over the neighbours of the node n."""
return iter(self._graph[n])
def neighbour_count(self, n):
"""Return the number of neighbours of the node n."""
return len(self._graph[n])
def add_node(self, n):
"""Add the node n."""
self._graph[n]
def remove_node(self, n):
"""Remove the node n and all incident edges."""
for n1 in self.neighbours(n):
if n1 != n:
self._graph[n1].remove(n)
del self._weight[self._edge(n, n1)]
del self._graph[n]
def _edge(self, n1, n2):
"""Return the representation of the edge between nodes n1, n2."""
return (n1, n2) if n1 <= n2 else (n2, n1)
def has_edge(self, n1, n2):
"""Return True iff the graph contains an edge between nodes n1, n2."""
return self._edge(n1, n2) in self._weight
def edges(self):
"""Return an iterator over the edges of the graph."""
for n, w in self._weight.items():
yield n + (w,)
def edge_count(self):
"""Return the number of edges of the graph."""
return len(self._weight)
def edge_weight(self, n1, n2):
"""Return the weight of the edge between n1 and n2."""
return self._weight[self._edge(n1, n2)]
def add_edge(self, n1, n2, w):
"""Add an edge between nodes n1 and n2 with weight w."""
self._graph[n1].add(n2)
self._graph[n2].add(n1)
self._weight[self._edge(n1, n2)] = w
def remove_edge(self, n1, n2):
"""Remove the edge between nodes n1 and n2."""
self._graph[n1].remove(n2)
if n1 != n2:
self._graph[n2].remove(n1)
del self._weight[self._edge(n1, n2)]def series_reduction(edge_set):
# Build graph from edge set.
g = Graph()
for e in edge_set:
g.add_edge(*e)
# Remove B-type nodes with exactly two neighbours.
btype_nodes = [n for n in g.nodes() if n.startswith('b')]
for n in btype_nodes:
if g.neighbour_count(n) == 2:
n1, n2 = g.neighbours(n)
if g.has_edge(n1, n2):
pass # see §2.2 below
else:
g.add_edge(n1, n2, g.weight(n, n1) + g.weight(n, n2))
g.remove_node(n)
# Return modified edge set.
return set(g.edges())re.search('\Ab', node):node.startswith('b')Context
StackExchange Code Review Q#81880, answer score: 6
Revisions (0)
No revisions yet.