patternpythonMinor
Basic graph traversals
Viewed 0 times
traversalsgraphbasic
Problem
Below is an implementation of two graph traversals. Graph constructor creates a random graph with a fixed number of edges (fixed as a proportion of the maximum number of vertices,
Any comments welcome, but especially data structure and algorithm comments. Tests are not implemented properly, but it is ok in this case.
```
import random
class Graph:
def __init__(self, number_nodes, max_number_edges = 0.5):
if (max_number_edges > 1) or (max_number_edges (max_number_edges * (number_nodes - key - 1)):
index_to_remove = random.randint(0, len(value)-1)
del value[index_to_remove]
def Print(self):
for key, value in self.nodes_connections.iteritems():
print key, value
def IterateInBreadth(self, start_node_id):
visited_nodes_ids = []
scheduled_nodes_ids = [start_node_id]
while scheduled_nodes_ids:
node_id = scheduled_nodes_ids[0]
new_scheduled_nodes_ids = [node for node in self.nodes_connections[node_id]
if not (node in visited_nodes_ids or node in scheduled_nodes_ids)]
new_scheduled_nodes_ids += [key for key, value in self.nodes_connections.iteritems()
if (node_id in value) and not (key in visited_nodes_ids or key in scheduled_nodes_ids)]
scheduled_nodes_ids += new_scheduled_nodes_ids
visited_nodes_ids.append(node_id)
del scheduled_nodes_ids[0]
return visited_nodes_ids
def __IterateInDepth(self, start_node_id, visited_nodes):
visited_nodes.append(start_node_id)
for key, value in self.nodes_connections.iteritems():
if start_node_id in value:
if key not in visited_nodes:
self.__IterateInDepth(start_node_id = key, visited_nodes = visited_nodes)
for value in self.nodes_connections[start_node_id]:
if value
number_nodes-1).Any comments welcome, but especially data structure and algorithm comments. Tests are not implemented properly, but it is ok in this case.
```
import random
class Graph:
def __init__(self, number_nodes, max_number_edges = 0.5):
if (max_number_edges > 1) or (max_number_edges (max_number_edges * (number_nodes - key - 1)):
index_to_remove = random.randint(0, len(value)-1)
del value[index_to_remove]
def Print(self):
for key, value in self.nodes_connections.iteritems():
print key, value
def IterateInBreadth(self, start_node_id):
visited_nodes_ids = []
scheduled_nodes_ids = [start_node_id]
while scheduled_nodes_ids:
node_id = scheduled_nodes_ids[0]
new_scheduled_nodes_ids = [node for node in self.nodes_connections[node_id]
if not (node in visited_nodes_ids or node in scheduled_nodes_ids)]
new_scheduled_nodes_ids += [key for key, value in self.nodes_connections.iteritems()
if (node_id in value) and not (key in visited_nodes_ids or key in scheduled_nodes_ids)]
scheduled_nodes_ids += new_scheduled_nodes_ids
visited_nodes_ids.append(node_id)
del scheduled_nodes_ids[0]
return visited_nodes_ids
def __IterateInDepth(self, start_node_id, visited_nodes):
visited_nodes.append(start_node_id)
for key, value in self.nodes_connections.iteritems():
if start_node_id in value:
if key not in visited_nodes:
self.__IterateInDepth(start_node_id = key, visited_nodes = visited_nodes)
for value in self.nodes_connections[start_node_id]:
if value
Solution
- It's not very relevant for traversal, but a dict-of-sets may be a better overall structure than a dict-of-lists, as it would let you check for edge existence between two random nodes in amortized constant time, as opposed to worst case linear on the number of edges.
- Using sets instead of lists for things like
visited_nodesis bound to have a significant impact on performance for larger graphs though.
- Another point to consider, since you are building an iterator, is to make your function return, well, an iterator of course! You can always materialize them calling
liston the return, but for traversals of huge graphs it may spare you a generous amount of memory.
I'm going to skip the making it a class part, but with
graph being a dict-of-iterables, and the above points in mind, you could implement depth-first iteration as:def _dfs_iter(graph, node, visited_nodes):
if node in visited_nodes:
raise StopIteration
visited_nodes.add(node)
yield node
for next_node in graph[node]:
for n in _dfs_iter(graph, next_node, visited_nodes):
yield n
def dfs_iter(graph, node):
assert node in graph
visited_nodes = set()
return _dfs_iter(graph, node, visited_nodes)- There is no native Python data structure really well suited to create a FIFO queue. There is
dequeincollections, but it is a little obscure language feature, and also not ideally suited, so I'm going to pass on using it. Your implementation using a list and removing items from the front will lead to a terrible worse case performance, probably quadratic in the number of nodes. At the cost of not releasing the memory early, I think it is better to never remove items from the queue, and use an indexing pointer.
With the same caveats as before, you could do breadth-first iteration as:
def bfs_iter(graph, node):
assert node in graph
fifo_index = 0
fifo_queue = [node]
scheduled_nodes = set(fifo_queue)
while fifo_index < len(fifo_queue):
node = fifo_queue[fifo_index]
fifo_index += 1
fifo_queue.extend([n for n in graph[node] if n not in scheduled_nodes])
scheduled_nodes.update(graph[node])
yield node- Note that I'm keeping duplicate accounting on scheduled_nodes, both in
fifo_queueandscheduled_nodes. This is to have the ordering of the FIFO queue and the fast membership check of a hash table. You could get rid ofscheduled_nodesby checking againstmembershipinfifo_queue, but you will again get quadratic performance, not a good thing.
A simple test on this graph:
6 - 0 - 1
| / | \ |
5 4 - 3 - 2 - 7seems to yield correct results:
if __name__ == '__main__':
graph = {0: set([1, 4, 5, 6,]),
1: set([0, 3,]),
2: set([3, 7]),
3: set([0, 1, 2, 3, 4,]),
4: set([0, 3,]),
5: set([0, 6,]),
6: set([0, 5,]),
7: set([2,]),
}
assert list(dfs_iter(graph, 0)) == [0, 1, 3, 2, 7, 4, 5, 6]
assert list(bfs_iter(graph, 0)) == [0, 1, 4, 5, 6, 3, 2, 7]Note that, since we are using sets, the exact order of iteration over the connected nodes is implementation dependent, so a failure form the above tests doesn't necessarily mean that something is broken in the algorithm.
Code Snippets
def _dfs_iter(graph, node, visited_nodes):
if node in visited_nodes:
raise StopIteration
visited_nodes.add(node)
yield node
for next_node in graph[node]:
for n in _dfs_iter(graph, next_node, visited_nodes):
yield n
def dfs_iter(graph, node):
assert node in graph
visited_nodes = set()
return _dfs_iter(graph, node, visited_nodes)def bfs_iter(graph, node):
assert node in graph
fifo_index = 0
fifo_queue = [node]
scheduled_nodes = set(fifo_queue)
while fifo_index < len(fifo_queue):
node = fifo_queue[fifo_index]
fifo_index += 1
fifo_queue.extend([n for n in graph[node] if n not in scheduled_nodes])
scheduled_nodes.update(graph[node])
yield node6 - 0 - 1
| / | \ |
5 4 - 3 - 2 - 7if __name__ == '__main__':
graph = {0: set([1, 4, 5, 6,]),
1: set([0, 3,]),
2: set([3, 7]),
3: set([0, 1, 2, 3, 4,]),
4: set([0, 3,]),
5: set([0, 6,]),
6: set([0, 5,]),
7: set([2,]),
}
assert list(dfs_iter(graph, 0)) == [0, 1, 3, 2, 7, 4, 5, 6]
assert list(bfs_iter(graph, 0)) == [0, 1, 4, 5, 6, 3, 2, 7]Context
StackExchange Code Review Q#96500, answer score: 4
Revisions (0)
No revisions yet.