patternpythonMinor
Independent cascade modelling in Python
Viewed 0 times
pythoncascademodellingindependent
Problem
I'm trying to study network diffusion. I took the code form this. However, I would like to see if there's any potential to optimize the code to be faster and clearer. This is what I'm trying to do.
In a node, get all the successors and loop through to get the property in an edge to see if the probability is more than random number from (0,1). If yes, then that node should be activated, if no then move on. I use iGraph library for any graph manipulation.
```
def independent_cascade_igraph(G, seeds, steps=0):
# init activation probabilities
for e in G.es():
if 'act_prob' not in e.attributes():
e['act_prob'] = 0.1
elif e['act_prob'] > 1:
raise Exception("edge activation probability:", e['act_prob'], "cannot be larger than 1")
# perform diffusion
A = copy.deepcopy(seeds) # prevent side effect
if steps 0 and len(A) < G.vcount():
len_old = len(A)
(A, activated_nodes_of_this_round, cur_tried_edges) = _diffuse_one_round(G, A, tried_edges)
layer_i_nodes.append(activated_nodes_of_this_round)
tried_edges = tried_edges.union(cur_tried_edges)
if len(A) == len_old:
break
steps -= 1
return layer_i_nodes
def _diffuse_one_round(G, A, tried_edges):
activated_nodes_of_this_round = set()
cur_tried_edges = set()
for s in A:
for nb in G.successors(s):
if nb in A or (s, nb) in tried_edges or (s, nb) in cur_tried_edges:
continue
if _prop_success(G, s, nb):
activated_nodes_of_this_round.add(nb)
cur_tried_edges.add((s, nb))
activated_nodes_of_this_round = list(activated_nodes_of_this_round)
A.extend(activated_nodes_of_this_round)
return A, activated_nodes_of_this_round, cur_tried_edges
def _prop_success(G, src, dest):
'''
act_prob = 0.1
for e in G.es():
if (src, dest) == e.tuple:
act_prob = e['act_prob']
break
'''
In a node, get all the successors and loop through to get the property in an edge to see if the probability is more than random number from (0,1). If yes, then that node should be activated, if no then move on. I use iGraph library for any graph manipulation.
```
def independent_cascade_igraph(G, seeds, steps=0):
# init activation probabilities
for e in G.es():
if 'act_prob' not in e.attributes():
e['act_prob'] = 0.1
elif e['act_prob'] > 1:
raise Exception("edge activation probability:", e['act_prob'], "cannot be larger than 1")
# perform diffusion
A = copy.deepcopy(seeds) # prevent side effect
if steps 0 and len(A) < G.vcount():
len_old = len(A)
(A, activated_nodes_of_this_round, cur_tried_edges) = _diffuse_one_round(G, A, tried_edges)
layer_i_nodes.append(activated_nodes_of_this_round)
tried_edges = tried_edges.union(cur_tried_edges)
if len(A) == len_old:
break
steps -= 1
return layer_i_nodes
def _diffuse_one_round(G, A, tried_edges):
activated_nodes_of_this_round = set()
cur_tried_edges = set()
for s in A:
for nb in G.successors(s):
if nb in A or (s, nb) in tried_edges or (s, nb) in cur_tried_edges:
continue
if _prop_success(G, s, nb):
activated_nodes_of_this_round.add(nb)
cur_tried_edges.add((s, nb))
activated_nodes_of_this_round = list(activated_nodes_of_this_round)
A.extend(activated_nodes_of_this_round)
return A, activated_nodes_of_this_round, cur_tried_edges
def _prop_success(G, src, dest):
'''
act_prob = 0.1
for e in G.es():
if (src, dest) == e.tuple:
act_prob = e['act_prob']
break
'''
Solution
To start with, I'd add the activation probability as a parameter of the
However I recommend you define your own
The copy of the seeds can be simplified since it represent vertices which are always integers:
The two functions for the diffusion hold the same logic, only the stopping condition differ. DRY:
Updating the
You don't need to return
I'm going to assume that you actually what to retrieve the attribute here, orherwise just remove this function, all the attribute handling at the beginning and use
You can then test with:
Note however that the very purpose of
I also renamed some variables to make the code more explicit, which you should do more often.
independant_cascade_igraph function. That way you are assured that the attribute is defined on the edges and can also manage default value. Thus you are not required to know the name of the edge attribute in advance to use your function:def independent_cascade_igraph(G, seeds, probs=0.1, steps=0):
# init activation probabilities
if (type(probs) in (int, float) and (0 <= probs <= 1)) or all(0 <= p <= 1 for p in probs):
G.es['act_prob'] = probs
else:
raise Exception("Found probability outside of [0, 1]")However I recommend you define your own
Exception so further handling is better performed.The copy of the seeds can be simplified since it represent vertices which are always integers:
# perform diffusion
A = seeds[:] # prevent side effectThe two functions for the diffusion hold the same logic, only the stopping condition differ. DRY:
if steps = vertices:
break
return layer_i_nodesUpdating the
set is more idiomatic.You don't need to return
A from _diffuse_one_round since it is modified in place:def _diffuse_one_round(G, A, tried_edges):
cur_tried_edges = set((s, nb) for s in A for nb in G.successors(s)) - tried_edges
activated_nodes = [node for src,node in cur_tried_edges if node not in A and _prop_success(G, src, node)]
A.extend(set(activated_nodes))
return activated_nodes, cur_tried_edgesI'm going to assume that you actually what to retrieve the attribute here, orherwise just remove this function, all the attribute handling at the beginning and use
random.random() <= 0.1 in the list comprehension:def _prop_success(G, src, dest, r=random.random): # Faster access to random
id = G.get_eid(src, dest)
return r() <= G.es[id]['act_prob']You can then test with:
test_g = Graph([(1,2), (2,1), (1,3), (3,1), (2,3), (2,4), (3,4), (3,5), (4,5), (5,6), (6,5), (6,4), (6,2)], directed=True)
independent_cascade_igraph(test_g, [1], [.5, .5, .2, .2, .3, .5, .1, .2, .2, .6, .6, .3, .4])Note however that the very purpose of
_diffuse_one_round, its gained simplicity and the fact that it has the non-trivial side effect of modifying A in place, all play in favor of merging it in the while loop of _diffuse instead of keeping it:def _diffuse(G, active_nodes, steps=None):
vertices = G.vcount()
visited_edges = set()
activation_history = [active_nodes[:]]
previous_len = None
while previous_len != len(active_nodes) and (step is None or step):
previous_len = len(active_nodes)
current_edges = set((begin, end) for begin in active_nodes for end in G.successors(begin)) - visited_edges
activated_nodes = [node for src,node in current_edges if node not in A and _prop_success(G, src, node)]
active_nodes.extend(set(activated_nodes))
activation_history.append(activated_nodes)
visited_edges |= current_edges
if steps is not None:
steps -= 1
return activation_historyI also renamed some variables to make the code more explicit, which you should do more often.
Code Snippets
def independent_cascade_igraph(G, seeds, probs=0.1, steps=0):
# init activation probabilities
if (type(probs) in (int, float) and (0 <= probs <= 1)) or all(0 <= p <= 1 for p in probs):
G.es['act_prob'] = probs
else:
raise Exception("Found probability outside of [0, 1]")# perform diffusion
A = seeds[:] # prevent side effectif steps <= 0:
# perform diffusion until no more nodes can be activated
return _diffuse(G, A)
# perform diffusion for at most "steps" rounds
return _diffuse(G, A, steps)
def _diffuse(G, A, steps=None):
vertices = G.vcount()
tried_edges = set()
layer_i_nodes = [A[:]]
while True:
len_old = len(A)
activated_nodes_round, tried_edges_round = _diffuse_one_round(G, A, tried_edges)
layer_i_nodes.append(activated_nodes_round)
tried_edges |= tried_edges_round
if len(A) == len_old:
break
if steps is not None:
steps -= 1
if not steps or len(A) >= vertices:
break
return layer_i_nodesdef _diffuse_one_round(G, A, tried_edges):
cur_tried_edges = set((s, nb) for s in A for nb in G.successors(s)) - tried_edges
activated_nodes = [node for src,node in cur_tried_edges if node not in A and _prop_success(G, src, node)]
A.extend(set(activated_nodes))
return activated_nodes, cur_tried_edgesdef _prop_success(G, src, dest, r=random.random): # Faster access to random
id = G.get_eid(src, dest)
return r() <= G.es[id]['act_prob']Context
StackExchange Code Review Q#104803, answer score: 3
Revisions (0)
No revisions yet.