patternpythonMinor
Epidemic simulation
Viewed 0 times
simulationepidemicstackoverflow
Problem
The purpose of this code is to simulate epidemics across a population. There are 625 (pop) individuals at random locations. The epidemic parameters are infectious period (inf_period), trans (transmissibility of the disease - essentially virulence), susc (the susceptibility of each individual to the disease), and eps (epsilon, the probability of an individual becoming infectious randomly, not due to contact with an infectious person). The argument 'reps' is the number of times to simulate one set of epidemic parameters - that is, one set of [susc, trans, inf_period, eps].
In this example, there are 24 possible combinations of parameter values, and we want 400 reps per combination, so 24*400 = 9600 runs. Those values cannot change. To make this code faster, how can the number of loops be reduced (I've heard those are slow)?
This has many loops and
```
import numpy as np
from scipy import spatial
import json
def fun(susc, trans, inf_period, eps, reps, pop):
epi_list = []
count_list = []
new_susc = []
new_trans = []
new_inf_period = []
new_eps = []
count = 0
epi_file = "file1.json"
count_file = "file2.json"
with open(epi_file, 'w') as f, open(count_file, 'w') as h:
for i in range(len(trans)):
for j in inf_period:
for k in eps:
should_restart = True
while should_restart:
should_restart = False
broken = False
count_2 = 0
for rep in reps:
In this example, there are 24 possible combinations of parameter values, and we want 400 reps per combination, so 24*400 = 9600 runs. Those values cannot change. To make this code faster, how can the number of loops be reduced (I've heard those are slow)?
This has many loops and `if` statements, and running the full version will take roughly 2.5 days. How can it be made more efficient in terms of time? I know that may be vague, so if there's a way I can clarify please let me know! I should also mention I have access to a GPU.
```
import numpy as np
from scipy import spatial
import json
def fun(susc, trans, inf_period, eps, reps, pop):
epi_list = []
count_list = []
new_susc = []
new_trans = []
new_inf_period = []
new_eps = []
count = 0
epi_file = "file1.json"
count_file = "file2.json"
with open(epi_file, 'w') as f, open(count_file, 'w') as h:
for i in range(len(trans)):
for j in inf_period:
for k in eps:
should_restart = True
while should_restart:
should_restart = False
broken = False
count_2 = 0
for rep in reps:
Solution
78 characters of indentation at its deepest: this code is unreadable. We can't easily match the core of the code with the definition of the parameters.
To improve that, you can:
This leads to the more readable:
Now we can start thinking a bit about the code.
First off, you don't need to write the call to
In this test, you can also take the
The fail count could also be better handled with a
```
import numpy as np
from scipy import spatial
import json
import itertools
MAX_FAILED_ATTEMPS = 50
def fun(susc, trans, inf_period, eps, reps, pop):
epi_list = []
count_list = []
new_susc = []
new_trans = []
new_inf_period = []
new_eps = []
count = 0
for i, j, k in itertools.product(range(len(trans)), inf_period, eps):
should_restart = True
while should_restart:
should_restart = False
for rep in reps:
for _ in range(MAX_FAILED_ATTEMPS):
g1 = external_function_call(pop, susc, trans[i], j, k, full_mat)
if len(g1) >= 10 and max(g1.values()) >= 10:
g2 = inf_per_count_time(g1)
count += 1
epi_list.append(g1) #if the first epi in the reps works, but the subsequent ones do not, still writes. Bad!
count_list.append(g2)
new_susc.append(susc)
new_trans.append(trans[i])
new_inf_period.append(j)
new_eps.append(k)
break
else:
trans[i] += 1
To improve that, you can:
- use 4 spaces per indentation level instead of 8, as recommended by PEP 8;
- use `itertools.product` to iterate over all the combinations of parameters in one single loop instead of 3;
- remove unused variable declarations such as your `open`s;
- use the `break ... else` construct that can be applied to any loop; this will save you the use of the `broken` flag;
- use slice deletion rather than deleting items one by one in a `for` loop (plus it will be more efficient).
This leads to the more readable:
import numpy as np
from scipy import spatial
import json
import itertools
def fun(susc, trans, inf_period, eps, reps, pop):
    """Simulate epidemics for every parameter combination and save the parameters.

    For each combination of a transmissibility (``trans``), an infectious
    period (``inf_period``) and an epsilon (``eps``), one epidemic is simulated
    per element of ``reps``.  A simulation is retried until its result has at
    least 10 entries and a maximum value of at least 10.  After more than 50
    failed retries the transmissibility is increased by 1 (in place, inside
    ``trans``), everything recorded during the current pass over ``reps`` is
    discarded, and the pass restarts with the new transmissibility.

    The parameters of every recorded epidemic are written to
    ``parameters.txt``; the number of rows and ``count`` are printed.

    ``external_function_call``, ``inf_per_count_time`` and ``full_mat`` are
    not defined in this listing and must be provided by the enclosing module.
    """
    epi_list = []
    count_list = []
    new_susc = []
    new_trans = []
    new_inf_period = []
    new_eps = []
    # All per-epidemic lists grow in lockstep, so they are rolled back together.
    records = (epi_list, count_list, new_susc, new_trans, new_inf_period, new_eps)
    count = 0
    for i, j, k in itertools.product(range(len(trans)), inf_period, eps):
        should_restart = True
        while should_restart:
            should_restart = False
            # Entries from this index onwards belong to the current pass only;
            # earlier entries come from other parameter combinations.
            pass_start = len(epi_list)
            for rep in reps:
                failcount = 0
                g1 = external_function_call(pop, susc, trans[i], j, k, full_mat)
                # The builtin max is safe here: the length test short-circuits
                # before max() could see an empty collection.
                while len(g1) < 10 or max(g1.values()) < 10:
                    failcount += 1
                    if failcount > 50:
                        trans[i] += 1
                        break
                    # run again with the same parameters
                    g1 = external_function_call(pop, susc, trans[i], j, k, full_mat)
                else:
                    g2 = inf_per_count_time(g1)
                    count += 1
                    epi_list.append(g1)
                    count_list.append(g2)
                    new_susc.append(susc)
                    new_trans.append(trans[i])
                    new_inf_period.append(j)
                    new_eps.append(k)
                    continue
                # Cleanup because we failed too many times: drop only what this
                # pass recorded (a slice from the front would remove results of
                # earlier combinations instead) and keep count in sync.
                should_restart = True
                count -= len(epi_list) - pass_start
                for record in records:
                    del record[pass_start:]
                break
    paras = np.array([
        np.asarray(new_susc),
        np.asarray(new_trans),
        np.asarray(new_inf_period),
        np.asarray(new_eps),
    ]).T
    print('number of parameter rows %s' % (paras[:, 0].shape,))
    with open('parameters.txt', 'w') as newfile1:
        np.savetxt(newfile1, paras, fmt=['%f', '%f', '%f', '%f'])
    print(count)
if __name__ == "__main__":
pop = 625
susc = 0.3
trans = [1.5, 2.5, 3]
inf_period = [2, 3]
eps = [0, 0.01, 0.02, 0.05]
reps = np.arange(400)
fun(susc, trans, inf_period, eps, reps, pop)
Now we can start thinking a bit about the code.
First off, you don't need to write the call to `external_function_call` twice, especially with the same set of parameters. It is more idiomatic to use `while True: <call>; if <condition>: break` rather than `<call>; while <condition>: <call>`. This also lets you handle the successful case within that `if` rather than with your `broken` flag.
In this test, you can also take the `len` of `g1` directly; it's equivalent to using `len(g1.keys())`. And since `g1` seems to be a regular Python dictionary, there is no need to involve numpy there: Python already has a `max` builtin.
The fail count could also be better handled with a `for` loop and a named constant:
```
import numpy as np
from scipy import spatial
import json
import itertools
MAX_FAILED_ATTEMPS = 50
def fun(susc, trans, inf_period, eps, reps, pop):
epi_list = []
count_list = []
new_susc = []
new_trans = []
new_inf_period = []
new_eps = []
count = 0
for i, j, k in itertools.product(range(len(trans)), inf_period, eps):
should_restart = True
while should_restart:
should_restart = False
for rep in reps:
for _ in range(MAX_FAILED_ATTEMPS):
g1 = external_function_call(pop, susc, trans[i], j, k, full_mat)
if len(g1) >= 10 and max(g1.values()) >= 10:
g2 = inf_per_count_time(g1)
count += 1
epi_list.append(g1) #if the first epi in the reps works, but the subsequent ones do not, still writes. Bad!
count_list.append(g2)
new_susc.append(susc)
new_trans.append(trans[i])
new_inf_period.append(j)
new_eps.append(k)
break
else:
trans[i] += 1
Code Snippets
import numpy as np
from scipy import spatial
import json
import itertools
def fun(susc, trans, inf_period, eps, reps, pop):
    """Simulate epidemics for every parameter combination and save the parameters.

    For each combination of a transmissibility (``trans``), an infectious
    period (``inf_period``) and an epsilon (``eps``), one epidemic is simulated
    per element of ``reps``.  A simulation is retried until its result has at
    least 10 entries and a maximum value of at least 10.  After more than 50
    failed retries the transmissibility is increased by 1 (in place, inside
    ``trans``), everything recorded during the current pass over ``reps`` is
    discarded, and the pass restarts with the new transmissibility.

    The parameters of every recorded epidemic are written to
    ``parameters.txt``; the number of rows and ``count`` are printed.

    ``external_function_call``, ``inf_per_count_time`` and ``full_mat`` are
    not defined in this listing and must be provided by the enclosing module.
    """
    epi_list = []
    count_list = []
    new_susc = []
    new_trans = []
    new_inf_period = []
    new_eps = []
    # All per-epidemic lists grow in lockstep, so they are rolled back together.
    records = (epi_list, count_list, new_susc, new_trans, new_inf_period, new_eps)
    count = 0
    for i, j, k in itertools.product(range(len(trans)), inf_period, eps):
        should_restart = True
        while should_restart:
            should_restart = False
            # Entries from this index onwards belong to the current pass only;
            # earlier entries come from other parameter combinations.
            pass_start = len(epi_list)
            for rep in reps:
                failcount = 0
                g1 = external_function_call(pop, susc, trans[i], j, k, full_mat)
                # The builtin max is safe here: the length test short-circuits
                # before max() could see an empty collection.
                while len(g1) < 10 or max(g1.values()) < 10:
                    failcount += 1
                    if failcount > 50:
                        trans[i] += 1
                        break
                    # run again with the same parameters
                    g1 = external_function_call(pop, susc, trans[i], j, k, full_mat)
                else:
                    g2 = inf_per_count_time(g1)
                    count += 1
                    epi_list.append(g1)
                    count_list.append(g2)
                    new_susc.append(susc)
                    new_trans.append(trans[i])
                    new_inf_period.append(j)
                    new_eps.append(k)
                    continue
                # Cleanup because we failed too many times: drop only what this
                # pass recorded (a slice from the front would remove results of
                # earlier combinations instead) and keep count in sync.
                should_restart = True
                count -= len(epi_list) - pass_start
                for record in records:
                    del record[pass_start:]
                break
    paras = np.array([
        np.asarray(new_susc),
        np.asarray(new_trans),
        np.asarray(new_inf_period),
        np.asarray(new_eps),
    ]).T
    print('number of parameter rows %s' % (paras[:, 0].shape,))
    with open('parameters.txt', 'w') as newfile1:
        np.savetxt(newfile1, paras, fmt=['%f', '%f', '%f', '%f'])
    print(count)
if __name__ == "__main__":
pop = 625
susc = 0.3
trans = [1.5, 2.5, 3]
inf_period = [2, 3]
eps = [0, 0.01, 0.02, 0.05]
reps = np.arange(400)
fun(susc, trans, inf_period, eps, reps, pop)
import numpy as np
from scipy import spatial
import json
import itertools
MAX_FAILED_ATTEMPS = 50
def fun(susc, trans, inf_period, eps, reps, pop):
    """Simulate epidemics for every parameter combination and save the parameters.

    For each combination of a transmissibility (``trans``), an infectious
    period (``inf_period``) and an epsilon (``eps``), one epidemic is simulated
    per element of ``reps``.  Each simulation gets up to
    ``MAX_FAILED_ATTEMPS`` attempts to produce a result with at least 10
    entries and a maximum value of at least 10.  When every attempt fails, the
    transmissibility is increased by 1 (in place, inside ``trans``),
    everything recorded during the current pass over ``reps`` is discarded,
    and the pass restarts with the new transmissibility.

    The parameters of every recorded epidemic are written to
    ``parameters.txt``; the number of rows and ``count`` are printed.

    ``external_function_call``, ``inf_per_count_time`` and ``full_mat`` are
    not defined in this listing and must be provided by the enclosing module.
    """
    epi_list = []
    count_list = []
    new_susc = []
    new_trans = []
    new_inf_period = []
    new_eps = []
    # All per-epidemic lists grow in lockstep, so they are rolled back together.
    records = (epi_list, count_list, new_susc, new_trans, new_inf_period, new_eps)
    count = 0
    for i, j, k in itertools.product(range(len(trans)), inf_period, eps):
        should_restart = True
        while should_restart:
            should_restart = False
            # Entries from this index onwards belong to the current pass only;
            # earlier entries come from other parameter combinations.
            pass_start = len(epi_list)
            for _rep in reps:
                for _ in range(MAX_FAILED_ATTEMPS):
                    g1 = external_function_call(pop, susc, trans[i], j, k, full_mat)
                    if len(g1) >= 10 and max(g1.values()) >= 10:
                        g2 = inf_per_count_time(g1)
                        count += 1
                        epi_list.append(g1)
                        count_list.append(g2)
                        new_susc.append(susc)
                        new_trans.append(trans[i])
                        new_inf_period.append(j)
                        new_eps.append(k)
                        break
                else:
                    trans[i] += 1
                    # Cleanup because we failed too many times: drop only what
                    # this pass recorded (a slice from the front would remove
                    # results of earlier combinations) and keep count in sync.
                    should_restart = True
                    count -= len(epi_list) - pass_start
                    for record in records:
                        del record[pass_start:]
                    break
    paras = np.array([
        np.asarray(new_susc),
        np.asarray(new_trans),
        np.asarray(new_inf_period),
        np.asarray(new_eps),
    ]).T
    print('number of parameter rows %s' % (paras[:, 0].shape,))
    with open('parameters.txt', 'w') as newfile1:
        np.savetxt(newfile1, paras, fmt=['%f', '%f', '%f', '%f'])
    print(count)
if __name__ == "__main__":
pop = 625
susc = 0.3
trans = [1.5, 2.5, 3]
inf_period = [2, 3]
eps = [0, 0.01, 0.02, 0.05]
reps = np.arange(400)
fun(susc, trans, inf_period, eps, reps, pop)
import numpy as np
from scipy import spatial
import json
import itertools
MAX_FAILED_ATTEMPS = 50
def fun(susc, trans, inf_period, eps, repetitions, pop):
    """Simulate epidemics for every parameter combination and save the parameters.

    For each combination of a transmissibility (``trans``), an infectious
    period (``inf_period``) and an epsilon (``eps``), ``repetitions``
    epidemics are simulated.  Each simulation gets up to
    ``MAX_FAILED_ATTEMPS`` attempts to produce a result with at least 10
    entries and a maximum value of at least 10.  When every attempt fails, the
    transmissibility is increased by 1 (in place, inside ``trans``),
    everything recorded during the current pass is discarded, and the pass
    restarts with the new transmissibility.

    The parameters of every recorded epidemic are written to
    ``parameters.txt``; the number of rows and ``count`` are printed.

    ``external_function_call``, ``inf_per_count_time`` and ``full_mat`` are
    not defined in this listing and must be provided by the enclosing module.
    """
    epi_list = []
    count_list = []
    new_susc = []
    new_trans = []
    new_inf_period = []
    new_eps = []
    # All per-epidemic lists grow in lockstep, so they are rolled back together.
    records = (epi_list, count_list, new_susc, new_trans, new_inf_period, new_eps)
    count = 0
    for i, j, k in itertools.product(range(len(trans)), inf_period, eps):
        while True:
            # Entries from this index onwards belong to the current pass only.
            pass_start = len(epi_list)
            for _rep in range(repetitions):
                for _ in range(MAX_FAILED_ATTEMPS):
                    g1 = external_function_call(pop, susc, trans[i], j, k, full_mat)
                    if len(g1) >= 10 and max(g1.values()) >= 10:
                        g2 = inf_per_count_time(g1)
                        count += 1
                        epi_list.append(g1)
                        count_list.append(g2)
                        new_susc.append(susc)
                        new_trans.append(trans[i])
                        new_inf_period.append(j)
                        new_eps.append(k)
                        break
                else:
                    trans[i] += 1
                    # Cleanup because we failed too many times.  Truncating to
                    # the recorded start index is used instead of `[-rep:]`,
                    # which becomes `[0:]` when the very first repetition fails
                    # and would wipe the results of earlier combinations.
                    count -= len(epi_list) - pass_start
                    for record in records:
                        del record[pass_start:]
                    break
            else:
                # do not restart if we made it through the whole repetitions
                break
    paras = np.array([
        np.asarray(new_susc),
        np.asarray(new_trans),
        np.asarray(new_inf_period),
        np.asarray(new_eps),
    ]).T
    print('number of parameter rows %s' % (paras[:, 0].shape,))
    with open('parameters.txt', 'w') as newfile1:
        np.savetxt(newfile1, paras, fmt=['%f', '%f', '%f', '%f'])
    print(count)
if __name__ == "__main__":
pop = 625
susc = 0.3
trans = [1.5, 2.5, 3]
inf_period = [2, 3]
eps = [0, 0.01, 0.02, 0.05]
fun(susc, trans, inf_period, eps, 400, pop)
import numpy as np
from scipy import spatial
import json
import itertools
MAX_FAILED_ATTEMPS = 50
def fun(susc, trans, inf_period, eps, repetitions, pop):
    """Simulate epidemics for every parameter combination and save the parameters.

    For each combination of a transmissibility (from ``trans``), an
    infectious period (from ``inf_period``) and an epsilon (from ``eps``),
    ``repetitions`` epidemics are simulated.  Each simulation gets up to
    ``MAX_FAILED_ATTEMPS`` attempts to produce a result with at least 10
    entries and a maximum value of at least 10.  When every attempt fails, the
    local transmissibility is increased by 1 (``trans`` itself is left
    untouched), everything recorded during the current pass is discarded, and
    the pass restarts with the new transmissibility.

    The parameters of every recorded epidemic are written to
    ``parameters.txt``; the number of rows and ``count`` are printed.

    ``external_function_call``, ``inf_per_count_time`` and ``full_mat`` are
    not defined in this listing and must be provided by the enclosing module.
    """
    epi_list = []
    count_list = []
    new_susc = []
    new_trans = []
    new_inf_period = []
    new_eps = []
    # All per-epidemic lists grow in lockstep, so they are rolled back together.
    records = (epi_list, count_list, new_susc, new_trans, new_inf_period, new_eps)
    count = 0
    parameters_product = itertools.product(trans, inf_period, eps)
    for transmissibility, infectious_period, epsilon in parameters_product:
        while True:
            # Entries from this index onwards belong to the current pass only.
            pass_start = len(epi_list)
            for _rep in range(repetitions):
                for _ in range(MAX_FAILED_ATTEMPS):
                    g1 = external_function_call(
                        pop, susc, transmissibility,
                        infectious_period, epsilon, full_mat)
                    if len(g1) >= 10 and max(g1.values()) >= 10:
                        g2 = inf_per_count_time(g1)
                        count += 1
                        epi_list.append(g1)
                        count_list.append(g2)
                        new_susc.append(susc)
                        new_trans.append(transmissibility)
                        new_inf_period.append(infectious_period)
                        new_eps.append(epsilon)
                        break
                else:
                    transmissibility += 1
                    # Cleanup because we failed too many times.  Truncating to
                    # the recorded start index is used instead of `[-rep:]`,
                    # which becomes `[0:]` when the very first repetition fails
                    # and would wipe the results of earlier combinations.
                    count -= len(epi_list) - pass_start
                    for record in records:
                        del record[pass_start:]
                    break
            else:
                # do not restart if we made it through the whole repetitions
                break
    paras = np.array([
        np.asarray(new_susc),
        np.asarray(new_trans),
        np.asarray(new_inf_period),
        np.asarray(new_eps),
    ]).T
    print('number of parameter rows %s' % (paras[:, 0].shape,))
    with open('parameters.txt', 'w') as newfile1:
        np.savetxt(newfile1, paras, fmt=['%f', '%f', '%f', '%f'])
    print(count)
if __name__ == "__main__":
pop = 625
susc = 0.3
trans = [1.5, 2.5, 3]
inf_period = [2, 3]
eps = [0, 0.01, 0.02, 0.05]
fun(susc, trans, inf_period, eps, 400, pop)
Context
StackExchange Code Review Q#150044, answer score: 6
Revisions (0)
No revisions yet.