patternpythonMinor
Taking wind data and simulating future wind profiles
Viewed 0 times
simulatingfutureprofilesanddatawindtaking
Problem
I am new to programming, and am using Python to take wind data and simulate future wind profiles. The code as written takes a while to execute and I was hoping someone could suggest ways to make my code more efficient...
```
from __future__ import division
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import random
def boot(data, block=10, size=100):
    """Build a bootstrap sample by concatenating random contiguous blocks.

    Draws ``size`` blocks of ``block`` consecutive observations from
    ``data`` (start positions chosen uniformly, with replacement) and
    joins them end to end.

    Args:
        data: One-dimensional numeric sequence (list, NumPy array or
            pandas Series). It is read positionally.
        block: Number of consecutive observations in each block.
        size: Number of blocks to draw.

    Returns:
        A float NumPy array of length ``size * block``.

    Raises:
        ValueError: If ``data`` holds fewer than ``block`` observations.
    """
    values = np.asarray(data)
    length = len(values)
    if block > length:
        raise ValueError(
            "block ({}) is longer than the data ({})".format(block, length))

    new = np.zeros(size * block)
    # random.randint is inclusive at both ends, so the last valid start
    # position is ``length - block``; stopping one earlier would never
    # sample the final observation.
    last_start = length - block
    for i in range(size):
        x = random.randint(0, last_start)
        new[i * block:(i + 1) * block] = values[x:x + block]
    return new
def sumx(data, x):
    """Sum consecutive, non-overlapping groups of ``x`` observations.

    Args:
        data: Array-like whose first axis is split into groups.
        x: Positive group size.

    Returns:
        A float NumPy array of length ``len(data) // x``. Trailing
        observations that do not fill a complete group are dropped.
    """
    values = np.asarray(data)
    groups = values.shape[0] // x
    if groups == 0:
        # reshape cannot infer a dimension for an empty array.
        return np.zeros(0)
    # One row per group; summing along the rows replaces the Python loop.
    return values[:groups * x].reshape(groups, -1).sum(axis=1, dtype=float)
def capsum(x):
    """Return the running sum of ``x`` with each step clamped to [0, 1].

    The first output value is copied from ``x`` unchanged (it is not
    clamped). Every later value is the previous output plus the current
    input, limited to the range 0..1.

    Args:
        x: One-dimensional numeric sequence of increments. It is read
            positionally.

    Returns:
        A float NumPy array with the same length as ``x``. An empty
        input yields an empty array.
    """
    values = np.asarray(x, dtype=float)
    length = values.shape[0]
    new = np.zeros(length)
    if length == 0:
        return new

    new[0] = values[0]
    for i in range(1, length):
        total = new[i - 1] + values[i]
        # Explicit comparisons (rather than min/max) so that a NaN
        # increment propagates as NaN.
        if total > 1:
            total = 1
        elif total < 0:
            total = 0
        new[i] = total
    return new
def differences(wind, load, total_wind, block=5000, size=20):
    """Simulate one wind profile and subtract it from the load.

    The step-to-step changes of ``wind`` are block-bootstrapped with
    ``boot``, accumulated with ``capsum`` (which caps the running level
    to [0, 1]), cut to the length of ``load`` and multiplied by
    ``total_wind``.

    Args:
        wind: Historical wind series; must provide a pandas-style
            ``diff`` method.
        load: Load series the simulated wind is subtracted from.
        total_wind: Factor the capped profile is multiplied by.
        block: Block length passed to ``boot``.
        size: Number of blocks passed to ``boot``.

    Returns:
        A ``(net_load, sum_sim)`` tuple: the load minus the simulated
        wind, and the simulated wind itself.

    Raises:
        ValueError: If ``block * size`` observations are not enough to
            cover ``load``.
    """
    length = len(load)
    if block * size < length:
        raise ValueError(
            "simulation of {} points cannot cover a load of {} points"
            .format(block * size, length))

    # The first difference has no predecessor (NaN in pandas), so skip it.
    wind_diff = wind.diff(1)
    sim = boot(wind_diff[1:], block, size)
    sum_sim = capsum(sim)[:length] * total_wind
    net_load = load[:length] - sum_sim
    return net_load, sum_sim
def monte(x, wind, load, total_wind):
wind_energy = np.zeros(x)
sim_max = np.zeros(x)
sim_min = np.zeros(x)
sim_std = np.zeros(x)
sim_mean = np.zeros(x)
sim_5 = np.zeros(x)
sim_10 = np.zeros(x)
sim_15 = np.zeros(x)
sim_20 = np.zeros(x)
sim_25 = np.zeros(x)
sim_30 = np.zeros(x)
sim_35 = np.zeros(x)
sim_40 = np.zeros(x)
sim_45 = np.zeros(x)
sim_50 = np.zeros(x)
sim_55 = np.zeros(x)
sim_60 = np.zeros(x)
sim_65 = np.zeros(x)
sim_70 = np.zeros(x)
sim_75 = np.zeros(x)
sim_80 = np.zeros(x)
sim_85 = np.zeros(x)
sim_90 = np.zeros(x)
sim_95 = np.zeros(x)
sim_96 = np.zeros(x)
sim_97 = np.
```
from __future__ import division
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import random
def boot(data, block=10, size=100):
    """Build a bootstrap sample by concatenating random contiguous blocks.

    Draws ``size`` blocks of ``block`` consecutive observations from
    ``data`` (start positions chosen uniformly, with replacement) and
    joins them end to end.

    Args:
        data: One-dimensional numeric sequence (list, NumPy array or
            pandas Series). It is read positionally.
        block: Number of consecutive observations in each block.
        size: Number of blocks to draw.

    Returns:
        A float NumPy array of length ``size * block``.

    Raises:
        ValueError: If ``data`` holds fewer than ``block`` observations.
    """
    values = np.asarray(data)
    length = len(values)
    if block > length:
        raise ValueError(
            "block ({}) is longer than the data ({})".format(block, length))

    new = np.zeros(size * block)
    # random.randint is inclusive at both ends, so the last valid start
    # position is ``length - block``; stopping one earlier would never
    # sample the final observation.
    last_start = length - block
    for i in range(size):
        x = random.randint(0, last_start)
        new[i * block:(i + 1) * block] = values[x:x + block]
    return new
def sumx(data, x):
    """Sum consecutive, non-overlapping groups of ``x`` observations.

    Args:
        data: Array-like whose first axis is split into groups.
        x: Positive group size.

    Returns:
        A float NumPy array of length ``len(data) // x``. Trailing
        observations that do not fill a complete group are dropped.
    """
    values = np.asarray(data)
    groups = values.shape[0] // x
    if groups == 0:
        # reshape cannot infer a dimension for an empty array.
        return np.zeros(0)
    # One row per group; summing along the rows replaces the Python loop.
    return values[:groups * x].reshape(groups, -1).sum(axis=1, dtype=float)
def capsum(x):
    """Return the running sum of ``x`` with each step clamped to [0, 1].

    The first output value is copied from ``x`` unchanged (it is not
    clamped). Every later value is the previous output plus the current
    input, limited to the range 0..1.

    Args:
        x: One-dimensional numeric sequence of increments. It is read
            positionally.

    Returns:
        A float NumPy array with the same length as ``x``. An empty
        input yields an empty array.
    """
    values = np.asarray(x, dtype=float)
    length = values.shape[0]
    new = np.zeros(length)
    if length == 0:
        return new

    new[0] = values[0]
    for i in range(1, length):
        total = new[i - 1] + values[i]
        # Explicit comparisons (rather than min/max) so that a NaN
        # increment propagates as NaN.
        if total > 1:
            total = 1
        elif total < 0:
            total = 0
        new[i] = total
    return new
def differences(wind, load, total_wind, block=5000, size=20):
    """Simulate one wind profile and subtract it from the load.

    The step-to-step changes of ``wind`` are block-bootstrapped with
    ``boot``, accumulated with ``capsum`` (which caps the running level
    to [0, 1]), cut to the length of ``load`` and multiplied by
    ``total_wind``.

    Args:
        wind: Historical wind series; must provide a pandas-style
            ``diff`` method.
        load: Load series the simulated wind is subtracted from.
        total_wind: Factor the capped profile is multiplied by.
        block: Block length passed to ``boot``.
        size: Number of blocks passed to ``boot``.

    Returns:
        A ``(net_load, sum_sim)`` tuple: the load minus the simulated
        wind, and the simulated wind itself.

    Raises:
        ValueError: If ``block * size`` observations are not enough to
            cover ``load``.
    """
    length = len(load)
    if block * size < length:
        raise ValueError(
            "simulation of {} points cannot cover a load of {} points"
            .format(block * size, length))

    # The first difference has no predecessor (NaN in pandas), so skip it.
    wind_diff = wind.diff(1)
    sim = boot(wind_diff[1:], block, size)
    sum_sim = capsum(sim)[:length] * total_wind
    net_load = load[:length] - sum_sim
    return net_load, sum_sim
def monte(x, wind, load, total_wind):
wind_energy = np.zeros(x)
sim_max = np.zeros(x)
sim_min = np.zeros(x)
sim_std = np.zeros(x)
sim_mean = np.zeros(x)
sim_5 = np.zeros(x)
sim_10 = np.zeros(x)
sim_15 = np.zeros(x)
sim_20 = np.zeros(x)
sim_25 = np.zeros(x)
sim_30 = np.zeros(x)
sim_35 = np.zeros(x)
sim_40 = np.zeros(x)
sim_45 = np.zeros(x)
sim_50 = np.zeros(x)
sim_55 = np.zeros(x)
sim_60 = np.zeros(x)
sim_65 = np.zeros(x)
sim_70 = np.zeros(x)
sim_75 = np.zeros(x)
sim_80 = np.zeros(x)
sim_85 = np.zeros(x)
sim_90 = np.zeros(x)
sim_95 = np.zeros(x)
sim_96 = np.zeros(x)
sim_97 = np.
Solution
I want to speak towards some style and general Python improvements (of which there are quite a few) that you can make.
Repeated Code
Whenever you have to define a handful of variables that are all basically identical (or generated identically), you can simplify this with another structure. In your case, your
With this change (and some iteration tricks), your whole function gets slimmed down to this:
Note: There are more optimizations that I would suggest (use/return a
You can implement this same idea later on in your code, e.g. when you are dealing with your
Pull Code into Functions
In my code above, I have a nice loop that gives counts
This function will
you can do this:
Style Pointers
-
Use descriptive variable names.
Also, shy away from using capital letters. Convention says that variables are
-
Use
-
Try to keep your line length less than 80 characters. Long lines are especially irksome for users with small monitors (or portrait monitors).
Repeated Code
Whenever you have to define a handful of variables that are all basically identical (or generated identically), you can simplify this with another structure. In your case, your
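sim_xx variables in monte can all be placed into a list: sims = [np.zeros(x) for _ in range(24)] (a list comprehension is needed here, because [np.zeros(x)]*24 would repeat one and the same array object 24 times, so every entry would share the same data). With this change (and some iteration tricks), your whole function gets slimmed down to this: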
def monte(x, wind, load, total_wind):
    """Run ``x`` simulations and average their summary statistics.

    Each iteration calls ``differences`` to obtain a simulated net load
    and wind profile, then records the wind energy, the net-load
    extremes, and the standard deviation, mean and percentiles of the
    net-load ramp (its first difference).

    Args:
        x: Number of simulations to run.
        wind: Passed through to ``differences``.
        load: Passed through to ``differences``.
        total_wind: Passed through to ``differences``.

    Returns:
        A tuple of 29 averages over all simulations, in this order:
        wind energy, net-load max, net-load min, ramp std, ramp mean,
        then the ramp percentiles 5, 10, ..., 95, 96, 97, 98, 99, 100.
    """
    # Percentile levels: steps of 5 up to 95, then steps of 1 up to 100.
    percentiles = list(range(5, 100, 5)) + list(range(96, 101))

    wind_energy = np.zeros(x)
    sim_max = np.zeros(x)
    sim_min = np.zeros(x)
    sim_std = np.zeros(x)
    sim_mean = np.zeros(x)
    # One independent array per percentile. ``[np.zeros(x)] * 24`` would
    # repeat a single array object 24 times, so every percentile would
    # overwrite the same storage and all results would be identical.
    sims = [np.zeros(x) for _ in percentiles]

    for i in range(x):
        net_load, sim_wind = differences(wind, load, total_wind)
        wind_energy[i] = sim_wind.mean() * len(sim_wind)

        # Drop the first difference, which has no predecessor.
        ramp = pd.Series(net_load).diff(1)[1:]

        sim_max[i] = net_load.max()
        sim_min[i] = net_load.min()
        sim_std[i] = ramp.std()
        sim_mean[i] = ramp.mean()

        for sim, level in zip(sims, percentiles):
            sim[i] = np.percentile(ramp, level)

    return tuple(arr.mean() for arr in [wind_energy, sim_max, sim_min,
                                        sim_std, sim_mean] + sims)


# Note: There are more optimizations that I would suggest (use/return a
dict). However, your return value is used in a parameter to a DataFrame. I do not know how they would affect the structure of your code.You can implement this same idea later on in your code, e.g. when you are dealing with your
CH_5min, BV_5min, etc. values.Pull Code into Functions
In my code above, I have a nice loop that gives counts
5, 10, 15, 20, ..., 98, 99, 100. This would be useful in other sections of your code. So the best thing to do is pull it into a function (more specifically a generator):def get_values(start=5, stop=100, threshold=95, less_than=5, greater_than=1):
while start <= stop:
yield start
start += less_than if start < threshold else greater_thanThis function will
yield values incremented by a certain value until a specific threshold then increment values by a different value. It can be used like xrange and instead of doing this to create your indexes:results = pd.DataFrame(results, index=['Wind Energy', 'NL max', 'NL min', 'NL ramp std', 'NL ramp mean', 'NL ramp 5', 'NL ramp 10',
'NL ramp 15', 'NL ramp 20', 'NL ramp 25', 'NL ramp 30', 'NL ramp 35', 'NL ramp 40',
'NL ramp 45', 'NL ramp 50', 'NL ramp 55', 'NL ramp 60', 'NL ramp 65', 'NL ramp 70',
'NL ramp 75', 'NL ramp 80', 'NL ramp 85', 'NL ramp 90', 'NL ramp 95', 'NL ramp 96',
'NL ramp 97','NL ramp 98','NL ramp 99','NL ramp 100'])you can do this:
indices = ['Wind Energy', 'NL max', 'NL min', 'NL ramp std',
'NL ramp mean'] + ['NL ramp {}'.format(val) for val in get_values()]
results = pd.DataFrame(results, index=indices)Style Pointers
- Take a look at PEP8, the official Python style guide. It will help your code look cleaner.
-
Use descriptive variable names.
x tells us nothing about what it holds. Always err on the side of being too descriptive rather than too terse. Also, shy away from using capital letters. Convention says that variables are
lowercase_with_underscores. The only time capital letters are used in conventional Python is for class names (PascalCase) and constants (ALL_CAPS).-
Use
format when creating strings with variable information. This is in the Style Pointers because the benefit of using string formatting over string concatenation is debatable. However, using format (as I have in above sections) makes your code more readable. Another example:# Your original code...
results.to_csv(str(year) + 'Year' + ' Peak Load ' + str(load.max()) + '-' + 'results.csv')
# becomes this.
results.to_csv('{}Year Peak Load {}-results.csv'.format(year, load.max()))-
Try to keep your line length less than 80 characters. Long lines are especially irksome for users with small monitors (or portrait monitors).
Code Snippets
sims = [np.zeros(x)]*24def monte(x, wind, load, total_wind):
wind_energy = np.zeros(x)
sim_max = np.zeros(x)
sim_min = np.zeros(x)
sim_std = np.zeros(x)
sim_mean = np.zeros(x)
sims = [np.zeros(x)]*24
for i in range(x):
net_load, sim_wind = differences(wind, load, total_wind)
len_wind = len(sim_wind)
wind_energy[i] = sim_wind.mean() * len_wind
ramp = pd.Series(net_load).diff(1)
ramp = ramp[1:]
sim_max[i] = net_load.max()
sim_min[i] = net_load.min()
sim_std[i] = ramp.std()
sim_mean[i] = ramp.mean()
# Assign the values for each `sim`. This also generates
# values in `val` of 5, 10, 15, ..., 98, 99, 100.
val = 5
for sim in sims:
sim[i] = np.percentile(ramp, val)
val += 5 if val < 95 else 1
return tuple(arr.mean() for arr in [wind_energy, sim_max, sim_min,
sim_std, sim_mean] + sims)def get_values(start=5, stop=100, threshold=95, less_than=5, greater_than=1):
while start <= stop:
yield start
start += less_than if start < threshold else greater_thanresults = pd.DataFrame(results, index=['Wind Energy', 'NL max', 'NL min', 'NL ramp std', 'NL ramp mean', 'NL ramp 5', 'NL ramp 10',
'NL ramp 15', 'NL ramp 20', 'NL ramp 25', 'NL ramp 30', 'NL ramp 35', 'NL ramp 40',
'NL ramp 45', 'NL ramp 50', 'NL ramp 55', 'NL ramp 60', 'NL ramp 65', 'NL ramp 70',
'NL ramp 75', 'NL ramp 80', 'NL ramp 85', 'NL ramp 90', 'NL ramp 95', 'NL ramp 96',
'NL ramp 97','NL ramp 98','NL ramp 99','NL ramp 100'])indices = ['Wind Energy', 'NL max', 'NL min', 'NL ramp std',
'NL ramp mean'] + ['NL ramp {}'.format(val) for val in get_values()]
results = pd.DataFrame(results, index=indices)Context
StackExchange Code Review Q#54337, answer score: 3
Revisions (0)
No revisions yet.