patternpythonMinor
Alternating Direction Method of Multipliers
Viewed 0 times
directionmultipliersalternatingmethod
Problem
This is a python implementation of the Alternating Direction Method of Multipliers - a method of constrained optimisation that is used widely in statistics (http://stanford.edu/~boyd/admm.html).
This is simplified version, specifically for the LASSO:
Given a sparse vector $$x \in R^n$$and matrix $$A \in R^{m \times n}$$ and noisy measurements $$y = Ax + e$$where $$e$$ is additive Gaussian white noise we can solve the following minimisation problem
$$
\hat{x} = \min_x ||y-Ax||_2^2 + \lambda||x||_1
$$
to recover an estimate of $$x$$
The algorithm proceeds iteratively by calculating
$$ x^{k+1} = (A^TA + \rho I )^{-1}(A^Ty + \rho (z - u))$$
$$ z^{k+1} = \mathrm{sign}(\hat{x})\mathrm{max}\left(0, |x| - \frac{\lambda}{\rho}\right) $$
until some convergence criteria is met.
The implementation of the algorithm is below:
```
import numpy as np
import matplotlib.pyplot as plt
from math import sqrt, log
def Sthresh(x, gamma):
return np.sign(x)*np.maximum(0, np.absolute(x)-gamma/2.0)
def ADMM(A, y):
m, n = A.shape
w, v = np.linalg.eig(A.T.dot(A))
MAX_ITER = 10000
"Function to caluculate min 1/2(y - Ax) + l||x||"
"via alternating direction methods"
xhat = np.zeros([n, 1])
zhat = np.zeros([n, 1])
u = np.zeros([n, 1])
"Calculate regression co-efficient and stepsize"
l = sqrt(2*log(n, 10))
rho = 1/(np.amax(np.absolute(w)))
"Pre-compute to save some multiplications"
AtA = A.T.dot(A)
Aty = A.T.dot(y)
Q = AtA + rho*np.identity(n)
Q = np.linalg.inv(Q)
i = 0
while(i < MAX_ITER):
"x minimisation step via posterier OLS"
xhat = Q.dot(Aty + rho*(zhat - u))
"z minimisation via soft-thresholding"
zhat = Sthresh(xhat + u, l/rho)
"mulitplier update"
u = u + xhat - zhat
i = i+1
return zhat, rho, l
A = np.random.randn(50, 200)
num_non_zeros = 10
positions = np.random.randint(0, 200, num_non_zeros)
amplitudes = 100*np.random.randn(num_non
This is simplified version, specifically for the LASSO:
Given a sparse vector $$x \in R^n$$and matrix $$A \in R^{m \times n}$$ and noisy measurements $$y = Ax + e$$where $$e$$ is additive Gaussian white noise we can solve the following minimisation problem
$$
\hat{x} = \min_x ||y-Ax||_2^2 + \lambda||x||_1
$$
to recover an estimate of $$x$$
The algorithm proceeds iteratively by calculating
$$ x^{k+1} = (A^TA + \rho I )^{-1}(A^Ty + \rho (z - u))$$
$$ z^{k+1} = \mathrm{sign}(\hat{x})\mathrm{max}\left(0, |x| - \frac{\lambda}{\rho}\right) $$
until some convergence criteria is met.
The implementation of the algorithm is below:
```
import numpy as np
import matplotlib.pyplot as plt
from math import sqrt, log
def Sthresh(x, gamma):
return np.sign(x)*np.maximum(0, np.absolute(x)-gamma/2.0)
def ADMM(A, y):
m, n = A.shape
w, v = np.linalg.eig(A.T.dot(A))
MAX_ITER = 10000
"Function to caluculate min 1/2(y - Ax) + l||x||"
"via alternating direction methods"
xhat = np.zeros([n, 1])
zhat = np.zeros([n, 1])
u = np.zeros([n, 1])
"Calculate regression co-efficient and stepsize"
l = sqrt(2*log(n, 10))
rho = 1/(np.amax(np.absolute(w)))
"Pre-compute to save some multiplications"
AtA = A.T.dot(A)
Aty = A.T.dot(y)
Q = AtA + rho*np.identity(n)
Q = np.linalg.inv(Q)
i = 0
while(i < MAX_ITER):
"x minimisation step via posterier OLS"
xhat = Q.dot(Aty + rho*(zhat - u))
"z minimisation via soft-thresholding"
zhat = Sthresh(xhat + u, l/rho)
"mulitplier update"
u = u + xhat - zhat
i = i+1
return zhat, rho, l
A = np.random.randn(50, 200)
num_non_zeros = 10
positions = np.random.randint(0, 200, num_non_zeros)
amplitudes = 100*np.random.randn(num_non
Solution
A string is not a comment
A comment in the code starts with
Do not confuse yourself between docstrings and comments.
I truly thought
Given that:
I suggest you precompute using:
and call
Besides, you are dividing
A
Since you don't need the value of
to better emphasize the fact that you won't use the iteration value.
Separate computation and presentation
I'd recommend creating a function for plotting the results and a function for your tests. That way, it would be easier to jump into an interactive session and test your function with alternatives input values:
[Optimization] Method lookups are faster with local variables
The
Full modifications (with a tiny bit of variable renaming)
```
import numpy as np
import matplotlib.pyplot as plt
from math import sqrt, log
def ADMM(A, y):
"""Alternating Direction Method of Multipliers
This is a python implementation of the Alternating Direction
Method of Multipliers - a method of constrained optimisation
that is used widely in statistics (http://stanford.edu/~boyd/admm.html).
This is simplified version, specifically for the LASSO
"""
m, n = A.shape
A_t_A = A.T.dot(A)
w, v = np.linalg.eig(A_t_A)
MAX_ITER = 10000
#Function to caluculate min 1/2(y - Ax) + l||x||
#via alternating direction methods
x_hat = np.zeros([n, 1])
z_hat = np.zeros([n, 1])
u = np.zeros([n, 1])
#Calculate regression co-efficient and stepsize
r = np.amax(np.absolute(w))
l_over_rho = sqrt(2log(n, 10)) r / 2.0 # I might be wrong here
rho = 1/r
#Pre-compute to save some multiplications
A_t_y = A.T.dot(y)
Q = A_t_A + rho * np.identity(n)
Q = np.linalg.inv(Q)
Q_dot = Q.dot
sign = np.sign
maximum = np.maximum
absolute = np.absolute
for _ in xrange(MAX_ITER):
#x minimisation step via posterier OLS
x_hat = Q_dot(A_t_y + rho*(z_hat - u))
#z minimisation via soft-thresholding
u = x_hat + u
z_hat = sign(u) * maximum(0, absolute(u)-l_over_rho)
#mulitplier update
u = u - z_hat
return z_hat
def test(m=50, n=200):
"""Test the ADMM method with randomly generated matrices and vectors"""
A = np.random.randn(m, n)
num_non_zeros = 10
positions = np.random.randint(0, n, num_non_zeros)
amplitudes = 100*np.random.randn(num_non_zeros, 1)
x = np.zeros((n, 1))
x[positions] = amplitudes
y = A.dot(x) + np.random.randn(m, 1)
plot(x, ADMM(A,y))
def plot(original, computed):
"""Plot two vectors to compare their values"""
plt.plot(or
A comment in the code starts with
#. Even thought the strings you write seems to have no effect in the code, they are evaluated and created in memory (and thrown away right after) each time. Especially in the while loop.Do not confuse yourself between docstrings and comments.
l is a really bad variable name.I truly thought
l/rho was 1/rho at first and had a hard time mapping your code to the maths.Given that:
- You don't seem to use the returned value for
rhoandl;
- You “only” need
rhoand the quotientl/rho;
- You don't need to recompute
l/rhoat each iterations.
I suggest you precompute using:
#Calculate regression co-efficient and stepsize
r = np.amax(np.absolute(w))
l_over_rho = sqrt(2*log(n, 10)) * r
rho = 1/rand call
Sthresh(xhat + u, l_over_rho) latter on. You also only need to return zhat.Besides, you are dividing
l_over_rho by 2.0 in Sthresh; on top of not seeing that anywhere in the maths, you should also incorporate it in the precomputation for a faster loop.A
for loop is more pythonicSince you don't need the value of
i in your while loop, it is best to write:for _ in xrange(MAX_ITER):
#x minimisation step via posterier OLS
xhat = Q.dot(Aty + rho*(zhat - u))
#z minimisation via soft-thresholding
zhat = Sthresh(xhat + u, l_over_rho)
#mulitplier update
u = u + xhat - zhatto better emphasize the fact that you won't use the iteration value.
Separate computation and presentation
I'd recommend creating a function for plotting the results and a function for your tests. That way, it would be easier to jump into an interactive session and test your function with alternatives input values:
def test():
A = np.random.randn(50, 200)
num_non_zeros = 10
positions = np.random.randint(0, 200, num_non_zeros)
amplitudes = 100*np.random.randn(num_non_zeros, 1)
x = np.zeros((200, 1))
x[positions] = amplitudes
y = A.dot(x) + np.random.randn(50, 1)
plot(x, ADMM(A,y)) #given that ADMM only 'return zhat' now
def plot(original, computed):
plt.plot(original, label='Original')
plt.plot(computed, label = 'Estimate')
plt.legend(loc = 'upper right')
plt.show()
if __name__ == "__main__":
test()[Optimization] Method lookups are faster with local variables
The
Sthresh call incur overheads that you can easily remove since it is a one liner. In the same vein Q.dot, np.sign, np.maximum, and np.absolute have to be resolved at each loop iteration. You will save some time using local variable as alias to these functions:Q_dot = Q.dot
sign = np.sign
maximum = np.maximum
absolute = np.absolute
for _ in xrange(MAX_ITER):
#x minimisation step via posterier OLS
xhat = Q_dot(Aty + rho*(zhat - u))
#z minimisation via soft-thresholding
u = xhat + u
zhat = sign(u)*maximum(0, absolute(u)-l_over_rho/2.0) # do we even need the '/2.0' part ? see comment on 'l'
#mulitplier update
u = u - zhatFull modifications (with a tiny bit of variable renaming)
```
import numpy as np
import matplotlib.pyplot as plt
from math import sqrt, log
def ADMM(A, y):
"""Alternating Direction Method of Multipliers
This is a python implementation of the Alternating Direction
Method of Multipliers - a method of constrained optimisation
that is used widely in statistics (http://stanford.edu/~boyd/admm.html).
This is simplified version, specifically for the LASSO
"""
m, n = A.shape
A_t_A = A.T.dot(A)
w, v = np.linalg.eig(A_t_A)
MAX_ITER = 10000
#Function to caluculate min 1/2(y - Ax) + l||x||
#via alternating direction methods
x_hat = np.zeros([n, 1])
z_hat = np.zeros([n, 1])
u = np.zeros([n, 1])
#Calculate regression co-efficient and stepsize
r = np.amax(np.absolute(w))
l_over_rho = sqrt(2log(n, 10)) r / 2.0 # I might be wrong here
rho = 1/r
#Pre-compute to save some multiplications
A_t_y = A.T.dot(y)
Q = A_t_A + rho * np.identity(n)
Q = np.linalg.inv(Q)
Q_dot = Q.dot
sign = np.sign
maximum = np.maximum
absolute = np.absolute
for _ in xrange(MAX_ITER):
#x minimisation step via posterier OLS
x_hat = Q_dot(A_t_y + rho*(z_hat - u))
#z minimisation via soft-thresholding
u = x_hat + u
z_hat = sign(u) * maximum(0, absolute(u)-l_over_rho)
#mulitplier update
u = u - z_hat
return z_hat
def test(m=50, n=200):
"""Test the ADMM method with randomly generated matrices and vectors"""
A = np.random.randn(m, n)
num_non_zeros = 10
positions = np.random.randint(0, n, num_non_zeros)
amplitudes = 100*np.random.randn(num_non_zeros, 1)
x = np.zeros((n, 1))
x[positions] = amplitudes
y = A.dot(x) + np.random.randn(m, 1)
plot(x, ADMM(A,y))
def plot(original, computed):
"""Plot two vectors to compare their values"""
plt.plot(or
Code Snippets
#Calculate regression co-efficient and stepsize
r = np.amax(np.absolute(w))
l_over_rho = sqrt(2*log(n, 10)) * r
rho = 1/rfor _ in xrange(MAX_ITER):
#x minimisation step via posterier OLS
xhat = Q.dot(Aty + rho*(zhat - u))
#z minimisation via soft-thresholding
zhat = Sthresh(xhat + u, l_over_rho)
#mulitplier update
u = u + xhat - zhatdef test():
A = np.random.randn(50, 200)
num_non_zeros = 10
positions = np.random.randint(0, 200, num_non_zeros)
amplitudes = 100*np.random.randn(num_non_zeros, 1)
x = np.zeros((200, 1))
x[positions] = amplitudes
y = A.dot(x) + np.random.randn(50, 1)
plot(x, ADMM(A,y)) #given that ADMM only 'return zhat' now
def plot(original, computed):
plt.plot(original, label='Original')
plt.plot(computed, label = 'Estimate')
plt.legend(loc = 'upper right')
plt.show()
if __name__ == "__main__":
test()Q_dot = Q.dot
sign = np.sign
maximum = np.maximum
absolute = np.absolute
for _ in xrange(MAX_ITER):
#x minimisation step via posterier OLS
xhat = Q_dot(Aty + rho*(zhat - u))
#z minimisation via soft-thresholding
u = xhat + u
zhat = sign(u)*maximum(0, absolute(u)-l_over_rho/2.0) # do we even need the '/2.0' part ? see comment on 'l'
#mulitplier update
u = u - zhatimport numpy as np
import matplotlib.pyplot as plt
from math import sqrt, log
def ADMM(A, y):
"""Alternating Direction Method of Multipliers
This is a python implementation of the Alternating Direction
Method of Multipliers - a method of constrained optimisation
that is used widely in statistics (http://stanford.edu/~boyd/admm.html).
This is simplified version, specifically for the LASSO
"""
m, n = A.shape
A_t_A = A.T.dot(A)
w, v = np.linalg.eig(A_t_A)
MAX_ITER = 10000
#Function to caluculate min 1/2(y - Ax) + l||x||
#via alternating direction methods
x_hat = np.zeros([n, 1])
z_hat = np.zeros([n, 1])
u = np.zeros([n, 1])
#Calculate regression co-efficient and stepsize
r = np.amax(np.absolute(w))
l_over_rho = sqrt(2*log(n, 10)) * r / 2.0 # I might be wrong here
rho = 1/r
#Pre-compute to save some multiplications
A_t_y = A.T.dot(y)
Q = A_t_A + rho * np.identity(n)
Q = np.linalg.inv(Q)
Q_dot = Q.dot
sign = np.sign
maximum = np.maximum
absolute = np.absolute
for _ in xrange(MAX_ITER):
#x minimisation step via posterier OLS
x_hat = Q_dot(A_t_y + rho*(z_hat - u))
#z minimisation via soft-thresholding
u = x_hat + u
z_hat = sign(u) * maximum(0, absolute(u)-l_over_rho)
#mulitplier update
u = u - z_hat
return z_hat
def test(m=50, n=200):
"""Test the ADMM method with randomly generated matrices and vectors"""
A = np.random.randn(m, n)
num_non_zeros = 10
positions = np.random.randint(0, n, num_non_zeros)
amplitudes = 100*np.random.randn(num_non_zeros, 1)
x = np.zeros((n, 1))
x[positions] = amplitudes
y = A.dot(x) + np.random.randn(m, 1)
plot(x, ADMM(A,y))
def plot(original, computed):
"""Plot two vectors to compare their values"""
plt.plot(original, label='Original')
plt.plot(computed, label='Estimate')
plt.legend(loc='upper right')
plt.show()
if __name__ == "__main__":
test()Context
StackExchange Code Review Q#108263, answer score: 6
Revisions (0)
No revisions yet.