patternpythonMinor
Statistical samples and distributions
Viewed 0 times
andstatisticalsamplesdistributions
Problem
It is usually inefficient to find bugs in a longer code. Unfortunately, after spending too much time in debugging my code, I realize that a good programming habit is important. Please give me some advice about codestyle, design... anything important to write high-quality code.
```
import numpy as np
from numpy.random import standard_normal, chisquare, multivariate_normal, dirichlet, multinomial
from numpy.linalg import cholesky, inv
from math import sqrt
import math
class GibbsSampler(object):
"""Gibbs sampler for finite Gaussian mixture model
Given a set of hyperparameters and observations, run Gibbs sampler to estimate the parameters of the model
"""
def __init__(self, hyp_pi, mu0, kappa0, T0, nu0, y, prior_z):
"""Initialize the Gibbs sampler
@para hyp_pi: hyperparameter of pi
@para mu0, kappa0: parameter of Normal-Wishart distribution
@para T0, nu0: parameter of Normal-Wishart distribution
@para y: samples draw from Normal distributions
"""
self.hyp_pi = hyp_pi
self.mu0 = mu0
self.kappa0 = kappa0
self.T0 = T0
self.nu0 = nu0
self.y = y
self.prior_z = prior_z
def _clusters(self):
return len(self.hyp_pi)
def _dim(self):
"""
Dimension of the data
"""
return len(mu0)
def _numbers(self):
return len(self.y)
def estimate_clusters(self, iterations, burn_in, lag):
"""
"""
estimated_clusters = np.zeros(self._numbers(), float)
for iteration, z, pi, mu, sigma in self.run(iterations, burn_in, lag):
# print "Precision = %f" % self._estimate_precision(z)
count = [z.count(k) for k in range(self._clusters())]
print count
if iteration % 10 == 0:
print "iteration = %d" %iteration
def _estimate_precision(self, z):
numbers = self._numbers()
count = 0
for i in range(number
```
import numpy as np
from numpy.random import standard_normal, chisquare, multivariate_normal, dirichlet, multinomial
from numpy.linalg import cholesky, inv
from math import sqrt
import math
class GibbsSampler(object):
"""Gibbs sampler for finite Gaussian mixture model
Given a set of hyperparameters and observations, run Gibbs sampler to estimate the parameters of the model
"""
def __init__(self, hyp_pi, mu0, kappa0, T0, nu0, y, prior_z):
"""Initialize the Gibbs sampler
@para hyp_pi: hyperparameter of pi
@para mu0, kappa0: parameter of Normal-Wishart distribution
@para T0, nu0: parameter of Normal-Wishart distribution
@para y: samples draw from Normal distributions
"""
self.hyp_pi = hyp_pi
self.mu0 = mu0
self.kappa0 = kappa0
self.T0 = T0
self.nu0 = nu0
self.y = y
self.prior_z = prior_z
def _clusters(self):
return len(self.hyp_pi)
def _dim(self):
"""
Dimension of the data
"""
return len(mu0)
def _numbers(self):
return len(self.y)
def estimate_clusters(self, iterations, burn_in, lag):
"""
"""
estimated_clusters = np.zeros(self._numbers(), float)
for iteration, z, pi, mu, sigma in self.run(iterations, burn_in, lag):
# print "Precision = %f" % self._estimate_precision(z)
count = [z.count(k) for k in range(self._clusters())]
print count
if iteration % 10 == 0:
print "iteration = %d" %iteration
def _estimate_precision(self, z):
numbers = self._numbers()
count = 0
for i in range(number
Solution
import numpy as np
from numpy.random import standard_normal, chisquare, multivariate_normal, dirichlet, multinomial
from numpy.linalg import cholesky, invYou are importing a lot of stuff here, and you aren't even using much of it. I guess not importing these names unless you refer to them a lot.
from math import sqrt
import mathWhen using numpy, you generally don't want stuff from math. You should use the numpy versions.
class GibbsSampler(object):
"""Gibbs sampler for finite Gaussian mixture model
Given a set of hyperparameters and observations, run Gibbs sampler to estimate the parameters of the model
"""
def __init__(self, hyp_pi, mu0, kappa0, T0, nu0, y, prior_z):
"""Initialize the Gibbs sampler
@para hyp_pi: hyperparameter of pi
@para mu0, kappa0: parameter of Normal-Wishart distribution
@para T0, nu0: parameter of Normal-Wishart distribution
@para y: samples draw from Normal distributions
"""You don't have a description of prior_z. You also have a lot of parameters here. Four of them describe the Normal-Wishart distribution suggesting perhaps that should be its own class.
self.hyp_pi = hyp_pi
self.mu0 = mu0
self.kappa0 = kappa0
self.T0 = T0
self.nu0 = nu0
self.y = y
self.prior_z = prior_z
def _clusters(self):
return len(self.hyp_pi)Rather than this, I suggest adding
self._clusters = len(self.hyp_pi) to __init__. def _dim(self):
"""
Dimension of the data
"""
return len(mu0)
def _numbers(self):
return len(self.y)
def estimate_clusters(self, iterations, burn_in, lag):
"""
"""Empty docstring, why?
estimated_clusters = np.zeros(self._numbers(), float)You create this, but don't seem to do anything with it
for iteration, z, pi, mu, sigma in self.run(iterations, burn_in, lag):That's a lot of elements in a tuple. your ignore most of them which raises questions
# print "Precision = %f" % self._estimate_precision(z)
count = [z.count(k) for k in range(self._clusters())]
print countCould you use a collections.Counter here?
if iteration % 10 == 0:
print "iteration = %d" %iterationYou are mixing output and logic. ITs better to have this class do the calculations, and the calling code print stuff.
def _estimate_precision(self, z):
numbers = self._numbers()
count = 0
for i in range(numbers):
if self.prior_z[i] == z[i]:
count += 1Use numpy's vector routines for this kind of thing. You should be doing something like:
count = (self.prior_z == z).sum()
return float(count)/float(numbers)Add
from __future__ import division to the beginning of your script rather then explicitly forcing everything to float.def run(self, iterations, burn_in, lag):
"""
Run the Gibbs sampler
"""
self._initialize_gibbs_sampler()My general rule is that all initialization happens in a constructor. The gibbs sample should be initalized in the constructor.
lag_counter = lag
iteration = 1
while iteration 0:
burn_in -= 1
else:
if lag_counter > 0:
lag_counter -= 1
else:
lag_counter = lag
yield iteration, self.z, self.pi, self.mu, self.sigma
iteration += 1Your looping logic isn't very clear. I suggest something like:
for _ in xrange(burn_in):
self._iterate_gibbs_sampler()
for _ in xrange(burn_in, iterations, lag):
for _ in xrange(lag):
self._iterate_gibbs_sampler()
yield stuffI think that more clearly conveys what the code is doing.
```
def _initialize_gibbs_sampler(self):
"""
This sets the initial values of the parameters.
"""
clusters = self._clusters()
numbers = self._numbers()
self.mu = np.array([self._sampling_Normal_Wishart()[0] for _ in range(clusters)])
self.pi = dirichlet(hyp_pi, 1)[0]
self.sigma = np.array([self._sampling_Normal_Wishart()[1] for _ in range(clusters)])
self.z = np.array([self._multinomial_samples(pi) for _ in range(numbers)])
def _sampling_Normal_Wishart(self):
"""
Sampling mu and sigma from Normal-Wishart distribution.
"""
# Create the matrix A of the Bartlett decomposition from a p-variate Wishart distribution
d = self._dim()
chol = np.linalg.cholesky(self.T0)
if (self.nu0 <= 81+d) and (self.nu0 == round(self.nu0)):
X = np.dot(chol, np.random.normal(size = (d, self.nu0)))
else:
A = np.diag(np.sqrt(np.random.chisquare(self.nu0 - np.arange(0, d), size = d)))
A[np.tri(d, k=-1, dtype = bool)
Code Snippets
import numpy as np
from numpy.random import standard_normal, chisquare, multivariate_normal, dirichlet, multinomial
from numpy.linalg import cholesky, invfrom math import sqrt
import mathclass GibbsSampler(object):
"""Gibbs sampler for finite Gaussian mixture model
Given a set of hyperparameters and observations, run Gibbs sampler to estimate the parameters of the model
"""
def __init__(self, hyp_pi, mu0, kappa0, T0, nu0, y, prior_z):
"""Initialize the Gibbs sampler
@para hyp_pi: hyperparameter of pi
@para mu0, kappa0: parameter of Normal-Wishart distribution
@para T0, nu0: parameter of Normal-Wishart distribution
@para y: samples draw from Normal distributions
"""self.hyp_pi = hyp_pi
self.mu0 = mu0
self.kappa0 = kappa0
self.T0 = T0
self.nu0 = nu0
self.y = y
self.prior_z = prior_z
def _clusters(self):
return len(self.hyp_pi)def _dim(self):
"""
Dimension of the data
"""
return len(mu0)
def _numbers(self):
return len(self.y)
def estimate_clusters(self, iterations, burn_in, lag):
"""
"""Context
StackExchange Code Review Q#28356, answer score: 8
Revisions (0)
No revisions yet.