Latent Dirichlet Allocation Posterior Inference
Problem
I have implemented Latent Dirichlet Allocation in Julia. Since the code is rather long, I have copied the essential part here; the complete code can be found on GitHub.
```
#=
LDA.jl
Adham Beyki, odinay@gmail.com
=#

##########################################
###### Latent Dirichlet Allocation ######
##########################################
type LDA{T}
    bayesian_component::T
    K::Int
    aa::Float64

    LDA{T}(c::T, K::Int64, aa::Float64) = new(c, K, aa)
end
LDA{T}(c::T, K::Int64, aa::Real) = LDA{typeof(c)}(c, K, convert(Float64, aa))

function Base.show(io::IO, lda::LDA)
    println(io, "Latent Dirichlet Allocation model with $(lda.K) $(typeof(lda.bayesian_component)) components")
end

function initialize_gibbs_sampler!(lda::LDA, zz::Vector{Vector{Int64}})
    # populates the cluster labels randomly
    n_groups = length(zz)
    n_group_j = [length(zz[jj]) for jj = 1:n_groups]
    for jj = 1:n_groups
        zz[jj] = rand(1:lda.K, n_group_j[jj])
    end
end

function LDA_sample_pp{T1, T2}(
        bayesian_components::Vector{T1},
        xx::T2,
        nn::Array{Float64, 2},
        jj::Int64,
        aa::Float64)

    K = length(bayesian_components)
    pp = zeros(Float64, K)
    @inbounds for kk = 1:K
        pp[kk] = log(nn[jj, kk] + aa) + logpredictive(bayesian_components[kk], xx)
    end
    normalize_pp!(pp)
    return sample(pp)
end

function collapsed_gibbs_sampler!{T1, T2}(
        lda::LDA{T1},
        xx::Vector{Vector{T2}},
        zz::Vector{Vector{Int64}},
        n_burnins::Int64, n_lags::Int64, n_samples::Int64)

    n_groups = length(xx)
    n_group_j = [length(zz[jj]) for jj = 1:n_groups]
    nn = zeros(Float64, n_groups, lda.K)
    lda_aa = fill(lda.aa, lda.K)
    n_iterations = n_burnins + (n_samples)*(n_lags+1)
    bayesian_components = [deepcopy(lda.bayesian_component) for k = 1:lda.K]
    pp = zeros(Float64, lda.K)
    tic()
    for jj = 1:n_groups
        for ii = 1:n_group_j[jj]
            kk = zz[jj][ii]
            additem(bayesian_components[kk], xx[jj][ii])
            nn[jj, kk] += 1
            # ... (rest of the sampler omitted; the complete code is on GitHub)
```
Solution
I am a newbie to Julia, so I cannot comment on good Julian ways of optimization. But when I tried running your code, I noticed that the major part of the computational time is spent on the evaluation of exp() and log() rather than on array handling etc. Below, I changed the iteration numbers in demo_LDA.jl to perform the test quickly:
```
#... In demo_LDA.jl
collapsed_gibbs_sampler!(lda, xx, zz, 0, 0, 1)
@time collapsed_gibbs_sampler!(lda, xx, zz, 0, 0, 5)
```

The result of the original code (using Julia 0.4.0-pre, see below) is

```
iteration: 1, number of components: 20, elapsed time: 0.009839647
iteration: 1, number of components: 20, elapsed time: 0.009694991
iteration: 2, number of components: 20, elapsed time: 2.690772589
iteration: 3, number of components: 20, elapsed time: 2.670282622
iteration: 4, number of components: 20, elapsed time: 2.66571178
iteration: 5, number of components: 20, elapsed time: 2.666383962
 13.366990 seconds (5.00 M allocations: 1.043 GB, 0.23% gc time)
```

So it should take about 260-270 seconds for 100 iterations.
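As an aside, this kind of hot spot can be confirmed with Julia's built-in sampling profiler; a minimal sketch, reusing the demo call above:

```
# minimal sketch: sample where the time goes in one timed run
# (on Julia >= 0.7 this additionally needs `using Profile`)
@profile collapsed_gibbs_sampler!(lda, xx, zz, 0, 0, 5)
Profile.print()   # backtrace counts pile up in exp()/log() when they dominate
```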
Next, I modified the function posterior() in conjugates.jl to make the operation count a bit lower:

```
function posterior(me::Gaussian1DGaussian1D)
    xbar = me.mu
    if me.nn > 0
        # mu = me.v0*xbar / (me.vv/me.nn + me.v0) + (me.vv/me.nn) * me.m0 / (me.vv/me.nn + me.v0)
        # vv = (me.vv*me.v0/me.nn) / (me.vv/me.nn + me.v0)
        #>>> modified part (start)
        tmp1 = me.vv / me.nn
        tmp2 = 1.0 / ( tmp1 + me.v0 )
        mu = ( me.v0 * xbar + tmp1 * me.m0 ) * tmp2
        vv = me.v0 * tmp1 * tmp2
        #<<< (end)
    else
        mu = me.m0
        vv = me.v0
    end
    return Gaussian1D(mu, vv)
end
```
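As a quick sanity check that the rewrite is algebraically identical to the commented-out originals, here is a small standalone sketch with made-up parameter values:

```
# made-up test values standing in for me.m0, me.v0, me.vv, me.nn, me.mu
m0, v0, vv, nn, xbar = 0.5, 2.0, 1.5, 7.0, 1.2

# original form: the common divisor (vv/nn + v0) is computed three times
mu_old = v0*xbar / (vv/nn + v0) + (vv/nn) * m0 / (vv/nn + v0)
vv_old = (vv*v0/nn) / (vv/nn + v0)

# rewritten form: the reciprocal of the common divisor is computed once
tmp1 = vv / nn
tmp2 = 1.0 / (tmp1 + v0)
mu_new = (v0*xbar + tmp1*m0) * tmp2
vv_new = v0 * tmp1 * tmp2

@assert abs(mu_old - mu_new) < 1e-12
@assert abs(vv_old - vv_new) < 1e-12
```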
Then the result changed as

```
iteration: 1, number of components: 20, elapsed time: 0.009504789
iteration: 1, number of components: 20, elapsed time: 0.009608533
iteration: 2, number of components: 20, elapsed time: 2.351606166
...
iteration: 5, number of components: 20, elapsed time: 2.345334333
 11.742193 seconds (5.00 M allocations: 1.043 GB, 0.25% gc time)
```

Next, I modified logpredictive() in conjugates.jl such that

```
function logpredictive(me::Gaussian1DGaussian1D, xx::Float64)
    gg = posterior(me)
    mu = gg.mu
    vv = gg.vv
    # ll = exp(-(xx - mu)^2 / (2*(vv + me.vv))) / sqrt(2*pi * (vv + me.vv))
    # return log(ll)
    #>>> modified part (start)
    tmp = vv + me.vv
    val = -(xx - mu)^2 / (2 * tmp) - 0.5 * log( 2*pi * tmp )
    return val
    #<<< (end)
end
```
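The rewrite simply takes the logarithm analytically, using log(exp(a)/sqrt(b)) = a - 0.5*log(b), so one exp() and one sqrt() disappear from every call. A standalone check with made-up values:

```
# made-up test values standing in for xx, gg.mu, gg.vv and me.vv
xx, mu, vv, me_vv = 0.3, 0.1, 0.8, 1.5

tmp = vv + me_vv
ll  = exp(-(xx - mu)^2 / (2*tmp)) / sqrt(2*pi * tmp)   # original density
val = -(xx - mu)^2 / (2*tmp) - 0.5 * log(2*pi * tmp)   # rewritten log density

@assert abs(log(ll) - val) < 1e-12
```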
Then the result becomes

```
iteration: 1, number of components: 20, elapsed time: 0.009714609
iteration: 1, number of components: 20, elapsed time: 0.009562715
iteration: 2, number of components: 20, elapsed time: 1.409258263
...
iteration: 5, number of components: 20, elapsed time: 1.40165818
 7.022493 seconds (5.00 M allocations: 1.043 GB, 0.43% gc time)
```

Further, I changed the code artificially such that all the exp(x) and log(x) calls were removed (by simply replacing them with x) in the above routines as well as in sample() and LDA_sample_pp(). The result is

```
iteration: 1, number of components: 20, elapsed time: 0.009678183
iteration: 1, number of components: 20, elapsed time: 0.009703009
iteration: 2, number of components: 20, elapsed time: 0.49649945
...
iteration: 5, number of components: 20, elapsed time: 0.496507961
 2.492949 seconds (5.00 M allocations: 1.043 GB, 1.25% gc time)
```

This suggests that a rather large fraction of the computation is spent on exp() and log(). So I think that, to get a significant speed-up, it might be useful to reduce the net computational cost of the exp/log evaluations, in addition to any algorithmic improvement, parallel computation, or more Julian ways of optimization. (I also tried attaching @inline to some functions, but it did not give much improvement.)
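To reproduce this observation in isolation, here is a minimal standalone benchmark (the array size is arbitrary) comparing a plain arithmetic loop with the same loop calling log() once per element:

```
# compare plain arithmetic against a log() call per element
function log_share(n::Int)
    x = [1.0 + rand() for i = 1:n]
    s = 0.0
    @time for i = 1:n
        s += x[i] * 0.5      # arithmetic only
    end
    @time for i = 1:n
        s += log(x[i])       # one log() per element
    end
    return s
end

log_share(10)      # warm-up (exclude JIT compilation)
log_share(10^7)
```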
Below is the version info for my environment:

```
julia> versioninfo()
Julia Version 0.4.0-pre+7176
Commit 65d7954 (2015-09-04 06:48 UTC)
Platform Info:
  System: Linux (x86_64-unknown-linux-gnu)
  CPU: Intel(R) Xeon(R) CPU E5-2650 v2 @ 2.60GHz
  WORD_SIZE: 64
  BLAS: libopenblas (USE64BITINT DYNAMIC_ARCH NO_AFFINITY Sandybridge)
  LAPACK: libopenblas
  LIBM: libopenlibm
  LLVM: libLLVM-3.3
```

Edit
I have tried the Yeppp! library for fast vectorized math functions (the Julia wrapper is the Yeppp.jl package) with the following code.
```
using Yeppp

function test( n::Int )
    x = rand( n ) + 1.0
    @time y = exp( x )               # Julia's native exp()
    y_yep = zeros( n )
    @time Yeppp.exp!( y_yep, x )     # use the Yeppp library
    @show sum( abs( y_yep - y ) )    # check the difference
    # @show x[ 1:3 ]    # very large array, don't print all!
    # @show y[ 1:3 ]
    # @show y_yep[ 1:3 ]
end

test( 10 )      # warm-up
test( 10^6 )
```

The result is
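If Yeppp does win for the vector lengths involved, one natural place to apply it would be the normalization of the sampling weights. Below is only a sketch: normalize_pp! is not shown in the excerpt above, so I am assuming it follows the usual max-shift/exponentiate/renormalize pattern, and normalize_pp_yeppp! and its buffer argument are hypothetical names:

```
using Yeppp

# hypothetical variant of normalize_pp! that batches the exponentiation
# through Yeppp.exp!; assumes the original shifts by the max, exponentiates,
# and renormalizes a vector of log-weights
function normalize_pp_yeppp!(pp::Vector{Float64}, buf::Vector{Float64})
    m = maximum(pp)
    for kk = 1:length(pp)
        buf[kk] = pp[kk] - m     # shift by the max for numerical stability
    end
    Yeppp.exp!(pp, buf)          # batched: pp[kk] = exp(buf[kk])
    s = sum(pp)
    for kk = 1:length(pp)
        pp[kk] /= s              # renormalize to probabilities
    end
    return pp
end
```

Whether this pays off is not obvious: with only K = 20 components per call, the per-call overhead of dispatching to Yeppp may dominate the gain from the vectorized exp.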
Context
StackExchange Code Review Q#104137, answer score: 2