patterncsharpMinor
Implementation of a sparse Markov Chain
Viewed 0 times
implementationsparsemarkovchain
Problem
I need to create a sparse Markov chain. It is supposed to receive text, so the number of rows or columns can easily go up to 20000. Besides, if I want to consider higher orders of the Markov chain (creating pairs of consecutive words) the dimension can become much bigger. Hence the need to have something sparse.
I added the constraint to have a "uniform prior" on the transitions (so as to avoid having infinite log likelihood).
I am not sure this is the cleanest way to proceed.
```
using System.Collections.Generic;
using System;
namespace rossum.Machine.Learning.Markov
{
public class SparseMarkovChain
{
private Dictionary> _sparseMC = new Dictionary>();
private Dictionary _countEltLeaving = new Dictionary();
private int _size = 0;
public double GetTransition(T p1, T p2)
{
if (_sparseMC.ContainsKey(p1))
{
if (_sparseMC[p1].ContainsKey(p2))
return (1f + _sparseMC[p1][p2]) / (_countEltLeaving[p1] + _size);
else
return 1f / (_countEltLeaving[p1] + _size);
}
else
return 1f / _size;
}
public void AddTransition(T p1, T p2)
{
if (_sparseMC.ContainsKey(p1))
{
_countEltLeaving[p1]++;
if (_sparseMC[p1].ContainsKey(p2))
_sparseMC[p1][p2] += 1;
else
_sparseMC[p1].Add(p2, 1);
}
else
{
_size++;
if (!_sparseMC.ContainsKey(p2))
_size++;
Dictionary nd = new Dictionary();
nd.Add(p2, 1);
_sparseMC.Add(p1, nd);
_countEltLeaving.Add(p1, 1);
}
}
public double LogLikelihood(T[] path)
{
double res = 0;
for (int i = 1; i < path.Length; i++)
res +=
I added the constraint to have a "uniform prior" on the transitions (so as to avoid having infinite log likelihood).
I am not sure this is the cleanest way to proceed.
```
using System.Collections.Generic;
using System;
namespace rossum.Machine.Learning.Markov
{
public class SparseMarkovChain
{
private Dictionary> _sparseMC = new Dictionary>();
private Dictionary _countEltLeaving = new Dictionary();
private int _size = 0;
public double GetTransition(T p1, T p2)
{
if (_sparseMC.ContainsKey(p1))
{
if (_sparseMC[p1].ContainsKey(p2))
return (1f + _sparseMC[p1][p2]) / (_countEltLeaving[p1] + _size);
else
return 1f / (_countEltLeaving[p1] + _size);
}
else
return 1f / _size;
}
public void AddTransition(T p1, T p2)
{
if (_sparseMC.ContainsKey(p1))
{
_countEltLeaving[p1]++;
if (_sparseMC[p1].ContainsKey(p2))
_sparseMC[p1][p2] += 1;
else
_sparseMC[p1].Add(p2, 1);
}
else
{
_size++;
if (!_sparseMC.ContainsKey(p2))
_size++;
Dictionary nd = new Dictionary();
nd.Add(p2, 1);
_sparseMC.Add(p1, nd);
_countEltLeaving.Add(p1, 1);
}
}
public double LogLikelihood(T[] path)
{
double res = 0;
for (int i = 1; i < path.Length; i++)
res +=
Solution
If you're using
Your first method would then become:
Your naming could be improved. The "MC" in
These three lines could be a single one:
versus:
Use braces everywhere to avoid introducing bugs, even for code like this:
ContainsKey, you're probably doing it wrong and you need to use Dictionary.TryGetValue Method (TKey, TValue).Your first method would then become:
public double GetTransition(T p1, T p2)
{
Dictionary p1Value;
if (!_sparseMarkovChain.TryGetValue(p1, out p1Value))
{
return 1f/_size;
}
int p2Value;
if (p1Value.TryGetValue(p2, out p2Value))
{
return (1f + p2Value)/(_countEltLeaving[p1] + _size);
}
return 1f/(_countEltLeaving[p1] + _size);
}Your naming could be improved. The "MC" in
_sparseMC makes sense in context, but why not write "MarkovChain" in full? p1 and p2 aren't clear to me, but I don't know the convention.These three lines could be a single one:
Dictionary nd = new Dictionary();
nd.Add(p2, 1);
_sparseMC.Add(p1, nd);versus:
_sparseMC.Add(p1, new Dictionary{{ p2, 1 }});Use braces everywhere to avoid introducing bugs, even for code like this:
if (_sparseMC[p1].ContainsKey(p2))
{
_sparseMC[p1][p2] += 1;
}
else
{
_sparseMC[p1].Add(p2, 1);
}_sparseMC and _countEltLeaving can be made readonly.Code Snippets
public double GetTransition(T p1, T p2)
{
Dictionary<T, int> p1Value;
if (!_sparseMarkovChain.TryGetValue(p1, out p1Value))
{
return 1f/_size;
}
int p2Value;
if (p1Value.TryGetValue(p2, out p2Value))
{
return (1f + p2Value)/(_countEltLeaving[p1] + _size);
}
return 1f/(_countEltLeaving[p1] + _size);
}Dictionary<T, int> nd = new Dictionary<T, int>();
nd.Add(p2, 1);
_sparseMC.Add(p1, nd);_sparseMC.Add(p1, new Dictionary<T, int>{{ p2, 1 }});if (_sparseMC[p1].ContainsKey(p2))
{
_sparseMC[p1][p2] += 1;
}
else
{
_sparseMC[p1].Add(p2, 1);
}Context
StackExchange Code Review Q#108384, answer score: 5
Revisions (0)
No revisions yet.