patterncsharpMinor
Thread-safe LRU Dictionary in C#
Viewed 0 times
dictionarythreadlrusafe
Problem
Can't seem to find one, so trying to build a very simple but fast implementation. Thought I would post on SO for review/feedback, and so that others can just copy/paste for their own use.
I'm using a Dictionary and LinkedList, with a non-granular lock. Basic benchmark included below:
```
public class LRUDictionary : IDictionary
{
private Dictionary>>_dict=
new Dictionary>>();
private LinkedList> _list =
new LinkedList>();
public int Max_Size { get; set; }
public LRUDictionary(int maxsize)
{
Max_Size = maxsize;
}
public void Add(TKey key, TValue value)
{
lock (_dict)
{
LinkedListNode> node;
if (_dict.TryGetValue(key, out node))
{
_list.Remove(node);
_list.AddFirst(node);
}
else
{
node = new LinkedListNode>(
new KeyValuePair(key, value));
_dict.Add(key, node);
_list.AddFirst(node);
}
if (_dict.Count > Max_Size)
{
var nodetoremove = _list.Last;
if (nodetoremove != null)
Remove(nodetoremove.Value.Key);
}
}
}
public bool Remove(TKey key)
{
lock (_dict)
{
LinkedListNode> removednode;
if (_dict.TryGetValue(key, out removednode))
{
_dict.Remove(key);
_list.Remove(removednode);
return true;
}
else
return false;
}
}
public bool TryGetValue(TKey key, out TValue value)
{
LinkedListNode> node;
bool result = false;
lock (_dict)
result = _dict.TryGetValue(key, out node);
if (node != null)
value = node.Value.Value;
else
value = default(TValue);
return result;
}
I'm using a Dictionary and LinkedList, with a non-granular lock. Basic benchmark included below:
```
public class LRUDictionary : IDictionary
{
private Dictionary>>_dict=
new Dictionary>>();
private LinkedList> _list =
new LinkedList>();
public int Max_Size { get; set; }
public LRUDictionary(int maxsize)
{
Max_Size = maxsize;
}
public void Add(TKey key, TValue value)
{
lock (_dict)
{
LinkedListNode> node;
if (_dict.TryGetValue(key, out node))
{
_list.Remove(node);
_list.AddFirst(node);
}
else
{
node = new LinkedListNode>(
new KeyValuePair(key, value));
_dict.Add(key, node);
_list.AddFirst(node);
}
if (_dict.Count > Max_Size)
{
var nodetoremove = _list.Last;
if (nodetoremove != null)
Remove(nodetoremove.Value.Key);
}
}
}
public bool Remove(TKey key)
{
lock (_dict)
{
LinkedListNode> removednode;
if (_dict.TryGetValue(key, out removednode))
{
_dict.Remove(key);
_list.Remove(removednode);
return true;
}
else
return false;
}
}
public bool TryGetValue(TKey key, out TValue value)
{
LinkedListNode> node;
bool result = false;
lock (_dict)
result = _dict.TryGetValue(key, out node);
if (node != null)
value = node.Value.Value;
else
value = default(TValue);
return result;
}
Solution
Whilst your code is thread safe, since you have a global lock protecting the dictionary (and the lock is relatively long lived), it will be a serious bottleneck under concurrent load. The principal scalability limit in concurrent applications is the exclusive resource lock.
The chart below shows throughput with an increasing degree of parallelism on a 16 core VM for concurrent LRU reads and writes. It compares an implementation of LRU that does not have a global lock, to an implementation fundamentally the same as your code (with a global lock).
In this test I set the concurrent dictionary concurrency level = number of threads. I believe the unusual dip at the beginning is caused by false sharing. It's old now, but gallery of processor cache effects has a nice description.
The implementation with better throughput is a thread safe pseudo LRU designed for concurrent workloads. Latency is very close to ConcurrentDictionary, ~10x faster than MemoryCache and hit rate is better than a conventional LRU algorithm (these are the other two factors that will determine performance). A full analysis provided in the github link below.
Usage of the ConcurrentLru looks like this:
GitHub: https://github.com/bitfaster/BitFaster.Caching
Below is the complete source code for the ClassicLRU tested above. It has shorter span locks and uses ConcurrentDictionary. It's not intended for production use (it may have bugs) - it's just to illustrate that even when taking narrow locks throughput is still bad under concurrent load.
```
public sealed class ClassicLru : ICache
{
private readonly int capacity;
private readonly ConcurrentDictionary> dictionary;
private readonly LinkedList linkedList = new LinkedList();
private long requestHitCount;
private long requestTotalCount;
public ClassicLru(int capacity)
: this(Defaults.ConcurrencyLevel, capacity, EqualityComparer.Default)
{
}
public ClassicLru(int concurrencyLevel, int capacity, IEqualityComparer comparer)
{
if (capacity >(concurrencyLevel, this.capacity + 1, comparer);
}
public int Count => this.linkedList.Count;
public double HitRatio => (double)requestHitCount / (double)requestTotalCount;
///
public bool TryGet(K key, out V value)
{
Interlocked.Increment(ref requestTotalCount);
LinkedListNode node;
if (dictionary.TryGetValue(key, out node))
{
LockAndMoveToEnd(node);
Interlocked.Increment(ref requestHitCount);
value = node.Value.Value;
return true;
}
value = default(V);
return false;
}
public V GetOrAdd(K key, Func valueFactory)
{
if (this.TryGet(key, out var value))
{
return value;
}
var node = new LinkedListNode(new LruItem(key, valueFactory(key)));
if (this.dictionary.TryAdd(key, node))
{
LinkedListNode first = null;
lock (this.linkedList)
{
if (linkedList.Count >= capacity)
{
first = linkedList.First;
linkedList.RemoveFirst();
}
linkedList.AddLast(node);
}
// Remove from the dictionary outside the lock. This means that the dictionary at this moment
// contains an item that is not in the linked list. If another thread fetches this item,
// LockAndMoveToEnd will ignore it, since it is detached. This means we potentially 'lose' an
// item just as it was about to move to the back of the LRU list and be preserved. The next request
// for the same key will be a miss. Dictionary and list are eventually consistent.
// However, all operations inside the lock are extremely fast, so contention is minimized.
if (first != null)
{
dictionary.TryRemove(first.Value.Key, out var removed);
if (removed.Value.Value is IDisposable d)
{
d.Dispose();
}
}
return node.Value.Value;
}
return this.GetOrAdd(key, valueFactory);
}
public bool TryRemove(K key)
{
if (dictionary.TryRemove(key, out var node))
{
// If the node has already been removed from the list, ignore.
// E.g. thread A reads x from the dictionary. Thread B adds a new item, removes x from
// the List & Dictionary. Now thread A will try to move x to the end of the list.
if (node.List != null)
{
lock (this.linkedList)
{
if (node.List != null)
{
linkedList.Re
The chart below shows throughput with an increasing degree of parallelism on a 16 core VM for concurrent LRU reads and writes. It compares an implementation of LRU that does not have a global lock, to an implementation fundamentally the same as your code (with a global lock).
In this test I set the concurrent dictionary concurrency level = number of threads. I believe the unusual dip at the beginning is caused by false sharing. It's old now, but gallery of processor cache effects has a nice description.
The implementation with better throughput is a thread safe pseudo LRU designed for concurrent workloads. Latency is very close to ConcurrentDictionary, ~10x faster than MemoryCache and hit rate is better than a conventional LRU algorithm (these are the other two factors that will determine performance). A full analysis provided in the github link below.
Usage of the ConcurrentLru looks like this:
int capacity = 666;
var lru = new ConcurrentLru(capacity);
var value = lru.GetOrAdd(1, (k) => new SomeItem(k));GitHub: https://github.com/bitfaster/BitFaster.Caching
Install-Package BitFaster.CachingBelow is the complete source code for the ClassicLRU tested above. It has shorter span locks and uses ConcurrentDictionary. It's not intended for production use (it may have bugs) - it's just to illustrate that even when taking narrow locks throughput is still bad under concurrent load.
```
public sealed class ClassicLru : ICache
{
private readonly int capacity;
private readonly ConcurrentDictionary> dictionary;
private readonly LinkedList linkedList = new LinkedList();
private long requestHitCount;
private long requestTotalCount;
public ClassicLru(int capacity)
: this(Defaults.ConcurrencyLevel, capacity, EqualityComparer.Default)
{
}
public ClassicLru(int concurrencyLevel, int capacity, IEqualityComparer comparer)
{
if (capacity >(concurrencyLevel, this.capacity + 1, comparer);
}
public int Count => this.linkedList.Count;
public double HitRatio => (double)requestHitCount / (double)requestTotalCount;
///
public bool TryGet(K key, out V value)
{
Interlocked.Increment(ref requestTotalCount);
LinkedListNode node;
if (dictionary.TryGetValue(key, out node))
{
LockAndMoveToEnd(node);
Interlocked.Increment(ref requestHitCount);
value = node.Value.Value;
return true;
}
value = default(V);
return false;
}
public V GetOrAdd(K key, Func valueFactory)
{
if (this.TryGet(key, out var value))
{
return value;
}
var node = new LinkedListNode(new LruItem(key, valueFactory(key)));
if (this.dictionary.TryAdd(key, node))
{
LinkedListNode first = null;
lock (this.linkedList)
{
if (linkedList.Count >= capacity)
{
first = linkedList.First;
linkedList.RemoveFirst();
}
linkedList.AddLast(node);
}
// Remove from the dictionary outside the lock. This means that the dictionary at this moment
// contains an item that is not in the linked list. If another thread fetches this item,
// LockAndMoveToEnd will ignore it, since it is detached. This means we potentially 'lose' an
// item just as it was about to move to the back of the LRU list and be preserved. The next request
// for the same key will be a miss. Dictionary and list are eventually consistent.
// However, all operations inside the lock are extremely fast, so contention is minimized.
if (first != null)
{
dictionary.TryRemove(first.Value.Key, out var removed);
if (removed.Value.Value is IDisposable d)
{
d.Dispose();
}
}
return node.Value.Value;
}
return this.GetOrAdd(key, valueFactory);
}
public bool TryRemove(K key)
{
if (dictionary.TryRemove(key, out var node))
{
// If the node has already been removed from the list, ignore.
// E.g. thread A reads x from the dictionary. Thread B adds a new item, removes x from
// the List & Dictionary. Now thread A will try to move x to the end of the list.
if (node.List != null)
{
lock (this.linkedList)
{
if (node.List != null)
{
linkedList.Re
Code Snippets
int capacity = 666;
var lru = new ConcurrentLru<int, SomeItem>(capacity);
var value = lru.GetOrAdd(1, (k) => new SomeItem(k));Install-Package BitFaster.Cachingpublic sealed class ClassicLru<K, V> : ICache<K, V>
{
private readonly int capacity;
private readonly ConcurrentDictionary<K, LinkedListNode<LruItem>> dictionary;
private readonly LinkedList<LruItem> linkedList = new LinkedList<LruItem>();
private long requestHitCount;
private long requestTotalCount;
public ClassicLru(int capacity)
: this(Defaults.ConcurrencyLevel, capacity, EqualityComparer<K>.Default)
{
}
public ClassicLru(int concurrencyLevel, int capacity, IEqualityComparer<K> comparer)
{
if (capacity < 3)
{
throw new ArgumentOutOfRangeException("Capacity must be greater than or equal to 3.");
}
if (comparer == null)
{
throw new ArgumentNullException(nameof(comparer));
}
this.capacity = capacity;
this.dictionary = new ConcurrentDictionary<K, LinkedListNode<LruItem>>(concurrencyLevel, this.capacity + 1, comparer);
}
public int Count => this.linkedList.Count;
public double HitRatio => (double)requestHitCount / (double)requestTotalCount;
///<inheritdoc/>
public bool TryGet(K key, out V value)
{
Interlocked.Increment(ref requestTotalCount);
LinkedListNode<LruItem> node;
if (dictionary.TryGetValue(key, out node))
{
LockAndMoveToEnd(node);
Interlocked.Increment(ref requestHitCount);
value = node.Value.Value;
return true;
}
value = default(V);
return false;
}
public V GetOrAdd(K key, Func<K, V> valueFactory)
{
if (this.TryGet(key, out var value))
{
return value;
}
var node = new LinkedListNode<LruItem>(new LruItem(key, valueFactory(key)));
if (this.dictionary.TryAdd(key, node))
{
LinkedListNode<LruItem> first = null;
lock (this.linkedList)
{
if (linkedList.Count >= capacity)
{
first = linkedList.First;
linkedList.RemoveFirst();
}
linkedList.AddLast(node);
}
// Remove from the dictionary outside the lock. This means that the dictionary at this moment
// contains an item that is not in the linked list. If another thread fetches this item,
// LockAndMoveToEnd will ignore it, since it is detached. This means we potentially 'lose' an
// item just as it was about to move to the back of the LRU list and be preserved. The next request
// for the same key will be a miss. Dictionary and list are eventually consistent.
// However, all operations inside the lock are extremely fast, so contention is minimized.
if (first != null)
{
dictionary.TryRemove(first.Value.Key, out var removed);
if (removed.Value.Value is IDisposable d)
Context
StackExchange Code Review Q#16326, answer score: 3
Revisions (0)
No revisions yet.