patterncsharpMinor
Optimization of comparing two collections and get the changes
Viewed 0 times
thecomparingoptimizationtwogetchangescollectionsand
Problem
I use the following code to get the changes between two collections. Objects are "joined" using a primary key. Any tips on performance issues or other optimizations appreciated.
```
///
/// Gets the changes [Deleted, changed, inserted] comparing this collection to another.
///
/// The source collection.
/// The remote collection to comare agains.
/// The primary key selector function
///
public static ChangeResult CompareTo(this IEnumerable local, IEnumerable remote, Func keySelector)
{
if (local == null)
throw new ArgumentNullException("local");
if (remote == null)
throw new ArgumentNullException("remote");
if (keySelector == null)
throw new ArgumentNullException("keySelector");
var remoteKeyValues = remote.ToDictionary(keySelector);
var deleted = new List();
var changed = new List();
var localKeys = new HashSet();
foreach (var localItem in local)
{
var localKey = keySelector(localItem);
localKeys.Add(localKey);
/* Check if primary key exists in both local and remote
* and if so check if changed, if not it has been deleted
*/
TSource changeCandidate;
if (remoteKeyValues.TryGetValue(localKey, out changeCandidate))
{
if (!changeCandidate.Equals(localItem))
changed.Add(changeCandidate);
}
else
{
deleted.Add(localItem);
}
}
var inserted = remoteKeyValues
.Where(x => !localKeys.Contains(x.Key))
.Select(x => x.Value)
.ToList();
return new ChangeResult(deleted, changed, inserted);
}
///
/// Immutable class containing changes
///
public class ChangeResult
{
public ChangeResult(IList deleted, IList changed, IList inserted)
{
Deleted = new ReadOnlyCollection(deleted);
Changed = new ReadOnlyCollection(changed);
Inserted = new ReadOnlyCollection(inserted);
}
```
///
/// Gets the changes [Deleted, changed, inserted] comparing this collection to another.
///
/// The source collection.
/// The remote collection to comare agains.
/// The primary key selector function
///
public static ChangeResult CompareTo(this IEnumerable local, IEnumerable remote, Func keySelector)
{
if (local == null)
throw new ArgumentNullException("local");
if (remote == null)
throw new ArgumentNullException("remote");
if (keySelector == null)
throw new ArgumentNullException("keySelector");
var remoteKeyValues = remote.ToDictionary(keySelector);
var deleted = new List();
var changed = new List();
var localKeys = new HashSet();
foreach (var localItem in local)
{
var localKey = keySelector(localItem);
localKeys.Add(localKey);
/* Check if primary key exists in both local and remote
* and if so check if changed, if not it has been deleted
*/
TSource changeCandidate;
if (remoteKeyValues.TryGetValue(localKey, out changeCandidate))
{
if (!changeCandidate.Equals(localItem))
changed.Add(changeCandidate);
}
else
{
deleted.Add(localItem);
}
}
var inserted = remoteKeyValues
.Where(x => !localKeys.Contains(x.Key))
.Select(x => x.Value)
.ToList();
return new ChangeResult(deleted, changed, inserted);
}
///
/// Immutable class containing changes
///
public class ChangeResult
{
public ChangeResult(IList deleted, IList changed, IList inserted)
{
Deleted = new ReadOnlyCollection(deleted);
Changed = new ReadOnlyCollection(changed);
Inserted = new ReadOnlyCollection(inserted);
}
Solution
I would first make
and because it's good design to program to interfaces, I extracted one:
Then, in your
ChangeResult truly immutable using sealed and readonly. It's unfortunate that we don't have true immutable syntax for automatic properties, but space is cheap, they say:///
/// Immutable class containing changes
///
public sealed class ChangeResult : IChangeResult
{
private readonly ReadOnlyCollection deleted;
private readonly ReadOnlyCollection changed;
private readonly ReadOnlyCollection inserted;
public ChangeResult(IList deleted, IList changed, IList inserted)
{
this.deleted = new ReadOnlyCollection(deleted);
this.changed = new ReadOnlyCollection(changed);
this.inserted = new ReadOnlyCollection(inserted);
}
public IList Deleted
{
get
{
return this.deleted;
}
}
public IList Changed
{
get
{
return this.changed;
}
}
public IList Inserted
{
get
{
return this.inserted;
}
}
}and because it's good design to program to interfaces, I extracted one:
public interface IChangeResult
{
IList Deleted
{
get;
}
IList Changed
{
get;
}
IList Inserted
{
get;
}
}Then, in your
CompareTo() method, I have it return said interface. Also, I expand the deleted and changed lists to the capacity of the local enumerable (converted to a list locally so an expensive enumerable doesn't get iterated multiple times). This may be overkill in some cases, but in the worst cases, you wind up eliminating potentially expensive list expansion reallocations. A classic speed-vs-space tradeoff. I would then posit that this is actually a great algorithm to go parallel with, as long as you don't care about the order in the three lists:///
/// Gets the changes [Deleted, changed, inserted] comparing this collection to another.
///
///
///
/// The source collection.
/// The remote collection to compare against.
/// The primary key selector function
///
public static IChangeResult CompareTo(
this IEnumerable local,
IEnumerable remote,
Func keySelector)
{
if (local == null)
{
throw new ArgumentNullException("local");
}
if (remote == null)
{
throw new ArgumentNullException("remote");
}
if (keySelector == null)
{
throw new ArgumentNullException("keySelector");
}
local = local.ToList();
var remoteKeyValues = remote.ToDictionary(keySelector);
var deleted = new List(local.Count());
var changed = new List(local.Count());
var localKeys = new HashSet();
Parallel.ForEach(
local,
localItem =>
{
var localKey = keySelector(localItem);
lock (localKeys)
{
localKeys.Add(localKey);
}
/* Check if primary key exists in both local and remote
* and if so check if changed, if not it has been deleted
*/
TSource changeCandidate;
if (remoteKeyValues.TryGetValue(localKey, out changeCandidate))
{
if (changeCandidate.Equals(localItem))
{
return;
}
lock (changed)
{
changed.Add(changeCandidate);
}
}
else
{
lock (deleted)
{
deleted.Add(localItem);
}
}
});
var inserted = remoteKeyValues
.AsParallel()
.Where(x => !localKeys.Contains(x.Key))
.Select(x => x.Value)
.ToList();
return new ChangeResult(deleted, changed, inserted);
}Code Snippets
/// <summary>
/// Immutable class containing changes
/// </summary>
public sealed class ChangeResult<T> : IChangeResult<T>
{
private readonly ReadOnlyCollection<T> deleted;
private readonly ReadOnlyCollection<T> changed;
private readonly ReadOnlyCollection<T> inserted;
public ChangeResult(IList<T> deleted, IList<T> changed, IList<T> inserted)
{
this.deleted = new ReadOnlyCollection<T>(deleted);
this.changed = new ReadOnlyCollection<T>(changed);
this.inserted = new ReadOnlyCollection<T>(inserted);
}
public IList<T> Deleted
{
get
{
return this.deleted;
}
}
public IList<T> Changed
{
get
{
return this.changed;
}
}
public IList<T> Inserted
{
get
{
return this.inserted;
}
}
}public interface IChangeResult<T>
{
IList<T> Deleted
{
get;
}
IList<T> Changed
{
get;
}
IList<T> Inserted
{
get;
}
}/// <summary>
/// Gets the changes [Deleted, changed, inserted] comparing this collection to another.
/// </summary>
/// <typeparam name="TSource"></typeparam>
/// <typeparam name="TKey"></typeparam>
/// <param name="local">The source collection.</param>
/// <param name="remote">The remote collection to compare against.</param>
/// <param name="keySelector">The primary key selector function</param>
/// <returns></returns>
public static IChangeResult<TSource> CompareTo<TSource, TKey>(
this IEnumerable<TSource> local,
IEnumerable<TSource> remote,
Func<TSource, TKey> keySelector)
{
if (local == null)
{
throw new ArgumentNullException("local");
}
if (remote == null)
{
throw new ArgumentNullException("remote");
}
if (keySelector == null)
{
throw new ArgumentNullException("keySelector");
}
local = local.ToList();
var remoteKeyValues = remote.ToDictionary(keySelector);
var deleted = new List<TSource>(local.Count());
var changed = new List<TSource>(local.Count());
var localKeys = new HashSet<TKey>();
Parallel.ForEach(
local,
localItem =>
{
var localKey = keySelector(localItem);
lock (localKeys)
{
localKeys.Add(localKey);
}
/* Check if primary key exists in both local and remote
* and if so check if changed, if not it has been deleted
*/
TSource changeCandidate;
if (remoteKeyValues.TryGetValue(localKey, out changeCandidate))
{
if (changeCandidate.Equals(localItem))
{
return;
}
lock (changed)
{
changed.Add(changeCandidate);
}
}
else
{
lock (deleted)
{
deleted.Add(localItem);
}
}
});
var inserted = remoteKeyValues
.AsParallel()
.Where(x => !localKeys.Contains(x.Key))
.Select(x => x.Value)
.ToList();
return new ChangeResult<TSource>(deleted, changed, inserted);
}Context
StackExchange Code Review Q#33053, answer score: 4
Revisions (0)
No revisions yet.