patterncsharpModerate
String join of distinct string
Viewed 0 times
distinctstringjoin
Problem
I have a class like this one:
Through a JSON deserialization I obtain a List<EmailClass> and two other strings, anotherMailAddress and justAnotherMailAddress.
I need to join all those addresses into one big string (the separator is ";"). For this purpose I wrote this code:
Now, is there a more polished way to obtain this result?
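How much will Distinct() affect the performance on a medium result set (~100,000 addresses)? I've read that the complexity of the function should be O(n); is this true?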
class EmailClass
{
public string MailAdresse { get; set; }
public string MailAdresseCC { get; set; }
}
Through a JSON deserialization I obtain a
List<EmailClass> and two other strings, anotherMailAddress and justAnotherMailAddress. I need to join all those addresses into one big string (the separator is ";"). For this purpose I wrote this code:
// splittedList is populated in another section; it is shown here
// only so that the variable's name and type are known.
List<EmailClass> splittedList
// Collects every address (duplicates included) before de-duplication.
List<string> listOfAdresses = new List<string>();
string joinedAdresses = String.Empty;
// First all primary addresses, then all CC addresses, then the two extra ones.
listOfAdresses.AddRange(splittedList.Select(x => x.MailAdresse));
listOfAdresses.AddRange(splittedList.Select(x => x.MailAdresseCC));
listOfAdresses.Add(anotherMailAddress);
listOfAdresses.Add(justAnotherMailAdresses);
joinedAdresses = String.Join(";", listOfAdresses.Distinct().ToArray());
Now, is there a more polished way to obtain this result?
How much will
Distinct() affect the performance on a medium result set (~100,000 addresses)? I've read that the complexity of the function should be O(n); is this true?
Solution
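Use a HashSet<string>: it will only contain unique values, because duplicates are discarded when calling Add, based upon the hash code of the string. This should be far more efficient than calling Distinct().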
--
Following the "feedback" from Almaz, here's a basic benchmark to show the performance difference (using unique values for each address):
Results run in release mode without debugger attached:
Ramping up to 1,000,000 items
As you can see, the HashSet approach is substantially faster than the OP and significantly faster than the Linq approach.
Add, based upon the hash code of the string. This should be far more efficient than calling Distinct().
List<EmailClass> splittedList
// The set keeps only one copy of each address, so no separate Distinct() pass is needed.
HashSet<string> listOfAdresses = new HashSet<string>();
// HashSet does not contain an AddRange method.
foreach (var emailClass in splittedList)
{
    listOfAdresses.Add(emailClass.MailAdresse);
    listOfAdresses.Add(emailClass.MailAdresseCC);
}
listOfAdresses.Add(anotherMailAddress);
listOfAdresses.Add(justAnotherMailAdresses);
string joinedAdresses = String.Join(";", listOfAdresses.ToArray());
--
Following the "feedback" from Almaz, here's a basic benchmark to show the performance difference (using unique values for each address):
/// <summary>
/// Benchmark entry point. Builds 100,000 <c>EmailClass</c> items, each with its own
/// address and CC address, runs the three join strategies once each on the same
/// list, then waits for a line of console input before returning.
/// </summary>
/// <param name="args">Command-line arguments (not used).</param>
private static void Main(string[] args)
{
    List<EmailClass> splittedList = Enumerable.Range(1, 100000).Select(i => new EmailClass
    {
        MailAdresse = i.ToString() + "@email.com",
        MailAdresseCC = i.ToString() + "cc@email.com"
    }).ToList();
    OriginalMethod(splittedList);
    HashSetMethod(splittedList);
    LinqMethod(splittedList);
    // Keeps the console window open so the timings can be read.
    Console.ReadLine();
}
/// <summary>
/// Times the approach from the question: copy every address and CC address into a
/// <c>List&lt;string&gt;</c>, append two fixed addresses, then de-duplicate with
/// <c>Distinct()</c> and join with ";". Writes the method name and the elapsed
/// time to the console.
/// </summary>
/// <param name="splittedList">The items whose addresses are joined.</param>
private static void OriginalMethod(List<EmailClass> splittedList)
{
    var sw = new Stopwatch();
    sw.Start();
    List<string> listOfAdresses = new List<string>();
    listOfAdresses.AddRange(splittedList.Select(x => x.MailAdresse));
    listOfAdresses.AddRange(splittedList.Select(x => x.MailAdresseCC));
    listOfAdresses.Add("someone@email.com");
    listOfAdresses.Add("someone.else@email.com");
    // The joined string is never read; it is built only so that the work is timed.
    var joinedAdresses = String.Join(";", listOfAdresses.Distinct().ToArray());
    sw.Stop();
    Console.WriteLine("OriginalMethod");
    Console.WriteLine(sw.Elapsed);
}
/// <summary>
/// Times the approach from the answer: add every address and CC address, plus two
/// fixed addresses, to a <c>HashSet&lt;string&gt;</c> and join the set's contents
/// with ";". Writes the method name and the elapsed time to the console.
/// </summary>
/// <param name="splittedList">The items whose addresses are joined.</param>
private static void HashSetMethod(List<EmailClass> splittedList)
{
    var sw = new Stopwatch();
    sw.Start();
    HashSet<string> listOfAdresses = new HashSet<string>();
    foreach (var emailClass in splittedList)
    {
        listOfAdresses.Add(emailClass.MailAdresse);
        listOfAdresses.Add(emailClass.MailAdresseCC);
    }
    listOfAdresses.Add("someone@email.com");
    listOfAdresses.Add("someone.else@email.com");
    // The joined string is never read; it is built only so that the work is timed.
    string joinedAdresses = String.Join(";", listOfAdresses.ToArray());
    sw.Stop();
    Console.WriteLine("HashSetMethod");
    Console.WriteLine(sw.Elapsed);
}
private static void LinqMethod(List splittedList)
{
var sw = new Stopwatch();
sw.Start();
var emails = splittedList.SelectMany(emailClass => new[] { emailClass.MailAdresse, emailClass.MailAdresseCC })
.Concat(new[] { "someone@email.com", "someone.else@email.com" })
.Distinct();
var joinedAdresses = String.Join(";", emails);
sw.Stop();
Console.WriteLine("LinqMethod");
Console.WriteLine(sw.Elapsed);
}
Results run in release mode without debugger attached:
OriginalMethod: 00:00:00.0789540
HashSetMethod: 00:00:00.0488568
LinqMethod: 00:00:00.0668056
Ramping up to 1,000,000 items
OriginalMethod: 00:00:00.8189667
HashSetMethod: 00:00:00.6891028
LinqMethod: 00:00:01.0157357
As you can see, the HashSet approach is substantially faster than the OP's approach and significantly faster than the LINQ approach.
Code Snippets
List<EmailClass> splittedList
// Holds each address once; the two extra addresses are added after the list's entries.
var listOfAdresses = new HashSet<string>();
// HashSet<T> offers no AddRange, so both addresses of every entry are added one at a time.
splittedList.ForEach(entry =>
{
    listOfAdresses.Add(entry.MailAdresse);
    listOfAdresses.Add(entry.MailAdresseCC);
});
listOfAdresses.Add(anotherMailAddress);
listOfAdresses.Add(justAnotherMailAdresses);
string joinedAdresses = String.Join(";", listOfAdresses.ToArray());
private static void Main(string[] args)
{
List<EmailClass> splittedList = Enumerable.Range(1, 100000).Select(i => new EmailClass
{
MailAdresse = i.ToString() + "@email.com",
MailAdresseCC = i.ToString() + "cc@email.com"
}).ToList();
OriginalMethod(splittedList);
HashSetMethod(splittedList);
LinqMethod(splittedList);
Console.ReadLine();
}
/// <summary>
/// Baseline measurement of the question's approach: gather all addresses and CC
/// addresses in a list, append two fixed addresses, remove duplicates with
/// <c>Distinct()</c>, and join the result with ";". Prints the method name
/// followed by the elapsed time.
/// </summary>
/// <param name="splittedList">The items whose addresses are joined.</param>
private static void OriginalMethod(List<EmailClass> splittedList)
{
    var timer = Stopwatch.StartNew();

    var allAddresses = new List<string>();
    allAddresses.AddRange(splittedList.Select(entry => entry.MailAdresse));
    allAddresses.AddRange(splittedList.Select(entry => entry.MailAdresseCC));
    allAddresses.Add("someone@email.com");
    allAddresses.Add("someone.else@email.com");

    // De-duplicate and join inside the timed region; the result itself is not used.
    string[] uniqueAddresses = allAddresses.Distinct().ToArray();
    string joined = string.Join(";", uniqueAddresses);

    timer.Stop();
    Console.WriteLine("OriginalMethod");
    Console.WriteLine(timer.Elapsed);
}
/// <summary>
/// Measurement of the answer's approach: put all addresses and CC addresses, plus
/// two fixed addresses, into a <c>HashSet&lt;string&gt;</c> and join the set's
/// contents with ";". Prints the method name followed by the elapsed time.
/// </summary>
/// <param name="splittedList">The items whose addresses are joined.</param>
private static void HashSetMethod(List<EmailClass> splittedList)
{
    var timer = Stopwatch.StartNew();

    var uniqueAddresses = new HashSet<string>();
    foreach (EmailClass entry in splittedList)
    {
        uniqueAddresses.Add(entry.MailAdresse);
        uniqueAddresses.Add(entry.MailAdresseCC);
    }
    uniqueAddresses.Add("someone@email.com");
    uniqueAddresses.Add("someone.else@email.com");

    // Copy to an array and join inside the timed region; the result itself is not used.
    string[] addressArray = uniqueAddresses.ToArray();
    string joined = string.Join(";", addressArray);

    timer.Stop();
    Console.WriteLine("HashSetMethod");
    Console.WriteLine(timer.Elapsed);
}
private static void LinqMethod(List<EmailClass> splittedList)
{
var sw = new Stopwatch();
sw.Start();
var emails = splittedList.SelectMany(emailClass => new[] { emailClass.MailAdresse, emailClass.MailAdresseCC })
.Concat(new[] { "someone@email.com", "someone.else@email.com" })
.Distinct();
var joinedAdresses = String.Join(";", emails);
sw.Stop();
Console.WriteLine("LinqMethod");
Console.WriteLine(sw.Elapsed);
}
OriginalMethod: 00:00:00.0789540
HashSetMethod: 00:00:00.0488568
LinqMethod: 00:00:00.0668056
OriginalMethod: 00:00:00.8189667
HashSetMethod: 00:00:00.6891028
LinqMethod: 00:00:01.0157357
Context
StackExchange Code Review Q#18884, answer score: 14
Revisions (0)
No revisions yet.