patterncsharpMinor
Merge entities which have the same children
Viewed 0 times
samethechildrenmergewhichentitieshave
Problem
The C# code below, using LINQ to Entities, aims to merge all
For an idea of scale, there are about 1600 classes with 30 students each, and
I'd really appreciate any suggestions.
Class entities which have the same SubjectId and the same Students.var sb = new StringBuilder();
var counter = 0;
using (var ctx = new Ctx())
{
var allClasses = ctx.Classes.Where(o => o.SubjectId > 0).OrderBy(o => o.Id).ToList();
foreach (var c in allClasses)
ctx.LoadProperty(c, o => o.Students);
// For each class...
for (var i = 0; i o.SubjectId == c.SubjectId && o.Id != c.Id &&
o.Students.OrderBy(s => s.Id).Select(s => s.Id)
.SequenceEqual(c.Students.OrderBy(s => s.Id)
.Select(s => s.Id)))
.ToList();
// Does it have any duplicates?
if (duplicates.Count == 0)
continue;
ctx.LoadProperty(c, o => o.PeriodsTimetable);
// For each duplicate...
for (var j = 0; j o.PeriodsTimetable);
var dt = d.PeriodsTimetable.ToList();
for (var k = 0; k o.ClassId == d.Id).ToList();
for (var k = 0; k o.ClassId == d.Id).ToList();
for (var k = 0; k o.ClassId == d.Id).ToList();
for (var k = 0; k o.ClassId == d.Id).ToList();
for (var k = 0; k ");
// Finally, delete the duplicate
ctx.Classes.DeleteObject(d);
allClasses.Remove(d);
counter++;
}
}
ctx.SaveChanges();
lblOutput.Text = "Merged " + counter + " duplicate classes." + sb.ToString();
}For an idea of scale, there are about 1600 classes with 30 students each, and
counter comes out at just over 300.I'd really appreciate any suggestions.
Solution
Your code is very difficult to read. (To give you an idea of how difficult it is, I've been looking at this for the past 2 hours so far)
You have a lot of performance issues too.
Applying what I've explained to your code, I'd start with this (hopefully I didn't break anything).
That's my initial analysis so far. I'll update some more once I've gained understanding on what you are doing. As I've said, it is very difficult to read. I'll probably offer more improvements once I've h
- You are using traditional
forloops when you instead should be usingforeachloops.
- Your "loop variables" are poorly named. Look toward the end of the code. What is a
c? What is ad? Leave the shorter names within smaller lambdas, index varialbes and possibly single instances of "utility" objects (e.g.,StringBuilderorRegex).
- You are trying too hard to write everything within a single line.
forloops,ifconditions, large queries... all of them should span multiple lines. With perhaps an exception to theif (duplicates.Count == 0) continue;line. I'd be okay with that but you should try to keep those to a minimum (it might be possible that you could code it so that line is not needed).
- You have a long chain of
Append()s to the string builder when you really should use a single call toAppendFormat().
- There are some snippets of your code which would work better either as a separate method or a LINQ query. Use it.
- LINQ to Entities should have made associations between your objects so your joins could be replaced by using the navigation properties. If you don't have those set up, you should.
- You also have some useless comments.
// For each class...Do we really need a comment to tell us that? Well, maybe since we were usingforloops but that should have been avoided in the first place.
You have a lot of performance issues too.
- You are calling
ToList()on every query when you are simply just enumerating over it most of the time.
- You have a lot of repeated loops on the same collection too. You should be able to do a lot within a single loop.
- Your query for finding duplicates will be a huge hit. I don't have much to say on it right now but it's already making this
O(n^2).
- I have a feeling that you don't need the calls to
LoadProperty(). From what I can tell, the majority of your code is filling in data manually when you probably don't need to. Unfortunately I don't know the entity framework enough to know better (though I am looking into it right now).
Applying what I've explained to your code, I'd start with this (hopefully I didn't break anything).
var sb = new StringBuilder();
var counter = 0;
using (var ctx = new SchoolModel())
{
var allClasses = ctx.Classes
.Where(o => o.SubjectId > 0)
.OrderBy(o => o.Id)
.ToList();
foreach (var aClass in allClasses)
{
ctx.LoadProperty(aClass, o => o.Students);
// a properly crafted query could remove this completely and not use LINQ to Objects
var students = new HashSet(aClass.Students.Select(s => s.Id));
var dupClasses = allClasses
.Where(o => o.SubjectId == aClass.SubjectId
&& o.Id != aClass.Id
&& students.SetEquals(o.Students.Select(s => s.Id)))
.ToList();
if (dupClasses.Count == 0) continue;
ctx.LoadProperty(aClass, o => o.PeriodsTimetable);
foreach (var dupClass in dupClasses)
{
// If the duplicate class has a timetable slot that's not also allocated to the
// class we are keeping, then allocate it (otherwise MySql will delete it).
ctx.LoadProperty(dupClass, o => o.PeriodsTimetable);
foreach (var dupTimetable in dupClass.PeriodsTimetable)
{
var isDuplicate = aClass.PeriodsTimetable
.Any(ctp => dupTimetable.TeacherId == ctp.TeacherId
&& dupTimetable.PeriodTime == ctp.PeriodTime);
if (!isDuplicate)
dupTimetable.ClassId = aClass.Id;
}
// Now update all other entities which reference the duplicate to instead reference
// the class we are keeping.
foreach (var dupPeriod in dupClass.Periods)
dupPeriod.ClassId = aClass.Id;
foreach (var dupAssessment in dupClass.Assessments)
dupAssessment.ClassId = aClass.Id;
foreach (var dupSeatingPlan in dupClass.SeatingPlans)
dupSeatingPlan.ClassId = aClass.Id;
foreach (var dupMerit in dupClass.Merits)
dupMerit.ClassId = aClass.Id;
sb.AppendFormat("{0} merged with {1}.", dupClass.LongName, aClass.LongName);
// Finally, delete the duplicate
ctx.Classes.DeleteObject(dupClass);
// not sure this is really needed
allClasses.Remove(dupClass);
counter++;
}
}
ctx.SaveChanges();
Output = "Merged " + counter + " duplicate classes." + sb.ToString();
}That's my initial analysis so far. I'll update some more once I've gained understanding on what you are doing. As I've said, it is very difficult to read. I'll probably offer more improvements once I've h
Code Snippets
var sb = new StringBuilder();
var counter = 0;
using (var ctx = new SchoolModel())
{
var allClasses = ctx.Classes
.Where(o => o.SubjectId > 0)
.OrderBy(o => o.Id)
.ToList();
foreach (var aClass in allClasses)
{
ctx.LoadProperty(aClass, o => o.Students);
// a properly crafted query could remove this completely and not use LINQ to Objects
var students = new HashSet<int>(aClass.Students.Select(s => s.Id));
var dupClasses = allClasses
.Where(o => o.SubjectId == aClass.SubjectId
&& o.Id != aClass.Id
&& students.SetEquals(o.Students.Select(s => s.Id)))
.ToList();
if (dupClasses.Count == 0) continue;
ctx.LoadProperty(aClass, o => o.PeriodsTimetable);
foreach (var dupClass in dupClasses)
{
// If the duplicate class has a timetable slot that's not also allocated to the
// class we are keeping, then allocate it (otherwise MySql will delete it).
ctx.LoadProperty(dupClass, o => o.PeriodsTimetable);
foreach (var dupTimetable in dupClass.PeriodsTimetable)
{
var isDuplicate = aClass.PeriodsTimetable
.Any(ctp => dupTimetable.TeacherId == ctp.TeacherId
&& dupTimetable.PeriodTime == ctp.PeriodTime);
if (!isDuplicate)
dupTimetable.ClassId = aClass.Id;
}
// Now update all other entities which reference the duplicate to instead reference
// the class we are keeping.
foreach (var dupPeriod in dupClass.Periods)
dupPeriod.ClassId = aClass.Id;
foreach (var dupAssessment in dupClass.Assessments)
dupAssessment.ClassId = aClass.Id;
foreach (var dupSeatingPlan in dupClass.SeatingPlans)
dupSeatingPlan.ClassId = aClass.Id;
foreach (var dupMerit in dupClass.Merits)
dupMerit.ClassId = aClass.Id;
sb.AppendFormat("{0} merged with {1}.<br />", dupClass.LongName, aClass.LongName);
// Finally, delete the duplicate
ctx.Classes.DeleteObject(dupClass);
// not sure this is really needed
allClasses.Remove(dupClass);
counter++;
}
}
ctx.SaveChanges();
Output = "<b>Merged " + counter + " duplicate classes.</b><br /><br />" + sb.ToString();
}Context
StackExchange Code Review Q#5396, answer score: 8
Revisions (0)
No revisions yet.