HiveBrain v1.2.0
Get Started
← Back to all entries
patterncsharpMinor

Using LINQ to perform a LEFT OUTER JOIN in 2 DataTables (Multiples criteria)

Submitted by: @import:stackexchange-codereview··
0
Viewed 0 times
leftlinqdatatablescriteriajoinperformmultiplesusingouter

Problem

I know that exists a lot of solutions about how to create an OUTER JOIN between two DataTables.

I created the following code in C#:

DataTable vDT1 = new DataTable();
vDT1.Columns.Add("Key");
vDT1.Columns.Add("Key2");
vDT1.Columns.Add("Data1");
vDT1.Columns.Add("Data2");

vDT1.Rows.Add(new object[] { "01", "ZZ", "DATA1_AAAA", "DATA2_AAAA" });
vDT1.Rows.Add(new object[] { "02", "ZZ", "DATA1_BBBB", "DATA2_BBBB" });

DataTable vDT2 = new DataTable();
vDT2.Columns.Add("Key");
vDT2.Columns.Add("Key2");
vDT2.Columns.Add("Data3");
vDT2.Columns.Add("Data4");

vDT2.Rows.Add(new object[] { "01", "ZZ", "DATA3_AAAA", "DATA4_AAAA" });
vDT2.Rows.Add(new object[] { "01", "ZZ", "DATA3_BBBB", "DATA4_BBBB" });
vDT2.Rows.Add(new object[] { "01", "ZZ", "DATA3_CCCC", "DATA4_CCCC" });
vDT2.Rows.Add(new object[] { "01", "ZZ", "DATA3_DDDD", "DATA4_DDDD" });

DataTable vDT3 = new DataTable();
vDT3.Columns.Add("Key");
vDT3.Columns.Add("Key2");
vDT3.Columns.Add("Data1");
vDT3.Columns.Add("Data2");
vDT3.Columns.Add("KeyTemp1");
vDT3.Columns.Add("KeyTemp2");
vDT3.Columns.Add("Data3");
vDT3.Columns.Add("Data4");

DataRow vDRnull = vDT2.Rows.Add();

var vLINQ = vDT1.AsEnumerable()
    .GroupJoin(vDT2.AsEnumerable(), 
        dr1 => new { key1 = dr1["Key"], key2 = dr1["Key2"] }, 
        dr2 => new { key1 = dr2["Key"], key2 = dr2["Key2"] },
        (dr1, result) => dr1.ItemArray.Koncat(
            ((result.FirstOrDefault() == null) 
                ? vDRnull 
                : result.FirstOrDefault()).ItemArray));

foreach (var aw in vLINQ)
{
    vDT3.Rows.Add(aw);
}


I implemented this extension:

public static T[] Koncat(this T[] x, T[] y)
{
    if (x == null) throw new ArgumentNullException("x");
    if (y == null) throw new ArgumentNullException("y");
    int oldLen = x.Length;
    Array.Resize(ref x, x.Length + y.Length);
    Array.Copy(y, 0, x, oldLen, y.Length);
    return x;
}


This easy and clean (from my point of view), but I wanted to ask the experts for recommenda

Solution

DataTables are quite powefull and offer lots of the real database functionality. Also as far as joins are concerned a few things are possible and I'm of the opinion that if someone uses DataTables he also should use the functionality they offer ;-)

It this case using DataTable joins your example could look like this:

DataSet ds = new DataSet();

DataTable dt1 = new DataTable();
dt1.Columns.Add("Key");
dt1.Columns.Add("Key2");
dt1.Columns.Add("Data1");
dt1.Columns.Add("Data2");

dt1.Rows.Add(new object[] { "01", "ZZ", "DATA1_AAAA", "DATA2_AAAA" });
dt1.Rows.Add(new object[] { "02", "ZZ", "DATA1_BBBB", "DATA2_BBBB" });

DataTable dt2 = new DataTable();
dt2.Columns.Add("Key");
dt2.Columns.Add("Key2");
dt2.Columns.Add("Data3");
dt2.Columns.Add("Data4");

dt2.Rows.Add(new object[] { "01", "ZZ", "DATA3_AAAA", "DATA4_AAAA" });
dt2.Rows.Add(new object[] { "01", "ZZ", "DATA3_BBBB", "DATA4_BBBB" });
dt2.Rows.Add(new object[] { "01", "ZZ", "DATA3_CCCC", "DATA4_CCCC" });
dt2.Rows.Add(new object[] { "01", "ZZ", "DATA3_DDDD", "DATA4_DDDD" });
//dt2.Rows.Add(new object[] { "02", "ZZ", "DATA5_DDDD", "DATA4_DDDD" });

ds.Tables.Add(dt1);
ds.Tables.Add(dt2);

// specify the relations between the data tables
DataRelation drel = new DataRelation(
    "MyJoin",
    new DataColumn[] { dt1.Columns["Key"], dt1.Columns["Key2"] },
    new DataColumn[] { dt2.Columns["Key"], dt2.Columns["Key2"]});

ds.Relations.Add(drel);

DataTable jt = new DataTable("JoinedTable");
jt.Columns.Add("Key");
jt.Columns.Add("Key2");
jt.Columns.Add("Data1");
jt.Columns.Add("Data2");
jt.Columns.Add("Data3");
jt.Columns.Add("Data4");
ds.Tables.Add(jt);

// create the result table
foreach (DataRow row in dt1.Rows)
{
    var childRows = row.GetChildRows("MyJoin");
    // mimics left join
    var hasChildRows = childRows.Length > 0;
    if (!hasChildRows) 
    {
        jt.Rows.Add(row["Key"], row["Key2"], row["Data1"], row["Data2"], null, null);
        continue;
    }

    foreach (var child in childRows)
    {
        jt.Rows.Add(row["Key"], row["Key2"], row["Data1"], row["Data2"], child["Data3"], child["Data4"]);
    }
}

jt.Rows.Dump(); // LINQPad dump


As far as your code is concerned I'm not happy with the Koncat method because it modifies the ItemArray that belongs to the DataRow instead of creating a new result.

You actually don't need it because LINQ already has a such a method that you could use like this:

dr1.ItemArray
.Concat(result.Any() ? result.First().ItemArray: Enumerable.Empty())
.ToArray());


It's not necessary to call the FirstOrDefault method two times. It's better to just check if the result has Any rows and then get the First one and its ItemArray or otherwise an empty IEnumerable, finally you turn it into an array and you're done:

var vLINQ = vDT1.AsEnumerable()
    .GroupJoin(
        vDT2.AsEnumerable(), 
        dr1 => new { key1 = dr1["Key"], key2 = dr1["Key2"] }, 
        dr2 => new { key1 = dr2["Key"], key2 = dr2["Key2"] },
        (dr1, result) => 
            dr1.ItemArray
            .Concat(result.Any() ? result.First().ItemArray: Enumerable.Empty())
            .ToArray());


or if you can use C# 6 even shorter with the ?. and ?? operators

dr1.ItemArray
.Concat(result.FirstOrDefault()?.ItemArray ?? Enumerable.Empty())
.ToArray());

Code Snippets

DataSet ds = new DataSet();

DataTable dt1 = new DataTable();
dt1.Columns.Add("Key");
dt1.Columns.Add("Key2");
dt1.Columns.Add("Data1");
dt1.Columns.Add("Data2");

dt1.Rows.Add(new object[] { "01", "ZZ", "DATA1_AAAA", "DATA2_AAAA" });
dt1.Rows.Add(new object[] { "02", "ZZ", "DATA1_BBBB", "DATA2_BBBB" });

DataTable dt2 = new DataTable();
dt2.Columns.Add("Key");
dt2.Columns.Add("Key2");
dt2.Columns.Add("Data3");
dt2.Columns.Add("Data4");

dt2.Rows.Add(new object[] { "01", "ZZ", "DATA3_AAAA", "DATA4_AAAA" });
dt2.Rows.Add(new object[] { "01", "ZZ", "DATA3_BBBB", "DATA4_BBBB" });
dt2.Rows.Add(new object[] { "01", "ZZ", "DATA3_CCCC", "DATA4_CCCC" });
dt2.Rows.Add(new object[] { "01", "ZZ", "DATA3_DDDD", "DATA4_DDDD" });
//dt2.Rows.Add(new object[] { "02", "ZZ", "DATA5_DDDD", "DATA4_DDDD" });

ds.Tables.Add(dt1);
ds.Tables.Add(dt2);

// specify the relations between the data tables
DataRelation drel = new DataRelation(
    "MyJoin",
    new DataColumn[] { dt1.Columns["Key"], dt1.Columns["Key2"] },
    new DataColumn[] { dt2.Columns["Key"], dt2.Columns["Key2"]});

ds.Relations.Add(drel);

DataTable jt = new DataTable("JoinedTable");
jt.Columns.Add("Key");
jt.Columns.Add("Key2");
jt.Columns.Add("Data1");
jt.Columns.Add("Data2");
jt.Columns.Add("Data3");
jt.Columns.Add("Data4");
ds.Tables.Add(jt);

// create the result table
foreach (DataRow row in dt1.Rows)
{
    var childRows = row.GetChildRows("MyJoin");
    // mimics left join
    var hasChildRows = childRows.Length > 0;
    if (!hasChildRows) 
    {
        jt.Rows.Add(row["Key"], row["Key2"], row["Data1"], row["Data2"], null, null);
        continue;
    }

    foreach (var child in childRows)
    {
        jt.Rows.Add(row["Key"], row["Key2"], row["Data1"], row["Data2"], child["Data3"], child["Data4"]);
    }
}

jt.Rows.Dump(); // LINQPad dump
dr1.ItemArray
.Concat(result.Any() ? result.First().ItemArray: Enumerable.Empty<object>())
.ToArray());
var vLINQ = vDT1.AsEnumerable()
    .GroupJoin(
        vDT2.AsEnumerable(), 
        dr1 => new { key1 = dr1["Key"], key2 = dr1["Key2"] }, 
        dr2 => new { key1 = dr2["Key"], key2 = dr2["Key2"] },
        (dr1, result) => 
            dr1.ItemArray
            .Concat(result.Any() ? result.First().ItemArray: Enumerable.Empty<object>())
            .ToArray());
dr1.ItemArray
.Concat(result.FirstOrDefault()?.ItemArray ?? Enumerable.Empty<object>())
.ToArray());

Context

StackExchange Code Review Q#107830, answer score: 7

Revisions (0)

No revisions yet.