patternsqlMinor
Collect all Similar Persons to One Group
Viewed 0 times
collectgroupallpersonsonesimilar
Problem
I have a person with several Id's.
Some of them in Column Id1 and Some of them in Id2.
I want to collect all the same persons Id's to one group.
If id1=10, is in the same row with id2=20. so it's mean that person with id1=10 he is the same person like id2=20.
The Input and Output example:
Input
Output
Some of them in Column Id1 and Some of them in Id2.
I want to collect all the same persons Id's to one group.
If id1=10, is in the same row with id2=20. so it's mean that person with id1=10 he is the same person like id2=20.
The Input and Output example:
Input
Id1 Id2
--- ---
10 20
10 30
30 30
10 40
50 70
60 50
70 70Output
NewId OldId
----- -----
1 10
1 20
1 30
1 40
2 50
2 60
2 70Solution
It will be better to make such calculations in iterative manner.
The "issue" of such solution is that, in case of large source table, the output table will contains all the rows. But, actually, this is the benefit also: in case of large source table, such code will work much more faster, that recursions.
In some details, the "logic" of the script is the following:
-
set new ID (chainid) initial value (ex. 1)
-
get the first entry which was not found yet (external cycle)
-
increment the new ID
-
find next entry which is not in the result table
-
continue till have data to work
-
return sorted result
The "issue" of such solution is that, in case of large source table, the output table will contains all the rows. But, actually, this is the benefit also: in case of large source table, such code will work much more faster, that recursions.
if object_id('tempdb..#src') is not null drop table #src;
create table #src (id1 int not null, id2 int)
if object_id('tempdb..#rez') is not null drop table #rez;
create table #rez (id int not null, chainid int not null);
declare @chainId int;
insert #src
values (10, 20), (10, 30), (30, 30), (10, 40), (50, 70), (60, 50), (70, 70)
--values (4457745,255714),(4457745,2540222),(2540222,4457745),(255714,4457745)
set @chainId = 1
while 1 = 1
begin
insert #rez(id, chainid)
select top 1 id1, @chainId
from #src
where
Id1 NOT IN (select Id from #rez)
and id2 NOT IN (select Id from #rez)
if @@rowcount = 0 break
while 1 = 1
begin
insert #rez(id, chainid)
select id1, @chainid
from #src
where
id2 in (select id from #rez where chainid = @chainId)
and id1 not in (select id from #rez where chainid = @chainId)
union
select id2, @chainId
from #src
where
id1 in (select id from #rez where chainid = @chainId)
and id2 not in (select id from #rez where chainid = @chainId)
if @@rowcount = 0 break
end
select @chainId = @chainId + 1
end
select [NewId] = chainid, OldID = id
from #rez
order by 1, 2;In some details, the "logic" of the script is the following:
-
set new ID (chainid) initial value (ex. 1)
-
get the first entry which was not found yet (external cycle)
- find and store all related to it (in both columns) (internal cycle)
-
increment the new ID
-
find next entry which is not in the result table
-
continue till have data to work
-
return sorted result
Code Snippets
if object_id('tempdb..#src') is not null drop table #src;
create table #src (id1 int not null, id2 int)
if object_id('tempdb..#rez') is not null drop table #rez;
create table #rez (id int not null, chainid int not null);
declare @chainId int;
insert #src
values (10, 20), (10, 30), (30, 30), (10, 40), (50, 70), (60, 50), (70, 70)
--values (4457745,255714),(4457745,2540222),(2540222,4457745),(255714,4457745)
set @chainId = 1
while 1 = 1
begin
insert #rez(id, chainid)
select top 1 id1, @chainId
from #src
where
Id1 NOT IN (select Id from #rez)
and id2 NOT IN (select Id from #rez)
if @@rowcount = 0 break
while 1 = 1
begin
insert #rez(id, chainid)
select id1, @chainid
from #src
where
id2 in (select id from #rez where chainid = @chainId)
and id1 not in (select id from #rez where chainid = @chainId)
union
select id2, @chainId
from #src
where
id1 in (select id from #rez where chainid = @chainId)
and id2 not in (select id from #rez where chainid = @chainId)
if @@rowcount = 0 break
end
select @chainId = @chainId + 1
end
select [NewId] = chainid, OldID = id
from #rez
order by 1, 2;Context
StackExchange Database Administrators Q#156292, answer score: 3
Revisions (0)
No revisions yet.