patternMinor
Group data by non-unique keys by distinct time range
Viewed 0 times
uniquedistinctgroupnonrangetimekeysdata
Problem
I have data in an Oracle table like the following:
Our table has 2,091,348 rows.
I want data like following...
PROJ_NBR STATUS START_DT
AL20 AC 1/14/2010 4:31
AL20 AC 1/14/2010 4:32
AL20 AC 1/14/2010 4:32
AL20 DE 1/14/2010 4:34
AL20 DE 1/14/2010 4:46
AL20 DE 1/14/2010 4:46
AL20 DE 1/14/2010 4:46
AL20 DE 1/14/2010 4:46
AL20 DE 1/14/2010 4:46
AL20 DE 1/14/2010 4:46
AL20 DE 1/14/2010 4:46
AL20 DE 1/14/2010 4:46
AL20 DE 1/14/2010 4:46
AL20 DE 1/14/2010 4:46
AL20 AC 3/4/2010 4:31
AL20 AC 3/4/2010 4:39
AL20 AC 3/21/2010 13:24
AL20 AC 2/4/2011 13:54 --year changes here
AL20 AC 2/4/2011 14:14
AL20 AC 2/4/2011 14:27
AL20 DE 2/11/2011 16:24Our table has 2,091,348 rows.
I want data like following...
PROJ_NBR STATUS BEGIN END
AL20 AC 1/14/2010 4:31 1/14/2010 4:32
AL20 DE 1/14/2010 4:34 1/14/2010 4:46
AL20 AC 3/4/2010 4:31 2/4/2011 14:27 --Second time range for AL20, AC
AL20 DE 2/11/2011 16:24 2/11/2011 16:24Solution
Below a complete solution for sql-server.
I was trying analytical functions, but didn't succeed. I'm tempted to repost the question to SO requiring a solution using analytical functions. I think there must be one.
And here is my working solution. I'm proud not to have used cursors.
I think besides column and table names this can easily adapted to Oracle too.
I was trying analytical functions, but didn't succeed. I'm tempted to repost the question to SO requiring a solution using analytical functions. I think there must be one.
And here is my working solution. I'm proud not to have used cursors.
set language us_english
Set Nocount ON
create table #t( PROJ_NBR char(4), STATUS char(2), START_DT datetime);
insert into #t values ('AL20', 'AC', '1/14/2010 4:31');
insert into #t values ('AL20', 'AC', '1/14/2010 4:32');
insert into #t values ('AL20', 'AC', '1/14/2010 4:32');
insert into #t values ('AL20', 'DE', '1/14/2010 4:34');
insert into #t values ('AL20', 'DE', '1/14/2010 4:46');
insert into #t values ('AL20', 'DE', '1/14/2010 4:46');
insert into #t values ('AL20', 'DE', '1/14/2010 4:46');
insert into #t values ('AL20', 'DE', '1/14/2010 4:46');
insert into #t values ('AL20', 'DE', '1/14/2010 4:46');
insert into #t values ('AL20', 'DE', '1/14/2010 4:46');
insert into #t values ('AL20', 'DE', '1/14/2010 4:46');
insert into #t values ('AL20', 'DE', '1/14/2010 4:46');
insert into #t values ('AL20', 'DE', '1/14/2010 4:46');
insert into #t values ('AL20', 'DE', '1/14/2010 4:46');
insert into #t values ('AL20', 'AC', '3/4/2010 4:31');
insert into #t values ('AL20', 'AC', '3/4/2010 4:39');
insert into #t values ('AL20', 'AC', '3/21/2010 13:24');
insert into #t values ('AL20', 'AC', '2/4/2011 13:54');
insert into #t values ('AL20', 'AC', '2/4/2011 14:14');
insert into #t values ('AL20', 'AC', '2/4/2011 14:27');
insert into #t values ('AL20', 'DE', '2/11/2011 16:24');
select
distinct PROJ_NBR, STATUS, [start], [end]
from(
select
*,
(select MIN(START_DT)
from #t t1
where t.STATUS = t1.STATUS
and t.START_DT >= t1.START_DT
and not exists ( select * from #t t2
where t.STATUS <> t2.STATUS
and t.START_DT >= t2.START_DT
and t1.START_DT t2.STATUS
and t.START_DT = t2.START_DT)
) [end]
from #t t
) t1
order by [start]I think besides column and table names this can easily adapted to Oracle too.
Code Snippets
set language us_english
Set Nocount ON
create table #t( PROJ_NBR char(4), STATUS char(2), START_DT datetime);
insert into #t values ('AL20', 'AC', '1/14/2010 4:31');
insert into #t values ('AL20', 'AC', '1/14/2010 4:32');
insert into #t values ('AL20', 'AC', '1/14/2010 4:32');
insert into #t values ('AL20', 'DE', '1/14/2010 4:34');
insert into #t values ('AL20', 'DE', '1/14/2010 4:46');
insert into #t values ('AL20', 'DE', '1/14/2010 4:46');
insert into #t values ('AL20', 'DE', '1/14/2010 4:46');
insert into #t values ('AL20', 'DE', '1/14/2010 4:46');
insert into #t values ('AL20', 'DE', '1/14/2010 4:46');
insert into #t values ('AL20', 'DE', '1/14/2010 4:46');
insert into #t values ('AL20', 'DE', '1/14/2010 4:46');
insert into #t values ('AL20', 'DE', '1/14/2010 4:46');
insert into #t values ('AL20', 'DE', '1/14/2010 4:46');
insert into #t values ('AL20', 'DE', '1/14/2010 4:46');
insert into #t values ('AL20', 'AC', '3/4/2010 4:31');
insert into #t values ('AL20', 'AC', '3/4/2010 4:39');
insert into #t values ('AL20', 'AC', '3/21/2010 13:24');
insert into #t values ('AL20', 'AC', '2/4/2011 13:54');
insert into #t values ('AL20', 'AC', '2/4/2011 14:14');
insert into #t values ('AL20', 'AC', '2/4/2011 14:27');
insert into #t values ('AL20', 'DE', '2/11/2011 16:24');
select
distinct PROJ_NBR, STATUS, [start], [end]
from(
select
*,
(select MIN(START_DT)
from #t t1
where t.STATUS = t1.STATUS
and t.START_DT >= t1.START_DT
and not exists ( select * from #t t2
where t.STATUS <> t2.STATUS
and t.START_DT >= t2.START_DT
and t1.START_DT <= t2.START_DT)
) [start],
(select Max(START_DT)
from #t t1
where t.STATUS = t1.STATUS
and t.START_DT <= t1.START_DT
and not exists ( select * from #t t2
where t.STATUS <> t2.STATUS
and t.START_DT <= t2.START_DT
and t1.START_DT >= t2.START_DT)
) [end]
from #t t
) t1
order by [start]Context
StackExchange Database Administrators Q#1334, answer score: 4
Revisions (0)
No revisions yet.