HiveBrain v1.2.0
Get Started
← Back to all entries
patternMinor

Group data by non-unique keys by distinct time range

Submitted by: @import:stackexchange-dba··
0
Viewed 0 times
uniquedistinctgroupnonrangetimekeysdata

Problem

I have data in an Oracle table like the following:

PROJ_NBR     STATUS    START_DT        
AL20         AC        1/14/2010 4:31        
AL20         AC        1/14/2010 4:32        
AL20         AC        1/14/2010 4:32        
AL20         DE        1/14/2010 4:34        
AL20         DE        1/14/2010 4:46        
AL20         DE        1/14/2010 4:46        
AL20         DE        1/14/2010 4:46        
AL20         DE        1/14/2010 4:46        
AL20         DE        1/14/2010 4:46        
AL20         DE        1/14/2010 4:46        
AL20         DE        1/14/2010 4:46        
AL20         DE        1/14/2010 4:46        
AL20         DE        1/14/2010 4:46        
AL20         DE        1/14/2010 4:46        
AL20         AC        3/4/2010 4:31        
AL20         AC        3/4/2010 4:39        
AL20         AC        3/21/2010 13:24        
AL20         AC        2/4/2011 13:54        --year changes here
AL20         AC        2/4/2011 14:14        
AL20         AC        2/4/2011 14:27        
AL20         DE        2/11/2011 16:24


Our table has 2,091,348 rows.

I want data like following...

PROJ_NBR     STATUS    BEGIN                END
AL20         AC        1/14/2010 4:31       1/14/2010 4:32
AL20         DE        1/14/2010 4:34       1/14/2010 4:46
AL20         AC        3/4/2010 4:31        2/4/2011 14:27  --Second time range for AL20, AC
AL20         DE        2/11/2011 16:24      2/11/2011 16:24

Solution

Below a complete solution for sql-server.

I was trying analytical functions, but didn't succeed. I'm tempted to repost the question to SO requiring a solution using analytical functions. I think there must be one.

And here is my working solution. I'm proud not to have used cursors.

set language us_english
Set Nocount ON

create table #t( PROJ_NBR char(4),  STATUS char(2), START_DT datetime);

insert into #t values ('AL20',     'AC', '1/14/2010 4:31');  
insert into #t values ('AL20',     'AC', '1/14/2010 4:32');  
insert into #t values ('AL20',     'AC', '1/14/2010 4:32');  
insert into #t values ('AL20',     'DE', '1/14/2010 4:34');  
insert into #t values ('AL20',     'DE', '1/14/2010 4:46');  
insert into #t values ('AL20',     'DE', '1/14/2010 4:46');  
insert into #t values ('AL20',     'DE', '1/14/2010 4:46');  
insert into #t values ('AL20',     'DE', '1/14/2010 4:46');  
insert into #t values ('AL20',     'DE', '1/14/2010 4:46');  
insert into #t values ('AL20',     'DE', '1/14/2010 4:46');  
insert into #t values ('AL20',     'DE', '1/14/2010 4:46');  
insert into #t values ('AL20',     'DE', '1/14/2010 4:46');  
insert into #t values ('AL20',     'DE', '1/14/2010 4:46');  
insert into #t values ('AL20',     'DE', '1/14/2010 4:46');  
insert into #t values ('AL20',     'AC', '3/4/2010 4:31');   
insert into #t values ('AL20',     'AC', '3/4/2010 4:39');   
insert into #t values ('AL20',     'AC', '3/21/2010 13:24'); 
insert into #t values ('AL20',     'AC', '2/4/2011 13:54');  
insert into #t values ('AL20',     'AC', '2/4/2011 14:14');  
insert into #t values ('AL20',     'AC', '2/4/2011 14:27');  
insert into #t values ('AL20',     'DE', '2/11/2011 16:24'); 

select
    distinct PROJ_NBR, STATUS, [start], [end]
from(
select 
*,
(select MIN(START_DT) 
    from #t t1 
    where t.STATUS = t1.STATUS 
    and t.START_DT >= t1.START_DT 
    and not exists ( select * from #t t2 
                    where t.STATUS <> t2.STATUS  
                    and  t.START_DT >= t2.START_DT
                    and t1.START_DT  t2.STATUS  
                    and  t.START_DT = t2.START_DT)
) [end]

from #t t
) t1
order by [start]


I think besides column and table names this can easily adapted to Oracle too.

Code Snippets

set language us_english
Set Nocount ON

create table #t( PROJ_NBR char(4),  STATUS char(2), START_DT datetime);

insert into #t values ('AL20',     'AC', '1/14/2010 4:31');  
insert into #t values ('AL20',     'AC', '1/14/2010 4:32');  
insert into #t values ('AL20',     'AC', '1/14/2010 4:32');  
insert into #t values ('AL20',     'DE', '1/14/2010 4:34');  
insert into #t values ('AL20',     'DE', '1/14/2010 4:46');  
insert into #t values ('AL20',     'DE', '1/14/2010 4:46');  
insert into #t values ('AL20',     'DE', '1/14/2010 4:46');  
insert into #t values ('AL20',     'DE', '1/14/2010 4:46');  
insert into #t values ('AL20',     'DE', '1/14/2010 4:46');  
insert into #t values ('AL20',     'DE', '1/14/2010 4:46');  
insert into #t values ('AL20',     'DE', '1/14/2010 4:46');  
insert into #t values ('AL20',     'DE', '1/14/2010 4:46');  
insert into #t values ('AL20',     'DE', '1/14/2010 4:46');  
insert into #t values ('AL20',     'DE', '1/14/2010 4:46');  
insert into #t values ('AL20',     'AC', '3/4/2010 4:31');   
insert into #t values ('AL20',     'AC', '3/4/2010 4:39');   
insert into #t values ('AL20',     'AC', '3/21/2010 13:24'); 
insert into #t values ('AL20',     'AC', '2/4/2011 13:54');  
insert into #t values ('AL20',     'AC', '2/4/2011 14:14');  
insert into #t values ('AL20',     'AC', '2/4/2011 14:27');  
insert into #t values ('AL20',     'DE', '2/11/2011 16:24'); 


select
    distinct PROJ_NBR, STATUS, [start], [end]
from(
select 
*,
(select MIN(START_DT) 
    from #t t1 
    where t.STATUS = t1.STATUS 
    and t.START_DT >= t1.START_DT 
    and not exists ( select * from #t t2 
                    where t.STATUS <> t2.STATUS  
                    and  t.START_DT >= t2.START_DT
                    and t1.START_DT <= t2.START_DT)
) [start],
(select Max(START_DT) 
    from #t t1 
    where t.STATUS = t1.STATUS 
    and t.START_DT <= t1.START_DT 
    and not exists ( select * from #t t2 
                    where t.STATUS <> t2.STATUS  
                    and  t.START_DT <= t2.START_DT
                    and t1.START_DT >= t2.START_DT)
) [end]

from #t t
) t1
order by [start]

Context

StackExchange Database Administrators Q#1334, answer score: 4

Revisions (0)

No revisions yet.