Следующий пример собирает «острова» из ваших данных. Раскомментировав тот или иной из завершающих операторов select
(и закомментировав остальные), вы можете увидеть промежуточные результаты.
Обновление: сравнение дат в CTE изменено так, чтобы запросы могли использовать индексы по (SSN, FromDate)
и (SSN, ToDate).
-- Sample data: date periods per SSN, held in a table variable so that the
-- script is self-contained. The last row has no ToDate (presumably a period
-- that is still open).
declare @Samples table (
  SSN      VarChar(10),
  FromDate Date,
  ToDate   Date
);

insert into @Samples ( SSN, FromDate, ToDate )
values
  ( '0987654321', '2011-01-01', '2011-12-31' ),
  ( '0987654321', '2012-01-01', '2012-12-31' ),
  ( '1234567890', '2012-01-01', '2012-12-31' ),
  ( '0987654321', '2013-01-01', '2013-12-31' ),
  ( '1234567890', '2013-01-01', '2013-06-30' ),
  ( '0987654321', '2014-01-01', '2014-08-31' ),
  ( '1234567890', '2016-01-01', '2016-12-31' ),
  ( '1234567890', '2017-01-01', '2017-12-31' ),
  ( '1234567890', '2018-01-01', null );

-- Dump the raw rows as loaded.
select SSN, FromDate, ToDate
  from @Samples;
-- Sample data made a little easier to read: rows sorted per SSN, plus a
-- Continued flag that is 1 when another period for the same SSN starts on the
-- day after this row's ToDate, and 0 otherwise (including when ToDate is null,
-- because DateAdd then yields null and the comparison cannot match).
select S.SSN, S.FromDate, S.ToDate,
  case
    when exists (
      -- Compare the bare FromDate column with a computed date instead of
      -- wrapping both columns in DateDiff, so that an index on
      -- ( SSN, FromDate ) can be used; this matches the CTEs below.
      select 42
        from @Samples as SI
        where SI.SSN = S.SSN and
          SI.FromDate = DateAdd( day, 1, S.ToDate ) )
      then 1
    else 0
  end as Continued
  from @Samples as S
  order by S.SSN, S.FromDate;
-- Process the data: merge the periods of each SSN into "islands", i.e. maximal
-- runs of periods that overlap or follow each other without a gap day.
-- Overlapping, nested and duplicate input periods are handled, and a null
-- ToDate is treated as an open-ended period that closes the last island.
with
  FromDates as (
    -- Island starts: the distinct FromDates for each SSN for which no period
    -- that began earlier reaches at least the day before. The group by
    -- collapses duplicates so each island contributes exactly one start.
    select SO.SSN, SO.FromDate,
      Row_Number() over ( partition by SO.SSN order by SO.FromDate ) as RN
      from @Samples as SO
      where not exists (
        select 42
          from @Samples as SI
          where SI.SSN = SO.SSN and
            SI.FromDate < SO.FromDate and
            ( SI.ToDate is null or SI.ToDate >= DateAdd( day, -1, SO.FromDate ) ) )
      group by SO.SSN, SO.FromDate ),
  ToDates as (
    -- Island ends: the distinct ToDates for each SSN for which no period that
    -- begins on or before the following day extends beyond it. A null ToDate
    -- always qualifies (the DateAdd comparison yields unknown) and is numbered
    -- last, since T-SQL would otherwise sort nulls first.
    select SO.SSN, SO.ToDate,
      Row_Number() over ( partition by SO.SSN
        order by case when SO.ToDate is null then 1 else 0 end, SO.ToDate ) as RN
      from @Samples as SO
      where not exists (
        select 42
          from @Samples as SI
          where SI.SSN = SO.SSN and
            SI.FromDate <= DateAdd( day, 1, SO.ToDate ) and
            ( SI.ToDate is null or SI.ToDate > SO.ToDate ) )
      group by SO.SSN, SO.ToDate ),
  Ranges as (
    -- Pair the n-th start with the n-th end for each SSN. Islands are disjoint
    -- and each has exactly one start and one end, so the numbering lines up.
    select F.SSN, F.FromDate, T.ToDate
      from FromDates as F inner join
        ToDates as T on T.SSN = F.SSN and T.RN = F.RN )
  -- Use any ONE of the following select statements to see what is going on:
--  select SSN, FromDate, RN from FromDates order by SSN, RN;
--  select SSN, ToDate, RN from ToDates order by SSN, RN;
  select SSN, FromDate, ToDate from Ranges order by SSN, FromDate;
Конечно, если бы в пределах одного SSN существовали отдельные значения Id,
которые нужно обрабатывать независимо друг от друга, ответ изменился бы примерно так:
-- Sample data: date periods per ( SSN, Id ), held in a table variable so that
-- the script is self-contained. Rows without a ToDate have no end date.
-- The SSN 7605140000 rows include overlapping periods (2014-01-01..2014-03-31
-- and 2014-03-01..2014-12-31); rows suspected to be duplicates are left
-- commented out in place.
declare @Samples as Table ( SSN VarChar(10), Id VarChar(4), FromDate Date, ToDate Date );
-- The column list spells Id exactly as declared, so the insert also works
-- under a case-sensitive collation.
insert into @Samples ( SSN, Id, FromDate, ToDate ) values
  ( '6612140000', '1000', '2005-01-01', '2005-03-31' ),
  ( '6612140000', '1000', '2005-04-01', '2005-09-30' ),
  ( '6612140000', '1000', '2005-10-01', '2006-03-31' ),
  ( '6612140000', '2000', '2005-10-01', '2006-04-30' ),
  ( '6612140000', '1000', '2006-04-01', '2007-03-31' ),
  ( '6612140000', '1000', '2007-04-01', '2008-03-31' ),
  ( '6612140000', '1000', '2008-04-01', '2009-03-31' ),
  ( '6612140000', '1000', '2009-04-01', '2010-03-31' ),
  ( '6612140000', '1000', '2010-04-01', '2010-11-30' ),
  ( '6612140000', '1000', '2010-12-01', '2011-03-31' ),
  ( '6612140000', '1000', '2011-04-01', '2011-08-21' ),
  ( '6612140000', '1000', '2011-08-22', '2011-11-13' ),
  ( '6612140000', '1000', '2011-11-14', '2011-11-30' ),
  ( '6612140000', '1000', '2011-12-01', '2012-01-31' ),
  ( '6612140000', '1000', '2016-07-01', '2017-03-31' ),
  ( '6612140000', '1000', '2017-04-01', '2017-11-30' ),
  ( '6612140000', '1000', '2017-12-01', '2018-03-31' ),
  ( '6612140000', '1000', '2018-04-01', null ),
  ( '7605140000', '1000', '2013-11-01', '2013-11-30' ),
  ( '7605140000', '1000', '2013-12-01', '2013-12-31' ),
  ( '7605140000', '1000', '2014-01-01', '2014-03-31' ),
  ( '7605140000', '1000', '2014-03-01', '2014-12-31' ),
  ( '7605140000', '1000', '2014-04-01', '2014-12-31' ),
  ( '7605140000', '1000', '2015-05-01', '2015-05-31' ),
  -- ( '7605140000', '1000', '2015-05-01', '2015-05-31' ), -- Duplicate row?!
  ( '7605140000', '1000', '2015-06-01', '2015-09-30' ),
  -- ( '7605140000', '1000', '2015-06-01', '2015-09-30' ), -- Duplicate row?!
  ( '7605140000', '1000', '2015-10-01', '2015-10-31' ),
  -- ( '7605140000', '1000', '2015-10-01', '2015-10-31' ), -- Duplicate row?!
  ( '7605140000', '1000', '2016-01-25', '2016-07-24' ),
  ( '7605140000', '1000', '2016-07-25', '2016-08-31' ),
  ( '7605140000', '1000', '2016-09-01', '2017-03-31' ),
  ( '7605140000', '1000', '2017-04-01', '2017-11-30' ),
  ( '7605140000', '1000', '2017-12-01', null );
-- Dump the raw rows as loaded.
select SSN, Id, FromDate, ToDate
  from @Samples;
-- Sample data made a little easier to read: rows sorted per ( SSN, Id ), plus
-- a Continued flag that is 1 when another period for the same SSN and Id
-- starts on the day after this row's ToDate, and 0 otherwise (including when
-- ToDate is null, because DateAdd then yields null and nothing can match).
select S.SSN, S.Id, S.FromDate, S.ToDate,
  case
    when exists (
      -- Compare the bare FromDate column with a computed date instead of
      -- wrapping both columns in DateDiff, so that an index leading on
      -- ( SSN, Id, FromDate ) can be used; this matches the CTEs below.
      select 42
        from @Samples as SI
        where SI.SSN = S.SSN and SI.Id = S.Id and
          SI.FromDate = DateAdd( day, 1, S.ToDate ) )
      then 1
    else 0
  end as Continued
  from @Samples as S
  order by S.SSN, S.Id, S.FromDate;
-- Process the data: merge the periods of each ( SSN, Id ) into "islands", i.e.
-- maximal runs of periods that overlap or follow each other without a gap day.
-- Overlapping, nested and duplicate input periods are handled, and a null
-- ToDate is treated as an open-ended period that closes the last island.
with
  FromDates as (
    -- Island starts: the distinct FromDates for each ( SSN, Id ) for which no
    -- period that began earlier reaches at least the day before. The group by
    -- collapses duplicates so each island contributes exactly one start.
    select SO.SSN, SO.Id, SO.FromDate,
      Row_Number() over ( partition by SO.SSN, SO.Id order by SO.FromDate ) as RN
      from @Samples as SO
      where not exists (
        select 42
          from @Samples as SI
          where SI.SSN = SO.SSN and SI.Id = SO.Id and
            SI.FromDate < SO.FromDate and
            ( SI.ToDate is null or SI.ToDate >= DateAdd( day, -1, SO.FromDate ) ) )
      group by SO.SSN, SO.Id, SO.FromDate ),
  ToDates as (
    -- Island ends: the distinct ToDates for each ( SSN, Id ) for which no
    -- period that begins on or before the following day extends beyond it.
    -- A null ToDate always qualifies (the DateAdd comparison yields unknown)
    -- and is numbered last, since T-SQL would otherwise sort nulls first.
    select SO.SSN, SO.Id, SO.ToDate,
      Row_Number() over ( partition by SO.SSN, SO.Id
        order by case when SO.ToDate is null then 1 else 0 end, SO.ToDate ) as RN
      from @Samples as SO
      where not exists (
        select 42
          from @Samples as SI
          where SI.SSN = SO.SSN and SI.Id = SO.Id and
            SI.FromDate <= DateAdd( day, 1, SO.ToDate ) and
            ( SI.ToDate is null or SI.ToDate > SO.ToDate ) )
      group by SO.SSN, SO.Id, SO.ToDate ),
  Ranges as (
    -- Pair the n-th start with the n-th end for each ( SSN, Id ). Islands are
    -- disjoint and each has exactly one start and one end, so the numbering
    -- lines up.
    select F.SSN, F.Id, F.FromDate, T.ToDate
      from FromDates as F inner join
        ToDates as T on T.SSN = F.SSN and T.Id = F.Id and T.RN = F.RN )
  -- Use any ONE of the following select statements to see what is going on:
--  select SSN, Id, FromDate, RN from FromDates order by SSN, Id, RN;
--  select SSN, Id, ToDate, RN from ToDates order by SSN, Id, RN;
  select SSN, Id, FromDate, ToDate from Ranges order by SSN, Id, FromDate;