Используйте аналитические функции для получения START_ID и END_ID, затем агрегируйте:
-- For every (DT, HOUR, SOURCE) group, report the earliest and latest
-- COL_TIMESTAMP, the ID found at each of those timestamps, and the number of
-- unique (COL_TIMESTAMP, ID) records in the group.
with PUBLISH as ( -- sample data: use your_table instead of this CTE
    select stack(6,
        '20200101','14','A','2020-01-01 14:18:53.016 GMT','ID_111',
        '20200101','14','A','2020-01-01 14:18:53.012 GMT','ID_222',
        '20200101','14','A','2020-01-01 14:18:53.016 GMT','ID_111',
        '20200101','14','A','2020-01-01 14:18:53.019 GMT','ID_333',
        '20200101','15','C','2020-01-01 15:18:53.016 GMT','ID_444',
        '20200102','00','A','2020-01-01 15:18:53.016 GMT','ID_444'
    ) as (DT, HOUR, SOURCE, COL_TIMESTAMP, ID)
),
-- Each input row annotated with its group's boundary IDs and a duplicate marker.
MARKED as (
    select
        DT,
        HOUR,
        SOURCE,
        COL_TIMESTAMP,
        ID,
        -- ID is a secondary sort key: when several rows share the earliest
        -- (or latest) timestamp, ordering by COL_TIMESTAMP alone leaves the
        -- first row arbitrary, so START_ID / END_ID could differ between runs.
        first_value(ID) over (
            partition by DT, HOUR, SOURCE
            order by COL_TIMESTAMP, ID
        ) as START_ID,
        first_value(ID) over (
            partition by DT, HOUR, SOURCE
            order by COL_TIMESTAMP desc, ID desc
        ) as END_ID,
        -- Rows inside one partition are exact duplicates of each other, so the
        -- numbering order is irrelevant: exactly one row per unique record
        -- receives rn = 1.
        row_number() over (
            partition by DT, HOUR, SOURCE, COL_TIMESTAMP, ID
        ) as rn
    from PUBLISH
)
select
    DT,
    HOUR,
    SOURCE,
    -- In the sample data COL_TIMESTAMP is a string, so min/max compare it as
    -- text; that matches chronological order only while the format is fixed-width.
    min(COL_TIMESTAMP) as MIN_TIMESTAMP,
    START_ID,
    max(COL_TIMESTAMP) as MAX_TIMESTAMP,
    END_ID,
    sum(case when rn = 1 then 1 else 0 end) as RECORD_CNT -- unique records have rn = 1
from MARKED
-- START_ID and END_ID are constant within a (DT, HOUR, SOURCE) group, so
-- listing them here does not split groups; it only makes them selectable.
group by DT, HOUR, SOURCE, START_ID, END_ID;
Результат:
dt hour source min_timestamp start_id max_timestamp end_id record_cnt
20200101 14 A 2020-01-01 14:18:53.012 GMT ID_222 2020-01-01 14:18:53.019 GMT ID_333 3
20200101 15 C 2020-01-01 15:18:53.016 GMT ID_444 2020-01-01 15:18:53.016 GMT ID_444 1
20200102 00 A 2020-01-01 15:18:53.016 GMT ID_444 2020-01-01 15:18:53.016 GMT ID_444 1