Новый диапазон начинается, когда (year - prev_year) > 1 или prev_year is NULL; в этом случае текущий год можно принять в качестве первого года нового диапазона. Назначьте first_year всем строкам, затем вычислите last_year для каждой группы (group_no, first_year).
-- Collapse each group's years into sorted "first-last" range labels
-- (a lone year is shown as just that year), one output row per group_no.
with my_data as (
    -- Inline sample data: 14 rows of (group_no, year).
    select stack(14,
        1111, 2003,
        1111, 2004,
        1111, 2005,
        1111, 2008,
        1111, 2010,
        1111, 2011,
        1111, 2012,
        2222, 2015,
        3333, 2014,
        3333, 2015,
        3333, 2017,
        3333, 2019,
        4444, 2010,
        4444, 2012
    ) as (group_no, year)
),
-- Step 1: attach the preceding year within the same group
-- (NULL for the first row of each group).
year_with_prev as (
    select
        d.*,
        lag(year) over (partition by group_no order by year) prev_year
    from my_data d
),
-- Step 2: a row opens a new range unless it directly follows the previous
-- year (difference of exactly 1). Range-opening rows contribute their own
-- year, all others contribute NULL; the running max then carries the most
-- recent range start forward to every row of that range.
year_with_range_start as (
    select
        p.group_no,
        p.year,
        max(case when (year - prev_year) = 1 then NULL else year end)
            over (
                partition by group_no
                order by year
                rows between unbounded preceding and current row
            ) first_year
    from year_with_prev p
),
-- Step 3: the last year of a range is the largest year sharing its first_year.
year_ranges as (
    select
        r.group_no,
        r.first_year,
        max(year) last_year
    from year_with_range_start r
    group by r.group_no, r.first_year
)
-- Step 4: render each range as text and gather the labels per group.
select
    group_no,
    array_sort(
        collect_list(
            case
                when first_year=last_year then first_year
                else concat(first_year,'-',last_year)
            end
        )
    ) as year
from year_ranges
group by group_no
order by group_no
Результат:
group_no year
1111 ["2003-2005","2008","2010-2012"]
2222 ["2015"]
3333 ["2014-2015","2017","2019"]
4444 ["2010","2012"]