Решение на чистом SQL с использованием оконных функций, без соединений (JOIN); протестировано в Hive на примере данных:
-- Numbers every run of two or more consecutive rows with input_column = 1
-- (rows taken in id order) as 1, 2, 3 and so on. All other rows get 0.
with your_table as ( -- sample data only, replace this CTE with your own table
    select stack(23,
         1, 0,
         2, 1,
         3, 0,
         4, 1,
         5, 1,
         6, 0,
         7, 0,
         8, 1,
         9, 1,
        10, 1,
        11, 0,
        12, 0,
        13, 1,
        14, 1,
        15, 1,
        16, 1,
        17, 0,
        18, 1,
        19, 1,
        20, 0,
        21, 0,
        22, 1,
        23, 0
    ) as (id, input_column)
),

-- Step 1: attach the input value of the previous and the next row (by id).
-- Both are NULL at the respective edge of the data set.
neighbours as (
    select
        id,
        input_column,
        lag(input_column)  over (order by id) as prev_value,
        lead(input_column) over (order by id) as next_value
    from your_table
),

-- Step 2: run_start_id holds the id of the first row of each run (a 1 that is
-- not preceded by a 1 but is followed by one). in_run is true for every row
-- that has a neighbouring 1, and false or NULL for isolated 1s and for 0s.
run_marks as (
    select
        id,
        input_column,
        case
            when input_column = 1
                 and (prev_value is null or prev_value != 1)
                 and next_value = 1
            then id
        end as run_start_id,
        input_column = 1 and (prev_value = 1 or next_value = 1) as in_run
    from neighbours
),

-- Step 3: spread the id of the run start to all rows of the same run.
-- The running max picks the most recent run start at or before the current row.
runs as (
    select
        id,
        input_column,
        case
            when in_run
            then max(run_start_id) over (
                     order by id
                     rows between unbounded preceding and current row
                 )
        end as grp_id
    from run_marks
)

-- Step 4: turn the run start ids into consecutive numbers. Partitioning by
-- (grp_id is null) keeps the rows outside of any run from consuming a rank.
select
    id,
    input_column,
    case
        when grp_id is null then 0
        else dense_rank() over (partition by (grp_id is null) order by grp_id)
    end as output_column
from runs
order by id;
Результат:
id input_column output_column
1 0 0
2 1 0
3 0 0
4 1 1
5 1 1
6 0 0
7 0 0
8 1 2
9 1 2
10 1 2
11 0 0
12 0 0
13 1 3
14 1 3
15 1 3
16 1 3
17 0 0
18 1 4
19 1 4
20 0 0
21 0 0
22 1 0
23 0 0
Time taken: 226.407 seconds, Fetched: 23 row(s)
Результат в точности соответствует ожидаемому. Также взгляните на очень похожий вопрос: https://stackoverflow.com/a/55336802/2700344