Мне нужна помощь в оптимизации запроса с несколькими оконными функциями. Когда я использую только одно окно, выполнение занимает считанные секунды, но когда я добавляю больше окон, запрос выполняется часами.
Я пытался объединить функции для каждого таймфрейма в один UDF, который получает все столбцы и считает / суммирует всё в одном окне. Но Spark 2.3 не поддерживает UDAF в оконных функциях.
-- Rolling per-id aggregates over four trailing RANGE frames ordered by dtime.
-- Every output column reuses one of the four named windows declared in the
-- WINDOW clause at the bottom, so each frame definition is written only once.
-- NOTE(review): the frame offsets are expressed in the units of dtime; the
-- _5m/_15m/_60m/_120m suffixes suggest dtime is epoch milliseconds -- confirm.
SELECT
    *
    -- Total amount inside each trailing frame.
    , SUM(amount) OVER w_5m   AS sum_all_5m
    , SUM(amount) OVER w_15m  AS sum_all_15m
    , SUM(amount) OVER w_60m  AS sum_all_60m
    , SUM(amount) OVER w_120m AS sum_all_120m
    -- Number of rows with a non-NULL id inside each trailing frame.
    , COUNT(id) OVER w_5m   AS count_all_5m
    , COUNT(id) OVER w_15m  AS count_all_15m
    , COUNT(id) OVER w_60m  AS count_all_60m
    , COUNT(id) OVER w_120m AS count_all_120m
    -- Number of rows whose type is '1' inside each trailing frame.
    , SUM(CASE WHEN type = '1' THEN 1 ELSE 0 END) OVER w_5m   AS count_1_5m
    , SUM(CASE WHEN type = '1' THEN 1 ELSE 0 END) OVER w_15m  AS count_1_15m
    , SUM(CASE WHEN type = '1' THEN 1 ELSE 0 END) OVER w_60m  AS count_1_60m
    , SUM(CASE WHEN type = '1' THEN 1 ELSE 0 END) OVER w_120m AS count_1_120m
    -- Number of rows whose type is '2' inside each trailing frame.
    , SUM(CASE WHEN type = '2' THEN 1 ELSE 0 END) OVER w_5m   AS count_2_5m
    , SUM(CASE WHEN type = '2' THEN 1 ELSE 0 END) OVER w_15m  AS count_2_15m
    , SUM(CASE WHEN type = '2' THEN 1 ELSE 0 END) OVER w_60m  AS count_2_60m
    , SUM(CASE WHEN type = '2' THEN 1 ELSE 0 END) OVER w_120m AS count_2_120m
    -- Amount contributed by rows whose type is '2' inside each trailing frame.
    , SUM(CASE WHEN type = '2' THEN amount ELSE 0 END) OVER w_5m   AS sum_2_5m
    , SUM(CASE WHEN type = '2' THEN amount ELSE 0 END) OVER w_15m  AS sum_2_15m
    , SUM(CASE WHEN type = '2' THEN amount ELSE 0 END) OVER w_60m  AS sum_2_60m
    , SUM(CASE WHEN type = '2' THEN amount ELSE 0 END) OVER w_120m AS sum_2_120m
...
FROM tableX
WINDOW
    w_5m   AS (PARTITION BY id ORDER BY dtime ASC RANGE BETWEEN 300000 PRECEDING AND CURRENT ROW),
    w_15m  AS (PARTITION BY id ORDER BY dtime ASC RANGE BETWEEN 900000 PRECEDING AND CURRENT ROW),
    w_60m  AS (PARTITION BY id ORDER BY dtime ASC RANGE BETWEEN 3600000 PRECEDING AND CURRENT ROW),
    w_120m AS (PARTITION BY id ORDER BY dtime ASC RANGE BETWEEN 7200000 PRECEDING AND CURRENT ROW)