BigQuery - объединить фрагментированные события - PullRequest
1 голос
/ 26 февраля 2020

Вот пример данных:

-- Sample data for the question: 14 activity intervals in chronological order.
-- datetime_start / datetime_end are ISO-8601 strings; flag is 0 or 1.
-- The set operator must be spelled `union all`; the misspelling `uniuon all`
-- is not valid SQL and makes the whole statement fail to parse.
create table activity as
select "2020-02-25T09:06:12" as datetime_start,  "2020-02-25T09:07:31" as datetime_end, 0 as flag union all
select "2020-02-25T09:16:08" as datetime_start,  "2020-02-25T09:17:31" as datetime_end, 0 as flag union all
select "2020-02-25T09:17:31" as datetime_start,  "2020-02-25T09:27:31" as datetime_end, 1 as flag union all
select "2020-02-25T09:27:31" as datetime_start,  "2020-02-25T09:32:41" as datetime_end, 1 as flag union all
select "2020-02-25T09:35:57" as datetime_start,  "2020-02-25T09:37:31" as datetime_end, 0 as flag union all
select "2020-02-25T09:49:23" as datetime_start,  "2020-02-25T09:51:16" as datetime_end, 0 as flag union all
select "2020-02-25T09:51:16" as datetime_start,  "2020-02-25T10:03:46" as datetime_end, 1 as flag union all
select "2020-02-25T10:03:46" as datetime_start,  "2020-02-25T10:05:57" as datetime_end, 1 as flag union all
select "2020-02-25T10:05:57" as datetime_start,  "2020-02-25T10:07:31" as datetime_end, 1 as flag union all
select "2020-02-25T10:07:31" as datetime_start,  "2020-02-25T10:10:22" as datetime_end, 1 as flag union all
select "2020-02-25T10:10:22" as datetime_start,  "2020-02-25T10:12:55" as datetime_end, 1 as flag union all
select "2020-02-25T10:12:55" as datetime_start,  "2020-02-25T10:20:17" as datetime_end, 1 as flag union all
select "2020-02-25T10:20:17" as datetime_start,  "2020-02-25T10:27:40" as datetime_end, 1 as flag union all
select "2020-02-25T10:27:40" as datetime_start,  "2020-02-25T10:39:51" as datetime_end, 1 as flag;

Я ищу запрос, который вычислит блоки активности на основе столбца flag.
Если флаг равен 1, то эту строку и все последующие строки, пока флаг не изменится на 0, необходимо объединить в один блок активности.

Для приведённого выше примера должно получиться 6 блоков активности.

  1. 2020-02-25T09:06:12 - 2020-02-25T09:07:31
  2. 2020-02-25T09:16:08 - 2020-02-25T09:17:31
  3. 2020-02-25T09:17:31 - 2020-02-25T09:32:41
  4. 2020-02-25T09:35:57 - 2020-02-25T09:37:31
  5. 2020-02-25T09:49:23 - 2020-02-25T09:51:16
  6. 2020-02-25T09:51:16 - 2020-02-25T10:39:51

Ответы [ 2 ]

2 голосов
/ 26 февраля 2020

Это отвечает на оригинальную версию вопроса.

Ответ GMB, возможно, работает, но он выглядит заточенным под конкретный случай, поскольку значение флага в нём жёстко закодировано. Я предпочитаю более общий подход:

-- Merges adjacent activity rows into blocks.
-- A row starts a new block when its start differs from the previous row's end
-- (a gap) or when its flag differs from the previous row's flag.
-- Output: one row per block with its earliest start, latest end and flag.
with activity as (
    -- Inline copy of the sample data so the query runs on its own.
    select "2020-02-25T09:06:12" as datetime_start,  "2020-02-25T09:07:31" as datetime_end, 0 as flag union all
    select "2020-02-25T09:16:08" as datetime_start,  "2020-02-25T09:17:31" as datetime_end, 0 as flag union all
    select "2020-02-25T09:17:31" as datetime_start,  "2020-02-25T09:27:31" as datetime_end, 1 as flag union all
    select "2020-02-25T09:27:31" as datetime_start,  "2020-02-25T09:32:41" as datetime_end, 1 as flag union all
    select "2020-02-25T09:35:57" as datetime_start,  "2020-02-25T09:37:31" as datetime_end, 0 as flag union all
    select "2020-02-25T09:49:23" as datetime_start,  "2020-02-25T09:51:16" as datetime_end, 0 as flag union all
    select "2020-02-25T09:51:16" as datetime_start,  "2020-02-25T10:03:46" as datetime_end, 1 as flag union all
    select "2020-02-25T10:03:46" as datetime_start,  "2020-02-25T10:05:57" as datetime_end, 1 as flag union all
    select "2020-02-25T10:05:57" as datetime_start,  "2020-02-25T10:07:31" as datetime_end, 1 as flag union all
    select "2020-02-25T10:07:31" as datetime_start,  "2020-02-25T10:10:22" as datetime_end, 1 as flag union all
    select "2020-02-25T10:10:22" as datetime_start,  "2020-02-25T10:12:55" as datetime_end, 1 as flag union all
    select "2020-02-25T10:12:55" as datetime_start,  "2020-02-25T10:20:17" as datetime_end, 1 as flag union all
    select "2020-02-25T10:20:17" as datetime_start,  "2020-02-25T10:27:40" as datetime_end, 1 as flag union all
    select "2020-02-25T10:27:40" as datetime_start,  "2020-02-25T10:39:51" as datetime_end, 1 as flag
),

-- Each row together with the flag and end time of the row before it.
with_previous as (
    select
        datetime_start,
        datetime_end,
        flag,
        lag(flag) over (order by datetime_start) as prev_flag,
        lag(datetime_end) over (order by datetime_start) as prev_datetime_end
    from activity
),

-- Running count of block boundaries; rows sharing (flag, grp) form one block.
-- On the first row both comparisons are NULL, which countif does not count,
-- so the first block gets grp = 0.
with_group as (
    select
        datetime_start,
        datetime_end,
        flag,
        countif(
            datetime_start <> prev_datetime_end
            or prev_flag <> flag
        ) over (order by datetime_start) as grp
    from with_previous
)

select
    min(datetime_start) as datetime_start,  -- was misspelled `datetime_stat`
    max(datetime_end) as datetime_end,
    flag
from with_group
group by flag, grp
-- Without an explicit order the row order of the result is undefined.
order by datetime_start
2 голосов
/ 26 февраля 2020

Это вариант задачи «пробелы и острова» (gaps and islands). Вот подход, который использует lag() и оконную сумму для определения групп последовательных единиц:

-- Collapses every run of consecutive flag = 1 rows into a single block.
-- A row continues the current block only when both it and the previous row
-- have flag = 1; every other row (including the first, whose lag_flag is NULL)
-- starts a new block. Reads from `activity`, the table created in the question.
with with_previous as (
    select
        datetime_start,
        datetime_end,
        flag,
        lag(flag) over (order by datetime_start) as lag_flag
    from activity
),

-- Running sum of "starts a new block" markers gives a block number per row.
with_group as (
    select
        datetime_start,
        datetime_end,
        flag,
        sum(case when flag = 1 and lag_flag = 1 then 0 else 1 end)
            over (order by datetime_start) as grp
    from with_previous
)

select
    min(datetime_start) as datetime_start,  -- was misspelled `datetime_stat`
    max(datetime_end) as datetime_end,
    flag
from with_group
group by flag, grp
-- Without an explicit order the row order of the result is undefined.
order by datetime_start
...