Ниже приведено решение для BigQuery Standard SQL:
#standardSQL
-- For every event whose name starts with 'A', report the event that directly
-- follows it in time (b_event) and the gap between the two in seconds (duration).
WITH grouped AS (
  -- grp is the running count of 'A' events in time order, so each 'A' row
  -- opens a new group that also contains the rows up to the next 'A'.
  SELECT
    event,
    time,
    COUNTIF(STARTS_WITH(event, 'A')) OVER (ORDER BY time) AS grp
  FROM `project.dataset.your_table`
),
with_next AS (
  -- Next row inside the same group; NULL when the group has no later row.
  SELECT
    event,
    time,
    LEAD(time) OVER (PARTITION BY grp ORDER BY time) AS b_time,
    LEAD(event) OVER (PARTITION BY grp ORDER BY time) AS b_event
  FROM grouped
)
SELECT
  event,
  TIMESTAMP_DIFF(b_time, time, SECOND) AS duration,
  b_event
FROM with_next
WHERE STARTS_WITH(event, 'A')
-- Row order is unspecified here; append "ORDER BY time" if it matters.
Вы можете протестировать этот запрос и поэкспериментировать с ним на тестовых данных из вашего вопроса, как показано ниже:
#standardSQL
-- Self-contained demo: pairs each 'A' event with the event that directly
-- follows it in time and reports the gap between them in seconds.
WITH `project.dataset.your_table` AS (
  -- Inline sample rows standing in for the real table.
  SELECT 'A1' AS event, TIMESTAMP '2018-01-01 1:00:00' AS time UNION ALL
  SELECT 'B5', TIMESTAMP '2018-01-01 2:00:00' UNION ALL
  SELECT 'A3', TIMESTAMP '2018-01-01 3:00:00' UNION ALL
  SELECT 'B9', TIMESTAMP '2018-01-01 5:00:00'
),
grouped AS (
  -- grp is the running count of 'A' events in time order, so each 'A' row
  -- opens a new group that also contains the rows up to the next 'A'.
  SELECT
    event,
    time,
    COUNTIF(STARTS_WITH(event, 'A')) OVER (ORDER BY time) AS grp
  FROM `project.dataset.your_table`
),
with_next AS (
  -- Next row inside the same group; NULL when the group has no later row.
  SELECT
    event,
    time,
    LEAD(time) OVER (PARTITION BY grp ORDER BY time) AS b_time,
    LEAD(event) OVER (PARTITION BY grp ORDER BY time) AS b_event
  FROM grouped
)
SELECT
  event,
  TIMESTAMP_DIFF(b_time, time, SECOND) AS duration,
  b_event
FROM with_next
WHERE STARTS_WITH(event, 'A')
ORDER BY time
Результат выполнения:
Row event duration b_event
1 A1 3600 B5
2 A3 7200 B9
Обратите внимание: приведённое выше решение опирается на утверждение из вашего вопроса — «B will always happen after A» (B всегда происходит после A). Поэтому, если у вас есть последовательность, как показано ниже,
-- Sample input in which two 'A' events (A1 and A2) occur before the first
-- 'B' event (B5). This is only the WITH clause; it is not a complete query.
WITH `project.dataset.your_table` AS (
SELECT 'A1' event, TIMESTAMP '2018-01-01 1:00:00' time UNION ALL
SELECT 'A2', TIMESTAMP '2018-01-01 1:30:00' UNION ALL
SELECT 'B5', TIMESTAMP '2018-01-01 2:00:00' UNION ALL
SELECT 'A3', TIMESTAMP '2018-01-01 3:00:00' UNION ALL
SELECT 'B9', TIMESTAMP '2018-01-01 5:00:00'
)
то результат будет следующим:
Row event duration b_event
1 A1 null null
2 A2 1800 B5
3 A3 7200 B9
Если вам нужно корректно обработать и такой случай, попробуйте запрос ниже:
#standardSQL
-- Pairs every 'A' event with the nearest 'B' event that follows it in time,
-- even when several 'A' events occur before that 'B'. Returns the 'A' event,
-- the gap to the matched 'B' in seconds (duration) and the 'B' event itself.
-- 'A' events with no later 'B' get NULL for duration and b_event.
WITH `project.dataset.your_table` AS (
  -- Inline sample rows standing in for the real table.
  SELECT 'A1' AS event, TIMESTAMP '2018-01-01 1:00:00' AS time UNION ALL
  SELECT 'A2', TIMESTAMP '2018-01-01 1:30:00' UNION ALL
  SELECT 'B5', TIMESTAMP '2018-01-01 2:00:00' UNION ALL
  SELECT 'A3', TIMESTAMP '2018-01-01 3:00:00' UNION ALL
  SELECT 'B9', TIMESTAMP '2018-01-01 5:00:00'
),
paired AS (
  -- Look only at rows after the current one, in time order, and take the
  -- first that is a 'B' event: non-'B' rows are mapped to NULL and skipped
  -- by IGNORE NULLS. Ordering the frame by time (rather than by a group
  -- number shared by a 'B' row and the 'A' rows after it) makes the
  -- selected row well defined.
  SELECT
    event,
    time,
    FIRST_VALUE(IF(STARTS_WITH(event, 'B'), event, NULL) IGNORE NULLS)
      OVER (ORDER BY time ROWS BETWEEN 1 FOLLOWING AND UNBOUNDED FOLLOWING) AS b_event,
    FIRST_VALUE(IF(STARTS_WITH(event, 'B'), time, NULL) IGNORE NULLS)
      OVER (ORDER BY time ROWS BETWEEN 1 FOLLOWING AND UNBOUNDED FOLLOWING) AS b_time
  FROM `project.dataset.your_table`
)
SELECT
  event,
  TIMESTAMP_DIFF(b_time, time, SECOND) AS duration,
  b_event
FROM paired
WHERE STARTS_WITH(event, 'A')
ORDER BY time
Эта версия вернёт:
Row event duration b_event
1 A1 3600 B5
2 A2 1800 B5
3 A3 7200 B9