Ниже приведён запрос для BigQuery Standard SQL:
#standardSQL
-- Summarises malfunction recordings per customer, device and calendar day.
-- Output columns:
--   total           - number of recordings that day
--   consecutive     - recordings that belong to a batch of two or more
--   non_consecutive - number of single-recording batches
--   details         - one STRUCT per batch (batch number, size, first/last ts),
--                     ordered by batch number
-- NOTE: `day` is DATE(ts); BigQuery evaluates DATE(timestamp) in UTC unless a
-- time zone argument is supplied.
WITH flagged AS (
  -- Mark every recording that opens a new batch: either it is the first
  -- recording of the day for that device (LAG returns NULL) or the hour
  -- difference to the previous recording reported by TIMESTAMP_DIFF exceeds 1.
  SELECT
    customer_id,
    device_id,
    ts,
    DATE(ts) AS day,
    IFNULL(
      TIMESTAMP_DIFF(
        ts,
        LAG(ts) OVER (PARTITION BY customer_id, device_id, DATE(ts) ORDER BY ts),
        HOUR
      ),
      777  -- sentinel: any value > 1 so that a row with no predecessor counts as a gap
    ) > 1 AS gap
  FROM `project.dataset.malfunctions`
),
numbered AS (
  -- Running count of gap flags = batch number within (customer, device, day).
  -- The default window frame is kept on purpose so rows sharing the same ts
  -- always land in the same batch.
  SELECT
    customer_id,
    device_id,
    ts,
    day,
    COUNTIF(gap) OVER (PARTITION BY customer_id, device_id, day ORDER BY ts) AS batch
  FROM flagged
),
batches AS (
  -- One row per batch with its size and time span.
  SELECT
    customer_id,
    device_id,
    day,
    batch,
    COUNT(1) AS batch_count,
    MIN(ts) AS first_recording,
    MAX(ts) AS last_recording
  FROM numbered
  GROUP BY customer_id, device_id, day, batch
)
SELECT
  customer_id,
  device_id,
  day,
  SUM(batch_count) AS total,
  SUM(batch_count) - COUNTIF(batch_count = 1) AS consecutive,
  COUNTIF(batch_count = 1) AS non_consecutive,
  -- ORDER BY inside ARRAY_AGG: without it the element order is not guaranteed.
  ARRAY_AGG(
    STRUCT(
      batch AS batch,
      batch_count AS batch_count,
      first_recording AS first_recording,
      last_recording AS last_recording
    )
    ORDER BY batch
  ) AS details
FROM batches
GROUP BY customer_id, device_id, day
Вы можете проверить приведённый выше запрос и поэкспериментировать с ним на фиктивных данных, как в примере ниже:
#standardSQL
-- Self-contained demo: the CTE below shadows the real table name with a few
-- dummy rows so the query can be run as-is.
-- Output columns:
--   total           - number of recordings that day
--   consecutive     - recordings that belong to a batch of two or more
--   non_consecutive - number of single-recording batches
--   details         - one STRUCT per batch (batch number, size, first/last ts),
--                     ordered by batch number
WITH `project.dataset.malfunctions` AS (
  SELECT 1 customer_id, 1 device_id, TIMESTAMP '2019-02-12T01:00:00' ts UNION ALL
  SELECT 1, 1, '2019-02-12T02:00:00' UNION ALL
  SELECT 1, 1, '2019-02-12T03:00:00' UNION ALL
  SELECT 1, 1, '2019-02-12T04:00:00' UNION ALL
  SELECT 1, 1, '2019-02-12T09:00:00' UNION ALL
  SELECT 1, 1, '2019-02-12T10:00:00' UNION ALL
  SELECT 1, 1, '2019-02-13T03:00:00' UNION ALL
  SELECT 2, 2, '2019-02-12T01:00:00'
),
flagged AS (
  -- Mark every recording that opens a new batch: either it is the first
  -- recording of the day for that device (LAG returns NULL) or the hour
  -- difference to the previous recording reported by TIMESTAMP_DIFF exceeds 1.
  SELECT
    customer_id,
    device_id,
    ts,
    DATE(ts) AS day,
    IFNULL(
      TIMESTAMP_DIFF(
        ts,
        LAG(ts) OVER (PARTITION BY customer_id, device_id, DATE(ts) ORDER BY ts),
        HOUR
      ),
      777  -- sentinel: any value > 1 so that a row with no predecessor counts as a gap
    ) > 1 AS gap
  FROM `project.dataset.malfunctions`
),
numbered AS (
  -- Running count of gap flags = batch number within (customer, device, day).
  -- The default window frame is kept on purpose so rows sharing the same ts
  -- always land in the same batch.
  SELECT
    customer_id,
    device_id,
    ts,
    day,
    COUNTIF(gap) OVER (PARTITION BY customer_id, device_id, day ORDER BY ts) AS batch
  FROM flagged
),
batches AS (
  -- One row per batch with its size and time span.
  SELECT
    customer_id,
    device_id,
    day,
    batch,
    COUNT(1) AS batch_count,
    MIN(ts) AS first_recording,
    MAX(ts) AS last_recording
  FROM numbered
  GROUP BY customer_id, device_id, day, batch
)
SELECT
  customer_id,
  device_id,
  day,
  SUM(batch_count) AS total,
  SUM(batch_count) - COUNTIF(batch_count = 1) AS consecutive,
  COUNTIF(batch_count = 1) AS non_consecutive,
  -- ORDER BY inside ARRAY_AGG: without it the element order is not guaranteed.
  ARRAY_AGG(
    STRUCT(
      batch AS batch,
      batch_count AS batch_count,
      first_recording AS first_recording,
      last_recording AS last_recording
    )
    ORDER BY batch
  ) AS details
FROM batches
GROUP BY customer_id, device_id, day
-- Sorted so the demo output is reproducible from run to run.
ORDER BY customer_id, device_id, day
Результат выполнения:
![enter image description here](https://i.stack.imgur.com/F3Tqk.png)