Ниже приведён пример для BigQuery Standard SQL:
#standardSQL
-- For every row, report how many distinct x values occur in the window that
-- ends at the row's own time t and reaches back 3600 seconds (one hour).
WITH `project.dataset.your_table` AS (
  -- Inline sample rows standing in for the real table.
  SELECT TIME '05:20:19' AS t, 4 AS x UNION ALL
  SELECT TIME '05:37:18', 7 UNION ALL
  SELECT TIME '05:45:14', 4 UNION ALL
  SELECT TIME '05:56:04', 4 UNION ALL
  SELECT TIME '06:18:48', 5 UNION ALL
  SELECT TIME '06:48:34', 3 UNION ALL
  SELECT TIME '07:52:48', 1
),
trailing_hour AS (
  -- Collect the x values of all rows inside the trailing window into arr.
  -- The sort key is t expressed as whole seconds since midnight, so the
  -- frame bound of 3600 is measured in seconds.
  SELECT
    t,
    x,
    ARRAY_AGG(x) OVER (
      ORDER BY TIME_DIFF(t, TIME '00:00:00', SECOND)
      RANGE BETWEEN 3600 PRECEDING AND CURRENT ROW
    ) AS arr
  FROM `project.dataset.your_table`
)
SELECT
  t,
  x,
  -- Number of distinct values among the collected window elements.
  (SELECT COUNT(DISTINCT y) FROM UNNEST(arr) AS y) AS uniques
FROM trailing_hour
-- Uncomment to return the rows sorted by time:
-- ORDER BY t
с результатом:
Row t x uniques
1 05:20:19 4 1
2 05:37:18 7 2
3 05:45:14 4 2
4 05:56:04 4 2
5 06:18:48 5 3
6 06:48:34 3 3
7 07:52:48 1 1
В примере использованы тестовые данные в точности из вашего вопроса. Подозреваю, что на самом деле у вас столбец имеет тип не TIME, а TIMESTAMP, поэтому вместо ORDER BY TIME_DIFF(t, TIME '00:00:00', SECOND)
можно использовать что-то вроде ORDER BY TIMESTAMP_DIFF(t, TIMESTAMP '2000-01-01 00:00:00', SECOND),
и тогда запрос будет выглядеть так:
#standardSQL
-- For every row, report how many distinct x values occur in the window that
-- ends at the row's own timestamp t and reaches back 3600 seconds (one hour).
WITH `project.dataset.your_table` AS (
  -- Inline sample rows standing in for the real table.
  SELECT TIMESTAMP '2018-01-05 05:20:19' AS t, 4 AS x UNION ALL
  SELECT TIMESTAMP '2018-01-05 05:37:18', 7 UNION ALL
  SELECT TIMESTAMP '2018-01-05 05:45:14', 4 UNION ALL
  SELECT TIMESTAMP '2018-01-05 05:56:04', 4 UNION ALL
  SELECT TIMESTAMP '2018-01-05 06:18:48', 5 UNION ALL
  SELECT TIMESTAMP '2018-01-05 06:48:34', 3 UNION ALL
  SELECT TIMESTAMP '2018-01-05 07:52:48', 1
),
trailing_hour AS (
  -- Collect the x values of all rows inside the trailing window into arr.
  -- The sort key is the number of whole seconds between t and a fixed
  -- reference instant (2000-01-01 00:00:00), so the frame bound of 3600 is
  -- measured in seconds.
  SELECT
    t,
    x,
    ARRAY_AGG(x) OVER (
      ORDER BY TIMESTAMP_DIFF(t, TIMESTAMP '2000-01-01 00:00:00', SECOND)
      RANGE BETWEEN 3600 PRECEDING AND CURRENT ROW
    ) AS arr
  FROM `project.dataset.your_table`
)
SELECT
  t,
  x,
  -- Number of distinct values among the collected window elements.
  (SELECT COUNT(DISTINCT y) FROM UNNEST(arr) AS y) AS uniques
FROM trailing_hour
-- Uncomment to return the rows sorted by time:
-- ORDER BY t
с результатом:
Row t x uniques
1 2018-01-05 05:20:19.000 UTC 4 1
2 2018-01-05 05:37:18.000 UTC 7 2
3 2018-01-05 05:45:14.000 UTC 4 2
4 2018-01-05 05:56:04.000 UTC 4 2
5 2018-01-05 06:18:48.000 UTC 5 3
6 2018-01-05 06:48:34.000 UTC 3 3
7 2018-01-05 07:52:48.000 UTC 1 1
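Обновление: ниже приведена «хитрость», позволяющая выполнить ваши новые дополнительные требования. К ключу сортировки, выраженному в миллисекундах, добавляется случайное смещение меньше 1000 мс, чтобы строки с одинаковым временем получали разные ключи и учитывались в окне по отдельности. Из-за RAND() порядок таких строк может меняться от запуска к запуску: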
#standardSQL
-- For every row, report how many distinct x values occur in the trailing
-- 3600000 ms (one hour) window ending at that row. Rows that share the same
-- t are spread apart by a random offset so that each of them gets its own
-- window instead of being treated as peers of one another.
WITH `project.dataset.your_table` AS (
  -- Inline sample rows; note the two rows with identical t = 05:20:19.
  SELECT TIME '05:20:19' AS t, 4 AS x UNION ALL
  SELECT TIME '05:20:19', 5 UNION ALL
  SELECT TIME '05:37:18', 7 UNION ALL
  SELECT TIME '05:45:14', 4 UNION ALL
  SELECT TIME '05:56:04', 4 UNION ALL
  SELECT TIME '06:18:48', 6 UNION ALL
  SELECT TIME '06:48:34', 3 UNION ALL
  SELECT TIME '07:52:48', 1
),
trailing_hour AS (
  -- The sort key is milliseconds since midnight plus 1000 * RAND(), written
  -- directly inside the window's ORDER BY.
  -- NOTE: RAND() makes the relative order of tied rows, and therefore their
  -- uniques values, vary between runs.
  SELECT
    t,
    x,
    ARRAY_AGG(x) OVER (
      ORDER BY TIME_DIFF(t, TIME '00:00:00', MILLISECOND) + 1000 * RAND()
      RANGE BETWEEN 3600000 PRECEDING AND CURRENT ROW
    ) AS arr
  FROM `project.dataset.your_table`
)
SELECT
  t,
  x,
  -- Number of distinct values among the collected window elements.
  (SELECT COUNT(DISTINCT y) FROM UNNEST(arr) AS y) AS uniques
FROM trailing_hour
-- Uncomment to return the rows sorted by time:
-- ORDER BY t
с результатом:
Row t x uniques
1 05:20:19 5 1
2 05:20:19 4 2
3 05:37:18 7 3
4 05:45:14 4 3
5 05:56:04 4 3
6 06:18:48 6 4
7 06:48:34 3 3
8 07:52:48 1 1
Ещё одно обновление :o) — здесь ключ сортировки ms вычисляется заранее, отдельным шагом:
#standardSQL
-- For every row, report how many distinct x values occur in the trailing
-- 3600000 ms (one hour) window ending at that row. The randomized sort key
-- is computed in its own step before the window function is applied.
WITH `project.dataset.your_table` AS (
  -- Inline sample rows; note the two rows with identical t = 05:20:19.
  SELECT TIME '05:20:19' AS t, 4 AS x UNION ALL
  SELECT TIME '05:20:19', 5 UNION ALL
  SELECT TIME '05:37:18', 7 UNION ALL
  SELECT TIME '05:45:14', 4 UNION ALL
  SELECT TIME '05:56:04', 4 UNION ALL
  SELECT TIME '06:18:48', 6 UNION ALL
  SELECT TIME '06:48:34', 3 UNION ALL
  SELECT TIME '07:52:48', 1
),
keyed AS (
  -- ms = milliseconds since midnight plus 1000 * RAND(), so rows with the
  -- same t receive different keys.
  -- NOTE: RAND() makes the relative order of tied rows vary between runs.
  SELECT
    t,
    x,
    TIME_DIFF(t, TIME '00:00:00', MILLISECOND) + 1000 * RAND() AS ms
  FROM `project.dataset.your_table`
),
trailing_hour AS (
  -- Collect the x values of all rows whose key lies within the 3600000 ms
  -- preceding (and including) the current row's key.
  SELECT
    t,
    x,
    ARRAY_AGG(x) OVER (
      ORDER BY ms
      RANGE BETWEEN 3600000 PRECEDING AND CURRENT ROW
    ) AS arr
  FROM keyed
)
SELECT
  t,
  x,
  -- Number of distinct values among the collected window elements.
  (SELECT COUNT(DISTINCT y) FROM UNNEST(arr) AS y) AS uniques
FROM trailing_hour
-- Uncomment to return the rows sorted by time:
-- ORDER BY t