Предполагая, что ваше поле date имеет тип данных TIMESTAMP,
ниже приведён запрос для BigQuery Standard SQL:
#standardSQL
-- Counts matching rows per box, calendar date and hour of day across the three
-- source tables. Each table contributes only the rows carrying its own status
-- value: 'finished' (table1), 'start' (table2), 'close' (table3).
-- Assumes `date` is a TIMESTAMP column in all three tables.
WITH matching_events AS (
  SELECT
    box_id,
    DATE(date) AS date,
    EXTRACT(HOUR FROM date) AS hour
  FROM `project.dataset.table1`
  WHERE status = 'finished'

  UNION ALL

  SELECT
    box_id,
    DATE(date) AS date,
    EXTRACT(HOUR FROM date) AS hour
  FROM `project.dataset.table2`
  WHERE status = 'start'

  UNION ALL

  SELECT
    box_id,
    DATE(date) AS date,
    EXTRACT(HOUR FROM date) AS hour
  FROM `project.dataset.table3`
  WHERE status = 'close'
)
SELECT
  box_id,
  date,
  hour,
  COUNT(*) AS cnt
FROM matching_events
GROUP BY
  box_id,
  date,
  hour
Вы можете протестировать приведённый выше запрос и поэкспериментировать с ним, используя примерные (фиктивные) данные из вашего вопроса, как показано в примере ниже:
#standardSQL
-- Self-contained demo of the per-box / per-date / per-hour count.
-- The three CTEs are named like the real tables and hold inline sample rows,
-- so the final SELECT is written exactly as it would be against real data.
-- Every timestamp is an explicitly typed TIMESTAMP literal, so no row depends
-- on implicit string-to-timestamp coercion inside UNION ALL.
WITH `project.dataset.table1` AS (
  SELECT 1 AS id, 20 AS box_id, TIMESTAMP '2019-01-01 01:00:00.000 UTC' AS date, 'finished' AS status UNION ALL
  SELECT 2, 21, TIMESTAMP '2019-01-01 02:00:00.000 UTC', 'finished' UNION ALL
  SELECT 3, 21, TIMESTAMP '2019-01-01 01:00:00.000 UTC', 'unfinished'  -- filtered out: status is not 'finished'
), `project.dataset.table2` AS (
  SELECT 1 AS id, 21 AS box_id, TIMESTAMP '2019-01-01 01:00:00.000 UTC' AS date, 'start' AS status UNION ALL
  SELECT 2, 22, TIMESTAMP '2019-01-01 02:00:00.000 UTC', 'end' UNION ALL  -- filtered out: status is not 'start'
  SELECT 3, 23, TIMESTAMP '2019-01-01 01:00:00.000 UTC', 'start' UNION ALL
  SELECT 4, 24, TIMESTAMP '2019-01-01 01:00:00.000 UTC', 'start'
), `project.dataset.table3` AS (
  SELECT 1 AS id, 21 AS box_id, TIMESTAMP '2019-01-01 03:00:00.000 UTC' AS date, 'close' AS status UNION ALL
  SELECT 2, 22, TIMESTAMP '2019-01-01 02:00:00.000 UTC', 'end' UNION ALL  -- filtered out: status is not 'close'
  SELECT 3, 24, TIMESTAMP '2019-01-01 01:00:00.000 UTC', 'close'
)
SELECT box_id, date, hour, COUNT(1) AS cnt
FROM (
  -- Keep only the rows with the status of interest for each table and reduce
  -- each timestamp to its calendar date and hour of day.
  SELECT box_id, DATE(date) AS date, EXTRACT(HOUR FROM date) AS hour
  FROM `project.dataset.table1` WHERE status = 'finished' UNION ALL
  SELECT box_id, DATE(date) AS date, EXTRACT(HOUR FROM date) AS hour
  FROM `project.dataset.table2` WHERE status = 'start' UNION ALL
  SELECT box_id, DATE(date) AS date, EXTRACT(HOUR FROM date) AS hour
  FROM `project.dataset.table3` WHERE status = 'close'
)
GROUP BY box_id, date, hour
-- Explicit ordering: without it the row order of the result is not guaranteed.
ORDER BY box_id, date, hour
с результатом
Row box_id date hour cnt
1 20 2019-01-01 1 1
2 21 2019-01-01 1 1
3 21 2019-01-01 2 1
4 21 2019-01-01 3 1
5 23 2019-01-01 1 1
6 24 2019-01-01 1 2
Ниже приведены слегка переработанные варианты запроса (очевидно, с тем же результатом):
#standardSQL
-- Variant that stacks all rows of the three tables first and applies the
-- per-table status condition inside the aggregate.
--   t         tags each row with the table it came from (1, 2 or 3)
--   event_ts  the raw `date` column, renamed so that the output alias `date`
--             cannot be confused with the input column in GROUP BY
-- Columns are listed explicitly instead of SELECT *, so the UNION ALL does not
-- depend on the three tables having identical column count, order and types.
SELECT
  box_id,
  DATE(event_ts) AS date,
  EXTRACT(HOUR FROM event_ts) AS hour,
  COUNTIF(
    (t = 1 AND status = 'finished') OR
    (t = 2 AND status = 'start') OR
    (t = 3 AND status = 'close')
  ) AS cnt
FROM (
  SELECT 1 AS t, box_id, date AS event_ts, status FROM `project.dataset.table1` UNION ALL
  SELECT 2, box_id, date, status FROM `project.dataset.table2` UNION ALL
  SELECT 3, box_id, date, status FROM `project.dataset.table3`
)
GROUP BY box_id, date, hour
-- Drop groups in which no row satisfied its table's status condition.
HAVING cnt > 0
ИЛИ
#standardSQL
-- Variant that filters each table by its own status first, stacks the
-- surviving rows, and only then derives the calendar date and hour of day.
-- Only the columns that are needed are selected (instead of SELECT *), so the
-- UNION ALL does not depend on the three tables sharing the same column layout.
-- The raw `date` column is renamed to event_ts so that the output alias `date`
-- cannot be confused with the input column in GROUP BY.
SELECT
  box_id,
  DATE(event_ts) AS date,
  EXTRACT(HOUR FROM event_ts) AS hour,
  COUNT(1) AS cnt
FROM (
  SELECT box_id, date AS event_ts FROM `project.dataset.table1` WHERE status = 'finished' UNION ALL
  SELECT box_id, date FROM `project.dataset.table2` WHERE status = 'start' UNION ALL
  SELECT box_id, date FROM `project.dataset.table3` WHERE status = 'close'
)
GROUP BY box_id, date, hour