Я хочу рассчитать уникальное количество пользователей за последние 2 дня для каждой даты.
Первый запрос: - Я пытался с помощью оператора CASE дать мне количество пользователей за этот день, который не ожидаетсярезультат, даже я пытался с оконной функцией.
Я знаю одно альтернативное решение с помощью самостоятельного соединения ( уже упоминалось как второй запрос ), которое дает мне правильный ответ, что я ожидал, но я хочусделайте это в одном запросе.
Причина, по которой в одном запросе нужно уменьшить размер обрабатываемых данных, если я сделаю сам объединение, он дважды прочитает полную таблицу, а исходный размер таблицы составляет несколько ТБ.
SELECT
(CASE WHEN dt BETWEEN DATE_SUB(dt, INTERVAL 1 DAY) AND dt THEN
CONCAT(CAST(DATE_SUB(dt, INTERVAL 1 DAY) AS STRING), '::', CAST(dt AS STRING)) END) AS Date_range,
COUNT(DISTINCT (CASE WHEN dt BETWEEN DATE_SUB(dt, INTERVAL 1 DAY) AND dt THEN Visitor_Name END)) AS Visitor_Count
FROM
(SELECT DATE('2018-01-01') AS dt, 'A' AS Visitor_Name
UNION ALL
SELECT '2018-01-01' AS dt, 'B' AS Visitor_Name
UNION ALL
SELECT '2018-01-01' AS dt, 'C' AS Visitor_Name
UNION ALL
SELECT '2018-01-01' AS dt, 'D' AS Visitor_Name
UNION ALL
SELECT '2018-01-02' AS dt, 'B' AS Visitor_Name
UNION ALL
SELECT '2018-01-02' AS dt, 'C' AS Visitor_Name
UNION ALL
SELECT '2018-01-02' AS dt, 'E' AS Visitor_Name
UNION ALL
SELECT '2018-01-03' AS dt, 'A' AS Visitor_Name
UNION ALL
SELECT '2018-01-03' AS dt, 'P' AS Visitor_Name
UNION ALL
SELECT '2018-01-04' AS dt, 'A' AS Visitor_Name
UNION ALL
SELECT '2018-01-04' AS dt, 'C' AS Visitor_Name
UNION ALL
SELECT '2018-01-05' AS dt, 'D' AS Visitor_Name
UNION ALL
SELECT '2018-01-05' AS dt, 'B' AS Visitor_Name
UNION ALL
SELECT '2018-01-05' AS dt, 'B' AS Visitor_Name
SELECT '2018-01-06' AS dt, 'P' AS Visitor_Name)
GROUP BY Date_range
ORDER BY Date_range;
Решение:
SELECT
(CASE WHEN a.dt BETWEEN DATE_SUB(b.dt, INTERVAL 1 DAY) AND b.dt THEN
CONCAT(CAST(DATE_SUB(b.dt, INTERVAL 1 DAY) AS STRING), '::', CAST(b.dt AS STRING)) END) AS Date_range,
COUNT(DISTINCT (CASE WHEN a.dt BETWEEN DATE_SUB(b.dt, INTERVAL 1 DAY) AND b.dt THEN a.Visitor_Name END)) AS Visitor_Count
FROM
(SELECT DATE('2018-01-01') AS dt, 'A' AS Visitor_Name
UNION ALL
SELECT '2018-01-01' AS dt, 'B' AS Visitor_Name
UNION ALL
SELECT '2018-01-01' AS dt, 'C' AS Visitor_Name
UNION ALL
SELECT '2018-01-01' AS dt, 'D' AS Visitor_Name
UNION ALL
SELECT '2018-01-02' AS dt, 'B' AS Visitor_Name
UNION ALL
SELECT '2018-01-02' AS dt, 'C' AS Visitor_Name
UNION ALL
SELECT '2018-01-02' AS dt, 'E' AS Visitor_Name
UNION ALL
SELECT '2018-01-03' AS dt, 'A' AS Visitor_Name
UNION ALL
SELECT '2018-01-03' AS dt, 'P' AS Visitor_Name
UNION ALL
SELECT '2018-01-04' AS dt, 'A' AS Visitor_Name
UNION ALL
SELECT '2018-01-04' AS dt, 'C' AS Visitor_Name
UNION ALL
SELECT '2018-01-05' AS dt, 'D' AS Visitor_Name
UNION ALL
SELECT '2018-01-05' AS dt, 'B' AS Visitor_Name
UNION ALL
SELECT '2018-01-05' AS dt, 'B' AS Visitor_Name
UNION ALL
SELECT '2018-01-06' AS dt, 'P' AS Visitor_Name) AS a
INNER JOIN
(SELECT DATE('2018-01-01') AS dt, 'A' AS Visitor_Name
UNION ALL
SELECT '2018-01-01' AS dt, 'B' AS Visitor_Name
UNION ALL
SELECT '2018-01-01' AS dt, 'C' AS Visitor_Name
UNION ALL
SELECT '2018-01-01' AS dt, 'D' AS Visitor_Name
UNION ALL
SELECT '2018-01-02' AS dt, 'B' AS Visitor_Name
UNION ALL
SELECT '2018-01-02' AS dt, 'C' AS Visitor_Name
UNION ALL
SELECT '2018-01-02' AS dt, 'E' AS Visitor_Name
UNION ALL
SELECT '2018-01-03' AS dt, 'A' AS Visitor_Name
UNION ALL
SELECT '2018-01-03' AS dt, 'P' AS Visitor_Name
UNION ALL
SELECT '2018-01-04' AS dt, 'A' AS Visitor_Name
UNION ALL
SELECT '2018-01-04' AS dt, 'C' AS Visitor_Name
UNION ALL
SELECT '2018-01-05' AS dt, 'D' AS Visitor_Name
UNION ALL
SELECT '2018-01-05' AS dt, 'B' AS Visitor_Name
UNION ALL
SELECT '2018-01-05' AS dt, 'B' AS Visitor_Name
UNION ALL
SELECT '2018-01-06' AS dt, 'P' AS Visitor_Name) AS b
ON (a.dt <= b.dt)
GROUP BY Date_range
ORDER BY Date_range;