Ниже для BigQuery Standard SQL
#standardSQL
WITH days AS (
SELECT column1, column2, day FROM (
SELECT column1, column2,
MIN(PARSE_DATE('%m/%d/%Y', day)) min_day,
PARSE_DATE('%m/%d/%Y', '1/4/2020') max_day -- or just CURRENT_DATE() in real use case
FROM `project.dataset.table`
GROUP BY column1, column2
),
UNNEST(GENERATE_DATE_ARRAY(min_day, max_day)) day
)
SELECT d.column1, d.column2, d.day,
IFNULL(t.value,
LAST_VALUE(value IGNORE NULLS)
OVER(PARTITION BY d.column1, d.column2 ORDER BY d.day ROWS BETWEEN UNBOUNDED PRECEDING AND 1 PRECEDING)
) value,
IF(t.value IS NULL, 'Repeated', 'From Input') Status
FROM days d
LEFT JOIN `project.dataset.table` t
ON d.column1 = t.column1
AND d.column2 = t.column2
AND d.day = PARSE_DATE('%m/%d/%Y', t.day)
-- ORDER BY column1, column2, day
Вы можете протестировать, поиграть с выше, используя примеры данных из вашего вопроса, как в примере ниже
#standardSQL
WITH `project.dataset.table` AS (
SELECT 'd1' column1, 'b1' column2, '1/1/2020' day, 1 value UNION ALL
SELECT 'd1', 'b2', '1/1/2020', 3 UNION ALL
SELECT 'd1', 'b3', '1/1/2020', 4 UNION ALL
SELECT 'd2', 'b2', '1/2/2020', 7 UNION ALL
SELECT 'd1', 'b3', '1/2/2020', 5 UNION ALL
SELECT 'd1', 'b4', '1/2/2020', 6 UNION ALL
SELECT 'd1', 'b1', '1/3/2020', 2 UNION ALL
SELECT 'd2', 'b3', '1/3/2020', 8 UNION ALL
SELECT 'd2', 'b3', '1/4/2020', 9
), days AS (
SELECT column1, column2, day FROM (
SELECT column1, column2,
MIN(PARSE_DATE('%m/%d/%Y', day)) min_day,
PARSE_DATE('%m/%d/%Y', '1/4/2020') max_day -- or just CURRENT_DATE() in real use case
FROM `project.dataset.table`
GROUP BY column1, column2
),
UNNEST(GENERATE_DATE_ARRAY(min_day, max_day)) day
)
SELECT d.column1, d.column2, d.day,
IFNULL(t.value,
LAST_VALUE(value IGNORE NULLS)
OVER(PARTITION BY d.column1, d.column2 ORDER BY d.day ROWS BETWEEN UNBOUNDED PRECEDING AND 1 PRECEDING)
) value,
IF(t.value IS NULL, 'Repeated', 'From Input') Status
FROM days d
LEFT JOIN `project.dataset.table` t
ON d.column1 = t.column1
AND d.column2 = t.column2
AND d.day = PARSE_DATE('%m/%d/%Y', t.day)
-- ORDER BY column1, column2, day
с выводом
Row column1 column2 day value Status
1 d1 b1 2020-01-01 1 From Input
2 d1 b1 2020-01-02 1 Repeated
3 d1 b1 2020-01-03 2 From Input
4 d1 b1 2020-01-04 2 Repeated
5 d1 b2 2020-01-01 3 From Input
6 d1 b2 2020-01-02 3 Repeated
7 d1 b2 2020-01-03 3 Repeated
8 d1 b2 2020-01-04 3 Repeated
9 d1 b3 2020-01-01 4 From Input
10 d1 b3 2020-01-02 5 From Input
11 d1 b3 2020-01-03 5 Repeated
12 d1 b3 2020-01-04 5 Repeated
13 d1 b4 2020-01-02 6 From Input
14 d1 b4 2020-01-03 6 Repeated
15 d1 b4 2020-01-04 6 Repeated
16 d2 b2 2020-01-02 7 From Input
17 d2 b2 2020-01-03 7 Repeated
18 d2 b2 2020-01-04 7 Repeated
19 d2 b3 2020-01-03 8 From Input
20 d2 b3 2020-01-04 9 From Input