Судя по документации Hive, такой запрос с CTE должен сработать:
-- Impression counts per (exp_luid, network, ds, ad_time) for product 36325675,
-- limited to exp_luids listed under segment versions 627 or 629.
WITH aud AS (
    -- Audience: exp_luids of the selected segment versions.
    -- NOTE(review): an exp_luid listed under both versions produces two rows
    -- here, so its impressions are summed twice in join_one. Confirm this is
    -- intended, otherwise deduplicate this CTE.
    SELECT
        exp_luid
    FROM audience_manager.segments5_luid
    WHERE segment_version_id IN (627, 629)
),
prod AS (
    -- Row count per station, exp_luid, ds and ad start time.
    SELECT
        station_callsign,
        exp_luid,
        ds,
        ad_start_ts_utc AS ad_time,
        -- The BETWEEN filter below already removes NULL ds values, so
        -- COUNT(*) returns the same number as the former COUNT(ds).
        COUNT(*) AS impressions
    FROM vizio_production.kantar_vizio_v4_new
    WHERE product_id = 36325675
        AND ds BETWEEN 20190101 AND 20190430
        AND exp_luid IS NOT NULL
    -- Grouping expressions are spelled out instead of GROUP BY 1,2,3,4:
    -- Hive resolves positions only when hive.groupby.position.alias
    -- (hive.groupby.orderby.position.alias in older releases) is enabled,
    -- and that setting is off by default.
    GROUP BY
        station_callsign,
        exp_luid,
        ds,
        ad_start_ts_utc
),
join_one AS (
    -- Keep only the prod rows whose exp_luid belongs to the audience.
    SELECT
        aud.exp_luid AS exp_luid,
        prod.station_callsign AS network,
        prod.ds AS ds,
        prod.ad_time AS ad_time,
        SUM(prod.impressions) AS impressions
    FROM aud
    INNER JOIN prod
        ON aud.exp_luid = prod.exp_luid
    GROUP BY
        aud.exp_luid,
        prod.station_callsign,
        prod.ds,
        prod.ad_time
)
-- Explicit column list instead of SELECT * so the output shape is visible.
SELECT
    exp_luid,
    network,
    ds,
    ad_time,
    impressions
FROM join_one
Я не совсем понимаю, зачем разворачивать это в CTE, когда тот же результат можно относительно компактно получить одним запросом:
-- Impression counts per (exp_luid, network, ds, ad_time) for product 36325675,
-- limited to exp_luids listed under segment versions 627 or 629.
-- NOTE(review): an exp_luid listed under both segment versions matches twice
-- in the join, so its impressions are summed twice. Confirm this is intended.
SELECT
    aud.exp_luid AS exp_luid,
    prod.station_callsign AS network,
    prod.ds AS ds,
    prod.ad_time AS ad_time,
    SUM(prod.impressions) AS impressions
FROM audience_manager.segments5_luid aud
INNER JOIN (
    -- Row count per station, exp_luid, ds and ad start time.
    SELECT
        station_callsign,
        exp_luid,
        ds,
        ad_start_ts_utc AS ad_time,
        -- The BETWEEN filter below already removes NULL ds values, so
        -- COUNT(*) returns the same number as the former COUNT(ds).
        COUNT(*) AS impressions
    FROM vizio_production.kantar_vizio_v4_new
    WHERE product_id = 36325675
        AND ds BETWEEN 20190101 AND 20190430
        AND exp_luid IS NOT NULL
    -- Grouping expressions are spelled out instead of GROUP BY 1,2,3,4:
    -- Hive resolves positions only when hive.groupby.position.alias
    -- (hive.groupby.orderby.position.alias in older releases) is enabled,
    -- and that setting is off by default.
    GROUP BY
        station_callsign,
        exp_luid,
        ds,
        ad_start_ts_utc
) prod
    ON aud.exp_luid = prod.exp_luid
WHERE aud.segment_version_id IN (627, 629)
GROUP BY
    aud.exp_luid,
    prod.station_callsign,
    prod.ds,
    prod.ad_time