Ниже приведено решение для BigQuery Standard SQL.
Нетрадиционная версия с использованием функции RANGE_BUCKET:
#standardSQL
-- Assigns a `variable` to every table1 user so that, within each
-- (state, region) group, variables are handed out in the proportions
-- listed in table2.percentage.
WITH buckets AS (
  -- One row per (state, region): `variables` sorted by name, and `bins`
  -- holding the cumulative percentage reached at each variable (same order).
  SELECT
    state,
    region,
    ARRAY_AGG(variable ORDER BY variable) AS variables,
    ARRAY_AGG(percentage ORDER BY variable) AS bins
  FROM (
    SELECT
      state,
      region,
      variable,
      -- 1. * forces floating-point accumulation of the running total.
      SUM(1. * percentage) OVER (win) AS percentage
    FROM table2
    WINDOW win AS (PARTITION BY state, region ORDER BY variable)
  )
  GROUP BY state, region
)
SELECT
  user,
  state,
  region,
  -- The user's 0-based position within the group is scaled to [0, 100) and
  -- looked up in the cumulative bins; the resulting index selects the variable.
  -- ROW_NUMBER is ordered by user so the assignment is deterministic, while
  -- COUNT keeps the unordered window so it returns the full group size.
  -- NOTE(review): assumes the percentages of each (state, region) sum to 100;
  -- otherwise RANGE_BUCKET can return an index past the end of `variables`.
  variables[OFFSET(
    RANGE_BUCKET(
      (ROW_NUMBER() OVER (win ORDER BY user) - 1) / (COUNT(1) OVER (win)) * 100,
      bins
    )
  )] AS variable
FROM table1
JOIN buckets USING (state, region)
WINDOW win AS (PARTITION BY state, region)
-- ORDER BY user
Если применить запрос к образцу данных из вашего вопроса, результат будет таким:
Row user state region variable
1 1 ORD 1 ABC
2 2 ORD 1 ABC
3 3 ORD 1 ABC
4 4 ORD 1 XYZ
5 5 ORD 1 XYZ
6 6 ORD 1 XYZ
7 7 IAD 2 ABC
8 8 IAD 2 ABC
9 9 IAD 2 ABC
10 10 IAD 2 ABC
11 11 IAD 2 AED
12 12 IAD 2 AED
13 13 IAD 2 XYZ
14 14 IAD 2 XYZ
Ниже приведена более традиционная версия (на приведённом образце данных она даёт тот же результат, что и первая версия):
#standardSQL
-- Assigns a `variable` to every table1 user so that, within each
-- (state, region) group, variables are handed out in the proportions
-- listed in table2.percentage.
WITH buckets AS (
  -- `bin` is the cumulative percentage reached at this variable, so each
  -- variable owns the half-open percentage range [bin - percentage, bin).
  SELECT
    state,
    region,
    variable,
    percentage,
    SUM(percentage) OVER (PARTITION BY state, region ORDER BY variable) AS bin
  FROM table2
), table1_with_stats AS (
  -- `position` is the user's 0-based rank inside the group (ordered by user
  -- so the assignment is deterministic); `size` is the group's row count.
  SELECT
    user,
    state,
    region,
    ROW_NUMBER() OVER (win ORDER BY user) - 1 AS position,
    COUNT(*) OVER (win) AS size
  FROM table1
  WINDOW win AS (PARTITION BY state, region)
)
SELECT user, state, region, variable
FROM table1_with_stats
INNER JOIN buckets
  USING (state, region)
-- Half-open range: a closed BETWEEN ... AND upper - 1 drops users whenever
-- size * bin / 100 is fractional (e.g. position 3 of 7 with a 50/50 split).
WHERE position >= size * (bin - percentage) / 100
  AND position < size * bin / 100
-- ORDER BY user
Вы можете проверить запросы выше и поэкспериментировать с ними, используя приведённые ниже CTE с образцом данных:
WITH table1 AS (
  -- Sample users: six rows in ('ORD', 1) and eight rows in ('IAD', 2).
  SELECT user, state, region
  FROM UNNEST(ARRAY<STRUCT<user INT64, state STRING, region INT64>>[
    (1, 'ORD', 1),
    (2, 'ORD', 1),
    (3, 'ORD', 1),
    (4, 'ORD', 1),
    (5, 'ORD', 1),
    (6, 'ORD', 1),
    (7, 'IAD', 2),
    (8, 'IAD', 2),
    (9, 'IAD', 2),
    (10, 'IAD', 2),
    (11, 'IAD', 2),
    (12, 'IAD', 2),
    (13, 'IAD', 2),
    (14, 'IAD', 2)
  ])
), table2 AS (
  -- Sample distribution: share of each variable per (state, region), in percent.
  SELECT state, region, variable, percentage
  FROM UNNEST(ARRAY<STRUCT<state STRING, region INT64, variable STRING, percentage INT64>>[
    ('ORD', 1, 'ABC', 50),
    ('ORD', 1, 'XYZ', 50),
    ('IAD', 2, 'ABC', 50),
    ('IAD', 2, 'XYZ', 25),
    ('IAD', 2, 'AED', 25)
  ])
)