Ниже приведено решение для стандартного SQL BigQuery (оно не зависит от количества столбцов категорий, хотя в примере их только 5):
#standardSQL
-- For every row of the table, adds `top3_cat`: a comma-separated list of the three
-- column names that hold the largest integer values. The `user` column is excluded.
-- The row is serialised with TO_JSON_STRING and parsed back as text, so the query
-- does not need to list the category columns and works for any number of them.
-- Assumes every column other than `user` holds an integer and that no value
-- contains ',' or ':' -- TODO confirm for your table.
-- Ordering is by value only, so columns with equal values may come back in any order.
SELECT
  *,
  ARRAY_TO_STRING(
    ARRAY(
      SELECT SPLIT(kv, ':')[OFFSET(0)]
      -- Strip braces and quotes ({"user":1,"cat1":0} -> user:1,cat1:0), then split on ','.
      FROM UNNEST(SPLIT(REGEXP_REPLACE(TO_JSON_STRING(t), r'[{"}]', ''))) AS kv
      WHERE LOWER(SPLIT(kv, ':')[OFFSET(0)]) <> 'user'
      -- SAFE_CAST instead of CAST: a NULL column is serialised as `null`, which a plain
      -- CAST rejects and fails the whole query. SAFE_CAST yields NULL, and NULLs sort
      -- last in a DESC ordering.
      ORDER BY SAFE_CAST(SPLIT(kv, ':')[OFFSET(1)] AS INT64) DESC
      LIMIT 3
    ),
    ','
  ) AS top3_cat
FROM `yourproject.yourdataset.yourtable` AS t
Вы можете протестировать приведённый выше запрос и поэкспериментировать с ним, используя фиктивные данные из вашего вопроса:
#standardSQL
-- Self-contained demo of the top-3 query: the CTE below stands in for a real table
-- and holds five users with five category columns each.
-- `top3_cat` is a comma-separated list of the three column names with the largest
-- values, excluding `user`. Ordering is by value only, so columns with equal values
-- (for example several zeros) may come back in any order.
WITH `project.dataset.table` AS (
  SELECT 1 AS user, 0 AS cat1, 1 AS cat2, 0 AS cat3, 2 AS cat4, 30 AS cat5 UNION ALL
  SELECT 2, 0, 0, 10, 5, 0 UNION ALL
  SELECT 3, 0, 5, 0, 0, 0 UNION ALL
  SELECT 4, 2, 0, 20, 2, 0 UNION ALL
  SELECT 5, 0, 40, 0, 0, 0
)
SELECT
  *,
  ARRAY_TO_STRING(
    ARRAY(
      SELECT SPLIT(kv, ':')[OFFSET(0)]
      -- Strip braces and quotes ({"user":1,"cat1":0} -> user:1,cat1:0), then split on ','.
      FROM UNNEST(SPLIT(REGEXP_REPLACE(TO_JSON_STRING(t), r'[{"}]', ''))) AS kv
      WHERE LOWER(SPLIT(kv, ':')[OFFSET(0)]) <> 'user'
      -- SAFE_CAST instead of CAST: a NULL column is serialised as `null`, which a plain
      -- CAST rejects and fails the whole query. SAFE_CAST yields NULL, and NULLs sort
      -- last in a DESC ordering.
      ORDER BY SAFE_CAST(SPLIT(kv, ':')[OFFSET(1)] AS INT64) DESC
      LIMIT 3
    ),
    ','
  ) AS top3_cat
FROM `project.dataset.table` AS t
Результат выполнения запроса:
Row user cat1 cat2 cat3 cat4 cat5 top3_cat
1 1 0 1 0 2 30 cat5,cat4,cat2
2 2 0 0 10 5 0 cat3,cat4,cat2
3 3 0 5 0 0 0 cat2,cat3,cat1
4 4 2 0 20 2 0 cat3,cat4,cat1
5 5 0 40 0 0 0 cat2,cat3,cat1
Я дополнил свой вопрос кодом, который использовал для построения матрицы. Не могли бы вы показать, как мне интегрировать ваше решение?
#standardSQL
-- Step 1 (`query_result`): one row per UserID with a purchase count per brand,
-- computed from the unnested hits / products of the wildcard table.
-- Step 2: adds `top3_cat`, a comma-separated list of the three brand columns with
-- the largest counts. The row is serialised with TO_JSON_STRING and parsed back as
-- text, so the brand columns do not have to be listed again.
-- Assumes UserID values contain no ',' or ':' characters, since the text parsing
-- splits on them -- TODO confirm.
WITH `query_result` AS (
  SELECT
    customDimension.value AS UserID,
    -- The brand is lower-cased before matching, so every pattern must itself be
    -- lower case; a mixed-case pattern such as "Brand 1" can never match and
    -- would leave every count at 0.
    COUNTIF(LOWER(hits_product.productbrand) LIKE 'brand 1') AS brand_1,
    COUNTIF(LOWER(hits_product.productbrand) LIKE 'brand 2') AS brand_2,
    COUNTIF(LOWER(hits_product.productbrand) LIKE 'brand 3') AS brand_3,
    -- Placeholder kept from the original snippet: the remaining brand_N columns go here.
    ...
    ...
  FROM
    `table*` AS t
  CROSS JOIN
    UNNEST(t.hits) AS hits
  CROSS JOIN
    UNNEST(t.customdimensions) AS customDimension
  CROSS JOIN
    UNNEST(hits.product) AS hits_product
  WHERE
    -- Both bounds are yesterday, so only tables whose suffix parses to yesterday pass.
    -- NOTE(review): '%y' parses a two-digit year; if the table suffix is YYYYMMDD the
    -- format should be '%Y%m%d' -- confirm against the actual table names.
    PARSE_DATE('%y%m%d', _TABLE_SUFFIX)
      BETWEEN DATE_SUB(CURRENT_DATE(), INTERVAL 1 DAY)
      AND DATE_SUB(CURRENT_DATE(), INTERVAL 1 DAY)
    AND customDimension.index = 2
    AND hits.eventInfo.eventCategory = 'Ecommerce'
    AND hits.eventInfo.eventAction = 'Purchase'
  GROUP BY
    UserID
  -- No ORDER BY precedes this LIMIT, so which 50 users are kept is arbitrary.
  LIMIT 50
)
SELECT
  *,
  ARRAY_TO_STRING(
    ARRAY(
      SELECT SPLIT(kv, ':')[OFFSET(0)]
      -- Strip braces and quotes from the JSON text, then split it into key:value pairs.
      FROM UNNEST(SPLIT(REGEXP_REPLACE(TO_JSON_STRING(t), r'[{"}]', ''))) AS kv
      -- Drop the identifier column so that only brand columns are ranked.
      WHERE LOWER(SPLIT(kv, ':')[OFFSET(0)]) <> LOWER('UserID')
      -- SAFE_CAST keeps the query running if a value is not an integer (e.g. `null`);
      -- such entries become NULL and sort last in a DESC ordering.
      ORDER BY SAFE_CAST(SPLIT(kv, ':')[OFFSET(1)] AS INT64) DESC
      LIMIT 3
    ),
    ','
  ) AS top3_cat
FROM `query_result` AS t