Ниже приведено решение для BigQuery Standard SQL:
#standardSQL
-- Returns a single row with two arrays: the two most frequent `name` values
-- and the two most frequent `age` values of `project.dataset.table`, each
-- element rendered as '(<count>) <value>'.
--
-- Technique: every value is tagged with its zero-padded occurrence count, so
-- plain string ordering ranks the tags by count first and by the value's text
-- second. That allows ARRAY_AGG(DISTINCT ... ORDER BY ... LIMIT 2) to pick the
-- top two: with DISTINCT, the ORDER BY key must be the aggregated expression.
WITH tagged AS (
  SELECT
    -- Pad to 19 digits (the widest positive INT64) so no count is ever
    -- truncated; a 5-character window would mis-rank counts >= 100000.
    CONCAT(
      '(',
      LPAD(CAST(COUNT(1) OVER (PARTITION BY name) AS STRING), 19, '0'),
      ') ',
      name
    ) AS tagged_name,
    CONCAT(
      '(',
      LPAD(CAST(COUNT(1) OVER (PARTITION BY age) AS STRING), 19, '0'),
      ') ',
      CAST(age AS STRING)
    ) AS tagged_age
  FROM `project.dataset.table`
),

top_tags AS (
  SELECT
    -- A NULL name/age yields a NULL tag (CONCAT propagates NULL). IGNORE NULLS
    -- keeps it out of the array, since BigQuery rejects NULL array elements
    -- in a query result.
    ARRAY_AGG(DISTINCT tagged_name IGNORE NULLS ORDER BY tagged_name DESC LIMIT 2) AS names,
    ARRAY_AGG(DISTINCT tagged_age IGNORE NULLS ORDER BY tagged_age DESC LIMIT 2) AS ages
  FROM tagged
)

SELECT
  -- Strip the zero padding from the leading count only. The pattern is
  -- anchored with ^ so that text such as '(007)' inside the value itself is
  -- left untouched. Ordering still uses the padded tag.
  ARRAY(
    SELECT REGEXP_REPLACE(tagged_name, r'^\(0*', '(')
    FROM t.names AS tagged_name
    ORDER BY tagged_name DESC
  ) AS names,
  ARRAY(
    SELECT REGEXP_REPLACE(tagged_age, r'^\(0*', '(')
    FROM t.ages AS tagged_age
    ORDER BY tagged_age DESC
  ) AS ages
FROM top_tags AS t
Вы можете протестировать приведённый выше запрос и поэкспериментировать с ним на примере данных из вашего вопроса, как показано ниже:
#standardSQL
-- Self-contained demo: the inline CTE named `project.dataset.table` stands in
-- for the real table. The query returns a single row with two arrays: the two
-- most frequent `name` values and the two most frequent `age` values, each
-- element rendered as '(<count>) <value>'.
--
-- Technique: every value is tagged with its zero-padded occurrence count, so
-- plain string ordering ranks the tags by count first and by the value's text
-- second. That allows ARRAY_AGG(DISTINCT ... ORDER BY ... LIMIT 2) to pick the
-- top two: with DISTINCT, the ORDER BY key must be the aggregated expression.
WITH `project.dataset.table` AS (
  SELECT 'tom' AS name, 20 AS age UNION ALL
  SELECT 'tom', 20 UNION ALL
  SELECT 'brad', 10 UNION ALL
  SELECT 'steve', 14 UNION ALL
  SELECT 'alex', 13 UNION ALL
  SELECT 'alex', 11
),

tagged AS (
  SELECT
    -- Pad to 19 digits (the widest positive INT64) so no count is ever
    -- truncated; a 5-character window would mis-rank counts >= 100000.
    CONCAT(
      '(',
      LPAD(CAST(COUNT(1) OVER (PARTITION BY name) AS STRING), 19, '0'),
      ') ',
      name
    ) AS tagged_name,
    CONCAT(
      '(',
      LPAD(CAST(COUNT(1) OVER (PARTITION BY age) AS STRING), 19, '0'),
      ') ',
      CAST(age AS STRING)
    ) AS tagged_age
  FROM `project.dataset.table`
),

top_tags AS (
  SELECT
    -- A NULL name/age yields a NULL tag (CONCAT propagates NULL). IGNORE NULLS
    -- keeps it out of the array, since BigQuery rejects NULL array elements
    -- in a query result.
    ARRAY_AGG(DISTINCT tagged_name IGNORE NULLS ORDER BY tagged_name DESC LIMIT 2) AS names,
    ARRAY_AGG(DISTINCT tagged_age IGNORE NULLS ORDER BY tagged_age DESC LIMIT 2) AS ages
  FROM tagged
)

SELECT
  -- Strip the zero padding from the leading count only. The pattern is
  -- anchored with ^ so that text such as '(007)' inside the value itself is
  -- left untouched. Ordering still uses the padded tag.
  ARRAY(
    SELECT REGEXP_REPLACE(tagged_name, r'^\(0*', '(')
    FROM t.names AS tagged_name
    ORDER BY tagged_name DESC
  ) AS names,
  ARRAY(
    SELECT REGEXP_REPLACE(tagged_age, r'^\(0*', '(')
    FROM t.ages AS tagged_age
    ORDER BY tagged_age DESC
  ) AS ages
FROM top_tags AS t
Результат:
Row names ages
1 (2) tom (2) 20
(2) alex (1) 14
Обновление в ответ на комментарий: «I'd like to have it as an array (exactly as it would be in `select APPROX_TOP_COUNT(name, 2), APPROX_TOP_COUNT(age, 2) from mytable`)»
См. ниже — изменены только две строки во внешнем SELECT:
#standardSQL
-- Self-contained demo: the inline CTE named `project.dataset.table` stands in
-- for the real table. The query returns a single row with two arrays of
-- STRUCT<value STRING, count INT64>: the two most frequent `name` values and
-- the two most frequent `age` values (ages are returned as their string form).
--
-- Technique: every value is tagged with its zero-padded occurrence count, so
-- plain string ordering ranks the tags by count first and by the value's text
-- second. That allows ARRAY_AGG(DISTINCT ... ORDER BY ... LIMIT 2) to pick the
-- top two: with DISTINCT, the ORDER BY key must be the aggregated expression.
-- The outer SELECT then splits each tag back into its value and count.
WITH `project.dataset.table` AS (
  SELECT 'tom' AS name, 20 AS age UNION ALL
  SELECT 'tom', 20 UNION ALL
  SELECT 'brad', 10 UNION ALL
  SELECT 'steve', 14 UNION ALL
  SELECT 'alex', 13 UNION ALL
  SELECT 'alex', 11
),

tagged AS (
  SELECT
    -- Pad to 19 digits (the widest positive INT64) so no count is ever
    -- truncated; a 5-character window would mis-rank counts >= 100000.
    CONCAT(
      '(',
      LPAD(CAST(COUNT(1) OVER (PARTITION BY name) AS STRING), 19, '0'),
      ') ',
      name
    ) AS tagged_name,
    CONCAT(
      '(',
      LPAD(CAST(COUNT(1) OVER (PARTITION BY age) AS STRING), 19, '0'),
      ') ',
      CAST(age AS STRING)
    ) AS tagged_age
  FROM `project.dataset.table`
),

top_tags AS (
  SELECT
    -- A NULL name/age yields a NULL tag (CONCAT propagates NULL). IGNORE NULLS
    -- keeps it out of the array, since BigQuery rejects NULL array elements
    -- in a query result.
    ARRAY_AGG(DISTINCT tagged_name IGNORE NULLS ORDER BY tagged_name DESC LIMIT 2) AS names,
    ARRAY_AGG(DISTINCT tagged_age IGNORE NULLS ORDER BY tagged_age DESC LIMIT 2) AS ages
  FROM tagged
)

SELECT
  -- Both patterns are anchored at the start so only the leading '(<count>) '
  -- prefix is parsed. (?s) lets '.' match newlines, so a value that spans
  -- several lines is returned whole instead of being cut at the first newline.
  ARRAY(
    SELECT
      STRUCT(
        REGEXP_EXTRACT(tagged_name, r'(?s)^\(\d+\) (.*)$') AS value,
        CAST(REGEXP_EXTRACT(tagged_name, r'^\((\d+)\)') AS INT64) AS `count`
      )
    FROM t.names AS tagged_name
    ORDER BY tagged_name DESC
  ) AS names,
  ARRAY(
    SELECT
      STRUCT(
        REGEXP_EXTRACT(tagged_age, r'(?s)^\(\d+\) (.*)$') AS value,
        CAST(REGEXP_EXTRACT(tagged_age, r'^\((\d+)\)') AS INT64) AS `count`
      )
    FROM t.ages AS tagged_age
    ORDER BY tagged_age DESC
  ) AS ages
FROM top_tags AS t
Результат:
Row names.value names.count ages.value ages.count
1 tom 2 20 2
alex 2 14 1