Агрегация агрегации в Google BigQuery - PullRequest
0 голосов
/ 28 октября 2019

Мои данные выглядят следующим образом

-- Nests the sample rows in two steps: first one array of rows per (date, article_id),
-- then that array is re-grouped by user_type inside a correlated ARRAY sub-query.
WITH test AS (
  -- Inline sample rows: the first STRUCT names the fields, the bare tuples reuse them.
  SELECT
    date,
    article_id,
    user_type,
    openmode,
    uid
  FROM UNNEST([
    STRUCT(
      '2019-10-26' AS date,
      '1.8025137' AS article_id,
      'Digital Paying' AS user_type,
      'open' AS openmode,
      '123' AS uid
    ),
    ('2019-10-26', '1.8025137', 'Digital Paying', 'close', '523'),
    ('2019-10-26', '1.8025137', 'Anonymous', 'open', '321'),
    ('2019-10-26', '1.8025137', 'Registered', 'close', '231'),
    ('2019-10-26', '1.8025137', 'Registered', 'open', '431'),
    ('2019-10-26', '1.8025137', 'Digital Paying', 'close', '132'),
    ('2019-10-26', '1.8025137', 'Anonymous', 'close', '111')
  ])
),

-- First aggregation level: collapse every (date, article_id) pair into a single row
-- that carries all of its (user_type, openmode, uid) triples as an array.
date_article AS (
  SELECT
    date,
    article_id,
    ARRAY_AGG(STRUCT(user_type, openmode, uid)) AS entries
  FROM test
  GROUP BY
    date,
    article_id
)

-- Second aggregation level: unnest the per-article array and group it by user_type,
-- so each user type ends up with its own array "op" of (openmode, uid) pairs.
SELECT
  date,
  article_id,
  ARRAY(
    SELECT AS STRUCT
      user_type,
      ARRAY_AGG(STRUCT(openmode, uid)) AS op
    FROM UNNEST(entries)
    GROUP BY user_type
  ) AS user_types
FROM date_article

Моя цель — агрегировать user_types.op.openmode и user_types.op.uid по user_types.user_type без дубликатов, чтобы получить такой результат:

enter image description here

Ответы [ 2 ]

1 голос
/ 29 октября 2019

Думаю, вам нужен запрос ниже:

#standardSQL
-- Builds the nesting bottom-up, one CTE per level:
-- distinct-uid counts -> open modes per user type -> user types per article -> articles per date.
WITH test AS (
  -- Inline sample rows: the first STRUCT names the fields, the bare tuples reuse them.
  SELECT
    DATE,
    article_id,
    user_type,
    openmode,
    uid
  FROM UNNEST([
    STRUCT(
      '2019-10-26' AS DATE,
      '1.8025137' AS article_id,
      'Digital Paying' AS user_type,
      'open' AS openmode,
      '123' AS uid
    ),
    ('2019-10-26', '1.8025137', 'Digital Paying', 'close', '523'),
    ('2019-10-26', '1.8025137', 'Anonymous', 'open', '321'),
    ('2019-10-26', '1.8025137', 'Registered', 'close', '231'),
    ('2019-10-26', '1.8025137', 'Registered', 'open', '431'),
    ('2019-10-26', '1.8025137', 'Digital Paying', 'close', '132'),
    ('2019-10-26', '1.8025137', 'Anonymous', 'close', '111')
  ])
),

-- Innermost level: number of distinct uids per (date, article, user type, open mode).
uid_counts AS (
  SELECT
    DATE,
    article_id,
    user_type,
    openmode,
    COUNT(DISTINCT uid) AS uids
  FROM test
  GROUP BY
    DATE,
    article_id,
    user_type,
    openmode
),

-- Pack the (openmode, uids) pairs of each user type into an array.
modes_by_type AS (
  SELECT
    DATE,
    article_id,
    user_type,
    ARRAY_AGG(STRUCT(openmode, uids)) AS modes
  FROM uid_counts
  GROUP BY
    DATE,
    article_id,
    user_type
),

-- Pack the (user_type, modes) pairs of each article into an array.
types_by_article AS (
  SELECT
    DATE,
    article_id,
    ARRAY_AGG(STRUCT(user_type, modes)) AS types
  FROM modes_by_type
  GROUP BY
    DATE,
    article_id
),

-- Outermost level: one row per date with all of its articles as an array.
articles_by_date AS (
  SELECT
    DATE,
    ARRAY_AGG(STRUCT(article_id, types)) AS articles
  FROM types_by_article
  GROUP BY DATE
)

SELECT
  DATE,
  articles
FROM articles_by_date

с результатом

enter image description here

0 голосов
/ 28 октября 2019

Вы сделали это немного сложнее, чем нужно. По возможности сначала выполните агрегацию «обычным» SQL, а затем упакуйте результат в массивы и структуры.

-- Aggregates with plain GROUP BY first, then packs the result into arrays of structs:
-- one output row per (date, article_id, user_type).
WITH test AS (
  -- Inline sample rows: the first STRUCT names the fields, the bare tuples reuse them.
  SELECT
    date,
    article_id,
    user_type,
    openmode,
    uid
  FROM UNNEST([
    STRUCT(
      '2019-10-26' AS date,
      '1.8025137' AS article_id,
      'Digital Paying' AS user_type,
      'open' AS openmode,
      '123' AS uid
    ),
    ('2019-10-26', '1.8025137', 'Digital Paying', 'close', '523'),
    ('2019-10-26', '1.8025137', 'Anonymous', 'open', '321'),
    ('2019-10-26', '1.8025137', 'Registered', 'close', '231'),
    ('2019-10-26', '1.8025137', 'Registered', 'open', '431'),
    ('2019-10-26', '1.8025137', 'Digital Paying', 'close', '132'),
    ('2019-10-26', '1.8025137', 'Anonymous', 'close', '111')
  ])
),

-- Flat aggregation: number of distinct uids per (date, article, user type, open mode).
uid_counts AS (
  SELECT
    date,
    article_id,
    user_type,
    openmode,
    COUNT(DISTINCT uid) AS uids
  FROM test
  GROUP BY
    date,
    article_id,
    user_type,
    openmode
),

-- Nesting step: fold the open-mode rows of each user type into the array "subfields".
per_user_type AS (
  SELECT
    date,
    article_id,
    user_type,
    ARRAY_AGG(STRUCT(openmode, uids)) AS subfields
  FROM uid_counts
  GROUP BY
    date,
    article_id,
    user_type
)

SELECT
  date,
  article_id,
  user_type,
  subfields
FROM per_user_type
...