Мне кажется, что в функции ARRAY_AGG в BigQuery есть ошибка в поведении ORDER BY.
Вот SQL-запрос, который это демонстрирует:
#standardSQL
-- Reproduction query for the ARRAY_AGG(... ORDER BY ...) ordering question.
-- t1 is an inline table with one row per (user_id, team_id, date_str).
-- Only the first array element names its fields; the remaining tuples list
-- their values in the same positional order.
WITH t1 AS (
SELECT *
FROM UNNEST ( [
-- 2018-07-17: team 1 has 7 rows, teams 2 and 3 have 4 rows each.
STRUCT(1 AS user_id, 1 AS team_id, "2018-07-17" AS date_str),
( 2, 1, "2018-07-17" ),
( 3, 1, "2018-07-17" ),
( 4, 1, "2018-07-17" ),
( 5, 1, "2018-07-17" ),
( 6, 1, "2018-07-17" ),
( 7, 1, "2018-07-17" ),
( 8, 2, "2018-07-17" ),
( 9, 2, "2018-07-17" ),
( 10, 2, "2018-07-17" ),
( 11, 2, "2018-07-17" ),
( 14, 3, "2018-07-17" ),
( 15, 3, "2018-07-17" ),
( 16, 3, "2018-07-17" ),
( 17, 3, "2018-07-17" ),
-- 2018-07-18: teams 1 and 3 have 5 rows each, team 2 has 6 rows.
( 1, 1, "2018-07-18" ),
( 4, 1, "2018-07-18" ),
( 5, 1, "2018-07-18" ),
( 6, 1, "2018-07-18" ),
( 7, 1, "2018-07-18" ),
( 8, 2, "2018-07-18" ),
( 9, 2, "2018-07-18" ),
( 10, 2, "2018-07-18" ),
( 11, 2, "2018-07-18" ),
( 12, 2, "2018-07-18" ),
( 13, 2, "2018-07-18" ),
( 14, 3, "2018-07-18" ),
( 15, 3, "2018-07-18" ),
( 16, 3, "2018-07-18" ),
( 17, 3, "2018-07-18" ),
( 18, 3, "2018-07-18" ) ] ) )
-- Outer level: one output row per date_str. Each array aggregates the
-- (users, teams) pairs of that date under a different ORDER BY key.
SELECT
date_str,
-- a1: teams values sorted by users.
ARRAY_AGG(teams ORDER BY users) AS a1,
-- a2: users values sorted by users.
ARRAY_AGG(users ORDER BY users) AS a2,
-- a3: teams values sorted by teams.
ARRAY_AGG(teams ORDER BY teams) AS a3,
-- a4: users values sorted by teams.
ARRAY_AGG(users ORDER BY teams) AS a4,
-- a5: (teams, users) pairs sorted by users; serves as a cross-check for a1/a2.
ARRAY_AGG(STRUCT(teams, users) ORDER BY users) AS a5
FROM (
-- Middle level: for each date and each distinct row count ("users"),
-- count how many teams have that row count.
SELECT
date_str,
users,
COUNT(*) AS teams
FROM (
-- Innermost level: number of t1 rows per (date_str, team_id).
SELECT
date_str,
team_id,
COUNT(*) AS users
FROM t1
GROUP BY date_str, team_id
)
GROUP BY date_str, users
)
GROUP BY date_str
ORDER BY date_str;
Этот запрос возвращает:
+-----+------------+----+----+----+----+----------+----------+
| Row | date_str | a1 | a2 | a3 | a4 | a5.teams | a5.users |
+-----+------------+----+----+----+----+----------+----------+
| 1 | 2018-07-17 | 1 | 4 | 1 | 4 | 2 | 4 |
| | | 2 | 7 | 2 | 7 | 1 | 7 |
| 2 | 2018-07-18 | 1 | 5 | 1 | 5 | 2 | 5 |
| | | 2 | 6 | 2 | 6 | 1 | 6 |
+-----+------------+----+----+----+----+----------+----------+
Но я ожидал получить следующее:
+-----+------------+----+----+----+----+----------+----------+
| Row | date_str | a1 | a2 | a3 | a4 | a5.teams | a5.users |
+-----+------------+----+----+----+----+----------+----------+
| 1 | 2018-07-17 | 2 | 4 | 1 | 7 | 2 | 4 |
| | | 1 | 7 | 2 | 4 | 1 | 7 |
| 2 | 2018-07-18 | 2 | 5 | 1 | 6 | 2 | 5 |
| | | 1 | 6 | 2 | 5 | 1 | 6 |
+-----+------------+----+----+----+----+----------+----------+
Похоже, предложение ORDER BY в функции ARRAY_AGG работает некорректно: массивы a1 и a4 упорядочены неправильно.
Кроме того, трудно объяснить, почему запрос работает в точности так, как ожидалось, если заменить любой из двух вызовов COUNT(*) на COUNT(user_id) или COUNT(team_id), то есть:
-- Variant 1 of the reproduction query: the only difference is that the
-- innermost aggregate is COUNT(user_id) instead of COUNT(*).
-- This fragment reads from t1, which is not defined here; it has to be
-- supplied by the same WITH clause as the original query.
SELECT
date_str,
-- Same five aggregates as the original query.
ARRAY_AGG(teams ORDER BY users) AS a1,
ARRAY_AGG(users ORDER BY users) AS a2,
ARRAY_AGG(teams ORDER BY teams) AS a3,
ARRAY_AGG(users ORDER BY teams) AS a4,
ARRAY_AGG(STRUCT(teams, users) ORDER BY users) AS a5
FROM (
-- Middle level: number of teams per (date_str, users); still COUNT(*).
SELECT
date_str,
users,
COUNT(*) AS teams
FROM (
-- Innermost level: counts non-NULL user_id values per (date_str, team_id).
SELECT
date_str,
team_id,
COUNT(user_id) AS users
FROM t1
GROUP BY date_str, team_id
)
GROUP BY date_str, users
)
GROUP BY date_str
ORDER BY date_str;
или
-- Variant 2 of the reproduction query: the only difference is that the
-- middle-level aggregate is COUNT(team_id) instead of COUNT(*).
-- This fragment reads from t1, which is not defined here; it has to be
-- supplied by the same WITH clause as the original query.
SELECT
date_str,
-- Same five aggregates as the original query.
ARRAY_AGG(teams ORDER BY users) AS a1,
ARRAY_AGG(users ORDER BY users) AS a2,
ARRAY_AGG(teams ORDER BY teams) AS a3,
ARRAY_AGG(users ORDER BY teams) AS a4,
ARRAY_AGG(STRUCT(teams, users) ORDER BY users) AS a5
FROM (
-- Middle level: counts non-NULL team_id values per (date_str, users).
SELECT
date_str,
users,
COUNT(team_id) AS teams
FROM (
-- Innermost level: number of t1 rows per (date_str, team_id); still COUNT(*).
SELECT
date_str,
team_id,
COUNT(*) AS users
FROM t1
GROUP BY date_str, team_id
)
GROUP BY date_str, users
)
GROUP BY date_str
ORDER BY date_str;
Насколько я понимаю, на этих данных эти запросы должны возвращать результаты, идентичные результатам исходного запроса. Меня это сильно сбивает с толку. Это ошибка или я что-то неправильно понял?
Дополнительная информация.
Внутренний подзапрос:
-- The subquery that feeds the outer ARRAY_AGG level of the reproduction
-- query, shown on its own. It reads from t1, which is not defined in this
-- fragment.
-- Output: one row per (date_str, users) with the number of teams that have
-- that many rows on that date.
SELECT
date_str,
users,
COUNT(*) AS teams
FROM (
-- Number of t1 rows per (date_str, team_id).
SELECT
date_str,
team_id,
COUNT(*) AS users
FROM t1
GROUP BY date_str, team_id
)
GROUP BY date_str, users
возвращает:
+-----+------------+-------+-------+
| Row | date_str | users | teams |
+-----+------------+-------+-------+
| 1 | 2018-07-18 | 5 | 2 |
| 2 | 2018-07-17 | 7 | 1 |
| 3 | 2018-07-18 | 6 | 1 |
| 4 | 2018-07-17 | 4 | 2 |
+-----+------------+-------+-------+
Поэтому я создал эти данные напрямую с помощью предложения WITH и выполнил тот же агрегирующий запрос:
#standardSQL
-- Control experiment: t2 hard-codes four (date_str, users, teams) rows as
-- literals instead of computing them with nested GROUP BY subqueries.
-- Only the first array element names its fields; the remaining tuples list
-- their values in the same positional order.
With t2 AS (
SELECT *
FROM UNNEST ( [
STRUCT("2018-07-18" AS date_str, 5 AS users, 2 AS teams),
( "2018-07-17", 7, 1 ),
( "2018-07-18", 6, 1 ),
( "2018-07-17", 4, 2 ) ] )
)
-- The same five ARRAY_AGG aggregates as in the reproduction query, applied
-- directly to the literal rows.
SELECT
date_str,
ARRAY_AGG(teams ORDER BY users) AS a1,
ARRAY_AGG(users ORDER BY users) AS a2,
ARRAY_AGG(teams ORDER BY teams) AS a3,
ARRAY_AGG(users ORDER BY teams) AS a4,
ARRAY_AGG(STRUCT(teams, users) ORDER BY users) AS a5
FROM t2
GROUP BY date_str
ORDER BY date_str;
Результат оказался именно таким, какой я ожидал:
+-----+------------+----+----+----+----+----------+----------+
| Row | date_str | a1 | a2 | a3 | a4 | a5.teams | a5.users |
+-----+------------+----+----+----+----+----------+----------+
| 1 | 2018-07-17 | 2 | 4 | 1 | 7 | 2 | 4 |
| | | 1 | 7 | 2 | 4 | 1 | 7 |
| 2 | 2018-07-18 | 2 | 5 | 1 | 6 | 2 | 5 |
| | | 1 | 6 | 2 | 5 | 1 | 6 |
+-----+------------+----+----+----+----+----------+----------+
Я не понимаю, чем это вызвано, и совершенно озадачен. Буду рад любым идеям и предложениям.