Один из вариантов решения (для BigQuery Standard SQL):
#standardSQL
-- Collects `state` values into one array, taking rows in `pos` order while the
-- running total of LENGTH(state) stays below 1,000,000.
WITH numbered AS (
  -- Assign a position to every row. OVER () has no ORDER BY, so the numbering
  -- itself is arbitrary; it only serves to define a prefix of the table.
  SELECT
    state,
    ROW_NUMBER() OVER () AS pos
  FROM `project.dataset.table`
),
temp AS (
  -- Running total of LENGTH(state) up to and including the current row.
  -- The ROWS frame is spelled out instead of relying on the default frame.
  -- NOTE(review): LENGTH counts characters for STRING and bytes for BYTES;
  -- confirm which unit the size limit is meant to be expressed in.
  SELECT
    state,
    pos,
    SUM(LENGTH(state)) OVER (
      ORDER BY pos
      ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW
    ) AS size
  FROM numbered
)
-- NULL states are skipped. ORDER BY pos makes the array element order follow
-- the order in which the running total was accumulated; without it the element
-- order of ARRAY_AGG is not defined.
SELECT ARRAY_AGG(state IGNORE NULLS ORDER BY pos)
FROM temp
WHERE size < 1000000
Вы можете протестировать приведённый выше запрос и поэкспериментировать с ним на сгенерированных данных, как в примере ниже:
#standardSQL
-- Self-contained demo: builds 100 fake rows and collects `state` values into
-- one array while the running total of LENGTH(state) stays below 5000.
WITH `project.dataset.table` AS (
  -- Fake data standing in for the real table: 100 rows, each a run of 'a'
  -- whose length is 100 * RAND() cast to INT64 (so results vary per run).
  SELECT REPEAT('a', CAST(100 * RAND() AS INT64)) AS state
  FROM UNNEST(GENERATE_ARRAY(1, 100))
),
numbered AS (
  -- Assign a position to every row. OVER () has no ORDER BY, so the numbering
  -- itself is arbitrary; it only serves to define a prefix of the rows.
  SELECT
    state,
    ROW_NUMBER() OVER () AS pos
  FROM `project.dataset.table`
),
temp AS (
  -- Running total of LENGTH(state) up to and including the current row.
  -- The ROWS frame is spelled out instead of relying on the default frame.
  SELECT
    state,
    pos,
    SUM(LENGTH(state)) OVER (
      ORDER BY pos
      ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW
    ) AS size
  FROM numbered
)
-- NULL states are skipped. ORDER BY pos makes the array element order follow
-- the order in which the running total was accumulated; without it the element
-- order of ARRAY_AGG is not defined.
SELECT ARRAY_AGG(state IGNORE NULLS ORDER BY pos)
FROM temp
WHERE size < 5000