Почему ARRAY JOIN не выполняется на узле MergeTree? - PullRequest
0 голосов
/ 27 апреля 2020
-- For every (from, to) pair of labelled events, compute uniq(uid, sid),
-- i.e. the uniq() aggregate over (user, chunk index) pairs that contain that pair.
select uniq(uid,sid) as value,l.1 as from ,l.2 as to
from (
-- One row per (uid, chunk): s_t is the chunk's array of (from, to) tuples,
-- sid is the chunk's 1-based position inside tx.
select uid,sid,s_t
from (
select
distinct_id as uid,
-- cur: all (unix timestamp of ums_ts_, event_id as string) tuples of this
-- distinct_id, sorted by the timestamp (first tuple element).
arraySort((x)->x.1,groupArray(tuple(toUnixTimestamp(ums_ts_),toString(event_id)))) as cur,
-- cur_diff: differences between neighbouring timestamps of cur.
arrayDifference((x)->x.1,cur) as cur_diff,
-- cur_split: 1-based positions in cur whose difference from the previous
-- timestamp is greater than 1800, followed by the sentinel length(cur)+1.
-- NOTE(review): 1800 is presumably seconds (values come from toUnixTimestamp) - confirm.
arrayPushBack(
arrayFilter(
(x,y)->y>1800,
arrayEnumerate(cur_diff),
cur_diff
),
length(cur)+1
) as cur_split,
-- t: cur cut into consecutive chunks at the cur_split positions
-- (chunk y starts at 1 for y = 1, otherwise at cur_split[y-1], and ends just
-- before cur_split[y]). Each chunk is then sliced from the position returned by
-- arrayFirstIndex for event id '1301', reduced to the event-id strings (x.2),
-- and chunks that end up empty are removed by the outer arrayFilter.
-- NOTE(review): for a chunk without '1301' arrayFirstIndex returns 0; presumably
-- arraySlice then yields an empty array that the filter drops - confirm.
arrayFilter((x)->length(x)>0,
arrayMap((x)->arrayMap((x)->x.2,arraySlice(x,arrayFirstIndex((y)->y.2='1301',x))),
arrayMap((x,y)->arraySlice(cur,
multiIf(y==1,1,cur_split[y-1]),multiIf(y==1,cur_split[y]-1,cur_split[y]-cur_split[y-1])),cur_split,arrayEnumerate(cur_split)))) as t,
-- tx: for every chunk of t, an array of tuples
--   ('<pos>_$$_<event>', '<pos+1>_$$_<next event>').
-- For the last element of a chunk the second label has an empty position and
-- the placeholder event '_waste', i.e. '_$$__waste'.
arrayMap((x)->arrayMap((y,z,q)->tuple(concat(toString(y),'_$$_',z),concat(multiIf(y==length(arrayEnumerate(x)),'',toString(y+1)),'_$$_',q)),arrayEnumerate(x),x,arrayPushBack(arrayPopFront(x),'_waste')),t) as tx
from event_data.event_wos_p15  where  event_id in (1301,1310,1303,1305,1429) and  event_date>='2020-03-01' and event_date <='2020-03-31' group by distinct_id
-- Unfold tx: one output row per chunk, together with its index in tx.
) array join tx as s_t,arrayEnumerate(tx) as sid
-- Unfold each chunk: one output row per (from, to) tuple, then aggregate per pair.
) array join s_t as l  group by from ,to
Checking the system.query_log table, I found that the ARRAY JOIN was executed on the distributed node. Why is the ARRAY JOIN not executed on the MergeTree node?

Запись из query_log на узле MergeTree:


type:                 QueryFinish
event_date:           2020-04-27
event_time:           2020-04-27 15:34:54
query_start_time:     2020-04-27 15:34:53
query_duration_ms:    628
read_rows:            4955184
read_bytes:           355066855
written_rows:         0
written_bytes:        0
result_rows:          76798
result_bytes:         4636864
memory_usage:         660752320
query:                SELECT distinct_id AS uid, arrayMap(x -> arrayMap((y, z, q) -> (concat(toString(y), '_$$_', z), concat(multiIf(y = length(arrayEnumerate(x)), '', toString(y + 1)), '_$$_', q)), arrayEnumerate(x), x, arrayPushBack(arrayPopFront(x), '_waste')), arrayFilter(x -> (length(x) > 0), arrayMap(x -> arrayMap(x -> (x.2), arraySlice(x, arrayFirstIndex(y -> ((y.2) = '1301'), x))), arrayMap((x, y) -> arraySlice(arraySort(x -> (x.1), groupArray((toUnixTimestamp(ums_ts_), toString(event_id)))) AS cur, multiIf(y = 1, 1, (arrayPushBack(arrayFilter((x, y) -> (y > 1800), arrayEnumerate(arrayDifference(x -> (x.1), cur) AS cur_diff), cur_diff), length(cur) + 1) AS cur_split)[y - 1]), multiIf(y = 1, (cur_split[y]) - 1, (cur_split[y]) - (cur_split[y - 1]))), cur_split, arrayEnumerate(cur_split)))) AS t) AS tx 
FROM event_data.event_wos_p15 WHERE (event_id IN (1301, 1310, 1303, 1305, 1429)) AND (event_date >= '2020-03-01') AND (event_date <= '2020-03-31') GROUP BY distinct_id

1 Ответ

0 голосов
/ 27 апреля 2020
-- Schematic only (placeholders, not a runnable query).
-- Outer query: everything outside the parentheses is executed on the initiator node.
select 
from ( 
   -- Inner query over the distributed table: the only part executed on the shards.
   select xxx, 
   from distributed_table
   group by
   )

Только внутренняя часть запроса (from distributed_table) будет выполнена на шардах (в таблицах MergeTree); все остальные части, находящиеся вне скобок (), будут выполнены на узле-инициаторе.

...