Я пытаюсь построить пользовательские последовательности в Bigquery и достиг следующего состояния:
u1 e1 41
u1 e1 42
u2 e1 43
u3 e1 44
u1 e1 45
u3 e1 46
u4 e1 47
u2 e1 48
u4 e1 49
u4 e1 50
u1 e2 43
u2 e2 44
u4 e2 48
u2 e2 49
u3 e2 50
u1 e2 51
u3 e2 52
u1 e2 53
u2 e2 54
u2 e2 55
u1 e2 56
, где u1
, u2
- идентификаторы пользователей, e1
, e2
- имена событий, а 41
, 42
, 43
- метки времени событий.
Я использовал этот запрос:
WITH Step1 AS (
SELECT
user_dim.user_id as user_id1,
event.timestamp_micros as event_timestamp1,
DATETIME_DIFF(current_datetime(),
DATETIME(TIMESTAMP_MICROS(event.timestamp_micros)), DAY) as Days1,
event.name AS Event_Name1
FROM `com_xxx_android_ANDROID.app_events_*`,
UNNEST(event_dim) AS event
WHERE _TABLE_SUFFIX BETWEEN FORMAT_DATE('%Y%m%d',DATE_SUB(CURRENT_DATE(),
INTERVAL 2 DAY))
AND FORMAT_DATE('%Y%m%d',DATE_SUB(CURRENT_DATE(), INTERVAL 1 DAY))
AND event.name='PLDetailForm'
ORDER BY Days1--,Event_Name1,user_id1
),
Step1_COUNT AS (
select count(*) AS A from Step1
),
Step1_USERIDS AS (
select user_id1 from Step1
),
Step1_USERS AS (
select COUNT(DISTINCT user_id1) as users from Step1
),
Step2 AS (
SELECT
user_dim.user_id as user_id2,
event.timestamp_micros as event_timestamp2,
DATETIME_DIFF(current_datetime(),
DATETIME(TIMESTAMP_MICROS(event.timestamp_micros)), DAY) as Days2,
event.name AS Event_Name2
FROM
`com_xxx_android_ANDROID.app_events_*`,
UNNEST(event_dim) AS event
WHERE _TABLE_SUFFIX BETWEEN FORMAT_DATE('%Y%m%d',DATE_SUB(CURRENT_DATE(),
INTERVAL 2 DAY))
AND FORMAT_DATE('%Y%m%d',DATE_SUB(CURRENT_DATE(), INTERVAL 1 DAY))
AND event.name='PL_CONTINUE_PL_DETAILFORM'
AND user_dim.user_id in (select * from Step1_USERIDS)
ORDER BY Days2--,Event_Name2,user_id2
--AND event.timestamp_micros>(select event_timestamp1 from Step1 where )
),
Step2_COUNT AS (
select DISTINCT count(*) AS B from Step1,Step2 where
step1.user_id1=step2.user_id2
AND event_timestamp2>event_timestamp1
),
Step2_USERIDS AS (
select user_id2 from Step2
),
Step2_USERS AS (
select COUNT(DISTINCT user_id2) as users from Step2
)
Я хочу подсчитать, сколько пользователей выполнили e1
, а затем e2
. Как этого добиться из приведенных выше данных?