Этот подход использует функцию Snowflake WIDTH_BUCKET и корректно работает на сгенерированных мной тестовых данных:
-- Split every (USERID, SESSIONID) session into consecutive 30-minute periods,
-- anchored at the session's first event, and return the first and last
-- timestamp observed in each period.
--
-- The period length is hardcoded as 30 minutes (30 * 60 seconds) below.

-- Get the min and max timestamps for each userid, sessionid.
-- WIDTH_BUCKET takes numeric parameters, hence the conversion to epoch seconds.
WITH SESSION_BOUNDS AS (
    SELECT
        USERID,
        SESSIONID,
        DATE_PART(EPOCH_SECONDS, MIN(DATE_TIME)) AS MIN_EPOCH,
        DATE_PART(EPOCH_SECONDS, MAX(DATE_TIME)) AS MAX_EPOCH
    FROM TEST_DATA
    GROUP BY USERID, SESSIONID
),
-- Get the number of 30-minute buckets needed to cover each userid/sessionid.
-- FLOOR(span / period) + 1 is always >= 1, even when a session has a single
-- timestamp (span = 0), so WIDTH_BUCKET never receives a zero bucket count.
SESSION_BUCKETS AS (
    SELECT
        USERID,
        SESSIONID,
        MIN_EPOCH,
        FLOOR((MAX_EPOCH - MIN_EPOCH) / (30 * 60)) + 1 AS NUM_BUCKETS
    FROM SESSION_BOUNDS
),
-- Assign each record to the appropriate time period bucket.
-- WIDTH_BUCKET's upper bound is exclusive: a value equal to it is placed in
-- the overflow bucket NUM_BUCKETS + 1. The upper bound is therefore set to
-- MIN_EPOCH + NUM_BUCKETS * period, which is strictly greater than the
-- session's last timestamp and makes every bucket exactly 30 minutes wide.
BUCKETED_EVENTS AS (
    SELECT
        TD.USERID,
        TD.SESSIONID,
        TD.DATE_TIME,
        WIDTH_BUCKET(
            DATE_PART(EPOCH_SECONDS, TD.DATE_TIME),
            SB.MIN_EPOCH,
            SB.MIN_EPOCH + SB.NUM_BUCKETS * 30 * 60,
            SB.NUM_BUCKETS
        ) AS TIME_GROUP
    FROM TEST_DATA AS TD
    INNER JOIN SESSION_BUCKETS AS SB
        ON TD.USERID = SB.USERID
        AND TD.SESSIONID = SB.SESSIONID
)
-- Get the min and max timestamps for each userid, sessionid and bucket combination.
-- LIMIT 10 only trims the output to a sample; the ORDER BY keeps it deterministic.
SELECT
    USERID,
    SESSIONID,
    MIN(DATE_TIME) AS PERIOD_START,
    MAX(DATE_TIME) AS PERIOD_END
FROM BUCKETED_EVENTS
GROUP BY USERID, SESSIONID, TIME_GROUP
ORDER BY USERID, SESSIONID, TIME_GROUP
LIMIT 10
;