Я конвертирую законный SQL-запрос в стандартный SQL-запрос в Bigquery, чтобы рассчитать показатель отказов Google Analytics.Но при преобразовании запроса есть некоторая разница в выходном результате.
Устаревший SQL-запрос
SELECT session_bounceRate,
t1.source as source,t1.medium as medium,
total_session
FROM (
SELECT
IFNULL(t1.session_bounceSessionCount, 0) / t2.session_sessionId_distinct_count AS session_bounceRate,
t1.source,t1.medium,
t2.total_session_distinct_count AS total_session
FROM (
SELECT
INTEGER(session_bounceSession_distinct_count) AS session_bounceSessionCount,
source,
medium
FROM (
SELECT
COUNT(DISTINCT session_sessionId, 10000000) AS session_bounceSession_distinct_count, --Changes done (Count(x) in legacy sql stands for approx ,
source, --,so replaced it with APPROX_COUNT_DISTINCT to get approx count
medium from (
SELECT
SUM(IF(session_hitsType = 'event'
AND session_isInteraction_first = 1, 1, 0)) AS session_isEventInteraction_sum,
session_sessionId AS session_sessionId,
SUM(session_pageViews_sum) AS session_pageViews_sum_sum,
source,
medium
FROM (
SELECT
hits.type AS session_hitsType,
sessionId AS session_sessionId,
SUM(totals.pageviews) AS session_pageViews_sum,
FIRST(hits.isInteraction) AS session_isInteraction_first,
trafficSource.source AS source,
trafficSource.medium AS medium,
FROM
TABLE_DATE_RANGE([[test:test.session_streaming_], TIMESTAMP('2018-04-01'), TIMESTAMP('2018-04-30')) AS session_streaming
GROUP BY
source,
medium,
session_hitsType,
session_sessionId )
GROUP BY
source,
medium,
session_sessionId )
WHERE
(session_isEventInteraction_sum = 0
AND session_pageViews_sum_sum = 1)
GROUP BY
source,
medium ) ) AS t1
JOIN EACH (
SELECT
COUNT(DISTINCT sessionId, 10000000) AS session_sessionId_distinct_count,
trafficSource.source AS source,
trafficSource.medium AS medium,
COUNT(DISTINCT sessionId, 10000000) AS total_session_distinct_count
FROM
TABLE_DATE_RANGE([test:Test.session_streaming_], TIMESTAMP('2018-04-01'), TIMESTAMP('2018-04-30')) AS session_streaming
GROUP BY
source,
medium ) AS t2
ON
t1.source = t2.source
and t1.medium=t2.medium)
where t1.medium='zadv_display'
ORDER BY
total_session DESC
Стандартный SQL-запрос
SELECT
IFNULL(t1.session_bounceSessionCount, 0) / t2.session_sessionId_distinct_count AS session_bounceRate,
t1.source,
t1.medium,
t2.total_session_distinct_count AS total_session
FROM (
SELECT
CAST(session_bounceSession_distinct_count AS INT64) AS session_bounceSessionCount,
source,
medium
FROM (
SELECT
APPROX_COUNT_DISTINCT(DISTINCT session_sessionId) AS session_bounceSession_distinct_count,
source,
medium
FROM (
SELECT
SUM(IF(session_hitsType = 'event'
AND session_isInteraction_first = 1, 1, 0)) AS session_isEventInteraction_sum,
session_sessionId AS session_sessionId,
SUM(session_pageViews_sum) AS session_pageViews_sum_sum,
source,
medium
FROM (
SELECT
session_hitsType,
session_sessionId,
source,
medium,
CASE
WHEN session_isInteraction_first = TRUE THEN 1
ELSE 0
END AS session_isInteraction_first,
SUM(session_pageViews_sum) AS session_pageViews_sum
FROM (
SELECT
hits.Type AS session_hitsType,
sessionId AS session_sessionId,
trafficSource.source AS source,
trafficSource.medium AS medium,
totals.pageviews AS session_pageViews_sum,
FIRST_VALUE(hits.isInteraction) OVER(PARTITION BY sessionId ORDER BY TIMESTAMP_SECONDS(hits.Time)) AS session_isInteraction_first
FROM
`test.Test.session_streaming_*`,
unNEST(hits) hits
WHERE
_table_suffix BETWEEN '20180401'
AND '20180430' )
GROUP BY
session_hitsType,
session_sessionId,
source,
medium,
session_isInteraction_first )
GROUP BY
source,
medium,
session_sessionId)
WHERE
(session_isEventInteraction_sum = 0
AND session_pageViews_sum_sum = 1)
GROUP BY
source,
medium)) AS t1
JOIN (
SELECT
APPROX_COUNT_DISTINCT(DISTINCT sessionId) AS session_sessionId_distinct_count,
trafficSource.source AS source,
trafficSource.medium AS medium,
APPROX_COUNT_DISTINCT(DISTINCT sessionId) AS total_session_distinct_count
FROM
`test.Test.session_streaming_*`
WHERE _table_suffix BETWEEN '20180401' AND '20180430'
GROUP BY
source,
medium
) AS t2
ON
t1.source = t2.source
AND t1.medium=t2.medium
where t1.medium='zadv_display'
order by total_session desc
Мы должны заменить функцию First взначение с First_value в стандартном sql, которое является заметным изменением, внесенным в запрос.
Может кто-нибудь подсказать мне, есть ли какая-то проблема при преобразовании, поскольку стандартный вывод запроса sql должен совпадать с устаревшим выводом?