Несколько мыслей и комментариев ...
Есть много способов сделать дату-время только значением даты.Я использую DATEADD (DAY, DATEDIFF (DAY, 0,), 0).Но для следующего кода я предлагаю сделать вид, что есть поле justDate, чтобы сделать вещи короче:)
Последовательность важна, а в таблице нет "идентификатор последовательности ".ROW_NUMBER () может дать вам это ...
ROW_NUMBER() OVER (PARTITION BY parameter_id, justDate ORDER BY sampledate) AS "sequence_id"
Кажется, есть несколько способов сделать это.
Для каждого образцаприсоединитесь к следующему образцу пять раз.Мне это не нравится, но это, наверное, самый простой вариант ...
WITH
sequenced_samples
AS
(
SELECT
parameterID AS "parameter_id",
sampledate AS "sample_date_time",
DATEADD(DAY, DATEDIFF(DAY, 0, sampledate), 0) AS "sample_date",
ROW_NUMBER() OVER (PARTITION BY parameter_id, DATEADD(DAY, DATEDIFF(DAY, 0, sampledate), 0) ORDER BY sampledate) AS "sequence_id",
CASE WHEN valid = 1 THEN value ELSE NULL END as "value",
-(valid - 1) AS "invalid" -- turns 1 to 0, and 0 to 1
FROM
samples
)
SELECT
"sample_1".parameter_id,
"sample_1".sample_date,
AVG(value) AS average_value
FROM
samples "sample_1"
LEFT JOIN
samples "sample_2"
ON "sample_2".parameter_id = "sample_1".parameter_id
AND "sample_2".sample_date = "sample_1".sample_date
AND "sample_2".sequence_id = "sample_1".sequence_id + 1
LEFT JOIN
samples "sample_3"
ON "sample_3".parameter_id = "sample_1".parameter_id
AND "sample_3".sample_date = "sample_1".sample_date
AND "sample_3".sequence_id = "sample_1".sequence_id + 2
LEFT JOIN
samples "sample_4"
ON "sample_4".parameter_id = "sample_1".parameter_id
AND "sample_4".sample_date = "sample_1".sample_date
AND "sample_4".sequence_id = "sample_1".sequence_id + 3
LEFT JOIN
samples "sample_5"
ON "sample_5".parameter_id = "sample_1".parameter_id
AND "sample_5".sample_date = "sample_1".sample_date
AND "sample_5".sequence_id = "sample_1".sequence_id + 4
GROUP BY
"sample_1".parameter_id,
"sample_1".sample_date
HAVING
5 > MAX("sample_1".invalid + "sample_2".invalid + "sample_3".invalid + "sample_4".invalid + "sample_5".invalid)
AND 17 < (COUNT(*) - SUM("sample_1".invalid))
Следующее немного более изящно (но немного), но я несидел где-нибудь с доступом к MS SQL Server, поэтому я не могу сказать, является ли он более производительным.
Вместо 4 объединений, присоединяйтесь только один раз, но сопоставляя 5 последовательных выборок.Выполнение двух уровней группировки.
WITH
sequenced_samples
AS
(
SELECT
parameterID AS "parameter_id",
sampledate AS "sample_date_time",
DATEADD(DAY, DATEDIFF(DAY, 0, sampledate), 0) AS "sample_date",
ROW_NUMBER() OVER (PARTITION BY parameter_id, DATEADD(DAY, DATEDIFF(DAY, 0, sampledate), 0) ORDER BY sampledate) AS "sequence_id",
CASE WHEN valid = 1 THEN value ELSE NULL END AS "value",
-(valid - 1) AS "invalid" -- Turns 0 to 1, and 1 to 0
FROM
samples
)
,
checked_samples
AS
(
SELECT
"sample".parameter_id,
"sample".sample_date,
"sample".value,
"sample".invalid,
SUM("next_5_samples".invalid) AS "sequence_invalidity"
FROM
samples "sample"
INNER JOIN
samples "next_5_samples"
ON "next_5_samples".parameter_id = "sample".parameter_id
AND "next_5_samples".sample_date = "sample".sample_date
AND "next_5_samples".sequence_id >= "sample".sequence_id + 1
AND "next_5_samples".sequence_id <= "sample".sequence_id + 4
GROUP BY
"sample".parameter_id,
"sample".sample_date,
"sample".valid,
"sample".value
)
SELECT
parameter_id,
sample_date,
AVG(value)
FROM
checked_samples
GROUP BY
parameter_id,
sample_date
HAVING
5 > MAX(sequence_invalidity)
AND 17 < (COUNT(*) - SUM(invalid))
Последний вариант - использовать рекурсивные выражения общих таблиц для циклического прохождения записей по одной.Это сложнее для кода, чем курсора, но (по моему опыту) гораздо быстрее.
РЕДАКТИРОВАТЬ: Следующий запрос имеллевое соединение в рекурсивном CTE, и теперь это не так.
WITH
sequenced_samples
AS
(
SELECT
parameterID AS "parameter_id",
sampledate AS "sample_date_time",
DATEADD(DAY, DATEDIFF(DAY, 0, sampledate), 0) AS "sample_date",
ROW_NUMBER() OVER (PARTITION BY parameter_id, DATEADD(DAY, DATEDIFF(DAY, 0, sampledate), 0) ORDER BY sampledate) AS "sequence_id",
value,
valid
FROM
samples
)
,
recursed_totals
AS
(
SELECT
parameter_id,
sample_date,
sequence_id - 1 AS "next_sequence_id",
CASE WHEN valid = 1 THEN value ELSE 0 END AS "cumulative_value",
valid AS "cumulative_count",
CASE WHEN valid = 1 THEN 0 ELSE 1 END AS "cumulative_invalid",
CASE WHEN valid = 1 THEN 0 ELSE 1 END AS "max_cumulative_invalid"
FROM
sequenced_samples
WHERE
sequence_id = (
SELECT
COUNT(*)
FROM
sequenced_samples "look_up"
WHERE
"look_up".parameter_id = sequenced_samples.parameter_id
AND "look_up".sample_date = sequenced_samples.sample_date
)
UNION ALL
SELECT
"cumulative_samples".parameter_id,
"cumulative_samples".sample_date,
"next_sample".sequence_id - 1,
"cumulative_samples".cumuatlive_value + CASE WHEN "next_sample".valid = 1 THEN "next_sample".value ELSE 0 END,
"cumulative_samples".valid + ISNULL("next_sample".valid, 0),
CASE
WHEN "next_sample".valid = 0 THEN "cumulative_samples".cumulative_invalid + 1
WHEN "cumulative_samples".cumulative_invalid = 5 THEN 5
ELSE 0
END,
CASE
WHEN "next_sample".valid = 1 THEN "cumulative_samples".max_cumulative_invalid
WHEN "cumulative_samples".cumulative_invalid = "cumulative_samples".max_cumulative_invalid THEN "cumulative_samples".max_cumulative_invalid + 1
ELSE "cumulative_samples".max_cumulative_invalid
END
FROM
recursed_totals AS "cumulative_samples"
INNER JOIN
sequenced_samples AS "next_sample"
ON "next_sample".parameter_id = "cumulative_samples".parameter_id
AND "next_sample".sample_date = "cumulative_samples".sample_date
AND "next_sample".sequence_id = "cumulative_samples".next_sequence_id
)
SELECT
parameter_id,
sample_date,
CAST(cumulative_value AS FLOAT) / CAST(cumulative_count AS FLOAT) AS "average",
cumulative_count AS "valid_samples",
max_cumulative_invalid AS "max_consecutive_invalid_samples"
FROM
recursed_totals
WHERE
parameter_id = @parameter_id