Как найти пропущенные даты в ежемесячных рядах данных? - PullRequest
1 голос
/ 21 февраля 2020

Я работаю с ежемесячной инвентаризацией виджетов с редкими пропущенными месяцами данных. Я хотел бы провести анализ типа «пробелы и острова», но не могу заставить реализацию работать должным образом (см. Sqlfiddle). Я пытаюсь создать новые столбцы, в которых конкретно указываются промежуток и даты начала и окончания острова:

http://www.sqlfiddle.com/#! 18 / a212a / 2

Любая помощь будет принята с благодарностью

Ответы [ 3 ]

1 голос
/ 21 февраля 2020

Я думаю, вам нужно следующее. SQL скрипка

  WITH StartingPoints AS
     (SELECT ID, OCCURRANCE, ROW_NUMBER() OVER(ORDER BY id, OCCURRANCE) AS rn
        FROM dates_test A
       WHERE NOT EXISTS (SELECT *
                FROM dates_test B
               WHERE B.ID = A.ID
                 AND EXTRACT(month FROM B.OCCURRANCE) =
                     EXTRACT(month FROM A.OCCURRANCE) - 1
                 and EXTRACT(year FROM B.OCCURRANCE) =
                     EXTRACT(year FROM A.OCCURRANCE))),
    EndingPoints AS
     (SELECT ID, OCCURRANCE, ROW_NUMBER() OVER(ORDER BY id, OCCURRANCE) AS rn
        FROM dates_test A
       WHERE NOT EXISTS (SELECT *
                FROM dates_test B
               WHERE B.ID = A.ID
                 AND EXTRACT(month FROM B.OCCURRANCE) =
                     EXTRACT(month FROM A.OCCURRANCE) + 1
                 and EXTRACT(year FROM B.OCCURRANCE) =
                     EXTRACT(year FROM A.OCCURRANCE)))
    SELECT S.ID,
           EXTRACT(month FROM S.OCCURRANCE) AS start_range,
           EXTRACT(month FROM E.OCCURRANCE) AS end_range
      FROM StartingPoints S
      JOIN EndingPoints E
        ON E.ID = S.ID
       AND E.rn = S.rn;

вы можете найти начальный интервал и конечный разрыв ниже

                        WITH StartingPoints AS
     (SELECT ID, OCCURRANCE, ROW_NUMBER() OVER(ORDER BY id, OCCURRANCE) AS rn
        FROM dates_test A
       WHERE NOT EXISTS (SELECT *
                FROM dates_test B
               WHERE B.ID = A.ID
                 AND EXTRACT(month FROM B.OCCURRANCE) =
                     EXTRACT(month FROM A.OCCURRANCE) - 1
                 and EXTRACT(year FROM B.OCCURRANCE) =
                     EXTRACT(year FROM A.OCCURRANCE))),
    EndingPoints AS
     (SELECT ID, OCCURRANCE, ROW_NUMBER() OVER(ORDER BY id, OCCURRANCE) AS rn
        FROM dates_test A
       WHERE NOT EXISTS (SELECT *
                FROM dates_test B
               WHERE B.ID = A.ID
                 AND EXTRACT(month FROM B.OCCURRANCE) =
                     EXTRACT(month FROM A.OCCURRANCE) + 1
                 and EXTRACT(year FROM B.OCCURRANCE) =
                     EXTRACT(year FROM A.OCCURRANCE))),
    MissingPoints AS
     (SELECT S.ID,
             EXTRACT(month FROM S.OCCURRANCE) AS start_range,
             EXTRACT(month FROM E.OCCURRANCE) AS end_range,
             EXTRACT(YEAR FROM E.OCCURRANCE) YEAR_of_OCCR
        FROM StartingPoints S
        JOIN EndingPoints E
          ON E.ID = S.ID
         AND E.rn = S.rn),
    i1 as
     (select level num from dual connect by level <= 12),
    ms11 as
     (select ID,
             start_range,
             end_range,
             lead(start_range, 1, 0) OVER(ORDER BY id, year_of_occr, start_range, end_range) as am_i_ms,
             lead(year_of_occr, 1, 0) OVER(ORDER BY id, year_of_occr) as miss_year,
             year_of_occr
        from MissingPoints),
    miss_month1 as
     (select id,
             start_range,
             end_range,
             DECODE(end_range + num, 13, 0, 14, 0, end_range + num) missing_month,
             year_of_occr
        from ms11, i1
       where ((end_range + num < am_i_ms or sTART_Range = end_range) and
             end_range + num <= 14) or (year_of_occr<> miss_year and am_i_ms >=0 and  am_i_ms <=12 and end_range + num <= 14)
       order by year_of_occr, missing_month),
    miss_month as
     (select *
        from miss_month1 A
       where not exists
       (select 1
                from miss_month1 B
               where A.ID = B.ID
                 AND (A.missing_month = B.start_range AND
                     A.missing_month = B.end_range)
                 and A.year_of_occr = B.year_of_occr)
         and decode(end_range, 12, -1, end_range) < missing_month),
    StartingmisPoints AS
     (SELECT A.*,
             ROW_NUMBER() OVER(ORDER BY id, year_of_occr, end_range, missing_month) AS rn
        FROM miss_month A
       WHERE NOT EXISTS (SELECT *
                FROM miss_month B
               WHERE B.ID = A.ID
                 and b.start_range = a.start_range
                 and b.end_range = a.end_range
                 AND B.missing_month = A.missing_month - 1
                 and b.year_of_occr = a.year_of_occr)),
    EndingmisPoints AS
     (SELECT A.*,
             ROW_NUMBER() OVER(ORDER BY id, year_of_occr, end_range, missing_month) AS rn
        FROM miss_month A
       WHERE NOT EXISTS (SELECT *
                FROM miss_month B
               WHERE B.ID = A.ID
                 AND B.missing_month = A.missing_month + 1
                 and b.start_range = a.start_range
                 and b.end_range = a.end_range
                 and b.year_of_occr = a.year_of_occr))
    SELECT distinct  S.ID,
           S.start_range,
           S.end_range,
           S.missing_month start_gap_range,
           E.missing_month end_gap_range,
           E.year_of_occr
      FROM StartingmisPoints S
      JOIN EndingmisPoints E
        ON E.ID = S.ID
       AND E.rn = S.rn
0 голосов
/ 21 февраля 2020

Вы можете использовать функцию LEAD analyti c, чтобы найти пропуски:

SELECT id,
       ADD_MONTHS( date_from, 1 ) AS missing_from,
       date_to - INTERVAL '1' SECOND AS missing_to
FROM   (
  SELECT id,
         TRUNC( occurrance, 'MM' ) AS date_from,
         LEAD( TRUNC( occurrance, 'MM' ) )
           OVER ( PARTITION BY id ORDER BY TRUNC( occurrance, 'MM' ) )
           AS date_to
  FROM   dates_test
)
WHERE  ADD_MONTHS( date_from, 1 ) < date_to
ORDER BY id, date_from;

Итак, для ваших тестовых данных:

CREATE TABLE dates_test ( id, value, occurrance ) As
-- march and september and october are missing for both IDs-
SELECT 1, 10, DATE '2014-01-03' FROM DUAL UNION ALL
SELECT 1, 10, DATE '2014-02-03' FROM DUAL UNION ALL
SELECT 1, 10, DATE '2014-04-01' FROM DUAL UNION ALL
SELECT 1, 10, DATE '2014-05-01' FROM DUAL UNION ALL
SELECT 1, 10, DATE '2014-06-01' FROM DUAL UNION ALL
SELECT 1, 10, DATE '2014-07-01' FROM DUAL UNION ALL
SELECT 1, 10, DATE '2014-08-01' FROM DUAL UNION ALL
SELECT 1, 10, DATE '2014-11-07' FROM DUAL UNION ALL
SELECT 1, 10, DATE '2014-12-07' FROM DUAL UNION ALL
SELECT 1, 10, DATE '2015-01-07' FROM DUAL UNION ALL
SELECT 2, 10, DATE '2014-01-03' FROM DUAL UNION ALL
SELECT 2, 10, DATE '2014-02-03' FROM DUAL UNION ALL
SELECT 2, 10, DATE '2014-04-01' FROM DUAL UNION ALL
SELECT 2, 10, DATE '2014-05-01' FROM DUAL UNION ALL
SELECT 2, 10, DATE '2014-06-01' FROM DUAL UNION ALL
SELECT 2, 10, DATE '2014-07-01' FROM DUAL UNION ALL
SELECT 2, 10, DATE '2014-08-01' FROM DUAL UNION ALL
SELECT 2, 10, DATE '2014-11-07' FROM DUAL UNION ALL
SELECT 2, 10, DATE '2014-12-07' FROM DUAL UNION ALL
SELECT 2, 10, DATE '2015-01-07' FROM DUAL;

Это выводит:

ID | MISSING_FROM        | MISSING_TO         
-: | :------------------ | :------------------
 1 | 2014-03-01 00:00:00 | 2014-03-31 23:59:59
 1 | 2014-09-01 00:00:00 | 2014-10-31 23:59:59
 2 | 2014-03-01 00:00:00 | 2014-03-31 23:59:59
 2 | 2014-09-01 00:00:00 | 2014-10-31 23:59:59

дБ <> скрипка здесь

0 голосов
/ 21 февраля 2020

С некоторыми изменениями я преобразовал ваш запрос в -

WITH StartingPoints AS
(
    SELECT ID, OCCURRANCE, ROW_NUMBER() OVER(ORDER BY OCCURRANCE) AS rn
    FROM dates_test A
    WHERE NOT EXISTS (
        SELECT *
        FROM dates_test B
        WHERE B.ID = A.ID AND EXTRACT(month FROM B.OCCURRANCE) = EXTRACT(month FROM A.OCCURRANCE) - 1)
),
EndingPoints AS
(
    SELECT ID, OCCURRANCE, ROW_NUMBER() OVER(ORDER BY OCCURRANCE) AS rn
    FROM dates_test A
    WHERE NOT EXISTS (
        SELECT *
        FROM dates_test B
        WHERE B.ID = A.ID AND EXTRACT(month FROM B.OCCURRANCE) = EXTRACT(month FROM A.OCCURRANCE) - 1)
)
SELECT S.ID, EXTRACT(month FROM S.OCCURRANCE) AS start_range, EXTRACT(month FROM E.OCCURRANCE) AS end_range
FROM StartingPoints S
JOIN EndingPoints E ON E.ID = S.ID AND E.rn = S.rn;

Из вашей скрипки кажется, что вы используете Oracle и Oracle не поддерживает псевдонимы таблиц с ключевым словом "AS" , Если это не ваш ожидаемый результат, пожалуйста, поделитесь ожидаемым результатом также.

Демо .

...