Oracle row_number / rank с определенной логикой - PullRequest
3 голосов
/ 03 апреля 2019

Мне нужно выбрать способ ранжирования строк с помощью row_number или rank. Я пробовал оба варианта (столбцы RNK1 и RNK2), но не уверен, возможно ли это вообще. Пожалуйста, посмотрите на фактический и ожидаемый результаты.

-- Asker's first attempt: number only the flag = 1 rows of the inline sample data.
with tmp as (
    select 'username1' as username, to_date('2019-04-01 00:00','YYYY-MM-DD HH24:MI') as startdate, 1 as flag from dual union all
    select 'username1' as username, to_date('2019-04-01 01:00','YYYY-MM-DD HH24:MI') as startdate, 0 as flag  from dual union all
    select 'username1' as username, to_date('2019-04-01 02:00','YYYY-MM-DD HH24:MI') as startdate, 1 as flag  from dual union all
    select 'username1' as username, to_date('2019-04-01 03:00','YYYY-MM-DD HH24:MI') as startdate, 1 as flag  from dual union all
    select 'username1' as username, to_date('2019-04-01 04:00','YYYY-MM-DD HH24:MI') as startdate, 0 as flag  from dual union all
    select 'username1' as username, to_date('2019-04-02 05:00','YYYY-MM-DD HH24:MI') as startdate, 0 as flag  from dual union all
    select 'username1' as username, to_date('2019-04-02 01:00','YYYY-MM-DD HH24:MI') as startdate, 1 as flag  from dual union all
    select 'username1' as username, to_date('2019-04-02 02:00','YYYY-MM-DD HH24:MI') as startdate, 1 as flag  from dual union all
    select 'username1' as username, to_date('2019-04-02 03:00','YYYY-MM-DD HH24:MI') as startdate, 1 as flag  from dual union all
    select 'username1' as username, to_date('2019-04-02 04:00','YYYY-MM-DD HH24:MI') as startdate, 0 as flag  from dual
) 
select 
    tmp.*,
    -- The window is ordered by its own partition key, so every row in the partition ties:
    -- row_number() then hands out 1..n in an arbitrary order (startdate plays no role).
    case when flag = 1 then row_number() over(partition by flag order by flag) else null end as rnk1,
    -- For the same reason rank() returns 1 for every flag = 1 row.
    case when flag = 1 then rank() over(partition by flag order by flag) else null end as rnk2
from tmp
order by startdate, username

Фактический:

+-------------+--------------------+--------+--------+--------+
| "USERNAME"  | "STARTDATE"        | "FLAG" | "RNK1" | "RNK2" |
+-------------+--------------------+--------+--------+--------+
| "username1" | 01-APR-19 00:00:00 | 1      | 6      | 1      |
| "username1" | 01-APR-19 01:00:00 | 0      |        |        |
| "username1" | 01-APR-19 02:00:00 | 1      | 4      | 1      |
| "username1" | 01-APR-19 03:00:00 | 1      | 3      | 1      |
| "username1" | 01-APR-19 04:00:00 | 0      |        |        |
| "username1" | 02-APR-19 01:00:00 | 1      | 5      | 1      |
| "username1" | 02-APR-19 02:00:00 | 1      | 1      | 1      |
| "username1" | 02-APR-19 03:00:00 | 1      | 2      | 1      |
| "username1" | 02-APR-19 04:00:00 | 0      |        |        |
| "username1" | 02-APR-19 05:00:00 | 0      |        |        |
+-------------+--------------------+--------+--------+--------+

Ожидаемое:

+-------------+--------------------+--------+--------+--------+
| "USERNAME"  | "STARTDATE"        | "FLAG" | "RNK1" | "RNK2" |
+-------------+--------------------+--------+--------+--------+
| "username1" | 01-APR-19 00:00:00 | 1      | 1      | 1      |
| "username1" | 01-APR-19 01:00:00 | 0      |        |        |
| "username1" | 01-APR-19 02:00:00 | 1      | 2      | 2      |
| "username1" | 01-APR-19 03:00:00 | 1      | 2      | 2      |
| "username1" | 01-APR-19 04:00:00 | 0      |        |        |
| "username1" | 02-APR-19 01:00:00 | 1      | 3      | 3      |
| "username1" | 02-APR-19 02:00:00 | 1      | 3      | 3      |
| "username1" | 02-APR-19 03:00:00 | 1      | 3      | 3      |
| "username1" | 02-APR-19 04:00:00 | 0      |        |        |
| "username1" | 02-APR-19 05:00:00 | 0      |        |        |
+-------------+--------------------+--------+--------+--------+

Спасибо всем за быстрый ответ. Я начал играть с вашими предложениями и снова застрял

-- Asker's second attempt, with a threshold column added to the sample data.
-- For flag = 1 rows: grp = position of the row in the whole set
--                          - (position among the flag = 1 rows of the same user - 1).
-- All other rows get NULL.
WITH tmp AS (
    SELECT 'username1' AS username, TO_DATE('2019-04-01 00:00', 'YYYY-MM-DD HH24:MI') AS startdate, 1 AS flag, 1 AS threshold FROM dual
    UNION ALL
    SELECT 'username1' AS username, TO_DATE('2019-04-01 01:00', 'YYYY-MM-DD HH24:MI') AS startdate, 0 AS flag, NULL AS threshold FROM dual
    UNION ALL
    SELECT 'username1' AS username, TO_DATE('2019-04-01 02:00', 'YYYY-MM-DD HH24:MI') AS startdate, 1 AS flag, 1 AS threshold FROM dual
    UNION ALL
    SELECT 'username1' AS username, TO_DATE('2019-04-01 03:00', 'YYYY-MM-DD HH24:MI') AS startdate, 1 AS flag, NULL AS threshold FROM dual
    UNION ALL
    SELECT 'username1' AS username, TO_DATE('2019-04-01 04:00', 'YYYY-MM-DD HH24:MI') AS startdate, 0 AS flag, NULL AS threshold FROM dual
    UNION ALL
    SELECT 'username1' AS username, TO_DATE('2019-04-01 05:00', 'YYYY-MM-DD HH24:MI') AS startdate, 0 AS flag, NULL AS threshold FROM dual
    UNION ALL
    SELECT 'username1' AS username, TO_DATE('2019-04-02 01:00', 'YYYY-MM-DD HH24:MI') AS startdate, 1 AS flag, 1 AS threshold FROM dual
    UNION ALL
    SELECT 'username1' AS username, TO_DATE('2019-04-02 02:00', 'YYYY-MM-DD HH24:MI') AS startdate, 1 AS flag, NULL AS threshold FROM dual
    UNION ALL
    SELECT 'username1' AS username, TO_DATE('2019-04-02 03:00', 'YYYY-MM-DD HH24:MI') AS startdate, 1 AS flag, 1 AS threshold FROM dual
    UNION ALL
    SELECT 'username1' AS username, TO_DATE('2019-04-02 04:00', 'YYYY-MM-DD HH24:MI') AS startdate, 1 AS flag, NULL AS threshold FROM dual
    UNION ALL
    SELECT 'username1' AS username, TO_DATE('2019-04-02 05:00', 'YYYY-MM-DD HH24:MI') AS startdate, 0 AS flag, NULL AS threshold FROM dual
)
SELECT
    username,
    startdate,
    flag,
    threshold,
    CASE
        WHEN flag = 1 THEN
            DENSE_RANK() OVER (ORDER BY startdate, username, threshold)
            - ROW_NUMBER() OVER (PARTITION BY flag, username ORDER BY startdate, username)
            + 1
    END AS grp
FROM tmp
ORDER BY
    startdate,
    username

Фактический:

+-------------+--------------------+--------+-------------+-------+
| "USERNAME"  | "STARTDATE"        | "FLAG" | "THRESHOLD" | "GRP" |
+-------------+--------------------+--------+-------------+-------+
| "username1" | 01-APR-19 00:00:00 | 1      | 1           | 1     |
| "username1" | 01-APR-19 01:00:00 | 0      |             |       |
| "username1" | 01-APR-19 02:00:00 | 1      | 1           | 2     |
| "username1" | 01-APR-19 03:00:00 | 1      |             | 2     |
| "username1" | 01-APR-19 04:00:00 | 0      |             |       |
| "username1" | 01-APR-19 05:00:00 | 0      |             |       |
| "username1" | 02-APR-19 01:00:00 | 1      | 1           | 4     |
| "username1" | 02-APR-19 02:00:00 | 1      |             | 4     |
| "username1" | 02-APR-19 03:00:00 | 1      | 1           | 4     |
| "username1" | 02-APR-19 04:00:00 | 1      |             | 4     |
| "username1" | 02-APR-19 05:00:00 | 0      |             |       |
+-------------+--------------------+--------+-------------+-------+

Ожидаемое:

+-------------+--------------------+--------+-------------+-------+
| "USERNAME"  | "STARTDATE"        | "FLAG" | "THRESHOLD" | "GRP" |
+-------------+--------------------+--------+-------------+-------+
| "username1" | 01-APR-19 00:00:00 | 1      | 1           | 1     |
| "username1" | 01-APR-19 01:00:00 | 0      |             |       |
| "username1" | 01-APR-19 02:00:00 | 1      | 1           | 2     |
| "username1" | 01-APR-19 03:00:00 | 1      |             | 2     |
| "username1" | 01-APR-19 04:00:00 | 0      |             |       |
| "username1" | 01-APR-19 05:00:00 | 0      |             |       |
| "username1" | 02-APR-19 01:00:00 | 1      | 1           | 4     |
| "username1" | 02-APR-19 02:00:00 | 1      |             | 4     |
| "username1" | 02-APR-19 03:00:00 | 1      | 1           | 5     |
| "username1" | 02-APR-19 04:00:00 | 1      |             | 5     |
| "username1" | 02-APR-19 05:00:00 | 0      |             |       |
+-------------+--------------------+--------+-------------+-------+

Ответы [ 4 ]

2 голосов
/ 03 апреля 2019

Предполагая, что вы группируете последовательные строки с flag = 1, это можно сделать, комбинируя технику Tabibitosan и dense_rank, например:

-- Give every run of consecutive flag = 1 rows (in startdate order) one rank: 1, 2, 3, ...
-- Rows with any other flag get NULL.
with tmp as (
    select 'username1' as username, to_date('2019-04-01 00:00', 'YYYY-MM-DD HH24:MI') as startdate, 1 as flag from dual
    union all
    select 'username1' as username, to_date('2019-04-01 01:00', 'YYYY-MM-DD HH24:MI') as startdate, 0 as flag from dual
    union all
    select 'username1' as username, to_date('2019-04-01 02:00', 'YYYY-MM-DD HH24:MI') as startdate, 1 as flag from dual
    union all
    select 'username1' as username, to_date('2019-04-01 03:00', 'YYYY-MM-DD HH24:MI') as startdate, 1 as flag from dual
    union all
    select 'username1' as username, to_date('2019-04-01 04:00', 'YYYY-MM-DD HH24:MI') as startdate, 0 as flag from dual
    union all
    select 'username1' as username, to_date('2019-04-02 05:00', 'YYYY-MM-DD HH24:MI') as startdate, 0 as flag from dual
    union all
    select 'username1' as username, to_date('2019-04-02 01:00', 'YYYY-MM-DD HH24:MI') as startdate, 1 as flag from dual
    union all
    select 'username1' as username, to_date('2019-04-02 02:00', 'YYYY-MM-DD HH24:MI') as startdate, 1 as flag from dual
    union all
    select 'username1' as username, to_date('2019-04-02 03:00', 'YYYY-MM-DD HH24:MI') as startdate, 1 as flag from dual
    union all
    select 'username1' as username, to_date('2019-04-02 04:00', 'YYYY-MM-DD HH24:MI') as startdate, 0 as flag from dual
),
-- Tabibitosan step: for flag = 1 rows, (position in the whole set) minus
-- (position among the flag = 1 rows) stays constant inside a run of consecutive
-- flag = 1 rows and grows from one run to the next.
islands as (
    select
        username,
        startdate,
        flag,
        case flag
            when 1 then
                row_number() over (order by startdate)
                - row_number() over (partition by flag order by startdate)
        end as island_key
    from tmp
)
select
    username,
    startdate,
    flag,
    -- dense_rank turns the (gappy) island keys into consecutive numbers.
    case flag
        when 1 then dense_rank() over (partition by flag order by island_key)
    end as rnk
from islands
order by
    startdate,
    username;

USERNAME  STARTDATE                 FLAG        RNK
--------- ------------------- ---------- ----------
username1 01/04/2019 00:00:00          1          1
username1 01/04/2019 01:00:00          0 
username1 01/04/2019 02:00:00          1          2
username1 01/04/2019 03:00:00          1          2
username1 01/04/2019 04:00:00          0 
username1 02/04/2019 01:00:00          1          3
username1 02/04/2019 02:00:00          1          3
username1 02/04/2019 03:00:00          1          3
username1 02/04/2019 04:00:00          0 
username1 02/04/2019 05:00:00          0 

Я обновил запрос, чтобы учесть дополнительный столбец порогового значения:

-- Rank runs of consecutive flag = 1 rows per user; inside a run, every row that carries
-- a threshold starts a new rank. Rows with any other flag get NULL.
WITH      tmp AS (select 'username1' as username, to_date('2019-04-01 00:00','YYYY-MM-DD HH24:MI') as startdate, 1 as flag, 1 as threshold from dual union all
                  select 'username1' as username, to_date('2019-04-01 01:00','YYYY-MM-DD HH24:MI') as startdate, 0 as flag, null as threshold from dual union all
                  select 'username1' as username, to_date('2019-04-01 02:00','YYYY-MM-DD HH24:MI') as startdate, 1 as flag, 1 as threshold from dual union all
                  select 'username1' as username, to_date('2019-04-01 03:00','YYYY-MM-DD HH24:MI') as startdate, 1 as flag, null as threshold from dual union all
                  select 'username1' as username, to_date('2019-04-01 04:00','YYYY-MM-DD HH24:MI') as startdate, 0 as flag, null as threshold from dual union all
                  select 'username1' as username, to_date('2019-04-01 05:00','YYYY-MM-DD HH24:MI') as startdate, 0 as flag, null as threshold from dual union all
                  select 'username1' as username, to_date('2019-04-02 01:00','YYYY-MM-DD HH24:MI') as startdate, 1 as flag, null as threshold from dual union all
                  select 'username1' as username, to_date('2019-04-02 02:00','YYYY-MM-DD HH24:MI') as startdate, 1 as flag, 1 as threshold from dual union all
                  select 'username1' as username, to_date('2019-04-02 03:00','YYYY-MM-DD HH24:MI') as startdate, 1 as flag, 1 as threshold from dual union all
                  select 'username1' as username, to_date('2019-04-02 04:00','YYYY-MM-DD HH24:MI') as startdate, 1 as flag, null as threshold from dual union all
                  select 'username1' as username, to_date('2019-04-02 05:00','YYYY-MM-DD HH24:MI') as startdate, 0 as flag, null as threshold from dual),
  tabibitosan AS (SELECT tmp.*,
                         -- Tabibitosan: per user, (position among all rows) minus (position among
                         -- rows with the same flag) is constant within a run of consecutive
                         -- flag = 1 rows.
                         CASE
                           WHEN flag = 1 THEN
                            row_number() over(PARTITION BY username ORDER BY startdate) - row_number() over(PARTITION BY username, flag ORDER BY startdate)
                         END grp,
                         -- Running total of thresholds over the user's flag = 1 rows.
                         -- COALESCE keeps the total at 0 (not NULL) before the first threshold is
                         -- seen: Oracle sorts NULL last in ascending order, which would rank those
                         -- leading rows after later rows of the same run.
                         COALESCE(SUM(CASE WHEN flag = 1 THEN threshold END) OVER (PARTITION BY username, flag ORDER BY startdate), 0) threshold_sum -- assumes threshold is 1 or null; change the case statement inside the sum if this isn't the case
                  FROM   tmp)
SELECT username,
       startdate,
       flag,
       threshold,
       CASE
         WHEN flag = 1 THEN
          -- Partition by username as well: grp and threshold_sum are computed per user, so
          -- ranking them across users would interleave unrelated runs.
          dense_rank() over(PARTITION BY username, flag ORDER BY grp, threshold_sum)
       END rnk
FROM   tabibitosan
ORDER  BY startdate,
          username;

USERNAME  STARTDATE         FLAG  THRESHOLD        RNK
--------- ----------- ---------- ---------- ----------
username1 01/04/2019           1          1          1
username1 01/04/2019           0            
username1 01/04/2019           1          1          2
username1 01/04/2019           1                     2
username1 01/04/2019           0            
username1 01/04/2019           0            
username1 02/04/2019           1                     3
username1 02/04/2019           1          1          4
username1 02/04/2019           1          1          5
username1 02/04/2019           1                     5
username1 02/04/2019           0            

Обратите внимание: предполагается, что столбец порогового значения может быть только 1 или NULL; если это не так, вам придется изменить условную сумму в соответствии с вашими данными.

Я также обновил разделение (PARTITION BY) в аналитических функциях, включив в него столбец имени пользователя, поскольку предполагаю, что это первичный ключ для данных.

Наконец, обратите внимание, что я немного изменил ваши выборочные данные, чтобы показать: если порог равен NULL для первой строки в группе с флагом = 1, эта строка образует отдельную группу, если в следующей строке порог установлен. Если это не то поведение, которое вам нужно, обновите свой вопрос, указав желаемую логику.

1 голос
/ 03 апреля 2019
-- For flag = 1 rows, rn is the running count (per user, in startdate order) of rows that
-- carry a non-null threshold; all other rows get NULL.
with tmp as (
select 'username1' as username, to_date('2019-04-01 00:00','YYYY-MM-DD HH24:MI') as startdate, 1 as flag, 1 as threshold from dual union all
select 'username1' as username, to_date('2019-04-01 01:00','YYYY-MM-DD HH24:MI') as startdate, 0 as flag, null as threshold from dual union all
select 'username1' as username, to_date('2019-04-01 02:00','YYYY-MM-DD HH24:MI') as startdate, 1 as flag, 1 as threshold from dual union all
select 'username1' as username, to_date('2019-04-01 03:00','YYYY-MM-DD HH24:MI') as startdate, 1 as flag, null as threshold from dual union all
select 'username1' as username, to_date('2019-04-01 04:00','YYYY-MM-DD HH24:MI') as startdate, 0 as flag, null as threshold from dual union all
select 'username1' as username, to_date('2019-04-01 05:00','YYYY-MM-DD HH24:MI') as startdate, 0 as flag, null as threshold from dual union all
select 'username1' as username, to_date('2019-04-02 01:00','YYYY-MM-DD HH24:MI') as startdate, 1 as flag, 1 as threshold from dual union all
select 'username1' as username, to_date('2019-04-02 02:00','YYYY-MM-DD HH24:MI') as startdate, 1 as flag, null as threshold from dual union all
select 'username1' as username, to_date('2019-04-02 03:00','YYYY-MM-DD HH24:MI') as startdate, 1 as flag, 1 as threshold from dual union all
select 'username1' as username, to_date('2019-04-02 04:00','YYYY-MM-DD HH24:MI') as startdate, 1 as flag, null as threshold from dual union all
select 'username1' as username, to_date('2019-04-02 05:00','YYYY-MM-DD HH24:MI') as startdate, 0 as flag, null as threshold from dual
)
select
    username,
    startdate,
    flag,
    threshold,
    -- count(threshold) ignores NULLs, so the running count only advances on rows that
    -- have a threshold.
    case
        when flag = 1 then count(threshold) over (partition by username order by startdate)
    end as rn
from tmp
-- Without an ORDER BY the row order of the result is not guaranteed.
order by
    startdate,
    username;

USERNAME  STARTDATE                 FLAG  THRESHOLD         RN
--------- ------------------- ---------- ---------- ----------
username1 2019-04-01 00:00:00          1          1          1
username1 2019-04-01 01:00:00          0                      
username1 2019-04-01 02:00:00          1          1          2
username1 2019-04-01 03:00:00          1                     2
username1 2019-04-01 04:00:00          0                      
username1 2019-04-01 05:00:00          0                      
username1 2019-04-02 01:00:00          1          1          3
username1 2019-04-02 02:00:00          1                     3
username1 2019-04-02 03:00:00          1          1          4
username1 2019-04-02 04:00:00          1                     4
username1 2019-04-02 05:00:00          0                      

11 rows selected.
1 голос
/ 03 апреля 2019

Попробуйте так, как показано ниже:

-- Label runs of consecutive flag = 1 rows per user; rows with any other flag get NULL.
with tmp as (
    select 'username1' as username, to_date('2019-04-01 00:00','YYYY-MM-DD HH24:MI') as startdate, 1 as flag from dual union all
    select 'username1' as username, to_date('2019-04-01 01:00','YYYY-MM-DD HH24:MI') as startdate, 0 as flag  from dual union all
    select 'username1' as username, to_date('2019-04-01 02:00','YYYY-MM-DD HH24:MI') as startdate, 1 as flag  from dual union all
    select 'username1' as username, to_date('2019-04-01 03:00','YYYY-MM-DD HH24:MI') as startdate, 1 as flag  from dual union all
    select 'username1' as username, to_date('2019-04-01 04:00','YYYY-MM-DD HH24:MI') as startdate, 0 as flag  from dual union all
    select 'username1' as username, to_date('2019-04-02 05:00','YYYY-MM-DD HH24:MI') as startdate, 0 as flag  from dual union all
    select 'username1' as username, to_date('2019-04-02 01:00','YYYY-MM-DD HH24:MI') as startdate, 1 as flag  from dual union all
    select 'username1' as username, to_date('2019-04-02 02:00','YYYY-MM-DD HH24:MI') as startdate, 1 as flag  from dual union all
    select 'username1' as username, to_date('2019-04-02 03:00','YYYY-MM-DD HH24:MI') as startdate, 1 as flag  from dual union all
    select 'username1' as username, to_date('2019-04-02 04:00','YYYY-MM-DD HH24:MI') as startdate, 0 as flag  from dual
)
select
    tmp.*,
    -- Both windows are partitioned by username: the subtraction only makes sense when
    -- the two positions are counted over the same user's rows.
    -- For a flag = 1 row the result is 1 + the number of that user's non-flag-1 rows that
    -- precede it, so the value is the same within a run but skips numbers when several
    -- flag = 0 rows are adjacent.
    dense_rank() over(partition by username order by startdate, username)-
     (case when flag=1 then
     row_number()over(partition by flag, username order by startdate, username) - flag
      else null end) as grp
from tmp
order by
startdate, username


USERNAME    STARTDATE   FLAG    GRP
username1   01-APR-19   1       1
username1   01-APR-19   0   
username1   01-APR-19   1       2
username1   01-APR-19   1       2
username1   01-APR-19   0   
username1   02-APR-19   1       3
username1   02-APR-19   1       3
username1   02-APR-19   1       3
username1   02-APR-19   0   
username1   02-APR-19   0   

Демо онлайн

0 голосов
/ 03 апреля 2019
-- Number runs of consecutive flag = 1 rows per user with MATCH_RECOGNIZE;
-- rows with any other flag get NULL.
with tmp as (
select 'username1' as username, to_date('2019-04-01 00:00','YYYY-MM-DD HH24:MI') as startdate, 1 as flag from dual union all
select 'username1' as username, to_date('2019-04-01 01:00','YYYY-MM-DD HH24:MI') as startdate, 0 as flag  from dual union all
select 'username1' as username, to_date('2019-04-01 02:00','YYYY-MM-DD HH24:MI') as startdate, 1 as flag  from dual union all
select 'username1' as username, to_date('2019-04-01 03:00','YYYY-MM-DD HH24:MI') as startdate, 1 as flag  from dual union all
select 'username1' as username, to_date('2019-04-01 04:00','YYYY-MM-DD HH24:MI') as startdate, 0 as flag  from dual union all
select 'username1' as username, to_date('2019-04-02 05:00','YYYY-MM-DD HH24:MI') as startdate, 0 as flag  from dual union all
select 'username1' as username, to_date('2019-04-02 01:00','YYYY-MM-DD HH24:MI') as startdate, 1 as flag  from dual union all
select 'username1' as username, to_date('2019-04-02 02:00','YYYY-MM-DD HH24:MI') as startdate, 1 as flag  from dual union all
select 'username1' as username, to_date('2019-04-02 03:00','YYYY-MM-DD HH24:MI') as startdate, 1 as flag  from dual union all
select 'username1' as username, to_date('2019-04-02 04:00','YYYY-MM-DD HH24:MI') as startdate, 0 as flag  from dual
)
select
    username,
    startdate,
    flag,
    case when flag = 1 then match_num end as rn
from tmp
match_recognize(
partition by username
order by startdate
measures match_number() AS match_num
all rows per match
-- One match = a (possibly empty) run of flag = 0 rows followed by a (possibly empty)
-- run of flag = 1 rows, so the match number advances once per run of flag = 1 rows.
pattern (s* f*)
define f as f.flag = 1, s as s.flag = 0
)
-- Without an ORDER BY on the outer query the row order of the result is not guaranteed.
order by
    startdate,
    username;

USERNAME  STARTDATE                 FLAG         RN
--------- ------------------- ---------- ----------
username1 2019-04-01 00:00:00          1          1
username1 2019-04-01 01:00:00          0           
username1 2019-04-01 02:00:00          1          2
username1 2019-04-01 03:00:00          1          2
username1 2019-04-01 04:00:00          0           
username1 2019-04-02 01:00:00          1          3
username1 2019-04-02 02:00:00          1          3
username1 2019-04-02 03:00:00          1          3
username1 2019-04-02 04:00:00          0           
username1 2019-04-02 05:00:00          0           

10 rows selected. 
...