Разбиение в SQL - PullRequest
       8

Разбиение в SQL

0 голосов
/ 07 февраля 2019

Я пытаюсь найти среднее (медианное) число раз, когда учетная запись (человек) была принята (appt_id) провайдером (provider_code) за определенный период. Приведенный ниже SQL-код возвращает не все значения provider_code, и я не могу понять почему. Желаемый результат: перечислены все значения provider_code, и для каждого указана медиана.

* У меня нет доступа к MS SQL Server 2012 или новее — да, мы сильно отстали от времени, и да, это действительно делает жизнь намного сложнее.

Sample_data

-- Attempted median of appointments per provider and office (T-SQL, pre-2012).
-- Each qualifying row is numbered twice inside its (office_location,
-- provider_code) partition: once by account ascending, once descending.
-- Only rows whose two numbers differ by at most 1 are kept; the kept rows are
-- then counted per partition and that count is returned as [Median].
-- NOTE(review): ROW_NUMBER() orders rows with equal account values in an
-- unspecified way, so the two numberings are not guaranteed to mirror each
-- other. A partition in which no row passes the filter does not appear in the
-- output at all. Also, the value returned is a count of kept rows, not a
-- middle value.
SELECT
    provider_code,
    office_location,
    CONVERT(INT, COUNT(account)) AS Median
FROM (
    SELECT
        office_location,
        provider_code,
        account,
        appt_date,
        dept_code,
        appt_status,
        appt_class,
        -- Position of the row counting up from the smallest account.
        ROW_NUMBER() OVER (
            PARTITION BY office_location, provider_code
            ORDER BY account ASC
        ) AS rn_asc,
        -- Position of the row counting down from the largest account.
        ROW_NUMBER() OVER (
            PARTITION BY office_location, provider_code
            ORDER BY account DESC
        ) AS rn_desc
    FROM appointments_view WITH (NOLOCK)
    WHERE account IS NOT NULL
      AND appt_date BETWEEN '1/1/17' /*24 month prior*/ AND '1/1/19'
) AS ranked
-- Keep rows where the ascending and descending positions meet (+/- 1).
WHERE rn_asc BETWEEN rn_desc - 1 AND rn_desc + 1
GROUP BY
    office_location,
    provider_code
ORDER BY
    office_location,
    provider_code

1 Ответ

0 голосов
/ 07 февраля 2019

Для вычисления медианы можно использовать оконную функцию PERCENTILE_CONT или PERCENTILE_DISC
(MS SQL Server 2012 и новее).

Пример фрагмента:

-- Sample fixture: a table variable that stands in for the appointments data.
-- appt_id is auto-numbered starting at 4046100; appt_date and office_location
-- fall back to their defaults when the INSERT does not supply them.
DECLARE @Appointments TABLE (
    appt_id         INT         PRIMARY KEY IDENTITY(4046100, 1),
    appt_date       DATE        NOT NULL DEFAULT GETDATE(),
    account         INT         NOT NULL,
    provider_code   VARCHAR(10) NOT NULL,
    office_location CHAR(3)     NOT NULL DEFAULT 'REN',
    appt_class      CHAR(3)     NOT NULL
);

-- Eight appointments for provider FOO1: account 100001 has 1,
-- account 100002 has 4 and account 100003 has 3.
INSERT INTO @Appointments (appt_date, account, provider_code, appt_class)
VALUES
    ('2019-02-01', 100001, 'FOO1', 'IND'),
    ('2019-02-01', 100002, 'FOO1', 'IND'),
    ('2019-02-01', 100002, 'FOO1', 'PSY'),
    ('2019-02-01', 100002, 'FOO1', 'IND'),
    ('2019-02-01', 100002, 'FOO1', 'IND'),
    ('2019-02-01', 100003, 'FOO1', 'IND'),
    ('2019-02-01', 100003, 'FOO1', 'IND'),
    ('2019-02-01', 100003, 'FOO1', 'IND');

-- Median number of appointments per account for each provider and office,
-- using PERCENTILE_CONT (SQL Server 2012 and later).
-- Inner query: one row per (provider_code, office_location, account) holding
-- that account's appointment count and the median of those counts across the
-- partition. Outer query: collapses the repeated per-partition median to one
-- row per provider and office.
SELECT
    per_account.provider_code,
    per_account.office_location,
    MAX(per_account.median_appt_count) AS MedianApt
FROM (
    SELECT
        provider_code,
        office_location,
        account,
        COUNT(appt_id) AS appt_count,
        -- Continuous (interpolated) median of the per-account counts.
        -- PERCENTILE_DISC(0.5) with the same WITHIN GROUP / OVER clauses
        -- would return an actual value from the set instead.
        PERCENTILE_CONT(0.5)
            WITHIN GROUP (ORDER BY COUNT(appt_id))
            OVER (PARTITION BY provider_code, office_location) AS median_appt_count
    FROM @Appointments
    WHERE account IS NOT NULL
      AND appt_date >= CAST('2017-02-01' AS DATE)
      AND appt_date <= CAST('2019-02-01' AS DATE)
    GROUP BY
        provider_code,
        office_location,
        account
) AS per_account
GROUP BY
    per_account.provider_code,
    per_account.office_location
ORDER BY
    per_account.provider_code,
    per_account.office_location;

Возвращает:

provider_code   office_location     MedianApt
FOO1            REN                 3

В версиях MS SQL Server до 2012 может подойти такой пример:

-- Sample fixture: a table variable that stands in for the appointments data.
-- appt_id is auto-numbered starting at 4046100; appt_date and office_location
-- fall back to their defaults when the INSERT does not supply them.
DECLARE @Appointments TABLE (
    appt_id         INT         PRIMARY KEY IDENTITY(4046100, 1),
    appt_date       DATE        NOT NULL DEFAULT GETDATE(),
    account         INT         NOT NULL,
    provider_code   VARCHAR(10) NOT NULL,
    office_location CHAR(3)     NOT NULL DEFAULT 'REN',
    appt_class      CHAR(3)     NOT NULL
);

-- Eight appointments for provider FOO1: account 100001 has 1,
-- account 100002 has 4 and account 100003 has 3.
INSERT INTO @Appointments (appt_date, account, provider_code, appt_class)
VALUES
    ('2019-02-01', 100001, 'FOO1', 'IND'),
    ('2019-02-01', 100002, 'FOO1', 'IND'),
    ('2019-02-01', 100002, 'FOO1', 'PSY'),
    ('2019-02-01', 100002, 'FOO1', 'IND'),
    ('2019-02-01', 100002, 'FOO1', 'IND'),
    ('2019-02-01', 100003, 'FOO1', 'IND'),
    ('2019-02-01', 100003, 'FOO1', 'IND'),
    ('2019-02-01', 100003, 'FOO1', 'IND')
    -- Optional fourth account (2 appointments); uncomment to try an even
    -- number of accounts:
    -- , ('2019-02-01', 100004, 'FOO1', 'IND')
    -- , ('2019-02-01', 100004, 'FOO1', 'IND')
;

-- Median number of appointments per account for each provider and office,
-- written without PERCENTILE_CONT so it does not require SQL Server 2012+.
-- Inner query: one row per (provider_code, office_location, account) with
--   TotalAppointments - that account's appointment count,
--   rn                - the row's position when counts are sorted ascending,
--   cnt               - the number of accounts in the partition.
-- Outer query: keeps the middle row (odd cnt) or the two middle rows
-- (even cnt) and averages them.
select provider_code, office_location
    -- COUNT() returns int and AVG over int truncates (AVG of 2 and 3 is 2),
    -- so cast to float to get 2.5 when the number of accounts is even.
    , AVG(CAST(TotalAppointments AS float)) AS MedianApt
from
(
    select provider_code, office_location, account
    , COUNT(appt_id) as TotalAppointments
    , ROW_NUMBER() OVER (PARTITION BY provider_code, office_location ORDER BY COUNT(appt_id) ASC) AS rn
    , COUNT(*) OVER (PARTITION BY provider_code, office_location) AS cnt
    from @Appointments
    where account IS NOT NULL 
      and appt_date BETWEEN cast('2017-02-01' as date) AND cast('2019-02-01' as date)
    group by provider_code, office_location, account
) q
-- (cnt+1)/2 is the middle position: FLOOR and CEILING coincide for odd cnt
-- and select the two neighbouring positions for even cnt.
where rn in (FLOOR((cnt+1)*0.5), CEILING((cnt+1)*0.5))
group by provider_code, office_location
order by provider_code, office_location;
...