Расширение диапазона дат, чтобы иметь строку для каждого случая, принимая во внимание частоту дозы - PullRequest
0 голосов
/ 01 февраля 2020

Привет! У меня есть набор данных, в котором нужно развернуть диапазоны дат так, чтобы для каждой даты приёма была отдельная строка. Задача осложняется переменной расписания (schedule): существует 19 вариантов расписания (см. прикреплённое изображение).

Для любых вариантов вида «nx a day» (n раз в день) мне просто нужно, чтобы доза была умножена на количество приёмов в день.

Другие варианты расписания (через неделю, через день и т. д.) необходимо учитывать при развёртывании диапазона дат (start–stop).

Структура набора данных

# Sample input (dput()-style literal): a 32-row tibble with the columns
# id, drug, start, stop, dose, units, route and schedule.
# `start` and `stop` are POSIXct values (seconds since the epoch, tzone "UTC").
# `schedule` is free text such as "4x a day", "Every 7 days" or
# "every other week".
# NOTE(review): presumably this is the table the pipeline further down refers
# to as `df1`; the literal itself is not assigned to a name here.
structure(list(id = c(1010002, 1010002, 1010002, 1010002, 1010002, 
1010002, 1010002, 1010002, 1010002, 1010002, 1010002, 1010004, 
1010004, 1010016, 1010021, 1010021, 1010026, 1010032, 1010032, 
1010032, 1010032, 1010055, 1010068, 1010107, 1020094, 2010116, 
2010116, 2010125, 2010125, 3010026, 4010026, 4020144), drug = c("Acetaminophen", 
"Acetaminophen", "Calcium Carbonate", "Cefalexin", "Cotrimoxazole", 
"Dexamethasone", "Dextrose 5%/Sodium Chloride 0.9%/Potassium Chloride 20mmol/L", 
"Lactulose", "Morphine", "Morphine", "Oxycodone Immediate Release", 
"Calcitriol", "Vitamin D3", "Heparin Lock", "CMV Immune Globulin 5%", 
"Heparin Lock", "Cysteamine", "CMV Immune Globulin 5%", "Hydromorphone", 
"Leucovorin", "Lorazepam", "Morphine", "Hydromorphone", "Salbutamol", 
"Lorazepam", "Warfarin", "Warfarin", "Heparin", "Lorazepam", 
"Salbutamol", "Sirolimus", "Hydromorphone"), start = structure(c(1247875200, 
1248048000, 1247702400, 1248652800, 1250121600, 1247875200, 1247788800, 
1248220800, 1247961600, 1247961600, 1248134400, 1235001600, 1235001600, 
1280102400, 1290988800, 1290211200, 1298332800, 1284854400, 1365811200, 
1363651200, 1363651200, 1317513600, 1291939200, 1409875200, 1263513600, 
1367452800, 1367366400, 1454803200, 1451088000, 1420070400, 1372809600, 
1342051200), class = c("POSIXct", "POSIXt"), tzone = "UTC"), 
    stop = structure(c(1250035200, 1248048000, 1249948800, 1249689600, 
    1250121600, 1248134400, 1247875200, 1248307200, 1248048000, 
    1248048000, 1248998400, 1235001600, 1235001600, 1280188800, 
    1290988800, 1290816000, 1298332800, 1287360000, 1367452800, 
    1364083200, 1364169600, 1317686400, 1292371200, 1409875200, 
    1264809600, 1371945600, 1371772800, 1456099200, 1455840000, 
    1420070400, 1373155200, 1342051200), class = c("POSIXct", 
    "POSIXt"), tzone = "UTC"), dose = c(1000, 1000, 200, 1000, 
    160, 8, 150, 10, 4, 15, 5, 0.25, 400, 2250, 2500, 250, 1, 
    2500, 0.25, 12, 2.2, 3, 6, 0.5, 0.25, 1, 2, 130, 1, 1, 0.5, 
    1), units = c("mg", "mg", "mg (ca++)", "mg", "mg (trimethoprim)", 
    "mg", "ml/hr", "ml", "mg", "mg", "mg", "mcg", "IU", "U", 
    "mg", "U", "drop(s)", "mg", "mg", "mg", "mg", "mg", "mg", 
    "ml", "mg", "mg", "mg", "U", "mg", "ml", "mg", "mg"), route = c("Oral", 
    "Oral", "Oral", "Oral", "Oral", "Intravenous", "Intravenous", 
    "Oral", "Intravenous", "Oral", "Oral", "Oral", "Oral", "Injection", 
    "Intravenous", "Injection", "Ophthalmic", "Intravenous", 
    "Intravenous", "Intravenous", "Intravenous", "Intravenous", 
    "Oral", "Inhalation", "Intravenous", "Oral", "Oral", "Intravenous", 
    "Intravenous", "Inhalation", "Oral", "Intravenous"), schedule = c("4x a day", 
    "4x a day", "3x a day", "3x a day", "2x a day", "1x a day", 
    "1x a day", "2x a day", "12x a day", "6x a day", "6x a day", 
    "every other day", "every other day", "Every 7 days", "every other week", 
    "Every 7 days", "24x a day", "every other week", "12x a day", 
    "8x a day", "24x a day", "48x a day", "8x a day", "48x a day", 
    "72x a day", "Every 3 days", "Every 3 days", "96x a day", 
    "96x a day", "72x a day", "every 4 days", "144x a day")), row.names = c(NA, 
-32L), class = c("tbl_df", "tbl", "data.frame"))

Необходимая структура набора данных (показана первая запись о лекарстве для идентификатора 1010002 после развёртывания, с рассчитанной суточной дозой dailydose):

# Expected result (dput()-style literal) for one Acetaminophen record of
# id 1010002: a 26-row tibble with the columns id, drug, start, dailydose,
# units and route. Consecutive `start` values are 86400 seconds (one day)
# apart, and `dailydose` is 4000 on every row.
structure(list(id = c(1010002, 1010002, 1010002, 1010002, 1010002, 
1010002, 1010002, 1010002, 1010002, 1010002, 1010002, 1010002, 
1010002, 1010002, 1010002, 1010002, 1010002, 1010002, 1010002, 
1010002, 1010002, 1010002, 1010002, 1010002, 1010002, 1010002
), drug = c("Acetaminophen", "Acetaminophen", "Acetaminophen", 
"Acetaminophen", "Acetaminophen", "Acetaminophen", "Acetaminophen", 
"Acetaminophen", "Acetaminophen", "Acetaminophen", "Acetaminophen", 
"Acetaminophen", "Acetaminophen", "Acetaminophen", "Acetaminophen", 
"Acetaminophen", "Acetaminophen", "Acetaminophen", "Acetaminophen", 
"Acetaminophen", "Acetaminophen", "Acetaminophen", "Acetaminophen", 
"Acetaminophen", "Acetaminophen", "Acetaminophen"), start = structure(c(1247875200, 
1247961600, 1248048000, 1248134400, 1248220800, 1248307200, 1248393600, 
1248480000, 1248566400, 1248652800, 1248739200, 1248825600, 1248912000, 
1248998400, 1249084800, 1249171200, 1249257600, 1249344000, 1249430400, 
1249516800, 1249603200, 1249689600, 1249776000, 1249862400, 1249948800, 
1250035200), class = c("POSIXct", "POSIXt"), tzone = "UTC"), 
    dailydose = c(4000, 4000, 4000, 4000, 4000, 4000, 4000, 4000, 
    4000, 4000, 4000, 4000, 4000, 4000, 4000, 4000, 4000, 4000, 
    4000, 4000, 4000, 4000, 4000, 4000, 4000, 4000), units = c("mg", 
    "mg", "mg", "mg", "mg", "mg", "mg", "mg", "mg", "mg", "mg", 
    "mg", "mg", "mg", "mg", "mg", "mg", "mg", "mg", "mg", "mg", 
    "mg", "mg", "mg", "mg", "mg"), route = c("Oral", "Oral", 
    "Oral", "Oral", "Oral", "Oral", "Oral", "Oral", "Oral", "Oral", 
    "Oral", "Oral", "Oral", "Oral", "Oral", "Oral", "Oral", "Oral", 
    "Oral", "Oral", "Oral", "Oral", "Oral", "Oral", "Oral", "Oral"
    )), row.names = c(NA, -26L), class = c("tbl_df", "tbl", "data.frame"
)) 

1 Ответ

1 голос
/ 01 февраля 2020

Мы можем создать столбец 'dailydose', извлекая числовую часть из 'schedule' и умножая её на 'dose', затем пройти по строкам с помощью map2, чтобы создать столбец-список дат из столбцов 'start' и 'stop', и развернуть его с помощью unnest.

library(dplyr)
library(tidyr)
library(readr)
library(purrr)
# Expand every prescription record into one row per dosing date.
#
# Two families of `schedule` values are handled differently:
#   * "<n>x a day"        -> one row per calendar day, dailydose = dose * n
#   * "Every <n> days", "every other day", "every other week"
#                         -> one row every n / 2 / 14 days, dailydose = dose
# (Multiplying the dose by the number in "Every 7 days" would inflate it
# sevenfold, and "every other ..." has no number at all.)
# A schedule that matches neither family leaves `step_days` as NA, which makes
# seq() fail loudly instead of silently producing wrong dates.
df1 %>%
  mutate(
    # Numeric part of the schedule ("4x a day" -> 4, "Every 7 days" -> 7).
    # Schedules without digits ("every other day") parse to NA; the parse
    # warning is expected for those, and as.numeric() drops the extra
    # attributes readr attaches so the column is a plain double.
    sched_n = as.numeric(suppressWarnings(parse_number(schedule))),
    per_day = grepl("x a day", schedule, fixed = TRUE),
    # Distance in days between two consecutive dosing dates.
    step_days = case_when(
      per_day ~ 1,
      grepl("other week", schedule, ignore.case = TRUE) ~ 14,
      grepl("other day", schedule, ignore.case = TRUE) ~ 2,
      TRUE ~ sched_n
    ),
    # Only "<n>x a day" schedules give several doses on the same date.
    dailydose = if_else(per_day, dose * sched_n, dose),
    # All calendar days from start to stop (inclusive) ...
    start = map2(start, stop, seq, by = "day"),
    # ... thinned to every `step_days`-th day, always keeping the first one.
    start = map2(start, step_days, function(days, step) {
      days[seq(1, length(days), by = step)]
    })
  ) %>%
  select(id, drug, start, dailydose, units, route) %>%
  unnest(c(start))
# A tibble: 272 x 6
#        id drug          start               dailydose units route
#     <dbl> <chr>         <dttm>                  <dbl> <chr> <chr>
# 1 1010002 Acetaminophen 2009-07-18 00:00:00      4000 mg    Oral 
# 2 1010002 Acetaminophen 2009-07-19 00:00:00      4000 mg    Oral 
# 3 1010002 Acetaminophen 2009-07-20 00:00:00      4000 mg    Oral 
# 4 1010002 Acetaminophen 2009-07-21 00:00:00      4000 mg    Oral 
# 5 1010002 Acetaminophen 2009-07-22 00:00:00      4000 mg    Oral 
# 6 1010002 Acetaminophen 2009-07-23 00:00:00      4000 mg    Oral 
# 7 1010002 Acetaminophen 2009-07-24 00:00:00      4000 mg    Oral 
# 8 1010002 Acetaminophen 2009-07-25 00:00:00      4000 mg    Oral 
# 9 1010002 Acetaminophen 2009-07-26 00:00:00      4000 mg    Oral 
#10 1010002 Acetaminophen 2009-07-27 00:00:00      4000 mg    Oral 
# … with 262 more rows

Если пакет readr недоступен, числовую часть можно извлечь с помощью str_extract и преобразовать в numeric, т. е. заменить parse_number(schedule) на as.numeric(stringr::str_extract(schedule, '[0-9]+'))

...