library(tidyverse)
# Example admissions table: one row per admission, so the same patient id
# can appear on several rows with different admit years.
data <- tibble::tibble(
  id         = c(1, 2, 3, 2, 1, 1, 1, 2),
  gender     = c("M", "F", "M", "F", "M", "M", "M", "F"),
  birth_year = c(1973, 1974, 1958, 1974, 1973, 1973, 1973, 1974),
  admit_year = c(2014, 2016, 2013, 2017, 2011, 2020, 2018, 2009)
)
# Print the table to inspect it.
data
# A tibble: 8 x 4
id gender birth_year admit_year
<dbl> <chr> <dbl> <dbl>
1 1 M 1973 2014
2 2 F 1974 2016
3 3 M 1958 2013
4 2 F 1974 2017
5 1 M 1973 2011
6 1 M 1973 2020
7 1 M 1973 2018
8 2 F 1974 2009
Чтобы оставить первую и последнюю строки (год первой и год последней госпитализации) для каждого id:
# Earliest and latest admission of every patient.
df <- data %>%
  # Sort chronologically within each id so that position in the group
  # reflects admission order.
  arrange(id, admit_year) %>%
  group_by(id) %>%
  # Row 1 is the earliest admission and row n() the latest; unique() avoids
  # returning the same row twice for a patient with a single admission.
  slice(unique(c(1L, n())))
# The result is still grouped by id.
df
# A tibble: 5 x 4
# Groups: id [3]
id gender birth_year admit_year
<dbl> <chr> <dbl> <dbl>
1 1 M 1973 2011
2 1 M 1973 2020
3 2 F 1974 2009
4 2 F 1974 2017
5 3 M 1958 2013
Чтобы оставить только последнюю строку (год последней госпитализации) для каждого id:
# Latest admission of every patient.
df2 <- data %>%
  # Sort chronologically within each id so the most recent admission ends up
  # as the last row of its group.
  arrange(id, admit_year) %>%
  group_by(id) %>%
  # Keep only the final row of each group, i.e. the last admit year.
  slice_tail(n = 1)
# The result is still grouped by id.
df2
# A tibble: 3 x 4
# Groups: id [3]
id gender birth_year admit_year
<dbl> <chr> <dbl> <dbl>
1 1 M 1973 2020
2 2 F 1974 2017
3 3 M 1958 2013
Чтобы оставить только первую строку (год первой госпитализации) для каждого id:
# Earliest admission of every patient.
df3 <- data %>%
  # Sort chronologically within each id so the oldest admission ends up as
  # the first row of its group.
  arrange(id, admit_year) %>%
  group_by(id) %>%
  # Keep only the leading row of each group, i.e. the first admit year.
  slice_head(n = 1)
# The result is still grouped by id.
df3
# A tibble: 3 x 4
# Groups: id [3]
id gender birth_year admit_year
<dbl> <chr> <dbl> <dbl>
1 1 M 1973 2011
2 2 F 1974 2009
3 3 M 1958 2013