Вот один вариант с dplyr/data.table: группируем по 'cname' и по идентификатору серий (run-length-id, rleid) от 'govstruct', а затем в summarise склеиваем через paste диапазон (range) значений 'year'.
library(dplyr)
library(stringr)
library(data.table)
# Collapse consecutive runs of 'govstruct' into one row each.
# `grp` is the run-length id of 'govstruct' (data.table::rleid); rows are
# grouped by 'cname' and that id, then summarised to the run's value and a
# "first-last" year label. The helper `grp` column is dropped at the end.
df1 %>%
  group_by(cname, grp = rleid(govstruct)) %>%
  summarise(
    govstructure = first(govstruct),
    years = str_c(min(year), max(year), sep = "-")
  ) %>%
  ungroup() %>%
  select(-grp)
# A tibble: 3 x 3
# cname govstructure years
# <chr> <int> <chr>
#1 China 3 1960-1982
#2 China 1 1983-2005
#3 China 3 2006-2012
Или мы также можем построить grp, основываясь на сравнении соседних элементов:
# Same summary as the rleid() variant, but the run id is built by hand:
# diff(govstruct) != 0 marks every position where the value changes, the
# leading TRUE opens the first run, and cumsum() turns the marks into ids.
# The result is ungrouped and the helper `grp` column removed so the output
# has the same shape (cname, govstructure, years) as the rleid() variant.
df1 %>%
  group_by(cname, grp = cumsum(c(TRUE, diff(govstruct) != 0))) %>%
  summarise(
    govstructure = first(govstruct),
    years = str_c(range(year), collapse = "-")
  ) %>%
  ungroup() %>%
  select(-grp)
Или с помощью data.table, тем же методом, что и в dplyr: группируем по 'cname' и по rleid от 'govstruct' и склеиваем через paste диапазон (range) значений 'year'.
library(data.table)
# data.table variant. setDT() converts df1 to a data.table by reference
# (df1 itself is changed). Rows are grouped by 'cname' and the run-length id
# of 'govstruct'; each group yields the run's value and a "first-last" year
# label. The helper `grp` column is removed by reference and the trailing []
# forces the result to print.
setDT(df1)
df1[,
  .(
    govstructure = first(govstruct),
    year = paste(range(year), collapse = "-")
  ),
  by = .(cname, grp = rleid(govstruct))
][, grp := NULL][]
# cname govstructure year
#1: China 3 1960-1982
#2: China 1 1983-2005
#3: China 3 2006-2012
или другой вариант с base R
# Base R variant.
# `grp` is a run id for 'govstruct': rle() gives the run values and lengths,
# and each run's index is repeated over its length.
grp <- with(rle(df1$govstruct), rep(seq_along(values), lengths))

# 'govstruct' is included in the grouping so each output row reports which
# value the run had; `grp` keeps separate runs of the same value apart.
# aggregate() looks `grp` up in the calling environment because it is not a
# column of df1. The helper `grp` column is dropped from the result so the
# output carries cname, govstruct and the "first-last" year label.
subset(
  aggregate(
    year ~ cname + govstruct + grp,
    data = df1,
    FUN = function(x) paste(range(x), collapse = "-")
  ),
  select = -grp
)
данные
# Example data: 53 yearly rows (1960-2012) for a single country, with
# 'govstruct' forming three consecutive runs: 23 x 3, 23 x 1, 7 x 3.
df1 <- structure(
  list(
    cname = rep("China", 53L),
    year = 1960:2012,
    govstruct = rep(c(3L, 1L, 3L), times = c(23L, 23L, 7L))
  ),
  class = "data.frame",
  row.names = c(NA, -53L)
)