Редактируя исходный код, вы можете просто добавить substr()
:
processed <- aggregate(datetime ~ customer + substr(mccmnc, 1, 3), data = raw_data3, max)
В качестве альтернативы
tidyverse
решение:
Код
library(tidyverse)
df %>%
# Group by customer ID and first 3 characters of mccmnc
group_by(customer, mccmnc_group = substr(mccmnc, 1, 3)) %>%
# Get the max datetime per group
summarise(max_datetime = max(datetime)) %>%
# Put columns in original order
select(1, 3, 2)
# A tibble: 3 x 3
# Groups: customer [2]
customer max_datetime mccmnc_group
<fct> <dbl> <chr>
1 John Package 20181201 532
2 Miranda Nuts 20181227 451
3 Miranda Nuts 20181226 623
Данные
df <- data.frame(customer = c(rep("John Package", 3), rep("Miranda Nuts", 4)),
datetime = c(20181123, 20181201, 20181124, 20181125, 20181226, 20181226, 20181227),
mccmnc = c("532-08", "532-08", "532-10", "623-12", "623-30", "451-21", "451-23"))
> df
customer datetime mccmnc
1 John Package 20181123 532-08
2 John Package 20181201 532-08
3 John Package 20181124 532-10
4 Miranda Nuts 20181125 623-12
5 Miranda Nuts 20181226 623-30
6 Miranda Nuts 20181226 451-21
7 Miranda Nuts 20181227 451-23