Мы можем просуммировать значения `AMOUNT` в тех строках, где `DATE` равно первому (`first`) значению `DEFAULT_DATE`, для каждого `CUSTOMER`.
library(dplyr)
# Per customer, total the AMOUNT of the rows whose DATE equals the first
# DEFAULT_DATE seen in that customer's group.
df %>%
  group_by(CUSTOMER) %>%
  summarise(
    total_sum = sum(AMOUNT[DATE == first(DEFAULT_DATE)])
  )
# Printed result:
#   CUSTOMER total_sum
#      <int>     <int>
# 1        1       300
# 2        2         0
Чтобы получить количество клиентов в дефолте (у которых `CUSTOMER_DEFAULT == "Y"`) для каждого `CLASSIFICATION`, мы можем сделать:
# Per CLASSIFICATION, count the distinct customers flagged with
# CUSTOMER_DEFAULT == "Y".
df %>%
  group_by(CLASSIFICATION) %>%
  summarise(
    no_default_cust = n_distinct(CUSTOMER[CUSTOMER_DEFAULT == "Y"])
  )
Данные:
# Example data: six loan rows for two customers. The four flag/date/class
# columns are factors with explicitly ordered levels.
df <- data.frame(
  CUSTOMER = rep(c(1L, 2L), times = c(4L, 2L)),
  LOAN = c(101L, 102L, 103L, 104L, 201L, 202L),
  DATE = c(201601L, 201603L, 201501L, 201501L, 201601L, 201603L),
  AMOUNT = c(100L, 100L, 100L, 200L, 100L, 100L),
  LOAN_DEFAULT = factor(
    c("Y", "N", "Y", "N", "N", "N"),
    levels = c("N", "Y")
  ),
  CUSTOMER_DEFAULT = factor(
    rep(c("Y", "N"), times = c(4L, 2L)),
    levels = c("N", "Y")
  ),
  DEFAULT_DATE = factor(
    rep(c("201501", "-"), times = c(4L, 2L)),
    levels = c("-", "201501")
  ),
  CLASSIFICATION = factor(
    rep(c("S", "M"), times = c(4L, 2L)),
    levels = c("M", "S")
  )
)