У меня есть фрейм данных (data frame), который выглядит примерно так:
user booking_date origin destination price sale_channel
1 user5 2018-11-01 MAD PMI 58.20 online
2 user7 2018-11-01 DUB MAD 147.50 online
3 user4 2018-11-02 TFS MAD 24.05 online
4 user7 2018-11-01 LPA MAD 37.30 call center
5 user1 2018-11-01 AMS MAD 149.74 travel agency
6 user1 2018-11-01 MAD PMI 19.95 online
Теперь я хочу:
- Отфильтровать данные по указанной дате (2018-11-02), что я сделал так:
# Keep only the bookings made on 2018-11-02. The date is converted explicitly
# instead of relying on character-to-Date coercion, and which() discards NA
# comparisons, so a missing booking_date cannot produce an NA row (as it
# would when indexing a plain data.frame with a logical vector containing NA).
df <- df[which(df$booking_date == as.Date("2018-11-02")), ]
- Сгруппировать данные по пользователю и просуммировать общие расходы каждого пользователя на билеты.
Я пробовал несколько подходов с использованием group_by или aggregate, но все, что мне удалось получить, — это дополнительный столбец, а не группировку или агрегирование значения по пользователю.
Мой конечный результат в идеале позволил бы мне выделить 10 пользователей, которые больше всего тратят на билеты, например, так:
'user1' 'user10' 'user7' и т. д.
Пример данных
structure(list(user = c("user4", "user5", "user3", "user10",
"user1", "user2", "user7", "user6", "user5", "user6", "user6",
"user7", "user1", "user7", "user4", "user4", "user1", "user7",
"user7", "user8", "user4", "user10", "user4", "user8", "user3",
"user9", "user5", "user2", "user5", "user3", "user3", "user9",
"user6", "user10", "user9", "user5", "user3", "user5", "user7",
"user9", "user2", "user2", "user7", "user10", "user7", "user3",
"user1", "user2", "user8", "user6", "user6", "user10", "user4",
"user7", "user4", "user1", "user4", "user2", "user1", "user7",
"user5", "user4", "user4", "user7", "user10"), booking_date = structure(c(17837,
17837, 17837, 17837, 17837, 17837, 17837, 17837, 17837, 17837,
17837, 17837, 17837, 17837, 17837, 17837, 17837, 17837, 17837,
17837, 17837, 17837, 17837, 17837, 17837, 17837, 17837, 17837,
17837, 17837, 17837, 17837, 17837, 17837, 17837, 17837, 17837,
17837, 17837, 17837, 17837, 17837, 17837, 17837, 17837, 17837,
17837, 17837, 17837, 17837, 17837, 17837, 17837, 17837, 17837,
17837, 17837, 17837, 17837, 17837, 17837, 17837, 17837, 17837,
17837), class = "Date"), origin = c("TFS", "MAD", "TFN", "MAD",
"LYS", "LPA", "MAD", "MAD", "AMS", "MAD", "MAD", "MAD", "MAD",
"LPA", "MAD", "MAD", "MAD", "TXL", "MAD", "MAD", "TXL", "TXL",
"TFS", "MAD", "NAP", "TFS", "PMI", "TLS", "TFS", "NTE", "AMS",
"FUE", "TFN", "CPH", "TFN", "MAD", "SVQ", "SCQ", "SVQ", "MAD",
"PMI", "MAD", "PMI", "MAD", "MAD", "MAD", "MAD", "MAD", "SVQ",
"NCE", "CDG", "MAD", "MAD", "MAD", "MAD", "MAD", "TFN", "LGW",
"LGW", "MAD", "TFN", "MAD", "RNS", "AGP", "CDG"), destination = c("MAD",
"DUB", "MAD", "TFS", "MAD", "MAD", "NAP", "TLS", "MAD", "SCQ",
"LPA", "TFN", "TXL", "MAD", "TFN", "SVQ", "ACE", "MAD", "TLS",
"SCQ", "MAD", "MAD", "MAD", "LPA", "MAD", "MAD", "MAD", "MAD",
"MAD", "MAD", "MAD", "MAD", "MAD", "MAD", "MAD", "CAG", "MAD",
"MAD", "MAD", "LPA", "MAD", "CDG", "MAD", "LPA", "TFS", "TFN",
"PMI", "NAP", "MAD", "MAD", "MAD", "LPA", "LGW", "LPA", "CDG",
"SPC", "MAD", "MAD", "MAD", "SCQ", "MAD", "SVQ", "MAD", "MAD",
"MAD"), price = c(24.0499992371, 41.5400009155, 251.199996948,
15.6000003815, 44.0099983215, 73.8499984741, 115.470001221, 69.4400024414,
81.3899993896, 15.2399997711, 41.1199989319, 274.559997559, 150,
29.3199996948, 332.440002441, 94.9100036621, 97.9800033569, 55.1199989319,
81.7399978638, 4.86000013351, 39.0299987793, 53.6300010681, 39.3199996948,
114.559997559, 65.4000015259, 96.2900009155, 41.75, 28.9099998474,
25.1900005341, 14, 50.3100013733, 47.2999992371, 53.8199996948,
91.3199996948, 77.6800003052, 17.8099994659, 96.5400009155, 27.6900005341,
34.1399993896, 34.3300018311, 15.5600004196, 158.449996948, 45.2999992371,
36.8100013733, 50.6800003052, 62.9000015259, 13.7399997711, 49.4399986267,
66.1100006104, 95.4400024414, 41.8400001526, 69.8300018311, 60.6599998474,
34.3300018311, 97.1399993896, 84.3099975586, 25.8099994659, 185.899993896,
34.8899993896, 304.380004883, 15.1300001144, 29.1399993896, 133.529998779,
208.910003662, 152.960006714), sale_channel = c("online", "online",
"travel agency", "online", "online", "online", "travel agency",
"online", "travel agency", "travel agency", "travel agency",
"online", "travel agency", "online", "online", "travel agency",
"online", "travel agency", "travel agency", "call center", "online",
"online", "online", "travel agency", "travel agency", "travel agency",
"online", "online", "online", "online", "online", "travel agency",
"travel agency", "online", "travel agency", "call center", "online",
"travel agency", "online", "online", "travel agency", "travel agency",
"online", "travel agency", "online", "online", "online", "travel agency",
"online", "travel agency", "travel agency", "online", "online",
"online", "online", "online", "online", "online", "online", "travel agency",
"travel agency", "travel agency", "online", "travel agency",
"online"), total = c(876.0300006858, 250.5000009536, 540.3500022886,
420.1500110628, 424.9299983977, 512.1099882118, 1233.9500045785,
316.9000034332, 250.5000009536, 316.9000034332, 316.9000034332,
1233.9500045785, 424.9299983977, 1233.9500045785, 876.0300006858,
876.0300006858, 424.9299983977, 1233.9500045785, 1233.9500045785,
185.52999830291, 876.0300006858, 420.1500110628, 876.0300006858,
185.52999830291, 540.3500022886, 255.6000022889, 250.5000009536,
512.1099882118, 250.5000009536, 540.3500022886, 540.3500022886,
255.6000022889, 316.9000034332, 420.1500110628, 255.6000022889,
250.5000009536, 540.3500022886, 250.5000009536, 1233.9500045785,
255.6000022889, 512.1099882118, 512.1099882118, 1233.9500045785,
420.1500110628, 1233.9500045785, 540.3500022886, 424.9299983977,
512.1099882118, 185.52999830291, 316.9000034332, 316.9000034332,
420.1500110628, 876.0300006858, 1233.9500045785, 876.0300006858,
424.9299983977, 876.0300006858, 512.1099882118, 424.9299983977,
1233.9500045785, 250.5000009536, 876.0300006858, 876.0300006858,
1233.9500045785, 420.1500110628)), row.names = c(3L,
26L, 37L, 42L, 48L, 82L, 89L, 100L, 112L, 124L, 133L, 144L, 148L,
150L, 166L, 167L, 173L, 182L, 217L, 243L, 259L, 285L, 300L, 304L,
306L, 336L, 341L, 366L, 388L, 397L, 413L, 417L, 423L, 452L, 457L,
473L, 474L, 478L, 482L, 483L, 486L, 496L, 499L, 504L, 510L, 513L,
529L, 531L, 558L, 605L, 615L, 628L, 629L, 664L, 669L, 672L, 684L,
722L, 730L, 752L, 766L, 767L, 779L, 804L, 819L), class = c("data.table",
"data.frame"))
Любые рекомендации о том, как подойти к этому?
Правка: окончательное решение
# Keep only the bookings made on 2018-11-02. which() discards NA comparisons,
# so a missing booking_date cannot produce an NA row (as it would when
# indexing a plain data.frame with a logical vector containing NA).
df <- df[which(df$booking_date == as.Date("2018-11-02")), ]
# Total ticket spend per user: one row per user, `price` holds the sum.
total <- aggregate(price ~ user, data = df, FUN = sum)
# Rank users from the highest to the lowest total spend.
top_10 <- total[order(total$price, decreasing = TRUE), ]
# Names of the five biggest spenders. head() returns fewer entries instead of
# padding the result with NA when fewer than five users are present.
head(top_10[["user"]], 5)
Это дает мне
"user7" "user4" "user3" "user2" "user1"