Я пытаюсь использовать spread
и gather
, используя tidyr
версию 1.0.0.9000
и dplyr
версию 0.8.3.9000
. Теперь этот кусок кода работает нормально, когда я делаю это автономный скрипт. Но как только я использую это в блестящем приложении. Я получаю следующую ошибку: Warning: Error in : Each row of output must be identified by a unique combination of keys.
Keys are shared for 192 rows:
Ниже приведен код, который имеет проблему. Извиняюсь за длинный код, так как большая часть его - это просто данные.
d1 <- tibble::tribble(
~Date, ~apple_count, ~apple_sale, ~banana_count, ~banana_sale, ~orange_count, ~orange_sale, ~peaches_count, ~peaches_sale, ~watermelon_count, ~watermelon_sale, ~strawberry_count, ~strawberry_sale,
"8/19/19", 10882.05495, 239575, 0, 0, 0, 0, 0, 0, 9643.600102, 630827, 0, 0,
"8/20/19", 516.29755, 11281, 0, 0, 0, 0, 0, 0, 6041.538067, 510219, 1694.44, 684210,
"8/21/19", 949.4084, 20150, 0, 0, 0, 0, 0, 0, 5371.758106, 565440, 9105.89, 3695182,
"8/22/19", 3950.5318, 88679, 0, 0, 0, 0, 0, 0, 5238.308826, 576678, 6179.47, 2501560,
"8/23/19", 2034.02055, 45672, 0, 0, 0, 0, 0, 0, 4994.43054, 518081, 7366.31, 2984563,
"8/24/19", 1770.50415, 38553, 0, 0, 0, 0, 0, 0, 5001.303585, 551733, 6275.43, 2531400,
"8/25/19", 3418.3042, 75686, 0, 0, 0, 0, 0, 0, 5005.408468, 552739, 6454.84, 2590925,
"8/26/19", 4044.93545, 90665, 0, 0, 0, 0, 0, 0, 5713.820592, 598826, 5062.37, 2025959,
"8/27/19", 1246.438172, 353899, 0, 0, 0, 0, 0, 0, 5679.438096, 580955, 3696.86, 1478264,
"8/28/19", 4657.00945, 136864, 0, 0, 0, 0, 0, 0, 5626.486464, 571672, 4153.98, 1676628,
"8/29/19", 4117.79875, 148569, 0, 4, 0, 0, 0, 0, 5562.927825, 571978, 5576.16, 2263248,
"8/30/19", 12408.52652, 610845, 0, 3, 0, 0, 0, 0, 5358.758567, 534372, 4529.15, 1841084,
"8/31/19", 3.812501, 79770, 0, 0, 0, 0, 0, 0, 1318.608575, 143211, 5630.9, 2285788,
"9/1/19", 3259.95555, 52096, 0, 31, 0, 0, 0, 0, 0.403265, 73, 7314.46, 2967691,
"9/2/19", 3118.19395, 49821, 0.2618, 84, 0, 0, 0, 0, 9001.834063, 1092501, 7561.02, 3063684,
"9/3/19", 2577.94215, 41201, 0.0748, 184, 0, 0, 0, 0, 5008.337284, 585832, 5784.59, 2325519,
"9/4/19", 2551.092, 40741, 669.5569, 37265, 0, 0, 0, 0, 14384.24161, 1361752, 479.09, 192116,
"9/5/19", 1910.63475, 30516, 418.931, 34028, 0, 0, 0, 0, 9740.894144, 1025175, 4657.47, 1871629,
"9/6/19", 1729.9115, 27635, 933.5992, 35408, 0, 0, 0, 0, 11535.33576, 1216191, 5423.41, 2189965,
"9/7/19", 1933.2576, 30881, 1625.94205, 52404, 0, 0, 0, 0, 11607.06273, 1274550, 5769.07, 2334982,
"9/8/19", 2354.9107, 37609, 1358.5788, 45251, 0, 0, 0, 0, 11447.76754, 1319610, 6345.95, 2574350,
"9/9/19", 2156.24705, 34440, 1632.42415, 52673, 0, 0, 1141.633875, 60219, 12100.08157, 1332270, 6266.89, 2531336,
"9/10/19", 2195.91555, 35076, 1816.6642, 58719, 0, 0, 2292.24701, 193714, 12264.68769, 1552984, 8804.48, 3555329,
"9/11/19", 1767.93085, 28243, 1856.1076, 60066, 0, 2, 3862.565979, 464879, 12104.56425, 1457483, 5765.86, 2314422,
"9/12/19", 16909.7263, 270128, 2028.57855, 65737, 0, 0, 4031.945994, 492095, 11907.39192, 1389034, 6899.48, 2778142,
"9/13/19", 1635.86595, 26140, 2286.31045, 74663, 0, 0, 4069.372958, 488815, 11413.9593, 1391875, 4538.55, 1828332,
"9/14/19", 1632.651, 26086, 2337.1056, 75633, 0, 0, 3807.516972, 452589, 11438.13724, 1504945, 4435.36, 1796896,
"9/15/19", 1764.6102, 28197, 2151.96115, 71064, 0.065, 9, 4319.488905, 518074, 11405.91464, 1528981, 5016.74, 2034118,
"9/16/19", 13433.71685, 214153, 2163.0511, 71649, 534.793, 74982, 4230.152044, 495831, 18893.07808, 2343183, 5492.47, 2225169,
"9/17/19", 1511.6027, 39954, 2704.8836, 89056, 2505.192, 165836, 3864.815982, 450443, 13621.20195, 1954270, 5883.12, 2385314,
"9/18/19", 1441.447, 90164, 2462.17205, 80965, 2866.423, 218112, 3962.909972, 477259, 13418.99777, 1995541, 5632.52, 2279495,
"9/19/19", 1767.94215, 72662, 2465.20825, 81634, 1169.787, 117449, 3676.161075, 455261, 13179.62418, 1891898, 5351.19, 2163109
)
d2 <- d1 %>%
tidyr::gather(column, value, -Date) %>%
tidyr::separate(column, into=c('partner', 'parameter'), sep='_') %>%
tidyr::spread(parameter, value)%>% dplyr::group_by(partner) %>%
dplyr::mutate(grouped_id = row_number()) %>%
dplyr::summarise( Total_Count = sum(as.numeric(count)),
Total_Sale = sum(as.numeric(sale)))
Ожидаемый результат будет примерно таким, как показано ниже:
partner Total_Count Total_Sale
<chr> <dbl> <dbl>
1 apple 115653. 3115951
2 banana 28911. 986521
3 orange 7076. 576390
4 peaches 39259. 4549179
5 strawberry 173148. 69970409
6 watermelon 285030. 33124879