R: Как составить таблицу rpivotTable или dcast с суммированным столбцом между ними в том же виде, что и в Excel, для отчета о продолжении данных - PullRequest
0 голосов
/ 09 марта 2020

R: Как построить таблицу rpivotTable или dcast с суммированным столбцом между ними в том же виде, что и в Excel, для создания отчетов о продолжении.

Проверьте прикрепленный снимок экрана и набор данных, пробовал другой способ добавить суммированный столбец в dcast и rpivottable но не получая его.

Пример набора данных будет похож на.

Buyer       year_month      Late_Days

A           2018-01         0 or Early
B           2018-01         >=5
C           2018-02         >=10
A           2018-04         0 or Early
A           2018-03         >=5
B           2018-03         >=10
C           2018-05         0 or Early
A           2018-06         >=5
B           2018-07         >=10
A           2018-11         0 or Early
B           2018-11         >=5
A           2019-01         >=10
B           2019-01         0 or Early
A           2019-01         >=5
A           2019-02         >=10

Результат dput (DF)

year_month("12-2019", "01-2020", "01-2020", "06-2018", "08-2018", "09-2018", "12-2018", "03-2019",
 "11-2016", "11-2016", "04-2019", "07-2017", "08-2017", "09-2017", "10-2017", "11-2017", "12-2017",
 "01-2018", "02-2018","03-2018", "04-2018", "05-2018", "06-2018", "07-2018", "08-2018", "09-2018",
"07-2017", "08-2017", "09-2017", "10-2017", "11-2017", "12-2017", "01-2018", "02-2018","03-2018",
 "04-2018", "05-2018", "06-2018", "07-2018", "08-2018", "09-2018","08-2017", "09-2017", "03-2018",
 "04-2019") Late_Days = c("<=20", "<=10", "<=5", "<=20", "0 or early", "0 or early", "0 or early", 
"0 or early", "0 or early", "<=30", "0 or early", "<=10", "<=5", 
"0 or early", "0 or early", "0 or early", ">30", "<=20", "<=20", 
"<=20", "<=20", ">30", "<=20", "<=5", "<=5", "<=5", "<=5", "<=10", 
"<=10", ">30", "<=5", "0 or early", "<=5", "0 or early", "0 or early", 
"0 or early", "<=10", "<=5", "0 or early", "0 or early", "0 or early"
 "<=20", "<=5", "<=5", "<=5")Buyer = c( 
"C", "D", "D", "D", "D", "A", "D", "A", "A", "C", "A", "A", "A", "A", "A", 
"A", "A", "A", "A", "A", "A", "A", "A", "C", "C", "A", "C", "A", "A", "A", 
"A", "B", "B", "A", "A", "B", "B", "B", "A", "A", "C", "C", "A", "A", "A", 
)row.names = c(1L, 2L, 3L, 4L, 
5L, 6L, 7L, 8L, 9L, 10L, 11L, 12L, 13L, 14L, 15L, 16L, 17L, 18L, 
19L, 20L, 21L, 22L, 23L, 24L, 25L, 26L, 27L, 28L, 29L, 20L, 31L, 
32L, 33L, 34L, 35L, 36L, 37L, 38L, 39L, 40L, 41L, 42L, 43L, 44L, 45L)

Код, который я пробовал

datatable(dcast(inventory, Buyer ~ year_month), filter = 'top')

Проверьте скриншот отчета Excel. можно ли сгенерировать в R. enter image description here

1 Ответ

0 голосов
/ 09 марта 2020

Я не был уверен, что суммировать со строками в вашем Late_Days -колонке .. поэтому я заменил строки некоторыми случайными числами (см. Код ниже)

** предоставлены образцы данных **

df <- data.frame(year_month = c("12-2019", "01-2020", "01-2020", "06-2018", "08-2018", "09-2018", "12-2018", "03-2019",
                           "11-2016", "11-2016", "04-2019", "07-2017", "08-2017", "09-2017", "10-2017", "11-2017", "12-2017",
                           "01-2018", "02-2018","03-2018", "04-2018", "05-2018", "06-2018", "07-2018", "08-2018", "09-2018",
                           "07-2017", "08-2017", "09-2017", "10-2017", "11-2017", "12-2017", "01-2018", "02-2018","03-2018",
                           "04-2018", "05-2018", "06-2018", "07-2018", "08-2018", "09-2018","08-2017", "09-2017", "03-2018",
                           "04-2019"), 
                 Late_Days = c("<=20", "<=10", "<=5", "<=20", "0 or early", "0 or early", "0 or early", 
                                                    "0 or early", "0 or early", "<=30", "0 or early", "<=10", "<=5", 
                                                    "0 or early", "0 or early", "0 or early", ">30", "<=20", "<=20", 
                                                    "<=20", "<=20", ">30", "<=20", "<=5", "<=5", "<=5", "<=5", "<=10", 
                                                    "<=10", ">30", "<=5", "0 or early", "<=5", "0 or early", "0 or early", 
                                                    "0 or early", "<=10", "<=5", "0 or early", "0 or early", "0 or early",
                                                    "<=20", "<=5", "<=5", "<=5"),
                 Buyer = c( "C", "D", "D", "D", "D", "A", "D", "A", "A", "C", "A", "A", "A", "A", "A", 
                            "A", "A", "A", "A", "A", "A", "A", "A", "C", "C", "A", "C", "A", "A", "A", 
                            "A", "B", "B", "A", "A", "B", "B", "B", "A", "A", "C", "C", "A", "A", "A" ),
                 stringsAsFactors = FALSE
                 )

код

library( data.table )
library( janitor )
#set data to data.table format
data.table::setDT(df)
#replace Late_Days with numeric, or elese we've got nothing to sum...
set.seed(123)
df[, Late_Days := sample(0:20, nrow(df), replace = TRUE) ]
#get year and month to separate columns
df[, c("month", "year") := data.table::tstrsplit( year_month, "-" ) ][]
#get yearly summarise, with totals
l <- lapply( unique(df$year), function(x) {
  temp <- df[ year == x, ]
  ans <- data.table::dcast( temp[, .(sum(Late_Days)), by = .(Buyer, year, month) ], Buyer ~ year + month, value.var = "V1", fill = 0 )
  ans <- janitor::adorn_totals(ans, where = c("col"), name = paste0( x, "_Total"))
} )
#bind together
ans <- data.table::rbindlist( l, use.names = TRUE, fill = TRUE )
#melt to ling
ans <- data.table::melt( ans, id.vars = "Buyer" )
#get summary per buyer, per period
ans <- ans[, sum(value, na.rm = TRUE), by = .(Buyer, variable)]
#cast to wide again
final <- data.table::dcast( ans, Buyer ~ variable, value.var = "V1" )
#get the order right
colorder = c("Buyer", sort( names(final)[!names(final) == "Buyer"] ) )
#reorder, and get totals by column
janitor::adorn_totals( final[, ..colorder ], where = "row" )

выход

# Buyer 2016_11 2016_Total 2017_07 2017_08 2017_09 2017_10 2017_11 2017_12 2017_Total 2018_01 2018_02 2018_03 2018_04 2018_05 2018_06
# A      19         19      18      17      31      13      14       9        102       8      38       9      13      16      10
# B       0          0       0       0       0       0       0       8          8       9       0       0      20       5       1
# C      13         13      14      12       0       0       0       0         26       0       0       0       0       0       0
# D       0          0       0       0       0       0       0       0          0       0       0       0       0       0       2
# Total  32         32      32      29      31      13      14      17        136      17      38       9      33      21      13
#    2018_07 2018_08 2018_09 2018_12 2018_Total 2019_03 2019_04 2019_12 2019_Total 2020_01 2020_Total
#       4       7      28       0        133       4       9       0         13       0          0
#       0       0       0       0         35       0       0       0          0       0          0
#       6      20      11       0         37       0       0      14         14       0          0
#       0       9       0      10         21       0       0       0          0      31         31
#       10     36      39      10        226       4       9      14         27      31         31
...