Как построить сводную таблицу с разбивкой по группам в одной таблице data.table в R - PullRequest
0 голосов
/ 14 ноября 2018

В data.table есть разные способы построить сводную таблицу с помощью аргумента by =, но как собрать общий итог и разбивку по группам в ОДНОЙ И ТОЙ ЖЕ таблице?

Пример данных

# Sample data: ten rows, each holding a GROUP label, a TYPE code and a
# numeric AMOUNT. The three vectors are laid out five values per line so
# that the same position in each vector belongs to the same row.
DT <- data.table(
  GROUP = c(
    "A_GROUP", "B_GROUP", "B_GROUP", "B_GROUP", "A_GROUP",
    "A_GROUP", "B_GROUP", "B_GROUP", "B_GROUP", "A_GROUP"
  ),
  TYPE = c(
    "A", "B", "C", "D", "E",
    "B", "B", "A", "A", "E"
  ),
  AMOUNT = c(
    123, 1424, 1244, 2111, 44559,
    128, 1221, 12144, 11, 439
  )
)

Отдельные таблицы, ещё не объединённые в один кадр данных

# Overall total of AMOUNT per TYPE. format() turns each total into a
# character string with "," as the thousands separator.
ALL_G <- DT[
  ,
  list(SUM = format(sum(AMOUNT), big.mark = ",")),
  by = "TYPE"
]

# Breakdown 1: the same per-TYPE total, restricted to the rows whose GROUP
# matches the pattern "A_GROUP".
A_G <- DT[
  grepl("A_GROUP", GROUP),
  list(SUM = format(sum(AMOUNT), big.mark = ",")),
  by = "TYPE"
]

# Breakdown 2: the same per-TYPE total, restricted to the rows whose GROUP
# matches the pattern "B_GROUP".
B_G <- DT[
  grepl("B_GROUP", GROUP),
  list(SUM = format(sum(AMOUNT), big.mark = ",")),
  by = "TYPE"
]

Желаемая форма

# TARGET
 TYPE  ALL_SUM  A_GROUP_SUM  B_GROUP_SUM
  A     12,278    123          12,155
  B     2,773     128          2,645
  C     1,244      0           1,244
  D     2,111      0           2,111
  E    44,998    44,998          0

Как можно этого достичь?

1 Ответ

0 голосов
/ 14 ноября 2018
library( data.table)

# Sample data (same values as in the question): one GROUP label, one TYPE
# code and one numeric AMOUNT per row, written five values per line.
DT <- data.table(
  GROUP = c(
    "A_GROUP", "B_GROUP", "B_GROUP", "B_GROUP", "A_GROUP",
    "A_GROUP", "B_GROUP", "B_GROUP", "B_GROUP", "A_GROUP"
  ),
  TYPE = c(
    "A", "B", "C", "D", "E",
    "B", "B", "A", "A", "E"
  ),
  AMOUNT = c(
    123, 1424, 1244, 2111, 44559,
    128, 1221, 12144, 11, 439
  )
)


# Total AMOUNT per TYPE across every group; this becomes the ALL_SUM column.
dt1 <- DT[, .(ALL_SUM = sum(AMOUNT)), by = TYPE]

# Total AMOUNT per (TYPE, GROUP) pair. The chained `:=` then appends "_SUM"
# to each GROUP label so the labels already read like the target column
# names (A_GROUP_SUM, B_GROUP_SUM).
dt2 <- DT[, .(sum = sum(AMOUNT)), by = .(TYPE, GROUP)][
  , GROUP := paste0(GROUP, "_SUM")
]

# Reshape long -> wide: one row per TYPE, one column per relabelled GROUP.
# TYPE/GROUP combinations that never occur in DT are filled with 0.
dt2 <- dcast(dt2, TYPE ~ GROUP, value.var = "sum", fill = 0)

# Option 1: join on TYPE. Rows are matched by the TYPE value, so the result
# is correct whatever order dt1 and dt2 happen to be in. The joined columns
# come out as TYPE, A_GROUP_SUM, B_GROUP_SUM, ALL_SUM; use setcolorder()
# afterwards to get the desired order of columns.
dt2[dt1, on = "TYPE"]

# Option 2: bind column-wise. cbind() pairs rows purely by position, but dt1
# keeps TYPE in order of first appearance while dcast() returns dt2 sorted
# by TYPE. Binding them as-is would silently attach sums to the wrong TYPE
# whenever those two orders differ, so dt2 is first re-ordered to follow
# dt1's TYPE order via match(). The first column of dt2 (TYPE) is dropped
# because dt1 already carries it.
cbind(dt1, dt2[match(dt1$TYPE, dt2$TYPE), -1])

#   TYPE ALL_SUM A_GROUP_SUM B_GROUP_SUM
# 1    A   12278         123       12155
# 2    B    2773         128        2645
# 3    C    1244           0        1244
# 4    D    2111           0        2111
# 5    E   44998       44998           0
...