Добавление итоговой строки итогов для каждого факторного уровня - PullRequest
2 голосов
/ 21 мая 2019

Обязательный выход

Это обязательный выход (цифры могут отличаться).

 City      Res       Pop  Pop1

  Total          4503739  4455
  State    Urban 3003948  2966
  State    Rural 1499791  1489

  Total          1000915   986
  A        Urban  500414   493
  A        Rural  500501   494

  Total           999938   1009
  B        Urban  499922   497
  B        Rural  500016   512

  Total          1000912  976
  C       Urban  501638   493
  C       Rural  499274   483

R код

City <- rep(LETTERS[1:3], each = 2) 
Res  <- factor(rep(c("Urban", "Rural"), times = length(City)/2))
set.seed(12345)
Pop  <- rpois(n = length(City), lambda = 500000)
Pop1 <- rpois(n = length(City), lambda = 500)
df   <- data.frame(City, Res, Pop, Pop1)
df

library(tidyverse)

df %>% 
  group_by(Res) %>%
  summarise_if(is.numeric, sum, na.rm = TRUE) %>%
  mutate(City = "State") %>% 
  bind_rows(df) %>% 
  select(City, everything()) %>% 
  ungroup(Res) %>% 
  group_by(., City) %>%
  bind_rows(
        group_by(., Res) %>%
        summarise(Pop = sum(Pop), Pop1 = sum(Pop1)),
        . ) %>% 
  select(City, everything())


# A tibble: 10 x 4
   City  Res       Pop  Pop1
   <chr> <fct>   <int> <int>
 1 NA    Rural 2999582  2978
 2 NA    Urban 3003948  2966
 3 State Rural 1499791  1489
 4 State Urban 1501974  1483
 5 A     Urban  500414   493
 6 A     Rural  500501   494
 7 B     Urban  499922   497
 8 B     Rural  500016   512
 9 C     Urban  501638   493
10 C     Rural  499274   483

Интересно, как получить требуемый результат более эффективно?Спасибо

sessionInfo

sessionInfo()
R version 3.6.0 (2019-04-26)
Platform: x86_64-pc-linux-gnu (64-bit)
Running under: Ubuntu 18.04.2 LTS

Matrix products: default
BLAS:   /usr/lib/x86_64-linux-gnu/atlas/libblas.so.3.10.3
LAPACK: /usr/lib/x86_64-linux-gnu/atlas/liblapack.so.3.10.3

locale:
 [1] LC_CTYPE=en_US.UTF-8       LC_NUMERIC=C              
 [3] LC_TIME=en_US.UTF-8        LC_COLLATE=en_US.UTF-8    
 [5] LC_MONETARY=en_US.UTF-8    LC_MESSAGES=en_US.UTF-8   
 [7] LC_PAPER=en_US.UTF-8       LC_NAME=C                 
 [9] LC_ADDRESS=C               LC_TELEPHONE=C            
[11] LC_MEASUREMENT=en_US.UTF-8 LC_IDENTIFICATION=C       

attached base packages:
[1] stats     graphics  grDevices utils     datasets  methods   base     

other attached packages:
[1] forcats_0.4.0        stringr_1.4.0        dplyr_0.8.1         
[4] purrr_0.3.2          readr_1.3.1          tidyr_0.8.3.9000    
[7] tibble_2.1.1         ggplot2_3.1.1        tidyverse_1.2.1.9000

loaded via a namespace (and not attached):
 [1] Rcpp_1.0.1       cellranger_1.1.0 pillar_1.4.0     compiler_3.6.0  
 [5] dbplyr_1.4.0     plyr_1.8.4       tools_3.6.0      zeallot_0.1.0   
 [9] lubridate_1.7.4  jsonlite_1.6     nlme_3.1-140     gtable_0.3.0    
[13] lattice_0.20-38  pkgconfig_2.0.2  rlang_0.3.4.9003 reprex_0.3.0    
[17] cli_1.1.0        DBI_1.0.0        rstudioapi_0.10  haven_2.1.0     
[21] withr_2.1.2      xml2_1.2.0.9000  httr_1.4.0       fs_1.3.1        
[25] generics_0.0.2   vctrs_0.1.0.9003 hms_0.4.2        grid_3.6.0      
[29] tidyselect_0.2.5 glue_1.3.1       R6_2.4.0         fansi_0.4.0     
[33] readxl_1.3.1     modelr_0.1.4     magrittr_1.5     backports_1.1.4 
[37] scales_1.0.0     rvest_0.3.4      assertthat_0.2.1 colorspace_1.4-1
[41] utf8_1.1.4       stringi_1.4.3    lazyeval_0.2.2   munsell_0.5.0   
[45] broom_0.5.2      crayon_1.3.4 

Ответы [ 3 ]

1 голос
/ 21 мая 2019

Вот вариант с add_row

library(tidyverse)
df %>% 
  group_split(Res) %>%
   map_df(~ add_row(., City = "State", Res = first(.$Res), 
          Pop = sum(.$Pop), Pop1 = sum(.$Pop1)) %>% 
           add_row(., City = 'Total', Res = first(.$Res), 
          Pop = sum(.$Pop), Pop1 = sum(.$Pop1)))
# A tibble: 10 x 4
#   City  Res       Pop  Pop1
#   <fct> <fct>   <int> <int>
# 1 A     Rural  500501   494
# 2 B     Rural  500016   512
# 3 C     Rural  499274   483
# 4 State Rural 1499791  1489
# 5 Total Rural 2999582  2978
# 6 A     Urban  500414   493
# 7 B     Urban  499922   497
# 8 C     Urban  501638   493
# 9 State Urban 1501974  1483
#10 Total Urban 3003948  2966

Или другой вариант rollup из data.table

library(data.table)
f1 <- function(dat), rollup(dat, lapply(.SD, sum), by = "Res",
       .SDcols = Pop:Pop1)
setDT(df)
out1 <- rbind(f1(df)[-.N][, City := "State"], df)
rbind(f1(out1)[-.N][, City := "Total"], out1)
1 голос
/ 21 мая 2019

Вот опция, основанная на purrr::map_df и split. Мы можем split df, используя Город , а затем циклически проходить через каждый Город : связать в верхней части каждого города строку Всего , что составляет Pop и Pop1

library(dplyr)
library(purrr)
df %>%
  group_by(Res) %>%
  summarise_if(is.numeric, sum, na.rm = TRUE) %>% 
  arrange(Res= factor(Res, levels=c('Urban','Rural'))) %>%
  mutate(City = "State") %>% 
  bind_rows(df) %>% 
  mutate(City=factor(City, levels = c('State','A','B','C'))) %>% 
  split(.$City) %>% 
  map_df(., ~bind_rows(summarise_if(.x,is.numeric, sum) %>% mutate(City='Total', Res=''), 
                       .x %>% mutate_if(is.factor, as.character)) %>% 
  select(City, Res, Pop, Pop1))


# A tibble: 12 x 4
   City  Res       Pop  Pop1
  <chr> <chr>   <int> <int>
1 Total ""    3001765  2972
2 State Urban 1501974  1483
3 State Rural 1499791  1489
4 Total ""    1000915   987
5 A     Urban  500414   493
6 A     Rural  500501   494
7 Total ""     999938  1009
8 B     Urban  499922   497
9 B     Rural  500016   512
10 Total ""    1000912   976
11 C     Urban  501638   493
12 C     Rural  499274   483
1 голос
/ 21 мая 2019
library(tidyverse)
df %>%
  select(Res, Pop, Pop1) %>%
  group_by(Res) %>%
  summarise_all(sum) %>%
  bind_rows(df) %>%
  mutate(City = fct_explicit_na(City, "State")) %>%  # from forcats: renames NA as factor
  select(City, Res, Pop, Pop1)

# A tibble: 8 x 4
  City  Res       Pop  Pop1
  <fct> <fct>   <int> <int>
1 State Rural 1499791  1489
2 State Urban 1501974  1483
3 A     Urban  500414   493
4 A     Rural  500501   494
5 B     Urban  499922   497
6 B     Rural  500016   512
7 C     Urban  501638   493
8 C     Rural  499274   483
...