Использование Group by и Slope с dplyr для получения нового столбца - PullRequest
0 голосов
/ 12 декабря 2018

Я ищу более прямое решение с использованием dplyr, чтобы получить из моих данных столбец с названием slope. Набор данных сгруппирован по season и stat. Мой текущий код:

library(tidyverse); library(broom)    

# Example data in dput() form: a 36-row data.frame with one row per
# playerID / season / stat combination (three players, seasons 2014-2016,
# stats "HR", "R", "RBI", "SB"). Columns:
#   playerID - character player identifier
#   season   - integer season year
#   stat     - character statistic name
#   points   - numeric, used as the predictor in the regressions below
#   ranks    - numeric rank column (not used by the pipelines below)
#   value    - numeric, used as the response in the regressions below
full_table_raw <- structure(list(playerID = c("abreujo02", "abreujo02", 
"abreujo02", "abreujo02", "abreujo02", "abreujo02", "abreujo02", 
"abreujo02", "abreujo02", "abreujo02", "abreujo02", "abreujo02", 
"arenano01", "arenano01", "arenano01", "arenano01", "arenano01", 
"arenano01", "arenano01", "arenano01", "arenano01", "arenano01", 
"arenano01", "arenano01", "blackch02", "blackch02", "blackch02", 
"blackch02", "blackch02", "blackch02", "blackch02", "blackch02", 
"blackch02", "blackch02", "blackch02", "blackch02"), season = c(2014L, 
2014L, 2014L, 2014L, 2015L, 2015L, 2015L, 2015L, 2016L, 2016L, 2016L, 
2016L, 2014L, 2014L, 2014L, 2014L, 2015L, 2015L, 2015L, 2015L, 
2016L, 2016L, 2016L, 2016L, 2014L, 2014L, 2014L, 2014L, 2015L, 
2015L, 2015L, 2015L, 2016L, 2016L, 2016L, 2016L), stat = c("HR", 
"R", "RBI", "SB", "HR", "R", "RBI", "SB", "HR", "R", "RBI", "SB", 
"HR", "R", "RBI", "SB", "HR", "R", "RBI", "SB", "HR", "R", "RBI", 
"SB", "HR", "R", "RBI", "SB", "HR", "R", "RBI", "SB", "HR", "R", 
"RBI", "SB"), points = c(3, 2, 3, 2, 2, 1, 2, 1, 1, 1, 2, 1, 
1, 1, 1, 1, 3, 3, 3, 2, 3, 3, 3, 2, 2, 3, 2, 3, 1, 2, 1, 3, 2, 
2, 1, 3), ranks = c(1, 2, 1, 2, 2, 3, 2, 3, 3, 3, 2, 3, 3, 3, 
3, 3, 1, 1, 1, 2, 1, 1, 1, 2, 2, 1, 2, 1, 3, 2, 3, 1, 2, 2, 3, 
1), value = c(36, 80, 107, 3, 30, 88, 101, 0, 25, 67, 100, 0, 
18, 58, 61, 2, 42, 97, 130, 2, 41, 116, 133, 2, 19, 82, 72, 28, 
17, 93, 58, 43, 29, 111, 82, 17)), class = "data.frame", row.names = c(NA, 
-36L))

# For every season/stat group, regress value on points and keep the
# coefficient of `points` as the column `slope`.
sgp_table <- full_table_raw %>%
  group_by(season, stat) %>%
  # tidy() turns each fitted model into one row per coefficient.
  do(tidy(lm(value ~ points, data = .))) %>%
  # Drop the intercept row; only the `points` term carries the slope.
  filter(term == "points") %>%
  # Rename while selecting: `estimate` becomes `slope`.
  select(season, stat, slope = estimate)

Я ищу более краткий способ создания столбца slope из моих текущих данных.

Ответы [ 2 ]

0 голосов
/ 12 декабря 2018

Не уверен, что вы сочтёте это более чистым, чем ваш вариант, но при использовании nest вам не нужен group_by

# Nest the rows of each season/stat combination into the list-column `data`,
# fit value ~ points on every nested data frame, and keep only the slope.
sgp_table <- full_table_raw %>%
  nest(-season, -stat) %>%
  # map_dbl() (rather than map()) makes `slope` a plain numeric column
  # instead of a list-column of length-one vectors.
  mutate(
    slope = map_dbl(data, ~ coef(lm(value ~ points, data = .x))[["points"]])
  ) %>%
  # The nested data is no longer needed once the slope is extracted.
  select(-data)

> sgp_table
   season stat slope
1    2014   HR     9
2    2014    R    12
3    2014  RBI    23
4    2014   SB    13
5    2015   HR  12.5
6    2015    R   4.5
7    2015  RBI    36
8    2015   SB  21.5
9    2016   HR     8
10   2016    R  24.5
11   2016  RBI  25.5
12   2016   SB   8.5
0 голосов
/ 12 декабря 2018

Вот вариант с использованием nest/unnest

library(tidyverse)
library(broom)
# Nest the data per season/stat group, fit value ~ points on each nested
# data frame, keep the `points` estimate as `slope`, then unnest the
# one-row results back into regular columns.
full_table_raw %>%
  group_by(season, stat) %>%
  nest() %>%
  mutate(
    modelout = map(data, function(group_data) {
      fit <- lm(value ~ points, data = group_data)
      tidy(fit) %>%
        filter(term == "points") %>%
        select(slope = estimate)
    })
  ) %>%
  select(-data) %>%
  unnest()
# A tibble: 12 x 3
#   season stat  slope
#    <int> <chr> <dbl>
# 1   2014 HR     9.  
# 2   2014 R     12   
# 3   2014 RBI   23.  
# 4   2014 SB    13.0 
# 5   2015 HR    12.5 
# 6   2015 R      4.50
# 7   2015 RBI   36   
# 8   2015 SB    21.5 
# 9   2016 HR     8.00
#10   2016 R     24.5 
#11   2016 RBI   25.5 
#12   2016 SB     8.5 
...