пользовательская функция в mutate / tibble - PullRequest
2 голоса
/ 20 апреля 2019

Я изучаю учебник и пытаюсь применить эту часть к своим данным / проблеме

# Fit k-means to `points` once for every k from 1 to 9 and store, next to each
# fit, the results of tidy(), glance() and augment() as list-columns.
kclusts <- tibble(k = 1:9) %>%
  mutate(kclust = map(k, function(n_centers) kmeans(points, n_centers))) %>%
  mutate(
    tidied = map(kclust, function(fit) tidy(fit)),
    glanced = map(kclust, function(fit) glance(fit)),
    # augment() is given the same `points` for every fit.
    augmented = map(kclust, function(fit) augment(fit, points))
  )

Однако мои данные немного отличаются от данных из учебника. Я пытаюсь применить к ним заключительную строку augmented = map(kclust, augment, points).

Код, который работает (без последней строки):

# Per-year clustering: drop the row id, nest the remaining columns by
# year_row, keep only the years with more than 4 rows, fit a 4-centre k-means
# to each year and store its tidy() / glance() summaries as list-columns.
kclust <- results %>%
  as_tibble() %>%
  select(-id_row) %>%
  group_by(year_row) %>%
  nest(.key = "value") %>%
  filter(map_int(value, nrow) > 4) %>%
  mutate(
    kmeans = map(
      value,
      # NOTE(review): [[1]] takes the first column of each nested data frame;
      # confirm that this is the column meant to be clustered.
      function(year_data) {
        kmeans(year_data[[1]], centers = 4, iter.max = 10, nstart = 1)
      }
    )
  ) %>%
  mutate(
    tidied = map(kmeans, tidy),
    glanced = map(kmeans, glance)
  )

Код, который не работает (с моей попыткой добавить часть с augment):

# Same pipeline as the working version, plus an attempted `augmented` column.
# NOTE(review): the last line fails as written. The parenthesis opened by
# `nrow(` is only closed after the anonymous function, so that function is
# passed to nrow() as a second argument and map() receives no function at all.
# The line also reads `kclust` while `kclust` is still being assigned by this
# very statement, so `kclust$kmeans` / `kclust$value` can at best refer to an
# object left over from an earlier run, not to the columns being built here.
kclust <- results %>%
  as_tibble() %>% 
  select(-id_row) %>% 
  group_by(year_row) %>% 
  nest(.key = "value") %>%
  filter(map_int(value, nrow) > 4) %>% 
  mutate(kmeans = map(value, ~kmeans(.x[[1]], centers = 4, iter.max = 10, nstart = 1)),
         tidied = map(kmeans, tidy),
         glanced = map(kmeans, glance),
         augmented = map(1:nrow(kclust,  function(x) {augment(kclust$kmeans[[x]], kclust$value[[x]])})))

Следующее работает вне tibble, но я не могу заставить это работать внутри него:

# Outside the pipeline, the fit and the nested data of one row of the finished
# `kclust` object can be paired by position.
augment(kclust[["kmeans"]][[1]], kclust[["value"]][[1]])

# Pair the x-th k-means fit with the x-th nested data frame.
fun <- function(x) {
  fit <- kclust[["kmeans"]][[x]]
  nested <- kclust[["value"]][[x]]
  augment(fit, nested)
}
ag <- map(seq_len(3), fun)
ag

Моя цель — построить график ggplot, как в самом конце учебника, где каждому году будет соответствовать отдельная панель (facet).

# Scatter plot of x1 against x2 from `assignments`, coloured by `.cluster`,
# with one panel per value of k.
p1 <- ggplot(data = assignments, mapping = aes(x = x1, y = x2)) +
  geom_point(mapping = aes(colour = .cluster)) +
  facet_wrap(vars(k))
p1

Данные:

structure(list(id_row = c("1000228", "1000228", "1000228", "1000228", 
"1000228", "1000228", "1000228", "1000228", "1000228", "1000228", 
"1000228", "1000228", "1000228", "100493", "100493", "100493", 
"100493", "100493", "1011006", "1011006", "1011006", "1011006", 
"1011006", "1011006", "1011006", "1011006", "1037949", "1037949", 
"1037949", "1037949", "1037949", "1037949", "1037949", "1037949", 
"1037949", "11199", "11199", "11199", "11199", "11199", "11199", 
"11199", "11199", "1403161", "1403161", "1403161", "1403161", 
"1403161", "1403161", "1403161", "1403161", "1403161", "1403161", 
"1403161", "1403161", "1403161", "1403161", "1403161", "14693", 
"14693", "14693", "14693", "14693", "14693", "14693", "14693", 
"14693", "14693", "14693", "14693", "14693", "14693", "14693", 
"14693", "14693", "14693", "14693", "1603923", "1603923", "1603923", 
"1603923", "1603923", "1603923", "1603923", "1603923", "1603923", 
"1603923", "1603923", "1603923", "1603923", "217346", "217346", 
"217346", "217346", "217346", "217346", "217346", "217346", "217346", 
"217346", "217346", "217346", "217346", "2969", "2969", "2969", 
"2969", "2969", "2969", "2969", "2969", "2969", "2969", "2969", 
"2969", "2969", "2969", "2969", "2969", "318154", "318154", "318154", 
"42582", "42582", "42582", "42582", "42582", "42582", "42582", 
"42582", "42582", "42582", "42582", "42582", "42582", "42582", 
"42582", "42582", "42582", "5513", "5513", "5513", "5513", "5513", 
"5513", "5513", "5513", "5513", "5513", "5513", "5513", "5513", 
"5513", "5513", "5513", "717423", "717423", "717423", "717423", 
"717423", "717423", "717423", "717423", "717423", "80661", "80661", 
"80661", "80661", "80661", "80661", "80661", "80661", "80661", 
"80661", "80661", "80661", "80661", "80661", "80661", "80661", 
"823768", "823768", "823768", "823768", "823768", "823768", "860730", 
"860730", "860730", "860730", "860730", "860730", "860730", "860730", 
"860730", "860730", "860730", "860730", "860730", "860730", "860730", 
"860730", "860730", "860730", "860730", "860730", "860730", "863157", 
"863157", "863157", "863157", "863157", "863157", "863157", "891024", 
"891024", "891024", "891024", "891024", "891024", "891024", "891024", 
"891024", "891024", "891024", "891024", "891024", "891024", "891024", 
"891024", "891024", "912242", "912242", "912242", "912242", "912242", 
"912242", "912242", "912242", "912242", "912242", "912242", "912242", 
"912242", "912242", "912242"), year_row = c("2004", "2005", "2006", 
"2007", "2011", "2012", "2013", "2014", "2015", "2016", "2017", 
"2018", "2019", "2006", "2006", "2007", "2008", "2019", "2012", 
"2013", "2014", "2015", "2016", "2017", "2018", "2019", "2011", 
"2012", "2013", "2014", "2015", "2016", "2017", "2018", "2019", 
"2007", "2008", "2009", "2010", "2011", "2012", "2013", "2014", 
"2007", "2008", "2008", "2009", "2009", "2010", "2011", "2012", 
"2013", "2014", "2015", "2016", "2017", "2018", "2019", "2002", 
"2003", "2004", "2005", "2006", "2007", "2008", "2009", "2010", 
"2010", "2011", "2011", "2012", "2013", "2014", "2015", "2016", 
"2017", "2018", "2002", "2003", "2004", "2007", "2008", "2011", 
"2012", "2013", "2014", "2015", "2016", "2017", "2018", "2007", 
"2008", "2009", "2010", "2011", "2012", "2013", "2014", "2015", 
"2016", "2017", "2018", "2019", "2004", "2005", "2006", "2006", 
"2007", "2007", "2008", "2009", "2010", "2011", "2012", "2013", 
"2014", "2015", "2016", "2017", "2013", "2014", "2015", "2003", 
"2004", "2005", "2006", "2007", "2008", "2009", "2010", "2011", 
"2012", "2013", "2014", "2015", "2016", "2017", "2018", "2019", 
"2003", "2004", "2005", "2006", "2007", "2008", "2009", "2010", 
"2011", "2012", "2013", "2014", "2015", "2016", "2017", "2018", 
"2011", "2012", "2013", "2014", "2015", "2016", "2017", "2018", 
"2019", "2004", "2005", "2006", "2009", "2010", "2011", "2012", 
"2013", "2014", "2015", "2016", "2016", "2017", "2017", "2018", 
"2019", "2003", "2004", "2005", "2006", "2007", "2008", "2014", 
"2015", "2015", "2016", "2016", "2016", "2016", "2016", "2016", 
"2017", "2017", "2017", "2017", "2017", "2017", "2018", "2018", 
"2018", "2018", "2019", "2019", "2013", "2014", "2015", "2016", 
"2017", "2018", "2019", "2003", "2004", "2005", "2006", "2007", 
"2008", "2009", "2010", "2011", "2012", "2013", "2014", "2015", 
"2016", "2017", "2018", "2019", "2005", "2006", "2007", "2008", 
"2009", "2010", "2011", "2012", "2013", "2014", "2015", "2016", 
"2017", "2018", "2019"), id_col = c("1000228", "1000228", "1000228", 
"1000228", "1000228", "1000228", "1000228", "1000228", "1000228", 
"1000228", "1000228", "1000228", "1000228", "100493", "100493", 
"100493", "100493", "100493", "1011006", "1011006", "1011006", 
"1011006", "1011006", "1011006", "1011006", "1011006", "1037949", 
"1037949", "1037949", "1037949", "1037949", "1037949", "1037949", 
"1037949", "1037949", "11199", "11199", "11199", "11199", "11199", 
"11199", "11199", "11199", "1403161", "1403161", "1403161", "1403161", 
"1403161", "1403161", "1403161", "1403161", "1403161", "1403161", 
"1403161", "1403161", "1403161", "1403161", "1403161", "14693", 
"14693", "14693", "14693", "14693", "14693", "14693", "14693", 
"14693", "14693", "14693", "14693", "14693", "14693", "14693", 
"14693", "14693", "14693", "14693", "1603923", "1603923", "1603923", 
"1603923", "1603923", "1603923", "1603923", "1603923", "1603923", 
"1603923", "1603923", "1603923", "1603923", "217346", "217346", 
"217346", "217346", "217346", "217346", "217346", "217346", "217346", 
"217346", "217346", "217346", "217346", "2969", "2969", "2969", 
"2969", "2969", "2969", "2969", "2969", "2969", "2969", "2969", 
"2969", "2969", "2969", "2969", "2969", "318154", "318154", "318154", 
"42582", "42582", "42582", "42582", "42582", "42582", "42582", 
"42582", "42582", "42582", "42582", "42582", "42582", "42582", 
"42582", "42582", "42582", "5513", "5513", "5513", "5513", "5513", 
"5513", "5513", "5513", "5513", "5513", "5513", "5513", "5513", 
"5513", "5513", "5513", "717423", "717423", "717423", "717423", 
"717423", "717423", "717423", "717423", "717423", "80661", "80661", 
"80661", "80661", "80661", "80661", "80661", "80661", "80661", 
"80661", "80661", "80661", "80661", "80661", "80661", "80661", 
"823768", "823768", "823768", "823768", "823768", "823768", "860730", 
"860730", "860730", "860730", "860730", "860730", "860730", "860730", 
"860730", "860730", "860730", "860730", "860730", "860730", "860730", 
"860730", "860730", "860730", "860730", "860730", "860730", "863157", 
"863157", "863157", "863157", "863157", "863157", "863157", "891024", 
"891024", "891024", "891024", "891024", "891024", "891024", "891024", 
"891024", "891024", "891024", "891024", "891024", "891024", "891024", 
"891024", "891024", "912242", "912242", "912242", "912242", "912242", 
"912242", "912242", "912242", "912242", "912242", "912242", "912242", 
"912242", "912242", "912242"), year_col = c("2003", "2004", "2005", 
"2006", "2010", "2011", "2012", "2013", "2014", "2015", "2016", 
"2017", "2018", "2005", "2005", "2006", "2007", "2018", "2011", 
"2012", "2013", "2014", "2015", "2016", "2017", "2018", "2010", 
"2011", "2012", "2013", "2014", "2015", "2016", "2017", "2018", 
"2006", "2007", "2008", "2009", "2010", "2011", "2012", "2013", 
"2006", "2007", "2007", "2008", "2008", "2009", "2010", "2011", 
"2012", "2013", "2014", "2015", "2016", "2017", "2018", "2001", 
"2002", "2003", "2004", "2005", "2006", "2007", "2008", "2009", 
"2009", "2010", "2010", "2011", "2012", "2013", "2014", "2015", 
"2016", "2017", "2001", "2002", "2003", "2006", "2007", "2010", 
"2011", "2012", "2013", "2014", "2015", "2016", "2017", "2006", 
"2007", "2008", "2009", "2010", "2011", "2012", "2013", "2014", 
"2015", "2016", "2017", "2018", "2003", "2004", "2005", "2005", 
"2006", "2006", "2007", "2008", "2009", "2010", "2011", "2012", 
"2013", "2014", "2015", "2016", "2012", "2013", "2014", "2002", 
"2003", "2004", "2005", "2006", "2007", "2008", "2009", "2010", 
"2011", "2012", "2013", "2014", "2015", "2016", "2017", "2018", 
"2002", "2003", "2004", "2005", "2006", "2007", "2008", "2009", 
"2010", "2011", "2012", "2013", "2014", "2015", "2016", "2017", 
"2010", "2011", "2012", "2013", "2014", "2015", "2016", "2017", 
"2018", "2003", "2004", "2005", "2008", "2009", "2010", "2011", 
"2012", "2013", "2014", "2015", "2015", "2016", "2016", "2017", 
"2018", "2002", "2003", "2004", "2005", "2006", "2007", "2013", 
"2014", "2014", "2015", "2015", "2015", "2015", "2015", "2015", 
"2016", "2016", "2016", "2016", "2016", "2016", "2017", "2017", 
"2017", "2017", "2018", "2018", "2012", "2013", "2014", "2015", 
"2016", "2017", "2018", "2002", "2003", "2004", "2005", "2006", 
"2007", "2008", "2009", "2010", "2011", "2012", "2013", "2014", 
"2015", "2016", "2017", "2018", "2004", "2005", "2006", "2007", 
"2008", "2009", "2010", "2011", "2012", "2013", "2014", "2015", 
"2016", "2017", "2018"), value = c(0.216923921856892, 0.0603860767654988, 
0.0867705092874343, 0.0554689419488736, 0.266461630651174, 0.232882353118748, 
0.233228956106436, 0.429109851221986, 0.143603832921258, 0.103581819938778, 
0.120198289568911, 0.282929320804091, 0.0588034678951873, 0.12687772731908, 
0.125590088599959, 0.152638288234646, 0.0573970894882795, 0.0223190695082444, 
0.0772388947602825, 0.0610470146184067, 0.0498844812709738, 0.0417502588683055, 
0.0709093243272215, 0.0479449809497808, 0.0446101230405492, 0.0688431182443062, 
0.322153599918842, 0.918497422437913, 0.191262613179677, 0.174156380277032, 
0.100398155108399, 0.211635149112375, 0.151663657345723, 0.135964184604919, 
0.198866500317427, 0.552660373888907, 0.11297372482846, 0.136211462866787, 
0.176921966808464, 0.0955796571576842, 0.0626512313467531, 0.056475829268418, 
0.0515077695785625, 0.989753358949878, 0.704651534011912, 0.711391842791094, 
0.18960162095846, 0.398094129350466, 0.277168236576228, 0.140473154935381, 
0.37418849888887, 0.020866808171222, 0.811997463208844, 0.0925009217897454, 
0.0557398356908807, 0.131320037363996, 0.0250001227960738, 0.380924496028996, 
0.0457087256845359, 0.0533845514626979, 0.0592889458291134, 0.074605511680385, 
0.149848111459932, 0.293446525042805, 0.0450946810591767, 0.0445204835445374, 
0.0167243483862213, 0.108009105253104, 0.209377934871732, 0.0724495760891165, 
0.0365343767242302, 0.0680387334780893, 0.0359449679962688, 0.0914223347587717, 
0.250090013711688, 0.0285907018967329, 0.0332181584655898, 0.999507574272309, 
0.105444012216585, 0.187575638790271, 0.162379131764237, 0.102700208200375, 
0.09178360736775, 0.163465216529218, 0.0729776861513878, 0.236016339389482, 
0.0588574175467754, 0.0550992673681143, 0.122626443155744, 0.0918424812269966, 
0.146120253221757, 0.0577653939175298, 0.0348411397934349, 0.0473716341969161, 
0.0178728863162361, 0.0527367807713118, 0.0479903568707429, 0.0730610057439529, 
0.0930097224453987, 0.0658436634783048, 0.190704292535876, 0.0358726716365566, 
0.0307972881129445, 0.0189626337594551, 0.000000000000000111022302462516, 
0.000000000000000111022302462516, 0.762963505977553, 0.000000000000000111022302462516, 
0.762963505977553, 0.000000000000000111022302462516, 0.000000000000000111022302462516, 
0.000000000000000111022302462516, 0.000000000000000111022302462516, 
0.000000000000000111022302462516, 0.000000000000000111022302462516, 
0.000000000000000111022302462516, 0.000000000000000111022302462516, 
0.000000000000000111022302462516, 0.000000000000000111022302462516, 
0.0145186393898035, 0.0188147774806442, 0.00921565692607718, 
0.16582527531479, 0.0687727632750558, 0.204529458170385, 0.0716012445553365, 
0.0718954998189065, 0.0277281993654519, 0.0674740371917119, 0.072012888372702, 
0.0734119880053119, 0.0673902818072857, 0.0331949118036344, 0.0574456113479402, 
0.0551005875496144, 0.0830738397892672, 0.0612316674931673, 0.0375821407994346, 
0.0540598964852267, 0.937300545235406, 0.764273407958804, 0.310278765419343, 
0.115409839050865, 0.180334165153821, 0.0744074972905282, 0.0539052279373005, 
0.1155899920352, 0.607098700786289, 0.475193951744525, 0.500999897379361, 
0.452694613243503, 0.339737698721805, 0.119110509053642, 0.414782001570446, 
0.176478247451898, 0.0118929448699869, 0.0646300143287915, 0.0658444055780221, 
0.0365112498265521, 0.0290522222936732, 0.0361261162544974, 0.55126787604404, 
0.00787685460932497, 0.0125336454598935, 0.171603271017195, 0.318299749392132, 
0.0664370130236998, 0.0919497640181351, 0.211041532792102, 0.269396236145632, 
0.187866298055647, 0.059969004318524, 0.0559161287155419, 0.0885832743614622, 
0.451411471333919, 0.439612859187183, 0.0514195289243331, 0.0498694766579967, 
0.0919350781544515, 0.223510070474301, 0, 1, 0.0646708402796897, 
0.0825731393695558, 0.10537056250177, 0.0696122176816857, 0.0424418302212671, 
0.0478766288959678, 0.955958881662633, 0.0138346259942443, 0.0138346259942443, 
0.95933698140034, 0.95556392397955, 0.95556392397955, 0.0612316691720032, 
0.0327534828616965, 0.959216097627497, 0.0327534828616965, 0.959216097627497, 
0.958839379113644, 0.0494138799831281, 0.0702859969103062, 0.957760286279822, 
0.959990040058998, 0.0665164367326447, 0.953377974332167, 0.0852324991199898, 
0.0335958391478673, 0.0938256364517924, 0.0288861431449183, 0.0927286486173231, 
0.10569451898322, 0.0495326021541909, 0.0472476582675493, 0.17378604346306, 
0.403552152727797, 0.264561987793531, 0.122322841743767, 0.0776316684095162, 
0.205840395760264, 0.17260647446031, 0.105649001057342, 0.0647680128457088, 
0.0386101299858582, 0.0355308548901323, 0.179115185681647, 0.0360302297350318, 
0.0428351049609113, 0.0731618616948397, 0.098280569756635, 0.120526033566675, 
0.0522118355470956, 0.403101292203212, 0.0891701855562025, 0.0810865491545684, 
0.096919562507692, 0.122031932526703, 0.16186248758732, 0.168732393772878, 
0.0279987511755512, 0.0309210793450959, 0.341002639185584, 0.0546379469643276, 
0.0305462198188776, 0.031657147750161, 0.0364650920878082)), row.names = c(NA, 
-246L), class = "data.frame")

1 Ответ

2 голоса
/ 20 апреля 2019

В конце нам нужен map2, потому что мы применяем augment к соответствующим друг другу элементам списочных столбцов 'kmeans' и 'value'.

library(tidyverse)
library(broom)
# Per-year k-means pipeline with an extra `augmented` list-column: map2()
# walks the `kmeans` and `value` columns in parallel and calls augment() on
# each fit together with the nested data frame from the same row.
kclust2 <- results %>%
  as_tibble() %>%
  select(-id_row) %>%
  group_by(year_row) %>%
  nest(.key = "value") %>%
  filter(map_int(value, nrow) > 4) %>%
  mutate(
    kmeans = map(
      value,
      function(year_data) {
        kmeans(year_data[[1]], centers = 4, iter.max = 10, nstart = 1)
      }
    ),
    tidied = map(kmeans, tidy),
    glanced = map(kmeans, glance),
    augmented = map2(
      kmeans,
      value,
      function(fit, year_data) augment(fit, year_data)
    )
  )
...