Я хочу индексировать значения из таблицы, которая состоит из нескольких обычных столбцов и нескольких списков столбцов. Базовая структура такова:
# A tibble: 1,200 x 13
city country month data model modelgam glance_lm rsq glance_gam aic_gam tidy_lm augment_lm res
<chr> <chr> <dbl> <list> <list> <list> <list> <dbl> <list> <dbl> <list> <list> <list>
1 Abidjan Côte D'Ivoire 1 <tibble [114 × 6]> <S3: lm> <S3: Gam> <tibble [1 × 11]> 0.233 <tibble [1 × 6]> 154. <tibble [2 × 5]> <tibble [114 × 9]> <dbl [114]>
2 Abidjan Côte D'Ivoire 2 <tibble [114 × 6]> <S3: lm> <S3: Gam> <tibble [1 × 11]> 0.342 <tibble [1 × 6]> 164. <tibble [2 × 5]> <tibble [114 × 9]> <dbl [114]>
3 Abidjan Côte D'Ivoire 3 <tibble [114 × 6]> <S3: lm> <S3: Gam> <tibble [1 × 11]> 0.274 <tibble [1 × 6]> 152. <tibble [2 × 5]> <tibble [114 × 9]> <dbl [114]>
4 Abidjan Côte D'Ivoire 4 <tibble [114 × 6]> <S3: lm> <S3: Gam> <tibble [1 × 11]> 0.376 <tibble [1 × 6]> 145. <tibble [2 × 5]> <tibble [114 × 9]> <dbl [114]>
5 Abidjan Côte D'Ivoire 5 <tibble [114 × 6]> <S3: lm> <S3: Gam> <tibble [1 × 11]> 0.451 <tibble [1 × 6]> 112. <tibble [2 × 5]> <tibble [114 × 9]> <dbl [114]>
6 Abidjan Côte D'Ivoire 6 <tibble [114 × 6]> <S3: lm> <S3: Gam> <tibble [1 × 11]> 0.419 <tibble [1 × 6]> 87.4 <tibble [2 × 5]> <tibble [114 × 9]> <dbl [114]>
7 Abidjan Côte D'Ivoire 7 <tibble [114 × 6]> <S3: lm> <S3: Gam> <tibble [1 × 11]> 0.249 <tibble [1 × 6]> 129. <tibble [2 × 5]> <tibble [114 × 9]> <dbl [114]>
8 Abidjan Côte D'Ivoire 8 <tibble [114 × 6]> <S3: lm> <S3: Gam> <tibble [1 × 11]> 0.228 <tibble [1 × 6]> 153. <tibble [2 × 5]> <tibble [114 × 9]> <dbl [114]>
9 Abidjan Côte D'Ivoire 9 <tibble [114 × 6]> <S3: lm> <S3: Gam> <tibble [1 × 11]> 0.230 <tibble [1 × 6]> 124. <tibble [2 × 5]> <tibble [113 × 9]> <dbl [113]>
10 Abidjan Côte D'Ivoire 10 <tibble [113 × 6]> <S3: lm> <S3: Gam> <tibble [1 × 11]> 0.292 <tibble [1 × 6]> 98.7 <tibble [2 × 5]> <tibble [113 × 9]> <dbl [113]>
# ... with 1,190 more rows
Если я хочу вытащить значение p.value из glance_lm для первой записи (например), я могу использовать двойные скобки (поскольку это список), например:
> cmodels_details$glance_lm[[1]]$p.value
[1] 5.430896e-08
Однако, если мне нужны все значения p.value, это не будет работать:
> cmodels_details$glance_lm[[]]$p.value
NULL
В моем случае я специально хочу вывести оценки для двух моделей из их списка и построить линии регрессии на рисунке для каждой записи. Таким образом, возможность захвата перехватов и уклонов (по одной записи за раз), например:
cmodels_details$tidy_lm[[1]]$estimate
[1] 26.379383982 0.007833912
или просто перехват:
> cmodels_details$tidy_lm[[1]]$estimate[1]
[1] 26.37938
не очень помогает. В конечном счете, я хотел бы добавить некоторый код ggplot в структуру данных, которая отображает фигуры для каждой записи, но для того, чтобы сделать линии регрессии для двух моделей (lm и gam), мне нужно получить оценки из таблицы tibble каждой записи. (или список тиблов)
Вот фрагмент данных, которые я использую, и код, чтобы получить правильную форму.
> dput(small_tb)
structure(list(dt = structure(c(14245, 14276, 14304, 14335, 14365,
14396, 14426, 14457, 14488, 14518, 14549, 14579, 14610, 14641,
14669, 14700, 14730, 14761, 14791, 14822, 14853, 14883, 14914,
14944, 14975, 15006, 15034, 15065, 15095, 15126, 15156, 15187,
15218, 15248, 15279, 15309, 15340, 15371, 15400, 15431, 15461,
15492, 15522, 15553, 15584, 15614, 15645, 15675, 15706, 15737,
15765, 15796, 15826, 15857, 15887, 15918, 15949, 14245, 14276,
14304, 14335, 14365, 14396, 14426, 14457, 14488, 14518, 14549,
14579, 14610, 14641, 14669, 14700, 14730, 14761, 14791, 14822,
14853, 14883, 14914, 14944, 14975, 15006, 15034, 15065, 15095,
15126, 15156, 15187, 15218, 15248, 15279, 15309, 15340, 15371,
15400, 15431, 15461, 15492, 15522, 15553, 15584, 15614, 15645,
15675, 15706, 15737, 15765, 15796, 15826, 15857, 15887, 15918,
15949), class = "Date"), averagetemperature = c(-4.299, 1.454,
4.808, 7.623, 12.627, 17.305, 19.792, 21.724, 19.502, 11.22,
10.261, 1.563, -0.595, 0.771, 6.489, 10.935, 13.803, 19.055,
24.106, 24.948, 19.229, 14.582, 8.539, -0.071, -1.582, 0.276,
3.474, 7.383, 12.133, 18.011, 24.412, 23.414, 18.331, 13.837,
9.555, 5.327, 2.67, 3.698, 12.145, 8.383, 14.956, 19.532, 25.909,
22.778, 18.693, 12.229, 7.27, 5.592, 1.056, -0.509, 1.323, 6.644,
13.734, 17.913, 21.914, 22.23, 19.977, -5.36, -0.372, 3.579,
10.478, 15.447, 19.058, 21.103, 22.769, 17.043, 10.364, 8.217,
-0.624, -2.359, -1.456, 6.715, 12.076, 17.119, 21.943, 24.789,
22.67, 19.172, 11.911, 5.876, -2.165, -4.463, -1.244, 3.474,
10.555, 16.917, 21.032, 24.564, 22.13, 19.301, 12.001, 8.013,
2.987, -0.0410000000000004, 2.185, 8.734, 10.324, 17.779, 20.165,
24.479, 22.731, 18.177, 12.436, 4.103, 2.586, -0.968, -1.365,
2.518, 9.723, 15.544, 20.892, 24.722, 21.001, 17.408), averagetemperatureuncertainty = c(0.336,
0.328, 0.247, 0.348, 0.396, 0.554, 0.481, 0.315, 0.225, 0.162,
0.372, 0.348, 0.348, 0.364, 0.357, 0.538, 0.892, 0.33, 0.325,
0.36, 0.322, 0.241, 0.307, 0.326, 0.522, 0.446, 0.279, 0.265,
0.733, 0.773, 0.255, 0.404, 0.173, 0.154, 0.334, 0.483, 0.727,
0.567, 0.369, 0.347, 0.835, 0.519, 0.516, 0.42, 0.329, 0.333,
0.263, 0.537, 0.528, 0.473, 0.275, 0.462, 0.863, 0.669, 0.322,
0.373, 1.033, 0.288, 0.214, 0.14, 0.259, 0.267, 0.452, 0.348,
0.277, 0.22, 0.153, 0.181, 0.228, 0.314, 0.319, 0.235, 0.135,
0.2, 0.387, 0.28, 0.257, 0.165, 0.154, 0.174, 0.436, 0.355, 0.33,
0.167, 0.222, 0.312, 0.42, 0.438, 0.163, 0.16, 0.23, 0.298, 0.466,
0.493, 0.253, 0.276, 0.258, 0.301, 0.39, 0.403, 0.224, 0.269,
0.344, 0.298, 0.257, 0.29, 0.241, 0.255, 0.355, 0.281, 0.273,
0.279, 0.323, 1.048), city = c("Chicago", "Chicago", "Chicago",
"Chicago", "Chicago", "Chicago", "Chicago", "Chicago", "Chicago",
"Chicago", "Chicago", "Chicago", "Chicago", "Chicago", "Chicago",
"Chicago", "Chicago", "Chicago", "Chicago", "Chicago", "Chicago",
"Chicago", "Chicago", "Chicago", "Chicago", "Chicago", "Chicago",
"Chicago", "Chicago", "Chicago", "Chicago", "Chicago", "Chicago",
"Chicago", "Chicago", "Chicago", "Chicago", "Chicago", "Chicago",
"Chicago", "Chicago", "Chicago", "Chicago", "Chicago", "Chicago",
"Chicago", "Chicago", "Chicago", "Chicago", "Chicago", "Chicago",
"Chicago", "Chicago", "Chicago", "Chicago", "Chicago", "Chicago",
"New York", "New York", "New York", "New York", "New York", "New York",
"New York", "New York", "New York", "New York", "New York", "New York",
"New York", "New York", "New York", "New York", "New York", "New York",
"New York", "New York", "New York", "New York", "New York", "New York",
"New York", "New York", "New York", "New York", "New York", "New York",
"New York", "New York", "New York", "New York", "New York", "New York",
"New York", "New York", "New York", "New York", "New York", "New York",
"New York", "New York", "New York", "New York", "New York", "New York",
"New York", "New York", "New York", "New York", "New York", "New York",
"New York", "New York", "New York"), country = c("United States",
"United States", "United States", "United States", "United States",
"United States", "United States", "United States", "United States",
"United States", "United States", "United States", "United States",
"United States", "United States", "United States", "United States",
"United States", "United States", "United States", "United States",
"United States", "United States", "United States", "United States",
"United States", "United States", "United States", "United States",
"United States", "United States", "United States", "United States",
"United States", "United States", "United States", "United States",
"United States", "United States", "United States", "United States",
"United States", "United States", "United States", "United States",
"United States", "United States", "United States", "United States",
"United States", "United States", "United States", "United States",
"United States", "United States", "United States", "United States",
"United States", "United States", "United States", "United States",
"United States", "United States", "United States", "United States",
"United States", "United States", "United States", "United States",
"United States", "United States", "United States", "United States",
"United States", "United States", "United States", "United States",
"United States", "United States", "United States", "United States",
"United States", "United States", "United States", "United States",
"United States", "United States", "United States", "United States",
"United States", "United States", "United States", "United States",
"United States", "United States", "United States", "United States",
"United States", "United States", "United States", "United States",
"United States", "United States", "United States", "United States",
"United States", "United States", "United States", "United States",
"United States", "United States", "United States", "United States",
"United States"), latitude = c("42.59N", "42.59N", "42.59N",
"42.59N", "42.59N", "42.59N", "42.59N", "42.59N", "42.59N", "42.59N",
"42.59N", "42.59N", "42.59N", "42.59N", "42.59N", "42.59N", "42.59N",
"42.59N", "42.59N", "42.59N", "42.59N", "42.59N", "42.59N", "42.59N",
"42.59N", "42.59N", "42.59N", "42.59N", "42.59N", "42.59N", "42.59N",
"42.59N", "42.59N", "42.59N", "42.59N", "42.59N", "42.59N", "42.59N",
"42.59N", "42.59N", "42.59N", "42.59N", "42.59N", "42.59N", "42.59N",
"42.59N", "42.59N", "42.59N", "42.59N", "42.59N", "42.59N", "42.59N",
"42.59N", "42.59N", "42.59N", "42.59N", "42.59N", "40.99N", "40.99N",
"40.99N", "40.99N", "40.99N", "40.99N", "40.99N", "40.99N", "40.99N",
"40.99N", "40.99N", "40.99N", "40.99N", "40.99N", "40.99N", "40.99N",
"40.99N", "40.99N", "40.99N", "40.99N", "40.99N", "40.99N", "40.99N",
"40.99N", "40.99N", "40.99N", "40.99N", "40.99N", "40.99N", "40.99N",
"40.99N", "40.99N", "40.99N", "40.99N", "40.99N", "40.99N", "40.99N",
"40.99N", "40.99N", "40.99N", "40.99N", "40.99N", "40.99N", "40.99N",
"40.99N", "40.99N", "40.99N", "40.99N", "40.99N", "40.99N", "40.99N",
"40.99N", "40.99N", "40.99N", "40.99N", "40.99N", "40.99N"),
longitude = c("87.27W", "87.27W", "87.27W", "87.27W", "87.27W",
"87.27W", "87.27W", "87.27W", "87.27W", "87.27W", "87.27W",
"87.27W", "87.27W", "87.27W", "87.27W", "87.27W", "87.27W",
"87.27W", "87.27W", "87.27W", "87.27W", "87.27W", "87.27W",
"87.27W", "87.27W", "87.27W", "87.27W", "87.27W", "87.27W",
"87.27W", "87.27W", "87.27W", "87.27W", "87.27W", "87.27W",
"87.27W", "87.27W", "87.27W", "87.27W", "87.27W", "87.27W",
"87.27W", "87.27W", "87.27W", "87.27W", "87.27W", "87.27W",
"87.27W", "87.27W", "87.27W", "87.27W", "87.27W", "87.27W",
"87.27W", "87.27W", "87.27W", "87.27W", "74.56W", "74.56W",
"74.56W", "74.56W", "74.56W", "74.56W", "74.56W", "74.56W",
"74.56W", "74.56W", "74.56W", "74.56W", "74.56W", "74.56W",
"74.56W", "74.56W", "74.56W", "74.56W", "74.56W", "74.56W",
"74.56W", "74.56W", "74.56W", "74.56W", "74.56W", "74.56W",
"74.56W", "74.56W", "74.56W", "74.56W", "74.56W", "74.56W",
"74.56W", "74.56W", "74.56W", "74.56W", "74.56W", "74.56W",
"74.56W", "74.56W", "74.56W", "74.56W", "74.56W", "74.56W",
"74.56W", "74.56W", "74.56W", "74.56W", "74.56W", "74.56W",
"74.56W", "74.56W", "74.56W", "74.56W", "74.56W", "74.56W",
"74.56W")), row.names = c(NA, -114L), class = c("tbl_df",
"tbl", "data.frame"), spec = structure(list(cols = list(dt = structure(list(
format = ""), class = c("collector_date", "collector")),
AverageTemperature = structure(list(), class = c("collector_double",
"collector")), AverageTemperatureUncertainty = structure(list(), class = c("collector_double",
"collector")), City = structure(list(), class = c("collector_character",
"collector")), Country = structure(list(), class = c("collector_character",
"collector")), Latitude = structure(list(), class = c("collector_character",
"collector")), Longitude = structure(list(), class = c("collector_character",
"collector"))), default = structure(list(), class = c("collector_guess",
"collector"))), class = "col_spec"))
И некоторый код:
# load libraries
library(tidyverse)
library(lubridate)
library(gam)
# library(broom)
# library(purrr)
# Load the data from dput() and take a look
summary(gltd)
str(gltd)
names(gltd)
# make lowercasae
gltd <- rename_all(gltd, tolower)
names(gltd)
# nest data, 100 major cities
by_city_month <- gltd %>%
filter(year(dt) >= 1900) %>%
mutate(month = month(dt)) %>%
mutate(yr1900 = year(dt) - 1900) %>%
group_by(city, country, month) %>%
nest()
by_city_month
# define function for linear model
city_model_lm <- function(df) {
lm(averagetemperature ~ yr1900, data = df)
}
# define function for GAM
city_model_gam <- function(df) {
gam(averagetemperature ~ s(yr1900), data = df)
}
# create columns for the models
cmodels <- by_city_month %>%
mutate(model = map(data, city_model_lm),
modelgam = map(data, city_model_gam)
)
cmodels_details <- cmodels %>%
mutate(
glance_lm = model %>% map(glance), #model summary: rsquared...
rsq = glance_lm %>% map_dbl("r.squared"), #extract rsquared
glance_gam = modelgam %>% map(broom::glance), #GAM model summary
aic_gam = glance_gam %>% map_dbl("AIC"), #extract AIC
tidy_lm = model %>% map(tidy), #model estimate: coeff...
augment_lm = model %>% map(augment), #observation stats: resid,hat...
res = augment_lm %>% map(".resid") #extract resid
)
Любой совет будет наиболее ценным.