Я пытаюсь преобразовать переменную с помощью экспоненциальной функции, однако все получаемые значения равны 1.
dat %>%
mutate(estimate_exp = exp(estimate))
Это даёт:
# A tibble: 30 x 3
# Groups: .id [30]
.id estimate estimate_exp
<chr> <dbl> <dbl>
1 COP 0.000363 1.00
2 D 0.000128 1.00
3 MKC 0.000661 1.00
4 XRAY -0.000220 1.00
5 AZO 0.000646 1.00
Что я здесь делаю не так?
Данные:
dat <- structure(list(.id = c("COP", "D", "MKC", "XRAY", "AZO", "VZ",
"BAX", "SIVB", "PEP", "FBHS", "AMCR", "IQV", "CHD", "AAP", "DVA",
"EA", "AIZ", "T", "BR", "SRE", "AWK", "HON", "WRB", "LYV", "CL",
"MAR", "AVGO", "BLK", "ES", "MMC"), estimate = c(0.000363241058456668,
0.000128341996870166, 0.000660854807743273, -0.000220256089065116,
0.000645752203131285, 0.000374454782366187, 0.000476331305980961,
0.000234315048086034, 0.000267789333706011, -0.000168431145336845,
-0.0000265813880423463, 0.000666131639082966, 0.000548256342531457,
0.000323886569880824, -0.000118885876771656, -0.0000806822483228907,
0.000251736455873582, -0.0000123071469196856, 0.000654303222596272,
0.00034295710655616, 0.000529722246219021, 0.000402357094383043,
0.000483332246532965, 0.000831519092078686, 0.00000597120397089236,
0.000349946739694226, 0.00035884271760866, 0.000148917003006365,
0.00041657220576363, 0.000380141826292226)), row.names = c(NA,
-30L), groups = structure(list(.id = c("AAP", "AIZ", "AMCR",
"AVGO", "AWK", "AZO", "BAX", "BLK", "BR", "CHD", "CL", "COP",
"D", "DVA", "EA", "ES", "FBHS", "HON", "IQV", "LYV", "MAR", "MKC",
"MMC", "PEP", "SIVB", "SRE", "T", "VZ", "WRB", "XRAY"), .rows = structure(list(
14L, 17L, 11L, 27L, 21L, 5L, 7L, 28L, 19L, 13L, 25L, 1L,
2L, 15L, 16L, 29L, 10L, 22L, 12L, 24L, 26L, 3L, 30L, 9L,
8L, 20L, 18L, 6L, 23L, 4L), ptype = integer(0), class = c("vctrs_list_of",
"vctrs_vctr"))), row.names = c(NA, -30L), .drop = FALSE, class = c("tbl_df",
"tbl", "data.frame")), class = c("grouped_df", "tbl_df", "tbl",
"data.frame"))
Информация о сеансе:
R version 3.6.1 (2019-07-05)
Platform: x86_64-pc-linux-gnu (64-bit)
Running under: Ubuntu 18.04.3 LTS
Matrix products: default
BLAS: /usr/lib/x86_64-linux-gnu/blas/libblas.so.3.7.1
LAPACK: /usr/lib/x86_64-linux-gnu/lapack/liblapack.so.3.7.1
locale:
[1] LC_CTYPE=C.UTF-8 LC_NUMERIC=C LC_TIME=C.UTF-8
[4] LC_COLLATE=C.UTF-8 LC_MONETARY=C.UTF-8 LC_MESSAGES=C.UTF-8
[7] LC_PAPER=C.UTF-8 LC_NAME=C LC_ADDRESS=C
[10] LC_TELEPHONE=C LC_MEASUREMENT=C.UTF-8 LC_IDENTIFICATION=C
attached base packages:
[1] grid stats graphics grDevices utils datasets methods base
other attached packages:
[1] rvest_0.3.5 xml2_1.2.2 tibbletime_0.1.3
[4] tsfeatures_1.0.1 gsubfn_0.7 proto_1.0.0
[7] ggmap_3.0.0 ggrepel_0.8.1 directlabels_2018.05.22
[10] patchwork_1.0.0 cowplot_1.0.0 gtable_0.3.0
[13] stargazer_5.2.2 e1071_1.7-3 lightgbm_2.3.2
[16] R6_2.4.1 tensorflow_2.0.0 keras_2.2.5.0
[19] drlib_0.1.1 PRROC_1.3.1 pROC_1.16.1
[22] caret_6.0-85 lattice_0.20-38 Matrix_1.2-17
[25] xgboostExplainer_0.1 broom_0.5.3 imputeTS_3.0
[28] data.table_1.12.8 viridis_0.5.1 viridisLite_0.3.0
[31] CVXR_0.99-7 timetk_0.1.2 forcats_0.4.0
[34] readr_1.3.1 tibble_2.99.99.9014 tidyverse_1.3.0
[37] tsibble_0.8.5 rsample_0.0.5.9000 kableExtra_1.1.0
[40] knitr_1.27 xgboost_0.90.0.2 reticulate_1.14-9001
[43] formatR_1.7 tidyquant_0.5.9 quantmod_0.4-15
[46] TTR_0.23-6 PerformanceAnalytics_1.5.3 xts_0.12-0
[49] zoo_1.8-7 ggplot2_3.3.0.9000 purrr_0.3.3
[52] lubridate_1.7.4 tidyr_1.0.2 future.apply_1.4.0
[55] furrr_0.1.0 future_1.16.0 stringr_1.4.0
[58] stringi_1.4.5 dplyr_0.8.99.9000
loaded via a namespace (and not attached):
[1] readxl_1.3.1 backports_1.1.5 scs_1.3-2
[4] selectr_0.4-2 plyr_1.8.5 splines_3.6.1
[7] gmp_0.5-13.6 listenv_0.8.0 tfruns_1.4
[10] digest_0.6.23 foreach_1.4.7 htmltools_0.4.0
[13] fansi_0.4.1 magrittr_1.5 recipes_0.1.9
[16] globals_0.12.5 modelr_0.1.5 gower_0.2.1
[19] R.utils_2.9.2 anytime_0.3.7 forecast_8.10
[22] tseries_0.10-47 jpeg_0.1-8.1 waterfalls_0.1.2
[25] colorspace_1.4-1 lobstr_1.1.1 haven_2.2.0
[28] xfun_0.12 tcltk_3.6.1 crayon_1.3.4
[31] jsonlite_1.6 zeallot_0.1.0 survival_2.44-1.1
[34] iterators_1.0.12 glue_1.3.1 ipred_0.9-9
[37] webshot_0.5.2 Quandl_2.10.0 ECOSolveR_0.5.3
[40] Rmpfr_0.8-1 scales_1.1.0 stinepack_1.4
[43] DBI_1.1.0 Rcpp_1.0.3 bit_1.1-15.1
[46] stats4_3.6.1 lava_1.6.6 prodlim_2019.11.13
[49] httr_1.4.1 ellipsis_0.3.0 farver_2.0.3
[52] pkgconfig_2.0.3 R.methodsS3_1.7.1 nnet_7.3-12
[55] dbplyr_1.4.2 utf8_1.1.4 labeling_0.3
[58] tidyselect_0.2.99.9000 rlang_0.4.4.9000 reshape2_1.4.3
[61] munsell_0.5.0 cellranger_1.1.0 tools_3.6.1
[64] cli_2.0.1 generics_0.0.2 evaluate_0.14
[67] ModelMetrics_1.2.2.1 bit64_0.9-7 fs_1.3.1
[70] RgoogleMaps_1.4.5.2 nlme_3.1-142 whisker_0.4
[73] R.oo_1.23.0 compiler_3.6.1 rstudioapi_0.10
[76] png_0.1-7 curl_4.3 testthat_2.3.1
[79] reprex_0.3.0 desc_1.2.0 urca_1.3-0
[82] vctrs_0.2.99.9005 pillar_1.4.3.9000 lifecycle_0.1.0
[85] lmtest_0.9-37 bitops_1.0-6 gridExtra_2.3
[88] codetools_0.2-16 pkgload_1.0.2 MASS_7.3-51.4
[91] assertthat_0.2.1 rprojroot_1.3-2 rjson_0.2.20
[94] withr_2.1.2 fracdiff_1.5-1 parallel_3.6.1
[97] hms_0.5.3 quadprog_1.5-8 rpart_4.1-15
[100] timeDate_3043.102 class_7.3-15 rmarkdown_2.1
[103] base64enc_0.1-3
ОБНОВЛЕНИЕ:
Я перезапустил сеанс R и теперь получаю следующий вывод. Когда я запускаю:
models %>%
unnest(tidymodels) %>%
mutate(estimate_exp = exp(estimate)) %>%
select(estimate_exp)
# A tibble: 60 x 2
# Groups: .id [30]
.id estimate_exp
<chr> <dbl>
1 LEN 40.1
2 LEN 1.00
3 USB 8.47
4 USB 1.00
5 KMI 2.64
6 KMI 1.00
7 CSX 0.00592
8 CSX 1.00
9 AMT 0.000845
10 AMT 1.00
Однако я хочу отфильтровать свободный член (intercept). Я запускаю:
models %>%
unnest(tidymodels) %>%
filter(term == "date") %>%
mutate(estimate_exp = exp(estimate)) %>%
select(estimate_exp)
что возвращает:
# A tibble: 30 x 2
# Groups: .id [30]
.id estimate_exp
<chr> <dbl>
1 LEN 1.00
2 USB 1.00
3 KMI 1.00
4 CSX 1.00
5 AMT 1.00
6 VRSK 1.00
7 CTSH 1.00
ОБНОВЛЕНИЕ: новые данные:
d <- structure(list(date = structure(c(17445, 17410, 17707, 18108,
17273, 17308, 17434, 18179, 17926, 18138, 18169, 17486, 17924,
17892, 17779, 17707, 17778, 17819, 17912, 18018, 18030, 17966,
17451, 18052, 17616, 17779, 17891, 17854, 17290, 17931, 17585,
18047, 17732, 17360, 18191, 18002, 17260, 17616, 18040, 17954,
17556, 17260, 17781, 17197, 17470, 17367, 18144, 17738, 18130,
17904, 17548, 18033, 17352, 17955, 18038, 17918, 17553, 18218,
18117, 17683, 18179, 17722, 17667, 17577, 17220, 17301, 17924,
17883, 17513, 17263, 17170, 17848, 17392, 17445, 17696, 17183,
17840, 18061, 17632, 17604, 17563, 18173, 17354, 17560, 17707,
17506, 18240, 18092, 17284, 17185, 17389, 18030, 18144, 17449,
17262, 17357, 17175, 17371, 17241, 17528, 18094, 17801, 18211,
17836, 17213, 17802, 17347, 17409, 17388, 17961, 17662, 17430,
17948, 18257, 17364, 17521, 17534, 17632, 17633, 17213, 17941,
17850, 17564, 17381, 18239, 17408, 17682, 17205, 17618, 17330,
18248, 17709, 17394, 17448, 17471, 17256, 18029, 17388, 17605,
17606, 18236, 17721, 17766, 18246, 17646, 17645, 17584, 17443,
17204, 18075), class = "Date"), .id = c("AES", "AES", "AES",
"AES", "AES", "AMT", "AMT", "AMT", "AMT", "AMT", "ANET", "ANET",
"ANET", "ANET", "ANET", "BSX", "BSX", "BSX", "BSX", "BSX", "CAH",
"CAH", "CAH", "CAH", "CAH", "CSX", "CSX", "CSX", "CSX", "CSX",
"CTSH", "CTSH", "CTSH", "CTSH", "CTSH", "DRE", "DRE", "DRE",
"DRE", "DRE", "FLS", "FLS", "FLS", "FLS", "FLS", "FMC", "FMC",
"FMC", "FMC", "FMC", "GL", "GL", "GL", "GL", "GL", "IDXX", "IDXX",
"IDXX", "IDXX", "IDXX", "IRM", "IRM", "IRM", "IRM", "IRM", "KHC",
"KHC", "KHC", "KHC", "KHC", "KMI", "KMI", "KMI", "KMI", "KMI",
"LEN", "LEN", "LEN", "LEN", "LEN", "LH", "LH", "LH", "LH", "LH",
"NI", "NI", "NI", "NI", "NI", "PG", "PG", "PG", "PG", "PG", "PKI",
"PKI", "PKI", "PKI", "PKI", "PNR", "PNR", "PNR", "PNR", "PNR",
"STE", "STE", "STE", "STE", "STE", "TGT", "TGT", "TGT", "TGT",
"TGT", "TSN", "TSN", "TSN", "TSN", "TSN", "TXT", "TXT", "TXT",
"TXT", "TXT", "USB", "USB", "USB", "USB", "USB", "V", "V", "V",
"V", "V", "VAR", "VAR", "VAR", "VAR", "VAR", "VRSK", "VRSK",
"VRSK", "VRSK", "VRSK", "WRB", "WRB", "WRB", "WRB", "WRB"), logPrice = c(2.31262690481122,
2.30635169045117, 2.49697165732692, 2.80575823103745, 2.31252086928585,
4.80346125693376, 4.87348396310791, 5.41731548591334, 5.12092195236532,
5.43015970101662, 5.4761287594931, 5.44574551787296, 5.37212532620251,
5.32218037657676, 5.60686770785711, 3.46947885794531, 3.58490683599171,
3.59539261102047, 3.60059451193888, 3.60794048450097, 3.80075118572202,
3.84870347435354, 4.10304216153996, 3.73809095807872, 4.05218391168489,
4.2915093418645, 4.10903510912515, 4.26117499880098, 3.90724989222571,
4.1973687457803, 4.38670773620216, 4.11642557242379, 4.38846510069663,
4.18500121904317, 4.0863504610367, 3.37128269943617, 3.17006958256952,
3.18009021384426, 3.40477500765288, 3.36524445148554, 3.77389751034243,
3.8211151013729, 3.93903725264536, 3.84438609991795, 3.74785342022987,
4.1492065728268, 4.46246432202498, 4.31288943580795, 4.4460987306459,
4.20926604086005, 4.50684788830804, 4.46495030377449, 4.32240935765901,
4.40604986474117, 4.47700696462918, 5.30484599857581, 5.19822073343059,
5.56271801387212, 5.63070980997644, 5.37231105109975, 3.45598934024689,
3.48843809068652, 3.36004231084482, 3.37536460349664, 3.40482080911292,
4.37518653191793, 3.79395975051401, 3.77783910282242, 4.27644505291608,
4.38906386518127, 2.95111736585753, 2.78391712539222, 2.8366591650958,
2.8361334880473, 2.74697785849833, 3.76518525748053, 3.7879280690716,
3.97721446335609, 4.05622744787957, 4.08519037353031, 5.15352274119173,
5.11889157697021, 5.02025551766473, 5.1824013425652, 5.23383215065638,
3.22833126100395, 3.28876491862797, 3.36052072380224, 3.10764723681244,
3.01049166587651, 4.43882718935099, 4.64091289635708, 4.7979409838123,
4.44188794171029, 4.40192526226385, 4.21688739280126, 3.96749580052182,
4.23110131236768, 4.01569559534048, 4.28884091285977, 3.61520549161055,
3.74522408275384, 3.76012095221071, 3.69438149011566, 3.60258850846041,
4.72667073866304, 4.37116125716819, 4.44195754534013, 4.40896534484866,
4.78073885158278, 4.1903947560273, 3.98590899008732, 4.24887435589091,
4.85576407363082, 3.89256022837633, 4.3526464453551, 4.33461888702626,
4.21898405338856, 4.21089132289427, 4.11246263419644, 3.98734563102018,
4.04207282440258, 4.07526104130694, 3.88859502085857, 3.79300948803953,
3.87556692475429, 3.86121658197339, 3.88966687912656, 3.8628154921807,
3.89593807372985, 5.21836789911083, 4.86421073302128, 4.62157623772135,
4.65666772951537, 4.69390605684972, 4.51228702558803, 4.86321746876136,
4.57109622634756, 4.83134910191856, 4.82703288536157, 4.98750307545778,
4.70767708896336, 4.75852663335337, 5.00856650228809, 4.66229597874042,
3.88036423339266, 3.79985582131739, 3.77971436208379, 3.77679041802056,
4.18548608036901)), groups = structure(list(.id = c("AES", "AMT",
"ANET", "BSX", "CAH", "CSX", "CTSH", "DRE", "FLS", "FMC", "GL",
"IDXX", "IRM", "KHC", "KMI", "LEN", "LH", "NI", "PG", "PKI",
"PNR", "STE", "TGT", "TSN", "TXT", "USB", "V", "VAR", "VRSK",
"WRB"), .rows = structure(list(1:5, 6:10, 11:15, 16:20, 21:25,
26:30, 31:35, 36:40, 41:45, 46:50, 51:55, 56:60, 61:65, 66:70,
71:75, 76:80, 81:85, 86:90, 91:95, 96:100, 101:105, 106:110,
111:115, 116:120, 121:125, 126:130, 131:135, 136:140, 141:145,
146:150), ptype = integer(0), class = c("vctrs_list_of",
"vctrs_vctr"))), row.names = c(NA, -30L), class = c("tbl_df",
"tbl", "data.frame"), .drop = TRUE), row.names = c(NA, -150L), class = c("grouped_df",
"tbl_df", "tbl", "data.frame"))
Теперь я запускаю:
models2 <- d %>%
group_by(.id) %>%
nest() %>%
mutate(models = map(data, ~lm(logPrice ~ date, data = .x))) %>%
mutate(
tidymodels = map(models, ~tidy(.x)),
glancemodels = map(models, ~glance(.x)),
augmentmodels = map(models, ~augment(.x))
)
Затем я запускаю:
models2 %>%
unnest(tidymodels) %>%
filter(term == "date") %>%
mutate(estimate_exp = exp(estimate)) %>%
select(estimate_exp)
Что дает мне вывод:
Adding missing grouping variables: `.id`
# A tibble: 30 x 2
# Groups: .id [30]
.id estimate_exp
<chr> <dbl>
1 AES 1.00
2 AMT 1.00
3 ANET 1.00
4 BSX 1.00
5 CAH 0.999
6 CSX 1.00
Так что, думаю, это проблема округления.
ОБНОВЛЕНИЕ:
Взяв данные d, я получаю следующий вывод:
> models2 <- d %>%
+ group_by(.id) %>%
+ nest() %>%
+ mutate(models = map(data, ~lm(logPrice ~ date, data = .x))) %>%
+ mutate(
+ tidymodels = map(models, ~tidy(.x)),
+ glancemodels = map(models, ~glance(.x)),
+ augmentmodels = map(models, ~augment(.x))
+ )
>
> modl1 <- models2 %>%
+ unnest(tidymodels) %>%
+ filter(term == "date")
>
> exp(modl1$estimate)
[1] 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
>
ОБНОВЛЕНИЕ: в свежем сеансе R я запускаю следующее:
library(dplyr)
library(broom)
library(purrr)
models2 <- d %>%
group_by(.id) %>%
nest() %>%
mutate(models = map(data, ~lm(logPrice ~ date, data = .x))) %>%
mutate(
tidymodels = map(models, ~tidy(.x)),
glancemodels = map(models, ~glance(.x)),
augmentmodels = map(models, ~augment(.x))
)
mod1 <- models2 %>% unnest(tidymodels) %>% filter(term == "date")
exp(mod1$estimate)
Что дает:
[1] 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
Информация о сеансе:
R version 3.6.1 (2019-07-05)
Platform: x86_64-pc-linux-gnu (64-bit)
Running under: Ubuntu 18.04.3 LTS
Matrix products: default
BLAS: /usr/lib/x86_64-linux-gnu/blas/libblas.so.3.7.1
LAPACK: /usr/lib/x86_64-linux-gnu/lapack/liblapack.so.3.7.1
locale:
[1] LC_CTYPE=C.UTF-8 LC_NUMERIC=C LC_TIME=C.UTF-8
[4] LC_COLLATE=C.UTF-8 LC_MONETARY=C.UTF-8 LC_MESSAGES=C.UTF-8
[7] LC_PAPER=C.UTF-8 LC_NAME=C LC_ADDRESS=C
[10] LC_TELEPHONE=C LC_MEASUREMENT=C.UTF-8 LC_IDENTIFICATION=C
attached base packages:
[1] grid stats graphics grDevices utils datasets methods base
other attached packages:
[1] gsubfn_0.7 proto_1.0.0 ggmap_3.0.0
[4] ggrepel_0.8.1 directlabels_2018.05.22 patchwork_1.0.0
[7] cowplot_1.0.0 gtable_0.3.0 stargazer_5.2.2
[10] e1071_1.7-3 lightgbm_2.3.2 R6_2.4.1
[13] tensorflow_2.0.0 keras_2.2.5.0 drlib_0.1.1
[16] PRROC_1.3.1 pROC_1.16.1 caret_6.0-85
[19] lattice_0.20-38 Matrix_1.2-17 xgboostExplainer_0.1
[22] broom_0.5.3 imputeTS_3.0 data.table_1.12.8
[25] viridis_0.5.1 viridisLite_0.3.0 CVXR_0.99-7
[28] timetk_0.1.2 forcats_0.4.0 readr_1.3.1
[31] tibble_2.99.99.9014 tidyverse_1.3.0 tsibble_0.8.5
[34] rsample_0.0.5.9000 kableExtra_1.1.0 knitr_1.27
[37] xgboost_0.90.0.2 reticulate_1.14-9001 formatR_1.7
[40] tidyquant_0.5.9 quantmod_0.4-15 TTR_0.23-6
[43] PerformanceAnalytics_1.5.3 xts_0.12-0 zoo_1.8-7
[46] ggplot2_3.3.0.9000 purrr_0.3.3 lubridate_1.7.4
[49] tidyr_1.0.2 future.apply_1.4.0 furrr_0.1.0
[52] future_1.16.0 stringr_1.4.0 stringi_1.4.5
[55] dplyr_0.8.99.9000
loaded via a namespace (and not attached):
[1] readxl_1.3.1 backports_1.1.5 scs_1.3-2
[4] plyr_1.8.5 splines_3.6.1 gmp_0.5-13.6
[7] listenv_0.8.0 tfruns_1.4 digest_0.6.23
[10] foreach_1.4.7 htmltools_0.4.0 fansi_0.4.1
[13] magrittr_1.5 recipes_0.1.9 globals_0.12.5
[16] modelr_0.1.5 gower_0.2.1 R.utils_2.9.2
[19] anytime_0.3.7 forecast_8.10 tseries_0.10-47
[22] jpeg_0.1-8.1 waterfalls_0.1.2 colorspace_1.4-1
[25] rvest_0.3.5 lobstr_1.1.1 haven_2.2.0
[28] xfun_0.12 tcltk_3.6.1 crayon_1.3.4
[31] jsonlite_1.6 zeallot_0.1.0 survival_2.44-1.1
[34] iterators_1.0.12 glue_1.3.1 ipred_0.9-9
[37] webshot_0.5.2 Quandl_2.10.0 ECOSolveR_0.5.3
[40] Rmpfr_0.8-1 scales_1.1.0 stinepack_1.4
[43] DBI_1.1.0 Rcpp_1.0.3 bit_1.1-15.1
[46] stats4_3.6.1 lava_1.6.6 prodlim_2019.11.13
[49] httr_1.4.1 ellipsis_0.3.0 pkgconfig_2.0.3
[52] R.methodsS3_1.7.1 nnet_7.3-12 dbplyr_1.4.2
[55] utf8_1.1.4 tidyselect_0.2.99.9000 rlang_0.4.4.9000
[58] reshape2_1.4.3 munsell_0.5.0 cellranger_1.1.0
[61] tools_3.6.1 cli_2.0.1 generics_0.0.2
[64] evaluate_0.14 ModelMetrics_1.2.2.1 bit64_0.9-7
[67] fs_1.3.1 RgoogleMaps_1.4.5.2 nlme_3.1-142
[70] whisker_0.4 R.oo_1.23.0 xml2_1.2.2
[73] compiler_3.6.1 rstudioapi_0.10 png_0.1-7
[76] curl_4.3 reprex_0.3.0 urca_1.3-0
[79] vctrs_0.2.99.9005 pillar_1.4.3.9000 lifecycle_0.1.0
[82] lmtest_0.9-37 bitops_1.0-6 gridExtra_2.3
[85] codetools_0.2-16 MASS_7.3-51.4 assertthat_0.2.1
[88] rjson_0.2.20 withr_2.1.2 fracdiff_1.5-1
[91] parallel_3.6.1 hms_0.5.3 quadprog_1.5-8
[94] rpart_4.1-15 timeDate_3043.102 class_7.3-15
[97] rmarkdown_2.1 base64enc_0.1-3