Довольно базовая проблема в R, и я даже не уверен, что это действительно проблема. У меня есть фрейм данных monthly_summary:
structure(list(year = c(2015L, 2015L, 2013L, 2014L, 2016L, 2016L,
2016L, 2016L, 2014L, 2014L, 2014L, 2015L, 2015L, 2015L, 2014L,
2013L, 2014L, 2014L, 2015L, 2016L, 2014L, 2014L, 2015L, 2013L,
2014L, 2014L, 2015L, 2015L, 2015L, 2016L, 2014L, 2014L, 2015L,
2013L, 2013L, 2014L, 2014L, 2014L, 2014L, 2015L, 2015L, 2015L,
2015L, 2016L, 2016L, 2016L, 2016L, 2017L, 2017L, 2015L, 2015L,
2013L, 2013L, 2014L, 2014L, 2014L, 2015L, 2016L, 2016L, 2016L,
2017L, 2014L, 2014L, 2015L, 2014L, 2014L, 2015L, 2015L, 2015L,
2015L, 2016L, 2016L, 2016L, 2017L, 2017L, 2014L, 2014L, 2015L,
2013L, 2014L, 2014L, 2015L, 2015L, 2016L, 2017L, 2015L, 2015L,
2016L, 2017L), month = c(4L, 5L, 6L, 10L, 2L, 4L, 5L, 10L, 9L,
3L, 9L, 3L, 6L, 11L, 9L, 9L, 3L, 10L, 3L, 6L, 3L, 9L, 2L, 11L,
2L, 4L, 3L, 5L, 10L, 5L, 3L, 9L, 3L, 9L, 11L, 2L, 4L, 7L, 10L,
2L, 4L, 8L, 10L, 2L, 3L, 6L, 9L, 3L, 6L, 3L, 4L, 9L, 11L, 3L,
6L, 10L, 2L, 2L, 4L, 8L, 1L, 3L, 6L, 7L, 2L, 7L, 2L, 3L, 4L,
5L, 3L, 4L, 5L, 3L, 10L, 2L, 7L, 3L, 10L, 3L, 5L, 2L, 12L, 2L,
6L, 3L, 7L, 3L, 9L), EstimatedAge = c(-1, -1, -1, -1, -1, -1,
-1, 2, 3, 4, 4, 5, 5, 6, 2, 2, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3,
3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
4, 4, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 6, 6, 6, 6, 6, 6, 6,
6, 6, 6, 6, 6, 6, 6, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 8, 8, 8, 8
), Species = structure(c(1L, 1L, 1L, 1L, 1L, 1L, 1L, 2L, 2L,
2L, 2L, 2L, 2L, 2L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L,
3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L,
3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L,
3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L,
3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L
), .Label = c("Catfish", "Largemouth Bass", "Striped Bass"), class = "factor"),
at_large = c(8L, 13L, 6L, 3L, 14L, 16L, 13L, 2L, 7L, 22L,
24L, 3L, 2L, 1L, 27L, 10L, 16L, 24L, 31L, 24L, 39L, 28L,
48L, 14L, 26L, 27L, 54L, 30L, 29L, 40L, 36L, 29L, 37L, 8L,
14L, 21L, 27L, 27L, 28L, 31L, 26L, 24L, 28L, 29L, 33L, 30L,
23L, 28L, 16L, 32L, 20L, 9L, 9L, 22L, 17L, 17L, 21L, 20L,
14L, 8L, 13L, 11L, 10L, 12L, 15L, 8L, 10L, 15L, 10L, 10L,
7L, 6L, 5L, 10L, 2L, 20L, 20L, 33L, 7L, 21L, 14L, 25L, 21L,
24L, 10L, 2L, 4L, 5L, 1L), Avg_Flow = c(559.1, 566.290322580645,
2047.8, 403.354838709677, 2152.03448275862, 764.6, 956.935483870968,
2887.16129032258, 2021.53333333333, 2055.93548387097, 2021.53333333333,
1228.58064516129, 398.766666666667, 649.8, 2021.53333333333,
3378.16666666667, 2055.93548387097, 403.354838709677, 1228.58064516129,
3291.26666666667, 2055.93548387097, 2021.53333333333, 3950.92857142857,
1930.86666666667, 1284.92857142857, 631.766666666667, 1228.58064516129,
566.290322580645, 301.064516129032, 956.935483870968, 2055.93548387097,
2021.53333333333, 1228.58064516129, 3378.16666666667, 1930.86666666667,
1284.92857142857, 631.766666666667, 1201.45161290323, 403.354838709677,
3950.92857142857, 559.1, 725.967741935484, 301.064516129032,
2152.03448275862, 2643.22580645161, 3291.26666666667, 5504.83333333333,
927.161290322581, 6664.56666666667, 1228.58064516129, 559.1,
3378.16666666667, 1930.86666666667, 2055.93548387097, 631.6,
403.354838709677, 3950.92857142857, 2152.03448275862, 764.6,
6970.35483870968, 7787.61290322581, 2055.93548387097, 631.6,
282.870967741935, 1284.92857142857, 1201.45161290323, 3950.92857142857,
1228.58064516129, 559.1, 566.290322580645, 2643.22580645161,
764.6, 956.935483870968, 927.161290322581, 1395.51612903226,
1284.92857142857, 1201.45161290323, 1228.58064516129, 1170.67741935484,
2055.93548387097, 300.322580645161, 3950.92857142857, 1249.96774193548,
2152.03448275862, 6664.56666666667, 1228.58064516129, 282.870967741935,
2643.22580645161, 5518.5), Avg_Temperature = c(19.3966666666667,
20.6451612903226, 23.57, 21.1322580645161, 13.5448275862069,
19.1833333333333, 20.9741935483871, 19.0548387096774, 24.25,
16.2483870967742, 24.25, 17.8677419354839, 24.9533333333333,
14.24, 24.25, 22.65, 16.2483870967742, 21.1322580645161,
17.8677419354839, 24.0633333333333, 16.2483870967742, 24.25,
13.9785714285714, 14.2, 12.9357142857143, 18.71, 17.8677419354839,
20.6451612903226, 21.1967741935484, 20.9741935483871, 16.2483870967742,
24.25, 17.8677419354839, 22.65, 14.2, 12.9357142857143, 18.71,
26.1903225806452, 21.1322580645161, 13.9785714285714, 19.3966666666667,
25.041935483871, 21.1967741935484, 13.5448275862069, 16.3903225806452,
24.0633333333333, 22.24, 14.6387096774194, 21.96, 17.8677419354839,
19.3966666666667, 22.65, 14.2, 16.2483870967742, 24.148275862069,
21.1322580645161, 13.9785714285714, 13.5448275862069, 19.1833333333333,
24.0225806451613, 10.3903225806452, 16.2483870967742, 24.148275862069,
25.4161290322581, 12.9357142857143, 26.1903225806452, 13.9785714285714,
17.8677419354839, 19.3966666666667, 20.6451612903226, 16.3903225806452,
19.1833333333333, 20.9741935483871, 14.6387096774194, 18.2870967741935,
12.9357142857143, 26.1903225806452, 17.8677419354839, 18.3931034482759,
16.2483870967742, 21.5387096774194, 13.9785714285714, 9.87741935483871,
13.5448275862069, 21.96, 17.8677419354839, 25.4161290322581,
16.3903225806452, 22.7866666666667), Avg_Turbidity = c(8.70294117647059,
9.18064516129032, 14.923, 4.20741935483871, 13.2118518518519,
10.671, 9.1, 5.04903225806452, 7.917, 13.971935483871, 7.917,
7.1641935483871, 6.52310344827586, 6.97133333333333, 7.917,
21.7596666666667, 13.971935483871, 4.20741935483871, 7.1641935483871,
9.308, 13.971935483871, 7.917, 6.77928571428571, 9.01107142857143,
8.74107142857143, 9.863, 7.1641935483871, 9.18064516129032,
5.70387096774194, 9.1, 13.971935483871, 7.917, 7.1641935483871,
21.7596666666667, 9.01107142857143, 8.74107142857143, 9.863,
11.6293548387097, 4.20741935483871, 6.77928571428571, 8.70294117647059,
9.20033333333333, 5.70387096774194, 13.2118518518519, 12.4351612903226,
9.308, 7.12607142857143, 24.1138709677419, 24.168, 7.1641935483871,
8.70294117647059, 21.7596666666667, 9.01107142857143, 13.971935483871,
16.0827586206897, 4.20741935483871, 6.77928571428571, 13.2118518518519,
10.671, 9.17709677419355, 40.8825806451613, 13.971935483871,
16.0827586206897, 8.49, 8.74107142857143, 11.6293548387097,
6.77928571428571, 7.1641935483871, 8.70294117647059, 9.18064516129032,
12.4351612903226, 10.671, 9.1, 24.1138709677419, 6.96903225806452,
8.74107142857143, 11.6293548387097, 7.1641935483871, 8.83931034482759,
13.971935483871, 8.71935483870968, 6.77928571428571, 7.78612903225806,
13.2118518518519, 24.168, 7.1641935483871, 8.49, 12.4351612903226,
10.6056666666667), n_Salmonids = c(25, 3, 10, 0, 187, 46,
31, 0, 0, 120, 0, 56, 0, 0, 0, 0, 120, 0, 56, 1, 120, 0,
101, 0, 6, 324, 56, 3, 0, 31, 120, 0, 56, 0, 0, 6, 324, 0,
0, 101, 25, 0, 0, 187, 394, 1, 0, 227, 2354, 56, 25, 0, 0,
120, 1, 0, 101, 187, 46, 0, 1794, 120, 1, 0, 6, 0, 101, 56,
25, 3, 394, 46, 31, 227, 1, 6, 0, 56, 0, 120, 50, 101, 41,
187, 2354, 56, 0, 394, 0), ENTRY = c(1L, 1L, NA, NA, NA,
NA, NA, NA, NA, NA, NA, NA, NA, NA, 1L, NA, NA, NA, NA, NA,
1L, 1L, 1L, NA, NA, NA, NA, NA, NA, NA, 1L, 1L, 1L, NA, NA,
NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, 3L,
2L, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, 1L, 1L, 2L, NA,
NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, 1L, 1L, 2L, NA, NA,
NA, NA, NA, NA, NA, NA, NA, NA, NA), EXIT = c(1L, 1L, 2L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 2L, 1L, 1L, 1L, 2L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 5L, 1L, 1L, 1L, 3L, 1L, 2L, 1L, 1L, 1L, 3L,
1L, 1L, 1L, 3L, 1L, 2L, 1L, 1L, 1L, 4L, 1L, 2L, 1L, 1L, 1L,
1L, 3L, 1L, 1L, 1L, 3L, 1L, 2L, 3L, 2L, 1L, 1L, 1L, 1L, 1L,
1L, 4L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 3L, 1L, 5L,
1L, 2L, 1L, 2L, 1L, 2L, 1L, 1L, 1L, 1L, 1L), n_transit = c(3L,
4L, 2L, 1L, 1L, 3L, 2L, 1L, 1L, 1L, 2L, 1L, 1L, 1L, 3L, 2L,
2L, 1L, 2L, 1L, 7L, 2L, 8L, 1L, 3L, 2L, 5L, 1L, 2L, 1L, 8L,
2L, 4L, 1L, 2L, 1L, 3L, 1L, 3L, 3L, 1L, 1L, 5L, 2L, 2L, 2L,
1L, 1L, 1L, 8L, 3L, 2L, 1L, 5L, 1L, 3L, 5L, 6L, 3L, 1L, 1L,
3L, 2L, 3L, 5L, 1L, 2L, 3L, 1L, 1L, 1L, 1L, 1L, 2L, 1L, 5L,
2L, 9L, 1L, 4L, 1L, 2L, 1L, 4L, 1L, 1L, 1L, 1L, 1L)), class = c("tbl_df",
"tbl", "data.frame"), row.names = c(NA, -89L))
Я пытаюсь запустить следующую модель, используя пакет 'gam':
# Candidate-term scope for the stepwise search: a named list with one
# one-sided formula per predictor. Each formula enumerates the alternative
# forms that predictor may take, starting with `1` (predictor left out),
# then the linear term, then smooth / polynomial / other transforms.
# NOTE(review): `logistic` and `Poly` are not defined anywhere in the visible
# code; base R's polynomial helper is lowercase `poly` -- confirm these
# functions exist in the session.
# NOTE(review): the failure is reported for the term `s(at_large)`. If another
# attached package also exports `s()` (e.g. mgcv), check which one is being
# resolved here, e.g. with `environment(s)` -- TODO confirm.
scope_list = list(
"EstimatedAge" = ~1 + EstimatedAge + logistic(EstimatedAge) + s(EstimatedAge),
"month"= ~1 + month + Poly(month, 2) + Poly(month, 3) + s(month),
"at_large"= ~1 + at_large + s(at_large) + Poly(at_large, 2),
# Unlike the other Poly() calls, the degree is passed by name (`df = 2`) here.
"Avg_Flow"= ~1 + Avg_Flow + s(Avg_Flow) + Poly(Avg_Flow, df = 2),
"Avg_Temperature"= ~1 + Avg_Temperature + s(Avg_Temperature) + Poly( Avg_Temperature, 2) + Poly(Avg_Temperature, 3),
"Avg_Turbidity"= ~1 + Avg_Turbidity + s(Avg_Turbidity) + Poly(Avg_Turbidity, 2) + Poly(Avg_Turbidity, 3),
"n_Salmonids"= ~1 + n_Salmonids + s(n_Salmonids) + Poly(n_Salmonids,2) + Poly(n_Salmonids,3))
# Build the starting model of EXIT transits: EXIT regressed on month only,
# fitted to the rows where Species is "Striped Bass" and EXIT > 0.
# No `family` argument is supplied, so gam() uses its default.
start_model_basic = gam(EXIT ~ month, data = monthly_summary[monthly_summary$Species == "Striped Bass" & monthly_summary$EXIT > 0,])
# Run the stepwise term selection from the starting model, drawing candidate
# terms from scope_list. The result is not assigned, so it is only printed.
step.Gam(start_model_basic, scope = scope_list)
Этот код выдаёт следующую ошибку:
Error in model.frame.default(formula = EXIT ~ month + s(at_large), data = monthly_summary[monthly_summary$Species == :
invalid type (list) for variable 's(at_large)'
Но быстрая проверка с помощью str() показывает:
Classes ‘tbl_df’, ‘tbl’ and 'data.frame': 650 obs. of 12 variables:
$ year : int 2013 2015 2015 2016 2013 2014 2016 2016 2016 2013 ...
$ month : int 8 4 5 7 6 10 2 4 5 5 ...
$ EstimatedAge : num -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 ...
$ Species : Factor w/ 3 levels "Catfish","Largemouth Bass",..: 1 1 1 1 1 1 1 1 1 1 ...
$ at_large : int 6 8 13 10 6 3 14 16 13 0 ...
$ Avg_Flow : num 5821 559 566 5922 2048 ...
$ Avg_Temperature: num 24.1 19.4 20.6 24.9 23.6 ...
$ Avg_Turbidity : num 14.46 8.7 9.18 14.01 14.92 ...
$ n_Salmonids : num 0 25 3 0 10 ...
$ ENTRY : int 1 1 1 1 NA NA NA NA NA NA ...
$ EXIT : int NA 1 1 NA 2 1 1 1 1 NA ...
$ n_transit : int 1 3 4 1 2 1 1 3 2 0 ...
Итак, если at_large имеет тип "int", почему модель воспринимает его как список? Похоже, та же самая ошибка возникает для всех переменных: если я по очереди убираю переменную, на которую указывает ошибка, она появляется для следующей.