Я пытаюсь запустить регрессию поперечного сечения для каждой даты моего набора панельных данных в R.
Мой набор данных содержит несколько наблюдений для каждой даты:
Date ISIN Return SIZE
31.03.2010 DE44444 0,5 0,3
30.04.2010 DE44444 0,3 0,2
31.05.2010 DE44444 1,8 0,25
30.06.2010 DE44444 -0,6 0,175
31.03.2010 DE55555 0,3 0,1
30.04.2010 DE55555 -1,2 0,15
31.05.2010 DE55555 1.4 0,33
30.06.2010 DE55555 0,4 0,1
> dput(head(Data, 50))
structure(list(Date = structure(c(1072828800, 1075507200, 1078012800,
1080691200, 1083283200, 1085961600, 1088553600, 1091232000, 1093910400,
1096502400, 1099180800, 1101772800, 1072828800, 1075507200, 1078012800,
1080691200, 1083283200, 1085961600, 1088553600, 1091232000, 1093910400,
1096502400, 1099180800, 1101772800, 1262217600, 1264896000, 1267315200,
1269993600, 1272585600, 1275264000, 1277856000, 1280534400, 1283212800,
1285804800, 1288483200, 1291075200, 1293753600, 1296432000, 1298851200,
1301529600, 1304121600, 1306800000, 1309392000, 1312070400, 1314748800,
1317340800, 1320019200, 1322611200, 1325289600, 1327968000), tzone = "UTC", class = c("POSIXct",
"POSIXt")), Date_ID = c("200312", "200401", "200402", "200403",
"200404", "200405", "200406", "200407", "200408", "200409", "200410",
"200411", "200312", "200401", "200402", "200403", "200404", "200405",
"200406", "200407", "200408", "200409", "200410", "200411", "200912",
"201001", "201002", "201003", "201004", "201005", "201006", "201007",
"201008", "201009", "201010", "201011", "201012", "201101", "201102",
"201103", "201104", "201105", "201106", "201107", "201108", "201109",
"201110", "201111", "201112", "201201"), ISIN = c("AT0000341086",
"AT0000341086", "AT0000341086", "AT0000341086", "AT0000341086",
"AT0000341086", "AT0000341086", "AT0000341086", "AT0000341086",
"AT0000341086", "AT0000341086", "AT0000341086", "AT0000341615",
"AT0000341615", "AT0000341615", "AT0000341615", "AT0000341615",
"AT0000341615", "AT0000341615", "AT0000341615", "AT0000341615",
"AT0000341615", "AT0000341615", "AT0000341615", "AT0000A0DJE7",
"AT0000A0DJE7", "AT0000A0DJE7", "AT0000A0DJE7", "AT0000A0DJE7",
"AT0000A0DJE7", "AT0000A0DJE7", "AT0000A0DJE7", "AT0000A0DJE7",
"AT0000A0DJE7", "AT0000A0DJE7", "AT0000A0DJE7", "AT0000A0DJE7",
"AT0000A0DJE7", "AT0000A0DJE7", "AT0000A0DJE7", "AT0000A0DJE7",
"AT0000A0DJE7", "AT0000A0DJE7", "AT0000A0DJE7", "AT0000A0DJE7",
"AT0000A0DJE7", "AT0000A0DJE7", "AT0000A0DJE7", "AT0000A0DJE7",
"AT0000A0DJE7"), R = c(-1.5825, -1.5299, -1.5526, -1.5028, -0.1483,
-2.892, -0.1882, -1.7565, -1.045, -1.5458, -1.0702, -1.3541,
-1.6795, -1.6259, -1.6416, -1.5968, 1.3737, -3.563, 0.0657999999999999,
-1.3725, -1.127, -0.9028, -0.9112, -1.2051, -0.626, 0.748, 1.4236,
0.1108, 0.3133, 0.538, -0.00440000000000002, -0.5813, 0.897,
-1.2131, -1.0532, -0.6429, -1.0097, -1.6578, -0.7197, -0.5631,
-0.6951, -0.1574, -1.091, -0.2148, -0.1804, -1.5962, -1.1525,
-0.8096, -0.555, -0.2352), DEF = c(-0.66, 1.463, -0.16, 0.584,
1.193, -1.142, 1.661, 0.374, 0.797, 0.956, 0.514, 1.424, -0.66,
1.463, -0.16, 0.584, 1.193, -1.142, 1.661, 0.374, 0.797, 0.956,
0.514, 1.424, 0.433, 0.578, -0.22, 0.779, -0.239, -2.113, -0.016,
1.583, -0.0819999999999999, 0.448, 0.635, -1.327, 0.198, 0.809,
0.593, 0.393, 0.406, -0.116, -0.581, -0.958, -2.986, -1.932,
1.831, -2.855, 1.096, 2.385), TERM = c(-1.0855, -1.5809, -0.9736,
-1.3688, -2.7573, -2.132, -1.9572, -1.4315, -1.054, -1.8468,
-1.3342, -1.4691, -1.0855, -1.5809, -0.9736, -1.3688, -2.7573,
-2.132, -1.9572, -1.4315, -1.054, -1.8468, -1.3342, -1.4691,
-0.978, 0.637, 0.4006, -0.2222, 0.4453, 1.351, -0.1434, -1.1553,
1.414, -1.3961, -1.5512, -0.6689, -1.3797, -2.0738, -0.7597,
-1.6711, -0.6201, -0.00340000000000007, -1.087, 0.6832, 0.5986,
-0.3152, -1.5785, -1.0466, 0.139, -0.6222), SIZE = c(0.10356,
0.10253, 0.10292, 0.09885, 0.0968, 0.09838, 0.10082, 0.10088,
0.10123, 0.10107, 0.09978, 0.09757, 0.09592, 0.09488, 0.09515,
0.0913, 0.09075, 0.09161, 0.09523, 0.09565, 0.09591, 0.09637,
0.09529, 0.09331, 0.01675, 0.01651, 0.01683, 0.01674, 0.01706,
0.01763, 0.01718, 0.01715, 0.01702, 0.01692, 0.01668, 0.01688,
0.017, 0.01706, 0.01705, 0.01716, 0.01705, 0.01707, 0.01658,
0.0168, 0.01735, 0.01769, 0.01749, 0.01812, 0.01779, 0.01746),
VOLA = c(0.985175199478076, 1.00091902369772, 0.941762801877416,
0.189350898246265, 0.196942343508618, 0.570451774181365,
0.868103006368868, 1.02515686604539, 1.03929504505057, 1.03919410249802,
1.04138690344495, 0.902892605832314, 0.782695837261618, 0.808099436125695,
0.813829123137448, 0.205446917718422, 0.213121280026186,
1.2299424945907, 1.58617400989509, 1.6932983867785, 1.68261919756076,
1.66387221945276, 1.64110973510813, 1.21084087214905, 0.502423497659096,
0.410245832398088, 0.507635454501226, 0.745424703105552,
0.749700440842874, 0.680077773248521, 0.682612814851875,
0.520375140643747, 0.666999781109409, 0.504412064354796,
0.774767772733654, 0.85769175504179, 0.776125364658743, 0.773606273673286,
0.885057532028286, 0.366737813703468, 0.402928393969284,
0.409465399840654, 0.503249857095525, 0.511044324561644,
0.348103521479842, 0.371638903596847, 0.590624145854762,
0.625214592759958, 0.558455669682026, 0.553264675961394),
MOMENTUM = c(-12.3647, -11.7702, -12.1935, -9.8288, -9.7426,
-7.8688, -9.2081, -7.8138, -8.0404, -7.5328, -7.5758, -8.4977,
-13.0367, -12.5392, -12.4855, -10.4358, -10.3476, -6.8178,
-8.7331, -6.9878, -6.7344, -6.2198, -5.5258, -7.8107, -0.3711,
-0.0837, 0.361, 1.8276, 1.6755, 2.3585, 2.5077, 3.1293, 1.8,
1.2734, -0.0505, -1.417, -2.5979, -3.6032, -4.6797, -6.2964,
-5.6464, -5.2883, -4.8028, -4.8841, -3.4411, -2.9018, -3.9349,
-4.3923, -5.0445, -4.5085), Rating = c(12, 12, 12, 12, 12,
12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12,
12, 12, 12, 12, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6), Duartion = c(3.03, 2.95,
2.87, 2.79, 2.72, 2.63, 2.72, 2.64, 2.56, 2.48, 2.4, 2.32,
3.89, 3.81, 3.73, 3.64, 3.59, 3.5, 3.6, 3.52, 3.44, 3.36,
3.28, 3.2, 4.04, 3.96, 3.89, 3.81, 3.73, 3.65, 3.71, 3.63,
3.54, 3.45, 3.37, 3.29, 3.21, 3.11, 3.03, 2.95, 2.87, 2.79,
2.82, 2.74, 2.66, 2.58, 2.49, 2.41, 2.33, 2.25), Liquidity = c(1.537,
1.6219, 1.6995, 1.7842, 1.8661, 1.9508, 2.0328, 2.1175, 2.2022,
2.2842, 2.3689, 2.4508, 0.537, 0.6219, 0.6995, 0.7842, 0.8661,
0.9508, 1.0328, 1.1175, 1.2022, 1.2842, 1.3689, 1.4508, 0.5233,
0.6082, 0.6849, 0.7699, 0.8521, 0.937, 1.0192, 1.1041, 1.189,
1.2712, 1.3562, 1.4384, 1.5233, 1.6082, 1.6849, 1.7699, 1.8521,
1.937, 2.0192, 2.1041, 2.189, 2.2712, 2.3562, 2.4384, 2.5233,
2.6082)), row.names = c(NA, -50L), class = c("tbl_df", "tbl",
"data.frame"))
I хотел бы запустить отдельные регрессии для 31.03.2010, 30.04.2010, 31.05.2010, 30.06.2010.
Для этого я попытался сделать следующее:
the.years <- unique(Data$Date)
a.formula <- Y~X
first.step <- lapply(the.years, function(a.year) {
temp.data <- Data[Date == a.year, ]
an.lm <- lm(a.formula, data = temp.data)
the.coefficients <- an.lm$coef
the.results <- as.data.frame(cbind(a.year, t(the.coefficients)))
the.results
})
К сожалению, используется весь набор данных для каждой даты, чтобы я получал одинаковые результаты для каждого периода.
Как заставить R использовать подмножество данных для каждого периода вместо всего набора данных?
Спасибо за вашу помощь !