У меня есть широкая таблица с 161 переменной, которую я хотел бы преобразовать в узкую таблицу только с 13 переменными. Насколько я могу судить, я не могу использовать pivot_longer
, потому что мне нужно ввести дополнительный столбец для года, а затем выбрать только определенные столбцы из широкой таблицы для каждой строки.
Я начал с пустого фрейма данных:
subset_gathered <- read.csv(text = "FIPS_State, Place_Code, Place_Name, Longitude, Latitude, County, Closest Metro, Year, Population_Density, Labor Force, Employed, Unemployed, Unemployment Rate",
colClasses = c("character", "integer", "character", "numeric", "numeric", "character", "numeric","integer", "numeric", "integer", "integer", "integer", "numeric") )
, а затем попытался для l oop выбрать каждый из соответствующих столбцов (фактическое решение будет повторяться в течение 1990- 2018, где каждая переменная представляет собой отдельный столбец)
for(i in subset[1:5,]){
temp1990 <- c(subset$`FIPS State`[i], subset$`Place Code`[i], subset$Name[i], subset$longitude[i], subset$latitude[i], subset$COUNTY[i], subset$closest_metro[i], 1990, subset$density1990[i], subset$`Labor Force.90`[i], subset$Employed.90[i], subset$`Unemployment Level.90`[i], subset$`Unemployment Rate.90`[i])
subset_gathered[nrow(subset_gathered)+1,]<- temp1990
temp1991 <- c(subset$`FIPS State`[i], subset$`Place Code`[i], subset$Name[i], subset$longitude[i], subset$latitude[i], subset$COUNTY[i], subset$closest_metro[i], 1991, subset$density1991[i], subset$`Labor Force.91`[i], subset$Employed.91[i], subset$`Unemployment Level.91`[i], subset$`Unemployment Rate.91`[i])
subset_gathered[nrow(subset_gathered)+1,]<- temp1991
}
Я предполагаю, что результаты странны, потому что он запускает каждый вектор за раз, а не строку
head(subset_gathered)
FIPS_State Place_Code Place_Name Longitude Latitude County Closest.Metro Year Population_Density Labor.Force
1 1 1 1 1 1 124 124 124 124 124
2 1 1 1 1 1 124 124 124 124 124
3 <NA> <NA> <NA> <NA> <NA> <NA> <NA> <NA> <NA> <NA>
4 <NA> <NA> <NA> <NA> <NA> <NA> <NA> <NA> <NA> <NA>
5 <NA> <NA> <NA> <NA> <NA> <NA> <NA> <NA> <NA> <NA>
6 <NA> <NA> <NA> <NA> <NA> <NA> <NA> <NA> <NA> <NA>
Employed Unemployed Unemployment.Rate
1 Abbeville city Abbeville city Abbeville city
2 Abbeville city Abbeville city Abbeville city
3 <NA> <NA> <NA>
4 <NA> <NA> <NA>
5 <NA> <NA> <NA>
6 <NA> <NA> <NA>
Если я должны были выделить строки вне a для l oop, я получаю именно те результаты, которые мне нужны:
temp1990 <- c(subset$`FIPS State`[1], subset$`Place Code`[1], subset$Name[1], subset$longitude[1], subset$latitude[1], subset$COUNTY[1], subset$closest_metro[1], 1990, subset$density1990[1], subset$`Labor Force.90`[1], subset$Employed.90[1], subset$`Unemployment Level.90`[1], subset$`Unemployment Rate.90`[1])
subset_gathered[nrow(subset_gathered)+1,]<- temp1990
temp1991 <- c(subset$`FIPS State`[1], subset$`Place Code`[1], subset$Name[1], subset$longitude[1], subset$latitude[1], subset$COUNTY[1], subset$closest_metro[1], 1991, subset$density1991[1], subset$`Labor Force.91`[1], subset$Employed.91[1], subset$`Unemployment Level.91`[1], subset$`Unemployment Rate.91`[1])
subset_gathered[nrow(subset_gathered)+1,]<- temp1991
FIPS_State Place_Code Place_Name Longitude Latitude County Closest.Metro Year Population_Density
1 1 124 Abbeville city -85.2513 31.567949 Henry County 24.3361834333029 1990 7.85948198868711e-05
2 1 124 Abbeville city -85.2513 31.567949 Henry County 24.3361834333029 1991 7.96367966656743e-05
Labor.Force Employed Unemployed Unemployment.Rate
1 6867 6539 328 4.8
2 6648 6106 542 8.2
Затем я попытался написать функцию apply
, но она вернула только одна строка, и она должна была вернуть 5 (в subset
есть 5 строк с данными).
subset_gathered[nrow(subset_gathered)+1,]<- apply(subset, 1, function(x) c(x[1], x[2], x[3], x[37], x[38], x[41], x[40], 1990, x[7], x[118], x[119], x[120], x[121]))
FIPS_State Place_Code Place_Name Longitude Latitude County Closest.Metro Year Population_Density
1 1 124 Abbeville city -85.25130 31.56795 Henry County 24.33618 1990 7.859482e-05
Labor.Force Employed Unemployed Unemployment.Rate
1 6867 6539 328 4.8
Я на правильном пути? Я думаю, что использование для l oop будет работать в Python или SAS, но apply
всегда кажется правильным ответом в R, но я все еще не могу получить нужные мне результаты.
dput:
subset<- structure(list(`FIPS State` = c(1, 1, 1, 1, 1), `Place Code` = c(124,
484, 676, 1228, 1396), Name = c("Abbeville city", "Addison town",
"Akron town", "Aliceville city", "Allgood town"), `1990 Pop` = c(3168,
639, 468, 3052, 461), GEOID = c(100124, 100484, 100676, 101228,
101396), USPS = c("AL", "AL", "AL", "AL", "AL"), density1990 = c(7.85948198868711e-05,
8.31165452653486e-05, 0.000325678496868476, 0.000262064228061137,
0.000171694599627561), density1991 = c(7.96367966656743e-05,
8.28563995837669e-05, 0.000329157967988866, 0.000260604499398935,
0.000176163873370577), density1992 = c(7.89669544507294e-05,
8.37669094693028e-05, 0.000331941544885177, 0.000259402369912416,
0.00017877094972067), density1993 = c(7.89421454798055e-05, 8.49375650364204e-05,
0.000331941544885177, 0.000256568778979907, 0.000183612662942272
), density1994 = c(7.87188647414905e-05, 8.6888657648283e-05,
0.0003312456506611, 0.000252189592993302, 0.000187709497206704
), density1995 = c(7.77381836037938e-05, 7.39452771986497e-05,
0.000332684436284319, 0.000248944989340191, 0.000195857866765266
), density1996 = c(7.63486725020343e-05, 7.4934144204278e-05,
0.000334772413917902, 0.000251349425249651, 0.000203304933942652
), density1997 = c(7.62246090108058e-05, 7.54835147629603e-05,
0.000334772413917902, 0.000249460225606504, 0.000213358474632124
), density1998 = c(7.60012947265945e-05, 7.58131370981698e-05,
0.000338252376640541, 0.0002473134078302, 0.000222667308603857
), density1999 = c(7.51328502879948e-05, 7.66921299920616e-05,
0.000338948369185069, 0.000244479608365479, 0.000230859082498983
), density2000 = c(7.41651550564123e-05, 7.66921299920616e-05,
0.000339644361729597, 0.000225673484645058, 0.000246497923571494
), density2001 = c(7.29741455406185e-05, 7.7021752327271e-05,
0.000329204473561679, 0.000225244121089797, 0.000245008510136017
), density2002 = c(7.21801391967559e-05, 7.77908711094264e-05,
0.00031598061521565, 0.000224900630245588, 0.000243891450059409
), density2003 = c(7.15101963441219e-05, 7.84501157798452e-05,
0.000307628704681315, 0.000221637467225606, 0.00024351909670054
), density2004 = c(7.08898788879792e-05, 7.93291086737371e-05,
0.00029649282396887, 0.000220091758426668, 0.000242029683265062
), density2005 = c(7.00527969547999e-05, 7.47440401111995e-05,
0.000233954174226123, 0.000214997442884744, 0.000239751372251362
), density2006 = c(6.97298585291218e-05, 7.59743946809311e-05,
0.000227205496123447, 0.00021432028558432, 0.000239007957143606
), density2007 = c(6.91585059298451e-05, 7.68971606082299e-05,
0.000222143987546439, 0.000212881326320918, 0.00023826454203585
), density2008 = c(6.8115166400731e-05, 7.71022197031852e-05,
0.000215957699285652, 0.000212542747670706, 0.000237149419374215
), density2009 = c(6.7419606714655e-05, 7.78199265355286e-05,
0.000207521851657307, 0.000211103788407304, 0.000234547466497069
), density2010 = c(6.67737298632987e-05, 7.7717396988051e-05,
0.00020021078371274, 0.00021042663110688, 0.000231202098512166
), density2011 = c(6.65004742723402e-05, 7.69996901557075e-05,
0.000194024495451953, 0.000205347951353697, 0.000231573806066044
), density2012 = c(6.53080846167217e-05, 7.62819207541575e-05,
0.000194238820289117, 0.000205286788530157, 0.000231202012572632
), density2013 = c(6.48857660254055e-05, 7.61794069120457e-05,
0.00019198677599591, 0.000202371348887106, 0.00023083030515692
), density2014 = c(6.44634620351942e-05, 7.58718184588342e-05,
0.000190860753849307, 0.000200340867125697, 0.000231202012572632
), density2015 = c(6.42895721568719e-05, 7.53591710368151e-05,
0.000189734731702703, 0.000199579436465168, 0.00023083030515692
), density2016 = c(6.39169509890384e-05, 7.52566415524112e-05,
0.000186919676336194, 0.000198225781957562, 0.00023083030515692
), density2017 = c(6.37430611107161e-05, 7.46414646459882e-05,
0.000186919781574101, 0.000197548954703759, 0.000231202012572632
), density2018 = c(6.36685368771494e-05, 7.4231339097472e-05,
0.000185793758793534, 0.000194672438875096, 0.000231202012572632
), minpop = c(2563, 637, 330, 2301, 461), longitude = c(-85.2513,
-87.177851, -87.738779, -88.154427, -86.516109), latitude = c(31.567949,
34.202689, 32.879495, 33.126276, 33.907623), is_metro = c(0,
0, 0, 0, 0), closest_metro = c(24.3361834333029, 28.0961047219205,
27.2224172144133, 34.7564812052357, 30.296520864832), COUNTY = c("Henry County",
"Winston County", "Hale County", "Pickens County", "Blount County"
), COUNTY2 = c(NA_character_, NA_character_, NA_character_, NA_character_,
NA_character_), `LAUS Code` = c("CN0106700000000", "CN0113300000000",
"CN0106500000000", "CN0110700000000", "CN0100900000000"), `State FIPS` = c("01",
"01", "01", "01", "01"), `County FIPS` = c("067", "133", "065",
"107", "009"), `Labor Force.00` = c(7634, 11687, 6976, 8597,
25106), Employed.00 = c(7259, 10776, 6557, 7952, 24231), `Unemployment Level.00` = c(375,
911, 419, 645, 875), `Unemployment Rate.00` = c(4.9, 7.8, 6,
7.5, 3.5), `Labor Force.01` = c(7542, 10737, 7019, 8440, 25305
), Employed.01 = c(7137, 9732, 6513, 7732, 24393), `Unemployment Level.01` = c(405,
1005, 506, 708, 912), `Unemployment Rate.01` = c(5.4, 9.4, 7.2,
8.4, 3.6), `Labor Force.02` = c(7530, 10478, 7054, 8248, 25757
), Employed.02 = c(7075, 9561, 6462, 7569, 24366), `Unemployment Level.02` = c(455,
917, 592, 679, 1391), `Unemployment Rate.02` = c(6, 8.8, 8.4,
8.2, 5.4), `Labor Force.03` = c(7494, 10407, 7086, 8138, 25900
), Employed.03 = c(7059, 9418, 6485, 7475, 24702), `Unemployment Level.03` = c(435,
989, 601, 663, 1198), `Unemployment Rate.03` = c(5.8, 9.5, 8.5,
8.1, 4.6), `Labor Force.04` = c(7548, 10337, 7049, 8027, 26208
), Employed.04 = c(7122, 9580, 6516, 7416, 25101), `Unemployment Level.04` = c(426,
757, 533, 611, 1107), `Unemployment Rate.04` = c(5.6, 7.3, 7.6,
7.6, 4.2), `Labor Force.05` = c(7469, 10365, 7053, 7960, 26446
), Employed.05 = c(7146, 9840, 6669, 7527, 25491), `Unemployment Level.05` = c(323,
525, 384, 433, 955), `Unemployment Rate.05` = c(4.3, 5.1, 5.4,
5.4, 3.6), `Labor Force.06` = c(7506, 10591, 7170, 8113, 26770
), Employed.06 = c(7204, 10067, 6792, 7703, 25902), `Unemployment Level.06` = c(302,
524, 378, 410, 868), `Unemployment Rate.06` = c(4, 4.9, 5.3,
5.1, 3.2), `Labor Force.07` = c(7540, 10287, 6976, 8015, 26629
), Employed.07 = c(7135, 9712, 6602, 7613, 25780), `Unemployment Level.07` = c(405,
575, 374, 402, 849), `Unemployment Rate.07` = c(5.4, 5.6, 5.4,
5, 3.2), `Labor Force.08` = c(7376, 9984, 6991, 7925, 26698),
Employed.08 = c(6807, 9120, 6473, 7359, 25453), `Unemployment Level.08` = c(569,
864, 518, 566, 1245), `Unemployment Rate.08` = c(7.7, 8.7,
7.4, 7.1, 4.7), `Labor Force.09` = c(7132, 9519, 6869, 8042,
26480), Employed.09 = c(6334, 7832, 5890, 6942, 23832), `Unemployment Level.09` = c(798,
1687, 979, 1100, 2648), `Unemployment Rate.09` = c(11.2,
17.7, 14.3, 13.7, 10), `Labor Force.10` = c(7259, 9883, 6445,
7699, 24906), Employed.10 = c(6469, 8304, 5473, 6687, 22460
), `Unemployment Level.10` = c(790, 1579, 972, 1012, 2446
), `Unemployment Rate.10` = c(10.9, 16, 15.1, 13.1, 9.8),
`Labor Force.11` = c(7270, 9819, 6296, 7570, 25123), Employed.11 = c(6563,
8492, 5426, 6663, 22939), `Unemployment Level.11` = c(707,
1327, 870, 907, 2184), `Unemployment Rate.11` = c(9.7, 13.5,
13.8, 12, 8.7), `Labor Force.12` = c(7030, 9607, 6212, 7467,
24960), Employed.12 = c(6447, 8647, 5499, 6709, 23244), `Unemployment Level.12` = c(583,
960, 713, 758, 1716), `Unemployment Rate.12` = c(8.3, 10,
11.5, 10.2, 6.9), `Labor Force.13` = c(6954, 9644, 6208,
7420, 24887), Employed.13 = c(6421, 8737, 5486, 6727, 23325
), `Unemployment Level.13` = c(533, 907, 722, 693, 1562),
`Unemployment Rate.13` = c(7.7, 9.4, 11.6, 9.3, 6.3), `Labor Force.14` = c(6775,
9630, 6032, 7760, 24527), Employed.14 = c(6267, 8802, 5436,
7124, 23023), `Unemployment Level.14` = c(508, 828, 596,
636, 1504), `Unemployment Rate.14` = c(7.5, 8.6, 9.9, 8.2,
6.1), `Labor Force.15` = c(6718, 9379, 6050, 8011, 24485),
Employed.15 = c(6262, 8673, 5580, 7469, 23163), `Unemployment Level.15` = c(456,
706, 470, 542, 1322), `Unemployment Rate.15` = c(6.8, 7.5,
7.8, 6.8, 5.4), `Labor Force.16` = c(6733, 9548, 5991, 7779,
24623), Employed.16 = c(6295, 8872, 5534, 7246, 23298), `Unemployment Level.16` = c(438,
676, 457, 533, 1325), `Unemployment Rate.16` = c(6.5, 7.1,
7.6, 6.9, 5.4), `Labor Force.17` = c(6713, 9693, 5945, 7718,
24725), Employed.17 = c(6385, 9203, 5590, 7302, 23726), `Unemployment Level.17` = c(328,
490, 355, 416, 999), `Unemployment Rate.17` = c(4.9, 5.1,
6, 5.4, 4), `Labor Force.90` = c(6867, 10482, 6204, 8606,
19168), Employed.90 = c(6539, 9241, 5725, 7818, 17955), `Unemployment Level.90` = c(328,
1241, 479, 788, 1213), `Unemployment Rate.90` = c(4.8, 11.8,
7.7, 9.2, 6.3), `Labor Force.91` = c(6648, 10435, 6111, 8775,
19132), Employed.91 = c(6106, 9113, 5547, 7906, 18021), `Unemployment Level.91` = c(542,
1322, 564, 869, 1111), `Unemployment Rate.91` = c(8.2, 12.7,
9.2, 9.9, 5.8), `Labor Force.92` = c(6762, 10927, 6425, 9052,
19553), Employed.92 = c(6231, 9895, 5762, 8061, 18334), `Unemployment Level.92` = c(531,
1032, 663, 991, 1219), `Unemployment Rate.92` = c(7.9, 9.4,
10.3, 10.9, 6.2), `Labor Force.93` = c(6553, 11880, 6972,
9171, 19929), Employed.93 = c(5905, 10978, 6284, 8110, 18868
), `Unemployment Level.93` = c(648, 902, 688, 1061, 1061),
`Unemployment Rate.93` = c(9.9, 7.6, 9.9, 11.6, 5.3), `Labor Force.94` = c(6395,
12306, 7192, 9006, 20263), Employed.94 = c(5911, 11567, 6505,
8237, 19408), `Unemployment Level.94` = c(484, 739, 687,
769, 855), `Unemployment Rate.94` = c(7.6, 6, 9.6, 8.5, 4.2
), `Labor Force.95` = c(6472, 12777, 7129, 8938, 20993),
Employed.95 = c(6018, 11981, 6453, 8140, 20055), `Unemployment Level.95` = c(454,
796, 676, 798, 938), `Unemployment Rate.95` = c(7, 6.2, 9.5,
8.9, 4.5), `Labor Force.96` = c(6440, 13038, 7136, 8877,
21631), Employed.96 = c(6004, 12211, 6549, 8041, 20918),
`Unemployment Level.96` = c(436, 827, 587, 836, 713), `Unemployment Rate.96` = c(6.8,
6.3, 8.2, 9.4, 3.3), `Labor Force.97` = c(6456, 13175, 7392,
8824, 22684), Employed.97 = c(6087, 12175, 6823, 8131, 22003
), `Unemployment Level.97` = c(369, 1000, 569, 693, 681),
`Unemployment Rate.97` = c(5.7, 7.6, 7.7, 7.9, 3), `Labor Force.98` = c(6412,
12661, 7368, 8699, 23611), Employed.98 = c(6087, 11934, 6811,
8002, 22839), `Unemployment Level.98` = c(325, 727, 557,
697, 772), `Unemployment Rate.98` = c(5.1, 5.7, 7.6, 8, 3.3
), `Labor Force.99` = c(6428, 12410, 7016, 8577, 23968),
Employed.99 = c(6093, 11623, 6488, 7683, 23297), `Unemployment Level.99` = c(335,
787, 528, 894, 671), `Unemployment Rate.99` = c(5.2, 6.3,
7.5, 10.4, 2.8), `Labor Force.18` = c(6766, 9781, 5991, 7805,
25006), `Unemployment Rate.18` = c(6466, 9371, 5686, 7449,
24128), `Unemployment Level` = c(300, 410, 305, 356, 878),
`Unemployment Rate` = c(4.4, 4.2, 5.1, 4.6, 3.5)), row.names = c(NA,
5L), class = "data.frame")