У меня есть переменная (переменная X047CS
, из Обзоров мировых ценностей (WVS
), из которых (очень малая часть) dput
выглядит следующим образом:
dput(WVS$X047CS)
structure(list(structure(c(-4, -4, -4, -4, 484016, -4, 305, 1702,
307, 310, -1, 4011, -2, 792003, 862010, 6, 5, 392010, -1, 756018,
-4, -4, -4, -4, -4, -4, -4, -4), label = "Income (country specific)", format.stata = "%12.0g", class = "haven_labelled", labels = c(Missing = -5,
`Not asked` = -4, `Not applicable` = -3, `No answer` = -2, `Don´t know` = -1,
`Lower step` = 1, `second step` = 2, `Third step` = 3, `Fourth step` = 4,
`Fifth step` = 5, `Sixth step` = 6, `Seventh step` = 7, `Eigth step` = 8,
`Nineth step` = 9, `Tenth step` = 10, `AL: up to 50 DEM per month` = 8001,
`AL: 50-149 DEM` = 8002, `AL: 150-249 DEM` = 8003, `AL: 250-499 DEM` = 8004,
`AL: 500-749 DEM` = 8005, `AL: 750-999 DEM` = 8006, `AL: 1000-1249 DEM` = 8007,
`AL: 1250-1499 DEM` = 8008, `AL: 1500-1999 DEM` = 8009, `AL: 2000+ DEM` = 8010,
`DZ: Below 10,000 Dinar per month` = 12001, `DZ: 10,000-20,000` = 12002,
`DZ: 20,000-30,000` = 12003, `DZ: 30,000-40,000` = 12004, `DZ: 40,000-50,000` = 12005,
`DZ: 50,000-60,000` = 12006, `DZ: 60,000-70,000` = 12007, `DZ: 70,000-80,000` = 12008,
`DZ: 80,000-90,000` = 12009, `DZ: 90,000 and more` = 12010, `AD: From 0 to 500 euros (monthly household income)` = 20001,
`AD: From 501 to 1000 euros` = 20002, `AD: From 1001 to 1300 euros` = 20003,
`AD: From 1301 to 1600 euros` = 20004, `AD: From 1601 to 2000 euros` = 20005,
`AD: From 2001 to 3000 euros` = 20006, `AD: From 3001 to 4000 euros` = 20007,
`AD: From 4001 to 6000 euros` = 20008, `AD: From 6001 to 8000 euros` = 20009,
`AD: More than 8001 euros` = 20010, `AR: 200 - 275 per month` = 32001,
`AR: 276 - 400` = 32002, `AR: 401 - 524` = 32003, `AR: 525 - 625` = 32004,
`AR: 626 - 772` = 32005, `AR: 773 - 924` = 32006, `AR: 925 - 1150` = 32007,
`AR: 1151 - 1331` = 32008, `AR: 1332 - 1800` = 32009, `AR: 1801 and more` = 32010,
`AU:Less than $18,000 per annum` = 36001, `AU:$18,001 - $24,000` = 36002,
`AU:$24,001 - $34,000` = 36003, `AU:$34,001 - $43,500` = 36004,
`AU:$43,501 - $54,000` = 36005, `AU:$54,001 - $64,000` = 36006,
`AU:$64,001 – $76,500` = 36007, `AU:$76,501 – $92,000` = 36008,
`AU:$92,001 - $115,000` = 36009, `AU:More than $115,000` = 36010,
`AT: Less than 5,999 Schillings (OS) per month` = 40001, `AT: 6,000-9,999 OS` = 40002,
`AT: 10,000-13,999 OS` = 40003, `AT: 14,000-17,999 OS` = 40004,
`AT: 18,000-21,999 OS` = 40005, `AT: 22,000-25,999 OS` = 40006,
`AT: 26,000-29,999 OS` = 40007, `AT: 30,000-33,999 OS` = 40008,
`AT: 34,000-37,999 OS` = 40009, `AT: 38,000 Schillings or more per month` = 40010,
`AT: Less than 6,000 Schillings (OS) per month` = 40011, `AT: 6,000 - 7,999 OS` = 40012,
`AT: 8,000 - 9,999 OS` = 40013, `AT: 10,000 - 11,999 OS` = 40014,
`AT: 12,000 - 13,999 OS` = 40015, `AT: 14,000 - 15,999 OS` = 40016,
`AT: 16,000 - 17,999 OS` = 40017, `AT: 18,000 - 19,999 OS` = 40018,
`AT: 20,000 - 21,999 OS` = 40019, `AT: 22,000 - 23,999 OS` = 40020,
`AT: 24,000 - 25,999 OS` = 40021, `AT: 26,000 - 27,999 OS` = 40022,
`AT: 28,000 - 29,999 OS` = 40023, `AT: 30,000 - 31,999 OS` = 40024,
`AT: 32,000 - 35,999 OS` = 40025, `AT: 36,000 - 39,999 OS` = 40026,
`AT: 40,000 - 43,999 OS` = 40027, `AT: 44,000 - 47,999 OS` = 40028,
`AT: 48,000 - 51,999 OS` = 40029, `AT: 52,000 - 55,999 OS` = 40030,
`AT: 56,000 - 69,999 OS` = 40031, `AT: 60,000 - 63,999 OS` = 40032,
`AT: 64,000 - 67,999 OS` = 40033, `AT: 68,000 - 71,999 OS` = 40034,
`AT:72,000 OS and more` = 40035, `BE: 25,000-29,999 Belgian francs per year` = 56001,
`BE: 30,000-34,999 francs` = 56002, `BE: 35,000-39,999 francs` = 56003,
`BE: 40,000-49,999 francs` = 56004, `BE: 50,000-59,999 francs` = 56005,
`BE: 60,000-69,999 francs` = 56006, `BE: 70,000-79,999 francs` = 56007,
`BE: 80,000-99,999 francs` = 56008, `BE: 100,000-119,999 francs` = 56009))), class = "data.frame", row.names = c(NA, -28L))
IЯ пытаюсь изменить числовые коды в соответствующие строки. Кто-то показал мне, чтобы сделать это (с другой переменной) следующим образом:
tmp<-as.factor(WVS$X047CS) # Stores the variable as a factor
labes<-attr(WVS$X047CS,"labels") # Gets the labels attribute
levnam<-names(labes)[charmatch(levels(tmp),labes)] # matches the names in labes with the levels of tmp
tmp2 <- as.factor(WVS$X047CS)
levels(tmp2)<-levnam # Add the matches as levels of the factors
WVS$brackets <- tmp2 # Stores it in the new variable
Проблема в том, что по какой-то причине tmp2
имеет еще один факторlevel, чем количество уровней, которые сопоставляются в levnam
, из-за которых levels(tmp2)<-levnam
завершается неудачей. Я хочу выяснить, какой фактор-уровень не сопоставлен в levnam <-names(labes)[charmatch(levels(tmp),labes)]
, и добавить этот уровень в levnam, чтобы выполнить перекодирование.
Любые предложения о том, как это сделать?