У меня есть набор данных; чтобы пояснить вопрос, я взял из него 5 переменных и 10 наблюдений.
# Read the raw table without a header row (col_names = FALSE); the columns
# are therefore auto-named X1, X2, ... as the printed spec below shows.
# NOTE(review): read_table2() is reportedly deprecated in newer readr
# releases in favour of read_table() - confirm against the installed version.
originaldata <- read_table2("Desktop/originaldata.txt", col_names = FALSE)
# Inspect the structure of the imported data.
str(originaldata)
Classes ‘spec_tbl_df’, ‘tbl_df’, ‘tbl’ and 'data.frame': 5822 obs. of 86 variables:
- attr(*, "spec")=
.. cols(
.. X1 = col_double(),
.. X2 = col_double(),
..............
.. X86 = col_double()
.. )
# Build a small reproducible example: keep columns 6-10 (X6..X10) ...
dt <- subset(originaldata, select = c(6:10))
# ... and only the first 10 rows.
dt <- dt[1:10,]
# Inspect the example; note that dt is still a tibble (class "tbl_df").
str(dt)
Classes ‘tbl_df’, ‘tbl’ and 'data.frame': 10 obs. of 5 variables:
$ X6 : num 0 1 0 2 1 0 2 0 0 3
$ X7 : num 5 4 4 3 4 5 2 7 1 5
$ X8 : num 1 1 2 2 1 0 0 0 3 0
$ X9 : num 3 4 4 4 4 5 5 2 6 2
$ X10: num 7 6 3 5 7 0 7 7 6 7
> dput(dt)
structure(list(X6 = c(0, 1, 0, 2, 1, 0, 2, 0, 0, 3), X7 = c(5,
4, 4, 3, 4, 5, 2, 7, 1, 5), X8 = c(1, 1, 2, 2, 1, 0, 0, 0, 3,
0), X9 = c(3, 4, 4, 4, 4, 5, 5, 2, 6, 2), X10 = c(7, 6, 3, 5,
7, 0, 7, 7, 6, 7)), class = c("tbl_df", "tbl", "data.frame"), row.names = c(NA,
-10L))
Я хочу преобразовать эти столбцы в факторы, задав новые уровни и метки. Для этого я использовал цикл `for`, но в результате получил пропущенные значения (NA).
# Convert every column from X6 through X10 to a factor whose integer codes
# 0..9 are relabelled as percentage bands.
# NOTE(review): dt is a tibble (class "tbl_df"), and on a tibble the
# single-bracket form dt[, i] returns a one-column tibble rather than a plain
# vector. factor() then cannot match that input against levels 0:9, which is
# presumably why every value ends up NA in the output below. Extracting the
# column as a vector with dt[[i]] (the same thing dt$X6 does) avoids this.
for (i in which(colnames(dt)=="X6"):which(colnames(dt)=="X10")){
dt[,i] <- factor(dt[,i],
levels=c(0:9),
labels=c("0%",
"1-10%",
"11-23%",
"24-36%",
"37-49%",
"50-62%",
"63-75%",
"76-88%",
"89-99%",
"100%"))
}
# Inspect the result: all five columns are factors, but every value is NA.
str(dt)
Classes ‘tbl_df’, ‘tbl’ and 'data.frame': 10 obs. of 5 variables:
$ X6 : Factor w/ 10 levels "0%","1-10%","11-23%",..: NA NA NA NA NA NA NA NA NA NA
$ X7 : Factor w/ 10 levels "0%","1-10%","11-23%",..: NA NA NA NA NA NA NA NA NA NA
$ X8 : Factor w/ 10 levels "0%","1-10%","11-23%",..: NA NA NA NA NA NA NA NA NA NA
$ X9 : Factor w/ 10 levels "0%","1-10%","11-23%",..: NA NA NA NA NA NA NA NA NA NA
$ X10: Factor w/ 10 levels "0%","1-10%","11-23%",..: NA NA NA NA NA NA NA NA NA NA
> dput(dt)
structure(list(X6 = structure(c(NA_integer_, NA_integer_, NA_integer_,
NA_integer_, NA_integer_, NA_integer_, NA_integer_, NA_integer_,
NA_integer_, NA_integer_), class = "factor", .Label = c("0%",
"1-10%", "11-23%", "24-36%", "37-49%", "50-62%", "63-75%", "76-88%",
"89-99%", "100%")), X7 = structure(c(NA_integer_, NA_integer_,
NA_integer_, NA_integer_, NA_integer_, NA_integer_, NA_integer_,
NA_integer_, NA_integer_, NA_integer_), class = "factor", .Label = c("0%",
"1-10%", "11-23%", "24-36%", "37-49%", "50-62%", "63-75%", "76-88%",
"89-99%", "100%")), X8 = structure(c(NA_integer_, NA_integer_,
NA_integer_, NA_integer_, NA_integer_, NA_integer_, NA_integer_,
NA_integer_, NA_integer_, NA_integer_), class = "factor", .Label = c("0%",
"1-10%", "11-23%", "24-36%", "37-49%", "50-62%", "63-75%", "76-88%",
"89-99%", "100%")), X9 = structure(c(NA_integer_, NA_integer_,
NA_integer_, NA_integer_, NA_integer_, NA_integer_, NA_integer_,
NA_integer_, NA_integer_, NA_integer_), class = "factor", .Label = c("0%",
"1-10%", "11-23%", "24-36%", "37-49%", "50-62%", "63-75%", "76-88%",
"89-99%", "100%")), X10 = structure(c(NA_integer_, NA_integer_,
NA_integer_, NA_integer_, NA_integer_, NA_integer_, NA_integer_,
NA_integer_, NA_integer_, NA_integer_), class = "factor", .Label = c("0%",
"1-10%", "11-23%", "24-36%", "37-49%", "50-62%", "63-75%", "76-88%",
"89-99%", "100%"))), row.names = c(NA, -10L), class = c("tbl_df",
"tbl", "data.frame"))
Как это исправить? Я не понимаю, почему получаются NA.
Если преобразовывать столбец по отдельности, всё работает правильно. См. столбец X6.
# Convert only X6. The `$` accessor extracts the column as a plain numeric
# vector, so factor() can map the codes 0..9 onto the ten labels and the
# conversion succeeds (see the output below).
dt$X6 <- factor(dt$X6,
levels=c(0:9),
labels=c("0%",
"1-10%",
"11-23%",
"24-36%",
"37-49%",
"50-62%",
"63-75%",
"76-88%",
"89-99%",
"100%"))
# Inspect the result: X6 is now a correctly coded factor.
str(dt)
Classes ‘tbl_df’, ‘tbl’ and 'data.frame': 10 obs. of 5 variables:
$ X6 : Factor w/ 10 levels "0%","1-10%","11-23%",..: 1 2 1 3 2 1 3 1 1 4
$ X7 : num 5 4 4 3 4 5 2 7 1 5
$ X8 : num 1 1 2 2 1 0 0 0 3 0
$ X9 : num 3 4 4 4 4 5 5 2 6 2
$ X10: num 7 6 3 5 7 0 7 7 6 7
> dput(dt)
structure(list(X6 = structure(c(1L, 2L, 1L, 3L, 2L, 1L, 3L, 1L,
1L, 4L), .Label = c("0%", "1-10%", "11-23%", "24-36%", "37-49%",
"50-62%", "63-75%", "76-88%", "89-99%", "100%"), class = "factor"),
X7 = c(5, 4, 4, 3, 4, 5, 2, 7, 1, 5), X8 = c(1, 1, 2, 2,
1, 0, 0, 0, 3, 0), X9 = c(3, 4, 4, 4, 4, 5, 5, 2, 6, 2),
X10 = c(7, 6, 3, 5, 7, 0, 7, 7, 6, 7)), row.names = c(NA,
-10L), class = c("tbl_df", "tbl", "data.frame"))
Я также попробовал обратиться к столбцу по индексу и снова получил NA.
# Same conversion for column 2 (X7), but selected by position with dt[, 2].
# NOTE(review): on a tibble this single-bracket form keeps the data-frame
# shape, so factor() receives a one-column tibble instead of a vector - the
# likely cause of the NA values shown for X7 below. dt[[2]] would pass the
# underlying vector instead.
dt[,2] <- factor(dt[,2],
levels=c(0:9),
labels=c("0%",
"1-10%",
"11-23%",
"24-36%",
"37-49%",
"50-62%",
"63-75%",
"76-88%",
"89-99%",
"100%"))
# Inspect the result: X7 is a factor, but all of its values are NA.
str(dt)
Classes ‘tbl_df’, ‘tbl’ and 'data.frame': 10 obs. of 5 variables:
$ X6 : Factor w/ 10 levels "0%","1-10%","11-23%",..: 1 2 1 3 2 1 3 1 1 4
$ X7 : Factor w/ 10 levels "0%","1-10%","11-23%",..: NA NA NA NA NA NA NA NA NA NA
$ X8 : num 1 1 2 2 1 0 0 0 3 0
$ X9 : num 3 4 4 4 4 5 5 2 6 2
$ X10: num 7 6 3 5 7 0 7 7 6 7