Включите в формулу также переменную отклика, например
с помощью reformulate:
library(randomForest)
# Fit the forest from a formula that has the response on its left-hand side,
# so the response is recorded in the model's terms. Predictors are selected
# by name (every column except the response) instead of by column position,
# which keeps the call correct if columns are reordered or added.
rf <- randomForest(
  reformulate(
    termlabels = setdiff(names(df), "mytarget"),
    response = "mytarget"
  ),
  data = df, ntree = 10, nodesize = 10
)
# Take the second column of the class-probability matrix for the test rows.
# The values below are sample output; no seed is set, so they may differ
# between runs.
predict(rf, df_test, type = "prob")[, 2]
#   1   2
# 0.3 0.3
Либо соберите формулу как строку с помощью paste:
# Alternative: assemble the formula as text. The right-hand side joins every
# column name except the response with " + "; as.formula() then converts the
# string into a formula object for randomForest().
f <- sprintf(
  "mytarget ~ %s",
  paste(colnames(df)[colnames(df) != "mytarget"], collapse = " + ")
)
rf <- randomForest(as.formula(f), data = df, ntree = 10, nodesize = 10)
# Second column of the class-probability matrix (sample output shown below).
predict(rf, df_test, type = "prob")[, 2]
#   1   2
# 0.2 0.2
Проблема в том, что в объекте terms нет переменной отклика,
если мы не указали её в формуле:
# Inspect the terms stored in the fitted model. The printed output below comes
# from a fit whose formula had no response: the formula is one-sided and the
# "variables" attribute lists only the three features.
rf[["terms"]]
# ~feature1 + feature2 + feature3
# attr(,"variables")
# list(feature1, feature2, feature3)
# attr(,"factors")
#          feature1 feature2 feature3
# feature1        1        0        0
# feature2        0        1        0
# feature3        0        0        1
# attr(,"term.labels")
# [1] "feature1" "feature2" "feature3"
Если же переменная отклика указана в формуле, она появляется и в terms:
# Inspect the terms again for a fit whose formula includes the response:
# the formula is now two-sided, `mytarget` appears in the "variables"
# attribute and gets its own (all-zero) row in the "factors" matrix.
rf[["terms"]]
# mytarget ~ feature1 + feature2 + feature3
# attr(,"variables")
# list(mytarget, feature1, feature2, feature3)
# attr(,"factors")
#          feature1 feature2 feature3
# mytarget        0        0        0
# feature1        1        0        0
# feature2        0        1        0
# feature3        0        0        1
# attr(,"term.labels")
# [1] "feature1" "feature2" "feature3"
Данные:
# Training data: three integer 0/1 feature columns and a two-level factor
# response (`cool`, `warm`), five rows.
df <- data.frame(
  feature1 = c(1L, 0L, 1L, 0L, 1L),
  feature2 = c(0L, 0L, 1L, 1L, 1L),
  feature3 = c(1L, 1L, 0L, 0L, 1L),
  mytarget = factor(
    c("cool", "cool", "warm", "warm", "warm"),
    levels = c("cool", "warm")
  )
)

# Test data: the same three feature columns without the response, two rows.
df_test <- data.frame(
  feature1 = 1:0,
  feature2 = c(0L, 0L),
  feature3 = c(1L, 1L)
)