randomForestSRC выдаёт ошибки - PullRequest
0 голосов
/ 08 ноября 2018

Я пытаюсь построить несколько случайных лесов выживания (random survival forests) с помощью пакета randomForestSRC, но получаю ошибки, причину которых не могу понять. Вот код, который я использую:

rsc = rfsrc(Surv(Length_of_Service_from_Seniority_Date_Including_Partial_Year__CF_,as.numeric(Active))~.,data = x,
            ntree = 10000,nsplit=10,na.action = 'na.omit',importance=T)

В результате я получаю следующую ошибку и предупреждения и не понимаю, чем они вызваны:

Error in dimnames(x) <- dn : 
  length of 'dimnames' [2] not equal to array extent
In addition: Warning messages:
1: In doTryCatch(return(expr), name, parentenv, handler) :
  NAs introduced by coercion
2: In matrix(nativeOutput$imputation, nrow = n.miss, byrow = FALSE) :
  data length [335830] is not a sub-multiple or multiple of the number of rows [5694]

Пример данных:

structure(list(Time_in_Prior_Cohort_._Years__CF_ = c(4.01, 2.26, 
    0, 1, 0, 0, 6.12, 1.32, 2.15, 9.47, 0, 2.95, 0.73, 0, 2.07, 6.12, 
    0, 3.95, 0, 0), Age_Group = structure(c(4L, 2L, 5L, 3L, 3L, 6L, 
    3L, 4L, 2L, 5L, 4L, 3L, 4L, 3L, 3L, 3L, 4L, 3L, 4L, 3L), .Label = c("20 and under", 
    "21 - 30", "31 - 40", "41 - 50", "51 - 59", "60 and over"), class = "factor"), 
        Gender = structure(c(2L, 2L, 2L, 2L, 1L, 1L, 1L, 1L, 1L, 
        1L, 1L, 1L, 1L, 1L, 2L, 1L, 1L, 2L, 2L, 2L), .Label = c("Female", 
        "Male", "Not Defined"), class = "factor"), Time_in_Current_Cohort_._Years__CF_ = c(7.6, 
        4.78, 10.7, 3.78, 10.44, 10.17, 3.97, 8.73, 7.85, 0.5, 9.98, 
        5.78, 3.78, 9.65, 6.77, 3.28, 9.34, 3.78, 9.21, 9.13), Cohort = structure(c(6L, 
        5L, 4L, 6L, 2L, 2L, 2L, 2L, 3L, 2L, 1L, 4L, 6L, 3L, 5L, 5L, 
        2L, 4L, 5L, 3L), .Label = c("Representative", "Analyst", 
        "Associate", "Manager", "Senior Manager", "Director"), class = c("ordered", 
        "factor")), Length_of_Service_from_Seniority_Date_Including_Partial_Year__CF_ = c(7.92, 
        7.67, 7.25, 7.67, 7.5, 6.75, 3, 7.58, 6.42, 0.25, 6.58, 6.42, 
        7.42, 6, 6.08, 6.75, 5.92, 7.08, 7.67, 6.42), Active = structure(c(2L, 
        2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 
        2L, 2L, 2L, 2L), .Label = c("Active", "Terminated"), class = "factor"), 
        Time_in_Prior_Prior_Cohort_._Years__CF_ = c(0, 3.97, 0, 5.85, 
        0, 0, 0, 0, 0, 0, 0, 1.09, 2.8, 0, 0.59, 0, 0, 1.48, 0, 0
        )), row.names = c(NA, 20L), class = "data.frame")
...