У меня проблема при попытке запустить функцию dffits () для объекта моей собственной логистической регрессии.Когда я бегу dffits(log)
, я получаю сообщение об ошибке: error in if (model $ rank == 0) {: Аргумент имеет длину 0
Однако, когда я использую встроенную функцию тренажерного зала (family = binomial), тогда dffits(glm)
работает просто отлично.
Вот моя функция для логистической регрессии и короткий пример моей проблемы:
mydata <- read.csv("https://stats.idre.ucla.edu/stat/data/binary.csv")
mydata$rank <- factor(mydata$rank)
mydata$admit <- factor(mydata$admit)
logRegEst <- function(x, y, threshold = 1e-10, maxIter = 100)
{
calcPi <- function(x, beta)
{
beta <- as.vector(beta)
return(exp(x %*% beta) / (1 + exp(x %*% beta)))
}
beta <- rep(0, ncol(x)) # initial guess for beta
diff <- 1000
# initial value bigger than threshold so that we can enter our while loop
iterCount = 0
# counter to ensure we're not stuck in an infinite loop
while(diff > threshold) # tests for convergence
{
pi <- as.vector(calcPi(x, beta))
# calculate pi by using the current estimate of beta
W <- diag(pi * (1 - pi)) # calculate matrix of weights W
beta_change <- solve(t(x) %*% W %*% x) %*% t(x) %*% (y - pi)
# calculate the change in beta
beta <- beta + beta_change # new beta
diff <- sum(beta_change^2)
# calculate how much we changed beta by in this iteration
# if this is less than threshold, we'll break the while loop
iterCount <- iterCount + 1
# see if we've hit the maximum number of iterations
if(iterCount > maxIter){
stop("This isn't converging.")
}
# stop if we have hit the maximum number of iterations
}
df <- length(y) - ncol(x)
# calculating the degrees of freedom by taking the length of y minus
# the number of x columns
vcov <- solve(t(x) %*% W %*% x)
list(coefficients = beta, vcov = vcov, df = df)
# returning results
}
logReg <- function(formula, data)
{
mf <- model.frame(formula = formula, data = data)
# model.frame() returns us a data.frame with the variables needed to use the
# formula.
x <- model.matrix(attr(mf, "terms"), data = mf)
# model.matrix() creates a disign matrix. That means that for example the
#"Sex"-variable is given as a dummy variable with ones and zeros.
y <- as.numeric(model.response(mf)) - 1
# model.response gives us the response variable.
est <- logRegEst(x, y)
# Now we have the starting position to apply our function from above.
est$formula <- formula
est$call <- match.call()
est$data <- data
# We add the formular and the call to the list.
est$x <- x
est$y <- y
# We add x and y to the list.
class(est) <- "logReg"
# defining the class
est
}
log <- logReg(admit ~ gre + gpa, data= mydata)
glm <- glm(admit ~ gre + gpa, data= mydata, family = binomial)
dffits(glm)
dffits(log)
log$data
glm$data
Я не понимаю, почемуmydata $ rank == 0, потому что когда я смотрю на log$data
, я вижу, что ранг определен как в glm$data
.
Я очень ценю вашу помощь!