Как исправить «Ошибка в Conj (x) * y: неконформируемые массивы» в R? - PullRequest
0 голосов
/ 27 апреля 2019

Я реализую пользовательскую функцию правдоподобия и собираюсь использовать библиотеку maxLik .

Функция my_function работает нормально, когда вектор параметров определен как переменная вглобальная среда, например, beta <- c(0.12, 0.28) и вне функции bm.loglikFun.Есть два коэффициента, потому что у меня есть две объясняющие переменные / особенности.Тем не менее, я получаю сообщение об ошибке, когда пытаюсь использовать нотацию, применимую к библиотеке maxLik.Пока что мой код выглядит так:

# libraries
library(pracma)
library(maxLik)
library(readxl)

# Bringing in the data
my_data <- read_excel("mock_data.xlsx", sheet = 3)

######### ---------------------------------------------- BEGIN HELPER FUNCTIONS ---------------------------------#####################

# Helper function for drift and diffusion to account for firms who never reach the next stage
exp_decay <- function(v, ν, τ) {
  tryCatch(
    expr = {
      return(ifelse(v < ν, 1, exp(-((v - ν) / τ))))
    },
    warning = function(w){
      return(0)
    }
  )
}


# Helper function for diffusion
g <- function(z) {
  0.5 * (z + sqrt(z^2 + 0.25))
}

# Computing drift
calculate_drift <- function(v_drift, ν_drift, τ_drift, X_drift, β_drift) {
      return((dot(X_drift, β_drift)) * exp_decay(v_drift, ν_drift, τ_drift))
}

# Computing diffusion
calculate_diffusion <- function(v_diff, ν_diff, τ_diff, X_diff, γ_diff) {
  tryCatch(
    expr = {
      return((g(dot(X_diff, γ_diff))^2) * exp_decay(v_diff, ν_diff, τ_diff))
    },
    warning = function(w){
      return(0)
    }
  )
}

# Computing M 
calculate_M <- function(lower_lim, upper_lim, covars) {
  tryCatch(
    expr = {
      if (is.na(lower_lim) || is.na(upper_lim)) {
        return(0)
      } else {
        temp_M <- integrate(calculate_drift, lower_lim, upper_lim, ν_drift=nu, τ_drift=tau, X_drift=covars, β_drift=β_c)
        return(temp_M$val)
      }
    },
    warning = function(w){
      return(0)
    }
  )
}

# Computing S 
calculate_S <- function(lower_lim, upper_lim, covars) {
  tryCatch(
    expr = {
      if (is.na(lower_lim) || is.na(upper_lim)) {
        return(0)
      } else {
        temp_S <- 0.5 * integral(calculate_diffusion, lower_lim, upper_lim, ν_diff=nu, τ_diff=tau, X_diff=covars, γ_diff=γ_c)
        return(temp_S)
      }
    },
    warning = function(w){
      return(0)
    }
  )
}

# Computing LL terms 
calculate_LL_term <- function(diffusion, round_size, S_round, M_round) {
      if (is.na(round_size) || diffusion == 0 || S_round == 0 || M_round == 0) {
        return(1)
      } else {
        temp_LL_term <- ((diffusion * round_size) / (sqrt(16 * pi * S_round^3))) * exp(-((round_size - M_round)^2) / (4 * S_round))
        return(temp_LL_term)
      }
}

# For non-exits 
calculate_LL_pt1 <- function(M_current, size_current, S_current) {
    temp_LL_pt1 = pnorm((M_current - size_current) / (sqrt(2 * S_current))) + exp((M_current * size_current) / S_current) * pnorm((-(M_current + size_current)) / (sqrt(2 * S_current)))
    return(temp_LL_pt1)
}

######### ----------------------------------------------END HELPER FUNCTIONS ---------------------------------#####################

########################################## Defining the log-likelihood function ##########################################

bm.loglikFun <- function(theta) {
  myFunction <- function(x) {

    # Drift for each round (returns 0 if NA) ##### TROUBLES START HERE ########
    drift_A = calculate_drift(as.numeric(unlist(x["years_to_A"])), theta[1], theta[2], as.numeric(unlist(x[2:3])), theta[3])
    drift_B = calculate_drift(as.numeric(unlist(x["years_to_B"])), theta[1], theta[2], as.numeric(unlist(x[2:3])), theta[3])
    drift_C = calculate_drift(as.numeric(unlist(x["years_to_C"])), theta[1], theta[2], as.numeric(unlist(x[2:3])), theta[3])
    drift_D = calculate_drift(as.numeric(unlist(x["years_to_D"])), theta[1], theta[2], as.numeric(unlist(x[2:3])), theta[3])
    drift_E = calculate_drift(as.numeric(unlist(x["years_to_E"])), theta[1], theta[2], as.numeric(unlist(x[2:3])), theta[3])
    drift_F = calculate_drift(as.numeric(unlist(x["years_to_F"])), theta[1], theta[2], as.numeric(unlist(x[2:3])), theta[3])
    drift_exit = calculate_drift(as.numeric(unlist(x["years_to_exit"])), theta[1], theta[2], as.numeric(unlist(x[2:3])), theta[3])

    # Used as part of Equation 4.6
    drift_curr = calculate_drift(as.numeric(as.character(x["years_since_founding"])), theta[1], theta[2], as.numeric(unlist(x[2:3])), theta[3])

    # Diffusion for each round (returns 0 if NA)
    diff_A = calculate_diffusion(as.numeric(unlist(x["years_to_A"])), theta[1], theta[2], as.numeric(unlist(x[2:3])), theta[4])
    diff_B = calculate_diffusion(as.numeric(unlist(x["years_to_B"])), theta[1], theta[2], as.numeric(unlist(x[2:3])), theta[4])
    diff_C = calculate_diffusion(as.numeric(unlist(x["years_to_C"])), theta[1], theta[2], as.numeric(unlist(x[2:3])), theta[4])
    diff_D = calculate_diffusion(as.numeric(unlist(x["years_to_D"])), theta[1], theta[2], as.numeric(unlist(x[2:3])), theta[4])
    diff_E = calculate_diffusion(as.numeric(unlist(x["years_to_E"])), theta[1], theta[2], as.numeric(unlist(x[2:3])), theta[4])
    diff_F = calculate_diffusion(as.numeric(unlist(x["years_to_F"])), theta[1], theta[2], as.numeric(unlist(x[2:3])), theta[4])
    diff_exit = calculate_diffusion(as.numeric(unlist(x["years_to_exit"])), theta[1], theta[2], as.numeric(unlist(x[2:3])), theta[4])

    # Used as part of Equation 4.6
    diff_curr = calculate_diffusion(as.numeric(as.character(x["years_since_founding"])), theta[1], theta[2], as.numeric(as.character(x[2:3])), theta[4])

    # M values
    M_A = calculate_M(as.numeric(unlist(x["A_prev_years"])), as.numeric(unlist(x["years_to_A"])), as.numeric(unlist(x[2:3])))
    M_B = calculate_M(as.numeric(unlist(x["B_prev_years"])), as.numeric(unlist(x["years_to_B"])), as.numeric(unlist(x[2:3])))
    M_C = calculate_M(as.numeric(unlist(x["C_prev_years"])), as.numeric(unlist(x["years_to_C"])), as.numeric(unlist(x[2:3])))
    M_D = calculate_M(as.numeric(unlist(x["D_prev_years"])), as.numeric(unlist(x["years_to_D"])), as.numeric(unlist(x[2:3])))
    M_E = calculate_M(as.numeric(unlist(x["E_prev_years"])), as.numeric(unlist(x["years_to_E"])), as.numeric(unlist(x[2:3])))
    M_F = calculate_M(as.numeric(unlist(x["F_prev_years"])), as.numeric(unlist(x["years_to_F"])), as.numeric(unlist(x[2:3])))
    M_exit = calculate_M(as.numeric(unlist(x["Exit_prev_years"])), as.numeric(unlist(x["years_to_exit"])), as.numeric(unlist(x[2:3])))

    # Used as part of Equation 4.6
    M_curr = calculate_M(as.numeric(unlist(x["years_to_current_round"])), as.numeric(unlist(x["years_since_founding"])), as.numeric(unlist(x[2:3])))

    # S values
    S_A = calculate_S(as.numeric(unlist(x["A_prev_years"])), as.numeric(unlist(x["years_to_A"])), as.numeric(unlist(x[2:3])))
    S_B = calculate_S(as.numeric(unlist(x["B_prev_years"])), as.numeric(unlist(x["years_to_B"])), as.numeric(unlist(x[2:3])))
    S_C = calculate_S(as.numeric(unlist(x["C_prev_years"])), as.numeric(unlist(x["years_to_C"])), as.numeric(unlist(x[2:3])))
    S_D = calculate_S(as.numeric(unlist(x["D_prev_years"])), as.numeric(unlist(x["years_to_D"])), as.numeric(unlist(x[2:3])))
    S_E = calculate_S(as.numeric(unlist(x["E_prev_years"])), as.numeric(unlist(x["years_to_E"])), as.numeric(unlist(x[2:3])))
    S_F = calculate_S(as.numeric(unlist(x["F_prev_years"])), as.numeric(unlist(x["years_to_F"])), as.numeric(unlist(x[2:3])))
    S_exit = calculate_S(as.numeric(unlist(x["Exit_prev_years"])), as.numeric(unlist(x["years_to_exit"])), as.numeric(unlist(x[2:3])))

    # Used as part of Equation 4.6
    S_curr = calculate_S(as.numeric(unlist(x["years_to_current_round"])), as.numeric(unlist(x["years_since_founding"])), as.numeric(unlist(x[2:3])))

    # LL_terms
    LL_A = calculate_LL_term(diff_A, as.numeric(unlist(x["size_A"])), S_A, M_A)
    LL_B = calculate_LL_term(diff_B, as.numeric(unlist(x["size_B"])), S_A, M_A)
    LL_C = calculate_LL_term(diff_C, as.numeric(unlist(x["size_C"])), S_A, M_A)
    LL_D = calculate_LL_term(diff_D, as.numeric(unlist(x["size_D"])), S_A, M_A)
    LL_E = calculate_LL_term(diff_E, as.numeric(unlist(x["size_E"])), S_A, M_A)
    LL_F = calculate_LL_term(diff_F, as.numeric(unlist(x["size_F"])), S_A, M_A)
    LL_exit = calculate_LL_term(diff_exit, as.numeric(unlist(x["size_exit"])), S_A, M_A)

    # Equation 4.5 and second term of Equation 4.6
    LL_final_exit = LL_A * LL_B * LL_C * LL_D * LL_E * LL_F * LL_exit

    # For non-exits
    LL_pt1 = calculate_LL_pt1(M_curr, as.numeric(unlist(x["size_current_round"])), S_curr)

    # Equation 4.6 (if the company did not exit)
    LL_final_no_exit =  (1 - LL_pt1) * LL_final_exit

    # The function needs to return different LL-values depending on whether the company has exited 
    ifelse(x["exit"] == 1, LL_final_exit, LL_final_no_exit)

  }

  my_data$c <- apply(my_data, 1, myFunction)
  return(sum(log(my_data$c)))

}

# Calling the optimization algorithm
start = c(3, 3, .1, .1, .1, .1)
names(start) = c("nu", "tau", "beta_leadership_age", "beta_number_of_founders", "gamma_leadership_age", "gamma_number_of_founders")
mle_res <- maxLik( logLik = bm.loglikFun, start = start, method="BFGS" )
print(summary(mle_res))

Полная ошибка выглядит следующим образом:

 Error in Conj(x) * y : non-conformable arrays 
15.
apply(Conj(x) * y, 2, sum) 
14.
dot(X_drift, β_drift) 
13.
calculate_drift(as.numeric(unlist(x["years_to_A"])), theta[1], 
    theta[2], as.numeric(unlist(x[2:3])), theta[3]) 
12.
FUN(newX[, i], ...) 
11.
apply(my_data, 1, myFunction) 
10.
fnOrig(theta, ...) 
9.
logLikFunc(theta, fnOrig = function (theta) 
{
    calculate_drift <- function(v_drift, ν_drift, τ_drift, 
        X_drift, β_drift) { ... 
8.
eval(f, sys.frame(sys.parent())) 
7.
eval(f, sys.frame(sys.parent())) 
6.
callWithoutArgs(theta, fName = fName, args = names(formals(sumt)), 
    ...) 
5.
(function (theta, fName, ...) 
{
    return(callWithoutArgs(theta, fName = fName, args = names(formals(sumt)), 
        ...)) ... 
4.
do.call(callWithoutSumt, argList) 
3.
maxOptim(fn = fn, grad = grad, hess = hess, start = start, method = "BFGS", 
    fixed = fixed, constraints = constraints, finalHessian = finalHessian, 
    parscale = parscale, control = mControl, ...) 
2.
maxRoutine(fn = logLik, grad = grad, hess = hess, start = start, 
    constraints = constraints, ...) 
1.
maxLik(logLik = bm.loglikFun, start = start, method = "BFGS") 

В идеале, я мог бы просто умножить вектор параметров на свой вектор признаковточно так же, как я смог сделать, когда не использовал функцию bm.loglikFun.

Добро пожаловать на сайт PullRequest, где вы можете задавать вопросы и получать ответы от других членов сообщества.
...