Я создаю логистическую модель (Logit) для бинарной классификации в сетевой системе обнаружения вторжений.
После разделения данных:
formula1='label_Normal ~ '+ ' + '.join(Features_after_vif)
train_X, test_X, train_y, test_y = train_test_split( data_train[Features_after_vif],
data_train['label_Normal'],
test_size = 0.3,
random_state = 123 )
и построения модели на обучающем наборе данных:
logreg1 = sm.logit(formula1,train_X)
Я получаю сообщение об ошибке ниже:
----> 1 logreg1 = sm.logit(formula1,train_X)
2 result = logreg1.fit()
3 summ = result.summary2()
4 summ
C:\Users\Himanshu\AppData\Roaming\Python\Python37\site-
packages\statsmodels\discrete\discrete_model.py:1788: RuntimeWarning: divide
by zero encountered in log
return np.sum(np.log(self.cdf(q*np.dot(X,params))))
Warning: Maximum number of iterations has been exceeded.
Current function value: inf
Iterations: 35
---------------------------------------------------------------------------
LinAlgError Traceback (most recent call last)
<ipython-input-162-f3465b6dcb3c> in <module>
1 logreg = sm.logit(formula1, data=train_X)
----> 2 result = logreg.fit()
3 summ = result.summary2()
4 summ
~\AppData\Roaming\Python\Python37\site-
packages\statsmodels\discrete\discrete_model.py in fit(self, start_params,
method, maxiter, full_output, disp, callback, **kwargs)
1899 bnryfit = super(Logit, self).fit(start_params=start_params,
1900 method=method, maxiter=maxiter, full_output=full_output,
-> 1901 disp=disp, callback=callback, **kwargs)
1902
1903 discretefit = LogitResults(self, bnryfit)
~\AppData\Roaming\Python\Python37\site-
packages\statsmodels\discrete\discrete_model.py in fit(self, start_params,
method, maxiter, full_output, disp, callback, **kwargs)
214 mlefit = super(DiscreteModel, self).fit(start_params=start_params,
215 method=method, maxiter=maxiter, full_output=full_output,
--> 216 disp=disp, callback=callback, **kwargs)
217
218 return mlefit # up to subclasses to wrap results
~\AppData\Roaming\Python\Python37\site-packages\statsmodels\base\model.py in
fit(self, start_params, method, maxiter, full_output, disp, fargs, callback,
retall, skip_hessian, **kwargs)
475 Hinv = cov_params_func(self, xopt, retvals)
476 elif method == 'newton' and full_output:
--> 477 Hinv = np.linalg.inv(-retvals['Hessian']) / nobs
478 elif not skip_hessian:
479 H = -1 * self.hessian(xopt)
<__array_function__ internals> in inv(*args, **kwargs)
~\Anaconda3\lib\site-packages\numpy\linalg\linalg.py in inv(a)
545 signature = 'D->D' if isComplexType(t) else 'd->d'
546 extobj = get_linalg_error_extobj(_raise_linalgerror_singular)
--> 547 ainv = _umath_linalg.inv(a, signature=signature, extobj=extobj)
548 return wrap(ainv.astype(result_t, copy=False))
549
~\Anaconda3\lib\site-packages\numpy\linalg\linalg.py in
_raise_linalgerror_singular(err, flag)
95
96 def _raise_linalgerror_singular(err, flag):
---> 97 raise LinAlgError("Singular matrix")
98
99 def _raise_linalgerror_nonposdef(err, flag):
LinAlgError: Singular matrix
Не знаю, почему это выдает ошибку «Сингулярная матрица». Пожалуйста, помогите решить эту проблему.
Если необходимо, я могу поделиться блокнотом Jupyter и набором данных.