Я создаю модель логистической регрессии для бинарной классификации в системе обнаружения сетевых вторжений.
После разделения данных:
train_X, test_X, train_y, test_y = train_test_split( data_train[Features_after_vif],
data_train['label_Normal'],
test_size = 0.3,
random_state = 123 )
и построения модели на обучающем наборе данных:
formula1='label_Normal ~ '+ ' + '.join(Features_after_vif)
logreg1 = sm.logit(formula1,train_X)
Я получаю сообщение об ошибке ниже:
NameError Traceback (most recent call
last)
~\Anaconda3\lib\site-packages\patsy\compat.py in call_and_wrap_exc(msg,
origin, f, *args, **kwargs)
35 try:
---> 36 return f(*args, **kwargs)
37 except Exception as e:
~\Anaconda3\lib\site-packages\patsy\eval.py in eval(self, expr,
source_name, inner_namespace)
165 return eval(code, {}, VarLookupDict([inner_namespace]
--> 166 + self._namespaces))
167
<string> in <module>
NameError: name 'label_Normal' is not defined
The above exception was the direct cause of the following exception:
PatsyError Traceback (most recent call
last)
<ipython-input-138-117e65edfa0f> in <module>
----> 1 logreg1 = sm.logit(formula1,train_X)
2 result = logreg1.fit()
3 summ = result.summary2()
4 summ
~\AppData\Roaming\Python\Python37\site-packages\statsmodels\base\model.py
in from_formula(cls, formula, data, subset, drop_cols, *args, **kwargs)
157
158 tmp = handle_formula_data(data, None, formula, depth=eval_env,
--> 159 missing=missing)
160 ((endog, exog), missing_idx, design_info) = tmp
161
~\AppData\Roaming\Python\Python37\site-
packages\statsmodels\formula\formulatools.py in handle_formula_data(Y, X,
formula, depth, missing)
63 if data_util._is_using_pandas(Y, None):
64 result = dmatrices(formula, Y, depth,
return_type='dataframe',
---> 65 NA_action=na_action)
66 else:
67 result = dmatrices(formula, Y, depth,
return_type='dataframe',
~\Anaconda3\lib\site-packages\patsy\highlevel.py in
dmatrices(formula_like, data, eval_env, NA_action, return_type)
308 eval_env = EvalEnvironment.capture(eval_env, reference=1)
309 (lhs, rhs) = _do_highlevel_design(formula_like, data, eval_env,
--> 310 NA_action, return_type)
311 if lhs.shape[1] == 0:
312 raise PatsyError("model is missing required outcome
variables")
~\Anaconda3\lib\site-packages\patsy\highlevel.py in
_do_highlevel_design(formula_like, data, eval_env, NA_action,
return_type)
163 return iter([data])
164 design_infos = _try_incr_builders(formula_like, data_iter_maker,
eval_env,
--> 165 NA_action)
166 if design_infos is not None:
167 return build_design_matrices(design_infos, data,
~\Anaconda3\lib\site-packages\patsy\highlevel.py in
_try_incr_builders(formula_like, data_iter_maker, eval_env, NA_action)
68 data_iter_maker,
69 eval_env,
---> 70 NA_action)
71 else:
72 return None
~\Anaconda3\lib\site-packages\patsy\build.py in
design_matrix_builders(termlists, data_iter_maker, eval_env, NA_action)
694 factor_states,
695 data_iter_maker,
--> 696 NA_action)
697 # Now we need the factor infos, which encapsulate the knowledge of
698 # how to turn any given factor into a chunk of data:
~\Anaconda3\lib\site-packages\patsy\build.py in
_examine_factor_types(factors, factor_states, data_iter_maker, NA_action)
441 for data in data_iter_maker():
442 for factor in list(examine_needed):
--> 443 value = factor.eval(factor_states[factor], data)
444 if factor in cat_sniffers or guess_categorical(value):
445 if factor not in cat_sniffers:
~\Anaconda3\lib\site-packages\patsy\eval.py in eval(self, memorize_state,
data)
564 return self._eval(memorize_state["eval_code"],
565 memorize_state,
--> 566 data)
567
568 __getstate__ = no_pickling
~\Anaconda3\lib\site-packages\patsy\eval.py in _eval(self, code,
memorize_state, data)
549 memorize_state["eval_env"].eval,
550 code,
--> 551 inner_namespace=inner_namespace)
552
553 def memorize_chunk(self, state, which_pass, data):
~\Anaconda3\lib\site-packages\patsy\compat.py in call_and_wrap_exc(msg,
origin, f, *args, **kwargs)
41 origin)
42 # Use 'exec' to hide this syntax from the Python 2 parser:
---> 43 exec("raise new_exc from e")
44 else:
45 # In python 2, we just let the original exception escape -
- better
~\Anaconda3\lib\site-packages\patsy\compat.py in <module>
PatsyError: Error evaluating factor: NameError: name 'label_Normal' is
not defined
label_Normal ~ Counts + service_link + service_ldap + service_kshell +
service_klogin + service_iso_tsap + service_imap4 + service_http_443 +
service_hostnames + service_gopher + service_ftp_data + service_finger +
service_exec + service_efs + service_login + service_ecr_i + service_echo +
service_domain_u + service_domain + service_discard + service_daytime +
service_ctf + service_csnet_ns + service_courier + service_bgp +
wrong_fragment_1 + service_Z39_50 + service_X11 + service_eco_i +
service_mtp + service_name + service_netbios_dgm + su_attempted_2 +
srv_diff_host_rate + service_whois + service_vmnet + service_uucp_path +
service_uucp + service_urp_i + service_time + service_telnet +
service_systat + service_supdup + service_sunrpc + service_ssh +
service_sql_net + service_smtp + service_netbios_ns + service_netbios_ssn +
service_netstat + service_nnsp + service_nntp + service_ntp_u + root_shell_1
+ service_other + service_pop_3 + service_printer + service_private +
service_remote_job + service_rje + dst_host_serror_rate + service_shell +
service_pop_2 + service_auth + last_flag_17 + last_flag_16 + last_flag_15 +
last_flag_13 + last_flag_12 + protocol_type_udp + last_flag_10 + last_flag_1
+ is_guest_login_1 + flag_SH + flag_S3 + flag_S2 + flag_S1 + flag_RSTR +
flag_RSTOS0 + diff_srv_rate + dst_bytes + dst_host_count +
dst_host_diff_srv_rate + dst_host_rerror_rate + dst_host_same_src_port_rate
+ last_flag_18 + dst_host_srv_count + dst_host_srv_diff_host_rate +
flag_RSTO + last_flag_19 + last_flag_11 + last_flag_20 + num_compromised +
last_flag_2 + num_access_files_2 + num_access_files_1 + logged_in_1 +
last_flag_9 + last_flag_8 + last_flag_7 + wrong_fragment_3 + last_flag_5 +
last_flag_4 + last_flag_3 + last_flag_6 + num_shells_1 + su_attempted_1 +
duration + last_flag_14 + service_urh_i + num_failed_logins_2 +
service_red_i + num_access_files_3 + land_1 + service_pm_dump + hot +
num_access_files_5 + num_failed_logins_3 + num_access_files_4
^^^^^^^^^^^^
Не понимаю, почему сообщается, что зависимая переменная 'label_Normal'
не определена, хотя я выполнил тест VIF и t-тест с той же зависимой переменной.
При необходимости я могу поделиться Jupyter-ноутбуком и набором данных.