Я пытаюсь подогнать линейную модель к некоторым данным, используя код ниже.
Я получаю сообщение об ошибке при оценке формулы с помощью функции dmatrices из patsy. Сейчас я хочу выполнить регрессию для разных групп отдельно, а позже я буду использовать модель с фиксированными эффектами.
Код:
import statsmodels.api as stm
import statsmodels.formula.api as smf
from patsy import dmatrices
def MapRegression(df):
outcomes = df.columns[range(1, 25, 2)]
print("predicting the following group risks: " + outcomes)
print(df.head())
multimodel = {}
for i in outcomes:
df[i] = df[i].astype('float')/df[i[:-4]].astype('float')
formula = i + ' ~ DB_LO'
print(formula)
y, X = dmatrices(formula, data=df, return_type='dataframe')
print('The exogenous variable matrix:\n')
print(X)
model = smf.OLS(Y, X, missing='drop').fit()
predictions = model.predict(X)
multimodel[i]=model
return multimodel
multimodel = MapRegression(test2)
Ошибка:
Traceback (most recent call last):
File "/home/anaconda/anaconda3/lib/python3.6/site-packages/IPython/core/interactiveshell.py", line 3291, in run_code
if async_:
File "<ipython-input-192-81c89c9dfb54>", line 1, in <module>
multimodel = MapRegression(test2)
File "<ipython-input-191-0f0d44327aec>", line 14, in MapRegression
y, X = dmatrices(formula, data=df, return_type='dataframe')
File "/home/anaconda/anaconda3/lib/python3.6/site-packages/patsy/highlevel.py", line 310, in dmatrices
NA_action, return_type)
File "/home/anaconda/anaconda3/lib/python3.6/site-packages/patsy/highlevel.py", line 165, in _do_highlevel_design
NA_action)
File "/home/anaconda/anaconda3/lib/python3.6/site-packages/patsy/highlevel.py", line 70, in _try_incr_builders
NA_action)
File "/home/anaconda/anaconda3/lib/python3.6/site-packages/patsy/build.py", line 689, in design_matrix_builders
factor_states = _factors_memorize(all_factors, data_iter_maker, eval_env)
File "/home/anaconda/anaconda3/lib/python3.6/site-packages/patsy/build.py", line 354, in _factors_memorize
which_pass = factor.memorize_passes_needed(state, eval_env)
File "/home/anaconda/anaconda3/lib/python3.6/site-packages/patsy/eval.py", line 474, in memorize_passes_needed
subset_names = [name for name in ast_names(self.code)
File "/home/anaconda/anaconda3/lib/python3.6/site-packages/patsy/eval.py", line 474, in <listcomp>
subset_names = [name for name in ast_names(self.code)
File "/home/anaconda/anaconda3/lib/python3.6/site-packages/patsy/eval.py", line 105, in ast_names
for node in ast.walk(ast.parse(code)):
File "/home/anaconda/anaconda3/lib/python3.6/ast.py", line 35, in parse
return compile(source, filename, mode, PyCF_ONLY_AST)
File "<unknown>", line 1
0 Fdiab
^
SyntaxError: invalid syntax
Мой фрейм данных выглядит так:
0Fdiab 0Mdiab 15Fdiab 15Mdiab 30Fdiab 30Mdiab 45Fdiab \
74 0.000016 0.029507 0.026051 0.021665 0.043729 0.056223 0.142352
74 0.000016 0.029507 0.026051 0.021665 0.043729 0.056223 0.142352
75 104.122675 0.004143 0.003658 0.003042 0.006140 0.007894 0.019987
75 104.122675 0.004143 0.003658 0.003042 0.006140 0.007894 0.019987
75 104.122675 0.004143 0.003658 0.003042 0.006140 0.007894 0.019987
45Mdiab 60Fdiab 60Mdiab ... 15M 30F 30M 45F \
74 0.213063 0.200303 0.212398 ... 2.71932 2.85592 2.84972 3.22577
74 0.213063 0.200303 0.212398 ... 2.71932 2.85592 2.84972 3.22577
75 0.029916 0.028124 0.029823 ... 0.381815 0.400995 0.400125 0.452925
75 0.029916 0.028124 0.029823 ... 0.381815 0.400995 0.400125 0.452925
75 0.029916 0.028124 0.029823 ... 0.381815 0.400995 0.400125 0.452925
45M 60F 60M 75F 75M DB_LO
74 3.26574 2.17438 2.00485 1.41717 0.978697 45.0
74 3.26574 2.17438 2.00485 1.41717 0.978697 55.0
75 0.458538 0.305302 0.281498 0.198982 0.137417 75.0
75 0.458538 0.305302 0.281498 0.198982 0.137417 45.0
75 0.458538 0.305302 0.281498 0.198982 0.137417 65.0