Неверный синтаксис при написании формулы для патси-матриц - PullRequest
0 голосов
/ 05 апреля 2019

Я пытаюсь подогнать линейную модель к некоторым данным, используя код ниже. Я получаю сообщение об ошибке при оценке формулы с помощью функции dmatrices из patsy. Сейчас я хочу выполнить регрессию для разных групп отдельно, а позже я буду использовать модель с фиксированными эффектами. Код:

import statsmodels.api as stm
import statsmodels.formula.api as smf
from patsy import dmatrices

def MapRegression(df):
    outcomes = df.columns[range(1, 25, 2)]
    print("predicting the following group risks: " + outcomes)
    print(df.head())
    multimodel = {}
    for i in outcomes:
        df[i] = df[i].astype('float')/df[i[:-4]].astype('float')
        formula = i + ' ~ DB_LO'
        print(formula)
        y, X = dmatrices(formula, data=df, return_type='dataframe')
        print('The exogenous variable matrix:\n')
        print(X)
        model = smf.OLS(Y, X, missing='drop').fit()
        predictions = model.predict(X)
        multimodel[i]=model
    return multimodel

multimodel = MapRegression(test2)

Ошибка:

Traceback (most recent call last):

  File "/home/anaconda/anaconda3/lib/python3.6/site-packages/IPython/core/interactiveshell.py", line 3291, in run_code
    if async_:

  File "<ipython-input-192-81c89c9dfb54>", line 1, in <module>
    multimodel = MapRegression(test2)

  File "<ipython-input-191-0f0d44327aec>", line 14, in MapRegression
    y, X = dmatrices(formula, data=df, return_type='dataframe')

  File "/home/anaconda/anaconda3/lib/python3.6/site-packages/patsy/highlevel.py", line 310, in dmatrices
    NA_action, return_type)

  File "/home/anaconda/anaconda3/lib/python3.6/site-packages/patsy/highlevel.py", line 165, in _do_highlevel_design
    NA_action)

  File "/home/anaconda/anaconda3/lib/python3.6/site-packages/patsy/highlevel.py", line 70, in _try_incr_builders
    NA_action)

  File "/home/anaconda/anaconda3/lib/python3.6/site-packages/patsy/build.py", line 689, in design_matrix_builders
    factor_states = _factors_memorize(all_factors, data_iter_maker, eval_env)

  File "/home/anaconda/anaconda3/lib/python3.6/site-packages/patsy/build.py", line 354, in _factors_memorize
    which_pass = factor.memorize_passes_needed(state, eval_env)

  File "/home/anaconda/anaconda3/lib/python3.6/site-packages/patsy/eval.py", line 474, in memorize_passes_needed
    subset_names = [name for name in ast_names(self.code)

  File "/home/anaconda/anaconda3/lib/python3.6/site-packages/patsy/eval.py", line 474, in <listcomp>
    subset_names = [name for name in ast_names(self.code)

  File "/home/anaconda/anaconda3/lib/python3.6/site-packages/patsy/eval.py", line 105, in ast_names
    for node in ast.walk(ast.parse(code)):

  File "/home/anaconda/anaconda3/lib/python3.6/ast.py", line 35, in parse
    return compile(source, filename, mode, PyCF_ONLY_AST)

  File "<unknown>", line 1
    0 Fdiab
          ^
SyntaxError: invalid syntax

Мой фрейм данных выглядит так:

        0Fdiab    0Mdiab   15Fdiab   15Mdiab   30Fdiab   30Mdiab   45Fdiab  \
74    0.000016  0.029507  0.026051  0.021665  0.043729  0.056223  0.142352   
74    0.000016  0.029507  0.026051  0.021665  0.043729  0.056223  0.142352   
75  104.122675  0.004143  0.003658  0.003042  0.006140  0.007894  0.019987   
75  104.122675  0.004143  0.003658  0.003042  0.006140  0.007894  0.019987   
75  104.122675  0.004143  0.003658  0.003042  0.006140  0.007894  0.019987   

     45Mdiab   60Fdiab   60Mdiab  ...       15M       30F       30M       45F  \
74  0.213063  0.200303  0.212398  ...   2.71932   2.85592   2.84972   3.22577   
74  0.213063  0.200303  0.212398  ...   2.71932   2.85592   2.84972   3.22577   
75  0.029916  0.028124  0.029823  ...  0.381815  0.400995  0.400125  0.452925   
75  0.029916  0.028124  0.029823  ...  0.381815  0.400995  0.400125  0.452925   
75  0.029916  0.028124  0.029823  ...  0.381815  0.400995  0.400125  0.452925   

         45M       60F       60M       75F       75M DB_LO  
74   3.26574   2.17438   2.00485   1.41717  0.978697  45.0  
74   3.26574   2.17438   2.00485   1.41717  0.978697  55.0  
75  0.458538  0.305302  0.281498  0.198982  0.137417  75.0  
75  0.458538  0.305302  0.281498  0.198982  0.137417  45.0  
75  0.458538  0.305302  0.281498  0.198982  0.137417  65.0 
...