Я хотел бы создать функцию, которая постепенно фильтрует информационный фрейм по всем возможным комбинациям значений столбцов. Я уже построил функцию, которая проходит через первые три функции и их соответствующие значения комбинации, но в идеале рекурсивная функция будет выполнять намного более эффективно, независимо от количества функций в фрейме данных.
def range_(feature):
#A utility function
#With extreme values
interval = (min(feature),max(feature))
delta = max(feature)-min(feature)
return (interval,delta)
GP_OP = pd.read_csv('GeneralPreOp_OnePager.csv')
data_GP_OP = { 'GeAr0' : (0,1),
'GeHo2' : (-1,0,1,2,3), #-1 : Patient doesn't take painkillers
'GeAr3' : (0,1),
'GeHo5' : (-1,0,1,2,3), #-1: Patient doesn't take anti-inflammatories
'GeHo6' : (0,1,2,3,4),
'GeAr9' : (0,1),
'GeDo10': (0,1),
'GeDo12': (0,1,2),
'GeDo13': (0,1,2,3)
}
#The actual code
#GP_OP[GP_OP.GeAr0 == 1][GP_OP.GeAr3 == 0]
for i,feature in enumerate(vv):
df_feature = getattr(vv,feature) #take feature
value_df_feature = data[feature] #take values of feature
total_features = len(vv)
for j,value in enumerate(value_df_feature):
interval = range_(GP_OP[df_feature == value].Oxford_Score_6w)
sample = len(GP_OP[df_feature == value])
perc = round((sample/475)*100,2)
GP_OP_oxford3[(feature,value )] = (interval,sample,perc)
#------------
for t,f in enumerate(vv): #for all the other features
if i != t: #don't test the feature with itself
test_feature = getattr(vv,f) #test feature
value_test_feature = data[f] #value of test feature
for test,test_value in enumerate(value_test_feature):
if len(GP_OP[df_feature == value][test_feature == test_value].Oxford_Score_6w) != 0:
interval2 = range_(GP_OP[df_feature == value][test_feature == test_value].Oxford_Score_6w)
sample2 = len(GP_OP[df_feature == value][test_feature == test_value])
perc2 = round((sample2/475)*100,2)
GP_OP_oxford3[(feature,value ),(f,test_value)] = (interval2 ,sample2,perc2)
#---------------
for t1,f1 in enumerate(vv): #for all the other features
if i != t1 and t!=t1: #don't test the feature with itself
test_feature1 = getattr(vv,f1) #test feature
value_test_feature1 = data[f1] #value of test feature
for test1,test_value1 in enumerate(value_test_feature1):
if len(GP_OP[df_feature == value][test_feature == test_value][test_feature1 == test_value1].Oxford_Score_6w) != 0:
interval3 = range_(GP_OP[df_feature == value][test_feature == test_value][test_feature1 == test_value1].Oxford_Score_6w)
sample3 = len(GP_OP[df_feature == value][test_feature == test_value][test_feature1 == test_value1])
perc3 = round((sample3/475)*100,2)
GP_OP_oxford3[(feature,value ),(f,test_value),(f1,test_value1)] = (interval3 ,sample3,perc3)
Теперь рекурсивная функция, которую я пытался построить, не может рекурсивно вызывать правильный «шаблон»
h = tuple()
GP_OP_dict = {}
def recur(h,patt):
for i,feature in enumerate(vv):
df_feature = getattr(vv,feature)
value_df_feature = data[feature]
for j,value in enumerate(value_df_feature):
if len(h)==0:
pattern = GP_OP[df_feature == value]
h = (feature,value)
else:
pattern = patt[df_feature == value]
h +=((feature,value),)
interval = range_(pattern.Oxford_Score_6w)
sample = len(pattern)
perc = round((sample/475)*100,2)
GP_OP_dict[h] = (interval,sample,perc)
recur(h,pattern)
recur((),"")