На основании приведенного ниже набора данных я хотел бы рассчитать AU C для каждого алгоритма, а также позже для каждого набора данных. Я пробовал что-то вроде этого, но это не работает:
from sklearn.metrics import roc_auc_score,roc_curve,scorer
import pandas as pd
test = pd.DataFrame(dico)
def auc_group(y_hat, y):
return roc_auc_score(y_hat, y)
test.groupby(["Dataset", "Algo"]).apply(auc_group)
Позже я хотел бы сделать ту же операцию, но на Folds of KFolds, который будет просто еще один слой groupby
from sklearn.metrics import roc_auc_score,roc_curve,scorer
import pandas as pd
test = pd.DataFrame(dico)
def auc_group(y_hat, y):
return roc_auc_score(y_hat, y)
test.groupby(["Dataset", "Algo", "Folds"]).apply(auc_group)
А вот и данные
dico = {'Dataset': {0: 'UCI',
1: 'UCI',
2: 'UCI',
3: 'UCI',
4: 'UCI',
5: 'UCI',
6: 'UCI',
7: 'UCI',
8: 'UCI',
9: 'UCI',
10: 'UCI',
11: 'UCI',
12: 'UCI',
13: 'UCI',
14: 'UCI',
15: 'UCI',
16: 'UCI',
17: 'UCI',
18: 'UCI',
19: 'UCI',
20: 'UCI',
21: 'UCI',
22: 'UCI',
23: 'UCI',
24: 'UCI',
25: 'UCI',
26: 'UCI',
27: 'UCI',
28: 'UCI',
29: 'UCI',
30: 'UCI',
31: 'UCI',
32: 'UCI',
33: 'UCI',
34: 'UCI',
35: 'UCI',
36: 'UCI',
37: 'UCI',
38: 'UCI',
39: 'UCI'},
'Algo': {0: 'Gnb',
1: 'Gnb',
2: 'Gnb',
3: 'Gnb',
4: 'Gnb',
5: 'Gnb',
6: 'Gnb',
7: 'Gnb',
8: 'Gnb',
9: 'Gnb',
10: 'Gnb',
11: 'Gnb',
12: 'Gnb',
13: 'Gnb',
14: 'Gnb',
15: 'Gnb',
16: 'Gnb',
17: 'Gnb',
18: 'Gnb',
19: 'Gnb',
20: 'LR',
21: 'LR',
22: 'LR',
23: 'LR',
24: 'LR',
25: 'LR',
26: 'LR',
27: 'LR',
28: 'LR',
29: 'LR',
30: 'LR',
31: 'LR',
32: 'LR',
33: 'LR',
34: 'LR',
35: 'LR',
36: 'LR',
37: 'LR',
38: 'LR',
39: 'LR'},
'p(y=1)': {0: 0.008566693461697914,
1: 0.023329740200720657,
2: 0.013079244223084688,
3: 0.0035655899487093525,
4: 0.5412516864202239,
5: 0.02437104068449619,
6: 0.0015772504872503706,
7: 0.01976775149918856,
8: 0.02580128697308947,
9: 0.052349648267671536,
10: 0.016115492810474592,
11: 0.028573206085476182,
12: 0.9975288953422592,
13: 0.1281394485094793,
14: 0.0014564219132441555,
15: 0.015625393606472308,
16: 0.15181450609384148,
17: 0.015221143650194884,
18: 0.022419878846782183,
19: 0.9991431483286071,
20: 0.04281920675218464,
21: 0.035985853029231185,
22: 0.05570563548576814,
23: 0.5468626213371839,
24: 0.01616233084557819,
25: 0.025090866736312712,
26: 0.4368789472788432,
27: 0.5268969392335681,
28: 0.06716466142340655,
29: 0.2093170587100108,
30: 0.008660602880515709,
31: 0.10929145816022637,
32: 0.04069088617214272,
33: 0.06683143493934368,
34: 0.06653318086395299,
35: 0.016010358473692744,
36: 0.08583523793056999,
37: 0.044347932186208014,
38: 0.014208157887412804,
39: 0.007949785472510792},
'y_hat': {0: 0,
1: 0,
2: 0,
3: 0,
4: 1,
5: 0,
6: 0,
7: 0,
8: 0,
9: 0,
10: 0,
11: 0,
12: 1,
13: 0,
14: 0,
15: 0,
16: 0,
17: 0,
18: 0,
19: 1,
20: 0,
21: 0,
22: 0,
23: 1,
24: 0,
25: 0,
26: 0,
27: 1,
28: 0,
29: 0,
30: 0,
31: 0,
32: 0,
33: 0,
34: 0,
35: 0,
36: 0,
37: 0,
38: 0,
39: 0},
'y': {0: 0,
1: 0,
2: 0,
3: 0,
4: 0,
5: 0,
6: 0,
7: 0,
8: 0,
9: 0,
10: 0,
11: 0,
12: 1,
13: 1,
14: 0,
15: 0,
16: 0,
17: 0,
18: 0,
19: 1,
20: 0,
21: 0,
22: 0,
23: 1,
24: 0,
25: 0,
26: 0,
27: 0,
28: 0,
29: 0,
30: 0,
31: 0,
32: 0,
33: 0,
34: 0,
35: 0,
36: 0,
37: 0,
38: 0,
39: 0}}
А вот и сообщение об ошибке:
---------------------------------------------------------------------------
TypeError Traceback (most recent call last)
/opt/conda/lib/python3.7/site-packages/pandas/core/groupby/groupby.py in apply(self, func, *args, **kwargs)
724 try:
--> 725 result = self._python_apply_general(f)
726 except Exception:
/opt/conda/lib/python3.7/site-packages/pandas/core/groupby/groupby.py in _python_apply_general(self, f)
741 def _python_apply_general(self, f):
--> 742 keys, values, mutated = self.grouper.apply(f, self._selected_obj, self.axis)
743
/opt/conda/lib/python3.7/site-packages/pandas/core/groupby/ops.py in apply(self, f, data, axis)
236 group_axes = _get_axes(group)
--> 237 res = f(group)
238 if not _is_indexed_like(res, group_axes):
TypeError: auc_group() missing 1 required positional argument: 'y'
During handling of the above exception, another exception occurred:
TypeError Traceback (most recent call last)
<ipython-input-23-eab997668f67> in <module>
2 return roc_auc_score(y_hat, y)
3
----> 4 test.groupby(["Dataset", "Algo"]).apply(auc_group)
/opt/conda/lib/python3.7/site-packages/pandas/core/groupby/groupby.py in apply(self, func, *args, **kwargs)
735
736 with _group_selection_context(self):
--> 737 return self._python_apply_general(f)
738
739 return result
/opt/conda/lib/python3.7/site-packages/pandas/core/groupby/groupby.py in _python_apply_general(self, f)
740
741 def _python_apply_general(self, f):
--> 742 keys, values, mutated = self.grouper.apply(f, self._selected_obj, self.axis)
743
744 return self._wrap_applied_output(
/opt/conda/lib/python3.7/site-packages/pandas/core/groupby/ops.py in apply(self, f, data, axis)
235 # group might be modified
236 group_axes = _get_axes(group)
--> 237 res = f(group)
238 if not _is_indexed_like(res, group_axes):
239 mutated = True
TypeError: auc_group() missing 1 required positional argument: 'y'