# Inspect the column labels of the baseline frame and of the two forecasts.
print(baseline.columns)
print(forecast1.columns)
print(forecast2.columns)
# Console output (pandas Index objects). Note that the three column sets
# only partially overlap — this motivates taking the intersection below.
Index(['1', '2', '3'], dtype='object')
Index(['1', '2', '5'], dtype='object')
Index(['1', '3', '5', '6'], dtype='object')
Вы можете взять пересечение множеств столбцов, чтобы выяснить, какие столбцы являются общими для базовой линии и прогноза, и затем применить к этим общим столбцам accuracy_score.
from sklearn.metrics import accuracy_score

# Columns shared by the baseline and the forecast: only these can be scored.
common_columns = list(set(baseline.columns).intersection(forecast1.columns))
n_common = len(common_columns)

avg_acc = 0.0
for c in common_columns:
    c_acc = accuracy_score(baseline[c], forecast1[c])
    print(f'Column {c} acc: {c_acc}')
    # Each shared column contributes an equal 1/n share of the mean.
    avg_acc += c_acc / n_common
print(avg_acc)
Напишем функцию, которая принимает базовый уровень и прогноз и возвращает точность.
from sklearn.metrics import accuracy_score


def calc_acc(baseline, forecast1):
    """Return the mean per-column accuracy over the columns shared by both frames.

    Only columns present in both ``baseline`` and ``forecast1`` are scored;
    each shared column's accuracy is printed, and the running mean is
    printed before being returned. Returns 0.0 when no columns are shared.
    """
    shared = list(set(baseline.columns).intersection(forecast1.columns))
    n_shared = len(shared)
    avg_acc = 0.0
    for c in shared:
        c_acc = accuracy_score(baseline[c], forecast1[c])
        print(f'Column {c} acc: {c_acc}')
        avg_acc += c_acc / n_shared
    print(avg_acc)
    return avg_acc
from sklearn.metrics import accuracy_score


def calc_acc(baseline, forecast1, penalize=True):
    """Return the mean per-column accuracy over columns shared by both frames.

    Parameters
    ----------
    baseline : DataFrame-like with a ``columns`` attribute — ground truth.
    forecast1 : DataFrame-like — predictions to score.
    penalize : bool, keyword with default True (was a hard-coded local;
        exposing it as a parameter is backward compatible). When True the
        divisor is enlarged by the absolute difference in column counts, so
        a forecast with more or fewer columns than the baseline scores lower.

    Returns
    -------
    float — the (possibly penalized) average accuracy; 0.0 when no columns
    are shared and ``penalize`` is False (the loop body never runs).
    """
    common_columns = list(set(baseline.columns).intersection(forecast1.columns))
    # Hoist the loop-invariant divisor out of the loop.
    if penalize:
        # Penalizes both extra and missing columns relative to the baseline;
        # adjust to your needs.
        div = len(common_columns) + abs(len(forecast1.columns) - len(baseline.columns))
    else:
        div = len(common_columns)
    avg_acc = 0.0
    for c in common_columns:
        c_acc = accuracy_score(baseline[c], forecast1[c])
        print(f'Column {c} acc: {c_acc}')
        avg_acc += c_acc / div
    print(avg_acc)
    return avg_acc
Для регрессии попробуйте среднюю абсолютную ошибку: чем меньше ошибка, тем лучше прогноз.
from sklearn.metrics import accuracy_score, mean_absolute_error


def calc_acc(baseline, forecast1, penalize=True):
    """Return the mean per-column mean-absolute-error over shared columns.

    Regression variant: lower is better. Parameters mirror the accuracy
    version; ``penalize`` (keyword, default True — was a hard-coded local,
    so the new parameter is backward compatible) enlarges the divisor by the
    absolute difference in column counts. NOTE: with MAE a larger divisor
    SHRINKS the reported error, which rewards column-count mismatch — keep
    ``penalize=False`` for a plain average, or adapt to your needs.

    Returns
    -------
    float — the averaged (possibly divisor-adjusted) MAE; 0.0 when no
    columns are shared and ``penalize`` is False.
    """
    common_columns = list(set(baseline.columns).intersection(forecast1.columns))
    # Loop-invariant divisor computed once, outside the loop.
    if penalize:
        div = len(common_columns) + abs(len(forecast1.columns) - len(baseline.columns))
    else:
        div = len(common_columns)
    avg_acc = 0.0
    for c in common_columns:
        c_acc = mean_absolute_error(baseline[c], forecast1[c])
        print(f'Column {c} mean absolute error: {c_acc}')
        avg_acc += c_acc / div
    print(avg_acc)
    return avg_acc
Обычно средний процент правильных ответов составляет примерно 100% − средняя относительная ошибка. Таким образом, вы можете просто вычесть ошибку из 100%.
def perc(a_list, b_list):
    """Return the MEAN fraction "correct": average of 1 - |a - b| / a.

    Bug fix: the original returned the SUM over all elements, although its
    caller prints the result as a "mean percent correct" — for long inputs
    the value could far exceed 1. Now the accumulated total is divided by
    the number of elements, and an empty input returns 0.0 instead of
    summing nothing / risking a division by len later.

    Parameters
    ----------
    a_list, b_list : equal-length sequences of numbers (lists or pandas
        Series); paired positionally via ``zip``.

    Returns
    -------
    float in the vicinity of 1.0 for a good forecast.
        NOTE(review): still divides by each ``a`` value — a zero in
        ``a_list`` raises ZeroDivisionError, as in the original.
    """
    n = len(a_list)
    if n == 0:  # len(), not truthiness: a pandas Series is ambiguous in bool()
        return 0.0
    total = 0.0
    for a, b in zip(a_list, b_list):
        total += 1.0 - abs(a - b) / a
    return total / n
from sklearn.metrics import accuracy_score, mean_absolute_error


def calc_acc(baseline, forecast1, penalize=True):
    """Return the mean per-column "percent correct" (via ``perc``) over shared columns.

    Parameters
    ----------
    baseline : DataFrame-like with a ``columns`` attribute — ground truth.
    forecast1 : DataFrame-like — predictions to score.
    penalize : bool, keyword with default True (was a hard-coded local;
        exposing it is backward compatible). When True the divisor grows by
        the absolute difference in column counts, so having more or fewer
        columns than the baseline lowers the score.

    Returns
    -------
    float — averaged (possibly penalized) per-column score; 0.0 when no
    columns are shared and ``penalize`` is False.
    """
    common_columns = list(set(baseline.columns).intersection(forecast1.columns))
    # Divisor is loop-invariant; compute it once.
    if penalize:
        # Penalizes both extra and missing columns; adjust to your needs.
        div = len(common_columns) + abs(len(forecast1.columns) - len(baseline.columns))
    else:
        div = len(common_columns)
    avg_acc = 0.0
    for c in common_columns:
        c_acc = perc(baseline[c], forecast1[c])
        # "percentange" typo in the original message fixed.
        print(f'Column {c} mean percentage correct: {c_acc}')
        avg_acc += c_acc / div
    print(avg_acc)
    return avg_acc