Я получил следующие оценки с помощью кода, показанного ниже.
# R^2 score: 0.95
# Mean Absolute Error: 0.61
# Mean Squared Error: 0.56
# Mean absolute percentage error: 0.17
# Mean percentage error: -0.06
Я хотел бы знать, правильно ли выполнен расчет и нет ли ошибок в логике. Я получил формулы из этой статьи:
https://www.dataquest.io/blog/understanding-regression-error-metrics/
Код:
import pandas as pd
import numpy as np
from sklearn import linear_model
# Toy data set: one feature `x` and the target `y` that is regressed on it.
x = np.array([0, 1, 2, 3, 4, 5, 6, 7, 8, 9])
y = np.array([1, 3, 2, 5, 7, 8, 8, 9, 10, 12])

# The feature is wrapped in a one-column DataFrame; the target stays a 1-D array.
X_train, y_train = pd.DataFrame(x), y

learner = linear_model.LinearRegression()
learner.fit(X_train, y_train)

# Predictions are made on the training data itself, so every metric derived
# from `y_pred` is an in-sample figure.
y_pred = learner.predict(X_train)

# R^2 as reported by the estimator. The value is bound and printed because a
# bare expression statement is silently discarded when run as a script.
r2_sklearn = learner.score(X_train, y_train)
print(f"R^2 score (sklearn): {r2_sklearn}")

# R^2 computed by hand as 1 - SS_res / SS_tot, as a cross-check of the above.
ss_res = ((y_train - y_pred) ** 2).sum()
ss_tot = ((y_train - y_train.mean()) ** 2).sum()
r2_manual = 1 - ss_res / ss_tot
print(f"R^2 score (manual): {r2_manual}")
# Mean Absolute Error: the average of |actual - predicted|.
# Computed as one vectorized expression so that no loop variables overwrite
# the module-level `x` / `y` data arrays.
mae = np.mean(np.abs(y_train - y_pred))
print(mae)
# Mean Squared Error: the average of (actual - predicted)^2.
# Computed as one vectorized expression so that no loop variables overwrite
# the module-level `x` / `y` data arrays.
mse = np.mean((y_train - y_pred) ** 2)
print(mse)
# Mean Absolute Percentage Error: the average of |(actual - predicted) / actual|,
# expressed as a fraction (multiply by 100 for a percentage).
# The absolute value is taken of the whole ratio, so a negative actual value
# cannot contribute a negative term.
# NOTE: the ratio is undefined for any entry of `y_train` that equals zero.
mape = np.mean(np.abs((y_train - y_pred) / y_train))
print(mape)
# Mean Percentage Error (signed): the average of (actual - predicted) / actual,
# expressed as a fraction. Positive and negative errors cancel, so with this
# sign convention a negative result means the predictions are, on average,
# above the actual values.
# NOTE: the ratio is undefined for any entry of `y_train` that equals zero.
mpe = np.mean((y_train - y_pred) / y_train)
print(mpe)