Я недавно начал курс Coursera ML и решил самостоятельно обучить модель линейной регрессии методом градиентного спуска на Python. Вот код, который я написал:
import csv
# hypothesis function
def h(row, a, b, c, d, e, f):
return a + (b * row[0]) + (c * row[1]) + (d * row[2]) + (e * row[3]) + (f * row[4])
# cost function
def costFunc(X, Y, theta0, theta1, theta2, theta3, theta4, theta5):
s = 0
for rowX, y in zip(X, Y):
s += (h(rowX, theta0, theta1, theta2, theta3, theta4, theta5) - y) ** 2
return s / (2 * len(Y))
# loading the data
with open('data.csv') as csvfile:
data = csv.reader(csvfile)
X, Y = [], []
for row in data:
X.append(list(map(float, row[:len(row) - 2])))
Y.append(float(row[len(row) - 1]))
# number of training examples
m = len(X)
# computing the mean and std_deviation for each feature
x1_mean, x1_std = sum([row[0] for row in X]) / len(X), max([row[0] for row in X]) - min([row[0] for row in X])
x2_mean, x2_std = sum([row[1] for row in X]) / len(X), max([row[1] for row in X]) - min([row[1] for row in X])
x3_mean, x3_std = sum([row[2] for row in X]) / len(X), max([row[2] for row in X]) - min([row[2] for row in X])
x4_mean, x4_std = sum([row[3] for row in X]) / len(X), max([row[3] for row in X]) - min([row[3] for row in X])
x5_mean, x5_std = sum([row[4] for row in X]) / len(X), max([row[4] for row in X]) - min([row[4] for row in X])
# scaling the features
for row in X:
row[0] = (row[0] - x1_mean) / x1_std
row[1] = (row[1] - x2_mean) / x2_std
row[2] = (row[2] - x3_mean) / x3_std
row[3] = (row[3] - x4_mean) / x4_std
row[4] = (row[4] - x5_mean) / x5_std
# defining the starting values for gradient descent
theta0, theta1, theta2, theta3, theta4, theta5 = 1, 1, 1, 1, 1, 1
alpha = 1
convergence = 0.1
# initializing the convergence condition
convergent = False
# computing the values of theta through gradient descent
while not convergent:
temp0, temp1, temp2, temp3, temp4, temp5 = 0, 0, 0, 0, 0, 0
for row, y in zip(X, Y):
temp0 += h(row, theta0, theta1, theta2, theta3, theta4, theta5) - y
temp1 += (h(row, theta0, theta1, theta2, theta3, theta4, theta5) - y) * row[0]
temp2 += (h(row, theta0, theta1, theta2, theta3, theta4, theta5) - y) * row[1]
temp3 += (h(row, theta0, theta1, theta2, theta3, theta4, theta5) - y) * row[2]
temp4 += (h(row, theta0, theta1, theta2, theta3, theta4, theta5) - y) * row[3]
temp5 += (h(row, theta0, theta1, theta2, theta3, theta4, theta5) - y) * row[4]
temp0, temp1, temp2, temp3, temp4, temp5 = temp0/m, temp1/m, temp2/m, temp3/m, temp4/m, temp5/m
# updating the values of theta
theta0 = theta0 - alpha * temp0
theta1 = theta1 - alpha * temp1
theta2 = theta2 - alpha * temp2
theta3 = theta3 - alpha * temp3
theta4 = theta4 - alpha * temp4
theta5 = theta5 - alpha * temp5
# computing the value of the cost function and printing it
cost = costFunc(X, Y, theta0, theta1, theta2, theta3, theta4, theta5)
print(cost)
# evaluation whether the cost function has converged
if cost <= convergence:
convergent = True
print("Done Training!")
print('theta0 = ', theta0)
print('theta1 = ', theta1)
print('theta2 = ', theta2)
print('theta3 = ', theta3)
print('theta4 = ', theta4)
print('theta5 = ', theta5)
Моя проблема в том, что независимо от того, какое значение я использую для альфы, значение функции стоимости уменьшается очень медленно и останавливается на отметке 38,5. А при некоторых значениях альфы оно даже остаётся неизменным и не меняется от итерации к итерации. Поэтому проблема, вероятно, в моём коде, но я не могу понять, что я сделал неправильно.
Набор данных содержит 5 признаков и 414 обучающих примеров.