Я пытаюсь превратить код в класс Python.
мой фрейм данных "URM" выглядит так:
user_id anime_id user_rating
0 1 20 7.808497
1 3 20 8.000000
2 5 20 6.000000
3 6 20 7.808497
4 10 20 7.808497
Я фильтрую только 999 пользователей по причине вычислений.
Если я скопирую и вставлю весь этот код без "self", он будет работать нормально.
когда я пытаюсь погрузиться в класс
вот так
параметры
`class ALS:
def __init__(self, URM, n_factors, lambda_ , n_iterations) :
self.URM = URM
self.n_factors = n_factors
self.lambda_ = lambda_
self.n_iterations = n_iterations
self.n = max(self.URM["anime_id"])
self.m = max(self.URM["user_id"])
функции для нормализации df, метрик и вычислительной матрицы обучения
def normaliseRow(self, x):
return x / sum(x)
def initialiseMatrix(self, n, n_factors):
A = abs(np.random.randn(self.n, self.n_factors))
return np.apply_along_axis(self.normaliseRow, 1, A)
def ratingsPred(X, Y):
return np.dot(X, Y.T)
def MSE(self, ratingsPred, ratingsMatrix):
idx = ratingsMatrix > 0
return sum((ratingsPred[idx] - ratingsMatrix[idx]) ** 2) / np.count_nonzero(ratingsMatrix)
def compute_matrix(self) :
Y = self.initialiseMatrix(self.n, self.n_factors)
X = self.initialiseMatrix(self.m, self.n_factors)
temp = np.zeros((n, 3)) #user_id, anime_id rating_user
for i in range(1,n):
temp[i,] = [m+1,i,0]
COL_NAME = ["user_id","anime_id", "user_rating"]
RM = self.URM.append(pd.DataFrame(temp, columns =COL_NAME))
RM = RM.pivot_table(columns=['anime_id'], index =['user_id'],
values='user_rating', dropna = False)
RM = RM.fillna(0).as_matrix()
ratingsMatrix = RM[0:self.m,0:self.n]
nonZero = ratingsMatrix > 0
reg = lambda_ * np.eye(n_factors,n_factors)
return X, Y, ratingsMatrix, nonZero, reg
тренировка чередующихся наименьших квадратов
def train(self) :
X, Y, ratingsMatrix, nonZero, reg = self.compute_matrix()
print("start training ...")
training_process = []
for k in range(1, self.n_iterations):
for i in range(1, self.m):
idx = nonZero[i,:]
a = Y[idx,]
b = np.dot(np.transpose(Y[idx,]), ratingsMatrix[i, idx])
updateX = np.linalg.solve(np.dot(np.transpose(a), a) + reg, b)
X[i,] = updateX
for j in range(1, self.n):
idx = nonZero[:,j]
a = X[idx,]
b = np.dot(np.transpose(X[idx,]), ratingsMatrix[idx, j])
updateY = np.linalg.solve(np.dot(np.transpose(a), a) + reg, b)
Y[j,] = updateY
ratingsP = self.ratingsPred(X, Y)
mse = self.MSE(ratingsP, ratingsMatrix)
training_process.append((k, mse))
if (k+1) % 5 == 0:
print("Iteration: %d ; mse = %.4f" % (k+1, mse))
return training_process
начать тренировку
# df, n_factors, lambda, iteration
als_model = ALS(urm, 15, 0.1, 10) `
als_mode.train()
возвращает IndexError
---------------------------------------------------------------------------
IndexError Traceback (most recent call last)
<ipython-input-130-ce2849580784> in <module>()
----> 1 als_model.train()
<ipython-input-129-14f3d633244d> in train(self)
53 for j in range(1, self.n):
54 idx = nonZero[:,j]
---> 55 a = X[idx,]
56 b = np.dot(np.transpose(X[idx,]), ratingsMatrix[idx, j])
57 updateY = np.linalg.solve(np.dot(np.transpose(a), a) + reg, b)
IndexError: boolean index did not match indexed array along dimension 0; dimension is 34240 but corresponding boolean dimension is 999