I implemented L1 regularization with pytorch for feature selection and found that my results differ from those of sklearn and cvxpy. Maybe I am using nn.L1Loss incorrectly, or maybe there is a better way to optimize (I tried both Adam and SGD with several different learning rates)?
import numpy as np
from tqdm import tqdm_notebook
import matplotlib.pyplot as plt
import cvxpy as cp
from sklearn.datasets import make_regression
from sklearn.model_selection import train_test_split
from sklearn.linear_model import Lasso
import torch
import torch.nn as nn
import torch.optim as optim
# generate data
X, y, coef_true = make_regression(n_samples=200, n_features=10000, n_informative=10,
                                  coef=True, random_state=123)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.25)
print(np.where(coef_true != 0)[0])
# [ 893 4422 4428 5284 5632 5975 6388 7586 8270 9597]
Using sklearn, I get the correct answer:
# sklearn lasso
lasso_sklearn = Lasso(alpha=0.2, warm_start=True)
lasso_sklearn.coef_ = np.zeros(X_train.shape[1])  # warm-start from all-zero coefficients
lasso_sklearn.fit(X_train, y_train)
coef_sklearn = lasso_sklearn.coef_
print(np.where(lasso_sklearn.coef_ != 0)[0])
# [ 893 4422 4428 5284 5632 5975 6388 7586 8270 9597]
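For reference, the objective sklearn's Lasso minimizes (per the sklearn docs) is (1 / (2 * n_samples)) * ||y - Xw||^2_2 + alpha * ||w||_1, which is where the 0.5 and 1/n scaling in the snippets below comes from. A quick, purely illustrative sanity check of that value for the fitted model, using the names defined above:

# sklearn's Lasso objective: 1/(2*n) * ||y - Xw||^2 + alpha * ||w||_1
n = X_train.shape[0]
resid = y_train - lasso_sklearn.predict(X_train)
obj_sklearn = 0.5 / n * np.sum(resid ** 2) + 0.2 * np.sum(np.abs(coef_sklearn))
print(obj_sklearn)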
Using pytorch, I get this answer:
# pytorch lasso
class lasso(nn.Module):
    def __init__(self, in_dim):
        super(lasso, self).__init__()
        self.linear = nn.Linear(in_dim, 1)

    def forward(self, X):
        return self.linear(X)

def weights_init(m):
    if isinstance(m, nn.Linear):
        torch.nn.init.zeros_(m.weight)

lasso_pytorch = lasso(X_train.shape[1])
lasso_pytorch.apply(weights_init)
l1_loss = nn.L1Loss(reduction='sum')
mse_loss = nn.MSELoss()
optimizer = optim.Adam(lasso_pytorch.parameters(), lr=0.0001)
alpha = 0.20
n_epoch = 5000
loss_history = []
lasso_pytorch.train()
for epoch in tqdm_notebook(range(n_epoch)):
    optimizer.zero_grad()
    outputs = lasso_pytorch(torch.from_numpy(X_train).float())
    # smooth part: 0.5 * MSE, matching sklearn's 1/(2n) scaling
    loss = 0.5 * mse_loss(outputs, torch.from_numpy(y_train.reshape(-1, 1)).float())
    # L1 penalty on every parameter (note: this also penalizes the bias)
    for param in lasso_pytorch.parameters():
        loss += alpha * l1_loss(param, torch.zeros_like(param))
    loss_history.append(loss.item())  # .item() avoids retaining the whole graph
    loss.backward()
    optimizer.step()
coef_pytorch = np.array(lasso_pytorch.linear.weight.data).squeeze()
print(np.argsort(-np.abs(coef_pytorch))[:10])
# [5141 2251 902 2848 5002 8925 9328 8084 1888 2208]
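For what it's worth, nn.L1Loss(reduction='sum') against a zero tensor does compute sum(|param|), so the penalty term itself matches the Lasso objective; the catch is that plain (sub)gradient steps on an L1 term almost never drive weights to exactly zero, which would explain why no clear support emerges. One alternative worth trying is proximal gradient descent (ISTA), which applies a soft-thresholding step after each gradient update and does produce exact zeros. Below is a minimal sketch of that idea, not a verified drop-in fix; the step size 1/L is the standard ISTA choice, and X_train, y_train, alpha are reused from above:

# ISTA sketch: gradient step on the smooth 0.5*MSE part only, then a
# soft-thresholding (proximal) step for the alpha*||w||_1 part.
X_t = torch.from_numpy(X_train).float()
y_t = torch.from_numpy(y_train.reshape(-1, 1)).float()
n = X_t.shape[0]
L = np.linalg.norm(X_train, 2) ** 2 / n  # Lipschitz constant of the smooth gradient
lr = 1.0 / L
w = torch.zeros(X_t.shape[1], 1, requires_grad=True)
b = torch.zeros(1, requires_grad=True)
for epoch in range(5000):
    loss = 0.5 * torch.mean((X_t @ w + b - y_t) ** 2)  # smooth part only
    loss.backward()
    with torch.no_grad():
        b -= lr * b.grad
        w -= lr * w.grad
        # proximal step: soft-thresholding yields exact zeros,
        # unlike a subgradient step on the L1 term
        w.copy_(torch.sign(w) * torch.clamp(w.abs() - lr * alpha, min=0.0))
        b.grad.zero_()
        w.grad.zero_()
print(np.where(w.detach().numpy().squeeze() != 0)[0])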
Using cvxpy, I also get the correct answer:
# cvxpy lasso
def loss_fn(X, Y, beta):
    return cp.norm2(cp.matmul(X, beta) - Y) ** 2

def regularizer(beta):
    return cp.norm1(beta)

def objective_fn(X, Y, beta, alpha):
    # same scaling as sklearn's Lasso objective
    return 0.5 / len(X) * loss_fn(X, Y, beta) + alpha * regularizer(beta)

coef = cp.Variable(X_train.shape[1])
coef.value = np.zeros(X_train.shape[1])  # warm-start from zeros
alpha = cp.Parameter(nonneg=True)
alpha.value = 0.2
# note: this solves on the full X, y rather than X_train, y_train
problem = cp.Problem(cp.Minimize(objective_fn(X, y, coef, alpha)))
problem.solve(solver=cp.ECOS, warm_start=True)
coef_cvxpy = coef.value
print(np.where(coef.value > 0.01)[0])
# [ 893 4422 4428 5284 5632 5975 6388 7586 8270 9597]
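To compare the three fits on an equal footing, one illustrative option is to reduce each solution to its support and check it against the ground truth; since the pytorch coefficients have no exact zeros, the ten largest magnitudes stand in for its support:

# compare recovered supports against the true informative features
support_true = set(np.where(coef_true != 0)[0])
support_sklearn = set(np.where(coef_sklearn != 0)[0])
support_cvxpy = set(np.where(np.abs(coef_cvxpy) > 0.01)[0])
support_pytorch = set(np.argsort(-np.abs(coef_pytorch))[:10])
print(support_sklearn == support_true)  # matches in the runs above
print(support_cvxpy == support_true)    # matches in the runs above
print(support_pytorch == support_true)  # does not match in the runs above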