I am implementing threshold regression with the SCAD penalty function. I wrote the objective function and its gradient, but `opt.minimize` cannot optimize it successfully to completion. Please help me take a look. I was once advised to use the 'Nelder-Mead' optimization method; that method ignores the gradient, and although it does finish successfully, the resulting plot is completely wrong.
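For reference, the model being fitted replaces the hard indicator 1{x >= c_m} with a smoothed version Φ((x − c_m)/h), where Φ is the standard normal CDF, and penalizes the L1 norm of each segment's coefficient increment with the standard SCAD penalty of Fan & Li (2001), whose branches match the ones coded in `loss_SCAD` below:

$$
p_\lambda(\theta)=\begin{cases}
\lambda\theta, & 0\le\theta\le\lambda,\\[4pt]
\dfrac{(a^2-1)\lambda^2-(\theta-a\lambda)^2}{2(a-1)}, & \lambda<\theta\le a\lambda,\\[4pt]
\tfrac12(a+1)\lambda^2, & \theta>a\lambda,
\end{cases}
\qquad
p_\lambda'(\theta)=\begin{cases}
\lambda, & 0\le\theta\le\lambda,\\[4pt]
\dfrac{a\lambda-\theta}{a-1}, & \lambda<\theta\le a\lambda,\\[4pt]
0, & \theta>a\lambda,
\end{cases}
$$

with $a = 3.7$, $\lambda$ = `tmp_lam`, and $\theta$ the L1 norm of a segment's coefficients.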
import numpy as np
from scipy import stats as st
from scipy import optimize as opt
from matplotlib import pyplot as plt
def loss_SCAD(beta_matrix):
    """SCAD-penalized least-squares objective; beta_matrix stacks the segment
    coefficients followed by the M threshold locations."""
    M_MSE = np.dot(x, beta_matrix[:tmp_lt])[:, None]
    M_c = np.array(beta_matrix[-M:], ndmin=1)
    scad = 0
    for idx, c in enumerate(M_c):
        nor_distri_x = (x[:, 1][:, None] - c) / tmp_h
        nor_distri_CDF = st.norm.cdf(nor_distri_x)  # smoothed indicator function
        M_MSE += np.dot(x, beta_matrix[tmp_lt * (idx + 1):tmp_lt * (idx + 2)])[:, None] * nor_distri_CDF
        beta_param = np.sum(np.abs(beta_matrix[tmp_lt * (idx + 1):tmp_lt * (idx + 2)]))
        if beta_param > a * tmp_lam:  # flat tail of SCAD
            scad += 0.5 * ((a + 1) * tmp_lam ** 2)
        elif beta_param > tmp_lam:  # quadratic middle piece
            scad += ((a ** 2 - 1) * tmp_lam ** 2 - (beta_param - a * tmp_lam) ** 2) / (2 * (a - 1))
        else:  # L1 piece near zero
            scad += tmp_lam * beta_param
    MSE = 0.5 * np.average((y - M_MSE) ** 2) + scad
    return MSE
def loss_derivative(beta_matrix):
    """Analytic gradient of loss_SCAD with respect to every parameter."""
    deriv_list = []
    other_M_MSE, CDF_list = 0, []
    first_M_MSE = y - np.dot(x, beta_matrix[:tmp_lt])[:, None]
    M_c = np.array(beta_matrix[-M:], ndmin=1)
    for idx, c in enumerate(M_c):
        nor_distri_CDF = st.norm.cdf((x[:, 1][:, None] - c) / tmp_h)  # smoothed indicator function
        CDF_list.append(nor_distri_CDF)  # keep every threshold's CDF, not just the last one
        other_M_MSE += np.dot(x, beta_matrix[tmp_lt * (idx + 1):tmp_lt * (idx + 2)])[:, None] * nor_distri_CDF
    square_part = first_M_MSE - other_M_MSE  # residuals y - model
    for idx, beta_param in enumerate(beta_matrix[:tmp_lt]):  # gradient: first-segment coefficients
        deriv_list.append(np.average(square_part * -x[:, idx][:, None]))
    for idx, c in enumerate(M_c):
        beta_param = np.sum(np.abs(beta_matrix[tmp_lt * (idx + 1):tmp_lt * (idx + 2)]))
        if beta_param == 0:  # gradient: SCAD penalty derivative p'(theta)
            scad = 0
        elif beta_param > tmp_lam:
            scad = np.max(((a * tmp_lam - beta_param) / (a - 1), 0))
        else:
            scad = tmp_lam
        for i in range(tmp_lt):  # gradient: coefficients of segment idx + 1
            sign = np.sign(beta_matrix[tmp_lt * (idx + 1) + i])  # subgradient direction of |beta|
            deriv_list.append(np.average(square_part * -x[:, i][:, None] * CDF_list[idx]) + scad * sign)
    for idx, c in enumerate(M_c):  # gradient: threshold parameters
        nor_distri_PDF = st.norm.pdf((x[:, 1][:, None] - c) / tmp_h)
        deriv_list.append(np.average(square_part * np.dot(x, beta_matrix[tmp_lt * (idx + 1):tmp_lt * (idx + 2)])[:, None] * nor_distri_PDF / tmp_h))
    return np.array(deriv_list)
def loss(beta_matrix):
    """Unpenalized least-squares objective (kept for comparison runs)."""
    M_MSE = np.dot(x, beta_matrix[:tmp_lt])[:, None]
    M_c = np.array(beta_matrix[-M:], ndmin=1)
    for idx, c in enumerate(M_c):
        nor_distri_x = (x[:, 1][:, None] - c) / tmp_h
        nor_distri_CDF = st.norm.cdf(nor_distri_x)  # smoothed indicator function
        M_MSE += np.dot(x, beta_matrix[tmp_lt * (idx + 1):tmp_lt * (idx + 2)])[:, None] * nor_distri_CDF
    MSE = 0.5 * np.average((y - M_MSE) ** 2)
    return MSE
np.random.seed(1171359)
a = 3.7  # SCAD shape parameter, the usual recommendation of Fan & Li
# Simulate a three-segment threshold model with thresholds at +/- sqrt(2)
tmp_x = np.random.normal(loc=0, scale=2, size=(400, 1))
tmp_beta = [2, -2, 5, 2, 9, 7]
tmp_c = [-np.sqrt(2), np.sqrt(2)]
M = np.shape(tmp_c)[0]  # number of thresholds
x1, x2, x3 = tmp_x[tmp_x < tmp_c[0]], tmp_x[(tmp_x >= tmp_c[0]) & (tmp_x < tmp_c[1])], tmp_x[tmp_x >= tmp_c[1]]
y1, y2, y3 = tmp_beta[0] + tmp_beta[1] * x1, tmp_beta[2] + tmp_beta[3] * x2, tmp_beta[4] + tmp_beta[5] * x3
x = np.reshape(np.concatenate((x1, x2, x3)), (-1, 1))
y = np.reshape(np.concatenate((y1, y2, y3)) + np.random.randn(400), (-1, 1))
x = np.concatenate((np.ones_like(x), x), axis=1)  # prepend an intercept column
tmp_lt = x.shape[1]  # coefficients per segment (intercept + slope)
tmp_h = np.log(400) / 400 * tmp_x.std(ddof=1)  # bandwidth of the smoothed indicator
tmp_lam = 0.5  # SCAD lambda
x0 = np.append(np.ones(7), 2)  # 6 coefficients + 2 thresholds (started at 1 and 2)
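# Optional sanity check: compare the analytic gradient with a finite-difference
# approximation at the starting point. A large value here means loss_derivative
# disagrees with loss_SCAD, and gradient-based optimizers will misbehave.
print('gradient error at x0:', opt.check_grad(loss_SCAD, loss_derivative, x0))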
# tmp_res = opt.minimize(fun=loss, x0=x0)  # unpenalized fit, for comparison
tmp_result = opt.minimize(fun=loss_SCAD, x0=x0, jac=loss_derivative)  # BFGS by default
# kk = tmp_res.x[:-2].reshape((2, -1), order='F')
k = tmp_result.x[:-2].reshape((2, -1), order='F')  # column m = coefficients of segment m
# Fitted segment lines: the coefficient increments accumulate across thresholds
y_hat_1 = np.dot(x[x[:, 1] < tmp_result.x[-2]], k[:, 0].reshape((-1, 1)))
y_hat_2 = np.dot(x[(x[:, 1] >= tmp_result.x[-2]) & (x[:, 1] < tmp_result.x[-1])], k[:, 0].reshape((-1, 1)) + k[:, 1].reshape((-1, 1)))
y_hat_3 = np.dot(x[x[:, 1] >= tmp_result.x[-1]], k[:, 0].reshape((-1, 1)) + k[:, 1].reshape((-1, 1)) + k[:, 2].reshape((-1, 1)))
print(tmp_result)
plt.figure(figsize=(6, 6))
# Fitted segments (colored) against the true segments (black) and the raw data
plt.plot(x[:, 1][x[:, 1] < tmp_result.x[-2]], y_hat_1, c='orange', alpha=0.6, linewidth=4)
plt.plot(x[:, 1][(x[:, 1] >= tmp_result.x[-2]) & (x[:, 1] < tmp_result.x[-1])], y_hat_2, c='blue', alpha=0.6, linewidth=4)
plt.plot(x[:, 1][x[:, 1] >= tmp_result.x[-1]], y_hat_3, c='green', alpha=0.6, linewidth=4)
plt.plot(x1, y1, c='black', alpha=0.6, linewidth=4)
plt.plot(x2, y2, c='black', alpha=0.6, linewidth=4)
plt.plot(x3, y3, c='black', alpha=0.6, linewidth=4)
plt.scatter(x[:, 1], y, s=12, alpha=0.5, c='gray')
plt.axis([tmp_x.min() * 1.05, tmp_x.max() * 1.05, y.min() * 1.05, y.max() * 1.05])
plt.show()
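If the gradient check passes but the default BFGS run still stalls, one variant worth trying (an assumption on my part, not something from the original setup) is a bounded quasi-Newton method that keeps both thresholds inside the observed range of the covariate, so the smoothed indicators never saturate completely:

# Hypothetical variant: L-BFGS-B with box constraints on the two thresholds only.
bounds = [(None, None)] * 6 + [(x[:, 1].min(), x[:, 1].max())] * 2
alt_result = opt.minimize(fun=loss_SCAD, x0=x0, jac=loss_derivative,
                          method='L-BFGS-B', bounds=bounds)
print(alt_result.fun, alt_result.x[-2:])  # objective value and fitted thresholds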