Iterative version of the softmax derivative
import numpy as np

def softmax_grad(s):
    # Take the derivative of each softmax output w.r.t. each logit (usually Wi * x).
    # Input s is the softmax value of the original input x.
    # s.shape = (n,)
    # e.g. s = np.array([0.3, 0.7]), x = np.array([0, 1])
    # Initialize the 2-D Jacobian matrix (every entry is overwritten below).
    jacobian_m = np.diag(s)
    for i in range(len(jacobian_m)):
        for j in range(len(jacobian_m)):
            if i == j:
                jacobian_m[i][j] = s[i] * (1 - s[i])
            else:
                jacobian_m[i][j] = -s[i] * s[j]
    return jacobian_m
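For example, with the softmax vector from the comment above, the iterative version produces a 2x2 Jacobian (a minimal usage sketch; the sample values are only illustrative):

s = np.array([0.3, 0.7])
print(softmax_grad(s))
# [[ 0.21 -0.21]
#  [-0.21  0.21]]   (up to floating-point rounding)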
Vectorized version
def softmax_grad(softmax):
    # Reshape the 1-D softmax output to a column vector so that np.dot
    # computes the outer product s * s.T.
    s = softmax.reshape(-1, 1)
    return np.diagflat(s) - np.dot(s, s.T)
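A quick consistency check that the vectorized version matches the closed-form 2x2 Jacobian for the example vector from the comments (a minimal sketch; it assumes the vectorized softmax_grad defined just above is the one in scope):

s = np.array([0.3, 0.7])
jac_vectorized = softmax_grad(s)
# Closed-form entries: diagonal s_i * (1 - s_i), off-diagonal -s_i * s_j.
jac_manual = np.array([[ s[0] * (1 - s[0]), -s[0] * s[1]],
                       [-s[1] * s[0],        s[1] * (1 - s[1])]])
print(np.allclose(jac_vectorized, jac_manual))  # True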
Reference: https://medium.com/@aerinykim/how-to-implement-the-softmax-derivative-independently-from-any-loss-function-ae6d44363a9d