This can be implemented as follows:
lr = 0.01
with tf.name_scope('optimizer'):
    vars_ = tf.trainable_variables()
    grads = tf.gradients(loss_tensor, vars_)
    # plain SGD step: v <- v - lr * grad
    assign_ops = [tf.assign(v, (v - lr*g)) for g, v in zip(grads, vars_)]
    # after the step, rescale every variable to unit L2 norm
    with tf.control_dependencies(assign_ops):
        vars_norms = [tf.sqrt(2*tf.nn.l2_loss(v)) for v in vars_]
        # next line prevents division by zero
        vars_norms = [tf.clip_by_value(n, 0.00001, np.inf) for n in vars_norms]
        update_ops = [tf.assign(v, v/n) for v, n in zip(vars_, vars_norms)]
        update_op = tf.group(update_ops)
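As a side note, tf.nn.l2_loss(v) computes sum(v**2)/2, so tf.sqrt(2*tf.nn.l2_loss(v)) above is just the ordinary L2 (Frobenius) norm of v. A quick sanity check with a throwaway constant (purely illustrative, not part of the graph above):

import numpy as np
import tensorflow as tf

v = tf.constant([[3.0, 4.0], [0.0, 0.0]])
with tf.Session() as sess:
    print(sess.run(tf.sqrt(2 * tf.nn.l2_loss(v))))  # 5.0
print(np.linalg.norm([[3.0, 4.0], [0.0, 0.0]]))     # 5.0, same value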
Note that I added tf.clip_by_value() to prevent division by zero.
Here is a complete usage example:
import tensorflow as tf
import numpy as np
x = tf.placeholder(tf.float32, shape=(None, 2))
y = tf.placeholder(tf.int32, shape=(None))
logits = tf.layers.dense(x, 2)
xentropy = tf.nn.sparse_softmax_cross_entropy_with_logits(
    labels=y, logits=logits)
loss_tensor = tf.reduce_mean(xentropy)
lr = 0.01
with tf.name_scope('optimizer'):
    vars_ = tf.trainable_variables()
    grads = tf.gradients(loss_tensor, vars_)
    assign_ops = [tf.assign(v, (v - lr*g)) for g, v in zip(grads, vars_)]
    with tf.control_dependencies(assign_ops):
        vars_norms = [tf.sqrt(2*tf.nn.l2_loss(v)) for v in vars_]
        # next line prevents division by zero
        vars_norms = [tf.clip_by_value(n, 0.00001, np.inf) for n in vars_norms]
        update_ops = [tf.assign(v, v/n) for v, n in zip(vars_, vars_norms)]
        update_op = tf.group(update_ops)
# dummy data for illustration
x_train = np.random.normal(size=(10, 2))
x_train = np.vstack([x_train, 2*np.random.normal(size=(10, 2))])
y_train = [0 for _ in range(10)] + [1 for _ in range(10)]
with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    for epoch in range(10):
        loss, _ = sess.run([loss_tensor, update_op], feed_dict={x: x_train, y: y_train})
        print(loss)
# 0.7111398
# 0.7172677
# 0.71517026
# 0.713101
# 0.71105987
# 0.7090467
# 0.70706147
# 0.7051038
# 0.7031738
# 0.7012712
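If you want to double-check that the normalization actually took effect, you can read the variables back after an update and look at their norms. Something like the following (a quick illustrative check, reusing vars_, update_op and the feeds from the example above) should print values close to 1.0 for every trainable variable:

with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    sess.run(update_op, feed_dict={x: x_train, y: y_train})
    for v in vars_:
        # each variable should now have (approximately) unit L2 norm
        print(v.name, np.linalg.norm(sess.run(v)))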