Я работаю над примером объединения многослойного персептрона с потерями центра для набора данных mnist. Вот полный код.
from __future__ import print_function
# Import MNIST data
from tensorflow.examples.tutorials.mnist import input_data
mnist = input_data.read_data_sets("/tmp/data/", one_hot=True)
import tensorflow as tf
import os
os.environ['CUDA_VISIBLE_DEVICES'] = ''
# Parameters
learning_rate = 0.001
training_epochs = 15
batch_size = 64
display_step = 1
# Network Parameters
n_hidden_1 = 256 # 1st layer number of neurons
n_hidden_2 = 256 # 2nd layer number of neurons
n_input = 784 # MNIST data input (img shape: 28*28)
n_classes = 10 # MNIST total classes (0-9 digits)
# tf Graph input
X = tf.placeholder("float", [None, n_input])
Y = tf.placeholder("int32", [None, n_classes])
# Store layers weight & bias
weights = {
'h1': tf.Variable(tf.random_normal([n_input, n_hidden_1])),
'h2': tf.Variable(tf.random_normal([n_hidden_1, n_hidden_2])),
'out': tf.Variable(tf.random_normal([n_hidden_2, n_classes]))
}
biases = {
'b1': tf.Variable(tf.random_normal([n_hidden_1])),
'b2': tf.Variable(tf.random_normal([n_hidden_2])),
'out': tf.Variable(tf.random_normal([n_classes]))
}
# Create model
def multilayer_perceptron(x):
# Hidden fully connected layer with 256 neurons
layer_1 = tf.add(tf.matmul(x, weights['h1']), biases['b1'])
# Hidden fully connected layer with 256 neurons
layer_2 = tf.add(tf.matmul(layer_1, weights['h2']), biases['b2'])
# Output fully connected layer with a neuron for each class
out_layer = tf.matmul(layer_2, weights['out']) + biases['out']
return out_layer
def center_loss(features, label, alfa, nrof_classes):
"""Center loss based on the paper "A Discriminative Feature Learning Approach for Deep Face Recognition"
(http://ydwen.github.io/papers/WenECCV16.pdf)
"""
print(features.get_shape)
exit()
nrof_features = features.get_shape()[1]
centers = tf.get_variable('centers', [nrof_classes, nrof_features], dtype=tf.float32,
initializer=tf.constant_initializer(0), trainable=False)
label = tf.reshape(label, [-1])
centers_batch = tf.gather(centers, label)
diff = (1 - alfa) * (centers_batch - features)
centers = tf.scatter_sub(centers, label, diff)
loss = tf.reduce_mean(tf.square(features - centers_batch))
return loss, centers
# Construct model
logits = multilayer_perceptron(X)
c_loss, _ = center_loss(logits, Y, 0.9, n_classes)
print(c_loss)
# Define loss and optimizer
loss_op = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(
logits=logits, labels=Y))
total_loss = loss_op + c_loss
optimizer = tf.train.AdamOptimizer(learning_rate=learning_rate)
train_op = optimizer.minimize(total_loss)
# Initializing the variables
init = tf.global_variables_initializer()
with tf.Session() as sess:
sess.run(init)
# Training cycle
for epoch in range(training_epochs):
avg_cost = 0.
total_batch = int(mnist.train.num_examples / batch_size)
# Loop over all batches
for i in range(total_batch):
batch_x, batch_y = mnist.train.next_batch(batch_size)
# Run optimization op (backprop) and cost op (to get loss value)
_, c = sess.run([train_op, total_loss], feed_dict={X: batch_x,
Y: batch_y})
# Compute average loss
avg_cost += c / total_batch
# Display logs per epoch step
if epoch % display_step == 0:
print("Epoch:", '%04d' % (epoch + 1), "cost={:.9f}".format(avg_cost))
print("Optimization Finished!")
# Test model
pred = tf.nn.softmax(logits) # Apply softmax to logits
correct_prediction = tf.equal(tf.argmax(pred, 1), tf.argmax(Y, 1))
# Calculate accuracy
accuracy = tf.reduce_mean(tf.cast(correct_prediction, "float"))
print("Accuracy:", accuracy.eval({X: mnist.test.images, Y: mnist.test.labels}))
Я получил следующую ошибку при комбинировании функций softmax и центральных потерь.
Traceback (most recent call last):
File "/usr/local/lib/python3.6/dist-packages/tensorflow/python/client/session.py", line 1278, in _do_call
return fn(*args)
File "/usr/local/lib/python3.6/dist-packages/tensorflow/python/client/session.py", line 1263, in _run_fn
options, feed_dict, fetch_list, target_list, run_metadata)
File "/usr/local/lib/python3.6/dist-packages/tensorflow/python/client/session.py", line 1350, in _call_tf_sessionrun
run_metadata)
tensorflow.python.framework.errors_impl.InvalidArgumentError: Incompatible shapes: [64,10] vs. [640,10]
[[Node: gradients/sub_1_grad/BroadcastGradientArgs = BroadcastGradientArgs[T=DT_INT32, _device="/job:localhost/replica:0/task:0/device:CPU:0"](gradients/sub_1_grad/Shape, gradients/sub_1_grad/Shape_1)]]
During handling of the above exception, another exception occurred:
Traceback (most recent call last):
File "/home/super/PycharmProjects/recordo-mlp/mlp.py", line 118, in <module>
Y: batch_y})
File "/usr/local/lib/python3.6/dist-packages/tensorflow/python/client/session.py", line 877, in run
run_metadata_ptr)
File "/usr/local/lib/python3.6/dist-packages/tensorflow/python/client/session.py", line 1100, in _run
feed_dict_tensor, options, run_metadata)
File "/usr/local/lib/python3.6/dist-packages/tensorflow/python/client/session.py", line 1272, in _do_run
run_metadata)
File "/usr/local/lib/python3.6/dist-packages/tensorflow/python/client/session.py", line 1291, in _do_call
raise type(e)(node_def, op, message)
tensorflow.python.framework.errors_impl.InvalidArgumentError: Incompatible shapes: [64,10] vs. [640,10]
[[Node: gradients/sub_1_grad/BroadcastGradientArgs = BroadcastGradientArgs[T=DT_INT32, _device="/job:localhost/replica:0/task:0/device:CPU:0"](gradients/sub_1_grad/Shape, gradients/sub_1_grad/Shape_1)]]
Caused by op 'gradients/sub_1_grad/BroadcastGradientArgs', defined at:
File "/home/super/PycharmProjects/recordo-mlp/mlp.py", line 101, in <module>
train_op = optimizer.minimize(total_loss)
File "/usr/local/lib/python3.6/dist-packages/tensorflow/python/training/optimizer.py", line 400, in minimize
grad_loss=grad_loss)
File "/usr/local/lib/python3.6/dist-packages/tensorflow/python/training/optimizer.py", line 514, in compute_gradients
colocate_gradients_with_ops=colocate_gradients_with_ops)
File "/usr/local/lib/python3.6/dist-packages/tensorflow/python/ops/gradients_impl.py", line 596, in gradients
gate_gradients, aggregation_method, stop_gradients)
File "/usr/local/lib/python3.6/dist-packages/tensorflow/python/ops/gradients_impl.py", line 779, in _GradientsHelper
lambda: grad_fn(op, *out_grads))
File "/usr/local/lib/python3.6/dist-packages/tensorflow/python/ops/gradients_impl.py", line 398, in _MaybeCompile
return grad_fn() # Exit early
File "/usr/local/lib/python3.6/dist-packages/tensorflow/python/ops/gradients_impl.py", line 779, in <lambda>
lambda: grad_fn(op, *out_grads))
File "/usr/local/lib/python3.6/dist-packages/tensorflow/python/ops/math_grad.py", line 889, in _SubGrad
rx, ry = gen_array_ops.broadcast_gradient_args(sx, sy)
File "/usr/local/lib/python3.6/dist-packages/tensorflow/python/ops/gen_array_ops.py", line 674, in broadcast_gradient_args
"BroadcastGradientArgs", s0=s0, s1=s1, name=name)
File "/usr/local/lib/python3.6/dist-packages/tensorflow/python/framework/op_def_library.py", line 787, in _apply_op_helper
op_def=op_def)
File "/usr/local/lib/python3.6/dist-packages/tensorflow/python/util/deprecation.py", line 454, in new_func
return func(*args, **kwargs)
File "/usr/local/lib/python3.6/dist-packages/tensorflow/python/framework/ops.py", line 3155, in create_op
op_def=op_def)
File "/usr/local/lib/python3.6/dist-packages/tensorflow/python/framework/ops.py", line 1717, in __init__
self._traceback = tf_stack.extract_stack()
...which was originally created as op 'sub_1', defined at:
File "/home/super/PycharmProjects/recordo-mlp/mlp.py", line 89, in <module>
c_loss, _ = center_loss(logits, Y, 0.9, n_classes)
File "/home/super/PycharmProjects/recordo-mlp/mlp.py", line 82, in center_loss
loss = tf.reduce_mean(tf.square(features - centers_batch))
File "/usr/local/lib/python3.6/dist-packages/tensorflow/python/ops/math_ops.py", line 850, in binary_op_wrapper
return func(x, y, name=name)
File "/usr/local/lib/python3.6/dist-packages/tensorflow/python/ops/gen_math_ops.py", line 8188, in sub
"Sub", x=x, y=y, name=name)
File "/usr/local/lib/python3.6/dist-packages/tensorflow/python/framework/op_def_library.py", line 787, in _apply_op_helper
op_def=op_def)
File "/usr/local/lib/python3.6/dist-packages/tensorflow/python/util/deprecation.py", line 454, in new_func
return func(*args, **kwargs)
File "/usr/local/lib/python3.6/dist-packages/tensorflow/python/framework/ops.py", line 3155, in create_op
op_def=op_def)
File "/usr/local/lib/python3.6/dist-packages/tensorflow/python/framework/ops.py", line 1717, in __init__
self._traceback = tf_stack.extract_stack()
InvalidArgumentError (see above for traceback): Incompatible shapes: [64,10] vs. [640,10]
[[Node: gradients/sub_1_grad/BroadcastGradientArgs = BroadcastGradientArgs[T=DT_INT32, _device="/job:localhost/replica:0/task:0/device:CPU:0"](gradients/sub_1_grad/Shape, gradients/sub_1_grad/Shape_1)]]
Размер партии = 64. Я не уверен, каким образом я получил формы [64,10] против [640,10]. Пожалуйста, кто-нибудь из ошибок, чтобы решить эту форму несовпадения