Многослойный перцептрон «Несовместимые формы» - PullRequest
0 голосов
/ 18 июня 2019

Я работаю над примером объединения многослойного персептрона с потерями центра для набора данных mnist. Вот полный код.

from __future__ import print_function

# Import MNIST data
from tensorflow.examples.tutorials.mnist import input_data

mnist = input_data.read_data_sets("/tmp/data/", one_hot=True)

import tensorflow as tf
import os

os.environ['CUDA_VISIBLE_DEVICES'] = ''

# Parameters
learning_rate = 0.001
training_epochs = 15
batch_size = 64
display_step = 1

# Network Parameters
n_hidden_1 = 256  # 1st layer number of neurons
n_hidden_2 = 256  # 2nd layer number of neurons
n_input = 784  # MNIST data input (img shape: 28*28)
n_classes = 10  # MNIST total classes (0-9 digits)

# tf Graph input
X = tf.placeholder("float", [None, n_input])
Y = tf.placeholder("int32", [None, n_classes])

# Store layers weight & bias
weights = {
    'h1': tf.Variable(tf.random_normal([n_input, n_hidden_1])),
    'h2': tf.Variable(tf.random_normal([n_hidden_1, n_hidden_2])),
    'out': tf.Variable(tf.random_normal([n_hidden_2, n_classes]))
}
biases = {
    'b1': tf.Variable(tf.random_normal([n_hidden_1])),
    'b2': tf.Variable(tf.random_normal([n_hidden_2])),
    'out': tf.Variable(tf.random_normal([n_classes]))
}


# Create model
def multilayer_perceptron(x):
    # Hidden fully connected layer with 256 neurons
    layer_1 = tf.add(tf.matmul(x, weights['h1']), biases['b1'])
    # Hidden fully connected layer with 256 neurons
    layer_2 = tf.add(tf.matmul(layer_1, weights['h2']), biases['b2'])
    # Output fully connected layer with a neuron for each class
    out_layer = tf.matmul(layer_2, weights['out']) + biases['out']
    return out_layer


def center_loss(features, label, alfa, nrof_classes):
    """Center loss based on the paper "A Discriminative Feature Learning Approach for Deep Face Recognition"
       (http://ydwen.github.io/papers/WenECCV16.pdf)
    """
    print(features.get_shape)
    exit()
    nrof_features = features.get_shape()[1]
    centers = tf.get_variable('centers', [nrof_classes, nrof_features], dtype=tf.float32,
                              initializer=tf.constant_initializer(0), trainable=False)
    label = tf.reshape(label, [-1])
    centers_batch = tf.gather(centers, label)
    diff = (1 - alfa) * (centers_batch - features)
    centers = tf.scatter_sub(centers, label, diff)
    loss = tf.reduce_mean(tf.square(features - centers_batch))
    return loss, centers


# Construct model
logits = multilayer_perceptron(X)

c_loss, _ = center_loss(logits, Y, 0.9, n_classes)
print(c_loss)

# Define loss and optimizer
loss_op = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(
    logits=logits, labels=Y))


total_loss = loss_op + c_loss


optimizer = tf.train.AdamOptimizer(learning_rate=learning_rate)
train_op = optimizer.minimize(total_loss)
# Initializing the variables
init = tf.global_variables_initializer()

with tf.Session() as sess:
    sess.run(init)

    # Training cycle
    for epoch in range(training_epochs):
        avg_cost = 0.
        total_batch = int(mnist.train.num_examples / batch_size)

        # Loop over all batches
        for i in range(total_batch):
            batch_x, batch_y = mnist.train.next_batch(batch_size)
            # Run optimization op (backprop) and cost op (to get loss value)
            _, c = sess.run([train_op, total_loss], feed_dict={X: batch_x,
                                                            Y: batch_y})
            # Compute average loss
            avg_cost += c / total_batch
        # Display logs per epoch step
        if epoch % display_step == 0:
            print("Epoch:", '%04d' % (epoch + 1), "cost={:.9f}".format(avg_cost))
    print("Optimization Finished!")

    # Test model
    pred = tf.nn.softmax(logits)  # Apply softmax to logits
    correct_prediction = tf.equal(tf.argmax(pred, 1), tf.argmax(Y, 1))
    # Calculate accuracy
    accuracy = tf.reduce_mean(tf.cast(correct_prediction, "float"))
    print("Accuracy:", accuracy.eval({X: mnist.test.images, Y: mnist.test.labels}))

Я получил следующую ошибку при комбинировании функций softmax и центральных потерь.

Traceback (most recent call last):
  File "/usr/local/lib/python3.6/dist-packages/tensorflow/python/client/session.py", line 1278, in _do_call
    return fn(*args)
  File "/usr/local/lib/python3.6/dist-packages/tensorflow/python/client/session.py", line 1263, in _run_fn
    options, feed_dict, fetch_list, target_list, run_metadata)
  File "/usr/local/lib/python3.6/dist-packages/tensorflow/python/client/session.py", line 1350, in _call_tf_sessionrun
    run_metadata)
tensorflow.python.framework.errors_impl.InvalidArgumentError: Incompatible shapes: [64,10] vs. [640,10]
     [[Node: gradients/sub_1_grad/BroadcastGradientArgs = BroadcastGradientArgs[T=DT_INT32, _device="/job:localhost/replica:0/task:0/device:CPU:0"](gradients/sub_1_grad/Shape, gradients/sub_1_grad/Shape_1)]]

During handling of the above exception, another exception occurred:

Traceback (most recent call last):
  File "/home/super/PycharmProjects/recordo-mlp/mlp.py", line 118, in <module>
    Y: batch_y})
  File "/usr/local/lib/python3.6/dist-packages/tensorflow/python/client/session.py", line 877, in run
    run_metadata_ptr)
  File "/usr/local/lib/python3.6/dist-packages/tensorflow/python/client/session.py", line 1100, in _run
    feed_dict_tensor, options, run_metadata)
  File "/usr/local/lib/python3.6/dist-packages/tensorflow/python/client/session.py", line 1272, in _do_run
    run_metadata)
  File "/usr/local/lib/python3.6/dist-packages/tensorflow/python/client/session.py", line 1291, in _do_call
    raise type(e)(node_def, op, message)
tensorflow.python.framework.errors_impl.InvalidArgumentError: Incompatible shapes: [64,10] vs. [640,10]
     [[Node: gradients/sub_1_grad/BroadcastGradientArgs = BroadcastGradientArgs[T=DT_INT32, _device="/job:localhost/replica:0/task:0/device:CPU:0"](gradients/sub_1_grad/Shape, gradients/sub_1_grad/Shape_1)]]

Caused by op 'gradients/sub_1_grad/BroadcastGradientArgs', defined at:
  File "/home/super/PycharmProjects/recordo-mlp/mlp.py", line 101, in <module>
    train_op = optimizer.minimize(total_loss)
  File "/usr/local/lib/python3.6/dist-packages/tensorflow/python/training/optimizer.py", line 400, in minimize
    grad_loss=grad_loss)
  File "/usr/local/lib/python3.6/dist-packages/tensorflow/python/training/optimizer.py", line 514, in compute_gradients
    colocate_gradients_with_ops=colocate_gradients_with_ops)
  File "/usr/local/lib/python3.6/dist-packages/tensorflow/python/ops/gradients_impl.py", line 596, in gradients
    gate_gradients, aggregation_method, stop_gradients)
  File "/usr/local/lib/python3.6/dist-packages/tensorflow/python/ops/gradients_impl.py", line 779, in _GradientsHelper
    lambda: grad_fn(op, *out_grads))
  File "/usr/local/lib/python3.6/dist-packages/tensorflow/python/ops/gradients_impl.py", line 398, in _MaybeCompile
    return grad_fn()  # Exit early
  File "/usr/local/lib/python3.6/dist-packages/tensorflow/python/ops/gradients_impl.py", line 779, in <lambda>
    lambda: grad_fn(op, *out_grads))
  File "/usr/local/lib/python3.6/dist-packages/tensorflow/python/ops/math_grad.py", line 889, in _SubGrad
    rx, ry = gen_array_ops.broadcast_gradient_args(sx, sy)
  File "/usr/local/lib/python3.6/dist-packages/tensorflow/python/ops/gen_array_ops.py", line 674, in broadcast_gradient_args
    "BroadcastGradientArgs", s0=s0, s1=s1, name=name)
  File "/usr/local/lib/python3.6/dist-packages/tensorflow/python/framework/op_def_library.py", line 787, in _apply_op_helper
    op_def=op_def)
  File "/usr/local/lib/python3.6/dist-packages/tensorflow/python/util/deprecation.py", line 454, in new_func
    return func(*args, **kwargs)
  File "/usr/local/lib/python3.6/dist-packages/tensorflow/python/framework/ops.py", line 3155, in create_op
    op_def=op_def)
  File "/usr/local/lib/python3.6/dist-packages/tensorflow/python/framework/ops.py", line 1717, in __init__
    self._traceback = tf_stack.extract_stack()

...which was originally created as op 'sub_1', defined at:
  File "/home/super/PycharmProjects/recordo-mlp/mlp.py", line 89, in <module>
    c_loss, _ = center_loss(logits, Y, 0.9, n_classes)
  File "/home/super/PycharmProjects/recordo-mlp/mlp.py", line 82, in center_loss
    loss = tf.reduce_mean(tf.square(features - centers_batch))
  File "/usr/local/lib/python3.6/dist-packages/tensorflow/python/ops/math_ops.py", line 850, in binary_op_wrapper
    return func(x, y, name=name)
  File "/usr/local/lib/python3.6/dist-packages/tensorflow/python/ops/gen_math_ops.py", line 8188, in sub
    "Sub", x=x, y=y, name=name)
  File "/usr/local/lib/python3.6/dist-packages/tensorflow/python/framework/op_def_library.py", line 787, in _apply_op_helper
    op_def=op_def)
  File "/usr/local/lib/python3.6/dist-packages/tensorflow/python/util/deprecation.py", line 454, in new_func
    return func(*args, **kwargs)
  File "/usr/local/lib/python3.6/dist-packages/tensorflow/python/framework/ops.py", line 3155, in create_op
    op_def=op_def)
  File "/usr/local/lib/python3.6/dist-packages/tensorflow/python/framework/ops.py", line 1717, in __init__
    self._traceback = tf_stack.extract_stack()

InvalidArgumentError (see above for traceback): Incompatible shapes: [64,10] vs. [640,10]
     [[Node: gradients/sub_1_grad/BroadcastGradientArgs = BroadcastGradientArgs[T=DT_INT32, _device="/job:localhost/replica:0/task:0/device:CPU:0"](gradients/sub_1_grad/Shape, gradients/sub_1_grad/Shape_1)]]

Размер партии = 64. Я не уверен, каким образом я получил формы [64,10] против [640,10]. Пожалуйста, кто-нибудь из ошибок, чтобы решить эту форму несовпадения

Добро пожаловать на сайт PullRequest, где вы можете задавать вопросы и получать ответы от других членов сообщества.
...