I am using TensorFlow for a simple 3-layer regression test. With this program I get a very high cost (around 550) and 100% training accuracy, but with the same dataset and a plain, hand-coded neural network the cost is about 0.3 and the training accuracy is 94%. Both programs use the same RELU->RELU->Sigmoid->cross_entropy cost sequence, learning_rate = 0.003, num_epochs = 30000, and no mini-batches (minibatch_size is set to the size of the dataset). I have checked again and again, but I have no idea why the TensorFlow version of the program has such a high cost; the 100% training accuracy also looks suspicious. Can anyone help me figure out what the problem is? Thanks in advance. My code is below (a sketch of the baseline's cost function follows the listing); the dataset link is on OneDrive.
import tensorflow.compat.v1 as tf
from tensorflow.python.framework import ops
import scipy.io
import math
import numpy as np
tf.disable_eager_execution()
data = scipy.io.loadmat('datasets/data.mat')
# train_X.shape == (2, 211)
# train_Y.shape == (1, 211)
train_X = data['X'].T
train_Y = data['y'].T
print(train_X.shape)
print(train_Y.shape)
layer_dims = [train_X.shape[0], 20, 3, 1]
def create_placeholders(n_x, n_y):
    X = tf.placeholder(tf.float32, shape=[n_x, None], name="X")
    Y = tf.placeholder(tf.float32, shape=[n_y, None], name="Y")
    return X, Y
def print_parameters(parameters):
    L = int(len(parameters) / 2)  # number of layers in the network
    print("The layer structure is " + str(L))
    print("The parameters contains " + str(parameters.keys()))
    for l in range(1, L + 1):
        print('W' + str(l) + " shape = " + str(parameters['W' + str(l)].shape))
        print('b' + str(l) + " shape = " + str(parameters['b' + str(l)].shape))
def initialize_parameters(layer_dims):
    tf.set_random_seed(1)
    L = len(layer_dims)
    parameters = {}
    for l in range(1, L):
        wl = 'W' + str(l)
        bl = 'b' + str(l)
        parameters[wl] = tf.get_variable(
            wl, [layer_dims[l], layer_dims[l - 1]],
            initializer=tf.random_normal_initializer(stddev=0.1, seed=1))
        parameters[bl] = tf.get_variable(
            bl, [layer_dims[l], 1], initializer=tf.zeros_initializer())
    print_parameters(parameters)
    return parameters
def forward_propagation(X, parameters):
    # RELU->RELU->...->RELU->Sigmoid
    A = X
    L = len(parameters) // 2
    for l in range(1, L):
        A_prev = A
        wl = 'W' + str(l)
        bl = 'b' + str(l)
        Z = tf.add(tf.matmul(parameters[wl], A_prev), parameters[bl])
        A = tf.nn.relu(Z)
    WL = 'W' + str(L)
    bL = 'b' + str(L)
    ZL = tf.add(tf.matmul(parameters[WL], A), parameters[bL])
    AL = tf.nn.sigmoid(ZL)
    return AL
def compute_cost(AL, Y):
    # cross-entropy cost, computed here with softmax_cross_entropy_with_logits_v2
    # on the sigmoid activations AL
    cost = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits_v2(
        logits=AL, labels=Y))
    return cost
def random_mini_batches(X, Y, mini_batch_size=64, seed=0):
    m = X.shape[1]  # number of training examples
    mini_batches = []
    np.random.seed(seed)
    permutation = list(np.random.permutation(m))
    shuffled_X = X[:, permutation]
    shuffled_Y = Y[:, permutation].reshape((Y.shape[0], m))
    num_complete_minibatches = math.floor(m / mini_batch_size)
    for k in range(0, num_complete_minibatches):
        mini_batch_X = shuffled_X[:, k * mini_batch_size: k * mini_batch_size + mini_batch_size]
        mini_batch_Y = shuffled_Y[:, k * mini_batch_size: k * mini_batch_size + mini_batch_size]
        mini_batch = (mini_batch_X, mini_batch_Y)
        mini_batches.append(mini_batch)
    # Handling the end case (last mini-batch < mini_batch_size)
    if m % mini_batch_size != 0:
        mini_batch_X = shuffled_X[:, num_complete_minibatches * mini_batch_size: m]
        mini_batch_Y = shuffled_Y[:, num_complete_minibatches * mini_batch_size: m]
        mini_batch = (mini_batch_X, mini_batch_Y)
        mini_batches.append(mini_batch)
    return mini_batches
def model(X_train, Y_train, learning_rate=0.003,
          num_epochs=30000, minibatch_size=211, print_cost=True):
    ops.reset_default_graph()
    tf.set_random_seed(1)
    seed = 3  # to keep consistent results
    # (n_x: input size, m: number of examples in the train set)
    (n_x, m) = X_train.shape
    n_y = Y_train.shape[0]  # n_y: output size
    costs = []  # To keep track of the cost
    X, Y = create_placeholders(n_x, n_y)
    parameters = initialize_parameters(layer_dims)
    AL = forward_propagation(X, parameters)
    cost = compute_cost(AL, Y)
    optimizer = tf.train.AdamOptimizer(
        learning_rate=learning_rate).minimize(cost)
    init = tf.global_variables_initializer()
    with tf.Session() as sess:
        sess.run(init)
        print(sess.run(parameters["W1"]))
        for epoch in range(num_epochs):
            epoch_cost = 0.
            num_minibatches = int(m / minibatch_size)
            seed = seed + 1
            minibatches = random_mini_batches(
                X_train, Y_train, minibatch_size, seed)
            for minibatch in minibatches:
                (minibatch_X, minibatch_Y) = minibatch
                _, minibatch_cost = sess.run(
                    [optimizer, cost], feed_dict={X: minibatch_X, Y: minibatch_Y})
                epoch_cost += minibatch_cost / num_minibatches
            if print_cost == True and epoch % 1000 == 0:
                print("Cost after epoch %i: %f" % (epoch, epoch_cost))
            if print_cost == True and epoch % 5 == 0:
                costs.append(epoch_cost)
        parameters = sess.run(parameters)
        print("Parameters have been trained!")
        correct_prediction = tf.equal(tf.argmax(AL), tf.argmax(Y))
        accuracy = tf.reduce_mean(tf.cast(correct_prediction, "float"))
        print("Train Accuracy:", accuracy.eval({X: X_train, Y: Y_train}))
    return parameters
parameters = model(train_X, train_Y)
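
For reference, the plain hand-coded network I compare against computes a binary cross-entropy cost directly on the sigmoid output. Below is a minimal NumPy sketch of how that baseline cost is computed for a (1, m) output; the function name reference_cost and the epsilon clipping are my own illustration, not code from either program:

import numpy as np

def reference_cost(AL, Y, eps=1e-8):
    """Binary cross-entropy for a (1, m) sigmoid output AL and (1, m) labels Y."""
    m = Y.shape[1]
    # clip activations away from 0 and 1 to avoid log(0); eps is an illustrative choice
    AL = np.clip(AL, eps, 1 - eps)
    return float(-np.sum(Y * np.log(AL) + (1 - Y) * np.log(1 - AL)) / m)

This is the cost that comes out at roughly 0.3 for my baseline on the same dataset.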