Я не могу понять, почему веса не похожи на нормальное распределение.
На самом деле я хочу понять, что происходит с весами во время их обновления и что происходит с градиентами. Но проблема в том, что веса на гистограмме не похожи на нормальное распределение.
Ниже приведён код:
def _min_max_scale(column):
    """Linearly rescale *column* so its minimum maps to 0 and its maximum to 1."""
    lowest = column.min()
    span = column.max() - lowest
    return (column - lowest) / span


# Read the data set from a path relative to the current working directory.
iris_data_set = pd.read_csv('iris.csv')
iris_data_set.head()  # result is discarded; only useful in an interactive session

# Columns that get min-max scaled (statistics are taken over the whole
# data set, i.e. before the train/test split below).
cols_to_norm = [
    'Sepal.Length',
    'Sepal.Width',
    'Petal.Length',
    'Petal.Width',
]
iris_data_set[cols_to_norm] = iris_data_set[cols_to_norm].apply(_min_max_scale)

# Everything except the 'Species' column is a feature; 'Species' is the label.
feat_data = iris_data_set.drop('Species', axis=1)
label = iris_data_set['Species']

# 70 % / 30 % split with a fixed seed so runs are repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    feat_data,
    label,
    test_size=0.3,
    random_state=101,
)

# One-hot encode the labels of both partitions.
y_train, y_test = (pd.get_dummies(part) for part in (y_train, y_test))

# Network and training hyper-parameters.
n_features = 4          # width of the input placeholder
n_dense_neurons = 3     # units in the hidden layer
n_output = 3            # width of the output layer / label placeholder
training_steps = 1000   # number of gradient-descent iterations
# Graph inputs. The first dimension is None so any number of rows can be fed.
X_data = tf.placeholder(tf.float32, shape=[None, n_features], name='Inputdata')
y_target = tf.placeholder(tf.float32, shape=[None, n_output], name='Labeldata')

# Trainable parameters, each initialised from tf.random_normal.
# NOTE: w1 holds n_features * n_dense_neurons values and w2 holds
# n_dense_neurons * n_output values, so a histogram of either tensor is
# drawn from a very small sample.
weights = dict(
    # Inputs -> hidden layer
    w1=tf.Variable(tf.random_normal([n_features, n_dense_neurons]), name='w1'),
    # Hidden layer -> outputs
    w2=tf.Variable(tf.random_normal([n_dense_neurons, n_output]), name='w2'),
)
biases = dict(
    # Hidden-layer bias
    b1=tf.Variable(tf.random_normal([n_dense_neurons]), name='b1'),
    # Output-layer bias
    b2=tf.Variable(tf.random_normal([n_output]), name='b2'),
)
def multilayer_perceptron(x, weights, biases):
    """Build a one-hidden-layer perceptron and return its un-activated output.

    Args:
        x: Input tensor that is matrix-multiplied with ``weights['w1']``.
        weights: Mapping with the keys ``'w1'`` (input -> hidden) and
            ``'w2'`` (hidden -> output).
        biases: Mapping with the keys ``'b1'`` (hidden) and ``'b2'`` (output).

    Returns:
        The output-layer tensor before any activation is applied.

    Side effects:
        Registers a histogram summary named ``"relu1"`` for the hidden
        layer's ReLU activations.
    """
    # Hidden layer with ReLU activation. The layer is built from the `x`
    # argument; previously the module-level placeholder was used here, which
    # silently ignored whatever tensor the caller passed in.
    layer_1 = tf.add(tf.matmul(x, weights['w1']), biases['b1'])
    layer_1 = tf.nn.relu(layer_1)

    # Summary to visualise the first layer's ReLU activations.
    tf.summary.histogram("relu1", layer_1)

    # Output layer (linear).
    out_layer = tf.add(tf.matmul(layer_1, weights['w2']), biases['b2'])
    return out_layer
# Forward pass.
with tf.name_scope('Model'):
    pred = multilayer_perceptron(X_data, weights, biases)

# Loss: sum of squared differences between the softmax output and the target.
with tf.name_scope('Loss'):
    final_output = tf.nn.softmax(pred)
    deltas = tf.square(final_output - y_target)
    loss = tf.reduce_sum(deltas)

# Plain gradient descent. Gradients are computed explicitly so that they can
# be logged as histograms in addition to being applied.
with tf.name_scope('SGD'):
    optimizer = tf.train.GradientDescentOptimizer(learning_rate=0.001)
    grads = tf.gradients(loss, tf.trainable_variables())
    grads = list(zip(grads, tf.trainable_variables()))  # (gradient, variable) pairs
    apply_grads = optimizer.apply_gradients(grads_and_vars=grads)

# Fraction of rows whose arg-max prediction equals the arg-max of the target.
with tf.name_scope('Accuracy'):
    acc = tf.equal(tf.argmax(pred, 1), tf.argmax(y_target, 1))
    acc = tf.reduce_mean(tf.cast(acc, tf.float32))

init = tf.global_variables_initializer()

# Scalar summaries.
tf.summary.scalar("loss", loss)
tf.summary.scalar("accuracy", acc)

# Histogram summaries for every trainable variable and its gradient.
# `var.op.name` is used for the tag instead of `var.name`: the latter carries
# an output-index suffix such as ':0', and ':' is not a legal character in a
# summary name.
for var in tf.trainable_variables():
    tf.summary.histogram(var.op.name, var)
for grad, var in grads:
    # tf.gradients yields None for a variable the loss does not depend on;
    # there is nothing to plot in that case.
    if grad is not None:
        tf.summary.histogram(var.op.name + '/gradient', grad)

# Merge all summaries into a single op.
merged_summary_op = tf.summary.merge_all()
# Train for `training_steps` iterations on the full training set and write
# TensorBoard summaries to the "new6" directory every 20 steps.
with tf.Session() as sess:
    sess.run(init)

    # The graph is handed to the writer so it is stored with the event file.
    summary_writer = tf.summary.FileWriter("new6",
                                           graph=tf.get_default_graph())
    try:
        # The same full-batch feed is used for every step.
        feed = {X_data: X_train, y_target: y_train}

        for i in range(training_steps):
            # One gradient-descent update; `c` is the loss fetched in the
            # same run call. Summaries are not fetched here because their
            # result would be thrown away on most steps.
            _, c = sess.run([apply_grads, loss], feed_dict=feed)

            if i % 20 == 0:
                # Evaluate the summaries after the update of step `i`.
                summary_str = sess.run(merged_summary_op, feed_dict=feed)
                summary_writer.add_summary(summary_str, i)
                summary_writer.flush()
    finally:
        # Release the event file even if training is interrupted.
        summary_writer.close()