I define a neural-network architecture using only dense, fully connected layers and train two copies of it: one with model.fit() and one with a custom tf.GradientTape loop. Both training methods use the same model architecture.
The randomly initialized weights are shared between the two models, and all other settings, such as the optimizer, loss function and metrics, are also identical.
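For what it's worth, here is a minimal sanity check (not part of the original code) that could confirm the two models really do start from the same weights, assuming they are named model and model_gt as in the code further down and the saved weights have already been restored into model_gt:

# Hypothetical check (my addition): run after model_gt.load_weights() below.
# Both models should then hold identical weight tensors.
import numpy as np
for w1, w2 in zip(model.get_weights(), model_gt.get_weights()):
    assert np.allclose(w1, w2), "Initial weights differ between the two models"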
The shapes of the training and test sets are: X_train = (960, 4), y_train = (960,), X_test = (412, 4) and y_test = (412,).
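The actual dataset is not included here; if you want to run the code below end to end, a purely synthetic stand-in with the same shapes can be generated, for example (the feature distribution and the binary 0/1 labels are assumptions, only the shapes match the real data):

# Synthetic placeholder data (assumption: 4 float features, binary labels).
import numpy as np
rng = np.random.default_rng(42)
X_train = rng.normal(size = (960, 4)).astype('float32')
y_train = rng.integers(0, 2, size = (960,)).astype('float32')
X_test = rng.normal(size = (412, 4)).astype('float32')
y_test = rng.integers(0, 2, size = (412,)).astype('float32')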
import pandas as pd, numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, MinMaxScaler, RobustScaler
from sklearn.preprocessing import LabelEncoder
from sklearn.metrics import accuracy_score, precision_score, recall_score, confusion_matrix
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense
import tensorflow_model_optimization as tfmot
from tensorflow_model_optimization.sparsity import keras as sparsity
def create_nn():
    """
    Function to create a Neural Network.
    """
    model = Sequential()

    model.add(
        Dense(
            units = 4, activation = 'relu',
            kernel_initializer = tf.keras.initializers.GlorotNormal(),
            input_shape = (4,)
        )
    )

    model.add(
        Dense(
            units = 3, activation = 'relu',
            kernel_initializer = tf.keras.initializers.GlorotNormal()
        )
    )

    model.add(
        Dense(
            units = 1, activation = 'sigmoid'
        )
    )

    # Compile the defined NN model above (needed for model.fit())-
    model.compile(
        loss = 'binary_crossentropy',  # loss = 'categorical_crossentropy'
        optimizer = tf.keras.optimizers.Adam(learning_rate = 0.001),
        metrics = ['accuracy']
    )

    return model
# Instantiate a model-
model = create_nn()

# Save the initial weights for a fair comparison-
model.save_weights("Random_Weights.h5", overwrite = True)
# Create datasets to be used for GradientTape-
# Use tf.data to batch and shuffle the dataset-
train_ds = tf.data.Dataset.from_tensor_slices(
    (X_train, y_train)).shuffle(100).batch(32)

test_ds = tf.data.Dataset.from_tensor_slices(
    (X_test, y_test)).shuffle(100).batch(32)
# Define early stopping-
callback = tf.keras.callbacks.EarlyStopping(
    monitor = 'val_loss', patience = 3,
    min_delta = 0.001, mode = 'min'
)
# Train the defined model with model.fit()-
history_orig = model.fit(
    x = X_train, y = y_train,
    batch_size = 32, epochs = 500,
    validation_data = (X_test, y_test),
    callbacks = [callback],
    verbose = 1
)
# Instantiate a second model-
model_gt = create_nn()

# Restore the random weights used by the previous model for a fair comparison-
model_gt.load_weights("Random_Weights.h5")

# Choose an optimizer and loss function for training-
loss_fn = tf.keras.losses.BinaryCrossentropy()
optimizer = tf.keras.optimizers.Adam(learning_rate = 0.001)
# Select metrics to measure the loss & accuracy of the model.
# These metrics accumulate the values over an epoch and then
# print the overall result-
train_loss = tf.keras.metrics.Mean(name = 'train_loss')
train_accuracy = tf.keras.metrics.BinaryAccuracy(name = 'train_accuracy')

test_loss = tf.keras.metrics.Mean(name = 'test_loss')
test_accuracy = tf.keras.metrics.BinaryAccuracy(name = 'test_accuracy')
# Use tf.GradientTape to train the model-
@tf.function
def train_step(data, labels):
    """
    Function to perform one step of Gradient
    Descent optimization.
    """
    with tf.GradientTape() as tape:
        predictions = model_gt(data, training = True)
        loss = loss_fn(labels, predictions)
    gradients = tape.gradient(loss, model_gt.trainable_variables)
    optimizer.apply_gradients(zip(gradients, model_gt.trainable_variables))

    train_loss(loss)
    train_accuracy(labels, predictions)
@tf.function
def test_step(data, labels):
    """
    Function to evaluate model performance
    on the test dataset.
    """
    predictions = model_gt(data, training = False)
    t_loss = loss_fn(labels, predictions)

    test_loss(t_loss)
    test_accuracy(labels, predictions)
EPOCHS = 100
# User input-
minimum_delta = 0.001
patience = 3
patience_val = np.zeros(patience)
# Dictionary to hold scalar metrics-
history = {}
history['accuracy'] = np.zeros(EPOCHS)
history['val_accuracy'] = np.zeros(EPOCHS)
history['loss'] = np.zeros(EPOCHS)
history['val_loss'] = np.zeros(EPOCHS)
for epoch in range(EPOCHS):
    # Reset the metrics at the start of the next epoch-
    train_loss.reset_states()
    train_accuracy.reset_states()
    test_loss.reset_states()
    test_accuracy.reset_states()

    for x, y in train_ds:
        train_step(x, y)

    for x_t, y_t in test_ds:
        test_step(x_t, y_t)

    template = 'Epoch {0}, Loss: {1:.4f}, Accuracy: {2:.4f}, Test Loss: {3:.4f}, Test Accuracy: {4:.4f}'

    history['accuracy'][epoch] = train_accuracy.result()
    history['loss'][epoch] = train_loss.result()
    history['val_loss'][epoch] = test_loss.result()
    history['val_accuracy'][epoch] = test_accuracy.result()

    print(template.format(epoch + 1,
          train_loss.result(), train_accuracy.result() * 100,
          test_loss.result(), test_accuracy.result() * 100))

    if epoch > 2:
        # Compute the absolute differences between 3 consecutive 'val_loss' values-
        differences = np.abs(np.diff(history['val_loss'][epoch - 3:epoch], n = 1))

        # Check whether each absolute difference is greater than 'minimum_delta'-
        check = differences > minimum_delta

        # print('differences: {0}'.format(differences))

        # Count unique elements with their counts-
        # elem, count = np.unique(check, return_counts = True)
        # print('\nelem = {0}, count = {1}'.format(elem, count))

        # if elem.all() == False and count == 2:
        if np.all(check == False):
            print("\n\nEarlyStopping Evoked! Stopping training\n\n")
            break
With model.fit() training stops after about 82 epochs, whereas the GradientTape loop stops after about 52 epochs.
Why is there such a discrepancy in the number of epochs?
Thanks!