I have been trying to train my own Inception-ResNet-v2 for a particular task, and the problem is the following:
The training and validation steps run successfully and the accuracy is very high: the training steps report high accuracy.
When I save the weights and load them back for testing, the classification fails completely. It looks as if the weights are not updated (or restored) correctly: the test steps report wrong classifications. A minimal save/load round-trip check that I would expect to pass is sketched right after the training script.
I am really confused about why this happens, please help me understand what is going on, many thanks.
The main function is shown below:
from __future__ import absolute_import, division, print_function
import tensorflow as tf
import math
import os
import datetime
import sys
# User defined packages
from configuration import IMAGE_HEIGHT, IMAGE_WIDTH, CHANNELS, \
EPOCHS, BATCH_SIZE, save_model_root_dir, log_root_dir, GLOBAL_LEARNING_RATE, \
WEIGHT_DECAY, THRESHOLD
from prepare_data import generate_datasets, load_and_preprocess_image
from models import mobilenet_v1, mobilenet_v2, mobilenet_v3_large, mobilenet_v3_small, \
efficientnet, resnext, inception_v4, inception_resnet_v1, inception_resnet_v2, \
se_resnet, squeezenet, densenet, shufflenet_v2, resnet
from models.model_selection import get_model
def print_model_summary(network):
network.build(input_shape=(None, IMAGE_HEIGHT, IMAGE_WIDTH, CHANNELS))
network.summary()
def process_features(features, data_augmentation):
image_raw = features['image_raw'].numpy()
image_tensor_list = []
for image in image_raw:
image_tensor = load_and_preprocess_image(image, data_augmentation=data_augmentation)
image_tensor_list.append(image_tensor)
images = tf.stack(image_tensor_list, axis=0)
labels = features['label'].numpy()
return images, labels
def folder_preparation(job_id, product_id):
    # Generate the log file path and pre-create the log file header
log_dir = log_root_dir + job_id + "/" + product_id + "/"
if not os.path.exists(log_dir):
os.makedirs(log_dir)
file = open(log_dir +"training_result_step" + ".log", "w")
file.write("type\t")
file.write("timestamp\t")
file.write("epoch\t")
file.write("step\t")
file.write("train_accuracy\t")
file.write("predict_labels\t")
file.write("actual_labels\n")
file.close()
file = open(log_dir +"training_result" + ".log","w")
file.write("timestamp\t")
file.write("epoch\t")
file.write("valid accuracy\n")
file.close()
# Generate save model path
save_model_dir = save_model_root_dir + job_id + "/" + product_id + "/"
if not os.path.exists(save_model_dir):
os.makedirs(save_model_dir)
return log_dir, save_model_dir
def main(argv):
    # The user must provide job_id and product_id as command-line arguments; this is prepared for the frontend to call
    if len(argv) != 3:
        print("ERROR: Format error, refer to the usage: python train.py job_id product_id")
    elif not argv[1].isdigit():
        print("ERROR: Format error, job_id must be in int format")
    elif not argv[2].isalnum():
        print("ERROR: Format error, product_id must contain only letters or digits, without special characters")
else:
print("INFO: Start training model " + datetime.datetime.now().strftime("%Y%m%d%H%M%S"))
# GPU settings
gpus = tf.config.list_physical_devices("GPU")
if gpus:
for gpu in gpus:
tf.config.experimental.set_memory_growth(gpu, True)
# Folder generate for log file and model saving
log_dir, save_model_dir = folder_preparation(argv[1], argv[2])
# get the dataset
train_dataset, valid_dataset, test_dataset, train_count, valid_count, test_count = generate_datasets()
# create model
model = get_model()
print_model_summary(network=model)
        # Set a target for validation accuracy; the weights are saved only when valid_accuracy reaches the threshold
threshold = THRESHOLD
# define loss calculation
loss_object = tf.keras.losses.SparseCategoricalCrossentropy()
        # Tried RMSprop as the optimizer but the result was not good; fine-tuned the optimizer to Adam (or Momentum)
        # optimizer = tf.keras.optimizers.RMSprop(learning_rate=GLOBAL_LEARNING_RATE,
        #                                         momentum=MOMENTUM,
        #                                         name='rms_optimizer')
        # note: the optimizer's decay argument is learning-rate decay, not weight decay
        optimizer = tf.keras.optimizers.Adam(learning_rate=GLOBAL_LEARNING_RATE, decay=WEIGHT_DECAY, name='adam_optimizer')
# Define training KPI
train_loss = tf.keras.metrics.Mean(name='train_loss')
train_accuracy = tf.keras.metrics.SparseCategoricalAccuracy(name='train_accuracy')
# Define valid KPI
valid_loss = tf.keras.metrics.Mean(name='valid_loss')
valid_accuracy = tf.keras.metrics.SparseCategoricalAccuracy(name='valid_accuracy')
        # @tf.function  # kept disabled: .numpy() is called on the predictions inside
def train(image_batch, label_batch):
with tf.GradientTape() as tape:
predictions = model(image_batch, training=True)
loss = loss_object(y_true=label_batch, y_pred=predictions)
gradients = tape.gradient(loss, model.trainable_variables)
optimizer.apply_gradients(grads_and_vars=zip(gradients, model.trainable_variables))
train_loss.update_state(values=loss)
train_accuracy.update_state(y_true=label_batch, y_pred=predictions)
            return predictions.numpy(), tf.math.argmax(predictions, axis=1).numpy()
        # @tf.function  # kept disabled: .numpy() is called on the predictions inside
def valid(image_batch, label_batch):
predictions = model(image_batch, training=True)
v_loss = loss_object(label_batch, predictions)
valid_loss.update_state(values=v_loss)
valid_accuracy.update_state(y_true=label_batch, y_pred=predictions)
            return tf.math.argmax(predictions, axis=1).numpy()
# start training
for epoch in range(EPOCHS):
train_step = 0
#valid_step = 0
for features in train_dataset:
train_step += 1
images, labels = process_features(features, data_augmentation=False)
predictions, predict_labels = train(images, labels)
# Print the info on the screen for developer to monitor training detail
print("Epoch: {}/{}, step: {}/{}, loss: {:.5f}, accuracy: {:.5f}, softmax(logits):{}, "
"predict_label:{}, target_label:{}".format(epoch,
EPOCHS,
train_step,
math.ceil(train_count / BATCH_SIZE),
train_loss.result().numpy(),
train_accuracy.result().numpy(),
predictions,
predict_labels,
labels))
# Record information into the log file
file = open(log_dir +"training_result_step" + ".log", "a")
file.write("train\t")
file.write(datetime.datetime.now().strftime("%Y%m%d%H%M%S") + "\t")
file.write(str(epoch) + "\t")
file.write(str(train_step) + "\t")
file.write(str(train_accuracy.result().numpy()) + "\t")
file.write(str(predict_labels) + "\t")
file.write(str(labels) + "\n")
file.close()
for features in valid_dataset:
#valid_step += 1
valid_images, valid_labels = process_features(features, data_augmentation=False)
predict_labels = valid(valid_images, valid_labels)
#file = open(log_dir +"training_result_step" + ".log", "a")
#file.write("validation\t")
#file.write(datetime.datetime.now().strftime("%Y%m%d%H%M%S") + "\t")
#file.write(str(epoch) + "\t")
#file.write(str(valid_step) + "\t")
#file.write(str(valid_accuracy.result().numpy()) + "\t")
#file.write(str(predict_labels) + "\t")
#file.write(str(labels) + "\n")
#file.close()
# Print the info on the screen for developer to monitor validation result
print("Epoch: {}/{}, train loss: {:.5f}, train accuracy: {:.5f}, "
"valid loss: {:.5f}, valid accuracy: {:.5f}".format(epoch,
EPOCHS,
train_loss.result().numpy(),
train_accuracy.result().numpy(),
valid_loss.result().numpy(),
valid_accuracy.result().numpy()))
            # Append to the log file in text format, easy for pandas to analyse and to select the best model
file = open(log_dir +"training_result" + ".log","a")
file.write(datetime.datetime.now().strftime("%Y%m%d%H%M%S") + "\t")
file.write(str(epoch) + "\t")
file.write(str(valid_accuracy.result().numpy()) + "\n")
file.close()
valid_accuracy_result = valid_accuracy.result().numpy()
train_loss.reset_states()
train_accuracy.reset_states()
valid_loss.reset_states()
valid_accuracy.reset_states()
            # Save the weights for evaluation and prediction only when the validation accuracy is above the threshold and is the best result so far
if valid_accuracy_result >= threshold:
model.save_weights(filepath=save_model_dir+"model", save_format='tf')
# Threshold update
threshold = valid_accuracy_result
if __name__ == '__main__':
main(sys.argv)
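For reference, the save/load round trip I expect to hold looks like this (a minimal sketch, not part of the scripts; it reuses model, a batch of images, get_model, save_model_dir and the configuration constants from the training code above):
import numpy as np
# Round-trip sanity check (sketch): run inference, save the weights,
# reload them into a fresh model, run inference again, compare outputs.
before = model(images, training=False).numpy()
model.save_weights(filepath=save_model_dir + "model", save_format='tf')
reloaded = get_model()
reloaded.build(input_shape=(None, IMAGE_HEIGHT, IMAGE_WIDTH, CHANNELS))
reloaded.load_weights(filepath=save_model_dir + "model")
after = reloaded(images, training=False).numpy()
# If the weights round-trip correctly, the two passes agree to numerical precision.
print("max abs diff:", np.abs(before - after).max())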
Model definition:
import tensorflow as tf
from models.inception_modules import Stem, ReductionA, BasicConv2D, Conv2DLinear
from configuration import NUM_CLASSES, DROPOUT_RATIO, L1_REGULIZER, L2_REGULIZER
class InceptionResNetA(tf.keras.layers.Layer):
def __init__(self):
super(InceptionResNetA, self).__init__()
self.b1_conv = BasicConv2D(filters=32,
kernel_size=(1, 1),
strides=1,
padding="same")
self.b2_conv1 = BasicConv2D(filters=32,
kernel_size=(1, 1),
strides=1,
padding="same")
self.b2_conv2 = BasicConv2D(filters=32,
kernel_size=(3, 3),
strides=1,
padding="same")
self.b3_conv1 = BasicConv2D(filters=32,
kernel_size=(1, 1),
strides=1,
padding="same")
self.b3_conv2 = BasicConv2D(filters=48,
kernel_size=(3, 3),
strides=1,
padding="same")
self.b3_conv3 = BasicConv2D(filters=64,
kernel_size=(3, 3),
strides=1,
padding="same")
self.conv = Conv2DLinear(filters=384,
kernel_size=(1, 1),
strides=1,
padding="same")
def call(self, inputs, training=None, **kwargs):
b1 = self.b1_conv(inputs, training=training)
b2 = self.b2_conv1(inputs, training=training)
b2 = self.b2_conv2(b2, training=training)
b3 = self.b3_conv1(inputs, training=training)
b3 = self.b3_conv2(b3, training=training)
b3 = self.b3_conv3(b3, training=training)
x = tf.concat(values=[b1, b2, b3], axis=-1)
x = self.conv(x, training=training)
output = tf.keras.layers.add([x, inputs])
return tf.nn.relu(output)
class InceptionResNetB(tf.keras.layers.Layer):
def __init__(self):
super(InceptionResNetB, self).__init__()
self.b1_conv = BasicConv2D(filters=192,
kernel_size=(1, 1),
strides=1,
padding="same")
self.b2_conv1 = BasicConv2D(filters=128,
kernel_size=(1, 1),
strides=1,
padding="same")
self.b2_conv2 = BasicConv2D(filters=160,
kernel_size=(1, 7),
strides=1,
padding="same")
self.b2_conv3 = BasicConv2D(filters=192,
kernel_size=(7, 1),
strides=1,
padding="same")
self.conv = Conv2DLinear(filters=1152,
kernel_size=(1, 1),
strides=1,
padding="same")
def call(self, inputs, training=None, **kwargs):
b1 = self.b1_conv(inputs, training=training)
b2 = self.b2_conv1(inputs, training=training)
b2 = self.b2_conv2(b2, training=training)
b2 = self.b2_conv3(b2, training=training)
x = tf.concat(values=[b1, b2], axis=-1)
x = self.conv(x, training=training)
output = tf.keras.layers.add([x, inputs])
return tf.nn.relu(output)
class InceptionResNetC(tf.keras.layers.Layer):
def __init__(self):
super(InceptionResNetC, self).__init__()
self.b1_conv = BasicConv2D(filters=192,
kernel_size=(1, 1),
strides=1,
padding="same")
self.b2_conv1 = BasicConv2D(filters=192,
kernel_size=(1, 1),
strides=1,
padding="same")
self.b2_conv2 = BasicConv2D(filters=224,
kernel_size=(1, 3),
strides=1,
padding="same")
self.b2_conv3 = BasicConv2D(filters=256,
kernel_size=(3, 1),
strides=1,
padding="same")
self.conv = Conv2DLinear(filters=2144,
kernel_size=(1, 1),
strides=1,
padding="same")
def call(self, inputs, training=None, **kwargs):
b1 = self.b1_conv(inputs, training=training)
b2 = self.b2_conv1(inputs, training=training)
b2 = self.b2_conv2(b2, training=training)
b2 = self.b2_conv3(b2, training=training)
x = tf.concat(values=[b1, b2], axis=-1)
x = self.conv(x, training=training)
output = tf.keras.layers.add([x, inputs])
return tf.nn.relu(output)
class ReductionB(tf.keras.layers.Layer):
def __init__(self):
super(ReductionB, self).__init__()
self.b1_maxpool = tf.keras.layers.MaxPool2D(pool_size=(3, 3),
strides=2,
padding="valid")
self.b2_conv1 = BasicConv2D(filters=256,
kernel_size=(1, 1),
strides=1,
padding="same")
self.b2_conv2 = BasicConv2D(filters=384,
kernel_size=(3, 3),
strides=2,
padding="valid")
self.b3_conv1 = BasicConv2D(filters=256,
kernel_size=(1, 1),
strides=1,
padding="same")
self.b3_conv2 = BasicConv2D(filters=288,
kernel_size=(3, 3),
strides=2,
padding="valid")
self.b4_conv1 = BasicConv2D(filters=256,
kernel_size=(1, 1),
strides=1,
padding="same")
self.b4_conv2 = BasicConv2D(filters=288,
kernel_size=(3, 3),
strides=1,
padding="same")
self.b4_conv3 = BasicConv2D(filters=320,
kernel_size=(3, 3),
strides=2,
padding="valid")
def call(self, inputs, training=None, **kwargs):
b1 = self.b1_maxpool(inputs)
b2 = self.b2_conv1(inputs, training=training)
b2 = self.b2_conv2(b2, training=training)
b3 = self.b3_conv1(inputs, training=training)
b3 = self.b3_conv2(b3, training=training)
b4 = self.b4_conv1(inputs, training=training)
b4 = self.b4_conv2(b4, training=training)
b4 = self.b4_conv3(b4, training=training)
return tf.concat(values=[b1, b2, b3, b4], axis=-1)
def build_inception_resnet_a(n):
block = tf.keras.Sequential()
for _ in range(n):
block.add(InceptionResNetA())
return block
def build_inception_resnet_b(n):
block = tf.keras.Sequential()
for _ in range(n):
block.add(InceptionResNetB())
return block
def build_inception_resnet_c(n):
block = tf.keras.Sequential()
for _ in range(n):
block.add(InceptionResNetC())
return block
class InceptionResNetV2(tf.keras.Model):
def __init__(self):
super(InceptionResNetV2, self).__init__()
self.stem = Stem()
self.inception_resnet_a = build_inception_resnet_a(5)
self.reduction_a = ReductionA(k=256, l=256, m=384, n=384)
self.inception_resnet_b = build_inception_resnet_b(10)
self.reduction_b = ReductionB()
self.inception_resnet_c = build_inception_resnet_c(5)
self.avgpool = tf.keras.layers.AveragePooling2D(pool_size=(8, 8))
self.dropout = tf.keras.layers.Dropout(rate=DROPOUT_RATIO)
self.flat = tf.keras.layers.Flatten()
self.fc = tf.keras.layers.Dense(units=NUM_CLASSES,
activation=tf.keras.activations.softmax,
kernel_regularizer=tf.keras.regularizers.l1(L1_REGULIZER),
activity_regularizer=tf.keras.regularizers.l2(L2_REGULIZER)
)
def call(self, inputs, training=None, mask=None):
x = self.stem(inputs, training=training)
x = self.inception_resnet_a(x, training=training)
x = self.reduction_a(x, training=training)
x = self.inception_resnet_b(x, training=training)
x = self.reduction_b(x, training=training)
x = self.inception_resnet_c(x, training=training)
x = self.avgpool(x)
x = self.dropout(x, training=training)
x = self.flat(x)
x = self.fc(x)
return x
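A quick smoke test for this class looks roughly like this (a sketch in the same module; the 299x299x3 input shape is my assumption, substitute IMAGE_HEIGHT, IMAGE_WIDTH and CHANNELS from configuration.py):
import tensorflow as tf
# Smoke test (sketch): build, print the summary, run one forward pass in
# each mode. The training flag is threaded through every block, so the two
# calls exercise BatchNorm with batch statistics vs. moving averages.
net = InceptionResNetV2()
net.build(input_shape=(None, 299, 299, 3))
net.summary()
x = tf.random.uniform((2, 299, 299, 3))
out_train = net(x, training=True)
out_infer = net(x, training=False)
print(out_train.shape, out_infer.shape)  # both (2, NUM_CLASSES)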
Hyperparameters
lr = 0.0001
weight decay = 1e-4
l1 reg = 0.01
l2 reg = 0.01
drop-out rate = 0.2
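These map onto the constants imported from configuration.py; the relevant excerpt of my file is below (EPOCHS, BATCH_SIZE, THRESHOLD, the image sizes and the root paths are omitted):
# configuration.py (excerpt) -- only the hyperparameters listed above
GLOBAL_LEARNING_RATE = 0.0001  # lr
WEIGHT_DECAY = 1e-4            # weight decay
L1_REGULIZER = 0.01            # l1 reg
L2_REGULIZER = 0.01            # l2 reg
DROPOUT_RATIO = 0.2            # drop-out rate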
Evaluation script:
import tensorflow as tf
import sys
import datetime
import os
from configuration import save_model_root_dir, log_root_dir
from prepare_data import generate_datasets
from train import get_model, process_features
def folder_preparation(job_id, product_id):
    # Generate the log file path and pre-create the log file header
log_dir = log_root_dir + job_id + "/" + product_id + "/"
if not os.path.exists(log_dir):
os.makedirs(log_dir)
file = open(log_dir +"test_result_step" + ".log", "w")
file.write("type\t")
file.write("timestamp\t")
file.write("batch\t")
file.write("test_accuracy\t")
file.write("predict_labels\t")
file.write("actual_labels\n")
file.close()
file = open(log_dir +"test_result" + ".log","w")
file.write("timestamp\t")
file.write("test accuracy\n")
file.close()
# Generate save model path
save_model_dir = save_model_root_dir + job_id + "/" + product_id + "/"
if not os.path.exists(save_model_dir):
os.makedirs(save_model_dir)
return log_dir, save_model_dir
def main(argv):
    # The user must provide job_id and product_id as command-line arguments; this is prepared for the frontend to call
    if len(argv) != 3:
        print("ERROR: Format error, refer to the usage: python test.py job_id product_id")
    elif not argv[1].isdigit():
        print("ERROR: Format error, job_id must be in int format")
    elif not argv[2].isalnum():
        print("ERROR: Format error, product_id must contain only letters or digits, without special characters")
else:
print("INFO: Start evaluating model " + datetime.datetime.now().strftime("%Y%m%d%H%M%S"))
# GPU settings
gpus = tf.config.list_physical_devices('GPU')
if gpus:
for gpu in gpus:
tf.config.experimental.set_memory_growth(gpu, True)
# Folder generate for log file and model saving
log_dir, save_model_dir = folder_preparation(argv[1], argv[2])
# get the original_dataset
train_dataset, valid_dataset, test_dataset, train_count, valid_count, test_count = generate_datasets()
# load the model
model = get_model()
model.load_weights(filepath="saved_model/model") #Already copied to the folder
# model = tf.saved_model.load(save_model_dir)
# Get the accuracy on the test set
        loss_object = tf.keras.losses.SparseCategoricalCrossentropy()
test_loss = tf.keras.metrics.Mean()
test_accuracy = tf.keras.metrics.SparseCategoricalAccuracy()
        # @tf.function  # kept disabled: .numpy() is called on the predictions inside
def test_step(images, labels):
predictions = model(images, training=False)
t_loss = loss_object(labels, predictions)
test_loss(t_loss)
test_accuracy(labels, predictions)
            return tf.math.argmax(predictions, axis=1).numpy()
batch = 0
for features in test_dataset:
batch += 1
test_images, test_labels = process_features(features, data_augmentation=False)
predict_labels = test_step(test_images, test_labels)
print("loss: {:.5f}, test accuracy: {:.5f}, predict_labels:{}, test_labels:{}".format(test_loss.result(),
test_accuracy.result(),
predict_labels,
test_labels)
)
file = open(log_dir +"test_result_step" + ".log", "a")
file.write("test\t")
file.write(datetime.datetime.now().strftime("%Y%m%d%H%M%S") + "\t")
file.write(str(batch) + "\t")
file.write(str(test_accuracy.result().numpy()) + "\t")
file.write(str(predict_labels) + "\t")
file.write(str(test_labels) + "\n")
file.close()
print("The accuracy on test set is: {:.3f}%".format(test_accuracy.result()*100))
file = open(log_dir +"test_result" + ".log","a")
file.write(datetime.datetime.now().strftime("%Y%m%d%H%M%S") + "\t")
file.write(str(test_accuracy.result()) + "\n")
file.close()
if __name__ == '__main__':
main(sys.argv)
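As the comment in the training script says, the logs are tab-separated so that pandas can pick the best model; the selection I have in mind is roughly this (a sketch, assuming the training_result.log header written by folder_preparation and a log_dir as above):
import pandas as pd
# Pick the epoch with the highest validation accuracy from the
# tab-separated training log (columns: timestamp, epoch, valid accuracy).
df = pd.read_csv(log_dir + "training_result.log", sep="\t")
best = df.loc[df["valid accuracy"].idxmax()]
print("best epoch:", int(best["epoch"]), "valid accuracy:", best["valid accuracy"])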