Я пытаюсь обучить эту модель keras-yolov3 из keras-yolov3 в моем настраиваемом наборе данных (который имеет только один класс для «персонажа») с помощью Google Colab.После первых 50 эпох использования полного Yolo с замороженными выходными слоями я получаю ошибку RunOutOfMemory памяти GPU с Tesla T4 GPU.Итак, я попытался обучить модель до первых 50 эпох, затем сохранил вес модели в logs / 000 / train_weights_stage_1.h5, а затем перезагрузил модель с новыми весами.
Это код изtrain.py, который я использовал для обучения первых 50 эпох.
"""
Retrain the YOLO model for your own dataset.
"""
"""
I am removing the checkpointing, since it takes a lot of space and training
Yolov3 model doesn't take much time, just saving end weights
"""
import numpy as np
import keras.backend as K
from keras.layers import Input, Lambda
from keras.models import Model
from keras.optimizers import Adam
from keras.callbacks import TensorBoard, ModelCheckpoint, ReduceLROnPlateau, EarlyStopping
from yolo3.model import preprocess_true_boxes, yolo_body, tiny_yolo_body, yolo_loss
from yolo3.utils import get_random_data
def _main():
annotation_path = 'model_data/annotations.txt'
log_dir = 'logs/000/'
classes_path = 'model_data/people_tracking_classes.txt'
anchors_path = 'model_data/yolo_anchors.txt'
class_names = get_classes(classes_path)
num_classes = len(class_names)
anchors = get_anchors(anchors_path)
input_shape = (416,416) # multiple of 32, hw
is_tiny_version = len(anchors)==6 # default setting
if is_tiny_version:
model = create_tiny_model(input_shape, anchors, num_classes,
freeze_body=2, weights_path='model_data/tiny_yolo.h5')
else:
model = create_model(input_shape, anchors, num_classes,
freeze_body=2, weights_path='model_data/yolo.h5') # make sure you know what you freeze
logging = TensorBoard(log_dir=log_dir) #SEE THIS
checkpoint = ModelCheckpoint(log_dir + 'ep{epoch:03d}-loss{loss:.3f}-val_loss{val_loss:.3f}.h5',
monitor='val_loss', save_weights_only=True, save_best_only=True, period=3, verbose=0)
#verbose=1 isn't that beneficial.
reduce_lr = ReduceLROnPlateau(monitor='val_loss', factor=0.1, patience=3, verbose=1)
early_stopping = EarlyStopping(monitor='val_loss', min_delta=0, patience=10, verbose=1)
val_split = 0.1
with open(annotation_path) as f:
lines = f.readlines()
np.random.seed(10101)
np.random.shuffle(lines)
print(lines)
np.random.seed(None)
num_val = int(len(lines)*val_split)
num_train = len(lines) - num_val
# Train with frozen layers first, to get a stable loss.
# Adjust num epochs to your dataset. This step is enough to obtain a not bad model.
if True:
model.compile(optimizer=Adam(lr=1e-3), loss={
# use custom yolo_loss Lambda layer.
'yolo_loss': lambda y_true, y_pred: y_pred})
batch_size = 32
print('Train on {} samples, val on {} samples, with batch size {}.'.format(num_train, num_val, batch_size))
history = model.fit_generator(data_generator_wrapper(lines[:num_train], batch_size, input_shape, anchors, num_classes),
steps_per_epoch=max(1, num_train//batch_size),
validation_data=data_generator_wrapper(lines[num_train:], batch_size, input_shape, anchors, num_classes),
validation_steps=max(1, num_val//batch_size),
epochs=50,
initial_epoch=0,
callbacks=[logging]) #checkpoint])
model.save_weights(log_dir + 'trained_weights_stage_1.h5')
model.save(log_dir + 'trained_model_stage_1.h5')
json_string = model.to_json()
print('Saved Model Architecture to: {}'.format(json_string))
# Unfreeze and continue training, to fine-tune.
# Train longer if the result is not good.
"""if True:
for i in range(len(model.layers)):
model.layers[i].trainable = True
model.compile(optimizer=Adam(lr=1e-4), loss={'yolo_loss': lambda y_true, y_pred: y_pred}) # recompile to apply the change
print('Unfreeze all of the layers.')
batch_size = 32 # note that more GPU memory is required after unfreezing the body
print('Train on {} samples, val on {} samples, with batch size {}.'.format(num_train, num_val, batch_size))
#import pdb; pdb.set_trace()
history = model.fit_generator(data_generator_wrapper(lines[:num_train], batch_size, input_shape, anchors, num_classes),
steps_per_epoch=max(1, num_train//batch_size),
validation_data=data_generator_wrapper(lines[num_train:], batch_size, input_shape, anchors, num_classes),
validation_steps=max(1, num_val//batch_size),
epochs=100,
initial_epoch=50,
callbacks=[logging, reduce_lr, early_stopping]) #removed checkpointing.
model.save_weights(log_dir + 'trained_weights_final.h5')"""
# Further training if needed.
def get_classes(classes_path):
'''loads the classes'''
with open(classes_path) as f:
class_names = f.readlines()
class_names = [c.strip() for c in class_names]
return class_names
def get_anchors(anchors_path):
'''loads the anchors from a file'''
with open(anchors_path) as f:
anchors = f.readline()
anchors = [float(x) for x in anchors.split(',')]
return np.array(anchors).reshape(-1, 2)
def create_model(input_shape, anchors, num_classes, load_pretrained=True, freeze_body=2,
weights_path='model_data/yolo.h5'):
'''create the training model'''
K.clear_session() # get a new session
image_input = Input(shape=(None, None, 3))
h, w = input_shape
num_anchors = len(anchors)
#This I don't know what's happening, can understand the python, but not the
# Logic behind y_true, but seems like 3 output layers of yolov3
y_true = [Input(shape=(h//{0:32, 1:16, 2:8}[l], w//{0:32, 1:16, 2:8}[l], \
num_anchors//3, num_classes+5)) for l in range(3)]
model_body = yolo_body(image_input, num_anchors//3, num_classes)
print('Create YOLOv3 model with {} anchors and {} classes.'.format(num_anchors, num_classes))
if load_pretrained:
"""model.load_weights(filepath, by_name=False) loads the weights of
the model from a HDF5 file (created by save_weights). By default, the
architecture is expected to be unchanged. To load weights into a different
architecture (with some layers in common), use by_name=True to load only
those layers with the same name.
"""
model_body.load_weights(weights_path, by_name=True, skip_mismatch=True)
#skip_mismatch
print(len(model_body.layers)) # To find out the number of layers, should
# be 185.
print('Load weights {}.'.format(weights_path))
if freeze_body in [1, 2]:
# Freeze darknet53 body or freeze all but 3 output layers.
num = (185, len(model_body.layers)-3)[freeze_body-1]
for i in range(num): model_body.layers[i].trainable = False
print('Freeze the first {} layers of total {} layers.'.format(num, len(model_body.layers)))
model_loss = Lambda(yolo_loss, output_shape=(1,), name='yolo_loss',
arguments={'anchors': anchors, 'num_classes': num_classes, 'ignore_thresh': 0.5})(
[*model_body.output, *y_true])
model = Model([model_body.input, *y_true], model_loss)
return model
def create_tiny_model(input_shape, anchors, num_classes, load_pretrained=True, freeze_body=2,
weights_path='model_data/tiny_yolo.h5'):
'''create the training model, for Tiny YOLOv3'''
K.clear_session() # get a new session
image_input = Input(shape=(None, None, 3))
h, w = input_shape
num_anchors = len(anchors)
y_true = [Input(shape=(h//{0:32, 1:16}[l], w//{0:32, 1:16}[l], \
num_anchors//2, num_classes+5)) for l in range(2)]
model_body = tiny_yolo_body(image_input, num_anchors//2, num_classes)
print('Create Tiny YOLOv3 model with {} anchors and {} classes.'.format(num_anchors, num_classes))
if load_pretrained:
model_body.load_weights(weights_path, by_name=True, skip_mismatch=True)
print('Load weights {}.'.format(weights_path))
if freeze_body in [1, 2]:
# Freeze the darknet body or freeze all but 2 output layers.
num = (20, len(model_body.layers)-2)[freeze_body-1]
for i in range(num): model_body.layers[i].trainable = False
print('Freeze the first {} layers of total {} layers.'.format(num, len(model_body.layers)))
model_loss = Lambda(yolo_loss, output_shape=(1,), name='yolo_loss',
arguments={'anchors': anchors, 'num_classes': num_classes, 'ignore_thresh': 0.7})(
[*model_body.output, *y_true])
model = Model([model_body.input, *y_true], model_loss)
return model
def data_generator(annotation_lines, batch_size, input_shape, anchors, num_classes):
'''data generator for fit_generator'''
n = len(annotation_lines)
i = 0
while True:
image_data = []
box_data = []
for b in range(batch_size):
if i==0:
np.random.shuffle(annotation_lines)
image, box = get_random_data(annotation_lines[i], input_shape, random=True)
image_data.append(image)
box_data.append(box)
i = (i+1) % n
image_data = np.array(image_data)
box_data = np.array(box_data)
y_true = preprocess_true_boxes(box_data, input_shape, anchors, num_classes)
yield [image_data, *y_true], np.zeros(batch_size)
def data_generator_wrapper(annotation_lines, batch_size, input_shape, anchors, num_classes):
n = len(annotation_lines)
if n==0 or batch_size<=0: return None
return data_generator(annotation_lines, batch_size, input_shape, anchors, num_classes)
if __name__ == '__main__':
_main()
# Explaining the Skipping loading of weights warning below.
"""These layers are the three output layers of the yolo network. You probably
got the message because you changed the number of classes and therefore the
shape of the last conv layers will change aswell. So you can just ignore the
message, the network will work just fine."""
После завершения первых 50 эпох, как я уже говорил, я загрузил тренировочные веса в модель и попытался отрегулировать ее, разморозив все слои.Как можно видеть ниже:
from keras.callbacks import ModelCheckpoint
from keras.optimizers import Adam
import numpy as np
from keras.layers import Input, Lambda
from yolo3.model import preprocess_true_boxes, yolo_body, tiny_yolo_body, yolo_loss
from yolo3.utils import get_random_data
import keras.losses
keras.losses.custom_loss = yolo_loss
log_dir = 'logs/000/'
annotation_path = 'model_data/annotations.txt'
val_split = 0.1
with open(annotation_path) as f:
lines = f.readlines()
np.random.seed(10101)
np.random.shuffle(lines)
print(lines)
np.random.seed(None)
num_val = int(len(lines)*val_split)
num_train = len(lines) - num_val
checkpoint = ModelCheckpoint(log_dir + 'ep{epoch:03d}-loss{loss:.3f}-val_loss{val_loss:.3f}.h5',
monitor='val_loss', save_weights_only=True, save_best_only=True, period=10, verbose=0)
classes_path = 'model_data/new_classes.txt'
def get_classes(classes_path):
'''loads the classes'''
with open(classes_path) as f:
class_names = f.readlines()
class_names = [c.strip() for c in class_names]
return class_names
class_names = get_classes(classes_path)
num_classes = len(class_names)
from yolo3.model import yolo_head, yolo_body
from keras.layers import Input, Lambda
from keras.utils import CustomObjectScope
model = yolo_body(Input(shape=(None, None, 3)), 3, num_classes)
model.load_weights('logs/000/trained_weights_stage_1.h5', by_name=True)
# Unfreeze and continue training, to fine-tune.
# Train longer if the result is not good.
if True:
for i in range(len(model.layers)):
model.layers[i].trainable = True
model.compile(optimizer=Adam(lr=1e-4), loss={'yolo_loss': lambda y_true, y_pred: y_pred}) # recompile to apply the change
print('Unfreeze all of the layers.')
batch_size = 32 # note that more GPU memory is required after unfreezing the body
print('Train on {} samples, val on {} samples, with batch size {}.'.format(num_train, num_val, batch_size))
#import pdb; pdb.set_trace()
history = model.fit_generator(data_generator_wrapper(lines[:num_train], batch_size, input_shape, anchors, num_classes),
steps_per_epoch=max(1, num_train//batch_size),
validation_data=data_generator_wrapper(lines[num_train:], batch_size, input_shape, anchors, num_classes),
validation_steps=max(1, num_val//batch_size),
epochs=100,
initial_epoch=50,
callbacks=[logging, reduce_lr, early_stopping]) #removed checkpointing.
model.save_weights(log_dir + 'trained_weights_final.h5')
# Further training if needed.`
Я получаю следующее ValueError при запуске кода выше:
ValueError Traceback (most recent call last)
<ipython-input-30-d6473c9875b2> in <module>()
61 for i in range(len(model.layers)):
62 model.layers[i].trainable = True
---> 63 model.compile(optimizer=Adam(lr=1e-4), loss={'yolo_loss': lambda y_true, y_pred: y_pred}) # recompile to apply the change
64 print('Unfreeze all of the layers.')
65
/usr/local/lib/python3.6/dist-packages/keras/engine/training.py in compile(self, optimizer, loss, metrics, loss_weights, sample_weight_mode, weighted_metrics, target_tensors, **kwargs)
117 'dictionary: "' + name + '". '
118 'Only expected the following keys: ' +
--> 119 str(self.output_names))
120 loss_functions = []
121 for name in self.output_names:
ValueError: Unknown entry in loss dictionary: "yolo_loss". Only expected the following keys: ['conv2d_209', 'conv2d_217', 'conv2d_225']
В недоумении, как избавиться от этой ошибки и обучитьполная модель с сохраненными весами из первой части.Кроме того, я попытался загрузить полную модель из model.save('trained_model_stage_1.h5')
и использовать model.load_model('trained_model_stage_1.h5', custom_objects={'yolo_loss' : yolo_loss})
, но это также не работает для меня, и я получаю ошибку yolo_head is not defined.