Исправлена ​​потеря при обучении Keras для номерных знаков переменной длины (CNN, GRU, CTC Loss) - PullRequest
0 голосов
/ 29 марта 2019

Проект, над которым я работаю, включает обучение сетей глубокого обучения распознаванию марокканских номерных знаков переменной длины (количество символов от 5 до 8) , насколько я могучтобы обнаружить их и обрезать их с помощью Tensorflow Object Detection API, теперь я хочу распознать текст внутри ограничительных рамок.Поэтому я выбрал Keras Functional API.

. После исправления некоторых проблем я могу начать тренировочный процесс с использованием Keras, но получаю 0,0556 потерь.

Вот код main.py ( original ):

import os
import codecs
import cv2
import numpy as np
from keras import backend as K
from keras.layers import Input, Dense, Activation, Conv2D, Reshape
from keras.layers import BatchNormalization, Lambda, MaxPooling2D, Dropout
from keras.layers.merge import add, concatenate
from keras.callbacks import EarlyStopping,Callback
from keras.layers.recurrent import GRU
from keras.models import Model
from keras import optimizers 
from keras.callbacks import ModelCheckpoint, TensorBoard
from keras.models import load_model
from keras.utils.vis_utils import plot_model
import tensorflow as tf
from keras.backend.tensorflow_backend import set_session

CHARS = ['0', '1', '2', '3', '4', '5', '6', '7', '8', '9',
         'A', 'B', 'J', 'D', 'H', 'O', 'W'
        ]
CHARS_DICT = {char:i for i, char in enumerate(CHARS)}
NUM_CHARS = len(CHARS)
# The GPU used 1060, and is not selected will automatically call the set display
# os.environ['CUDA_VISIBLE_DEVICES'] = '1'
#Dynamic application memory
config = tf.ConfigProto()
config.gpu_options.allow_growth=True
set_session(tf.Session(config=config))


#The necessary parameters
num_channels = 3
ti = '../car_pic/image/train/' #Training picture directory
tl = '../car_pic/image/train_labels.txt' #Training label file
vi = '../car_pic/image/val/'  #Verify image directory
vl = '../car_pic/image/val_labels.txt' #Verify that the label file
img_size = [230,50] #Training picture width and height
label_len = 8 #The length of the label
dir_log = './logs/'
c = '../car_pic/image/' #checkpoints format string
num_epochs = 200 #number of epochs
start_of_epoch = 0

#Network parameters
conv_filters = 16
kernel_size = (3, 3)
pool_size = 2
time_dense_size = 32
rnn_size = 512
batch_size = 16

def ctc_lambda_func(args):
    y_pred, labels, input_length, label_length = args
    #Why is from 2 to start?
    y_pred = y_pred[:, 2:, :]  
    return K.ctc_batch_cost(labels, y_pred, input_length, label_length)

############The model structure############
input_tensor = Input(name='the_input', shape=(img_size[0], img_size[1], num_channels), dtype='float32')
x = input_tensor
base_conv = 32
#Convolutional layer 1
x = Conv2D(base_conv * 1, (3,3), padding="same",name='conv1')(x)
x = BatchNormalization()(x)
x = Activation('relu')(x)
x = MaxPooling2D(pool_size=(2, 2))(x)
#Convolution layer 2
x = Conv2D(base_conv * 2, (3,3), padding="same",name='conv2')(x)
x = BatchNormalization()(x)
x = Activation('relu')(x)
x = MaxPooling2D(pool_size=(2, 2))(x)
#Convolution layer 3
x = Conv2D(base_conv * 4, (3,3), padding="same",name='conv3')(x)
x = BatchNormalization()(x)
x = Activation('relu')(x)
x = MaxPooling2D(pool_size=(2, 2))(x)
# Parameter view
# conv_shape = x.get_shape().as_list()
# rnn_length = conv_shape[1]
# rnn_dimen = conv_shape[2]*conv_shape[3]
# print(conv_shape, rnn_length, rnn_dimen)
#Dimension conversion
conv_to_rnn_dims = (img_size[0]//(2**3),(img_size[1]//(2**3))*128)
x = Reshape(target_shape=conv_to_rnn_dims,name='reshape')(x)
x =Dense(time_dense_size,activation='relu',name='dense1')(x)
x = BatchNormalization()(x)
x = Activation('relu')(x)

# x = Dropout(0.2)(x)
#Two layersbidirecitonal GRUs
gru_1 = GRU(rnn_size,return_sequences=True,kernel_initializer='he_normal',name='gru_1')(x)
gru_1b = GRU(rnn_size,return_sequences=True,go_backwards=True,kernel_initializer='he_normal',name='gru_1b')(x)
gru1_merged = add([gru_1,gru_1b])
gru_2 = GRU(rnn_size,return_sequences=True,kernel_initializer='he_normal',name='gru_2')(gru1_merged)
gru_2b = GRU(rnn_size,return_sequences=True,go_backwards=True,kernel_initializer='he_normal',name='gru_2b')(gru1_merged)

# transforms RNN output to character activations:  
x = Dense(NUM_CHARS+1,kernel_initializer='he_normal',name='dense2')(concatenate([gru_2,gru_2b]))
x = Activation('softmax',name='softmax')(x)

#Print Out model profile
base_model = Model(inputs=input_tensor, outputs=x)
base_model.summary()
#ComputingctcThe necessary parameters
pred_length = int(x.shape[1])  #Why would minus the 2 before you can run it???
labels = Input(name='the_labels', shape=[label_len], dtype='float32')
input_length = Input(name='input_length', shape=[1], dtype='int32')
label_length = Input(name='label_length', shape=[1], dtype='int32')

loss_out = Lambda(ctc_lambda_func, output_shape=(1,), name='ctc')([x, labels, input_length, label_length])

model = Model(inputs=[input_tensor, labels, input_length, label_length], outputs=[loss_out])

plot_model(model,to_file=" gru_model.png",show_shapes=True) #show_shapes 带参数显示

# adam = optimizers.Adam(lr=0.01, beta_1=0.9, beta_2=0.999, epsilon=1e-08) 

model.compile(loss={'ctc': lambda y_true, y_pred: x}, optimizer='adam')

#The license plate corresponding to thelables
def encode_label(s):
    label = np.zeros([len(s)])
    for i, c in enumerate(s):
        label[i] = CHARS_DICT[c]
    return label

# def encode_label(text):
#     return list(map(lambda x: CHARS.index(x), text))

def labels_to_text(labels):
    return ''.join(list(map(lambda x: CHARS[int(x)], labels)))

def parse_line(line):
    parts = line.split('.')
    filename = parts[0]
    label = encode_label(parts[0].strip().upper())
    return filename, label

class TextImageGenerator:
    def __init__(self, img_dir, label_file, batch_size, img_size, input_length, num_channels=3, label_len=8):
        self._img_dir = img_dir
        self._label_file = label_file
        self._batch_size = batch_size
        self._num_channels = num_channels
        self._label_len = label_len
        self._input_len = input_length
        self._img_w, self._img_h = img_size
        self._num_examples = 0
        self._next_index = 0
        self._num_epoches = 0
        self.filenames = []
        self.labels = None
        self.init()

    def init(self):
        #self.labels = []
        with open(self._label_file) as f:
            for c, l in enumerate(f):
                pass
        self.labels = np.ones([c+1, self._label_len]) * -1
        with open(self._label_file) as f:
            for i, line in enumerate(f):
                filename, label = parse_line(line)
                self.filenames.append(filename+".jpg")
                self.labels[i,0:len(label)] = label
                #self.labels.append(label)
                self._num_examples += 1

        #self.labels = np.array([v + [26.] * (self._label_len - len(v)) for v in self.labels])
        self.labels = np.float32(self.labels)
        # self.labels = [[np.float32(v) for v in lll] for lll in self.labels]

    def next_batch(self):
        # Shuffle the data
        if self._next_index == 0:
            perm = np.arange(self._num_examples)
            np.random.shuffle(perm)
            self._filenames = [self.filenames[i] for i in perm]
            self._labels = self.labels[perm]

        batch_size = self._batch_size
        start = self._next_index
        end = self._next_index + batch_size
        if end >= self._num_examples:
            self._next_index = 0
            self._num_epoches += 1
            end = self._num_examples
            batch_size = self._num_examples - start
        else:
            self._next_index = end
        images = np.zeros([batch_size, self._img_h, self._img_w, self._num_channels])
        # labels = np.zeros([batch_size, self._label_len])
        for j, i in enumerate(range(start, end)):
            fname = self._filenames[i]
            img = cv2.imread(os.path.join(self._img_dir, fname))
            images[j, ...] = img
        images = np.transpose(images, axes=[0, 2, 1, 3])
        labels = self._labels[start:end]
        # print("HHHHHHHHHHHHHHHHHHHHHHH")
        # print("++++",start, "+++",end)
        # print(self._labels)
        # print("HHHHHHHHHHHHHHHHHHHHHHH")
        # print(labels)
        # print("HHHHHHHHHHHHHHHHHHHHHHH")
        input_length = np.zeros([batch_size, 1])
        label_length = np.zeros([batch_size, 1])
        input_length[:] = self._input_len
        label_length[:] = self._label_len
        outputs = {'ctc': np.zeros([batch_size])}
        inputs = {'the_input': images,
                  'the_labels': labels,
                  'input_length': input_length,
                  'label_length': label_length,
                  }
        return inputs, outputs

    def get_data(self):
        while True:
            yield self.next_batch()



#Generated data
train_gen = TextImageGenerator(img_dir=ti,
                                 label_file=tl,
                                 batch_size=batch_size,
                                 img_size=img_size,
                                 input_length=pred_length,
                                 num_channels=num_channels,
                                 label_len=label_len)

val_gen = TextImageGenerator(img_dir=vi,
                                 label_file=vl,
                                 batch_size=batch_size,
                                 img_size=img_size,
                                 input_length=pred_length,
                                 num_channels=num_channels,
                                 label_len=label_len)

# # Model evaluation
def evaluate(steps=10):
    batch_acc = 0
    generator = train_gen
    for i in range(steps):
        x_test, y_test = next(generator)
        y_pred = base_model.predict(x_test)
        shape = y_pred[:,2:,:].shape
        ctc_decode = K.ctc_decode(y_pred[:,2:,:], input_length=np.ones(shape[0])*shape[1])[0][0]
        out = K.get_value(ctc_decode)[:, :label_len]
        if out.shape[1] == label_len:
            batch_acc += (y_test == out).all(axis=1).mean()
    return batch_acc / steps

class Evaluator(Callback):
    def __init__(self):
        self.accs = []

    def on_epoch_end(self, epoch, logs=None):
        acc = evaluate(steps=20)*100
        self.accs.append(acc)
        print('')
        print('acc: %f%%' % acc)

evaluator = Evaluator()        
# #The callback function will in eachepochAfter saving the model to the path
# checkpoints_cb = ModelCheckpoint(c, period=1)
# cbs = [checkpoints_cb]

# #tensorboard 
# if dir_log != '':
# tfboard_cb = TensorBoard(log_dir=dir_log, write_images=True)
# cbs.append(tfboard_cb)
import matplotlib.pyplot as plt

checkpoints_cb = ModelCheckpoint("../checkpoints/'weights.{epoch:02d}-{val_loss:.2f}.h5'", period=1)
cbs = [checkpoints_cb]

h = model.fit_generator(generator=train_gen.get_data(),
                    steps_per_epoch=100,
                    epochs=20,
                    validation_data=val_gen.get_data(),
                    validation_steps=20,
                    callbacks=cbs,
                    initial_epoch=0)
                    # callbacks=[EarlyStopping(patience=10)])

# Save the model to save the weight value
model = Model(inputs=input_tensor, outputs=x)
# model.save(save_name)
model.save_weights('my_model_weight.h5')
print('model saved to {}'.format('my_model_weight.h5'))

Это проблема, которую я получаю, Фиксированная потеря:

Epoch 1/20
100/100 [==============================] - 209s 2s/step - loss: 0.0556 - val_loss: 0.0556
Epoch 2/20
100/100 [==============================] - 203s 2s/step - loss: 0.0556 - val_loss: 0.0556
Epoch 3/20
100/100 [==============================] - 217s 2s/step - loss: 0.0556 - val_loss: 0.0556
Epoch 4/20
100/100 [==============================] - 215s 2s/step - loss: 0.0556 - val_loss: 0.0556

Кроме того, когда я пытаюсь прогнозировать, используя экспортированные веса, яполучить матрицу 0.0556.

...