Keras InvalidArgumentError: sequence_length (0) <= 98 [[{{node ctc / CTCLoss}}]] - PullRequest
0 голосов
/ 03 февраля 2020

Я получаю приведённую ниже ошибку при вызове model.fit_generator(). У меня max_text_len = 100, и хотя max_seq_len для каждого текста пакета меньше, чем max_text_len, вызов всё равно выдаёт эту ошибку:

InvalidArgumentError: sequence_length (0) <= 98 [[{{node ctc / CTCLoss}}]]

from keras import backend as K
from keras.layers import Conv2D, MaxPooling2D
from keras.layers import Input, Dense, Activation
from keras.layers import Reshape, Lambda, BatchNormalization, Bidirectional
from keras.layers.merge import add, concatenate
from keras.models import Model
from keras.layers.recurrent import LSTM
from parameters import *
# NOTE(review): 0 puts the Keras backend in test/inference phase (disables
# dropout, freezes BatchNorm statistics) for ALL subsequently built layers —
# confirm this is intended, since the file also builds a *training* model.
K.set_learning_phase(0)
# # Loss and train functions, network architecture
def ctc_lambda_func(args):
    """CTC loss body for a Keras ``Lambda`` layer.

    Expects ``args`` as the 4-tuple ``(y_pred, labels, input_length,
    label_length)`` and returns the per-sample CTC batch cost.
    The first two time steps of ``y_pred`` are discarded because the
    earliest RNN outputs tend to be garbage; this shrinks the effective
    sequence length by 2.
    """
    predictions, labels, input_length, label_length = args
    # Drop the two leading (unreliable) time steps before computing the loss.
    predictions = predictions[:, 2:, :]
    return K.ctc_batch_cost(labels, predictions, input_length, label_length)

def get_Model(training):
    """Build the CRNN (CNN + bidirectional LSTM) text-recognition model.

    Args:
        training: if True, return the training model whose single output is
            the CTC loss tensor (layer name ``'ctc'``); otherwise return an
            inference model that outputs the softmax character activations.

    Returns:
        A compiled-ready ``keras.models.Model`` (not yet compiled).
    """
    # --- CNN feature extractor ---
    act = 'relu'
    inputs = Input(name='the_input', shape=input_shape, dtype='float32')
    inner = Conv2D(conv_filters[1], kernel_size[0], padding='same',
                   activation=act, kernel_initializer='he_normal',
                   name='conv1')(inputs)
    inner = MaxPooling2D(pool_size=pool_size[0], name='pool1') (inner)
    inner = Conv2D(conv_filters[2], kernel_size[0], padding='same',
                   activation=act, kernel_initializer='he_normal',
                   name='conv2') (inner)
    inner = MaxPooling2D(pool_size=pool_size[1], name='pool2') (inner)
    inner = Conv2D(conv_filters[2], kernel_size[1], padding='same',
                   activation=act, kernel_initializer='he_normal',
                   name='conv3') (inner)
    inner = MaxPooling2D(pool_size=pool_size[0], name='pool3') (inner)
    inner = BatchNormalization(axis=-1, momentum=0.99, epsilon=0.001, name='BN1') (inner)
    inner = Conv2D(conv_filters[3], kernel_size[1], padding='same',
                   activation=act, kernel_initializer='he_normal',
                   name='conv4') (inner)
    inner = Conv2D(conv_filters[3], kernel_size[1], padding='same',
                   activation=act, kernel_initializer='he_normal',
                   name='conv5') (inner)
    inner = MaxPooling2D(pool_size=pool_size[0], name='pool4') (inner)
    inner = Conv2D(conv_filters[4], kernel_size[1], padding='same',
                   activation=act, kernel_initializer='he_normal',
                   name='conv6') (inner)
    inner = MaxPooling2D(pool_size=pool_size[1], name='pool5') (inner)
    inner = BatchNormalization(axis=-1, momentum=0.99, epsilon=0.001, name='BN2') (inner)
    inner = Conv2D(conv_filters[4], kernel_size[1], padding='same',
                   activation=act, kernel_initializer='he_normal',
                   name='conv7') (inner)
    inner = MaxPooling2D(pool_size=pool_size[1], name='pool6') (inner)

    # --- CNN to RNN ---
    # NOTE(review): hard-codes the flattened CNN output as (100, 512); this is
    # only valid for the specific input_shape / pool sizes defined in
    # parameters.py — verify, otherwise Reshape fails at build time.
    inner = Reshape(target_shape=((100, 512)), name='reshape')(inner)
    inner = Dense(max_text_len, activation='relu', kernel_initializer='he_normal', name='dense1')(inner)
    # --- RNN layers: two hand-built bidirectional LSTM stages ---
    lstm_1 = LSTM(rnn_size, return_sequences=True, kernel_initializer='he_normal', name='lstm1')(inner)
    lstm_1b = LSTM(rnn_size, return_sequences=True, go_backwards=True, kernel_initializer='he_normal', name='lstm1_b')(inner)
    # go_backwards=True emits outputs in reverse time order; flip them back
    # so the add() below aligns time steps.
    reversed_lstm_1b = Lambda(lambda inputTensor: K.reverse(inputTensor, axes=1)) (lstm_1b)
    lstm1_merged = add([lstm_1, reversed_lstm_1b])
    lstm1_merged = BatchNormalization()(lstm1_merged)

    lstm_2 = LSTM(rnn_size, return_sequences=True, kernel_initializer='he_normal', name='lstm2')(lstm1_merged)
    lstm_2b = LSTM(rnn_size, return_sequences=True, go_backwards=True, kernel_initializer='he_normal', name='lstm2_b')(lstm1_merged)
    reversed_lstm_2b= Lambda(lambda inputTensor: K.reverse(inputTensor, axes=1)) (lstm_2b)
    # Second stage concatenates instead of adding, doubling the feature width.
    lstm2_merged = concatenate([lstm_2, reversed_lstm_2b])
    lstm2_merged = BatchNormalization()(lstm2_merged)

    # transforms RNN output to character activations:
    inner = Dense(num_classes, kernel_initializer='he_normal',name='dense2')(lstm2_merged)

    y_pred = Activation('softmax', name='softmax')(inner)

    labels = Input(name='the_labels', shape=[max_text_len], dtype='float32')
    input_length = Input(name='input_length', shape=[1], dtype='int64')
    label_length = Input(name='label_length', shape=[1], dtype='int64')

    # Keras doesn't currently support loss funcs with extra parameters
    # so CTC loss is implemented in a lambda layer.
    # NOTE(review): ctc_lambda_func drops the first 2 time steps, so the
    # effective CTC sequence length here is 100 - 2 = 98. Every value the
    # generator feeds as 'input_length' must satisfy
    # label_length <= input_length <= 98; the reported
    # "sequence_length (0) <= 98" error indicates this constraint is violated.
    loss_out = Lambda(ctc_lambda_func, output_shape=(1,), name='ctc')([y_pred, labels, input_length, label_length])

    if training:
        return Model(inputs=[inputs, labels, input_length,label_length],outputs=loss_out)

    else:
        return Model(inputs=[inputs], outputs=y_pred)
# --- Training configuration ---
batch_size = 20          # samples per training batch
val_batch_size = 20      # samples per validation batch
# Width reduction used to derive the CTC input_length in the generator below.
# NOTE(review): must agree with the model's actual time-axis output length
# (100 steps minus the 2 trimmed in the loss = 98) — verify against get_Model.
downsample_factor = 4


class TextImageGenerator:
    """Streams (inputs, outputs) batches for CTC training.

    Grayscale images are read from ``img_dirpath``; the ground-truth text of
    each image is its file name minus the 4-character extension (e.g.
    ``"word.png"`` -> ``"word"``). ``next_batch`` yields the dict structure
    expected by the 4-input training model built in ``get_Model``.
    """

    def __init__(self, img_dirpath, img_w, img_h,
                 batch_size, downsample_factor, max_text_len=100):
        self.img_h = img_h
        self.img_w = img_w
        self.batch_size = batch_size
        self.max_text_len = max_text_len
        self.downsample_factor = downsample_factor
        self.img_dirpath = img_dirpath                  # image directory path
        self.img_dir = os.listdir(self.img_dirpath)     # image file names
        self.n = len(self.img_dir)                      # number of images
        self.indexes = list(range(self.n))              # shuffled each epoch
        self.cur_index = 0
        self.imgs = np.zeros((self.n, self.img_h, self.img_w))
        self.texts = []
        self.blank_label = self.get_output_size() - 1   # CTC blank = last class

    def get_output_size(self):
        # +1 for the CTC blank symbol.
        return len(CHAR_VECTOR) + 1

    def build_data(self):
        """Load every image into memory, normalized to [-1, 1]; collect labels."""
        print(self.n, " Image Loading start...")
        for i, img_file in enumerate(self.img_dir):
            path = os.path.join(self.img_dirpath, img_file)
            img = cv2.imread(path, cv2.IMREAD_GRAYSCALE)
            if img is None:
                # Fail fast with a clear message instead of a cryptic
                # cv2.resize error on a None image.
                raise IOError("cannot read image file: %s" % path)
            img = cv2.resize(img, (self.img_w, self.img_h))
            img = img.astype(np.float32)
            img = (img / 255.0) * 2.0 - 1.0   # scale [0, 255] -> [-1, 1]

            self.imgs[i, :, :] = img
            self.texts.append(img_file[0:-4])   # strip 4-char extension, e.g. ".png"
        print(len(self.texts) == self.n)        # sanity check: one label per image
        print(self.n, " Image Loading finish...")

    def next_sample(self):
        """Return one (image, text) pair; reshuffle when the index wraps."""
        self.cur_index += 1
        if self.cur_index >= self.n:
            self.cur_index = 0
            random.shuffle(self.indexes)
        return self.imgs[self.indexes[self.cur_index]], self.texts[self.indexes[self.cur_index]]

    def next_batch(self):
        """Infinite generator of (inputs, outputs) dicts for fit_generator."""
        while True:
            X_data = np.ones([self.batch_size, self.img_w, self.img_h, 1])     # (bs, img_w, img_h, 1)
            Y_data = np.ones([self.batch_size, self.max_text_len])             # (bs, max_text_len)
            # NOTE(review): this value must satisfy
            # label_length <= input_length <= model CTC steps (98 here); if
            # img_w // downsample_factor - 2 exceeds the model's output
            # length, CTCLoss raises "sequence_length (i) <= 98".
            input_length = np.ones((self.batch_size, 1)) * (self.img_w // self.downsample_factor - 2)  # (bs, 1)
            label_length = np.zeros((self.batch_size, 1))           # (bs, 1)

            for i in range(self.batch_size):
                img, text = self.next_sample()
                img = img.T                      # (h, w) -> (w, h): width becomes the time axis
                img = np.expand_dims(img, -1)    # add channel dim
                X_data[i] = img
                # NOTE(review): assumes text_to_labels returns exactly
                # max_text_len entries (padded); verify, otherwise this
                # assignment raises a broadcast error.
                Y_data[i] = text_to_labels(text)
                label_length[i] = len(text)

            # Assemble the 4 named model inputs.
            inputs = {
                'the_input': X_data,
                'the_labels': Y_data,
                'input_length': input_length,
                'label_length': label_length
            }
            # Dummy target: the real loss is computed inside the 'ctc' Lambda.
            outputs = {'ctc': np.zeros([self.batch_size])}
            yield (inputs, outputs)

Массив label_length изначально заполняется нулями, а затем в цикле получает фактическую длину каждой метки, но я не понимаю, откуда берётся эта ошибка.

...