При вызове model.fit_generator() я получаю приведённую ниже ошибку. У меня max_text_len = 100, и длина каждого текста в пакете меньше max_text_len, но ошибка всё равно возникает:
InvalidArgumentError: sequence_length(0) <= 98 [[{{node ctc/CTCLoss}}]]
from keras import backend as K
from keras.layers import Conv2D, MaxPooling2D
from keras.layers import Input, Dense, Activation
from keras.layers import Reshape, Lambda, BatchNormalization, Bidirectional
from keras.layers.merge import add, concatenate
from keras.models import Model
from keras.layers.recurrent import LSTM
from parameters import *
# Pin the Keras learning phase to 0 for everything built after this point.
# NOTE(review): in Keras, phase 0 is the test phase, so layers such as
# BatchNormalization presumably run in inference mode even while fitting --
# confirm this is intended for training.
K.set_learning_phase(0)
# Loss and train functions, network architecture
def ctc_lambda_func(args):
    """Compute the batch CTC cost inside a Lambda layer.

    Args:
        args: a 4-item sequence ``(y_pred, labels, input_length,
            label_length)`` as wired up by the ``ctc`` Lambda layer.

    Returns:
        The result of ``K.ctc_batch_cost`` for the batch.
    """
    predictions, labels, input_length, label_length = args
    # Drop the first two time steps: the earliest RNN outputs tend to be
    # garbage. Callers must therefore report input_length values that fit
    # in the shortened sequence.
    trimmed = predictions[:, 2:, :]
    return K.ctc_batch_cost(labels, trimmed, input_length, label_length)
def get_Model(training):
    """Assemble the CNN + two-level bidirectional LSTM recognition network.

    The hyper-parameters (``input_shape``, ``conv_filters``, ``kernel_size``,
    ``pool_size``, ``rnn_size``, ``num_classes``, ``max_text_len``) are read
    from module scope -- presumably supplied by ``parameters``.

    Args:
        training: when truthy, return the model that takes the image, the
            labels and both length inputs and outputs the CTC loss;
            otherwise return the model mapping the image to the softmax
            character activations.

    Returns:
        A Keras ``Model``.
    """

    def conv(filters, kernel, name):
        # All convolutions share padding, activation and initializer.
        return Conv2D(filters, kernel, padding='same', activation='relu',
                      kernel_initializer='he_normal', name=name)

    def pool(size, name):
        return MaxPooling2D(pool_size=size, name=name)

    def batch_norm(name):
        return BatchNormalization(axis=-1, momentum=0.99, epsilon=0.001,
                                  name=name)

    def lstm(name, **extra):
        return LSTM(rnn_size, return_sequences=True,
                    kernel_initializer='he_normal', name=name, **extra)

    def flip_time():
        # A backwards LSTM emits its sequence reversed; flip it back along
        # the time axis before merging with the forward pass.
        return Lambda(lambda tensor: K.reverse(tensor, axes=1))

    inputs = Input(name='the_input', shape=input_shape, dtype='float32')

    # Convolutional feature extractor, applied strictly in this order.
    cnn_stack = [
        conv(conv_filters[1], kernel_size[0], 'conv1'),
        pool(pool_size[0], 'pool1'),
        conv(conv_filters[2], kernel_size[0], 'conv2'),
        pool(pool_size[1], 'pool2'),
        conv(conv_filters[2], kernel_size[1], 'conv3'),
        pool(pool_size[0], 'pool3'),
        batch_norm('BN1'),
        conv(conv_filters[3], kernel_size[1], 'conv4'),
        conv(conv_filters[3], kernel_size[1], 'conv5'),
        pool(pool_size[0], 'pool4'),
        conv(conv_filters[4], kernel_size[1], 'conv6'),
        pool(pool_size[1], 'pool5'),
        batch_norm('BN2'),
        conv(conv_filters[4], kernel_size[1], 'conv7'),
        pool(pool_size[1], 'pool6'),
    ]
    features = inputs
    for layer in cnn_stack:
        features = layer(features)

    # CNN to RNN: the feature map becomes a sequence of 100 steps x 512.
    sequence = Reshape(target_shape=((100, 512)), name='reshape')(features)
    sequence = Dense(max_text_len, activation='relu',
                     kernel_initializer='he_normal', name='dense1')(sequence)

    # First bidirectional level: forward and backward outputs are summed.
    forward_1 = lstm('lstm1')(sequence)
    backward_1 = lstm('lstm1_b', go_backwards=True)(sequence)
    backward_1_flipped = flip_time()(backward_1)
    level_1 = add([forward_1, backward_1_flipped])
    level_1 = BatchNormalization()(level_1)

    # Second bidirectional level: outputs are concatenated instead.
    forward_2 = lstm('lstm2')(level_1)
    backward_2 = lstm('lstm2_b', go_backwards=True)(level_1)
    backward_2_flipped = flip_time()(backward_2)
    level_2 = concatenate([forward_2, backward_2_flipped])
    level_2 = BatchNormalization()(level_2)

    # Transform the RNN output into per-step character activations.
    logits = Dense(num_classes, kernel_initializer='he_normal',
                   name='dense2')(level_2)
    y_pred = Activation('softmax', name='softmax')(logits)

    labels = Input(name='the_labels', shape=[max_text_len], dtype='float32')
    input_length = Input(name='input_length', shape=[1], dtype='int64')
    label_length = Input(name='label_length', shape=[1], dtype='int64')

    # Keras doesn't currently support loss functions with extra parameters,
    # so the CTC loss is computed in a Lambda layer.
    loss_out = Lambda(ctc_lambda_func, output_shape=(1,), name='ctc')(
        [y_pred, labels, input_length, label_length])

    if training:
        return Model(inputs=[inputs, labels, input_length, label_length],
                     outputs=loss_out)
    return Model(inputs=[inputs], outputs=y_pred)
# Batch sizes, presumably for the training and validation generators
# (their use is not visible in this part of the file).
batch_size = 20
val_batch_size = 20
# Presumably passed to TextImageGenerator, whose next_batch reports the CTC
# input length as img_w // downsample_factor - 2.
# NOTE(review): get_Model reshapes to 100 time steps and ctc_lambda_func drops
# the first 2, so that reported length must not exceed 98 -- confirm this
# factor matches the image width in use.
downsample_factor = 4
class TextImageGenerator:
    """Load a directory of labelled text images and serve CTC batches.

    Each image's label is its file name without the extension. Images are
    read as grayscale, resized to ``(img_w, img_h)`` and scaled to [-1, 1].

    Args:
        img_dirpath: directory containing the images.
        img_w: width the images are resized to.
        img_h: height the images are resized to.
        batch_size: number of samples per yielded batch.
        downsample_factor: divisor used to derive the default CTC input
            length from ``img_w``.
        max_text_len: width of the label matrix; labels longer than this
            are rejected.
        input_seq_len: optional explicit CTC input length. It must not
            exceed the number of time steps the network feeds into the CTC
            loss, otherwise TensorFlow fails with
            ``sequence_length(i) <= max_time``. When ``None`` the original
            ``img_w // downsample_factor - 2`` is used.
    """

    def __init__(self, img_dirpath, img_w, img_h,
                 batch_size, downsample_factor, max_text_len=100,
                 input_seq_len=None):
        self.img_h = img_h
        self.img_w = img_w
        self.batch_size = batch_size
        self.max_text_len = max_text_len
        self.downsample_factor = downsample_factor
        self.input_seq_len = input_seq_len
        self.img_dirpath = img_dirpath                # image dir path
        self.img_dir = os.listdir(self.img_dirpath)   # image file names
        self.n = len(self.img_dir)                    # number of images
        self.indexes = list(range(self.n))
        self.cur_index = 0
        self.imgs = np.zeros((self.n, self.img_h, self.img_w))
        self.texts = []
        self.blank_label = self.get_output_size() - 1

    def get_output_size(self):
        """Return the number of output classes: the alphabet plus one."""
        return len(CHAR_VECTOR) + 1

    def build_data(self):
        """Read every image with OpenCV into ``imgs`` and its label into ``texts``."""
        print(self.n, " Image Loading start...")
        for i, img_file in enumerate(self.img_dir):
            # os.path.join also works when img_dirpath has no trailing
            # separator, which plain string concatenation did not.
            img_path = os.path.join(self.img_dirpath, img_file)
            # NOTE(review): cv2.imread presumably returns None for files it
            # cannot decode, in which case the resize below fails -- confirm
            # the directory holds only images.
            img = cv2.imread(img_path, cv2.IMREAD_GRAYSCALE)
            img = cv2.resize(img, (self.img_w, self.img_h))
            img = img.astype(np.float32)
            img = (img / 255.0) * 2.0 - 1.0   # scale 0..255 to -1..1
            self.imgs[i, :, :] = img
            # The label is the file name minus its extension, whatever the
            # extension's length.
            self.texts.append(os.path.splitext(img_file)[0])
        print(len(self.texts) == self.n)
        print(self.n, " Image Loading finish...")

    def next_sample(self):
        """Advance the cursor and return the ``(image, text)`` it points to.

        The cursor is incremented before reading, so position 0 is only
        served after the first wrap-around. On wrap-around the index order
        is reshuffled.
        """
        self.cur_index += 1
        if self.cur_index >= self.n:
            self.cur_index = 0
            random.shuffle(self.indexes)
        idx = self.indexes[self.cur_index]
        return self.imgs[idx], self.texts[idx]

    def _label_row(self, labels):
        """Return ``labels`` as a row of width ``max_text_len`` padded with 1s.

        Raises:
            ValueError: if ``labels`` is longer than ``max_text_len``.
        """
        if len(labels) > self.max_text_len:
            raise ValueError(
                "label of length %d exceeds max_text_len=%d"
                % (len(labels), self.max_text_len))
        # Padding with ones keeps the fill value of the original label
        # matrix; the padding is ignored by CTC thanks to label_length.
        row = np.ones(self.max_text_len)
        row[:len(labels)] = labels
        return row

    def next_batch(self):
        """Yield ``(inputs, outputs)`` dict pairs of ``batch_size`` samples forever.

        ``inputs`` holds ``the_input``, ``the_labels``, ``input_length`` and
        ``label_length``; ``outputs`` holds a zero-filled ``ctc`` target.
        """
        if self.input_seq_len is None:
            seq_len = self.img_w // self.downsample_factor - 2
        else:
            seq_len = self.input_seq_len
        while True:
            X_data = np.ones([self.batch_size, self.img_w, self.img_h, 1])
            Y_data = np.ones([self.batch_size, self.max_text_len])
            input_length = np.ones((self.batch_size, 1)) * seq_len
            label_length = np.zeros((self.batch_size, 1))

            for i in range(self.batch_size):
                img, text = self.next_sample()
                # Transpose to (width, height) and add a channel axis.
                X_data[i] = np.expand_dims(img.T, -1)
                # Labels are usually shorter than max_text_len, so pad them
                # instead of assigning the raw sequence to the whole row.
                Y_data[i] = self._label_row(text_to_labels(text))
                label_length[i] = len(text)

            # Package the arrays under the model's input and output names.
            inputs = {
                'the_input': X_data,
                'the_labels': Y_data,
                'input_length': input_length,
                'label_length': label_length
            }
            outputs = {'ctc': np.zeros([self.batch_size])}
            yield (inputs, outputs)
Входные метки дополнены нулями, но я не понимаю, откуда берётся эта ошибка.