Keras LSTM attention model will not compile, "Invalid dimensions in the dense layer"
0 votes / 18 October 2019

I'm having trouble implementing an LSTM-with-attention example in Keras from class. I get the following error:

Error when checking target: expected dense_6 to have 3 dimensions, but got array with shape (110169, 57)

I tried adding a Flatten layer before the final Dense layer; the model then compiles and trains, but it literally outputs nothing: blank text, when it should be predicting the next characters with the LSTM.
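For reference, the mismatch is between the model's per-timestep, 3-D output and the 2-D one-hot targets. A quick check (using the attn_model and y defined in the script below) would show it:

print(attn_model.output_shape)  # (None, 100, 1) - one value per time step
print(y.shape)                  # (110169, 57)   - one one-hot row per sample, as in the error above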

Here is the entire script I am working with right now:

# -*- coding: utf-8 -*-

#imports
import re
import sys
import numpy
import requests
import numpy as np
from keras.layers import LSTM
from keras.layers import Dense
from keras.layers import Dropout
from keras.utils import np_utils
from keras.models import Sequential
from keras.callbacks import ModelCheckpoint
from sklearn.model_selection import train_test_split

#loading book data
html = requests.get("http://www.gutenberg.org/files/11/11-0.txt")
text = html.text
#removing some garbage
text = re.sub(r'[^\x00-\x7f]',r'', text)

raw_text = text.lower()

#per the tutorial, this builds dictionaries mapping each unique character to an integer ID and back
chars = sorted(list(set(raw_text)))
char_to_int = dict((c, i) for i, c in enumerate(chars))
int_to_char = dict((i, c) for i, c in enumerate(chars))

n_chars = len(raw_text)
n_vocab = len(chars)

# prepare the dataset of input to output pairs encoded as integers
seq_length = 100
dataX = []
dataY = []
for i in range(0, n_chars - seq_length, 1):
    seq_in = raw_text[i:i + seq_length]
    seq_out = raw_text[i + seq_length]
    dataX.append([char_to_int[char] for char in seq_in])
    dataY.append(char_to_int[seq_out])
n_patterns = len(dataX)

#each pattern is a sliding window of 100 characters, so there are seq_length fewer
#patterns than total characters (the first pattern uses up the first 100 chars)
print("Total Patterns: ", n_patterns)

# reshape X to be [samples, time steps, features]
X = numpy.reshape(dataX, (n_patterns, seq_length, 1))
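# X now has shape (n_patterns, seq_length, 1): each sample is a window of 100 character IDs, one feature per time step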

# normalize the integer IDs to the range [0, 1] so the LSTM
# trains on small input values instead of raw character codes
X = X / float(n_vocab)

# one hot encode the output variable
y = np_utils.to_categorical(dataY)
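# y has shape (n_patterns, n_vocab): each row is a one-hot vector for the character that follows the window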

#add a thing here for test train split
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.33, random_state=42)

#print(X_train.shape)
#print(y_train.shape)

from keras.layers import * 
from keras.models import Model

seq_len   = 100
num_units = 128
embedded = Input(shape=(X.shape[1], X.shape[2]), name='ModelInput')
activations = LSTM(num_units, return_sequences=True, name='LSTM')(embedded)
# compute importance for each step
attention = Dense(1, activation='tanh', name='ImportanceScore')(activations)
attention = Flatten()(attention)
attention = Activation('softmax', name='AttentionScore')(attention)
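# softmax across the 100 time steps yields one attention weight per character position (the weights sum to 1)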
attention = RepeatVector(num_units)(attention)
attention = Permute([2, 1])(attention)
sent_representation = multiply([activations, attention], name='AppliedAttention')
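# sent_representation is still 3-D, (batch, seq_len, num_units); the usual next step is to sum it over the time axis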
output = Dense(1)(sent_representation)
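# note: Dense(1) acts only on the last axis, so the output is still (batch, seq_len, 1) - this is why Keras expects a 3-D target here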
attn_model = Model(inputs=embedded, outputs=output)
attn_model.compile(loss='mean_squared_error', optimizer='adam')
attn_model.summary()

attn_model.fit(X_train, y_train, validation_data=(X_test, y_test),epochs=10, batch_size=64)

#the seed pattern is normalized by float(n_vocab) before each prediction, just like the training data
#argmax of the prediction gives an integer ID, which maps back to a character via int_to_char
# pick a random seed
start = numpy.random.randint(0, len(dataX)-1)
pattern = dataX[start]
print("Seed:")
print("\"", ''.join([int_to_char[value] for value in pattern]), "\"")
print(" ")
print("Output:")
# generate characters
for i in range(500):
    x = numpy.reshape(pattern, (1, len(pattern), 1))
    x = x / float(n_vocab)
    prediction = attn_model.predict(x, verbose=0)
    index = numpy.argmax(prediction)
    result = int_to_char[index]
    seq_in = [int_to_char[value] for value in pattern]
    sys.stdout.write(result)
    pattern.append(index)
    pattern = pattern[1:len(pattern)]

Any help getting this model to compile, or an explanation of why it trains incorrectly with the Dense layer in there, would be much appreciated, thanks!
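For what it's worth, the usual way to make this kind of attention head line up with 2-D one-hot targets is to collapse the time axis after applying the attention weights and let the final Dense layer emit one softmax probability per vocabulary character. Below is a minimal sketch of just the model section, assuming the goal is one next-character prediction per 100-character window; it reuses X and y from the script above, and the Lambda/K.sum step is the common pattern for this snippet, not something taken from the course material:

from keras import backend as K
from keras.layers import Input, LSTM, Dense, Flatten, Activation, RepeatVector, Permute, Lambda, multiply
from keras.models import Model

num_units = 128
embedded = Input(shape=(X.shape[1], X.shape[2]), name='ModelInput')          # (batch, 100, 1)
activations = LSTM(num_units, return_sequences=True, name='LSTM')(embedded)  # (batch, 100, 128)

# one importance score per time step, turned into weights that sum to 1
attention = Dense(1, activation='tanh', name='ImportanceScore')(activations) # (batch, 100, 1)
attention = Flatten()(attention)                                             # (batch, 100)
attention = Activation('softmax', name='AttentionScore')(attention)
attention = RepeatVector(num_units)(attention)                               # (batch, 128, 100)
attention = Permute([2, 1])(attention)                                       # (batch, 100, 128)

# weight the LSTM outputs, then sum over the time axis -> one vector per sequence
sent_representation = multiply([activations, attention], name='AppliedAttention')
sent_representation = Lambda(lambda xin: K.sum(xin, axis=1))(sent_representation)  # (batch, 128)

# one probability per character in the vocabulary, matching y's shape (n_patterns, n_vocab)
output = Dense(y.shape[1], activation='softmax')(sent_representation)

attn_model = Model(inputs=embedded, outputs=output)
attn_model.compile(loss='categorical_crossentropy', optimizer='adam')
attn_model.summary()

With this head the existing sampling loop works unchanged, since numpy.argmax now picks among all n_vocab characters. It would also explain the blank output from the Flatten-before-the-last-Dense variant: with a single output unit, argmax always returns 0, and int_to_char[0] is most likely a newline or space, so the generated text looks empty.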
