I am implementing the Manhattan LSTM as described in this paper: https://dl.acm.org/doi/10.5555/3016100.3016291. I have not implemented the data augmentation and the c initialization described in the paper; everything else is in place. But the network does not learn at all: the loss stays unchanged across iterations. Am I missing something here?
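For reference, the similarity my Lambda layer is meant to compute is the one from the paper, g(h_left, h_right) = exp(-||h_left - h_right||_1), which always falls in (0, 1]. A minimal NumPy sketch of just that function, separate from the script below (the example vectors are made up purely for illustration):

import numpy as np

def exp_manhattan(a, b):
    # L1 (Manhattan) distance between two sentence encodings, mapped through
    # exp(-d): identical encodings give 1.0, and the similarity shrinks
    # towards 0 as the encodings diverge
    return np.exp(-np.sum(np.abs(a - b)))

h_left = np.array([0.1, 0.5, -0.2])
h_right = np.array([0.1, 0.4, -0.2])
print(exp_manhattan(h_left, h_left))   # 1.0
print(exp_manhattan(h_left, h_right))  # ~0.905

The full script follows.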
import os
from keras import Model
from keras import Sequential
from keras import Input, layers
from keras.preprocessing.text import Tokenizer
from keras.preprocessing.sequence import pad_sequences
import numpy as np
from keras import backend as K
from keras import optimizers
data_path = 'SICK.txt'
train_left = []
train_right = []
allText = []
scores = []
# read the sentence pairs and relatedness scores from the tab-separated SICK file
with open(data_path, 'r', encoding='utf-8') as fp:
    line = fp.readline()  # skip the header line
    while line:
        line = fp.readline().split('\t')
        if len(line) < 5:
            break
        train_left.append(line[1])
        train_right.append(line[2])
        allText.append(line[1])
        allText.append(line[2])
        scores.append(float(line[4]))
vocab_size = 2200
embedding_dim = 300
tokenizer = Tokenizer(num_words=vocab_size)
tokenizer.fit_on_texts(allText)
word_index = tokenizer.word_index
max_length = 30
train_left_sequence = tokenizer.texts_to_sequences(train_left)
train_left_sequence = pad_sequences(train_left_sequence, maxlen=max_length, padding='post')
train_right_sequence = tokenizer.texts_to_sequences(train_right)
train_right_sequence = pad_sequences(train_right_sequence, maxlen=max_length, padding='post')
reverse_word_index = dict([(val, key) for (key, val) in word_index.items()])
embedding_matrix = np.zeros((vocab_size, embedding_dim))
glove_dir = 'data\\glove.6B'
embeddings_index = {}
f = open(os.path.join(glove_dir, 'glove.6B.300d.txt'), encoding='utf-8')
for line in f:
    values = line.split()
    word = values[0]
    coefs = np.asarray(values[1:], dtype='float32')
    embeddings_index[word] = coefs
f.close()
print('Found %s word vectors.' % len(embeddings_index))
for word, i in word_index.items():
    if i < vocab_size:
        embedding_vector = embeddings_index.get(word)
        if embedding_vector is not None:
            embedding_matrix[i] = embedding_vector
def exp_manhattan_distance(vects):
    # similarity from the paper: exp(-||x - y||_1), always in (0, 1]
    x, y = vects
    return K.exp(-K.sum(K.abs(x - y), axis=1, keepdims=True))

def exp_manhattan_output_shape(shapes):
    shape1, shape2 = shapes
    return (shape1[0], 1)
def create_base_network(input_shape):
    # shared encoder: embedding layer followed by a 50-unit LSTM
    input = Input(shape=input_shape)
    print(input_shape)
    x = layers.Embedding(vocab_size, 300, input_length=max_length)(input)
    #x = layers.LSTM(50, recurrent_dropout=0.2, dropout=0.2, return_sequences=True)(x)
    x = layers.LSTM(50, recurrent_dropout=0.2, dropout=0.2)(x)
    return Model(input, x)
input_shape = (max_length,)
base_network = create_base_network(input_shape)
base_network.summary()
# freeze the embedding layer and load the pretrained GloVe weights into it
base_network.layers[1].trainable = False
base_network.layers[1].set_weights([embedding_matrix])
input_left = Input(shape=input_shape)
input_right = Input(shape=input_shape)
processed_a = base_network(input_left)
processed_b = base_network(input_right)
distance = layers.Lambda(exp_manhattan_distance,
                         output_shape=exp_manhattan_output_shape)([processed_a, processed_b])
model = Model([input_left, input_right], distance)
# train
model.compile(loss='mean_squared_error', optimizer=optimizers.Adadelta())
history = model.fit([train_left_sequence, train_right_sequence], scores, epochs=30)
sentence1 = "A group of kids is playing in a yard and an old man is standing in the background"
sentence2 = "A group of boys in a yard is playing and a man is standing in the background"
sentence1_seq = pad_sequences(tokenizer.texts_to_sequences([sentence1]), max_length, padding='post')
sentence2_seq = pad_sequences(tokenizer.texts_to_sequences([sentence2]), max_length, padding='post')
model.predict([sentence1_seq, sentence2_seq])
sentence1 = "A brown dog is attacking another animal in front of the man in pants"
sentence2 = "Two dogs are fighting"
sentence1_seq = pad_sequences(tokenizer.texts_to_sequences([sentence1]), max_length, padding='post')
sentence2_seq = pad_sequences(tokenizer.texts_to_sequences([sentence2]), max_length, padding='post')
model.predict([sentence1_seq, sentence2_seq])