Функция getEmbeddingMatrix строит embedding_matrix для модели ниже. Может кто-нибудь объяснить, почему я получаю эту ошибку и как её устранить?
def getEmbeddingMatrix(word_index, vectorSource):
    """Build an embedding matrix for every word in *word_index*.

    Parameters
    ----------
    word_index : dict
        Mapping word -> integer index (e.g. ``Tokenizer.word_index``).
        Indices are assumed to start at 1; row 0 is reserved for masking.
    vectorSource : str
        Which vector file to load: ``'fasttext'`` (plain-text ``.vec``
        format: word followed by floats) or ``'custom-fasttext'``
        (a JSON dict of word -> vector).

    Returns
    -------
    numpy.ndarray of shape ``(len(word_index) + 1, wvLength)``; rows for
    words without a known vector stay all-zero.

    Raises
    ------
    ValueError
        If *vectorSource* is not a recognized key.  (Previously this
        surfaced as an opaque ``KeyError`` — e.g. ``KeyError: '-f'``
        when a command-line token leaked into *vectorSource*.)

    NOTE: relies on module-level ``wvLength`` (expected vector length),
    ``json`` and ``np`` being defined by the surrounding file.
    """
    wordVecSources = {
        'fasttext': './vectors/crawl-300d-2M-subword.vec',
        'custom-fasttext': './vectors/' + '20news-fasttext.json',
    }
    # Fail fast with a clear message instead of a bare KeyError on the
    # dict lookup: the reported "KeyError: '-f'" means vectorSource held
    # an argv-style flag, not one of the supported source names.
    if vectorSource not in wordVecSources:
        raise ValueError(
            "Unknown vectorSource %r; expected one of %s"
            % (vectorSource, sorted(wordVecSources))
        )

    allWv = {}
    # 'with' guarantees the file is closed even if parsing raises
    # (the original leaked the handle on any exception).
    with open(wordVecSources[vectorSource]) as f:
        if vectorSource == 'custom-fasttext':
            allWv = json.loads(f.read())
        elif vectorSource == 'fasttext':
            errorCount = 0
            for line in f:
                values = line.split()
                word = values[0].strip()
                try:
                    wv = np.asarray(values[1:], dtype='float32')
                    if len(wv) != wvLength:
                        # Wrong dimensionality (e.g. header line or a
                        # word containing whitespace) — skip it.
                        errorCount = errorCount + 1
                        continue
                except ValueError:
                    # Non-numeric token where a float was expected; the
                    # original bare 'except:' also hid real errors.
                    errorCount = errorCount + 1
                    continue
                allWv[word] = wv
            print("# Bad Word Vectors:", errorCount)

    # +1 row: index 0 is the masked padding slot and gets no word.
    embedding_matrix = np.zeros((len(word_index) + 1, wvLength))
    for word, i in word_index.items():
        if word in allWv:
            embedding_matrix[i] = allWv[word]
    return embedding_matrix
# Stop training once val_loss has not improved for 5 consecutive epochs.
early_stop = keras.callbacks.EarlyStopping(
    monitor='val_loss', min_delta=0, patience=5,
    verbose=2, mode='auto', restore_best_weights=False)

model = keras.models.Sequential()

# Arguments shared by both Embedding variants; row 0 is the mask slot,
# hence the +1 on input_dim.
common_embedding_kwargs = dict(
    input_dim=len(kTokenizer.word_index) + 1,
    output_dim=wvLength,
    input_length=sequenceLength,
    mask_zero=True,
)
if vectorSource != 'none':
    # Pretrained vectors: initialize from the matrix and freeze the layer.
    embedding_matrix = getEmbeddingMatrix(kTokenizer.word_index, vectorSource)
    embedding = keras.layers.embeddings.Embedding(
        weights=[embedding_matrix], trainable=False, **common_embedding_kwargs)
else:
    # No pretrained vectors: learn the embeddings during training.
    embedding = keras.layers.embeddings.Embedding(
        trainable=True, **common_embedding_kwargs)

model.add(embedding)
model.add(keras.layers.LSTM(units=150, dropout=0.2, recurrent_dropout=0.2,
                            return_sequences=False))
model.add(keras.layers.Dense(numClasses, activation='softmax'))
model.compile(optimizer='adam', loss='categorical_crossentropy',
              metrics=['acc'])
print(model.summary())
При вызове getEmbeddingMatrix возникает KeyError: '-f', хотя оба файла с векторами существуют по указанным путям. Вот трассировка:
KeyError Traceback (most recent call last)
<ipython-input-45-ca6718845b1d> in <module>
2 model = keras.models.Sequential()
3 if (vectorSource != 'none'):
----> 4 embedding_matrix = getEmbeddingMatrix (kTokenizer.word_index, vectorSource)
5 embedding = keras.layers.embeddings.Embedding(input_dim=len(kTokenizer.word_index)+1, output_dim=wvLength, weights=[embedding_matrix], input_length=sequenceLength, trainable=False, mask_zero=True)
6 else:
<ipython-input-40-b29c56e927a3> in getEmbeddingMatrix(word_index, vectorSource)
1 def getEmbeddingMatrix (word_index, vectorSource):
2 wordVecSources = {'fasttext' : './vectors/crawl-300d-2M-subword.vec', 'custom-fasttext' : './vectors/' + '20news-fasttext.json' }
----> 3 f = open (wordVecSources[vectorSource])
4 allWv = {}
5 if (vectorSource == 'custom-fasttext'):
KeyError: '-f'