While I was recently working through this PyTorch word embeddings tutorial, I noticed that the order of the vocabulary affects the prediction result.
Here is an example that demonstrates the problem; the code is modified from Robert Guthrie's original tutorial code.
import torch
import torch.autograd as autograd
import torch.nn as nn
import torch.nn.functional as functional
import torch.optim as optim
torch.manual_seed(1)
CONTEXT_SIZE = 2
EMBEDDING_DIM = 4
test_sentence = r"""<s> The mathematician ran . <\s>
<s> The mathematician ran to the store . <\s>
<s> The physicist ran to the store . <\s>
<s> The philosopher thought about it . <\s>
<s> The mathematician solved the open problem . <\s>""".split()
# build a list of tuples. Each tuple is ([ word_i-2, word_i-1 ], target word)
trigrams = [([test_sentence[i], test_sentence[i + 1]], test_sentence[i + 2])
            for i in range(len(test_sentence) - 2)]
# fix the order of vocabulary
# if the sorted() is removed, the predicting result will be unstable.
vocab = sorted(list(set(test_sentence)))
word_to_ix = {word: i for i, word in enumerate(vocab)}
class NGramLanguageModeler(nn.Module):
    def __init__(self, vocab_size, embedding_dim, context_size):
        super(NGramLanguageModeler, self).__init__()
        self.embeddings = nn.Embedding(vocab_size, embedding_dim)
        self.linear1 = nn.Linear(context_size * embedding_dim, 128)
        self.linear2 = nn.Linear(128, vocab_size)

    def forward(self, inputs):
        embeds = self.embeddings(inputs).view((1, -1))
        out = functional.relu(self.linear1(embeds))
        out = self.linear2(out)
        log_probs = functional.log_softmax(out, dim=1)
        return log_probs
loss_function = nn.NLLLoss()
model = NGramLanguageModeler(len(vocab), EMBEDDING_DIM, CONTEXT_SIZE)
optimizer = optim.SGD(model.parameters(), lr=0.01)
# train the model
for epoch in range(20):
    for context, target in trigrams:
        context_indices = [word_to_ix[w] for w in context]
        context_var = autograd.Variable(torch.LongTensor(context_indices))
        model.zero_grad()
        log_probs = model(context_var)
        loss = loss_function(log_probs, autograd.Variable(torch.LongTensor([word_to_ix[target]])))
        loss.backward()
        optimizer.step()
context_tuple = ("<s>", "The")
context_indices = [word_to_ix[w] for w in context_tuple]
context_var = autograd.Variable(torch.LongTensor(context_indices))
model.zero_grad()
log_probs = model(context_var)
sims = []
probs = []
candidates = ["philosopher", "physicist"]
# to calculate which word is closer to mathematician according to cosine similarities
related_embedding = model.embeddings(autograd.Variable(torch.LongTensor([word_to_ix["mathematician"]])))
for word in candidates:
    # Probability
    probs.append(log_probs[0][word_to_ix[word]])
    # Cosine similarity
    embedding = model.embeddings(autograd.Variable(torch.LongTensor([word_to_ix[word]])))
    sims.append(functional.cosine_similarity(embedding, related_embedding))
print("Predicted word (probability): %s" % (candidates[0] if probs[0] > probs[1] else candidates[1]))
print("Predicted word (cosine similarity): %s" % (candidates[0] if sims[0] > sims[1] else candidates[1]))
If the sorted() call on vocab is removed, the result is different from run to run. Since I have already fixed PyTorch's random seed with torch.manual_seed(1), why is the result not reproducible?
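To narrow this down, here is a minimal check I put together (my own sketch, not part of the tutorial; it only reuses test_sentence from the script above). My guess is that without sorted() the iteration order of the set, and therefore the index assigned to each word in word_to_ix, can change between interpreter runs because of Python's string hash randomization, so the seeded embedding rows end up attached to different words, but I am not sure this fully explains the different predictions:

# Rebuild the vocabulary exactly as in the script above, but without sorted().
test_sentence = r"""<s> The mathematician ran . <\s>
<s> The mathematician ran to the store . <\s>
<s> The physicist ran to the store . <\s>
<s> The philosopher thought about it . <\s>
<s> The mathematician solved the open problem . <\s>""".split()

vocab_unsorted = list(set(test_sentence))
word_to_ix = {word: i for i, word in enumerate(vocab_unsorted)}

# Running this in two separate Python processes (with the default
# PYTHONHASHSEED behaviour) can print two different mappings, even though
# the torch seed is fixed.
print(word_to_ix)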