Why does a fixed vocabulary order matter in word embeddings?

While recently working through this PyTorch word embedding tutorial, I noticed that the order of the vocabulary affects the prediction result.

Here is example code illustrating the problem, modified from the original code by Robert Guthrie.

import torch
import torch.autograd as autograd
import torch.nn as nn
import torch.nn.functional as functional
import torch.optim as optim

torch.manual_seed(1)

CONTEXT_SIZE = 2
EMBEDDING_DIM = 4

test_sentence = r"""<s> The mathematician ran . <\s>
<s> The mathematician ran to the store . <\s>
<s> The physicist ran to the store . <\s>
<s> The philosopher thought about it . <\s>
<s> The mathematician solved the open problem . <\s>""".split()

# build a list of tuples.  Each tuple is ([ word_i-2, word_i-1 ], target word)
trigrams = [([test_sentence[i], test_sentence[i + 1]], test_sentence[i + 2])
            for i in range(len(test_sentence) - 2)]

# fix the order of vocabulary
# if the sorted() is removed, the predicting result will be unstable.
vocab = sorted(list(set(test_sentence)))
word_to_ix = {word: i for i, word in enumerate(vocab)}


class NGramLanguageModeler(nn.Module):

    def __init__(self, vocab_size, embedding_dim, context_size):
        super(NGramLanguageModeler, self).__init__()
        self.embeddings = nn.Embedding(vocab_size, embedding_dim)
        self.linear1 = nn.Linear(context_size * embedding_dim, 128)
        self.linear2 = nn.Linear(128, vocab_size)

    def forward(self, inputs):
        embeds = self.embeddings(inputs).view((1, -1))
        out = functional.relu(self.linear1(embeds))
        out = self.linear2(out)
        log_probs = functional.log_softmax(out, dim=1)
        return log_probs


loss_function = nn.NLLLoss()
model = NGramLanguageModeler(len(vocab), EMBEDDING_DIM, CONTEXT_SIZE)
optimizer = optim.SGD(model.parameters(), lr=0.01)

# train the model
for epoch in range(20):
    for context, target in trigrams:
        context_indices = [word_to_ix[w] for w in context]
        context_var = autograd.Variable(torch.LongTensor(context_indices))

        model.zero_grad()
        log_probs = model(context_var)

        loss = loss_function(log_probs, autograd.Variable(torch.LongTensor([word_to_ix[target]])))

        loss.backward()
        optimizer.step()

context_tuple = ("<s>", "The")

context_indices = [word_to_ix[w] for w in context_tuple]
context_var = autograd.Variable(torch.LongTensor(context_indices))

model.zero_grad()
log_probs = model(context_var)

sims = []
probs = []
candidates = ["philosopher", "physicist"]

# to calculate which word is closer to mathematician according to cosine similarities
related_embedding = model.embeddings(autograd.Variable(torch.LongTensor([word_to_ix["mathematician"]])))

for word in candidates:
    # Probability
    probs.append(log_probs[0][word_to_ix[word]])
    # Cosine similarity
    embedding = model.embeddings(autograd.Variable(torch.LongTensor([word_to_ix[word]])))
    sims.append(functional.cosine_similarity(embedding, related_embedding))

print("Predicted word (probability): %s" % (candidates[0] if probs[0] > probs[1] else candidates[1]))
print("Predicted word (cosine similarity): %s" % (candidates[0] if sims[0] > sims[1] else candidates[1]))

If the sorted() call on vocab is removed, the result is different. Since I have already fixed the PyTorch random seed, why is the result not reproducible?
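To make the question more concrete, here is a separate minimal sketch (not part of the tutorial code; the three words and the embedding size of 4 are just placeholders I chose for illustration). It shows that the same seed produces the same embedding matrix, but a different vocabulary order assigns each word a different row of that matrix:

import torch
import torch.nn as nn

words = ["mathematician", "physicist", "philosopher"]

for vocab in (words, list(reversed(words))):
    torch.manual_seed(1)  # same seed before each initialization
    word_to_ix = {w: i for i, w in enumerate(vocab)}
    embeddings = nn.Embedding(len(vocab), 4)  # weight matrix is initialized identically both times
    row = word_to_ix["mathematician"]
    # same word, same seed, but a different row (and therefore a different vector)
    print(row, embeddings.weight[row].detach().tolist())

I also suspect that set(test_sentence) can enumerate its elements in a different order between Python runs (string hashing is randomized unless PYTHONHASHSEED is fixed), so without sorted() the word_to_ix mapping itself may change from run to run.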
