Here is the word2vec implementation:
%reset -f
import torch
from torch.autograd import Variable
import numpy as np
import torch.nn.functional as F
corpus = [
    'this test',
    'this separate test'
]
def get_input_layer(word_idx):
    x = torch.zeros(vocabulary_size).float()
    x[word_idx] = 1.0
    return x
def tokenize_corpus(corpus):
    tokens = [x.split() for x in corpus]
    return tokens
tokenized_corpus = tokenize_corpus(corpus)
vocabulary = []
for sentence in tokenized_corpus:
    for token in sentence:
        if token not in vocabulary:
            vocabulary.append(token)
word2idx = {w: idx for (idx, w) in enumerate(vocabulary)}
idx2word = {idx: w for (idx, w) in enumerate(vocabulary)}
window_size = 2
idx_pairs = []
# for each sentence
for sentence in tokenized_corpus:
    indices = [word2idx[word] for word in sentence]
    # for each word, treated as the center word
    for center_word_pos in range(len(indices)):
        # for each window position
        for w in range(-window_size, window_size + 1):
            context_word_pos = center_word_pos + w
            # make sure we do not jump outside the sentence
            if context_word_pos < 0 or context_word_pos >= len(indices) or center_word_pos == context_word_pos:
                continue
            context_word_idx = indices[context_word_pos]
            idx_pairs.append((indices[center_word_pos], context_word_idx))
idx_pairs = np.array(idx_pairs)  # it will be useful to have this as a numpy array
vocabulary_size = len(vocabulary)
embedding_dims = 4
W1 = Variable(torch.randn(embedding_dims, vocabulary_size).float(), requires_grad=True)
W2 = Variable(torch.randn(vocabulary_size, embedding_dims).float(), requires_grad=True)
num_epochs = 1
learning_rate = 0.001
for epo in range(num_epochs):
    loss_val = 0
    for data, target in idx_pairs:
        x = Variable(get_input_layer(data)).float()
        y_true = Variable(torch.from_numpy(np.array([target])).long())
        z1 = torch.matmul(W1, x)
        z2 = torch.matmul(W2, z1)
        log_softmax = F.log_softmax(z2, dim=0)
        loss = F.nll_loss(log_softmax.view(1,-1), y_true)
        print(float(loss))
        loss_val += loss.data.item()
        loss.backward()
        W1.data -= learning_rate * W1.grad.data
        W2.data -= learning_rate * W2.grad.data
        W1.grad.data.zero_()
        W2.grad.data.zero_()
        print(W1.shape)
        print(W2.shape)
        print(f'Loss at epo {epo}: {loss_val/len(idx_pairs)}')
This prints:
0.33185482025146484
torch.Size([4, 3])
torch.Size([3, 4])
Loss at epo 0: 0.041481852531433105
3.302438735961914
torch.Size([4, 3])
torch.Size([3, 4])
Loss at epo 0: 0.45428669452667236
2.3144636154174805
torch.Size([4, 3])
torch.Size([3, 4])
Loss at epo 0: 0.7435946464538574
0.33418864011764526
torch.Size([4, 3])
torch.Size([3, 4])
Loss at epo 0: 0.7853682264685631
1.0644199848175049
torch.Size([4, 3])
torch.Size([3, 4])
Loss at epo 0: 0.9184207245707512
0.4970806837081909
torch.Size([4, 3])
torch.Size([3, 4])
Loss at epo 0: 0.980555810034275
3.2861199378967285
torch.Size([4, 3])
torch.Size([3, 4])
Loss at epo 0: 1.3913208022713661
6.170125961303711
torch.Size([4, 3])
torch.Size([3, 4])
Loss at epo 0: 2.16258654743433
To change the code to use mse_loss, change y_true to float:
y_true = Variable(torch.from_numpy(np.array([target])).float())
Use mse_loss:
loss = F.mse_loss(log_softmax.view(1,-1), y_true)
With these updates in place, the loop becomes:
for epo in range(num_epochs):
    loss_val = 0
    for data, target in idx_pairs:
        x = Variable(get_input_layer(data)).float()
        y_true = Variable(torch.from_numpy(np.array([target])).float())
        z1 = torch.matmul(W1, x)
        z2 = torch.matmul(W2, z1)
        log_softmax = F.log_softmax(z2, dim=0)
        loss = F.mse_loss(log_softmax.view(1,-1), y_true)
        print(float(loss))
        loss_val += loss.data.item()
        loss.backward()
        W1.data -= learning_rate * W1.grad.data
        W2.data -= learning_rate * W2.grad.data
        W1.grad.data.zero_()
        W2.grad.data.zero_()
        print(W1.shape)
        print(W2.shape)
        print(f'Loss at epo {epo}: {loss_val/len(idx_pairs)}')
Now the output is:
41.75048828125
torch.Size([4, 3])
torch.Size([3, 4])
Loss at epo 0: 5.21881103515625
16.929386138916016
torch.Size([4, 3])
torch.Size([3, 4])
Loss at epo 0: 7.334984302520752
50.63690948486328
torch.Size([4, 3])
torch.Size([3, 4])
Loss at epo 0: 13.664597988128662
36.21110534667969
torch.Size([4, 3])
torch.Size([3, 4])
Loss at epo 0: 18.190986156463623
5.304859638214111
torch.Size([4, 3])
torch.Size([3, 4])
Loss at epo 0: 18.854093611240387
9.802173614501953
torch.Size([4, 3])
torch.Size([3, 4])
Loss at epo 0: 20.07936531305313
15.515325546264648
torch.Size([4, 3])
torch.Size([3, 4])
Loss at epo 0: 22.018781006336212
30.408292770385742
torch.Size([4, 3])
torch.Size([3, 4])
Loss at epo 0: 25.81981760263443
-c:12: UserWarning: Using a target size (torch.Size([1])) that is different to the input size (torch.Size([1, 3])). This will likely lead to incorrect results due to broadcasting. Please ensure they have the same size.
Why does mse loss not work as well as nll loss? Is this related to the PyTorch warning:
Using a target size (torch.Size([1])) that is different to the input size (torch.Size([1, 3])). This will likely lead to incorrect results due to broadcasting. Please ensure they have the same size.
?
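For context, here is a minimal, self-contained sketch of what that warning describes (the tensors are made-up illustrative values, not taken from the training run above): with an input of shape [1, 3] and a target of shape [1], mse_loss broadcasts the single target value across all three predictions, which computes a different quantity than comparing against a shape-matched target of [1, 3] such as a one-hot vector.

import torch
import torch.nn.functional as F

# Illustrative tensors only, not from the model above.
pred = torch.log_softmax(torch.tensor([[0.5, 1.5, -0.5]]), dim=1)  # shape [1, 3], like log_softmax.view(1, -1)
target_idx = torch.tensor([1.0])                                   # shape [1], the raw class index as a float

# Shapes differ, so the target is broadcast to [1, 3]:
# each of the 3 predictions is compared against the value 1.0.
print(F.mse_loss(pred, target_idx))

# A shape-matched target of shape [1, 3] (a one-hot vector) compares each position separately.
one_hot = torch.zeros(1, 3)
one_hot[0, 1] = 1.0
print(F.mse_loss(pred, one_hot))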