I tried two versions of the code for a simple classification task.
TensorFlow version
import tensorflow as tf
from tensorflow.keras import datasets, layers, optimizers, Sequential
import time

(xs, ys), (xs_, ys_) = datasets.mnist.load_data()
xs = tf.convert_to_tensor(xs, dtype=tf.float32) / 255.

db = tf.data.Dataset.from_tensor_slices((xs, ys))
db = db.batch(30000)

network = Sequential([layers.Dense(8, activation='relu'),
                      layers.Dense(8, activation='relu'),
                      layers.Dense(10)])
network.build(input_shape=(None, 28*28))

optimizer = optimizers.SGD(lr=0.1)

tic = time.time()
for epoch in range(5):
    for step, (x, y) in enumerate(db):
        with tf.GradientTape() as tape:
            # [b, 28, 28] => [b, 784]
            x = tf.reshape(x, (-1, 28*28))
            # [b, 784] => [b, 10]
            # print(tf.reduce_max(x), tf.reduce_min(x))
            out = network(x)
            # [b] => [b, 10]
            y_onehot = tf.one_hot(y, depth=10)
            # [b, 10]
            # print(out, y_onehot)
            loss = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(
                logits=out, labels=y_onehot))
        grads = tape.gradient(loss, network.trainable_variables)
        optimizer.apply_gradients(zip(grads, network.trainable_variables))
    print('epoch=', epoch, 'loss=', loss.numpy())
toc = time.time()
print('elapsed time:', toc - tic)

# test
xs_ = tf.convert_to_tensor(xs_, dtype=tf.float32) / 255.
xs_ = tf.reshape(xs_, [-1, 28*28])
out = network(xs_)
pred = tf.argmax(out, axis=1)
correct = tf.reduce_sum((pred == ys_).numpy().astype('int'))
print(correct.numpy())
PyTorch version
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torchvision import datasets, transforms
import matplotlib.pyplot as plt
import numpy
import time

print(torch.cuda.is_available())
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
print(device)

input_size = 28*28   # images are 28x28 pixels
output_size = 10     # there are 10 classes

train_loader = torch.utils.data.DataLoader(
    datasets.MNIST('~/data', train=True, download=True,
                   transform=transforms.Compose([
                       transforms.ToTensor(),
                       # transforms.Normalize((0.1307,), (0.3081,))
                   ])),
    batch_size=30000, shuffle=True, num_workers=2, pin_memory=True)

test_loader = torch.utils.data.DataLoader(
    datasets.MNIST('~/data', train=False, transform=transforms.Compose([
        transforms.ToTensor(),
        # transforms.Normalize((0.1307,), (0.3081,))
    ])),
    batch_size=1000, shuffle=True)

class FC2Layer(nn.Module):
    def __init__(self, input_size, n_hidden, output_size):
        super(FC2Layer, self).__init__()
        self.input_size = input_size
        self.network = nn.Sequential(
            nn.Linear(input_size, n_hidden),
            nn.ReLU(),
            nn.Linear(n_hidden, n_hidden),
            nn.ReLU(),
            nn.Linear(n_hidden, output_size),
            nn.LogSoftmax(dim=1)
        )

    def forward(self, x):
        # x = x.view(-1, self.input_size)
        return self.network(x)

accuracy_list = []

def train(epoch, model, perm=torch.arange(0, 784).long()):
    model.train()
    for batch_idx, (data, target) in enumerate(train_loader):
        # send to device
        data, target = data.to(device), target.to(device)
        # print(torch.min(data))
        # permute pixels
        data = data.view(-1, 28 * 28)
        # data = data[:, perm]
        # data = data.view(-1, 1, 28, 28)
        optimizer.zero_grad()
        output = model(data)
        loss = F.nll_loss(output, target)
        loss.backward()
        optimizer.step()
    print('epoch=', epoch, 'loss=', loss.cpu().item())

def test(model, perm=torch.arange(0, 784).long()):
    model.eval()
    test_loss = 0
    correct = 0
    for data, target in test_loader:
        # send to device
        data, target = data.to(device), target.to(device)
        # permute pixels
        data = data.view(-1, 28 * 28)
        # data = data[:, perm]
        # data = data.view(-1, 1, 28, 28)
        output = model(data)
        test_loss += F.nll_loss(output, target, reduction='sum').item()  # sum up batch loss
        pred = output.data.max(1, keepdim=True)[1]  # get the index of the max log-probability
        correct += pred.eq(target.data.view_as(pred)).cpu().sum().item()

    test_loss /= len(test_loader.dataset)
    accuracy = 100. * correct / len(test_loader.dataset)
    accuracy_list.append(accuracy)
    print('\nTest set: Average loss: {:.4f}, Accuracy: {}/{} ({:.0f}%)\n'.format(
        test_loss, correct, len(test_loader.dataset), accuracy))

n_hidden = 8  # number of hidden units

model_fnn = FC2Layer(input_size, n_hidden, output_size)
model_fnn.to(device)
optimizer = optim.SGD(model_fnn.parameters(), lr=0.01)

tic = time.time()
for epoch in range(0, 5):
    train(epoch, model_fnn)
toc = time.time()
print('elapsed time:', toc - tic)

test(model_fnn)
The two pieces of code above use the same neural-network architecture and run for 5 epochs. Only the training time is measured. The results are as follows:
elapsed time for PyTorch: 1.5
elapsed time for TensorFlow: 2.2
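One thing I am not sure about: if the PyTorch run happens on the GPU, CUDA kernels are launched asynchronously, so time.time() on the host could be read before all queued work has finished. Below is a minimal sketch of a synchronized variant of my timing block (this is an assumption on my side, reusing train and model_fnn from above, not what the code above actually does):

tic = time.time()
for epoch in range(0, 5):
    train(epoch, model_fnn)
# wait for all queued CUDA kernels to finish before reading the host clock,
# so the measured window covers the whole 5 epochs of training
if torch.cuda.is_available():
    torch.cuda.synchronize()
toc = time.time()
print('elapsed time (synchronized):', toc - tic)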
Why does the elapsed time differ so much?
Thanks!