I am studying AI, and I am now trying to understand and update code that was written for an older version of PyTorch. I have tried this with versions 1.0 and 1.5, with similar results. Variable is deprecated, and requires_grad=True should be passed when creating the tensor. I was also told that instead of torch.Tensor, which is an alias for torch.FloatTensor, I should use torch.tensor, which is supposed to infer the data type automatically.
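As far as I understand it, the old and new styles compare roughly like this (a minimal sketch of my own understanding, not taken from the course code):

# old (pre-0.4) style: wrap a FloatTensor in a Variable to track gradients
v = Variable(torch.Tensor([1.0, 2.0]), requires_grad=True)
# new style: torch.tensor infers the dtype from the data and takes requires_grad directly
t = torch.tensor([1.0, 2.0], requires_grad=True)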
But there are errors that I can't handle:
return map(lambda x: Variable(torch.cat(x, 0)), samples) #putting the samples in to pytorch variable
and also, when I remove Variable:
return map(lambda x: torch.cat(x, 0), samples)
I get the same error:
RuntimeError: Tensors must have same number of dimensions: got 2 and 1
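If it matters, I can reproduce the same message with a tiny standalone example, so I assume the tensors stored in memory simply do not all have the same number of dimensions (this snippet is just my own illustration, not part of the course code):

a = torch.ones(1, 5)  # 2-D tensor, shape (1, 5)
b = torch.ones(5)     # 1-D tensor, shape (5,)
torch.cat((a, b), 0)  # RuntimeError: Tensors must have same number of dimensions: got 2 and 1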
Code:
# AI for Self Driving Car
# Importing the libraries
import numpy as np
import random
import os
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
import torch.autograd as autograd
from torch.autograd import Variable
# Creating the architecture of the Neural Network
class Network(nn.Module):
    def __init__(self, input_size, nb_action):
        super(Network, self).__init__() # to use all tools from nn.Module
        self.input_size = input_size
        self.nb_action = nb_action
        self.fc1 = nn.Linear(input_size, 30) # creating the full connection between input & hidden layer
        self.fc2 = nn.Linear(30, nb_action)

    def forward(self, state): # forward propagation
        x = F.relu(self.fc1(state))
        q_values = self.fc2(x)
        return q_values
# Implementing Experience Replay
class ReplayMemory(object):
    def __init__(self, capacity):
        self.capacity = capacity # maximum number of events in memory
        self.memory = []

    def push(self, event): # append a new event to the memory, up to the maximum capacity
        self.memory.append(event)
        if len(self.memory) > self.capacity:
            del self.memory[0]

    def sample(self, batch_size): # take random samples from the memory
        # if list=((1,2,3),(4,5,6)) then zip(*list)=((1,4),(2,5),(3,6))
        # events = (state,action,reward) we need (state1,state2), (action1,action2), (reward1,reward2)
        samples = zip(*random.sample(self.memory, batch_size))
        # torch.cat aligns everything as (state, action, reward)
        # OLD-> return map(lambda x: Variable(torch.cat(x, 0)), samples) # putting samples in a pytorch variable
        return map(lambda x: torch.cat(x, 0), samples) #<-NEW # putting samples in a pytorch variable
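        # NOTE: torch.cat only succeeds if every tensor inside x has the same number of
        # dimensions, which is where the "got 2 and 1" RuntimeError above is raised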
# Implementing Deep Q Learning
class Dqn():
    def __init__(self, input_size, nb_action, gamma):
        self.gamma = gamma
        self.reward_window = []
        self.model = Network(input_size, nb_action)
        self.memory = ReplayMemory(100000) # memory capacity
        self.optimizer = optim.Adam(self.model.parameters(), lr = 0.001)
        #OLD-> self.last_state = torch.Tensor(input_size).unsqueeze(0)
        self.last_state = torch.tensor(input_size, requires_grad=True).unsqueeze(0) #<-NEW
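        # NOTE: input_size is an int, so torch.tensor(input_size) builds a 0-d scalar tensor;
        # after unsqueeze(0) self.last_state is 1-D with shape (1,), whereas the old
        # torch.Tensor(input_size).unsqueeze(0) produced a 2-D tensor of shape (1, input_size)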
        self.last_action = 0
        self.last_reward = 0

    def select_action(self, state):
        with torch.no_grad():
            probs = F.softmax(self.model(state), dim=1)*100 # T=100
            action = probs.multinomial(num_samples=1)
            return action.data[0,0]

    # obsolete
    # def select_action(self, state):
    #     probs = F.softmax(self.model(Variable(state, volatile = True))*100) # T=100
    #     action = probs.multinomial()
    #     return action.data[0,0]

    def learn(self, batch_state, batch_next_state, batch_reward, batch_action):
        outputs = self.model(batch_state).gather(1, batch_action.unsqueeze(1)).squeeze(1)
        next_outputs = self.model(batch_next_state).detach().max(1)[0]
        target = self.gamma*next_outputs + batch_reward
        td_loss = F.smooth_l1_loss(outputs, target)
        self.optimizer.zero_grad()
        td_loss.backward(retain_graph = True)
        self.optimizer.step()

    def update(self, reward, new_signal):
        #OLD-> new_state = torch.Tensor(new_signal).float().unsqueeze(0)
        new_state = torch.tensor(new_signal, requires_grad=True, dtype=torch.float).float().unsqueeze(0) #<-NEW
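        # NOTE: new_state is 2-D with shape (1, len(new_signal)); the extra .float() call is
        # redundant because dtype=torch.float already sets the type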
        self.memory.push((self.last_state, new_state, torch.LongTensor([int(self.last_action)]), torch.Tensor([self.last_reward])))
        action = self.select_action(new_state)
        if len(self.memory.memory) > 100:
            batch_state, batch_next_state, batch_action, batch_reward = self.memory.sample(100)
            self.learn(batch_state, batch_next_state, batch_reward, batch_action)
        self.last_action = action
        self.last_state = new_state
        self.last_reward = reward
        self.reward_window.append(reward)
        if len(self.reward_window) > 1000:
            del self.reward_window[0]
        return action

    def score(self):
        return sum(self.reward_window)/(len(self.reward_window)+1.)

    def save(self):
        torch.save({'state_dict': self.model.state_dict(),
                    'optimizer' : self.optimizer.state_dict(),
                   }, 'last_brain.pth')

    def load(self):
        if os.path.isfile('last_brain.pth'):
            print("=> loading checkpoint... ")
            checkpoint = torch.load('last_brain.pth')
            self.model.load_state_dict(checkpoint['state_dict'])
            self.optimizer.load_state_dict(checkpoint['optimizer'])
            print("done !")
        else:
            print("no checkpoint found...")