Я пытаюсь построить модель A3C с LSTM для игры в Breakout с помощью PyTorch, но при запуске код выдаёт серию ошибок.
main.py
from __future__ import print_function
import os
import torch
import torch.multiprocessing as mp
from envs import create_atari_env
from model import ActorCritic
from train import train
from test_script import test
import my_optim
# Gathering all the parameters (that we can modify to explore)
class Params():
    """Container for the hyperparameters of one training run.

    Every argument defaults to the value that used to be hard-coded, so
    ``Params()`` yields exactly the same configuration as before while
    still allowing individual values to be overridden for experiments.
    """

    def __init__(self, lr=0.0001, gamma=0.99, tau=1., seed=1,
                 num_processes=16, num_steps=20,
                 max_episode_length=10000, env_name='Breakout-v0'):
        self.lr = lr  # learning rate handed to the shared optimizer
        self.gamma = gamma  # presumably the reward discount factor - consumed by train.py, confirm there
        self.tau = tau  # presumably the advantage-estimation smoothing factor - confirm in train.py
        self.seed = seed  # seed passed to torch.manual_seed
        self.num_processes = num_processes  # number of training processes to spawn
        self.num_steps = num_steps  # presumably the rollout length per update - confirm in train.py
        self.max_episode_length = max_episode_length  # presumably the step cap per episode - confirm in train.py / test_script.py
        self.env_name = env_name  # environment id passed to create_atari_env
# Main run
def main():
    """Build the shared model and optimizer, then run one test and N training processes.

    Everything lives in a function called only under the ``__main__`` guard
    so that importing this module (which multiprocessing does in each child
    when the start method is "spawn") does not launch processes again.
    """
    os.environ['OMP_NUM_THREADS'] = '1'  # limit OpenMP to one thread per process; children inherit the environment
    params = Params()  # all hyperparameters of the run
    torch.manual_seed(params.seed)  # seeding torch's RNG for reproducible initialization
    env = create_atari_env(params.env_name)  # environment instance, used here only to read the observation/action spaces
    # The model whose parameters every worker process reads and updates.
    shared_model = ActorCritic(env.observation_space.shape[0], env.action_space)
    shared_model.share_memory()  # moving the parameters into shared memory so all processes see the same tensors
    optimizer = my_optim.SharedAdam(shared_model.parameters(), lr=params.lr)  # optimizer acting on the shared model
    optimizer.share_memory()  # the optimizer state is placed in shared memory as well
    processes = []
    # The 'test' process gets rank == num_processes, distinct from the training ranks 0..num_processes-1.
    p = mp.Process(target=test, args=(params.num_processes, params, shared_model))
    p.start()
    processes.append(p)
    for rank in range(params.num_processes):  # one training process per rank, each updating the shared model
        p = mp.Process(target=train, args=(rank, params, shared_model, optimizer))
        p.start()
        processes.append(p)
    for p in processes:  # block until every child process has exited
        p.join()


if __name__ == '__main__':
    main()
model.py
# AI for Breakout
# Importing the librairies
import numpy as np
import torch
import torch.nn as nn
import torch.nn.functional as F
# Drawing random weights and rescaling every row to a fixed L2 norm
def normalized_columns_initializer(weights, std=1.0):
    """Return a random tensor shaped like ``weights`` whose rows have L2 norm ``std``.

    Values are sampled from a standard normal distribution and each row
    (index along dim 0) is divided by its own Euclidean norm taken over
    dim 1, then multiplied by ``std``. ``weights`` is only used for its
    size, dtype and device; it is not modified.

    Args:
        weights: tensor with at least two dimensions providing the
            shape, dtype and device of the result.
        std: target L2 norm of every row of the result.

    Returns:
        A new tensor with the same size, dtype and device as ``weights``.
    """
    # Sampling with the dtype/device of `weights` keeps the result assignable
    # to it without an implicit conversion.
    out = torch.randn(weights.size(), dtype=weights.dtype, device=weights.device)
    # keepdim=True keeps the per-row norms broadcastable against `out`.
    out *= std / torch.sqrt(out.pow(2).sum(1, keepdim=True))
    return out
# Initializing the weights of the neural network in an optimal way for the learning
def weights_init(m):
    """Re-initialize a convolution or linear layer in place.

    Intended to be passed to ``Module.apply``. Layers whose class name
    contains ``'Conv'`` or ``'Linear'`` get weights drawn uniformly from
    ``[-b, b]`` with ``b = sqrt(6 / (fan_in + fan_out))`` and a zeroed bias;
    any other module is left untouched.

    Args:
        m: the module to initialize; modified in place.
    """
    classname = m.__class__.__name__  # the layer type is detected from its class name
    if 'Conv' in classname:
        weight_shape = list(m.weight.data.size())
        fan_in = np.prod(weight_shape[1:4])  # dim1 * dim2 * dim3
        fan_out = np.prod(weight_shape[2:4]) * weight_shape[0]  # dim0 * dim2 * dim3
    elif 'Linear' in classname:
        weight_shape = list(m.weight.data.size())
        fan_in = weight_shape[1]  # dim1
        fan_out = weight_shape[0]  # dim0
    else:
        return  # neither a convolution nor a full connection: nothing to do
    w_bound = np.sqrt(6. / (fan_in + fan_out))  # bound shrinks as the layer gets larger
    m.weight.data.uniform_(-w_bound, w_bound)
    if m.bias is not None:  # layers built with bias=False have no bias tensor to reset
        m.bias.data.fill_(0)
# Making the A3C brain
class ActorCritic(torch.nn.Module):
    """Convolutional encoder + LSTM cell with separate critic and actor heads.

    Four stride-2 3x3 convolutions feed an ``LSTMCell`` of 256 hidden units.
    The hidden state is mapped by two linear heads: a scalar state value
    (critic) and one score per action (actor).
    """

    # Size of the flattened output of the last convolution: 32 channels on a
    # 3x3 map. The spatial size depends on the input frames (42x42 frames, for
    # example, reduce to 3x3 through four stride-2 convolutions) - presumably
    # what create_atari_env produces; confirm against envs.py.
    conv_features = 32 * 3 * 3

    def __init__(self, num_inputs, action_space):
        """Build and initialize the layers.

        Args:
            num_inputs: number of channels of the input images.
            action_space: object exposing ``n``, the number of discrete actions.
        """
        super(ActorCritic, self).__init__()
        self.conv1 = nn.Conv2d(num_inputs, 32, 3, stride=2, padding=1)  # first convolution
        self.conv2 = nn.Conv2d(32, 32, 3, stride=2, padding=1)  # second convolution
        self.conv3 = nn.Conv2d(32, 32, 3, stride=2, padding=1)  # third convolution
        self.conv4 = nn.Conv2d(32, 32, 3, stride=2, padding=1)  # fourth convolution
        self.lstm = nn.LSTMCell(self.conv_features, 256)  # recurrent cell carrying state across time steps
        num_outputs = action_space.n  # number of possible actions
        self.critic_linear = nn.Linear(256, 1)  # critic head: one value per state
        self.actor_linear = nn.Linear(256, num_outputs)  # actor head: one score per action
        self.apply(weights_init)  # uniform re-initialization of every conv / linear layer
        # The heads are then overwritten with row-normalized random weights:
        # norm 0.01 for the actor and norm 1.0 for the critic.
        self.actor_linear.weight.data = normalized_columns_initializer(self.actor_linear.weight.data, 0.01)
        self.actor_linear.bias.data.fill_(0)
        self.critic_linear.weight.data = normalized_columns_initializer(self.critic_linear.weight.data, 1.0)
        self.critic_linear.bias.data.fill_(0)
        self.lstm.bias_ih.data.fill_(0)  # both LSTM bias vectors start at zero
        self.lstm.bias_hh.data.fill_(0)
        self.train()  # putting the module in training mode

    def forward(self, inputs):
        """Run one time step.

        Args:
            inputs: a pair ``(images, (hx, cx))`` holding the input images
                and the previous hidden and cell states of the LSTM.

        Returns:
            A triple ``(value, action_scores, (hx, cx))``: the critic output,
            the actor output and the updated LSTM states.
        """
        inputs, (hx, cx) = inputs  # separating the images from the recurrent state
        x = F.elu(self.conv1(inputs))
        x = F.elu(self.conv2(x))
        x = F.elu(self.conv3(x))
        x = F.elu(self.conv4(x))
        x = x.view(-1, self.conv_features)  # flattening each sample's feature map into a vector
        hx, cx = self.lstm(x, (hx, cx))  # new hidden & cell states from the features and the old states
        x = hx  # the hidden state is what both heads consume
        return self.critic_linear(x), self.actor_linear(x), (hx, cx)
Ошибка
[2020-06-16 10:58:11,665] Making new env: Breakout-v0
[2020-06-16 10:58:11,666] Making new env: Breakout-v0
[2020-06-16 10:58:11,676] Making new env: Breakout-v0
[2020-06-16 10:58:12,229] Clearing 2 monitor files from previous run (because force=True was provided)
Process Process-2:
Traceback (most recent call last):
Process Process-6:
Traceback (most recent call last):
File "/home/asimbhadra/anaconda3/lib/python3.7/multiprocessing/process.py", line 297, in _bootstrap
self.run()
Process Process-3:
File "/home/asimbhadra/anaconda3/lib/python3.7/multiprocessing/process.py", line 99, in run
self._target(*self._args, **self._kwargs)
File "/home/asimbhadra/anaconda3/lib/python3.7/multiprocessing/process.py", line 297, in _bootstrap
self.run()
Traceback (most recent call last):
Process Process-7:
Process Process-4:
File "/home/asimbhadra/Coding/artificial-intelligence/Breakout/code/train.py", line 27, in train
model.load_state_dict(state_dict())
File "/home/asimbhadra/anaconda3/lib/python3.7/multiprocessing/process.py", line 99, in run
self._target(*self._args, **self._kwargs)
File "/home/asimbhadra/anaconda3/lib/python3.7/multiprocessing/process.py", line 297, in _bootstrap
self.run()
Traceback (most recent call last):
Traceback (most recent call last):
File "/home/asimbhadra/Coding/artificial-intelligence/Breakout/code/train.py", line 27, in train
model.load_state_dict(state_dict())
File "/home/asimbhadra/anaconda3/lib/python3.7/multiprocessing/process.py", line 99, in run
self._target(*self._args, **self._kwargs)
File "/home/asimbhadra/anaconda3/lib/python3.7/multiprocessing/process.py", line 297, in _bootstrap
self.run()
NameError: name 'state_dict' is not defined
File "/home/asimbhadra/anaconda3/lib/python3.7/multiprocessing/process.py", line 297, in _bootstrap
self.run()
File "/home/asimbhadra/Coding/artificial-intelligence/Breakout/code/train.py", line 27, in train
model.load_state_dict(state_dict())
Process Process-8:
NameError: name 'state_dict' is not defined
NameError: name 'state_dict' is not defined
File "/home/asimbhadra/anaconda3/lib/python3.7/multiprocessing/process.py", line 99, in run
self._target(*self._args, **self._kwargs)
Traceback (most recent call last):
File "/home/asimbhadra/anaconda3/lib/python3.7/multiprocessing/process.py", line 99, in run
self._target(*self._args, **self._kwargs)
Process Process-11:
File "/home/asimbhadra/anaconda3/lib/python3.7/multiprocessing/process.py", line 297, in _bootstrap
self.run()
Process Process-15:
File "/home/asimbhadra/Coding/artificial-intelligence/Breakout/code/train.py", line 27, in train
model.load_state_dict(state_dict())
Traceback (most recent call last):
Process Process-12:
Process Process-16:
Process Process-10:
File "/home/asimbhadra/anaconda3/lib/python3.7/multiprocessing/process.py", line 99, in run
self._target(*self._args, **self._kwargs)
File "/home/asimbhadra/Coding/artificial-intelligence/Breakout/code/train.py", line 27, in train
model.load_state_dict(state_dict())
File "/home/asimbhadra/anaconda3/lib/python3.7/multiprocessing/process.py", line 297, in _bootstrap
self.run()
Traceback (most recent call last):
Traceback (most recent call last):
File "/home/asimbhadra/Coding/artificial-intelligence/Breakout/code/train.py", line 27, in train
model.load_state_dict(state_dict())
NameError: name 'state_dict' is not defined
File "/home/asimbhadra/anaconda3/lib/python3.7/multiprocessing/process.py", line 99, in run
self._target(*self._args, **self._kwargs)
[2020-06-16 10:58:12,961] Starting new video recorder writing to /home/asimbhadra/Coding/artificial-intelligence/Breakout/code/test/openaigym.video.0.14740.video000000.mp4
File "/home/asimbhadra/anaconda3/lib/python3.7/multiprocessing/process.py", line 297, in _bootstrap
self.run()
Traceback (most recent call last):
File "/home/asimbhadra/anaconda3/lib/python3.7/multiprocessing/process.py", line 297, in _bootstrap
self.run()
File "/home/asimbhadra/Coding/artificial-intelligence/Breakout/code/train.py", line 27, in train
model.load_state_dict(state_dict())
Traceback (most recent call last):
NameError: name 'state_dict' is not defined
NameError: name 'state_dict' is not defined
File "/home/asimbhadra/anaconda3/lib/python3.7/multiprocessing/process.py", line 99, in run
self._target(*self._args, **self._kwargs)
File "/home/asimbhadra/anaconda3/lib/python3.7/multiprocessing/process.py", line 297, in _bootstrap
self.run()
File "/home/asimbhadra/anaconda3/lib/python3.7/multiprocessing/process.py", line 99, in run
self._target(*self._args, **self._kwargs)
File "/home/asimbhadra/anaconda3/lib/python3.7/multiprocessing/process.py", line 297, in _bootstrap
self.run()
File "/home/asimbhadra/anaconda3/lib/python3.7/multiprocessing/process.py", line 99, in run
self._target(*self._args, **self._kwargs)
File "/home/asimbhadra/Coding/artificial-intelligence/Breakout/code/train.py", line 27, in train
model.load_state_dict(state_dict())
NameError: name 'state_dict' is not defined
File "/home/asimbhadra/Coding/artificial-intelligence/Breakout/code/train.py", line 27, in train
model.load_state_dict(state_dict())
NameError: name 'state_dict' is not defined
File "/home/asimbhadra/Coding/artificial-intelligence/Breakout/code/train.py", line 27, in train
model.load_state_dict(state_dict())
NameError: name 'state_dict' is not defined
File "/home/asimbhadra/anaconda3/lib/python3.7/multiprocessing/process.py", line 99, in run
self._target(*self._args, **self._kwargs)
Process Process-5:
Traceback (most recent call last):
NameError: name 'state_dict' is not defined
File "/home/asimbhadra/Coding/artificial-intelligence/Breakout/code/train.py", line 27, in train
model.load_state_dict(state_dict())
NameError: name 'state_dict' is not defined
Process Process-9:
File "/home/asimbhadra/anaconda3/lib/python3.7/multiprocessing/process.py", line 297, in _bootstrap
self.run()
Traceback (most recent call last):
File "/home/asimbhadra/anaconda3/lib/python3.7/multiprocessing/process.py", line 297, in _bootstrap
self.run()
File "/home/asimbhadra/anaconda3/lib/python3.7/multiprocessing/process.py", line 99, in run
self._target(*self._args, **self._kwargs)
File "/home/asimbhadra/anaconda3/lib/python3.7/multiprocessing/process.py", line 99, in run
self._target(*self._args, **self._kwargs)
File "/home/asimbhadra/Coding/artificial-intelligence/Breakout/code/train.py", line 27, in train
model.load_state_dict(state_dict())
Process Process-17:
File "/home/asimbhadra/Coding/artificial-intelligence/Breakout/code/train.py", line 27, in train
model.load_state_dict(state_dict())
NameError: name 'state_dict' is not defined
Traceback (most recent call last):
NameError: name 'state_dict' is not defined
File "/home/asimbhadra/anaconda3/lib/python3.7/multiprocessing/process.py", line 297, in _bootstrap
self.run()
File "/home/asimbhadra/anaconda3/lib/python3.7/multiprocessing/process.py", line 99, in run
self._target(*self._args, **self._kwargs)
File "/home/asimbhadra/Coding/artificial-intelligence/Breakout/code/train.py", line 27, in train
model.load_state_dict(state_dict())
NameError: name 'state_dict' is not defined
Process Process-14:
Traceback (most recent call last):
File "/home/asimbhadra/anaconda3/lib/python3.7/multiprocessing/process.py", line 297, in _bootstrap
self.run()
File "/home/asimbhadra/anaconda3/lib/python3.7/multiprocessing/process.py", line 99, in run
self._target(*self._args, **self._kwargs)
File "/home/asimbhadra/Coding/artificial-intelligence/Breakout/code/train.py", line 27, in train
model.load_state_dict(state_dict())
NameError: name 'state_dict' is not defined
Process Process-13:
Traceback (most recent call last):
File "/home/asimbhadra/anaconda3/lib/python3.7/multiprocessing/process.py", line 297, in _bootstrap
self.run()
File "/home/asimbhadra/anaconda3/lib/python3.7/multiprocessing/process.py", line 99, in run
self._target(*self._args, **self._kwargs)
File "/home/asimbhadra/Coding/artificial-intelligence/Breakout/code/train.py", line 27, in train
model.load_state_dict(state_dict())
NameError: name 'state_dict' is not defined
/home/asimbhadra/Coding/artificial-intelligence/Breakout/code/test_script.py:29: UserWarning: volatile was removed and now has no effect. Use `with torch.no_grad():` instead.
cx = Variable(torch.zeros(1, 256), volatile=True)
/home/asimbhadra/Coding/artificial-intelligence/Breakout/code/test_script.py:30: UserWarning: volatile was removed and now has no effect. Use `with torch.no_grad():` instead.
hx = Variable(torch.zeros(1, 256), volatile=True)
/home/asimbhadra/Coding/artificial-intelligence/Breakout/code/test_script.py:34: UserWarning: volatile was removed and now has no effect. Use `with torch.no_grad():` instead.
value, action_value, (hx, cx) = model((Variable(state.unsqueeze(0), volatile=True), (hx, cx)))
/home/asimbhadra/Coding/artificial-intelligence/Breakout/code/test_script.py:35: UserWarning: Implicit dimension choice for softmax has been deprecated. Change the call to include dim=X as an argument.
prob = F.softmax(action_value)
Process Process-1:
Traceback (most recent call last):
File "/home/asimbhadra/anaconda3/lib/python3.7/multiprocessing/process.py", line 297, in _bootstrap
self.run()
File "/home/asimbhadra/anaconda3/lib/python3.7/multiprocessing/process.py", line 99, in run
self._target(*self._args, **self._kwargs)
File "/home/asimbhadra/Coding/artificial-intelligence/Breakout/code/test_script.py", line 37, in test
state, reward, done, _ = env.step(action[0, 0]) # done = done or episode_length >= params.max_episode_length
IndexError: too many indices for array
[2020-06-16 10:58:16,057] Finished writing results. You can upload them to the scoreboard via gym.upload('/home/asimbhadra/Coding/artificial-intelligence/Breakout/code/test')
Программа запускается, но вскоре завершается, не записывая видео в тестовую папку. Этот код взят из курса «Artificial Intelligence A-Z» на Udemy. Кто-нибудь может мне с этим помочь? При необходимости могу предоставить остальные Python-скрипты.