Не удаётся построить модель A3C с LSTM для игры в Breakout с помощью PyTorch - PullRequest
0 голосов
/ 16 июня 2020

Я пытаюсь построить модель A3C с LSTM для игры в Breakout с помощью PyTorch, но при запуске код выдаёт серию ошибок.

main.py

from __future__ import print_function
import os
import torch
import torch.multiprocessing as mp
from envs import create_atari_env
from model import ActorCritic
from train import train
from test_script import test
import my_optim

# Gathering all the parameters (that we can modify to explore)
class Params():
    """Container for the hyper-parameters used by the training and test processes.

    Every value can be overridden with a keyword argument; calling ``Params()``
    with no arguments yields the original default configuration.

    Attributes:
        lr: learning rate handed to the shared optimizer.
        gamma: presumably the reward discount factor - TODO confirm in train.py.
        tau: presumably the generalized-advantage-estimation coefficient -
            TODO confirm in train.py.
        seed: seed passed to ``torch.manual_seed``.
        num_processes: number of training processes started by the main script.
        num_steps: presumably the rollout length between updates - TODO confirm.
        max_episode_length: presumably the cap on steps per episode - TODO confirm.
        env_name: Gym environment id passed to ``create_atari_env``.
    """

    def __init__(self, lr=0.0001, gamma=0.99, tau=1., seed=1,
                 num_processes=16, num_steps=20, max_episode_length=10000,
                 env_name='Breakout-v0'):
        self.lr = lr
        self.gamma = gamma
        self.tau = tau
        self.seed = seed
        self.num_processes = num_processes
        self.num_steps = num_steps
        self.max_episode_length = max_episode_length
        self.env_name = env_name

# Main run
def main():
    """Start one evaluation process and ``params.num_processes`` training processes.

    All processes receive the same ``shared_model`` whose parameters live in
    shared memory; the training processes additionally receive the shared
    optimizer. The function blocks until every child process has exited.
    """
    os.environ['OMP_NUM_THREADS'] = '1'  # restrict OpenMP to a single thread in each process
    params = Params()  # hyper-parameters handed to every child process
    torch.manual_seed(params.seed)  # make the parent's random initialisation reproducible
    env = create_atari_env(params.env_name)  # only used here to read the observation/action spaces
    # Model shared by all processes; the first observation dimension is used as the input channel count.
    shared_model = ActorCritic(env.observation_space.shape[0], env.action_space)
    shared_model.share_memory()  # move the parameters to shared memory so children see the same tensors
    optimizer = my_optim.SharedAdam(shared_model.parameters(), lr=params.lr)
    optimizer.share_memory()  # the optimizer state is shared as well, since it acts on the shared model

    processes = []
    # Evaluation process. It gets rank ``num_processes`` so its rank differs from
    # every training rank (0 .. num_processes - 1).
    # NOTE(review): presumably ``test`` only reads the shared model - confirm in test_script.py.
    p = mp.Process(target=test, args=(params.num_processes, params, shared_model))
    p.start()
    processes.append(p)
    # Training processes, one per rank.
    for rank in range(0, params.num_processes):
        p = mp.Process(target=train, args=(rank, params, shared_model, optimizer))
        p.start()
        processes.append(p)
    # Wait for every child process to finish before the parent exits.
    for p in processes:
        p.join()


# The guard is required by multiprocessing: with the "spawn" start method each
# child re-imports this module, and without the guard it would start its own
# set of processes again.
if __name__ == '__main__':
    main()

model.py

# AI for Breakout

# Importing the libraries
import numpy as np
import torch
import torch.nn as nn
import torch.nn.functional as F

# Drawing random weights and rescaling every row of the tensor to an L2 norm of std
def normalized_columns_initializer(weights, std=1.0):
    """Return a random tensor shaped like ``weights`` with rescaled rows.

    Values are drawn from a standard normal distribution, then each row
    (the slice along dimension 1) is multiplied by ``std`` divided by that
    row's L2 norm, so every row of the result has L2 norm ``std``.

    Args:
        weights: tensor whose size determines the size of the result; its
            values are not read.
        std: target L2 norm of each row.

    Returns:
        A new tensor; ``weights`` itself is left untouched.
    """
    sample = torch.randn(weights.size())
    # keepdim=True keeps a trailing dimension of size 1 so the norms broadcast over dim 1
    row_norms = torch.sqrt(sample.pow(2).sum(1, keepdim=True))
    scale = std / row_norms
    sample *= scale
    return sample

# Initializing the weights of the neural network in an optimal way for the learning
def weights_init(m):
    """Re-initialise the weights and bias of a convolutional or linear module in place.

    The module kind is detected from its class name ('Conv' or 'Linear').
    Weights are drawn uniformly from ``[-b, b]`` with
    ``b = sqrt(6 / (fan_in + fan_out))``; the bias, when the module has one,
    is set to zero. Modules of any other kind are left untouched, which makes
    the function suitable for ``module.apply(weights_init)``.

    Args:
        m: the module to initialise.
    """
    classname = m.__class__.__name__
    if classname.find('Conv') != -1:
        weight_shape = list(m.weight.data.size())
        fan_in = np.prod(weight_shape[1:4])  # dim1 * dim2 * dim3
        fan_out = np.prod(weight_shape[2:4]) * weight_shape[0]  # dim0 * dim2 * dim3
    elif classname.find('Linear') != -1:
        weight_shape = list(m.weight.data.size())
        fan_in = weight_shape[1]  # dim1
        fan_out = weight_shape[0]  # dim0
    else:
        return  # neither a convolution nor a full connection: nothing to do
    w_bound = float(np.sqrt(6. / (fan_in + fan_out)))
    m.weight.data.uniform_(-w_bound, w_bound)
    # Layers built with bias=False have m.bias set to None; skip them instead of failing.
    if m.bias is not None:
        m.bias.data.fill_(0)

# Making the A3C brain

class ActorCritic(torch.nn.Module):
    """Actor-critic network: four convolutions, an LSTM cell and two linear heads.

    The convolutional stack feeds a flattened feature vector of size
    ``32 * 3 * 3`` into an ``nn.LSTMCell`` with 256 hidden units. The hidden
    state is then mapped by ``critic_linear`` to a single value and by
    ``actor_linear`` to one output per action.
    """

    def __init__(self, num_inputs, action_space):
        """Build and initialise the layers.

        Args:
            num_inputs: number of channels of the input images.
            action_space: object whose ``n`` attribute gives the number of actions.
        """
        super(ActorCritic, self).__init__()
        channels = 32
        hidden_size = 256
        # Four 3x3 convolutions, each with stride 2 and padding 1.
        self.conv1 = nn.Conv2d(num_inputs, channels, 3, stride=2, padding=1)
        self.conv2 = nn.Conv2d(channels, channels, 3, stride=2, padding=1)
        self.conv3 = nn.Conv2d(channels, channels, 3, stride=2, padding=1)
        self.conv4 = nn.Conv2d(channels, channels, 3, stride=2, padding=1)
        # The cell input size assumes the last convolution yields 3x3 feature maps
        # (e.g. 42x42 input frames) - TODO confirm against create_atari_env.
        self.lstm = nn.LSTMCell(channels * 3 * 3, hidden_size)
        self.critic_linear = nn.Linear(hidden_size, 1)  # value head
        self.actor_linear = nn.Linear(hidden_size, action_space.n)  # one output per action
        # Uniform initialisation of every Conv/Linear layer first ...
        self.apply(weights_init)
        # ... then both heads get row-normalised weights (norm 0.01 for the
        # actor, 1.0 for the critic) and zero biases.
        for head, std in ((self.actor_linear, 0.01), (self.critic_linear, 1.0)):
            head.weight.data = normalized_columns_initializer(head.weight.data, std)
            head.bias.data.fill_(0)
        # Both LSTM bias vectors start at zero.
        for lstm_bias in (self.lstm.bias_ih, self.lstm.bias_hh):
            lstm_bias.data.fill_(0)
        self.train()  # put the module in training mode

    def forward(self, inputs):
        """Run one step of the network.

        Args:
            inputs: a pair ``(images, (hx, cx))`` holding the input images and
                the previous hidden and cell states of the LSTM.

        Returns:
            A tuple ``(value, action_scores, (hx, cx))``: the critic output, the
            actor output and the updated LSTM states. The actor output is the raw
            linear output; no softmax is applied here.
        """
        images, (hx, cx) = inputs
        features = images
        # Each convolution is followed by an ELU non-linearity.
        for conv in (self.conv1, self.conv2, self.conv3, self.conv4):
            features = F.elu(conv(features))
        flat = features.view(-1, self.lstm.input_size)  # flatten to (batch, 32 * 3 * 3)
        hx, cx = self.lstm(flat, (hx, cx))
        # Both heads read the new hidden state.
        return self.critic_linear(hx), self.actor_linear(hx), (hx, cx)

Ошибка

[2020-06-16 10:58:11,665] Making new env: Breakout-v0
[2020-06-16 10:58:11,666] Making new env: Breakout-v0
[2020-06-16 10:58:11,676] Making new env: Breakout-v0
[2020-06-16 10:58:12,229] Clearing 2 monitor files from previous run (because force=True was provided)
Process Process-2:
Traceback (most recent call last):
Process Process-6:
Traceback (most recent call last):
  File "/home/asimbhadra/anaconda3/lib/python3.7/multiprocessing/process.py", line 297, in _bootstrap
    self.run()
Process Process-3:
  File "/home/asimbhadra/anaconda3/lib/python3.7/multiprocessing/process.py", line 99, in run
    self._target(*self._args, **self._kwargs)
  File "/home/asimbhadra/anaconda3/lib/python3.7/multiprocessing/process.py", line 297, in _bootstrap
    self.run()
Traceback (most recent call last):
Process Process-7:
Process Process-4:
  File "/home/asimbhadra/Coding/artificial-intelligence/Breakout/code/train.py", line 27, in train
    model.load_state_dict(state_dict())
  File "/home/asimbhadra/anaconda3/lib/python3.7/multiprocessing/process.py", line 99, in run
    self._target(*self._args, **self._kwargs)
  File "/home/asimbhadra/anaconda3/lib/python3.7/multiprocessing/process.py", line 297, in _bootstrap
    self.run()
Traceback (most recent call last):
Traceback (most recent call last):
  File "/home/asimbhadra/Coding/artificial-intelligence/Breakout/code/train.py", line 27, in train
    model.load_state_dict(state_dict())
  File "/home/asimbhadra/anaconda3/lib/python3.7/multiprocessing/process.py", line 99, in run
    self._target(*self._args, **self._kwargs)
  File "/home/asimbhadra/anaconda3/lib/python3.7/multiprocessing/process.py", line 297, in _bootstrap
    self.run()
NameError: name 'state_dict' is not defined
  File "/home/asimbhadra/anaconda3/lib/python3.7/multiprocessing/process.py", line 297, in _bootstrap
    self.run()
  File "/home/asimbhadra/Coding/artificial-intelligence/Breakout/code/train.py", line 27, in train
    model.load_state_dict(state_dict())
Process Process-8:
NameError: name 'state_dict' is not defined
NameError: name 'state_dict' is not defined
  File "/home/asimbhadra/anaconda3/lib/python3.7/multiprocessing/process.py", line 99, in run
    self._target(*self._args, **self._kwargs)
Traceback (most recent call last):
  File "/home/asimbhadra/anaconda3/lib/python3.7/multiprocessing/process.py", line 99, in run
    self._target(*self._args, **self._kwargs)
Process Process-11:
  File "/home/asimbhadra/anaconda3/lib/python3.7/multiprocessing/process.py", line 297, in _bootstrap
    self.run()
Process Process-15:
  File "/home/asimbhadra/Coding/artificial-intelligence/Breakout/code/train.py", line 27, in train
    model.load_state_dict(state_dict())
Traceback (most recent call last):
Process Process-12:
Process Process-16:
Process Process-10:
  File "/home/asimbhadra/anaconda3/lib/python3.7/multiprocessing/process.py", line 99, in run
    self._target(*self._args, **self._kwargs)
  File "/home/asimbhadra/Coding/artificial-intelligence/Breakout/code/train.py", line 27, in train
    model.load_state_dict(state_dict())
  File "/home/asimbhadra/anaconda3/lib/python3.7/multiprocessing/process.py", line 297, in _bootstrap
    self.run()
Traceback (most recent call last):
Traceback (most recent call last):
  File "/home/asimbhadra/Coding/artificial-intelligence/Breakout/code/train.py", line 27, in train
    model.load_state_dict(state_dict())
NameError: name 'state_dict' is not defined
  File "/home/asimbhadra/anaconda3/lib/python3.7/multiprocessing/process.py", line 99, in run
    self._target(*self._args, **self._kwargs)
[2020-06-16 10:58:12,961] Starting new video recorder writing to /home/asimbhadra/Coding/artificial-intelligence/Breakout/code/test/openaigym.video.0.14740.video000000.mp4
  File "/home/asimbhadra/anaconda3/lib/python3.7/multiprocessing/process.py", line 297, in _bootstrap
    self.run()
Traceback (most recent call last):
  File "/home/asimbhadra/anaconda3/lib/python3.7/multiprocessing/process.py", line 297, in _bootstrap
    self.run()
  File "/home/asimbhadra/Coding/artificial-intelligence/Breakout/code/train.py", line 27, in train
    model.load_state_dict(state_dict())
Traceback (most recent call last):
NameError: name 'state_dict' is not defined
NameError: name 'state_dict' is not defined
  File "/home/asimbhadra/anaconda3/lib/python3.7/multiprocessing/process.py", line 99, in run
    self._target(*self._args, **self._kwargs)
  File "/home/asimbhadra/anaconda3/lib/python3.7/multiprocessing/process.py", line 297, in _bootstrap
    self.run()
  File "/home/asimbhadra/anaconda3/lib/python3.7/multiprocessing/process.py", line 99, in run
    self._target(*self._args, **self._kwargs)
  File "/home/asimbhadra/anaconda3/lib/python3.7/multiprocessing/process.py", line 297, in _bootstrap
    self.run()
  File "/home/asimbhadra/anaconda3/lib/python3.7/multiprocessing/process.py", line 99, in run
    self._target(*self._args, **self._kwargs)
  File "/home/asimbhadra/Coding/artificial-intelligence/Breakout/code/train.py", line 27, in train
    model.load_state_dict(state_dict())
NameError: name 'state_dict' is not defined
  File "/home/asimbhadra/Coding/artificial-intelligence/Breakout/code/train.py", line 27, in train
    model.load_state_dict(state_dict())
NameError: name 'state_dict' is not defined
  File "/home/asimbhadra/Coding/artificial-intelligence/Breakout/code/train.py", line 27, in train
    model.load_state_dict(state_dict())
NameError: name 'state_dict' is not defined
  File "/home/asimbhadra/anaconda3/lib/python3.7/multiprocessing/process.py", line 99, in run
    self._target(*self._args, **self._kwargs)
Process Process-5:
Traceback (most recent call last):
NameError: name 'state_dict' is not defined
  File "/home/asimbhadra/Coding/artificial-intelligence/Breakout/code/train.py", line 27, in train
    model.load_state_dict(state_dict())
NameError: name 'state_dict' is not defined
Process Process-9:
  File "/home/asimbhadra/anaconda3/lib/python3.7/multiprocessing/process.py", line 297, in _bootstrap
    self.run()
Traceback (most recent call last):
  File "/home/asimbhadra/anaconda3/lib/python3.7/multiprocessing/process.py", line 297, in _bootstrap
    self.run()
  File "/home/asimbhadra/anaconda3/lib/python3.7/multiprocessing/process.py", line 99, in run
    self._target(*self._args, **self._kwargs)
  File "/home/asimbhadra/anaconda3/lib/python3.7/multiprocessing/process.py", line 99, in run
    self._target(*self._args, **self._kwargs)
  File "/home/asimbhadra/Coding/artificial-intelligence/Breakout/code/train.py", line 27, in train
    model.load_state_dict(state_dict())
Process Process-17:
  File "/home/asimbhadra/Coding/artificial-intelligence/Breakout/code/train.py", line 27, in train
    model.load_state_dict(state_dict())
NameError: name 'state_dict' is not defined
Traceback (most recent call last):
NameError: name 'state_dict' is not defined
  File "/home/asimbhadra/anaconda3/lib/python3.7/multiprocessing/process.py", line 297, in _bootstrap
    self.run()
  File "/home/asimbhadra/anaconda3/lib/python3.7/multiprocessing/process.py", line 99, in run
    self._target(*self._args, **self._kwargs)
  File "/home/asimbhadra/Coding/artificial-intelligence/Breakout/code/train.py", line 27, in train
    model.load_state_dict(state_dict())
NameError: name 'state_dict' is not defined
Process Process-14:
Traceback (most recent call last):
  File "/home/asimbhadra/anaconda3/lib/python3.7/multiprocessing/process.py", line 297, in _bootstrap
    self.run()
  File "/home/asimbhadra/anaconda3/lib/python3.7/multiprocessing/process.py", line 99, in run
    self._target(*self._args, **self._kwargs)
  File "/home/asimbhadra/Coding/artificial-intelligence/Breakout/code/train.py", line 27, in train
    model.load_state_dict(state_dict())
NameError: name 'state_dict' is not defined
Process Process-13:
Traceback (most recent call last):
  File "/home/asimbhadra/anaconda3/lib/python3.7/multiprocessing/process.py", line 297, in _bootstrap
    self.run()
  File "/home/asimbhadra/anaconda3/lib/python3.7/multiprocessing/process.py", line 99, in run
    self._target(*self._args, **self._kwargs)
  File "/home/asimbhadra/Coding/artificial-intelligence/Breakout/code/train.py", line 27, in train
    model.load_state_dict(state_dict())
NameError: name 'state_dict' is not defined
/home/asimbhadra/Coding/artificial-intelligence/Breakout/code/test_script.py:29: UserWarning: volatile was removed and now has no effect. Use `with torch.no_grad():` instead.
  cx = Variable(torch.zeros(1, 256), volatile=True)
/home/asimbhadra/Coding/artificial-intelligence/Breakout/code/test_script.py:30: UserWarning: volatile was removed and now has no effect. Use `with torch.no_grad():` instead.
  hx = Variable(torch.zeros(1, 256), volatile=True)
/home/asimbhadra/Coding/artificial-intelligence/Breakout/code/test_script.py:34: UserWarning: volatile was removed and now has no effect. Use `with torch.no_grad():` instead.
  value, action_value, (hx, cx) = model((Variable(state.unsqueeze(0), volatile=True), (hx, cx)))
/home/asimbhadra/Coding/artificial-intelligence/Breakout/code/test_script.py:35: UserWarning: Implicit dimension choice for softmax has been deprecated. Change the call to include dim=X as an argument.
  prob = F.softmax(action_value)
Process Process-1:
Traceback (most recent call last):
  File "/home/asimbhadra/anaconda3/lib/python3.7/multiprocessing/process.py", line 297, in _bootstrap
    self.run()
  File "/home/asimbhadra/anaconda3/lib/python3.7/multiprocessing/process.py", line 99, in run
    self._target(*self._args, **self._kwargs)
  File "/home/asimbhadra/Coding/artificial-intelligence/Breakout/code/test_script.py", line 37, in test
    state, reward, done, _ = env.step(action[0, 0]) # done = done or episode_length >= params.max_episode_length
IndexError: too many indices for array
[2020-06-16 10:58:16,057] Finished writing results. You can upload them to the scoreboard via gym.upload('/home/asimbhadra/Coding/artificial-intelligence/Breakout/code/test')

Скрипт запускается, но вскоре завершается, не записывая видео в папку test. Этот код взят из курса «Artificial Intelligence A-Z» на Udemy. Кто-нибудь может мне с этим помочь? При необходимости могу предоставить остальные Python-скрипты.

...