I have tried both the first and the second neural network architecture below, and with all other variables held constant I consistently get better results with the second one. Why would what looks like the same architecture give different results? Or am I mistaken somewhere?
First:
import torch
import torch.nn as nn


class DuelingQNetwork(nn.Module):
    """Actor (Policy) Model."""

    def __init__(self, state_size, action_size, seed, hidden_advantage=[512, 512], hidden_state_value=[512, 512]):
        super(DuelingQNetwork, self).__init__()
        self.seed = torch.manual_seed(seed)

        # Advantage stream: state_size -> hidden layers -> action_size
        hidden_layers = [state_size] + hidden_advantage
        self.adv_network = nn.Sequential(nn.Linear(hidden_layers[0], hidden_layers[1]), nn.ReLU(),
                                         nn.Linear(hidden_layers[1], hidden_layers[2]), nn.ReLU(),
                                         nn.Linear(hidden_layers[2], action_size))

        # Value stream: state_size -> hidden layers -> 1
        hidden_layers = [state_size] + hidden_state_value
        self.val_network = nn.Sequential(nn.Linear(hidden_layers[0], hidden_layers[1]), nn.ReLU(),
                                         nn.Linear(hidden_layers[1], hidden_layers[2]), nn.ReLU(),
                                         nn.Linear(hidden_layers[2], 1))

    def forward(self, state):
        """Build a network that maps state -> action values."""
        # Perform a feed-forward pass through the two streams
        advantage = self.adv_network(state)
        value = self.val_network(state)
        # Dueling aggregation: Q = V + (A - mean(A))
        return advantage.sub_(advantage.mean()).add_(value)
Second:
from collections import OrderedDict


class DuelingQNetwork(nn.Module):

    def __init__(self, state_size, action_size, seed, hidden_advantage=[512, 512], hidden_state_value=[512, 512]):
        super(DuelingQNetwork, self).__init__()
        self.seed = torch.manual_seed(seed)

        # Iterate over the layer sizes to create the advantage network
        hidden_layers = [state_size] + hidden_advantage
        advantage_layers = OrderedDict()
        for idx, (hl_in, hl_out) in enumerate(zip(hidden_layers[:-1], hidden_layers[1:])):
            advantage_layers['adv_fc_' + str(idx)] = nn.Linear(hl_in, hl_out)
            advantage_layers['adv_activation_' + str(idx)] = nn.ReLU()
        # Create the output layer for the advantage network
        advantage_layers['adv_output'] = nn.Linear(hidden_layers[-1], action_size)
        self.network_advantage = nn.Sequential(advantage_layers)

        # Iterate over the layer sizes to create the value network
        value_layers = OrderedDict()
        hidden_layers = [state_size] + hidden_state_value
        for idx, (hl_in, hl_out) in enumerate(zip(hidden_layers[:-1], hidden_layers[1:])):
            # Add a linear layer
            value_layers['val_fc_' + str(idx)] = nn.Linear(hl_in, hl_out)
            # Add an activation function
            value_layers['val_activation_' + str(idx)] = nn.ReLU()
        # Create the output layer for the value network
        value_layers['val_output'] = nn.Linear(hidden_layers[-1], 1)
        # Create the value network
        self.network_value = nn.Sequential(value_layers)

    def forward(self, state):
        """Build a network that maps state -> action values."""
        # Perform a feed-forward pass through the two streams
        advantage = self.network_advantage(state)
        value = self.network_value(state)
        # Dueling aggregation: Q = V + (A - mean(A))
        return advantage.sub_(advantage.mean()).add_(value)
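To check whether the two versions really build the same network, one could compare their layer shapes and initial weights under the same seed. Below is a minimal sketch of such a check; the names DuelingQNetworkV1 and DuelingQNetworkV2 are just placeholders for the two class definitions above, and state_size/action_size are arbitrary example values:

    import torch

    # Hypothetical renames so both definitions can coexist in one script, e.g.:
    # from model_v1 import DuelingQNetwork as DuelingQNetworkV1
    # from model_v2 import DuelingQNetwork as DuelingQNetworkV2

    state_size, action_size, seed = 37, 4, 0

    net1 = DuelingQNetworkV1(state_size, action_size, seed)
    net2 = DuelingQNetworkV2(state_size, action_size, seed)

    # Do the two versions create layers of the same shapes, in the same order?
    for (n1, p1), (n2, p2) in zip(net1.named_parameters(), net2.named_parameters()):
        print(n1, tuple(p1.shape), '<->', n2, tuple(p2.shape))

    # Since each constructor calls torch.manual_seed(seed) and builds the layers
    # in the same order, the initial weights should also be identical.
    same = all(torch.equal(p1, p2)
               for p1, p2 in zip(net1.parameters(), net2.parameters()))
    print('identical initial parameters:', same)

If the shapes and initial parameters match, any remaining difference in training results would have to come from something outside the model definition.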