NEAT не находит оптимальное решение задачи - PullRequest
0 голосов
/ 12 октября 2018

Я пытаюсь решить задачу BipedalWalker из OpenAI Gym с помощью NEAT.

Описание задачи: https://gym.openai.com/envs/BipedalWalker-v2/

Я пытаюсь решить эту задачу с помощью neat-python, но даже после 1000 поколений решение так и не найдено. Что мне делать?

avg_fitness

avg_fitness

speciation

speciation

Digraph

Digraph

from __future__ import print_function
import os
import neat
import visualize
import gym
import numpy as np
from gym import wrappers

# Single shared environment instance; eval_genomes resets and steps it
# for every genome it evaluates.
env = gym.make("BipedalWalker-v2")

def eval_genomes(genomes, config):
    """Assign a fitness to every genome by running it in the shared ``env``.

    Each genome is turned into a feed-forward network that drives the
    module-level environment until the episode reports ``done``. The reward
    summed over the episode(s), divided by the number of episodes, is stored
    in ``genome.fitness``.

    Args:
        genomes: Iterable of ``(genome_id, genome)`` pairs.
        config: Configuration object handed to
            ``neat.nn.FeedForwardNetwork.create``.
    """
    episodes = 1  # number of rollouts averaged into each genome's fitness

    for genome_id, genome in genomes:
        genome.fitness = 0
        net = neat.nn.FeedForwardNetwork.create(genome, config)
        total_reward = 0
        for _ in range(episodes):
            # Reset exactly once per episode and keep the returned
            # observation, so the network's first action is computed from
            # the episode that is actually being played.
            observation = env.reset()
            done = False
            while not done:
                # Clamp the network outputs to [-1, 1] before stepping.
                action = np.clip(net.activate(observation), -1, 1)
                observation, reward, done, _ = env.step(action)
                total_reward += reward
        genome.fitness = total_reward / episodes

def run(config_file):
    """Evolve a population with ``eval_genomes`` and visualize the result.

    Loads the NEAT configuration, attaches progress, statistics, and
    checkpoint reporters, runs the evolution, then prints the best genome
    and renders the network, fitness statistics, and speciation plots.

    Args:
        config_file: Path to the NEAT configuration file.
    """
    # Load configuration.
    config = neat.Config(neat.DefaultGenome, neat.DefaultReproduction,
                         neat.DefaultSpeciesSet, neat.DefaultStagnation,
                         config_file)

    # Create the population, which is the top-level object for a NEAT run.
    p = neat.Population(config)

    # Show progress in the terminal, collect statistics for the plots below,
    # and write a checkpoint every 5 generations.
    p.add_reporter(neat.StdOutReporter(True))
    stats = neat.StatisticsReporter()
    p.add_reporter(stats)
    p.add_reporter(neat.Checkpointer(5))

    # Run for up to 200 generations.
    winner = p.run(eval_genomes, 200)

    # Display the winning genome.
    print('\nBest genome:\n{!s}'.format(winner))

    # Render the winning network and the statistics gathered during the run.
    print('\nOutput:')
    visualize.draw_net(config, winner, True)
    visualize.plot_stats(stats, ylog=False, view=True)
    visualize.plot_species(stats, view=True)



if __name__ == '__main__':
    # Locate the configuration file next to this script, so the run works
    # regardless of the current working directory.
    script_dir = os.path.dirname(__file__)
    run(os.path.join(script_dir, 'config-feedforward'))

Это гиперпараметры:

fitness_criterion = mean

fitness_threshold = 300

pop_size = 100

reset_on_extinction = True

no_fitness_termination = 1

activation_default = relu

activation_mutate_rate = 0.3

activation_options = sigmoid tanh

aggregation_default = sum

aggregation_mutate_rate = 0.3

aggregation_options = mean

bias_init_mean = 0

bias_init_stdev = 1.0

bias_max_value = 100

bias_min_value = -100

bias_mutate_power = 0.5

bias_mutate_rate = 0.7

bias_replace_rate = 0.1

compatibility_disjoint_coefficient = 1.0

compatibility_weight_coefficient = 0.5

conn_add_prob = 0.5

conn_delete_prob = 0.5

enabled_default = True

enabled_mutate_rate = 0.01

feed_forward = True

initial_connection = full

node_add_prob = 0.3

node_delete_prob = 0.3

num_hidden = 0

num_inputs = 24

num_outputs = 4

response_init_mean = 2

response_init_stdev= 1

response_max_value = 5.0

response_min_value = -5.0

response_mutate_power = 0.0

response_mutate_rate = 0.0

response_replace_rate = 0.0

weight_init_mean = 0.5

weight_init_stdev = 1.0

weight_max_value = 30

weight_min_value = -30

weight_mutate_power = 0.5

weight_mutate_rate = 0.8

weight_replace_rate = 0.1

compatibility_threshold = 3.0

species_fitness_func = mean

max_stagnation = 20

species_elitism = 3

elitism = 3

survival_threshold = 0.3

...