Мой партнер по проекту и я в настоящее время сталкиваемся с проблемой в нашем последнем университетском проекте.Наша миссия - внедрить нейронную сеть, в которую играет игра Pong.Мы задаем положение мяча, скорость мяча и положение лопастей в нашей сети, и имеем три выхода: UP DOWN DO_NOTHING.После того, как игрок набрал 11 очков, мы тренируем сеть со всеми государствами, принятыми решениями и вознаграждением принятых решений (см. Reward_cal ()).Проблема, с которой мы сталкиваемся, заключается в том, что потери постоянно находятся на определенном уровне, только в зависимости от скорости обучения.Из-за этого сеть всегда принимает одно и то же решение, даже если мы вознаграждаем его за ужасно неправильное.
Пожалуйста, помогите нам выяснить, что мы сделали не так, мы благодарны за каждый совет!Ниже приведен наш код, пожалуйста, не стесняйтесь спрашивать, если есть какие-либо вопросы.Мы довольно плохо знакомы с этой темой, поэтому, пожалуйста, не будьте грубым, если есть что-то совершенно глупое: D
это наш код:
import sys, pygame, time
import numpy as np
import random
from os.path import isfile
import keras
from keras.optimizers import SGD
from keras.layers import Dense
from keras.layers.core import Flatten
pygame.init()
pygame.mixer.init()
#surface of the game
width = 400
height = 600
black = 0, 0, 0 #RGB value
screen = pygame.display.set_mode((width, height), 0, 32)
#(Resolution(x,y), flags, colour depth)
font = pygame.font.SysFont('arial', 36, bold=True)
pygame.display.set_caption('PyPong') #title of window
#consts for the game
acceleration = 0.0025 # ball becomes faster during the game
mousematch = 1
delay_time = 0
paddleP = pygame.image.load("schlaeger.gif")
playerRect = paddleP.get_rect(center = (200, 550))
paddleC = pygame.image.load("schlaeger.gif")
comRect = paddleC.get_rect(center=(200,50))
ball = pygame.image.load("ball.gif")
ballRect = ball.get_rect(center=(200,300))
#Variables for the game
pointsPlayer = [0]
pointsCom = [0]
playermove = [0, 0]
speedbar = [0, 0]
speed = [6, 6]
hitX = 0
#neural const
learning_rate = 0.01
number_of_actions = 3
filehandler = open('logfile.log', 'a')
filename = sys.argv[1]
#neural variables
states, action_prob_grads, rewards, action_probs = [], [], [], []
reward_sum = 0
episode_number = 0
reward_sums = []
pygame.display.flip()
def pointcontrol(): #having a look at the points in the game and restart()
if pointsPlayer[0] >= 11:
print('Player Won ', pointsPlayer[0], '/', pointsCom[0])
restart(1)
return 1
if pointsCom[0] >= 11:
print('Computer Won ', pointsPlayer[0], '/', pointsCom[0])
restart(1)
return 1
elif pointsCom[0] < 11 and pointsPlayer[0] < 11:
restart(0)
return 0
def restart(finished): #resetting the positions and the ball speed and
(if point limit was reached) the points
ballRect.center = 200,300
comRect.center = 200,50
playerRect.center = 200, 550
speed[0] = 6
speed[1] = 6
screen.blit(paddleC, comRect)
screen.blit(paddleP, playerRect)
pygame.display.flip()
if finished:
pointsPlayer[0] = 0
pointsCom[0] = 0
def reward_cal(r, gamma = 0.99): #rewarding every move
discounted_r = np.zeros_like(r) #making zero array with size of
reward array
running_add = 0
for t in range(r.size - 1, 0, -1): #iterating beginning in the end
if r[t] != 0: #if reward -1 or 1 (point made or lost)
running_add = 0
running_add = running_add * gamma + r[t] #making every move
before the point the same reward but a little bit smaller
discounted_r[t] = running_add #putting the value in the new
reward array
#e.g r = 000001000-1 -> discounted_r = 0.5 0.6 0.7 0.8 0.9 1 -0.7
-0.8 -0.9 -1 values are not really correct just to make it clear
return discounted_r
#neural net
model = keras.models.Sequential()
model.add(Dense(16, input_dim = (8), kernel_initializer =
'glorot_normal', activation = 'relu'))
model.add(Dense(32, kernel_initializer = 'glorot_normal', activation =
'relu'))
model.add(Dense(number_of_actions, activation='softmax'))
model.compile(loss = 'categorical_crossentropy', optimizer = 'adam')
model.summary()
if isfile(filename):
model.load_weights(filename)
# one ball movement before the AI gets to make a decision
ballRect = ballRect.move(speed)
reward_temp = 0.0
if ballRect.left < 0 or ballRect.right > width:
speed[0] = -speed[0]
if ballRect.top < 0:
pointsPlayer[0] += 1
reward_temp = 1.0
done = pointcontrol()
if ballRect.bottom > height:
pointsCom[0] += 1
done = pointcontrol()
reward_temp = -1.0
if ballRect.colliderect(playerRect):
speed[1] = -speed[1]
if ballRect.colliderect(comRect):
speed[1] = -speed[1]
if speed[0] < 0:
speed[0] -= acceleration
if speed[0] > 0:
speed[0] += acceleration
if speed[1] < 0:
speed[1] -= acceleration
if speed[1] > 0 :
speed[1] += acceleration
while True: #game
for event in pygame.event.get():
if event.type == pygame.QUIT:
pygame.quit()
sys.exit()
state = np.array([ballRect.center[0], ballRect.center[1], speed[0],
speed[1], playerRect.center[0], playerRect.center[1], comRect.center[0],
comRect.center[1]])
states.append(state)
action_prob = model.predict_on_batch(state.reshape(1, 8))[0, :]
action_probs.append(action_prob)
action = np.random.choice(number_of_actions, p=action_prob)
if(action == 0): playermove = [0, 0]
elif(action == 1): playermove = [5, 0]
elif(action == 2): playermove = [-5, 0]
playerRect = playerRect.move(playermove)
y = np.array([-1, -1, -1])
y[action] = 1
action_prob_grads.append(y-action_prob)
#enemy move
comRect = comRect.move(speedbar)
ballY = ballRect.left+5
comRectY = comRect.left+30
if comRect.top <= (height/1.5):
if comRectY - ballY > 0:
speedbar[0] = -7
elif comRectY - ballY < 0:
speedbar[0] = 7
if comRect.top > (height/1.5):
speedbar[0] = 0
if(mousematch == 1):
done = 0
reward_temp = 0.0
ballRect = ballRect.move(speed)
if ballRect.left < 0 or ballRect.right > width:
speed[0] = -speed[0]
if ballRect.top < 0:
pointsPlayer[0] += 1
done = pointcontrol()
reward_temp = 1.0
if ballRect.bottom > height:
pointsCom[0] += 1
done = pointcontrol()
reward_temp = -1.0
if ballRect.colliderect(playerRect):
speed[1] = -speed[1]
if ballRect.colliderect(comRect):
speed[1] = -speed[1]
if speed[0] < 0:
speed[0] -= acceleration
if speed[0] > 0:
speed[0] += acceleration
if speed[1] < 0:
speed[1] -= acceleration
if speed[1] > 0 :
speed[1] += acceleration
rewards.append(reward_temp)
if (done):
episode_number += 1
reward_sums.append(np.sum(rewards))
if len(reward_sums) > 40:
reward_sums.pop(0)
s = 'Episode %d Total Episode Reward: %f , Mean %f' % (
episode_number, np.sum(rewards), np.mean(reward_sums))
print(s)
filehandler.write(s + '\n')
filehandler.flush()
# Propagate the rewards back to actions where no reward
was given.
# Rewards for earlier actions are attenuated
rewards = np.vstack(rewards)
action_prob_grads = np.vstack(action_prob_grads)
rewards = reward_cal(rewards)
X = np.vstack(states).reshape(-1, 8)
Y = action_probs + learning_rate * rewards * y
print('loss: ', model.train_on_batch(X, Y))
model.save_weights(filename)
states, action_prob_grads, rewards, action_probs = [], [], [], []
reward_sum = 0
screen.fill(black)
screen.blit(paddleP, playerRect)
screen.blit(ball, ballRect)
screen.blit(paddleC, comRect)
pygame.display.flip()
pygame.time.delay(delay_time)
это наш вывод:
pygame 1.9.4 Hello from the pygame community. https://www.pygame.org/contribute.html Using TensorFlow backend.
_________________________________________________________________
Layer (type) Output Shape Param #
=================================================================
dense_1 (Dense) (None, 16) 144
_________________________________________________________________
dense_2 (Dense) (None, 32) 544
_________________________________________________________________
dense_3 (Dense) (None, 3) 99
=================================================================
Total params: 787 Trainable params: 787 Non-trainable params: 0
_________________________________________________________________ 2019-02-14 11:18:10.543401: I tensorflow/core/platform/cpu_feature_guard.cc:141] Your CPU supports instructions that this TensorFlow binary was not compiled to use: AVX2 AVX512F FMA 2019-02-14 11:18:10.666634: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1432] Found device 0 with properties: name: GeForce GTX 1080 Ti major: 6 minor: 1 memoryClockRate(GHz): 1.6705 pciBusID: 0000:17:00.0 totalMemory:
10.92GiB freeMemory: 10.76GiB 2019-02-14 11:18:10.775144: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1432] Found device 1 with properties: name: GeForce GTX 1080 Ti major: 6 minor: 1 memoryClockRate(GHz): 1.6705 pciBusID: 0000:65:00.0 totalMemory:
10.91GiB freeMemory: 10.73GiB 2019-02-14 11:18:10.776037: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1511] Adding visible gpu devices: 0, 1 2019-02-14 11:18:11.176560: I tensorflow/core/common_runtime/gpu/gpu_device.cc:982] Device interconnect StreamExecutor with strength 1 edge matrix: 2019-02-14 11:18:11.176590: I tensorflow/core/common_runtime/gpu/gpu_device.cc:988] 0 1 2019-02-14 11:18:11.176596: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1001] 0: N Y 2019-02-14 11:18:11.176600: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1001] 1: Y N 2019-02-14 11:18:11.176914: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1115] Created TensorFlow device (/job:localhost/replica:0/task:0/device:GPU:0 with 10403 MB memory) -> physical GPU (device: 0, name: GeForce GTX 1080 Ti, pci bus id: 0000:17:00.0, compute capability: 6.1) 2019-02-14 11:18:11.177216: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1115] Created TensorFlow device (/job:localhost/replica:0/task:0/device:GPU:1 with 10382 MB memory) -> physical GPU (device: 1, name: GeForce GTX 1080 Ti, pci bus id: 0000:65:00.0, compute capability: 6.1)
Computer Won 0 / 11 Episode 1 Total Episode Reward: -11.000000 , Mean -11.000000
loss: 0.254405
Computer Won 0 / 11 Episode 2 Total Episode Reward: -11.000000 , Mean -11.000000
loss: 0.254304
Computer Won 0 / 11 Episode 3 Total Episode Reward: -11.000000 , Mean -11.000000
loss: 0.254304
Computer Won 0 / 11 Episode 4 Total Episode Reward: -11.000000 , Mean -11.000000
loss: 0.254304
Computer Won 0 / 11 Episode 5 Total Episode Reward: -11.000000 , Mean -11.000000
loss: 0.254304
Computer Won 0 / 11 Episode 6 Total Episode Reward: -11.000000 , Mean -11.000000
loss: 0.254304