I have been learning about artificial intelligence (mostly through YouTube and other online sources), and I watched a video on reinforcement learning (the gridworld problem). The code provided is written in Python, which I am not very familiar with. I decided to modify it to build my own use case.
This is the code:
Learner.py:
import World
import threading
import time
discount = 0.3
actions = World.actions
states = []
Q = {}
# enumerate all grid states; every state-action pair starts with a small initial Q-value of 0.1
for i in range(World.x):
    for j in range(World.y):
        states.append((i, j))

for state in states:
    temp = {}
    for action in actions:
        temp[action] = 0.1
        World.set_cell_score(state, action, temp[action])
    Q[state] = temp

# pin the Q-values of the special (terminal) cells to their rewards
for (i, j, c, w) in World.specials:
    for action in actions:
        Q[(i, j)][action] = w
        World.set_cell_score((i, j), action, w)
def do_action(action):
    # Perform one move in the World; return (state, action, reward, new state).
    s = World.player
    r = -World.score
    if action == actions[0]:
        World.try_move(0, -1)
    elif action == actions[1]:
        World.try_move(0, 1)
    elif action == actions[2]:
        World.try_move(-1, 0)
    elif action == actions[3]:
        World.try_move(1, 0)
    else:
        return
    s2 = World.player
    r += World.score  # reward is the change in score caused by the move
    return s, action, r, s2

def max_Q(s):
    # Return the greedy action in state s and its Q-value.
    val = None
    act = None
    for a, q in Q[s].items():
        if val is None or (q > val):
            val = q
            act = a
    return act, val

def inc_Q(s, a, alpha, inc):
    # Move Q[s][a] towards inc by a step of size alpha.
    Q[s][a] *= 1 - alpha
    Q[s][a] += alpha * inc
    World.set_cell_score(s, a, Q[s][a])
def run():
    global discount
    time.sleep(1)
    alpha = 1
    t = 1
    while True:
        # Pick the right action
        s = World.player
        max_act, max_val = max_Q(s)
        (s, a, r, s2) = do_action(max_act)
        # Update Q
        max_act, max_val = max_Q(s2)
        inc_Q(s, a, alpha, r + discount * max_val)
        # Check if the game has restarted
        t += 1.0
        if World.has_restarted():
            World.restart_game()
            time.sleep(0.01)
            t = 1.0
        # Update the learning rate
        alpha = pow(t, -0.1)
        # MODIFY THIS SLEEP IF THE GAME IS GOING TOO FAST.
        time.sleep(0.05)
t = threading.Thread(target=run)
t.daemon = True
t.start()
World.start_game()
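(As far as I can tell, inc_Q applies the standard one-step Q-learning update Q(s, a) ← (1 − α)·Q(s, a) + α·(r + γ·max_a' Q(s', a')), where γ = discount = 0.3 and the learning rate α = t^(-0.1) decays over the course of each episode.)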
World.py:
from tkinter import *
master = Tk()
triangle_size = 0.1
cell_score_min = -0.2
cell_score_max = 0.2
Width = 50
(x, y) = (16, 16)
actions = ["up", "down", "left", "right"]
board = Canvas(master, width=x*Width, height=y*Width)
player = (13, 0)
score = 100
restart = False
walk_reward = .01
walls = [(0, 0), (0, 1), (0, 2), (0, 3),(0, 4), (0, 5), (0, 6), (0, 7),(0, 8), (0, 9), (0, 10), (0, 11),(0, 12), (0, 13), (0, 14), (0, 15),
(0, 0), (1, 0), (2, 0), (3, 0), (4, 0),(5, 0), (6, 0), (7, 0), (8, 0),(9, 0), (10, 0), (11, 0), (12, 0), (15, 0),
(15, 0), (15, 1), (15, 2), (15, 3),(15, 4), (15, 5), (15, 6), (15, 7),(15, 8), (15, 9), (15, 10), (15, 11),(15, 12), (15, 13), (15, 14), (15, 15),
(0, 15), (1, 15), (2, 15), (3, 15), (4, 15),(5, 15), (6, 15), (7, 15), (8, 15),(9, 15), (10, 15), (11, 15), (12, 15),(13,15),(14,15), (15, 15),
(2, 3), (2, 4), (2, 5), (3, 3), (3, 4),(3, 5), (4, 3), (4, 4), (4, 5),
(7, 3), (7, 4), (7, 5), (8, 3), (8, 4),(8, 5), (9, 3), (9, 4), (9, 5),
(1, 14), (1, 13), (1, 12), (1, 11), (1, 10),(1, 9), (1, 8),
(2, 14), (2, 13), (2, 12), (2, 11), (2, 10),(2, 9), (2, 8),
(3, 10),(3, 9), (3, 8),
(4, 10),(4, 9), (4, 8),
(5, 10),(5, 9), (5, 8),
(6, 10),(6, 9), (6, 8),
(7, 10),(7, 9), (7, 8),
(8, 10),(8, 9), (8, 8),
(9, 10),(9, 9), (9, 8),
(10, 10),(10, 9), (10, 8),
(6, 13), (7, 13), (8, 13),
(6, 12), (7, 12), (8, 12),
]
specials = [(14, 0, "green", -2)]
crum = [(player[0], player[1], "blue", -.05)]  # trail cells: (x, y, colour, reward); starts with the spawn cell
cell_scores = {}
def create_triangle(i, j, action):
    if action == actions[0]:
        return board.create_polygon((i+0.5-triangle_size)*Width, (j+triangle_size)*Width,
                                    (i+0.5+triangle_size)*Width, (j+triangle_size)*Width,
                                    (i+0.5)*Width, j*Width,
                                    fill="white", width=1)
    elif action == actions[1]:
        return board.create_polygon((i+0.5-triangle_size)*Width, (j+1-triangle_size)*Width,
                                    (i+0.5+triangle_size)*Width, (j+1-triangle_size)*Width,
                                    (i+0.5)*Width, (j+1)*Width,
                                    fill="white", width=5)
    elif action == actions[2]:
        return board.create_polygon((i+triangle_size)*Width, (j+0.5-triangle_size)*Width,
                                    (i+triangle_size)*Width, (j+0.5+triangle_size)*Width,
                                    i*Width, (j+0.5)*Width,
                                    fill="white", width=1)
    elif action == actions[3]:
        return board.create_polygon((i+1-triangle_size)*Width, (j+0.5-triangle_size)*Width,
                                    (i+1-triangle_size)*Width, (j+0.5+triangle_size)*Width,
                                    (i+1)*Width, (j+0.5)*Width,
                                    fill="white", width=1)
def render_grid():
    global specials, walls, Width, x, y, player, crum
    for i in range(x):
        for j in range(y):
            board.create_rectangle(i*Width, j*Width, (i+1)*Width, (j+1)*Width, fill="white", width=1)
            temp = {}
            for action in actions:
                temp[action] = create_triangle(i, j, action)
            cell_scores[(i, j)] = temp
    for (i, j, c, w) in specials:
        board.create_rectangle(i*Width, j*Width, (i+1)*Width, (j+1)*Width, fill=c, width=1)
    for (i, j) in walls:
        board.create_rectangle(i*Width, j*Width, (i+1)*Width, (j+1)*Width, fill="black", width=1)
    for (m, h, c, w) in crum:
        board.create_rectangle(m*Width, h*Width, (m+1)*Width, (h+1)*Width, fill=c, width=1)
render_grid()
def set_cell_score(state, action, val):
    global cell_score_min, cell_score_max
    triangle = cell_scores[state][action]
    green_dec = int(min(255, max(0, (val - cell_score_min) * 255.0 / (cell_score_max - cell_score_min))))
    green = hex(green_dec)[2:]
    red = hex(255-green_dec)[2:]
    if len(red) == 1:
        red += "0"
    if len(green) == 1:
        green += "0"
    color = "#" + red + green + "00"
    board.itemconfigure(triangle, fill=color)
def try_move(dx, dy):
    global player, x, y, score, walk_reward, me, restart
    if restart == True:
        restart_game()
    new_x = player[0] + dx
    new_y = player[1] + dy
    score += walk_reward
    if (new_x >= 0) and (new_x < x) and (new_y >= 0) and (new_y < y) and not ((new_x, new_y) in walls):
        board.coords(me, new_x*Width+Width*2/10, new_y*Width+Width*2/10, new_x*Width+Width*8/10, new_y*Width+Width*8/10)
        player = (new_x, new_y)
    for (i, j, c, w) in specials:
        if new_x == i and new_y == j:
            score -= walk_reward
            score += w
            if score > 0:
                print("Success! score: ", score)
            else:
                print("Fail! score: ", score)
            restart = True
            return
    for (m, h, c, w) in crum:
        # trail penalty: stepping onto a cell recorded in crum ends the episode with its reward
        if new_x == m and new_y == h:
            score -= walk_reward
            score += w
            if score > 0:
                print("Success! score: ", score)
            else:
                print("Fail! score: ", score)
            restart = True
            return
    # print("score: ", score)
def call_up(event):
    try_move(0, -1)

def call_down(event):
    try_move(0, 1)

def call_left(event):
    try_move(-1, 0)

def call_right(event):
    try_move(1, 0)

def restart_game():
    global player, score, me, restart
    player = (13, 0)
    score = 1
    restart = False
    board.coords(me, player[0]*Width+Width*2/10, player[1]*Width+Width*2/10, player[0]*Width+Width*8/10, player[1]*Width+Width*8/10)

def has_restarted():
    return restart
master.bind("<Up>", call_up)
master.bind("<Down>", call_down)
master.bind("<Right>", call_right)
master.bind("<Left>", call_left)
me = board.create_rectangle(player[0]*Width+Width*2/10, player[1]*Width+Width*2/10,
                            player[0]*Width+Width*8/10, player[1]*Width+Width*8/10,
                            fill="orange", width=1, tag="me")
board.grid(row=0, column=0)
def start_game():
    master.mainloop()
I am trying to make the agent leave a trail by turning the white cells it has visited blue and attaching a very small negative reward to them, to discourage it from revisiting the same states. In the code I have modified, only the first square (the one the agent starts on) turns blue, and none of the states after that. I would like each cell to turn blue once the agent has left it.
If someone could show me what I am doing wrong in my approach, and give advice or help on getting the trail working, I would really appreciate it. crum is the variable I am using for the trail.
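For clarity, here is a rough sketch (not my working code) of the kind of update I think is missing: whenever the player successfully moves, the cell it just left would be appended to crum and repainted blue. The helper name mark_trail is made up for illustration:

def mark_trail(old_state):
    # Hypothetical helper, meant to live in World.py next to try_move.
    # Repaint the vacated cell blue and remember it in crum so that
    # re-entering it triggers the small negative reward.
    i, j = old_state
    if (i, j) not in [(m, h) for (m, h, c, w) in crum]:
        crum.append((i, j, "blue", -.05))
        # note: this rectangle covers the score triangles drawn in that cell
        board.create_rectangle(i*Width, j*Width, (i+1)*Width, (j+1)*Width,
                               fill="blue", width=1)
        board.tag_raise("me")  # keep the player rectangle visible on top

Is something along these lines the right approach, and where in try_move should it be called?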