Недавно у меня возникла идея запрограммировать игру «Connect 4» (надеюсь, по-английски она называется именно так). Когда игра была готова, мне показалось интересным разработать для неё противника на основе Q-Learning. Все видео, которые я нашёл, были о Q-Learning с готовой средой. Мой вопрос заключается в том, как встроить Q-Learning в мою программу. Я думал, что агент получает +100, если он выигрывает, -100, если он проигрывает, и +20 за ничью, но также -1 за каждый ход. Запрограммировать награду не проблема, но у меня есть трудности с формулой Q-Learning и Q-таблицей: как именно мне вносить в неё текущее состояние и т. д. Я был бы очень рад помощи или возможным решениям! Заранее спасибо. Вот код:
from collections import namedtuple
from itertools import cycle
from termcolor import colored
# Record for one participant: the display name plus the number that marks
# this player's pieces on the board.
Player = namedtuple("Player", ["name", "number"])
def ask_column(field, players, player_index):
    """Prompt the given player until a valid column number is entered.

    The accepted range is derived from the width of the board (the length
    of its first row), so the right-most column can be selected and the
    range shown in the prompt matches what is actually accepted.

    Args:
        field: Board as a list of rows, each row being a list of cells.
        players: Sequence of objects exposing a ``name`` attribute.
        player_index: Index into ``players`` of the player being asked.

    Returns:
        The chosen column as a 0-based index.
    """
    # Columns run across a row; len(field) would be the number of rows.
    column_count = len(field[0])
    while True:
        answer = input(
            f"{players[player_index].name}, du bist dran."
            f" Welche Spalte waehlst du (1-{column_count})?"
        )
        # isdecimal() only accepts characters that int() can parse, whereas
        # isdigit() also accepts e.g. superscript digits that int() rejects.
        if not answer.isdecimal():
            print(colored("Die Eingabe war keine Zahl, versuche es erneut.","red"))
            continue
        # The player counts columns from 1, the board from 0.
        selected_column = int(answer) - 1
        if 0 <= selected_column < column_count:
            return selected_column
        print(colored(
            "Deine Eingabe war nicht in dem genannten Bereich,"
            " versuche es erneut.",
            "red"))
def place(field, selected_column, player):
    """Drop a piece into a column, print the board, and report where it landed.

    The piece comes to rest directly above the first occupied cell found
    when scanning the column from the top, or in the bottom row when the
    column is empty.

    Args:
        field: Board as a list of rows (top row first); modified in place.
        selected_column: 0-based index of the column to drop the piece into.
        player: Value to store in the cell (empty cells hold 0).

    Returns:
        A ``(column, row)`` tuple with the 0-based position of the new piece.

    Raises:
        ValueError: If the top cell of the column is already occupied.
    """
    if field[0][selected_column] != 0:
        raise ValueError(colored(f"Spalte {selected_column} ist voll! ","red"))
    # Index of the first occupied row from the top, or the row count when
    # the whole column is still empty. The top cell is known to be free
    # here, so the result is always at least 1.
    first_occupied_row = next(
        (
            row_index
            for row_index, cells in enumerate(field)
            if cells[selected_column] != 0
        ),
        len(field),
    )
    landing_row = first_occupied_row - 1
    field[landing_row][selected_column] = player
    for field_row in field:
        print(field_row)
    return (selected_column, landing_row)
def _has_run(field, row, column, row_step, column_step, number, run_length):
    """Return True if ``number`` fills ``run_length`` consecutive cells on the
    line through ``(row, column)`` that advances by the given steps."""
    row_count = len(field)
    column_count = len(field[0])
    streak = 0
    # Any run of `run_length` cells that contains the centre cell lies
    # within `run_length - 1` steps of it on either side.
    for offset in range(-(run_length - 1), run_length):
        current_row = row + offset * row_step
        current_column = column + offset * column_step
        if not (0 <= current_row < row_count
                and 0 <= current_column < column_count):
            # Off-board positions only occur at the ends of the window, so
            # skipping them cannot join two separate runs.
            continue
        if field[current_row][current_column] == number:
            streak += 1
            if streak >= run_length:
                return True
        else:
            streak = 0
    return False


def check_victory(field, latest_occupied_coordinate, players, player_index,
                  win_length=4):
    """Tell whether the most recent move completed a winning line.

    Looks along the row, the column and both diagonals through the given
    cell for ``win_length`` consecutive cells holding the player's number.
    Board bounds are taken from the field itself (row count and length of
    the first row), so the check works for any rectangular board size.

    Args:
        field: Board as a list of rows, each row being a list of cells.
        latest_occupied_coordinate: ``(column, row)`` of the last piece.
        players: Sequence of objects exposing a ``number`` attribute.
        player_index: Index into ``players`` of the player who just moved.
        win_length: Number of consecutive pieces needed to win.

    Returns:
        True if the player has a winning line through that cell, else False.
    """
    column, row = latest_occupied_coordinate
    number = players[player_index].number
    # (row step, column step): row, column, diagonal, anti-diagonal.
    directions = ((0, 1), (1, 0), (1, 1), (1, -1))
    return any(
        _has_run(field, row, column, row_step, column_step, number, win_length)
        for row_step, column_step in directions
    )
def main():
    """Run interactive two-player games on the console until the players stop.

    Asks for both player names once, then plays game after game. The
    starting player alternates between games. Within a game the players
    take turns choosing a column until one of them wins or the board is
    full; afterwards the players are asked whether to play again.
    """
    row_count = 6
    column_count = 7
    players = [
        Player(input("Spielername Spieler 1:"), 1),
        Player(input("Spielername Spieler 2:"), 2),
    ]
    for does_player_2_start in cycle([False, True]):
        #
        # TODO Use `None` and `Player` objects instead of 0, 1, and 2.
        #
        # Fresh, empty board for every game; 0 marks an empty cell.
        field = [[0] * column_count for _ in range(row_count)]
        player_indices = cycle([0, 1])
        if does_player_2_start:
            # Skip player 1's first turn so that player 2 opens this game.
            next(player_indices)
        for player_index in player_indices:
            # Keep asking until the piece could actually be placed.
            while True:
                try:
                    latest_occupied_coordinate = place(
                        field,
                        ask_column(field, players, player_index),
                        players[player_index].number,
                    )
                except ValueError:
                    print(colored(
                        "Das ist leider nicht moeglich."
                        " Probiere eine andere Spalte"
                        ,"red"))
                else:
                    break
            if check_victory(
                field, latest_occupied_coordinate, players, player_index
            ):
                print(colored(f"{players[player_index].name} hat gewonnen!","blue"))
                break
            # A board without any empty cell and without a winner is a draw.
            if all(cell != 0 for board_row in field for cell in board_row):
                print(colored("Unentschieden! Keiner hat gewonnen.","blue"))
                break
        continue_playing = input(
            "Moechtet ihr nochmal spielen? 1 = Ja, 2 = Nein"
        )
        if continue_playing != "1":
            break
# Start the game only when the file is executed as a script, not on import.
if __name__ == "__main__":
    main()