Q-Learning для "Connect 4" - PullRequest
       24

Q-Learning для "Connect 4"

0 голосов
/ 13 марта 2020

Недавно у меня возникла идея запрограммировать игру «Connect 4» (надеюсь, по-английски она называется именно так). Когда игра была готова, мне показалось интересным разработать для неё противника на основе Q-Learning. Все видео, которые я нашёл, были о Q-Learning с готовой средой. Мой вопрос сейчас заключается в том, как встроить Q-Learning в программу. Я думал, что агент получает +100, если выигрывает, -100, если проигрывает, и +20 за ничью, а также -1 за каждый ход. Запрограммировать награду — не проблема, но у меня есть трудности с формулой Q-Learning и Q-таблицей: как именно я должен передавать текущее состояние и т. д. Я был бы очень рад помощи или возможным решениям! Спасибо заранее. Вот код:



from collections import namedtuple
from itertools import cycle
from termcolor import colored

# Lightweight record for a participant: display name plus the number that
# marks this player's pieces on the board.
Player = namedtuple(typename="Player", field_names="name number")


def ask_column(field, players, player_index):
    """Prompt the current player until a valid column number is entered.

    The number of selectable columns is the width of the first board row
    (not the number of rows), so every column of a non-square board,
    including the rightmost one, can be chosen.

    Args:
        field: Board as a list of rows, each row being a list of cells.
        players: Sequence of objects exposing a ``name`` attribute.
        player_index: Index into ``players`` of the player being asked.

    Returns:
        The zero-based index of the selected column.
    """
    column_count = len(field[0]) if field else 0
    while True:
        answer = input(
            f"{players[player_index].name}, du bist dran."
            f" Welche Spalte waehlst du (1-{column_count})?"
        )
        # isdecimal() only accepts characters that int() can parse, whereas
        # isdigit() also accepts e.g. superscript digits and int() then raises.
        if not answer.isdecimal():
            print(colored("Die Eingabe war keine Zahl, versuche es erneut.","red"))
            continue

        # The player enters a one-based column number.
        selected_column = int(answer) - 1
        if 0 <= selected_column < column_count:
            return selected_column
        print(colored(
            "Deine Eingabe war nicht in dem genannten Bereich,"
            " versuche es erneut.",
        "red"))


def place(field, selected_column, player):
    """Drop a piece into a column, print the board, and report where it landed.

    The piece is stored in the cell directly above the first occupied cell
    of the column (scanning from the top), or in the bottom row when the
    column holds no pieces. The board is modified in place and then printed
    row by row.

    Args:
        field: Board as a list of rows; a cell value of 0 means empty.
        selected_column: Zero-based index of the column to drop into.
        player: Value written into the target cell.

    Returns:
        A ``(column, row)`` tuple with the zero-based position of the piece.

    Raises:
        ValueError: If the top cell of the column is already occupied.
    """
    if field[0][selected_column] != 0:
        raise ValueError(colored(f"Spalte {selected_column} ist voll! ","red"))

    # Index of the first occupied cell from the top; the board height when
    # the whole column is still empty.
    first_occupied = next(
        (
            index
            for index, cells in enumerate(field)
            if cells[selected_column] != 0
        ),
        len(field),
    )
    target_row = first_occupied - 1
    field[target_row][selected_column] = player

    for line in field:
        print(line)
    return (selected_column, target_row)


def _count_in_direction(field, column, row, d_col, d_row, number):
    """Count consecutive cells equal to ``number`` next to a start cell.

    Starts at the neighbour of ``(column, row)`` in direction
    ``(d_col, d_row)`` and walks on until it leaves the board or meets a
    cell holding a different value. The start cell itself is not counted.
    """
    count = 0
    current_column = column + d_col
    current_row = row + d_row
    while (
            0 <= current_row < len(field)
            and 0 <= current_column < len(field[current_row])
            and field[current_row][current_column] == number
    ):
        count += 1
        current_column += d_col
        current_row += d_row
    return count


def check_victory(field, latest_occupied_coordinate, players, player_index):
    """Report whether the latest move completed a line of four or more.

    Only lines passing through the latest occupied cell are examined:
    the row, the column, and both diagonals. Column bounds come from the
    actual row width, so boards of any rectangular size are handled.

    Args:
        field: Board as a list of rows, each row being a list of cells.
        latest_occupied_coordinate: ``(column, row)`` of the latest piece,
            both zero-based.
        players: Sequence of objects exposing a ``number`` attribute.
        player_index: Index into ``players`` of the player to check for.

    Returns:
        ``True`` if that player's number fills at least four consecutive
        cells through the given coordinate, otherwise ``False``.
    """
    column, row = latest_occupied_coordinate
    number = players[player_index].number

    # A line through the coordinate cannot exist if the cell itself
    # does not belong to the player.
    if field[row][column] != number:
        return False

    # (d_col, d_row) steps: row, column, and the two diagonals.
    directions = ((1, 0), (0, 1), (1, 1), (-1, 1))
    for d_col, d_row in directions:
        run_length = (
            1
            + _count_in_direction(field, column, row, d_col, d_row, number)
            + _count_in_direction(field, column, row, -d_col, -d_row, number)
        )
        if run_length >= 4:
            return True
    return False


def main():
    """Run the interactive two-player game on the console.

    Asks for both player names, then plays rounds on a fresh board of six
    rows and seven columns. The starting player alternates from round to
    round. A round ends when a move wins or when no empty cell is left.
    After each round the players are asked whether to continue; any answer
    other than "1" ends the program.
    """
    players = [
        Player(input("Spielername Spieler 1:"), 1),
        Player(input("Spielername Spieler 2:"), 2),
    ]
    for second_player_begins in cycle([False, True]):
        # TODO Use `None` and `Player` objects instead of 0, 1, and 2.
        field = [[0] * 7 for _ in range(6)]

        turn_order = cycle([1, 0] if second_player_begins else [0, 1])
        for player_index in turn_order:
            current_player = players[player_index]

            # Keep asking until a piece has actually been placed.
            while True:
                try:
                    chosen_column = ask_column(field, players, player_index)
                    latest_occupied_coordinate = place(
                        field, chosen_column, current_player.number
                    )
                except ValueError:
                    print(colored(
                        "Das ist leider nicht moeglich."
                        " Probiere eine andere Spalte"
                    ,"red"))
                    continue
                break

            has_won = check_victory(
                field, latest_occupied_coordinate, players, player_index
            )
            if has_won:
                print(colored("{} hat gewonnen!".format(current_player.name),"blue"))
                break

            board_is_full = all(cell != 0 for cells in field for cell in cells)
            if board_is_full:
                print(colored("Unentschieden! Keiner hat gewonnen.","blue"))
                break

        if input("Moechtet ihr nochmal spielen? 1 = Ja, 2 = Nein") != "1":
            break


# Start the game only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()
...