Минимаксный алгоритм дает неверные результаты для tictactoe - PullRequest
1 голос
/ 01 июня 2019

Я практикую свои алгоритмы и хотел реализовать искусственный интеллект на основе минимаксного алгоритма в Python (код ниже):

import itertools

import more_itertools
import numpy as np


class Position:
    win_positions = np.array(([1, 2, 3], [4, 5, 6], [7, 8, 9], [1, 5, 9],
                              [1, 4, 7], [2, 5, 8], [3, 6, 9], [3, 5, 7]))

    def __init__(self, position=None):

        """If a position is given ensure 0 is in the first index, 
        for a dictionary lookup later for
            an empty board otherwise use the position
        """
        self.position = np.array([0]) if position is None else \
            np.array(position) if position[0] == 0 else np.append([0], np.array(
                position))

        self.turn = itertools.cycle(
            ['x', 'o']) if self.position.size % 2 else itertools.cycle(
            ['o', 'x'])
        self.p = more_itertools.peekable(self.turn)

    def add(self, value):
        next(self.turn)
        self.position = np.append(self.position, value)
        return self.win()

    def win(self):
        x = np.array(self.position[1::2])
        o = np.array(self.position[2::2])

        """
        takes the difference of a players moves and the winning move sets,
        an array resulting from this with a size of 0 indicates the player 
        contains
        a winning arrangement in their moves
        """

        if any([False if np.setdiff1d(position, x).size else True for position
                in Position.win_positions]):
            return 10
        elif any([False if np.setdiff1d(position, o).size else True for position
                  in Position.win_positions]):
            return -10
        elif self.valid_moves().size == 0:
            return 0
        else:
            return None

    def whose_turn(self, player=None):
        return self.p.peek() if player is None else self.p.peek() == player

    def valid_moves(self):
        return np.setdiff1d(np.arange(1, 10), self.position)

    def clone(self, next_pos=None):
        """returns a new position object with one of two options, either a copy,
            of the current position or a copy with a move added"""
        return Position(
            np.append(self.position, next_pos)) if next_pos else Position(
            np.copy(self.position))

    def mini_max(self, depth=0):

        best_move = -1
        """return the win value of the board position
            draws are 0, 'o' wins are -10 and 'x' wins are
            10. A board with valid moves available returns none for 
            this call """
        if self.win() is not None:
            return self.win(), None

        minimax = float('-inf') if self.whose_turn('x') else float('inf')
        for move in self.valid_moves():

            val = self.clone(move).mini_max()[0]
            if minimax < val and self.whose_turn(
                    'x') or minimax > val and self.whose_turn('o'):
                minimax = val
                best_move = move

        return minimax + (-1 if self.whose_turn('x') else 1), best_move

    def print_board(self):
        blank = np.chararray(shape=(9,), unicode=True)
        for i in range(1, self.position.size):
            blank[self.position[i] - 1] = 'X' if i % 2 else 'O'
        string = ("{:^3s}█{:^3s}█{:^3s}\n"
                  "▄▄▄█▄▄▄█▄▄▄\n"
                  "{:^3s}█{:^3s}█{:^3s}\n"
                  "▀▀▀█▀▀▀█▀▀▀\n"
                  "{:^3s}█{:^3s}█{:^3s}\n".format(*blank))
        print(string)

    def __str__(self):
        return "".join(map(str, self.position))


if __name__ == '__main__':
    # test winning conditions
    assert Position([1, 4, 2, 5, 3]).win() == 10
    assert Position([1, 4, 2, 5, 7, 6]).win() == -10
    assert Position([1, 4, 2, 5, 7, 8, 6, 5, 9, 3]).win() == 0

    # testing turns
    assert Position([1, 2, 3]).whose_turn() == 'o'
    assert Position([1, 5, 6, 4]).whose_turn() == 'x'
    assert Position([5, 6, 3, 2]).whose_turn('x')
    assert Position([5, 6, 3, 2, 1]).whose_turn('o')

    # valid moves
    assert all(
        Position([1, 2, 3, 4, 5, 6]).valid_moves() == np.array([7, 8, 9]))
    assert all(
        Position([9, 5, 3, 8]).valid_moves() == np.array([1, 2, 4, 6, 7]))

    # test minimax correct moves
    assert Position([1, 4, 2, 5]).mini_max()[1] == 3
    assert Position([1, 3, 4, 5]).mini_max()[1] == 7
    assert Position([1, 2, 5]).mini_max()[1] == 9  # error here

Я пытался переписать свой код несколько раз, номои тесты продолжают проваливаться на оптимальных ходах.О игрок, кажется, игнорирует выигрышную позицию х игрока.Я в растерянности.Я проверил это по псевдокоду в википедии, но, похоже, что-то явно не так

...