minimax 仅在一种井字游戏中失败

问题描述 投票:0回答:1

我有一个 minimax 函数,在其他时候运行良好。但是当我同时实现 alpha beta 剪枝和哈希表时,它只会在一种情况下丢失。如果我应用其中一种优化或不应用任何优化,那么在所有情况下都可以正常工作。这种失败的情况只有当人工智能排在第二位时才会发生:

我去9、1、3、6。

爱去5、7、2。

 X | O | X 
---+---+---
 4 | O | X 
---+---+---
 O | 8 | X

艾:哦

功能:


def minimax(depth, alpha, beta):
    new_depth = depth + 1
    avails = core_func.availables()
    if new_depth % 2:  # maximizer
        if core_func.check_win():
            return -1  # previously minimizer
        elif not avails:
            return 0
        best_score = -float('inf')
        for c in avails:
            i = c - 1
            variables.grid[i] = variables.symbol
            t = tuple(variables.grid)
            if t not in variables.states:
                variables.states[t] = minimax(new_depth, alpha, beta)
            best_score = max(best_score, variables.states[t])
            alpha = max(alpha, best_score)
            variables.grid[i] = c
            if beta <= alpha: break
    else:  # minimizer
        if core_func.check_win():
            return 1  # previously maximizer
        elif not avails:
            return 0
        opponent = variables.valid_symbols - {variables.symbol}
        opponent = opponent.pop()
        best_score = float('inf')
        for c in avails:
            i = c - 1
            variables.grid[i] = opponent
            t = tuple(variables.grid)
            if t not in variables.states:
                variables.states[t] = minimax(new_depth, alpha, beta)
            best_score = min(best_score, variables.states[t])
            beta = min(beta, best_score)
            variables.grid[i] = c
            if beta <= alpha: break
    return best_score

.

如果有人需要,我在下面添加了运行游戏的所有代码:

core_func.py

import variables
import turns_func


def update_grid():
    print(f' {variables.grid[0]} | {variables.grid[1]} | {variables.grid[2]} \n'
          f'---+---+---\n'
          f' {variables.grid[3]} | {variables.grid[4]} | {variables.grid[5]} \n'
          f'---+---+---\n'
          f' {variables.grid[6]} | {variables.grid[7]} | {variables.grid[8]} \n')


def correct_input(message, lst):
    i = int(input(f'{message}: '))
    if i not in lst:
        print(f'input error: input is not valid. valid inputs: {lst}')
        return correct_input(message, lst)
    return i


def initialization():
    option = correct_input(variables.initialization_message, variables.valid_initials)
    if option in {1, 4, 5}:
        variables.turn = 'H'
    else:
        variables.turn = 'C'
    if option in {2, 4}:
        variables.difficulty = 'R'
    if option in {3, 5}:
        variables.difficulty = 'P'
    print('Coordinates of the grid:')
    update_grid()


def check_win():
    for x, y, z in (0, 1, 2), (3, 4, 5), (6, 7, 8), (0, 3, 6), (1, 4, 7), (2, 5, 8), (0, 4, 8), (2, 4, 6):
        if variables.grid[x] == variables.grid[y] == variables.grid[z]:
            return True
    return False


def availables():
    availables = []
    for c in variables.grid:
        if c not in variables.valid_symbols:
            availables.append(c)
    return availables


def get_coordinate():
    if not variables.difficulty:
        return turns_func.human_turn()
    elif variables.turn == 'H':
        variables.turn = 'C'
        return turns_func.human_turn()
    else:
        print(f'computer ({variables.symbol}):')
        variables.turn = 'H'
        if variables.difficulty == 'R':
            return turns_func.random_turn()
        else:
            return turns_func.perfect_turn()


def play():
    coordinate = get_coordinate()
    variables.grid[coordinate - 1] = variables.symbol
    update_grid()
    variables.moves += 1
    if check_win():
        print(f'"{variables.symbol}" wins in {variables.moves} moves!')
    elif not availables():
        print('"Game Ties"')
    else:
        changed = variables.valid_symbols - {variables.symbol}
        variables.symbol = changed.pop()
        play()

.

turns_func.py

import variables
import core_func
from random import choice


def human_turn():
    return core_func.correct_input(variables.get_coordinate_message + variables.symbol, core_func.availables())


def random_turn():
    return choice(core_func.availables())


# alpha - maximizer's max - it should be less than its parent node minimizer's min
# beta - minimizer's min - it should be greater than its parent node maximizer's max


def minimax(depth, alpha, beta):
    new_depth = depth + 1
    avails = core_func.availables()
    if new_depth % 2:  # maximizer
        if core_func.check_win():
            return -1  # previously minimizer
        elif not avails:
            return 0
        best_score = -float('inf')
        for c in avails:
            i = c - 1
            variables.grid[i] = variables.symbol
            t = tuple(variables.grid)
            if t not in variables.states:
                variables.states[t] = minimax(new_depth, alpha, beta)
            best_score = max(best_score, variables.states[t])
            alpha = max(alpha, best_score)
            variables.grid[i] = c
            if beta <= alpha: break
    else:  # minimizer
        if core_func.check_win():
            return 1  # previously maximizer
        elif not avails:
            return 0
        opponent = variables.valid_symbols - {variables.symbol}
        opponent = opponent.pop()
        best_score = float('inf')
        for c in avails:
            i = c - 1
            variables.grid[i] = opponent
            t = tuple(variables.grid)
            if t not in variables.states:
                variables.states[t] = minimax(new_depth, alpha, beta)
            best_score = min(best_score, variables.states[t])
            beta = min(beta, best_score)
            variables.grid[i] = c
            if beta <= alpha: break
    return best_score


def perfect_turn():
    max_score = -float('inf')
    coordinate = None
    for c in variables.grid:
        if c in variables.valid_symbols: continue
        i = c - 1
        variables.grid[i] = variables.symbol
        score = minimax(1, -float('inf'), float('inf'))
        variables.grid[i] = c
        if score > max_score:
            max_score = score
            coordinate = c
    return coordinate

.

变量.py

initialization_message = ('1. Play with Human\n'
                          'Play with computer:\n'
                          'For computer going first, Difficulties:\n'
                          '\t2. Random\n'
                          '\t3. Perfect\n'
                          'For computer going second, Difficulties:\n'
                          '\t4. Random\n'
                          '\t5. Perfect\n'
                          'Choose an option. Enter the number')

get_coordinate_message = 'Input a coordinate for '

valid_initials = (1, 2, 3, 4, 5)
grid = [1, 2, 3, 4, 5, 6, 7, 8, 9]
valid_symbols = {'X', 'O'}
symbol = 'X'
difficulty = None
turn = None
moves = 0
states = {}

.

main.py

import core_func

core_func.initialization()
core_func.play()
python python-3.x algorithm minimax alpha-beta-pruning
1个回答
0
投票

问题确实在于记忆化(“散列”)和 alpha-beta 剪枝的结合。您使用的键

t
代表网格的状态,但不包括当前有效的 alpha/beta 窗口。当 minimax 返回的值来自 alpha-beta 切割 (a
break
) 时,则不能保证它是网格的准确值,只是足够准确以选择递归树上更高的另一个移动,但如果您使用记忆化,您会希望该值基于 complete 评估(没有
break
),就像在另一种情况下(再次遇到该状态时),alpha-beta 窗口可能不同,这需要不提前退出所产生的价值(不
break
)。

这就是为什么当你删除两个功能之一时它会起作用:记忆或 alpha-beta 修剪。

要激活这两个功能,您有一些选择。如果由于修剪而导致评估循环过早退出,我建议您不要在

states
字典中存储分数。

这是适合执行此操作的代码:

def minimax(depth, alpha, beta):
    new_depth = depth + 1
    avails = core_func.availables()
    count = 0 # count the number of evaluated variants
    if new_depth % 2:  # maximizer
        if core_func.check_win():
            return -1  # previously minimizer
        elif not avails:
            return 0
        best_score = -float('inf')
        for c in avails:
            i = c - 1
            variables.grid[i] = variables.symbol
            t = tuple(variables.grid)
            score = variables.states[t] if t in variables.states else minimax(new_depth, alpha, beta)
            best_score = max(best_score, score)
            alpha = max(alpha, best_score)
            variables.grid[i] = c
            count += 1 # update counter
            if beta <= alpha:
                break
    else:  # minimizer
        opponent = variables.valid_symbols - {variables.symbol}
        opponent = opponent.pop()
        if core_func.check_win():
            return 1  # previously maximizer
        elif not avails:
            return 0
        best_score = float('inf')
        for c in avails:
            i = c - 1
            variables.grid[i] = opponent
            t = tuple(variables.grid)
            score = variables.states[t] if t in variables.states else minimax(new_depth, alpha, beta)
            best_score = min(best_score, score)
            beta = min(beta, best_score)
            variables.grid[i] = c
            count += 1 # update counter
            if beta <= alpha:
                break
    if count == len(avails):  # We did a complete evaluation
        # Only then update variables.state
        t = tuple(variables.grid)
        variables.states[t] = best_score
    return best_score
© www.soinside.com 2019 - 2024. All rights reserved.