我有一个 minimax 函数,在其他时候运行良好。但是当我同时实现 alpha beta 剪枝和哈希表时,它只会在一种情况下丢失。如果我应用其中一种优化或不应用任何优化,那么在所有情况下都可以正常工作。这种失败的情况只有当人工智能排在第二位时才会发生:
我去9、1、3、6。
爱去5、7、2。
X | O | X
---+---+---
4 | O | X
---+---+---
O | 8 | X
艾:哦
功能:
def minimax(depth, alpha, beta):
new_depth = depth + 1
avails = core_func.availables()
if new_depth % 2: # maximizer
if core_func.check_win():
return -1 # previously minimizer
elif not avails:
return 0
best_score = -float('inf')
for c in avails:
i = c - 1
variables.grid[i] = variables.symbol
t = tuple(variables.grid)
if t not in variables.states:
variables.states[t] = minimax(new_depth, alpha, beta)
best_score = max(best_score, variables.states[t])
alpha = max(alpha, best_score)
variables.grid[i] = c
if beta <= alpha: break
else: # minimizer
if core_func.check_win():
return 1 # previously maximizer
elif not avails:
return 0
opponent = variables.valid_symbols - {variables.symbol}
opponent = opponent.pop()
best_score = float('inf')
for c in avails:
i = c - 1
variables.grid[i] = opponent
t = tuple(variables.grid)
if t not in variables.states:
variables.states[t] = minimax(new_depth, alpha, beta)
best_score = min(best_score, variables.states[t])
beta = min(beta, best_score)
variables.grid[i] = c
if beta <= alpha: break
return best_score
.
如果有人需要,我在下面添加了运行游戏的所有代码:
core_func.py
import variables
import turns_func
def update_grid():
print(f' {variables.grid[0]} | {variables.grid[1]} | {variables.grid[2]} \n'
f'---+---+---\n'
f' {variables.grid[3]} | {variables.grid[4]} | {variables.grid[5]} \n'
f'---+---+---\n'
f' {variables.grid[6]} | {variables.grid[7]} | {variables.grid[8]} \n')
def correct_input(message, lst):
i = int(input(f'{message}: '))
if i not in lst:
print(f'input error: input is not valid. valid inputs: {lst}')
return correct_input(message, lst)
return i
def initialization():
option = correct_input(variables.initialization_message, variables.valid_initials)
if option in {1, 4, 5}:
variables.turn = 'H'
else:
variables.turn = 'C'
if option in {2, 4}:
variables.difficulty = 'R'
if option in {3, 5}:
variables.difficulty = 'P'
print('Coordinates of the grid:')
update_grid()
def check_win():
for x, y, z in (0, 1, 2), (3, 4, 5), (6, 7, 8), (0, 3, 6), (1, 4, 7), (2, 5, 8), (0, 4, 8), (2, 4, 6):
if variables.grid[x] == variables.grid[y] == variables.grid[z]:
return True
return False
def availables():
availables = []
for c in variables.grid:
if c not in variables.valid_symbols:
availables.append(c)
return availables
def get_coordinate():
if not variables.difficulty:
return turns_func.human_turn()
elif variables.turn == 'H':
variables.turn = 'C'
return turns_func.human_turn()
else:
print(f'computer ({variables.symbol}):')
variables.turn = 'H'
if variables.difficulty == 'R':
return turns_func.random_turn()
else:
return turns_func.perfect_turn()
def play():
coordinate = get_coordinate()
variables.grid[coordinate - 1] = variables.symbol
update_grid()
variables.moves += 1
if check_win():
print(f'"{variables.symbol}" wins in {variables.moves} moves!')
elif not availables():
print('"Game Ties"')
else:
changed = variables.valid_symbols - {variables.symbol}
variables.symbol = changed.pop()
play()
.
turns_func.py
import variables
import core_func
from random import choice
def human_turn():
return core_func.correct_input(variables.get_coordinate_message + variables.symbol, core_func.availables())
def random_turn():
return choice(core_func.availables())
# alpha - maximizer's max - it should be less than its parent node minimizer's min
# beta - minimizer's min - it should be greater than its parent node maximizer's max
def minimax(depth, alpha, beta):
new_depth = depth + 1
avails = core_func.availables()
if new_depth % 2: # maximizer
if core_func.check_win():
return -1 # previously minimizer
elif not avails:
return 0
best_score = -float('inf')
for c in avails:
i = c - 1
variables.grid[i] = variables.symbol
t = tuple(variables.grid)
if t not in variables.states:
variables.states[t] = minimax(new_depth, alpha, beta)
best_score = max(best_score, variables.states[t])
alpha = max(alpha, best_score)
variables.grid[i] = c
if beta <= alpha: break
else: # minimizer
if core_func.check_win():
return 1 # previously maximizer
elif not avails:
return 0
opponent = variables.valid_symbols - {variables.symbol}
opponent = opponent.pop()
best_score = float('inf')
for c in avails:
i = c - 1
variables.grid[i] = opponent
t = tuple(variables.grid)
if t not in variables.states:
variables.states[t] = minimax(new_depth, alpha, beta)
best_score = min(best_score, variables.states[t])
beta = min(beta, best_score)
variables.grid[i] = c
if beta <= alpha: break
return best_score
def perfect_turn():
max_score = -float('inf')
coordinate = None
for c in variables.grid:
if c in variables.valid_symbols: continue
i = c - 1
variables.grid[i] = variables.symbol
score = minimax(1, -float('inf'), float('inf'))
variables.grid[i] = c
if score > max_score:
max_score = score
coordinate = c
return coordinate
.
变量.py
initialization_message = ('1. Play with Human\n'
'Play with computer:\n'
'For computer going first, Difficulties:\n'
'\t2. Random\n'
'\t3. Perfect\n'
'For computer going second, Difficulties:\n'
'\t4. Random\n'
'\t5. Perfect\n'
'Choose an option. Enter the number')
get_coordinate_message = 'Input a coordinate for '
valid_initials = (1, 2, 3, 4, 5)
grid = [1, 2, 3, 4, 5, 6, 7, 8, 9]
valid_symbols = {'X', 'O'}
symbol = 'X'
difficulty = None
turn = None
moves = 0
states = {}
.
main.py
import core_func
core_func.initialization()
core_func.play()
问题确实在于记忆化(“散列”)和 alpha-beta 剪枝的结合。您使用的键
t
代表网格的状态,但不包括当前有效的 alpha/beta 窗口。当 minimax 返回的值来自 alpha-beta 切割 (a break
) 时,则不能保证它是网格的准确值,只是足够准确以选择递归树上更高的另一个移动,但如果您使用记忆化,您会希望该值基于 complete 评估(没有 break
),就像在另一种情况下(再次遇到该状态时),alpha-beta 窗口可能不同,这需要不提前退出所产生的价值(不break
)。
这就是为什么当你删除两个功能之一时它会起作用:记忆或 alpha-beta 修剪。
要激活这两个功能,您有一些选择。如果由于修剪而导致评估循环过早退出,我建议您不要在
states
字典中存储分数。
这是适合执行此操作的代码:
def minimax(depth, alpha, beta):
new_depth = depth + 1
avails = core_func.availables()
count = 0 # count the number of evaluated variants
if new_depth % 2: # maximizer
if core_func.check_win():
return -1 # previously minimizer
elif not avails:
return 0
best_score = -float('inf')
for c in avails:
i = c - 1
variables.grid[i] = variables.symbol
t = tuple(variables.grid)
score = variables.states[t] if t in variables.states else minimax(new_depth, alpha, beta)
best_score = max(best_score, score)
alpha = max(alpha, best_score)
variables.grid[i] = c
count += 1 # update counter
if beta <= alpha:
break
else: # minimizer
opponent = variables.valid_symbols - {variables.symbol}
opponent = opponent.pop()
if core_func.check_win():
return 1 # previously maximizer
elif not avails:
return 0
best_score = float('inf')
for c in avails:
i = c - 1
variables.grid[i] = opponent
t = tuple(variables.grid)
score = variables.states[t] if t in variables.states else minimax(new_depth, alpha, beta)
best_score = min(best_score, score)
beta = min(beta, best_score)
variables.grid[i] = c
count += 1 # update counter
if beta <= alpha:
break
if count == len(avails): # We did a complete evaluation
# Only then update variables.state
t = tuple(variables.grid)
variables.states[t] = best_score
return best_score