-
Notifications
You must be signed in to change notification settings - Fork 0
/
alphabeta.py
90 lines (67 loc) · 2.5 KB
/
alphabeta.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
#!/usr/bin/env python
"""
"""
from api import State, util
import random
class Bot:
__max_depth = -1
__randomize = True
def __init__(self, randomize=True, depth=8):
self.__randomize = randomize
self.__max_depth = depth
def get_move(self, state):
val, move = self.value(state)
return move
def value(self, state, alpha=float('-inf'), beta=float('inf'), depth = 0):
"""
Return the value of this state and the associated move
:param State state:
:param float alpha: The highest score that the maximizing player can guarantee given current knowledge
:param float beta: The lowest score that the minimizing player can guarantee given current knowledge
:param int depth: How deep we are in the tree
:return val, move: the value of the state, and the best move.
"""
if state.finished():
winner, points = state.winner()
return (points, None) if winner == 1 else (-points, None)
if depth == self.__max_depth:
return heuristic(state)
best_value = float('-inf') if maximizing(state) else float('inf')
best_move = None
moves = state.moves()
if self.__randomize:
random.shuffle(moves)
for move in moves:
next_state = state.next(move)
value, m= self.value(next_state, depth + 1) ## ADDED
if maximizing(state):
if value > best_value:
best_value = value
best_move = move
alpha = best_value
else:
if value < best_value:
best_value = value
best_move = move
beta = best_value
# Prune the search tree
# We know this state will never be chosen, so we stop evaluating its children
if alpha <= beta: ## ADDED
break
return best_value, best_move
def maximizing(state):
# type: (State) -> bool
"""
Whether we're the maximizing player (1) or the minimizing player (2).
:param state:
:return:
"""
return state.whose_turn() == 1
def heuristic(state):
# type: (State) -> float
"""
Estimate the value of this state: -1.0 is a certain win for player 2, 1.0 is a certain win for player 1
:param state:
:return: A heuristic evaluation for the given state (between -1.0 and 1.0)
"""
return util.ratio_points(state, 1) * 2.0 - 1.0, None