-
Notifications
You must be signed in to change notification settings - Fork 0
/
basic_adaptive.py
193 lines (136 loc) · 6.17 KB
/
basic_adaptive.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
#!/usr/bin/env python
"""
A basic adaptive bot. This is part of the third worksheet.
"""
from api import State, util
import random, os
from itertools import chain
from sklearn.externals import joblib
# Path of the model we will use. If you make a model
# with a different name, point this line to its path.
DEFAULT_MODEL = os.path.dirname(os.path.realpath(__file__)) + '/ml.pkl'
class Bot:
    """
    A basic adaptive bot: a one-ply lookahead that scores each successor
    state with a trained scikit-learn classifier instead of a hand-written
    heuristic.
    """

    # Whether to shuffle moves before evaluating them, so that equally
    # valued moves are chosen at random.
    __randomize = True
    # The trained model (loaded in __init__).
    __model = None

    def __init__(self, randomize=True, model_file=DEFAULT_MODEL):
        """
        :param randomize: If True, shuffle the candidate moves so ties are
            broken randomly rather than by move order.
        :param model_file: Path to the pickled scikit-learn model to load.
        """
        self.__randomize = randomize

        # Load the model.
        # NOTE(review): sklearn.externals.joblib (imported at the top of
        # this file) is deprecated/removed in modern scikit-learn; prefer
        # the standalone `import joblib` when upgrading.
        self.__model = joblib.load(model_file)

    def get_move(self, state):
        # Evaluate all moves and return the best one (value is discarded).
        val, move = self.value(state)
        return move

    def value(self, state):
        """
        Return the value of this state and the associated move
        :param state:
        :return: val, move: the value of the state, and the best move.
        """
        best_value = float('-inf') if maximizing(state) else float('inf')
        best_move = None

        moves = state.moves()

        if self.__randomize:
            random.shuffle(moves)

        for move in moves:
            next_state = state.next(move)

            # Score the successor with the learned model. This replaces the
            # recursive call a minimax/alphabeta bot would make here: the
            # search is only one ply deep.
            value = self.heuristic(next_state)

            if maximizing(state):
                if value > best_value:
                    best_value = value
                    best_move = move
            else:
                if value < best_value:
                    best_value = value
                    best_move = move

        return best_value, best_move

    def heuristic(self, state):
        """
        Estimate the value of a state using the trained model.

        :param state: The state to score.
        :return: A float in [-1, 1]: P('won') - P('lost') according to the
            model, from player 1's perspective.
        """
        # Convert the state to a feature vector
        feature_vector = [features(state)]

        # These are the classes: ('won', 'lost')
        classes = list(self.__model.classes_)

        # Ask the model for a prediction
        # This returns a probability for each class
        prob = self.__model.predict_proba(feature_vector)[0]

        # Weigh the win/loss outcomes (-1 and 1) by their probabilities
        res = -1.0 * prob[classes.index('lost')] + 1.0 * prob[classes.index('won')]

        return res
def maximizing(state):
    """
    Whether we're the maximizing player (1) or the minimizing player (2).

    :param state: The state whose current player is inspected.
    :return: True iff player 1 is to move.
    """
    current_player = state.whose_turn()
    return current_player == 1
def features(state):
    # type: (State) -> list[float]
    """
    Extract features from this state. Every feature vector returned has the
    same length (156 entries), so the vectors can be stacked into a training
    matrix for the model.

    :param state: A state to be converted to a feature vector
    :return: A list of numbers (a fixed-length feature vector) representing
        this state. (Note: the original doc claimed a tuple; a list is what
        is actually built and returned.)
    """
    feature_set = []

    # Raw point totals for both players (normalized further below).
    p1_points = state.get_points(1)
    p2_points = state.get_points(2)

    # Pending (marriage) points for both players.
    p1_pending_points = state.get_pending_points(1)
    p2_pending_points = state.get_pending_points(2)

    # Trump suit, game phase, stock size, leader, whose turn it is, and the
    # card the opponent played this trick (None if no card is on the table).
    trump_suit = state.get_trump_suit()
    phase = state.get_phase()
    stock_size = state.get_stock_size()
    leader = state.leader()
    whose_turn = state.whose_turn()
    opponents_played_card = state.get_opponents_played_card()

    ################## You do not need to do anything below this line ########################

    # One-hot encode the perspective: each card is in exactly one of six
    # knowledge states. A single dict lookup replaces the original six
    # list passes; an unknown state string now raises KeyError instead of
    # silently producing a malformed vector.
    card_state_onehot = {
        'U':   [1, 0, 0, 0, 0, 0],  # unseen
        'S':   [0, 1, 0, 0, 0, 0],  # in stock
        'P1H': [0, 0, 1, 0, 0, 0],  # in player 1's hand
        'P2H': [0, 0, 0, 1, 0, 0],  # in player 2's hand
        'P1W': [0, 0, 0, 0, 1, 0],  # won by player 1
        'P2W': [0, 0, 0, 0, 0, 1],  # won by player 2
    }
    perspective = [card_state_onehot[card] for card in state.get_perspective()]

    # Append one-hot encoded perspective to feature_set
    feature_set += list(chain(*perspective))

    # Append normalized points to feature_set: each player's share of the
    # points scored so far (0 if no points have been scored yet).
    total_points = p1_points + p2_points
    feature_set.append(p1_points/total_points if total_points > 0 else 0.)
    feature_set.append(p2_points/total_points if total_points > 0 else 0.)

    # Append normalized pending points to feature_set, same scheme.
    total_pending_points = p1_pending_points + p2_pending_points
    feature_set.append(p1_pending_points/total_pending_points if total_pending_points > 0 else 0.)
    feature_set.append(p2_pending_points/total_pending_points if total_pending_points > 0 else 0.)

    # Convert trump suit to a one-hot id and add to feature set.
    suits = ["C", "D", "H", "S"]
    trump_suit_onehot = [0, 0, 0, 0]
    trump_suit_onehot[suits.index(trump_suit)] = 1
    feature_set += trump_suit_onehot

    # Append one-hot encoded phase (1 or 2) to feature set
    feature_set += [1, 0] if phase == 1 else [0, 1]

    # Append stock size normalized by its maximum of 10 cards.
    feature_set.append(stock_size/10)

    # Append one-hot encoded leader to feature set
    feature_set += [1, 0] if leader == 1 else [0, 1]

    # Append one-hot encoded whose_turn to feature set
    feature_set += [1, 0] if whose_turn == 1 else [0, 1]

    # Append one-hot encoded opponent's card: 20 slots for the card indices
    # plus a final 21st slot meaning "no card played" (None).
    opponents_played_card_onehot = [0] * 21
    opponents_played_card_onehot[opponents_played_card if opponents_played_card is not None else 20] = 1
    feature_set += opponents_played_card_onehot

    return feature_set