Skip to content

Commit

Permalink
Version 1.2.0 (#8)
Browse files Browse the repository at this point in the history
  • Loading branch information
sdsomma authored and janhartigan committed Sep 22, 2018
1 parent f1380b1 commit 0d80866
Show file tree
Hide file tree
Showing 4 changed files with 20 additions and 8 deletions.
2 changes: 1 addition & 1 deletion montecarlo/montecarlo.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,7 @@ def simulate(self, expansion_count = 1):
current_node = self.root_node

while current_node.expanded:
current_node = current_node.get_preferred_child()
current_node = current_node.get_preferred_child(self.root_node)

self.expand(current_node)

Expand Down
11 changes: 7 additions & 4 deletions montecarlo/node.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@ def __init__(self, state):
self.parent = None
self.children = []
self.expanded = False
self.player_number = None

def update_win_value(self, value):
self.win_value += value
Expand All @@ -30,12 +31,12 @@ def add_children(self, children):
for child in children:
self.add_child(child)

def get_preferred_child(self):
def get_preferred_child(self, root_node):
best_children = []
best_score = float('-inf')

for child in self.children:
score = child.get_score()
score = child.get_score(root_node)

if score > best_score:
best_score = score
Expand All @@ -45,10 +46,12 @@ def get_preferred_child(self):

return random.choice(best_children)

def get_score(self):
def get_score(self, root_node):
discovery_constant = 0.35
discovery_operand = discovery_constant * (self.policy_value or 1) * sqrt(log(self.parent.visits) / (self.visits or 1))
win_operand = self.win_value / (self.visits or 1)

win_multiplier = 1 if self.parent.player_number == root_node.player_number else -1
win_operand = win_multiplier * self.win_value / (self.visits or 1)

self.score = win_operand + discovery_operand

Expand Down
13 changes: 11 additions & 2 deletions readme.md
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
A Python3 library that you can use to run a Monte Carlo tree search, either traditionally by drilling down to end-game states or with expert policies such as those a neural network might provide.

- **Version:** 1.1.2
- **Version:** 1.2.0

[![Build Status](https://travis-ci.org/ImparaAI/monte-carlo-tree-search.png?branch=master)](https://travis-ci.org/ImparaAI/monte-carlo-tree-search)

Expand Down Expand Up @@ -90,4 +90,13 @@ After you've chosen a new root node, you can override it on the `montecarlo` ins

```python
montecarlo.root_node = montecarlo.make_choice()
```
```

## Turn based environments

If you are modeling a turn-based environment (e.g. a two-player board game), set the `player_number` on each node so the selection process can invert child win values.

```python
node = Node(state)
node.player_number = 1
```
2 changes: 1 addition & 1 deletion setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@

setuptools.setup(
name="imparaai-montecarlo",
version="1.1.2",
version="1.2.0",
license='MIT',
author="ImparaAI",
author_email="[email protected]",
Expand Down

0 comments on commit 0d80866

Please sign in to comment.