Skip to content

Commit

Permalink
update boltzmann_action definition
Browse files Browse the repository at this point in the history
Fixes the `ValueError: sum(pvals[:-1]) > 1.0` raised by `np.random.multinomial`, as described in endgameinc#7.
  • Loading branch information
allewwaly authored Jun 26, 2018
1 parent 673c93c commit 18c18b7
Showing 1 changed file with 2 additions and 2 deletions.
4 changes: 2 additions & 2 deletions test_agent_kerasrl.py
Original file line number Diff line number Diff line change
Expand Up @@ -39,7 +39,7 @@ def evaluate( action_function ):

# option 1: Boltzmann sampling from Q-function network output
def softmax(x):
    """Return the softmax of *x*, normalised over all of its elements.

    Args:
        x: Array-like of real values (e.g. Q-function network output).

    Returns:
        A float ndarray with the same shape as *x* whose entries sum to 1.
    """
    values = np.asarray(x, dtype=float)
    # Subtracting the maximum leaves the result mathematically unchanged but
    # keeps np.exp from overflowing to inf (and the ratio from becoming nan)
    # when the inputs are large.
    exps = np.exp(values - np.max(values))
    return exps / np.sum(exps)


def boltzmann_action(x):
    """Sample an index with probability given by the softmax of *x*.

    Args:
        x: Array-like of values, of any shape; it is flattened before
            sampling, so the returned index refers to the flattened order.

    Returns:
        An integer index in ``[0, number of elements of x)``.
    """
    probabilities = softmax(x).flatten()
    # Size the population from the flattened probabilities rather than
    # len(x): for a 2-D input such as shape (1, n), len(x) is 1 and
    # np.random.choice would reject the mismatched sizes.
    return np.random.choice(probabilities.size, p=probabilities)
# option 2: maximize the Q value, ignoring stochastic action space
def best_action(x):
    """Return the index of the largest value in *x* (flattened order)."""
    return np.asarray(x).argmax()

Expand Down Expand Up @@ -70,4 +70,4 @@ def f(bytez):
# Report each success count as a fraction of `total`.
print("Success rate of random chance: {}\n".format( len(random_success) / total ))
print("Success rate (dqn): {}\n".format( len(dqn_success) / total ) )
# NOTE(review): this line previously reused the "(dqn)" label, making the two
# results indistinguishable in the output; "dqn score" is inferred from the
# variable name -- confirm the intended wording.
print("Success rate (dqn score): {}\n".format( len(dqn_score_success) / total ) )


0 comments on commit 18c18b7

Please sign in to comment.