-
Notifications
You must be signed in to change notification settings - Fork 11
/
Copy pathexample.py
executable file
·90 lines (67 loc) · 2.44 KB
/
example.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
#!/usr/bin/env python
# coding: utf-8
from __future__ import (absolute_import, division, print_function,
unicode_literals)
from builtins import input
import deeprl_hw1.lake_envs as lake_env
import gym
import time
def run_random_policy(env):
    """Run a uniformly random policy on *env* until a terminal state.

    Renders the environment after every step so the rollout can be
    watched in the console, pausing briefly between steps.

    Parameters
    ----------
    env: gym.envs.Environment
        Instance of an OpenAI gym.

    Returns
    -------
    (float, int)
        First number is the total undiscounted reward received. The
        second number is the total number of actions taken before the
        episode finished.
    """
    # A random policy never looks at the state, so the reset observation
    # is deliberately discarded (fixes an unused-local from before).
    env.reset()
    env.render()
    time.sleep(1)  # just pauses so you can see the output

    total_reward = 0
    num_steps = 0
    while True:
        # Only reward and the terminal flag are used; the successor
        # state and debug info are irrelevant to a random policy.
        _, reward, is_terminal, _ = env.step(env.action_space.sample())
        env.render()

        total_reward += reward
        num_steps += 1

        if is_terminal:
            break
        time.sleep(1)  # pause between steps, but not after the final render

    return total_reward, num_steps
def print_env_info(env):
    """Report how many states and actions the environment exposes."""
    num_states = env.nS
    num_actions = env.nA
    print('Environment has %d states and %d actions.' % (num_states, num_actions))
def print_model_info(env, state, action):
    """Describe every possible outcome of taking *action* in *state*.

    Reads the entry for (state, action) out of the environment's
    transition table ``env.P`` and prints each outcome's probability,
    successor state, reward, and terminal flag.
    """
    outcomes = env.P[state][action]

    header = ('According to transition function, '
              'taking action %s(%d) in state %d leads to'
              ' %d possible outcomes')
    print(header % (lake_env.action_names[action], action, state, len(outcomes)))

    for probability, successor, reward, is_terminal in outcomes:
        kind = 'terminal' if is_terminal else 'non-terminal'
        print(
            '\tTransitioning to %s state %d with probability %f and reward %f'
            % (kind, successor, probability, reward))
def main():
    """Demo: inspect FrozenLake's model, then roll out a random policy."""
    # create the environment
    env = gym.make('FrozenLake-v0')
    # uncomment next line to try the deterministic version
    # env = gym.make('Deterministic-4x4-FrozenLake-v0')

    print_env_info(env)

    # Peek at a few representative entries of the transition model.
    queries = ((0, lake_env.DOWN), (1, lake_env.DOWN), (14, lake_env.RIGHT))
    for state, action in queries:
        print_model_info(env, state, action)

    input('Hit enter to run a random policy...')

    total_reward, num_steps = run_random_policy(env)
    print('Agent received total reward of: %f' % total_reward)
    print('Agent took %d steps' % num_steps)


if __name__ == '__main__':
    main()