#!/usr/bin/env python
# Project 1: Navigation
# Udacity Nanodegree: Deep Reinforcement Learning
# https://www.udacity.com/course/deep-reinforcement-learning-nanodegree--nd893
import numpy as np
import matplotlib.pyplot as plt
from unityagents import UnityEnvironment
from qlearning.random_agent import RandomAgent
from qlearning.dqn_agent import DQNAgent
from qlearning.simulation import simulate, moving_average # , train


def main():
    """
    Collect yellow bananas, avoid blue ones
    """
    rnd_seed = 42
    np.random.seed(rnd_seed)

    # file_name points to the Unity environment binary
    env = UnityEnvironment(file_name="envs/Banana_Linux/Banana.x86_64", seed=rnd_seed)  # with visualization
    # env = UnityEnvironment(file_name="envs/Banana_Linux_NoVis/Banana.x86_64", seed=rnd_seed)  # no visualization (about 5% faster)

    # Environments contain brains which are responsible
    # for deciding the actions of their associated agents.
    brain_name = env.brain_names[0]  # get the default brain name
    env_info = env.reset(train_mode=True)[brain_name]
    # 2. Examine the State and Action Spaces
    # The simulation contains a single agent that navigates a large environment.
    # At each time step, it has four actions at its disposal:
    # - `0` - walk forward
    # - `1` - walk backward
    # - `2` - turn left
    # - `3` - turn right
    # The state space has `37` dimensions and contains the agent's velocity,
    # along with ray-based perception of objects around the agent's forward direction.
    # A reward of `+1` is provided for collecting a yellow banana,
    # and a reward of `-1` is provided for collecting a blue banana.
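    # For reference, a single interaction step with the unityagents API looks
    # roughly like the commented sketch below (field names assume the standard
    # BrainInfo interface of the Banana environment):
    #   action = np.random.randint(4)                  # one of the four actions above
    #   env_info = env.step(action)[brain_name]        # advance the environment
    #   next_state = env_info.vector_observations[0]   # 37-dimensional observation
    #   reward = env_info.rewards[0]                   # +1 yellow banana, -1 blue banana
    #   done = env_info.local_done[0]                  # True when the episode ends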
    # Print basic environment info
    action_size = env.brains[brain_name].vector_action_space_size
    state = env_info.vector_observations[0]
    state_size = len(state)
    print('Number of agents: ', len(env_info.agents))
    print('Number of actions:', action_size)
    print('States have length:', state_size)

    # define agents
    layer_sizes = [2*state_size, state_size, 16, 8]
    agents = {
        'random': RandomAgent(action_size),
        'dqn': DQNAgent(state_size, action_size, layer_sizes)
    }
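    # Note: layer_sizes is passed straight to DQNAgent (defined in
    # qlearning/dqn_agent.py, not shown here); presumably these are the
    # hidden-layer widths of the Q-network, i.e. roughly
    #   state (37) -> 74 -> 37 -> 16 -> 8 -> Q-values for the 4 actions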

    # simulate
    scores = simulate(env, agents['dqn'], brain_name, learn=True, n_episodes=1000)

    window_size = 32
    plt.plot(
        range(window_size, len(scores)+1),
        moving_average(scores, window_size)
    )
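    # moving_average comes from qlearning.simulation; a minimal sketch of such a
    # running mean (an assumption here, the actual helper may differ) is
    #   def moving_average(x, w):
    #       return np.convolve(x, np.ones(w) / w, mode='valid')
    # which yields len(x) - w + 1 values, hence the x-axis starting at window_size.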
    plt.title("Reward per episode (running mean)")
    plt.xlabel("number of episodes")
    plt.ylabel("total reward")
    plt.draw()

    # run each agent for a few evaluation episodes (no learning)
    for key in agents:
        simulate(env, agents[key], brain_name, learn=False, n_episodes=5)

    env.close()
    plt.show()


if __name__ == "__main__":
    main()