forked from upb-lea/reinforcement_learning_course_materials
-
Notifications
You must be signed in to change notification settings - Fork 0
/
rocket_env.py
278 lines (202 loc) · 8.17 KB
/
rocket_env.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
# SOURCE:
# https://github.com/osannolik/gym-goddard/blob/master/gym_goddard/envs/goddard_env.py
# Distributed under MIT Licence
import gymnasium as gym
import numpy as np
class Rocket(object):
'''
Expected parameters and methods of the rocket environment being simulated.
V0/H0/M0: Initial velocity/height/weight
M1: Final weight (set equal to dry mass if all fuel should be used)
THRUST_MAX: Maximum possible force of thrust [N]
GAMMA: Fuel consumption [kg/N/s]
DT: Assumed time [s] between calls to step()
'''
V0 = H0 = M0 = M1 = THRUST_MAX = GAMMA = DT = None
H_MAX_RENDER = None # Sets upper window bound for rendering
def drag(self, v, h):
raise NotImplementedError
def g(self, h):
raise NotImplementedError
class Default(Rocket):
'''
Models the surface-to-air missile (SA-2 Guideline) described in
http://dcsl.gatech.edu/papers/jgcd92.pdf
https://www.mcs.anl.gov/~more/cops/bcops/rocket.html
The equations of motion is made dimensionless by scaling and choosing
the model parameters in terms of initial height, mass and gravity.
'''
DT = 0.001
V0 = 0.0
H0 = M0 = G0 = 1.0
H_MAX_RENDER = 1.015
HC = 500.0
MC = 0.6
VC = 620.0
M1 = MC * M0
thrust_to_weight_ratio = 3.5
THRUST_MAX = thrust_to_weight_ratio * G0 * M0
DC = 0.5 * VC * M0 / G0
GAMMA = 1.0 / (0.5*np.sqrt(G0*H0))
def drag(self, v, h):
return self.DC * v * abs(v) * np.exp(-self.HC*(h-self.H0)/self.H0)
def g(self, h):
return self.G0 * (self.H0/h)**2
class SaturnV(Rocket):
'''
Throttled first stage burn of Saturn V rocket
Specifications taken from:
1. https://en.wikipedia.org/wiki/Saturn_V
2. https://web.archive.org/web/20170313142729/http://www.braeunig.us/apollo/saturnV.htm
'''
DT = 0.5
G = 9.81
V0 = 0.0
H0 = 0.0
H_MAX_RENDER = 170e3
# Vehicle mass + fuel: 2.97e6 kg
# Thrust of first stage: 35.1e6 N
M0 = 2.97e6
THRUST_MAX = 35.1e6
# First stage gross and dry weight: 2.29e6 kg, 130e3 kg
# => Saturn V gross weight immediately before first stage separation
M1 = M0 - (2.29e6-130e3) # = 810e3 kg
# Specific pulse: 263 s
GAMMA = 1.0/(G*263.0) # = 387e-6 kg/N/s
D = 0.35 * 0.27 * 113.0 / 2.0
def drag(self, v, h):
return self.D * v * abs(v)
def g(self, h):
return self.G
class GoddardEnv(gym.Env):
metadata = {
'render.modes': ['human', 'rgb_array'],
'video.frames_per_second': 30
}
TIMEOUT = 300
def __init__(self, rocket=Default(), render_mode = "human"):
super(GoddardEnv, self).__init__()
self._r = rocket
self.viewer = None
self.number_of_steps = 0
self.U_INDEX = 0
self.action_space = gym.spaces.box.Box(
low = np.array([0.0]),
high = np.array([1.0]),
shape = (1,),
dtype = np.float32
)
self.V_INDEX, self.H_INDEX, self.M_INDEX = 0, 1, 2
self.observation_space = gym.spaces.box.Box(
low = np.array([np.finfo(np.float32).min, 0.0, self._r.M1]),
high = np.array([np.finfo(np.float32).max, np.finfo(np.float32).max, self._r.M0]),
dtype = np.float32
)
self.render_mode = render_mode
self.reset()
def extras_labels(self):
return ['action', 'thrust', 'drag', 'gravity']
def step(self, action):
self.number_of_steps += 1
v, h, m = self._state
is_tank_empty = (m <= self._r.M1)
a = 0.0 if is_tank_empty else action[self.U_INDEX]
thrust = self._r.THRUST_MAX*a
self._thrust_last = thrust
drag = self._r.drag(v, h)
g = self._r.g(h)
# Forward Euler
self._state = (
0.0 if h==self._r.H0 and v != 0.0 else (v + self._r.DT * ((thrust-drag)/m - g)),
max(h + self._r.DT * v, self._r.H0),
max(m - self._r.DT * self._r.GAMMA * thrust, self._r.M1)
)
self._h_max = max(self._h_max, self._state[self.H_INDEX])
terminated = bool(is_tank_empty and self._state[self.V_INDEX] < 0 and self._h_max > self._r.H0)
truncated = bool(self.number_of_steps >= self.TIMEOUT)
if terminated:
reward = self._h_max - self._r.H0
elif truncated:
reward = -1
else:
reward = 0.0
info = dict(zip(self.extras_labels(), [action[self.U_INDEX], thrust, drag, g]))
if self.render_mode == "human":
self.render(mode=self.render_mode)
return self._observation(), reward, terminated, truncated, info
def maximum_altitude(self):
return self._h_max
def _observation(self, normalize=True):
# normalize
state = np.array(self._state)
if normalize:
return (state - np.array([self._r.V0, self._r.H0, self._r.M1])) / np.array([0.14, 0.015, self._r.M0 - self._r.M1])
else:
return state
def reset(self, seed=None):
self._state = (self._r.V0, self._r.H0, self._r.M0)
self._h_max = self._r.H0
self._thrust_last = None
self.number_of_steps = 0
drag = self._r.drag(self._r.V0, self._r.H0)
g = self._r.g(self._r.H0)
info = dict(zip(self.extras_labels(), [0.0, 0.0, drag, g]))
return self._observation(), info
def render(self, mode='human'):
_, h, m = self._observation(normalize=False)
if self.viewer is None:
from gym.envs.classic_control import rendering
y = self._r.H_MAX_RENDER
y0 = self._r.H0
GY = (y-y0)/20
Y = y-y0+GY
H = Y/10
W = H/10
self.flame_offset = W/2
self.viewer = rendering.Viewer(500, 500)
self.viewer.set_bounds(left=-Y/2, right=Y/2, bottom=y-Y, top=y)
ground = rendering.make_polygon([(-Y/2,y0-GY), (-Y/2,y0), (Y/2,y0), (Y/2,y0-GY)])
ground.set_color(.3, .6, .3)
pad = rendering.make_polygon([(-3*W,y0-GY/3), (-3*W,y0), (3*W,y0), (3*W,y0-GY/3)])
pad.set_color(.6, .6, .6)
rocket = rendering.make_polygon([(-W/2,0), (-W/2,H), (W/2,H), (W/2,0)], filled=True)
rocket.set_color(0, 0, 0)
self.r_trans = rendering.Transform()
rocket.add_attr(self.r_trans)
self.make_fuel_poly = lambda mass: [
(-W/2, 0),
(-W/2, H*((mass-self._r.M1)/(self._r.M0-self._r.M1))),
(W/2, H*((mass-self._r.M1)/(self._r.M0-self._r.M1))),
(W/2, 0)
]
self.fuel = rendering.make_polygon(self.make_fuel_poly(m), filled=True)
self.fuel.set_color(.8, .1, .14)
self.fuel.add_attr(self.r_trans)
flame = rendering.make_circle(radius=W, res=30)
flame.set_color(.96, 0.85, 0.35)
self.f_trans = rendering.Transform()
flame.add_attr(self.f_trans)
flame_outer = rendering.make_circle(radius=2*W, res=30)
flame_outer.set_color(.95, 0.5, 0.2)
self.fo_trans = rendering.Transform()
flame_outer.add_attr(self.fo_trans)
for g in [ground, pad, rocket, self.fuel, flame_outer, flame]:
self.viewer.add_geom(g)
self.r_trans.set_translation(newx=0, newy=h)
self.f_trans.set_translation(newx=0, newy=h)
self.fo_trans.set_translation(newx=0, newy=h-self.flame_offset)
self.fuel.v = self.make_fuel_poly(m)
s = 0 if self._thrust_last is None else self._thrust_last/self._r.THRUST_MAX
self.f_trans.set_scale(newx=s, newy=s)
self.fo_trans.set_scale(newx=s, newy=s)
return self.viewer.render(return_rgb_array=mode == 'rgb_array')
def close(self):
if self.viewer:
self.viewer.close()
self.viewer = None
class GoddardDefaultEnv(GoddardEnv):
def __init__(self):
super(GoddardDefaultEnv, self).__init__(rocket=Default())
class GoddardSaturnEnv(GoddardEnv):
def __init__(self):
super(GoddardSaturnEnv, self).__init__(rocket=SaturnV())