forked from equinor/team-dogleg
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathenvironment_config.py
76 lines (61 loc) · 2.61 KB
/
environment_config.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
"""
This config file completely describes the "physical" aspects of the environment as well as its reward system.
"""
import numpy as np
# ---------------------------------------------------------------------------
# Angles
# ---------------------------------------------------------------------------
# Angle limits. These are meant to match the ranges of the two angles used
# to describe a sphere in spherical coordinates.
MIN_INCL_ANGLE = 0
MAX_INCL_ANGLE = np.pi
MAX_AZIMUTH_ANGLE = 2 * np.pi

# Upper limits on angular velocity and angular acceleration; the same limits
# are used for both angles.
MAX_ANGVEL = 0.1
MAX_ANGACC = 0.02

# Allowed increment: this value is either added to or removed from the
# angular acceleration of the angles.
ANGACC_INCREMENT = 0.02

# ---------------------------------------------------------------------------
# Step size
# ---------------------------------------------------------------------------
# On each step the bit position is updated with DRILL_SPEED multiplied by the
# cos/sin of one of the angles. STEP_SIZE is an alias for the same value.
STEP_SIZE = 5
DRILL_SPEED = STEP_SIZE

# MC_* counterparts (presumably for the Monte Carlo simulation configured at
# the bottom of this file -- confirm against the consumer).
MC_STEP_SIZE = 10
MC_DRILL_SPEED = MC_STEP_SIZE

# ---------------------------------------------------------------------------
# Environment dimensions
# ---------------------------------------------------------------------------
SCREEN_X = 2000
SCREEN_Y = 2000
SCREEN_Z = 2000

# Step budget available to the agent.
# NOTE: true division makes this a float (1800.0 with the values above).
NUM_MAX_STEPS = 1.5 * ((SCREEN_X + SCREEN_Y + SCREEN_Z) / DRILL_SPEED)

# ---------------------------------------------------------------------------
# Targets
# ---------------------------------------------------------------------------
# Where a target may be placed along each axis, and the allowed range for its
# radius. Each bound is a two-element [low, high] list.
TARGET_BOUND_X = [SCREEN_X * 0.25, SCREEN_X * 0.75]
TARGET_BOUND_Y = [SCREEN_Y * 0.25, SCREEN_Y * 0.75]
TARGET_BOUND_Z = [SCREEN_Z * 0.40, SCREEN_Z * 0.85]
TARGET_RADII_BOUND = [40, 50]

# Number of targets in the environment, and how many of them are kept in the
# observation space at all times.
NUM_TARGETS = 8
TARGET_WINDOW_SIZE = 1

# ---------------------------------------------------------------------------
# Hazards
# ---------------------------------------------------------------------------
# Hazards may be placed anywhere inside the screen.
HAZARD_BOUND_X = [0, SCREEN_X]
HAZARD_BOUND_Y = [0, SCREEN_Y]
HAZARD_BOUND_Z = [0, SCREEN_Z]
HAZARD_RADII_BOUND = [80, 120]

# Number of hazards in the environment, and how many of them are kept in the
# observation space at all times.
NUM_HAZARDS = 8
HAZARD_WINDOW_SIZE = 1

# ---------------------------------------------------------------------------
# Bounds shared by targets and hazards
# ---------------------------------------------------------------------------
VER_DIST_BOUND = [-SCREEN_Z, SCREEN_Z]  # vertical distance
HOR_DIST_BOUND = [0, np.sqrt(SCREEN_X ** 2 + SCREEN_Y ** 2)]  # horizontal distance (upper bound: screen diagonal in the X-Y plane)
REL_AZIMUTH_BOUND = [-np.pi, np.pi]

# ---------------------------------------------------------------------------
# Observation space specs (vectorized bounds)
# ---------------------------------------------------------------------------
SPACE_BOUNDS = [
    0, SCREEN_X,
    0, SCREEN_Y,
    0, SCREEN_Z,
]
# Laid out below as what appear to be flattened (low, high) pairs.
BIT_BOUNDS = [
    0, 2 * np.pi,
    0, np.pi,
    -MAX_ANGVEL, MAX_ANGVEL,
    -MAX_ANGVEL, MAX_ANGVEL,
    -MAX_ANGACC, MAX_ANGACC,
    -MAX_ANGACC, MAX_ANGACC,
]
# Both lists hold references to the shared bound lists defined above (no
# copies), followed by the object-specific radius bound.
HAZARD_BOUNDS = [
    VER_DIST_BOUND,
    HOR_DIST_BOUND,
    REL_AZIMUTH_BOUND,
    HAZARD_RADII_BOUND,
]
TARGET_BOUNDS = [
    VER_DIST_BOUND,
    HOR_DIST_BOUND,
    REL_AZIMUTH_BOUND,
    TARGET_RADII_BOUND,
]

# ---------------------------------------------------------------------------
# Rewards
# ---------------------------------------------------------------------------
STEP_PENALTY = -0.0
ANGULAR_VELOCITY_PENALTY = 0.0
ANGULAR_ACCELERATION_PENALTY = 0.0
OUTSIDE_SCREEN_PENALTY = 0.0
TARGET_REWARD = 100.0
HAZARD_PENALTY = -200.0
ANGLE_REWARD_FACTOR = 0.5
INCLINATION_REWARD_FACTOR = 0.5
FINISHED_EARLY_FACTOR = 1  # points per unused step

# ---------------------------------------------------------------------------
# Monte Carlo simulation specs
# ---------------------------------------------------------------------------
NUM_MONTE_CARLO_ENVS = int(2e4)
MC_PATH_LENGTH_BOUND = [100, 340]
ENVIRONMENT_FILENAME = "environments.txt"