Commit

Still skeptical whether advantage buffer
josiahls committed Feb 4, 2024
1 parent 41a59f8 commit dd04f5f
Showing 4 changed files with 1,848 additions and 215 deletions.
8 changes: 4 additions & 4 deletions fastrl/envs/continuous_debug_env.py
@@ -34,9 +34,9 @@ def __init__(self, goal_position=None, proximity_threshold=0.5):
         self.state = None
 
     def step(self, action):
-        self.state += action[0] # Assuming action is a NumPy array, use the first element
+        self.state[0] += action[0] # Assuming action is a NumPy array, use the first element
 
-        distance_to_goal = np.abs(self.state - self.goal_position)
+        distance_to_goal = np.abs(self.state[0] - self.goal_position)
         reward = -distance_to_goal.item() # Ensure reward is a float
 
         done = distance_to_goal <= self.proximity_threshold
@@ -48,10 +48,10 @@ def step(self, action):
 
     def reset(self, seed=None, options=None):
         super().reset(seed=seed) # Call the superclass reset, which handles the seeding
 
-        self.state = np.array([0.0], dtype=np.float32)
         if self.goal_position is None:
             self.goal_position = np.random.uniform(-10, 10)
+        # The state is {current position, goal position}
+        self.state = np.array([0.0, self.goal_position], dtype=np.float32)
 
         return self.state, {} # Return observation and an empty info dictionary
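For reference, a minimal usage sketch of the environment after this change, illustrating that the observation now carries both the current position and the goal position, and that step only advances the position element. The class name ContinuousDebugEnv and the gymnasium-style five-tuple return from step are assumptions; only the module path, the constructor signature, and the reset/step bodies are visible in this diff.

import numpy as np

# Assumed class name; only the module path appears in this commit.
from fastrl.envs.continuous_debug_env import ContinuousDebugEnv

env = ContinuousDebugEnv(proximity_threshold=0.5)

obs, info = env.reset(seed=0)
# After this commit, obs = [current position, goal position].
print(obs)  # e.g. array([0.0, <sampled goal>], dtype=float32)

action = np.array([0.5], dtype=np.float32)
# Assuming a gymnasium-style step return; the full signature is not shown in this hunk.
obs, reward, terminated, truncated, info = env.step(action)
# Only obs[0] (the position) is updated by step; obs[1] (the goal) stays fixed.
print(obs, reward, terminated)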
