Replace gym with gymnasium. (#2034)
Adaickalavan authored May 23, 2023
1 parent fdf0b7c commit c46f762
Showing 39 changed files with 485 additions and 388 deletions.
3 changes: 3 additions & 0 deletions CHANGELOG.md
@@ -15,6 +15,7 @@ Copy and pasting the git commit messages is __NOT__ enough.
- Added `Condition`, `ConditionRequires`, `ConditionState` and various condition implementations to enable logical operations in scenarios.
- Traffic light signals are now visualized in Envision.
- Interest vehicles now show up in Envision.
- Seed of `hiway-v1` env can be retrieved through a new property `seed`.
### Changed
- Changed waypoints in sumo maps to use more incoming lanes into junctions.
- Increased the cutoff radius for filtering out waypoints that are too far away in junctions in sumo maps.
@@ -27,6 +28,8 @@ Copy and pasting the git commit messages is __NOT__ enough.
- `TrapEntryTactic.wait_to_hijack_limit_s` field now defaults to `0`.
- `EntryTactic` derived classes now contain `condition` to provide extra filtering of candidate actors.
- `EntryTactic` derived classes now contain `start_time`.
- `info` returned by `hiway-v1` in `reset()` and `step()` methods are unified.
- Changed instances of `hiway-v0` and `gym` to use `hiway-v1` and `gymnasium`, respectively.
### Deprecated
- `visdom` is set to be removed from the SMARTS object parameters.
- Deprecated `start_time` on missions.
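The `gymnasium` migration recorded in the entries above drives the signature changes in every example diff below: `reset()` now returns an `(observation, info)` pair, and `step()` returns a five-tuple with separate `terminated` and `truncated` flags. A minimal sketch of that calling convention (not part of this commit; a generic registered environment id stands in for `smarts.env:hiway-v1` so the snippet runs on its own):

import gymnasium as gym

env = gym.make("CartPole-v1")  # stand-in for "smarts.env:hiway-v1"

# gymnasium: reset() accepts a seed and returns (observation, info).
observation, info = env.reset(seed=42)

terminated = truncated = False
while not (terminated or truncated):
    action = env.action_space.sample()  # placeholder policy
    # gymnasium: step() returns five values; the old gym API returned
    # (observation, reward, done, info) with a single `done` flag.
    observation, reward, terminated, truncated, info = env.step(action)

env.close()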
2 changes: 1 addition & 1 deletion docs/minimal.py
@@ -1,4 +1,4 @@
-import gym
+import gymnasium as gym
from smarts.core.agent import Agent
from smarts.core.agent_interface import AgentInterface, AgentType

34 changes: 15 additions & 19 deletions examples/control/chase_via_points.py
@@ -1,16 +1,16 @@
import sys
from pathlib import Path

-import gym
+import gymnasium as gym

sys.path.insert(0, str(Path(__file__).parents[2].absolute()))
from examples.tools.argument_parser import default_argument_parser
from smarts.core.agent import Agent
from smarts.core.agent_interface import AgentInterface, AgentType
from smarts.core.observations import Observation
from smarts.core.utils.episodes import episodes
+from smarts.env.utils.observation_conversion import ObservationOptions
from smarts.sstudio.scenario_construction import build_scenarios
-from smarts.zoo.agent_spec import AgentSpec

N_AGENTS = 3
AGENT_IDS = ["Agent_%i" % i for i in range(N_AGENTS)]
@@ -35,41 +35,37 @@ def act(self, obs: Observation):


def main(scenarios, headless, num_episodes, max_episode_steps=None):
-agent_specs = {
-agent_id: AgentSpec(
-interface=AgentInterface.from_type(
-AgentType.LanerWithSpeed,
-max_episode_steps=max_episode_steps,
-),
-agent_builder=ChaseViaPointsAgent,
+agent_interfaces = {
+agent_id: AgentInterface.from_type(
+AgentType.LanerWithSpeed,
+max_episode_steps=max_episode_steps,
+)
for agent_id in AGENT_IDS
}

env = gym.make(
-"smarts.env:hiway-v0",
+"smarts.env:hiway-v1",
scenarios=scenarios,
-agent_specs=agent_specs,
+agent_interfaces=agent_interfaces,
headless=headless,
sumo_headless=True,
+observation_options=ObservationOptions.unformatted,
)

for episode in episodes(n=num_episodes):
agents = {
-agent_id: agent_spec.build_agent()
-for agent_id, agent_spec in agent_specs.items()
+agent_id: ChaseViaPointsAgent() for agent_id in agent_interfaces.keys()
}
-observations = env.reset()
+observations, _ = env.reset()
episode.record_scenario(env.scenario_log)

-dones = {"__all__": False}
-while not dones["__all__"]:
+terminateds = {"__all__": False}
+while not terminateds["__all__"]:
actions = {
agent_id: agents[agent_id].act(agent_obs)
for agent_id, agent_obs in observations.items()
}
-observations, rewards, dones, infos = env.step(actions)
-episode.record_step(observations, rewards, dones, infos)
+observations, rewards, terminateds, truncateds, infos = env.step(actions)
+episode.record_step(observations, rewards, terminateds, truncateds, infos)

env.close()

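A note on `observation_options=ObservationOptions.unformatted`, which this example now passes to `hiway-v1`: the assumption here (not spelled out in this hunk) is that the option keeps handing agents the classic `smarts.core.observations.Observation` objects rather than the formatted gym-space dictionaries, which is why `ChaseViaPointsAgent.act(obs: Observation)` continues to work unchanged. A hedged sketch of the difference:

from smarts.core.observations import Observation


def act_on_unformatted(obs: Observation) -> float:
    # ObservationOptions.unformatted (assumed): attribute access on the Observation object.
    return obs.ego_vehicle_state.speed


def act_on_formatted(obs: dict) -> float:
    # Formatted options (assumed): the same value reached through nested dicts/arrays.
    return float(obs["ego_vehicle_state"]["speed"])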
18 changes: 11 additions & 7 deletions examples/control/ego_open_agent.py
@@ -7,9 +7,10 @@
import sys
from pathlib import Path

-import gym
+import gymnasium as gym

from smarts.core.utils.episodes import episodes
+from smarts.env.utils.observation_conversion import ObservationOptions
from smarts.sstudio.scenario_construction import build_scenarios

sys.path.insert(0, str(Path(__file__).parents[2].absolute()))
@@ -29,25 +30,28 @@
def main(scenarios, headless, num_episodes):
open_agent_spec = open_agent.entrypoint(debug=False, aggressiveness=3)
env = gym.make(
-"smarts.env:hiway-v0",
+"smarts.env:hiway-v1",
scenarios=scenarios,
agent_interfaces={AGENT_ID: open_agent_spec.interface},
headless=headless,
sumo_headless=True,
+observation_options=ObservationOptions.unformatted,
)

for episode in episodes(n=num_episodes):
agent = open_agent_spec.build_agent()

-observations = env.reset()
+observations, _ = env.reset()
episode.record_scenario(env.scenario_log)

-dones = {"__all__": False}
-while not dones["__all__"]:
+terminateds = {"__all__": False}
+while not terminateds["__all__"]:
agent_obs = observations[AGENT_ID]
agent_action = agent.act(agent_obs)
-observations, rewards, dones, infos = env.step({AGENT_ID: agent_action})
-episode.record_step(observations, rewards, dones, infos)
+observations, rewards, terminateds, truncateds, infos = env.step(
+{AGENT_ID: agent_action}
+)
+episode.record_step(observations, rewards, terminateds, truncateds, infos)

del agent

4 changes: 2 additions & 2 deletions examples/control/hiway_env_v1_lane_follower.py
@@ -12,7 +12,7 @@


class LaneFollowerAgent(Agent):
-def act(self, obs: Dict[Any, Union[Any, Dict]]):
+def act(self, obs):
return (obs["waypoint_paths"]["speed_limit"][0][0], 0)


@@ -38,7 +38,7 @@ def main(scenarios, headless, num_episodes, max_episode_steps=None):
observation, reward, terminated, truncated, info = env.step(
{"SingleAgent": agent_action}
)
-episode.record_step(observation, reward, terminated, info)
+episode.record_step(observation, reward, terminated, truncated, info)

env.close()

17 changes: 9 additions & 8 deletions examples/control/laner.py
@@ -2,13 +2,14 @@
import sys
from pathlib import Path

-import gym
+import gymnasium as gym

sys.path.insert(0, str(Path(__file__).parents[2].absolute()))
from examples.tools.argument_parser import default_argument_parser
from smarts.core.agent import Agent
from smarts.core.agent_interface import AgentInterface, AgentType
from smarts.core.utils.episodes import episodes
+from smarts.env.utils.action_conversion import ActionOptions
from smarts.sstudio.scenario_construction import build_scenarios
from smarts.zoo.agent_spec import AgentSpec

@@ -34,31 +35,31 @@ def main(scenarios, headless, num_episodes, max_episode_steps=None):
}

env = gym.make(
-"smarts.env:hiway-v0",
+"smarts.env:hiway-v1",
scenarios=scenarios,
agent_interfaces={
a_id: a_intrf.interface for a_id, a_intrf in agent_specs.items()
},
headless=headless,
sumo_headless=True,
+action_options=ActionOptions.unformatted,
)

for episode in episodes(n=num_episodes):
agents = {
agent_id: agent_spec.build_agent()
for agent_id, agent_spec in agent_specs.items()
}
-observations = env.reset()
+observations, _ = env.reset()
episode.record_scenario(env.scenario_log)

-dones = {"__all__": False}
-while not dones["__all__"]:
+terminateds = {"__all__": False}
+while not terminateds["__all__"]:
actions = {
agent_id: agents[agent_id].act(agent_obs)
for agent_id, agent_obs in observations.items()
}
-observations, rewards, dones, infos = env.step(actions)
-episode.record_step(observations, rewards, dones, infos)
+observations, rewards, terminateds, truncateds, infos = env.step(actions)
+episode.record_step(observations, rewards, terminateds, truncateds, infos)

env.close()

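laner.py now also requests `action_options=ActionOptions.unformatted`. The assumption behind the sketch below (consistent with the `AgentType.Laner` action space, but not shown in this hunk) is that the option lets the agent keep returning SMARTS's native lane-action strings rather than a gym-formatted action:

import random

from smarts.core.agent import Agent


class RandomLanerAgent(Agent):
    """Hypothetical agent, used only for illustration."""

    def act(self, obs):
        # Classic SMARTS lane-action strings, accepted directly when
        # action_options=ActionOptions.unformatted (assumption).
        return random.choice(
            ["keep_lane", "slow_down", "change_lane_left", "change_lane_right"]
        )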
84 changes: 47 additions & 37 deletions examples/control/parallel_environment.py
@@ -1,4 +1,4 @@
-import gym
+import gymnasium as gym

gym.logger.set_level(40)

@@ -13,10 +13,9 @@
from smarts.core.agent_interface import AgentInterface
from smarts.core.controllers import ActionSpaceType
from smarts.core.observations import Observation
-from smarts.env.hiway_env import HiWayEnv
-from smarts.env.wrappers.parallel_env import ParallelEnv
+from smarts.env.gymnasium.wrappers.parallel_env import ParallelEnv
+from smarts.env.utils.observation_conversion import ObservationOptions
from smarts.sstudio.scenario_construction import build_scenarios
-from smarts.zoo.agent_spec import AgentSpec


class LaneAgent(Agent):
@@ -43,38 +42,34 @@ def main(
agent_ids = [f"Agent_{i}" for i in range(num_agents)]

# Define agent specification
-agent_specs = {
-agent_id: AgentSpec(
-interface=AgentInterface(
-top_down_rgb=True,
-waypoint_paths=True,
-action=ActionSpaceType.LaneWithContinuousSpeed,
-max_episode_steps=max_episode_steps,
-),
-agent_builder=LaneAgent,
+agent_interfaces = {
+agent_id: AgentInterface(
+top_down_rgb=True,
+waypoint_paths=True,
+action=ActionSpaceType.LaneWithContinuousSpeed,
+max_episode_steps=max_episode_steps,
+)
for agent_id in agent_ids
}

-# Unique `sim_name` is required by each HiWayEnv in order to be displayed
+# Unique `sim_name` is required by each HiWayEnvV1 in order to be displayed
# in Envision.
-env_constructor = lambda sim_name: HiWayEnv(
+env_constructor = lambda sim_name, seed: gym.make(
+"smarts.env:hiway-v1",
scenarios=scenarios,
-agent_specs=agent_specs,
+agent_interfaces=agent_interfaces,
sim_name=sim_name,
headless=headless,
+observation_options=ObservationOptions.unformatted,
+seed=seed,
)

-# A list of env constructors of type `Callable[[], gym.Env]`
+# A list of env constructors of type `Callable[[int], gym.Env]`
env_constructors = [
partial(env_constructor, sim_name=f"{sim_name}_{ind}") for ind in range(num_env)
]

# Build multiple agents
-agents = {
-agent_id: agent_spec.build_agent()
-for agent_id, agent_spec in agent_specs.items()
-}
+agents = {agent_id: LaneAgent() for agent_id in agent_interfaces.keys()}

# Create parallel environments
env = ParallelEnv(
@@ -103,27 +98,37 @@ def parallel_env_async(
num_steps (int): Number of steps to step the environment.
"""

-batched_dones = [{"__all__": False} for _ in range(num_env)]
-batched_observations = env.reset()
+batched_terminateds = [{"__all__": False} for _ in range(num_env)]
+batched_truncateds = [{"__all__": False} for _ in range(num_env)]
+batched_observations, _ = env.reset()

for _ in range(num_steps):
# Compute actions for all active(i.e., not done) agents
batched_actions = []
-for observations, dones in zip(batched_observations, batched_dones):
+for observations, terminateds, truncateds in zip(
+batched_observations, batched_terminateds, batched_truncateds
+):
actions = {
agent_id: agents[agent_id].act(agent_obs)
for agent_id, agent_obs in observations.items()
-if not dones.get(agent_id, False)
-or dones[
+if (
+not terminateds.get(agent_id, False)
+and not truncateds.get(agent_id, False)
+)
+or terminateds[
"__all__"
-] # `dones[__all__]==True` implies the env was auto-reset in previous iteration
+] # `terminateds[__all__]==True` implies the env was auto-reset in previous iteration
}
batched_actions.append(actions)

# Step all environments in parallel
-batched_observations, batched_rewards, batched_dones, batched_infos = env.step(
-batched_actions
-)
+(
+batched_observations,
+batched_rewards,
+batched_terminateds,
+batched_truncateds,
+batched_infos,
+) = env.step(batched_actions)

env.close()

Expand All @@ -144,26 +149,31 @@ def parallel_env_sync(
"""

for _ in range(num_episodes):
-batched_dones = [{"__all__": False} for _ in range(num_env)]
-batched_observations = env.reset()
+batched_terminateds = [{"__all__": False} for _ in range(num_env)]
+batched_truncateds = [{"__all__": False} for _ in range(num_env)]
+batched_observations, _ = env.reset()

# Iterate until all environments complete an episode each.
-while not all(dones["__all__"] for dones in batched_dones):
+while not all(terminateds["__all__"] for terminateds in batched_terminateds):
# Compute actions for all active(i.e., not done) agents
batched_actions = []
-for observations, dones in zip(batched_observations, batched_dones):
+for observations, terminateds, truncateds in zip(
+batched_observations, batched_terminateds, batched_truncateds
+):
actions = {
agent_id: agents[agent_id].act(agent_obs)
for agent_id, agent_obs in observations.items()
-if not dones.get(agent_id, False)
+if not terminateds.get(agent_id, False)
+and not truncateds.get(agent_id, False)
}
batched_actions.append(actions)

# Step all environments in parallel
(
batched_observations,
batched_rewards,
-batched_dones,
+batched_terminateds,
+batched_truncateds,
batched_infos,
) = env.step(batched_actions)

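The constructor type in parallel_environment.py changes from `Callable[[], gym.Env]` to `Callable[[int], gym.Env]`, i.e. each environment constructor now accepts a seed. A standalone sketch of that pattern (an assumption about how per-worker seeds are supplied; `ParallelEnv` itself does this internally, and a generic environment id again stands in for `smarts.env:hiway-v1` so the snippet runs on its own):

from functools import partial

import gymnasium as gym


def env_constructor(sim_name: str, seed: int) -> gym.Env:
    # In the example this would be gym.make("smarts.env:hiway-v1", ..., sim_name=sim_name, seed=seed).
    env = gym.make("CartPole-v1")
    env.reset(seed=seed)
    return env


# sim_name is bound per worker; the seed argument is left free, giving Callable[[int], gym.Env].
env_constructors = [partial(env_constructor, sim_name=f"demo_{i}") for i in range(2)]

# Each constructor is then called with its own seed (done by hand here).
envs = [constructor(seed=100 + i) for i, constructor in enumerate(env_constructors)]
for env in envs:
    env.close()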
[The diffs for the remaining 32 changed files are not shown.]
