diff --git a/CHANGELOG.md b/CHANGELOG.md
index e6a93e366e..bd0dded488 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -15,6 +15,7 @@ Copy and pasting the git commit messages is __NOT__ enough.
 - Added `Condition`, `ConditionRequires`, `ConditionState` and various condition implementations to enable logical operations in scenarios.
 - Traffic light signals are now visualized in Envision.
 - Interest vehicles now show up in Envision.
+- The seed of the `hiway-v1` env can now be retrieved through a new `seed` property.
 ### Changed
 - Changed waypoints in sumo maps to use more incoming lanes into junctions.
 - Increased the cutoff radius for filtering out waypoints that are too far away in junctions in sumo maps.
@@ -27,6 +28,8 @@ Copy and pasting the git commit messages is __NOT__ enough.
 - `TrapEntryTactic.wait_to_hijack_limit_s` field now defaults to `0`.
 - `EntryTactic` derived classes now contain `condition` to provide extra filtering of candidate actors.
 - `EntryTactic` derived classes now contain `start_time`.
+- The `info` returned by the `hiway-v1` env's `reset()` and `step()` methods is now unified.
+- Changed instances of `hiway-v0` and `gym` to use `hiway-v1` and `gymnasium`, respectively.
 ### Deprecated
 - `visdom` is set to be removed from the SMARTS object parameters.
 - Deprecated `start_time` on missions.
diff --git a/docs/minimal.py b/docs/minimal.py
index d40198b9d3..597c758847 100644
--- a/docs/minimal.py
+++ b/docs/minimal.py
@@ -1,4 +1,4 @@
-import gym
+import gymnasium as gym
 
 from smarts.core.agent import Agent
 from smarts.core.agent_interface import AgentInterface, AgentType
diff --git a/examples/control/chase_via_points.py b/examples/control/chase_via_points.py
index 13df01d537..fcb4b40a11 100644
--- a/examples/control/chase_via_points.py
+++ b/examples/control/chase_via_points.py
@@ -1,7 +1,7 @@
 import sys
 from pathlib import Path
 
-import gym
+import gymnasium as gym
 
 sys.path.insert(0, str(Path(__file__).parents[2].absolute()))
 from examples.tools.argument_parser import default_argument_parser
@@ -9,8 +9,8 @@
 from smarts.core.agent_interface import AgentInterface, AgentType
 from smarts.core.observations import Observation
 from smarts.core.utils.episodes import episodes
+from smarts.env.utils.observation_conversion import ObservationOptions
 from smarts.sstudio.scenario_construction import build_scenarios
-from smarts.zoo.agent_spec import AgentSpec
 
 N_AGENTS = 3
 AGENT_IDS = ["Agent_%i" % i for i in range(N_AGENTS)]
@@ -35,41 +35,37 @@ def act(self, obs: Observation):
 
 
 def main(scenarios, headless, num_episodes, max_episode_steps=None):
-    agent_specs = {
-        agent_id: AgentSpec(
-            interface=AgentInterface.from_type(
-                AgentType.LanerWithSpeed,
-                max_episode_steps=max_episode_steps,
-            ),
-            agent_builder=ChaseViaPointsAgent,
+    agent_interfaces = {
+        agent_id: AgentInterface.from_type(
+            AgentType.LanerWithSpeed,
+            max_episode_steps=max_episode_steps,
         )
         for agent_id in AGENT_IDS
     }
 
     env = gym.make(
-        "smarts.env:hiway-v0",
+        "smarts.env:hiway-v1",
         scenarios=scenarios,
-        agent_specs=agent_specs,
+        agent_interfaces=agent_interfaces,
         headless=headless,
-        sumo_headless=True,
+        observation_options=ObservationOptions.unformatted,
     )
 
     for episode in episodes(n=num_episodes):
         agents = {
-            agent_id: agent_spec.build_agent()
-            for agent_id, agent_spec in agent_specs.items()
+            agent_id: ChaseViaPointsAgent() for agent_id in agent_interfaces.keys()
         }
-        observations = env.reset()
+        observations, _ = env.reset()
         episode.record_scenario(env.scenario_log)
 
-        dones = {"__all__": False}
-        while not dones["__all__"]:
+        terminateds = {"__all__": False}
+        while not terminateds["__all__"]:
             actions = {
                 agent_id: agents[agent_id].act(agent_obs)
                 for agent_id, agent_obs in observations.items()
             }
-            observations, rewards, dones, infos = env.step(actions)
-            episode.record_step(observations, rewards, dones, infos)
+            observations, rewards, terminateds, truncateds, infos = env.step(actions)
+            episode.record_step(observations, rewards, terminateds, truncateds, infos)
 
     env.close()
diff --git a/examples/control/ego_open_agent.py b/examples/control/ego_open_agent.py
index ddba19c533..4d612aa0e8 100644
--- a/examples/control/ego_open_agent.py
+++ b/examples/control/ego_open_agent.py
@@ -7,9 +7,10 @@
 import sys
 from pathlib import Path
 
-import gym
+import gymnasium as gym
 
 from smarts.core.utils.episodes import episodes
+from smarts.env.utils.observation_conversion import ObservationOptions
 from smarts.sstudio.scenario_construction import build_scenarios
 
 sys.path.insert(0, str(Path(__file__).parents[2].absolute()))
@@ -29,25 +30,28 @@ def main(scenarios, headless, num_episodes):
     open_agent_spec = open_agent.entrypoint(debug=False, aggressiveness=3)
     env = gym.make(
-        "smarts.env:hiway-v0",
+        "smarts.env:hiway-v1",
         scenarios=scenarios,
         agent_interfaces={AGENT_ID: open_agent_spec.interface},
         headless=headless,
         sumo_headless=True,
+        observation_options=ObservationOptions.unformatted,
     )
 
     for episode in episodes(n=num_episodes):
         agent = open_agent_spec.build_agent()
-        observations = env.reset()
+        observations, _ = env.reset()
         episode.record_scenario(env.scenario_log)
 
-        dones = {"__all__": False}
-        while not dones["__all__"]:
+        terminateds = {"__all__": False}
+        while not terminateds["__all__"]:
             agent_obs = observations[AGENT_ID]
             agent_action = agent.act(agent_obs)
-            observations, rewards, dones, infos = env.step({AGENT_ID: agent_action})
-            episode.record_step(observations, rewards, dones, infos)
+            observations, rewards, terminateds, truncateds, infos = env.step(
+                {AGENT_ID: agent_action}
+            )
+            episode.record_step(observations, rewards, terminateds, truncateds, infos)
 
         del agent
diff --git a/examples/control/hiway_env_v1_lane_follower.py b/examples/control/hiway_env_v1_lane_follower.py
index a221b7981c..f1d6ed662d 100644
--- a/examples/control/hiway_env_v1_lane_follower.py
+++ b/examples/control/hiway_env_v1_lane_follower.py
@@ -12,7 +12,7 @@
 
 
 class LaneFollowerAgent(Agent):
-    def act(self, obs: Dict[Any, Union[Any, Dict]]):
+    def act(self, obs):
         return (obs["waypoint_paths"]["speed_limit"][0][0], 0)
 
 
@@ -38,7 +38,7 @@ def main(scenarios, headless, num_episodes, max_episode_steps=None):
             observation, reward, terminated, truncated, info = env.step(
                 {"SingleAgent": agent_action}
             )
-            episode.record_step(observation, reward, terminated, info)
+            episode.record_step(observation, reward, terminated, truncated, info)
 
     env.close()
diff --git a/examples/control/laner.py b/examples/control/laner.py
index 1cee419dd3..c1584dfbc2 100644
--- a/examples/control/laner.py
+++ b/examples/control/laner.py
@@ -2,13 +2,14 @@
 import sys
 from pathlib import Path
 
-import gym
+import gymnasium as gym
 
 sys.path.insert(0, str(Path(__file__).parents[2].absolute()))
 from examples.tools.argument_parser import default_argument_parser
 from smarts.core.agent import Agent
 from smarts.core.agent_interface import AgentInterface, AgentType
 from smarts.core.utils.episodes import episodes
+from smarts.env.utils.action_conversion import ActionOptions
 from smarts.sstudio.scenario_construction import build_scenarios
 from smarts.zoo.agent_spec import AgentSpec
 
@@ -34,13 +35,13 @@ def main(scenarios, headless, num_episodes, max_episode_steps=None):
     }
 
     env = gym.make(
-        "smarts.env:hiway-v0",
+        "smarts.env:hiway-v1",
         scenarios=scenarios,
         agent_interfaces={
             a_id: a_intrf.interface for a_id, a_intrf in agent_specs.items()
         },
         headless=headless,
-        sumo_headless=True,
+        action_options=ActionOptions.unformatted,
     )
 
     for episode in episodes(n=num_episodes):
@@ -48,17 +49,17 @@ def main(scenarios, headless, num_episodes, max_episode_steps=None):
             agent_id: agent_spec.build_agent()
             for agent_id, agent_spec in agent_specs.items()
         }
-        observations = env.reset()
+        observations, _ = env.reset()
         episode.record_scenario(env.scenario_log)
 
-        dones = {"__all__": False}
-        while not dones["__all__"]:
+        terminateds = {"__all__": False}
+        while not terminateds["__all__"]:
             actions = {
                 agent_id: agents[agent_id].act(agent_obs)
                 for agent_id, agent_obs in observations.items()
             }
-            observations, rewards, dones, infos = env.step(actions)
-            episode.record_step(observations, rewards, dones, infos)
+            observations, rewards, terminateds, truncateds, infos = env.step(actions)
+            episode.record_step(observations, rewards, terminateds, truncateds, infos)
 
     env.close()
diff --git a/examples/control/parallel_environment.py b/examples/control/parallel_environment.py
index 5c46fe90e8..8b45fbccd7 100644
--- a/examples/control/parallel_environment.py
+++ b/examples/control/parallel_environment.py
@@ -1,4 +1,4 @@
-import gym
+import gymnasium as gym
 
 gym.logger.set_level(40)
 
@@ -13,10 +13,9 @@
 from smarts.core.agent_interface import AgentInterface
 from smarts.core.controllers import ActionSpaceType
 from smarts.core.observations import Observation
-from smarts.env.hiway_env import HiWayEnv
-from smarts.env.wrappers.parallel_env import ParallelEnv
+from smarts.env.gymnasium.wrappers.parallel_env import ParallelEnv
+from smarts.env.utils.observation_conversion import ObservationOptions
 from smarts.sstudio.scenario_construction import build_scenarios
-from smarts.zoo.agent_spec import AgentSpec
 
 
 class LaneAgent(Agent):
@@ -43,38 +42,34 @@ def main(
     agent_ids = [f"Agent_{i}" for i in range(num_agents)]
 
     # Define agent specification
-    agent_specs = {
-        agent_id: AgentSpec(
-            interface=AgentInterface(
-                top_down_rgb=True,
-                waypoint_paths=True,
-                action=ActionSpaceType.LaneWithContinuousSpeed,
-                max_episode_steps=max_episode_steps,
-            ),
-            agent_builder=LaneAgent,
+    agent_interfaces = {
+        agent_id: AgentInterface(
+            top_down_rgb=True,
+            waypoint_paths=True,
+            action=ActionSpaceType.LaneWithContinuousSpeed,
+            max_episode_steps=max_episode_steps,
         )
         for agent_id in agent_ids
     }
 
-    # Unique `sim_name` is required by each HiWayEnv in order to be displayed
+    # Unique `sim_name` is required by each HiWayEnvV1 in order to be displayed
     # in Envision.
-    env_constructor = lambda sim_name: HiWayEnv(
+    env_constructor = lambda sim_name, seed: gym.make(
+        "smarts.env:hiway-v1",
         scenarios=scenarios,
-        agent_specs=agent_specs,
+        agent_interfaces=agent_interfaces,
         sim_name=sim_name,
         headless=headless,
+        observation_options=ObservationOptions.unformatted,
+        seed=seed,
     )
-
-    # A list of env constructors of type `Callable[[], gym.Env]`
+    # A list of env constructors of type `Callable[[int], gym.Env]`
     env_constructors = [
         partial(env_constructor, sim_name=f"{sim_name}_{ind}") for ind in range(num_env)
     ]
 
     # Build multiple agents
-    agents = {
-        agent_id: agent_spec.build_agent()
-        for agent_id, agent_spec in agent_specs.items()
-    }
+    agents = {agent_id: LaneAgent() for agent_id in agent_interfaces.keys()}
 
     # Create parallel environments
     env = ParallelEnv(
@@ -103,27 +98,37 @@ def parallel_env_async(
         num_steps (int): Number of steps to step the environment.
     """
 
-    batched_dones = [{"__all__": False} for _ in range(num_env)]
-    batched_observations = env.reset()
+    batched_terminateds = [{"__all__": False} for _ in range(num_env)]
+    batched_truncateds = [{"__all__": False} for _ in range(num_env)]
+    batched_observations, _ = env.reset()
    for _ in range(num_steps):
         # Compute actions for all active(i.e., not done) agents
         batched_actions = []
-        for observations, dones in zip(batched_observations, batched_dones):
+        for observations, terminateds, truncateds in zip(
+            batched_observations, batched_terminateds, batched_truncateds
+        ):
             actions = {
                 agent_id: agents[agent_id].act(agent_obs)
                 for agent_id, agent_obs in observations.items()
-                if not dones.get(agent_id, False)
-                or dones[
+                if (
+                    not terminateds.get(agent_id, False)
+                    and not truncateds.get(agent_id, False)
+                )
+                or terminateds[
                     "__all__"
-                ]  # `dones[__all__]==True` implies the env was auto-reset in previous iteration
+                ]  # `terminateds[__all__]==True` implies the env was auto-reset in previous iteration
             }
             batched_actions.append(actions)
 
         # Step all environments in parallel
-        batched_observations, batched_rewards, batched_dones, batched_infos = env.step(
-            batched_actions
-        )
+        (
+            batched_observations,
+            batched_rewards,
+            batched_terminateds,
+            batched_truncateds,
+            batched_infos,
+        ) = env.step(batched_actions)
 
     env.close()
 
@@ -144,18 +149,22 @@ def parallel_env_sync(
     """
 
     for _ in range(num_episodes):
-        batched_dones = [{"__all__": False} for _ in range(num_env)]
-        batched_observations = env.reset()
+        batched_terminateds = [{"__all__": False} for _ in range(num_env)]
+        batched_truncateds = [{"__all__": False} for _ in range(num_env)]
+        batched_observations, _ = env.reset()
 
         # Iterate until all environments complete an episode each.
-        while not all(dones["__all__"] for dones in batched_dones):
+        while not all(terminateds["__all__"] for terminateds in batched_terminateds):
             # Compute actions for all active(i.e., not done) agents
             batched_actions = []
-            for observations, dones in zip(batched_observations, batched_dones):
+            for observations, terminateds, truncateds in zip(
+                batched_observations, batched_terminateds, batched_truncateds
+            ):
                 actions = {
                     agent_id: agents[agent_id].act(agent_obs)
                     for agent_id, agent_obs in observations.items()
-                    if not dones.get(agent_id, False)
+                    if not terminateds.get(agent_id, False)
+                    and not truncateds.get(agent_id, False)
                 }
                 batched_actions.append(actions)
 
@@ -163,7 +172,8 @@
             (
                 batched_observations,
                 batched_rewards,
-                batched_dones,
+                batched_terminateds,
+                batched_truncateds,
                 batched_infos,
             ) = env.step(batched_actions)
diff --git a/examples/control/trajectory_tracking.py b/examples/control/trajectory_tracking.py
index fd64a43061..1d3df6de6e 100644
--- a/examples/control/trajectory_tracking.py
+++ b/examples/control/trajectory_tracking.py
@@ -1,15 +1,16 @@
 import sys
 from pathlib import Path
 
-import gym
+import gymnasium as gym
 
 sys.path.insert(0, str(Path(__file__).parents[2].absolute()))
 from examples.tools.argument_parser import default_argument_parser
 from smarts.core.agent import Agent
 from smarts.core.agent_interface import AgentInterface, AgentType
 from smarts.core.utils.episodes import episodes
+from smarts.env.utils.action_conversion import ActionOptions
+from smarts.env.utils.observation_conversion import ObservationOptions
 from smarts.sstudio.scenario_construction import build_scenarios
-from smarts.zoo.agent_spec import AgentSpec
 
 AGENT_ID = "Agent-007"
 
@@ -39,32 +40,34 @@ def act(self, obs):
 
 
 def main(scenarios, headless, num_episodes, max_episode_steps=None):
-    agent_spec = AgentSpec(
-        interface=AgentInterface.from_type(
+    agent_interfaces = {
+        AGENT_ID: AgentInterface.from_type(
             AgentType.Tracker, max_episode_steps=max_episode_steps
         ),
-        agent_builder=TrackingAgent,
-    )
+    }
 
     env = gym.make(
-        "smarts.env:hiway-v0",
+        "smarts.env:hiway-v1",
         scenarios=scenarios,
-        agent_specs={AGENT_ID: agent_spec},
+        agent_interfaces=agent_interfaces,
         headless=headless,
-        sumo_headless=True,
+        observation_options=ObservationOptions.unformatted,
+        action_options=ActionOptions.unformatted,
     )
 
     for episode in episodes(n=num_episodes):
-        agent = agent_spec.build_agent()
-        observations = env.reset()
+        agent = TrackingAgent()
+        observations, _ = env.reset()
         episode.record_scenario(env.scenario_log)
 
-        dones = {"__all__": False}
-        while not dones["__all__"]:
+        terminateds = {"__all__": False}
+        while not terminateds["__all__"]:
             agent_obs = observations[AGENT_ID]
             agent_action = agent.act(agent_obs)
-            observations, rewards, dones, infos = env.step({AGENT_ID: agent_action})
-            episode.record_step(observations, rewards, dones, infos)
+            observations, rewards, terminateds, truncateds, infos = env.step(
+                {AGENT_ID: agent_action}
+            )
+            episode.record_step(observations, rewards, terminateds, truncateds, infos)
 
     env.close()
diff --git a/examples/egoless.py b/examples/egoless.py
index bc0b49fa4f..f5e07c4415 100644
--- a/examples/egoless.py
+++ b/examples/egoless.py
@@ -1,6 +1,6 @@
 from pathlib import Path
 
-import gym
+import gymnasium as gym
 from tools.argument_parser import default_argument_parser
 
 from smarts.core.utils.episodes import episodes
@@ -9,11 +9,10 @@
 
 def main(scenarios, headless, num_episodes, max_episode_steps=None):
     env = gym.make(
-        "smarts.env:hiway-v0",
+        "smarts.env:hiway-v1",
         scenarios=scenarios,
-        agent_specs={},
+        agent_interfaces={},
         headless=headless,
-        sumo_headless=True,
     )
 
     if max_episode_steps is None:
@@ -25,7 +24,7 @@ def main(scenarios, headless, num_episodes, max_episode_steps=None):
         for _ in range(max_episode_steps):
             env.step({})
-            episode.record_step({}, {}, {}, {})
+            episode.record_step({}, {}, {}, {}, {})
 
     env.close()
diff --git a/examples/env/figure_eight_env.py b/examples/env/figure_eight_env.py
index 8cc46ac91a..e758677d88 100644
--- a/examples/env/figure_eight_env.py
+++ b/examples/env/figure_eight_env.py
@@ -1,43 +1,35 @@
 from pathlib import Path
 
-import gym
+import gymnasium as gym
 
 from smarts.core.agent_interface import AgentInterface, AgentType
-from smarts.env.wrappers.single_agent import SingleAgent
-from smarts.zoo.agent_spec import AgentSpec
-
-agent_spec = AgentSpec(
-    interface=AgentInterface.from_type(
-        AgentType.Laner,
-        max_episode_steps=150,
-        top_down_rgb=True,
-        occupancy_grid_map=True,
-        drivable_area_grid_map=True,
-    ),
-    agent_builder=None,
+from smarts.env.gymnasium.wrappers.single_agent import SingleAgent
+
+agent_interface = AgentInterface.from_type(
+    AgentType.Laner,
+    max_episode_steps=150,
+    top_down_rgb=True,
+    occupancy_grid_map=True,
+    drivable_area_grid_map=True,
 )
 
 
 def entry_point(*args, **kwargs):
-    from smarts.env.hiway_env import HiWayEnv
-
     scenario = str(
         (Path(__file__).parent / "../../scenarios/sumo/figure_eight").resolve()
     )
-    ## Note: can build the scenario here
+    # Note: can build the scenario here
     from smarts.sstudio.scenario_construction import build_scenario
 
     build_scenario(scenario=scenario, clean=True)
 
-    hiwayenv = HiWayEnv(
-        agent_specs={"agent-007": agent_spec},
+    env = gym.make(
+        "smarts.env:hiway-v1",
+        agent_interfaces={"agent-007": agent_interface},
         scenarios=[scenario],
         headless=True,
-        sumo_headless=True,
     )
-    hiwayenv.metadata["render.modes"] = set(hiwayenv.metadata["render.modes"]) | {
-        "rgb_array"
-    }
-    return SingleAgent(hiwayenv)
+    env.metadata["render.modes"] = set(env.metadata["render.modes"]) | {"rgb_array"}
+    return SingleAgent(env)
 
 
 gym.register("figure_eight-v0", entry_point=entry_point)
diff --git a/examples/replay/replay_klws_agent.py b/examples/replay/replay_klws_agent.py
index bfa3e60462..b94c92c459 100644
--- a/examples/replay/replay_klws_agent.py
+++ b/examples/replay/replay_klws_agent.py
@@ -5,12 +5,13 @@
 import sys
 from pathlib import Path
 
-import gym
+import gymnasium as gym
 
 sys.path.insert(0, str(Path(__file__).parents[2].absolute()))
 from tools.argument_parser import default_argument_parser
 
 from smarts.core.utils.episodes import episodes
+from smarts.env.utils.observation_conversion import ObservationOptions
 from smarts.zoo.registry import make as zoo_make
 
 logging.basicConfig(level=logging.INFO)
@@ -72,38 +73,40 @@ def main(scenarios, sim_name, headless, seed, speed, max_steps, save_dir, write):
     copy_scenarios(save_dir, scenarios)
 
     env = gym.make(
-        "smarts.env:hiway-v0",
+        "smarts.env:hiway-v1",
         scenarios=scenarios,
-        agent_specs={AGENT_ID: agent_spec},
+        agent_interfaces={AGENT_ID: agent_spec.interface},
         sim_name=sim_name,
         headless=headless,
         timestep_sec=0.1,
-        sumo_headless=True,
         seed=seed,
+        observation_options=ObservationOptions.unformatted,
     )
 
     # Carry out the experiment
     episode = next(episodes(n=1))
     agent = agent_spec.build_agent()
-    observations = env.reset()
+    observations, _ = env.reset()
 
-    dones = {"__all__": False}
+    terminateds = {"__all__": False}
 
     MAX_STEPS = 2550
     i = 0
     try:
-        while not dones["__all__"] and i < max_steps:
+        while not terminateds["__all__"] and i < max_steps:
             agent_obs = observations[AGENT_ID]
             agent_action = agent.act(agent_obs)
-            observations, rewards, dones, infos = env.step({AGENT_ID: agent_action})
+            observations, rewards, terminateds, truncateds, infos = env.step(
+                {AGENT_ID: agent_action}
+            )
             i += 1
             if i % 10 == 0:
                 print("Step: ", i)
-            episode.record_step(observations, rewards, dones, infos)
+            episode.record_step(observations, rewards, terminateds, truncateds, infos)
     except KeyboardInterrupt:
         # discard result
         i = MAX_STEPS
     finally:
-        if dones["__all__"]:
+        if terminateds["__all__"]:
             i = MAX_STEPS
         try:
             episode.record_scenario(env.scenario_log)
diff --git a/examples/rl/drive/train/env.py b/examples/rl/drive/train/env.py
index 2730d0f3a8..6f1668b56f 100644
--- a/examples/rl/drive/train/env.py
+++ b/examples/rl/drive/train/env.py
@@ -13,7 +13,7 @@ def make_env(env_id, scenario, agent_interface, config, seed):
     from train.reward import Reward
 
     from smarts.env.gymnasium.wrappers.api_reversion import Api021Reversion
-    from smarts.env.wrappers.single_agent import SingleAgent
+    from smarts.env.gymnasium.wrappers.single_agent import SingleAgent
 
     env = gym.make(
         env_id,
@@ -24,8 +24,8 @@ def make_env(env_id, scenario, agent_interface, config, seed):
         headless=not config.head,  # If False, enables Envision display.
     )
     env = Reward(env)
-    env = Api021Reversion(env)
     env = SingleAgent(env)
+    env = Api021Reversion(env)
     env = Preprocess(env, agent_interface)
     env = Monitor(env)
diff --git a/examples/rl/platoon/train/env.py b/examples/rl/platoon/train/env.py
index 2730d0f3a8..6f1668b56f 100644
--- a/examples/rl/platoon/train/env.py
+++ b/examples/rl/platoon/train/env.py
@@ -13,7 +13,7 @@ def make_env(env_id, scenario, agent_interface, config, seed):
     from train.reward import Reward
 
     from smarts.env.gymnasium.wrappers.api_reversion import Api021Reversion
-    from smarts.env.wrappers.single_agent import SingleAgent
+    from smarts.env.gymnasium.wrappers.single_agent import SingleAgent
 
     env = gym.make(
         env_id,
@@ -24,8 +24,8 @@ def make_env(env_id, scenario, agent_interface, config, seed):
         headless=not config.head,  # If False, enables Envision display.
     )
     env = Reward(env)
-    env = Api021Reversion(env)
     env = SingleAgent(env)
+    env = Api021Reversion(env)
     env = Preprocess(env, agent_interface)
     env = Monitor(env)
diff --git a/examples/tools/regression_rllib.py b/examples/tools/regression_rllib.py
index 95cf6a46fb..2986c0db7d 100644
--- a/examples/tools/regression_rllib.py
+++ b/examples/tools/regression_rllib.py
@@ -5,7 +5,7 @@
 import tempfile
 from pathlib import Path
 
-import gym
+import gymnasium as gym
 import numpy as np
 import pandas as pd
diff --git a/smarts/core/agent.py b/smarts/core/agent.py
index 1331b4e417..76e266fca2 100644
--- a/smarts/core/agent.py
+++ b/smarts/core/agent.py
@@ -21,8 +21,6 @@
 import warnings
 from typing import Any, Callable
 
-from smarts.core.observations import Observation
-
 warnings.simplefilter("once")
 
 logger = logging.getLogger(__name__)
@@ -49,7 +47,7 @@ def act(self, obs):
 
         return FunctionAgent()
 
-    def act(self, obs: Observation, **configs):
+    def act(self, obs, **configs):
         """The agent action. See documentation on observations, `AgentSpec`, and `AgentInterface`.
 
         Expects an adapted observation and returns an unadapted action.
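Every example and test in this changeset follows the same migration pattern, so a minimal sketch of the converted caller-side loop may help reviewers. This is only a sketch under the changeset's own conventions: the `hiway-v1` registration, the `(observations, infos)` return of `reset()`, the gymnasium-style 5-tuple return of `step()`, and the new `seed` property all come from this diff, while the agent id, scenario path, and episode cap are illustrative placeholders.

    import gymnasium as gym

    from smarts.core.agent_interface import AgentInterface, AgentType
    from smarts.env.utils.action_conversion import ActionOptions
    from smarts.env.utils.observation_conversion import ObservationOptions

    agent_interfaces = {
        "Agent-007": AgentInterface.from_type(AgentType.Laner, max_episode_steps=100)
    }
    env = gym.make(
        "smarts.env:hiway-v1",
        scenarios=["scenarios/sumo/loop"],  # illustrative scenario path
        agent_interfaces=agent_interfaces,
        headless=True,
        # Unformatted options keep raw Observation objects and "keep_lane" string
        # actions, matching the converted examples above.
        observation_options=ObservationOptions.unformatted,
        action_options=ActionOptions.unformatted,
        seed=42,
    )

    # reset() now returns (observations, infos) instead of just observations.
    observations, infos = env.reset()
    terminateds = {"__all__": False}
    while not terminateds["__all__"]:
        actions = {agent_id: "keep_lane" for agent_id in observations}
        # step() now returns the gymnasium 5-tuple: the old `dones` is split
        # into `terminateds` and `truncateds`.
        observations, rewards, terminateds, truncateds, infos = env.step(actions)
    # The new `seed` property from this changeset can be read back on the base env.
    print(env.unwrapped.seed)
    env.close()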
diff --git a/smarts/core/tests/test_env_frame_rate.py b/smarts/core/tests/test_env_frame_rate.py
index 9f8212fedd..1e0460bf2a 100644
--- a/smarts/core/tests/test_env_frame_rate.py
+++ b/smarts/core/tests/test_env_frame_rate.py
@@ -23,7 +23,7 @@
 import logging
 import time
 
-import gym
+import gymnasium as gym
 import pytest
 
 from smarts.core.agent import Agent
@@ -61,13 +61,12 @@ def env_and_spec(scenarios, seed, headless=True, max_episode_steps=None):
     )
 
     env = gym.make(
-        "smarts.env:hiway-v0",
+        "smarts.env:hiway-v1",
         scenarios=scenarios,
-        agent_specs={AGENT_ID: agent_spec},
+        agent_interfaces={AGENT_ID: agent_spec.interface},
         sim_name=None,
         headless=headless,
         timestep_sec=0.1,
-        sumo_headless=True,
         seed=seed,
     )
     return env, agent_spec
@@ -79,21 +78,23 @@ def test_env_frame_test(scenarios, seed):
     for episode in episodes(n=10):
         episode_counter += 1
         agent = agent_spec.build_agent()
-        observations = env.reset()
+        observations, _ = env.reset()
         episode.record_scenario(env.scenario_log)
 
-        dones = {"__all__": False}
+        terminateds = {"__all__": False}
         maximum_frame_rate = 0
         minimum_frame_rate = float("inf")
         step_counter = 0
         fps_sum = 0
 
-        while not dones["__all__"]:
+        while not terminateds["__all__"]:
             agent_obs = observations[AGENT_ID]
             agent_action = agent.act(agent_obs)
             step_start_time = int(time.time() * 1000)
-            observations, rewards, dones, infos = env.step({AGENT_ID: agent_action})
+            observations, rewards, terminateds, truncateds, infos = env.step(
+                {AGENT_ID: agent_action}
+            )
             step_end_time = int(time.time() * 1000)
             delta = step_end_time - step_start_time
             step_fps = round(1000 / delta, 2)
@@ -104,7 +105,7 @@ def test_env_frame_test(scenarios, seed):
                 f"The time delta at episode {episode_counter}, step {step_counter+1} is {delta} milliseconds which is {step_fps} fps."
             )
-            episode.record_step(observations, rewards, dones, infos)
+            episode.record_step(observations, rewards, terminateds, truncateds, infos)
             step_counter += 1
         avg_frame_rate = fps_sum / (step_counter or 1)
         test_logger.info(
diff --git a/smarts/core/tests/test_notebook.py b/smarts/core/tests/test_notebook.py
index 9f70d3f25c..ec755ad154 100644
--- a/smarts/core/tests/test_notebook.py
+++ b/smarts/core/tests/test_notebook.py
@@ -23,7 +23,7 @@
 import os
 import tempfile
 
-import gym
+import gymnasium as gym
 import importlib_resources
 import pytest
 import pytest_notebook.nb_regression as nb
@@ -61,27 +61,28 @@ def run_scenario(
     )
 
     env = gym.make(
-        "smarts.env:hiway-v0",
+        "smarts.env:hiway-v1",
         scenarios=scenarios,
-        agent_specs={AGENT_ID: agent_spec},
+        agent_interfaces={AGENT_ID: agent_spec.interface},
         sim_name=sim_name,
         headless=headless,
         fixed_timestep_sec=0.1,
-        sumo_headless=True,
         seed=seed,
     )
 
     for episode in episodes(n=num_episodes):
         agent = agent_spec.build_agent()
-        observations = env.reset()
+        observations, _ = env.reset()
         episode.record_scenario(env.scenario_log)
 
-        dones = {"__all__": False}
-        while not dones["__all__"]:
+        terminateds = {"__all__": False}
+        while not terminateds["__all__"]:
             agent_obs = observations[AGENT_ID]
             agent_action = agent.act(agent_obs)
-            observations, rewards, dones, infos = env.step({AGENT_ID: agent_action})
-            episode.record_step(observations, rewards, dones, infos)
+            observations, rewards, terminateds, truncateds, infos = env.step(
+                {AGENT_ID: agent_action}
+            )
+            episode.record_step(observations, rewards, terminateds, truncateds, infos)
 
     env.close()
diff --git a/smarts/core/tests/test_observations.py b/smarts/core/tests/test_observations.py
index b102f75d92..f1b71e8fac 100644
--- a/smarts/core/tests/test_observations.py
+++ b/smarts/core/tests/test_observations.py
@@ -23,7 +23,7 @@
 import math
 from typing import Dict
 
-import gym
+import gymnasium as gym
 import numpy as np
 import pytest
 from panda3d.core import OrthographicLens, Point2, Point3
@@ -53,6 +53,7 @@
 from smarts.core.signals import SignalLightState
 from smarts.core.smarts import SMARTS
 from smarts.core.sumo_traffic_simulation import SumoTrafficSimulation
+from smarts.env.utils.observation_conversion import ObservationOptions
 from smarts.zoo.agent_spec import AgentSpec
 
 logging.basicConfig(level=logging.INFO)
@@ -93,19 +94,20 @@ def agent_interface():
 def agent_spec(agent_interface):
     return AgentSpec(
         interface=agent_interface,
-        agent_builder=lambda: Agent.from_function(lambda _: "keep_lane"),
+        agent_builder=lambda: Agent.from_function(lambda _: 0),
     )
 
 
 @pytest.fixture
 def env(agent_spec: AgentSpec):
     _env = gym.make(
-        "smarts.env:hiway-v0",
+        "smarts.env:hiway-v1",
         scenarios=["scenarios/sumo/figure_eight"],
         agent_interfaces={AGENT_ID: agent_spec.interface},
         headless=True,
         fixed_timestep_sec=0.1,
         seed=42,
+        observation_options=ObservationOptions.unformatted,
     )
 
     yield _env
@@ -184,13 +186,13 @@ def sample_vehicle_pos(
 
 def test_observations(env, agent_spec):
     agent = agent_spec.build_agent()
-    observations: Dict[str, Observation] = env.reset()
+    observations: Dict[str, Observation] = env.reset()[0]
 
     # Let the agent step for a while
     for _ in range(NUM_STEPS):
         agent_obs = observations[AGENT_ID]
         agent_action = agent.act(agent_obs)
-        observations, _, _, _ = env.step({AGENT_ID: agent_action})
+        observations, _, _, _, _ = env.step({AGENT_ID: agent_action})
 
     # RGB
     rgb = observations[AGENT_ID].top_down_rgb
diff --git a/smarts/core/tests/test_smarts_memory_growth.py b/smarts/core/tests/test_smarts_memory_growth.py
index 219ba8de4b..c16ea09886 100644
--- a/smarts/core/tests/test_smarts_memory_growth.py
+++ b/smarts/core/tests/test_smarts_memory_growth.py
@@ -22,7 +22,7 @@
 import gc
 import logging
 
-import gym
+import gymnasium as gym
 import pytest
 from pympler import muppy, summary, tracker
 
@@ -98,12 +98,11 @@ def act(self, obs):
         agent_builder=Policy,
     )
     env = gym.make(
-        "smarts.env:hiway-v0",
+        "smarts.env:hiway-v1",
         scenarios=scenarios,
-        agent_specs={agent_id: agent_spec},
+        agent_interfaces={agent_id: agent_spec.interface},
         headless=True,
         fixed_timestep_sec=TIMESTEP_SEC,
-        sumo_headless=True,
         seed=seed,
     )
 
@@ -125,17 +124,24 @@ def _every_nth_episode(agent_id, episode_count, env_and_spec, steps_per_yield):
 
     for episode_index in range(episode_count):
         agent = agent_spec.build_agent()
-        observations = env.reset()
+        observations, _ = env.reset()
 
-        dones = {"__all__": False}
-        while not dones["__all__"]:
+        terminateds = {"__all__": False}
+        while not terminateds["__all__"]:
             agent_obs = observations[agent_id]
             agent_action = agent.act(agent_obs)
-            observations, rewards, dones, infos = env.step({agent_id: agent_action})
-            # episode.record_step(observations, rewards, dones, infos)
+            observations, rewards, terminateds, truncateds, infos = env.step(
+                {agent_id: agent_action}
+            )
 
         agent_obs = None
         agent_action = None
-        observations, rewards, dones, infos = None, None, None, None
+        observations, rewards, terminateds, truncateds, infos = (
+            None,
+            None,
+            None,
+            None,
+            None,
+        )
 
         if episode_index % steps_per_yield == 0:
             yield episode_index
@@ -149,13 +155,15 @@ def _memory_buildup(
 
     for _ in range(episode_count):
         agent = agent_spec.build_agent()
-        observations = env.reset()
+        observations, _ = env.reset()
 
-        dones = {"__all__": False}
-        while not dones["__all__"]:
+        terminateds = {"__all__": False}
+        while not terminateds["__all__"]:
             agent_obs = observations[agent_id]
             agent_action = agent.act(agent_obs)
-            observations, _, dones, _ = env.step({agent_id: agent_action})
+            observations, _, terminateds, truncateds, _ = env.step(
+                {agent_id: agent_action}
+            )
 
     env.close()
diff --git a/smarts/core/utils/episodes.py b/smarts/core/utils/episodes.py
index 338a1f9960..f4e3bd748c 100644
--- a/smarts/core/utils/episodes.py
+++ b/smarts/core/utils/episodes.py
@@ -141,25 +141,32 @@ def record_scenario(self, scenario_log):
         )
         self.mission_hash = scenario_log["mission_hash"]
 
-    def record_step(self, observations=None, rewards=None, dones=None, infos=None):
+    def record_step(self, observations, rewards, terminateds, truncateds, infos):
         """Record a step end."""
         self.steps += 1
-        if not isinstance(dones, dict):
-            observations, rewards, dones, infos = self._convert_to_dict(
-                observations, rewards, dones, infos
+        if not isinstance(terminateds, dict):
+            (
+                observations,
+                rewards,
+                terminateds,
+                truncateds,
+                infos,
+            ) = self._convert_to_dict(
+                observations, rewards, terminateds, truncateds, infos
             )
-        if dones.get("__all__", False) and infos is not None:
+        if terminateds.get("__all__", False) and infos is not None:
             for agent, score in infos.items():
                 self.scores[agent] = score["score"]
 
-    def _convert_to_dict(self, observations, rewards, dones, infos):
+    def _convert_to_dict(self, observations, rewards, terminateds, truncateds, infos):
         observations, rewards, infos = [
             {"SingleAgent": obj} for obj in [observations, rewards, infos]
         ]
-        dones = {"SingleAgent": dones, "__all__": dones}
-        return observations, rewards, dones, infos
+        terminateds = {"SingleAgent": terminateds, "__all__": terminateds}
+        truncateds = {"SingleAgent": truncateds, "__all__": truncateds}
+        return observations, rewards, terminateds, truncateds, infos
 
 
 def episodes(n):
diff --git a/smarts/diagnostic/run.py b/smarts/diagnostic/run.py
index 720ff529a7..99b1cca722 100644
--- a/smarts/diagnostic/run.py
+++ b/smarts/diagnostic/run.py
@@ -28,7 +28,7 @@
 from typing import Any, Callable, Dict, Sequence
 
 import cpuinfo
-import gym
+import gymnasium as gym
 import matplotlib.pyplot as plt
 import psutil
 from mdutils.mdutils import MdUtils
@@ -36,6 +36,7 @@
 import smarts
 from smarts.core.scenario import Scenario
 from smarts.core.utils.math import welford
+from smarts.env.gymnasium.hiway_env_v1 import ScenarioOrder
 from smarts.sstudio.scenario_construction import build_scenarios
 
 _SEED = 42
@@ -49,13 +50,12 @@ def _compute(scenario_dir, ep_per_scenario=10, max_episode_steps=_MAX_EPISODE_STEPS):
     build_scenarios(scenarios=scenario_dir, seed=_SEED)
     env = gym.make(
-        "smarts.env:hiway-v0",
+        "smarts.env:hiway-v1",
         scenarios=scenario_dir,
-        shuffle_scenarios=False,
+        scenarios_order=ScenarioOrder.Sequential,
         sim_name="Diagnostic",
-        agent_specs={},
+        agent_interfaces={},
         headless=True,
-        sumo_headless=True,
         seed=_SEED,
     )
     scenarios = Scenario.get_scenario_list(scenario_dir)
diff --git a/smarts/env/custom_observations.py b/smarts/env/custom_observations.py
index 1e3823e455..a60e7da3fb 100644
--- a/smarts/env/custom_observations.py
+++ b/smarts/env/custom_observations.py
@@ -20,7 +20,7 @@
 from dataclasses import dataclass
 from typing import Callable, Dict
 
-import gym
+import gymnasium as gym
 import numpy as np
 
 from smarts.core.coordinates import Heading
diff --git a/smarts/env/gymnasium/hiway_env_v1.py b/smarts/env/gymnasium/hiway_env_v1.py
index 0e9df8ac6e..800b2d9326 100644
--- a/smarts/env/gymnasium/hiway_env_v1.py
+++ b/smarts/env/gymnasium/hiway_env_v1.py
@@ -21,7 +21,6 @@
 # THE SOFTWARE.
 import logging
 import os
-import warnings
 from enum import IntEnum
 from functools import partial
 from pathlib import Path
@@ -47,6 +46,7 @@
 from envision import types as envision_types
 from envision.client import Client as Envision
 from envision.data_formatter import EnvisionDataFormatterArgs
+from smarts.core import current_seed
 from smarts.core import seed as smarts_seed
 from smarts.core.agent_interface import AgentInterface
 from smarts.core.local_traffic_provider import LocalTrafficProvider
@@ -269,12 +269,13 @@ def step(
 
         info = {
             agent_id: {
-                "score": value,
+                "score": agent_score,
                 "env_obs": observations[agent_id],
                 "done": dones[agent_id],
                 "reward": rewards[agent_id],
+                "map_source": self._smarts.scenario.road_map.source,
             }
-            for agent_id, value in extras["scores"].items()
+            for agent_id, agent_score in extras["scores"].items()
         }
 
         if self._env_renderer is not None:
@@ -348,7 +349,16 @@ def reset(
         observations = self._smarts.reset(
             scenario, start_time=options.get("start_time", 0)
         )
-        info = {"map_source": self._smarts.scenario.road_map.source}
+        info = {
+            agent_id: {
+                "score": 0,
+                "env_obs": agent_obs,
+                "done": False,
+                "reward": 0,
+                "map_source": self._smarts.scenario.road_map.source,
+            }
+            for agent_id, agent_obs in observations.items()
+        }
 
         if self._env_renderer is not None:
             self._env_renderer.reset(observations)
@@ -494,3 +504,12 @@ def smarts(self):
             smarts.core.smarts.SMARTS: The smarts simulator instance.
         """
         return self._smarts
+
+    @property
+    def seed(self):
+        """Returns the environment seed.
+
+        Returns:
+            int: Environment seed.
+        """
+        return current_seed()
diff --git a/smarts/env/wrappers/episode_logger.py b/smarts/env/gymnasium/wrappers/episode_logger.py
similarity index 92%
rename from smarts/env/wrappers/episode_logger.py
rename to smarts/env/gymnasium/wrappers/episode_logger.py
index bfbc6ebd5a..041014c735 100644
--- a/smarts/env/wrappers/episode_logger.py
+++ b/smarts/env/gymnasium/wrappers/episode_logger.py
@@ -19,12 +19,11 @@
 # THE SOFTWARE.
 from typing import Any, Dict, Iterator, Tuple
 
-import gym
+import gymnasium as gym
 
 from smarts.core.utils.episodes import EpisodeLog, EpisodeLogs
 
 Action = Any
-Operation = Any
 
 
 class EpisodeLogger(gym.Wrapper):
@@ -36,20 +35,19 @@ def __init__(self, env: gym.Env, col_width: int = 18):
         self._closed = False
         self._log_iter = self._episode_logs(col_width)
 
-    def step(self, action: Action) -> Tuple[Operation, float, bool, Dict[str, Any]]:
+    def step(self, action: Action):
         """Mark a step for logging."""
-
         step_vals = super().step(action)
         self._current_episode.record_step(*step_vals)
         return step_vals
 
-    def reset(self) -> Any:
+    def reset(self) -> Tuple[Any, Dict[str, Any]]:
         """Mark an episode reset for logging."""
-        obs = super().reset()
+        out = super().reset()
         self._current_episode: EpisodeLog = next(self._log_iter)
         self._current_episode.record_scenario(self.scenario_log)
-        return obs
+        return out
 
     def close(self):
         """Cap off the episode logging."""
diff --git a/smarts/env/wrappers/parallel_env.py b/smarts/env/gymnasium/wrappers/parallel_env.py
similarity index 85%
rename from smarts/env/wrappers/parallel_env.py
rename to smarts/env/gymnasium/wrappers/parallel_env.py
index ae5e3c6446..8313ec80fb 100644
--- a/smarts/env/wrappers/parallel_env.py
+++ b/smarts/env/gymnasium/wrappers/parallel_env.py
@@ -28,12 +28,12 @@
 from typing import Any, Callable, Dict, Sequence, Tuple
 
 import cloudpickle
-import gym
+import gymnasium as gym
 
 __all__ = ["ParallelEnv"]
 
 
-EnvConstructor = Callable[[], gym.Env]
+EnvConstructor = Callable[[int], gym.Env]
 
 
 class _Message(Enum):
@@ -86,7 +86,7 @@ def __init__(
         if any([not callable(ctor) for ctor in env_constructors]):
             raise TypeError(
                 f"Found non-callable `env_constructors`. Expected `env_constructors` of type "
-                f"`Sequence[Callable[[], gym.Env]]`, but got {env_constructors})."
+                f"`Sequence[Callable[[int], gym.Env]]`, but got {env_constructors})."
             )
 
         self._num_envs = len(env_constructors)
@@ -101,12 +101,14 @@ def __init__(
         self._parent_pipes = []
         self._processes = []
         for idx, env_constructor in enumerate(env_constructors):
+            cur_seed = seed + idx
             parent_pipe, child_pipe = mp_ctx.Pipe()
             process = mp_ctx.Process(
                 target=_worker,
                 name=f"Worker-<{type(self).__name__}>-<{idx}>",
                 args=(
                     cloudpickle.dumps(env_constructor),
+                    cur_seed,
                     auto_reset,
                     child_pipe,
                     self._polling_period,
@@ -122,7 +124,6 @@ def __init__(
             child_pipe.close()
 
         self._wait_start()
-        self.seed(seed)
         self._single_observation_space, self._single_action_space = self._get_spaces()
 
     @property
@@ -187,29 +188,25 @@ def _get_spaces(self) -> Tuple[gym.Space, gym.Space]:
 
         return observation_space, action_space
 
-    def seed(self, seed: int) -> Sequence[int]:
-        """Sets unique seed for each environment.
-
-        Args:
-            seed (int): Seed number.
+    def seed(self) -> Sequence[int]:
+        """Retrieves the seed used in each environment.
 
         Returns:
            Sequence[int]: Seed of each environment.
        """
-        seeds = [seed + i for i in range(self._num_envs)]
-
-        seeds = self._call(_Message.SEED, seeds)
+        seeds = self._call(_Message.SEED, [None] * self._num_envs)
        return seeds
 
-    def reset(self) -> Sequence[Dict[str, Any]]:
+    def reset(self) -> Tuple[Sequence[Dict[str, Any]], Sequence[Dict[str, Any]]]:
         """Reset all environments.
 
         Returns:
-            Sequence[Dict[str, Any]]: A batch of observations from the vectorized environment.
+            Tuple[Sequence[Dict[str, Any]], Sequence[Dict[str, Any]]]: A batch of
+                observations and infos from the vectorized environment.
         """
-        observations = self._call(_Message.RESET, [None] * self._num_envs)
-        return observations
+        observations, infos = self._call(_Message.RESET, [None] * self._num_envs)
+        return observations, infos
 
     def step(
         self, actions: Sequence[Dict[str, Any]]
@@ -217,6 +214,7 @@
         Sequence[Dict[str, Any]],
         Sequence[Dict[str, float]],
         Sequence[Dict[str, bool]],
+        Sequence[Dict[str, bool]],
         Sequence[Dict[str, Any]],
     ]:
         """Steps all environments.
@@ -225,12 +223,12 @@ def step(
             actions (Sequence[Dict[str,Any]]): Actions for each environment.
 
         Returns:
-            Tuple[ Sequence[Dict[str, Any]], Sequence[Dict[str, float]], Sequence[Dict[str, bool]], Sequence[Dict[str, Any]] ]:
-                A batch of (observations, rewards, dones, infos) from the vectorized environment.
+            Tuple[ Sequence[Dict[str, Any]], Sequence[Dict[str, float]], Sequence[Dict[str, bool]], Sequence[Dict[str, bool]], Sequence[Dict[str, Any]] ]:
+                A batch of (observations, rewards, terminateds, truncateds, infos) from the vectorized environment.
         """
 
         result = self._call(_Message.STEP, actions)
-        observations, rewards, dones, infos = zip(*result)
-        return (observations, rewards, dones, infos)
+        observations, rewards, terminateds, truncateds, infos = zip(*result)
+        return (observations, rewards, terminateds, truncateds, infos)
 
     def close(self, terminate=False):
         """Sends a close message to all external processes.
@@ -266,6 +264,7 @@ def __del__(self):
 
 def _worker(
     env_constructor: bytes,
+    seed: int,
     auto_reset: bool,
     pipe: mp.connection.Connection,
     polling_period: float = 0.1,
@@ -276,6 +275,7 @@ def _worker(
 
     Args:
         env_constructor (bytes): Cloudpickled callable which constructs the environment.
+        seed (int): Seed for the environment.
         auto_reset (bool): If True, auto resets environment when episode ends.
         pipe (mp.connection.Connection): Child's end of the pipe.
         polling_period (float, optional): Time to wait for keyboard interrupts. Defaults to 0.1.
@@ -283,7 +283,7 @@ def _worker(
     Raises:
         KeyError: If unknown message type is received.
     """
-    env = cloudpickle.loads(env_constructor)()
+    env = cloudpickle.loads(env_constructor)(seed=seed)
     pipe.send((_Message.RESULT, None))
 
     try:
@@ -292,21 +292,26 @@ def _worker(
                 continue
             message, payload = pipe.recv()
             if message == _Message.SEED:
-                env_seed = env.seed(payload)
+                env_seed = env.seed
                 pipe.send((_Message.RESULT, env_seed))
             elif message == _Message.ACCESS:
                 result = getattr(env, payload, None)
                 pipe.send((_Message.RESULT, result))
             elif message == _Message.RESET:
-                observation = env.reset()
-                pipe.send((_Message.RESULT, observation))
+                observation, info = env.reset()
+                pipe.send((_Message.RESULT, (observation, info)))
             elif message == _Message.STEP:
-                observation, reward, done, info = env.step(payload)
-                if done["__all__"] and auto_reset:
+                observation, reward, terminated, truncated, info = env.step(payload)
+                if terminated["__all__"] and auto_reset:
                     # Final observation can be obtained from `info` as follows:
                     # `final_obs = info[agent_id]["env_obs"]`
-                    observation = env.reset()
-                pipe.send((_Message.RESULT, (observation, reward, done, info)))
+                    observation, _ = env.reset()
+                pipe.send(
+                    (
+                        _Message.RESULT,
+                        (observation, reward, terminated, truncated, info),
+                    )
+                )
             elif message == _Message.CLOSE:
                 break
             else:
diff --git a/smarts/env/wrappers/single_agent.py b/smarts/env/gymnasium/wrappers/single_agent.py
similarity index 79%
rename from smarts/env/wrappers/single_agent.py
rename to smarts/env/gymnasium/wrappers/single_agent.py
index caa5021477..37f879dc0e 100644
--- a/smarts/env/wrappers/single_agent.py
+++ b/smarts/env/gymnasium/wrappers/single_agent.py
@@ -22,7 +22,7 @@
 
 from typing import Any, Tuple
 
-import gym
+import gymnasium as gym
 
 
 class SingleAgent(gym.Wrapper):
@@ -48,28 +48,32 @@ def __init__(self, env: gym.Env):
         if self.action_space:
             self.action_space = self.action_space[self._agent_id]
 
-    def step(self, action: Any) -> Tuple[Any, float, bool, Any]:
+    def step(self, action: Any) -> Tuple[Any, float, bool, bool, Any]:
         """Steps a single-agent SMARTS environment.
 
         Args:
             action (Any): Agent's action
 
         Returns:
-            Tuple[Any, float, bool, Any]: Agent's observation, reward, done, and info
+            Tuple[Any, float, bool, bool, Any]: Agent's observation, reward,
+                terminated, truncated, and info
         """
-        obs, reward, done, info = self.env.step({self._agent_id: action})
+        obs, reward, terminated, truncated, info = self.env.step(
+            {self._agent_id: action}
+        )
         return (
             obs[self._agent_id],
             reward[self._agent_id],
-            done[self._agent_id],
+            terminated[self._agent_id],
+            truncated[self._agent_id],
             info[self._agent_id],
         )
 
-    def reset(self) -> Any:
+    def reset(self) -> Tuple[Any, Any]:
         """Resets a single-agent SMARTS environment.
 
         Returns:
-            Any: Agent's observation
+            Tuple[Any, Any]: Agent's observation and info
         """
-        obs = self.env.reset()
-        return obs[self._agent_id]
+        obs, info = self.env.reset()
+        return obs[self._agent_id], info[self._agent_id]
diff --git a/smarts/env/tests/test_benchmark.py b/smarts/env/tests/test_benchmark.py
index 7dbdf77308..34ca8aeab6 100644
--- a/smarts/env/tests/test_benchmark.py
+++ b/smarts/env/tests/test_benchmark.py
@@ -19,12 +19,11 @@
 # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
 # THE SOFTWARE.
-import gym +import gymnasium as gym import pytest from smarts.core.agent_interface import AgentInterface, AgentType from smarts.core.controllers import ActionSpaceType -from smarts.zoo.agent_spec import AgentSpec @pytest.fixture(params=[5, 10]) @@ -43,19 +42,19 @@ def agent_interface(request): @pytest.fixture -def agent_specs(agent_ids, agent_interface): - return {id_: AgentSpec(interface=agent_interface) for id_ in agent_ids} +def agent_interfaces(agent_ids, agent_interface): + return {id_: agent_interface for id_ in agent_ids} @pytest.fixture def env(agent_specs): env = gym.make( - "smarts.env:hiway-v0", + "smarts.env:hiway-v1", scenarios=["scenarios/sumo/loop"], - agent_specs=agent_specs, + agent_interfaces=agent_interfaces, headless=True, seed=2008, - fixed_timestep_sec=0.01, + fixed_timestep_sec=0.1, ) env.reset() yield env diff --git a/smarts/env/tests/test_determinism.py b/smarts/env/tests/test_determinism.py index 63abb8d50a..9424149bf8 100644 --- a/smarts/env/tests/test_determinism.py +++ b/smarts/env/tests/test_determinism.py @@ -19,9 +19,7 @@ # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN # THE SOFTWARE. -import gym -import numpy as np -import pytest +import gymnasium as gym # Reference: https://stackoverflow.com/a/53978543/2783780 try: @@ -50,37 +48,41 @@ def agent_spec(max_steps_per_episode): neighborhood_vehicle_states=True, action=ActionSpaceType.Lane, ), - agent_builder=lambda: Agent.from_function(lambda _: "keep_lane"), + agent_builder=lambda: Agent.from_function(lambda _: 0), ) def run(agent_spec, callback, scenarios, episode_count, capture_step): AGENT_ID = "Agent-007" env = gym.make( - "smarts.env:hiway-v0", + "smarts.env:hiway-v1", scenarios=[scenarios], - agent_specs={AGENT_ID: agent_spec}, + agent_interfaces={AGENT_ID: agent_spec.interface}, headless=True, - fixed_timestep_sec=0.01, + fixed_timestep_sec=0.1, seed=42, ) i = 0 for episode in episodes(n=episode_count): agent = agent_spec.build_agent() - observations = env.reset() + observations, _ = env.reset() episode.record_scenario(env.scenario_log) - dones = {"__all__": False} - while not dones["__all__"]: + terminateds = {"__all__": False} + while not terminateds["__all__"]: agent_obs = observations[AGENT_ID] agent_action = agent.act(agent_obs) - observations, rewards, dones, infos = env.step({AGENT_ID: agent_action}) + observations, rewards, terminateds, truncateds, infos = env.step( + {AGENT_ID: agent_action} + ) - episode.record_step(observations, rewards, dones, infos) + episode.record_step(observations, rewards, terminateds, truncateds, infos) if i % capture_step == 0: - callback(rewards, agent_obs, dones, int(i / capture_step)) + callback( + rewards, agent_obs, terminateds, truncateds, int(i / capture_step) + ) i += 1 env.close() @@ -97,23 +99,28 @@ def vehicle_state_check(vs_now, vs_prev): def determinism(agent_spec, scenarios, episode_count, capture_step): rewards_capture = [] - dones_capture = [] + terminateds_capture = [] + truncateds_capture = [] observations_capture = [] - def capture_callback(rewards, agent_obs, dones, index): + def capture_callback(rewards, agent_obs, terminateds, truncateds, index): rewards_capture.append(rewards) - dones_capture.append(dones) + terminateds_capture.append(terminateds) + truncateds_capture.append(truncateds) observations_capture.append(agent_obs) - def check_callback(rewards, agent_obs, dones, index): + def check_callback(rewards, agent_obs, terminateds, 
truncateds, index): assert len(rewards_capture) > index - 1 orig_agent_obs = observations_capture[index] assert rewards_capture[index] == rewards - assert len(dones) == len(dones_capture[index]) - assert all([ds == ds2 for (ds, ds2) in zip(dones, dones_capture[index])]) - + # fmt: off + assert len(terminateds) == len(terminateds_capture[index]) + assert len(truncateds) == len(truncateds_capture[index]) + assert all([ds == ds2 for (ds, ds2) in zip(terminateds, terminateds_capture[index])]) + assert all([ds == ds2 for (ds, ds2) in zip(truncateds, truncateds_capture[index])]) + # fmt: on assert diff_unpackable(agent_obs, orig_agent_obs) == "" run(agent_spec, capture_callback, scenarios, episode_count, capture_step) diff --git a/smarts/env/tests/test_hiway_env.py b/smarts/env/tests/test_hiway_env.py index d4631ca23a..eae430909d 100644 --- a/smarts/env/tests/test_hiway_env.py +++ b/smarts/env/tests/test_hiway_env.py @@ -85,7 +85,7 @@ def test_hiway_env(env: HiWayEnv, agent_spec: AgentSpec): [-3 < reward < 3 for reward in rewards.values()] ), f"Expected bounded reward per timestep, but got {rewards}." - episode.record_step(observations, rewards, dones, infos) + episode.record_step(observations, rewards, dones, dones, infos) assert episode is not None and episode.index == ( MAX_EPISODES - 1 diff --git a/smarts/env/tests/test_hiway_env_v1.py b/smarts/env/tests/test_hiway_env_v1.py index 4025920d87..a94017f4a6 100644 --- a/smarts/env/tests/test_hiway_env_v1.py +++ b/smarts/env/tests/test_hiway_env_v1.py @@ -76,9 +76,9 @@ def test_hiway_env_v1_unformatted(env: HiWayEnvV1): observations = env.reset() episode.record_scenario(env.scenario_log) - terminated = {"__all__": False} - while not terminated["__all__"]: - observations, rewards, terminated, truncated, infos = env.step( + terminateds = {"__all__": False} + while not terminateds["__all__"]: + observations, rewards, terminateds, truncateds, infos = env.step( {AGENT_ID: "keep_lane"} ) @@ -91,7 +91,7 @@ def test_hiway_env_v1_unformatted(env: HiWayEnvV1): [-3 < reward < 3 for reward in rewards.values()] ), f"Expected bounded reward per timestep, but got {rewards}." - episode.record_step(observations, rewards, terminated, infos) + episode.record_step(observations, rewards, terminateds, truncateds, infos) assert episode is not None and episode.index == ( MAX_EPISODES - 1 diff --git a/smarts/env/tests/test_parallel_env.py b/smarts/env/tests/test_parallel_env.py index eaaa182a9d..01db906d04 100644 --- a/smarts/env/tests/test_parallel_env.py +++ b/smarts/env/tests/test_parallel_env.py @@ -20,7 +20,7 @@ # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN # THE SOFTWARE. 
-import gym +import gymnasium as gym gym.logger.set_level(40) import pytest @@ -28,10 +28,12 @@ from smarts.core.agent import Agent from smarts.core.agent_interface import RGB, AgentInterface from smarts.core.controllers import ActionSpaceType -from smarts.env.hiway_env import HiWayEnv -from smarts.env.wrappers.parallel_env import ParallelEnv +from smarts.env.gymnasium.wrappers.parallel_env import ParallelEnv +from smarts.env.utils.action_conversion import ActionOptions from smarts.zoo.agent_spec import AgentSpec +DEFAULT_SEED = 42 + @pytest.fixture(scope="module") def agent_specs(): @@ -56,26 +58,32 @@ def single_env_actions(agent_specs): @pytest.fixture(scope="module") def env_constructor(agent_specs): - env_constructor = lambda: HiWayEnv( + env_constructor = lambda seed: gym.make( + "smarts.env:hiway-v1", scenarios=["scenarios/sumo/figure_eight"], - agent_specs=agent_specs, + agent_interfaces={ + agent_id: agent_spec.interface + for agent_id, agent_spec in agent_specs.items() + }, sim_name="Test_env", headless=True, + action_options=ActionOptions.unformatted, + seed=seed, ) return env_constructor def test_non_callable_env_constructors(env_constructor): env_constructed = [ - env_constructor(), - env_constructor(), + env_constructor(seed=DEFAULT_SEED), + env_constructor(seed=DEFAULT_SEED + 1), ] with pytest.raises(TypeError): env = ParallelEnv(env_constructors=env_constructed, auto_reset=True) env.close() -def _make_parallel_env(env_constructor, num_env, auto_reset=True, seed=42): +def _make_parallel_env(env_constructor, num_env, auto_reset=True, seed=DEFAULT_SEED): env_constructors = [env_constructor] * num_env return ParallelEnv( env_constructors=env_constructors, @@ -86,7 +94,7 @@ def _make_parallel_env(env_constructor, num_env, auto_reset=True, seed=42): @pytest.mark.parametrize("num_env", [2]) def test_spaces(env_constructor, num_env): - single_env = env_constructor() + single_env = env_constructor(seed=DEFAULT_SEED) env = _make_parallel_env(env_constructor, num_env) assert env.batch_size == num_env @@ -99,10 +107,10 @@ def test_spaces(env_constructor, num_env): @pytest.mark.parametrize("num_env", [2]) def test_seed(env_constructor, num_env): - env = _make_parallel_env(env_constructor, num_env) + first_seed = DEFAULT_SEED + env = _make_parallel_env(env_constructor, num_env, seed=first_seed) - first_seed = 7 - seeds = env.seed(first_seed) + seeds = env.seed() assert len(seeds) == num_env for index, seed in enumerate(seeds): assert seed == first_seed + index @@ -113,45 +121,65 @@ def test_seed(env_constructor, num_env): def _compare_outputs(num_env, batched_outputs, single_outputs): assert len(batched_outputs) == num_env for outputs in batched_outputs: - outputs.pop("__all__", None) - assert outputs.keys() == outputs.keys() + assert outputs.keys() == single_outputs.keys() for agent_id, out in outputs.items(): assert type(out) is type(single_outputs[agent_id]) @pytest.mark.parametrize("num_env", [2]) def test_reset(env_constructor, num_env): - single_env = env_constructor() - single_observations = single_env.reset() + single_env = env_constructor(seed=DEFAULT_SEED) + single_observations, single_infos = single_env.reset() single_env.close() env = _make_parallel_env(env_constructor, num_env) - batched_observations = env.reset() + batched_observations, batched_infos = env.reset() env.close() _compare_outputs(num_env, batched_observations, single_observations) + _compare_outputs(num_env, batched_infos, single_infos) @pytest.mark.parametrize("num_env", [2]) 
@pytest.mark.parametrize("auto_reset", [True]) def test_step(env_constructor, single_env_actions, num_env, auto_reset): - single_env = env_constructor() + single_env = env_constructor(seed=DEFAULT_SEED) single_env.reset() - single_observations, single_rewards, single_dones, single_infos = single_env.step( - single_env_actions - ) + ( + single_observations, + single_rewards, + single_terminateds, + single_truncateds, + single_infos, + ) = single_env.step(single_env_actions) single_env.close() env = _make_parallel_env(env_constructor, num_env, auto_reset=auto_reset) env.reset() - batched_observations, batched_rewards, batched_dones, batched_infos = env.step( - [single_env_actions] * num_env - ) + ( + batched_observations, + batched_rewards, + batched_terminateds, + batched_truncateds, + batched_infos, + ) = env.step([single_env_actions] * num_env) env.close() for batched_outputs, single_outputs in zip( - [batched_observations, batched_rewards, batched_dones, batched_infos], - [single_observations, single_rewards, single_dones, single_infos], + [ + batched_observations, + batched_rewards, + batched_terminateds, + batched_truncateds, + batched_infos, + ], + [ + single_observations, + single_rewards, + single_terminateds, + single_truncateds, + single_infos, + ], ): _compare_outputs(num_env, batched_outputs, single_outputs) @@ -165,19 +193,29 @@ def test_sync_async_episodes(env_constructor, single_env_actions, auto_reset): # Step 1 env.reset() # Step 2 - _, _, batched_dones, _ = env.step(batched_actions) - assert all(dones["__all__"] == False for dones in batched_dones) + _, _, batched_terminateds, _, _ = env.step(batched_actions) + assert all( + terminateds["__all__"] == False for terminateds in batched_terminateds + ) # Step 3 - _, _, batched_dones, _ = env.step(batched_actions) - assert all(dones["__all__"] == True for dones in batched_dones) + _, _, batched_terminateds, _, _ = env.step(batched_actions) + assert all( + terminateds["__all__"] == True for terminateds in batched_terminateds + ) # Step 4 - _, _, batched_dones, _ = env.step(batched_actions) + _, _, batched_terminateds, _, _ = env.step(batched_actions) if auto_reset: - assert all(dones["__all__"] == False for dones in batched_dones) + assert all( + terminateds["__all__"] == False for terminateds in batched_terminateds + ) else: - assert all(dones["__all__"] == True for dones in batched_dones) + assert all( + terminateds["__all__"] == True for terminateds in batched_terminateds + ) # Step 5 - _, _, batched_dones, _ = env.step(batched_actions) - assert all(dones["__all__"] == True for dones in batched_dones) + _, _, batched_terminateds, _, _ = env.step(batched_actions) + assert all( + terminateds["__all__"] == True for terminateds in batched_terminateds + ) finally: env.close() diff --git a/smarts/env/tests/test_rllib_hiway_env.py b/smarts/env/tests/test_rllib_hiway_env.py index 98bec77d2f..12373c6761 100644 --- a/smarts/env/tests/test_rllib_hiway_env.py +++ b/smarts/env/tests/test_rllib_hiway_env.py @@ -21,7 +21,7 @@ # THE SOFTWARE. from pathlib import Path -import gym +import gymnasium as gym import numpy as np import pytest diff --git a/smarts/env/tests/test_shutdown.py b/smarts/env/tests/test_shutdown.py index 39b27be4b3..90e8f79a19 100644 --- a/smarts/env/tests/test_shutdown.py +++ b/smarts/env/tests/test_shutdown.py @@ -21,12 +21,13 @@ # THE SOFTWARE. 
 from unittest import mock

-import gym
+import gymnasium as gym
 import pytest

 from smarts.core.agent import Agent
 from smarts.core.agent_interface import AgentInterface, AgentType
 from smarts.core.smarts import SMARTSNotSetupError
+from smarts.env.utils.action_conversion import ActionOptions
 from smarts.zoo.agent_spec import AgentSpec

 AGENT_ID = "AGENT-007"
@@ -34,13 +35,13 @@
 def build_env(agent_spec):
     return gym.make(
-        "smarts.env:hiway-v0",
+        "smarts.env:hiway-v1",
         # TODO: Switch to a test scenario that has routes, and missions
         scenarios=["scenarios/sumo/loop"],
-        agent_specs={AGENT_ID: agent_spec},
+        agent_interfaces={AGENT_ID: agent_spec.interface},
         headless=True,
         seed=2008,
-        fixed_timestep_sec=0.01,
+        action_options=ActionOptions.unformatted,
     )
@@ -52,9 +53,9 @@ def test_graceful_shutdown():
     )
     env = build_env(agent_spec)
     agent = agent_spec.build_agent()
-    obs = env.reset()
+    obs, _ = env.reset()
     for _ in range(10):
-        obs = env.step({AGENT_ID: agent.act(obs)})
+        obs, _, _, _, _ = env.step({AGENT_ID: agent.act(obs)})
     env.close()
@@ -70,7 +71,7 @@ def test_graceful_interrupt(monkeypatch):
     env = build_env(agent_spec)

     with pytest.raises(KeyboardInterrupt):
-        obs = env.reset()
+        obs, _ = env.reset()
         episode = 0

         # To simulate a user interrupting the sim (e.g. ctrl-c). We just need to
@@ -80,7 +81,7 @@
             side_effect=KeyboardInterrupt,
         ):
             for episode in range(10):
-                obs, _, _, _ = env.step({AGENT_ID: agent.act(obs)})
+                obs, _, _, _, _ = env.step({AGENT_ID: agent.act(obs)})

     assert episode == 0, "SMARTS should have been interrupted, ending early"
diff --git a/smarts/env/tests/test_single_agent.py b/smarts/env/tests/test_single_agent.py
index 508a3518c1..64e7b1c40d 100644
--- a/smarts/env/tests/test_single_agent.py
+++ b/smarts/env/tests/test_single_agent.py
@@ -22,69 +22,53 @@
 from typing import Dict

-import gym
+import gymnasium as gym
 import numpy as np
 import pytest

-from smarts.core.agent import Agent
 from smarts.core.agent_interface import RGB, AgentInterface
 from smarts.core.controllers import ActionSpaceType
-from smarts.env.wrappers.single_agent import SingleAgent
-from smarts.zoo.agent_spec import AgentSpec
+from smarts.env.gymnasium.wrappers.single_agent import SingleAgent
+from smarts.env.utils.action_conversion import ActionOptions


-def _make_agent_specs(num_agent):
-    agent_specs: Dict[str, AgentSpec] = {
+def _make_agent_interfaces(num_agent):
+    agent_interfaces: Dict[str, AgentInterface] = {
         "AGENT_"
-        + str(agent_id): AgentSpec(
-            interface=AgentInterface(
-                top_down_rgb=RGB(),
-                action=ActionSpaceType.Lane,
-            ),
-            agent_builder=lambda: Agent.from_function(lambda _: "keep_lane"),
+        + str(agent_id): AgentInterface(
+            top_down_rgb=RGB(),
+            action=ActionSpaceType.Lane,
         )
         for agent_id in range(num_agent)
     }

-    return agent_specs
+    return agent_interfaces


 class ObservationWrapper(gym.ObservationWrapper):
     def __init__(self, env):
         super().__init__(env)
-        obs_space = {}
-        for agent_id, agent_spec in self.env.agent_specs.items():
-            rgb: RGB = (
-                agent_spec.interface.rgb
-            )  # pytype: disable=annotation-type-mismatch
-            obs_space[agent_id] = gym.spaces.Box(
-                low=0,
-                high=255,
-                shape=(
-                    rgb.width,
-                    rgb.height,
-                    3,
-                ),
-                dtype=np.uint8,
-            )
-        self.observation_space = gym.spaces.Dict(obs_space)
+        self.observation_space = {
+            agent_id: agent_space["top_down_rgb"]
+            for agent_id, agent_space in self.env.observation_space.items()
+        }

     def observation(self, obs):
         return {
-            agent_name: agent_obs.top_down_rgb.data
+            agent_name: agent_obs["top_down_rgb"]
             for agent_name, agent_obs in obs.items()
         }


 @pytest.fixture
 def base_env(request):
-    agent_specs = _make_agent_specs(request.param)
+    agent_interfaces = _make_agent_interfaces(request.param)
     env = gym.make(
-        "smarts.env:hiway-v0",
+        "smarts.env:hiway-v1",
         scenarios=["scenarios/sumo/figure_eight"],
-        agent_specs=agent_specs,
+        agent_interfaces=agent_interfaces,
         headless=True,
-        fixed_timestep_sec=0.01,
+        action_options=ActionOptions.unformatted,
     )
     env = ObservationWrapper(env)
@@ -95,11 +79,11 @@ def base_env(request):
 @pytest.mark.parametrize("base_env", [1, 2], indirect=True)
 def test_init(base_env):
     # Compute multiagent specs and space, i.e., ma_*
-    ma_agent_specs = base_env.agent_specs
+    ma_agent_interfaces = base_env.agent_interfaces
     ma_obs_space = base_env.observation_space

     # Test wrapping an env containing one and more than one agent
-    if len(ma_agent_specs) > 1:
+    if len(ma_agent_interfaces) > 1:
         with pytest.raises(AssertionError):
             env = SingleAgent(base_env)
             env.close()
@@ -108,7 +92,7 @@
     env = SingleAgent(base_env)

     # Test env observation space
-    agent_id = next(iter(ma_agent_specs.keys()))
+    agent_id = next(iter(ma_agent_interfaces.keys()))
     assert env.observation_space == ma_obs_space[agent_id]

     env.close()
@@ -116,22 +100,32 @@
 @pytest.mark.parametrize("base_env", [1], indirect=True)
 def test_reset_and_step(base_env):
-    ma_agent_specs = base_env.agent_specs
-    ma_obs_space = base_env.observation_space
+    ma_agent_interfaces = base_env.agent_interfaces
+    ma_obs, ma_info = base_env.reset()
+
     env = SingleAgent(base_env)
+    single_obs, single_info = env.reset()

     # Test resetting the env
-    obs = env.reset()
-    assert isinstance(obs, np.ndarray)
-    agent_id = next(iter(ma_agent_specs.keys()))
-    assert obs.shape == ma_obs_space[agent_id].shape
+    assert isinstance(single_obs, np.ndarray)
+    agent_id = next(iter(ma_agent_interfaces.keys()))
+    assert single_obs.shape == ma_obs[agent_id].shape
+    assert set(single_info.keys()) == set(ma_info[agent_id].keys())

     # Test stepping the env
-    obs, reward, done, info = env.step("keep_lane")
-    assert isinstance(obs, np.ndarray)
-    assert obs.shape == ma_obs_space[agent_id].shape
-    assert isinstance(reward, float)
-    assert type(done) is bool
-    assert set(info.keys()) == set(["score", "env_obs", "done"])
+    ma_obs, _, _, _, ma_info = base_env.step({agent_id: "keep_lane"})
+    (
+        single_obs,
+        single_reward,
+        single_terminated,
+        single_truncated,
+        single_info,
+    ) = env.step("keep_lane")
+    assert isinstance(single_obs, np.ndarray)
+    assert single_obs.shape == ma_obs[agent_id].shape
+    assert isinstance(single_reward, float)
+    assert type(single_terminated) is bool
+    assert type(single_truncated) is bool
+    assert set(single_info.keys()) == set(ma_info[agent_id].keys())

     env.close()
diff --git a/smarts/env/tests/test_social_agent.py b/smarts/env/tests/test_social_agent.py
index 62edcbbeba..0832ab8620 100644
--- a/smarts/env/tests/test_social_agent.py
+++ b/smarts/env/tests/test_social_agent.py
@@ -19,14 +19,16 @@
 # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
 # THE SOFTWARE.
-import gym
+import gymnasium as gym
 import pytest

 from smarts.core.agent_interface import AgentInterface, AgentType
-from smarts.env.hiway_env import HiWayEnv
+from smarts.env.gymnasium.hiway_env_v1 import HiWayEnvV1
+from smarts.env.utils.action_conversion import ActionOptions
+from smarts.env.utils.observation_conversion import ObservationOptions

 AGENT_ID = "Agent-007"
-SOCIAL_AGENT_ID = "Alec Trevelyan"
+SOCIAL_AGENT_ID_PREFIX = "zoo"

 MAX_EPISODES = 1
@@ -41,39 +43,45 @@ def agent_interface():
 @pytest.fixture
 def env(agent_interface: AgentInterface):
     env = gym.make(
-        "smarts.env:hiway-v0",
+        "smarts.env:hiway-v1",
         scenarios=["scenarios/sumo/zoo_intersection"],
         agent_interfaces={AGENT_ID: agent_interface},
         headless=True,
-        fixed_timestep_sec=0.01,
+        observation_options=ObservationOptions.unformatted,
+        action_options=ActionOptions.unformatted,
     )

     yield env
     env.close()


-def test_social_agents_not_in_env_obs_keys(env: HiWayEnv):
+def test_social_agents_not_in_env_obs_keys(env: HiWayEnvV1):
     for _ in range(MAX_EPISODES):
         observations = env.reset()

-        dones = {"__all__": False}
-        while not dones["__all__"]:
-            observations, rewards, dones, infos = env.step({AGENT_ID: "keep_lane"})
+        terminateds = {"__all__": False}
+        while not terminateds["__all__"]:
+            observations, rewards, terminateds, truncateds, infos = env.step(
+                {AGENT_ID: "keep_lane"}
+            )
+            # fmt: off
+            assert len([key for key in observations.keys() if SOCIAL_AGENT_ID_PREFIX in key])==0
+            assert isinstance(terminateds,dict) and isinstance(truncateds,dict)
+            assert len([key for key in terminateds.keys() if SOCIAL_AGENT_ID_PREFIX in key])==0
+            assert len([key for key in truncateds.keys() if SOCIAL_AGENT_ID_PREFIX in key])==0
+            # fmt: on

-            assert SOCIAL_AGENT_ID not in observations
-            assert SOCIAL_AGENT_ID not in dones


-def test_social_agents_in_env_neighborhood_vehicle_obs(
-    env: HiWayEnv, agent_interface: AgentInterface
-):
+def test_social_agents_in_env_neighborhood_vehicle_obs(env: HiWayEnvV1):
     first_seen_vehicles = {}
     for _ in range(MAX_EPISODES):
-        observations = env.reset()
+        observations, _ = env.reset()

-        dones = {"__all__": False}
-        while not dones["__all__"]:
-            observations, rewards, dones, infos = env.step({AGENT_ID: "keep_lane"})
+        terminateds = {"__all__": False}
+        while not terminateds["__all__"]:
+            observations, rewards, terminateds, _, infos = env.step(
+                {AGENT_ID: "keep_lane"}
+            )

             new_nvs_ids = [
                 nvs.id
@@ -89,4 +97,4 @@
         (v_id for v_id in seen_zoo_social_vehicles if "zoo-car1" in v_id), None
     )
     assert late_entry is not None, seen_zoo_social_vehicles
-    assert first_seen_vehicles[late_entry] == 70
+    assert first_seen_vehicles[late_entry] == 7
diff --git a/smarts/env/utils/record.py b/smarts/env/utils/record.py
index 4add9dc1a3..b5ce2b5508 100644
--- a/smarts/env/utils/record.py
+++ b/smarts/env/utils/record.py
@@ -17,10 +17,9 @@
 # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
 # THE SOFTWARE.
-import os
 from typing import Any

-import gym
+import gymnasium as gym
 import numpy as np

 from smarts.core.sensors import Observation
diff --git a/smarts/env/wrappers/recorder_wrapper.py b/smarts/env/wrappers/recorder_wrapper.py
index 10b4eab5c1..1f95c5f409 100644
--- a/smarts/env/wrappers/recorder_wrapper.py
+++ b/smarts/env/wrappers/recorder_wrapper.py
@@ -20,7 +20,6 @@
 # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
 # THE SOFTWARE.
 import os
-import typing
 from pathlib import Path

 import gym
diff --git a/zoo/evaluation/egoless_example.py b/zoo/evaluation/egoless_example.py
index 88a6796271..637da4e32a 100644
--- a/zoo/evaluation/egoless_example.py
+++ b/zoo/evaluation/egoless_example.py
@@ -1,6 +1,6 @@
 import argparse

-import gym
+import gymnasium as gym

 from smarts.core.utils.episodes import episodes
@@ -25,11 +25,10 @@
         f"./{args.replay_data}/{args.scenarios[0].split('/')[-1]}/data_replay"
     )
     env = gym.make(
-        "smarts.env:hiway-v0",
+        "smarts.env:hiway-v1",
         scenarios=args.scenarios,
-        agent_specs={},
+        agent_interfaces={},
         headless=args.headless,
-        visdom=False,
         fixed_timestep_sec=0.1,
         envision_record_data_replay_path=data_replay_path,
     )
@@ -40,6 +39,6 @@
         for _ in range(600):
             env.step({})
-            episode.record_step({}, {}, {}, {})
+            episode.record_step({}, {}, {}, {}, {})

 env.close()
diff --git a/zoo/policies/rl-agent/rl_agent/agent.py b/zoo/policies/rl-agent/rl_agent/agent.py
index 468f292896..4ea6c0eaaa 100644
--- a/zoo/policies/rl-agent/rl_agent/agent.py
+++ b/zoo/policies/rl-agent/rl_agent/agent.py
@@ -2,7 +2,6 @@
 This file contains an RLlib-trained policy evaluation usage (not for training).
 """
 import pickle
-from pathlib import Path

 import gym
 import tensorflow.compat.v1 as tf