Replace gym with gymnasium. (#2034)
Adaickalavan authored May 23, 2023
1 parent fdf0b7c commit c46f762
Showing 39 changed files with 485 additions and 388 deletions.
3 changes: 3 additions & 0 deletions CHANGELOG.md
@@ -15,6 +15,7 @@ Copy and pasting the git commit messages is __NOT__ enough.
- Added `Condition`, `ConditionRequires`, `ConditionState` and various condition implementations to enable logical operations in scenarios.
- Traffic light signals are now visualized in Envision.
- Interest vehicles now show up in Envision.
- Seed of `hiway-v1` env can be retrieved through a new property `seed`.
### Changed
- Changed waypoints in sumo maps to use more incoming lanes into junctions.
- Increased the cutoff radius for filtering out waypoints that are too far away in junctions in sumo maps.
@@ -27,6 +28,8 @@ Copy and pasting the git commit messages is __NOT__ enough.
- `TrapEntryTactic.wait_to_hijack_limit_s` field now defaults to `0`.
- `EntryTactic` derived classes now contain `condition` to provide extra filtering of candidate actors.
- `EntryTactic` derived classes now contain `start_time`.
- `info` returned by `hiway-v1` in `reset()` and `step()` methods are unified.
- Changed instances of `hiway-v0` and `gym` to use `hiway-v1` and `gymnasium`, respectively.
### Deprecated
- `visdom` is set to be removed from the SMARTS object parameters.
- Deprecated `start_time` on missions.
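The `gymnasium` migration recorded in the entries above drives the signature changes in every example diff below: `reset()` now returns an `(observation, info)` pair, and `step()` returns a five-tuple with separate `terminated` and `truncated` flags. A minimal sketch of that calling convention (not part of this commit; a generic registered environment id stands in for `smarts.env:hiway-v1` so the snippet runs on its own):

import gymnasium as gym

env = gym.make("CartPole-v1")  # stand-in for "smarts.env:hiway-v1"

# gymnasium: reset() accepts a seed and returns (observation, info).
observation, info = env.reset(seed=42)

terminated = truncated = False
while not (terminated or truncated):
    action = env.action_space.sample()  # placeholder policy
    # gymnasium: step() returns five values; the old gym API returned
    # (observation, reward, done, info) with a single `done` flag.
    observation, reward, terminated, truncated, info = env.step(action)

env.close()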
2 changes: 1 addition & 1 deletion docs/minimal.py
@@ -1,4 +1,4 @@
-import gym
+import gymnasium as gym
from smarts.core.agent import Agent
from smarts.core.agent_interface import AgentInterface, AgentType

34 changes: 15 additions & 19 deletions examples/control/chase_via_points.py
@@ -1,16 +1,16 @@
import sys
from pathlib import Path

-import gym
+import gymnasium as gym

sys.path.insert(0, str(Path(__file__).parents[2].absolute()))
from examples.tools.argument_parser import default_argument_parser
from smarts.core.agent import Agent
from smarts.core.agent_interface import AgentInterface, AgentType
from smarts.core.observations import Observation
from smarts.core.utils.episodes import episodes
+from smarts.env.utils.observation_conversion import ObservationOptions
from smarts.sstudio.scenario_construction import build_scenarios
-from smarts.zoo.agent_spec import AgentSpec

N_AGENTS = 3
AGENT_IDS = ["Agent_%i" % i for i in range(N_AGENTS)]
@@ -35,41 +35,37 @@ def act(self, obs: Observation):


def main(scenarios, headless, num_episodes, max_episode_steps=None):
-agent_specs = {
-agent_id: AgentSpec(
-interface=AgentInterface.from_type(
-AgentType.LanerWithSpeed,
-max_episode_steps=max_episode_steps,
-),
-agent_builder=ChaseViaPointsAgent,
+agent_interfaces = {
+agent_id: AgentInterface.from_type(
+AgentType.LanerWithSpeed,
+max_episode_steps=max_episode_steps,
+)
for agent_id in AGENT_IDS
}

env = gym.make(
-"smarts.env:hiway-v0",
+"smarts.env:hiway-v1",
scenarios=scenarios,
-agent_specs=agent_specs,
+agent_interfaces=agent_interfaces,
headless=headless,
sumo_headless=True,
+observation_options=ObservationOptions.unformatted,
)

for episode in episodes(n=num_episodes):
agents = {
-agent_id: agent_spec.build_agent()
-for agent_id, agent_spec in agent_specs.items()
+agent_id: ChaseViaPointsAgent() for agent_id in agent_interfaces.keys()
}
-observations = env.reset()
+observations, _ = env.reset()
episode.record_scenario(env.scenario_log)

-dones = {"__all__": False}
-while not dones["__all__"]:
+terminateds = {"__all__": False}
+while not terminateds["__all__"]:
actions = {
agent_id: agents[agent_id].act(agent_obs)
for agent_id, agent_obs in observations.items()
}
-observations, rewards, dones, infos = env.step(actions)
-episode.record_step(observations, rewards, dones, infos)
+observations, rewards, terminateds, truncateds, infos = env.step(actions)
+episode.record_step(observations, rewards, terminateds, truncateds, infos)

env.close()

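A note on `observation_options=ObservationOptions.unformatted`, which this example now passes to `hiway-v1`: the assumption here (not spelled out in this hunk) is that the option keeps handing agents the classic `smarts.core.observations.Observation` objects rather than the formatted gym-space dictionaries, which is why `ChaseViaPointsAgent.act(obs: Observation)` continues to work unchanged. A hedged sketch of the difference:

from smarts.core.observations import Observation


def act_on_unformatted(obs: Observation) -> float:
    # ObservationOptions.unformatted (assumed): attribute access on the Observation object.
    return obs.ego_vehicle_state.speed


def act_on_formatted(obs: dict) -> float:
    # Formatted options (assumed): the same value reached through nested dicts/arrays.
    return float(obs["ego_vehicle_state"]["speed"])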
18 changes: 11 additions & 7 deletions examples/control/ego_open_agent.py
@@ -7,9 +7,10 @@
import sys
from pathlib import Path

-import gym
+import gymnasium as gym

from smarts.core.utils.episodes import episodes
+from smarts.env.utils.observation_conversion import ObservationOptions
from smarts.sstudio.scenario_construction import build_scenarios

sys.path.insert(0, str(Path(__file__).parents[2].absolute()))
@@ -29,25 +30,28 @@
def main(scenarios, headless, num_episodes):
open_agent_spec = open_agent.entrypoint(debug=False, aggressiveness=3)
env = gym.make(
-"smarts.env:hiway-v0",
+"smarts.env:hiway-v1",
scenarios=scenarios,
agent_interfaces={AGENT_ID: open_agent_spec.interface},
headless=headless,
sumo_headless=True,
+observation_options=ObservationOptions.unformatted,
)

for episode in episodes(n=num_episodes):
agent = open_agent_spec.build_agent()

-observations = env.reset()
+observations, _ = env.reset()
episode.record_scenario(env.scenario_log)

-dones = {"__all__": False}
-while not dones["__all__"]:
+terminateds = {"__all__": False}
+while not terminateds["__all__"]:
agent_obs = observations[AGENT_ID]
agent_action = agent.act(agent_obs)
-observations, rewards, dones, infos = env.step({AGENT_ID: agent_action})
-episode.record_step(observations, rewards, dones, infos)
+observations, rewards, terminateds, truncateds, infos = env.step(
+{AGENT_ID: agent_action}
+)
+episode.record_step(observations, rewards, terminateds, truncateds, infos)

del agent

4 changes: 2 additions & 2 deletions examples/control/hiway_env_v1_lane_follower.py
@@ -12,7 +12,7 @@


class LaneFollowerAgent(Agent):
-def act(self, obs: Dict[Any, Union[Any, Dict]]):
+def act(self, obs):
return (obs["waypoint_paths"]["speed_limit"][0][0], 0)


@@ -38,7 +38,7 @@ def main(scenarios, headless, num_episodes, max_episode_steps=None):
observation, reward, terminated, truncated, info = env.step(
{"SingleAgent": agent_action}
)
-episode.record_step(observation, reward, terminated, info)
+episode.record_step(observation, reward, terminated, truncated, info)

env.close()

17 changes: 9 additions & 8 deletions examples/control/laner.py
@@ -2,13 +2,14 @@
import sys
from pathlib import Path

-import gym
+import gymnasium as gym

sys.path.insert(0, str(Path(__file__).parents[2].absolute()))
from examples.tools.argument_parser import default_argument_parser
from smarts.core.agent import Agent
from smarts.core.agent_interface import AgentInterface, AgentType
from smarts.core.utils.episodes import episodes
+from smarts.env.utils.action_conversion import ActionOptions
from smarts.sstudio.scenario_construction import build_scenarios
from smarts.zoo.agent_spec import AgentSpec

@@ -34,31 +35,31 @@ def main(scenarios, headless, num_episodes, max_episode_steps=None):
}

env = gym.make(
-"smarts.env:hiway-v0",
+"smarts.env:hiway-v1",
scenarios=scenarios,
agent_interfaces={
a_id: a_intrf.interface for a_id, a_intrf in agent_specs.items()
},
headless=headless,
sumo_headless=True,
+action_options=ActionOptions.unformatted,
)

for episode in episodes(n=num_episodes):
agents = {
agent_id: agent_spec.build_agent()
for agent_id, agent_spec in agent_specs.items()
}
-observations = env.reset()
+observations, _ = env.reset()
episode.record_scenario(env.scenario_log)

-dones = {"__all__": False}
-while not dones["__all__"]:
+terminateds = {"__all__": False}
+while not terminateds["__all__"]:
actions = {
agent_id: agents[agent_id].act(agent_obs)
for agent_id, agent_obs in observations.items()
}
-observations, rewards, dones, infos = env.step(actions)
-episode.record_step(observations, rewards, dones, infos)
+observations, rewards, terminateds, truncateds, infos = env.step(actions)
+episode.record_step(observations, rewards, terminateds, truncateds, infos)

env.close()

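laner.py now also requests `action_options=ActionOptions.unformatted`. The assumption behind the sketch below (consistent with the `AgentType.Laner` action space, but not shown in this hunk) is that the option lets the agent keep returning SMARTS's native lane-action strings rather than a gym-formatted action:

import random

from smarts.core.agent import Agent


class RandomLanerAgent(Agent):
    """Hypothetical agent, used only for illustration."""

    def act(self, obs):
        # Classic SMARTS lane-action strings, accepted directly when
        # action_options=ActionOptions.unformatted (assumption).
        return random.choice(
            ["keep_lane", "slow_down", "change_lane_left", "change_lane_right"]
        )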
84 changes: 47 additions & 37 deletions examples/control/parallel_environment.py
@@ -1,4 +1,4 @@
-import gym
+import gymnasium as gym

gym.logger.set_level(40)

@@ -13,10 +13,9 @@
from smarts.core.agent_interface import AgentInterface
from smarts.core.controllers import ActionSpaceType
from smarts.core.observations import Observation
-from smarts.env.hiway_env import HiWayEnv
-from smarts.env.wrappers.parallel_env import ParallelEnv
+from smarts.env.gymnasium.wrappers.parallel_env import ParallelEnv
+from smarts.env.utils.observation_conversion import ObservationOptions
from smarts.sstudio.scenario_construction import build_scenarios
-from smarts.zoo.agent_spec import AgentSpec


class LaneAgent(Agent):
@@ -43,38 +42,34 @@ def main(
agent_ids = [f"Agent_{i}" for i in range(num_agents)]

# Define agent specification
-agent_specs = {
-agent_id: AgentSpec(
-interface=AgentInterface(
-top_down_rgb=True,
-waypoint_paths=True,
-action=ActionSpaceType.LaneWithContinuousSpeed,
-max_episode_steps=max_episode_steps,
-),
-agent_builder=LaneAgent,
+agent_interfaces = {
+agent_id: AgentInterface(
+top_down_rgb=True,
+waypoint_paths=True,
+action=ActionSpaceType.LaneWithContinuousSpeed,
+max_episode_steps=max_episode_steps,
+)
for agent_id in agent_ids
}

-# Unique `sim_name` is required by each HiWayEnv in order to be displayed
+# Unique `sim_name` is required by each HiWayEnvV1 in order to be displayed
# in Envision.
-env_constructor = lambda sim_name: HiWayEnv(
+env_constructor = lambda sim_name, seed: gym.make(
+"smarts.env:hiway-v1",
scenarios=scenarios,
-agent_specs=agent_specs,
+agent_interfaces=agent_interfaces,
sim_name=sim_name,
headless=headless,
+observation_options=ObservationOptions.unformatted,
+seed=seed,
)

-# A list of env constructors of type `Callable[[], gym.Env]`
+# A list of env constructors of type `Callable[[int], gym.Env]`
env_constructors = [
partial(env_constructor, sim_name=f"{sim_name}_{ind}") for ind in range(num_env)
]

# Build multiple agents
-agents = {
-agent_id: agent_spec.build_agent()
-for agent_id, agent_spec in agent_specs.items()
-}
+agents = {agent_id: LaneAgent() for agent_id in agent_interfaces.keys()}

# Create parallel environments
env = ParallelEnv(
@@ -103,27 +98,37 @@ def parallel_env_async(
num_steps (int): Number of steps to step the environment.
"""

-batched_dones = [{"__all__": False} for _ in range(num_env)]
-batched_observations = env.reset()
+batched_terminateds = [{"__all__": False} for _ in range(num_env)]
+batched_truncateds = [{"__all__": False} for _ in range(num_env)]
+batched_observations, _ = env.reset()

for _ in range(num_steps):
# Compute actions for all active(i.e., not done) agents
batched_actions = []
-for observations, dones in zip(batched_observations, batched_dones):
+for observations, terminateds, truncateds in zip(
+batched_observations, batched_terminateds, batched_truncateds
+):
actions = {
agent_id: agents[agent_id].act(agent_obs)
for agent_id, agent_obs in observations.items()
-if not dones.get(agent_id, False)
-or dones[
+if (
+not terminateds.get(agent_id, False)
+and not truncateds.get(agent_id, False)
+)
+or terminateds[
"__all__"
-] # `dones[__all__]==True` implies the env was auto-reset in previous iteration
+] # `terminateds[__all__]==True` implies the env was auto-reset in previous iteration
}
batched_actions.append(actions)

# Step all environments in parallel
-batched_observations, batched_rewards, batched_dones, batched_infos = env.step(
-batched_actions
-)
+(
+batched_observations,
+batched_rewards,
+batched_terminateds,
+batched_truncateds,
+batched_infos,
+) = env.step(batched_actions)

env.close()

Expand All @@ -144,26 +149,31 @@ def parallel_env_sync(
"""

for _ in range(num_episodes):
-batched_dones = [{"__all__": False} for _ in range(num_env)]
-batched_observations = env.reset()
+batched_terminateds = [{"__all__": False} for _ in range(num_env)]
+batched_truncateds = [{"__all__": False} for _ in range(num_env)]
+batched_observations, _ = env.reset()

# Iterate until all environments complete an episode each.
-while not all(dones["__all__"] for dones in batched_dones):
+while not all(terminateds["__all__"] for terminateds in batched_terminateds):
# Compute actions for all active(i.e., not done) agents
batched_actions = []
-for observations, dones in zip(batched_observations, batched_dones):
+for observations, terminateds, truncateds in zip(
+batched_observations, batched_terminateds, batched_truncateds
+):
actions = {
agent_id: agents[agent_id].act(agent_obs)
for agent_id, agent_obs in observations.items()
-if not dones.get(agent_id, False)
+if not terminateds.get(agent_id, False)
+and not truncateds.get(agent_id, False)
}
batched_actions.append(actions)

# Step all environments in parallel
(
batched_observations,
batched_rewards,
-batched_dones,
+batched_terminateds,
+batched_truncateds,
batched_infos,
) = env.step(batched_actions)

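The constructor type in parallel_environment.py changes from `Callable[[], gym.Env]` to `Callable[[int], gym.Env]`, i.e. each environment constructor now accepts a seed. A standalone sketch of that pattern (an assumption about how per-worker seeds are supplied; `ParallelEnv` itself does this internally, and a generic environment id again stands in for `smarts.env:hiway-v1` so the snippet runs on its own):

from functools import partial

import gymnasium as gym


def env_constructor(sim_name: str, seed: int) -> gym.Env:
    # In the example this would be gym.make("smarts.env:hiway-v1", ..., sim_name=sim_name, seed=seed).
    env = gym.make("CartPole-v1")
    env.reset(seed=seed)
    return env


# sim_name is bound per worker; the seed argument is left free, giving Callable[[int], gym.Env].
env_constructors = [partial(env_constructor, sim_name=f"demo_{i}") for i in range(2)]

# Each constructor is then called with its own seed (done by hand here).
envs = [constructor(seed=100 + i) for i, constructor in enumerate(env_constructors)]
for env in envs:
    env.close()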
[The diffs for the remaining 32 changed files are not shown.]
