Replace gym with gymnasium #2034

Merged · 1 commit · May 23, 2023
3 changes: 3 additions & 0 deletions CHANGELOG.md

@@ -15,6 +15,7 @@ Copy and pasting the git commit messages is __NOT__ enough.
 - Added `Condition`, `ConditionRequires`, `ConditionState` and various condition implementations to enable logical operations in scenarios.
 - Traffic light signals are now visualized in Envision.
 - Interest vehicles now show up in Envision.
+- Seed of `hiway-v1` env can be retrieved through a new property `seed`.
 ### Changed
 - Changed waypoints in sumo maps to use more incoming lanes into junctions.
 - Increased the cutoff radius for filtering out waypoints that are too far away in junctions in sumo maps.
@@ -27,6 +28,8 @@ Copy and pasting the git commit messages is __NOT__ enough.
 - `TrapEntryTactic.wait_to_hijack_limit_s` field now defaults to `0`.
 - `EntryTactic` derived classes now contain `condition` to provide extra filtering of candidate actors.
 - `EntryTactic` derived classes now contain `start_time`.
+- `info` returned by `hiway-v1` in `reset()` and `step()` methods are unified.
+- Changed instances of `hiway-v0` and `gym` to use `hiway-v1` and `gymnasium`, respectively.
 ### Deprecated
 - `visdom` is set to be removed from the SMARTS object parameters.
 - Deprecated `start_time` on missions.
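Note: the two new `hiway-v1` behaviors above can be exercised together. A minimal sketch, assuming a placeholder scenario path and agent id, and assuming the new `seed` property is reached through `env.unwrapped` since `gym.make` returns a wrapped env:

    import gymnasium as gym

    from smarts.core.agent_interface import AgentInterface, AgentType

    agent_interfaces = {
        "Agent_0": AgentInterface.from_type(
            AgentType.LanerWithSpeed, max_episode_steps=100
        )
    }
    env = gym.make(
        "smarts.env:hiway-v1",
        scenarios=["scenarios/sumo/loop"],  # placeholder scenario path
        agent_interfaces=agent_interfaces,
        headless=True,
    )
    observations, info = env.reset()  # `info` now has the same structure as in step()
    print(env.unwrapped.seed)  # seed retrievable via the new `seed` property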
2 changes: 1 addition & 1 deletion docs/minimal.py

@@ -1,4 +1,4 @@
-import gym
+import gymnasium as gym
 from smarts.core.agent import Agent
 from smarts.core.agent_interface import AgentInterface, AgentType
34 changes: 15 additions & 19 deletions examples/control/chase_via_points.py

@@ -1,16 +1,16 @@
 import sys
 from pathlib import Path

-import gym
+import gymnasium as gym

 sys.path.insert(0, str(Path(__file__).parents[2].absolute()))
 from examples.tools.argument_parser import default_argument_parser
 from smarts.core.agent import Agent
 from smarts.core.agent_interface import AgentInterface, AgentType
 from smarts.core.observations import Observation
 from smarts.core.utils.episodes import episodes
+from smarts.env.utils.observation_conversion import ObservationOptions
 from smarts.sstudio.scenario_construction import build_scenarios
-from smarts.zoo.agent_spec import AgentSpec

 N_AGENTS = 3
 AGENT_IDS = ["Agent_%i" % i for i in range(N_AGENTS)]
@@ -35,41 +35,37 @@ def act(self, obs: Observation):


 def main(scenarios, headless, num_episodes, max_episode_steps=None):
-    agent_specs = {
-        agent_id: AgentSpec(
-            interface=AgentInterface.from_type(
-                AgentType.LanerWithSpeed,
-                max_episode_steps=max_episode_steps,
-            ),
-            agent_builder=ChaseViaPointsAgent,
+    agent_interfaces = {
+        agent_id: AgentInterface.from_type(
+            AgentType.LanerWithSpeed,
+            max_episode_steps=max_episode_steps,
         )
         for agent_id in AGENT_IDS
     }

     env = gym.make(
-        "smarts.env:hiway-v0",
+        "smarts.env:hiway-v1",
         scenarios=scenarios,
-        agent_specs=agent_specs,
+        agent_interfaces=agent_interfaces,
         headless=headless,
-        sumo_headless=True,
+        observation_options=ObservationOptions.unformatted,
     )

     for episode in episodes(n=num_episodes):
         agents = {
-            agent_id: agent_spec.build_agent()
-            for agent_id, agent_spec in agent_specs.items()
+            agent_id: ChaseViaPointsAgent() for agent_id in agent_interfaces.keys()
         }
-        observations = env.reset()
+        observations, _ = env.reset()
         episode.record_scenario(env.scenario_log)

-        dones = {"__all__": False}
-        while not dones["__all__"]:
+        terminateds = {"__all__": False}
+        while not terminateds["__all__"]:
             actions = {
                 agent_id: agents[agent_id].act(agent_obs)
                 for agent_id, agent_obs in observations.items()
             }
-            observations, rewards, dones, infos = env.step(actions)
-            episode.record_step(observations, rewards, dones, infos)
+            observations, rewards, terminateds, truncateds, infos = env.step(actions)
+            episode.record_step(observations, rewards, terminateds, truncateds, infos)

     env.close()
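The loop rewrite above is the pattern repeated in every example below: under gymnasium, `reset()` returns an `(observations, info)` pair, and `step()` returns five values, with the old `dones` split into `terminateds` and `truncateds`. A generic sketch of the ported loop, where `env` is assumed to be a multi-agent `hiway-v1` instance and `agents` a dict of built agents:

    from typing import Any, Dict

    import gymnasium as gym

    def run_episode(env: gym.Env, agents: Dict[str, Any]) -> None:
        # reset() returns (observations, info) under gymnasium.
        observations, _ = env.reset()
        terminateds = {"__all__": False}
        while not terminateds["__all__"]:
            actions = {
                agent_id: agents[agent_id].act(agent_obs)
                for agent_id, agent_obs in observations.items()
            }
            # step() returns five values: `dones` splits into terminateds/truncateds.
            observations, rewards, terminateds, truncateds, infos = env.step(actions)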
18 changes: 11 additions & 7 deletions examples/control/ego_open_agent.py

@@ -7,9 +7,10 @@
 import sys
 from pathlib import Path

-import gym
+import gymnasium as gym

 from smarts.core.utils.episodes import episodes
+from smarts.env.utils.observation_conversion import ObservationOptions
 from smarts.sstudio.scenario_construction import build_scenarios

 sys.path.insert(0, str(Path(__file__).parents[2].absolute()))
@@ -29,25 +30,28 @@
 def main(scenarios, headless, num_episodes):
     open_agent_spec = open_agent.entrypoint(debug=False, aggressiveness=3)
     env = gym.make(
-        "smarts.env:hiway-v0",
+        "smarts.env:hiway-v1",
         scenarios=scenarios,
         agent_interfaces={AGENT_ID: open_agent_spec.interface},
         headless=headless,
         sumo_headless=True,
+        observation_options=ObservationOptions.unformatted,
     )

     for episode in episodes(n=num_episodes):
         agent = open_agent_spec.build_agent()

-        observations = env.reset()
+        observations, _ = env.reset()
         episode.record_scenario(env.scenario_log)

-        dones = {"__all__": False}
-        while not dones["__all__"]:
+        terminateds = {"__all__": False}
+        while not terminateds["__all__"]:
             agent_obs = observations[AGENT_ID]
             agent_action = agent.act(agent_obs)
-            observations, rewards, dones, infos = env.step({AGENT_ID: agent_action})
-            episode.record_step(observations, rewards, dones, infos)
+            observations, rewards, terminateds, truncateds, infos = env.step(
+                {AGENT_ID: agent_action}
+            )
+            episode.record_step(observations, rewards, terminateds, truncateds, infos)

         del agent
4 changes: 2 additions & 2 deletions examples/control/hiway_env_v1_lane_follower.py

@@ -12,7 +12,7 @@


 class LaneFollowerAgent(Agent):
-    def act(self, obs: Dict[Any, Union[Any, Dict]]):
+    def act(self, obs):
         return (obs["waypoint_paths"]["speed_limit"][0][0], 0)
@@ -38,7 +38,7 @@ def main(scenarios, headless, num_episodes, max_episode_steps=None):
             observation, reward, terminated, truncated, info = env.step(
                 {"SingleAgent": agent_action}
             )
-            episode.record_step(observation, reward, terminated, info)
+            episode.record_step(observation, reward, terminated, truncated, info)

     env.close()
17 changes: 9 additions & 8 deletions examples/control/laner.py

@@ -2,13 +2,14 @@
 import sys
 from pathlib import Path

-import gym
+import gymnasium as gym

 sys.path.insert(0, str(Path(__file__).parents[2].absolute()))
 from examples.tools.argument_parser import default_argument_parser
 from smarts.core.agent import Agent
 from smarts.core.agent_interface import AgentInterface, AgentType
 from smarts.core.utils.episodes import episodes
+from smarts.env.utils.action_conversion import ActionOptions
 from smarts.sstudio.scenario_construction import build_scenarios
 from smarts.zoo.agent_spec import AgentSpec

@@ -34,31 +35,31 @@ def main(scenarios, headless, num_episodes, max_episode_steps=None):
     }

     env = gym.make(
-        "smarts.env:hiway-v0",
+        "smarts.env:hiway-v1",
         scenarios=scenarios,
         agent_interfaces={
             a_id: a_intrf.interface for a_id, a_intrf in agent_specs.items()
         },
         headless=headless,
-        sumo_headless=True,
+        action_options=ActionOptions.unformatted,
     )

     for episode in episodes(n=num_episodes):
         agents = {
             agent_id: agent_spec.build_agent()
             for agent_id, agent_spec in agent_specs.items()
         }
-        observations = env.reset()
+        observations, _ = env.reset()
         episode.record_scenario(env.scenario_log)

-        dones = {"__all__": False}
-        while not dones["__all__"]:
+        terminateds = {"__all__": False}
+        while not terminateds["__all__"]:
             actions = {
                 agent_id: agents[agent_id].act(agent_obs)
                 for agent_id, agent_obs in observations.items()
             }
-            observations, rewards, dones, infos = env.step(actions)
-            episode.record_step(observations, rewards, dones, infos)
+            observations, rewards, terminateds, truncateds, infos = env.step(actions)
+            episode.record_step(observations, rewards, terminateds, truncateds, infos)

     env.close()
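Worth noting: laner.py opts out of action formatting, while chase_via_points.py opted out of observation formatting. In both cases `.unformatted` appears to keep the classic SMARTS objects on that channel (e.g., `Observation` instances and plain lane actions) instead of the gymnasium-space dicts used by default. A sketch showing both knobs together, with a placeholder scenario path and agent interface:

    import gymnasium as gym

    from smarts.core.agent_interface import AgentInterface, AgentType
    from smarts.env.utils.action_conversion import ActionOptions
    from smarts.env.utils.observation_conversion import ObservationOptions

    env = gym.make(
        "smarts.env:hiway-v1",
        scenarios=["scenarios/sumo/loop"],  # placeholder scenario path
        agent_interfaces={
            "Agent_0": AgentInterface.from_type(AgentType.Laner, max_episode_steps=100)
        },
        # `unformatted` hands back classic Observation objects and accepts
        # classic action types, rather than gymnasium-space formatted dicts.
        observation_options=ObservationOptions.unformatted,
        action_options=ActionOptions.unformatted,
    )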
84 changes: 47 additions & 37 deletions examples/control/parallel_environment.py

@@ -1,4 +1,4 @@
-import gym
+import gymnasium as gym

 gym.logger.set_level(40)

@@ -13,10 +13,9 @@
 from smarts.core.agent_interface import AgentInterface
 from smarts.core.controllers import ActionSpaceType
 from smarts.core.observations import Observation
-from smarts.env.hiway_env import HiWayEnv
-from smarts.env.wrappers.parallel_env import ParallelEnv
+from smarts.env.gymnasium.wrappers.parallel_env import ParallelEnv
+from smarts.env.utils.observation_conversion import ObservationOptions
 from smarts.sstudio.scenario_construction import build_scenarios
-from smarts.zoo.agent_spec import AgentSpec


 class LaneAgent(Agent):
@@ -43,38 +42,34 @@ def main(
     agent_ids = [f"Agent_{i}" for i in range(num_agents)]

     # Define agent specification
-    agent_specs = {
-        agent_id: AgentSpec(
-            interface=AgentInterface(
-                top_down_rgb=True,
-                waypoint_paths=True,
-                action=ActionSpaceType.LaneWithContinuousSpeed,
-                max_episode_steps=max_episode_steps,
-            ),
-            agent_builder=LaneAgent,
+    agent_interfaces = {
+        agent_id: AgentInterface(
+            top_down_rgb=True,
+            waypoint_paths=True,
+            action=ActionSpaceType.LaneWithContinuousSpeed,
+            max_episode_steps=max_episode_steps,
         )
         for agent_id in agent_ids
     }

-    # Unique `sim_name` is required by each HiWayEnv in order to be displayed
+    # Unique `sim_name` is required by each HiWayEnvV1 in order to be displayed
     # in Envision.
-    env_constructor = lambda sim_name: HiWayEnv(
+    env_constructor = lambda sim_name, seed: gym.make(
+        "smarts.env:hiway-v1",
         scenarios=scenarios,
-        agent_specs=agent_specs,
+        agent_interfaces=agent_interfaces,
         sim_name=sim_name,
         headless=headless,
-        sumo_headless=True,
+        observation_options=ObservationOptions.unformatted,
+        seed=seed,
     )

-    # A list of env constructors of type `Callable[[], gym.Env]`
+    # A list of env constructors of type `Callable[[int], gym.Env]`
     env_constructors = [
         partial(env_constructor, sim_name=f"{sim_name}_{ind}") for ind in range(num_env)
     ]

     # Build multiple agents
-    agents = {
-        agent_id: agent_spec.build_agent()
-        for agent_id, agent_spec in agent_specs.items()
-    }
+    agents = {agent_id: LaneAgent() for agent_id in agent_interfaces.keys()}

     # Create parallel environments
     env = ParallelEnv(
@@ -103,27 +98,37 @@ def parallel_env_async(
         num_steps (int): Number of steps to step the environment.
     """

-    batched_dones = [{"__all__": False} for _ in range(num_env)]
-    batched_observations = env.reset()
+    batched_terminateds = [{"__all__": False} for _ in range(num_env)]
+    batched_truncateds = [{"__all__": False} for _ in range(num_env)]
+    batched_observations, _ = env.reset()

     for _ in range(num_steps):
         # Compute actions for all active(i.e., not done) agents
         batched_actions = []
-        for observations, dones in zip(batched_observations, batched_dones):
+        for observations, terminateds, truncateds in zip(
+            batched_observations, batched_terminateds, batched_truncateds
+        ):
             actions = {
                 agent_id: agents[agent_id].act(agent_obs)
                 for agent_id, agent_obs in observations.items()
-                if not dones.get(agent_id, False)
-                or dones[
+                if (
+                    not terminateds.get(agent_id, False)
+                    and not truncateds.get(agent_id, False)
+                )
+                or terminateds[
                     "__all__"
-                ]  # `dones[__all__]==True` implies the env was auto-reset in previous iteration
+                ]  # `terminateds[__all__]==True` implies the env was auto-reset in previous iteration
             }
             batched_actions.append(actions)

         # Step all environments in parallel
-        batched_observations, batched_rewards, batched_dones, batched_infos = env.step(
-            batched_actions
-        )
+        (
+            batched_observations,
+            batched_rewards,
+            batched_terminateds,
+            batched_truncateds,
+            batched_infos,
+        ) = env.step(batched_actions)

     env.close()

@@ -144,26 +149,31 @@ def parallel_env_sync(
     """

     for _ in range(num_episodes):
-        batched_dones = [{"__all__": False} for _ in range(num_env)]
-        batched_observations = env.reset()
+        batched_terminateds = [{"__all__": False} for _ in range(num_env)]
+        batched_truncateds = [{"__all__": False} for _ in range(num_env)]
+        batched_observations, _ = env.reset()

         # Iterate until all environments complete an episode each.
-        while not all(dones["__all__"] for dones in batched_dones):
+        while not all(terminateds["__all__"] for terminateds in batched_terminateds):
             # Compute actions for all active(i.e., not done) agents
             batched_actions = []
-            for observations, dones in zip(batched_observations, batched_dones):
+            for observations, terminateds, truncateds in zip(
+                batched_observations, batched_terminateds, batched_truncateds
+            ):
                 actions = {
                     agent_id: agents[agent_id].act(agent_obs)
                     for agent_id, agent_obs in observations.items()
-                    if not dones.get(agent_id, False)
+                    if not terminateds.get(agent_id, False)
+                    and not truncateds.get(agent_id, False)
                 }
                 batched_actions.append(actions)

             # Step all environments in parallel
             (
                 batched_observations,
                 batched_rewards,
-                batched_dones,
+                batched_terminateds,
+                batched_truncateds,
                 batched_infos,
             ) = env.step(batched_actions)
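The reworked constructor is what lets `ParallelEnv` seed each worker differently: `sim_name` is pre-bound with `partial`, leaving a `Callable[[int], gym.Env]` that takes only the seed. A sketch of that factory pattern, assuming `ParallelEnv` supplies the seed as a keyword argument and using placeholder names:

    from functools import partial

    import gymnasium as gym

    from smarts.core.agent_interface import AgentInterface, AgentType

    # Placeholder interfaces; the example builds richer ones (RGB + waypoints).
    agent_interfaces = {
        "Agent_0": AgentInterface.from_type(AgentType.Laner, max_episode_steps=128)
    }

    def env_constructor(sim_name: str, seed: int) -> gym.Env:
        # Each parallel worker gets a uniquely named, individually seeded env.
        return gym.make(
            "smarts.env:hiway-v1",
            scenarios=["scenarios/sumo/loop"],  # placeholder scenario path
            agent_interfaces=agent_interfaces,
            sim_name=sim_name,
            headless=True,
            seed=seed,
        )

    # Pre-bind the name; only the seed remains to be supplied per worker.
    env_constructors = [
        partial(env_constructor, sim_name=f"par_env_{ind}") for ind in range(4)
    ]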