From fd3721f4a02111dc784d42f366c86f5dd146626c Mon Sep 17 00:00:00 2001 From: Florian Felten Date: Wed, 7 Aug 2024 15:13:12 +0200 Subject: [PATCH 01/12] migration to gymnasium 1.0 --- examples/envelope_minecart.py | 2 +- examples/eupg_fishwood.py | 2 +- examples/mo_q_learning_DST.py | 2 +- examples/mp_mo_q_learning_DST.py | 2 +- examples/pcn_minecart.py | 2 +- examples/pgmorl_halfcheetah.py | 2 +- experiments/benchmark/launch_experiment.py | 13 ++++++----- .../hyperparameter_search/launch_sweep.py | 2 +- morl_baselines/__init__.py | 2 +- morl_baselines/common/morl_algorithm.py | 2 +- morl_baselines/multi_policy/morld/morld.py | 2 +- morl_baselines/multi_policy/pgmorl/pgmorl.py | 5 +++-- morl_baselines/single_policy/ser/mo_ppo.py | 22 +++++++++++-------- 13 files changed, 33 insertions(+), 27 deletions(-) diff --git a/examples/envelope_minecart.py b/examples/envelope_minecart.py index bf41f0f6..c3f43bf4 100644 --- a/examples/envelope_minecart.py +++ b/examples/envelope_minecart.py @@ -1,6 +1,6 @@ import mo_gymnasium as mo_gym import numpy as np -from mo_gymnasium.utils import MORecordEpisodeStatistics +from mo_gymnasium.wrappers import MORecordEpisodeStatistics from morl_baselines.multi_policy.envelope.envelope import Envelope diff --git a/examples/eupg_fishwood.py b/examples/eupg_fishwood.py index 7b253522..55a77583 100644 --- a/examples/eupg_fishwood.py +++ b/examples/eupg_fishwood.py @@ -1,7 +1,7 @@ import mo_gymnasium as mo_gym import numpy as np import torch as th -from mo_gymnasium.utils import MORecordEpisodeStatistics +from mo_gymnasium.wrappers import MORecordEpisodeStatistics from morl_baselines.common.evaluation import eval_mo_reward_conditioned from morl_baselines.single_policy.esr.eupg import EUPG diff --git a/examples/mo_q_learning_DST.py b/examples/mo_q_learning_DST.py index ab234086..8910519e 100644 --- a/examples/mo_q_learning_DST.py +++ b/examples/mo_q_learning_DST.py @@ -2,7 +2,7 @@ import mo_gymnasium as mo_gym import numpy as np -from mo_gymnasium.utils import MORecordEpisodeStatistics +from mo_gymnasium.wrappers import MORecordEpisodeStatistics from morl_baselines.common.evaluation import eval_mo from morl_baselines.common.scalarization import tchebicheff diff --git a/examples/mp_mo_q_learning_DST.py b/examples/mp_mo_q_learning_DST.py index 89977c3f..a6d418b6 100644 --- a/examples/mp_mo_q_learning_DST.py +++ b/examples/mp_mo_q_learning_DST.py @@ -1,6 +1,6 @@ import mo_gymnasium as mo_gym import numpy as np -from mo_gymnasium import MORecordEpisodeStatistics +from mo_gymnasium.wrappers import MORecordEpisodeStatistics from morl_baselines.common.scalarization import tchebicheff from morl_baselines.multi_policy.multi_policy_moqlearning.mp_mo_q_learning import ( diff --git a/examples/pcn_minecart.py b/examples/pcn_minecart.py index aabc577f..363fcd5f 100644 --- a/examples/pcn_minecart.py +++ b/examples/pcn_minecart.py @@ -1,6 +1,6 @@ import mo_gymnasium as mo_gym import numpy as np -from mo_gymnasium.utils import MORecordEpisodeStatistics +from mo_gymnasium.wrappers import MORecordEpisodeStatistics from morl_baselines.multi_policy.pcn.pcn import PCN diff --git a/examples/pgmorl_halfcheetah.py b/examples/pgmorl_halfcheetah.py index 5b54ed5b..2b5bd5f0 100644 --- a/examples/pgmorl_halfcheetah.py +++ b/examples/pgmorl_halfcheetah.py @@ -19,7 +19,7 @@ algo.train( total_timesteps=int(5e6), eval_env=make_env(env_id, 42, 0, "PGMORL_eval_env", gamma=0.995)(), - ref_point=np.array([0.0, -5.0]), + ref_point=np.array([-100.0, -100.0]), known_pareto_front=None, ) env = make_env(env_id, 
422, 1, "PGMORL_test", gamma=0.995)() # idx != 0 to avoid taking videos diff --git a/experiments/benchmark/launch_experiment.py b/experiments/benchmark/launch_experiment.py index 4b093b20..cf1a1b6d 100644 --- a/experiments/benchmark/launch_experiment.py +++ b/experiments/benchmark/launch_experiment.py @@ -15,9 +15,8 @@ import numpy as np import requests from gym_super_mario_bros.actions import SIMPLE_MOVEMENT -from gymnasium.wrappers import FlattenObservation -from gymnasium.wrappers.record_video import RecordVideo -from mo_gymnasium.utils import MORecordEpisodeStatistics +from gymnasium.wrappers import FlattenObservation, RecordVideo +from mo_gymnasium.wrappers import MORecordEpisodeStatistics from morl_baselines.common.evaluation import seed_everything from morl_baselines.common.experiments import ( @@ -90,13 +89,15 @@ def autotag() -> str: git_commit = subprocess.check_output(["git", "rev-parse", "--verify", "HEAD"]).decode("ascii").strip() try: # try finding the pull request number on github - prs = requests.get(f"https://api.github.com/search/issues?q=repo:LucasAlegre/morl-baselines+is:pr+{git_commit}") + prs = requests.get( + f"https://api.github.com/search/issues?q=repo:LucasAlegre/morl-baselines+is:pr+{git_commit}" # noqa + ) if prs.status_code == 200: prs = prs.json() if len(prs["items"]) > 0: pr = prs["items"][0] pr_number = pr["number"] - wandb_tag += f",pr-{pr_number}" + wandb_tag += f",pr-{pr_number}" # noqa print(f"identified github pull request: {pr_number}") except Exception as e: print(e) @@ -165,7 +166,7 @@ def wrap_mario(env): TimeLimit, ) from mo_gymnasium.envs.mario.joypad_space import JoypadSpace - from mo_gymnasium.utils import MOMaxAndSkipObservation + from mo_gymnasium.wrappers import MOMaxAndSkipObservation env = JoypadSpace(env, SIMPLE_MOVEMENT) env = MOMaxAndSkipObservation(env, skip=4) diff --git a/experiments/hyperparameter_search/launch_sweep.py b/experiments/hyperparameter_search/launch_sweep.py index de48d782..e6fedb44 100644 --- a/experiments/hyperparameter_search/launch_sweep.py +++ b/experiments/hyperparameter_search/launch_sweep.py @@ -7,7 +7,7 @@ import numpy as np import wandb import yaml -from mo_gymnasium.utils import MORecordEpisodeStatistics +from mo_gymnasium.wrappers import MORecordEpisodeStatistics from morl_baselines.common.evaluation import seed_everything from morl_baselines.common.experiments import ( diff --git a/morl_baselines/__init__.py b/morl_baselines/__init__.py index 498632ab..6fc312e4 100644 --- a/morl_baselines/__init__.py +++ b/morl_baselines/__init__.py @@ -1,4 +1,4 @@ """MORL-Baselines contains various MORL algorithms and utility functions.""" -__version__ = "1.0.0" +__version__ = "1.1.0" diff --git a/morl_baselines/common/morl_algorithm.py b/morl_baselines/common/morl_algorithm.py index b7af6cd5..bb6810e3 100644 --- a/morl_baselines/common/morl_algorithm.py +++ b/morl_baselines/common/morl_algorithm.py @@ -11,7 +11,7 @@ import torch.nn import wandb from gymnasium import spaces -from mo_gymnasium.utils import MOSyncVectorEnv +from mo_gymnasium.wrappers.vector import MOSyncVectorEnv from morl_baselines.common.evaluation import ( eval_mo_reward_conditioned, diff --git a/morl_baselines/multi_policy/morld/morld.py b/morl_baselines/multi_policy/morld/morld.py index c931efc7..11a3e459 100644 --- a/morl_baselines/multi_policy/morld/morld.py +++ b/morl_baselines/multi_policy/morld/morld.py @@ -10,7 +10,7 @@ import gymnasium as gym import numpy as np import torch as th -from mo_gymnasium import MONormalizeReward +from 
mo_gymnasium.wrappers import MONormalizeReward from torch import optim from morl_baselines.common.evaluation import log_all_multi_policy_metrics diff --git a/morl_baselines/multi_policy/pgmorl/pgmorl.py b/morl_baselines/multi_policy/pgmorl/pgmorl.py index 02c77fcf..ad55ac01 100644 --- a/morl_baselines/multi_policy/pgmorl/pgmorl.py +++ b/morl_baselines/multi_policy/pgmorl/pgmorl.py @@ -420,7 +420,7 @@ def __init__( envs = [make_env(env_id, self.seed + i, i, experiment_name, self.gamma) for i in range(self.num_envs)] else: envs = [make_env(env_id, i, i, experiment_name, self.gamma) for i in range(self.num_envs)] - self.env = mo_gym.MOSyncVectorEnv(envs) + self.env = mo_gym.wrappers.vector.MOSyncVectorEnv(envs) else: raise ValueError("Environments should be vectorized for PPO. You should provide an environment id instead.") @@ -507,6 +507,7 @@ def get_config(self) -> dict: def __train_all_agents(self, iteration: int, max_iterations: int): for i, agent in enumerate(self.agents): agent.train(self.start_time, iteration, max_iterations) + self.global_step += self.steps_per_iteration * self.num_envs def __eval_all_agents( self, @@ -646,7 +647,7 @@ def train( # Warmup for i in range(1, self.warmup_iterations + 1): - print(f"Warmup iteration #{iteration}") + print(f"Warmup iteration #{iteration}, global step: {self.global_step}") if self.log: wandb.log({"charts/warmup_iterations": i, "global_step": self.global_step}) self.__train_all_agents(iteration=iteration, max_iterations=max_iterations) diff --git a/morl_baselines/single_policy/ser/mo_ppo.py b/morl_baselines/single_policy/ser/mo_ppo.py index 09385534..32d62242 100644 --- a/morl_baselines/single_policy/ser/mo_ppo.py +++ b/morl_baselines/single_policy/ser/mo_ppo.py @@ -9,7 +9,7 @@ import numpy as np import torch as th import wandb -from mo_gymnasium import MORecordEpisodeStatistics +from mo_gymnasium.wrappers import MORecordEpisodeStatistics from torch import nn, optim from torch.distributions import Normal @@ -122,7 +122,7 @@ def thunk(): env = mo_gym.make(env_id, render_mode="rgb_array") else: env = mo_gym.make(env_id) - reward_dim = env.reward_space.shape[0] + reward_dim = env.unwrapped.reward_space.shape[0] """ if idx == 0: env = gym.wrappers.RecordVideo( env, @@ -131,10 +131,10 @@ def thunk(): ) """ env = gym.wrappers.ClipAction(env) env = gym.wrappers.NormalizeObservation(env) - env = gym.wrappers.TransformObservation(env, lambda obs: np.clip(obs, -10, 10)) + env = gym.wrappers.TransformObservation(env, lambda obs: np.clip(obs, -10, 10), env.observation_space) for o in range(reward_dim): - env = mo_gym.utils.MONormalizeReward(env, idx=o, gamma=gamma) - env = mo_gym.utils.MOClipReward(env, idx=o, min_r=-10, max_r=10) + env = mo_gym.wrappers.MONormalizeReward(env, idx=o, gamma=gamma) + env = mo_gym.wrappers.MOClipReward(env, idx=o, min_r=-10, max_r=10) env = MORecordEpisodeStatistics(env, gamma=gamma) env.reset(seed=seed) env.action_space.seed(seed) @@ -404,7 +404,7 @@ def __collect_samples(self, obs: th.Tensor, done: th.Tensor): value = value.view(self.num_envs, self.networks.reward_dim) # Perform action on the environment - next_obs, reward, next_terminated, _, info = self.envs.step(action.cpu().numpy()) + next_obs, reward, next_terminated, next_truncated, info = self.envs.step(action.cpu().numpy()) reward = th.tensor(reward).to(self.device).view(self.num_envs, self.networks.reward_dim) # storing to batch self.batch.add(obs, action, logprob, reward, done, value) @@ -414,15 +414,18 @@ def __collect_samples(self, obs: th.Tensor, done: 
th.Tensor):
 
         # Episode info logging
         if "episode" in info.keys():
-            for item in info["episode"]:
+            indices = np.where(next_terminated | next_truncated)[0]
+            for idx in indices:
+                # Reconstructs the dict by extracting the relevant information for each vectorized env
+                info_log = {k: v[idx] for k, v in info["episode"].items()}
+
                 log_episode_info(
-                    item,
+                    info_log,
                     scalarization=np.dot,
                     weights=self.weights,
                     global_timestep=self.global_step,
                     id=self.id,
                 )
-            break
 
         return obs, done
 
@@ -603,6 +606,7 @@ def train(self, start_time, current_iteration: int, max_iterations: int):
         # Logging
         print("SPS:", int(self.global_step / (time.time() - start_time)))
         if self.log:
+            print(f"Worker {self.id} - Global step: {self.global_step}")
             wandb.log(
                 {"charts/SPS": int(self.global_step / (time.time() - start_time)), "global_step": self.global_step},
             )

From eb25592bc626a89d0870faf75b9c39a6cc2ead61 Mon Sep 17 00:00:00 2001
From: Florian Felten
Date: Wed, 7 Aug 2024 15:20:03 +0200
Subject: [PATCH 02/12] update dependencies

---
 pyproject.toml | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/pyproject.toml b/pyproject.toml
index 362d1037..c5922928 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -21,8 +21,8 @@ classifiers = [
     'Topic :: Scientific/Engineering :: Artificial Intelligence',
 ]
 dependencies = [
-    "mo-gymnasium >=1.0.1",
-    "gymnasium>=0.28.1,<0.30",
+    "mo-gymnasium >=1.1.0",
+    "gymnasium>=1.0.1a1",
     "numpy >=1.21.0,<2.0.0",
     "torch >=1.12.0",
     "pygame >=2.1.0",

From e21ea8b182206acbf06158cec9a09fd1751c9fad Mon Sep 17 00:00:00 2001
From: Florian Felten
Date: Wed, 7 Aug 2024 15:27:38 +0200
Subject: [PATCH 03/12] Fix iteration bug in PGMORL

---
 morl_baselines/multi_policy/pgmorl/pgmorl.py | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/morl_baselines/multi_policy/pgmorl/pgmorl.py b/morl_baselines/multi_policy/pgmorl/pgmorl.py
index ad55ac01..14af362e 100644
--- a/morl_baselines/multi_policy/pgmorl/pgmorl.py
+++ b/morl_baselines/multi_policy/pgmorl/pgmorl.py
@@ -632,7 +632,9 @@ def train(
                 }
             )
         self.num_eval_weights_for_eval = num_eval_weights_for_eval
-        max_iterations = total_timesteps // self.steps_per_iteration // self.num_envs
+        # 1 iteration is a full batch for each agent
+        # -> (steps_per_iteration * num_envs * pop_size) timesteps per iteration
+        max_iterations = total_timesteps // self.steps_per_iteration // self.num_envs // self.pop_size
         iteration = 0
         # Init
         current_evaluations = [np.zeros(self.reward_dim) for _ in range(len(self.agents))]

From d8db45bfc7cbaa016aa277996cfb2559ea9177d5 Mon Sep 17 00:00:00 2001
From: Florian Felten
Date: Wed, 7 Aug 2024 15:38:58 +0200
Subject: [PATCH 04/12] fix bugs

---
 morl_baselines/multi_policy/pareto_q_learning/pql.py | 2 +-
 morl_baselines/single_policy/ser/mo_ppo.py           | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/morl_baselines/multi_policy/pareto_q_learning/pql.py b/morl_baselines/multi_policy/pareto_q_learning/pql.py
index 45077aa5..b314678f 100644
--- a/morl_baselines/multi_policy/pareto_q_learning/pql.py
+++ b/morl_baselines/multi_policy/pareto_q_learning/pql.py
@@ -83,7 +83,7 @@ def __init__(
             raise Exception("PQL only supports discretizable observation spaces.")
 
         self.num_states = np.prod(self.env_shape)
-        self.num_objectives = self.env.reward_space.shape[0]
+        self.num_objectives = self.env.unwrapped.reward_space.shape[0]
         self.counts = np.zeros((self.num_states, self.num_actions))
         self.non_dominated = [
            [{tuple(np.zeros(self.num_objectives))} for _ in range(self.num_actions)] for _ in 
range(self.num_states) diff --git a/morl_baselines/single_policy/ser/mo_ppo.py b/morl_baselines/single_policy/ser/mo_ppo.py index 32d62242..f0af4be8 100644 --- a/morl_baselines/single_policy/ser/mo_ppo.py +++ b/morl_baselines/single_policy/ser/mo_ppo.py @@ -413,7 +413,7 @@ def __collect_samples(self, obs: th.Tensor, done: th.Tensor): obs, done = th.Tensor(next_obs).to(self.device), th.Tensor(next_terminated).to(self.device) # Episode info logging - if "episode" in info.keys(): + if self.log and "episode" in info.keys(): indices = np.where(next_terminated | next_truncated)[0] for idx in indices: # Reconstructs the dict by extracting the relevant information for each vectorized env From 7e3e51cd291e7fe55d95d816046250641cedc343 Mon Sep 17 00:00:00 2001 From: Florian Felten Date: Thu, 8 Aug 2024 14:56:16 +0200 Subject: [PATCH 05/12] Make global step monotonic in pgmorl --- morl_baselines/multi_policy/pgmorl/pgmorl.py | 1 + 1 file changed, 1 insertion(+) diff --git a/morl_baselines/multi_policy/pgmorl/pgmorl.py b/morl_baselines/multi_policy/pgmorl/pgmorl.py index 14af362e..44e02ccc 100644 --- a/morl_baselines/multi_policy/pgmorl/pgmorl.py +++ b/morl_baselines/multi_policy/pgmorl/pgmorl.py @@ -506,6 +506,7 @@ def get_config(self) -> dict: def __train_all_agents(self, iteration: int, max_iterations: int): for i, agent in enumerate(self.agents): + agent.global_step = self.global_step agent.train(self.start_time, iteration, max_iterations) self.global_step += self.steps_per_iteration * self.num_envs From 962ddae6e7f695ab3c6111e693bec6bb41ed513a Mon Sep 17 00:00:00 2001 From: Lucas Alegre Date: Fri, 11 Oct 2024 09:58:07 -0300 Subject: [PATCH 06/12] Require gymnasium>=1.0.0 --- pyproject.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pyproject.toml b/pyproject.toml index c5922928..e115fb01 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -22,7 +22,7 @@ classifiers = [ ] dependencies = [ "mo-gymnasium >=1.1.0", - "gymnasium>=1.0.1a1", + "gymnasium>=1.0.0", "numpy >=1.21.0,<2.0.0", "torch >=1.12.0", "pygame >=2.1.0", From 15c0809f8f8a0bec92e07389cdce7db3643d7dd8 Mon Sep 17 00:00:00 2001 From: Lucas Alegre Date: Fri, 11 Oct 2024 10:01:12 -0300 Subject: [PATCH 07/12] upload-artifact v4 --- .github/workflows/build-publish.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/build-publish.yml b/.github/workflows/build-publish.yml index cba96336..5117c91e 100644 --- a/.github/workflows/build-publish.yml +++ b/.github/workflows/build-publish.yml @@ -44,7 +44,7 @@ jobs: - name: Build sdist and wheels run: python -m build - name: Store wheels - uses: actions/upload-artifact@v2 + uses: actions/upload-artifact@v4 with: path: dist From e6dec33e6eebc1b3750f92053179b090b9c00009 Mon Sep 17 00:00:00 2001 From: Florian Felten Date: Wed, 16 Oct 2024 14:53:31 +0200 Subject: [PATCH 08/12] Bump action versions --- .github/workflows/build-publish.yml | 34 ++++++++--------------------- 1 file changed, 9 insertions(+), 25 deletions(-) diff --git a/.github/workflows/build-publish.yml b/.github/workflows/build-publish.yml index 5117c91e..6c7962fc 100644 --- a/.github/workflows/build-publish.yml +++ b/.github/workflows/build-publish.yml @@ -4,7 +4,7 @@ # - https://packaging.python.org/en/latest/guides/publishing-package-distribution-releases-using-github-actions-ci-cd-workflows/ # # derived from https://github.com/Farama-Foundation/PettingZoo/blob/e230f4d80a5df3baf9bd905149f6d4e8ce22be31/.github/workflows/build-publish.yml -name: build-publish +name: Build artifact 
for PyPI on: push: @@ -16,33 +16,16 @@ on: jobs: build-wheels: - runs-on: ${{ matrix.os }} - strategy: - matrix: - include: - - os: ubuntu-latest - python: 38 - platform: manylinux_x86_64 - - os: ubuntu-latest - python: 39 - platform: manylinux_x86_64 - - os: ubuntu-latest - python: 310 - platform: manylinux_x86_64 - - os: ubuntu-latest - python: 311 - platform: manylinux_x86_64 + runs-on: ubuntu-latest steps: - - uses: actions/checkout@v3 - - name: Set up Python - uses: actions/setup-python@v4 - with: - python-version: '3.x' + - uses: actions/checkout@v4 + - uses: actions/setup-python@v5 + - name: Install dependencies - run: python -m pip install --upgrade pip setuptools build + run: pipx install setuptools build - name: Build sdist and wheels - run: python -m build + run: pyproject-build - name: Store wheels uses: actions/upload-artifact@v4 with: @@ -55,10 +38,11 @@ jobs: if: github.event_name == 'release' && github.event.action == 'published' steps: - name: Download dists - uses: actions/download-artifact@v4.1.7 + uses: actions/download-artifact@v4 with: name: artifact path: dist + - name: Publish uses: pypa/gh-action-pypi-publish@release/v1 with: From a4052e4a0eed46202118a8ee22affaa251b19084 Mon Sep 17 00:00:00 2001 From: Florian Felten Date: Wed, 16 Oct 2024 14:55:16 +0200 Subject: [PATCH 09/12] Bump pre-commit actions --- .github/workflows/pre-commit.yml | 10 ++++------ 1 file changed, 4 insertions(+), 6 deletions(-) diff --git a/.github/workflows/pre-commit.yml b/.github/workflows/pre-commit.yml index 80ce02af..9f2cc2ab 100644 --- a/.github/workflows/pre-commit.yml +++ b/.github/workflows/pre-commit.yml @@ -13,9 +13,7 @@ jobs: pre-commit: runs-on: ubuntu-latest steps: - - uses: actions/checkout@v3 - - uses: actions/setup-python@v4 - - run: python -m pip install pre-commit - - run: python -m pre_commit --version - - run: python -m pre_commit install - - run: python -m pre_commit run --all-files + - uses: actions/checkout@v4 + - uses: actions/setup-python@v5 + - run: pipx install pre-commit + - run: pre-commit run --all-files From 2e8a18a18318582f1de39b825209a1e265280bc2 Mon Sep 17 00:00:00 2001 From: Florian Felten Date: Wed, 16 Oct 2024 15:04:41 +0200 Subject: [PATCH 10/12] Update pre-commit hooks --- .pre-commit-config.yaml | 12 ++--- morl_baselines/__init__.py | 1 - morl_baselines/common/buffer.py | 1 + morl_baselines/common/diverse_buffer.py | 8 ++- morl_baselines/common/evaluation.py | 1 + morl_baselines/common/experiments.py | 1 + .../model_based/probabilistic_ensemble.py | 1 + .../common/model_based/tabular_model.py | 1 + morl_baselines/common/model_based/utils.py | 53 +++++++++++++++---- morl_baselines/common/morl_algorithm.py | 1 + morl_baselines/common/pareto.py | 1 + .../common/performance_indicators.py | 1 + morl_baselines/common/prioritized_buffer.py | 1 + morl_baselines/common/scalarization.py | 1 + morl_baselines/common/utils.py | 1 + morl_baselines/multi_policy/capql/capql.py | 1 + .../multi_policy/envelope/envelope.py | 1 + morl_baselines/multi_policy/gpi_pd/gpi_pd.py | 1 + .../gpi_pd/gpi_pd_continuous_action.py | 1 + .../linear_support/linear_support.py | 1 + morl_baselines/multi_policy/morld/morld.py | 1 + .../mp_mo_q_learning.py | 1 + .../multi_policy/pareto_q_learning/pql.py | 17 +++--- morl_baselines/multi_policy/pcn/pcn.py | 1 + morl_baselines/multi_policy/pgmorl/pgmorl.py | 1 + morl_baselines/single_policy/esr/eupg.py | 1 + morl_baselines/single_policy/ser/mo_ppo.py | 1 + .../single_policy/ser/mo_q_learning.py | 1 + pyproject.toml | 1 - tests/test_algos.py | 1 
+ 30 files changed, 91 insertions(+), 25 deletions(-) diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 69680e4e..af334549 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -2,7 +2,7 @@ # See https://pre-commit.com/hooks.html for more hooks repos: - repo: https://github.com/pre-commit/pre-commit-hooks - rev: v4.4.0 + rev: v5.0.0 hooks: - id: check-symlinks - id: destroyed-symlinks @@ -18,13 +18,13 @@ repos: - id: detect-private-key - id: debug-statements - repo: https://github.com/codespell-project/codespell - rev: v2.2.4 + rev: v2.3.0 hooks: - id: codespell args: - --ignore-words-list=reacher,ure,referenc,wile,mor,ser,esr,nowe - repo: https://github.com/PyCQA/flake8 - rev: 6.0.0 + rev: 7.1.1 hooks: - id: flake8 args: @@ -35,16 +35,16 @@ repos: - --show-source - --statistics - repo: https://github.com/asottile/pyupgrade - rev: v3.3.1 + rev: v3.18.0 hooks: - id: pyupgrade args: ["--py37-plus"] - repo: https://github.com/PyCQA/isort - rev: 5.12.0 + rev: 5.13.2 hooks: - id: isort - repo: https://github.com/python/black - rev: 23.1.0 + rev: 24.10.0 hooks: - id: black - repo: https://github.com/pycqa/pydocstyle diff --git a/morl_baselines/__init__.py b/morl_baselines/__init__.py index 6fc312e4..d2af2963 100644 --- a/morl_baselines/__init__.py +++ b/morl_baselines/__init__.py @@ -1,4 +1,3 @@ """MORL-Baselines contains various MORL algorithms and utility functions.""" - __version__ = "1.1.0" diff --git a/morl_baselines/common/buffer.py b/morl_baselines/common/buffer.py index 58b53397..ee3722b7 100644 --- a/morl_baselines/common/buffer.py +++ b/morl_baselines/common/buffer.py @@ -1,4 +1,5 @@ """Replay buffer for multi-objective reinforcement learning.""" + import numpy as np import torch as th diff --git a/morl_baselines/common/diverse_buffer.py b/morl_baselines/common/diverse_buffer.py index a0d19c85..e49330fb 100644 --- a/morl_baselines/common/diverse_buffer.py +++ b/morl_baselines/common/diverse_buffer.py @@ -1,4 +1,5 @@ """Diverse Experience Replay Buffer. 
Code extracted from https://github.com/axelabels/DynMORL.""" + from dataclasses import dataclass import numpy as np @@ -154,7 +155,7 @@ def update(self, idx: int, p, tree_id=None): Keyword Arguments: tree_id {object} -- Tree to be updated (default: {None}) """ - if type(p) == dict: + if isinstance(p, dict): for k in p: self.update(idx, p[k], k) return @@ -476,7 +477,10 @@ def get_data(self, include_indices: bool = False): Returns: The data """ - all_data = list(np.arange(self.capacity) + self.capacity - 1), list(self.tree.data) + all_data = ( + list(np.arange(self.capacity) + self.capacity - 1), + list(self.tree.data), + ) indices = [] data = [] for i, d in zip(all_data[0], all_data[1]): diff --git a/morl_baselines/common/evaluation.py b/morl_baselines/common/evaluation.py index 79b6eff3..af106c08 100644 --- a/morl_baselines/common/evaluation.py +++ b/morl_baselines/common/evaluation.py @@ -1,4 +1,5 @@ """Utilities related to evaluation.""" + import os import random from typing import List, Optional, Tuple diff --git a/morl_baselines/common/experiments.py b/morl_baselines/common/experiments.py index dff6237c..71dda225 100644 --- a/morl_baselines/common/experiments.py +++ b/morl_baselines/common/experiments.py @@ -1,4 +1,5 @@ """Common experiment utilities.""" + import argparse from morl_baselines.multi_policy.capql.capql import CAPQL diff --git a/morl_baselines/common/model_based/probabilistic_ensemble.py b/morl_baselines/common/model_based/probabilistic_ensemble.py index db40b5f7..f5333468 100644 --- a/morl_baselines/common/model_based/probabilistic_ensemble.py +++ b/morl_baselines/common/model_based/probabilistic_ensemble.py @@ -1,4 +1,5 @@ """Probabilistic ensemble of neural networks.""" + import os import numpy as np diff --git a/morl_baselines/common/model_based/tabular_model.py b/morl_baselines/common/model_based/tabular_model.py index 96664800..903429e7 100644 --- a/morl_baselines/common/model_based/tabular_model.py +++ b/morl_baselines/common/model_based/tabular_model.py @@ -1,4 +1,5 @@ """Tabular dynamics model S_{t+1}, R_t ~ m(.,.|s,a) .""" + import random import numpy as np diff --git a/morl_baselines/common/model_based/utils.py b/morl_baselines/common/model_based/utils.py index 10d58467..1d13fa0d 100644 --- a/morl_baselines/common/model_based/utils.py +++ b/morl_baselines/common/model_based/utils.py @@ -1,4 +1,5 @@ """Utility functions for the model.""" + from typing import Tuple import matplotlib.pyplot as plt @@ -34,7 +35,7 @@ def termination_fn_dst(obs, act, next_obs): def termination_fn_mountaincar(obs, act, next_obs): - """Termination function of mountin car.""" + """Termination function of mountain car.""" assert len(obs.shape) == len(next_obs.shape) == len(act.shape) == 2 position = next_obs[:, 0] velocity = next_obs[:, 1] @@ -147,16 +148,29 @@ def step( var_obs = var_obs[0] var_rewards = var_rewards[0] - info = {"uncertainty": uncertainties, "var_obs": var_obs, "var_rewards": var_rewards} + info = { + "uncertainty": uncertainties, + "var_obs": var_obs, + "var_rewards": var_rewards, + } # info = {'mean': return_means, 'std': return_stds, 'log_prob': log_prob, 'dev': dev} return next_obs, rewards, terminals, info def visualize_eval( - agent, env, model=None, w=None, horizon=10, init_obs=None, compound=True, deterministic=False, show=False, filename=None + agent, + env, + model=None, + w=None, + horizon=10, + init_obs=None, + compound=True, + deterministic=False, + show=False, + filename=None, ): - """Generates a plot of the evolution of the state, reward and model 
predicitions ove time. + """Generates a plot of the evolution of the state, reward and model predictions over time. Args: agent: agent to be evaluated @@ -213,10 +227,16 @@ def visualize_eval( acts = F.one_hot(acts, num_classes=env.action_space.n).squeeze(1) for step in range(len(real_obs)): if compound or step == 0: - obs, r, done, info = model_env.step(th.tensor(obs).to(agent.device), acts[step], deterministic=deterministic) + obs, r, done, info = model_env.step( + th.tensor(obs).to(agent.device), + acts[step], + deterministic=deterministic, + ) else: obs, r, done, info = model_env.step( - th.tensor(real_obs[step - 1]).to(agent.device), acts[step], deterministic=deterministic + th.tensor(real_obs[step - 1]).to(agent.device), + acts[step], + deterministic=deterministic, ) model_obs.append(obs.copy()) model_obs_stds.append(np.sqrt(info["var_obs"].copy())) @@ -240,11 +260,26 @@ def visualize_eval( axs[i].set_ylabel(f"Reward {i - obs_dim}") axs[i].grid(alpha=0.25) if w is not None: - axs[i].plot(x, [real_vec_rewards[step][i - obs_dim] for step in x], label="Environment", color="black") + axs[i].plot( + x, + [real_vec_rewards[step][i - obs_dim] for step in x], + label="Environment", + color="black", + ) else: - axs[i].plot(x, [real_rewards[step] for step in x], label="Environment", color="black") + axs[i].plot( + x, + [real_rewards[step] for step in x], + label="Environment", + color="black", + ) if model is not None: - axs[i].plot(x, [model_rewards[step][i - obs_dim] for step in x], label="Model", color="blue") + axs[i].plot( + x, + [model_rewards[step][i - obs_dim] for step in x], + label="Model", + color="blue", + ) axs[i].fill_between( x, [model_rewards[step][i - obs_dim] + model_rewards_stds[step][i - obs_dim] for step in x], diff --git a/morl_baselines/common/morl_algorithm.py b/morl_baselines/common/morl_algorithm.py index bb6810e3..920a5342 100644 --- a/morl_baselines/common/morl_algorithm.py +++ b/morl_baselines/common/morl_algorithm.py @@ -1,4 +1,5 @@ """MORL algorithm base classes.""" + import os import time from abc import ABC, abstractmethod diff --git a/morl_baselines/common/pareto.py b/morl_baselines/common/pareto.py index 76a64254..63828bd1 100644 --- a/morl_baselines/common/pareto.py +++ b/morl_baselines/common/pareto.py @@ -1,4 +1,5 @@ """Pareto utilities.""" + from copy import deepcopy from typing import List, Union diff --git a/morl_baselines/common/performance_indicators.py b/morl_baselines/common/performance_indicators.py index 3d957f1b..8462dbb3 100644 --- a/morl_baselines/common/performance_indicators.py +++ b/morl_baselines/common/performance_indicators.py @@ -2,6 +2,7 @@ We mostly rely on pymoo for the computation of axiomatic indicators (HV and IGD), but some are customly made. 
""" + from copy import deepcopy from typing import Callable, List diff --git a/morl_baselines/common/prioritized_buffer.py b/morl_baselines/common/prioritized_buffer.py index 99ba8b84..24662b76 100644 --- a/morl_baselines/common/prioritized_buffer.py +++ b/morl_baselines/common/prioritized_buffer.py @@ -2,6 +2,7 @@ Code adapted from https://github.com/sfujim/LAP-PAL """ + import numpy as np import torch as th diff --git a/morl_baselines/common/scalarization.py b/morl_baselines/common/scalarization.py index 3fd2ffc2..a8a7f7f5 100644 --- a/morl_baselines/common/scalarization.py +++ b/morl_baselines/common/scalarization.py @@ -1,4 +1,5 @@ """Scalarization functions relying on numpy.""" + import numpy as np from pymoo.decomposition.tchebicheff import Tchebicheff diff --git a/morl_baselines/common/utils.py b/morl_baselines/common/utils.py index 88157a2e..00c01d8e 100644 --- a/morl_baselines/common/utils.py +++ b/morl_baselines/common/utils.py @@ -1,4 +1,5 @@ """General utils for the MORL baselines.""" + import math import os from typing import Callable, List diff --git a/morl_baselines/multi_policy/capql/capql.py b/morl_baselines/multi_policy/capql/capql.py index 54ecd625..1ae46bdc 100644 --- a/morl_baselines/multi_policy/capql/capql.py +++ b/morl_baselines/multi_policy/capql/capql.py @@ -1,4 +1,5 @@ """CAPQL algorithm.""" + import os import random from itertools import chain diff --git a/morl_baselines/multi_policy/envelope/envelope.py b/morl_baselines/multi_policy/envelope/envelope.py index 6899c585..c4ad7903 100644 --- a/morl_baselines/multi_policy/envelope/envelope.py +++ b/morl_baselines/multi_policy/envelope/envelope.py @@ -1,4 +1,5 @@ """Envelope Q-Learning implementation.""" + import os from typing import List, Optional, Union from typing_extensions import override diff --git a/morl_baselines/multi_policy/gpi_pd/gpi_pd.py b/morl_baselines/multi_policy/gpi_pd/gpi_pd.py index 9d315a85..8903288b 100644 --- a/morl_baselines/multi_policy/gpi_pd/gpi_pd.py +++ b/morl_baselines/multi_policy/gpi_pd/gpi_pd.py @@ -1,4 +1,5 @@ """GPI-PD algorithm.""" + import os import random from itertools import chain diff --git a/morl_baselines/multi_policy/gpi_pd/gpi_pd_continuous_action.py b/morl_baselines/multi_policy/gpi_pd/gpi_pd_continuous_action.py index a9e31fbd..37e9b165 100644 --- a/morl_baselines/multi_policy/gpi_pd/gpi_pd_continuous_action.py +++ b/morl_baselines/multi_policy/gpi_pd/gpi_pd_continuous_action.py @@ -1,4 +1,5 @@ """GPI-PD algorithm with continuous actions.""" + import os import random from itertools import chain diff --git a/morl_baselines/multi_policy/linear_support/linear_support.py b/morl_baselines/multi_policy/linear_support/linear_support.py index a9d6dda1..12df3f92 100644 --- a/morl_baselines/multi_policy/linear_support/linear_support.py +++ b/morl_baselines/multi_policy/linear_support/linear_support.py @@ -1,4 +1,5 @@ """Linear Support implementation.""" + import random from copy import deepcopy from typing import List, Optional diff --git a/morl_baselines/multi_policy/morld/morld.py b/morl_baselines/multi_policy/morld/morld.py index 11a3e459..69c0ee3c 100644 --- a/morl_baselines/multi_policy/morld/morld.py +++ b/morl_baselines/multi_policy/morld/morld.py @@ -2,6 +2,7 @@ See Felten, Talbi & Danoy (2024): https://arxiv.org/abs/2311.12495. 
""" + import math import time from typing import Callable, List, Optional, Tuple, Union diff --git a/morl_baselines/multi_policy/multi_policy_moqlearning/mp_mo_q_learning.py b/morl_baselines/multi_policy/multi_policy_moqlearning/mp_mo_q_learning.py index 232e9b51..0341e08e 100644 --- a/morl_baselines/multi_policy/multi_policy_moqlearning/mp_mo_q_learning.py +++ b/morl_baselines/multi_policy/multi_policy_moqlearning/mp_mo_q_learning.py @@ -1,4 +1,5 @@ """Outer-loop MOQ-learning algorithm (uses multiple weights).""" + import time from copy import deepcopy from typing import List, Optional diff --git a/morl_baselines/multi_policy/pareto_q_learning/pql.py b/morl_baselines/multi_policy/pareto_q_learning/pql.py index b314678f..2b315dd7 100644 --- a/morl_baselines/multi_policy/pareto_q_learning/pql.py +++ b/morl_baselines/multi_policy/pareto_q_learning/pql.py @@ -1,4 +1,5 @@ """Pareto Q-Learning.""" + import numbers from typing import Callable, List, Optional @@ -60,19 +61,19 @@ def __init__( # Algorithm setup self.ref_point = ref_point - if type(self.env.action_space) == gym.spaces.Discrete: + if isinstance(self.env.action_space, gym.spaces.Discrete): self.num_actions = self.env.action_space.n - elif type(self.env.action_space) == gym.spaces.MultiDiscrete: + elif isinstance(self.env.action_space, gym.spaces.MultiDiscrete): self.num_actions = np.prod(self.env.action_space.nvec) else: raise Exception("PQL only supports (multi)discrete action spaces.") - if type(self.env.observation_space) == gym.spaces.Discrete: + if isinstance(self.env.observation_space, gym.spaces.Discrete): self.env_shape = (self.env.observation_space.n,) - elif type(self.env.observation_space) == gym.spaces.MultiDiscrete: + elif isinstance(self.env.observation_space, gym.spaces.MultiDiscrete): self.env_shape = self.env.observation_space.nvec elif ( - type(self.env.observation_space) == gym.spaces.Box + isinstance(self.env.observation_space, gym.spaces.Box) and self.env.observation_space.is_bounded(manner="both") and issubclass(self.env.observation_space.dtype.type, numbers.Integral) ): @@ -96,7 +97,11 @@ def __init__( self.log = log if self.log: - self.setup_wandb(project_name=self.project_name, experiment_name=self.experiment_name, entity=wandb_entity) + self.setup_wandb( + project_name=self.project_name, + experiment_name=self.experiment_name, + entity=wandb_entity, + ) def get_config(self) -> dict: """Get the configuration dictionary. diff --git a/morl_baselines/multi_policy/pcn/pcn.py b/morl_baselines/multi_policy/pcn/pcn.py index 2e380024..48c162b9 100644 --- a/morl_baselines/multi_policy/pcn/pcn.py +++ b/morl_baselines/multi_policy/pcn/pcn.py @@ -1,4 +1,5 @@ """Pareto Conditioned Network. Code adapted from https://github.com/mathieu-reymond/pareto-conditioned-networks .""" + import heapq import os from abc import ABC diff --git a/morl_baselines/multi_policy/pgmorl/pgmorl.py b/morl_baselines/multi_policy/pgmorl/pgmorl.py index 44e02ccc..4ca3aef8 100644 --- a/morl_baselines/multi_policy/pgmorl/pgmorl.py +++ b/morl_baselines/multi_policy/pgmorl/pgmorl.py @@ -4,6 +4,7 @@ (!) Limited to 2 objectives for now. (!) The post-processing phase has not been implemented yet. 
""" + import time from copy import deepcopy from typing import List, Optional, Tuple, Union diff --git a/morl_baselines/single_policy/esr/eupg.py b/morl_baselines/single_policy/esr/eupg.py index 9eaa9d89..efd417e1 100644 --- a/morl_baselines/single_policy/esr/eupg.py +++ b/morl_baselines/single_policy/esr/eupg.py @@ -1,4 +1,5 @@ """EUPG is an ESR algorithm based on Policy Gradient (REINFORCE like).""" + import time from copy import deepcopy from typing import Callable, List, Optional, Union diff --git a/morl_baselines/single_policy/ser/mo_ppo.py b/morl_baselines/single_policy/ser/mo_ppo.py index f0af4be8..6c0cf84a 100644 --- a/morl_baselines/single_policy/ser/mo_ppo.py +++ b/morl_baselines/single_policy/ser/mo_ppo.py @@ -1,4 +1,5 @@ """Multi-Objective PPO Algorithm.""" + import time from copy import deepcopy from typing import List, Optional, Union diff --git a/morl_baselines/single_policy/ser/mo_q_learning.py b/morl_baselines/single_policy/ser/mo_q_learning.py index 5abe72d3..1061fcc2 100644 --- a/morl_baselines/single_policy/ser/mo_q_learning.py +++ b/morl_baselines/single_policy/ser/mo_q_learning.py @@ -1,4 +1,5 @@ """Scalarized Q-learning for single policy multi-objective reinforcement learning.""" + import time from typing import Optional from typing_extensions import override diff --git a/pyproject.toml b/pyproject.toml index e115fb01..598d1618 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -66,7 +66,6 @@ morl_baselines = ["*.json", "assets/*"] # Linting, testing, ... ######################################################## [tool.black] -safe = true line-length = 127 target-version = ['py38', 'py39', 'py310'] include = '\.pyi?$' diff --git a/tests/test_algos.py b/tests/test_algos.py index d64174e7..584b704c 100644 --- a/tests/test_algos.py +++ b/tests/test_algos.py @@ -1,4 +1,5 @@ """Mostly tests to make sure the algorithms are able to run.""" + import time import mo_gymnasium as mo_gym From 6b86faf5f07502b3664b94a6dd7ad2e8003f5964 Mon Sep 17 00:00:00 2001 From: Florian Felten Date: Wed, 16 Oct 2024 15:09:40 +0200 Subject: [PATCH 11/12] Remove setuptools --- .github/workflows/build-publish.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/build-publish.yml b/.github/workflows/build-publish.yml index 6c7962fc..c944eeee 100644 --- a/.github/workflows/build-publish.yml +++ b/.github/workflows/build-publish.yml @@ -23,7 +23,7 @@ jobs: - uses: actions/setup-python@v5 - name: Install dependencies - run: pipx install setuptools build + run: pipx install build - name: Build sdist and wheels run: pyproject-build - name: Store wheels From fb05165ad22dc29c2682010cd9f29b908892722a Mon Sep 17 00:00:00 2001 From: Florian Felten Date: Wed, 16 Oct 2024 15:25:10 +0200 Subject: [PATCH 12/12] Fix pycddlib version --- pyproject.toml | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/pyproject.toml b/pyproject.toml index 598d1618..09a544d8 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -40,12 +40,12 @@ dynamic = ["version"] [project.optional-dependencies] # Update dependencies in `all` if any are added or removed # OLS requires pycddlib and libgmp to be installed, which does not work on MacOS for now. -ols = ["pycddlib"] -gpi = ["pycddlib"] +ols = ["pycddlib==2.1.6"] +gpi = ["pycddlib==2.1.6"] all = [ # OLS & GPI - "pycddlib", + "pycddlib==2.1.6", ] testing = ["pytest ==7.1.3"]