Migration to gymnasium 1.0 #109

Merged: 13 commits, Oct 16, 2024
36 changes: 10 additions & 26 deletions .github/workflows/build-publish.yml
@@ -4,7 +4,7 @@
# - https://packaging.python.org/en/latest/guides/publishing-package-distribution-releases-using-github-actions-ci-cd-workflows/
#
# derived from https://github.com/Farama-Foundation/PettingZoo/blob/e230f4d80a5df3baf9bd905149f6d4e8ce22be31/.github/workflows/build-publish.yml
-name: build-publish
+name: Build artifact for PyPI

on:
push:
@@ -16,35 +16,18 @@ on:

jobs:
build-wheels:
-    runs-on: ${{ matrix.os }}
-    strategy:
-      matrix:
-        include:
-          - os: ubuntu-latest
-            python: 38
-            platform: manylinux_x86_64
-          - os: ubuntu-latest
-            python: 39
-            platform: manylinux_x86_64
-          - os: ubuntu-latest
-            python: 310
-            platform: manylinux_x86_64
-          - os: ubuntu-latest
-            python: 311
-            platform: manylinux_x86_64
+    runs-on: ubuntu-latest

steps:
-      - uses: actions/checkout@v3
-      - name: Set up Python
-        uses: actions/setup-python@v4
-        with:
-          python-version: '3.x'
+      - uses: actions/checkout@v4
+      - uses: actions/setup-python@v5

- name: Install dependencies
-        run: python -m pip install --upgrade pip setuptools build
+        run: pipx install build
- name: Build sdist and wheels
-        run: python -m build
+        run: pyproject-build
- name: Store wheels
-        uses: actions/upload-artifact@v2
+        uses: actions/upload-artifact@v4
with:
path: dist

@@ -55,10 +38,11 @@ jobs:
if: github.event_name == 'release' && github.event.action == 'published'
steps:
- name: Download dists
-        uses: actions/download-artifact@v4.1.7
+        uses: actions/download-artifact@v4
with:
name: artifact
path: dist

- name: Publish
uses: pypa/gh-action-pypi-publish@release/v1
with:
10 changes: 4 additions & 6 deletions .github/workflows/pre-commit.yml
@@ -13,9 +13,7 @@ jobs:
pre-commit:
runs-on: ubuntu-latest
steps:
-      - uses: actions/checkout@v3
-      - uses: actions/setup-python@v4
-      - run: python -m pip install pre-commit
-      - run: python -m pre_commit --version
-      - run: python -m pre_commit install
-      - run: python -m pre_commit run --all-files
+      - uses: actions/checkout@v4
+      - uses: actions/setup-python@v5
+      - run: pipx install pre-commit
+      - run: pre-commit run --all-files
12 changes: 6 additions & 6 deletions .pre-commit-config.yaml
@@ -2,7 +2,7 @@
# See https://pre-commit.com/hooks.html for more hooks
repos:
- repo: https://github.com/pre-commit/pre-commit-hooks
-    rev: v4.4.0
+    rev: v5.0.0
hooks:
- id: check-symlinks
- id: destroyed-symlinks
@@ -18,13 +18,13 @@ repos:
- id: detect-private-key
- id: debug-statements
- repo: https://github.com/codespell-project/codespell
-    rev: v2.2.4
+    rev: v2.3.0
hooks:
- id: codespell
args:
- --ignore-words-list=reacher,ure,referenc,wile,mor,ser,esr,nowe
- repo: https://github.com/PyCQA/flake8
-    rev: 6.0.0
+    rev: 7.1.1
hooks:
- id: flake8
args:
@@ -35,16 +35,16 @@
- --show-source
- --statistics
- repo: https://github.com/asottile/pyupgrade
-    rev: v3.3.1
+    rev: v3.18.0
hooks:
- id: pyupgrade
args: ["--py37-plus"]
- repo: https://github.com/PyCQA/isort
-    rev: 5.12.0
+    rev: 5.13.2
hooks:
- id: isort
- repo: https://github.com/python/black
-    rev: 23.1.0
+    rev: 24.10.0
hooks:
- id: black
- repo: https://github.com/pycqa/pydocstyle
2 changes: 1 addition & 1 deletion examples/envelope_minecart.py
@@ -1,6 +1,6 @@
import mo_gymnasium as mo_gym
import numpy as np
-from mo_gymnasium.utils import MORecordEpisodeStatistics
+from mo_gymnasium.wrappers import MORecordEpisodeStatistics

from morl_baselines.multi_policy.envelope.envelope import Envelope

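For anyone migrating their own scripts: mo-gymnasium 1.0 moved the wrappers out of `mo_gymnasium.utils` into `mo_gymnasium.wrappers`, which is all these example diffs change. A minimal sketch of the new import path (the environment id and `gamma` value here are illustrative):

```python
import mo_gymnasium as mo_gym
from mo_gymnasium.wrappers import MORecordEpisodeStatistics  # was: mo_gymnasium.utils

env = mo_gym.make("minecart-v0")
# Records vector episode returns (raw and gamma-discounted) in `info` at episode end.
env = MORecordEpisodeStatistics(env, gamma=0.98)
```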
2 changes: 1 addition & 1 deletion examples/eupg_fishwood.py
@@ -1,7 +1,7 @@
import mo_gymnasium as mo_gym
import numpy as np
import torch as th
-from mo_gymnasium.utils import MORecordEpisodeStatistics
+from mo_gymnasium.wrappers import MORecordEpisodeStatistics

from morl_baselines.common.evaluation import eval_mo_reward_conditioned
from morl_baselines.single_policy.esr.eupg import EUPG
2 changes: 1 addition & 1 deletion examples/mo_q_learning_DST.py
@@ -2,7 +2,7 @@

import mo_gymnasium as mo_gym
import numpy as np
-from mo_gymnasium.utils import MORecordEpisodeStatistics
+from mo_gymnasium.wrappers import MORecordEpisodeStatistics

from morl_baselines.common.evaluation import eval_mo
from morl_baselines.common.scalarization import tchebicheff
2 changes: 1 addition & 1 deletion examples/mp_mo_q_learning_DST.py
@@ -1,6 +1,6 @@
import mo_gymnasium as mo_gym
import numpy as np
-from mo_gymnasium import MORecordEpisodeStatistics
+from mo_gymnasium.wrappers import MORecordEpisodeStatistics

from morl_baselines.common.scalarization import tchebicheff
from morl_baselines.multi_policy.multi_policy_moqlearning.mp_mo_q_learning import (
2 changes: 1 addition & 1 deletion examples/pcn_minecart.py
@@ -1,6 +1,6 @@
import mo_gymnasium as mo_gym
import numpy as np
-from mo_gymnasium.utils import MORecordEpisodeStatistics
+from mo_gymnasium.wrappers import MORecordEpisodeStatistics

from morl_baselines.multi_policy.pcn.pcn import PCN

2 changes: 1 addition & 1 deletion examples/pgmorl_halfcheetah.py
@@ -19,7 +19,7 @@
algo.train(
total_timesteps=int(5e6),
eval_env=make_env(env_id, 42, 0, "PGMORL_eval_env", gamma=0.995)(),
-    ref_point=np.array([0.0, -5.0]),
+    ref_point=np.array([-100.0, -100.0]),
known_pareto_front=None,
)
env = make_env(env_id, 422, 1, "PGMORL_test", gamma=0.995)() # idx != 0 to avoid taking videos
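Context for the new reference point: hypervolume is measured against a point that every evaluated return vector should dominate, so a tight point like `[0.0, -5.0]` can exclude early low-return policies from the metric entirely, while `[-100.0, -100.0]` keeps them inside the measured region. A hedged sketch, assuming the `hypervolume(ref_point, points)` helper from `morl_baselines.common.performance_indicators` (the return vectors are illustrative):

```python
import numpy as np
from morl_baselines.common.performance_indicators import hypervolume

# Both illustrative points dominate the looser reference point,
# so they contribute volume even while one objective is still negative.
front = [np.array([-60.0, 250.0]), np.array([-40.0, 180.0])]
print(hypervolume(np.array([-100.0, -100.0]), front))
```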
13 changes: 7 additions & 6 deletions experiments/benchmark/launch_experiment.py
@@ -15,9 +15,8 @@
import numpy as np
import requests
from gym_super_mario_bros.actions import SIMPLE_MOVEMENT
-from gymnasium.wrappers import FlattenObservation
-from gymnasium.wrappers.record_video import RecordVideo
-from mo_gymnasium.utils import MORecordEpisodeStatistics
+from gymnasium.wrappers import FlattenObservation, RecordVideo
+from mo_gymnasium.wrappers import MORecordEpisodeStatistics

from morl_baselines.common.evaluation import seed_everything
from morl_baselines.common.experiments import (
@@ -90,13 +89,15 @@ def autotag() -> str:
git_commit = subprocess.check_output(["git", "rev-parse", "--verify", "HEAD"]).decode("ascii").strip()
try:
# try finding the pull request number on github
prs = requests.get(f"https://api.github.com/search/issues?q=repo:LucasAlegre/morl-baselines+is:pr+{git_commit}")
prs = requests.get(
f"https://api.github.com/search/issues?q=repo:LucasAlegre/morl-baselines+is:pr+{git_commit}" # noqa
)
if prs.status_code == 200:
prs = prs.json()
if len(prs["items"]) > 0:
pr = prs["items"][0]
pr_number = pr["number"]
wandb_tag += f",pr-{pr_number}"
wandb_tag += f",pr-{pr_number}" # noqa
print(f"identified github pull request: {pr_number}")
except Exception as e:
print(e)
@@ -165,7 +166,7 @@ def wrap_mario(env):
TimeLimit,
)
from mo_gymnasium.envs.mario.joypad_space import JoypadSpace
-    from mo_gymnasium.utils import MOMaxAndSkipObservation
+    from mo_gymnasium.wrappers import MOMaxAndSkipObservation

env = JoypadSpace(env, SIMPLE_MOVEMENT)
env = MOMaxAndSkipObservation(env, skip=4)
2 changes: 1 addition & 1 deletion experiments/hyperparameter_search/launch_sweep.py
@@ -7,7 +7,7 @@
import numpy as np
import wandb
import yaml
-from mo_gymnasium.utils import MORecordEpisodeStatistics
+from mo_gymnasium.wrappers import MORecordEpisodeStatistics

from morl_baselines.common.evaluation import seed_everything
from morl_baselines.common.experiments import (
3 changes: 1 addition & 2 deletions morl_baselines/__init__.py
@@ -1,4 +1,3 @@
"""MORL-Baselines contains various MORL algorithms and utility functions."""


__version__ = "1.0.0"
__version__ = "1.1.0"
1 change: 1 addition & 0 deletions morl_baselines/common/buffer.py
@@ -1,4 +1,5 @@
"""Replay buffer for multi-objective reinforcement learning."""

import numpy as np
import torch as th

8 changes: 6 additions & 2 deletions morl_baselines/common/diverse_buffer.py
@@ -1,4 +1,5 @@
"""Diverse Experience Replay Buffer. Code extracted from https://github.com/axelabels/DynMORL."""

from dataclasses import dataclass

import numpy as np
@@ -154,7 +155,7 @@ def update(self, idx: int, p, tree_id=None):
Keyword Arguments:
tree_id {object} -- Tree to be updated (default: {None})
"""
-        if isinstance(p, dict):
-        if type(p) == dict:
+        if isinstance(p, dict):
for k in p:
self.update(idx, p[k], k)
return
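The `isinstance` form resolves flake8's E721 warning and, unlike an exact `type()` comparison, also matches `dict` subclasses. A quick illustration:

```python
from collections import OrderedDict

p = OrderedDict(objective_0=0.5)
print(type(p) == dict)      # False: exact-type check rejects subclasses
print(isinstance(p, dict))  # True: matches dict and any subclass
```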
@@ -476,7 +477,10 @@ def get_data(self, include_indices: bool = False):
Returns:
The data
"""
-        all_data = list(np.arange(self.capacity) + self.capacity - 1), list(self.tree.data)
+        all_data = (
+            list(np.arange(self.capacity) + self.capacity - 1),
+            list(self.tree.data),
+        )
indices = []
data = []
for i, d in zip(all_data[0], all_data[1]):
1 change: 1 addition & 0 deletions morl_baselines/common/evaluation.py
@@ -1,4 +1,5 @@
"""Utilities related to evaluation."""

import os
import random
from typing import List, Optional, Tuple
1 change: 1 addition & 0 deletions morl_baselines/common/experiments.py
@@ -1,4 +1,5 @@
"""Common experiment utilities."""

import argparse

from morl_baselines.multi_policy.capql.capql import CAPQL
morl_baselines/common/model_based/probabilistic_ensemble.py
@@ -1,4 +1,5 @@
"""Probabilistic ensemble of neural networks."""

import os

import numpy as np
1 change: 1 addition & 0 deletions morl_baselines/common/model_based/tabular_model.py
@@ -1,4 +1,5 @@
"""Tabular dynamics model S_{t+1}, R_t ~ m(.,.|s,a) ."""

import random

import numpy as np
53 changes: 44 additions & 9 deletions morl_baselines/common/model_based/utils.py
@@ -1,4 +1,5 @@
"""Utility functions for the model."""

from typing import Tuple

import matplotlib.pyplot as plt
@@ -34,7 +35,7 @@ def termination_fn_dst(obs, act, next_obs):


def termination_fn_mountaincar(obs, act, next_obs):
"""Termination function of mountin car."""
"""Termination function of mountain car."""
assert len(obs.shape) == len(next_obs.shape) == len(act.shape) == 2
position = next_obs[:, 0]
velocity = next_obs[:, 1]
@@ -147,16 +148,29 @@ def step(
var_obs = var_obs[0]
var_rewards = var_rewards[0]

info = {"uncertainty": uncertainties, "var_obs": var_obs, "var_rewards": var_rewards}
info = {
"uncertainty": uncertainties,
"var_obs": var_obs,
"var_rewards": var_rewards,
}

# info = {'mean': return_means, 'std': return_stds, 'log_prob': log_prob, 'dev': dev}
return next_obs, rewards, terminals, info


def visualize_eval(
-    agent, env, model=None, w=None, horizon=10, init_obs=None, compound=True, deterministic=False, show=False, filename=None
+    agent,
+    env,
+    model=None,
+    w=None,
+    horizon=10,
+    init_obs=None,
+    compound=True,
+    deterministic=False,
+    show=False,
+    filename=None,
):
"""Generates a plot of the evolution of the state, reward and model predicitions ove time.
"""Generates a plot of the evolution of the state, reward and model predictions over time.

Args:
agent: agent to be evaluated
@@ -213,10 +227,16 @@ def visualize_eval(
acts = F.one_hot(acts, num_classes=env.action_space.n).squeeze(1)
for step in range(len(real_obs)):
if compound or step == 0:
-            obs, r, done, info = model_env.step(th.tensor(obs).to(agent.device), acts[step], deterministic=deterministic)
+            obs, r, done, info = model_env.step(
+                th.tensor(obs).to(agent.device),
+                acts[step],
+                deterministic=deterministic,
+            )
else:
obs, r, done, info = model_env.step(
-                th.tensor(real_obs[step - 1]).to(agent.device), acts[step], deterministic=deterministic
+                th.tensor(real_obs[step - 1]).to(agent.device),
+                acts[step],
+                deterministic=deterministic,
)
model_obs.append(obs.copy())
model_obs_stds.append(np.sqrt(info["var_obs"].copy()))
@@ -240,11 +260,26 @@
axs[i].set_ylabel(f"Reward {i - obs_dim}")
axs[i].grid(alpha=0.25)
if w is not None:
-            axs[i].plot(x, [real_vec_rewards[step][i - obs_dim] for step in x], label="Environment", color="black")
+            axs[i].plot(
+                x,
+                [real_vec_rewards[step][i - obs_dim] for step in x],
+                label="Environment",
+                color="black",
+            )
else:
-            axs[i].plot(x, [real_rewards[step] for step in x], label="Environment", color="black")
+            axs[i].plot(
+                x,
+                [real_rewards[step] for step in x],
+                label="Environment",
+                color="black",
+            )
if model is not None:
-            axs[i].plot(x, [model_rewards[step][i - obs_dim] for step in x], label="Model", color="blue")
+            axs[i].plot(
+                x,
+                [model_rewards[step][i - obs_dim] for step in x],
+                label="Model",
+                color="blue",
+            )
axs[i].fill_between(
x,
[model_rewards[step][i - obs_dim] + model_rewards_stds[step][i - obs_dim] for step in x],
3 changes: 2 additions & 1 deletion morl_baselines/common/morl_algorithm.py
@@ -1,4 +1,5 @@
"""MORL algorithm base classes."""

import os
import time
from abc import ABC, abstractmethod
@@ -11,7 +12,7 @@
import torch.nn
import wandb
from gymnasium import spaces
-from mo_gymnasium.utils import MOSyncVectorEnv
+from mo_gymnasium.wrappers.vector import MOSyncVectorEnv

from morl_baselines.common.evaluation import (
eval_mo_reward_conditioned,
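A hedged sketch of the relocated vector API: `MOSyncVectorEnv` now lives under `mo_gymnasium.wrappers.vector` and, like Gymnasium's `SyncVectorEnv`, is built from a list of environment constructors (the environment id and count below are illustrative):

```python
import mo_gymnasium as mo_gym
from mo_gymnasium.wrappers.vector import MOSyncVectorEnv  # was: mo_gymnasium.utils

envs = MOSyncVectorEnv([lambda: mo_gym.make("deep-sea-treasure-v0") for _ in range(4)])
obs, infos = envs.reset(seed=42)
# step() returns stacked vector rewards with shape (num_envs, reward_dim).
```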