python/ Require pandas>=2. Add example to tests. Improve Pandas example.

multi-objective · Oct 30, 2024 · 1e9f854 · 1e9f854
1 parent 9d04878
commit 1e9f854
Show file tree

Hide file tree

Showing 5 changed files with 111 additions and 44 deletions.
diff --git a/python/examples/plot_pandas.py b/python/examples/plot_pandas.py
@@ -1,12 +1,15 @@
-"""Using moocore with Pandas
-==========================
+"""=========================
+Using moocore with Pandas
+=========================
 
-This example shows how to use ``moocore`` functions with Pandas (https://pandas.pydata.org/).
+This example shows how to use ``moocore`` functions with Pandas (https://pandas.pydata.org/). It requires pandas version >= 2.0.0.
 
 """
 
-import pandas as pd
 import moocore
+import pandas as pd
+
+print(f"pandas version: {pd.__version__}")
 
 # %%
 # First, we create a toy Pandas :class:`~pandas.DataFrame`.
@@ -22,26 +25,25 @@
 df
 
 # %%
-# Now we normalize it being careful to replace the correct columns.
+# Normalize it (only replace the objective columns!).
 
 obj_cols = ["obj1", "obj2", "obj3"]
 df[obj_cols] = moocore.normalise(df[obj_cols], to_range=[1, 2])
 df
 
 # %%
-# Now we calculate the hypervolume for each ``algo`` using :meth:`~pandas.DataFrame.groupby` and :meth:`~pandas.core.groupby.DataFrameGroupBy.apply`.
+# Calculate the hypervolume for each ``algo`` using :meth:`~pandas.DataFrame.groupby` and :meth:`~pandas.core.groupby.DataFrameGroupBy.apply`.
 
 ref = 2.1
 hv = (
-    df.groupby("algo")
-    .apply(moocore.hypervolume, ref=ref, include_groups=False)
+    df.groupby("algo")[obj_cols]
+    .apply(moocore.hypervolume, ref=ref)
     .reset_index(name="hv")
 )
 hv
 
 # %%
-# We can also use
-
+# Or we can just use:
 
 hv = moocore.apply_within_sets(
     df[obj_cols], df["algo"], moocore.hypervolume, ref=ref
@@ -50,7 +52,7 @@
 
 
 # %%
-# Note that :func:`moocore.apply_within_sets()` processes each group in
+# :func:`moocore.apply_within_sets()` processes each group in
 # order, even if the elements of the same group are not contiguous. That is, it
 # processes the groups like :meth:`pandas.Series.unique` and not like
 # :class:`set` or :func:`numpy.unique()`.
@@ -69,30 +71,31 @@
         run=[1, 1, 2, 1, 1, 2, 2, 2, 1, 1],
     )
 )
+obj_cols = ["obj1", "obj2", "obj3"]
 df
 
 # %%
 # We can still use :meth:`~pandas.DataFrame.groupby` but we may need to reset and clean-up the index.
 
-df.groupby(["algo", "run"]).apply(
-    moocore.filter_dominated, include_groups=False
-).reset_index().drop(columns="level_2")
+df.groupby(["algo", "run"])[obj_cols].apply(
+    moocore.filter_dominated
+).reset_index(level=["algo", "run"])
 
 # %%
-# Or we can combine the multiple columns as one to define the sets.
+# Or we can combine the multiple columns as one to define the sets:
 #
 sets = df["algo"].astype(str) + "-" + df["run"].astype(str)
 sets
 
 # %%
-# Identify nondominated rows within each set.
+# then identify nondominated rows within each set:
 #
-is_nondom = moocore.is_nondominated_within_sets(
-    df[["obj1", "obj2", "obj2"]], sets=sets
-)
+is_nondom = moocore.is_nondominated_within_sets(df[obj_cols], sets=sets)
 is_nondom
 
 # %%
-# And use the boolean vector above to filter rows.
+# And use the boolean vector above to filter rows:
 #
 df[is_nondom]
+
+# %%
diff --git a/python/requirements_dev.txt b/python/requirements_dev.txt
@@ -26,5 +26,5 @@ jupyterlab
 ipywidgets
 
 # Gallery examples
-pandas
+pandas >=2.0.0
 seaborn
diff --git a/python/tests/test_moocore.py b/python/tests/test_moocore.py
@@ -1,6 +1,7 @@
 # ruff: noqa: D100, D101, D102, D103
 import pytest
 import numpy as np
+from numpy.testing import assert_array_equal, assert_allclose
 import math
 
 import moocore
@@ -28,9 +29,11 @@ def check_testdata(testpath, expected_name, expected_shape):
                 test_datapath(f"expected_output/read_datasets/{expected_name}")
             )
 
-        assert np.allclose(
-            testdata, check_data
-        ), f"read_datasets does not produce expected array for file {testpath}"
+        assert_allclose(
+            testdata,
+            check_data,
+            err_msg=f"read_datasets does not produce expected array for file {testpath}",
+        )
 
     test_names = [
         "input1.dat",
@@ -153,7 +156,7 @@ def test_is_nondominated(test_datapath):
     assert (
         dominated
         == [False, False, False, False, True, False, True, True, False, True]
-    ).all
+    ).all()
     T = np.array(
         [[1, 0, 1], [1, 1, 1], [0, 1, 1], [1, 0, 1], [1, 1, 0], [1, 1, 1]]
     )
@@ -162,8 +165,8 @@ def test_is_nondominated(test_datapath):
     non_dominated_weak = T[moocore.is_nondominated(T, keep_weakly=True)]
     expct_nondom_weak = np.array([[1, 0, 1], [0, 1, 1], [1, 0, 1], [1, 1, 0]])
 
-    assert np.array_equal(non_dominated_weak, expct_nondom_weak)
-    assert np.array_equal(
+    assert_array_equal(non_dominated_weak, expct_nondom_weak)
+    assert_array_equal(
         moocore.filter_dominated(T, keep_weakly=True), expct_nondom_weak
     )
 
@@ -183,16 +186,16 @@ def test_is_nondominated(test_datapath):
     expected_x_nondom = np.array(
         [[0, 0, 1, 2], [10, 20, 0, 0], [20, 10, 0, 0]]
     )
-    assert np.array_equal(x_nondom, expected_x_nondom)
-    assert np.array_equal(
+    assert_array_equal(x_nondom, expected_x_nondom)
+    assert_array_equal(
         moocore.filter_dominated(x, maximise=True), expected_x_nondom
     )
     minmax = np.array([1, 2, 2, 1, 5, 6, 7, 5]).reshape((-1, 2))
-    assert np.array_equal(
+    assert_array_equal(
         moocore.filter_dominated(minmax, maximise=[True, False]),
         np.array([[2, 1], [7, 5]]),
     )
-    assert np.array_equal(
+    assert_array_equal(
         moocore.filter_dominated(minmax, maximise=[False, True]),
         np.array([[1, 2], [5, 6]]),
     )
@@ -222,8 +225,8 @@ def test_normalise():
     # With default to_range = [0,1] - all columns should have their values normalised to same value
     expected_outcome = np.tile(np.linspace(0, 1, num=6).reshape(6, -1), 3)
 
-    assert np.allclose(moocore.normalise(A), expected_outcome)
-    assert np.allclose(
+    assert_allclose(moocore.normalise(A), expected_outcome)
+    assert_allclose(
         moocore.normalise(A, to_range=[0, 10]), 10 * expected_outcome
     )
     expected_with_bounds = np.transpose(
@@ -235,7 +238,7 @@ def test_normalise():
             ]
         )
     )
-    assert np.allclose(
+    assert_allclose(
         moocore.normalise(A, upper=[25, 25, 25], lower=[0, 0, 0]),
         expected_with_bounds,
     )
@@ -244,8 +247,8 @@ def test_normalise():
     A = np.array([[1.0, 2.0], [2.0, 1.0]])
     A_copy = A.copy()
     B = moocore.normalise(A)
-    assert np.allclose(A, A_copy)
-    assert np.allclose(B, np.array([[0.0, 1.0], [1.0, 0.0]]))
+    assert_allclose(A, A_copy)
+    assert_allclose(B, np.array([[0.0, 1.0], [1.0, 0.0]]))
 
 
 def test_eaf(test_datapath):
@@ -280,12 +283,16 @@ def test_eaf(test_datapath):
         assert (
             eaf_test.shape == expected_eaf_result.shape
         ), f"Shapes of {test_name} and {expected_eaf_name} do not match"
-        assert np.allclose(
-            eaf_test, expected_eaf_result
-        ), f"{expected_eaf_name} test failed"
-        assert np.allclose(
-            eaf_pct_test, expected_eaf_pct_result
-        ), f"pct_{expected_eaf_name} test failed"
+        assert_allclose(
+            eaf_test,
+            expected_eaf_result,
+            err_msg=f"{expected_eaf_name} test failed",
+        )
+        assert_allclose(
+            eaf_pct_test,
+            expected_eaf_pct_result,
+            err_msg=f"pct_{expected_eaf_name} test failed",
+        )
 
 
 # def test_eafdiff(test_datapath):

diff --git a/python/tests/test_pandas.py b/python/tests/test_pandas.py
@@ -1,6 +1,8 @@
 # ruff: noqa: D100, D101, D102, D103
 import pytest
 import moocore
+import numpy as np
+from numpy.testing import assert_array_equal, assert_allclose
 
 pd = pytest.importorskip("pandas")
 
@@ -27,3 +29,58 @@ def test_normalise_pandas():
     )
 
     pd.testing.assert_frame_equal(df, df_true)
+
+
+def test_example_pandas():
+    """Corresponds to ``examples/plot_pandas.py``."""
+    df = pd.DataFrame(
+        dict(
+            obj1=[1, 2, 3, 4, 5],
+            obj2=[5, 4, 3, 2, 1],
+            obj3=[100, 200, 200, 300, 100],
+            algo=2 * ["foo"] + 2 * ["bar"] + ["foo"],
+        )
+    )
+    obj_cols = ["obj1", "obj2", "obj3"]
+
+    df[obj_cols] = moocore.normalise(df[obj_cols], to_range=[1, 2])
+
+    ref = 2.1
+    hv = (
+        df.groupby("algo")[obj_cols]
+        .apply(moocore.hypervolume, ref=ref)
+        .reset_index(name="hv")
+    )
+    pd.testing.assert_frame_equal(
+        hv, pd.DataFrame(dict(algo=["bar", "foo"], hv=[0.22475, 0.34350]))
+    )
+
+    hv = moocore.apply_within_sets(
+        df[obj_cols], df["algo"], moocore.hypervolume, ref=ref
+    )
+    assert_allclose(hv, [0.3435, 0.22475])
+
+    df = pd.DataFrame(
+        dict(
+            algo=["a"] * 3 + ["b"] * 3 + ["a", "b"] * 2,
+            run=[1, 1, 2, 1, 1, 2, 2, 2, 1, 1],
+            obj1=[1, 2, 3, 4, 5, 6, 5, 4, 3, 1],
+            obj2=[6, 5, 4, 3, 2, 1, 5, 4, 5, 6],
+            obj3=[1, 2, 3, 4, 5, 6, 6, 7, 5, 2],
+        )
+    )
+    pd.testing.assert_frame_equal(
+        df.groupby(["algo", "run"])[obj_cols]
+        .apply(moocore.filter_dominated)
+        .reset_index(level=["algo", "run"]),
+        df.iloc[[0, 1, 2, 3, 4, 9, 5, 7], :],
+    )
+
+    sets = df["algo"].astype(str) + "-" + df["run"].astype(str)
+    is_nondom = moocore.is_nondominated_within_sets(df[obj_cols], sets=sets)
+    assert_array_equal(
+        is_nondom,
+        np.array(
+            [True, True, False, True, False, True, True, True, True, True]
+        ),
+    )
diff --git a/python/tox.ini b/python/tox.ini
@@ -11,7 +11,7 @@ package = wheel
 wheel_build_env = .pkg
 deps =
     numpy<2
-    pandas
+    pandas>=2
     pytest>=7
     cov: coverage[toml]
     cov: gcovr
@@ -25,7 +25,7 @@ package = wheel
 wheel_build_env = .pkg
 deps =
     numpy>=2
-    pandas
+    pandas>=2
     pytest>=7
     cov: coverage[toml]
     cov: gcovr