diff --git a/python/examples/plot_pandas.py b/python/examples/plot_pandas.py index 4351e00c..c444b987 100644 --- a/python/examples/plot_pandas.py +++ b/python/examples/plot_pandas.py @@ -1,12 +1,15 @@ -"""Using moocore with Pandas -========================== +"""========================= +Using moocore with Pandas +========================= -This example shows how to use ``moocore`` functions with Pandas (https://pandas.pydata.org/). +This example shows how to use ``moocore`` functions with Pandas (https://pandas.pydata.org/). This example requires pandas version >= 2.0.0 """ -import pandas as pd import moocore +import pandas as pd + +print(f"pandas version: {pd.__version__}") # %% # First, we create a toy Pandas :class:`~pandas.DataFrame`. @@ -22,26 +25,25 @@ df # %% -# Now we normalize it being careful to replace the correct columns. +# Normalize it (only replace the objective columns!). obj_cols = ["obj1", "obj2", "obj3"] df[obj_cols] = moocore.normalise(df[obj_cols], to_range=[1, 2]) df # %% -# Now we calculate the hypervolume for each ``algo`` using :meth:`~pandas.DataFrame.groupby` and :meth:`~pandas.core.groupby.DataFrameGroupBy.apply`. +# Calculate the hypervolume for each ``algo`` using :meth:`~pandas.DataFrame.groupby` and :meth:`~pandas.core.groupby.DataFrameGroupBy.apply`. ref = 2.1 hv = ( - df.groupby("algo") - .apply(moocore.hypervolume, ref=ref, include_groups=False) + df.groupby("algo")[obj_cols] + .apply(moocore.hypervolume, ref=ref) .reset_index(name="hv") ) hv # %% -# We can also use - +# Or we can just use: hv = moocore.apply_within_sets( df[obj_cols], df["algo"], moocore.hypervolume, ref=ref @@ -50,7 +52,7 @@ # %% -# Note that :func:`moocore.apply_within_sets()` processes each group in +# :func:`moocore.apply_within_sets()` processes each group in # order, even if the elements of the same group are not contiguous. That is, it # processes the groups like :meth:`pandas.Series.unique` and not like # :class:`set` or :func:`numpy.unique()`. 
@@ -69,30 +71,31 @@ run=[1, 1, 2, 1, 1, 2, 2, 2, 1, 1], ) ) +obj_cols = ["obj1", "obj2", "obj3"] df # %% # We can still use :meth:`~pandas.DataFrame.groupby` but we may need to reset and clean-up the index. -df.groupby(["algo", "run"]).apply( - moocore.filter_dominated, include_groups=False -).reset_index().drop(columns="level_2") +df.groupby(["algo", "run"])[obj_cols].apply( + moocore.filter_dominated +).reset_index(level=["algo", "run"]) # %% -# Or we can combine the multiple columns as one to define the sets. +# Or we can combine the multiple columns as one to define the sets: # sets = df["algo"].astype(str) + "-" + df["run"].astype(str) sets # %% -# Identify nondominated rows within each set. +# then identify nondominated rows within each set: # -is_nondom = moocore.is_nondominated_within_sets( - df[["obj1", "obj2", "obj2"]], sets=sets -) +is_nondom = moocore.is_nondominated_within_sets(df[obj_cols], sets=sets) is_nondom # %% -# And use the boolean vector above to filter rows. +# And use the boolean vector above to filter rows: # df[is_nondom] + +# %% diff --git a/python/requirements_dev.txt b/python/requirements_dev.txt index 2a82bbad..1c60bcdc 100644 --- a/python/requirements_dev.txt +++ b/python/requirements_dev.txt @@ -26,5 +26,5 @@ jupyterlab ipywidgets # Gallery examples -pandas +pandas >=2.0.0 seaborn diff --git a/python/tests/test_moocore.py b/python/tests/test_moocore.py index 277412ff..e7987b48 100644 --- a/python/tests/test_moocore.py +++ b/python/tests/test_moocore.py @@ -1,6 +1,7 @@ # ruff: noqa: D100, D101, D102, D103 import pytest import numpy as np +from numpy.testing import assert_array_equal, assert_allclose import math import moocore @@ -28,9 +29,11 @@ def check_testdata(testpath, expected_name, expected_shape): test_datapath(f"expected_output/read_datasets/{expected_name}") ) - assert np.allclose( - testdata, check_data - ), f"read_datasets does not produce expected array for file {testpath}" + assert_allclose( + testdata, + check_data, + 
err_msg=f"read_datasets does not produce expected array for file {testpath}", + ) test_names = [ "input1.dat", @@ -153,7 +156,7 @@ def test_is_nondominated(test_datapath): assert ( dominated == [False, False, False, False, True, False, True, True, False, True] - ).all + ).all() T = np.array( [[1, 0, 1], [1, 1, 1], [0, 1, 1], [1, 0, 1], [1, 1, 0], [1, 1, 1]] ) @@ -162,8 +165,8 @@ def test_is_nondominated(test_datapath): non_dominated_weak = T[moocore.is_nondominated(T, keep_weakly=True)] expct_nondom_weak = np.array([[1, 0, 1], [0, 1, 1], [1, 0, 1], [1, 1, 0]]) - assert np.array_equal(non_dominated_weak, expct_nondom_weak) - assert np.array_equal( + assert_array_equal(non_dominated_weak, expct_nondom_weak) + assert_array_equal( moocore.filter_dominated(T, keep_weakly=True), expct_nondom_weak ) @@ -183,16 +186,16 @@ def test_is_nondominated(test_datapath): expected_x_nondom = np.array( [[0, 0, 1, 2], [10, 20, 0, 0], [20, 10, 0, 0]] ) - assert np.array_equal(x_nondom, expected_x_nondom) - assert np.array_equal( + assert_array_equal(x_nondom, expected_x_nondom) + assert_array_equal( moocore.filter_dominated(x, maximise=True), expected_x_nondom ) minmax = np.array([1, 2, 2, 1, 5, 6, 7, 5]).reshape((-1, 2)) - assert np.array_equal( + assert_array_equal( moocore.filter_dominated(minmax, maximise=[True, False]), np.array([[2, 1], [7, 5]]), ) - assert np.array_equal( + assert_array_equal( moocore.filter_dominated(minmax, maximise=[False, True]), np.array([[1, 2], [5, 6]]), ) @@ -222,8 +225,8 @@ def test_normalise(): # With default to_range = [0,1] - all columns should have their values normalised to same value expected_outcome = np.tile(np.linspace(0, 1, num=6).reshape(6, -1), 3) - assert np.allclose(moocore.normalise(A), expected_outcome) - assert np.allclose( + assert_allclose(moocore.normalise(A), expected_outcome) + assert_allclose( moocore.normalise(A, to_range=[0, 10]), 10 * expected_outcome ) expected_with_bounds = np.transpose( @@ -235,7 +238,7 @@ def 
test_normalise(): ] ) ) - assert np.allclose( + assert_allclose( moocore.normalise(A, upper=[25, 25, 25], lower=[0, 0, 0]), expected_with_bounds, ) @@ -244,8 +247,8 @@ def test_normalise(): A = np.array([[1.0, 2.0], [2.0, 1.0]]) A_copy = A.copy() B = moocore.normalise(A) - assert np.allclose(A, A_copy) - assert np.allclose(B, np.array([[0.0, 1.0], [1.0, 0.0]])) + assert_allclose(A, A_copy) + assert_allclose(B, np.array([[0.0, 1.0], [1.0, 0.0]])) def test_eaf(test_datapath): @@ -280,12 +283,16 @@ def test_eaf(test_datapath): assert ( eaf_test.shape == expected_eaf_result.shape ), f"Shapes of {test_name} and {expected_eaf_name} do not match" - assert np.allclose( - eaf_test, expected_eaf_result - ), f"{expected_eaf_name} test failed" - assert np.allclose( - eaf_pct_test, expected_eaf_pct_result - ), f"pct_{expected_eaf_name} test failed" + assert_allclose( + eaf_test, + expected_eaf_result, + err_msg=f"{expected_eaf_name} test failed", + ) + assert_allclose( + eaf_pct_test, + expected_eaf_pct_result, + err_msg=f"pct_{expected_eaf_name} test failed", + ) # def test_eafdiff(test_datapath): diff --git a/python/tests/test_pandas.py b/python/tests/test_pandas.py index d81ee230..85947c3d 100644 --- a/python/tests/test_pandas.py +++ b/python/tests/test_pandas.py @@ -1,6 +1,8 @@ # ruff: noqa: D100, D101, D102, D103 import pytest import moocore +import numpy as np +from numpy.testing import assert_array_equal, assert_allclose pd = pytest.importorskip("pandas") @@ -27,3 +29,58 @@ def test_normalise_pandas(): ) pd.testing.assert_frame_equal(df, df_true) + + +def test_example_pandas(): + """Corresponds to ``examples/plot_pandas.py``.""" + df = pd.DataFrame( + dict( + obj1=[1, 2, 3, 4, 5], + obj2=[5, 4, 3, 2, 1], + obj3=[100, 200, 200, 300, 100], + algo=2 * ["foo"] + 2 * ["bar"] + ["foo"], + ) + ) + obj_cols = ["obj1", "obj2", "obj3"] + + df[obj_cols] = moocore.normalise(df[obj_cols], to_range=[1, 2]) + + ref = 2.1 + hv = ( + df.groupby("algo")[obj_cols] + 
.apply(moocore.hypervolume, ref=ref) + .reset_index(name="hv") + ) + pd.testing.assert_frame_equal( + hv, pd.DataFrame(dict(algo=["bar", "foo"], hv=[0.22475, 0.34350])) + ) + + hv = moocore.apply_within_sets( + df[obj_cols], df["algo"], moocore.hypervolume, ref=ref + ) + assert_allclose(hv, [0.3435, 0.22475]) + + df = pd.DataFrame( + dict( + algo=["a"] * 3 + ["b"] * 3 + ["a", "b"] * 2, + run=[1, 1, 2, 1, 1, 2, 2, 2, 1, 1], + obj1=[1, 2, 3, 4, 5, 6, 5, 4, 3, 1], + obj2=[6, 5, 4, 3, 2, 1, 5, 4, 5, 6], + obj3=[1, 2, 3, 4, 5, 6, 6, 7, 5, 2], + ) + ) + pd.testing.assert_frame_equal( + df.groupby(["algo", "run"])[obj_cols] + .apply(moocore.filter_dominated) + .reset_index(level=["algo", "run"]), + df.iloc[[0, 1, 2, 3, 4, 9, 5, 7], :], + ) + + sets = df["algo"].astype(str) + "-" + df["run"].astype(str) + is_nondom = moocore.is_nondominated_within_sets(df[obj_cols], sets=sets) + assert_array_equal( + is_nondom, + np.array( + [True, True, True, True, True, True, False, True, False, True] + ), + ) diff --git a/python/tox.ini b/python/tox.ini index 0a8f9dbd..d330aa5c 100644 --- a/python/tox.ini +++ b/python/tox.ini @@ -11,7 +11,7 @@ package = wheel wheel_build_env = .pkg deps = numpy<2 - pandas + pandas>=2 pytest>=7 cov: coverage[toml] cov: gcovr @@ -25,7 +25,7 @@ package = wheel wheel_build_env = .pkg deps = numpy>=2 - pandas + pandas>=2 pytest>=7 cov: coverage[toml] cov: gcovr