Skip to content

Commit

Permalink
python/ Require pandas>=2. Add example to tests. Improve Pandas example.
Browse files Browse the repository at this point in the history
  • Loading branch information
MLopez-Ibanez committed Oct 30, 2024
1 parent 9d04878 commit 1e9f854
Show file tree
Hide file tree
Showing 5 changed files with 111 additions and 44 deletions.
43 changes: 23 additions & 20 deletions python/examples/plot_pandas.py
Original file line number Diff line number Diff line change
@@ -1,12 +1,15 @@
"""Using moocore with Pandas
==========================
"""=========================
Using moocore with Pandas
=========================
This example shows how to use ``moocore`` functions with Pandas (https://pandas.pydata.org/).
This example shows how to use ``moocore`` functions with Pandas (https://pandas.pydata.org/). This example requires pandas version >= 2.0.0.
"""

import pandas as pd
import moocore
import pandas as pd

print(f"pandas version: {pd.__version__}")

# %%
# First, we create a toy Pandas :class:`~pandas.DataFrame`.
Expand All @@ -22,26 +25,25 @@
df

# %%
# Now we normalize it being careful to replace the correct columns.
# Normalize it (only replace the objective columns!).

obj_cols = ["obj1", "obj2", "obj3"]
df[obj_cols] = moocore.normalise(df[obj_cols], to_range=[1, 2])
df

# %%
# Now we calculate the hypervolume for each ``algo`` using :meth:`~pandas.DataFrame.groupby` and :meth:`~pandas.core.groupby.DataFrameGroupBy.apply`.
# Calculate the hypervolume for each ``algo`` using :meth:`~pandas.DataFrame.groupby` and :meth:`~pandas.core.groupby.DataFrameGroupBy.apply`.

ref = 2.1
hv = (
df.groupby("algo")
.apply(moocore.hypervolume, ref=ref, include_groups=False)
df.groupby("algo")[obj_cols]
.apply(moocore.hypervolume, ref=ref)
.reset_index(name="hv")
)
hv

# %%
# We can also use

# Or we can just use:

hv = moocore.apply_within_sets(
df[obj_cols], df["algo"], moocore.hypervolume, ref=ref
Expand All @@ -50,7 +52,7 @@


# %%
# Note that :func:`moocore.apply_within_sets()` processes each group in
# :func:`moocore.apply_within_sets()` processes each group in
# order, even if the elements of the same group are not contiguous. That is, it
# processes the groups like :meth:`pandas.Series.unique` and not like
# :class:`set` or :func:`numpy.unique()`.
Expand All @@ -69,30 +71,31 @@
run=[1, 1, 2, 1, 1, 2, 2, 2, 1, 1],
)
)
obj_cols = ["obj1", "obj2", "obj3"]
df

# %%
# We can still use :meth:`~pandas.DataFrame.groupby` but we may need to reset and clean up the index.

df.groupby(["algo", "run"]).apply(
moocore.filter_dominated, include_groups=False
).reset_index().drop(columns="level_2")
df.groupby(["algo", "run"])[obj_cols].apply(
moocore.filter_dominated
).reset_index(level=["algo", "run"])

# %%
# Or we can combine the multiple columns as one to define the sets.
# Or we can combine the multiple columns as one to define the sets:
#
sets = df["algo"].astype(str) + "-" + df["run"].astype(str)
sets

# %%
# Identify nondominated rows within each set.
# then identify nondominated rows within each set:
#
is_nondom = moocore.is_nondominated_within_sets(
df[["obj1", "obj2", "obj2"]], sets=sets
)
is_nondom = moocore.is_nondominated_within_sets(df[obj_cols], sets=sets)
is_nondom

# %%
# And use the boolean vector above to filter rows.
# And use the boolean vector above to filter rows:
#
df[is_nondom]

# %%
2 changes: 1 addition & 1 deletion python/requirements_dev.txt
Original file line number Diff line number Diff line change
Expand Up @@ -26,5 +26,5 @@ jupyterlab
ipywidgets

# Gallery examples
pandas
pandas >=2.0.0
seaborn
49 changes: 28 additions & 21 deletions python/tests/test_moocore.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
# ruff: noqa: D100, D101, D102, D103
import pytest
import numpy as np
from numpy.testing import assert_array_equal, assert_allclose
import math

import moocore
Expand Down Expand Up @@ -28,9 +29,11 @@ def check_testdata(testpath, expected_name, expected_shape):
test_datapath(f"expected_output/read_datasets/{expected_name}")
)

assert np.allclose(
testdata, check_data
), f"read_datasets does not produce expected array for file {testpath}"
assert_allclose(
testdata,
check_data,
err_msg=f"read_datasets does not produce expected array for file {testpath}",
)

test_names = [
"input1.dat",
Expand Down Expand Up @@ -153,7 +156,7 @@ def test_is_nondominated(test_datapath):
assert (
dominated
== [False, False, False, False, True, False, True, True, False, True]
).all
).all()
T = np.array(
[[1, 0, 1], [1, 1, 1], [0, 1, 1], [1, 0, 1], [1, 1, 0], [1, 1, 1]]
)
Expand All @@ -162,8 +165,8 @@ def test_is_nondominated(test_datapath):
non_dominated_weak = T[moocore.is_nondominated(T, keep_weakly=True)]
expct_nondom_weak = np.array([[1, 0, 1], [0, 1, 1], [1, 0, 1], [1, 1, 0]])

assert np.array_equal(non_dominated_weak, expct_nondom_weak)
assert np.array_equal(
assert_array_equal(non_dominated_weak, expct_nondom_weak)
assert_array_equal(
moocore.filter_dominated(T, keep_weakly=True), expct_nondom_weak
)

Expand All @@ -183,16 +186,16 @@ def test_is_nondominated(test_datapath):
expected_x_nondom = np.array(
[[0, 0, 1, 2], [10, 20, 0, 0], [20, 10, 0, 0]]
)
assert np.array_equal(x_nondom, expected_x_nondom)
assert np.array_equal(
assert_array_equal(x_nondom, expected_x_nondom)
assert_array_equal(
moocore.filter_dominated(x, maximise=True), expected_x_nondom
)
minmax = np.array([1, 2, 2, 1, 5, 6, 7, 5]).reshape((-1, 2))
assert np.array_equal(
assert_array_equal(
moocore.filter_dominated(minmax, maximise=[True, False]),
np.array([[2, 1], [7, 5]]),
)
assert np.array_equal(
assert_array_equal(
moocore.filter_dominated(minmax, maximise=[False, True]),
np.array([[1, 2], [5, 6]]),
)
Expand Down Expand Up @@ -222,8 +225,8 @@ def test_normalise():
# With the default to_range = [0, 1], all columns should be normalised to the same values
expected_outcome = np.tile(np.linspace(0, 1, num=6).reshape(6, -1), 3)

assert np.allclose(moocore.normalise(A), expected_outcome)
assert np.allclose(
assert_allclose(moocore.normalise(A), expected_outcome)
assert_allclose(
moocore.normalise(A, to_range=[0, 10]), 10 * expected_outcome
)
expected_with_bounds = np.transpose(
Expand All @@ -235,7 +238,7 @@ def test_normalise():
]
)
)
assert np.allclose(
assert_allclose(
moocore.normalise(A, upper=[25, 25, 25], lower=[0, 0, 0]),
expected_with_bounds,
)
Expand All @@ -244,8 +247,8 @@ def test_normalise():
A = np.array([[1.0, 2.0], [2.0, 1.0]])
A_copy = A.copy()
B = moocore.normalise(A)
assert np.allclose(A, A_copy)
assert np.allclose(B, np.array([[0.0, 1.0], [1.0, 0.0]]))
assert_allclose(A, A_copy)
assert_allclose(B, np.array([[0.0, 1.0], [1.0, 0.0]]))


def test_eaf(test_datapath):
Expand Down Expand Up @@ -280,12 +283,16 @@ def test_eaf(test_datapath):
assert (
eaf_test.shape == expected_eaf_result.shape
), f"Shapes of {test_name} and {expected_eaf_name} do not match"
assert np.allclose(
eaf_test, expected_eaf_result
), f"{expected_eaf_name} test failed"
assert np.allclose(
eaf_pct_test, expected_eaf_pct_result
), f"pct_{expected_eaf_name} test failed"
assert_allclose(
eaf_test,
expected_eaf_result,
err_msg=f"{expected_eaf_name} test failed",
)
assert_allclose(
eaf_pct_test,
expected_eaf_pct_result,
err_msg=f"pct_{expected_eaf_name} test failed",
)


# def test_eafdiff(test_datapath):
Expand Down
57 changes: 57 additions & 0 deletions python/tests/test_pandas.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,8 @@
# ruff: noqa: D100, D101, D102, D103
import pytest
import moocore
import numpy as np
from numpy.testing import assert_array_equal, assert_allclose

pd = pytest.importorskip("pandas")

Expand All @@ -27,3 +29,58 @@ def test_normalise_pandas():
)

pd.testing.assert_frame_equal(df, df_true)


def test_example_pandas():
    """Check the workflow demonstrated in ``examples/plot_pandas.py``."""
    obj_cols = ["obj1", "obj2", "obj3"]
    ref = 2.1

    # Toy frame: three objective columns plus the algorithm label of each row.
    toy = pd.DataFrame(
        {
            "obj1": [1, 2, 3, 4, 5],
            "obj2": [5, 4, 3, 2, 1],
            "obj3": [100, 200, 200, 300, 100],
            "algo": ["foo", "foo", "bar", "bar", "foo"],
        }
    )
    # Rescale only the objective columns; "algo" is not part of the selection.
    toy[obj_cols] = moocore.normalise(toy[obj_cols], to_range=[1, 2])

    # Hypervolume per algorithm via pandas groupby/apply.
    hv_per_algo = toy.groupby("algo")[obj_cols].apply(
        moocore.hypervolume, ref=ref
    )
    expected_hv = pd.DataFrame(
        {"algo": ["bar", "foo"], "hv": [0.22475, 0.34350]}
    )
    pd.testing.assert_frame_equal(
        hv_per_algo.reset_index(name="hv"), expected_hv
    )

    # Same computation through moocore.apply_within_sets, using the "algo"
    # column as the set labels.
    hv_within_sets = moocore.apply_within_sets(
        toy[obj_cols], toy["algo"], moocore.hypervolume, ref=ref
    )
    assert_allclose(hv_within_sets, [0.3435, 0.22475])

    # Second frame: each set is identified by the (algo, run) pair.
    runs = pd.DataFrame(
        {
            "algo": ["a"] * 3 + ["b"] * 3 + ["a", "b"] * 2,
            "run": [1, 1, 2, 1, 1, 2, 2, 2, 1, 1],
            "obj1": [1, 2, 3, 4, 5, 6, 5, 4, 3, 1],
            "obj2": [6, 5, 4, 3, 2, 1, 5, 4, 5, 6],
            "obj3": [1, 2, 3, 4, 5, 6, 6, 7, 5, 2],
        }
    )

    # Filter dominated rows group by group, then move the grouping keys back
    # from the index into ordinary columns before comparing.
    filtered = runs.groupby(["algo", "run"])[obj_cols].apply(
        moocore.filter_dominated
    )
    kept_rows = [0, 1, 2, 3, 4, 9, 5, 7]
    pd.testing.assert_frame_equal(
        filtered.reset_index(level=["algo", "run"]),
        runs.iloc[kept_rows, :],
    )

    # Alternative: merge both key columns into one label per row.
    algo_labels = runs["algo"].astype(str)
    run_labels = runs["run"].astype(str)
    sets = algo_labels + "-" + run_labels
    is_nondom = moocore.is_nondominated_within_sets(runs[obj_cols], sets=sets)
    expected_mask = np.array(
        [True, True, False, True, False, True, True, True, True, True]
    )
    assert_array_equal(is_nondom, expected_mask)
4 changes: 2 additions & 2 deletions python/tox.ini
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@ package = wheel
wheel_build_env = .pkg
deps =
numpy<2
pandas
pandas>=2
pytest>=7
cov: coverage[toml]
cov: gcovr
Expand All @@ -25,7 +25,7 @@ package = wheel
wheel_build_env = .pkg
deps =
numpy>=2
pandas
pandas>=2
pytest>=7
cov: coverage[toml]
cov: gcovr
Expand Down

0 comments on commit 1e9f854

Please sign in to comment.