Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add ignore_nan argument to concordance_cc() #43

Merged
merged 15 commits into from
May 22, 2023
34 changes: 22 additions & 12 deletions audmetric/core/api.py
Original file line number Diff line number Diff line change
Expand Up @@ -74,6 +74,8 @@ def accuracy(
def concordance_cc(
truth: typing.Sequence[float],
prediction: typing.Sequence[float],
*,
ignore_nan: bool = False,
) -> float:
r"""Concordance correlation coefficient.

Expand All @@ -92,6 +94,10 @@ def concordance_cc(
Args:
truth: ground truth values
prediction: predicted values
ignore_nan: if ``True``
all samples that contain ``NaN``
in ``truth`` or ``prediction``
are ignored

Returns:
concordance correlation coefficient :math:`\in [-1, 1]`
Expand All @@ -101,7 +107,7 @@ def concordance_cc(

Examples:
>>> concordance_cc([0, 1, 2], [0, 1, 1])
0.6666666666666666
0.6666666666666665

"""
assert_equal_length(truth, prediction)
Expand All @@ -111,23 +117,27 @@ def concordance_cc(
if not isinstance(prediction, np.ndarray):
prediction = np.array(list(prediction))

if ignore_nan:
mask = ~(np.isnan(truth) | np.isnan(prediction))
truth = truth[mask]
prediction = prediction[mask]

if len(prediction) < 2:
return np.NaN

r = pearson_cc(prediction, truth)
x_mean = prediction.mean()
y_mean = truth.mean()
x_std = prediction.std()
y_std = truth.std()
denominator = (
x_std * x_std
+ y_std * y_std
+ (x_mean - y_mean) * (x_mean - y_mean)
)
length = prediction.size
mean_y = np.mean(truth)
mean_x = np.mean(prediction)
a = prediction - mean_x
b = truth - mean_y

numerator = 2 * np.dot(a, b)
denominator = np.dot(a, a) + np.dot(b, b) + length * (mean_x - mean_y) ** 2

if denominator == 0:
ccc = np.nan
else:
ccc = 2 * r * x_std * y_std / denominator
ccc = numerator / denominator

return float(ccc)

Expand Down
57 changes: 0 additions & 57 deletions tests/test_api.py
Original file line number Diff line number Diff line change
Expand Up @@ -233,63 +233,6 @@ def test_event_error_rate(truth, prediction, eer):
)


@pytest.mark.parametrize('truth,prediction', [
(
np.random.randint(0, 10, size=5),
np.random.randint(0, 10, size=5),
),
(
pd.Series(np.random.randint(0, 10, size=5)).astype('Int64'),
pd.Series(np.random.randint(0, 10, size=5)).astype('Int64'),
),
(
np.random.randint(0, 10, size=1),
np.random.randint(0, 10, size=1),
),
(
np.random.randint(0, 10, size=10),
np.random.randint(0, 10, size=10),
),
(
np.random.randint(0, 2, size=100),
np.random.randint(0, 2, size=100),
),
(
np.array([]),
np.array([]),
),
(
np.zeros(10),
np.zeros(10),
),
])
def test_concordance_cc(truth, prediction):

ccc = audmetric.concordance_cc(truth, prediction)

prediction = np.array(list(prediction))
truth = np.array(list(truth))

if len(prediction) < 2:
ccc_expected = np.NaN
else:
denominator = (
prediction.std() ** 2
+ truth.std() ** 2
+ (prediction.mean() - truth.mean()) ** 2
)
if denominator == 0:
ccc_expected = np.NaN
else:
r = np.corrcoef(list(prediction), list(truth))[0][1]
ccc_expected = 2 * r * prediction.std() * truth.std() / denominator

np.testing.assert_almost_equal(
ccc,
ccc_expected,
)


@pytest.mark.parametrize('class_range,num_elements,to_string,percentage', [
([0, 10], 5, False, False),
([0, 10], 1, False, False),
Expand Down
179 changes: 179 additions & 0 deletions tests/test_concordance_cc.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,179 @@
import numpy as np
import pandas as pd
import pytest

import audmetric


def expected_ccc(truth, prediction):
    r"""Expected Concordance Correlation Coefficient.

    This is a direct implementation of its math equation,
    used as an independent reference
    for :func:`audmetric.concordance_cc`.

    Returns NaN if fewer than two samples are given,
    or if the denominator of the equation is zero
    (e.g. constant and identical truth and prediction).

    """
    prediction = np.array(list(prediction))
    truth = np.array(list(truth))

    # CCC is undefined for fewer than two samples
    if len(prediction) < 2:
        ccc = np.nan
    else:
        denominator = (
            prediction.std() ** 2
            + truth.std() ** 2
            + (prediction.mean() - truth.mean()) ** 2
        )
        if denominator == 0:
            # Avoid division by zero,
            # e.g. for constant identical sequences
            ccc = np.nan
        else:
            r = np.corrcoef(list(prediction), list(truth))[0][1]
            numerator = 2 * r * prediction.std() * truth.std()
            ccc = numerator / denominator
    return ccc


@pytest.mark.parametrize('ignore_nan', [True, False])
@pytest.mark.parametrize(
    'truth, prediction',
    [
        # None of the following pairs contains NaN,
        # so the result must not depend on ignore_nan
        (
            np.random.randint(0, 10, size=5),
            np.random.randint(0, 10, size=5),
        ),
        (
            pd.Series(np.random.randint(0, 10, size=5)).astype('Int64'),
            pd.Series(np.random.randint(0, 10, size=5)).astype('Int64'),
        ),
        (
            np.random.randint(0, 10, size=1),
            np.random.randint(0, 10, size=1),
        ),
        (
            np.random.randint(0, 10, size=10),
            np.random.randint(0, 10, size=10),
        ),
        (
            np.random.randint(0, 2, size=100),
            np.random.randint(0, 2, size=100),
        ),
        (
            np.array([]),
            np.array([]),
        ),
        (
            np.zeros(10),
            np.zeros(10),
        ),
    ]
)
def test_concordance_cc(truth, prediction, ignore_nan):
    r"""Metric agrees with the direct math implementation."""
    result = audmetric.concordance_cc(
        truth,
        prediction,
        ignore_nan=ignore_nan,
    )
    expected = expected_ccc(truth, prediction)
    np.testing.assert_almost_equal(result, expected)


@pytest.mark.parametrize(
    'truth, prediction, ignore_nan, expected',
    [
        # Too few (non-NaN) samples:
        # CCC is undefined regardless of ignore_nan
        ([], [], True, np.nan),
        ([], [], False, np.nan),
        ([0], [0], True, np.nan),
        ([0], [0], False, np.nan),
        ([0, np.nan], [0, np.nan], True, np.nan),
        ([0, np.nan], [0, np.nan], False, np.nan),
        # ignore_nan=True
        # removes every sample pair
        # that contains NaN in truth or prediction
        (
            [0, 1, 2, 3],
            [1, 2, 3, 4],
            True,
            expected_ccc([0, 1, 2, 3], [1, 2, 3, 4]),
        ),
        (
            [np.nan, 1, 2, 3],
            [np.nan, 2, 3, 4],
            True,
            expected_ccc([1, 2, 3], [2, 3, 4]),
        ),
        (
            [np.nan, 1, 2, 3],
            [1, 2, 3, np.nan],
            True,
            expected_ccc([1, 2], [2, 3]),
        ),
        (
            [0, np.nan, 2, 3],
            [1, 2, 3, 4],
            True,
            expected_ccc([0, 2, 3], [1, 3, 4]),
        ),
        (
            [0, 1, 2, 3],
            [1, 2, np.nan, 4],
            True,
            expected_ccc([0, 1, 3], [1, 2, 4]),
        ),
        (
            [np.nan, np.nan, 2, 3],
            [1, 2, 3, np.nan],
            True,
            expected_ccc([2], [3]),
        ),
        # ignore_nan=False propagates NaN into the result
        (
            [np.nan, np.nan, 2, 3],
            [1, 2, 3, np.nan],
            False,
            np.nan,
        ),
    ]
)
def test_concordance_cc_ignore_nan(
    truth,
    prediction,
    ignore_nan,
    expected,
):
    r"""concordance_cc() with NaN in truth and/or prediction."""
    ccc = audmetric.concordance_cc(truth, prediction, ignore_nan=ignore_nan)

    np.testing.assert_almost_equal(
        ccc,
        expected,
    )