Add ignore_nan argument to concordance_cc() #43

Merged
merged 15 commits on May 22, 2023
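A minimal usage sketch of the new argument (values are illustrative, not taken from the PR; behavior follows the merged diff below):

import numpy as np
import audmetric

truth = [0, 1, 2, 3, np.nan]
prediction = [0, 1, 2, 3, 4]

# By default a NaN in either sequence propagates into the result
audmetric.concordance_cc(truth, prediction)  # nan

# With ignore_nan=True the NaN sample is dropped before computing CCC
audmetric.concordance_cc(truth, prediction, ignore_nan=True)  # 1.0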
34 changes: 22 additions & 12 deletions audmetric/core/api.py
@@ -74,6 +74,8 @@ def accuracy(
def concordance_cc(
truth: typing.Sequence[float],
prediction: typing.Sequence[float],
+    *,
+    ignore_nan: bool = False,
) -> float:
r"""Concordance correlation coefficient.

@@ -92,6 +94,10 @@ def concordance_cc(
Args:
truth: ground truth values
prediction: predicted values
+        ignore_nan: if ``True``
+            all samples that contain ``NaN``
+            in ``truth`` or ``prediction``
+            are ignored

Returns:
concordance correlation coefficient :math:`\in [-1, 1]`
@@ -101,7 +107,7 @@

Examples:
>>> concordance_cc([0, 1, 2], [0, 1, 1])
-    0.6666666666666666
+    0.6666666666666665

"""
assert_equal_length(truth, prediction)
@@ -111,23 +117,27 @@
if not isinstance(prediction, np.ndarray):
prediction = np.array(list(prediction))

+    if ignore_nan:
+        mask = ~(np.isnan(truth) | np.isnan(prediction))
+        truth = truth[mask]
+        prediction = prediction[mask]

if len(prediction) < 2:
return np.NaN

-    r = pearson_cc(prediction, truth)
-    x_mean = prediction.mean()
-    y_mean = truth.mean()
-    x_std = prediction.std()
-    y_std = truth.std()
-    denominator = (
-        x_std * x_std
-        + y_std * y_std
-        + (x_mean - y_mean) * (x_mean - y_mean)
-    )
+    length = prediction.size
+    mean_y = np.mean(truth)
+    mean_x = np.mean(prediction)
+    a = prediction - mean_x
+    b = truth - mean_y
+
+    numerator = 2 * np.dot(a, b)
+    denominator = np.dot(a, a) + np.dot(b, b) + length * (mean_x - mean_y) ** 2

    if denominator == 0:
        ccc = np.nan
    else:
-        ccc = 2 * r * x_std * y_std / denominator
+        ccc = numerator / denominator

return float(ccc)

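For context, the new implementation computes CCC from centered dot products instead of going through pearson_cc(). The two forms are algebraically equivalent: with population statistics, r * std(x) * std(y) equals cov(x, y), and dividing the dot-product numerator and denominator by n recovers 2 * cov / (var_x + var_y + (mean_x - mean_y)**2). A quick sketch (not part of the PR) that checks the agreement on sample data:

import numpy as np

def ccc_pearson_form(x, y):
    # Old form: 2 * r * std_x * std_y / (var_x + var_y + (mean_x - mean_y)**2)
    r = np.corrcoef(x, y)[0, 1]
    denominator = x.std() ** 2 + y.std() ** 2 + (x.mean() - y.mean()) ** 2
    return 2 * r * x.std() * y.std() / denominator

def ccc_dot_form(x, y):
    # New form: centered dot products, no explicit correlation coefficient
    a = x - x.mean()
    b = y - y.mean()
    numerator = 2 * np.dot(a, b)
    denominator = np.dot(a, a) + np.dot(b, b) + len(x) * (x.mean() - y.mean()) ** 2
    return numerator / denominator

x = np.array([0.0, 1.0, 2.0, 4.0])
y = np.array([0.0, 1.0, 1.0, 3.0])
assert np.isclose(ccc_pearson_form(x, y), ccc_dot_form(x, y))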
57 changes: 0 additions & 57 deletions tests/test_api.py
@@ -233,63 +233,6 @@ def test_event_error_rate(truth, prediction, eer):
)


@pytest.mark.parametrize('truth,prediction', [
(
np.random.randint(0, 10, size=5),
np.random.randint(0, 10, size=5),
),
(
pd.Series(np.random.randint(0, 10, size=5)).astype('Int64'),
pd.Series(np.random.randint(0, 10, size=5)).astype('Int64'),
),
(
np.random.randint(0, 10, size=1),
np.random.randint(0, 10, size=1),
),
(
np.random.randint(0, 10, size=10),
np.random.randint(0, 10, size=10),
),
(
np.random.randint(0, 2, size=100),
np.random.randint(0, 2, size=100),
),
(
np.array([]),
np.array([]),
),
(
np.zeros(10),
np.zeros(10),
),
])
def test_concordance_cc(truth, prediction):

ccc = audmetric.concordance_cc(truth, prediction)

prediction = np.array(list(prediction))
truth = np.array(list(truth))

if len(prediction) < 2:
ccc_expected = np.NaN
else:
denominator = (
prediction.std() ** 2
+ truth.std() ** 2
+ (prediction.mean() - truth.mean()) ** 2
)
if denominator == 0:
ccc_expected = np.NaN
else:
r = np.corrcoef(list(prediction), list(truth))[0][1]
ccc_expected = 2 * r * prediction.std() * truth.std() / denominator

np.testing.assert_almost_equal(
ccc,
ccc_expected,
)


@pytest.mark.parametrize('class_range,num_elements,to_string,percentage', [
([0, 10], 5, False, False),
([0, 10], 1, False, False),
122 changes: 122 additions & 0 deletions tests/test_concordance_cc.py
@@ -0,0 +1,122 @@
import numpy as np
import pandas as pd
import pytest

import audmetric


def expected_ccc(truth, prediction, ignore_nan):
r"""Expecte Concordance Correlation Coefficient.

This is a direct implementation of its math equation.

If only a single sample is given,
it should return NaN.

"""
prediction = np.array(list(prediction))
truth = np.array(list(truth))

if ignore_nan:
mask = ~(np.isnan(truth) | np.isnan(prediction))
truth = truth[mask]
prediction = prediction[mask]

if len(prediction) < 2:
Collaborator: Do we actually need those special cases where we return np.NaN, or can we simplify the function now?

Member Author: Sorry, forgot to remove this. We don't need it and it is now removed.

ccc = np.NaN
else:
denominator = (
prediction.std() ** 2
+ truth.std() ** 2
+ (prediction.mean() - truth.mean()) ** 2
)
if denominator == 0:
ccc = np.NaN
else:
r = np.corrcoef(list(prediction), list(truth))[0][1]
numerator = 2 * r * prediction.std() * truth.std()
ccc = numerator / denominator
return ccc
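A quick sanity check of this helper against the docstring example in api.py (an illustration, not part of the committed test file):

# For truth = [0, 1, 2] and prediction = [0, 1, 1]:
# numerator   = 2 * dot(a, b) = 2
# denominator = dot(a, a) + dot(b, b) + n * (mean_x - mean_y) ** 2
#             = 2/3 + 2 + 1/3 = 3
# so the expected CCC is 2 / 3 = 0.6666666666666665
assert np.isclose(expected_ccc([0, 1, 2], [0, 1, 1], ignore_nan=False), 2 / 3)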


@pytest.mark.parametrize(
'truth, prediction, ignore_nan',
[
(
np.random.randint(0, 10, size=5),
np.random.randint(0, 10, size=5),
False,
),
(
pd.Series(np.random.randint(0, 10, size=5)).astype('Int64'),
pd.Series(np.random.randint(0, 10, size=5)).astype('Int64'),
False,
),
(
np.random.randint(0, 10, size=1),
np.random.randint(0, 10, size=1),
False,
),
(
np.random.randint(0, 10, size=10),
np.random.randint(0, 10, size=10),
False,
),
(
np.random.randint(0, 2, size=100),
np.random.randint(0, 2, size=100),
False,
),
(
np.array([]),
np.array([]),
False,
),
(
np.zeros(10),
np.zeros(10),
False,
),
(
[0, 1, 2, 3, 4, 5, 6, np.NaN],
Collaborator: I think in addition we should also add cases where np.NaN is in either truth or prediction, and in both but at different locations.

Member Author: I updated the tests and added an additional test for different np.NaN locations, plus the possibility to specify the expected truth and prediction values after the mask is applied, to avoid duplicating the masking code between the test and the implementation.

[0, 2, 3, 5, 6, 7, 7, np.NaN],
False,
),
(
[0, 1, 2, 3, 4, 5, 6, np.NaN],
[0, 2, 3, 5, 6, 7, 7, np.NaN],
True,
),
]
)
def test_concordance_cc(truth, prediction, ignore_nan):

ccc = audmetric.concordance_cc(truth, prediction, ignore_nan=ignore_nan)

np.testing.assert_almost_equal(
ccc,
expected_ccc(truth, prediction, ignore_nan),
)


@pytest.mark.parametrize('ignore_nan', [True, False])
@pytest.mark.parametrize(
'truth, prediction',
[
(
[],
[],
),
(
[0],
[0],
),
(
[0, np.NaN],
[0, np.NaN],
),
]
)
def test_concordance_cc_expected_nan(truth, prediction, ignore_nan):
ccc = audmetric.concordance_cc(truth, prediction, ignore_nan=ignore_nan)
assert np.isnan(ccc)
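For reference, a sketch of the edge cases this last test pins down (mirrors the behavior of the merged implementation above; not part of the PR):

import numpy as np
import audmetric

# Fewer than two valid samples cannot define a correlation, so CCC is NaN
assert np.isnan(audmetric.concordance_cc([0], [0]))

# With ignore_nan=True, masking [0, np.nan] leaves a single sample, again NaN;
# without it, the NaN simply propagates through the computation
assert np.isnan(audmetric.concordance_cc([0, np.nan], [0, np.nan], ignore_nan=True))
assert np.isnan(audmetric.concordance_cc([0, np.nan], [0, np.nan], ignore_nan=False))

# Constant inputs give a zero denominator, which is also mapped to NaN
assert np.isnan(audmetric.concordance_cc(np.zeros(10), np.zeros(10)))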