Skip to content

Commit

Permalink
Reduce code complexity for unit tests of edge correlations
Browse files Browse the repository at this point in the history
  • Loading branch information
NilsWinter committed Aug 2, 2024
1 parent 58891fc commit ff9868c
Showing 1 changed file with 29 additions and 60 deletions.
89 changes: 29 additions & 60 deletions tests/test_edge_selection.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@
import pandas as pd
import pingouin as pg

from scipy.stats import pearsonr, spearmanr, t
from scipy.stats import pearsonr, spearmanr

from cpm.simulate_data import simulate_data
from cpm.edge_selection import (pearson_correlation_with_pvalues, spearman_correlation_with_pvalues,
Expand All @@ -16,81 +16,50 @@ def setUp(self):
super(TestEdgeStatistics, self).setUp()
self.X, self.y, self.covariates = simulate_data(n_samples=100, n_features=45)

def test_cpm_pearson(self):
"""Test CPM implementation of Pearson correlation with p-values"""
cpm_r, cpm_p = pearson_correlation_with_pvalues(self.y, self.X)
scipy_r = list()
scipy_p = list()
for feature in range(self.X.shape[1]):
c = pearsonr(self.X[:, feature], self.y)
scipy_r.append(c.correlation)
scipy_p.append(c.pvalue)
scipy_r = np.array(scipy_r)
scipy_p = np.array(scipy_p)
np.testing.assert_almost_equal(scipy_r, cpm_r, decimal=10)
np.testing.assert_almost_equal(scipy_p, cpm_p, decimal=10)
def _test_correlation(self, method, cpm_func, scipy_func):
"""Generalized test for correlation with p-values"""
cpm_r, cpm_p = cpm_func(self.y, self.X)
scipy_r, scipy_p = [], []

def test_cpm_spearman(self):
"""Test CPM implementation of Spearman correlation with p-values"""
cpm_r, cpm_p = spearman_correlation_with_pvalues(self.y, self.X)
scipy_r = list()
scipy_p = list()
for feature in range(self.X.shape[1]):
c = spearmanr(self.X[:, feature], self.y)
scipy_r.append(c.statistic)
c = scipy_func(self.X[:, feature], self.y)
scipy_r.append(c.correlation if method == 'pearson' else c.statistic)
scipy_p.append(c.pvalue)
scipy_r = np.array(scipy_r)
scipy_p = np.array(scipy_p)
np.testing.assert_almost_equal(scipy_r, cpm_r, decimal=10)
np.testing.assert_almost_equal(scipy_p, cpm_p, decimal=10)

def test_semi_partial_correlation_pearson(self):
# Calculate partial correlation using the provided function
partial_corr, p_values = semi_partial_correlation_pearson(self.y, self.X, self.covariates)

# Calculate partial correlation using pingouin
df = pd.DataFrame(np.column_stack([self.y, self.X, self.covariates]),
columns=["y"] + [f"x{i}" for i in range(self.X.shape[1])] + [f"cov{i}" for i in range(self.covariates.shape[1])])
pcorr_pingouin = []
pval_pingouin = []
for i in range(self.X.shape[1]):
result = pg.partial_corr(data=df, x="y", y=f"x{i}", covar=[f"cov{j}" for j in range(self.covariates.shape[1])], method='pearson')
pcorr_pingouin.append(result['r'].values[0])
pval_pingouin.append(result['p-val'].values[0])

# Convert to numpy arrays for easier comparison
pcorr_pingouin = np.array(pcorr_pingouin)
pval_pingouin = np.array(pval_pingouin)
np.testing.assert_almost_equal(np.array(scipy_r), cpm_r, decimal=10)
np.testing.assert_almost_equal(np.array(scipy_p), cpm_p, decimal=10)

# Assert that the partial correlation results are almost equal between the two methods
np.testing.assert_almost_equal(partial_corr, pcorr_pingouin, decimal=10)
def test_cpm_pearson(self):
self._test_correlation('pearson', pearson_correlation_with_pvalues, pearsonr)

# Assert that the p-values results are almost equal between the two methods
np.testing.assert_almost_equal(p_values, pval_pingouin, decimal=10)
def test_cpm_spearman(self):
self._test_correlation('spearman', spearman_correlation_with_pvalues, spearmanr)

def test_semi_partial_correlation_spearman(self):
def _test_semi_partial_correlation(self, method, func):
# Calculate partial correlation using the provided function
partial_corr, p_values = semi_partial_correlation_spearman(self.y, self.X, self.covariates)
partial_corr, p_values = func(self.y, self.X, self.covariates)

# Calculate partial correlation using pingouin
# Prepare DataFrame
df = pd.DataFrame(np.column_stack([self.y, self.X, self.covariates]),
columns=["y"] + [f"x{i}" for i in range(self.X.shape[1])] + [f"cov{i}" for i in range(self.covariates.shape[1])])
pcorr_pingouin = []
pval_pingouin = []
columns=["y"] + [f"x{i}" for i in range(self.X.shape[1])] + [f"cov{i}" for i in
range(self.covariates.shape[1])])
pcorr_pingouin, pval_pingouin = [], []

for i in range(self.X.shape[1]):
result = pg.partial_corr(data=df, x="y", y=f"x{i}", covar=[f"cov{j}" for j in range(self.covariates.shape[1])], method='spearman')
result = pg.partial_corr(data=df, x="y", y=f"x{i}",
covar=[f"cov{j}" for j in range(self.covariates.shape[1])],
method=method)
pcorr_pingouin.append(result['r'].values[0])
pval_pingouin.append(result['p-val'].values[0])

# Convert to numpy arrays for easier comparison
pcorr_pingouin = np.array(pcorr_pingouin)
pval_pingouin = np.array(pval_pingouin)
np.testing.assert_almost_equal(partial_corr, np.array(pcorr_pingouin), decimal=10)
np.testing.assert_almost_equal(p_values, np.array(pval_pingouin), decimal=10)

# Assert that the partial correlation results are almost equal between the two methods
np.testing.assert_almost_equal(partial_corr, pcorr_pingouin, decimal=10)
def test_semi_partial_correlation_pearson(self):
self._test_semi_partial_correlation('pearson', semi_partial_correlation_pearson)

# Assert that the p-values results are almost equal between the two methods
np.testing.assert_almost_equal(p_values, pval_pingouin, decimal=10)
def test_semi_partial_correlation_spearman(self):
self._test_semi_partial_correlation('spearman', semi_partial_correlation_spearman)


if __name__ == '__main__':
Expand Down

0 comments on commit ff9868c

Please sign in to comment.