From ff9868c64e8c5827cb57149200adfdad0cc54782 Mon Sep 17 00:00:00 2001 From: Nils Winter Date: Fri, 2 Aug 2024 14:38:16 +0200 Subject: [PATCH] Reduce code complexity for unit tests of edge correlations --- tests/test_edge_selection.py | 89 ++++++++++++------------------------ 1 file changed, 29 insertions(+), 60 deletions(-) diff --git a/tests/test_edge_selection.py b/tests/test_edge_selection.py index 4c0e591..d44eadb 100644 --- a/tests/test_edge_selection.py +++ b/tests/test_edge_selection.py @@ -4,7 +4,7 @@ import pandas as pd import pingouin as pg -from scipy.stats import pearsonr, spearmanr, t +from scipy.stats import pearsonr, spearmanr from cpm.simulate_data import simulate_data from cpm.edge_selection import (pearson_correlation_with_pvalues, spearman_correlation_with_pvalues, @@ -16,81 +16,50 @@ def setUp(self): super(TestEdgeStatistics, self).setUp() self.X, self.y, self.covariates = simulate_data(n_samples=100, n_features=45) - def test_cpm_pearson(self): - """Test CPM implementation of Pearson correlation with p-values""" - cpm_r, cpm_p = pearson_correlation_with_pvalues(self.y, self.X) - scipy_r = list() - scipy_p = list() - for feature in range(self.X.shape[1]): - c = pearsonr(self.X[:, feature], self.y) - scipy_r.append(c.correlation) - scipy_p.append(c.pvalue) - scipy_r = np.array(scipy_r) - scipy_p = np.array(scipy_p) - np.testing.assert_almost_equal(scipy_r, cpm_r, decimal=10) - np.testing.assert_almost_equal(scipy_p, cpm_p, decimal=10) + def _test_correlation(self, method, cpm_func, scipy_func): + """Generalized test for correlation with p-values""" + cpm_r, cpm_p = cpm_func(self.y, self.X) + scipy_r, scipy_p = [], [] - def test_cpm_spearman(self): - """Test CPM implementation of Spearman correlation with p-values""" - cpm_r, cpm_p = spearman_correlation_with_pvalues(self.y, self.X) - scipy_r = list() - scipy_p = list() for feature in range(self.X.shape[1]): - c = spearmanr(self.X[:, feature], self.y) - scipy_r.append(c.statistic) + c = scipy_func(self.X[:, feature], self.y) + scipy_r.append(c.correlation if method == 'pearson' else c.statistic) scipy_p.append(c.pvalue) - scipy_r = np.array(scipy_r) - scipy_p = np.array(scipy_p) - np.testing.assert_almost_equal(scipy_r, cpm_r, decimal=10) - np.testing.assert_almost_equal(scipy_p, cpm_p, decimal=10) - - def test_semi_partial_correlation_pearson(self): - # Calculate partial correlation using the provided function - partial_corr, p_values = semi_partial_correlation_pearson(self.y, self.X, self.covariates) - - # Calculate partial correlation using pingouin - df = pd.DataFrame(np.column_stack([self.y, self.X, self.covariates]), - columns=["y"] + [f"x{i}" for i in range(self.X.shape[1])] + [f"cov{i}" for i in range(self.covariates.shape[1])]) - pcorr_pingouin = [] - pval_pingouin = [] - for i in range(self.X.shape[1]): - result = pg.partial_corr(data=df, x="y", y=f"x{i}", covar=[f"cov{j}" for j in range(self.covariates.shape[1])], method='pearson') - pcorr_pingouin.append(result['r'].values[0]) - pval_pingouin.append(result['p-val'].values[0]) - # Convert to numpy arrays for easier comparison - pcorr_pingouin = np.array(pcorr_pingouin) - pval_pingouin = np.array(pval_pingouin) + np.testing.assert_almost_equal(np.array(scipy_r), cpm_r, decimal=10) + np.testing.assert_almost_equal(np.array(scipy_p), cpm_p, decimal=10) - # Assert that the partial correlation results are almost equal between the two methods - np.testing.assert_almost_equal(partial_corr, pcorr_pingouin, decimal=10) + def test_cpm_pearson(self): + self._test_correlation('pearson', pearson_correlation_with_pvalues, pearsonr) - # Assert that the p-values results are almost equal between the two methods - np.testing.assert_almost_equal(p_values, pval_pingouin, decimal=10) + def test_cpm_spearman(self): + self._test_correlation('spearman', spearman_correlation_with_pvalues, spearmanr) - def test_semi_partial_correlation_spearman(self): + def _test_semi_partial_correlation(self, method, func): # Calculate partial correlation using the provided function - partial_corr, p_values = semi_partial_correlation_spearman(self.y, self.X, self.covariates) + partial_corr, p_values = func(self.y, self.X, self.covariates) - # Calculate partial correlation using pingouin + # Prepare DataFrame df = pd.DataFrame(np.column_stack([self.y, self.X, self.covariates]), - columns=["y"] + [f"x{i}" for i in range(self.X.shape[1])] + [f"cov{i}" for i in range(self.covariates.shape[1])]) - pcorr_pingouin = [] - pval_pingouin = [] + columns=["y"] + [f"x{i}" for i in range(self.X.shape[1])] + [f"cov{i}" for i in + range(self.covariates.shape[1])]) + pcorr_pingouin, pval_pingouin = [], [] + for i in range(self.X.shape[1]): - result = pg.partial_corr(data=df, x="y", y=f"x{i}", covar=[f"cov{j}" for j in range(self.covariates.shape[1])], method='spearman') + result = pg.partial_corr(data=df, x="y", y=f"x{i}", + covar=[f"cov{j}" for j in range(self.covariates.shape[1])], + method=method) pcorr_pingouin.append(result['r'].values[0]) pval_pingouin.append(result['p-val'].values[0]) - # Convert to numpy arrays for easier comparison - pcorr_pingouin = np.array(pcorr_pingouin) - pval_pingouin = np.array(pval_pingouin) + np.testing.assert_almost_equal(partial_corr, np.array(pcorr_pingouin), decimal=10) + np.testing.assert_almost_equal(p_values, np.array(pval_pingouin), decimal=10) - # Assert that the partial correlation results are almost equal between the two methods - np.testing.assert_almost_equal(partial_corr, pcorr_pingouin, decimal=10) + def test_semi_partial_correlation_pearson(self): + self._test_semi_partial_correlation('pearson', semi_partial_correlation_pearson) - # Assert that the p-values results are almost equal between the two methods - np.testing.assert_almost_equal(p_values, pval_pingouin, decimal=10) + def test_semi_partial_correlation_spearman(self): + self._test_semi_partial_correlation('spearman', semi_partial_correlation_spearman) if __name__ == '__main__':