Reduce code complexity for unit tests of edge correlations

wwu-mmll · Aug 2, 2024 · ff9868c
1 parent 58891fc
commit ff9868c
Showing 1 changed file with 29 additions and 60 deletions.
diff --git a/tests/test_edge_selection.py b/tests/test_edge_selection.py
@@ -4,7 +4,7 @@
 import pandas as pd
 import pingouin as pg
 
-from scipy.stats import pearsonr, spearmanr, t
+from scipy.stats import pearsonr, spearmanr
 
 from cpm.simulate_data import simulate_data
 from cpm.edge_selection import (pearson_correlation_with_pvalues, spearman_correlation_with_pvalues,
@@ -16,81 +16,50 @@ def setUp(self):
         super(TestEdgeStatistics, self).setUp()
         self.X, self.y, self.covariates = simulate_data(n_samples=100, n_features=45)
 
-    def test_cpm_pearson(self):
-        """Test CPM implementation of Pearson correlation with p-values"""
-        cpm_r, cpm_p = pearson_correlation_with_pvalues(self.y, self.X)
-        scipy_r = list()
-        scipy_p = list()
-        for feature in range(self.X.shape[1]):
-            c = pearsonr(self.X[:, feature], self.y)
-            scipy_r.append(c.correlation)
-            scipy_p.append(c.pvalue)
-        scipy_r = np.array(scipy_r)
-        scipy_p = np.array(scipy_p)
-        np.testing.assert_almost_equal(scipy_r, cpm_r, decimal=10)
-        np.testing.assert_almost_equal(scipy_p, cpm_p, decimal=10)
+    def _test_correlation(self, method, cpm_func, scipy_func):
+        """Generalized test for correlation with p-values"""
+        cpm_r, cpm_p = cpm_func(self.y, self.X)
+        scipy_r, scipy_p = [], []
 
-    def test_cpm_spearman(self):
-        """Test CPM implementation of Spearman correlation with p-values"""
-        cpm_r, cpm_p = spearman_correlation_with_pvalues(self.y, self.X)
-        scipy_r = list()
-        scipy_p = list()
         for feature in range(self.X.shape[1]):
-            c = spearmanr(self.X[:, feature], self.y)
-            scipy_r.append(c.statistic)
+            c = scipy_func(self.X[:, feature], self.y)
+            scipy_r.append(c.correlation if method == 'pearson' else c.statistic)
             scipy_p.append(c.pvalue)
-        scipy_r = np.array(scipy_r)
-        scipy_p = np.array(scipy_p)
-        np.testing.assert_almost_equal(scipy_r, cpm_r, decimal=10)
-        np.testing.assert_almost_equal(scipy_p, cpm_p, decimal=10)
-
-    def test_semi_partial_correlation_pearson(self):
-        # Calculate partial correlation using the provided function
-        partial_corr, p_values = semi_partial_correlation_pearson(self.y, self.X, self.covariates)
-
-        # Calculate partial correlation using pingouin
-        df = pd.DataFrame(np.column_stack([self.y, self.X, self.covariates]),
-                          columns=["y"] + [f"x{i}" for i in range(self.X.shape[1])] + [f"cov{i}" for i in range(self.covariates.shape[1])])
-        pcorr_pingouin = []
-        pval_pingouin = []
-        for i in range(self.X.shape[1]):
-            result = pg.partial_corr(data=df, x="y", y=f"x{i}", covar=[f"cov{j}" for j in range(self.covariates.shape[1])], method='pearson')
-            pcorr_pingouin.append(result['r'].values[0])
-            pval_pingouin.append(result['p-val'].values[0])
 
-        # Convert to numpy arrays for easier comparison
-        pcorr_pingouin = np.array(pcorr_pingouin)
-        pval_pingouin = np.array(pval_pingouin)
+        np.testing.assert_almost_equal(np.array(scipy_r), cpm_r, decimal=10)
+        np.testing.assert_almost_equal(np.array(scipy_p), cpm_p, decimal=10)
 
-        # Assert that the partial correlation results are almost equal between the two methods
-        np.testing.assert_almost_equal(partial_corr, pcorr_pingouin, decimal=10)
+    def test_cpm_pearson(self):
+        self._test_correlation('pearson', pearson_correlation_with_pvalues, pearsonr)
 
-        # Assert that the p-values results are almost equal between the two methods
-        np.testing.assert_almost_equal(p_values, pval_pingouin, decimal=10)
+    def test_cpm_spearman(self):
+        self._test_correlation('spearman', spearman_correlation_with_pvalues, spearmanr)
 
-    def test_semi_partial_correlation_spearman(self):
+    def _test_semi_partial_correlation(self, method, func):
         # Calculate partial correlation using the provided function
-        partial_corr, p_values = semi_partial_correlation_spearman(self.y, self.X, self.covariates)
+        partial_corr, p_values = func(self.y, self.X, self.covariates)
 
-        # Calculate partial correlation using pingouin
+        # Build a DataFrame (columns: y, x0..., cov0...) as input for pingouin's partial_corr
         df = pd.DataFrame(np.column_stack([self.y, self.X, self.covariates]),
-                          columns=["y"] + [f"x{i}" for i in range(self.X.shape[1])] + [f"cov{i}" for i in range(self.covariates.shape[1])])
-        pcorr_pingouin = []
-        pval_pingouin = []
+                          columns=["y"] + [f"x{i}" for i in range(self.X.shape[1])] + [f"cov{i}" for i in
+                                                                                       range(self.covariates.shape[1])])
+        pcorr_pingouin, pval_pingouin = [], []
+
         for i in range(self.X.shape[1]):
-            result = pg.partial_corr(data=df, x="y", y=f"x{i}", covar=[f"cov{j}" for j in range(self.covariates.shape[1])], method='spearman')
+            result = pg.partial_corr(data=df, x="y", y=f"x{i}",
+                                     covar=[f"cov{j}" for j in range(self.covariates.shape[1])],
+                                     method=method)
             pcorr_pingouin.append(result['r'].values[0])
             pval_pingouin.append(result['p-val'].values[0])
 
-        # Convert to numpy arrays for easier comparison
-        pcorr_pingouin = np.array(pcorr_pingouin)
-        pval_pingouin = np.array(pval_pingouin)
+        np.testing.assert_almost_equal(partial_corr, np.array(pcorr_pingouin), decimal=10)
+        np.testing.assert_almost_equal(p_values, np.array(pval_pingouin), decimal=10)
 
-        # Assert that the partial correlation results are almost equal between the two methods
-        np.testing.assert_almost_equal(partial_corr, pcorr_pingouin, decimal=10)
+    def test_semi_partial_correlation_pearson(self):
+        self._test_semi_partial_correlation('pearson', semi_partial_correlation_pearson)
 
-        # Assert that the p-values results are almost equal between the two methods
-        np.testing.assert_almost_equal(p_values, pval_pingouin, decimal=10)
+    def test_semi_partial_correlation_spearman(self):
+        self._test_semi_partial_correlation('spearman', semi_partial_correlation_spearman)
 
 
 if __name__ == '__main__':