Commit 3ff9842

✨ lint src files with flake8 - might introduce regression errors

- set max-line-length to 120

Henry committed May 16, 2024
1 parent 439c8cf commit 3ff9842

Showing 14 changed files with 102 additions and 74 deletions.
21 changes: 19 additions & 2 deletions .github/workflows/release.yaml
@@ -10,9 +10,27 @@ jobs:
     steps:
       - uses: actions/checkout@v4
       - uses: psf/black@stable
+  lint:
+    name: Lint with flake8
+    runs-on: ubuntu-latest
+    steps:
+      - uses: actions/checkout@v4
+
+      - uses: actions/setup-python@v5
+        with:
+          python-version: "3.11"
+      - name: Install flake8
+        run: pip install flake8 flake8-bugbear
+      - name: Lint with flake8
+        run: flake8 src
+
   publish:
     name: Publish package
     runs-on: ubuntu-latest
+    if: startsWith(github.ref, 'refs/tags')
+    needs:
+      - format
+      - lint
     steps:
       - name: Checkout
         uses: actions/checkout@v4
@@ -24,8 +42,7 @@ jobs:
         run: python -m pip install --upgrade twine build
       - name: Build
         run: python -m build
-      - name: Publish package
-        if: github.event_name == 'push' && startsWith(github.ref, 'refs/tags')
+      - name: Publish package
         uses: pypa/gh-action-pypi-publish@release/v1
         with:
           user: __token__
4 changes: 4 additions & 0 deletions setup.cfg
@@ -34,3 +34,7 @@ where = src
 [options.entry_points]
 console_scripts =
     move-dl=move.__main__:main
+
+[flake8]
+max-line-length = 120
+aggressive = 2
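
A note on the new config: flake8 automatically picks up the [flake8] section of setup.cfg, so the CI step `flake8 src` will enforce max-line-length = 120 without extra flags. The `aggressive` key, as far as I can tell, belongs to autopep8 rather than flake8, so flake8 will most likely ignore it. A minimal sketch (standard library only) to list what the section actually declares:

# check_flake8_cfg.py -- sketch: print the options declared under [flake8].
# Note: "aggressive" is an autopep8 option, not a flake8 one, so flake8 is
# not expected to act on it.
from configparser import ConfigParser

parser = ConfigParser()
parser.read("setup.cfg")
for key, value in parser["flake8"].items():
    print(f"{key} = {value}")
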
7 changes: 3 additions & 4 deletions src/move/__init__.py
@@ -1,11 +1,10 @@
 from __future__ import annotations
+from move.training.training_loop import training_loop
+from move.models.vae import VAE
+from move import conf, data, models
 
 __license__ = "MIT"
 __version__ = (1, 4, 10)
 __all__ = ["conf", "data", "models", "training_loop", "VAE"]
 
 HYDRA_VERSION_BASE = "1.2"
-
-from move import conf, data, models
-from move.models.vae import VAE
-from move.training.training_loop import training_loop
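
Moving the three imports above HYDRA_VERSION_BASE silences E402 (module level import not at top of file). Bottom-of-file imports in an __init__.py are often deliberate, though, to break circular imports, which may be part of what the commit message's "might introduce regression errors" is hedging against. A sketch of the pattern E402 flags, with made-up names:

# e402_sketch.py -- hypothetical module reproducing the E402 pattern above.
from __future__ import annotations

SOME_CONSTANT = "1.2"  # any code before an import makes the next line E402

import json  # flake8 reports E402 (module level import not at top of file)

print(json.dumps({"constant": SOME_CONSTANT}))
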
7 changes: 3 additions & 4 deletions src/move/data/perturbations.py
@@ -11,7 +11,6 @@
 from move.data.preprocessing import feature_stats
 from move.visualization.dataset_distributions import plot_value_distributions
 
-
 ContinuousPerturbationType = Literal["minimum", "maximum", "plus_std", "minus_std"]
 
 
@@ -42,7 +41,7 @@ def perturb_categorical_data(
     splits = np.cumsum(
         [0] + [int.__mul__(*shape) for shape in baseline_dataset.cat_shapes]
     )
-    slice_ = slice(*splits[target_idx : target_idx + 2])
+    slice_ = slice(*splits[target_idx: target_idx + 2])
 
     target_shape = baseline_dataset.cat_shapes[target_idx]
     num_features = target_shape[0]  # CHANGE
@@ -94,7 +93,7 @@ def perturb_continuous_data(
 
     target_idx = con_dataset_names.index(target_dataset_name)
     splits = np.cumsum([0] + baseline_dataset.con_shapes)
-    slice_ = slice(*splits[target_idx : target_idx + 2])
+    slice_ = slice(*splits[target_idx: target_idx + 2])
 
     num_features = baseline_dataset.con_shapes[target_idx]
 
@@ -155,7 +154,7 @@ def perturb_continuous_data_extended(
 
     target_idx = con_dataset_names.index(target_dataset_name)  # dataset index
     splits = np.cumsum([0] + baseline_dataset.con_shapes)
-    slice_ = slice(*splits[target_idx : target_idx + 2])
+    slice_ = slice(*splits[target_idx: target_idx + 2])
 
     num_features = baseline_dataset.con_shapes[target_idx]
     dataloaders = []
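
All three hunks above make the same edit: dropping the space before the colon in a slice. The spaced form is what Black emits when a slice bound is a complex expression, while flake8's E203 flags whitespace before ':'. Since this repository's release workflow runs both Black and flake8, the next Black run may well reinstate the spaces; the common reconciliation is extend-ignore = E203 in the flake8 config. A small sketch of the conflict:

# e203_sketch.py -- the two slice spellings are equivalent at runtime.
import numpy as np

splits = np.cumsum([0, 3, 5])
target_idx = 1

# Black's style for complex slice bounds; flake8 flags it as E203:
slice_a = slice(*splits[target_idx : target_idx + 2])
# The spelling adopted by this commit; E203-clean, but Black may revert it:
slice_b = slice(*splits[target_idx: target_idx + 2])

assert slice_a == slice_b
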
36 changes: 18 additions & 18 deletions src/move/models/vae.py
@@ -43,7 +43,7 @@ def __init__(
         continuous_shapes: Optional[list[int]] = None,
         categorical_weights: Optional[list[int]] = None,
         continuous_weights: Optional[list[int]] = None,
-        num_hidden: list[int] = [200, 200],
+        num_hidden: list[int] = (200, 200),
         num_latent: int = 20,
         beta: float = 0.01,
         dropout: float = 0.2,
@@ -99,11 +99,11 @@ def __init__(
 
         # Initialize simple attributes
         self.beta = beta
-        self.num_hidden = num_hidden
+        self.num_hidden = list(num_hidden)
         self.num_latent = num_latent
         self.dropout = dropout
 
-        self.device = torch.device("cuda" if cuda == True else "cpu")
+        self.device = torch.device("cuda" if cuda else "cpu")
 
         # Activation functions
         self.relu = nn.LeakyReLU()
@@ -116,7 +116,7 @@ def __init__(
         self.decoderlayers = nn.ModuleList()
         self.decodernorms = nn.ModuleList()
 
-        ### Layers
+        # Layers
         # Hidden layers
         for nin, nout in zip([self.input_size] + self.num_hidden, self.num_hidden):
             self.encoderlayers.append(nn.Linear(nin, nout))
@@ -190,7 +190,7 @@ def decompose_categorical(self, reconstruction: torch.Tensor) -> list[torch.Tens
         cat_out = []
         pos = 0
         for cat_shape in self.categorical_shapes:
-            cat_dataset = cat_tmp[:, pos : (cat_shape[0] * cat_shape[1] + pos)]
+            cat_dataset = cat_tmp[:, pos: (cat_shape[0] * cat_shape[1] + pos)]
 
             cat_out_tmp = cat_dataset.view(
                 cat_dataset.shape[0], cat_shape[0], cat_shape[1]
@@ -287,7 +287,7 @@ def calculate_cat_error(
         cat_errors = []
         pos = 0
         for cat_shape in self.categorical_shapes:
-            cat_dataset = cat_in[:, pos : (cat_shape[0] * cat_shape[1] + pos)]
+            cat_dataset = cat_in[:, pos: (cat_shape[0] * cat_shape[1] + pos)]
 
             cat_dataset = cat_dataset.view(cat_in.shape[0], cat_shape[0], cat_shape[1])
             cat_target = cat_dataset
@@ -327,8 +327,8 @@ def calculate_con_error(
         total_shape = 0
         con_errors_list: list[torch.Tensor] = []
         for s in self.continuous_shapes:
-            c_in = con_in[:, total_shape : (s + total_shape - 1)]
-            c_re = con_out[:, total_shape : (s + total_shape - 1)]
+            c_in = con_in[:, total_shape: (s + total_shape - 1)]
+            c_re = con_out[:, total_shape: (s + total_shape - 1)]
             error = loss(c_re, c_in) / batch_size
             con_errors_list.append(error)
             total_shape += s
@@ -451,7 +451,7 @@ def encoding(
         elif self.num_continuous > 0:
             tensor = con
         else:
-            assert False, "Must have at least 1 categorial or 1 continuous feature"
+            raise ValueError("Must have at least 1 categorial or 1 continuous feature")
 
         optimizer.zero_grad()
 
@@ -538,21 +538,21 @@ def get_cat_recon(
         shape_1 = 0
         for cat_shape in self.categorical_shapes:
             # Get input categorical data
-            cat_in_tmp = cat[:, pos : (cat_shape[0] * cat_shape[1] + pos)]
+            cat_in_tmp = cat[:, pos: (cat_shape[0] * cat_shape[1] + pos)]
             cat_in_tmp = cat_in_tmp.view(cat.shape[0], cat_shape[0], cat_shape[1])
 
             # Calculate target values for input
             cat_target_tmp = cat_in_tmp
             cat_target_tmp = torch.argmax(cat_target_tmp.detach(), dim=2)
             cat_target_tmp[cat_in_tmp.sum(dim=2) == 0] = -1
-            cat_target[:, shape_1 : (cat_shape[0] + shape_1)] = (
+            cat_target[:, shape_1: (cat_shape[0] + shape_1)] = (
                 cat_target_tmp  # .numpy()
             )
 
             # Get reconstructed categorical data
             cat_out_tmp = cat_out[count]
             cat_out_tmp = cat_out_tmp.transpose(1, 2)
-            cat_out_class[:, shape_1 : (cat_shape[0] + shape_1)] = torch.argmax(
+            cat_out_class[:, shape_1: (cat_shape[0] + shape_1)] = torch.argmax(
                 cat_out_tmp, dim=2
             )  # .numpy()
 
@@ -694,7 +694,7 @@ def latent(
         elif self.num_continuous > 0:
             tensor = con
         else:
-            assert False, "Must have at least 1 categorial or 1 continuous feature"
+            raise ValueError("Must have at least 1 categorial or 1 continuous feature")
 
         # Evaluate
         cat_out, con_out, mu, logvar = self(tensor)
@@ -713,14 +713,14 @@ def latent(
                 cat_out_class, cat_target = self.get_cat_recon(
                     batch, cat_total_shape, cat, cat_out
                 )
-                cat_recon[row : row + len(cat_out_class)] = torch.Tensor(cat_out_class)
-                cat_class[row : row + len(cat_target)] = torch.Tensor(cat_target)
+                cat_recon[row: row + len(cat_out_class)] = torch.Tensor(cat_out_class)
+                cat_class[row: row + len(cat_target)] = torch.Tensor(cat_target)
 
             if self.num_continuous > 0:
-                con_recon[row : row + len(con_out)] = con_out
+                con_recon[row: row + len(con_out)] = con_out
 
-            latent_var[row : row + len(logvar)] = logvar
-            latent[row : row + len(mu)] = mu
+            latent_var[row: row + len(logvar)] = logvar
+            latent[row: row + len(mu)] = mu
             row += len(mu)
 
         test_loss /= len(dataloader)
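
Two bugbear-style fixes stand out in this file. The assert False lines become raise ValueError (B011: asserts vanish under python -O), and the num_hidden default changes from a list to a tuple, which reads like B006 (mutable default argument): a list default is built once at definition time and shared by every call, so the body now copies it with list(num_hidden). One wrinkle: the annotation still says list[int] although the default is now a tuple; Sequence[int] would cover both. A self-contained sketch of the B006 pitfall (illustrative names, not the repo's):

# b006_sketch.py -- why flake8-bugbear flags mutable default arguments.
from typing import Sequence


def bad(layers: list[int] = [200, 200]) -> list[int]:
    layers.append(1)  # mutates the one shared default list
    return layers


def good(layers: Sequence[int] = (200, 200)) -> list[int]:
    result = list(layers)  # per-call copy, as VAE.__init__ now does
    result.append(1)
    return result


assert bad() == [200, 200, 1]
assert bad() == [200, 200, 1, 1]  # the shared default accumulated state
assert good() == good() == [200, 200, 1]  # fresh copy every call
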
12 changes: 6 additions & 6 deletions src/move/tasks/analyze_latent.py
@@ -48,17 +48,17 @@ def find_feature_values(
     Tuple containing (1) index of dataset containing feature and (2)
     values corresponding to the feature
     """
-    dataset_index, feature_index = [None] * 2
-    for dataset_index, feature_names in enumerate(feature_names_lists):
+    _dataset_index, feature_index = [None] * 2
+    for _dataset_index, feature_names in enumerate(feature_names_lists):
         try:
             feature_index = feature_names.index(feature_name)
         except ValueError:
             continue
         break
-    if dataset_index is not None and feature_index is not None:
+    if _dataset_index is not None and feature_index is not None:
         return (
-            dataset_index,
-            np.take(feature_values[dataset_index], feature_index, axis=1),
+            _dataset_index,
+            np.take(feature_values[_dataset_index], feature_index, axis=1),
         )
     raise KeyError(f"Feature '{feature_name}' not in any dataset.")
 
@@ -98,7 +98,7 @@ def analyze_latent(config: MOVEConfig) -> None:
     df_index = pd.Index(sample_names, name="sample")
 
     assert task_config.model is not None
-    device = torch.device("cuda" if task_config.model.cuda == True else "cpu")
+    device = torch.device("cuda" if task_config.model.cuda else "cpu")
     model: VAE = hydra.utils.instantiate(
         task_config.model,
         continuous_shapes=test_dataset.con_shapes,
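
The underscore prefix on _dataset_index looks like a fix for bugbear's B007 (loop control variable not used within the loop body): the index is only read after the loop finishes, and B007 exempts names that start with an underscore. Behavior is unchanged. A toy sketch of the same pattern:

# b007_sketch.py -- loop variable consumed only after the loop (B007).
names_lists = [["a", "b"], ["c", "d"]]

_found_index = None
for _found_index, names in enumerate(names_lists):
    if "c" in names:
        break  # _found_index is read below, not inside the loop body

print(_found_index)  # -> 1
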
21 changes: 10 additions & 11 deletions src/move/tasks/identify_associations.py
@@ -14,7 +14,6 @@
 from torch.utils.data import DataLoader
 
 from move.analysis.metrics import get_2nd_order_polynomial
-
 from move.conf.schema import (
     IdentifyAssociationsBayesConfig,
     IdentifyAssociationsConfig,
@@ -202,7 +201,7 @@ def _bayes_approach(
 ) -> tuple[Union[IntArray, FloatArray], ...]:
 
     assert task_config.model is not None
-    device = torch.device("cuda" if task_config.model.cuda == True else "cpu")
+    device = torch.device("cuda" if task_config.model.cuda else "cpu")
 
     # Train models
     logger = get_logger(__name__)
@@ -319,7 +318,7 @@ def _ttest_approach(
     from scipy.stats import ttest_rel
 
     assert task_config.model is not None
-    device = torch.device("cuda" if task_config.model.cuda == True else "cpu")
+    device = torch.device("cuda" if task_config.model.cuda else "cpu")
 
     # Train models
     logger = get_logger(__name__)
@@ -463,7 +462,7 @@ def _ks_approach(
     """
 
     assert task_config.model is not None
-    device = torch.device("cuda" if task_config.model.cuda == True else "cpu")
+    device = torch.device("cuda" if task_config.model.cuda else "cpu")
     figure_path = output_path / "figures"
     figure_path.mkdir(exist_ok=True, parents=True)
 
@@ -524,7 +523,7 @@ def _ks_approach(
     min_baseline = np.min(baseline_recon, axis=0)
     max_baseline = np.max(baseline_recon, axis=0)
 
-    ############ QC of feature's reconstruction ##############################
+    # QC of feature's reconstruction ##############################
     logger.debug("Calculating quality control of the feature reconstructions")
     # Correlation and slope for each feature's reconstruction
     feature_names = reduce(list.__add__, con_names)
@@ -549,7 +548,7 @@ def _ks_approach(
             dpi=50,
         )
 
-    ################## Calculate perturbed reconstruction and shifts #############################
+    # Calculate perturbed reconstruction and shifts #############################
     logger.debug("Computing KS scores")
 
     # Save original latent space for first refit:
@@ -646,7 +645,7 @@ def _ks_approach(
     qc_df = pd.DataFrame({"Feature names": feature_names})
     qc_df["slope"] = np.nanmean(slope, axis=0)
     qc_df["reconstruction_correlation"] = np.nanmean(rec_corr, axis=0)
-    qc_df.to_csv(output_path / f"QC_summary_KS.tsv", sep="\t", index=False)
+    qc_df.to_csv(output_path / "QC_summary_KS.tsv", sep="\t", index=False)
 
     # Return first idx associations: redefined for reasonable threshold
 
@@ -739,8 +738,8 @@ def identify_associations(config: MOVEConfig) -> None:
     2) Evaluate associations using bayes or ttest approach.
     3) Save results.
     """
-    #################### DATA PREPARATION ######################
-    ####### Read original data and create perturbed datasets####
+    # DATA PREPARATION ######################
+    # Read original data and create perturbed datasets####
 
     logger = get_logger(__name__)
     task_config = cast(IdentifyAssociationsConfig, config.task)
@@ -811,7 +810,7 @@ def identify_associations(config: MOVEConfig) -> None:
     num_perturbed = len(dataloaders) - 1  # P
     logger.debug(f"# perturbed features: {num_perturbed}")
 
-    ################# APPROACH EVALUATION ##########################
+    # APPROACH EVALUATION ##########################
 
     if task_type == "bayes":
         task_config = cast(IdentifyAssociationsBayesConfig, task_config)
@@ -870,7 +869,7 @@ def identify_associations(config: MOVEConfig) -> None:
     else:
         raise ValueError()
 
-    ###################### RESULTS ################################
+    # RESULTS ################################
     save_results(
         config,
         con_shapes,
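
Most hunks in this file fix one of three things: == True comparisons (E712, discussed under tune_model below), ########-style banner comments (E265/E266: block comments should start with '# '), and an f-string with no placeholders (pyflakes F541). A condensed sketch of the latter two:

# f541_sketch.py -- condensed before/after for the comment and f-string fixes.
from pathlib import Path

output_path = Path("results")  # illustrative path

############ banners like this trigger E265/E266 ############
# a block comment starting with '# ' passes

fname_old = f"QC_summary_KS.tsv"  # F541: f-string without placeholders
fname_new = "QC_summary_KS.tsv"  # plain string, identical value
assert fname_old == fname_new
print(output_path / fname_new)
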
5 changes: 2 additions & 3 deletions src/move/tasks/tune_model.py
@@ -1,7 +1,6 @@
 __all__ = ["tune_model"]
 
 from pathlib import Path
-from random import shuffle
 from typing import Any, Literal, cast
 
 import hydra
@@ -26,7 +25,7 @@
     TuneModelStabilityConfig,
 )
 from move.core.logging import get_logger
-from move.core.typing import BoolArray, FloatArray
+from move.core.typing import BoolArray
 from move.data import io
 from move.data.dataloaders import MOVEDataset, make_dataloader, split_samples
 from move.models.vae import VAE
@@ -87,7 +86,7 @@ def tune_model(config: MOVEConfig) -> float:
     )
 
     assert task_config.model is not None
-    device = torch.device("cuda" if task_config.model.cuda == True else "cpu")
+    device = torch.device("cuda" if task_config.model.cuda is True else "cpu")
 
     def _tune_stability(
         task_config: TuneModelStabilityConfig,
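
The cuda == True comparisons fixed across this commit are pycodestyle's E712 ("comparison to True should be 'if cond is True:' or 'if cond:'"). The fixes are not quite uniform: the other files switched to plain truthiness (if task_config.model.cuda), while this one uses is True, which matches only the literal True. Both spellings satisfy E712, but they can disagree on non-bool values, as this sketch shows:

# e712_sketch.py -- the two E712-compliant spellings are not equivalent.
for cuda in (True, False, 1):  # 1 is truthy but is not the literal True
    by_truthiness = "cuda" if cuda else "cpu"
    by_identity = "cuda" if cuda is True else "cpu"
    print(f"{cuda!r}: truthiness={by_truthiness}, identity={by_identity}")
# 1 prints truthiness=cuda but identity=cpu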