diff --git a/.github/workflows/release.yaml b/.github/workflows/release.yaml
index 1f7a8044..f0589f2f 100644
--- a/.github/workflows/release.yaml
+++ b/.github/workflows/release.yaml
@@ -10,9 +10,27 @@ jobs:
     steps:
      - uses: actions/checkout@v4
      - uses: psf/black@stable
+  lint:
+    name: Lint with flake8
+    runs-on: ubuntu-latest
+    steps:
+      - uses: actions/checkout@v4
+
+      - uses: actions/setup-python@v5
+        with:
+          python-version: "3.11"
+      - name: Install flake8
+        run: pip install flake8 flake8-bugbear
+      - name: Lint with flake8
+        run: flake8 src
+
   publish:
     name: Publish package
     runs-on: ubuntu-latest
+    if: startsWith(github.ref, 'refs/tags')
+    needs:
+      - format
+      - lint
     steps:
       - name: Checkout
         uses: actions/checkout@v4
@@ -24,8 +42,7 @@ jobs:
         run: python -m pip install --upgrade twine build
       - name: Build
         run: python -m build
-      - name: Publish package
-        if: github.event_name == 'push' && startsWith(github.ref, 'refs/tags')
+      - name: Publish package
         uses: pypa/gh-action-pypi-publish@release/v1
         with:
           user: __token__
diff --git a/setup.cfg b/setup.cfg
index cb3ead97..88946402 100644
--- a/setup.cfg
+++ b/setup.cfg
@@ -34,3 +34,7 @@ where = src
 [options.entry_points]
 console_scripts =
     move-dl=move.__main__:main
+
+[flake8]
+max-line-length = 120
+aggressive = 2
\ No newline at end of file
diff --git a/src/move/__init__.py b/src/move/__init__.py
index a4afcdcb..ae9b8e45 100644
--- a/src/move/__init__.py
+++ b/src/move/__init__.py
@@ -1,11 +1,10 @@
 from __future__ import annotations
 
+from move.training.training_loop import training_loop
+from move.models.vae import VAE
+from move import conf, data, models
 __license__ = "MIT"
 __version__ = (1, 4, 10)
 __all__ = ["conf", "data", "models", "training_loop", "VAE"]
 
 HYDRA_VERSION_BASE = "1.2"
-
-from move import conf, data, models
-from move.models.vae import VAE
-from move.training.training_loop import training_loop
diff --git a/src/move/data/perturbations.py b/src/move/data/perturbations.py
index 4249428c..21ebddbf 100644
--- a/src/move/data/perturbations.py
+++ b/src/move/data/perturbations.py
@@ -11,7 +11,6 @@
 from move.data.preprocessing import feature_stats
 from move.visualization.dataset_distributions import plot_value_distributions
 
-
 ContinuousPerturbationType = Literal["minimum", "maximum", "plus_std", "minus_std"]
 
 
@@ -42,7 +41,7 @@ def perturb_categorical_data(
     splits = np.cumsum(
         [0] + [int.__mul__(*shape) for shape in baseline_dataset.cat_shapes]
     )
-    slice_ = slice(*splits[target_idx : target_idx + 2])
+    slice_ = slice(*splits[target_idx: target_idx + 2])
 
     target_shape = baseline_dataset.cat_shapes[target_idx]
     num_features = target_shape[0]  # CHANGE
@@ -94,7 +93,7 @@ def perturb_continuous_data(
 
     target_idx = con_dataset_names.index(target_dataset_name)
     splits = np.cumsum([0] + baseline_dataset.con_shapes)
-    slice_ = slice(*splits[target_idx : target_idx + 2])
+    slice_ = slice(*splits[target_idx: target_idx + 2])
 
     num_features = baseline_dataset.con_shapes[target_idx]
 
@@ -155,7 +154,7 @@ def perturb_continuous_data_extended(
     target_idx = con_dataset_names.index(target_dataset_name)  # dataset index
     splits = np.cumsum([0] + baseline_dataset.con_shapes)
-    slice_ = slice(*splits[target_idx : target_idx + 2])
+    slice_ = slice(*splits[target_idx: target_idx + 2])
 
     num_features = baseline_dataset.con_shapes[target_idx]
 
     dataloaders = []
diff --git a/src/move/models/vae.py b/src/move/models/vae.py
index cd42f7de..23de0124 100644
--- a/src/move/models/vae.py
+++ b/src/move/models/vae.py
@@ -43,7 +43,7 @@ def __init__(
         continuous_shapes: Optional[list[int]] = None,
         categorical_weights: Optional[list[int]] = None,
         continuous_weights: Optional[list[int]] = None,
-        num_hidden: list[int] = [200, 200],
+        num_hidden: list[int] = (200, 200),
         num_latent: int = 20,
         beta: float = 0.01,
         dropout: float = 0.2,
@@ -99,11 +99,11 @@ def __init__(
 
         # Initialize simple attributes
         self.beta = beta
-        self.num_hidden = num_hidden
+        self.num_hidden = list(num_hidden)
         self.num_latent = num_latent
         self.dropout = dropout
 
-        self.device = torch.device("cuda" if cuda == True else "cpu")
+        self.device = torch.device("cuda" if cuda else "cpu")
 
         # Activation functions
         self.relu = nn.LeakyReLU()
@@ -116,7 +116,7 @@ def __init__(
         self.decoderlayers = nn.ModuleList()
         self.decodernorms = nn.ModuleList()
 
-        ### Layers
+        # Layers
         # Hidden layers
         for nin, nout in zip([self.input_size] + self.num_hidden, self.num_hidden):
             self.encoderlayers.append(nn.Linear(nin, nout))
@@ -190,7 +190,7 @@ def decompose_categorical(self, reconstruction: torch.Tensor) -> list[torch.Tens
         cat_out = []
         pos = 0
         for cat_shape in self.categorical_shapes:
-            cat_dataset = cat_tmp[:, pos : (cat_shape[0] * cat_shape[1] + pos)]
+            cat_dataset = cat_tmp[:, pos: (cat_shape[0] * cat_shape[1] + pos)]
             cat_out_tmp = cat_dataset.view(
                 cat_dataset.shape[0], cat_shape[0], cat_shape[1]
             )
@@ -287,7 +287,7 @@ def calculate_cat_error(
         cat_errors = []
         pos = 0
         for cat_shape in self.categorical_shapes:
-            cat_dataset = cat_in[:, pos : (cat_shape[0] * cat_shape[1] + pos)]
+            cat_dataset = cat_in[:, pos: (cat_shape[0] * cat_shape[1] + pos)]
 
             cat_dataset = cat_dataset.view(cat_in.shape[0], cat_shape[0], cat_shape[1])
             cat_target = cat_dataset
@@ -327,8 +327,8 @@ def calculate_con_error(
         total_shape = 0
         con_errors_list: list[torch.Tensor] = []
         for s in self.continuous_shapes:
-            c_in = con_in[:, total_shape : (s + total_shape - 1)]
-            c_re = con_out[:, total_shape : (s + total_shape - 1)]
+            c_in = con_in[:, total_shape: (s + total_shape - 1)]
+            c_re = con_out[:, total_shape: (s + total_shape - 1)]
             error = loss(c_re, c_in) / batch_size
             con_errors_list.append(error)
             total_shape += s
@@ -451,7 +451,7 @@ def encoding(
             elif self.num_continuous > 0:
                 tensor = con
             else:
-                assert False, "Must have at least 1 categorial or 1 continuous feature"
+                raise ValueError("Must have at least 1 categorial or 1 continuous feature")
 
             optimizer.zero_grad()
 
@@ -538,21 +538,21 @@ def get_cat_recon(
         shape_1 = 0
         for cat_shape in self.categorical_shapes:
             # Get input categorical data
-            cat_in_tmp = cat[:, pos : (cat_shape[0] * cat_shape[1] + pos)]
+            cat_in_tmp = cat[:, pos: (cat_shape[0] * cat_shape[1] + pos)]
             cat_in_tmp = cat_in_tmp.view(cat.shape[0], cat_shape[0], cat_shape[1])
 
             # Calculate target values for input
             cat_target_tmp = cat_in_tmp
             cat_target_tmp = torch.argmax(cat_target_tmp.detach(), dim=2)
             cat_target_tmp[cat_in_tmp.sum(dim=2) == 0] = -1
-            cat_target[:, shape_1 : (cat_shape[0] + shape_1)] = (
+            cat_target[:, shape_1: (cat_shape[0] + shape_1)] = (
                 cat_target_tmp  # .numpy()
             )
 
             # Get reconstructed categorical data
             cat_out_tmp = cat_out[count]
             cat_out_tmp = cat_out_tmp.transpose(1, 2)
-            cat_out_class[:, shape_1 : (cat_shape[0] + shape_1)] = torch.argmax(
+            cat_out_class[:, shape_1: (cat_shape[0] + shape_1)] = torch.argmax(
                 cat_out_tmp, dim=2
             )  # .numpy()
 
@@ -694,7 +694,7 @@ def latent(
             elif self.num_continuous > 0:
                 tensor = con
             else:
-                assert False, "Must have at least 1 categorial or 1 continuous feature"
+                raise ValueError("Must have at least 1 categorial or 1 continuous feature")
 
             # Evaluate
             cat_out, con_out, mu, logvar = self(tensor)
@@ -713,14 +713,14 @@ def latent(
                 cat_out_class, cat_target = self.get_cat_recon(
                     batch, cat_total_shape, cat, cat_out
                 )
-                cat_recon[row : row + len(cat_out_class)] = torch.Tensor(cat_out_class)
-                cat_class[row : row + len(cat_target)] = torch.Tensor(cat_target)
+                cat_recon[row: row + len(cat_out_class)] = torch.Tensor(cat_out_class)
+                cat_class[row: row + len(cat_target)] = torch.Tensor(cat_target)
 
             if self.num_continuous > 0:
-                con_recon[row : row + len(con_out)] = con_out
+                con_recon[row: row + len(con_out)] = con_out
 
-            latent_var[row : row + len(logvar)] = logvar
-            latent[row : row + len(mu)] = mu
+            latent_var[row: row + len(logvar)] = logvar
+            latent[row: row + len(mu)] = mu
             row += len(mu)
 
         test_loss /= len(dataloader)
diff --git a/src/move/tasks/analyze_latent.py b/src/move/tasks/analyze_latent.py
index 788d08d5..c6d59125 100644
--- a/src/move/tasks/analyze_latent.py
+++ b/src/move/tasks/analyze_latent.py
@@ -48,17 +48,17 @@ def find_feature_values(
         Tuple containing (1) index of dataset containing feature and (2) values
         corresponding to the feature
     """
-    dataset_index, feature_index = [None] * 2
-    for dataset_index, feature_names in enumerate(feature_names_lists):
+    _dataset_index, feature_index = [None] * 2
+    for _dataset_index, feature_names in enumerate(feature_names_lists):
         try:
             feature_index = feature_names.index(feature_name)
         except ValueError:
             continue
         break
 
-    if dataset_index is not None and feature_index is not None:
+    if _dataset_index is not None and feature_index is not None:
         return (
-            dataset_index,
-            np.take(feature_values[dataset_index], feature_index, axis=1),
+            _dataset_index,
+            np.take(feature_values[_dataset_index], feature_index, axis=1),
         )
     raise KeyError(f"Feature '{feature_name}' not in any dataset.")
@@ -98,7 +98,7 @@ def analyze_latent(config: MOVEConfig) -> None:
     df_index = pd.Index(sample_names, name="sample")
 
     assert task_config.model is not None
-    device = torch.device("cuda" if task_config.model.cuda == True else "cpu")
+    device = torch.device("cuda" if task_config.model.cuda else "cpu")
     model: VAE = hydra.utils.instantiate(
         task_config.model,
         continuous_shapes=test_dataset.con_shapes,
diff --git a/src/move/tasks/identify_associations.py b/src/move/tasks/identify_associations.py
index c099624c..94c9bc2a 100644
--- a/src/move/tasks/identify_associations.py
+++ b/src/move/tasks/identify_associations.py
@@ -14,7 +14,6 @@
 from torch.utils.data import DataLoader
 
 from move.analysis.metrics import get_2nd_order_polynomial
-
 from move.conf.schema import (
     IdentifyAssociationsBayesConfig,
     IdentifyAssociationsConfig,
@@ -202,7 +201,7 @@ def _bayes_approach(
 ) -> tuple[Union[IntArray, FloatArray], ...]:
 
     assert task_config.model is not None
-    device = torch.device("cuda" if task_config.model.cuda == True else "cpu")
+    device = torch.device("cuda" if task_config.model.cuda else "cpu")
 
     # Train models
     logger = get_logger(__name__)
@@ -319,7 +318,7 @@ def _ttest_approach(
     from scipy.stats import ttest_rel
 
     assert task_config.model is not None
-    device = torch.device("cuda" if task_config.model.cuda == True else "cpu")
+    device = torch.device("cuda" if task_config.model.cuda else "cpu")
 
     # Train models
     logger = get_logger(__name__)
@@ -463,7 +462,7 @@ def _ks_approach(
     """
 
     assert task_config.model is not None
-    device = torch.device("cuda" if task_config.model.cuda == True else "cpu")
+    device = torch.device("cuda" if task_config.model.cuda else "cpu")
 
     figure_path = output_path / "figures"
     figure_path.mkdir(exist_ok=True, parents=True)
@@ -524,7 +523,7 @@ def _ks_approach(
         min_baseline = np.min(baseline_recon, axis=0)
         max_baseline = np.max(baseline_recon, axis=0)
 
-        ############ QC of feature's reconstruction ##############################
+        # QC of feature's reconstruction ##############################
         logger.debug("Calculating quality control of the feature reconstructions")
         # Correlation and slope for each feature's reconstruction
         feature_names = reduce(list.__add__, con_names)
@@ -549,7 +548,7 @@ def _ks_approach(
                 dpi=50,
             )
 
-    ################## Calculate perturbed reconstruction and shifts #############################
+    # Calculate perturbed reconstruction and shifts #############################
     logger.debug("Computing KS scores")
 
     # Save original latent space for first refit:
@@ -646,7 +645,7 @@ def _ks_approach(
     qc_df = pd.DataFrame({"Feature names": feature_names})
     qc_df["slope"] = np.nanmean(slope, axis=0)
     qc_df["reconstruction_correlation"] = np.nanmean(rec_corr, axis=0)
-    qc_df.to_csv(output_path / f"QC_summary_KS.tsv", sep="\t", index=False)
+    qc_df.to_csv(output_path / "QC_summary_KS.tsv", sep="\t", index=False)
 
     # Return first idx associations: redefined for reasonable threshold
 
@@ -739,8 +738,8 @@ def identify_associations(config: MOVEConfig) -> None:
     2) Evaluate associations using bayes or ttest approach.
     3) Save results.
     """
-    #################### DATA PREPARATION ######################
-    ####### Read original data and create perturbed datasets####
+    # DATA PREPARATION ######################
+    # Read original data and create perturbed datasets####
     logger = get_logger(__name__)
     task_config = cast(IdentifyAssociationsConfig, config.task)
 
@@ -811,7 +810,7 @@ def identify_associations(config: MOVEConfig) -> None:
     num_perturbed = len(dataloaders) - 1  # P
     logger.debug(f"# perturbed features: {num_perturbed}")
 
-    ################# APPROACH EVALUATION ##########################
+    # APPROACH EVALUATION ##########################
 
     if task_type == "bayes":
         task_config = cast(IdentifyAssociationsBayesConfig, task_config)
@@ -870,7 +869,7 @@ def identify_associations(config: MOVEConfig) -> None:
     else:
         raise ValueError()
 
-    ###################### RESULTS ################################
+    # RESULTS ################################
     save_results(
         config,
         con_shapes,
diff --git a/src/move/tasks/tune_model.py b/src/move/tasks/tune_model.py
index 89bc6f1f..9d530028 100644
--- a/src/move/tasks/tune_model.py
+++ b/src/move/tasks/tune_model.py
@@ -1,7 +1,6 @@
 __all__ = ["tune_model"]
 
 from pathlib import Path
-from random import shuffle
 from typing import Any, Literal, cast
 
 import hydra
@@ -26,7 +25,7 @@
     TuneModelStabilityConfig,
 )
 from move.core.logging import get_logger
-from move.core.typing import BoolArray, FloatArray
+from move.core.typing import BoolArray
 from move.data import io
 from move.data.dataloaders import MOVEDataset, make_dataloader, split_samples
 from move.models.vae import VAE
@@ -87,7 +86,7 @@ def tune_model(config: MOVEConfig) -> float:
     )
 
     assert task_config.model is not None
-    device = torch.device("cuda" if task_config.model.cuda == True else "cpu")
+    device = torch.device("cuda" if task_config.model.cuda is True else "cpu")
 
     def _tune_stability(
         task_config: TuneModelStabilityConfig,
diff --git a/src/move/training/training_loop.py b/src/move/training/training_loop.py
index 8f59afc4..9a54fdf6 100644
--- a/src/move/training/training_loop.py
+++ b/src/move/training/training_loop.py
@@ -23,14 +23,18 @@ def dilate_batch(dataloader: DataLoader) -> DataLoader:
     return DataLoader(dataset, batch_size, shuffle=True, drop_last=True)
 
 
+BATCH_DILATION_STEPS = []
+KLD_WARMUP_STEPS = []
+
+
 def training_loop(
     model: VAE,
     train_dataloader: DataLoader,
     valid_dataloader: Optional[DataLoader] = None,
     lr: float = 1e-4,
     num_epochs: int = 100,
-    batch_dilation_steps: list[int] = [],
-    kld_warmup_steps: list[int] = [],
+    batch_dilation_steps: list[int] = BATCH_DILATION_STEPS,
+    kld_warmup_steps: list[int] = KLD_WARMUP_STEPS,
     early_stopping: bool = False,
     patience: int = 0,
 ) -> TrainingLoopOutput:
@@ -41,13 +45,17 @@ def training_loop(
     Args:
         model (VAE): trained VAE model object
         train_dataloader (DataLoader): An object feeding data to the VAE with training data
-        valid_dataloader (Optional[DataLoader], optional): An object feeding data to the VAE with validation data. Defaults to None.
+        valid_dataloader (Optional[DataLoader], optional): An object feeding data to the VAE with validation data.
+            Defaults to None.
         lr (float, optional): learning rate. Defaults to 1e-4.
         num_epochs (int, optional): number of epochs. Defaults to 100.
-        batch_dilation_steps (list[int], optional): a list with integers corresponding to epochs when batch size is increased. Defaults to [].
-        kld_warmup_steps (list[int], optional): a list with integers corresponding to epochs when kld is decreased by the selected rate. Defaults to [].
+        batch_dilation_steps (list[int], optional): a list with integers corresponding to epochs when batch size is
+            increased. Defaults to [].
+        kld_warmup_steps (list[int], optional): a list with integers corresponding to epochs when kld is decreased by
+            the selected rate. Defaults to [].
         early_stopping (bool, optional): boolean if use early stopping . Defaults to False.
-        patience (int, optional): number of epochs to wait before early stop if no progress on the validation set . Defaults to 0.
+        patience (int, optional): number of epochs to wait before early stop if no progress on the validation set.
+            Defaults to 0.
 
     Returns:
         (tuple): a tuple containing:
diff --git a/src/move/visualization/dataset_distributions.py b/src/move/visualization/dataset_distributions.py
index 3af4b1d2..ba448d2c 100644
--- a/src/move/visualization/dataset_distributions.py
+++ b/src/move/visualization/dataset_distributions.py
@@ -70,9 +70,9 @@ def plot_reconstruction_diff(
     Plot the reconstruction differences as a heatmap.
""" with style_settings(style): - if vmin == None: + if vmin is None: vmin = np.min(diff_array) - elif vmax == None: + elif vmax is None: vmax = np.max(diff_array) fig = plt.figure(layout="constrained", figsize=(7, 7)) plt.imshow(diff_array, cmap=colormap, vmin=vmin, vmax=vmax) @@ -140,7 +140,7 @@ def plot_feature_association_graph( with_labels = True elif layout == "circular": pos = nx.circular_layout(G) - texts = [ + _ = [ plt.text( pos[node][0], pos[node][1], @@ -269,7 +269,7 @@ def plot_cumulative_distributions( (edges[:-1] + edges[1:]) / 2, np.cumsum(hist_pert), color="red", - label=f"Perturbed", + label="Perturbed", alpha=0.5, ) diff --git a/src/move/visualization/feature_importance.py b/src/move/visualization/feature_importance.py index 1d6208d6..f80f4b14 100644 --- a/src/move/visualization/feature_importance.py +++ b/src/move/visualization/feature_importance.py @@ -10,11 +10,10 @@ from matplotlib.colors import TwoSlopeNorm from move.core.typing import FloatArray -from move.visualization.style import ( +from move.visualization.style import ( # color_cycle, DEFAULT_DIVERGING_PALETTE, DEFAULT_PLOT_STYLE, DEFAULT_QUALITATIVE_PALETTE, - color_cycle, style_settings, ) diff --git a/src/move/visualization/latent_space.py b/src/move/visualization/latent_space.py index 4ebccf66..c53f2384 100644 --- a/src/move/visualization/latent_space.py +++ b/src/move/visualization/latent_space.py @@ -159,8 +159,8 @@ def plot_3D_latent_and_displacement( ax.view_init(altitude, azimuth) if show_baseline: - vmin, vmax = np.min(feature_values[::step]), np.max(feature_values[::step]) - abs_max = np.max([abs(vmin), abs(vmax)]) + # vmin, vmax = np.min(feature_values[::step]), np.max(feature_values[::step]) + # abs_max = np.max([abs(vmin), abs(vmax)]) ax.scatter( mu_baseline[::step, 0], mu_baseline[::step, 1], @@ -192,10 +192,14 @@ def plot_3D_latent_and_displacement( v = mu_perturbed[::step, 1] - mu_baseline[::step, 1] w = mu_perturbed[::step, 2] - mu_baseline[::step, 2] - module = np.sqrt(u * u + v * v + w * w) + # module = np.sqrt(u * u + v * v + w * w) max_u, max_v, max_w = np.max(abs(u)), np.max(abs(v)), np.max(abs(w)) - # Arrow colors will be weighted contributions of red -> dim1, green -> dim2, and blue-> dim3. I.e. purple arrow means movement in dims 1 and 3 + # Arrow colors will be weighted contributions of + # red -> dim1, + # green -> dim2, + # and blue-> dim3. + # I.e. 
purple arrow means movement in dims 1 and 3 colors = [ (abs(du) / max_u, abs(dv) / max_v, abs(dw) / max_w, 0.7) for du, dv, dw in zip(u, v, w) diff --git a/src/move/visualization/style.py b/src/move/visualization/style.py index d414c2ef..8b29a48a 100644 --- a/src/move/visualization/style.py +++ b/src/move/visualization/style.py @@ -28,11 +28,11 @@ def color_cycle(colormap: str) -> ContextManager: Returns: Context manager """ - registry: ColormapRegistry = getattr(matplotlib, "colormaps") + registry: ColormapRegistry = matplotlib.colormaps colormap = registry[colormap] if not isinstance(colormap, ListedColormap): raise ValueError("Only colormaps that are list of colors supported.") - prop_cycle = cycler(color=getattr(colormap, "colors")) + prop_cycle = cycler(color=colormap.colors) return matplotlib.rc_context({"axes.prop_cycle": prop_cycle}) diff --git a/src/move/visualization/vae_visualization.py b/src/move/visualization/vae_visualization.py index 1cb00d07..4bd226cf 100644 --- a/src/move/visualization/vae_visualization.py +++ b/src/move/visualization/vae_visualization.py @@ -59,7 +59,7 @@ def plot_vae( latent_node_distance = 550 latent_sep = 5 * latent_node_distance - ########################### Adding nodes to the graph ############################## + # Adding nodes to the graph ############################## # Bias nodes G.add_node( "input_bias", @@ -150,7 +150,7 @@ def plot_vae( color=0.0, ) - ########################## Adding weights to the graph ######################### + # Adding weights to the graph ######################### if plot_edges: for layer, values in model_weights.items(): @@ -166,7 +166,7 @@ def plot_vae( elif layer == "encoderlayers.0.bias": for j in range(values.shape[0]): # encoder_hidden G.add_edge( - f"input_bias", f"encoder_hidden_{j}", weight=values.numpy()[j] + "input_bias", f"encoder_hidden_{j}", weight=values.numpy()[j] ) elif layer == "mu.weight": @@ -180,7 +180,7 @@ def plot_vae( elif layer == "mu.bias": for i in range(values.shape[0]): # encoder_hidden - G.add_edge(f"mu_bias", f"mu_{i}", weight=values.numpy()[i]) + G.add_edge("mu_bias", f"mu_{i}", weight=values.numpy()[i]) elif layer == "var.weight": for j in range(values.shape[1]): # encoder hidden @@ -193,7 +193,7 @@ def plot_vae( elif layer == "var.bias": for i in range(values.shape[0]): # encoder_hidden - G.add_edge(f"var_bias", f"var_{i}", weight=values.numpy()[i]) + G.add_edge("var_bias", f"var_{i}", weight=values.numpy()[i]) # Sampled layer from mu and var: elif layer == "decoderlayers.0.weight": @@ -209,7 +209,7 @@ def plot_vae( elif layer == "decoderlayers.0.bias": for j in range(values.shape[0]): # decoder_hidden G.add_edge( - f"sam_bias", f"decoder_hidden_{j}", weight=values.numpy()[j] + "sam_bias", f"decoder_hidden_{j}", weight=values.numpy()[j] ) elif layer == "out.weight": @@ -223,7 +223,7 @@ def plot_vae( elif layer == "out.bias": for k in range(values.shape[0]): # output - G.add_edge(f"out_bias", f"output_{k}", weight=values.numpy()[k]) + G.add_edge("out_bias", f"output_{k}", weight=values.numpy()[k]) fig = plt.figure(figsize=(60, 60)) pos = nx.get_node_attributes(G, "pos") @@ -237,7 +237,7 @@ def plot_vae( abs_max = np.max([abs(np.min(color)), abs(np.max(color))]) abs_max_edge = np.max([abs(np.min(edge_color)), abs(np.max(edge_color))]) - sm_node = cm.ScalarMappable( + _ = cm.ScalarMappable( cmap=node_cmap, norm=matplotlib.colors.Normalize(vmin=-abs_max, vmax=abs_max) ) sm_edge = cm.ScalarMappable(