Commit 3ff9842

✨ lint src files with flake8 - might introduce regression errors

- set max-line-length to 120

Henry committed May 16, 2024
1 parent 439c8cf commit 3ff9842

Showing 14 changed files with 102 additions and 74 deletions.
21 changes: 19 additions & 2 deletions .github/workflows/release.yaml
@@ -10,9 +10,27 @@ jobs:
     steps:
       - uses: actions/checkout@v4
       - uses: psf/black@stable
+  lint:
+    name: Lint with flake8
+    runs-on: ubuntu-latest
+    steps:
+      - uses: actions/checkout@v4
+
+      - uses: actions/setup-python@v5
+        with:
+          python-version: "3.11"
+      - name: Install flake8
+        run: pip install flake8 flake8-bugbear
+      - name: Lint with flake8
+        run: flake8 src
+
   publish:
     name: Publish package
     runs-on: ubuntu-latest
+    if: startsWith(github.ref, 'refs/tags')
+    needs:
+      - format
+      - lint
     steps:
       - name: Checkout
         uses: actions/checkout@v4
@@ -24,8 +42,7 @@ jobs:
         run: python -m pip install --upgrade twine build
       - name: Build
         run: python -m build
-      - name: Publish package
-        if: github.event_name == 'push' && startsWith(github.ref, 'refs/tags')
+      - name: Publish package
         uses: pypa/gh-action-pypi-publish@release/v1
         with:
           user: __token__
4 changes: 4 additions & 0 deletions setup.cfg
@@ -34,3 +34,7 @@ where = src
 [options.entry_points]
 console_scripts =
     move-dl=move.__main__:main
+
+[flake8]
+max-line-length = 120
+aggressive = 2
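
A note on the new config: flake8 automatically picks up the [flake8] section of setup.cfg, so the CI step `flake8 src` will enforce max-line-length = 120 without extra flags. The `aggressive` key, as far as I can tell, belongs to autopep8 rather than flake8, so flake8 will most likely ignore it. A minimal sketch (standard library only) to list what the section actually declares:

# check_flake8_cfg.py -- sketch: print the options declared under [flake8].
# Note: "aggressive" is an autopep8 option, not a flake8 one, so flake8 is
# not expected to act on it.
from configparser import ConfigParser

parser = ConfigParser()
parser.read("setup.cfg")
for key, value in parser["flake8"].items():
    print(f"{key} = {value}")
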
7 changes: 3 additions & 4 deletions src/move/__init__.py
@@ -1,11 +1,10 @@
 from __future__ import annotations
+from move.training.training_loop import training_loop
+from move.models.vae import VAE
+from move import conf, data, models
 
 __license__ = "MIT"
 __version__ = (1, 4, 10)
 __all__ = ["conf", "data", "models", "training_loop", "VAE"]
 
 HYDRA_VERSION_BASE = "1.2"
-
-from move import conf, data, models
-from move.models.vae import VAE
-from move.training.training_loop import training_loop
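
Moving the three imports above HYDRA_VERSION_BASE silences E402 (module level import not at top of file). Bottom-of-file imports in an __init__.py are often deliberate, though, to break circular imports, which may be part of what the commit message's "might introduce regression errors" is hedging against. A sketch of the pattern E402 flags, with made-up names:

# e402_sketch.py -- hypothetical module reproducing the E402 pattern above.
from __future__ import annotations

SOME_CONSTANT = "1.2"  # any code before an import makes the next line E402

import json  # flake8 reports E402 (module level import not at top of file)

print(json.dumps({"constant": SOME_CONSTANT}))
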
7 changes: 3 additions & 4 deletions src/move/data/perturbations.py
@@ -11,7 +11,6 @@
 from move.data.preprocessing import feature_stats
 from move.visualization.dataset_distributions import plot_value_distributions
 
-
 ContinuousPerturbationType = Literal["minimum", "maximum", "plus_std", "minus_std"]
 
 
@@ -42,7 +41,7 @@ def perturb_categorical_data(
     splits = np.cumsum(
         [0] + [int.__mul__(*shape) for shape in baseline_dataset.cat_shapes]
     )
-    slice_ = slice(*splits[target_idx : target_idx + 2])
+    slice_ = slice(*splits[target_idx: target_idx + 2])
 
     target_shape = baseline_dataset.cat_shapes[target_idx]
     num_features = target_shape[0]  # CHANGE
@@ -94,7 +93,7 @@ def perturb_continuous_data(
 
     target_idx = con_dataset_names.index(target_dataset_name)
     splits = np.cumsum([0] + baseline_dataset.con_shapes)
-    slice_ = slice(*splits[target_idx : target_idx + 2])
+    slice_ = slice(*splits[target_idx: target_idx + 2])
 
     num_features = baseline_dataset.con_shapes[target_idx]
 
@@ -155,7 +154,7 @@ def perturb_continuous_data_extended(
 
     target_idx = con_dataset_names.index(target_dataset_name)  # dataset index
     splits = np.cumsum([0] + baseline_dataset.con_shapes)
-    slice_ = slice(*splits[target_idx : target_idx + 2])
+    slice_ = slice(*splits[target_idx: target_idx + 2])
 
     num_features = baseline_dataset.con_shapes[target_idx]
     dataloaders = []
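
All three hunks above make the same edit: dropping the space before the colon in a slice. The spaced form is what Black emits when a slice bound is a complex expression, while flake8's E203 flags whitespace before ':'. Since this repository's release workflow runs both Black and flake8, the next Black run may well reinstate the spaces; the common reconciliation is extend-ignore = E203 in the flake8 config. A small sketch of the conflict:

# e203_sketch.py -- the two slice spellings are equivalent at runtime.
import numpy as np

splits = np.cumsum([0, 3, 5])
target_idx = 1

# Black's style for complex slice bounds; flake8 flags it as E203:
slice_a = slice(*splits[target_idx : target_idx + 2])
# The spelling adopted by this commit; E203-clean, but Black may revert it:
slice_b = slice(*splits[target_idx: target_idx + 2])

assert slice_a == slice_b
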
36 changes: 18 additions & 18 deletions src/move/models/vae.py
@@ -43,7 +43,7 @@ def __init__(
         continuous_shapes: Optional[list[int]] = None,
         categorical_weights: Optional[list[int]] = None,
         continuous_weights: Optional[list[int]] = None,
-        num_hidden: list[int] = [200, 200],
+        num_hidden: list[int] = (200, 200),
         num_latent: int = 20,
         beta: float = 0.01,
         dropout: float = 0.2,
@@ -99,11 +99,11 @@ def __init__(
 
         # Initialize simple attributes
         self.beta = beta
-        self.num_hidden = num_hidden
+        self.num_hidden = list(num_hidden)
         self.num_latent = num_latent
         self.dropout = dropout
 
-        self.device = torch.device("cuda" if cuda == True else "cpu")
+        self.device = torch.device("cuda" if cuda else "cpu")
 
         # Activation functions
         self.relu = nn.LeakyReLU()
@@ -116,7 +116,7 @@ def __init__(
         self.decoderlayers = nn.ModuleList()
         self.decodernorms = nn.ModuleList()
 
-        ### Layers
+        # Layers
         # Hidden layers
         for nin, nout in zip([self.input_size] + self.num_hidden, self.num_hidden):
             self.encoderlayers.append(nn.Linear(nin, nout))
@@ -190,7 +190,7 @@ def decompose_categorical(self, reconstruction: torch.Tensor) -> list[torch.Tens
         cat_out = []
         pos = 0
         for cat_shape in self.categorical_shapes:
-            cat_dataset = cat_tmp[:, pos : (cat_shape[0] * cat_shape[1] + pos)]
+            cat_dataset = cat_tmp[:, pos: (cat_shape[0] * cat_shape[1] + pos)]
 
             cat_out_tmp = cat_dataset.view(
                 cat_dataset.shape[0], cat_shape[0], cat_shape[1]
@@ -287,7 +287,7 @@ def calculate_cat_error(
         cat_errors = []
         pos = 0
         for cat_shape in self.categorical_shapes:
-            cat_dataset = cat_in[:, pos : (cat_shape[0] * cat_shape[1] + pos)]
+            cat_dataset = cat_in[:, pos: (cat_shape[0] * cat_shape[1] + pos)]
 
             cat_dataset = cat_dataset.view(cat_in.shape[0], cat_shape[0], cat_shape[1])
             cat_target = cat_dataset
@@ -327,8 +327,8 @@ def calculate_con_error(
         total_shape = 0
         con_errors_list: list[torch.Tensor] = []
         for s in self.continuous_shapes:
-            c_in = con_in[:, total_shape : (s + total_shape - 1)]
-            c_re = con_out[:, total_shape : (s + total_shape - 1)]
+            c_in = con_in[:, total_shape: (s + total_shape - 1)]
+            c_re = con_out[:, total_shape: (s + total_shape - 1)]
             error = loss(c_re, c_in) / batch_size
             con_errors_list.append(error)
             total_shape += s
@@ -451,7 +451,7 @@ def encoding(
         elif self.num_continuous > 0:
             tensor = con
         else:
-            assert False, "Must have at least 1 categorial or 1 continuous feature"
+            raise ValueError("Must have at least 1 categorial or 1 continuous feature")
 
         optimizer.zero_grad()
 
@@ -538,21 +538,21 @@ def get_cat_recon(
         shape_1 = 0
         for cat_shape in self.categorical_shapes:
             # Get input categorical data
-            cat_in_tmp = cat[:, pos : (cat_shape[0] * cat_shape[1] + pos)]
+            cat_in_tmp = cat[:, pos: (cat_shape[0] * cat_shape[1] + pos)]
             cat_in_tmp = cat_in_tmp.view(cat.shape[0], cat_shape[0], cat_shape[1])
 
             # Calculate target values for input
             cat_target_tmp = cat_in_tmp
             cat_target_tmp = torch.argmax(cat_target_tmp.detach(), dim=2)
             cat_target_tmp[cat_in_tmp.sum(dim=2) == 0] = -1
-            cat_target[:, shape_1 : (cat_shape[0] + shape_1)] = (
+            cat_target[:, shape_1: (cat_shape[0] + shape_1)] = (
                 cat_target_tmp  # .numpy()
             )
 
             # Get reconstructed categorical data
             cat_out_tmp = cat_out[count]
             cat_out_tmp = cat_out_tmp.transpose(1, 2)
-            cat_out_class[:, shape_1 : (cat_shape[0] + shape_1)] = torch.argmax(
+            cat_out_class[:, shape_1: (cat_shape[0] + shape_1)] = torch.argmax(
                 cat_out_tmp, dim=2
             )  # .numpy()
 
@@ -694,7 +694,7 @@ def latent(
         elif self.num_continuous > 0:
             tensor = con
         else:
-            assert False, "Must have at least 1 categorial or 1 continuous feature"
+            raise ValueError("Must have at least 1 categorial or 1 continuous feature")
 
         # Evaluate
         cat_out, con_out, mu, logvar = self(tensor)
@@ -713,14 +713,14 @@ def latent(
                 cat_out_class, cat_target = self.get_cat_recon(
                     batch, cat_total_shape, cat, cat_out
                 )
-                cat_recon[row : row + len(cat_out_class)] = torch.Tensor(cat_out_class)
-                cat_class[row : row + len(cat_target)] = torch.Tensor(cat_target)
+                cat_recon[row: row + len(cat_out_class)] = torch.Tensor(cat_out_class)
+                cat_class[row: row + len(cat_target)] = torch.Tensor(cat_target)
 
             if self.num_continuous > 0:
-                con_recon[row : row + len(con_out)] = con_out
+                con_recon[row: row + len(con_out)] = con_out
 
-            latent_var[row : row + len(logvar)] = logvar
-            latent[row : row + len(mu)] = mu
+            latent_var[row: row + len(logvar)] = logvar
+            latent[row: row + len(mu)] = mu
             row += len(mu)
 
         test_loss /= len(dataloader)
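
Two bugbear-style fixes stand out in this file. The assert False lines become raise ValueError (B011: asserts vanish under python -O), and the num_hidden default changes from a list to a tuple, which reads like B006 (mutable default argument): a list default is built once at definition time and shared by every call, so the body now copies it with list(num_hidden). One wrinkle: the annotation still says list[int] although the default is now a tuple; Sequence[int] would cover both. A self-contained sketch of the B006 pitfall (illustrative names, not the repo's):

# b006_sketch.py -- why flake8-bugbear flags mutable default arguments.
from typing import Sequence


def bad(layers: list[int] = [200, 200]) -> list[int]:
    layers.append(1)  # mutates the one shared default list
    return layers


def good(layers: Sequence[int] = (200, 200)) -> list[int]:
    result = list(layers)  # per-call copy, as VAE.__init__ now does
    result.append(1)
    return result


assert bad() == [200, 200, 1]
assert bad() == [200, 200, 1, 1]  # the shared default accumulated state
assert good() == good() == [200, 200, 1]  # fresh copy every call
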
12 changes: 6 additions & 6 deletions src/move/tasks/analyze_latent.py
@@ -48,17 +48,17 @@ def find_feature_values(
     Tuple containing (1) index of dataset containing feature and (2)
     values corresponding to the feature
     """
-    dataset_index, feature_index = [None] * 2
-    for dataset_index, feature_names in enumerate(feature_names_lists):
+    _dataset_index, feature_index = [None] * 2
+    for _dataset_index, feature_names in enumerate(feature_names_lists):
         try:
             feature_index = feature_names.index(feature_name)
         except ValueError:
             continue
         break
-    if dataset_index is not None and feature_index is not None:
+    if _dataset_index is not None and feature_index is not None:
         return (
-            dataset_index,
-            np.take(feature_values[dataset_index], feature_index, axis=1),
+            _dataset_index,
+            np.take(feature_values[_dataset_index], feature_index, axis=1),
         )
     raise KeyError(f"Feature '{feature_name}' not in any dataset.")
 
@@ -98,7 +98,7 @@ def analyze_latent(config: MOVEConfig) -> None:
     df_index = pd.Index(sample_names, name="sample")
 
     assert task_config.model is not None
-    device = torch.device("cuda" if task_config.model.cuda == True else "cpu")
+    device = torch.device("cuda" if task_config.model.cuda else "cpu")
     model: VAE = hydra.utils.instantiate(
         task_config.model,
         continuous_shapes=test_dataset.con_shapes,
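
The underscore prefix on _dataset_index looks like a fix for bugbear's B007 (loop control variable not used within the loop body): the index is only read after the loop finishes, and B007 exempts names that start with an underscore. Behavior is unchanged. A toy sketch of the same pattern:

# b007_sketch.py -- loop variable consumed only after the loop (B007).
names_lists = [["a", "b"], ["c", "d"]]

_found_index = None
for _found_index, names in enumerate(names_lists):
    if "c" in names:
        break  # _found_index is read below, not inside the loop body

print(_found_index)  # -> 1
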
21 changes: 10 additions & 11 deletions src/move/tasks/identify_associations.py
@@ -14,7 +14,6 @@
 from torch.utils.data import DataLoader
 
 from move.analysis.metrics import get_2nd_order_polynomial
-
 from move.conf.schema import (
     IdentifyAssociationsBayesConfig,
     IdentifyAssociationsConfig,
@@ -202,7 +201,7 @@ def _bayes_approach(
 ) -> tuple[Union[IntArray, FloatArray], ...]:
 
     assert task_config.model is not None
-    device = torch.device("cuda" if task_config.model.cuda == True else "cpu")
+    device = torch.device("cuda" if task_config.model.cuda else "cpu")
 
     # Train models
     logger = get_logger(__name__)
@@ -319,7 +318,7 @@ def _ttest_approach(
     from scipy.stats import ttest_rel
 
     assert task_config.model is not None
-    device = torch.device("cuda" if task_config.model.cuda == True else "cpu")
+    device = torch.device("cuda" if task_config.model.cuda else "cpu")
 
     # Train models
     logger = get_logger(__name__)
@@ -463,7 +462,7 @@ def _ks_approach(
     """
 
     assert task_config.model is not None
-    device = torch.device("cuda" if task_config.model.cuda == True else "cpu")
+    device = torch.device("cuda" if task_config.model.cuda else "cpu")
     figure_path = output_path / "figures"
     figure_path.mkdir(exist_ok=True, parents=True)
 
@@ -524,7 +523,7 @@ def _ks_approach(
     min_baseline = np.min(baseline_recon, axis=0)
     max_baseline = np.max(baseline_recon, axis=0)
 
-    ############ QC of feature's reconstruction ##############################
+    # QC of feature's reconstruction ##############################
     logger.debug("Calculating quality control of the feature reconstructions")
     # Correlation and slope for each feature's reconstruction
     feature_names = reduce(list.__add__, con_names)
@@ -549,7 +548,7 @@ def _ks_approach(
             dpi=50,
         )
 
-    ################## Calculate perturbed reconstruction and shifts #############################
+    # Calculate perturbed reconstruction and shifts #############################
     logger.debug("Computing KS scores")
 
     # Save original latent space for first refit:
@@ -646,7 +645,7 @@ def _ks_approach(
     qc_df = pd.DataFrame({"Feature names": feature_names})
     qc_df["slope"] = np.nanmean(slope, axis=0)
     qc_df["reconstruction_correlation"] = np.nanmean(rec_corr, axis=0)
-    qc_df.to_csv(output_path / f"QC_summary_KS.tsv", sep="\t", index=False)
+    qc_df.to_csv(output_path / "QC_summary_KS.tsv", sep="\t", index=False)
 
     # Return first idx associations: redefined for reasonable threshold
 
@@ -739,8 +738,8 @@ def identify_associations(config: MOVEConfig) -> None:
     2) Evaluate associations using bayes or ttest approach.
     3) Save results.
     """
-    #################### DATA PREPARATION ######################
-    ####### Read original data and create perturbed datasets####
+    # DATA PREPARATION ######################
+    # Read original data and create perturbed datasets####
 
     logger = get_logger(__name__)
     task_config = cast(IdentifyAssociationsConfig, config.task)
@@ -811,7 +810,7 @@ def identify_associations(config: MOVEConfig) -> None:
     num_perturbed = len(dataloaders) - 1  # P
     logger.debug(f"# perturbed features: {num_perturbed}")
 
-    ################# APPROACH EVALUATION ##########################
+    # APPROACH EVALUATION ##########################
 
     if task_type == "bayes":
         task_config = cast(IdentifyAssociationsBayesConfig, task_config)
@@ -870,7 +869,7 @@ def identify_associations(config: MOVEConfig) -> None:
     else:
         raise ValueError()
 
-    ###################### RESULTS ################################
+    # RESULTS ################################
     save_results(
         config,
         con_shapes,
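
Most hunks in this file fix one of three things: == True comparisons (E712, discussed under tune_model below), ########-style banner comments (E265/E266: block comments should start with '# '), and an f-string with no placeholders (pyflakes F541). A condensed sketch of the latter two:

# f541_sketch.py -- condensed before/after for the comment and f-string fixes.
from pathlib import Path

output_path = Path("results")  # illustrative path

############ banners like this trigger E265/E266 ############
# a block comment starting with '# ' passes

fname_old = f"QC_summary_KS.tsv"  # F541: f-string without placeholders
fname_new = "QC_summary_KS.tsv"  # plain string, identical value
assert fname_old == fname_new
print(output_path / fname_new)
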
5 changes: 2 additions & 3 deletions src/move/tasks/tune_model.py
@@ -1,7 +1,6 @@
 __all__ = ["tune_model"]
 
 from pathlib import Path
-from random import shuffle
 from typing import Any, Literal, cast
 
 import hydra
@@ -26,7 +25,7 @@
     TuneModelStabilityConfig,
 )
 from move.core.logging import get_logger
-from move.core.typing import BoolArray, FloatArray
+from move.core.typing import BoolArray
 from move.data import io
 from move.data.dataloaders import MOVEDataset, make_dataloader, split_samples
 from move.models.vae import VAE
@@ -87,7 +86,7 @@ def tune_model(config: MOVEConfig) -> float:
     )
 
     assert task_config.model is not None
-    device = torch.device("cuda" if task_config.model.cuda == True else "cpu")
+    device = torch.device("cuda" if task_config.model.cuda is True else "cpu")
 
     def _tune_stability(
         task_config: TuneModelStabilityConfig,
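
The cuda == True comparisons fixed across this commit are pycodestyle's E712 ("comparison to True should be 'if cond is True:' or 'if cond:'"). The fixes are not quite uniform: the other files switched to plain truthiness (if task_config.model.cuda), while this one uses is True, which matches only the literal True. Both spellings satisfy E712, but they can disagree on non-bool values, as this sketch shows:

# e712_sketch.py -- the two E712-compliant spellings are not equivalent.
for cuda in (True, False, 1):  # 1 is truthy but is not the literal True
    by_truthiness = "cuda" if cuda else "cpu"
    by_identity = "cuda" if cuda is True else "cpu"
    print(f"{cuda!r}: truthiness={by_truthiness}, identity={by_identity}")
# 1 prints truthiness=cuda but identity=cpu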