✨ Enable Ruff security rules (bandit) #1686

Merged Jul 17, 2024 · 24 commits
Changes from 12 commits
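For context before the per-file diffs: Ruff exposes bandit's checks as its flake8-bandit ("S") rule family, and the hunks below are the code changes made to satisfy those rules (replacing `assert` with explicit exceptions, dropping `shell=True` from subprocess calls, and adding targeted `# noqa` suppressions). A minimal sketch of how such rules are typically enabled in `pyproject.toml` follows; the rule selection and per-file ignores shown here are illustrative assumptions, not necessarily the exact configuration committed in this PR.

```toml
# Illustrative Ruff configuration enabling the bandit-derived "S" rules.
# The selection and ignores below are assumptions for demonstration only.
[tool.ruff.lint]
extend-select = [
    "S",  # flake8-bandit security checks
]

[tool.ruff.lint.per-file-ignores]
# `assert` is idiomatic in tests, so S101 is commonly ignored there.
"tests/**" = ["S101"]
```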
8 changes: 6 additions & 2 deletions lamindb/_annotate.py
@@ -962,7 +962,10 @@

artifact = None
if data_is_anndata(data):
assert adata is not None
if adata is None:
raise ValueError(
"Argument `adata` cannot be None if the passed `data` is of type AnnData."
)
artifact = Artifact.from_anndata(data, description=description, **kwargs)
artifact.n_observations = adata.shape[0]
data = adata
@@ -1168,7 +1171,8 @@
def save_ulabels_with_parent(values: list[str], field: FieldAttr, key: str) -> None:
"""Save a parent label for the given labels."""
registry = field.field.model
assert registry == ULabel
if registry != ULabel:
raise TypeError("Field must be of type ULabel.")

all_records = registry.from_values(values, field=field)
is_feature = registry.filter(name=f"is_{key}").one_or_none()
if is_feature is None:
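A note on the rule behind the two hunks above: Ruff's S101 (inherited from bandit) flags `assert` statements because assertions are stripped entirely when Python runs with `-O`, so they cannot be relied on to validate runtime input; raising an explicit exception keeps the check in optimized mode. A minimal, self-contained sketch of the pattern with made-up names:

```python
# Why S101 flags `assert`: the check vanishes under `python -O`.
# `scale` and its arguments are hypothetical names for illustration only.
def scale(value: float, factor: float) -> float:
    # assert factor > 0          # silently skipped in optimized mode
    if factor <= 0:              # explicit validation always runs
        raise ValueError(f"factor must be positive, got {factor}")
    return value * factor


print(scale(2.0, 3.0))  # 6.0
```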
9 changes: 6 additions & 3 deletions lamindb/_artifact.py
@@ -444,7 +444,7 @@
logger.hint(hint)


def data_is_anndata(data: AnnData | UPathStr):
def data_is_anndata(data: AnnData | UPathStr) -> bool:
if isinstance(data, AnnData):
return True
if isinstance(data, (str, Path, UPath)):
@@ -464,7 +464,7 @@
return False


def data_is_mudata(data: MuData | UPathStr):
def data_is_mudata(data: MuData | UPathStr) -> bool:
if _mudata_is_installed():
from mudata import MuData

@@ -965,7 +965,10 @@
)
delete_record = response == "y"
else:
assert permanent
if not permanent:
raise ValueError(
"Attempting to delete a record but permanent is not set to True."
)
delete_record = True

if delete_record:
2 changes: 1 addition & 1 deletion lamindb/_can_validate.py
@@ -224,7 +224,7 @@
try:
self.add_synonym(value, save=False)
except Exception: # pragma: no cover
pass
logger.debug("Encountered an Exception while attempting to add synonyms.")

if not self._state.adding:
self.save()
5 changes: 4 additions & 1 deletion lamindb/_collection.py
@@ -100,7 +100,10 @@
else:
if not hasattr(artifacts, "__getitem__"):
raise ValueError("Artifact or List[Artifact] is allowed.")
assert isinstance(artifacts[0], Artifact) # type: ignore
if not isinstance(artifacts[0], Artifact): # type: ignore
raise TypeError(
f"Expected an Artifact but encountered {type(artifacts[0])}."
)
hash, feature_sets = from_artifacts(artifacts) # type: ignore
if meta is not None:
if not isinstance(meta, Artifact):
5 changes: 4 additions & 1 deletion lamindb/_feature.py
@@ -139,7 +139,10 @@
finally:
settings.verbosity = verbosity

assert len(features) == len(df.columns)
if len(features) != len(df.columns):
raise ValueError(
f"The number of features ({len(features)}) and columns ({len(df.columns)}) of the DataFrame do not match."
)

# if len(categoricals_with_unmapped_categories) > 0:
# n_max = 20
24 changes: 18 additions & 6 deletions lamindb/_finish.py
@@ -41,7 +41,10 @@
if run_context.run is None:
raise TrackNotCalled("Please run `ln.track()` before `ln.finish()`")
if run_context.path is None:
assert run_context.transform.type not in {"script", "notebook"}
if run_context.transform.type in {"script", "notebook"}:
raise ValueError(
f"Transform type must not be 'script' or 'notebook' but is {run_context.transform.type}."
)
run_context.run.finished_at = datetime.now(timezone.utc)
run_context.run.save()
# nothing else to do
@@ -107,9 +110,14 @@
# convert the notebook file to html
# log_level is set to 40 to silence the nbconvert logging
subprocess.run(
"jupyter nbconvert --to html"
f" '{filepath.as_posix()}' --Application.log_level=40",
shell=True,
[
"jupyter",
"nbconvert",
"--to",
"html",
filepath.as_posix(),
"--Application.log_level=40",
],
check=True,
)
# move the temporary file into the cache dir in case it's accidentally
@@ -129,8 +137,12 @@
source_code_path = ln_setup.settings.storage.cache_dir / filepath.name
shutil.copy2(filepath, source_code_path) # copy
subprocess.run(
f"nbstripout '{source_code_path}' --extra-keys='metadata.version metadata.kernelspec metadata.language_info metadata.pygments_lexer metadata.name metadata.file_extension'",
shell=True,
[
"nbstripout",
source_code_path,
"--extra-keys",
"metadata.version metadata.kernelspec metadata.language_info metadata.pygments_lexer metadata.name metadata.file_extension",
],
check=True,
)
# find initial versions of source codes and html reports
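The two subprocess hunks above address Ruff's S602/S603 (bandit's subprocess checks): interpolating values into a command string and running it with `shell=True` lets the shell re-parse those values, while passing an argument list avoids spawning a shell at all. A short sketch of the pattern, using a made-up filename:

```python
# shell=True vs. argument list (Ruff S602/S603); `report.ipynb` is made up.
import subprocess

filename = "report.ipynb"

# Flagged: the filename is spliced into a string that a shell will re-parse.
# subprocess.run(f"jupyter nbconvert --to html '{filename}'", shell=True)

# Preferred: no shell is involved, so the filename is passed through verbatim.
subprocess.run(
    ["jupyter", "nbconvert", "--to", "html", filename],
    check=True,  # raise CalledProcessError on a non-zero exit code
)
```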
19 changes: 14 additions & 5 deletions lamindb/core/_feature_manager.py
@@ -430,7 +430,8 @@
if isinstance(keys, DICT_KEYS_TYPE):
keys = list(keys) # type: ignore
# deal with other cases later
assert all(isinstance(key, str) for key in keys)
if not all(isinstance(key, str) for key in keys):
raise TypeError("Not all values keys were strings.")
registry = feature_param_field.field.model
is_param = registry == Param
model = Param if is_param else Feature
@@ -666,10 +667,12 @@
):
"""Add feature set corresponding to column names of DataFrame."""
if isinstance(self._host, Artifact):
assert self._host.accessor == "DataFrame"
if not self._host.accessor == "DataFrame":
raise TypeError(f"Accessor is not DataFrame but {self._host.accessor}.")
else:
# Collection
assert self._host.artifact.accessor == "DataFrame"
if not self._host.artifact.accessor == "DataFrame":
raise TypeError(f"Accessor is not DataFrame but {self._host.accessor}.")

# parse and register features
registry = field.field.model
Expand Down Expand Up @@ -697,7 +700,10 @@
):
"""Add features from AnnData."""
if isinstance(self._host, Artifact):
assert self._host.accessor == "AnnData"
if not self._host.accessor == "AnnData":
raise TypeError(
f"Accessor is not AnnData object but {self._host.accessor}."
)
else:
raise NotImplementedError()

@@ -727,7 +733,10 @@
if obs_fields is None:
obs_fields = {}
if isinstance(self._host, Artifact):
assert self._host.accessor == "MuData"
if not self._host.accessor == "MuData":
raise TypeError(
f"Accessor is not a MuData object but {self._host.accessor}."
)
else:
raise NotImplementedError()

4 changes: 2 additions & 2 deletions lamindb/core/_label_manager.py
@@ -66,7 +66,7 @@
print_values = _print_values(labels_list, n=10)
type_str = f": {related_model}" if print_types else ""
labels_msg += f" .{related_name}{type_str} = {print_values}\n"
except Exception:
except Exception: # noqa: S112
continue
msg = ""
if labels_msg:
@@ -102,7 +102,7 @@
records = registry.from_values(label_uids, field=field)
if len(records) > 0:
save(records, parents=parents)
except Exception:
except Exception: # noqa S110
pass
field = "uid"
label_uids = np.array(
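The `# noqa` comments above suppress Ruff's S112 (try-except-continue) and S110 (try-except-pass), which flag broad exception handlers that swallow failures silently; here the PR documents the intent instead of adding logging. A small self-contained illustration of what these rules target:

```python
# What S110/S112 flag: a broad `except` that silently drops the failure.
# The noqa marks the silence as deliberate; logging is the louder alternative.
values = ["1", "two", "3"]
total = 0
for value in values:
    try:
        total += int(value)
    except Exception:  # noqa: S112 -- intentionally skip unparsable entries
        continue
print(total)  # prints 4
```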
5 changes: 4 additions & 1 deletion lamindb/core/_mapped_collection.py
@@ -107,7 +107,10 @@
parallel: bool = False,
dtype: str | None = None,
):
assert join in {None, "inner", "outer"}
if join not in {None, "inner", "outer"}:
raise ValueError(
f"join must be one of None, 'inner', or 'outer' but was {type(join)}"
)

if layers_keys is None:
self.layers_keys = ["X"]
4 changes: 2 additions & 2 deletions lamindb/core/_run_context.py
@@ -43,7 +43,7 @@ def get_uid_ext(version: str) -> str:
# merely zero-padding the nbproject version such that the base62 encoding is
# at least 4 characters long doesn't yields sufficiently diverse hashes and
# leads to collisions; it'd be nice because the uid_ext would be ordered
return encodebytes(hashlib.md5(version.encode()).digest())[:4]
return encodebytes(hashlib.md5(version.encode()).digest())[:4] # noqa: S324


def update_stem_uid_or_version(
@@ -113,7 +113,7 @@ def get_notebook_name_colab() -> str:

ip = gethostbyname(gethostname()) # 172.28.0.12
try:
name = get(f"http://{ip}:9000/api/sessions").json()[0]["name"]
name = get(f"http://{ip}:9000/api/sessions").json()[0]["name"] # noqa: S113
except Exception:
logger.warning(
"could not get notebook name from Google Colab, using: notebook.ipynb"
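The `# noqa: S324` above silences Ruff's weak-hash rule for an MD5 digest that only derives a short, non-cryptographic uid suffix. On Python 3.9+, an alternative worth noting (not what this diff does) is to mark the call as a non-security use, which bandit-style checks generally accept:

```python
# Possible alternative to `# noqa: S324` on Python 3.9+:
# declare the MD5 use as non-cryptographic via `usedforsecurity=False`.
import hashlib

digest = hashlib.md5(b"1.2.3", usedforsecurity=False).digest()
print(digest.hex())
```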
3 changes: 2 additions & 1 deletion lamindb/core/_settings.py
@@ -108,7 +108,8 @@ def sync_git_repo(self, value) -> None:
For example: `ln.sync_git_repo = https://github.com/laminlabs/redun-lamin`
"""
self._sync_git_repo = sanitize_git_repo_url(value)
assert self._sync_git_repo.startswith("https://")
if not self._sync_git_repo.startswith("https://"):
raise ValueError("Github repository URL must start with 'https://'.")

@property
def storage(self) -> StorageSettings:
37 changes: 23 additions & 14 deletions lamindb/core/_sync_git.py
@@ -24,8 +24,7 @@
f"running outside of synched git repo, cloning {repo_url} into {repo_dir}"
)
result = subprocess.run(
f"git clone --depth 10 {repo_url}.git",
shell=True,
["git", "clone", "--depth", "10", f"{repo_url}.git"],
capture_output=True,
cwd=setup_settings.storage.cache_dir,
)
@@ -36,8 +35,7 @@

def check_local_git_repo() -> bool:
result = subprocess.run(
"git config --get remote.origin.url",
shell=True,
["git", "config", "--get remote.origin.url"],
capture_output=True,
)
result_str = result.stdout.decode().strip()
@@ -55,10 +53,9 @@


def get_git_commit_hash(blob_hash: str, repo_dir: Path | None = None) -> str | None:
command = f"git log --find-object={blob_hash} --pretty=format:%H"
command = ["git", "log", f"--find-object={blob_hash}", "--pretty=format:%H"]
result = subprocess.run(
command,
shell=True,
capture_output=True,
cwd=repo_dir,
)
Expand All @@ -68,9 +65,8 @@
if commit_hash == "" or result.returncode == 1:
return None
else:
assert (
len(commit_hash) == 40
), f"commit hash |{commit_hash}| is not 40 characters long"
if not len(commit_hash) == 40:
raise ValueError(f"commit hash |{commit_hash}| is not 40 characters long")
return commit_hash


@@ -82,21 +78,34 @@
# from anywhere in the repo, hence, let's get the root
repo_root = (
subprocess.run(
"git rev-parse --show-toplevel",
shell=True,
["git", "rev-parse", "--show-toplevel"],
capture_output=True,
cwd=repo_dir,
)
.stdout.decode()
.strip()
)
command = f"git ls-tree -r {commit_hash} | grep -E {blob_hash}"
# Run the git commands separately to circumvent spawning a shell
git_command = ["git", "ls-tree", "-r", commit_hash]
git_process = subprocess.Popen(
git_command,
stdout=subprocess.PIPE,
cwd=repo_root,
)

grep_command = ["grep", "-E", blob_hash]
result = subprocess.run(
command,
shell=True,
grep_command,
stdin=git_process.stdout,
capture_output=True,
cwd=repo_root,
)

# Close the stdout to allow git_process to receive a SIGPIPE if grep_command exits
git_process.stdout.close()
git_process.wait()

command = " ".join(git_command) + " | " + " ".join(grep_command)
if result.returncode != 0 and result.stderr.decode() != "":
raise RuntimeError(f"{command}\n{result.stderr.decode()}")
if len(result.stdout.decode()) == 0:
6 changes: 5 additions & 1 deletion lamindb/core/_track_environment.py
@@ -15,7 +15,11 @@ def track_environment(run: Run) -> None:
# create a requirements.txt
# we don't create a conda environment.yml mostly for its slowness
try:
result = subprocess.run(f"pip freeze > {str(filepath)}", shell=True)
with open(filepath, "w") as f:
result = subprocess.run(
["pip", "freeze"],
stdout=f,
)
except OSError as e:
result = None
logger.warning(f"could not run pip freeze with error {e}")
8 changes: 6 additions & 2 deletions lamindb/core/storage/_backed_access.py
@@ -208,7 +208,9 @@
ds = CSRDataset(elem)
result = _subset_sparse(ds, indices)
except Exception:
pass
logger.debug(
"Encountered an exception while attempting to subset a sparse dataset by indices."
)
if result is None:
raise ValueError(
"Can not get a subset of the element of type"
@@ -306,7 +308,9 @@
ds = CSRDataset(elem)
return _subset_sparse(ds, indices)
except Exception:
pass
logger.debug(
"Encountered an exception while attempting to subset a sparse dataset by indices."
)
raise ValueError(
"Can not get a subset of the element of type"
f" {type(elem).__name__} with an empty spec."
3 changes: 2 additions & 1 deletion lamindb/core/storage/paths.py
@@ -47,7 +47,8 @@ def auto_storage_key_from_artifact(artifact: Artifact):


def auto_storage_key_from_artifact_uid(uid: str, suffix: str, is_dir: bool) -> str:
assert isinstance(suffix, str)
if not isinstance(suffix, str):
raise TypeError(f"Suffix must be of type str but was {type(suffix)}.")
if is_dir:
uid_storage = uid[:16] # 16 chars, leave 4 chars for versioning
else:
7 changes: 5 additions & 2 deletions lamindb/integrations/_vitessce.py
@@ -39,9 +39,12 @@
if "url" not in file:
raise ValueError("Each file must have a 'url' key.")
filename = file["url"].split("/")[-1]
assert filename.endswith(
if not filename.endswith(
(".anndata.zarr", ".spatialdata.zarr", ".ome.zarr")
)
):
raise ValueError(
"filename must end with '.anndata.zarr', '.spatialdata.zarr', or '.ome.zarr'."
)
filestem = (
filename.replace(".anndata.zarr", "")
.replace(".spatialdata.zarr", "")