✨ Enable Ruff security rules (bandit) #1686

Merged · 24 commits · Jul 17, 2024
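Context on the rule set: Ruff's "S" rules are ports of flake8-bandit checks, for example S101 (assert used), S110/S112 (try-except-pass and try-except-continue), S113 (request without timeout), S324 (insecure hash function), and S602/S603 (subprocess with and without shell=True). The diff below either rewrites the flagged code or suppresses individual findings with # noqa: S... comments. A hedged sketch of running only these rules against one file from Python (the invocation and target path are illustrative, not part of this PR):

import subprocess

# Run Ruff with only the bandit-derived "S" rules selected; the target path is an example.
result = subprocess.run(
    ["ruff", "check", "--select", "S", "lamindb/_artifact.py"],
    capture_output=True,
    text=True,
)
print(result.stdout or "no security findings")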
4 changes: 2 additions & 2 deletions lamindb/_annotate.py
@@ -1081,7 +1081,7 @@ def save_artifact(

artifact = None
if data_is_anndata(data):
assert adata is not None
assert adata is not None # noqa: S101
artifact = Artifact.from_anndata(data, description=description, **kwargs)
artifact.n_observations = adata.shape[0]
data = adata
@@ -1302,7 +1302,7 @@ def log_saved_labels(
def save_ulabels_with_parent(values: list[str], field: FieldAttr, key: str) -> None:
"""Save a parent label for the given labels."""
registry = field.field.model
assert registry == ULabel
assert registry == ULabel # noqa: S101
all_records = registry.from_values(values, field=field)
is_feature = registry.filter(name=f"is_{key}").one_or_none()
if is_feature is None:
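Why these lines needed attention: bandit's S101 flags assert statements because Python strips them under the -O flag, so they cannot be relied on for runtime validation; the PR keeps the asserts and suppresses the rule with # noqa: S101. For comparison, a minimal sketch of the raise-based alternative (hypothetical helper, not part of this diff):

def require_adata(adata):
    # Unlike an assert, this check still runs when Python is invoked with -O.
    if adata is None:
        raise ValueError("adata must not be None when the input data is an AnnData object")
    return adata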
6 changes: 3 additions & 3 deletions lamindb/_artifact.py
@@ -424,7 +424,7 @@ def log_storage_hint(
logger.hint(hint)


def data_is_anndata(data: AnnData | UPathStr):
def data_is_anndata(data: AnnData | UPathStr) -> bool:
if isinstance(data, AnnData):
return True
if isinstance(data, (str, Path, UPath)):
@@ -444,7 +444,7 @@ def data_is_anndata(data: AnnData | UPathStr):
return False


def data_is_mudata(data: MuData | UPathStr):
def data_is_mudata(data: MuData | UPathStr) -> bool:
if _mudata_is_installed():
from mudata import MuData

@@ -948,7 +948,7 @@ def delete(
)
delete_record = response == "y"
else:
assert permanent
assert permanent # noqa: S101
delete_record = True

if delete_record:
6 changes: 4 additions & 2 deletions lamindb/_can_validate.py
@@ -223,8 +223,10 @@
else:
try:
self.add_synonym(value, save=False)
except Exception: # pragma: no cover
pass
except Exception as e: # pragma: no cover
[Review comment, Member] @sunnyosun - why do we need this? This seems like it should error.
logger.debug(

f"Encountered an Exception while attempting to add synonyms.\n{e}"
)

if not self._state.adding:
self.save()
2 changes: 1 addition & 1 deletion lamindb/_collection.py
@@ -100,7 +100,7 @@ def __init__(
else:
if not hasattr(artifacts, "__getitem__"):
raise ValueError("Artifact or List[Artifact] is allowed.")
assert isinstance(artifacts[0], Artifact) # type: ignore
assert isinstance(artifacts[0], Artifact) # type: ignore # noqa: S101
hash, feature_sets = from_artifacts(artifacts) # type: ignore
if meta is not None:
if not isinstance(meta, Artifact):
2 changes: 1 addition & 1 deletion lamindb/_feature.py
@@ -139,7 +139,7 @@ def from_df(cls, df: pd.DataFrame, field: FieldAttr | None = None) -> RecordsLis
finally:
settings.verbosity = verbosity

assert len(features) == len(df.columns)
assert len(features) == len(df.columns) # noqa: S101

# if len(categoricals_with_unmapped_categories) > 0:
# n_max = 20
24 changes: 18 additions & 6 deletions lamindb/_finish.py
@@ -41,7 +41,10 @@
if run_context.run is None:
raise TrackNotCalled("Please run `ln.track()` before `ln.finish()`")
if run_context.path is None:
assert run_context.transform.type not in {"script", "notebook"}
if run_context.transform.type in {"script", "notebook"}:
    raise ValueError(
        f"Transform type must not be 'script' or 'notebook' but is {run_context.transform.type}."
    )
run_context.run.finished_at = datetime.now(timezone.utc)
run_context.run.save()
# nothing else to do
@@ -107,9 +110,14 @@
# convert the notebook file to html
# log_level is set to 40 to silence the nbconvert logging
subprocess.run(
"jupyter nbconvert --to html"
f" '{filepath.as_posix()}' --Application.log_level=40",
shell=True,
[
"jupyter",
"nbconvert",
"--to",
"html",
filepath.as_posix(),
"--Application.log_level=40",
],
check=True,
)
# move the temporary file into the cache dir in case it's accidentally
@@ -129,8 +137,12 @@
source_code_path = ln_setup.settings.storage.cache_dir / filepath.name
shutil.copy2(filepath, source_code_path) # copy
subprocess.run(
f"nbstripout '{source_code_path}' --extra-keys='metadata.version metadata.kernelspec metadata.language_info metadata.pygments_lexer metadata.name metadata.file_extension'",
shell=True,
[
"nbstripout",
source_code_path,
"--extra-keys",
"metadata.version metadata.kernelspec metadata.language_info metadata.pygments_lexer metadata.name metadata.file_extension",
],
check=True,
)
# find initial versions of source codes and html reports
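The two subprocess changes above replace shell command strings with argument lists, which is what S602/S603 target: with shell=True the interpolated file path is parsed by /bin/sh, so quotes or metacharacters in a filename can alter the command. A small illustration of the difference (hypothetical filename, not from this diff):

import subprocess
import sys

filename = "report'; echo injected #.html"  # deliberately hostile name, illustrative only

# With shell=True, the string is interpreted by the shell and the quoted part after ';'
# would run as its own command:
#   subprocess.run(f"wc -l '{filename}'", shell=True)

# With an argument list, the filename reaches the child process as one literal argv entry:
subprocess.run([sys.executable, "-c", "import sys; print(sys.argv[1])", filename], check=True)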
10 changes: 5 additions & 5 deletions lamindb/core/_feature_manager.py
@@ -432,7 +432,7 @@
if isinstance(keys, DICT_KEYS_TYPE):
keys = list(keys) # type: ignore
# deal with other cases later
assert all(isinstance(key, str) for key in keys)
assert all(isinstance(key, str) for key in keys) # noqa: S101
registry = feature_param_field.field.model
is_param = registry == Param
model = Param if is_param else Feature
@@ -668,10 +668,10 @@
):
"""Add feature set corresponding to column names of DataFrame."""
if isinstance(self._host, Artifact):
assert self._host.accessor == "DataFrame"
assert self._host.accessor == "DataFrame" # noqa: S101
else:
# Collection
assert self._host.artifact.accessor == "DataFrame"
assert self._host.artifact.accessor == "DataFrame" # noqa: S101

# parse and register features
registry = field.field.model
@@ -699,7 +699,7 @@
):
"""Add features from AnnData."""
if isinstance(self._host, Artifact):
assert self._host.accessor == "AnnData"
assert self._host.accessor == "AnnData" # noqa: S101
else:
raise NotImplementedError()

@@ -729,7 +729,7 @@
if obs_fields is None:
obs_fields = {}
if isinstance(self._host, Artifact):
assert self._host.accessor == "MuData"
assert self._host.accessor == "MuData" # noqa: S101
else:
raise NotImplementedError()

4 changes: 2 additions & 2 deletions lamindb/core/_label_manager.py
@@ -66,7 +66,7 @@
print_values = _print_values(labels_list, n=10)
type_str = f": {related_model}" if print_types else ""
labels_msg += f" .{related_name}{type_str} = {print_values}\n"
except Exception:
except Exception: # noqa: S112
continue
msg = ""
if labels_msg:
@@ -102,7 +102,7 @@
records = registry.from_values(label_uids, field=field)
if len(records) > 0:
save(records, parents=parents)
except Exception:
except Exception:  # noqa: S110

pass
field = "uid"
label_uids = np.array(
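The # noqa: S112 and # noqa: S110 markers above acknowledge bandit's try-except-continue and try-except-pass rules, which flag silently swallowed exceptions. A hedged sketch of the logging variant those rules push toward (the process helper and the item list are placeholders, not code from this repository):

import logging

logger = logging.getLogger(__name__)

def process(item):
    # placeholder for the real per-record work
    return str(item)

for item in [1, 2, 3]:
    try:
        process(item)
    except Exception as e:
        # same control flow as try-except-continue, but the failure leaves a trace
        logger.debug(f"skipping {item!r}: {e}")
        continue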
5 changes: 4 additions & 1 deletion lamindb/core/_mapped_collection.py
@@ -107,7 +107,10 @@
parallel: bool = False,
dtype: str | None = None,
):
assert join in {None, "inner", "outer"}
if join not in {None, "inner", "outer"}:  # pragma: nocover
    raise ValueError(
        f"join must be one of None, 'inner', or 'outer' but was {type(join)}"
    )

if layers_keys is None:
self.layers_keys = ["X"]
4 changes: 2 additions & 2 deletions lamindb/core/_run_context.py
@@ -43,7 +43,7 @@ def get_uid_ext(version: str) -> str:
# merely zero-padding the nbproject version such that the base62 encoding is
# at least 4 characters long doesn't yield sufficiently diverse hashes and
# leads to collisions; it'd be nice because the uid_ext would be ordered
return encodebytes(hashlib.md5(version.encode()).digest())[:4]
return encodebytes(hashlib.md5(version.encode()).digest())[:4] # noqa: S324


def update_stem_uid_or_version(
@@ -113,7 +113,7 @@ def get_notebook_name_colab() -> str:

ip = gethostbyname(gethostname()) # 172.28.0.12
try:
name = get(f"http://{ip}:9000/api/sessions").json()[0]["name"]
name = get(f"http://{ip}:9000/api/sessions").json()[0]["name"] # noqa: S113
except Exception:
logger.warning(
"could not get notebook name from Google Colab, using: notebook.ipynb"
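Two different rules are suppressed above: S324 flags hashlib.md5 because MD5 is not collision-resistant (here it only derives a short, non-cryptographic uid extension, hence the noqa), and S113 flags requests calls without a timeout. Hedged sketches of the non-suppression alternatives (illustrative, not what the PR does):

import hashlib
import requests

# Python 3.9+: state the non-cryptographic intent explicitly instead of silencing S324
digest = hashlib.md5(b"0.1.0", usedforsecurity=False).digest()

# An explicit timeout keeps a missing or hung endpoint from blocking forever (URL is illustrative)
try:
    requests.get("http://127.0.0.1:9000/api/sessions", timeout=5)
except requests.RequestException:
    pass  # no local notebook server running; the point is the bounded timeout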
3 changes: 2 additions & 1 deletion lamindb/core/_settings.py
@@ -108,7 +108,8 @@ def sync_git_repo(self, value) -> None:
For example: `ln.sync_git_repo = https://github.com/laminlabs/redun-lamin`
"""
self._sync_git_repo = sanitize_git_repo_url(value)
assert self._sync_git_repo.startswith("https://")
if not self._sync_git_repo.startswith("https://"): # pragma: nocover
raise ValueError("git repository URL must start with 'https://'.")

@property
def storage(self) -> StorageSettings:
35 changes: 21 additions & 14 deletions lamindb/core/_sync_git.py
@@ -24,8 +24,7 @@ def get_git_repo_from_remote() -> Path:
f"running outside of synched git repo, cloning {repo_url} into {repo_dir}"
)
result = subprocess.run(
f"git clone --depth 10 {repo_url}.git",
shell=True,
["git", "clone", "--depth", "10", f"{repo_url}.git"],
capture_output=True,
cwd=setup_settings.storage.cache_dir,
)
@@ -36,8 +35,7 @@

def check_local_git_repo() -> bool:
result = subprocess.run(
"git config --get remote.origin.url",
shell=True,
["git", "config", "--get remote.origin.url"],
capture_output=True,
)
result_str = result.stdout.decode().strip()
@@ -55,10 +53,9 @@


def get_git_commit_hash(blob_hash: str, repo_dir: Path | None = None) -> str | None:
command = f"git log --find-object={blob_hash} --pretty=format:%H"
command = ["git", "log", f"--find-object={blob_hash}", "--pretty=format:%H"]
result = subprocess.run(
command,
shell=True,
capture_output=True,
cwd=repo_dir,
)
@@ -68,9 +65,6 @@ def get_git_commit_hash(blob_hash: str, repo_dir: Path | None = None) -> str | N
if commit_hash == "" or result.returncode == 1:
return None
else:
assert (
len(commit_hash) == 40
), f"commit hash |{commit_hash}| is not 40 characters long"
return commit_hash


@@ -82,21 +76,34 @@ def get_filepath_within_git_repo(
# from anywhere in the repo, hence, let's get the root
repo_root = (
subprocess.run(
"git rev-parse --show-toplevel",
shell=True,
["git", "rev-parse", "--show-toplevel"],
capture_output=True,
cwd=repo_dir,
)
.stdout.decode()
.strip()
)
command = f"git ls-tree -r {commit_hash} | grep -E {blob_hash}"
# Run the git commands separately to circumvent spawning a shell
git_command = ["git", "ls-tree", "-r", commit_hash]
git_process = subprocess.Popen(
git_command,
stdout=subprocess.PIPE,
cwd=repo_root,
)

grep_command = ["grep", "-E", blob_hash]
result = subprocess.run(
command,
shell=True,
grep_command,
stdin=git_process.stdout,
capture_output=True,
cwd=repo_root,
)

# Close the stdout to allow git_process to receive a SIGPIPE if grep_command exits
git_process.stdout.close()
git_process.wait()

command = " ".join(git_command) + " | " + " ".join(grep_command)
if result.returncode != 0 and result.stderr.decode() != "":
raise RuntimeError(f"{command}\n{result.stderr.decode()}")
if len(result.stdout.decode()) == 0:
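The Popen-plus-grep pipeline above reproduces git ls-tree -r <commit> | grep -E <blob> without spawning a shell. A hedged alternative sketch that avoids the second process entirely by filtering in Python (the function name and arguments mirror the surrounding code but are assumptions here, not part of this PR):

from __future__ import annotations

import subprocess
from pathlib import Path

def find_blob_path(commit_hash: str, blob_hash: str, repo_root: Path) -> str | None:
    # List the whole tree once, then filter the output in Python instead of piping to grep.
    ls_tree = subprocess.run(
        ["git", "ls-tree", "-r", commit_hash],
        capture_output=True,
        text=True,
        cwd=repo_root,
        check=True,
    )
    for line in ls_tree.stdout.splitlines():
        if blob_hash in line:
            # each line looks like "<mode> blob <blob_hash>\t<path>"
            return line.split("\t", 1)[1]
    return None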
6 changes: 5 additions & 1 deletion lamindb/core/_track_environment.py
@@ -15,7 +15,11 @@ def track_environment(run: Run) -> None:
# create a requirements.txt
# we don't create a conda environment.yml mostly for its slowness
try:
result = subprocess.run(f"pip freeze > {str(filepath)}", shell=True)
with open(filepath, "w") as f:
result = subprocess.run(
["pip", "freeze"],
stdout=f,
)
except OSError as e:
result = None
logger.warning(f"could not run pip freeze with error {e}")
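The pip freeze call above now writes through a Python-level file handle instead of a shell redirect, which is what S602 flags. A further hedged refinement, not part of this diff, is to pin the interpreter so the frozen list reflects the environment of the running Python rather than whichever pip is first on PATH:

import subprocess
import sys
from pathlib import Path

filepath = Path("run_env_pip.txt")  # illustrative path
with open(filepath, "w") as f:
    subprocess.run([sys.executable, "-m", "pip", "freeze"], stdout=f, check=True)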
12 changes: 8 additions & 4 deletions lamindb/core/storage/_backed_access.py
@@ -209,8 +209,10 @@
try:
ds = CSRDataset(elem)
result = _subset_sparse(ds, indices)
except Exception:
pass
except Exception as e:
[Review comment, Member] Is this the only way this can be done, @Koncopd?

logger.debug(

f"Encountered an exception while attempting to subset a sparse dataset by indices.\n{e}"
)
if result is None:
raise ValueError(
"Can not get a subset of the element of type"
@@ -307,8 +309,10 @@
try:
ds = CSRDataset(elem)
return _subset_sparse(ds, indices)
except Exception:
pass
except Exception as e:
[Review comment, Member] Same here, @Koncopd.

logger.debug(

f"Encountered an exception while attempting to subset a sparse dataset by indices.\n{e}"
)
raise ValueError(
"Can not get a subset of the element of type"
f" {type(elem).__name__} with an empty spec."
2 changes: 1 addition & 1 deletion lamindb/core/storage/paths.py
@@ -47,7 +47,7 @@ def auto_storage_key_from_artifact(artifact: Artifact):


def auto_storage_key_from_artifact_uid(uid: str, suffix: str, is_dir: bool) -> str:
assert isinstance(suffix, str)
assert isinstance(suffix, str) # noqa: S101 Suffix cannot be None.
if is_dir:
uid_storage = uid[:16] # 16 chars, leave 4 chars for versioning
else:
7 changes: 5 additions & 2 deletions lamindb/integrations/_vitessce.py
@@ -39,9 +39,12 @@
if "url" not in file:
raise ValueError("Each file must have a 'url' key.")
filename = file["url"].split("/")[-1]
assert filename.endswith(
if not filename.endswith(

(".anndata.zarr", ".spatialdata.zarr", ".ome.zarr")
)
):
raise ValueError(

"filename must end with '.anndata.zarr', '.spatialdata.zarr', or '.ome.zarr'."
)
filestem = (
filename.replace(".anndata.zarr", "")
.replace(".spatialdata.zarr", "")