Skip to content

Commit

Permalink
Merge branch 'main' into mudata-type-checking
Browse files Browse the repository at this point in the history
  • Loading branch information
sunnyosun authored Jul 17, 2024
2 parents 7aa1ac5 + 30c3ead commit 4283a54
Show file tree
Hide file tree
Showing 22 changed files with 107 additions and 58 deletions.
4 changes: 2 additions & 2 deletions lamindb/_annotate.py
Original file line number Diff line number Diff line change
Expand Up @@ -1085,7 +1085,7 @@ def save_artifact(

artifact = None
if data_is_anndata(data):
assert adata is not None
assert adata is not None # noqa: S101
artifact = Artifact.from_anndata(data, description=description, **kwargs)
artifact.n_observations = adata.shape[0]
data = adata
Expand Down Expand Up @@ -1306,7 +1306,7 @@ def log_saved_labels(
def save_ulabels_with_parent(values: list[str], field: FieldAttr, key: str) -> None:
"""Save a parent label for the given labels."""
registry = field.field.model
assert registry == ULabel
assert registry == ULabel # noqa: S101
all_records = registry.from_values(values, field=field)
is_feature = registry.filter(name=f"is_{key}").one_or_none()
if is_feature is None:
Expand Down
6 changes: 3 additions & 3 deletions lamindb/_artifact.py
Original file line number Diff line number Diff line change
Expand Up @@ -424,7 +424,7 @@ def log_storage_hint(
logger.hint(hint)


def data_is_anndata(data: AnnData | UPathStr):
def data_is_anndata(data: AnnData | UPathStr) -> bool:
if isinstance(data, AnnData):
return True
if isinstance(data, (str, Path, UPath)):
Expand All @@ -444,7 +444,7 @@ def data_is_anndata(data: AnnData | UPathStr):
return False


def data_is_mudata(data: MuData | UPathStr):
def data_is_mudata(data: MuData | UPathStr) -> bool:
if _mudata_is_installed():
from mudata import MuData

Expand Down Expand Up @@ -948,7 +948,7 @@ def delete(
)
delete_record = response == "y"
else:
assert permanent
assert permanent # noqa: S101
delete_record = True

if delete_record:
Expand Down
6 changes: 4 additions & 2 deletions lamindb/_can_validate.py
Original file line number Diff line number Diff line change
Expand Up @@ -223,8 +223,10 @@ def set_abbr(self, value: str):
else:
try:
self.add_synonym(value, save=False)
except Exception: # pragma: no cover
pass
except Exception as e: # pragma: no cover
logger.debug(
f"Encountered an Exception while attempting to add synonyms.\n{e}"
)

if not self._state.adding:
self.save()
Expand Down
2 changes: 1 addition & 1 deletion lamindb/_collection.py
Original file line number Diff line number Diff line change
Expand Up @@ -100,7 +100,7 @@ def __init__(
else:
if not hasattr(artifacts, "__getitem__"):
raise ValueError("Artifact or List[Artifact] is allowed.")
assert isinstance(artifacts[0], Artifact) # type: ignore
assert isinstance(artifacts[0], Artifact) # type: ignore # noqa: S101
hash, feature_sets = from_artifacts(artifacts) # type: ignore
if meta is not None:
if not isinstance(meta, Artifact):
Expand Down
2 changes: 1 addition & 1 deletion lamindb/_feature.py
Original file line number Diff line number Diff line change
Expand Up @@ -139,7 +139,7 @@ def from_df(cls, df: pd.DataFrame, field: FieldAttr | None = None) -> RecordsLis
finally:
settings.verbosity = verbosity

assert len(features) == len(df.columns)
assert len(features) == len(df.columns) # noqa: S101

# if len(categoricals_with_unmapped_categories) > 0:
# n_max = 20
Expand Down
24 changes: 18 additions & 6 deletions lamindb/_finish.py
Original file line number Diff line number Diff line change
Expand Up @@ -41,7 +41,10 @@ def finish() -> None:
if run_context.run is None:
raise TrackNotCalled("Please run `ln.track()` before `ln.finish()`")
if run_context.path is None:
assert run_context.transform.type not in {"script", "notebook"}
if run_context.transform.type in {"script", "notebook"}:
raise ValueError(
f"Transform type must be 'script' or 'notebook' but is {run_context.transform.type}."
)
run_context.run.finished_at = datetime.now(timezone.utc)
run_context.run.save()
# nothing else to do
Expand Down Expand Up @@ -107,9 +110,14 @@ def save_run_context_core(
# convert the notebook file to html
# log_level is set to 40 to silence the nbconvert logging
subprocess.run(
"jupyter nbconvert --to html"
f" '{filepath.as_posix()}' --Application.log_level=40",
shell=True,
[
"jupyter",
"nbconvert",
"--to",
"html",
filepath.as_posix(),
"--Application.log_level=40",
],
check=True,
)
# move the temporary file into the cache dir in case it's accidentally
Expand All @@ -129,8 +137,12 @@ def save_run_context_core(
source_code_path = ln_setup.settings.storage.cache_dir / filepath.name
shutil.copy2(filepath, source_code_path) # copy
subprocess.run(
f"nbstripout '{source_code_path}' --extra-keys='metadata.version metadata.kernelspec metadata.language_info metadata.pygments_lexer metadata.name metadata.file_extension'",
shell=True,
[
"nbstripout",
source_code_path,
"--extra-keys",
"metadata.version metadata.kernelspec metadata.language_info metadata.pygments_lexer metadata.name metadata.file_extension",
],
check=True,
)
# find initial versions of source codes and html reports
Expand Down
10 changes: 5 additions & 5 deletions lamindb/core/_feature_manager.py
Original file line number Diff line number Diff line change
Expand Up @@ -432,7 +432,7 @@ def _add_values(
if isinstance(keys, DICT_KEYS_TYPE):
keys = list(keys) # type: ignore
# deal with other cases later
assert all(isinstance(key, str) for key in keys)
assert all(isinstance(key, str) for key in keys) # noqa: S101
registry = feature_param_field.field.model
is_param = registry == Param
model = Param if is_param else Feature
Expand Down Expand Up @@ -668,10 +668,10 @@ def _add_set_from_df(
):
"""Add feature set corresponding to column names of DataFrame."""
if isinstance(self._host, Artifact):
assert self._host.accessor == "DataFrame"
assert self._host.accessor == "DataFrame" # noqa: S101
else:
# Collection
assert self._host.artifact.accessor == "DataFrame"
assert self._host.artifact.accessor == "DataFrame" # noqa: S101

# parse and register features
registry = field.field.model
Expand Down Expand Up @@ -699,7 +699,7 @@ def _add_set_from_anndata(
):
"""Add features from AnnData."""
if isinstance(self._host, Artifact):
assert self._host.accessor == "AnnData"
assert self._host.accessor == "AnnData" # noqa: S101
else:
raise NotImplementedError()

Expand Down Expand Up @@ -729,7 +729,7 @@ def _add_set_from_mudata(
if obs_fields is None:
obs_fields = {}
if isinstance(self._host, Artifact):
assert self._host.accessor == "MuData"
assert self._host.accessor == "MuData" # noqa: S101
else:
raise NotImplementedError()

Expand Down
4 changes: 2 additions & 2 deletions lamindb/core/_label_manager.py
Original file line number Diff line number Diff line change
Expand Up @@ -66,7 +66,7 @@ def print_labels(self: HasFeatures, field: str = "name", print_types: bool = Fal
print_values = _print_values(labels_list, n=10)
type_str = f": {related_model}" if print_types else ""
labels_msg += f" .{related_name}{type_str} = {print_values}\n"
except Exception:
except Exception: # noqa: S112
continue
msg = ""
if labels_msg:
Expand Down Expand Up @@ -102,7 +102,7 @@ def validate_labels_registry(
records = registry.from_values(label_uids, field=field)
if len(records) > 0:
save(records, parents=parents)
except Exception:
except Exception: # noqa S110
pass
field = "uid"
label_uids = np.array(
Expand Down
5 changes: 4 additions & 1 deletion lamindb/core/_mapped_collection.py
Original file line number Diff line number Diff line change
Expand Up @@ -107,7 +107,10 @@ def __init__(
parallel: bool = False,
dtype: str | None = None,
):
assert join in {None, "inner", "outer"}
if join not in {None, "inner", "outer"}: # pragma: nocover
raise ValueError(
f"join must be one of None, 'inner, or 'outer' but was {type(join)}"
)

if layers_keys is None:
self.layers_keys = ["X"]
Expand Down
4 changes: 2 additions & 2 deletions lamindb/core/_run_context.py
Original file line number Diff line number Diff line change
Expand Up @@ -43,7 +43,7 @@ def get_uid_ext(version: str) -> str:
# merely zero-padding the nbproject version such that the base62 encoding is
# at least 4 characters long doesn't yields sufficiently diverse hashes and
# leads to collisions; it'd be nice because the uid_ext would be ordered
return encodebytes(hashlib.md5(version.encode()).digest())[:4]
return encodebytes(hashlib.md5(version.encode()).digest())[:4] # noqa: S324


def update_stem_uid_or_version(
Expand Down Expand Up @@ -113,7 +113,7 @@ def get_notebook_name_colab() -> str:

ip = gethostbyname(gethostname()) # 172.28.0.12
try:
name = get(f"http://{ip}:9000/api/sessions").json()[0]["name"]
name = get(f"http://{ip}:9000/api/sessions").json()[0]["name"] # noqa: S113
except Exception:
logger.warning(
"could not get notebook name from Google Colab, using: notebook.ipynb"
Expand Down
3 changes: 2 additions & 1 deletion lamindb/core/_settings.py
Original file line number Diff line number Diff line change
Expand Up @@ -108,7 +108,8 @@ def sync_git_repo(self, value) -> None:
For example: `ln.sync_git_repo = https://github.com/laminlabs/redun-lamin`
"""
self._sync_git_repo = sanitize_git_repo_url(value)
assert self._sync_git_repo.startswith("https://")
if not self._sync_git_repo.startswith("https://"): # pragma: nocover
raise ValueError("git repository URL must start with 'https://'.")

@property
def storage(self) -> StorageSettings:
Expand Down
35 changes: 21 additions & 14 deletions lamindb/core/_sync_git.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,8 +24,7 @@ def get_git_repo_from_remote() -> Path:
f"running outside of synched git repo, cloning {repo_url} into {repo_dir}"
)
result = subprocess.run(
f"git clone --depth 10 {repo_url}.git",
shell=True,
["git", "clone", "--depth", "10", f"{repo_url}.git"],
capture_output=True,
cwd=setup_settings.storage.cache_dir,
)
Expand All @@ -36,8 +35,7 @@ def get_git_repo_from_remote() -> Path:

def check_local_git_repo() -> bool:
result = subprocess.run(
"git config --get remote.origin.url",
shell=True,
["git", "config", "--get remote.origin.url"],
capture_output=True,
)
result_str = result.stdout.decode().strip()
Expand All @@ -55,10 +53,9 @@ def check_local_git_repo() -> bool:


def get_git_commit_hash(blob_hash: str, repo_dir: Path | None = None) -> str | None:
command = f"git log --find-object={blob_hash} --pretty=format:%H"
command = ["git", "log", f"--find-object={blob_hash}", "--pretty=format:%H"]
result = subprocess.run(
command,
shell=True,
capture_output=True,
cwd=repo_dir,
)
Expand All @@ -68,9 +65,6 @@ def get_git_commit_hash(blob_hash: str, repo_dir: Path | None = None) -> str | N
if commit_hash == "" or result.returncode == 1:
return None
else:
assert (
len(commit_hash) == 40
), f"commit hash |{commit_hash}| is not 40 characters long"
return commit_hash


Expand All @@ -82,21 +76,34 @@ def get_filepath_within_git_repo(
# from anywhere in the repo, hence, let's get the root
repo_root = (
subprocess.run(
"git rev-parse --show-toplevel",
shell=True,
["git", "rev-parse", "--show-toplevel"],
capture_output=True,
cwd=repo_dir,
)
.stdout.decode()
.strip()
)
command = f"git ls-tree -r {commit_hash} | grep -E {blob_hash}"
# Run the git commands separately to circumvent spawning a shell
git_command = ["git", "ls-tree", "-r", commit_hash]
git_process = subprocess.Popen(
git_command,
stdout=subprocess.PIPE,
cwd=repo_root,
)

grep_command = ["grep", "-E", blob_hash]
result = subprocess.run(
command,
shell=True,
grep_command,
stdin=git_process.stdout,
capture_output=True,
cwd=repo_root,
)

# Close the stdout to allow git_process to receive a SIGPIPE if grep_command exits
git_process.stdout.close()
git_process.wait()

command = " ".join(git_command) + " | " + " ".join(grep_command)
if result.returncode != 0 and result.stderr.decode() != "":
raise RuntimeError(f"{command}\n{result.stderr.decode()}")
if len(result.stdout.decode()) == 0:
Expand Down
6 changes: 5 additions & 1 deletion lamindb/core/_track_environment.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,11 @@ def track_environment(run: Run) -> None:
# create a requirements.txt
# we don't create a conda environment.yml mostly for its slowness
try:
result = subprocess.run(f"pip freeze > {str(filepath)}", shell=True)
with open(filepath, "w") as f:
result = subprocess.run(
["pip", "freeze"],
stdout=f,
)
except OSError as e:
result = None
logger.warning(f"could not run pip freeze with error {e}")
Expand Down
12 changes: 8 additions & 4 deletions lamindb/core/storage/_backed_access.py
Original file line number Diff line number Diff line change
Expand Up @@ -209,8 +209,10 @@ def safer_read_partial(elem, indices):
try:
ds = CSRDataset(elem)
result = _subset_sparse(ds, indices)
except Exception:
pass
except Exception as e:
logger.debug(
f"Encountered an exception while attempting to subset a sparse dataset by indices.\n{e}"
)
if result is None:
raise ValueError(
"Can not get a subset of the element of type"
Expand Down Expand Up @@ -307,8 +309,10 @@ def safer_read_partial(elem, indices): # noqa
try:
ds = CSRDataset(elem)
return _subset_sparse(ds, indices)
except Exception:
pass
except Exception as e:
logger.debug(
f"Encountered an exception while attempting to subset a sparse dataset by indices.\n{e}"
)
raise ValueError(
"Can not get a subset of the element of type"
f" {type(elem).__name__} with an empty spec."
Expand Down
2 changes: 1 addition & 1 deletion lamindb/core/storage/paths.py
Original file line number Diff line number Diff line change
Expand Up @@ -47,7 +47,7 @@ def auto_storage_key_from_artifact(artifact: Artifact):


def auto_storage_key_from_artifact_uid(uid: str, suffix: str, is_dir: bool) -> str:
assert isinstance(suffix, str)
assert isinstance(suffix, str) # noqa: S101 Suffix cannot be None.
if is_dir:
uid_storage = uid[:16] # 16 chars, leave 4 chars for versioning
else:
Expand Down
7 changes: 5 additions & 2 deletions lamindb/integrations/_vitessce.py
Original file line number Diff line number Diff line change
Expand Up @@ -39,9 +39,12 @@ def save_vitessce_config(vitessce_config: VitessceConfig, description: str) -> A
if "url" not in file:
raise ValueError("Each file must have a 'url' key.")
filename = file["url"].split("/")[-1]
assert filename.endswith(
if not filename.endswith(
(".anndata.zarr", ".spatialdata.zarr", ".ome.zarr")
)
):
raise ValueError(
"filename must end with '.anndata.zarr', '.spatialdata.zarr', or '.ome.zarr'."
)
filestem = (
filename.replace(".anndata.zarr", "")
.replace(".spatialdata.zarr", "")
Expand Down
Loading

0 comments on commit 4283a54

Please sign in to comment.