Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

🎨 Fix validator #1513

Merged
merged 4 commits into from
Mar 21, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
24 changes: 2 additions & 22 deletions docs/meta.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -47,31 +47,11 @@
"metadata": {},
"outputs": [],
"source": [
"import lamindb as ln"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "58f1ae0e",
"metadata": {},
"outputs": [],
"source": [
"import lamindb as ln\n",
"\n",
"ln.settings.verbosity = \"info\""
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "60925083",
"metadata": {},
"outputs": [],
"source": [
"ln.settings.transform.stem_uid = \"vldHzF3aTAiW\"\n",
"ln.settings.transform.version = \"1\"\n",
"ln.track()"
]
},
{
"cell_type": "markdown",
"id": "d832785f",
Expand Down
13 changes: 9 additions & 4 deletions lamindb/validation/_anndata_validator.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
from typing import Dict, Optional

import anndata as ad
from lamin_utils import logger
from lnschema_core.types import FieldAttr
from pandas.core.api import DataFrame as DataFrame

Expand Down Expand Up @@ -43,7 +44,7 @@
**kwargs,
)
self._obs_fields = obs_fields
self._fields = {"variables": var_field, **obs_fields}
self._register_variables()

@property
def var_field(self) -> FieldAttr:
Expand Down Expand Up @@ -75,9 +76,6 @@
kwargs=self._kwargs,
)

def register_features(self, validated_only: bool = True, **kwargs) -> None:
self._register_variables(validated_only=validated_only, **kwargs)

def validate(self, **kwargs) -> bool:
"""Validate variables and categorical observations."""
self._add_kwargs(**kwargs)
Expand All @@ -90,6 +88,13 @@

return self._validated

def register_labels(self, feature: str, validated_only: bool = True, **kwargs):
    """Register labels for a feature."""
    # Every feature except "variables" is delegated to the parent class.
    if feature != "variables":
        super().register_labels(feature, validated_only, **kwargs)
        return
    # "variables" goes through this class's own variables registration.
    self._register_variables(validated_only=validated_only, **kwargs)

Check warning on line 96 in lamindb/validation/_anndata_validator.py

View check run for this annotation

Codecov / codecov/patch

lamindb/validation/_anndata_validator.py#L96

Added line #L96 was not covered by tests

def register_artifact(
self,
description: str,
Expand Down
39 changes: 29 additions & 10 deletions lamindb/validation/_register.py
Original file line number Diff line number Diff line change
Expand Up @@ -37,7 +37,8 @@

organism = kwargs.pop("organism", None)
feature_kwargs: Dict = {}
if check_if_registry_needs_organism(feature_field.field.model, organism):
organism = check_if_registry_needs_organism(feature_field.field.model, organism)
if organism is not None:
feature_kwargs["organism"] = organism

if isinstance(data, ad.AnnData):
Expand All @@ -51,7 +52,8 @@
feature = features.get(feature_name)
registry = field.field.model
filter_kwargs = kwargs.copy()
if check_if_registry_needs_organism(registry, organism):
organism = check_if_registry_needs_organism(registry, organism)
if organism is not None:
filter_kwargs["organism"] = organism
df = data.obs if isinstance(data, ad.AnnData) else data
labels = registry.from_values(df[feature_name], field=field, **filter_kwargs)
Expand Down Expand Up @@ -85,13 +87,17 @@
kwargs: Additional keyword arguments to pass to the registry model.
df: A DataFrame to register labels from.
"""
if kwargs is None:
kwargs = {}
filter_kwargs = {} if kwargs is None else kwargs.copy()
registry = field.field.model
if not hasattr(registry, "public"):
validated_only = False

check_if_registry_needs_organism(registry, kwargs.get("organism"))
organism = filter_kwargs.pop("organism", None)
organism = check_if_registry_needs_organism(registry, organism)
# TODO: use organism record here
if organism is not None:
filter_kwargs["organism"] = organism

verbosity = ln.settings.verbosity
try:
ln.settings.verbosity = "error"
Expand All @@ -115,12 +121,12 @@
inspect_result_current.non_validated,
field=field,
using=using,
kwargs=kwargs,
kwargs=filter_kwargs,
)

# for labels that are not registered in the using instance, register them in the current instance
from_values_records = (
registry.from_values(non_validated_labels, field=field, **kwargs)
registry.from_values(non_validated_labels, field=field, **filter_kwargs)
if len(non_validated_labels) > 0
else []
)
Expand All @@ -143,12 +149,14 @@

else:
non_validated_records = []
if "organism" in filter_kwargs:
filter_kwargs["organism"] = _register_organism(name=organism)

Check warning on line 153 in lamindb/validation/_register.py

View check run for this annotation

Codecov / codecov/patch

lamindb/validation/_register.py#L153

Added line #L153 was not covered by tests
for value in labels_registered["without reference"]:
kwargs[field.field.name] = value
filter_kwargs[field.field.name] = value
if registry == ln.Feature:
kwargs["type"] = "category"
filter_kwargs["type"] = "category"

Check warning on line 157 in lamindb/validation/_register.py

View check run for this annotation

Codecov / codecov/patch

lamindb/validation/_register.py#L157

Added line #L157 was not covered by tests
# register non-validated labels
non_validated_records.append(registry(**kwargs))
non_validated_records.append(registry(**filter_kwargs))
ln.save(non_validated_records)

# for ulabels, also register a parent label: is_{feature_name}
Expand Down Expand Up @@ -244,3 +252,14 @@
labels_registered.append(getattr(label_using, field.field.name))
not_registered = inspect_result_using.non_validated
return labels_registered, not_registered


def _register_organism(name: str):
    """Return the organism record named ``name``, registering it if needed.

    An existing ``bt.Organism`` with this name is returned as-is. Otherwise
    the record is created from the public reference and saved.

    Args:
        name: Organism name used for the registry filter and the public lookup.

    Returns:
        The ``bt.Organism`` record.

    Raises:
        ValueError: If ``name`` matches neither a registered organism nor an
            entry in the public reference.
    """
    # imported inside the function rather than at module level
    import bionty as bt

    organism = bt.Organism.filter(name=name).one_or_none()
    if organism is not None:
        return organism

    organism = bt.Organism.from_public(name=name)
    if organism is None:
        # Without this guard an unknown name surfaces as an opaque
        # AttributeError on ``None.save()``.
        raise ValueError(
            f"organism {name!r} is neither registered nor found in the public"
            " reference!\n"
            " → please pass a valid organism name via organism="
        )
    organism.save()
    return organism

Check warning on line 265 in lamindb/validation/_register.py

View check run for this annotation

Codecov / codecov/patch

lamindb/validation/_register.py#L261-L265

Added lines #L261 - L265 were not covered by tests
11 changes: 6 additions & 5 deletions lamindb/validation/_validate.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,15 +21,15 @@ def check_if_registry_needs_organism(
):
"""Check if a registry needs an organism."""
if hasattr(registry, "organism_id"):
if organism is None:
import bionty as bt

if organism is None and bt.settings.organism is None:
raise ValueError(
f"{registry.__name__} registry requires an organism!\n"
" → please pass an organism name via organism="
)
else:
return True
else:
return False
return organism or bt.settings.organism


def validate_categories(
Expand All @@ -50,7 +50,8 @@ def validate_categories(
registry = field.field.model
filter_kwargs = {} # type: Dict[str, str]
organism = kwargs.get("organism")
if check_if_registry_needs_organism(registry, organism):
organism = check_if_registry_needs_organism(registry, organism)
if organism is not None:
filter_kwargs["organism"] = organism
# inspect the default instance
inspect_result = registry.inspect(values, field=field, mute=True, **filter_kwargs)
Expand Down
6 changes: 4 additions & 2 deletions lamindb/validation/_validator.py
Original file line number Diff line number Diff line change
Expand Up @@ -76,7 +76,7 @@
if None (default), the lookup is performed on the instance specified in "using" parameter of the Validator.
if "public", the lookup is performed on the public reference.
"""
fields = {**{"feature": ln.Feature.name}, **self.fields}
fields = {**{"feature": self._feature_field}, **self.fields}
return Lookup(fields=fields, using=using or self._using)

def register_features(self, validated_only: bool = True) -> None:
Expand Down Expand Up @@ -112,7 +112,7 @@
)

def register_labels(self, feature: str, validated_only: bool = True, **kwargs):
"""Register labels records.
"""Register labels for a feature.

Args:
feature: The name of the feature to register.
Expand All @@ -121,6 +121,8 @@
"""
if feature == "all":
self._register_labels_all(validated_only=validated_only, **kwargs)
elif feature == "feature":
self.register_features(validated_only=validated_only)

Check warning on line 125 in lamindb/validation/_validator.py

View check run for this annotation

Codecov / codecov/patch

lamindb/validation/_validator.py#L125

Added line #L125 was not covered by tests
else:
if feature not in self.fields:
raise ValueError(f"feature {feature} is not part of the fields!")
Expand Down
Loading