Skip to content

Commit

Permalink
🎨 Fix validator (#1513)
Browse files Browse the repository at this point in the history
  • Loading branch information
sunnyosun authored Mar 21, 2024
1 parent b7a9ce5 commit d6e2751
Show file tree
Hide file tree
Showing 4 changed files with 48 additions and 21 deletions.
13 changes: 9 additions & 4 deletions lamindb/validation/_anndata_validator.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
from typing import Dict, Optional

import anndata as ad
from lamin_utils import logger
from lnschema_core.types import FieldAttr
from pandas.core.api import DataFrame as DataFrame

Expand Down Expand Up @@ -43,7 +44,7 @@ def __init__(
**kwargs,
)
self._obs_fields = obs_fields
self._fields = {"variables": var_field, **obs_fields}
self._register_variables()

@property
def var_field(self) -> FieldAttr:
Expand Down Expand Up @@ -75,9 +76,6 @@ def _register_variables(self, validated_only: bool = True, **kwargs):
kwargs=self._kwargs,
)

def register_features(self, validated_only: bool = True, **kwargs) -> None:
    """Register the variables by forwarding to ``_register_variables``.

    Args:
        validated_only: Passed through unchanged as ``validated_only``.
        **kwargs: Passed through unchanged to ``_register_variables``.
    """
    self._register_variables(validated_only=validated_only, **kwargs)

def validate(self, **kwargs) -> bool:
"""Validate variables and categorical observations."""
self._add_kwargs(**kwargs)
Expand All @@ -90,6 +88,13 @@ def validate(self, **kwargs) -> bool:

return self._validated

def register_labels(self, feature: str, validated_only: bool = True, **kwargs):
    """Register the labels of a single feature.

    The special name ``"variables"`` is handled here through
    ``_register_variables``; every other feature name is forwarded unchanged
    to the parent class implementation.

    Args:
        feature: Name of the feature whose labels should be registered.
        validated_only: Forwarded to whichever implementation handles
            ``feature``.
        **kwargs: Extra keyword arguments forwarded alongside.
    """
    if feature != "variables":
        # Not the variables axis: defer to the parent implementation.
        super().register_labels(feature, validated_only, **kwargs)
        return
    self._register_variables(validated_only=validated_only, **kwargs)

def register_artifact(
self,
description: str,
Expand Down
39 changes: 29 additions & 10 deletions lamindb/validation/_register.py
Original file line number Diff line number Diff line change
Expand Up @@ -37,7 +37,8 @@ def register_artifact(

organism = kwargs.pop("organism", None)
feature_kwargs: Dict = {}
if check_if_registry_needs_organism(feature_field.field.model, organism):
organism = check_if_registry_needs_organism(feature_field.field.model, organism)
if organism is not None:
feature_kwargs["organism"] = organism

if isinstance(data, ad.AnnData):
Expand All @@ -51,7 +52,8 @@ def register_artifact(
feature = features.get(feature_name)
registry = field.field.model
filter_kwargs = kwargs.copy()
if check_if_registry_needs_organism(registry, organism):
organism = check_if_registry_needs_organism(registry, organism)
if organism is not None:
filter_kwargs["organism"] = organism
df = data.obs if isinstance(data, ad.AnnData) else data
labels = registry.from_values(df[feature_name], field=field, **filter_kwargs)
Expand Down Expand Up @@ -85,13 +87,17 @@ def register_labels(
kwargs: Additional keyword arguments to pass to the registry model.
df: A DataFrame to register labels from.
"""
if kwargs is None:
kwargs = {}
filter_kwargs = {} if kwargs is None else kwargs.copy()
registry = field.field.model
if not hasattr(registry, "public"):
validated_only = False

check_if_registry_needs_organism(registry, kwargs.get("organism"))
organism = filter_kwargs.pop("organism", None)
organism = check_if_registry_needs_organism(registry, organism)
# TODO: use organism record here
if organism is not None:
filter_kwargs["organism"] = organism

verbosity = ln.settings.verbosity
try:
ln.settings.verbosity = "error"
Expand All @@ -115,12 +121,12 @@ def register_labels(
inspect_result_current.non_validated,
field=field,
using=using,
kwargs=kwargs,
kwargs=filter_kwargs,
)

# for labels that are not registered in the using instance, register them in the current instance
from_values_records = (
registry.from_values(non_validated_labels, field=field, **kwargs)
registry.from_values(non_validated_labels, field=field, **filter_kwargs)
if len(non_validated_labels) > 0
else []
)
Expand All @@ -143,12 +149,14 @@ def register_labels(

else:
non_validated_records = []
if "organism" in filter_kwargs:
filter_kwargs["organism"] = _register_organism(name=organism)
for value in labels_registered["without reference"]:
kwargs[field.field.name] = value
filter_kwargs[field.field.name] = value
if registry == ln.Feature:
kwargs["type"] = "category"
filter_kwargs["type"] = "category"
# register non-validated labels
non_validated_records.append(registry(**kwargs))
non_validated_records.append(registry(**filter_kwargs))
ln.save(non_validated_records)

# for ulabels, also register a parent label: is_{feature_name}
Expand Down Expand Up @@ -244,3 +252,14 @@ def register_labels_from_using_instance(
labels_registered.append(getattr(label_using, field.field.name))
not_registered = inspect_result_using.non_validated
return labels_registered, not_registered


def _register_organism(name: str):
    """Return the ``bionty.Organism`` record named *name*, saving it if needed.

    The current instance is queried first. If no record matches, one is
    created from the public reference via ``Organism.from_public`` and saved.

    Args:
        name: Name of the organism to look up.

    Returns:
        The existing or newly saved organism record.

    Raises:
        ValueError: If no record named *name* exists in the current instance
            and the public reference does not provide one either.
    """
    # Function-scope import: bionty is only loaded when this helper is called.
    import bionty as bt

    organism = bt.Organism.filter(name=name).one_or_none()
    if organism is not None:
        return organism

    organism = bt.Organism.from_public(name=name)
    if organism is None:
        # Without this guard an unknown name surfaces as an AttributeError on
        # ``None.save()``, which hides the actual cause from the caller.
        raise ValueError(
            f"Organism {name!r} not found in the current instance or the"
            " public reference!\n"
            f"  → please register it first: bt.Organism(name={name!r}).save()"
        )
    organism.save()
    return organism
11 changes: 6 additions & 5 deletions lamindb/validation/_validate.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,15 +21,15 @@ def check_if_registry_needs_organism(
):
"""Check if a registry needs an organism."""
if hasattr(registry, "organism_id"):
if organism is None:
import bionty as bt

if organism is None and bt.settings.organism is None:
raise ValueError(
f"{registry.__name__} registry requires an organism!\n"
" → please pass an organism name via organism="
)
else:
return True
else:
return False
return organism or bt.settings.organism


def validate_categories(
Expand All @@ -50,7 +50,8 @@ def validate_categories(
registry = field.field.model
filter_kwargs = {} # type: Dict[str, str]
organism = kwargs.get("organism")
if check_if_registry_needs_organism(registry, organism):
organism = check_if_registry_needs_organism(registry, organism)
if organism is not None:
filter_kwargs["organism"] = organism
# inspect the default instance
inspect_result = registry.inspect(values, field=field, mute=True, **filter_kwargs)
Expand Down
6 changes: 4 additions & 2 deletions lamindb/validation/_validator.py
Original file line number Diff line number Diff line change
Expand Up @@ -76,7 +76,7 @@ def lookup(self, using: Optional[str] = None) -> Lookup:
if None (default), the lookup is performed on the instance specified in "using" parameter of the Validator.
if "public", the lookup is performed on the public reference.
"""
fields = {**{"feature": ln.Feature.name}, **self.fields}
fields = {**{"feature": self._feature_field}, **self.fields}
return Lookup(fields=fields, using=using or self._using)

def register_features(self, validated_only: bool = True) -> None:
Expand Down Expand Up @@ -112,7 +112,7 @@ def register_features(self, validated_only: bool = True) -> None:
)

def register_labels(self, feature: str, validated_only: bool = True, **kwargs):
"""Register labels records.
"""Register labels for a feature.
Args:
feature: The name of the feature to register.
Expand All @@ -121,6 +121,8 @@ def register_labels(self, feature: str, validated_only: bool = True, **kwargs):
"""
if feature == "all":
self._register_labels_all(validated_only=validated_only, **kwargs)
elif feature == "feature":
self.register_features(validated_only=validated_only)
else:
if feature not in self.fields:
raise ValueError(f"feature {feature} is not part of the fields!")
Expand Down

0 comments on commit d6e2751

Please sign in to comment.