From 6c3c6c6c7d354f13be66c8b0c0023c280a5206d9 Mon Sep 17 00:00:00 2001 From: Sunny Sun <38218185+sunnyosun@users.noreply.github.com> Date: Wed, 21 Aug 2024 16:44:46 +0200 Subject: [PATCH] =?UTF-8?q?=E2=9C=A8=20Allow=20add=20source=20to=20entity?= =?UTF-8?q?=20(#1824)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- lamindb/_can_validate.py | 8 ++++++-- lamindb/_from_values.py | 24 ++++++++++++++---------- sub/bionty | 2 +- tests/test_uid.py | 26 +++++++++++++++++++++----- 4 files changed, 42 insertions(+), 18 deletions(-) diff --git a/lamindb/_can_validate.py b/lamindb/_can_validate.py index 99a76d79e..4899ad44d 100644 --- a/lamindb/_can_validate.py +++ b/lamindb/_can_validate.py @@ -332,7 +332,9 @@ def _standardize( # here, we can safely import bionty from bionty._bionty import create_or_get_organism_record - organism_record = create_or_get_organism_record(organism=organism, orm=registry) + organism_record = create_or_get_organism_record( + organism=organism, registry=registry + ) organism = ( organism_record.name if organism_record is not None else organism_record ) @@ -514,7 +516,9 @@ def _filter_query_based_on_organism( # here, we can safely import bionty from bionty._bionty import create_or_get_organism_record - organism_record = create_or_get_organism_record(organism=organism, orm=registry) + organism_record = create_or_get_organism_record( + organism=organism, registry=registry + ) if organism_record is not None: queryset = queryset.filter(organism__name=organism_record.name) diff --git a/lamindb/_from_values.py b/lamindb/_from_values.py index 1232b4128..7fa8b03bc 100644 --- a/lamindb/_from_values.py +++ b/lamindb/_from_values.py @@ -25,9 +25,9 @@ def get_or_create_records( mute: bool = False, ) -> list[Record]: """Get or create records from iterables.""" - Record = field.field.model + registry = field.field.model if create: - return [Record(**{field.field.name: value}) for value in iterable] + return [registry(**{field.field.name: value}) for value in iterable] creation_search_names = settings.creation.search_names feature: Feature = None organism = _get_organism_record(field, organism) @@ -57,21 +57,23 @@ def get_or_create_records( and records[0].source_id ): source_record = records[0].source - if not source_record and hasattr(Record, "public"): + if not source_record and hasattr(registry, "public"): from bionty._bionty import get_source_record - source_record = get_source_record(Record.public(organism=organism)) + source_record = get_source_record( + registry.public(organism=organism), registry + ) if source_record: from bionty.core._add_ontology import check_source_in_db check_source_in_db( - registry=Record, + registry=registry, source=source_record, update=True, ) from_source = not source_record.in_db - elif hasattr(Record, "source_id"): + elif hasattr(registry, "source_id"): from_source = True else: from_source = False @@ -97,14 +99,14 @@ def get_or_create_records( logger.success(msg) s = "" if len(unmapped_values) == 1 else "s" print_values = colors.yellow(_print_values(unmapped_values)) - name = Record.__name__ + name = registry.__name__ n_nonval = colors.yellow(f"{len(unmapped_values)} non-validated") if not mute: logger.warning( f"{colors.red('did not create')} {name} record{s} for " f"{n_nonval} {colors.italic(f'{field.field.name}{s}')}: {print_values}" ) - if Record.__module__.startswith("bionty.") or Record == ULabel: + if registry.__get_schema_name__() == "bionty" or registry == ULabel: if isinstance(iterable, pd.Series): feature = iterable.name feature_name = None @@ -230,7 +232,7 @@ def create_records_from_source( # for custom records that are not created from public sources return records, iterable_idx # add source record to the kwargs - source_record = get_source_record(public_ontology) + source_record = get_source_record(public_ontology, model) kwargs.update({"source": source_record}) # filter the columns in bionty df based on fields @@ -373,6 +375,8 @@ def _get_organism_record( if _has_organism_field(registry) and check: from bionty._bionty import create_or_get_organism_record - organism_record = create_or_get_organism_record(organism=organism, orm=registry) + organism_record = create_or_get_organism_record( + organism=organism, registry=registry + ) if organism_record is not None: return organism_record diff --git a/sub/bionty b/sub/bionty index 8bb3e013e..4457edb7d 160000 --- a/sub/bionty +++ b/sub/bionty @@ -1 +1 @@ -Subproject commit 8bb3e013e9c60ea6e2241076581fd305425d7ddf +Subproject commit 4457edb7ddde6e2e1edf54034ac868acb4bf6c2c diff --git a/tests/test_uid.py b/tests/test_uid.py index 02e44af49..3f8c6a188 100644 --- a/tests/test_uid.py +++ b/tests/test_uid.py @@ -1,18 +1,14 @@ import bionty as bt -import lamindb as ln -import pytest from bionty._bionty import encode_uid -def test_lb_encode_uid(): +def test_bionty_encode_uid(): assert ( encode_uid(bt.Gene, {"ensembl_gene_id": "ENSG00000081059", "symbol": "TCF7"})[ "uid" ] == "7IkHKPl0ScQR" ) - with pytest.raises(AssertionError): - encode_uid(bt.Organism, {"ensembl_gene_id": "ENSG00000081059"}) assert encode_uid(bt.CellType, {"ontology_id": "CL:0000084"})["uid"] == "22LvKd01" assert ( encode_uid(bt.Organism, {"ontology_id": "NCBITaxon:9606", "name": "human"})[ @@ -21,3 +17,23 @@ def test_lb_encode_uid(): == "1dpCL6Td" ) assert encode_uid(bt.Organism, {"name": "human"})["uid"] == "4gQdjtxb" + assert ( + encode_uid( + bt.Source, + { + "entity": "Source", + "name": "ensembl", + "version": "release-112", + "organism": "vertebrates", + }, + )["uid"] + == "5MUN" + ) + bt.settings.organism = "human" + assert ( + encode_uid(bt.CellMarker, {"name": "test", "organism": bt.settings.organism})[ + "uid" + ] + == "2dZ52W9noUDK" + ) + bt.settings.organism.delete()