Skip to content

Commit

Permalink
✨ Allow add source to entity (#1824)
Browse files (browse the repository at this point in the history)
  • Loading branch information
sunnyosun authored Aug 21, 2024
1 parent 754dbe8 commit 6c3c6c6
Show file tree
Hide file tree
Showing 4 changed files with 42 additions and 18 deletions.
8 changes: 6 additions & 2 deletions lamindb/_can_validate.py
Original file line number Diff line number Diff line change
Expand Up @@ -332,7 +332,9 @@ def _standardize(
# here, we can safely import bionty
from bionty._bionty import create_or_get_organism_record

organism_record = create_or_get_organism_record(organism=organism, orm=registry)
organism_record = create_or_get_organism_record(
organism=organism, registry=registry
)
organism = (
organism_record.name if organism_record is not None else organism_record
)
Expand Down Expand Up @@ -514,7 +516,9 @@ def _filter_query_based_on_organism(
# here, we can safely import bionty
from bionty._bionty import create_or_get_organism_record

organism_record = create_or_get_organism_record(organism=organism, orm=registry)
organism_record = create_or_get_organism_record(
organism=organism, registry=registry
)
if organism_record is not None:
queryset = queryset.filter(organism__name=organism_record.name)

Expand Down
24 changes: 14 additions & 10 deletions lamindb/_from_values.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,9 +25,9 @@ def get_or_create_records(
mute: bool = False,
) -> list[Record]:
"""Get or create records from iterables."""
Record = field.field.model
registry = field.field.model
if create:
return [Record(**{field.field.name: value}) for value in iterable]
return [registry(**{field.field.name: value}) for value in iterable]
creation_search_names = settings.creation.search_names
feature: Feature = None
organism = _get_organism_record(field, organism)
Expand Down Expand Up @@ -57,21 +57,23 @@ def get_or_create_records(
and records[0].source_id
):
source_record = records[0].source
if not source_record and hasattr(Record, "public"):
if not source_record and hasattr(registry, "public"):
from bionty._bionty import get_source_record

source_record = get_source_record(Record.public(organism=organism))
source_record = get_source_record(
registry.public(organism=organism), registry
)
if source_record:
from bionty.core._add_ontology import check_source_in_db

check_source_in_db(
registry=Record,
registry=registry,
source=source_record,
update=True,
)

from_source = not source_record.in_db
elif hasattr(Record, "source_id"):
elif hasattr(registry, "source_id"):
from_source = True
else:
from_source = False
Expand All @@ -97,14 +99,14 @@ def get_or_create_records(
logger.success(msg)
s = "" if len(unmapped_values) == 1 else "s"
print_values = colors.yellow(_print_values(unmapped_values))
name = Record.__name__
name = registry.__name__
n_nonval = colors.yellow(f"{len(unmapped_values)} non-validated")
if not mute:
logger.warning(
f"{colors.red('did not create')} {name} record{s} for "
f"{n_nonval} {colors.italic(f'{field.field.name}{s}')}: {print_values}"
)
if Record.__module__.startswith("bionty.") or Record == ULabel:
if registry.__get_schema_name__() == "bionty" or registry == ULabel:
if isinstance(iterable, pd.Series):
feature = iterable.name
feature_name = None
Expand Down Expand Up @@ -230,7 +232,7 @@ def create_records_from_source(
# for custom records that are not created from public sources
return records, iterable_idx
# add source record to the kwargs
source_record = get_source_record(public_ontology)
source_record = get_source_record(public_ontology, model)
kwargs.update({"source": source_record})

# filter the columns in bionty df based on fields
Expand Down Expand Up @@ -373,6 +375,8 @@ def _get_organism_record(
if _has_organism_field(registry) and check:
from bionty._bionty import create_or_get_organism_record

organism_record = create_or_get_organism_record(organism=organism, orm=registry)
organism_record = create_or_get_organism_record(
organism=organism, registry=registry
)
if organism_record is not None:
return organism_record
2 changes: 1 addition & 1 deletion sub/bionty
Submodule bionty updated 51 files
+11 −2 README.md
+10 −3 bionty/__init__.py
+58 −49 bionty/_bionty.py
+39 −7 bionty/base/__init__.py
+4 −2 bionty/base/entities/_cellline.py
+0 −1 bionty/base/entities/_cellmarker.py
+0 −1 bionty/base/entities/_celltype.py
+0 −1 bionty/base/entities/_developmentalstage.py
+4 −1 bionty/base/entities/_drug.py
+0 −1 bionty/base/entities/_ethnicity.py
+0 −3 bionty/base/entities/_experimentalfactor.py
+0 −1 bionty/base/entities/_gene.py
+0 −1 bionty/base/entities/_organism.py
+0 −2 bionty/base/entities/_pathway.py
+0 −4 bionty/base/entities/_phenotype.py
+0 −1 bionty/base/entities/_protein.py
+0 −1 bionty/base/entities/_tissue.py
+14 −0 bionty/base/sources.yaml
+1 −1 bionty/core/_add_ontology.py
+70 −6 bionty/models.py
+0 −21 docs/developer/updating_source.md
+1 −6 docs/guide.md
+0 −37 docs/guide/comparison.md
+1 −1 docs/guide/concepts.md
+7 −14 docs/guide/config.md
+1 −1 docs/guide/extend.md
+0 −376 docs/guide/ontology.ipynb
+0 −25 docs/guide/search.ipynb
+0 −276 docs/guide/sources.ipynb
+0 −50 docs/guide/validate.ipynb
+1 −1 noxfile.py
+5 −1 pyproject.toml
+2 −2 tests/entities/test_bfxpipeline.py
+2 −2 tests/entities/test_biosample.py
+22 −2 tests/entities/test_cellline.py
+3 −3 tests/entities/test_cellmarker.py
+4 −4 tests/entities/test_celltype.py
+2 −2 tests/entities/test_developmentalstage.py
+6 −6 tests/entities/test_disease.py
+27 −2 tests/entities/test_drug.py
+2 −2 tests/entities/test_ethnicity.py
+3 −3 tests/entities/test_experimentalfactor.py
+4 −4 tests/entities/test_gene.py
+4 −4 tests/entities/test_organism.py
+3 −3 tests/entities/test_pathway.py
+6 −6 tests/entities/test_phenotype.py
+2 −2 tests/entities/test_protein.py
+2 −2 tests/entities/test_tissue.py
+12 −12 tests/test_bionty.py
+2 −2 tests/test_lamindb.py
+2 −2 tests/test_ontology.py
26 changes: 21 additions & 5 deletions tests/test_uid.py
Original file line number Diff line number Diff line change
@@ -1,18 +1,14 @@
import bionty as bt
import lamindb as ln
import pytest
from bionty._bionty import encode_uid


def test_lb_encode_uid():
def test_bionty_encode_uid():
assert (
encode_uid(bt.Gene, {"ensembl_gene_id": "ENSG00000081059", "symbol": "TCF7"})[
"uid"
]
== "7IkHKPl0ScQR"
)
with pytest.raises(AssertionError):
encode_uid(bt.Organism, {"ensembl_gene_id": "ENSG00000081059"})
assert encode_uid(bt.CellType, {"ontology_id": "CL:0000084"})["uid"] == "22LvKd01"
assert (
encode_uid(bt.Organism, {"ontology_id": "NCBITaxon:9606", "name": "human"})[
Expand All @@ -21,3 +17,23 @@ def test_lb_encode_uid():
== "1dpCL6Td"
)
assert encode_uid(bt.Organism, {"name": "human"})["uid"] == "4gQdjtxb"
assert (
encode_uid(
bt.Source,
{
"entity": "Source",
"name": "ensembl",
"version": "release-112",
"organism": "vertebrates",
},
)["uid"]
== "5MUN"
)
bt.settings.organism = "human"
assert (
encode_uid(bt.CellMarker, {"name": "test", "organism": bt.settings.organism})[
"uid"
]
== "2dZ52W9noUDK"
)
bt.settings.organism.delete()

0 comments on commit 6c3c6c6

Please sign in to comment.