diff --git a/lamindb/_can_curate.py b/lamindb/_can_curate.py index 15dc223fb..0b631b273 100644 --- a/lamindb/_can_curate.py +++ b/lamindb/_can_curate.py @@ -149,6 +149,11 @@ def _inspect( registry = queryset.model model_name = registry._meta.model.__name__ + # do not inspect synonyms if the field is not the name field + inspect_synonyms = True + if hasattr(registry, "_name_field") and field != registry._name_field: + inspect_synonyms = False + # inspect in the DB result_db = inspect( df=_filter_query_based_on_organism( @@ -157,13 +162,14 @@ def _inspect( identifiers=values, field=field, mute=mute, + inspect_synonyms=inspect_synonyms, ) nonval = set(result_db.non_validated).difference(result_db.synonyms_mapper.keys()) if len(nonval) > 0 and registry.__get_schema_name__() == "bionty": try: bionty_result = registry.public(organism=organism, source=source).inspect( - values=nonval, field=field, mute=True + values=nonval, field=field, mute=True, inspect_synonyms=inspect_synonyms ) bionty_validated = bionty_result.validated bionty_mapper = bionty_result.synonyms_mapper @@ -194,7 +200,7 @@ def _inspect( f" {colors.italic('.from_values()')}" ) - nonval = bionty_result.non_validated + nonval = [i for i in bionty_result.non_validated if i not in bionty_mapper] # no bionty source is found except ValueError: logger.warning("no Bionty source found, skipping Bionty validation") diff --git a/lamindb/_from_values.py b/lamindb/_from_values.py index 941103c8a..6f0b14481 100644 --- a/lamindb/_from_values.py +++ b/lamindb/_from_values.py @@ -230,7 +230,16 @@ def create_records_from_source( bionty_df = filter_bionty_df_columns(model=model, public_ontology=public_ontology) # standardize in the bionty reference - result = public_ontology.inspect(iterable_idx, field=field.field.name, mute=True) + # do not inspect synonyms if the field is not the name field + inspect_synonyms = True + if hasattr(model, "_name_field") and field.field.name != model._name_field: + inspect_synonyms = False + result = 
public_ontology.inspect( + iterable_idx, + field=field.field.name, + mute=True, + inspect_synonyms=inspect_synonyms, + ) syn_mapper = result.synonyms_mapper msg_syn: str = "" diff --git a/tests/core/test_can_validate.py b/tests/core/test_can_validate.py index 3ff8dcc47..88ecd4988 100644 --- a/tests/core/test_can_validate.py +++ b/tests/core/test_can_validate.py @@ -117,3 +117,30 @@ def test_set_abbr(): def test_validate_int(): result = ln.User.validate([1, 2], field=ln.User.id) assert result.sum() == 1 + + +def test_synonym_mapping(): + # only the name field can be standardized + bt.Gene.from_source(symbol="TNFRSF4", organism="human").save() + + bt_result = bt.Gene.public().inspect( + ["ABC1", "TNFRSF4"], field="symbol", organism="human" + ) + assert bt_result.synonyms_mapper == {"ABC1": "HEATR6"} + + bt_result = bt.Gene.public().inspect( + ["ABC1", "TNFRSF4"], field="symbol", organism="human", inspect_synonyms=False + ) + assert bt_result.synonyms_mapper == {} + + result = bt.Gene.inspect( + ["CD134", "TNFRSF4"], field=bt.Gene.symbol, organism="human" + ) + assert result.synonyms_mapper == {"CD134": "TNFRSF4"} + + result = bt.Gene.inspect( + ["CD134", "TNFRSF4"], field=bt.Gene.ensembl_gene_id, organism="human" + ) + assert result.synonyms_mapper == {} + + bt.Gene.filter().delete() diff --git a/tests/core/test_from_values.py b/tests/core/test_from_values.py index 35b1bfe3b..f1da3b8b7 100644 --- a/tests/core/test_from_values.py +++ b/tests/core/test_from_values.py @@ -118,3 +118,16 @@ def test_from_values_synonyms_aware(): assert isinstance(records[0].source, bt.Source) assert records[0].ontology_id == "CL:0000084" bt.CellType.filter().all().delete() + + +def test_standardize(): + # only the name field can be standardized + results = bt.Gene.from_values( + ["HES4", "TNFRSF4"], field=bt.Gene.ensembl_gene_id, organism="human" + ) + assert len(results) == 0 + + results = bt.Gene.from_values( + ["HES4", "TNFRSF4"], field=bt.Gene.symbol, organism="human" + ) + assert 
len(results) == 2