Skip to content

Commit

Permalink
Use curies' Converter in strict=False mode. (#409)
Browse files Browse the repository at this point in the history
- [x] Fixes #408 
- [x] Fixes #269 

This way, when a user-supplied `prefix_map` contains a duplicate
`uri_prefix` or `prefix`, `curies` will not raise an error. `sssom-py`
already gives priority to a user-defined prefix map over the default one
(which is now the EPM from bioregistry).

---------

Co-authored-by: Nico Matentzoglu <[email protected]>
  • Loading branch information
hrshdhgd and matentzn authored Aug 11, 2023
1 parent 5f618de commit a23c4dd
Show file tree
Hide file tree
Showing 4 changed files with 85 additions and 1 deletion.
2 changes: 1 addition & 1 deletion src/sssom/parsers.py
Original file line number Diff line number Diff line change
Expand Up @@ -667,7 +667,7 @@ def from_obographs(
:return: An SSSOM data frame (MappingSetDataFrame)
"""
_ensure_prefix_map(prefix_map)
converter = Converter.from_prefix_map(prefix_map)
converter = Converter.from_prefix_map(prefix_map, strict=False)
ms = _init_mapping_set(meta)
mlist: List[Mapping] = []
# bad_attrs = {}
Expand Down
2 changes: 2 additions & 0 deletions tests/data/hp-subset-metadata.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
curie_map:
HP: http://example/obo/HP_
59 changes: 59 additions & 0 deletions tests/data/hp-subset.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,59 @@
{
"graphs" : [ {
"nodes" : [ {
"id" : "http://example/obo/HP_0011770",
"meta" : {
"definition" : {
"val" : "A type of hyperparathyroidism that occurs following kidney transplantation, which is a treatment for secondary hyperparathyroidism. Although kidney transplantation leads to a normalization of serum calcium and parathyroid hormone in most patients. The state of persistent hypercalcemia and hyperparathyroidism is referred to as tertiary hyperparathyroidism.",
"xrefs" : [ "DDD:spark" ]
},
"xrefs" : [ {
"val" : "SNOMEDCT_US:78200003"
}, {
"val" : "UMLS:C0271858"
} ],
"basicPropertyValues" : [ {
"pred" : "http://www.geneontology.org/formats/oboInOwl#created_by",
"val" : "peter"
}, {
"pred" : "http://www.geneontology.org/formats/oboInOwl#creation_date",
"val" : "2012-04-22T04:38:20Z"
}, {
"pred" : "http://www.geneontology.org/formats/oboInOwl#hasOBONamespace",
"val" : "human_phenotype"
} ]
},
"type" : "CLASS",
"lbl" : "Tertiary hyperparathyroidism"
}, {
"id" : "http://example/obo/HP_0009450",
"meta" : {
"definition" : {
"val" : "Increased width of the proximal phalanx of the 3rd finger.",
"xrefs" : [ "HPO:curators" ]
},
"xrefs" : [ {
"val" : "UMLS:C4024355"
} ],
"synonyms" : [ {
"pred" : "hasExactSynonym",
"val" : "Broad innermost bone of middle finger",
"xrefs" : [ "ORCID:0000-0001-5208-3432" ],
"synonymType" : "http://purl.obolibrary.org/obo/hp#layperson"
} ],
"basicPropertyValues" : [ {
"pred" : "http://www.geneontology.org/formats/oboInOwl#hasOBONamespace",
"val" : "human_phenotype"
}, {
"pred" : "http://www.geneontology.org/formats/oboInOwl#created_by",
"val" : "doelkens"
}, {
"pred" : "http://www.geneontology.org/formats/oboInOwl#creation_date",
"val" : "2009-01-14T04:33:41Z"
} ]
},
"type" : "CLASS",
"lbl" : "Broad proximal phalanx of the 3rd finger"
} ]
} ]
}
23 changes: 23 additions & 0 deletions tests/test_parsers.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@
from rdflib import Graph

from sssom.context import _raise_on_invalid_prefix_map, get_default_metadata
from sssom.io import parse_file
from sssom.parsers import (
from_alignment_minidom,
from_obographs,
Expand Down Expand Up @@ -223,3 +224,25 @@ def test_read_sssom_table(self):
self.assertEqual(imported_df.iloc[idx][k], v)
else:
self.assertEqual(imported_df.iloc[idx][k], v)

def test_parse_obographs_merged(self):
    """Test parsing OBO Graph JSON using custom prefix_map.

    Parses ``hp-subset.json`` with ``prefix_map_mode="merged"`` and
    ``hp-subset-metadata.yml`` as metadata, writes the result to
    ``hp-subset-parse.tsv``, reads that file back, and checks that every
    entry of the metadata file's ``curie_map`` is present, unchanged, in
    the prefix map of the re-read mapping set.
    """
    hp_json = f"{test_data_dir}/hp-subset.json"
    hp_meta = f"{test_data_dir}/hp-subset-metadata.yml"
    outfile = f"{test_out_dir}/hp-subset-parse.tsv"

    # Explicit encoding: do not depend on the platform's locale default.
    with open(hp_meta, "r", encoding="utf-8") as f:
        custom_curie_map = yaml.safe_load(f)["curie_map"]

    with open(outfile, "w", encoding="utf-8") as f:
        parse_file(
            input_path=hp_json,
            prefix_map_mode="merged",
            clean_prefixes=True,
            input_format="obographs-json",
            metadata_path=hp_meta,
            output=f,
        )

    # Re-read only after the handle above is closed, so the TSV is flushed.
    msdf = parse_sssom_table(outfile)

    # Same pass condition as ``custom.items() <= parsed.items()``, but a
    # failure names the offending prefix instead of "False is not true".
    for prefix, uri_prefix in custom_curie_map.items():
        with self.subTest(prefix=prefix):
            self.assertIn(prefix, msdf.prefix_map)
            self.assertEqual(uri_prefix, msdf.prefix_map[prefix])

0 comments on commit a23c4dd

Please sign in to comment.