Skip to content

Commit

Permalink
Consolidate converter loading
Browse files Browse the repository at this point in the history
  • Loading branch information
cthoyt committed Jul 28, 2023
1 parent 1016e14 commit 5c6d75c
Show file tree
Hide file tree
Showing 8 changed files with 73 additions and 134 deletions.
110 changes: 36 additions & 74 deletions src/sssom/context.py
Original file line number Diff line number Diff line change
@@ -1,69 +1,45 @@
"""Utilities for loading JSON-LD contexts."""

import json
import logging
import uuid
from typing import Any, Mapping, Optional, Union
from functools import lru_cache
from typing import Union

import curies
import pkg_resources
from curies import Converter
from rdflib.namespace import is_ncname

from sssom.constants import EXTENDED_PREFIX_MAP

from .constants import (
EXTENDED_PREFIX_MAP,
PREFIX_MAP_MODE_MERGED,
PREFIX_MAP_MODE_METADATA_ONLY,
PREFIX_MAP_MODE_SSSOM_DEFAULT_ONLY,
)
from .typehints import Metadata, PrefixMap

SSSOM_URI_PREFIX = "https://w3id.org/sssom/"
SSSOM_BUILT_IN_PREFIXES = ("sssom", "owl", "rdf", "rdfs", "skos", "semapv")
DEFAULT_MAPPING_SET_ID = f"{SSSOM_URI_PREFIX}mappings/{uuid.uuid4()}"
DEFAULT_LICENSE = f"{SSSOM_URI_PREFIX}license/unspecified"
SSSOM_CONTEXT = pkg_resources.resource_filename(
"sssom_schema", "context/sssom_schema.context.jsonld"
)


def _get_jsonld_context():
    """Load the SSSOM JSON-LD context document from disk.

    The file located at ``SSSOM_CONTEXT`` is opened and parsed as JSON.

    :return: The parsed JSON-LD document
    """
    # Be explicit about the encoding instead of relying on the platform's locale default.
    with open(SSSOM_CONTEXT, encoding="utf-8") as file:
        # strict=False lets the decoder accept control characters inside JSON strings
        return json.load(file, strict=False)


def get_internal_converter() -> Converter:
    """Get a converter from the SSSOM internal context.

    String-valued entries of the ``@context`` mapping are used as URI prefixes directly.
    Dictionary-valued entries contribute their ``@id`` only when they also carry a truthy
    ``@prefix`` value. The JSON-LD keyword ``@vocab`` is removed before the converter is built.

    :return: A converter built from the collected prefix map
    """
    prefix_map = {}
    for key, value in _get_jsonld_context()["@context"].items():
        if isinstance(value, str):
            prefix_map[key] = value
        elif isinstance(value, dict) and "@id" in value and value.get("@prefix"):
            prefix_map[key] = value["@id"]
    # Tolerate contexts that have no string-valued ``@vocab`` instead of raising KeyError.
    prefix_map.pop("@vocab", None)
    return Converter.from_prefix_map(prefix_map)
ConverterHint = Union[PrefixMap, None, Converter]


def get_external_converter() -> Converter:
@lru_cache(1)
def get_converter() -> Converter:
"""Get prefix map from bioregistry (obo.epm.json)."""
return Converter.from_extended_prefix_map(EXTENDED_PREFIX_MAP)


def get_built_in_converter() -> Converter:
    """Get a converter restricted to the built-in SSSOM prefixes.

    Only entries of the JSON-LD ``@context`` whose key is listed in ``SSSOM_BUILT_IN_PREFIXES``
    and whose value is a plain string are kept.

    :return: A converter built from the selected entries
    """
    entries = _get_jsonld_context()["@context"]
    built_in = {
        prefix: uri_prefix
        for prefix, uri_prefix in entries.items()
        if prefix in SSSOM_BUILT_IN_PREFIXES and isinstance(uri_prefix, str)
    }
    return Converter.from_prefix_map(built_in)


def add_built_in_prefixes_to_prefix_map(
prefix_map: Union[Converter, PrefixMap, None] = None,
converter = Converter.from_extended_prefix_map(EXTENDED_PREFIX_MAP)
records = []
for record in converter.records:
if not is_ncname(record.prefix):
continue
record.prefix_synonyms = [s for s in record.prefix_synonyms if is_ncname(s)]
records.append(record)
return Converter(records)


def ensure_converter(
prefix_map: ConverterHint = None,
) -> Converter:
"""Add built-in prefix map from the sssom_context variable in the auto-generated 'internal_context.py' file.
Expand All @@ -72,24 +48,21 @@ def add_built_in_prefixes_to_prefix_map(
:return: A prefix map
"""
if prefix_map is None:
return get_built_in_converter()
return get_converter()
if isinstance(prefix_map, Converter):
converter = prefix_map
else:
converter = Converter.from_prefix_map(prefix_map)
return curies.chain([converter, get_built_in_converter()])
return curies.chain([converter, get_converter()])


def get_default_metadata() -> Metadata:
"""Get @context property value from the sssom_context variable in the auto-generated 'internal_context.py' file.
:return: Metadata
"""
converter_internal = get_internal_converter()
converter_external = get_external_converter()
converter = curies.chain([converter_internal, converter_external])
return Metadata(
converter=converter,
converter=get_converter(),
metadata={
"mapping_set_id": DEFAULT_MAPPING_SET_ID,
"license": DEFAULT_LICENSE,
Expand Down Expand Up @@ -120,23 +93,12 @@ def set_default_license(meta: Metadata) -> Metadata:
return meta


def prepare_context(
    prefix_map: Optional[PrefixMap] = None,
) -> Mapping[str, Any]:
    """Build a JSON-LD context that includes the entries of a prefix map.

    :param prefix_map: Prefix map to merge into the context. When ``None``, the prefix map
        of the default metadata is used.
    :return: The JSON-LD context document with the string-valued prefix map entries written
        into its ``@context`` mapping
    """
    context = _get_jsonld_context()
    if prefix_map is None:
        prefix_map = get_default_metadata().prefix_map

    for k, v in prefix_map.items():
        # Only plain string values are merged into the context.
        if not isinstance(v, str):
            continue
        if k in context["@context"]:
            if context["@context"][k] == v:
                # Already present with the same value, nothing to do.
                continue
            logging.info(
                f"{k} namespace is already in the context, ({context['@context'][k]}, "
                f"but with a different value than {v}. Overwriting!"
            )
        context["@context"][k] = v
    return context
def merge_converter(metadata: Metadata, prefix_map_mode: Union[str, None] = None) -> Converter:
    """Merge the metadata's converter with the default converter.

    :param metadata: Metadata whose converter is returned or chained
    :param prefix_map_mode: One of the ``PREFIX_MAP_MODE_*`` constants. ``None`` is handled
        the same way as ``PREFIX_MAP_MODE_METADATA_ONLY``.
    :return: The metadata's converter, the default converter, or a chain of both in which
        the metadata's converter is listed first
    :raises ValueError: If the prefix map mode is not one of the supported values
    """
    if prefix_map_mode is None or prefix_map_mode == PREFIX_MAP_MODE_METADATA_ONLY:
        return metadata.converter
    if prefix_map_mode == PREFIX_MAP_MODE_SSSOM_DEFAULT_ONLY:
        return get_converter()
    if prefix_map_mode == PREFIX_MAP_MODE_MERGED:
        return curies.chain([metadata.converter, get_converter()])
    raise ValueError(f"Invalid prefix map mode: {prefix_map_mode}")
26 changes: 5 additions & 21 deletions src/sssom/io.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,23 +6,17 @@
from pathlib import Path
from typing import List, Optional, TextIO, Union

import curies
import pandas as pd
from bioregistry import get_iri
from curies import Converter
from pansql import sqldf

from sssom.validators import validate

from .constants import (
PREFIX_MAP_MODE_MERGED,
PREFIX_MAP_MODE_METADATA_ONLY,
PREFIX_MAP_MODE_SSSOM_DEFAULT_ONLY,
SchemaValidationType,
)
from .constants import SchemaValidationType
from .context import (
add_built_in_prefixes_to_prefix_map,
ensure_converter,
get_default_metadata,
merge_converter,
set_default_license,
set_default_mapping_set_id,
)
Expand Down Expand Up @@ -139,16 +133,6 @@ def split_file(input_path: str, output_directory: Union[str, Path]) -> None:
write_tables(splitted, output_directory)


def _get_converter(metadata: Metadata, prefix_map_mode: str = None) -> Converter:
    """Select the converter to use for a given prefix map mode.

    :param metadata: Metadata whose converter is returned or chained
    :param prefix_map_mode: One of the ``PREFIX_MAP_MODE_*`` constants, or ``None`` to use the
        metadata's converter
    :return: The metadata's converter, the default metadata's converter, or a chain of both
    :raises ValueError: If the prefix map mode is not one of the supported values
    """
    if prefix_map_mode in (None, PREFIX_MAP_MODE_METADATA_ONLY):
        return metadata.converter
    if prefix_map_mode not in (PREFIX_MAP_MODE_SSSOM_DEFAULT_ONLY, PREFIX_MAP_MODE_MERGED):
        raise ValueError(f"Invalid prefix map mode: {prefix_map_mode}")
    # Both remaining modes need the default converter; it is only loaded once we get here.
    default_converter = get_default_metadata().converter
    if prefix_map_mode == PREFIX_MAP_MODE_SSSOM_DEFAULT_ONLY:
        return default_converter
    return curies.chain([metadata.converter, default_converter])


def get_metadata_and_prefix_map(
metadata_path: Optional[str] = None, prefix_map_mode: Optional[str] = None
) -> Metadata:
Expand All @@ -163,7 +147,7 @@ def get_metadata_and_prefix_map(
return get_default_metadata()

metadata = read_metadata(metadata_path)
converter = _get_converter(metadata=metadata, prefix_map_mode=prefix_map_mode)
converter = merge_converter(metadata=metadata, prefix_map_mode=prefix_map_mode)
m = Metadata(converter=converter, metadata=metadata.metadata)
m = set_default_mapping_set_id(m)
m = set_default_license(m)
Expand Down Expand Up @@ -305,7 +289,7 @@ def run_sql_query(query: str, inputs: List[str], output: TextIO) -> MappingSetDa

new_df = sqldf(query)
new_msdf.df = new_df
new_msdf.prefix_map = add_built_in_prefixes_to_prefix_map(msdf.prefix_map).prefix_map
new_msdf.prefix_map = ensure_converter(msdf.prefix_map).prefix_map
new_msdf.metadata = msdf.metadata
write_table(new_msdf, output)
return new_msdf
Expand Down
48 changes: 17 additions & 31 deletions src/sssom/parsers.py
Original file line number Diff line number Diff line change
Expand Up @@ -56,11 +56,12 @@
from .context import (
DEFAULT_LICENSE,
DEFAULT_MAPPING_SET_ID,
add_built_in_prefixes_to_prefix_map,
ConverterHint,
ensure_converter,
get_default_metadata,
)
from .sssom_document import MappingSetDocument
from .typehints import Metadata, MetadataType, PrefixMap
from .typehints import Metadata, MetadataType, PrefixMap, get_bimap
from .util import (
PREFIX_MAP_KEY,
SSSOM_DEFAULT_RDF_SERIALISATION,
Expand Down Expand Up @@ -366,20 +367,11 @@ def parse_obographs_json(


def _get_prefix_map_and_metadata(
prefix_map: Union[Converter, PrefixMap, None] = None, meta: Optional[MetadataType] = None
prefix_map: ConverterHint = None, meta: Optional[MetadataType] = None
) -> Metadata:
default_metadata = get_default_metadata()

if prefix_map is None:
logging.warning("No prefix map provided (not recommended), trying to use defaults..")
converter = default_metadata.converter
elif isinstance(prefix_map, Converter):
converter = prefix_map
else:
converter = Converter.from_prefix_map(prefix_map)

converter = ensure_converter(prefix_map)
if meta is None:
meta = default_metadata.metadata
meta = get_default_metadata().metadata
elif prefix_map is not None and PREFIX_MAP_KEY in meta:
# use specified directly in function then fall back to
# what's in the meta
Expand Down Expand Up @@ -464,7 +456,7 @@ def parse_alignment_xml(

def from_sssom_dataframe(
df: pd.DataFrame,
prefix_map: Optional[PrefixMap] = None,
prefix_map: ConverterHint = None,
meta: Optional[MetadataType] = None,
) -> MappingSetDataFrame:
"""Convert a dataframe to a MappingSetDataFrame.
Expand All @@ -474,7 +466,7 @@ def from_sssom_dataframe(
:param meta: A metadata dictionary
:return: MappingSetDataFrame
"""
converter = _ensure_converter(prefix_map)
converter = ensure_converter(prefix_map)

# Need to revisit this solution.
# This is to address: A value is trying to be set on a copy of a slice from a DataFrame
Expand All @@ -501,7 +493,7 @@ def from_sssom_dataframe(

def from_sssom_rdf(
g: Graph,
prefix_map: Optional[PrefixMap] = None,
prefix_map: ConverterHint = None,
meta: Optional[MetadataType] = None,
) -> MappingSetDataFrame:
"""Convert an SSSOM RDF graph into a SSSOM data table.
Expand All @@ -511,7 +503,7 @@ def from_sssom_rdf(
:param meta: Potentially additional metadata, defaults to None
:return: MappingSetDataFrame object
"""
converter = _ensure_converter(prefix_map)
converter = ensure_converter(prefix_map)

ms = _init_mapping_set(meta)
mlist: List[Mapping] = []
Expand Down Expand Up @@ -577,7 +569,7 @@ def from_sssom_json(
:param meta: metadata
:return: MappingSetDataFrame object
"""
converter = _ensure_converter(prefix_map)
converter = ensure_converter(prefix_map)
mapping_set = cast(MappingSet, JSONLoader().load(source=jsondoc, target_class=MappingSet))

_set_metadata_in_mapping_set(mapping_set, metadata=meta)
Expand All @@ -589,7 +581,7 @@ def from_sssom_json(

def from_alignment_minidom(
dom: Document,
prefix_map: PrefixMap,
prefix_map: ConverterHint,
meta: MetadataType,
mapping_predicates: Optional[List[str]] = None,
) -> MappingSetDataFrame:
Expand All @@ -602,7 +594,7 @@ def from_alignment_minidom(
:return: MappingSetDocument
:raises ValueError: for alignment format: xml element said, but not set to yes. Only XML is supported!
"""
converter = _ensure_converter(prefix_map)
converter = ensure_converter(prefix_map)
ms = _init_mapping_set(meta)
mlist: List[Mapping] = []
# bad_attrs = {}
Expand Down Expand Up @@ -646,7 +638,7 @@ def from_alignment_minidom(

ms.mappings = mlist # type: ignore
_set_metadata_in_mapping_set(mapping_set=ms, metadata=meta)
mapping_set_document = MappingSetDocument(mapping_set=ms, prefix_map=prefix_map)
mapping_set_document = MappingSetDocument(mapping_set=ms, prefix_map=get_bimap(converter))
return to_mapping_set_dataframe(mapping_set_document)


Expand All @@ -658,7 +650,7 @@ def _get_obographs_predicate_id(obographs_predicate: str):

def from_obographs(
jsondoc: Dict,
prefix_map: PrefixMap,
prefix_map: ConverterHint,
meta: Optional[MetadataType] = None,
mapping_predicates: Optional[List[str]] = None,
) -> MappingSetDataFrame:
Expand All @@ -671,7 +663,7 @@ def from_obographs(
:raises Exception: When there is no CURIE
:return: An SSSOM data frame (MappingSetDataFrame)
"""
converter = _ensure_converter(prefix_map)
converter = ensure_converter(prefix_map)
ms = _init_mapping_set(meta)
mlist: List[Mapping] = []
# bad_attrs = {}
Expand Down Expand Up @@ -782,7 +774,7 @@ def from_obographs(

ms.mappings = mlist # type: ignore
_set_metadata_in_mapping_set(mapping_set=ms, metadata=meta)
mdoc = MappingSetDocument(mapping_set=ms, prefix_map=prefix_map)
mdoc = MappingSetDocument(mapping_set=ms, prefix_map=get_bimap(converter))
return to_mapping_set_dataframe(mdoc)


Expand Down Expand Up @@ -814,12 +806,6 @@ def get_parsing_function(input_format: Optional[str], filename: str) -> Callable
raise Exception(f"Unknown input format: {input_format}")


def _ensure_converter(prefix_map: Union[PrefixMap, None, Converter] = None) -> Converter:
    """Turn a prefix map into a converter, rejecting falsy input.

    :param prefix_map: A prefix map or converter
    :return: The result of ``add_built_in_prefixes_to_prefix_map`` for the given prefix map
    :raises Exception: If ``prefix_map`` is falsy (for example ``None`` or an empty mapping)
    """
    if prefix_map:
        return add_built_in_prefixes_to_prefix_map(prefix_map)
    raise Exception("No valid prefix_map provided")


def _prepare_mapping(mapping: Mapping) -> Mapping:
p = mapping.predicate_id
if p == "sssom:superClassOf":
Expand Down
7 changes: 6 additions & 1 deletion src/sssom/typehints.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,4 +27,9 @@ class Metadata(NamedTuple):
@property
def prefix_map(self) -> PrefixMap:
"""Get the prefix bimap out of the converter."""
return {record.prefix: record.uri_prefix for record in self.converter.records}
return get_bimap(self.converter)


def get_bimap(converter: Converter) -> PrefixMap:
    """Get a bidirectional prefix map.

    :param converter: Converter whose records are read
    :return: A dictionary mapping each record's ``prefix`` to its ``uri_prefix``
    """
    bimap = {}
    for record in converter.records:
        bimap[record.prefix] = record.uri_prefix
    return bimap
Loading

0 comments on commit 5c6d75c

Please sign in to comment.