From 18f75a3d89442171cd35f409e8a35d354d3c6ecb Mon Sep 17 00:00:00 2001 From: Colin Smith Date: Tue, 10 Dec 2024 15:28:36 -0800 Subject: [PATCH] feat: establish SPASE conversion strategy (#213) Establish a new conversion strategy for the SPASE metadata standard, laying the groundwork for future development. This includes: - Connecting metadata to the test suite - Updating relevant utility functions - Modifying test fixtures - Temporarily skipping tests for undeveloped conversion methods --- docs/source/user/api.rst | 7 +++++++ src/soso/strategies/spase.py | 14 +++++++++++++- tests/conftest.py | 9 +++++++-- tests/data/spase.json | 1 + tests/test_spase.py | 19 +++++++++++++++++++ tests/test_strategies.py | 31 +++++++++++++++++++++++++++++++ 6 files changed, 78 insertions(+), 3 deletions(-) create mode 100644 tests/data/spase.json create mode 100644 tests/test_spase.py diff --git a/docs/source/user/api.rst b/docs/source/user/api.rst index 5ab4cde..891523a 100644 --- a/docs/source/user/api.rst +++ b/docs/source/user/api.rst @@ -23,6 +23,13 @@ EML Strategy :members: :noindex: +SPASE Strategy +-------------- + +.. autoclass:: soso.strategies.spase.SPASE + :members: + :noindex: + Utilities --------- diff --git a/src/soso/strategies/spase.py b/src/soso/strategies/spase.py index e9a6d68..c2e6e0c 100644 --- a/src/soso/strategies/spase.py +++ b/src/soso/strategies/spase.py @@ -40,7 +40,7 @@ def __init__(self, file: str, **kwargs: dict): raise ValueError(file + " must be an XML file.") super().__init__(metadata=etree.parse(file)) self.file = file - self.schema_version = None + self.schema_version = get_schema_version(self.metadata) self.kwargs = kwargs def get_id(self) -> None: @@ -169,3 +169,15 @@ def get_was_generated_by(self) -> None: # Below are utility functions for the SPASE strategy. + + +def get_schema_version(metadata: etree.ElementTree) -> str: + """ + :param metadata: The SPASE metadata object as an XML tree. 
+ + :returns: The version of the SPASE schema used in the metadata record. + """ + schema_version = metadata.findtext( + "{http://www.spase-group.org/data/schema}Version" + ) + return schema_version diff --git a/tests/conftest.py b/tests/conftest.py index 50e7bc1..2badfe1 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -7,6 +7,7 @@ from copy import deepcopy import pytest from soso.strategies.eml import EML +from soso.strategies.spase import SPASE from soso.utilities import get_example_metadata_file_path, get_empty_metadata_file_path @@ -18,7 +19,7 @@ def strategy_names() -> list: return ["eml", "spase"] -@pytest.fixture(params=[EML]) +@pytest.fixture(params=[EML, SPASE]) def strategy_instance(request) -> Union[Type, None]: """ :returns: The strategy instances. @@ -26,10 +27,12 @@ def strategy_instance(request) -> Union[Type, None]: res = None if request.param is EML: res = request.param(file=get_example_metadata_file_path("EML")) + elif request.param is SPASE: + res = request.param(file=get_example_metadata_file_path("SPASE")) return res -@pytest.fixture(params=[EML]) +@pytest.fixture(params=[EML, SPASE]) def strategy_instance_no_meta(request) -> Union[Type, None]: """ :returns: The strategy instances parameterized with an empty metadata @@ -38,6 +41,8 @@ def strategy_instance_no_meta(request) -> Union[Type, None]: res = None if request.param is EML: res = request.param(file=get_empty_metadata_file_path("EML")) + elif request.param is SPASE: + res = request.param(file=get_empty_metadata_file_path("SPASE")) return res diff --git a/tests/data/spase.json b/tests/data/spase.json new file mode 100644 index 0000000..4c8f5d3 --- /dev/null +++ b/tests/data/spase.json @@ -0,0 +1 @@ +{"@context": {"@vocab": "https://schema.org/"}, "@type": "Dataset"} \ No newline at end of file diff --git a/tests/test_spase.py b/tests/test_spase.py new file mode 100644 index 0000000..820a708 --- /dev/null +++ b/tests/test_spase.py @@ -0,0 +1,19 @@ +"""Test additional SPASE module 
functions and methods.""" + +from lxml import etree +from soso.strategies.spase import get_schema_version +from soso.utilities import get_empty_metadata_file_path, get_example_metadata_file_path + + +def test_get_schema_version_returns_expected_value(): + """Test that the get_schema_version function returns the expected value.""" + + # Positive case: The function will return the schema version of the SPASE + # file. + spase = etree.parse(get_example_metadata_file_path("SPASE")) + assert get_schema_version(spase) == "2.5.0" + + # Negative case: If the schema version is not present, the function will + # return None. + spase = etree.parse(get_empty_metadata_file_path("SPASE")) + assert get_schema_version(spase) is None diff --git a/tests/test_strategies.py b/tests/test_strategies.py index a239357..491b4ec 100644 --- a/tests/test_strategies.py +++ b/tests/test_strategies.py @@ -52,6 +52,7 @@ def test_strategy_reads_schema_version(strategy_instance, strategy_instance_no_m # consistent test suite. 
+@pytest.mark.skipif(strategy_instance="SPASE", reason="Not yet implemented") @pytest.mark.skipif(strategy_instance="EML", reason="Property not in schema") def test_get_id_returns_expected_type(strategy_instance, strategy_instance_no_meta): """Test that the get_id method returns the expected type.""" @@ -64,6 +65,7 @@ def test_get_id_returns_expected_type(strategy_instance, strategy_instance_no_me assert res is None +@pytest.mark.skipif(strategy_instance="SPASE", reason="Not yet implemented") def test_get_name_returns_expected_type(strategy_instance, strategy_instance_no_meta): """Test that the get_name method returns the expected type.""" # Positive case @@ -75,6 +77,7 @@ def test_get_name_returns_expected_type(strategy_instance, strategy_instance_no_ assert res is None +@pytest.mark.skipif(strategy_instance="SPASE", reason="Not yet implemented") def test_get_description_returns_expected_type( strategy_instance, strategy_instance_no_meta ): @@ -88,6 +91,7 @@ def test_get_description_returns_expected_type( assert res is None +@pytest.mark.skipif(strategy_instance="SPASE", reason="Not yet implemented") @pytest.mark.skipif(strategy_instance="EML", reason="Property not in schema") def test_get_url_returns_expected_type(strategy_instance, strategy_instance_no_meta): """Test that the get_url method returns the expected type.""" @@ -100,6 +104,7 @@ def test_get_url_returns_expected_type(strategy_instance, strategy_instance_no_m assert res is None +@pytest.mark.skipif(strategy_instance="SPASE", reason="Not yet implemented") @pytest.mark.skipif(strategy_instance="EML", reason="Property not in schema") def test_get_same_as_returns_expected_type( strategy_instance, strategy_instance_no_meta @@ -114,6 +119,7 @@ def test_get_same_as_returns_expected_type( assert res is None +@pytest.mark.skipif(strategy_instance="SPASE", reason="Not yet implemented") @pytest.mark.skipif(strategy_instance="EML", reason="Property not in schema") def test_get_version_returns_expected_type( 
strategy_instance, strategy_instance_no_meta @@ -128,6 +134,7 @@ def test_get_version_returns_expected_type( assert res is None +@pytest.mark.skipif(strategy_instance="SPASE", reason="Not yet implemented") @pytest.mark.skipif(strategy_instance="EML", reason="Property not in schema") def test_get_is_accessible_for_free_returns_expected_type( strategy_instance, strategy_instance_no_meta @@ -143,6 +150,7 @@ def test_get_is_accessible_for_free_returns_expected_type( assert res is None +@pytest.mark.skipif(strategy_instance="SPASE", reason="Not yet implemented") def test_get_keywords_returns_expected_type( strategy_instance, strategy_instance_no_meta ): @@ -156,6 +164,7 @@ def test_get_keywords_returns_expected_type( assert res is None +@pytest.mark.skipif(strategy_instance="SPASE", reason="Not yet implemented") def test_get_identifier_returns_expected_type( strategy_instance, strategy_instance_no_meta ): @@ -172,6 +181,7 @@ def test_get_identifier_returns_expected_type( assert res is None +@pytest.mark.skipif(strategy_instance="SPASE", reason="Not yet implemented") @pytest.mark.skipif(strategy_instance="EML", reason="Property not in schema") def test_get_citation_returns_expected_type( strategy_instance, strategy_instance_no_meta @@ -186,6 +196,7 @@ def test_get_citation_returns_expected_type( assert res is None +@pytest.mark.skipif(strategy_instance="SPASE", reason="Not yet implemented") def test_get_variable_measured_returns_expected_type( strategy_instance, strategy_instance_no_meta ): @@ -199,6 +210,7 @@ def test_get_variable_measured_returns_expected_type( assert res is None +@pytest.mark.skipif(strategy_instance="SPASE", reason="Not yet implemented") @pytest.mark.skipif(strategy_instance="EML", reason="Property not in schema") def test_get_included_in_data_catalog_returns_expected_type( strategy_instance, strategy_instance_no_meta @@ -214,6 +226,7 @@ def test_get_included_in_data_catalog_returns_expected_type( assert res is None 
+@pytest.mark.skipif(strategy_instance="SPASE", reason="Not yet implemented") @pytest.mark.skipif(strategy_instance="EML", reason="Property not in schema") def test_get_subject_of_returns_expected_type( strategy_instance, strategy_instance_no_meta @@ -228,6 +241,7 @@ def test_get_subject_of_returns_expected_type( assert res is None +@pytest.mark.skipif(strategy_instance="SPASE", reason="Not yet implemented") def test_get_distribution_returns_expected_type( strategy_instance, strategy_instance_no_meta ): @@ -241,6 +255,7 @@ def test_get_distribution_returns_expected_type( assert res is None +@pytest.mark.skipif(strategy_instance="SPASE", reason="Not yet implemented") @pytest.mark.skipif(strategy_instance="EML", reason="Property not in schema") def test_get_potential_action_returns_expected_type( strategy_instance, strategy_instance_no_meta @@ -255,6 +270,7 @@ def test_get_potential_action_returns_expected_type( assert res is None +@pytest.mark.skipif(strategy_instance="SPASE", reason="Not yet implemented") @pytest.mark.skipif(strategy_instance="EML", reason="Property not in schema") def test_get_date_created_returns_expected_type( strategy_instance, strategy_instance_no_meta @@ -269,6 +285,7 @@ def test_get_date_created_returns_expected_type( assert res is None +@pytest.mark.skipif(strategy_instance="SPASE", reason="Not yet implemented") def test_get_date_modified_returns_expected_type( strategy_instance, strategy_instance_no_meta ): @@ -282,6 +299,7 @@ def test_get_date_modified_returns_expected_type( assert res is None +@pytest.mark.skipif(strategy_instance="SPASE", reason="Not yet implemented") def test_get_date_published_returns_expected_type( strategy_instance, strategy_instance_no_meta ): @@ -295,6 +313,7 @@ def test_get_date_published_returns_expected_type( assert res is None +@pytest.mark.skipif(strategy_instance="SPASE", reason="Not yet implemented") @pytest.mark.skipif(strategy_instance="EML", reason="Property not in schema") def 
test_get_expires_returns_expected_type( strategy_instance, strategy_instance_no_meta @@ -309,6 +328,7 @@ def test_get_expires_returns_expected_type( assert res is None +@pytest.mark.skipif(strategy_instance="SPASE", reason="Not yet implemented") def test_get_temporal_coverage_returns_expected_type( strategy_instance, strategy_instance_no_meta ): @@ -331,6 +351,7 @@ def test_get_temporal_coverage_returns_expected_type( assert res is None +@pytest.mark.skipif(strategy_instance="SPASE", reason="Not yet implemented") def test_get_spatial_coverage_returns_expected_type( strategy_instance, strategy_instance_no_meta ): @@ -344,6 +365,7 @@ def test_get_spatial_coverage_returns_expected_type( assert res is None +@pytest.mark.skipif(strategy_instance="SPASE", reason="Not yet implemented") def test_get_creator_returns_expected_type( strategy_instance, strategy_instance_no_meta ): @@ -360,6 +382,7 @@ def test_get_creator_returns_expected_type( assert res is None +@pytest.mark.skipif(strategy_instance="SPASE", reason="Not yet implemented") def test_get_contributor_returns_expected_type( strategy_instance, strategy_instance_no_meta ): @@ -376,6 +399,7 @@ def test_get_contributor_returns_expected_type( assert res is None +@pytest.mark.skipif(strategy_instance="SPASE", reason="Not yet implemented") @pytest.mark.skipif(strategy_instance="EML", reason="Property not in schema") def test_get_provider_returns_expected_type( strategy_instance, strategy_instance_no_meta @@ -390,6 +414,7 @@ def test_get_provider_returns_expected_type( assert res is None +@pytest.mark.skipif(strategy_instance="SPASE", reason="Not yet implemented") @pytest.mark.skipif(strategy_instance="EML", reason="Property not in schema") def test_get_publisher_returns_expected_type( strategy_instance, strategy_instance_no_meta @@ -404,6 +429,7 @@ def test_get_publisher_returns_expected_type( assert res is None +@pytest.mark.skipif(strategy_instance="SPASE", reason="Not yet implemented") def 
test_get_funding_returns_expected_type( strategy_instance, strategy_instance_no_meta ): @@ -417,6 +443,7 @@ def test_get_funding_returns_expected_type( assert res is None +@pytest.mark.skipif(strategy_instance="SPASE", reason="Not yet implemented") def test_get_license_returns_expected_type( strategy_instance, strategy_instance_no_meta ): @@ -430,6 +457,7 @@ def test_get_license_returns_expected_type( assert res is None +@pytest.mark.skipif(strategy_instance="SPASE", reason="Not yet implemented") @pytest.mark.skipif(strategy_instance="EML", reason="Property not in schema") def test_get_was_revision_of_returns_expected_type( strategy_instance, strategy_instance_no_meta @@ -444,6 +472,7 @@ def test_get_was_revision_of_returns_expected_type( assert res is None +@pytest.mark.skipif(strategy_instance="SPASE", reason="Not yet implemented") def test_get_was_derived_from_returns_expected_type( strategy_instance, strategy_instance_no_meta ): @@ -457,6 +486,7 @@ def test_get_was_derived_from_returns_expected_type( assert res is None +@pytest.mark.skipif(strategy_instance="SPASE", reason="Not yet implemented") def test_get_is_based_on_returns_expected_type( strategy_instance, strategy_instance_no_meta ): @@ -470,6 +500,7 @@ def test_get_is_based_on_returns_expected_type( assert res is None +@pytest.mark.skipif(strategy_instance="SPASE", reason="Not yet implemented") @pytest.mark.skipif(strategy_instance="EML", reason="Property not in schema") def test_get_was_generated_by_returns_expected_type( strategy_instance, strategy_instance_no_meta