diff --git a/.github/workflows/pypi.yml b/.github/workflows/pypi.yml
new file mode 100644
index 0000000..9fb2a27
--- /dev/null
+++ b/.github/workflows/pypi.yml
@@ -0,0 +1,70 @@
+name: Publish Python 🐍 distribution 📦 to PyPI and TestPyPI
+
+on: # runs only when a GitHub release is published
+  release:
+    types: [published]
+
+jobs:
+  build: # builds the sdist and wheel once; both publish jobs reuse the stored artifact
+    name: Build Python 🐍 distributions
+    runs-on: ubuntu-latest
+    steps:
+      - uses: actions/checkout@v4
+      - name: Set up Python
+        uses: actions/setup-python@v5
+        with:
+          python-version: "3.9"
+      - name: Install pypa/build
+        run: >-
+          python3 -m
+          pip install
+          build
+          --user
+      - name: Build a binary wheel and a source tarball
+        run: python3 -m build
+      - name: Store the distribution packages
+        uses: actions/upload-artifact@v4 # v3 is deprecated; keep the same major as download-artifact below
+        with:
+          name: python-package-distributions
+          path: dist/
+  publish-to-testpypi: # must succeed before the publish-to-pypi job is allowed to run
+    name: Publish Python 🐍 distribution 📦 to TestPyPI
+    needs:
+      - build
+    runs-on: ubuntu-latest
+    environment:
+      name: testpypi
+      url: https://test.pypi.org/p/robokop-genetics
+    permissions:
+      id-token: write # IMPORTANT: mandatory for trusted publishing
+    steps:
+      - name: Download all the dists
+        uses: actions/download-artifact@v4 # must match the upload-artifact major version
+        with:
+          name: python-package-distributions
+          path: dist/
+      - name: Publish distribution 📦 to TestPyPI
+        uses: pypa/gh-action-pypi-publish@release/v1
+        with:
+          repository-url: https://test.pypi.org/legacy/
+          skip-existing: true
+  publish-to-pypi:
+    name: >-
+      Publish Python 🐍 distribution 📦 to PyPI
+    needs:
+      - publish-to-testpypi
+    runs-on: ubuntu-latest
+    environment:
+      name: pypi
+      url: https://pypi.org/p/robokop-genetics
+    permissions:
+      id-token: write # IMPORTANT: mandatory for trusted publishing
+    steps:
+      - name: Download all the dists
+        uses: actions/download-artifact@v4 # must match the upload-artifact major version
+        with:
+          name: python-package-distributions
+          path: dist/
+      - name: Publish distribution 📦 to PyPI
+        uses: pypa/gh-action-pypi-publish@release/v1
+
diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml
new file mode 100644
index 0000000..ca24a95
--- /dev/null
+++ b/.github/workflows/test.yml @@ -0,0 +1,23 @@ +name: Python package + +on: [push] + +jobs: + build: + + runs-on: ubuntu-latest + + steps: + - uses: actions/checkout@v4 + - name: Set up Python + uses: actions/setup-python@v5 + with: + python-version: '3.9' + - name: Install dependencies + run: | + python -m pip install --upgrade pip + pip install -r requirements.txt + pip install pytest + - name: Run pytest + run: | + python -m pytest tests/test_normalization.py tests/test_services.py diff --git a/pyproject.toml b/pyproject.toml new file mode 100644 index 0000000..7fd26b9 --- /dev/null +++ b/pyproject.toml @@ -0,0 +1,3 @@ +[build-system] +requires = ["setuptools"] +build-backend = "setuptools.build_meta" \ No newline at end of file diff --git a/requirements.txt b/requirements.txt new file mode 100644 index 0000000..fd50096 --- /dev/null +++ b/requirements.txt @@ -0,0 +1,2 @@ +requests>=2.32.3 +redis>=5.0.4 diff --git a/robokop_genetics/services/clingen.py b/robokop_genetics/services/clingen.py index 8cb8776..2ea8d6f 100644 --- a/robokop_genetics/services/clingen.py +++ b/robokop_genetics/services/clingen.py @@ -81,7 +81,9 @@ def get_batch_of_synonyms(self, variant_curie_list: list): query_response: ClinGenQueryResponse = self.query_service(query_url, data=variant_pseudo_file) if query_response.success: for allele_json in query_response.response_json: - normalization_results.append(self.parse_result(allele_json)) + parsed_result = self.parse_result(allele_json) + if parsed_result is not None: + normalization_results.append(parsed_result) else: for j in range(len(variant_subset)): normalization_results.append(ClinGenSynonymizationResult(success=False, @@ -131,7 +133,9 @@ def get_synonyms_by_parameter_matching(self, url_param: str, url_param_value: st error_message='Clingen returned a 200 status but no results.')) else: for response_item in query_response.response_json: - synonymization_results.append(self.parse_result(response_item)) + parsed_result = 
self.parse_result(response_item) + if parsed_result is not None: + synonymization_results.append(parsed_result) if allele_preference: filtered_syn_results = [] for syn_result in synonymization_results: @@ -159,6 +163,13 @@ def parse_result(self, allele_json: dict): error_message=cg_error_description) try: variant_caid = allele_json['@id'].rsplit('/', 1)[1] + # clingen added Protein Allele IDs but we don't want them (for now) + if variant_caid.startswith('PA'): + return None + # we could do something like the following, but it's not really an error, let's just ignore them + # return ClinGenSynonymizationResult(success=False, + # error_type='UnsupportedIdentifier', + # error_message=f'Protein Allele IDs not supported {variant_caid}') except KeyError: return ClinGenSynonymizationResult(success=False, error_type='MissingIdentifier', diff --git a/setup.py b/setup.py index c1914e9..fdc0963 100644 --- a/setup.py +++ b/setup.py @@ -5,19 +5,25 @@ setuptools.setup( name="robokop-genetics", - version="0.4.1", + version="0.5.0", author="Evan Morris", author_email="evandietzmorris@gmail.com", + maintainer="Evan Morris", + maintainer_email="evandietzmorris@gmail.com", description="A package for Robokop genetics tools and services.", long_description=long_description, long_description_content_type="text/markdown", - url="https://github.com/ObesityHub/robokop-genetics", + url="https://github.com/RobokopU24/robokop-genetics", packages=setuptools.find_packages(), classifiers=[ "Programming Language :: Python :: 3", "License :: OSI Approved :: MIT License", "Operating System :: OS Independent", ], - python_requires='>=3.8', - install_requires=["requests", "redis"] -) \ No newline at end of file + python_requires='>=3.9', + license="CC-0", + install_requires=[ + "requests>=2.32.3", + "redis>=5.0.4" + ] +) diff --git a/tests/test_normalization.py b/tests/test_normalization.py index 04d8af0..bbb2533 100644 --- a/tests/test_normalization.py +++ b/tests/test_normalization.py @@ -41,10 
+41,15 @@ def test_one_at_a_time_normalization(genetics_normalizer): assert normalization_result['error_type'] == 'InefficientUsage' node_id = "CLINVARVARIANT:18390" - normalization_info = genetics_normalizer.get_sequence_variant_normalization(node_id).pop() - assert normalization_info["id"] == 'CAID:CA128085' - assert normalization_info["name"] == 'rs671' - assert 'DBSNP:rs671' in normalization_info["equivalent_identifiers"] + synonymization_results = genetics_normalizer.get_sequence_variant_normalization(node_id) + found_result = False + for normalization_info in synonymization_results: + if 'id' in normalization_info: + assert normalization_info["id"] == 'CAID:CA128085' + assert normalization_info["name"] == 'rs671' + assert 'DBSNP:rs671' in normalization_info["equivalent_identifiers"] + found_result = True + assert found_result # rs369602258 is tri-allelic - the following tests show how a specific allele can be normalized from a DBSNP # if no allele specified return all CAID and their synonym sets