Skip to content

Commit

Permalink
Merge pull request #163 from bigbio/dev
Browse files Browse the repository at this point in the history
Small changes related to OLS.
  • Loading branch information
ypriverol authored Feb 28, 2024
2 parents b20df9d + 3954a3b commit 07c1dea
Show file tree
Hide file tree
Showing 6 changed files with 111 additions and 24 deletions.
4 changes: 2 additions & 2 deletions .github/workflows/pythonapp.yml
Original file line number Diff line number Diff line change
Expand Up @@ -5,9 +5,9 @@ name: Python application

on:
push:
branches: [ main ]
branches: [ main, dev ]
pull_request:
branches: [ main ]
branches: [ main, dev ]

jobs:
build:
Expand Down
4 changes: 2 additions & 2 deletions .github/workflows/pythonpackage.yml
Original file line number Diff line number Diff line change
Expand Up @@ -5,9 +5,9 @@ name: Python package

on:
push:
branches: [ main ]
branches: [ main, dev ]
pull_request:
branches: [ main ]
branches: [ main, dev ]

jobs:
build:
Expand Down
2 changes: 1 addition & 1 deletion sdrf_pipelines/__init__.py
Original file line number Diff line number Diff line change
@@ -1 +1 @@
__version__ = "0.0.24"
__version__ = "0.0.25"
4 changes: 4 additions & 0 deletions sdrf_pipelines/sdrf/sdrf.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,10 @@
from __future__ import annotations

import logging

import pandas as pd
from pandas import DataFrame
from pandas._typing import PythonFuncType

from sdrf_pipelines.sdrf.sdrf_schema import CELL_LINES_TEMPLATE
from sdrf_pipelines.sdrf.sdrf_schema import HUMAN_TEMPLATE
Expand Down
40 changes: 38 additions & 2 deletions sdrf_pipelines/sdrf/sdrf_schema.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
import logging
import re
import sys
import typing
from typing import Any

Expand Down Expand Up @@ -145,8 +146,8 @@ def validate(self, series: pd.Series) -> pd.Series:

if ontology_terms is not None:
query_labels = [o["label"].lower() for o in ontology_terms]
for label in query_labels:
labels.append(label)
if term[TERM_NAME] in query_labels:
labels.append(term[TERM_NAME])
if self._not_available:
labels.append(NOT_AVAILABLE)
if self._not_applicable:
Expand Down Expand Up @@ -179,6 +180,10 @@ def validate(self, panda_sdrf: sdrf = None) -> typing.List[LogicError]:
)
errors.append(LogicError(error_message, error_type=logging.WARN))

empty_cells_errors = self.validate_empty_cells(panda_sdrf)
if empty_cells_errors:
errors.extend(empty_cells_errors)

# Check the mandatory fields
error_mandatory = self.validate_mandatory_columns(panda_sdrf)
if error_mandatory is not None:
Expand Down Expand Up @@ -310,6 +315,37 @@ def check_recommendations(self, panda_sdrf):
warnings += column.validate_optional(series)
return sorted(warnings, key=lambda e: e.row)

def validate_empty_cells(self, panda_sdrf):
    """
    Check for empty cells in the SDRF.

    A cell is reported as empty when it is ``None``, when its text form is
    exactly ``"nan"`` (which also covers a float NaN), or when it contains
    only whitespace.

    :param panda_sdrf: SDRF dataframe
    :return: List of LogicError (logging.ERROR), one per empty cell, ordered row by row
    """

    def validate_string(cell_value):
        # Return True when the cell holds a usable (non-empty) value.
        if cell_value is None:
            return False
        # Cells are not guaranteed to be strings (e.g. float NaN or numbers),
        # so convert before calling str methods instead of crashing on .strip().
        text = str(cell_value)
        return text != "nan" and len(text.strip()) > 0

    # Whether DataFrame.map exists depends on the installed pandas version,
    # not on the Python version: it was added in pandas 2.1, where applymap
    # was deprecated. Detect the method itself rather than the interpreter.
    if hasattr(panda_sdrf, "map"):
        validation_results = panda_sdrf.map(validate_string)
    else:
        validation_results = panda_sdrf.applymap(validate_string)

    errors = []
    # Walk the boolean grid by position: label-based lookups (.at) return a
    # Series instead of a scalar when column or index labels are duplicated.
    for row, row_flags in zip(validation_results.index, validation_results.to_numpy()):
        for col, is_valid in zip(validation_results.columns, row_flags):
            if not is_valid:
                message = f"Empty value found Row: {row}, Column: {col}"
                errors.append(LogicError(message, error_type=logging.ERROR))
    return errors


default_schema = SDRFSchema(
[
Expand Down
81 changes: 64 additions & 17 deletions sdrf_pipelines/zooma/ols.py
Original file line number Diff line number Diff line change
Expand Up @@ -106,13 +106,16 @@ def get_ancestors(self, ont, iri):

def search(
self,
name,
name: str,
query_fields=None,
ontology=None,
ontology: str = None,
field_list=None,
children_of=None,
exact=None,
bytype="class",
exact: bool = None,
bytype: str = "class",
rows: int = 10,
num_retries: int = 10,
start: int = 0,
):
"""
Searches the OLS with the given term
Expand All @@ -124,6 +127,8 @@ def search(
@:param exact: Forces exact match if not `None`
@:param bytype: restrict to terms one of {class,property,individual,ontology}
@:param childrenOf: Search only under a certain term.
@:param rows: number of rows to query on each call of OLS search
@:param num_retries: Number of retries to OLS when it fails.
"""
params = {"q": name}
if ontology is not None:
Expand All @@ -135,6 +140,9 @@ def search(
if bytype:
params["type"] = _concat_str_or_list(bytype)

if rows:
params["rows"] = rows

if ontology:
params["ontology"] = _concat_str_or_list(ontology)
elif self.ontology:
Expand All @@ -155,26 +163,65 @@ def search(
if len(children_of) > 0:
params["childrenOf"] = _concat_str_or_list(children_of)

retry_num = 0
if start:
params["start"] = start

docs_found = []

while retry_num < 10:
for retry_num in range(num_retries):
try:
req = self.session.get(self.ontology_search, params=params)
logger.debug("Request to OLS search API: %s - %s", req.status_code, name)
logger.debug("Request to OLS search API term %s, status code %s", name, req.status_code)

req.raise_for_status()
if req.json()["response"]["numFound"]:
return req.json()["response"]["docs"]
if exact:
logger.debug("OLS exact search returned empty response for %s", name)
if req.status_code != 200:
logger.error("OLS search term %s error tried number %s", name, retry_num)
req.raise_for_status()
else:
logger.debug("OLS search returned empty response for %s", name)
return None
if req.json()["response"]["numFound"] == 0:
if exact:
logger.debug("OLS exact search returned empty response for %s", name)
else:
logger.debug("OLS search returned empty response for %s", name)
return docs_found
elif len(req.json()["response"]["docs"]) < rows:
return req.json()["response"]["docs"]
else:
docs_found = req.json()["response"]["docs"]
docs_found.extend(
self.search(
name,
query_fields=query_fields,
ontology=ontology,
field_list=field_list,
children_of=children_of,
exact=exact,
bytype=bytype,
rows=rows,
num_retries=num_retries,
start=(rows + (start)),
)
)
return docs_found

if req.status_code == 200 and req.json()["response"]["numFound"] == 0:
if exact:
logger.debug("OLS exact search returned empty response for %s", name)
else:
logger.debug("OLS search returned empty response for %s", name)
return None
elif req.status_code != 200 and req.json()["response"]["numFound"] > 0:
if len(req.json()["response"]["docs"]) <= rows:
return req.json()["response"]["docs"]
else:
start = 0
docs_found = req.json()["response"]["docs"]

except Exception as ex:
retry_num += 1
logger.debug("OLS error searching the following term -- %s iteration %s.\n%e", req.url, retry_num, ex)
logger.exception(
"OLS error searching the following term -- %s iteration %s.\n%e", req.url, retry_num, ex
)

return None
return docs_found

def suggest(self, name, ontology=None):
"""Suggest terms from an optional list of ontologies
Expand Down

0 comments on commit 07c1dea

Please sign in to comment.