-
Notifications
You must be signed in to change notification settings - Fork 1k
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Add author and tools details in RO-Crate #18820
base: dev
Are you sure you want to change the base?
Changes from 14 commits
e824db7
a05e522
effa82e
468de8a
803a558
f26662a
a582e52
5a26dc2
875e8ea
58219b1
f930358
124db0b
0cbc508
f585677
e66652e
f1b404d
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change | ||
---|---|---|---|---|
|
@@ -3,6 +3,7 @@ | |||
from typing import ( | ||||
Any, | ||||
Dict, | ||||
List, | ||||
Optional, | ||||
) | ||||
|
||||
|
@@ -85,6 +86,8 @@ def __init__(self, model_store: Any): | |||
self.file_entities: Dict[int, Any] = {} | ||||
self.param_entities: Dict[int, Any] = {} | ||||
self.pv_entities: Dict[str, Any] = {} | ||||
# Cache for tools to avoid duplicating entities for the same tool | ||||
self.tool_cache: Dict[str, ContextEntity] = {} | ||||
|
||||
def build_crate(self): | ||||
crate = ROCrate() | ||||
|
@@ -222,6 +225,162 @@ def _add_workflows(self, crate: ROCrate): | |||
crate.mainEntity["name"] = self.workflow.name | ||||
crate.mainEntity["subjectOf"] = cwl_wf | ||||
|
||||
# Adding multiple creators if available | ||||
if self.workflow.creator_metadata: | ||||
for creator_data in self.workflow.creator_metadata: | ||||
if creator_data.get("class") == "Person": | ||||
# Create the person entity | ||||
creator_entity = crate.add( | ||||
ContextEntity( | ||||
crate, | ||||
creator_data.get("identifier", ""), # Default to empty string if identifier is missing | ||||
properties={ | ||||
"@type": "Person", | ||||
"name": creator_data.get("name", ""), # Default to empty string if name is missing | ||||
"orcid": creator_data.get( | ||||
"identifier", "" | ||||
), # Assuming identifier is ORCID, or adjust as needed | ||||
"url": creator_data.get("url", ""), # Add URL if available, otherwise empty string | ||||
"email": creator_data.get( | ||||
"email", "" | ||||
), # Add email if available, otherwise empty string | ||||
}, | ||||
) | ||||
) | ||||
# Append the person creator entity to the mainEntity | ||||
crate.mainEntity.append_to("creator", creator_entity) | ||||
|
||||
elif creator_data.get("class") == "Organization": | ||||
# Create the organization entity | ||||
organization_entity = crate.add( | ||||
ContextEntity( | ||||
crate, | ||||
creator_data.get( | ||||
"url", "" | ||||
), # Use URL as identifier if available, otherwise empty string | ||||
properties={ | ||||
"@type": "Organization", | ||||
"name": creator_data.get("name", ""), # Default to empty string if name is missing | ||||
"url": creator_data.get("url", ""), # Add URL if available, otherwise empty string | ||||
}, | ||||
) | ||||
) | ||||
# Append the organization entity to the mainEntity | ||||
crate.mainEntity.append_to("creator", organization_entity) | ||||
|
||||
# Add CWL workflow entity if exists | ||||
crate.mainEntity["subjectOf"] = cwl_wf | ||||
|
||||
# Add tools used in the workflow | ||||
self._add_tools(crate) | ||||
self._add_steps(crate) | ||||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Workflows can have workflow inputs. Should we add them here as well? At least the types of the workflow inputs? There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. That would be nice to have, yes. This can even be done per tool (though more fiddly). There is already some capturing of the inputs and outputs as
|
||||
|
||||
def _add_steps(self, crate: ROCrate): | ||||
""" | ||||
Add workflow steps (HowToStep) to the RO-Crate. These are unique for each tool occurrence. | ||||
""" | ||||
step_entities: List[ContextEntity] = [] | ||||
# Initialize the position as a list with a single element to keep it mutable | ||||
position = [1] | ||||
self._add_steps_recursive(self.workflow.steps, crate, step_entities, position) | ||||
return step_entities | ||||
|
||||
def _add_steps_recursive(self, steps, crate: ROCrate, step_entities, position): | ||||
""" | ||||
Recursively add HowToStep entities from workflow steps, ensuring that | ||||
the position index is maintained across subworkflows. | ||||
""" | ||||
for step in steps: | ||||
if step.type == "tool": | ||||
# Create a unique HowToStep entity for each step | ||||
step_id = f"step_{position[0]}" | ||||
step_description = None | ||||
if step.annotations: | ||||
annotations_list = [annotation.annotation for annotation in step.annotations if annotation] | ||||
step_description = " ".join(annotations_list) if annotations_list else None | ||||
|
||||
# Add HowToStep entity to the crate | ||||
step_entity = crate.add( | ||||
ContextEntity( | ||||
crate, | ||||
step_id, | ||||
properties={ | ||||
"@type": "HowToStep", | ||||
"position": position[0], | ||||
"name": step.tool_id, | ||||
"description": step_description, | ||||
Marie59 marked this conversation as resolved.
Show resolved
Hide resolved
|
||||
}, | ||||
) | ||||
) | ||||
|
||||
# Append the HowToStep entity to the workflow steps list | ||||
step_entities.append(step_entity) | ||||
crate.mainEntity.append_to("step", step_entity) | ||||
|
||||
# Increment the position counter | ||||
position[0] += 1 | ||||
|
||||
# Handle subworkflows recursively | ||||
elif step.type == "subworkflow": | ||||
subworkflow = step.subworkflow | ||||
if subworkflow: | ||||
self._add_steps_recursive(subworkflow.steps, crate, step_entities, position) | ||||
|
||||
def _add_tools(self, crate: ROCrate): | ||||
tool_entities: List[ContextEntity] = [] | ||||
self._add_tools_recursive(self.workflow.steps, crate, tool_entities) | ||||
|
||||
def _add_tools_recursive(self, steps, crate: ROCrate, tool_entities): | ||||
""" | ||||
Recursively add SoftwareApplication entities from workflow steps, reusing tools when necessary. | ||||
""" | ||||
for step in steps: | ||||
if step.type == "tool": | ||||
Marie59 marked this conversation as resolved.
Show resolved
Hide resolved
|
||||
tool_id = step.tool_id | ||||
Marie59 marked this conversation as resolved.
Show resolved
Hide resolved
|
||||
tool_version = step.tool_version | ||||
|
||||
# Cache key based on tool ID and version | ||||
tool_key = f"{tool_id}:{tool_version}" | ||||
|
||||
# Check if tool entity is already in cache | ||||
if tool_key in self.tool_cache: | ||||
tool_entity = self.tool_cache[tool_key] | ||||
else: | ||||
# Create a new tool entity | ||||
tool_name = tool_id | ||||
tool_description = None | ||||
if step.annotations: | ||||
annotations_list = [annotation.annotation for annotation in step.annotations if annotation] | ||||
tool_description = " ".join(annotations_list) if annotations_list else None | ||||
|
||||
# Add tool entity to the RO-Crate | ||||
tool_entity = crate.add( | ||||
ContextEntity( | ||||
crate, | ||||
tool_id, | ||||
Marie59 marked this conversation as resolved.
Show resolved
Hide resolved
|
||||
properties={ | ||||
"@type": "SoftwareApplication", | ||||
"name": tool_name, | ||||
"version": tool_version, | ||||
"description": tool_description, | ||||
"url": "https://toolshed.g2.bx.psu.edu", # URL if relevant | ||||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. This is not always true. Tools could come from multiple toolsheds. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Yes I was wondering if this part was relevant or if I should remove it There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Having a |
||||
}, | ||||
) | ||||
) | ||||
|
||||
# Store the tool entity in the cache | ||||
self.tool_cache[tool_key] = tool_entity | ||||
|
||||
# Append the tool entity to the workflow (instrument) and store it in the list | ||||
tool_entities.append(tool_entity) | ||||
crate.mainEntity.append_to("instrument", tool_entity) | ||||
Marie59 marked this conversation as resolved.
Show resolved
Hide resolved
|
||||
|
||||
# Handle subworkflows recursively | ||||
elif step.type == "subworkflow": | ||||
subworkflow = step.subworkflow | ||||
if subworkflow: | ||||
self._add_tools_recursive(subworkflow.steps, crate, tool_entities) | ||||
|
||||
def _add_create_action(self, crate: ROCrate): | ||||
self.create_action = crate.add( | ||||
ContextEntity( | ||||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Could also try to use `identifier` here, as that is an option in the UI too when setting an organization as a creator. For example, an ROR identifier could be used (though admittedly it's more likely for an average user to just input their institute's URL in the `url` field).
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
For this point I was not sure what to do: I used `identifier` for the ORCID of the creator. Should I replace it with ROR, or were you suggesting something completely different?