Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

BH2024 paper #238

Open
wants to merge 2 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
158 changes: 158 additions & 0 deletions docs/publications/biohackathon_2024/paper.bib
Original file line number Diff line number Diff line change
@@ -0,0 +1,158 @@
@article{10.1093/nar/gkac247,
  author   = {{The Galaxy Community}},
  title    = {The {Galaxy} platform for accessible, reproducible and collaborative biomedical analyses: 2022 update},
  journal  = {Nucleic Acids Research},
  volume   = {50},
  number   = {W1},
  pages    = {W345--W351},
  year     = {2022},
  month    = apr,
  abstract = {Galaxy is a mature, browser accessible workbench for scientific computing. It enables scientists to share, analyze and visualize their own data, with minimal technical impediments. A thriving global community continues to use, maintain and contribute to the project, with support from multiple national infrastructure providers that enable freely accessible analysis and training services. The Galaxy Training Network supports free, self-directed, virtual training with $>$230 integrated tutorials. Project engagement metrics have continued to grow over the last 2 years, including source code contributions, publications, software packages wrapped as tools, registered users and their daily analysis jobs, and new independent specialized servers. Key Galaxy technical developments include an improved user interface for launching large-scale analyses with many files, interactive tools for exploratory data analysis, and a complete suite of machine learning tools. Important scientific developments enabled by Galaxy include Vertebrate Genome Project (VGP) assembly workflows and global SARS-CoV-2 collaborations.},
  issn     = {0305-1048},
  doi      = {10.1093/nar/gkac247},
  url      = {https://doi.org/10.1093/nar/gkac247},
  eprint   = {https://academic.oup.com/nar/article-pdf/50/W1/W345/45189566/gkac247.pdf},
}

@article{black2021edam,
  author    = {Black, Melissa and Lamothe, Lucie and Eldakroury, Hager and Kierkegaard, Mads and Priya, Ankita and Machinda, Anne and Khanduja, Uttam Singh and Patoliya, Drashti and Rathi, Rashika and Che Nico, Tawah Peggy and Umutesi, Gloria and Blankenburg, Claudia and Op, Anita and Chieke, Precious and Babatunde, Omodolapo and Laurie, Steve and Neumann, Steffen and Schw{\"a}mmle, Veit and Kuzmin, Ivan and Hunter, Chris and Karr, Jonathan and Ison, Jon and Gaignard, Alban and Brancotte, Bryan and Ménager, Hervé and Kalaš, Matúš},
  title     = {{EDAM}: The bioscientific data analysis ontology (update 2021) [version 1; not peer reviewed]},
  journal   = {F1000Research},
  publisher = {F1000},
  year      = {2022},
  doi       = {10.7490/f1000research.1118900.1},
}

@article{biotoolsSchema,
  author  = {Ison, Jon and Ienasescu, Hans and Rydza, Emil and Chmura, Piotr and Rapacki, Kristoffer and Gaignard, Alban and Schw{\"a}mmle, Veit and van Helden, Jacques and Kala{\v{s}}, Mat{\'u}{\v{s}} and M{\'e}nager, Herv{\'e}},
  title   = {{biotoolsSchema}: a formalized schema for bioinformatics software description},
  journal = {GigaScience},
  volume  = {10},
  number  = {1},
  pages   = {giaa157},
  year    = {2021},
  month   = jan,
  issn    = {2047-217X},
  doi     = {10.1093/gigascience/giaa157},
  url     = {https://doi.org/10.1093/gigascience/giaa157},
  eprint  = {https://academic.oup.com/gigascience/article-pdf/10/1/giaa157/36126150/giaa157.pdf},
}

@article{Ison2019,
  author    = {Ison, Jon and Ienasescu, Hans and Chmura, Piotr and Rydza, Emil and M{\'e}nager, Herv{\'e} and Kala{\v{s}}, Mat{\'u}{\v{s}} and Schw{\"a}mmle, Veit and Gr{\"u}ning, Bj{\"o}rn and Beard, Niall and Lopez, Rodrigo and Duvaud, Severine and Stockinger, Heinz and Persson, Bengt and Vařeková, Radka Svobodová and Raček, Tomáš and Vondrášek, Jiří and Peterson, Hedi and Salumets, Ahto and Jonassen, Inge and Hooft, Rob and Nyr{\"o}nen, Tommi and Valencia, Alfonso and Capella, Salvador and Gelpí, Josep and Zambelli, Federico and Savakis, Babis and Leskošek, Brane and Rapacki, Kristoffer and Blanchet, Christophe and Jimenez, Rafael and Oliveira, Arlindo and Vriend, Gert and Collin, Olivier and van Helden, Jacques and Løngreen, Peter and Brunak, Søren},
  title     = {The {bio.tools} registry of software tools and data resources for the life sciences},
  journal   = {Genome Biology},
  publisher = {Springer Science and Business Media LLC},
  volume    = {20},
  number    = {1},
  year      = {2019},
  month     = aug,
  issn      = {1474-760X},
  doi       = {10.1186/s13059-019-1772-6},
  url       = {https://doi.org/10.1186/s13059-019-1772-6},
}

@article{Bray2022.03.13.483965,
  author        = {Simon Bray and Matthias Bernt and Nicola Soranzo and Marius van den Beek and B{\'e}r{\'e}nice Batut and Helena Rasche and Martin {\v C}ech and Peter Cock and Anton Nekrutenko and Bj{\"o}rn Gr{\"u}ning and John Chilton},
  title         = {{Planemo}: a command-line toolkit for developing, deploying, and executing scientific data analyses},
  journal       = {bioRxiv},
  publisher     = {Cold Spring Harbor Laboratory},
  elocation-id  = {2022.03.13.483965},
  year          = {2022},
  doi           = {10.1101/2022.03.13.483965},
  url           = {https://www.biorxiv.org/content/early/2022/03/14/2022.03.13.483965},
  eprint        = {https://www.biorxiv.org/content/early/2022/03/14/2022.03.13.483965.full.pdf},
  abstract      = {There are thousands of well-maintained high-quality open-source software utilities for all aspects of scientific data analysis. For over a decade, the Galaxy Project has been providing computational infrastructure and a unified user interface for these tools to make them accessible to a wide range of researchers. In order to streamline the process of integrating tools and constructing workflows as much as possible, we have developed Planemo, a software development kit for tool and workflow developers and Galaxy power users. Here we outline Planemo{\textquoteright}s implementation and describe its broad range of functionality for designing, testing and executing Galaxy tools, workflows and training material. In addition, we discuss the philosophy underlying Galaxy tool and workflow development, and how Planemo encourages the use of development best practices, such as test-driven development, by its users, including those who are not professional software developers. Planemo is a mature project widely used within the Galaxy community which has been downloaded over 80,000 times.},
}

@misc{datatables,
  title   = {{DataTables} {\textbar} {Table} plug-in for {jQuery}},
  urldate = {2023-11-28},
  url     = {https://datatables.net/},
}

@misc{conda,
  title   = {Anaconda Software Distribution},
  urldate = {2016-11-01},
  url     = {https://anaconda.com},
}

@article{edamBrowser,
  author    = {Brancotte, Bryan and Blanchet, Christophe and Ménager, Hervé},
  title     = {A reusable tree-based web-visualization to browse {EDAM} ontology, and contribute to it},
  journal   = {Journal of Open Source Software},
  publisher = {The Open Journal},
  volume    = {3},
  number    = {27},
  pages     = {698},
  year      = {2018},
  doi       = {10.21105/joss.00698},
  url       = {https://doi.org/10.21105/joss.00698},
}

@misc{edamBrowserCode,
  author    = {Eldakroury, Hager and Dhamija, Sakshi and Rathi, Rashika and Patoliya, Drashti and Nkwuda, Sunday Cletus and Singh, Guneet and Yadav, Pooja and D'oleo, Kelly and Cherop, Marlene and Che Nico, Tawah Peggy and Kalaš, Matúš and Ménager, Hervé and Brancotte, Bryan},
  title     = {{EDAM Browser 2.0.0: Browsing multiple versions of EDAM}},
  publisher = {Zenodo},
  year      = {2021},
  doi       = {10.5281/zenodo.5808818},
  url       = {https://zenodo.org/doi/10.5281/zenodo.5808818},
  keywords  = {Ontology browser, Biosciences, Imaging, Machine learning, Domain ontology, EDAM},
  language  = {en},
  copyright = {MIT License},
}

@article{batut_community-driven_2018,
  author   = {Batut, Bérénice and Hiltemann, Saskia and Bagnacani, Andrea and Baker, Dannon and Bhardwaj, Vivek and Blank, Clemens and Bretaudeau, Anthony and Brillet-Guéguen, Loraine and Čech, Martin and Chilton, John and Clements, Dave and Doppelt-Azeroual, Olivia and Erxleben, Anika and Freeberg, Mallory Ann and Gladman, Simon and Hoogstrate, Youri and Hotz, Hans-Rudolf and Houwaart, Torsten and Jagtap, Pratik and Larivière, Delphine and Le Corguillé, Gildas and Manke, Thomas and Mareuil, Fabien and Ramírez, Fidel and Ryan, Devon and Sigloch, Florian Christoph and Soranzo, Nicola and Wolff, Joachim and Videm, Pavankumar and Wolfien, Markus and Wubuli, Aisanjiang and Yusuf, Dilmurat and Taylor, James and Backofen, Rolf and Nekrutenko, Anton and Grüning, Björn},
  title    = {Community-{Driven} {Data} {Analysis} {Training} for {Biology}},
  journal  = {Cell Systems},
  volume   = {6},
  number   = {6},
  pages    = {752--758.e1},
  year     = {2018},
  month    = jun,
  issn     = {24054712},
  doi      = {10.1016/j.cels.2018.05.012},
  url      = {https://linkinghub.elsevier.com/retrieve/pii/S2405471218302308},
  urldate  = {2024-02-26},
  language = {en},
}

@misc{RSEc,
  author    = {Ienasescu, Hans and Capella-Gutiérrez, Salvador and Coppens, Frederik and Fernández, José María and Gaignard, Alban and Goble, Carole and Gr{\"u}ning, Bj{\"o}rn and Gustafsson, Johan and Gelpi, Josep Ll and Harrow, Jennifer and Manos, Steven and Miura, Kota and M{\"o}ller, Steffen and Owen, Stuart and Paul-Gilloteaux, Perrine and Peterson, Hedi and Pitoulias, Manthos and Tedds, Jonathan and Repchevsky, Dmitri and Zambelli, Federico and Zharkov, Oleg and Kala{\v{s}}, Mat{\'u}{\v{s}} and Ménager, Hervé},
  title     = {The {ELIXIR} research software ecosystem: an open software metadata commons ({BOSC} track) [version 1; not peer reviewed]},
  publisher = {F1000 Research},
  year      = {2023},
  doi       = {10.7490/f1000research.1119604.1},
  url       = {https://f1000research.com/slides/12-1044},
}

@article{blankenberg2014dissemination,
  author    = {Blankenberg, Daniel and Von Kuster, Gregory and Bouvier, Emil and Baker, Dannon and Afgan, Enis and Stoler, Nicholas and {Galaxy Team} and Taylor, James and Nekrutenko, Anton},
  title     = {Dissemination of scientific software with {Galaxy} {ToolShed}},
  journal   = {Genome Biology},
  publisher = {Springer},
  volume    = {15},
  pages     = {1--3},
  year      = {2014},
}

@misc{dev_community_tool_table,
  title  = {{Creation of an interactive Galaxy tools table for your community (Galaxy Training Materials)}},
  author = {Bérénice Batut},
  url    = {https://training.galaxyproject.org/training-material/topics/dev/tutorials/community-tool-table/tutorial.html},
  note   = {Online; accessed Thu Mar 14 2024},
  year   = {2024},
}

@article{Hiltemann_2023,
  author    = {Saskia Hiltemann and Helena Rasche and Simon Gladman and Hans-Rudolf Hotz and Delphine Larivi{\`{e}}re and Daniel Blankenberg and Pratik D. Jagtap and Thomas Wollmann and Anthony Bretaudeau and Nadia Gou{\'{e}} and Timothy J. Griffin and Coline Royaux and Le Bras, Yvan and Subina Mehta and Anna Syme and Frederik Coppens and Bert Droesbeke and Nicola Soranzo and Wendi Bacon and Fotis Psomopoulos and Crist{\'{o}}bal Gallardo-Alba and John Davis and Melanie Christine Föll and Matthias Fahrner and Maria A. Doyle and Beatriz Serrano-Solano and Anne Claire Fouilloux and Peter van Heusden and Wolfgang Maier and Dave Clements and Florian Heyl and Björn Grüning and B{\'{e}}r{\'{e}}nice Batut},
  editor    = {Francis Ouellette},
  title     = {{Galaxy} {Training}: A powerful framework for teaching!},
  journal   = {PLoS Computational Biology},
  publisher = {Public Library of Science ({PLoS})},
  volume    = {19},
  number    = {1},
  pages     = {e1010752},
  year      = 2023,
  month     = jan,
  doi       = {10.1371/journal.pcbi.1010752},
  url       = {https://doi.org/10.1371/journal.pcbi.1010752},
}

@misc{dev_tool_annotation,
  title  = {{Adding and updating best practice metadata for Galaxy tools using the bio.tools registry (Galaxy Training Materials)}},
  author = {Bérénice Batut and Johan Gustafsson and Paul Zierep},
  url    = {https://training.galaxyproject.org/training-material/topics/dev/tutorials/tool-annotation/tutorial.html},
  note   = {Online; accessed Thu Mar 14 2024},
  year   = {2024},
}
104 changes: 104 additions & 0 deletions docs/publications/biohackathon_2024/paper.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,104 @@
---
title: 'Galaxy CoDex - Ensuring Galaxy community sustainability through resource aggregation and annotation'
title_short: 'BH24EU project 11: Galaxy CoDex'
tags:
- Findability
- Galaxy
- Community-specific Galaxy tools
- Tools
- EDAM
- bio.tools
- Metadata
- biohackeu24
authors:
- name: Bérénice Batut
orcid: 0000-0001-9852-1987
affiliation: 1, 2, a
- name: Wendi Bacon
orcid: 0000-0002-8170-8806
affiliation: 3, a
- name: Paul Zierep
orcid: 0000-0003-2982-388X
affiliation: 1, a
- name: Matúš Kalaš
orcid: 0000-0002-1509-4981
affiliation: 4
- name: Wai Cheng Thang
orcid: 0000-0002-1480-3563
affiliation: 5, 6
- name: Ove Johan Ragnar Gustafsson
orcid: 0000-0002-2977-5032
affiliation: 7
affiliations:
- name: Bioinformatics Group, Department of Computer Science, University of Freiburg, Freiburg, Germany
index: 1
- name: Institut Français de Bioinformatique, CNRS UAR 3601, Évry, France & Mésocentre Clermont-Auvergne, Université Clermont Auvergne, Aubiere, France
index: 2
- name: The Open University, Milton Keynes, United Kingdom
index: 3
- name: Department of Informatics, University of Bergen, Norway; and ELIXIR Norway
index: 4
- name: Queensland Cyber Infrastructure Foundation (QCIF), Australia
index: 5
- name: Institute of Molecular Bioscience, University of Queensland, St Lucia, Australia
index: 6
- name: Australian BioCommons, University of Melbourne, Melbourne, Victoria, Australia
index: 7
- name: These authors contributed equally to this work
index: a
date: 8 November 2024
bibliography: paper.bib
event: BH24EU
biohackathon_name: "ELIXIR BioHackathon Europe 2024"
biohackathon_url: "https://biohackathon-europe.org/"
biohackathon_location: "Barcelona, Spain, 2024"
group: Project 11 - Galaxy CoDex - Ensuring Galaxy community sustainability through resource aggregation and annotation
git_url:
authors_short: Bérénice Batut, Wendi Bacon, \emph{et al.}
---


# Introduction

Galaxy hosts a vast array of tools, tutorials, and workflows, with the exact number of workflows remaining uncertain. To address the challenge of enhancing tool visibility within this expansive ecosystem, a pipeline called the Galaxy Tool Metadata Extractor was created during the BioHackathon Europe 2023. This pipeline aggregates Galaxy tool suites from various sources, automatically extracts metadata such as bio.tools identifiers and EDAM ontology annotations, and presents the information in an interactive table. Users can filter this table to find tools relevant to their research community. Throughout development, it was noted that many tools lack EDAM annotations. Efforts by the microbial community during both BioHackathon Europe 2023 and a subsequent community-hosted online hackathon in 2024 have improved EDAM annotations for over 200 tools. However, Galaxy communities also offer training materials and workflows, which, like software, may be scattered across different platforms and lack EDAM annotations.

Building upon the achievements of BioHackathon Europe 2023, this new initiative seeks to expand the capabilities of the existing Galaxy tool list table by introducing the Galaxy Communities Dock (**Galaxy CoDex**). Galaxy CoDex will involve enhancing and implementing webpage templates and files that enable domain communities to efficiently gather, organize, integrate, and deploy pertinent tools, workflows, and training materials across various Galaxy servers. Concurrently, best practices for resource annotation will be developed and integrated into different levels of the Galaxy ecosystem.

In essence, the growth of Galaxy Communities necessitates the adoption of sustainable practices to ensure their continued advancement.

This project aims to achieve three main objectives:

1. **Establishing the infrastructure for Galaxy CoDex** to enhance the discoverability of tools, workflows, and training materials within the Galaxy ecosystem,
2. **Ensuring the sustainability of Galaxy CoDex** by implementing comprehensive resource annotations for communities (e.g. microGalaxy, single-cell), and
3. **Establishing ongoing resource annotation best practices within the Galaxy ecosystem.**


# Methods


## CoDex


## Community curation


## Website




# Outcomes and results


# Conclusion and outlook


# Acknowledgements

This work was developed as part of BioHackathon Europe 2024.
This work was supported by [ELIXIR](https://elixir-europe.org), the research infrastructure for life science data.
This work was supported by the Australian BioCommons which is enabled by NCRIS via Bioplatforms Australia funding.


# References