diff --git a/communities/microgalaxy/lab/sections/4_tools.yml b/communities/microgalaxy/lab/sections/4_tools.yml
index 3318efec..f7036faf 100644
--- a/communities/microgalaxy/lab/sections/4_tools.yml
+++ b/communities/microgalaxy/lab/sections/4_tools.yml
@@ -1,744 +1,900 @@
id: tools
-title: Community Tools
+title: Community curated tools
tabs:
-- id: tool_list
- title: List of community curated tools available for microGalaxy
+- id: more_tools
+ title: More tools !
+ heading_md: Request a new tool or look at the complete list
+ content:
+ - title_md: Request a new tool
+ description_md: 'You can request a new tool by contacting the microGalaxy community:
+ microgalaxy@lists.galaxyproject.org'
+ - title_md: See the complete tool list
+ description_md: Our comprehensive curated tool list is available via https://galaxyproject.github.io/galaxy_codex/microgalaxy
+- id: antimicrobial_resistance_prediction
+ title: Antimicrobial resistance prediction
+ heading_md: 'Top 10 for the EDAM operation: Antimicrobial resistance prediction'
content:
- - title_md: PAMPA
- description_md: Tools to compute and analyse biodiversity metrics
- - title_md: TreeBest
- description_md: TreeBeST best
- - title_md: abacas
- description_md: Order and Orientate Contigs
- title_md: abricate
- description_md: Mass screening of contigs for antiobiotic resistance genes
+ description_md: |-
+ Mass screening of contigs for antibiotic resistance genes
+ (Tool usage: 4319404)
+
+ - title_md: hamronization
+ description_md: |-
+ Convert AMR gene detection tool output to hAMRonization specification format.
+ (Tool usage: 21969)
+
+ - title_md: pharokka
+ description_md: |-
+ rapid standardised annotation tool for bacteriophage genomes and metagenomes
+ (Tool usage: 17313)
+
+ - title_md: seqsero2
+ description_md: |-
+ Salmonella serotype prediction from genome sequencing data
+ (Tool usage: 12546)
+
+ - title_md: fargene
+ description_md: |-
+ fARGene (Fragmented Antibiotic Resistance Gene iENntifiEr )
+ (Tool usage: 5298)
+
- title_md: abritamr
- description_md: A pipeline for running AMRfinderPlus and collating results into
- functional classes
- - title_md: abyss
- description_md: Assembly By Short Sequences - a de novo, parallel, paired-end
- sequence assembler
- - title_md: aldex2
- description_md: Performs analysis Of differential abundance taking sample variation
- into account
- - title_md: amplican
- description_md: AmpliCan is an analysis tool for genome editing.
- - title_md: amrfinderplus
- description_md: '"AMRFinderPlus is designed to find acquired antimicrobial resistance
- genes and point mutations in protein and/or assembled nucleotide sequences.It
- can also search "plus", stress, heat, and biocide resistance and virulence factors
- for some organisms.'
+ description_md: |-
+ A pipeline for running AMRfinderPlus and collating results into functional classes
+ (Tool usage: 3417)
+
+ - title_md: sonneityping
+ description_md: |-
+ Scripts for parsing Mykrobe predict results for Shigella sonnei.
+ (Tool usage: 3)
+
+ - title_md: mykrobe
+ description_md: |-
+ Antibiotic resistance predictions
+ (Tool usage: 0)
+
+- id: dna_barcoding
+ title: DNA barcoding
+ heading_md: 'Top 10 for the EDAM operation: DNA barcoding'
+ content:
+ - title_md: mothur
+ description_md: |-
+ Mothur wrappers
+ (Tool usage: 3250957)
+
+ - title_md: dada2
+ description_md: |-
+ DADA2 wrappers
+ (Tool usage: 851682)
+
+ - title_md: picrust
+ description_md: |-
+ PICRUSt wrappers
+ (Tool usage: 9180)
+
+ - title_md: lotus2
+ description_md: |-
+ LotuS2 OTU processing pipeline
+ (Tool usage: 3768)
+
- title_md: ancombc
- description_md: Performs analysis of compositions of microbiomes with bias correction.
- - title_md: antismash
- description_md: Antismash allows the genome-wide identification, annotation and
- analysis of secondary metabolite biosynthesis gene clusters
- - title_md: artic
- description_md: 'The artic pipeline is designed to help run the artic bioinformatics
- protocols;for example the nCoV-2019 novel coronavirus protocol.Features include:
- read filtering, primer trimming, amplicon coverage normalisation,variant calling
- and consensus building'
- - title_md: assemblystats
- description_md: Summarise an assembly (e.g. N50 metrics)
+ description_md: |-
+ Performs analysis of compositions of microbiomes with bias correction.
+ (Tool usage: 78)
+
+- id: genome_annotation
+ title: Genome annotation
+ heading_md: 'Top 10 for the EDAM operation: Genome annotation'
+ content:
+ - title_md: prokka
+ description_md: |-
+ Rapid annotation of prokaryotic genomes
+ (Tool usage: 2767395)
+
+ - title_md: mitos
+ description_md: |-
+ de-novo annotation of metazoan mitochondrial genomes
+ (Tool usage: 733392)
+
+ - title_md: eggnog_mapper
+ description_md: |-
+ eggnog-mapper fast functional annotation of novel sequences
+ (Tool usage: 166508)
+
- title_md: bakta
- description_md: '"Bakta is a tool for the rapid & standardized annotation of bacterial
- genomes and plasmids from both isolates and MAGs.It provides dbxref-rich and
- sORF-including annotations in machine-readable JSON & bioinformatics standard
- file formats for automatic downstream analysis."'
- - title_md: bamtools
- description_md: Operate on and transform BAM datasets in various ways using bamtools
- - title_md: bandage
- description_md: Bandage - A Bioinformatics Application for Navigating De novo
- Assembly Graphs Easily
- - title_md: bayescan
- description_md: Detecting natural selection from population-based genetic data
- - title_md: bbtools
- description_md: BBTools is a suite of fast, multithreaded bioinformatics tools
- designed for analysis of DNA and RNA sequence data.BBTools can handle common
- sequencing file formats such as fastq, fasta, sam, scarf, fasta+qual, compressed
- or raw,with autodetection of quality encoding and interleaving. It is written
- in Java and works on any platform supportingJava, including Linux, MacOS, and
- Microsoft Windows and Linux; there are no dependencies other than Java (version7
- or higher). Program descriptions and options are shown when running the shell
- scripts with no parameters.
- - title_md: bigscape
- description_md: Construct sequence similarity networks of BGCs and groups them
- into GCF
- - title_md: binning_refiner
- description_md: Reconciles the outputs of different binning programs with the
- aim to improve the quality of genome bins,especially with respect to contamination
- levels.
- - title_md: biohansel
- description_md: Heidelberg and Enteritidis SNP Elucidation
- - title_md: biom_format
- description_md: The biom-format package provides a command line interface and
- Python API for working with BIOM files.
- - title_md: biotradis
- description_md: Bio-Tradis is a tool suite dedicated to essentiality analyses
- with TraDis data.
- - title_md: blast2go
- description_md: Maps BLAST results to GO annotation terms
- - title_md: blast_rbh
- description_md: BLAST Reciprocal Best Hits (RBH) from two FASTA files
- - title_md: blastxml_to_top_descr
- description_md: Make table of top BLAST match descriptions
- - title_md: bracken
- description_md: Bayesian Reestimation of Abundance with KrakEN
+ description_md: |-
+ "Bakta is a tool for the rapid & standardized annotation of bacterial genomes and plasmids from both isolates and MAGs. It provides dbxref-rich and sORF-including annotations in machine-readable JSON & bioinformatics standard file formats for automatic downstream analysis."
+ (Tool usage: 62211)
+
+ - title_md: metabat2
+ description_md: |-
+ MetaBAT2 (Metagenome Binning based on Abundance and Tetranucleotide frequency) is an automated metagenome binning software that integrates empirical probabilistic distances of genome abundance and tetranucleotide frequency.
+ (Tool usage: 24330)
+
+ - title_md: prodigal
+ description_md: |-
+ A protein-coding gene prediction software tool for bacterial and archaeal genomes
+ (Tool usage: 5706)
+
+ - title_md: make_nr
+ description_md: |-
+ Make a FASTA file non-redundant
+ (Tool usage: 0)
+
+- id: genome_assembly
+ title: Genome assembly
+ heading_md: 'Top 10 for the EDAM operation: Genome assembly'
+ content:
+ - title_md: spades
+ description_md: |-
+ SPAdes is an assembly toolkit containing various assembly pipelines. It implements the following 4 stages: assembly graph construction, k-bimer adjustment, construction of paired assembly graph and contig construction.
+ (Tool usage: 783637)
+
- title_md: busco
- description_md: BUSCO assess genome and annotation completeness
- - title_md: cat
- description_md: Contig Annotation Tool (CAT)
- - title_md: cd_hit_dup
- description_md: simple tool for removing duplicates from sequencing reads
- - title_md: cdhit
- description_md: Cluster or compare biological sequence datasets
- - title_md: cemitool
- description_md: Gene co-expression network analysis tool
- - title_md: checkm
- description_md: Assess the quality of microbial genomes recovered from isolates,
- single cells, and metagenomes
- - title_md: clair3
- description_md: Symphonizing pileup and full-alignment for high-performance long-read
- variant calling
- - title_md: clinod
- description_md: 'NoD: a Nucleolar localization sequence detector for eukaryotic
- and viral proteins'
- - title_md: clustalw
- description_md: ClustalW multiple sequence alignment program for DNA or proteins
- - title_md: cmsearch_deoverlap
- description_md: removes lower scoring overlaps from cmsearch results.
- - title_md: codeml
- description_md: Detects positive selection
- - title_md: cojac
- description_md: co-occurrence of mutations on amplicons
- - title_md: combine_assembly_stats
- description_md: Combine multiple Assemblystats datasets into a single tabular
- report
- - title_md: combine_metaphlan_humann
- description_md: Combine MetaPhlAn2 and HUMAnN2 outputs to relate genus/species
- abundances and gene families/pathways abundances
- - title_md: compare_humann2_output
- description_md: Compare outputs of HUMAnN2 for several samples and extract similar
- and specific information
- - title_md: compleasm
- description_md: 'Compleasm: a faster and more accurate reimplementation of BUSCO'
+ description_md: |-
+ BUSCO assess genome and annotation completeness
+ (Tool usage: 511991)
+
+ - title_md: unicycler
+ description_md: |-
+ Unicycler is a hybrid assembly pipeline for bacterial genomes.
+ (Tool usage: 505309)
+
+ - title_md: shovill
+ description_md: |-
+ Faster de novo assembly pipeline based around Spades
+ (Tool usage: 322407)
+
+ - title_md: flye
+ description_md: |-
+ Assembly of long and error-prone reads.
+ (Tool usage: 185534)
+
+ - title_md: racon
+ description_md: |-
+ Consensus module for raw de novo DNA assembly of long uncorrected reads.
+ (Tool usage: 125164)
+
+ - title_md: megahit
+ description_md: |-
+ An ultra-fast single-node solution for large and complex metagenomics assembly via succinct de Bruijn graph.
+ (Tool usage: 107253)
+
+ - title_md: roary
+ description_md: |-
+ Roary the pangenome pipeline
+ (Tool usage: 97942)
+
+ - title_md: trycycler
+ description_md: |-
+ Trycycler toolkit wrappers
+ (Tool usage: 57507)
+
+ - title_md: plasmidfinder
+ description_md: |-
+ "PlasmidFinder provides the detection of replicons in the WGS and assigns the plasmids under study to lineages that trace back the information to the existing knowledge on Inc groups and suggests possible reference plasmids for each lineage"
+ (Tool usage: 44259)
+
+- id: sequence_alignment
+ title: Sequence alignment
+ heading_md: 'Top 10 for the EDAM operation: Sequence alignment'
+ content:
+ - title_md: emboss_5
+ description_md: |-
+ Galaxy wrappers for EMBOSS version 5.0.0 tools
+ (Tool usage: 1113305)
+
+ - title_md: blastxml_to_top_descr
+ description_md: |-
+ Make table of top BLAST match descriptions
+ (Tool usage: 794001)
+
+ - title_md: bbtools
+ description_md: |-
+ BBTools is a suite of fast, multithreaded bioinformatics tools designed for analysis of DNA and RNA sequence data. BBTools can handle common sequencing file formats such as fastq, fasta, sam, scarf, fasta+qual, compressed or raw, with autodetection of quality encoding and interleaving. It is written in Java and works on any platform supporting Java, including Linux, MacOS, and Microsoft Windows; there are no dependencies other than Java (version 7 or higher). Program descriptions and options are shown when running the shell scripts with no parameters.
+ (Tool usage: 56862)
+
+ - title_md: smallgenomeutilities
+ description_md: |-
+ Set of utilities for manipulating small viral genome data.
+ (Tool usage: 474)
+
+- id: sequence_clustering
+ title: Sequence clustering
+ heading_md: 'Top 10 for the EDAM operation: Sequence clustering'
+ content:
+ - title_md: antismash
+ description_md: |-
+ Antismash allows the genome-wide identification, annotation and analysis of secondary metabolite biosynthesis gene clusters
+ (Tool usage: 81856)
+
+ - title_md: proteinortho
+ description_md: |-
+ Proteinortho is a tool to detect orthologous proteins/genes within different species.
+ (Tool usage: 11886)
+
- title_md: concoct
- description_md: CONCOCT (Clustering cONtigs with COverage and ComposiTion) does
- unsupervised binning of metagenomic contigs byusing nucleotide composition -
- kmer frequencies - and coverage data for multiple samples. CONCOCT can accurately(up
- to species level) bin metagenomic contigs.
- - title_md: coverm
- description_md: CoverM genome and contig wrappers
- - title_md: cryptogenotyper
- description_md: CryptoGenotyper is a standalone tool to *in-silico* determine
- species and subtype based on SSU rRNA and gp60 markers.
+ description_md: |-
+ CONCOCT (Clustering cONtigs with COverage and ComposiTion) does unsupervised binning of metagenomic contigs by using nucleotide composition - kmer frequencies - and coverage data for multiple samples. CONCOCT can accurately (up to species level) bin metagenomic contigs.
+ (Tool usage: 6315)
+
+ - title_md: binning_refiner
+ description_md: |-
+ Reconciles the outputs of different binning programs with the aim to improve the quality of genome bins, especially with respect to contamination levels.
+ (Tool usage: 378)
+
+- id: sequence_trimming
+ title: Sequence trimming
+ heading_md: 'Top 10 for the EDAM operation: Sequence trimming'
+ content:
- title_md: cutadapt
- description_md: Flexible tool to remove adapter sequences (and quality trim) high
- throughput sequencing reads (fasta/fastq).
- - title_md: dada2
- description_md: DADA2 wrappers
- - title_md: das_tool
- description_md: DAS Tool for genome resolved metagenomics
- - title_md: deseq2
- description_md: Differential gene expression analysis based on the negative binomial
- distribution
- - title_md: diamond
- description_md: DIAMOND is a new alignment tool for aligning short DNA sequencing
- reads to a protein reference database such as NCBI-NR.
- - title_md: disco
- description_md: DISCO is a overlap-layout-consensus (OLC) metagenome assembler
- - title_md: dram
- description_md: DRAM for distilling microbial metabolism to automate the curation
- of microbiome function
- - title_md: drep
- description_md: dRep compares and dereplicates genome sets
- - title_md: ectyper
- description_md: EC-Typer - in silico serotyping of Escherichia coli species
- - title_md: effectiveT3
- description_md: Find bacterial type III effectors in protein sequences
- - title_md: eggnog_mapper
- description_md: eggnog-mapper fast functional annotation of novel sequences
- - title_md: emboss_5
- description_md: Galaxy wrappers for EMBOSS version 5.0.0 tools
- - title_md: ete
- description_md: Analyse phylogenetic trees using the ETE Toolkit
- - title_md: export2graphlan
- description_md: export2graphlan is a conversion software tool for producing both
- annotation and tree file for GraPhlAn
- - title_md: ez_histograms
- description_md: ggplot2 histograms and density plots
- - title_md: fargene
- description_md: fARGene (Fragmented Antibiotic Resistance Gene iENntifiEr )
- - title_md: fastani
- description_md: Fast alignment-free computation of whole-genome Average Nucleotide
- Identity
- - title_md: fastk
- description_md: 'FastK: A K-mer counter (for HQ assembly data sets)'
+ description_md: |-
+ Flexible tool to remove adapter sequences (and quality trim) high throughput sequencing reads (fasta/fastq).
+ (Tool usage: 2018074)
+
+ - title_md: trim_galore
+ description_md: |-
+ Trim Galore adaptive quality and adapter trimmer
+ (Tool usage: 1585275)
+
+ - title_md: graphmap
+ description_md: |-
+ Mapper for long, error-prone reads.
+ (Tool usage: 21368)
+
+ - title_md: seqkit
+ description_md: |-
+ A cross-platform and ultrafast toolkit for FASTA/Q file manipulation
+ (Tool usage: 10824)
+
+ - title_md: ncbi_fcs_gx
+ description_md: |-
+ FCS-GX detects contamination from foreign organisms in genome sequences using the genome cross-species aligner (GX).
+ (Tool usage: 7842)
+
+- id: sequencing_quality_control
+ title: Sequencing quality control
+ heading_md: 'Top 10 for the EDAM operation: Sequencing quality control'
+ content:
- title_md: fastp
- description_md: Fast all-in-one preprocessing for FASTQ files
- - title_md: fastqe
- description_md: FASTQE
- - title_md: fasttree
- description_md: FastTree infers approximately-maximum-likelihood phylogenetic
- trees from alignments of nucleotide or protein sequences - GVL
- - title_md: featurecounts
- description_md: featureCounts counts the number of reads aligned to defined masked
- regions in a reference genome
- - title_md: filter_spades_repeats
- description_md: Remove short and repeat contigs/scaffolds
+ description_md: |-
+ Fast all-in-one preprocessing for FASTQ files
+ (Tool usage: 4875446)
+
+ - title_md: qualimap
+ description_md: |-
+ Qualimap 2 is a platform-independent application written in Java and R that facilitates the quality control of alignment sequencing data and its derivatives like feature counts.
+ (Tool usage: 2413023)
+
+ - title_md: multiqc
+ description_md: |-
+ MultiQC aggregates results from bioinformatics analyses across many samples into a single report
+ (Tool usage: 1190347)
+
- title_md: filtlong
- description_md: Filtlong - Filtering long reads by quality
- - title_md: flashlfq
- description_md: FlashLFQ mass-spectrometry proteomics label-free quantification
- - title_md: flye
- description_md: Assembly of long and error-prone reads.
- - title_md: format_metaphlan2_output
- description_md: Format MetaPhlAn2 output to extract abundance at different taxonomic
- levels
- - title_md: fraggenescan
- description_md: Tool for finding (fragmented) genes in short read
- - title_md: freyja
- description_md: lineage abundances estimation
- - title_md: frogs
- description_md: Suite for metabarcoding analysis
- - title_md: funannotate
- description_md: Funannotate is a genome prediction, annotation, and comparison
- software package.
- - title_md: getmlst
- description_md: Download MLST datasets by species from pubmlst.org
- - title_md: ggplot2
- description_md: ggplot2 is a system for declaratively creating graphics, based
- on The Grammar of Graphics.You provide the data, tell ggplot2 how to map variables
- to aesthetics, what graphical primitives to use,and it takes care of the details.
- - title_md: gi2taxonomy
- description_md: Fetch taxonomic representation
- - title_md: glimmer
- description_md: Glimmer makes gene predictions.
- - title_md: glimmer_hmm
- description_md: GlimmerHMM is a new gene finder based on a Generalized Hidden
- Markov Model (GHMM)
- - title_md: goenrichment
- description_md: Performs GO Enrichment analysis.
- - title_md: goseq
- description_md: goseq does selection-unbiased testing for category enrichment
- amongst differentially expressed (DE) genes for RNA-seq data
+ description_md: |-
+ Filtlong - Filtering long reads by quality
+ (Tool usage: 268165)
+
+ - title_md: pycoqc
+ description_md: |-
+ QC metrics for ONT Basecalling
+ (Tool usage: 77031)
+
+ - title_md: fastqe
+ description_md: |-
+ FASTQE
+ (Tool usage: 53844)
+
+ - title_md: checkm
+ description_md: |-
+ Assess the quality of microbial genomes recovered from isolates, single cells, and metagenomes
+ (Tool usage: 22500)
+
+ - title_md: lighter
+ description_md: |-
+ Lighter is a kmer-based error correction method for whole genome sequencing data
+ (Tool usage: 469)
+
+ - title_md: rasusa
+ description_md: |-
+ Randomly subsample sequencing reads to a specified coverage
+ (Tool usage: 126)
+
+- id: taxonomic_classification
+ title: Taxonomic classification
+ heading_md: 'Top 10 for the EDAM operation: Taxonomic classification'
+ content:
+ - title_md: kraken2
+ description_md: |-
+ Kraken2 for taxonomic designation.
+ (Tool usage: 1280462)
+
+ - title_md: kraken
+ description_md: |-
+ Kraken is a system for assigning taxonomic labels to short DNA sequences, usually obtained through metagenomic studies. Previous attempts by other bioinformatics software to accomplish this task have often used sequence alignment or machine learning techniques that were quite slow, leading to the development of less sensitive but much faster abundance estimation programs. Kraken aims to achieve high sensitivity and high speed by utilizing exact alignments of k-mers and a novel classification algorithm.
+ (Tool usage: 483902)
+
+ - title_md: humann
+ description_md: |-
+ HUMAnN for functionally profiling metagenomes and metatranscriptomes at species-level resolution
+ (Tool usage: 92286)
+
- title_md: graphlan
- description_md: GraPhlAn is a software tool for producing high-quality circular
- representations of taxonomic and phylogenetic trees
- - title_md: graphmap
- description_md: Mapper for long, error-prone reads.
+ description_md: |-
+ GraPhlAn is a software tool for producing high-quality circular representations of taxonomic and phylogenetic trees
+ (Tool usage: 37690)
+
+ - title_md: megan
+ description_md: |-
+ MEGAN Community Edition - Interactive exploration and analysis of large-scale microbiome sequencing data. MEGAN is a tool for studying the taxonomic content of a set of DNA reads, typically collected in a metagenomics project. In a preprocessing step, a sequence alignment of all reads against a suitable database of reference DNA or protein sequences must be performed to produce an input file for the program. MEGAN is suitable for DNA reads (metagenome data), RNA reads (metatranscriptome data), peptide sequences (metaproteomics data) and, using a suitable synonyms file that maps SILVA ids to taxon ids, for 16S rRNA data (amplicon sequencing).
+ (Tool usage: 12390)
+
- title_md: gtdbtk
- description_md: 'GTDB-Tk is a software tool kit for assigning objective taxonomic
- classifications to bacterial and archaeal genomesbased on the Genome Database
- Taxonomy GTDB. It is designed to work with recent advances that allow hundreds
- orthousands of metagenome-assembled genomes (MAGs) to be obtained directly from
- environmental samples. It can alsobe applied to isolate and single-cell genomes. '
- - title_md: gubbins
- description_md: Gubbins - bacterial recombination detection
- - title_md: hamronization
- description_md: Convert AMR gene detection tool output to hAMRonization specification
- format.
- - title_md: hansel
- description_md: Heidelberg and Enteritidis SNP Elucidation
- - title_md: hifiasm_meta
- description_md: A hifiasm fork for metagenome assembly using Hifi reads.
- - title_md: hivtrace
- description_md: An application that identifies potential transmission clusters
- within a supplied FASTA file with an option to find potential links against
- the Los Alamos HIV Sequence Database.
- - title_md: hmmer3
- description_md: HMMER is used for searching sequence databases for homologs of
- proteinsequences, and for making protein sequence alignments. It implementsmethods
- using probabilistic models called profile hidden Markov models(profile HMMs).
- - title_md: humann
- description_md: HUMAnN for functionally profiling metagenomes and metatranscriptomes
- at species-level resolution
- - title_md: hyphy
- description_md: Hypothesis Testing using Phylogenies
- - title_md: hypo
- description_md: Super Fast & Accurate Polisher for Long Read Genome Assemblies
- - title_md: icescreen
- description_md: ICEscreen identifies Integrative Conjugative Elements (ICEs) and
- Integrative Mobilizable Elements (IMEs) in Bacillota genomes.
- - title_md: idba_ud
- description_md: Wrappers for the idba assembler variants.
- - title_md: infernal
- description_md: Infernal ("INFERence of RNA ALignment") is for searching DNA sequence
- databases for RNA structure and sequence similarities.
- - title_md: instrain
- description_md: InStrain is a tool for analysis of co-occurring genome populations
- from metagenomes
- - title_md: integron_finder
- description_md: '"IntegronFinder identify integrons with high accuracy and sensitivity.It
- searches for attC sites using covariance models, for integron-integrases using
- HMM profiles, and for other features (promoters, attI site) using pattern matching"'
- - title_md: interproscan
- description_md: Interproscan queries the interpro database and provides annotations.
- - title_md: iprscan5
- description_md: Interproscan queries the interpro database and provides annotations.
- - title_md: iqtree
- description_md: Efficient phylogenomic software by maximum likelihood
- - title_md: isescan
- description_md: '"ISEScan is a pipeline to identify IS (Insertion Sequence) elements
- in genome and metagenomebased on profile hidden Markov models constructed from
- manually curated IS elements."'
- - title_md: itsx
- description_md: ITSx is an open source software utility to extract the highly
- variable ITS1 and ITS2 subregions from ITS sequences.
- - title_md: ivar
- description_md: iVar is a computational package that contains functions broadly
- useful for viral amplicon-based sequencing
- - title_md: jbrowse
- description_md: JBrowse Genome Browser integrated as a Galaxy Tool
- - title_md: jellyfish
- description_md: Jellyfish is a tool for fast, memory-efficient counting of k-mers
- in DNA
- - title_md: kat_filter
- description_md: Filtering kmers or reads from a database of kmers hashes
- - title_md: kc-align
- description_md: Kc-Align custom tool
- - title_md: khmer
- description_md: In-memory nucleotide sequence k-mer counting, filtering, graph
- traversal and more
- - title_md: kleborate
- description_md: Screen genome assemblies of Klebsiella pneumoniae and the Klebsiella
- pneumoniae species complex (KpSC)
- - title_md: kofamscan
- description_md: Gene function annotation tool based on KEGG Orthology and hidden
- Markov model
- - title_md: kraken
- description_md: Kraken is a system for assigning taxonomic labels to short DNAsequences,
- usually obtained through metagenomic studies. Previous attempts by otherbioinformatics
- software to accomplish this task have often used sequence alignmentor machine
- learning techniques that were quite slow, leading to the developmentof less
- sensitive but much faster abundance estimation programs. Kraken aims toachieve
- high sensitivity and high speed by utilizing exact alignments of k-mersand a
- novel classification algorithm.
- - title_md: kraken2
- description_md: Kraken2 for taxonomic designation.
- - title_md: kraken2tax
- description_md: Convert Kraken output to Galaxy taxonomy data.
- - title_md: kraken_biom
- description_md: Create BIOM-format tables (http://biom-format.org) from Kraken
- output (http://ccb.jhu.edu/software/kraken/)
- - title_md: kraken_taxonomy_report
- description_md: Kraken taxonomy report
+ description_md: |-
+ GTDB-Tk is a software tool kit for assigning objective taxonomic classifications to bacterial and archaeal genomes based on the Genome Database Taxonomy GTDB. It is designed to work with recent advances that allow hundreds or thousands of metagenome-assembled genomes (MAGs) to be obtained directly from environmental samples. It can also be applied to isolate and single-cell genomes.
+ (Tool usage: 3192)
+
+ - title_md: recentrifuge
+ description_md: |-
+ "With Recentrifuge, researchers can analyze results from taxonomic classifiers using interactive charts with emphasis on the confidence level of the classifications. In addition to contamination-subtracted samples, Recentrifuge provides shared and exclusive taxa per sample, thus enabling robust contamination removal and comparative analysis in environmental and clinical metagenomics."
+ (Tool usage: 1530)
+
+ - title_md: frogs
+ description_md: |-
+ Suite for metabarcoding analysis
+ (Tool usage: 0)
+
+- id: visualisation
+ title: Visualisation
+ heading_md: 'Top 10 for the EDAM operation: Visualisation'
+ content:
+ - title_md: quast
+ description_md: |-
+ Quast (Quality ASsessment Tool) evaluates genome assemblies.
+ (Tool usage: 463815)
+
+ - title_md: taxonomy_krona_chart
+ description_md: |-
+ Krona pie chart from taxonomic profile
+ (Tool usage: 214239)
+
- title_md: krakentools
- description_md: KrakenTools is a suite of scripts to be used alongside the Kraken
- - title_md: krocus
- description_md: Predict MLST directly from uncorrected long reads
- - title_md: lca_wrapper
- description_md: Find lowest diagnostic rank
- - title_md: legsta
- description_md: Performs in silico Legionella pneumophila sequence based typing.
- - title_md: lighter
- description_md: Lighter is a kmer-based error correction method for whole genome
- sequencing data
- - title_md: limma_voom
- description_md: Perform RNA-Seq differential expression analysis using limma voom
- pipeline
- - title_md: lineagespot
- description_md: Identification of SARS-CoV-2 related metagenomic mutations based
- on a single (or a list of) variant(s) file(s)
- - title_md: lorikeet
- description_md: Tools for M. tuberculosis DNA fingerprinting (spoligotyping)
- - title_md: lotus2
- description_md: LotuS2 OTU processing pipeline
- - title_md: m6anet
- description_md: m6anet to detect m6A RNA modifications from nanopore data
- - title_md: maaslin2
- description_md: MaAsLin2 is comprehensive R package for efficiently determining
- multivariable association between microbial meta'omic features and clinical
- metadata.
- - title_md: mafft
- description_md: Multiple alignment program for amino acid or nucleotide sequences
- - title_md: make_nr
- description_md: Make a FASTA file non-redundant
- - title_md: maker
- description_md: MAKER is a portable and easily configurable genome annotation
- pipeline.Its purpose is to allow smaller eukaryotic and prokaryotic genome projects
- to independently annotate their genomes and to create genome databases.
- - title_md: mapseq
- description_md: fast and accurate sequence read classification tool designed to
- assign taxonomy and OTU classifications to ribosomal RNA sequences.
- - title_md: mash
- description_md: Fast genome and metagenome distance estimation using MinHash
- - title_md: maxbin2
- description_md: clusters metagenomic contigs into bins
+ description_md: |-
+ KrakenTools is a suite of scripts to be used alongside the Kraken
+ (Tool usage: 185016)
+
+ - title_md: pygenometracks
+ description_md: |-
+ pyGenomeTracks: Standalone program and library to plot beautiful genome browser tracks.
+ (Tool usage: 72938)
+
- title_md: maxquant
- description_md: wrapper for MaxQuant
- - title_md: mcl
- description_md: The Markov Cluster Algorithm, a cluster algorithm for graphs
- - title_md: medaka
- description_md: Sequence correction provided by ONT Research
- - title_md: megahit
- description_md: An ultra-fast single-node solution for large and complex metagenomics
- assembly via succinct de Bruijn graph.
- - title_md: megahit_contig2fastg
- description_md: A subprogram within the Megahit toolkit for converting contigs
- to assembly graphs (fastg)
- - title_md: megan
- description_md: MEGAN Community Edition - Interactive exploration and analysis
- of large-scale microbiome sequencing data. MEGAN is a tool for studying the
- taxonomic content of a set of DNA reads, typically collected in a metagenomics
- project.In a preprocessing step, a sequence alignment of all reads against a
- suitable database of reference DNA or proteinsequences must be performed to
- produce an input file for the program. MEGAN is suitable for DNA reads (metagenomedata),
- RNA reads (metatranscriptome data), peptide sequences (metaproteomics data)
- and, using a suitable synonymsfile that maps SILVA ids to taxon ids, for 16S
- rRNA data (amplicon sequencing).
- - title_md: meningotype
- description_md: Assign sequence type to N. meningitidis genome assemblies
- - title_md: merqury
- description_md: Merqury is a tool for evaluating genomes assemblies based of k-mer
- operations.
- - title_md: meryl
- description_md: Meryl a k-mer counter.
- - title_md: metabat2
- description_md: MetaBAT2 (Metagenome Binning based on Abundance and Tetranucleotide
- frequency) is an automated metagenome binningsoftware that integrates empirical
- probabilistic distances of genome abundance and tetranucleotide frequency.
- - title_md: metaeuk
- description_md: 'MetaEuk is a modular toolkit designed for large-scale gene discovery
- andannotation in eukaryotic metagenomic contigs. Metaeuk combines the fast andsensitive
- homology search capabilities of MMseqs2 with a dynamic programmingprocedure
- to recover optimal exons sets. It reduces redundancies in multiplediscoveries
- of the same gene and resolves conflicting gene predictions onthe same strand. '
- - title_md: metagene_annotator
- description_md: MetaGeneAnnotator gene-finding program for prokaryote and phage
- - title_md: metagenomeseq
- description_md: metagenomeSeq Normalization
- - title_md: metanovo
- description_md: Produce targeted databases for mass spectrometry analysis.
- - title_md: metaphlan
- description_md: MetaPhlAn for Metagenomic Phylogenetic Analysis
+ description_md: |-
+ wrapper for MaxQuant
+ (Tool usage: 38708)
+
+ - title_md: kraken_taxonomy_report
+ description_md: |-
+ Kraken taxonomy report
+ (Tool usage: 25996)
+
+ - title_md: unipept
+ description_md: |-
+ Unipept retrieves metaproteomics information
+ (Tool usage: 18906)
+
- title_md: metaquantome
- description_md: quantitative analysis of microbiome taxonomy and function
- - title_md: metawrapmg
- description_md: A flexible pipeline for genome-resolved metagenomic data analysis
- - title_md: minia
- description_md: Short-read assembler based on a de Bruijn graph
- - title_md: miniasm
- description_md: Miniasm - Ultrafast de novo assembly for long noisy reads (though
- having no consensus step)
- - title_md: minipolish
- description_md: Polishing miniasm assemblies
- - title_md: miniprot
- description_md: Align a protein sequence against a genome with affine gap penalty,
- splicing and frameshift.
- - title_md: mitos
- description_md: de-novo annotation of metazoan mitochondrial genomes
- - title_md: mlst
- description_md: Scan contig files against PubMLST typing schemes
- - title_md: mob_suite
- description_md: MOB-suite is a set of software tools for clustering, reconstruction
- and typing of plasmids from draft assemblies
- - title_md: mothur
- description_md: Mothur wrappers
- - title_md: mrbayes
- description_md: A program for the Bayesian estimation of phylogeny.
- - title_md: msconvert
- description_md: msconvert Convert and/or filter mass spectrometry files (including
- vendor formats) using the official Docker container
- - title_md: msstatstmt
- description_md: MSstatsTMT protein significance analysis in shotgun mass spectrometry-based
- proteomic experiments with tandem mass tag (TMT) labeling
- - title_md: multigsea
- description_md: GSEA-based pathway enrichment analysis for multi-omics data
- - title_md: multiqc
- description_md: MultiQC aggregates results from bioinformatics analyses across
- many samples into a single report
- - title_md: mykrobe
- description_md: Antibiotic resistance predictions
- - title_md: mykrobe_parser
- description_md: RScript to parse the results of mykrobe predictor.
- - title_md: mz_to_sqlite
- description_md: Creates a SQLite database for proteomics data
- - title_md: nanocompore
- description_md: Nanocompore compares 2 ONT nanopore direct RNA sequencing datasets
- from different experimental conditions expected to have a significant impact
- on RNA modifications. It is recommended to have at least 2 replicates per condition.
- For example one can use a control condition with a significantly reduced number
- of modifications such as a cell line for which a modification writing enzyme
- was knocked-down or knocked-out. Alternatively, on a smaller scale transcripts
- of interests could be synthesized in-vitro.
- - title_md: nanoplot
- description_md: Plotting tool for long read sequencing data and alignments
- - title_md: nanopolishcomp
- description_md: NanopolishComp contains 2 modules. Eventalign_collapse collapses
- the raw file generated by nanopolish eventalign by kmers rather than by event.
- Freq_meth_calculate methylation frequency at genomic CpG sites from the output
- of nanopolish call-methylation.
- - title_md: ncbi_blast_plus
- description_md: NCBI BLAST+
- - title_md: ncbi_fcs_gx
- description_md: FCS-GX detects contamination from foreign organisms in genome
- sequences using the genome cross-species aligner (GX).
- - title_md: newick_utils
- description_md: Perform operations on Newick trees
- - title_md: nextclade
- description_md: Identify differences between your sequences and a reference sequence
- used by Nextstrain
- - title_md: nextdenovo
- description_md: String graph-based de novo assembler for long reads
- - title_md: nonpareil
- description_md: Estimate average coverage in metagenomic datasets
- - title_md: nucleosome_prediction
- description_md: Prediction of Nucleosomes Positions on the Genome
- - title_md: nugen_nudup
- description_md: Marks/removes PCR introduced duplicate molecules based on the
- molecular tagging technology used in NuGEN products.
- - title_md: obisindicators
- description_md: Compute biodiveristy indicators for marine data from obis
- - title_md: obitools
- description_md: OBITools is a set of programs developed to simplify the manipulation
- of sequence files
- - title_md: omark
- description_md: Proteome quality assessment software
- - title_md: orfipy
- description_md: Galaxy wrapper for ORFIPY
- - title_md: orthofinder
- description_md: Accurate inference of orthologous gene groups made easy
- - title_md: peptideshaker
- description_md: PeptideShaker and SearchGUI
- - title_md: pfamscan
- description_md: Search a FASTA sequence against a library of Pfam HMM.
- - title_md: pharokka
- description_md: rapid standardised annotation tool for bacteriophage genomes and
- metagenomes
+ description_md: |-
+ quantitative analysis of microbiome taxonomy and function
+ (Tool usage: 12911)
+
+ - title_md: maaslin2
+ description_md: |-
+ MaAsLin2 is comprehensive R package for efficiently determining multivariable association between microbial meta'omic features and clinical metadata.
+ (Tool usage: 8505)
+
- title_md: phyloseq
- description_md: Handling and analysis of high-throughput microbiome census data
- - title_md: phyml
- description_md: PhyML is a phylogeny software based on the maximum-likelihood
- principle.
- - title_md: picrust
- description_md: PICRUSt wrappers
- - title_md: picrust2
- description_md: 'PICRUSt2: Phylogenetic Investigation of Communities by Reconstruction
- of Unobserved States'
- - title_md: plasflow
- description_md: PlasFlow - Prediction of plasmid sequences in metagenomic contigs.
- - title_md: plasmidfinder
- description_md: '"PlasmidFinder provides the detection of replicons in the WGSand
- assigns the plasmids under study to lineages that trace backthe information
- to the existing knowledge on Inc groups and suggestspossible reference plasmids
- for each lineage"'
- - title_md: plasmidspades
- description_md: Genome assembler for assemblying plasmid
- - title_md: polypolish
- description_md: '"Polypolish is a tool for polishing genome assemblies with short
- reads.Polypolish uses SAM files where each read has been aligned to all possible
- locations (not just a single best location).This allows it to repair errors
- in repeat regions that other alignment-based polishers cannot fix."'
- - title_md: prodigal
- description_md: A protein-coding gene prediction software tool for bacterial and
- archaeal genomes
- - title_md: prokka
- description_md: Rapid annotation of prokaryotic genomes
- - title_md: promer
- description_md: Aligns two sets of contigs and reports amino acid substitutions
- between them
- - title_md: proteinortho
- description_md: Proteinortho is a tool to detect orthologous proteins/genes within
- different species.
- - title_md: pycoqc
- description_md: QC metrics for ONT Basecalling
- - title_md: pygenometracks
- description_md: 'pyGenomeTracks: Standalone program and library to plot beautiful
- genome browser tracks.'
- - title_md: qiime_add_on
- description_md: QIIME to perform microbial community analysis
- - title_md: qiime_core
- description_md: QIIME to perform microbial community analysis
- - title_md: qualimap
- description_md: Qualimap 2 is a platform-independent application written in Java
- andR that facilitates the quality control of alignment sequencing data and itsderivatives
- like feature counts.
- - title_md: quast
- description_md: Quast (Quality ASsessment Tool) evaluates genome assemblies.
- - title_md: quickmerge
- description_md: Merge long-read and hybrid assemblies to increase contiguity
- - title_md: rRNA
- description_md: Identification of ribosomal RNA genes in metagenomic fragments.
- - title_md: racon
- description_md: Consensus module for raw de novo DNA assembly of long uncorrected
- reads.
- - title_md: rasusa
- description_md: Randomly subsample sequencing reads to a specified coverage
- - title_md: raxml
- description_md: RAxML - A Maximum Likelihood based phylogenetic inference
- - title_md: read_it_and_keep
- description_md: Rapid decontamination of SARS-CoV-2 sequencing reads
- - title_md: reago
- description_md: Reago is tool to assembly 16S ribosomal RNA recovery from metagenomic
- data.
- - title_md: recentrifuge
- description_md: '"With Recentrifuge, researchers can analyze results from taxonomic
- classifiers using interactive charts with emphasis on the confidence level of
- the classifications.In addition to contamination-subtracted samples.Recentrifuge
- provides shared and exclusive taxa per sample,thus enabling robust contamination
- removal and comparative analysis in environmental and clinical metagenomics."'
- - title_md: repeatexplorer2
- description_md: Tool for annotation of repeats from unassembled shotgun reads.
- - title_md: roary
- description_md: Roary the pangenome pipeline
- - title_md: rseqc
- description_md: an RNA-seq quality control package
- - title_md: salmon
- description_md: Salmon is a wicked-fast program to produce a highly-accurate,
- transcript-level quantification estimates from RNA-seq and single-cell data.
- - title_md: sarscov2formatter
- description_md: sarscov2formatter custom script
- - title_md: sarscov2summary
- description_md: sarscov2summary custom script
- - title_md: scoary
- description_md: Scoary calculates the assocations between all genes in the accessory
- genome and the traits.
- - title_md: semibin
- description_md: 'SemiBin: Semi-supervised Metagenomic Binning Using Siamese Neural
- Networks'
- - title_md: seqkit
- description_md: A cross-platform and ultrafast toolkit for FASTA/Q file manipulation
- - title_md: seqprep
- description_md: Tool for merging paired-end Illumina reads and trimming adapters.
- - title_md: seqsero2
- description_md: Salmonella serotype prediction from genome sequencing data
- - title_md: shorah
- description_md: Reconstruct haplotypes using ShoRAH in amplicon mode
- - title_md: shovill
- description_md: Faster de novo assembly pipeline based around Spades
- - title_md: sistr_cmd
- description_md: SISTR in silico serotyping tool
- - title_md: smallgenomeutilities
- description_md: Set of utilities for manipulating small viral genome data.
- - title_md: smalt
- description_md: SMALT aligns DNA sequencing reads with a reference genome.
- - title_md: snap
- description_md: SNAP is a general purpose gene finding program suitable for both
- eukaryotic and prokaryotic genomes.
- - title_md: snippy
- description_md: Contains the snippy tool for characterising microbial snps
- - title_md: sonneityping
- description_md: Scripts for parsing Mykrobe predict results for Shigella sonnei.
- - title_md: sortmerna
- description_md: SortMeRNA is a software designed to rapidly filter ribosomal RNA
- fragments from metatransriptomic data produced by next-generation sequencers.
- - title_md: spades
- description_md: 'SPAdes is an assembly toolkit containing various assembly pipelines.
- It implements the following 4 stages: assembly graph construction, k-bimer adjustment,
- construction of paired assembly graph and contig construction.'
- - title_md: spotyping
- description_md: SpoTyping allows fast and accurate in silico Mycobacterium spoligotyping
- from sequence reads
- - title_md: sr_bowtie
- description_md: bowtie wrapper tool to align small RNA sequencing reads
- - title_md: srst2
- description_md: Short Read Sequence Typing for Bacterial Pathogens
- - title_md: srst2
- description_md: SRST2 Short Read Sequence Typing for Bacterial Pathogens
- - title_md: staramr
- description_md: Scan genome contigs against the ResFinder, PlasmidFinder, and
- PointFinder antimicrobial resistance databases.
- - title_md: stringmlst
- description_md: Rapid and accurate identification of the sequence type (ST)
- - title_md: structure
- description_md: for using multi-locus genotype data to investigate population
- structure.
- - title_md: suite_qiime2__alignment
- description_md: .nan
- - title_md: suite_qiime2__composition
- description_md: .nan
- - title_md: suite_qiime2__cutadapt
- description_md: .nan
- - title_md: suite_qiime2__dada2
- description_md: .nan
- - title_md: suite_qiime2__deblur
- description_md: .nan
- - title_md: suite_qiime2__demux
- description_md: .nan
- - title_md: suite_qiime2__diversity
- description_md: .nan
- - title_md: suite_qiime2__diversity_lib
- description_md: .nan
- - title_md: suite_qiime2__emperor
- description_md: .nan
- - title_md: suite_qiime2__feature_classifier
- description_md: .nan
- - title_md: suite_qiime2__feature_table
- description_md: .nan
- - title_md: suite_qiime2__fragment_insertion
- description_md: .nan
- - title_md: suite_qiime2__longitudinal
- description_md: .nan
- - title_md: suite_qiime2__metadata
- description_md: .nan
- - title_md: suite_qiime2__phylogeny
- description_md: .nan
- - title_md: suite_qiime2__quality_control
- description_md: .nan
- - title_md: suite_qiime2__quality_filter
- description_md: .nan
- - title_md: suite_qiime2__rescript
- description_md: .nan
- - title_md: suite_qiime2__sample_classifier
- description_md: .nan
- - title_md: suite_qiime2__taxa
- description_md: .nan
- - title_md: suite_qiime2__vsearch
- description_md: .nan
- - title_md: suite_qiime2_core
- description_md: .nan
- - title_md: suite_qiime2_core__tools
- description_md: .nan
- - title_md: t2ps
- description_md: Draw phylogeny
- - title_md: t2t_report
- description_md: Summarize taxonomy
- - title_md: t_coffee
- description_md: T-Coffee
- - title_md: taxonomy_krona_chart
- description_md: Krona pie chart from taxonomic profile
- - title_md: tb-profiler
- description_md: Processes M. tuberculosis sequence data to infer strain type and
- identify known drug resistance markers.
- - title_md: tooldistillator
- description_md: ToolDistillator extract and aggregate information from different
- tool outputs to JSON parsable files
- - title_md: transit
- description_md: TRANSIT
- - title_md: transtermhp
- description_md: Finds rho-independent transcription terminators in bacterial genomes
- - title_md: trim_galore
- description_md: Trim Galore adaptive quality and adapter trimmer
- - title_md: trycycler
- description_md: Trycycler toolkit wrappers
- - title_md: unicycler
- description_md: Unicycler is a hybrid assembly pipeline for bacterial genomes.
- - title_md: unipept
- description_md: Unipept retrieves metaproteomics information
- - title_md: uniprotxml_downloader
- description_md: Download UniProt proteome in XML or fasta format
- - title_md: usher
- description_md: UShER toolkit wrappers
- - title_md: valet
- description_md: A pipeline for detecting mis-assemblies in metagenomic assemblies.
- - title_md: vapor
- description_md: Classify Influenza samples from raw short read sequence data
- - title_md: varvamp
- description_md: Variable VirusAMPlicons (varVAMP) is a tool to design primers
- for highly diverse viruses
- - title_md: vegan
- description_md: an R package fo community ecologist
- - title_md: velvet
- description_md: de novo genomic assembler specially designed for short read sequencing
- technologies
- - title_md: velvet_optimiser
- description_md: Automatically optimize Velvet assemblies
- - title_md: virAnnot
- description_md: virAnnot wrappers
- - title_md: vsearch
- description_md: VSEARCH including searching, clustering, chimera detection, dereplication,
- sorting, masking and shuffling of sequences.
- - title_md: wtdbg
- description_md: WTDBG is a fuzzy Bruijn graph (FBG) approach to long noisy reads
- assembly.
+ description_md: |-
+ Handling and analysis of high-throughput microbiome census data
+ (Tool usage: 3615)
+
diff --git a/sources/bin/populate_labs.py b/sources/bin/populate_labs.py
index 6e28ce12..9b2f943b 100644
--- a/sources/bin/populate_labs.py
+++ b/sources/bin/populate_labs.py
@@ -1,11 +1,19 @@
+# python ./sources/bin/populate_labs.py --tool_tsv communities/microgalaxy/resources/curated_tools.tsv --tool_yml communities/microgalaxy/lab/sections/4_tools.yml
+
import argparse
import os
from typing import List
-import oyaml as yaml
import pandas as pd
+from ruamel.yaml import YAML as yaml
+from ruamel.yaml.scalarstring import LiteralScalarString
+
+number_of_categories = 10
+number_of_tools = 10
-# import yaml
+
+def add_tools_url(tools):  # placeholder hook: currently returns `tools` unchanged
+ return tools
def main() -> None:
@@ -28,38 +36,147 @@ def main() -> None:
try:
# Read the TSV file with pandas (use tab delimiter)
- data = pd.read_csv(args.tool_tsv, sep="\t")
+ tools = pd.read_csv(args.tool_tsv, sep="\t")
# Construct the YAML data structure
yaml_data = {
"id": "tools",
- "title": "Community Tools",
- "tabs": [
- {"id": "tool_list", "title": "List of community curated tools available for microGalaxy", "content": []}
- ],
+ "title": "Community curated tools",
+ "tabs": [],
+ }
+
+ #################################
+ # Add manual entry for tool request and complete list
+ #################################
+
+ entries = []
+
+ # Tool request
+ entry = {
+ "title_md": "Request a new tool",
+ "description_md": "You can request a new tool by contacting the microGalaxy community: microgalaxy@lists.galaxyproject.org",
}
+ entries.append(entry)
- # Populate the content section with each row from the TSV
- for _, row in data.iterrows():
- # Use the first column (assumed to be the tool title) as the title_md
- title_md = row[data.columns[0]] # Get the first column's value as title_md
-
- # Start the unordered list and construct each - for every other column in the row
- description = row["Description"]
- # for column in data.columns[1:]: # Skip the first column (since it's title_md)
- # description += f"
- {column}: {row[column]}
\n"
- # description += "
"
-
- # Create the tool entry with the formatted HTML list
- tool_entry = {
- "title_md": title_md,
- "description_md": description, # Directly insert the HTML string without escape sequences
+ # Complete tool list
+ entry = {
+ "title_md": "See the complete tool list",
+ "description_md": "Our comprehensive curated tool list is available via https://galaxyproject.github.io/galaxy_codex/microgalaxy",
+ }
+ entries.append(entry)
+
+ # Add manual entries to the top of the tabs
+ yaml_data["tabs"].append(
+ {
+ "id": "more_tools",
+ "title": "More tools !",
+ "heading_md": f"Request a new tools or look at the complete list",
+ "content": entries,
}
- yaml_data["tabs"][0]["content"].append(tool_entry)
+ )
+
+ #######################################
+ # Get highest ranking EDAM operations
+ #######################################
+
+ count_column = "Suite runs on main servers"
+
+ # Step 1: Split the categories into separate rows and strip whitespace
+ df = tools.assign(Category=tools["EDAM operations"].str.split(",")).explode("Category")
+ df["Category"] = df["Category"].str.strip() # Strip whitespace
+
+ # Step 2: Group by category to calculate total count and item count
+ grouped = (
+ df.groupby("Category")
+ .agg(
+ total_count=(count_column, "sum"),
+ item_count=("Suite ID", "size"), # Count distinct items if necessary, use 'nunique'
+ )
+ .reset_index()
+ )
+
+ # Step 3: Filter categories with at least 5 items
+ filtered = grouped[grouped["item_count"] >= 5]
+
+ ###########################
+ # Get the corresponding tools
+ ###########################
+
+ # Step 4: Sort by total count in descending order
+ top_categories = filtered.sort_values(by="total_count", ascending=False).head(number_of_categories)["Category"]
+
+ # Step 5: Assign each tool to a single top category
+ # Sort by 'Suite ID' then 'Category' so the alphabetically first top category is the one kept
+ df_unique = df[df["Category"].isin(top_categories)] # Filter rows for the top `number_of_categories` categories
+ df_unique = df_unique.sort_values(by=["Suite ID", "Category"]) # Sort by suite ID, then category, to keep one category per suite
+
+ # Step 6: Remove duplicates, keeping the first appearance of each tool
+ df_unique = df_unique.drop_duplicates(subset=["Suite ID"], keep="first")
+
+ # Step 7: Extract the top `number_of_tools` items per category, ranked by the usage count column
+ top_items_per_category = (
+ df_unique.groupby("Category", group_keys=False) # Group by category
+ .apply(lambda group: group.nlargest(number_of_tools, count_column)) # Get top items per category
+ .reset_index(drop=True) # Reset index for clean output
+ )
+
+ #############################
+ # Populate the table
+ #############################
+
+ for group_id, group in top_items_per_category.groupby("Category"):
+
+ tool_entries = []
+ for index, row in group.iterrows():
+
+ # Prepare the description with an HTML unordered list and links for each Galaxy tool ID
+ description = f"{row['Description']}\n (Tool usage: {row[count_column]})"
+ tool_ids = row["Tool IDs"]
+ owner = row["Suite owner"]
+ wrapper_id = row["Suite ID"]
+
+ # Split the tool IDs by comma if it's a valid string, otherwise handle as an empty list
+ tool_ids_list = tool_ids.split(",") if isinstance(tool_ids, str) else []
+
+ # Create the base URL template for each tool link
+ url_template = (
+ "/tool_runner?tool_id=toolshed.g2.bx.psu.edu%2Frepos%2F{owner}%2F{wrapper_id}%2F{tool_id}"
+ )
+
+ # Build HTML list items with links
+ description += "\n\n"
+ for tool_id in tool_ids_list:
+ tool_id = tool_id.strip() # Trim whitespace
+ # Format the URL with owner, wrapper ID, and tool ID
+ url = url_template.format(owner=owner, wrapper_id=wrapper_id, tool_id=tool_id)
+ url = "{{ galaxy_base_url }}" + url
+ description += f' - {tool_id}
\n'
+ description += "
"
+
+ # Use LiteralScalarString to enforce literal block style for the description
+ description_md = LiteralScalarString(description.strip())
+
+ # Create the tool entry
+ tool_entry = {
+ "title_md": wrapper_id,
+ "description_md": description_md,
+ }
+
+ tool_entries.append(tool_entry)
+
+ # Create table entry for each EDAM
+ yaml_data["tabs"].append(
+ {
+ "id": group_id.replace(" ", "_").lower(),
+ "title": group_id,
+ "heading_md": f"Top 10 for the EDAM operation: {group_id}",
+ "content": tool_entries,
+ }
+ )
# Write the YAML data to the output file
with open(args.tool_yml, "w") as yaml_file:
- yaml.dump(yaml_data, yaml_file, default_flow_style=False, allow_unicode=True, indent=2)
+ yaml().dump(yaml_data, yaml_file)
print(f"Data successfully written to '{args.tool_yml}'")
except Exception as e: