From 1fb707f4692d29f600bb152b655dee40b1f2de34 Mon Sep 17 00:00:00 2001 From: dorien-er Date: Tue, 6 Feb 2024 16:51:31 +0100 Subject: [PATCH 1/7] add script, test, config for download busco datasets --- src/busco/{ => busco}/config.vsh.yaml | 4 +- src/busco/{ => busco}/help.txt | 0 src/busco/{ => busco}/script.sh | 0 src/busco/{ => busco}/test.sh | 0 src/busco/{ => busco}/test_data/genome.fna | 0 src/busco/{ => busco}/test_data/protein.fasta | 0 src/busco/{ => busco}/test_data/script.sh | 0 .../busco_download_datasets/config.vsh.yaml | 44 +++++++++++++++++++ src/busco/busco_download_datasets/script.sh | 14 ++++++ src/busco/busco_download_datasets/test.sh | 15 +++++++ 10 files changed, 76 insertions(+), 1 deletion(-) rename src/busco/{ => busco}/config.vsh.yaml (95%) rename src/busco/{ => busco}/help.txt (100%) rename src/busco/{ => busco}/script.sh (100%) rename src/busco/{ => busco}/test.sh (100%) rename src/busco/{ => busco}/test_data/genome.fna (100%) rename src/busco/{ => busco}/test_data/protein.fasta (100%) rename src/busco/{ => busco}/test_data/script.sh (100%) create mode 100644 src/busco/busco_download_datasets/config.vsh.yaml create mode 100644 src/busco/busco_download_datasets/script.sh create mode 100644 src/busco/busco_download_datasets/test.sh diff --git a/src/busco/config.vsh.yaml b/src/busco/busco/config.vsh.yaml similarity index 95% rename from src/busco/config.vsh.yaml rename to src/busco/busco/config.vsh.yaml index fba14892..e074982b 100644 --- a/src/busco/config.vsh.yaml +++ b/src/busco/busco/config.vsh.yaml @@ -37,7 +37,9 @@ functionality: Specify a BUSCO lineage dataset that is most closely related to the assembly or gene set being assessed. The full list of available datasets can be viewed [here](https://busco-data.ezlab.org/v5/data/lineages/) or by running `busco --list-datasets` (which requires installing the tool). When unsure, the "--auto_lineage" flag can be set to automatically find the optimal lineage path. 
- Requested datasets will automatically be downloaded if not already present in the download folder. + Requested datasets will automatically be downloaded if not already present in the download folder. BUSCO will automatically download the requested dataset if it is not already present in the download folder. + You can optionally provide a path to a local dataset instead of a name, e.g. path/to/dataset. + Datasets can be downloaded using the busco/busco_download_dataset component. example: stramenopiles_odb10 - name: Outputs diff --git a/src/busco/help.txt b/src/busco/busco/help.txt similarity index 100% rename from src/busco/help.txt rename to src/busco/busco/help.txt diff --git a/src/busco/script.sh b/src/busco/busco/script.sh similarity index 100% rename from src/busco/script.sh rename to src/busco/busco/script.sh diff --git a/src/busco/test.sh b/src/busco/busco/test.sh similarity index 100% rename from src/busco/test.sh rename to src/busco/busco/test.sh diff --git a/src/busco/test_data/genome.fna b/src/busco/busco/test_data/genome.fna similarity index 100% rename from src/busco/test_data/genome.fna rename to src/busco/busco/test_data/genome.fna diff --git a/src/busco/test_data/protein.fasta b/src/busco/busco/test_data/protein.fasta similarity index 100% rename from src/busco/test_data/protein.fasta rename to src/busco/busco/test_data/protein.fasta diff --git a/src/busco/test_data/script.sh b/src/busco/busco/test_data/script.sh similarity index 100% rename from src/busco/test_data/script.sh rename to src/busco/busco/test_data/script.sh diff --git a/src/busco/busco_download_datasets/config.vsh.yaml b/src/busco/busco_download_datasets/config.vsh.yaml new file mode 100644 index 00000000..19e027e5 --- /dev/null +++ b/src/busco/busco_download_datasets/config.vsh.yaml @@ -0,0 +1,44 @@ +functionality: + name: busco + description: Downloads available busco datasets + info: + keywords: [lineage datasets] + homepage: https://busco.ezlab.org/ + documentation: 
https://busco.ezlab.org/busco_userguide.html + repository: https://gitlab.com/ezlab/busco + reference: "10.1007/978-1-4939-9173-0_14" + licence: MIT + argument_groups: + - name: Inputs + arguments: + - name: --download + type: string + description: | + Download dataset. Possible values are a specific dataset name, "all", "prokaryota", "eukaryota", or "virus". + The full list of available datasets can be viewed [here](https://busco-data.ezlab.org/v5/data/lineages/) or by running the busco/busco_list_datasets component. + required: false + example: stramenopiles_odb10 + - name: Outputs + arguments: + - name: --download_path + direction: output + type: file + description: | + Local filepath for storing BUSCO dataset downloads + required: true + default: busco_downloads + example: busco_downloads + resources: + - type: bash_script + path: script.sh + test_resources: + - type: bash_script + path: test.sh +platforms: + - type: docker + image: quay.io/biocontainers/busco:5.6.1--pyhdfd78af_0 + setup: + - type: docker + run: | + busco --version | sed 's/BUSCO\s\(.*\)/busco: "\1"/' > /var/software_versions.txt + - type: nextflow diff --git a/src/busco/busco_download_datasets/script.sh b/src/busco/busco_download_datasets/script.sh new file mode 100644 index 00000000..6010c01f --- /dev/null +++ b/src/busco/busco_download_datasets/script.sh @@ -0,0 +1,14 @@ +#!/bin/bash + +## VIASH START +## VIASH END + + +if [ ! -d "$par_download_path" ]; then + mkdir -p "$par_download_path" +fi + +busco \ + --download_path "$par_download_path" \ + --download "$par_download" + diff --git a/src/busco/busco_download_datasets/test.sh b/src/busco/busco_download_datasets/test.sh new file mode 100644 index 00000000..c6baecea --- /dev/null +++ b/src/busco/busco_download_datasets/test.sh @@ -0,0 +1,15 @@ +echo "> Downloading busco stramenopiles_odb10 dataset" + +"$meta_executable" \ + --download stramenopiles_odb10 \ + --download_path downloads + +echo ">> Checking output" +[ ! 
-f "downloads/file_versions.tsv" ] && echo "file_versions.tsv does not exist" && exit 1 +[ ! -f "downloads/lineages/stramenopiles_odb10/dataset.cfg" ] && echo "dataset.cfg does not exist" && exit 1 + +echo ">> Checking if output is empty" +[ ! -s "downloads/file_versions.tsv" ] && echo "file_versions.tsv is empty" && exit 1 +[ ! -s "downloads/lineages/stramenopiles_odb10/dataset.cfg" ] && echo "dataset.cfg is empty" && exit 1 + +rm -r downloads \ No newline at end of file From d56c3e585b16a47cd28ffb6e43b6eac48402c8f4 Mon Sep 17 00:00:00 2001 From: dorien-er Date: Tue, 6 Feb 2024 17:01:10 +0100 Subject: [PATCH 2/7] add changelog entry --- CHANGELOG.md | 2 ++ 1 file changed, 2 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index 276195c2..de9595bc 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -10,6 +10,8 @@ * `busco`: Assess genome assembly and annotation completeness with single copy orthologs (PR #6). +* `busco_list_datasets`: Download busco datasets (PR #19) + * `featurecounts`: Assign sequence reads to genomic features (PR #11). * `bgzip`: Add bgzip functionality to compress and decompress files (PR #13). From 1101e5341231682c203ea7344d4cfc28b819a01b Mon Sep 17 00:00:00 2001 From: dorien-er Date: Tue, 6 Feb 2024 17:14:43 +0100 Subject: [PATCH 3/7] fix typos --- src/busco/busco/config.vsh.yaml | 2 +- src/busco/busco_download_datasets/config.vsh.yaml | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/src/busco/busco/config.vsh.yaml b/src/busco/busco/config.vsh.yaml index e074982b..d1eef04d 100644 --- a/src/busco/busco/config.vsh.yaml +++ b/src/busco/busco/config.vsh.yaml @@ -37,7 +37,7 @@ functionality: Specify a BUSCO lineage dataset that is most closely related to the assembly or gene set being assessed. The full list of available datasets can be viewed [here](https://busco-data.ezlab.org/v5/data/lineages/) or by running `busco --list-datasets` (which requires installing the tool). 
When unsure, the "--auto_lineage" flag can be set to automatically find the optimal lineage path. - Requested datasets will automatically be downloaded if not already present in the download folder. BUSCO will automatically download the requested dataset if it is not already present in the download folder. + BUSCO will automatically download the requested dataset if it is not already present in the download folder. You can optionally provide a path to a local dataset instead of a name, e.g. path/to/dataset. Datasets can be downloaded using the busco/busco_download_dataset component. example: stramenopiles_odb10 diff --git a/src/busco/busco_download_datasets/config.vsh.yaml b/src/busco/busco_download_datasets/config.vsh.yaml index 19e027e5..582f3eb5 100644 --- a/src/busco/busco_download_datasets/config.vsh.yaml +++ b/src/busco/busco_download_datasets/config.vsh.yaml @@ -16,7 +16,7 @@ functionality: description: | Download dataset. Possible values are a specific dataset name, "all", "prokaryota", "eukaryota", or "virus". The full list of available datasets can be viewed [here](https://busco-data.ezlab.org/v5/data/lineages/) or by running the busco/busco_list_datasets component. - required: false + required: true example: stramenopiles_odb10 - name: Outputs arguments: @@ -25,7 +25,7 @@ functionality: type: file description: | Local filepath for storing BUSCO dataset downloads - required: true + required: false default: busco_downloads example: busco_downloads resources: From 36ec371afee0c62fec530fd3719bba103fb4dfd3 Mon Sep 17 00:00:00 2001 From: dorien-er Date: Fri, 9 Feb 2024 18:00:39 +0100 Subject: [PATCH 4/7] update changelog --- CHANGELOG.md | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index de9595bc..42a8790a 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -8,9 +8,9 @@ * `fastp`: An ultra-fast all-in-one FASTQ preprocessor (PR #3). 
-* `busco`: Assess genome assembly and annotation completeness with single copy orthologs (PR #6). - -* `busco_list_datasets`: Download busco datasets (PR #19) +* `busco`: + - `busco/busco`: Assess genome assembly and annotation completeness with single copy orthologs (PR #6). + - `busco/busco_download_datasets`: Download busco datasets (PR #19) * `featurecounts`: Assign sequence reads to genomic features (PR #11). From e4113673cefc283f27668471f3b4cbbf0e63138f Mon Sep 17 00:00:00 2001 From: dorien-er Date: Sat, 10 Feb 2024 14:09:21 +0100 Subject: [PATCH 5/7] rename busco to busco run --- src/busco/{busco => busco_run}/config.vsh.yaml | 2 +- src/busco/{busco => busco_run}/help.txt | 0 src/busco/{busco => busco_run}/script.sh | 0 src/busco/{busco => busco_run}/test.sh | 0 src/busco/{busco => busco_run}/test_data/genome.fna | 0 src/busco/{busco => busco_run}/test_data/protein.fasta | 0 src/busco/{busco => busco_run}/test_data/script.sh | 0 7 files changed, 1 insertion(+), 1 deletion(-) rename src/busco/{busco => busco_run}/config.vsh.yaml (99%) rename src/busco/{busco => busco_run}/help.txt (100%) rename src/busco/{busco => busco_run}/script.sh (100%) rename src/busco/{busco => busco_run}/test.sh (100%) rename src/busco/{busco => busco_run}/test_data/genome.fna (100%) rename src/busco/{busco => busco_run}/test_data/protein.fasta (100%) rename src/busco/{busco => busco_run}/test_data/script.sh (100%) diff --git a/src/busco/busco/config.vsh.yaml b/src/busco/busco_run/config.vsh.yaml similarity index 99% rename from src/busco/busco/config.vsh.yaml rename to src/busco/busco_run/config.vsh.yaml index d1eef04d..cb667eb4 100644 --- a/src/busco/busco/config.vsh.yaml +++ b/src/busco/busco_run/config.vsh.yaml @@ -1,5 +1,5 @@ functionality: - name: busco + name: busco_run description: Assessment of genome assembly and annotation completeness with single copy orthologs info: keywords: [Genome assembly, quality control] diff --git a/src/busco/busco/help.txt 
b/src/busco/busco_run/help.txt similarity index 100% rename from src/busco/busco/help.txt rename to src/busco/busco_run/help.txt diff --git a/src/busco/busco/script.sh b/src/busco/busco_run/script.sh similarity index 100% rename from src/busco/busco/script.sh rename to src/busco/busco_run/script.sh diff --git a/src/busco/busco/test.sh b/src/busco/busco_run/test.sh similarity index 100% rename from src/busco/busco/test.sh rename to src/busco/busco_run/test.sh diff --git a/src/busco/busco/test_data/genome.fna b/src/busco/busco_run/test_data/genome.fna similarity index 100% rename from src/busco/busco/test_data/genome.fna rename to src/busco/busco_run/test_data/genome.fna diff --git a/src/busco/busco/test_data/protein.fasta b/src/busco/busco_run/test_data/protein.fasta similarity index 100% rename from src/busco/busco/test_data/protein.fasta rename to src/busco/busco_run/test_data/protein.fasta diff --git a/src/busco/busco/test_data/script.sh b/src/busco/busco_run/test_data/script.sh similarity index 100% rename from src/busco/busco/test_data/script.sh rename to src/busco/busco_run/test_data/script.sh From 6c77ac99b5b18c093469006141c9f95c0e23ad7f Mon Sep 17 00:00:00 2001 From: dorien-er Date: Sat, 10 Feb 2024 14:09:37 +0100 Subject: [PATCH 6/7] update changelog --- CHANGELOG.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 42a8790a..0a2af08d 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -9,7 +9,7 @@ * `fastp`: An ultra-fast all-in-one FASTQ preprocessor (PR #3). * `busco`: - - `busco/busco`: Assess genome assembly and annotation completeness with single copy orthologs (PR #6). + - `busco/busco_run`: Assess genome assembly and annotation completeness with single copy orthologs (PR #6). - `busco/busco_download_datasets`: Download busco datasets (PR #19) * `featurecounts`: Assign sequence reads to genomic features (PR #11). 
From 7e97f55ba6feb6ef927b940bc86f5b4bd4ed598d Mon Sep 17 00:00:00 2001 From: Robrecht Cannoodt Date: Sun, 11 Feb 2024 19:58:21 +0100 Subject: [PATCH 7/7] fix names and namespaces --- src/busco/busco_download_datasets/config.vsh.yaml | 3 ++- src/busco/busco_list_datasets/config.vsh.yaml | 3 ++- src/busco/busco_run/config.vsh.yaml | 1 + 3 files changed, 5 insertions(+), 2 deletions(-) diff --git a/src/busco/busco_download_datasets/config.vsh.yaml b/src/busco/busco_download_datasets/config.vsh.yaml index 582f3eb5..dc356f8a 100644 --- a/src/busco/busco_download_datasets/config.vsh.yaml +++ b/src/busco/busco_download_datasets/config.vsh.yaml @@ -1,5 +1,6 @@ functionality: - name: busco + name: busco_download_datasets + namespace: busco description: Downloads available busco datasets info: keywords: [lineage datasets] diff --git a/src/busco/busco_list_datasets/config.vsh.yaml b/src/busco/busco_list_datasets/config.vsh.yaml index 444e2a6d..df612fdc 100644 --- a/src/busco/busco_list_datasets/config.vsh.yaml +++ b/src/busco/busco_list_datasets/config.vsh.yaml @@ -1,5 +1,6 @@ functionality: - name: busco + name: busco_list_datasets + namespace: busco description: Lists the available busco datasets info: keywords: [lineage datasets] diff --git a/src/busco/busco_run/config.vsh.yaml b/src/busco/busco_run/config.vsh.yaml index 11e08abb..0fdfea2e 100644 --- a/src/busco/busco_run/config.vsh.yaml +++ b/src/busco/busco_run/config.vsh.yaml @@ -1,5 +1,6 @@ functionality: name: busco_run + namespace: busco description: Assessment of genome assembly and annotation completeness with single copy orthologs info: keywords: [Genome assembly, quality control]