Skip to content

Commit

Permalink
customizing hybrid_strategy check-up (#37)
Browse files Browse the repository at this point in the history
* customizing hybrid_strategy check-up

* Create .gitpod.yml

* allocate more resources for quast and fix indentation

* Update haslr_hybrid.nf

haslr sometimes randomly quits ... make it retry

* pilon module make sure to have fresh outdir

* adding changes for new version

* fixing haslr retry definition

* Update nextflow.config

* Update base.config

* Update base.config

* file copy also for cloud envs

* copy statement does not work in cloud envs

* fixed error

* add option to not pilon polish raw assemblies

* fix new params usage

* update manual with new parameter

* Update base.config
  • Loading branch information
fmalmeida authored Sep 3, 2022
1 parent c1d2ab6 commit 6554501
Show file tree
Hide file tree
Showing 13 changed files with 93 additions and 28 deletions.
14 changes: 14 additions & 0 deletions .gitpod.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
image: nfcore/gitpod:latest

vscode:
extensions: # based on nf-core.nf-core-extensionpack
- codezombiech.gitignore # Language support for .gitignore files
# - cssho.vscode-svgviewer # SVG viewer
- esbenp.prettier-vscode # Markdown/CommonMark linting and style checking for Visual Studio Code
- eamodio.gitlens # Quickly glimpse into whom, why, and when a line or code block was changed
- EditorConfig.EditorConfig # override user/workspace settings with settings found in .editorconfig files
- Gruntfuggly.todo-tree # Display TODO and FIXME in a tree view in the activity bar
- mechatroner.rainbow-csv # Highlight columns in csv files in different colors
# - nextflow.nextflow # Nextflow syntax highlighting
- oderwat.indent-rainbow # Highlight indentation level
- streetsidesoftware.code-spell-checker # Spelling checker for source code
2 changes: 1 addition & 1 deletion .zenodo.json
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
"description": "<p>MpGAP is built using Nextflow, a workflow tool to run tasks across multiple compute infrastructures in a very portable manner. It uses Docker/Singularity containers making installation trivial and results highly reproducible. It is an easy to use pipeline that adopts well known software for _de novo_ genome assembly of Illumina, Pacbio and Oxford Nanopore sequencing data through illumina only, long reads only or hybrid modes.</p>",
"license": "other-open",
"title": "fmalmeida/MpGAP: A generic multi-platform genome assembly pipeline",
"version": "v3.1.3",
"version": "v3.1.4",
"upload_type": "software",
"creators": [
{
Expand Down
30 changes: 23 additions & 7 deletions conf/base.config
Original file line number Diff line number Diff line change
Expand Up @@ -11,14 +11,18 @@ process {

// labels
withLabel:process_ultralow {
cpus = { check_max( 1 * task.attempt, 'cpus' ) }
cpus = { check_max( 1 * task.attempt, 'cpus' ) }
memory = { check_max( 2.GB * task.attempt, 'memory' ) }
time = { check_max( 1.h * task.attempt, 'time' ) }
time = { check_max( 1.h * task.attempt, 'time' ) }
}
withLabel:process_low {
cpus = { check_max( 2 * task.attempt, 'cpus' ) }
cpus = { check_max( 2 * task.attempt, 'cpus' ) }
memory = { check_max( 4.GB * task.attempt, 'memory' ) }
time = { check_max( 1.h * task.attempt, 'time' ) }
time = { check_max( 1.h * task.attempt, 'time' ) }

errorStrategy = { task.exitStatus in [21,143,137,104,134,139,247] ? 'retry' : 'finish' }
maxRetries = 1
maxErrors = '-1'
}
withLabel:error_ignore {
errorStrategy = 'ignore'
Expand All @@ -31,9 +35,21 @@ process {
// Assemblies will first try to adjust themselves to a parallel execution
// If it is not possible, then it waits to use all the resources allowed
withLabel:process_assembly {
cpus = { if (task.attempt == 1) { check_max( 6 * task.attempt, 'cpus' ) } else { params.max_cpus } }
memory = { if (task.attempt == 1) { check_max( 14.GB * task.attempt, 'memory' ) } else { params.max_memory } }
time = { if (task.attempt == 1) { check_max( 16.h * task.attempt, 'time' ) } else { params.max_time } }
cpus = { if (task.attempt == 1) { check_max( 6 * task.attempt, 'cpus' ) } else { params.max_cpus } }
memory = { if (task.attempt == 1) { check_max( 20.GB * task.attempt, 'memory' ) } else { params.max_memory } }
time = { if (task.attempt == 1) { check_max( 24.h * task.attempt, 'time' ) } else { params.max_time } }

// retry at least once to try it with full resources
errorStrategy = { task.exitStatus in [1,21,143,137,104,134,139,247] ? 'retry' : 'finish' }
maxRetries = 1
maxErrors = '-1'
}

// Quast sometimes can take too long
withName:quast {
cpus = { if (task.attempt == 1) { check_max( 4 * task.attempt, 'cpus' ) } else { params.max_cpus } }
memory = { if (task.attempt == 1) { check_max( 10.GB * task.attempt, 'memory' ) } else { params.max_memory } }
time = { if (task.attempt == 1) { check_max( 12.h * task.attempt, 'time' ) } else { params.max_time } }

// retry at least once to try it with full resources
errorStrategy = { task.exitStatus in [21,143,137,104,134,139,247] ? 'retry' : 'finish' }
Expand Down
15 changes: 11 additions & 4 deletions conf/defaults.config
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,8 @@ params {


// Output folder name
output = "output"
output = "output"
tracedir = "${params.output}/pipeline_info"


/*
Expand Down Expand Up @@ -83,15 +84,21 @@ params {
/*
* Advanced parameters
*
* Controlling the execution of assemblers
* Controlling the execution of assemblers and other tools.
* It must be set as true to skip the software and false to use it.
* Also adding the possibility to pass additional parameters to them
* Additional parameters must be in quotes and separated by spaces.
*/


quast_additional_parameters = null // Give additional parameters to Quast while assessing assembly metrics.
// Must be given as shown in Quast manual. E.g. " --large --eukaryote ".
quast_additional_parameters = null // Give additional parameters to Quast while assessing assembly metrics.
// Must be given as shown in Quast manual. E.g. " --large --eukaryote ".

skip_raw_assemblies_polishing = false // This will make the pipeline not polish raw assemblies on hybrid strategy 2.
// For example, if a sample is assembled with flye and polished with medaka,
// by default, both assemblies will be passed to pilon so you can compare them.
// If you don't need this comparison and don't want to polish the raw assembly,
// use this parameter.

skip_spades = false // Hybrid and shortreads only assemblies
spades_additional_parameters = null // Must be given as shown in Spades manual. E.g. " --meta --plasmids "
Expand Down
6 changes: 6 additions & 0 deletions docs/manual.rst
Original file line number Diff line number Diff line change
Expand Up @@ -249,6 +249,12 @@ Advanced assembler customization options
* - ``--quast_additional_parameters``
- NA
- | Give additional parameters to Quast while assessing assembly metrics. Must be given as shown in Quast manual. E.g. ``" --large --eukaryote "``.

* - ``--skip_raw_assemblies_polishing``
- false
- | This will make the pipeline not polish raw assemblies on hybrid strategy 2.
| For example, if a sample is assembled with flye and polished with medaka, by default, both assemblies will be passed to pilon so you can compare them.
| If you don't need this comparison and don't want to polish the raw assembly, use this parameter.
* - ``--skip_canu``
- false
Expand Down
6 changes: 5 additions & 1 deletion lib/WorkflowMpGAP.groovy
Original file line number Diff line number Diff line change
Expand Up @@ -8,13 +8,17 @@ class WorkflowMpGAP {
// Check and validate parameters
//
public static void initialise(params, log) {
params.hybrid_strategy = params.hybrid_strategy.toString()
if (!params.get_config && !params.get_samplesheet && !params.help) {
if (!params.input) {
log.error "ERROR!\nA major error has occurred!\n\t==> A samplesheet has not been provided. Please, provide a samplesheet to run the analysis.\n\t Online documentation is available at: https://mpgap.readthedocs.io/en/latest/\nPlease, read the docs.\nCheers."
System.exit(1)
}
}

if (params.hybrid_strategy.toString() != "1" && params.hybrid_strategy.toString() != "2" && params.hybrid_strategy.toString() != "both") {
log.error "ERROR!\nA major error has occurred!\n\t==> Parameter --hybrid_strategy must be either 1, 2 or both.\n\t Online documentation is available at: https://mpgap.readthedocs.io/en/latest/\nPlease, read the docs.\nCheers."
System.exit(1)
}
}

}
1 change: 1 addition & 0 deletions main.nf
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@ import org.yaml.snakeyaml.Yaml
VALIDATE & PRINT PARAMETER SUMMARY
========================================================================================
*/
WorkflowMpGAP.initialise(params, log)
WorkflowMain.initialise(workflow, params, log)

/*
Expand Down
12 changes: 12 additions & 0 deletions markdown/CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,18 @@

The tracking for changes started in v2.

## v3.1.4 -- [2022-Sep-03]

This version addresses the changes discussed in [#36](https://github.com/fmalmeida/MpGAP/issues/36), [#37](https://github.com/fmalmeida/MpGAP/issues/37) and [#38](https://github.com/fmalmeida/MpGAP/issues/38).
Its main changes are:

* Solving the problem of loading a parameter that accepts either integer or string by removing check-up from JSON schema and creating a customized check-up.
* Added 'error_retry' label to `haslr` as sometimes it randomly fails.
* Added a `.gitpod.yml` file
* Customized labels to ask for a little bit more on first run
* Added a module config for quast to ask for more memory and cpus on first run, removing it from 'process_low' label
* Added a simple command in pilon module to ensure it starts with a "fresh" output dir to place results

## v3.1.3 -- [2022-Mar-03]

Although Megahit was already present inside the docker image and the core of the pipeline as it was used by [Shovill](https://github.com/tseemann/shovill), Shovill is an assembler focused on bacteria, and, in their manual, they instruct users to run [Megahit](https://github.com/voutcn/megahit) directly when working with non-bacterial samples.
Expand Down
6 changes: 3 additions & 3 deletions modules/Hybrid/unicycler_polish.nf

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

1 change: 0 additions & 1 deletion modules/QualityAssessment/quast.nf

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

5 changes: 2 additions & 3 deletions nextflow.config
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,6 @@ params.hybrid_strategy = params.hybrid_strategy.toString()
params {

// Boilerplate options
tracedir = "${params.output}/pipeline_info"
help = false
get_config = false
get_samplesheet = false
Expand Down Expand Up @@ -95,6 +94,6 @@ manifest {
description = "Nextflow pipeline for de novo genome assembly"
homePage = "https://github.com/fmalmeida/mpgap"
mainScript = "main.nf"
nextflowVersion = ">=20.10.0"
version = "3.1.3"
nextflowVersion = "!>=21.10.3"
version = "3.1.4"
}
15 changes: 8 additions & 7 deletions nextflow_schema.json
Original file line number Diff line number Diff line change
Expand Up @@ -65,14 +65,8 @@
"description": "Select assembly strategy",
"properties": {
"hybrid_strategy": {
"type": "string",
"description": "Which hybrid strategy to run? Options: 1, 2, both.",
"help_text": "Selects which hybrid assembly strategy to run. Please read the manual for more information: https://mpgap.readthedocs.io/en/latest/manual.html",
"enum": [
"1",
"2",
"both"
],
"default": "1",
"fa_icon": "fas fa-align-left"
},
Expand Down Expand Up @@ -138,12 +132,19 @@
}
},
"turn_assemblers_on_off": {
"title": "Turn assemblers on/off",
"title": "Turn assemblers and modules on/off",
"type": "object",
"description": "Select which assemblers to skip or not",
"default": "",
"fa_icon": "fas fa-tasks",
"properties": {
"skip_raw_assemblies_polishing": {
"type": "boolean",
"description": "Not pilon polish with short reads the raw nanopore assemblies.",
"help_text": "This will make the pipeline not polish raw assemblies on hybrid strategy 2.\nFor example, if a sample is assembled with flye and polished with medaka, by default, both assemblies will be passed to pilon so you can compare them.\nIf you don't need this comparison and don't want to polish the raw assembly, use this parameter.",
"hidden": "true",
"fa_icon": "fas fa-ban"
},
"skip_spades": {
"type": "boolean",
"description": "Skip SPAdes assembler",
Expand Down
8 changes: 7 additions & 1 deletion workflows/hybrid.nf

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

0 comments on commit 6554501

Please sign in to comment.