From 2a48e7f7f0c5d63c4372e9b906cd8909d089efc0 Mon Sep 17 00:00:00 2001 From: vagkaratzas Date: Thu, 10 Oct 2024 10:10:00 +0100 Subject: [PATCH 01/16] first draft push with TODOs --- nextflow.config | 3 ++ .../generate_downstream_samplesheets/main.nf | 46 +++++++++++++++++++ workflows/mag.nf | 45 +++++++++++++----- 3 files changed, 83 insertions(+), 11 deletions(-) create mode 100644 subworkflows/local/generate_downstream_samplesheets/main.nf diff --git a/nextflow.config b/nextflow.config index ae8f6c59..5a7600b4 100644 --- a/nextflow.config +++ b/nextflow.config @@ -194,6 +194,9 @@ params { validationShowHiddenParams = false validate_params = true + // Generate downstream samplesheets + generate_samplesheet = false + downstream_pipeline = 'taxprofiler' } // Load base.config by default for all pipelines diff --git a/subworkflows/local/generate_downstream_samplesheets/main.nf b/subworkflows/local/generate_downstream_samplesheets/main.nf new file mode 100644 index 00000000..158bda4a --- /dev/null +++ b/subworkflows/local/generate_downstream_samplesheets/main.nf @@ -0,0 +1,46 @@ +// +// Subworkflow with functionality specific to the nf-core/createtaxdb pipeline +// + +workflow GENERATE_DOWNSTREAM_SAMPLESHEETS { + take: + ch_databases + + main: + ch_header = Channel.empty() + format = 'csv' // most common format in nf-core + format_sep = ',' + + // TODO -- + // Make your samplesheet channel construct here depending on your downstream + // pipelines + if ( params.downstream_pipeline == 'taxprofiler' ) { + format = 'csv' + format_sep = ',' + ch_list_for_samplesheet = ch_databases + .map { + meta, db -> + def tool = meta.tool + def db_name = meta.id + '-' + meta.tool + def db_params = "" + def db_type = "" + def db_path = file(params.outdir).getParent() + '/' + meta.tool + '/' + db.getName() + [ tool: tool, db_name: db_name, db_params: db_params, db_type: db_type, db_path: db_path ] + } + .tap{ ch_header } + } + // -- FINISH TODO + + // Constructs the header string and 
then the strings of each row, and + // finally concatenates for saving. + ch_header + .first() + .map{ it.keySet().join(format_sep) } + .concat( ch_list_for_samplesheet.map{ it.values().join(format_sep) }) + .collectFile( + name:"${params.outdir}/downstream_samplesheet/${params.downstream_pipeline}.${format}", + newLine: true, + sort: false + ) + +} diff --git a/workflows/mag.nf b/workflows/mag.nf index 6c158284..fd0dc7aa 100644 --- a/workflows/mag.nf +++ b/workflows/mag.nf @@ -13,17 +13,18 @@ include { methodsDescriptionText } from '../subworkflows/local/utils_nfcore_mag_ // // SUBWORKFLOW: Consisting of a mix of local and nf-core/modules // -include { BINNING_PREPARATION } from '../subworkflows/local/binning_preparation' -include { BINNING } from '../subworkflows/local/binning' -include { BINNING_REFINEMENT } from '../subworkflows/local/binning_refinement' -include { BUSCO_QC } from '../subworkflows/local/busco_qc' -include { VIRUS_IDENTIFICATION } from '../subworkflows/local/virus_identification' -include { CHECKM_QC } from '../subworkflows/local/checkm_qc' -include { GUNC_QC } from '../subworkflows/local/gunc_qc' -include { GTDBTK } from '../subworkflows/local/gtdbtk' -include { ANCIENT_DNA_ASSEMBLY_VALIDATION } from '../subworkflows/local/ancient_dna' -include { DOMAIN_CLASSIFICATION } from '../subworkflows/local/domain_classification' -include { DEPTHS } from '../subworkflows/local/depths' +include { BINNING_PREPARATION } from '../subworkflows/local/binning_preparation' +include { BINNING } from '../subworkflows/local/binning' +include { BINNING_REFINEMENT } from '../subworkflows/local/binning_refinement' +include { BUSCO_QC } from '../subworkflows/local/busco_qc' +include { VIRUS_IDENTIFICATION } from '../subworkflows/local/virus_identification' +include { CHECKM_QC } from '../subworkflows/local/checkm_qc' +include { GUNC_QC } from '../subworkflows/local/gunc_qc' +include { GTDBTK } from '../subworkflows/local/gtdbtk' +include { 
ANCIENT_DNA_ASSEMBLY_VALIDATION } from '../subworkflows/local/ancient_dna' +include { DOMAIN_CLASSIFICATION } from '../subworkflows/local/domain_classification' +include { DEPTHS } from '../subworkflows/local/depths' +include { GENERATE_DOWNSTREAM_SAMPLESHEETS } from '../subworkflows/local/generate_downstream_samplesheets/main.nf' // // MODULE: Installed directly from nf-core/modules @@ -356,6 +357,8 @@ workflow MAG { } } + // TODO remove ch_short_reads_assembly.view() + /* ================================================================================ Preprocessing and QC for long reads @@ -1002,6 +1005,26 @@ workflow MAG { } } + // + // Samplesheet generation + // + ch_input_for_samplesheet = Channel + .empty() + // TODO + // .mix( + // ch_centrifuge_output.map {meta, db -> [ meta + [tool: "centrifuge"] , db ]}, + // ch_diamond_output.map {meta, db -> [ meta + [tool: "diamond"] , db ]}, + // ch_kaiju_output.map {meta, db -> [ meta + [tool: "kaiju"] , db ]}, + // ch_kraken2_bracken_output.map{meta, db -> [ meta + [tool: "kraken2_bracken"], db ]}, + // ch_krakenuniq_output.map {meta, db -> [ meta + [tool: "krakenuniq"] , db ]}, + // ch_malt_output.map {meta, db -> [ meta + [tool: "malt"] , db ]} + // ) + // .view() + + if ( params.generate_samplesheet ) { + GENERATE_DOWNSTREAM_SAMPLESHEETS ( ch_input_for_samplesheet ) + } + // // Collate and save software versions // From 3b80e0c95ff2fef7f96800bd1e5ce577745399c4 Mon Sep 17 00:00:00 2001 From: vagkaratzas Date: Thu, 10 Oct 2024 12:41:47 +0100 Subject: [PATCH 02/16] first attempt, files not yet being published in outdir --- docs/output.md | 23 ++++++++++++ nextflow.config | 4 +-- nextflow_schema.json | 22 ++++++++++++ .../generate_downstream_samplesheets/main.nf | 36 +++++++++---------- workflows/mag.nf | 18 +++------- 5 files changed, 67 insertions(+), 36 deletions(-) diff --git a/docs/output.md b/docs/output.md index 5f889056..1fdd982d 100644 --- a/docs/output.md +++ b/docs/output.md @@ -707,6 +707,9 @@ Because of 
aDNA damage, _de novo_ assemblers sometimes struggle to call a correc +The pipeline can also generate downstream pipeline input samplesheets. +These are stored in `/downstream_samplesheets`. + ### MultiQC
@@ -751,3 +754,23 @@ Summary tool-specific plots and tables of following tools are currently displaye
[Nextflow](https://www.nextflow.io/docs/latest/tracing.html) provides excellent functionality for generating various reports relevant to the running and execution of the pipeline. This will allow you to troubleshoot errors with the running of the pipeline, and also provide you with other information such as launch commands, run times and resource usage. + +### Downstream samplesheets + +The pipeline can also generate input files for the following downstream +pipelines: + +- [nf-core/taxprofiler](https://nf-co.re/taxprofiler) + +
+Output files + +- `downstream_samplesheets/` + - `taxprofiler.csv`: Partially filled out nf-core/taxprofiler preprocessed short reads csv with paths to database directories or `.fast1.gz` relative to the results directory + +
+ +:::warning +Any generated downstream samplesheet is provided as 'best effort' and are not guaranteed to work straight out of the box! +They may not be complete (e.g. some columns may need to be manually filled in). +::: diff --git a/nextflow.config b/nextflow.config index 5a7600b4..fdfa1844 100644 --- a/nextflow.config +++ b/nextflow.config @@ -195,8 +195,8 @@ params { validate_params = true // Generate downstream samplesheets - generate_samplesheet = false - downstream_pipeline = 'taxprofiler' + generate_downstream_samplesheets = false + generate_pipeline_samplesheets = 'taxprofiler' } // Load base.config by default for all pipelines diff --git a/nextflow_schema.json b/nextflow_schema.json index aaff9835..55ecc59f 100644 --- a/nextflow_schema.json +++ b/nextflow_schema.json @@ -83,6 +83,25 @@ } } }, + "generate_samplesheet_options": { + "title": "Downstream pipeline samplesheet generation options", + "type": "object", + "fa_icon": "fas fa-align-justify", + "description": "Options for generating input samplesheets for complementary downstream pipelines.", + "properties": { + "generate_downstream_samplesheets": { + "type": "boolean", + "description": "Turn on generation of samplesheets for downstream pipelines.", + "fa_icon": "fas fa-toggle-on" + }, + "generate_pipeline_samplesheets": { + "type": "string", + "default": "taxprofiler", + "description": "Specify which pipeline to generate a samplesheet for.", + "fa_icon": "fas fa-toolbox" + } + } + }, "institutional_config_options": { "title": "Institutional config options", "type": "object", @@ -914,6 +933,9 @@ { "$ref": "#/definitions/reference_genome_options" }, + { + "$ref": "#/definitions/generate_samplesheet_options" + }, { "$ref": "#/definitions/institutional_config_options" }, diff --git a/subworkflows/local/generate_downstream_samplesheets/main.nf b/subworkflows/local/generate_downstream_samplesheets/main.nf index 158bda4a..2be09249 100644 --- a/subworkflows/local/generate_downstream_samplesheets/main.nf 
+++ b/subworkflows/local/generate_downstream_samplesheets/main.nf @@ -4,32 +4,28 @@ workflow GENERATE_DOWNSTREAM_SAMPLESHEETS { take: - ch_databases + ch_reads main: ch_header = Channel.empty() format = 'csv' // most common format in nf-core format_sep = ',' - // TODO -- - // Make your samplesheet channel construct here depending on your downstream - // pipelines - if ( params.downstream_pipeline == 'taxprofiler' ) { - format = 'csv' - format_sep = ',' - ch_list_for_samplesheet = ch_databases - .map { - meta, db -> - def tool = meta.tool - def db_name = meta.id + '-' + meta.tool - def db_params = "" - def db_type = "" - def db_path = file(params.outdir).getParent() + '/' + meta.tool + '/' + db.getName() - [ tool: tool, db_name: db_name, db_params: db_params, db_type: db_type, db_path: db_path ] - } - .tap{ ch_header } + // Make your samplesheet channel construct here depending on your downstream pipelines + if ( params.generate_pipeline_samplesheets == 'taxprofiler' ) { + ch_list_for_samplesheet = ch_reads + .map { + meta, fastq -> + def sample = meta.id + def run_accession = meta.id + def instrument_platform = "" + def fastq_1 = file(params.outdir).toString() + '/' + fastq[0].getName() + def fastq_2 = file(params.outdir).toString() + '/' + fastq[1].getName() + def fasta = "" + [ sample: sample, run_accession: run_accession, instrument_platform: instrument_platform, fastq_1: fastq_1, fastq_2: fastq_2, fasta: fasta ] + } + .tap{ ch_header } } - // -- FINISH TODO // Constructs the header string and then the strings of each row, and // finally concatenates for saving. 
@@ -38,7 +34,7 @@ workflow GENERATE_DOWNSTREAM_SAMPLESHEETS { .map{ it.keySet().join(format_sep) } .concat( ch_list_for_samplesheet.map{ it.values().join(format_sep) }) .collectFile( - name:"${params.outdir}/downstream_samplesheet/${params.downstream_pipeline}.${format}", + name:"${params.outdir}/downstream_samplesheet/${params.generate_pipeline_samplesheets}.${format}", newLine: true, sort: false ) diff --git a/workflows/mag.nf b/workflows/mag.nf index fd0dc7aa..c06c9892 100644 --- a/workflows/mag.nf +++ b/workflows/mag.nf @@ -357,8 +357,6 @@ workflow MAG { } } - // TODO remove ch_short_reads_assembly.view() - /* ================================================================================ Preprocessing and QC for long reads @@ -1010,18 +1008,10 @@ workflow MAG { // ch_input_for_samplesheet = Channel .empty() - // TODO - // .mix( - // ch_centrifuge_output.map {meta, db -> [ meta + [tool: "centrifuge"] , db ]}, - // ch_diamond_output.map {meta, db -> [ meta + [tool: "diamond"] , db ]}, - // ch_kaiju_output.map {meta, db -> [ meta + [tool: "kaiju"] , db ]}, - // ch_kraken2_bracken_output.map{meta, db -> [ meta + [tool: "kraken2_bracken"], db ]}, - // ch_krakenuniq_output.map {meta, db -> [ meta + [tool: "krakenuniq"] , db ]}, - // ch_malt_output.map {meta, db -> [ meta + [tool: "malt"] , db ]} - // ) - // .view() - - if ( params.generate_samplesheet ) { + .mix( ch_short_reads_assembly ) + .view() + + if ( params.generate_downstream_samplesheets ) { GENERATE_DOWNSTREAM_SAMPLESHEETS ( ch_input_for_samplesheet ) } From 2bd835221cb8e95ef1f36993b1c1f76be394a880 Mon Sep 17 00:00:00 2001 From: vagkaratzas Date: Thu, 10 Oct 2024 13:24:40 +0100 Subject: [PATCH 03/16] 4 conditions for different short reads outdirs --- .../generate_downstream_samplesheets/main.nf | 20 ++++++++++++++----- workflows/mag.nf | 1 - 2 files changed, 15 insertions(+), 6 deletions(-) diff --git a/subworkflows/local/generate_downstream_samplesheets/main.nf 
b/subworkflows/local/generate_downstream_samplesheets/main.nf index 2be09249..bc2600a2 100644 --- a/subworkflows/local/generate_downstream_samplesheets/main.nf +++ b/subworkflows/local/generate_downstream_samplesheets/main.nf @@ -7,20 +7,30 @@ workflow GENERATE_DOWNSTREAM_SAMPLESHEETS { ch_reads main: - ch_header = Channel.empty() format = 'csv' // most common format in nf-core format_sep = ',' - // Make your samplesheet channel construct here depending on your downstream pipelines - if ( params.generate_pipeline_samplesheets == 'taxprofiler' ) { + if ( params.generate_pipeline_samplesheets == 'taxprofiler' && params.save_clipped_reads ) { // save_clipped_reads must be true + def fastq_rel_path = '/' + if (params.bbnorm) { + fastq_rel_path = '/bbmap/bbnorm/' + } else if (!params.keep_phix) { + fastq_rel_path = '/QC_shortreads/remove_phix/' + } + else if (params.host_fasta) { + fastq_rel_path = '/QC_shortreads/remove_host/' + } + else if (!params.skip_clipping) { + fastq_rel_path = '/QC_shortreads/fastp/' + } ch_list_for_samplesheet = ch_reads .map { meta, fastq -> def sample = meta.id def run_accession = meta.id def instrument_platform = "" - def fastq_1 = file(params.outdir).toString() + '/' + fastq[0].getName() - def fastq_2 = file(params.outdir).toString() + '/' + fastq[1].getName() + def fastq_1 = file(params.outdir).toString() + fastq_rel_path + meta.id + '/' + fastq[0].getName() + def fastq_2 = file(params.outdir).toString() + fastq_rel_path + meta.id + '/' + fastq[1].getName() def fasta = "" [ sample: sample, run_accession: run_accession, instrument_platform: instrument_platform, fastq_1: fastq_1, fastq_2: fastq_2, fasta: fasta ] } diff --git a/workflows/mag.nf b/workflows/mag.nf index c06c9892..ff2e0dc6 100644 --- a/workflows/mag.nf +++ b/workflows/mag.nf @@ -1009,7 +1009,6 @@ workflow MAG { ch_input_for_samplesheet = Channel .empty() .mix( ch_short_reads_assembly ) - .view() if ( params.generate_downstream_samplesheets ) { GENERATE_DOWNSTREAM_SAMPLESHEETS 
( ch_input_for_samplesheet ) From a74417507983f154537410d9fae490664139d5c9 Mon Sep 17 00:00:00 2001 From: jasmezz Date: Thu, 10 Oct 2024 15:44:03 +0200 Subject: [PATCH 04/16] Add funcscan code --- docs/output.md | 2 + .../generate_downstream_samplesheets/main.nf | 13 +++++ workflows/mag.nf | 49 ++++++++++++------- 3 files changed, 46 insertions(+), 18 deletions(-) diff --git a/docs/output.md b/docs/output.md index 1fdd982d..fca4f82e 100644 --- a/docs/output.md +++ b/docs/output.md @@ -760,12 +760,14 @@ Summary tool-specific plots and tables of following tools are currently displaye The pipeline can also generate input files for the following downstream pipelines: +- [nf-core/funcscan](https://nf-co.re/funcscan) - [nf-core/taxprofiler](https://nf-co.re/taxprofiler)
Output files - `downstream_samplesheets/` + - `funcscan.csv`: Filled out nf-core/funcscan `--input` csv with absolute paths to the assembly FASTA files produced by MAG (MEGAHIT, SPAdes, SPAdesHybrid) - `taxprofiler.csv`: Partially filled out nf-core/taxprofiler preprocessed short reads csv with paths to database directories or `.fast1.gz` relative to the results directory
diff --git a/subworkflows/local/generate_downstream_samplesheets/main.nf b/subworkflows/local/generate_downstream_samplesheets/main.nf index bc2600a2..23a2f505 100644 --- a/subworkflows/local/generate_downstream_samplesheets/main.nf +++ b/subworkflows/local/generate_downstream_samplesheets/main.nf @@ -37,6 +37,19 @@ workflow GENERATE_DOWNSTREAM_SAMPLESHEETS { .tap{ ch_header } } + if ( params.generate_pipeline_samplesheets == 'funcscan' ) { + format = 'csv' + format_sep = ',' + ch_list_for_samplesheet = ch_assemblies + .map { + meta, filename -> + def sample = meta.id + def fasta = file(params.outdir).toString() + '/Assembly/' + meta.assembler + '/' + filename.getName() + [ sample: sample, fasta: fasta ] + } + .tap{ ch_header } + } + // Constructs the header string and then the strings of each row, and // finally concatenates for saving. ch_header diff --git a/workflows/mag.nf b/workflows/mag.nf index ff2e0dc6..7ba33c37 100644 --- a/workflows/mag.nf +++ b/workflows/mag.nf @@ -13,18 +13,19 @@ include { methodsDescriptionText } from '../subworkflows/local/utils_nfcore_mag_ // // SUBWORKFLOW: Consisting of a mix of local and nf-core/modules // -include { BINNING_PREPARATION } from '../subworkflows/local/binning_preparation' -include { BINNING } from '../subworkflows/local/binning' -include { BINNING_REFINEMENT } from '../subworkflows/local/binning_refinement' -include { BUSCO_QC } from '../subworkflows/local/busco_qc' -include { VIRUS_IDENTIFICATION } from '../subworkflows/local/virus_identification' -include { CHECKM_QC } from '../subworkflows/local/checkm_qc' -include { GUNC_QC } from '../subworkflows/local/gunc_qc' -include { GTDBTK } from '../subworkflows/local/gtdbtk' -include { ANCIENT_DNA_ASSEMBLY_VALIDATION } from '../subworkflows/local/ancient_dna' -include { DOMAIN_CLASSIFICATION } from '../subworkflows/local/domain_classification' -include { DEPTHS } from '../subworkflows/local/depths' -include { GENERATE_DOWNSTREAM_SAMPLESHEETS } from 
'../subworkflows/local/generate_downstream_samplesheets/main.nf' +include { BINNING_PREPARATION } from '../subworkflows/local/binning_preparation' +include { BINNING } from '../subworkflows/local/binning' +include { BINNING_REFINEMENT } from '../subworkflows/local/binning_refinement' +include { BUSCO_QC } from '../subworkflows/local/busco_qc' +include { VIRUS_IDENTIFICATION } from '../subworkflows/local/virus_identification' +include { CHECKM_QC } from '../subworkflows/local/checkm_qc' +include { GUNC_QC } from '../subworkflows/local/gunc_qc' +include { GTDBTK } from '../subworkflows/local/gtdbtk' +include { ANCIENT_DNA_ASSEMBLY_VALIDATION } from '../subworkflows/local/ancient_dna' +include { DOMAIN_CLASSIFICATION } from '../subworkflows/local/domain_classification' +include { DEPTHS } from '../subworkflows/local/depths' +include { GENERATE_DOWNSTREAM_SAMPLESHEETS as GENERATE_DOWNSTREAM_SAMPLESHEETS_FUNCSCAN } from '../subworkflows/local/generate_downstream_samplesheets/main.nf' +include { GENERATE_DOWNSTREAM_SAMPLESHEETS as GENERATE_DOWNSTREAM_SAMPLESHEETS_TAXPROFILER } from '../subworkflows/local/generate_downstream_samplesheets/main.nf' // // MODULE: Installed directly from nf-core/modules @@ -1006,13 +1007,25 @@ workflow MAG { // // Samplesheet generation // - ch_input_for_samplesheet = Channel - .empty() - .mix( ch_short_reads_assembly ) + // if ( params.generate_pipeline_samplesheets.contains("taxprofiler") ) { + ch_input_for_samplesheet = Channel + .empty() + .mix( ch_short_reads_assembly ) - if ( params.generate_downstream_samplesheets ) { - GENERATE_DOWNSTREAM_SAMPLESHEETS ( ch_input_for_samplesheet ) - } + if ( params.generate_downstream_samplesheets ) { + GENERATE_DOWNSTREAM_SAMPLESHEETS_TAXPROFILER ( ch_input_for_samplesheet ) + } + // } + + // if ( params.generate_pipeline_samplesheets.contains("funcscan") ) { + ch_input_for_samplesheet = Channel + .empty() + .mix( ch_assemblies ) + + if ( params.generate_downstream_samplesheets ) { + 
GENERATE_DOWNSTREAM_SAMPLESHEETS_FUNCSCAN ( ch_input_for_samplesheet ) + } + // } // // Collate and save software versions From 8672790c33a77758c69577eb8ec632f265ffa837 Mon Sep 17 00:00:00 2001 From: jasmezz Date: Thu, 10 Oct 2024 16:04:57 +0200 Subject: [PATCH 05/16] Generate multiple samplesheets --- nextflow.config | 2 +- nextflow_schema.json | 4 ++-- .../generate_downstream_samplesheets/main.nf | 15 +++++++-------- workflows/mag.nf | 12 ++++++------ 4 files changed, 16 insertions(+), 17 deletions(-) diff --git a/nextflow.config b/nextflow.config index fdfa1844..91a893a0 100644 --- a/nextflow.config +++ b/nextflow.config @@ -196,7 +196,7 @@ params { // Generate downstream samplesheets generate_downstream_samplesheets = false - generate_pipeline_samplesheets = 'taxprofiler' + generate_pipeline_samplesheets = ["funcscan","taxprofiler"] } // Load base.config by default for all pipelines diff --git a/nextflow_schema.json b/nextflow_schema.json index 55ecc59f..4a72c7a4 100644 --- a/nextflow_schema.json +++ b/nextflow_schema.json @@ -95,8 +95,8 @@ "fa_icon": "fas fa-toggle-on" }, "generate_pipeline_samplesheets": { - "type": "string", - "default": "taxprofiler", + "type": "array", + "default": ["funcscan","taxprofiler"], "description": "Specify which pipeline to generate a samplesheet for.", "fa_icon": "fas fa-toolbox" } diff --git a/subworkflows/local/generate_downstream_samplesheets/main.nf b/subworkflows/local/generate_downstream_samplesheets/main.nf index 23a2f505..71255748 100644 --- a/subworkflows/local/generate_downstream_samplesheets/main.nf +++ b/subworkflows/local/generate_downstream_samplesheets/main.nf @@ -4,13 +4,14 @@ workflow GENERATE_DOWNSTREAM_SAMPLESHEETS { take: - ch_reads + ch_input + downstreampipeline_name main: format = 'csv' // most common format in nf-core format_sep = ',' // Make your samplesheet channel construct here depending on your downstream pipelines - if ( params.generate_pipeline_samplesheets == 'taxprofiler' && 
params.save_clipped_reads ) { // save_clipped_reads must be true + if ( downstreampipeline_name == 'taxprofiler' && params.save_clipped_reads ) { // save_clipped_reads must be true def fastq_rel_path = '/' if (params.bbnorm) { fastq_rel_path = '/bbmap/bbnorm/' @@ -23,7 +24,7 @@ workflow GENERATE_DOWNSTREAM_SAMPLESHEETS { else if (!params.skip_clipping) { fastq_rel_path = '/QC_shortreads/fastp/' } - ch_list_for_samplesheet = ch_reads + ch_list_for_samplesheet = ch_input .map { meta, fastq -> def sample = meta.id @@ -37,10 +38,8 @@ workflow GENERATE_DOWNSTREAM_SAMPLESHEETS { .tap{ ch_header } } - if ( params.generate_pipeline_samplesheets == 'funcscan' ) { - format = 'csv' - format_sep = ',' - ch_list_for_samplesheet = ch_assemblies + if ( downstreampipeline_name == 'funcscan' ) { + ch_list_for_samplesheet = ch_input .map { meta, filename -> def sample = meta.id @@ -57,7 +56,7 @@ workflow GENERATE_DOWNSTREAM_SAMPLESHEETS { .map{ it.keySet().join(format_sep) } .concat( ch_list_for_samplesheet.map{ it.values().join(format_sep) }) .collectFile( - name:"${params.outdir}/downstream_samplesheet/${params.generate_pipeline_samplesheets}.${format}", + name:"${params.outdir}/downstream_samplesheet/${downstreampipeline_name}.${format}", newLine: true, sort: false ) diff --git a/workflows/mag.nf b/workflows/mag.nf index 7ba33c37..ea5f1a68 100644 --- a/workflows/mag.nf +++ b/workflows/mag.nf @@ -1007,25 +1007,25 @@ workflow MAG { // // Samplesheet generation // - // if ( params.generate_pipeline_samplesheets.contains("taxprofiler") ) { + if ( params.generate_pipeline_samplesheets.contains("taxprofiler") ) { ch_input_for_samplesheet = Channel .empty() .mix( ch_short_reads_assembly ) if ( params.generate_downstream_samplesheets ) { - GENERATE_DOWNSTREAM_SAMPLESHEETS_TAXPROFILER ( ch_input_for_samplesheet ) + GENERATE_DOWNSTREAM_SAMPLESHEETS_TAXPROFILER ( [ ch_input_for_samplesheet, "taxprofiler" ] ) } - // } + } - // if ( params.generate_pipeline_samplesheets.contains("funcscan") ) 
{ + if ( params.generate_pipeline_samplesheets.contains("funcscan") ) { ch_input_for_samplesheet = Channel .empty() .mix( ch_assemblies ) if ( params.generate_downstream_samplesheets ) { - GENERATE_DOWNSTREAM_SAMPLESHEETS_FUNCSCAN ( ch_input_for_samplesheet ) + GENERATE_DOWNSTREAM_SAMPLESHEETS_FUNCSCAN ( [ ch_input_for_samplesheet, "funcscan" ] ) } - // } + } // // Collate and save software versions From be914629b91c23b0080d07bb0b413d4e0a58bc15 Mon Sep 17 00:00:00 2001 From: jasmezz Date: Thu, 10 Oct 2024 17:07:01 +0200 Subject: [PATCH 06/16] Update multi-samplesheet generation --- conf/test_hybrid.config | 4 + nextflow.config | 2 +- nextflow_schema.json | 4 +- .../generate_downstream_samplesheets/main.nf | 74 ++++++++++++------- workflows/mag.nf | 45 ++++------- 5 files changed, 69 insertions(+), 60 deletions(-) diff --git a/conf/test_hybrid.config b/conf/test_hybrid.config index ca6f4c74..c642fbfc 100644 --- a/conf/test_hybrid.config +++ b/conf/test_hybrid.config @@ -27,4 +27,8 @@ params { skip_gtdbtk = true gtdbtk_min_completeness = 0 skip_concoct = true + + // Generate downstream samplesheets + generate_downstream_samplesheets = true + generate_pipeline_samplesheets = "funcscan,taxprofiler" } diff --git a/nextflow.config b/nextflow.config index 91a893a0..03d02754 100644 --- a/nextflow.config +++ b/nextflow.config @@ -196,7 +196,7 @@ params { // Generate downstream samplesheets generate_downstream_samplesheets = false - generate_pipeline_samplesheets = ["funcscan","taxprofiler"] + generate_pipeline_samplesheets = "funcscan,taxprofiler" } // Load base.config by default for all pipelines diff --git a/nextflow_schema.json b/nextflow_schema.json index 4a72c7a4..6c9e448b 100644 --- a/nextflow_schema.json +++ b/nextflow_schema.json @@ -95,8 +95,8 @@ "fa_icon": "fas fa-toggle-on" }, "generate_pipeline_samplesheets": { - "type": "array", - "default": ["funcscan","taxprofiler"], + "type": "string", + "default": "funcscan,taxprofiler", "description": "Specify which 
pipeline to generate a samplesheet for.", "fa_icon": "fas fa-toolbox" } diff --git a/subworkflows/local/generate_downstream_samplesheets/main.nf b/subworkflows/local/generate_downstream_samplesheets/main.nf index 71255748..1ee345dd 100644 --- a/subworkflows/local/generate_downstream_samplesheets/main.nf +++ b/subworkflows/local/generate_downstream_samplesheets/main.nf @@ -1,18 +1,13 @@ // -// Subworkflow with functionality specific to the nf-core/createtaxdb pipeline +// Subworkflow with functionality specific to the nf-core/mag pipeline // -workflow GENERATE_DOWNSTREAM_SAMPLESHEETS { +workflow SAMPLESHEET_TAXPROFILER { take: - ch_input - downstreampipeline_name + ch_reads main: - format = 'csv' // most common format in nf-core - format_sep = ',' - // Make your samplesheet channel construct here depending on your downstream pipelines - if ( downstreampipeline_name == 'taxprofiler' && params.save_clipped_reads ) { // save_clipped_reads must be true - def fastq_rel_path = '/' + def fastq_rel_path = '/' if (params.bbnorm) { fastq_rel_path = '/bbmap/bbnorm/' } else if (!params.keep_phix) { @@ -24,7 +19,7 @@ workflow GENERATE_DOWNSTREAM_SAMPLESHEETS { else if (!params.skip_clipping) { fastq_rel_path = '/QC_shortreads/fastp/' } - ch_list_for_samplesheet = ch_input + ch_list_for_samplesheet = ch_reads .map { meta, fastq -> def sample = meta.id @@ -36,29 +31,56 @@ workflow GENERATE_DOWNSTREAM_SAMPLESHEETS { [ sample: sample, run_accession: run_accession, instrument_platform: instrument_platform, fastq_1: fastq_1, fastq_2: fastq_2, fasta: fasta ] } .tap{ ch_header } - } - if ( downstreampipeline_name == 'funcscan' ) { - ch_list_for_samplesheet = ch_input - .map { - meta, filename -> - def sample = meta.id - def fasta = file(params.outdir).toString() + '/Assembly/' + meta.assembler + '/' + filename.getName() - [ sample: sample, fasta: fasta ] - } - .tap{ ch_header } - } + ch_header + .first() + .map{ it.keySet().join(",") } + .concat( ch_list_for_samplesheet.map{ 
it.values().join(",") }) + .collectFile( + name:"${params.outdir}/downstream_samplesheet/taxprofiler.csv", + newLine: true, + sort: false + ) +} + +workflow SAMPLESHEET_FUNCSCAN { + take: + ch_assemblies + + main: + ch_list_for_samplesheet = ch_assemblies + .map { + meta, filename -> + def sample = meta.id + def fasta = file(params.outdir).toString() + '/Assembly/' + meta.assembler + '/' + filename.getName() + [ sample: sample, fasta: fasta ] + } + .tap{ ch_header } - // Constructs the header string and then the strings of each row, and - // finally concatenates for saving. ch_header .first() - .map{ it.keySet().join(format_sep) } - .concat( ch_list_for_samplesheet.map{ it.values().join(format_sep) }) + .map{ it.keySet().join(",") } + .concat( ch_list_for_samplesheet.map{ it.values().join(",") }) .collectFile( - name:"${params.outdir}/downstream_samplesheet/${downstreampipeline_name}.${format}", + name:"${params.outdir}/downstream_samplesheet/funcscan.csv", newLine: true, sort: false ) +} + +workflow GENERATE_DOWNSTREAM_SAMPLESHEETS { + take: + ch_reads + ch_assemblies + main: + def downstreampipeline_names = params.generate_pipeline_samplesheets.split(",") + + if ( downstreampipeline_names.contains('taxprofiler') && params.save_clipped_reads ) { // save_clipped_reads must be true + SAMPLESHEET_TAXPROFILER(ch_reads) + } + + if ( downstreampipeline_names.contains('funcscan') ) { + SAMPLESHEET_FUNCSCAN(ch_assemblies) + } } diff --git a/workflows/mag.nf b/workflows/mag.nf index ea5f1a68..c393e911 100644 --- a/workflows/mag.nf +++ b/workflows/mag.nf @@ -13,19 +13,18 @@ include { methodsDescriptionText } from '../subworkflows/local/utils_nfcore_mag_ // // SUBWORKFLOW: Consisting of a mix of local and nf-core/modules // -include { BINNING_PREPARATION } from '../subworkflows/local/binning_preparation' -include { BINNING } from '../subworkflows/local/binning' -include { BINNING_REFINEMENT } from '../subworkflows/local/binning_refinement' -include { BUSCO_QC } from 
'../subworkflows/local/busco_qc' -include { VIRUS_IDENTIFICATION } from '../subworkflows/local/virus_identification' -include { CHECKM_QC } from '../subworkflows/local/checkm_qc' -include { GUNC_QC } from '../subworkflows/local/gunc_qc' -include { GTDBTK } from '../subworkflows/local/gtdbtk' -include { ANCIENT_DNA_ASSEMBLY_VALIDATION } from '../subworkflows/local/ancient_dna' -include { DOMAIN_CLASSIFICATION } from '../subworkflows/local/domain_classification' -include { DEPTHS } from '../subworkflows/local/depths' -include { GENERATE_DOWNSTREAM_SAMPLESHEETS as GENERATE_DOWNSTREAM_SAMPLESHEETS_FUNCSCAN } from '../subworkflows/local/generate_downstream_samplesheets/main.nf' -include { GENERATE_DOWNSTREAM_SAMPLESHEETS as GENERATE_DOWNSTREAM_SAMPLESHEETS_TAXPROFILER } from '../subworkflows/local/generate_downstream_samplesheets/main.nf' +include { BINNING_PREPARATION } from '../subworkflows/local/binning_preparation' +include { BINNING } from '../subworkflows/local/binning' +include { BINNING_REFINEMENT } from '../subworkflows/local/binning_refinement' +include { BUSCO_QC } from '../subworkflows/local/busco_qc' +include { VIRUS_IDENTIFICATION } from '../subworkflows/local/virus_identification' +include { CHECKM_QC } from '../subworkflows/local/checkm_qc' +include { GUNC_QC } from '../subworkflows/local/gunc_qc' +include { GTDBTK } from '../subworkflows/local/gtdbtk' +include { ANCIENT_DNA_ASSEMBLY_VALIDATION } from '../subworkflows/local/ancient_dna' +include { DOMAIN_CLASSIFICATION } from '../subworkflows/local/domain_classification' +include { DEPTHS } from '../subworkflows/local/depths' +include { GENERATE_DOWNSTREAM_SAMPLESHEETS } from '../subworkflows/local/generate_downstream_samplesheets/main.nf' // // MODULE: Installed directly from nf-core/modules @@ -1007,24 +1006,8 @@ workflow MAG { // // Samplesheet generation // - if ( params.generate_pipeline_samplesheets.contains("taxprofiler") ) { - ch_input_for_samplesheet = Channel - .empty() - .mix( 
ch_short_reads_assembly ) - - if ( params.generate_downstream_samplesheets ) { - GENERATE_DOWNSTREAM_SAMPLESHEETS_TAXPROFILER ( [ ch_input_for_samplesheet, "taxprofiler" ] ) - } - } - - if ( params.generate_pipeline_samplesheets.contains("funcscan") ) { - ch_input_for_samplesheet = Channel - .empty() - .mix( ch_assemblies ) - - if ( params.generate_downstream_samplesheets ) { - GENERATE_DOWNSTREAM_SAMPLESHEETS_FUNCSCAN ( [ ch_input_for_samplesheet, "funcscan" ] ) - } + if ( params.generate_downstream_samplesheets ) { + GENERATE_DOWNSTREAM_SAMPLESHEETS ( ch_short_reads_assembly, ch_assemblies ) } // From e9df1263d9577cad06cd9e70c59ecffeae033f91 Mon Sep 17 00:00:00 2001 From: Jasmin Frangenberg <73216762+jasmezz@users.noreply.github.com> Date: Mon, 14 Oct 2024 15:12:21 +0000 Subject: [PATCH 07/16] Update docs [skip ci] Co-authored-by: James A. Fellows Yates --- docs/output.md | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/docs/output.md b/docs/output.md index fca4f82e..10345218 100644 --- a/docs/output.md +++ b/docs/output.md @@ -707,7 +707,7 @@ Because of aDNA damage, _de novo_ assemblers sometimes struggle to call a correc -The pipeline can also generate downstream pipeline input samplesheets. +The pipeline can also generate input samplesheets for downstream pipelines. These are stored in `/downstream_samplesheets`. 
### MultiQC @@ -757,8 +757,7 @@ Summary tool-specific plots and tables of following tools are currently displaye ### Downstream samplesheets -The pipeline can also generate input files for the following downstream -pipelines: +The pipeline can also generate input files for the following downstream pipelines: - [nf-core/funcscan](https://nf-co.re/funcscan) - [nf-core/taxprofiler](https://nf-co.re/taxprofiler) @@ -767,8 +766,8 @@ pipelines: Output files - `downstream_samplesheets/` - - `funcscan.csv`: Filled out nf-core/funcscan `--input` csv with absolute paths to the assembly FASTA files produced by MAG (MEGAHIT, SPAdes, SPAdesHybrid) - - `taxprofiler.csv`: Partially filled out nf-core/taxprofiler preprocessed short reads csv with paths to database directories or `.fast1.gz` relative to the results directory + - `funcscan.csv`: Filled out nf-core/funcscan `--input` csv with absolute paths to the assembled contig FASTA files produced by nf-core/mag (MEGAHIT, SPAdes, SPAdesHybrid) + - `taxprofiler.csv`: Partially filled out nf-core/taxprofiler csv with paths to preprocessed reads (adapter trimmed, host removed etc.) 
`.fastq.gz` From e441e76e5985a4a11d6a73027b77f265e5957b15 Mon Sep 17 00:00:00 2001 From: vagkaratzas Date: Tue, 15 Oct 2024 08:34:21 +0100 Subject: [PATCH 08/16] Added channelToSamplesheet function to subworkflow, added nf-test (not run yet) --- .../generate_downstream_samplesheets/main.nf | 100 ++++++++++-------- .../tests/main.test.nf | 42 ++++++++ 2 files changed, 95 insertions(+), 47 deletions(-) create mode 100644 subworkflows/local/generate_downstream_samplesheets/tests/main.test.nf diff --git a/subworkflows/local/generate_downstream_samplesheets/main.nf b/subworkflows/local/generate_downstream_samplesheets/main.nf index 1ee345dd..4f3b84c3 100644 --- a/subworkflows/local/generate_downstream_samplesheets/main.nf +++ b/subworkflows/local/generate_downstream_samplesheets/main.nf @@ -7,40 +7,37 @@ workflow SAMPLESHEET_TAXPROFILER { ch_reads main: + format = 'csv' // most common format in nf-core + format_sep = ',' + def fastq_rel_path = '/' - if (params.bbnorm) { - fastq_rel_path = '/bbmap/bbnorm/' - } else if (!params.keep_phix) { - fastq_rel_path = '/QC_shortreads/remove_phix/' - } - else if (params.host_fasta) { - fastq_rel_path = '/QC_shortreads/remove_host/' - } - else if (!params.skip_clipping) { - fastq_rel_path = '/QC_shortreads/fastp/' + if (params.bbnorm) { + fastq_rel_path = '/bbmap/bbnorm/' + } else if (!params.keep_phix) { + fastq_rel_path = '/QC_shortreads/remove_phix/' + } + else if (params.host_fasta) { + fastq_rel_path = '/QC_shortreads/remove_host/' + } + else if (!params.skip_clipping) { + fastq_rel_path = '/QC_shortreads/fastp/' + } + + ch_list_for_samplesheet = ch_reads + .map { + meta, fastq -> + def sample = meta.id + def run_accession = meta.id + def instrument_platform = "" + def fastq_1 = file(params.outdir).toString() + fastq_rel_path + meta.id + '/' + fastq[0].getName() + def fastq_2 = file(params.outdir).toString() + fastq_rel_path + meta.id + '/' + fastq[1].getName() + def fasta = "" + [ sample: sample, run_accession: run_accession, 
instrument_platform: instrument_platform, fastq_1: fastq_1, fastq_2: fastq_2, fasta: fasta ] } - ch_list_for_samplesheet = ch_reads - .map { - meta, fastq -> - def sample = meta.id - def run_accession = meta.id - def instrument_platform = "" - def fastq_1 = file(params.outdir).toString() + fastq_rel_path + meta.id + '/' + fastq[0].getName() - def fastq_2 = file(params.outdir).toString() + fastq_rel_path + meta.id + '/' + fastq[1].getName() - def fasta = "" - [ sample: sample, run_accession: run_accession, instrument_platform: instrument_platform, fastq_1: fastq_1, fastq_2: fastq_2, fasta: fasta ] - } - .tap{ ch_header } + .tap{ ch_colnames } + + channelToSamplesheet(ch_colnames, ch_list_for_samplesheet, 'downstream_samplesheets', 'taxprofiler', format, format_sep) - ch_header - .first() - .map{ it.keySet().join(",") } - .concat( ch_list_for_samplesheet.map{ it.values().join(",") }) - .collectFile( - name:"${params.outdir}/downstream_samplesheet/taxprofiler.csv", - newLine: true, - sort: false - ) } workflow SAMPLESHEET_FUNCSCAN { @@ -48,24 +45,20 @@ workflow SAMPLESHEET_FUNCSCAN { ch_assemblies main: + format = 'csv' // most common format in nf-core + format_sep = ',' + ch_list_for_samplesheet = ch_assemblies - .map { - meta, filename -> - def sample = meta.id - def fasta = file(params.outdir).toString() + '/Assembly/' + meta.assembler + '/' + filename.getName() - [ sample: sample, fasta: fasta ] - } - .tap{ ch_header } + .map { + meta, filename -> + def sample = meta.id + def fasta = file(params.outdir).toString() + '/Assembly/' + meta.assembler + '/' + filename.getName() + [ sample: sample, fasta: fasta ] + } + .tap{ ch_colnames } + + channelToSamplesheet(ch_colnames, ch_list_for_samplesheet, 'downstream_samplesheets', 'funscan', format, format_sep) - ch_header - .first() - .map{ it.keySet().join(",") } - .concat( ch_list_for_samplesheet.map{ it.values().join(",") }) - .collectFile( - name:"${params.outdir}/downstream_samplesheet/funcscan.csv", - newLine: true, - 
sort: false - ) } workflow GENERATE_DOWNSTREAM_SAMPLESHEETS { @@ -84,3 +77,16 @@ workflow GENERATE_DOWNSTREAM_SAMPLESHEETS { SAMPLESHEET_FUNCSCAN(ch_assemblies) } } + +// Constructs the header string and then the strings of each row, and +def channelToSamplesheet(ch_header, ch_list_for_samplesheet, outdir_subdir, pipeline, format, format_sep) { + ch_header + .first() + .map{ it.keySet().join(format_sep) } + .concat( ch_list_for_samplesheet.map{ it.values().join(format_sep) }) + .collectFile( + name:"${params.outdir}/${outdir_subdir}/${pipeline}.${format}", + newLine: true, + sort: false + ) +} diff --git a/subworkflows/local/generate_downstream_samplesheets/tests/main.test.nf b/subworkflows/local/generate_downstream_samplesheets/tests/main.test.nf new file mode 100644 index 00000000..902481be --- /dev/null +++ b/subworkflows/local/generate_downstream_samplesheets/tests/main.test.nf @@ -0,0 +1,42 @@ +nextflow_workflow { + name "Test Subworkflow GENERATE_DOWNSTREAM_SAMPLESHEETS" + script "../main.nf" + workflow "GENERATE_DOWNSTREAM_SAMPLESHEETS" + + tag "subworkflows" + tag "subworkflows_local" + tag "subworkflows/generate_downstream_samplesheets" + + test("reads - taxprofiler,funscan") { + + when { + params { + modules_testdata_base_path = "https://raw.githubusercontent.com/nf-core/test-datasets/" + outdir = "." 
+ generate_pipeline_samplesheets = 'taxprofiler,funscan' + } + workflow { + """ + input[0] = Channel.of( + [ + [id:'test_taxprofiler_funscan', single_end:false, long_reads:true, amount_of_files:3], + file(params.modules_testdata_base_path + 'mag/samplesheets/samplesheet.hybrid.csv', checkIfExists: true) + ] + ) + """ + } + } + + then { + assertAll( + { assert workflow.success}, + { assert snapshot( + [ + "${params.outdir}/downstream_samplesheets/funscan.csv", + "${params.outdir}/downstream_samplesheets/taxprofiler.csv" + ]).match() + }, + ) + } + } +} From ddb9c9619e47c8385beb7bc1296e39b9869f1bfc Mon Sep 17 00:00:00 2001 From: vagkaratzas Date: Tue, 15 Oct 2024 08:51:31 +0100 Subject: [PATCH 09/16] Validate samplesheet generation parameters, null defaults, --- conf/test_hybrid.config | 2 +- nextflow.config | 2 +- .../local/generate_downstream_samplesheets/main.nf | 2 +- .../tests/main.test.nf | 6 +++--- .../local/utils_nfcore_mag_pipeline/main.nf | 13 +++++++++++++ 5 files changed, 19 insertions(+), 6 deletions(-) diff --git a/conf/test_hybrid.config b/conf/test_hybrid.config index c642fbfc..bc32fadf 100644 --- a/conf/test_hybrid.config +++ b/conf/test_hybrid.config @@ -30,5 +30,5 @@ params { // Generate downstream samplesheets generate_downstream_samplesheets = true - generate_pipeline_samplesheets = "funcscan,taxprofiler" + generate_pipeline_samplesheets = null } diff --git a/nextflow.config b/nextflow.config index 03d02754..9a3bd5f5 100644 --- a/nextflow.config +++ b/nextflow.config @@ -196,7 +196,7 @@ params { // Generate downstream samplesheets generate_downstream_samplesheets = false - generate_pipeline_samplesheets = "funcscan,taxprofiler" + generate_pipeline_samplesheets = null } // Load base.config by default for all pipelines diff --git a/subworkflows/local/generate_downstream_samplesheets/main.nf b/subworkflows/local/generate_downstream_samplesheets/main.nf index 4f3b84c3..7814a3e5 100644 --- a/subworkflows/local/generate_downstream_samplesheets/main.nf 
+++ b/subworkflows/local/generate_downstream_samplesheets/main.nf @@ -57,7 +57,7 @@ workflow SAMPLESHEET_FUNCSCAN { } .tap{ ch_colnames } - channelToSamplesheet(ch_colnames, ch_list_for_samplesheet, 'downstream_samplesheets', 'funscan', format, format_sep) + channelToSamplesheet(ch_colnames, ch_list_for_samplesheet, 'downstream_samplesheets', 'funcscan', format, format_sep) } diff --git a/subworkflows/local/generate_downstream_samplesheets/tests/main.test.nf b/subworkflows/local/generate_downstream_samplesheets/tests/main.test.nf index 902481be..981e3a66 100644 --- a/subworkflows/local/generate_downstream_samplesheets/tests/main.test.nf +++ b/subworkflows/local/generate_downstream_samplesheets/tests/main.test.nf @@ -7,13 +7,13 @@ nextflow_workflow { tag "subworkflows_local" tag "subworkflows/generate_downstream_samplesheets" - test("reads - taxprofiler,funscan") { + test("reads - taxprofiler,funcscan") { when { params { modules_testdata_base_path = "https://raw.githubusercontent.com/nf-core/test-datasets/" outdir = "." 
- generate_pipeline_samplesheets = 'taxprofiler,funscan' + generate_pipeline_samplesheets = 'taxprofiler,funcscan' } workflow { """ @@ -32,7 +32,7 @@ nextflow_workflow { { assert workflow.success}, { assert snapshot( [ - "${params.outdir}/downstream_samplesheets/funscan.csv", + "${params.outdir}/downstream_samplesheets/funcscan.csv", "${params.outdir}/downstream_samplesheets/taxprofiler.csv" ]).match() }, diff --git a/subworkflows/local/utils_nfcore_mag_pipeline/main.nf b/subworkflows/local/utils_nfcore_mag_pipeline/main.nf index 29806112..511e59e6 100644 --- a/subworkflows/local/utils_nfcore_mag_pipeline/main.nf +++ b/subworkflows/local/utils_nfcore_mag_pipeline/main.nf @@ -330,6 +330,19 @@ def validateInputParameters(hybrid) { if (params.save_mmseqs_db && !params.metaeuk_mmseqs_db) { error('[nf-core/mag] ERROR: Invalid parameter combination: --save_mmseqs_db supplied but no database has been requested for download with --metaeuk_mmseqs_db!') } + + // Validate samplesheet generation parameters + if (params.generate_downstream_samplesheets && !params.generate_pipeline_samplesheets) { + error('[nf-core/mag] If supplying `--generate_downstream_samplesheets`, you must also specify which pipeline to generate for with `--generate_pipeline_samplesheets! 
Check input.') + } + + if (params.generate_downstream_samplesheets && !params.save_clipped_reads) { + error('[nf-core/mag] Supplied --generate_downstream_samplesheets but missing --save_clipped_reads (mandatory for reads.gz output).') + } + + if (params.generate_downstream_samplesheets && params.save_clipped_reads && (params.bbnorm || !params.keep_phix || params.host_fasta || params.skip_clipping)) { + error('[nf-core/mag] Supplied --generate_downstream_samplesheets and --save_clipped_reads is true, but also need one of the following: --bbnorm true, or --keep_phix false, or --host_fasta true, or skip_clipping true.') + } } // From bf11fb3d5906e155d40b5b24b900b7fdf4ade144 Mon Sep 17 00:00:00 2001 From: vagkaratzas Date: Tue, 15 Oct 2024 09:34:01 +0100 Subject: [PATCH 10/16] nf-test updated (no snapshot yet) --- .../tests/main.test.nf | 24 +++++++++++++++---- 1 file changed, 20 insertions(+), 4 deletions(-) diff --git a/subworkflows/local/generate_downstream_samplesheets/tests/main.test.nf b/subworkflows/local/generate_downstream_samplesheets/tests/main.test.nf index 981e3a66..635ae970 100644 --- a/subworkflows/local/generate_downstream_samplesheets/tests/main.test.nf +++ b/subworkflows/local/generate_downstream_samplesheets/tests/main.test.nf @@ -7,11 +7,11 @@ nextflow_workflow { tag "subworkflows_local" tag "subworkflows/generate_downstream_samplesheets" - test("reads - taxprofiler,funcscan") { + test("reads,assemblies - taxprofiler,funcscan") { when { params { - modules_testdata_base_path = "https://raw.githubusercontent.com/nf-core/test-datasets/" + modules_testdata_base_path = "https://github.com/nf-core/test-datasets/raw/mag/test_data/" outdir = "." 
generate_pipeline_samplesheets = 'taxprofiler,funcscan' } @@ -19,8 +19,24 @@ nextflow_workflow { """ input[0] = Channel.of( [ - [id:'test_taxprofiler_funscan', single_end:false, long_reads:true, amount_of_files:3], - file(params.modules_testdata_base_path + 'mag/samplesheets/samplesheet.hybrid.csv', checkIfExists: true) + [id:'test_minigut', group:0, single_end:false, amount_of_files:2], + file(params.modules_testdata_base_path + 'mag/test_data/test_minigut_R1.fastq.gz', checkIfExists: true) + file(params.modules_testdata_base_path + 'mag/test_data/test_minigut_R2.fastq.gz', checkIfExists: true) + ], + [ + [id:'test_minigut_sample2', group:0, single_end:false, amount_of_files:2], + file(params.modules_testdata_base_path + 'mag/test_data/test_minigut_sample2_R1.fastq.gz', checkIfExists: true) + file(params.modules_testdata_base_path + 'mag/test_data/test_minigut_sample2_R2.fastq.gz', checkIfExists: true) + ] + ) + input[1] = Channel.of( + [ + [id:'test_minigut_spades', group:0, single_end:false, assembler:SPAdes, amount_of_files:1], + file(params.modules_testdata_base_path + 'mag/assemblies/SPAdes-test_minigut_contigs.fasta.gz', checkIfExists: true) + ], + [ + [id:'test_minigut_megahit', group:0, single_end:false, assembler:MEGAHIT, amount_of_files:1], + file(params.modules_testdata_base_path + 'mag/assemblies/MEGAHIT-test_minigut.contigs.fa.gz', checkIfExists: true) ] ) """ From 8724961232609021ef2b27e0e8b77e7403c60506 Mon Sep 17 00:00:00 2001 From: "James A. 
Fellows Yates" Date: Tue, 15 Oct 2024 14:01:21 +0000 Subject: [PATCH 11/16] Update function and minor tweaks --- docs/output.md | 7 ++- nextflow_schema.json | 5 +- .../generate_downstream_samplesheets/main.nf | 24 ++++---- .../tests/main.test.nf | 58 ------------------- .../local/utils_nfcore_mag_pipeline/main.nf | 5 ++ 5 files changed, 25 insertions(+), 74 deletions(-) delete mode 100644 subworkflows/local/generate_downstream_samplesheets/tests/main.test.nf diff --git a/docs/output.md b/docs/output.md index 10345218..88be0aa0 100644 --- a/docs/output.md +++ b/docs/output.md @@ -26,6 +26,9 @@ The pipeline is built using [Nextflow](https://www.nextflow.io/) and processes d Note that when specifying the parameter `--coassemble_group`, for the corresponding output filenames/directories of the assembly or downsteam processes the group ID, or more precisely the term `group-[group_id]`, will be used instead of the sample ID. +The pipeline can also generate downstream pipeline input samplesheets. +These are stored in `/downstream_samplesheets`. + ## Quality control These steps trim away the adapter sequences present in input reads, trims away bad quality bases and sicard reads that are too short. @@ -766,8 +769,8 @@ The pipeline can also generate input files for the following downstream pipeline Output files - `downstream_samplesheets/` - - `funcscan.csv`: Filled out nf-core/funcscan `--input` csv with absolute paths to the assembled contig FASTA files produced by nf-core/mag (MEGAHIT, SPAdes, SPAdesHybrid) - - `taxprofiler.csv`: Partially filled out nf-core/taxprofiler csv with paths to preprocessed reads (adapter trimmed, host removed etc.) `.fastq.gz` + - `taxprofiler.csv`: Partially filled out nf-core/taxprofiler `--input` csv with paths to preprocessed reads (adapter trimmed, host removed etc.) in `.fastq.gz` formats. I.e., the direct input into MEGAHIT, SPAdes, SPAdesHybrid. 
+ - `funcscan.csv`: Filled out nf-core/funcscan `--input` csv with absolute paths to the assembled contig FASTA files produced by nf-core/mag (i.e., the direct output from MEGAHIT, SPAdes, SPAdesHybrid - not bins). diff --git a/nextflow_schema.json b/nextflow_schema.json index 6c9e448b..8f954b51 100644 --- a/nextflow_schema.json +++ b/nextflow_schema.json @@ -96,9 +96,10 @@ }, "generate_pipeline_samplesheets": { "type": "string", - "default": "funcscan,taxprofiler", "description": "Specify which pipeline to generate a samplesheet for.", - "fa_icon": "fas fa-toolbox" + "help": "Note that the nf-core/funcscan samplesheet will only include paths to raw assemblies, not bins\n\nThe nf-core/taxprofiler samplesheet will include of paths the pre-processed reads that are used are used as input for _de novo_ assembly.", + "fa_icon": "fas fa-toolbox", + "pattern": "^(taxprofiler|funcscan)(?:,(taxprofiler|funcscan)){0,1}" } } }, diff --git a/subworkflows/local/generate_downstream_samplesheets/main.nf b/subworkflows/local/generate_downstream_samplesheets/main.nf index 7814a3e5..19bceb18 100644 --- a/subworkflows/local/generate_downstream_samplesheets/main.nf +++ b/subworkflows/local/generate_downstream_samplesheets/main.nf @@ -7,8 +7,7 @@ workflow SAMPLESHEET_TAXPROFILER { ch_reads main: - format = 'csv' // most common format in nf-core - format_sep = ',' + format = 'csv' def fastq_rel_path = '/' if (params.bbnorm) { @@ -36,7 +35,7 @@ workflow SAMPLESHEET_TAXPROFILER { } .tap{ ch_colnames } - channelToSamplesheet(ch_colnames, ch_list_for_samplesheet, 'downstream_samplesheets', 'taxprofiler', format, format_sep) + channelToSamplesheet(ch_list_for_samplesheet, "${params.outdir}/downstream_samplesheets/mag", format) } @@ -45,8 +44,7 @@ workflow SAMPLESHEET_FUNCSCAN { ch_assemblies main: - format = 'csv' // most common format in nf-core - format_sep = ',' + format = 'csv' ch_list_for_samplesheet = ch_assemblies .map { @@ -57,8 +55,7 @@ workflow SAMPLESHEET_FUNCSCAN { } .tap{ 
ch_colnames } - channelToSamplesheet(ch_colnames, ch_list_for_samplesheet, 'downstream_samplesheets', 'funcscan', format, format_sep) - + channelToSamplesheet(ch_list_for_samplesheet, "${params.outdir}/downstream_samplesheets/funcscan", format) } workflow GENERATE_DOWNSTREAM_SAMPLESHEETS { @@ -78,14 +75,17 @@ workflow GENERATE_DOWNSTREAM_SAMPLESHEETS { } } -// Constructs the header string and then the strings of each row, and -def channelToSamplesheet(ch_header, ch_list_for_samplesheet, outdir_subdir, pipeline, format, format_sep) { +def channelToSamplesheet(ch_list_for_samplesheet, path, format) { + def format_sep = [csv: ",", tsv: "\t", txt: "\t"][format] + + def ch_header = ch_list_for_samplesheet + ch_header .first() - .map{ it.keySet().join(format_sep) } - .concat( ch_list_for_samplesheet.map{ it.values().join(format_sep) }) + .map { it.keySet().join(format_sep) } + .concat(ch_list_for_samplesheet.map { it.values().join(format_sep) }) .collectFile( - name:"${params.outdir}/${outdir_subdir}/${pipeline}.${format}", + name: "${path}.${format}", newLine: true, sort: false ) diff --git a/subworkflows/local/generate_downstream_samplesheets/tests/main.test.nf b/subworkflows/local/generate_downstream_samplesheets/tests/main.test.nf deleted file mode 100644 index 635ae970..00000000 --- a/subworkflows/local/generate_downstream_samplesheets/tests/main.test.nf +++ /dev/null @@ -1,58 +0,0 @@ -nextflow_workflow { - name "Test Subworkflow GENERATE_DOWNSTREAM_SAMPLESHEETS" - script "../main.nf" - workflow "GENERATE_DOWNSTREAM_SAMPLESHEETS" - - tag "subworkflows" - tag "subworkflows_local" - tag "subworkflows/generate_downstream_samplesheets" - - test("reads,assemblies - taxprofiler,funcscan") { - - when { - params { - modules_testdata_base_path = "https://github.com/nf-core/test-datasets/raw/mag/test_data/" - outdir = "." 
- generate_pipeline_samplesheets = 'taxprofiler,funcscan' - } - workflow { - """ - input[0] = Channel.of( - [ - [id:'test_minigut', group:0, single_end:false, amount_of_files:2], - file(params.modules_testdata_base_path + 'mag/test_data/test_minigut_R1.fastq.gz', checkIfExists: true) - file(params.modules_testdata_base_path + 'mag/test_data/test_minigut_R2.fastq.gz', checkIfExists: true) - ], - [ - [id:'test_minigut_sample2', group:0, single_end:false, amount_of_files:2], - file(params.modules_testdata_base_path + 'mag/test_data/test_minigut_sample2_R1.fastq.gz', checkIfExists: true) - file(params.modules_testdata_base_path + 'mag/test_data/test_minigut_sample2_R2.fastq.gz', checkIfExists: true) - ] - ) - input[1] = Channel.of( - [ - [id:'test_minigut_spades', group:0, single_end:false, assembler:SPAdes, amount_of_files:1], - file(params.modules_testdata_base_path + 'mag/assemblies/SPAdes-test_minigut_contigs.fasta.gz', checkIfExists: true) - ], - [ - [id:'test_minigut_megahit', group:0, single_end:false, assembler:MEGAHIT, amount_of_files:1], - file(params.modules_testdata_base_path + 'mag/assemblies/MEGAHIT-test_minigut.contigs.fa.gz', checkIfExists: true) - ] - ) - """ - } - } - - then { - assertAll( - { assert workflow.success}, - { assert snapshot( - [ - "${params.outdir}/downstream_samplesheets/funcscan.csv", - "${params.outdir}/downstream_samplesheets/taxprofiler.csv" - ]).match() - }, - ) - } - } -} diff --git a/subworkflows/local/utils_nfcore_mag_pipeline/main.nf b/subworkflows/local/utils_nfcore_mag_pipeline/main.nf index 511e59e6..c56c3cc3 100644 --- a/subworkflows/local/utils_nfcore_mag_pipeline/main.nf +++ b/subworkflows/local/utils_nfcore_mag_pipeline/main.nf @@ -118,6 +118,11 @@ workflow PIPELINE_INITIALISATION { // validateInputParameters( hybrid + + // Validate samplesheet generation parameters + if (params.generate_downstream_samplesheets && !params.generate_pipeline_samplesheets) { + error('[nf-core/createtaxdb] If supplying 
`--generate_downstream_samplesheets`, you must also specify which pipeline to generate for with `--generate_pipeline_samplesheets! Check input.') + } ) // Validate PRE-ASSEMBLED CONTIG input when supplied From f6b9a9984d2716ae28846dfb5a90bf3ada067a11 Mon Sep 17 00:00:00 2001 From: "James A. Fellows Yates" Date: Tue, 15 Oct 2024 15:47:40 +0000 Subject: [PATCH 12/16] Put the if/else within the function, not as an argument to the function --- subworkflows/local/utils_nfcore_mag_pipeline/main.nf | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/subworkflows/local/utils_nfcore_mag_pipeline/main.nf b/subworkflows/local/utils_nfcore_mag_pipeline/main.nf index c56c3cc3..02ef7b57 100644 --- a/subworkflows/local/utils_nfcore_mag_pipeline/main.nf +++ b/subworkflows/local/utils_nfcore_mag_pipeline/main.nf @@ -118,11 +118,6 @@ workflow PIPELINE_INITIALISATION { // validateInputParameters( hybrid - - // Validate samplesheet generation parameters - if (params.generate_downstream_samplesheets && !params.generate_pipeline_samplesheets) { - error('[nf-core/createtaxdb] If supplying `--generate_downstream_samplesheets`, you must also specify which pipeline to generate for with `--generate_pipeline_samplesheets! Check input.') - } ) // Validate PRE-ASSEMBLED CONTIG input when supplied @@ -285,6 +280,11 @@ def validateInputParameters(hybrid) { if ( !params.genomes[params.host_genome].bowtie2 ) { error("[nf-core/mag] ERROR: No Bowtie 2 index file specified for the host genome ${params.host_genome}!") } + + // Validate samplesheet generation parameters + if (params.generate_downstream_samplesheets && !params.generate_pipeline_samplesheets) { + error('[nf-core/createtaxdb] If supplying `--generate_downstream_samplesheets`, you must also specify which pipeline to generate for with `--generate_pipeline_samplesheets! Check input.') + } } // Check MetaBAT2 inputs From bec834781a539d92c88f571729f072bffbac51e8 Mon Sep 17 00:00:00 2001 From: "James A. 
Fellows Yates" Date: Tue, 15 Oct 2024 15:54:17 +0000 Subject: [PATCH 13/16] Fix name of file,remove duplicated check --- .../generate_downstream_samplesheets/main.nf | 45 +++++++++---------- .../local/utils_nfcore_mag_pipeline/main.nf | 8 +--- 2 files changed, 24 insertions(+), 29 deletions(-) diff --git a/subworkflows/local/generate_downstream_samplesheets/main.nf b/subworkflows/local/generate_downstream_samplesheets/main.nf index 19bceb18..8c4a00cd 100644 --- a/subworkflows/local/generate_downstream_samplesheets/main.nf +++ b/subworkflows/local/generate_downstream_samplesheets/main.nf @@ -7,12 +7,13 @@ workflow SAMPLESHEET_TAXPROFILER { ch_reads main: - format = 'csv' + format = 'csv' def fastq_rel_path = '/' if (params.bbnorm) { fastq_rel_path = '/bbmap/bbnorm/' - } else if (!params.keep_phix) { + } + else if (!params.keep_phix) { fastq_rel_path = '/QC_shortreads/remove_phix/' } else if (params.host_fasta) { @@ -23,20 +24,18 @@ workflow SAMPLESHEET_TAXPROFILER { } ch_list_for_samplesheet = ch_reads - .map { - meta, fastq -> - def sample = meta.id - def run_accession = meta.id - def instrument_platform = "" - def fastq_1 = file(params.outdir).toString() + fastq_rel_path + meta.id + '/' + fastq[0].getName() - def fastq_2 = file(params.outdir).toString() + fastq_rel_path + meta.id + '/' + fastq[1].getName() - def fasta = "" - [ sample: sample, run_accession: run_accession, instrument_platform: instrument_platform, fastq_1: fastq_1, fastq_2: fastq_2, fasta: fasta ] + .map { meta, fastq -> + def sample = meta.id + def run_accession = meta.id + def instrument_platform = "" + def fastq_1 = file(params.outdir).toString() + fastq_rel_path + meta.id + '/' + fastq[0].getName() + def fastq_2 = file(params.outdir).toString() + fastq_rel_path + meta.id + '/' + fastq[1].getName() + def fasta = "" + [sample: sample, run_accession: run_accession, instrument_platform: instrument_platform, fastq_1: fastq_1, fastq_2: fastq_2, fasta: fasta] } - .tap{ ch_colnames } - - 
channelToSamplesheet(ch_list_for_samplesheet, "${params.outdir}/downstream_samplesheets/mag", format) + .tap { ch_colnames } + channelToSamplesheet(ch_list_for_samplesheet, "${params.outdir}/downstream_samplesheets/taxprofiler", format) } workflow SAMPLESHEET_FUNCSCAN { @@ -44,16 +43,15 @@ workflow SAMPLESHEET_FUNCSCAN { ch_assemblies main: - format = 'csv' + format = 'csv' ch_list_for_samplesheet = ch_assemblies - .map { - meta, filename -> - def sample = meta.id - def fasta = file(params.outdir).toString() + '/Assembly/' + meta.assembler + '/' + filename.getName() - [ sample: sample, fasta: fasta ] + .map { meta, filename -> + def sample = meta.id + def fasta = file(params.outdir).toString() + '/Assembly/' + meta.assembler + '/' + filename.getName() + [sample: sample, fasta: fasta] } - .tap{ ch_colnames } + .tap { ch_colnames } channelToSamplesheet(ch_list_for_samplesheet, "${params.outdir}/downstream_samplesheets/funcscan", format) } @@ -66,11 +64,12 @@ workflow GENERATE_DOWNSTREAM_SAMPLESHEETS { main: def downstreampipeline_names = params.generate_pipeline_samplesheets.split(",") - if ( downstreampipeline_names.contains('taxprofiler') && params.save_clipped_reads ) { // save_clipped_reads must be true + if (downstreampipeline_names.contains('taxprofiler') && params.save_clipped_reads) { + // save_clipped_reads must be true SAMPLESHEET_TAXPROFILER(ch_reads) } - if ( downstreampipeline_names.contains('funcscan') ) { + if (downstreampipeline_names.contains('funcscan')) { SAMPLESHEET_FUNCSCAN(ch_assemblies) } } diff --git a/subworkflows/local/utils_nfcore_mag_pipeline/main.nf b/subworkflows/local/utils_nfcore_mag_pipeline/main.nf index 02ef7b57..c7ae380e 100644 --- a/subworkflows/local/utils_nfcore_mag_pipeline/main.nf +++ b/subworkflows/local/utils_nfcore_mag_pipeline/main.nf @@ -280,11 +280,6 @@ def validateInputParameters(hybrid) { if ( !params.genomes[params.host_genome].bowtie2 ) { error("[nf-core/mag] ERROR: No Bowtie 2 index file specified for the host 
genome ${params.host_genome}!") } - - // Validate samplesheet generation parameters - if (params.generate_downstream_samplesheets && !params.generate_pipeline_samplesheets) { - error('[nf-core/createtaxdb] If supplying `--generate_downstream_samplesheets`, you must also specify which pipeline to generate for with `--generate_pipeline_samplesheets! Check input.') - } } // Check MetaBAT2 inputs @@ -336,7 +331,7 @@ def validateInputParameters(hybrid) { error('[nf-core/mag] ERROR: Invalid parameter combination: --save_mmseqs_db supplied but no database has been requested for download with --metaeuk_mmseqs_db!') } - // Validate samplesheet generation parameters + if (params.generate_downstream_samplesheets && !params.generate_pipeline_samplesheets) { error('[nf-core/mag] If supplying `--generate_downstream_samplesheets`, you must also specify which pipeline to generate for with `--generate_pipeline_samplesheets! Check input.') } @@ -348,6 +343,7 @@ def validateInputParameters(hybrid) { if (params.generate_downstream_samplesheets && params.save_clipped_reads && (params.bbnorm || !params.keep_phix || params.host_fasta || params.skip_clipping)) { error('[nf-core/mag] Supplied --generate_downstream_samplesheets and --save_clipped_reads is true, but also need one of the following: --bbnorm true, or --keep_phix false, or --host_fasta true, or skip_clipping true.') } + } // From 67958ecc46d8af44e4cf52ba342646d1119312de Mon Sep 17 00:00:00 2001 From: "James A. 
Fellows Yates" Date: Tue, 15 Oct 2024 15:59:29 +0000 Subject: [PATCH 14/16] Only check save_clipped_reads if taxprofiler in pipeline list --- subworkflows/local/utils_nfcore_mag_pipeline/main.nf | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/subworkflows/local/utils_nfcore_mag_pipeline/main.nf b/subworkflows/local/utils_nfcore_mag_pipeline/main.nf index c7ae380e..5316e28d 100644 --- a/subworkflows/local/utils_nfcore_mag_pipeline/main.nf +++ b/subworkflows/local/utils_nfcore_mag_pipeline/main.nf @@ -331,16 +331,16 @@ def validateInputParameters(hybrid) { error('[nf-core/mag] ERROR: Invalid parameter combination: --save_mmseqs_db supplied but no database has been requested for download with --metaeuk_mmseqs_db!') } - + // Validate generate samplesheet inputs if (params.generate_downstream_samplesheets && !params.generate_pipeline_samplesheets) { error('[nf-core/mag] If supplying `--generate_downstream_samplesheets`, you must also specify which pipeline to generate for with `--generate_pipeline_samplesheets! 
Check input.') } - if (params.generate_downstream_samplesheets && !params.save_clipped_reads) { + if (params.generate_downstream_samplesheets && params.generate_pipeline_samplesheets.split(',').contains('taxprofiler') && !params.save_clipped_reads) { error('[nf-core/mag] Supplied --generate_downstream_samplesheets but missing --save_clipped_reads (mandatory for reads.gz output).') } - if (params.generate_downstream_samplesheets && params.save_clipped_reads && (params.bbnorm || !params.keep_phix || params.host_fasta || params.skip_clipping)) { + if (params.generate_downstream_samplesheets && params.generate_pipeline_samplesheets.split(',').contains('taxprofiler') && params.save_clipped_reads && (params.bbnorm || !params.keep_phix || params.host_fasta || params.skip_clipping)) { error('[nf-core/mag] Supplied --generate_downstream_samplesheets and --save_clipped_reads is true, but also need one of the following: --bbnorm true, or --keep_phix false, or --host_fasta true, or skip_clipping true.') } From 997674a3f59834ebfdd3968e3268bb711665905d Mon Sep 17 00:00:00 2001 From: "James A. 
Fellows Yates" Date: Wed, 16 Oct 2024 15:09:16 +0000 Subject: [PATCH 15/16] Fix logic for generating taxprofiler paths; make new channel to pass correct assemblies files (Funcsan) --- conf/modules.config | 112 +++++++++--------- .../generate_downstream_samplesheets/main.nf | 26 ++-- .../local/utils_nfcore_mag_pipeline/main.nf | 38 ++++-- workflows/mag.nf | 21 +++- 4 files changed, 120 insertions(+), 77 deletions(-) diff --git a/conf/modules.config b/conf/modules.config index b226ba01..4c2e9ed3 100644 --- a/conf/modules.config +++ b/conf/modules.config @@ -21,18 +21,18 @@ process { ] withName: FASTQC_RAW { - ext.args = '--quiet' + ext.args = '--quiet' publishDir = [ path: { "${params.outdir}/QC_shortreads/fastqc" }, mode: params.publish_dir_mode, pattern: "*.html" ] ext.prefix = { "${meta.id}_run${meta.run}_raw" } - tag = { "${meta.id}_run${meta.run}_raw" } + tag = { "${meta.id}_run${meta.run}_raw" } } withName: FASTP { - ext.args = [ + ext.args = [ "-q ${params.fastp_qualified_quality}", "--cut_front", "--cut_tail", @@ -53,11 +53,11 @@ process { ] ] ext.prefix = { "${meta.id}_run${meta.run}_fastp" } - tag = { "${meta.id}_run${meta.run}" } + tag = { "${meta.id}_run${meta.run}" } } withName: ADAPTERREMOVAL_PE { - ext.args = [ + ext.args = [ "--minlength ${params.reads_minlength}", "--adapter1 ${params.adapterremoval_adapter1} --adapter2 ${params.adapterremoval_adapter2}", "--minquality ${params.adapterremoval_minquality} --trimns", @@ -77,11 +77,11 @@ process { ] ] ext.prefix = { "${meta.id}_run${meta.run}_ar2" } - tag = { "${meta.id}_run${meta.run}" } + tag = { "${meta.id}_run${meta.run}" } } withName: ADAPTERREMOVAL_SE { - ext.args = [ + ext.args = [ "--minlength ${params.reads_minlength}", "--adapter1 ${params.adapterremoval_adapter1}", "--minquality ${params.adapterremoval_minquality} --trimns", @@ -93,72 +93,72 @@ process { pattern: "*.{settings}" ] ext.prefix = { "${meta.id}_run${meta.run}_ar2" } - tag = { "${meta.id}_run${meta.run}" } + tag = { 
"${meta.id}_run${meta.run}" } } withName: BOWTIE2_PHIX_REMOVAL_ALIGN { ext.prefix = { "${meta.id}_run${meta.run}_phix_removed" } publishDir = [ [ - path: { "${params.outdir}/QC_shortreads/remove_phix" }, + path: { "${params.outdir}/QC_shortreads/remove_phix/${meta.id}/" }, mode: params.publish_dir_mode, pattern: "*.log" ], [ - path: { "${params.outdir}/QC_shortreads/remove_phix" }, + path: { "${params.outdir}/QC_shortreads/remove_phix/${meta.id}/" }, mode: params.publish_dir_mode, pattern: "*.unmapped*.fastq.gz", enabled: params.save_phixremoved_reads ] ] - tag = { "${meta.id}_run${meta.run}" } + tag = { "${meta.id}_run${meta.run}" } } withName: BOWTIE2_HOST_REMOVAL_ALIGN { - ext.args = params.host_removal_verysensitive ? "--very-sensitive" : "--sensitive" - ext.args2 = params.host_removal_save_ids ? "--host_removal_save_ids" : '' + ext.args = params.host_removal_verysensitive ? "--very-sensitive" : "--sensitive" + ext.args2 = params.host_removal_save_ids ? "--host_removal_save_ids" : '' ext.prefix = { "${meta.id}_run${meta.run}_host_removed" } publishDir = [ [ - path: { "${params.outdir}/QC_shortreads/remove_host" }, + path: { "${params.outdir}/QC_shortreads/remove_host/${meta.id}/" }, mode: params.publish_dir_mode, pattern: "*{.log,read_ids.txt}" ], [ - path: { "${params.outdir}/QC_shortreads/remove_host" }, + path: { "${params.outdir}/QC_shortreads/remove_host/${meta.id}/" }, mode: params.publish_dir_mode, pattern: "*.unmapped*.fastq.gz", enabled: params.save_hostremoved_reads ] ] - tag = { "${meta.id}_run${meta.run}" } + tag = { "${meta.id}_run${meta.run}" } } withName: FASTQC_TRIMMED { - ext.args = '--quiet' + ext.args = '--quiet' ext.prefix = { "${meta.id}_run${meta.run}_trimmed" } publishDir = [ path: { "${params.outdir}/QC_shortreads/fastqc" }, mode: params.publish_dir_mode, pattern: "*.html" ] - tag = { "${meta.id}_run${meta.run}" } + tag = { "${meta.id}_run${meta.run}" } } withName: BBMAP_BBNORM { - ext.args = [ + ext.args = [ params.bbnorm_target ? 
"target=${params.bbnorm_target}" : '', params.bbnorm_min ? "min=${params.bbnorm_min}" : '' ].join(' ').trim() publishDir = [ [ - path: { "${params.outdir}/bbmap/bbnorm/logs" }, + path: { "${params.outdir}/bbmap/bbnorm/${meta.id}/" }, enabled: params.save_bbnorm_reads, mode: params.publish_dir_mode, pattern: "*.log" ], [ - path: { "${params.outdir}/bbmap/bbnorm/" }, + path: { "${params.outdir}/bbmap/bbnorm/${meta.id}/" }, mode: 'copy', enabled: params.save_bbnorm_reads, mode: params.publish_dir_mode, @@ -179,11 +179,11 @@ process { withName: PORECHOP_ABI { publishDir = [ - path: { "${params.outdir}/QC_longreads/porechop" }, - mode: params.publish_dir_mode, - pattern: "*_porechop-abi_trimmed.fastq.gz", - enabled: params.save_porechop_reads - ] + path: { "${params.outdir}/QC_longreads/porechop" }, + mode: params.publish_dir_mode, + pattern: "*_porechop-abi_trimmed.fastq.gz", + enabled: params.save_porechop_reads + ] ext.prefix = { "${meta.id}_run${meta.run}_porechop-abi_trimmed" } } @@ -195,11 +195,11 @@ process { "--length_weight ${params.longreads_length_weight}" ].join(' ').trim() publishDir = [ - path: { "${params.outdir}/QC_longreads/Filtlong" }, - mode: params.publish_dir_mode, - pattern: "*_filtlong.fastq.gz", - enabled: params.save_filtlong_reads - ] + path: { "${params.outdir}/QC_longreads/Filtlong" }, + mode: params.publish_dir_mode, + pattern: "*_filtlong.fastq.gz", + enabled: params.save_filtlong_reads + ] ext.prefix = { "${meta.id}_run${meta.run}_filtlong" } } @@ -222,7 +222,7 @@ process { withName: NANOPLOT_RAW { ext.prefix = 'raw' - ext.args = { + ext.args = { [ "-p raw_", "--title ${meta.id}_raw", @@ -237,7 +237,7 @@ process { } withName: NANOPLOT_FILTERED { - ext.args = { + ext.args = { [ "-p filtered_", "--title ${meta.id}_filtered", @@ -269,7 +269,7 @@ process { } withName: KRAKEN2 { - ext.args = '--quiet' + ext.args = '--quiet' publishDir = [ path: { "${params.outdir}/Taxonomy/kraken2/${meta.id}" }, mode: params.publish_dir_mode, @@ -296,7 +296,7 
@@ process { //pattern: "*.{fa.gz,log}" //'pattern' didnt work, probably because the output is in a folder, solved with 'saveAs' withName: MEGAHIT { - ext.args = params.megahit_options ?: '' + ext.args = params.megahit_options ?: '' publishDir = [ path: { "${params.outdir}/Assembly" }, mode: params.publish_dir_mode, @@ -311,7 +311,7 @@ process { } withName: SPADES { - ext.args = params.spades_options ?: '' + ext.args = params.spades_options ?: '' publishDir = [ path: { "${params.outdir}/Assembly/SPAdes" }, mode: params.publish_dir_mode, @@ -320,7 +320,7 @@ process { } withName: SPADESHYBRID { - ext.args = params.spades_options ?: '' + ext.args = params.spades_options ?: '' publishDir = [ path: { "${params.outdir}/Assembly/SPAdesHybrid" }, mode: params.publish_dir_mode, @@ -337,7 +337,7 @@ process { } withName: GENOMAD_ENDTOEND { - ext.args = [ + ext.args = [ "--cleanup", "--min-score ${params.genomad_min_score}", "--splits ${params.genomad_splits}" @@ -350,7 +350,7 @@ process { } withName: BOWTIE2_ASSEMBLY_ALIGN { - ext.args = params.bowtie2_mode ? params.bowtie2_mode : params.ancient_dna ? '--very-sensitive -N 1' : '' + ext.args = params.bowtie2_mode ? params.bowtie2_mode : params.ancient_dna ? '--very-sensitive -N 1' : '' ext.prefix = { "${meta.id}.assembly" } publishDir = [ [ @@ -392,7 +392,7 @@ process { } withName: BUSCO { - ext.args = [ + ext.args = [ params.busco_db ? 
'--offline' : '' ].join(' ').trim() publishDir = [ @@ -430,7 +430,7 @@ process { } withName: CHECKM_LINEAGEWF { - tag = { "${meta.assembler}-${meta.binner}-${meta.domain}-${meta.refinement}-${meta.id}" } + tag = { "${meta.assembler}-${meta.binner}-${meta.domain}-${meta.refinement}-${meta.id}" } ext.prefix = { "${meta.assembler}-${meta.binner}-${meta.domain}-${meta.refinement}-${meta.id}_wf" } publishDir = [ path: { "${params.outdir}/GenomeBinning/QC/CheckM" }, @@ -441,7 +441,7 @@ process { withName: CHECKM_QA { ext.prefix = { "${meta.assembler}-${meta.binner}-${meta.domain}-${meta.refinement}-${meta.id}_qa" } - ext.args = "-o 2 --tab_table" + ext.args = "-o 2 --tab_table" publishDir = [ path: { "${params.outdir}/GenomeBinning/QC/CheckM" }, mode: params.publish_dir_mode, @@ -510,7 +510,7 @@ process { } withName: GTDBTK_CLASSIFYWF { - ext.args = [ + ext.args = [ "--extension fa", "--min_perc_aa ${params.gtdbtk_min_perc_aa}", "--min_af ${params.gtdbtk_min_af}", @@ -525,7 +525,7 @@ process { } withName: GTDBTK_SUMMARY { - ext.args = "--extension fa" + ext.args = "--extension fa" publishDir = [ path: { "${params.outdir}/Taxonomy/GTDB-Tk" }, mode: params.publish_dir_mode, @@ -534,7 +534,7 @@ process { } withName: PROKKA { - ext.args = "--metagenome" + ext.args = "--metagenome" publishDir = [ path: { "${params.outdir}/Annotation/Prokka/${meta.assembler}" }, mode: params.publish_dir_mode, @@ -543,7 +543,7 @@ process { } withName: PRODIGAL { - ext.args = "-p meta" + ext.args = "-p meta" publishDir = [ path: { "${params.outdir}/Annotation/Prodigal/${meta.assembler}/${meta.id}" }, mode: params.publish_dir_mode, @@ -553,7 +553,7 @@ process { withName: FREEBAYES { ext.prefix = { "${meta.assembler}-${meta.id}" } - ext.args = "-p ${params.freebayes_ploidy} -q ${params.freebayes_min_basequality} -F ${params.freebayes_minallelefreq}" + ext.args = "-p ${params.freebayes_ploidy} -q ${params.freebayes_min_basequality} -F ${params.freebayes_minallelefreq}" publishDir = [ path: { 
"${params.outdir}/Ancient_DNA/variant_calling/freebayes" }, mode: params.publish_dir_mode, @@ -563,7 +563,7 @@ process { withName: BCFTOOLS_VIEW { ext.prefix = { "${meta.assembler}-${meta.id}.filtered" } - ext.args = "-v snps,mnps -i 'QUAL>=${params.bcftools_view_high_variant_quality} || (QUAL>=${params.bcftools_view_medium_variant_quality} && FORMAT/AO>=${params.bcftools_view_minimal_allelesupport})'" + ext.args = "-v snps,mnps -i 'QUAL>=${params.bcftools_view_high_variant_quality} || (QUAL>=${params.bcftools_view_medium_variant_quality} && FORMAT/AO>=${params.bcftools_view_minimal_allelesupport})'" publishDir = [ path: { "${params.outdir}/Ancient_DNA/variant_calling/filtered" }, mode: params.publish_dir_mode, @@ -582,7 +582,7 @@ process { withName: BCFTOOLS_INDEX { ext.prefix = { "${meta.assembler}-${meta.id}" } - ext.args = "-t" + ext.args = "-t" publishDir = [ path: { "${params.outdir}/Ancient_DNA/variant_calling/index" }, mode: params.publish_dir_mode, @@ -600,7 +600,7 @@ process { withName: PYDAMAGE_FILTER { ext.prefix = { "${meta.assembler}-${meta.id}" } - ext.args = "-t ${params.pydamage_accuracy}" + ext.args = "-t ${params.pydamage_accuracy}" publishDir = [ path: { "${params.outdir}/Ancient_DNA/pydamage/filter/${meta.assembler}-${meta.id}/" }, mode: params.publish_dir_mode @@ -644,7 +644,7 @@ process { ] ] ext.prefix = { "${meta.assembler}-MetaBAT2-${meta.id}" } - ext.args = [ + ext.args = [ params.min_contig_size < 1500 ? 
"-m 1500" : "-m ${params.min_contig_size}", "--unbinned", "--seed ${params.metabat_rng_seed}" @@ -734,7 +734,7 @@ process { ] ] ext.prefix = { "${meta.assembler}-DASTool-${meta.id}" } - ext.args = "--write_bins --write_unbinned --write_bin_evals --score_threshold ${params.refine_bins_dastool_threshold}" + ext.args = "--write_bins --write_unbinned --write_bin_evals --score_threshold ${params.refine_bins_dastool_threshold}" } withName: RENAME_POSTDASTOOL { @@ -758,12 +758,12 @@ process { mode: params.publish_dir_mode, pattern: "*.txt" ] - ext.args = { "--min_len ${params.tiara_min_length} --probabilities" } + ext.args = { "--min_len ${params.tiara_min_length} --probabilities" } ext.prefix = { "${meta.assembler}-${meta.id}.tiara" } } withName: TIARA_CLASSIFY { - ext.args = { "--join_prokaryotes --assembler ${meta.assembler}" } + ext.args = { "--join_prokaryotes --assembler ${meta.assembler}" } ext.prefix = { "${meta.assembler}-${meta.binner}-${meta.bin}-${meta.id}" } } @@ -787,7 +787,7 @@ process { } withName: METAEUK_EASYPREDICT { - ext.args = "" + ext.args = "" ext.prefix = { "${meta.id}" } publishDir = [ path: { "${params.outdir}/Annotation/MetaEuk/${meta.assembler}/${meta.id}" }, @@ -797,7 +797,7 @@ process { } withName: MULTIQC { - ext.args = { params.multiqc_title ? "--title \"${params.multiqc_title}\"" : '' } + ext.args = { params.multiqc_title ? 
"--title \"${params.multiqc_title}\"" : '' } publishDir = [ path: { "${params.outdir}/multiqc" }, mode: params.publish_dir_mode, diff --git a/subworkflows/local/generate_downstream_samplesheets/main.nf b/subworkflows/local/generate_downstream_samplesheets/main.nf index 8c4a00cd..21439cd2 100644 --- a/subworkflows/local/generate_downstream_samplesheets/main.nf +++ b/subworkflows/local/generate_downstream_samplesheets/main.nf @@ -11,16 +11,19 @@ workflow SAMPLESHEET_TAXPROFILER { def fastq_rel_path = '/' if (params.bbnorm) { - fastq_rel_path = '/bbmap/bbnorm/' + fastq_rel_path = "/bbmap/bbnorm/" } else if (!params.keep_phix) { - fastq_rel_path = '/QC_shortreads/remove_phix/' + fastq_rel_path = "/QC_shortreads/remove_phix/" } - else if (params.host_fasta) { - fastq_rel_path = '/QC_shortreads/remove_host/' + else if (params.host_fasta != false) { + fastq_rel_path = "/QC_shortreads/remove_host/" } - else if (!params.skip_clipping) { - fastq_rel_path = '/QC_shortreads/fastp/' + else if (!params.skip_clipping && params.clip_tool == 'fastp') { + fastq_rel_path = "/QC_shortreads/fastp/" + } + else if (!params.skip_clipping && params.clip_tool == 'adapterremoval') { + fastq_rel_path = "/QC_shortreads/adapterremoval/" } ch_list_for_samplesheet = ch_reads @@ -28,8 +31,8 @@ workflow SAMPLESHEET_TAXPROFILER { def sample = meta.id def run_accession = meta.id def instrument_platform = "" - def fastq_1 = file(params.outdir).toString() + fastq_rel_path + meta.id + '/' + fastq[0].getName() - def fastq_2 = file(params.outdir).toString() + fastq_rel_path + meta.id + '/' + fastq[1].getName() + def fastq_1 = meta.single_end ? file(params.outdir).toString() + fastq_rel_path + meta.id + '/' + fastq.getName() : file(params.outdir).toString() + fastq_rel_path + meta.id + '/' + fastq[0].getName() + def fastq_2 = meta.single_end ? 
"" : file(params.outdir).toString() + fastq_rel_path + meta.id + '/' + fastq[1].getName() def fasta = "" [sample: sample, run_accession: run_accession, instrument_platform: instrument_platform, fastq_1: fastq_1, fastq_2: fastq_2, fasta: fasta] } @@ -47,7 +50,8 @@ workflow SAMPLESHEET_FUNCSCAN { ch_list_for_samplesheet = ch_assemblies .map { meta, filename -> - def sample = meta.id + // funcscan requires + def sample = filename.extension ==~ 'gz' ? filename.baseName.take(filename.baseName.lastIndexOf('.')) : filename.baseName def fasta = file(params.outdir).toString() + '/Assembly/' + meta.assembler + '/' + filename.getName() [sample: sample, fasta: fasta] } @@ -64,8 +68,7 @@ workflow GENERATE_DOWNSTREAM_SAMPLESHEETS { main: def downstreampipeline_names = params.generate_pipeline_samplesheets.split(",") - if (downstreampipeline_names.contains('taxprofiler') && params.save_clipped_reads) { - // save_clipped_reads must be true + if (downstreampipeline_names.contains('taxprofiler')) { SAMPLESHEET_TAXPROFILER(ch_reads) } @@ -74,6 +77,7 @@ workflow GENERATE_DOWNSTREAM_SAMPLESHEETS { } } +// Constructs the header string and then the strings of each row, and def channelToSamplesheet(ch_list_for_samplesheet, path, format) { def format_sep = [csv: ",", tsv: "\t", txt: "\t"][format] diff --git a/subworkflows/local/utils_nfcore_mag_pipeline/main.nf b/subworkflows/local/utils_nfcore_mag_pipeline/main.nf index 5316e28d..f34b6622 100644 --- a/subworkflows/local/utils_nfcore_mag_pipeline/main.nf +++ b/subworkflows/local/utils_nfcore_mag_pipeline/main.nf @@ -331,19 +331,39 @@ def validateInputParameters(hybrid) { error('[nf-core/mag] ERROR: Invalid parameter combination: --save_mmseqs_db supplied but no database has been requested for download with --metaeuk_mmseqs_db!') } - // Validate generate samplesheet inputs - if (params.generate_downstream_samplesheets && !params.generate_pipeline_samplesheets) { - error('[nf-core/mag] If supplying `--generate_downstream_samplesheets`, you 
must also specify which pipeline to generate for with `--generate_pipeline_samplesheets! Check input.') - } + if (params.generate_downstream_samplesheets) { - if (params.generate_downstream_samplesheets && params.generate_pipeline_samplesheets.split(',').contains('taxprofiler') && !params.save_clipped_reads) { - error('[nf-core/mag] Supplied --generate_downstream_samplesheets but missing --save_clipped_reads (mandatory for reads.gz output).') - } + if (!params.generate_pipeline_samplesheets) { + error('[nf-core/mag] If supplying `--generate_downstream_samplesheets`, you must also specify which pipeline to generate for with `--generate_pipeline_samplesheets! Check input.') + } + + if (params.generate_pipeline_samplesheets.split(',').contains('taxprofiler') && params.save_clipped_reads && (!params.bbnorm && params.keep_phix && !params.host_fasta && params.skip_clipping)) { + error('[nf-core/mag] Supplied --generate_downstream_samplesheets and --save_clipped_reads is true, but also need at least one of the following: --bbnorm, or --host_fasta , and/or either do not supply both --keep_phix or --skip_clipping') + } - if (params.generate_downstream_samplesheets && params.generate_pipeline_samplesheets.split(',').contains('taxprofiler') && params.save_clipped_reads && (params.bbnorm || !params.keep_phix || params.host_fasta || params.skip_clipping)) { - error('[nf-core/mag] Supplied --generate_downstream_samplesheets and --save_clipped_reads is true, but also need one of the following: --bbnorm true, or --keep_phix false, or --host_fasta true, or skip_clipping true.') + if (params.generate_pipeline_samplesheets.split(',').contains('taxprofiler') && params.bbnorm && !params.save_bbnorm_reads) { + error('[nf-core/mag] Supplied --generate_downstream_samplesheets but missing --save_bbnorm_reads (mandatory for reads.gz output when --bbnorm).') + } + else if (params.generate_pipeline_samplesheets.split(',').contains('taxprofiler') && !params.bbnorm && !params.keep_phix && 
!params.save_phixremoved_reads) { + error('[nf-core/mag] Supplied --generate_downstream_samplesheets but missing --save_phixremoved_reads (mandatory for reads.gz output when phix being removed [default behaviour]).') + } + else if (params.generate_pipeline_samplesheets.split(',').contains('taxprofiler') && !params.bbnorm && params.keep_phix && params.host_fasta && !params.save_hostremoved_reads) { + error('[nf-core/mag] Supplied --generate_downstream_samplesheets but missing --save_hostremoved_reads (mandatory for reads.gz output when host reads being removed).') + } + else if (params.generate_pipeline_samplesheets.split(',').contains('taxprofiler') && !params.bbnorm && params.keep_phix && !params.host_fasta && !params.skip_clipping && !params.save_clipped_reads) { + error('[nf-core/mag] Supplied --generate_downstream_samplesheets but missing --save_clipped_reads (mandatory for reads.gz output when running clipping).') + } } + // Validate generate samplesheet inputs + + + + + + + + } // diff --git a/workflows/mag.nf b/workflows/mag.nf index 1a7293b0..0842a55e 100644 --- a/workflows/mag.nf +++ b/workflows/mag.nf @@ -507,6 +507,7 @@ workflow MAG { } ch_assemblies = Channel.empty() + ch_assemblies_gz = Channel.empty() if (!params.skip_megahit){ MEGAHIT ( ch_short_reads_grouped ) @@ -515,7 +516,13 @@ workflow MAG { def meta_new = meta + [assembler: 'MEGAHIT'] [ meta_new, assembly ] } + ch_megahit_assemblies_gz = MEGAHIT.out.assembly_gz + .map { meta, assembly -> + def meta_new = meta + [assembler: 'MEGAHIT'] + [ meta_new, assembly ] + } ch_assemblies = ch_assemblies.mix(ch_megahit_assemblies) + ch_assemblies_gz = ch_assemblies_gz.mix(ch_megahit_assemblies_gz) ch_versions = ch_versions.mix(MEGAHIT.out.versions.first()) } @@ -558,7 +565,13 @@ workflow MAG { def meta_new = meta + [assembler: 'SPAdes'] [ meta_new, assembly ] } + ch_spades_assemblies_gz = SPADES.out.assembly_gz + .map { meta, assembly -> + def meta_new = meta + [assembler: 'SPAdes'] + [ meta_new, assembly ] 
+ } ch_assemblies = ch_assemblies.mix(ch_spades_assemblies) + ch_assemblies_gz = ch_assemblies_gz.mix(ch_spades_assemblies_gz) ch_versions = ch_versions.mix(SPADES.out.versions.first()) } @@ -577,7 +590,13 @@ workflow MAG { def meta_new = meta + [assembler: "SPAdesHybrid"] [ meta_new, assembly ] } + ch_spadeshybrid_assemblies_gz = SPADESHYBRID.out.assembly_gz + .map { meta, assembly -> + def meta_new = meta + [assembler: "SPAdesHybrid"] + [ meta_new, assembly ] + } ch_assemblies = ch_assemblies.mix(ch_spadeshybrid_assemblies) + ch_assemblies_gz = ch_assemblies_gz.mix(ch_spadeshybrid_assemblies_gz) ch_versions = ch_versions.mix(SPADESHYBRID.out.versions.first()) } } else { @@ -963,7 +982,7 @@ workflow MAG { // Samplesheet generation // if ( params.generate_downstream_samplesheets ) { - GENERATE_DOWNSTREAM_SAMPLESHEETS ( ch_short_reads_assembly, ch_assemblies ) + GENERATE_DOWNSTREAM_SAMPLESHEETS ( ch_short_reads_assembly, ch_assemblies_gz ) } // From 0163690aa05309adc637026c3e62149b0fb8292c Mon Sep 17 00:00:00 2001 From: "James A. 
Fellows Yates" Date: Thu, 24 Oct 2024 14:09:18 +0000 Subject: [PATCH 16/16] Complete template merge --- workflows/mag.nf | 22 +++++++++++----------- 1 file changed, 11 insertions(+), 11 deletions(-) diff --git a/workflows/mag.nf b/workflows/mag.nf index f64f8313..a06dc78a 100644 --- a/workflows/mag.nf +++ b/workflows/mag.nf @@ -567,16 +567,16 @@ workflow MAG { .map { id, meta_long, long_reads, meta_short, short_reads -> [meta_short, short_reads, [], long_reads] } METASPADESHYBRID(ch_reads_spadeshybrid, [], []) - ch_spadeshybrid_assemblies = METASPADESHYBRID.out.scaffolds.map { meta, assembly -> - def meta_new = meta + [assembler: "SPAdesHybrid"] - [meta_new, assembly] - } - ch_spadeshybrid_assemblies_gz = METASPADESHYBRID.out.assembly_gz.map { meta, assembly -> - def meta_new = meta + [assembler: "SPAdesHybrid"] - [meta_new, assembly] - } - ch_assemblies = ch_assemblies.mix(ch_spadeshybrid_assemblies) - ch_assemblies_gz = ch_assemblies_gz.mix(ch_spadeshybrid_assemblies_gz) + ch_spadeshybrid_assemblies = METASPADESHYBRID.out.scaffolds + .map { meta, assembly -> + def meta_new = meta + [assembler: "SPAdesHybrid"] + [meta_new, assembly] + } + .tap { ch_spadeshybrid_assemblies_gz } + + + ch_assembled_contigs = ch_assembled_contigs.mix(ch_spadeshybrid_assemblies) + ch_assembled_contigs_gz = ch_assembled_contigs_gz.mix(ch_spadeshybrid_assemblies_gz) ch_versions = ch_versions.mix(METASPADESHYBRID.out.versions.first()) } @@ -977,7 +977,7 @@ workflow MAG { // Samplesheet generation // if (params.generate_downstream_samplesheets) { - GENERATE_DOWNSTREAM_SAMPLESHEETS(ch_short_reads_assembly, ch_assemblies_gz) + GENERATE_DOWNSTREAM_SAMPLESHEETS(ch_short_reads_assembly, ch_assembled_contigs_gz) } //