-
Notifications
You must be signed in to change notification settings - Fork 0
/
main.nf
176 lines (149 loc) · 4.02 KB
/
main.nf
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
// Enable DSL 2 syntax
nextflow.enable.dsl = 2
// Print a startup banner recording every user-facing parameter and every
// container image used by the run, so the execution log captures the exact
// configuration. The string content is user-visible output — do not edit.
log.info """\
S O U P E R - S T A R
==========================================
samplesheet : $params.samplesheet
samplesheet_sep : $params.samplesheet_sep
umi_len : $params.umi_len
min_reads : $params.min_reads
genome_fasta : $params.genome_fasta
k : $params.k
flags : $params.flags
results : $params.results
CONTAINERS
=========================================
samtools : $params.container__samtools
souporcell : $params.container__souporcell
misc : $params.container__misc
archr : $params.container__archr
"""
// Import processes
include {
sam_to_bam;
add_tags;
filter_reads;
merge_sample;
dedup;
index;
make_bed;
merge_all;
get_barcodes;
join_barcodes;
souporcell;
summarize;
archr;
} from './processes.nf'
workflow {
// Required parameters default to the string "false" when unset
// (presumably via nextflow.config — not visible here); fail fast if missing.
if ( "${params.results}" == "false" ){error "Must provide parameter: results"}
if ( "${params.samplesheet}" == "false" ){error "Must provide parameter: samplesheet"}
if ( "${params.genome_fasta}" == "false" ){error "Must provide parameter: genome_fasta"}
// Parse the samplesheet (CSV/TSV per params.samplesheet_sep) into a channel
// of [sample_name, file] tuples, verifying each referenced file exists.
Channel
.fromPath(
"${params.samplesheet}",
checkIfExists: true
)
.splitCsv(
header: true,
sep: params.samplesheet_sep,
strip: true
)
.map {
it -> [
"${it[params.sample_col]}",
file(
"${it[params.path_col]}",
checkIfExists: true
)
]
}
// Collect all rows, sort deterministically, and pair each tuple with a
// 1-based index: [sample, file] -> [sample, file, i]. The index is later
// emitted in the sample manifest (see summarize below).
.toSortedList()
.map { it -> [it, (1..it.size).toList()] }
.transpose()
.map { it -> [it[0][0], it[0][1], it[1]]}
// Route inputs by filename suffix: *.bam goes to `bam`, everything else
// is assumed to be SAM and goes to `sam`.
.branch {
bam: it[1].name.endsWith(".bam")
sam: true
}
.set { input }
// SAM -> BAM (process defined in processes.nf)
sam_to_bam(input.sam)
// Recombine converted files with native BAM inputs into one channel
sam_to_bam
.out
.mix(input.bam)
.set { bam_ch }
// Remove duplicates
dedup(bam_ch)
// Add unique tags for each input file
// NOTE(review): only dedup.out[0] is consumed — dedup presumably emits
// multiple outputs; confirm the others are intentionally unused.
add_tags(dedup.out[0])
// If the user specified a minimum number of reads per barcode,
// insert a filtering step; otherwise pass tagged BAMs through unchanged.
if ( "${params.min_reads}" != "0" ){
filter_reads(add_tags.out)
filter_reads.out.set { to_be_merged }
} else {
add_tags.out.set { to_be_merged }
}
// Get the barcodes used for each BAM
get_barcodes(to_be_merged)
// Merge together all of those barcode lists (sorted for determinism)
join_barcodes(get_barcodes.out.toSortedList())
// Merge per-sample: group tuples by their first element (sample name),
// sorting group members so merge order is reproducible.
merge_sample(
to_be_merged
.groupTuple(
sort: true
)
)
// Index the BAM
index(merge_sample.out)
// Make a channel containing:
// tuple val(sample), path(bam), path(bai)
merge_sample.out.join(index.out).set { indexed_bam }
// Make a BED file from the BAM with its index
make_bed(indexed_bam)
// Merge the sample-level BAMs together: flatten [bam, bai] pairs into a
// single sorted list of files for the merge_all process.
merge_all(
indexed_bam
.map { it -> [it[1], it[2]] }
.flatten()
.toSortedList()
)
// Make sure that the genome FASTA exists
genome = file(
"${params.genome_fasta}",
checkIfExists: true
)
// Make sure that the genome index (.fai, expected alongside the FASTA) exists
genome_index = file(
"${params.genome_fasta}.fai",
checkIfExists: true
)
// Run souporcell genotype demultiplexing on the fully-merged BAM
souporcell(
merge_all.out,
join_barcodes.out,
genome,
genome_index
)
// Run archR on the per-sample BED files (second tuple element of
// make_bed.out) plus the souporcell results
archr(
make_bed
.out
.map { it -> it[1] }
.toSortedList(),
souporcell.out
)
// Post-process the souporcell outputs. The manifest maps each sample name
// to its 1-based input index ("<sample>,<index>" per line), written as
// sample_manifest.csv.
summarize(
souporcell.out,
join_barcodes.out,
bam_ch
.map { it -> "${it[0]},${it[2]}" }
.collectFile(
name: "sample_manifest.csv",
newLine: true
)
)
}