Skip to content

Commit

Permalink
Merge pull request PennChopMicrobiomeProgram#1 from ctanes/master
Browse files Browse the repository at this point in the history
Cleaned diversity rules and took out report
  • Loading branch information
ctanes authored Dec 9, 2021
2 parents 9e86dc8 + d62cdd1 commit 0a4f21b
Show file tree
Hide file tree
Showing 11 changed files with 167 additions and 1,140 deletions.
8 changes: 1 addition & 7 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -113,10 +113,4 @@ snakemake \
- Representative sequences (fasta)
#### Output
- Vsearch report (tsv) customized to be like BLAST results (see config.yml)
- Vsearch list of representative sequences that aligned (fasta)

### Basic Bioinformatics Report
#### Input
- QIIME2 compatible mapping file and output from diversity calculation
#### Output
- Basic Bioinformatics Report containing heatmap, relative proportion bar graph, alpha diversity plots, beta diversity plots, and per sample read counts in HTML format.
- Vsearch list of representative sequences that aligned (fasta)
11 changes: 5 additions & 6 deletions Snakefile
Original file line number Diff line number Diff line change
Expand Up @@ -13,12 +13,12 @@ trunc_len_f = config["denoise"]["trunc_len_f"]
trim_left_r = config["denoise"]["trim_left_r"]
trunc_len_r = config["denoise"]["trunc_len_r"]

DENOISE_DIR = (QIIME_OUTPUT_DIR + "/denoise" +
"_fwd_" + str(trim_left_f) + "-" + str(trunc_len_f) +
"_rev_" + str(trim_left_r) + "-" + str(trunc_len_r)
)
DENOISE_DIR = (QIIME_OUTPUT_DIR + "/denoise-results")

CORE_METRIC_DIR = (DENOISE_DIR + "/core-metrics" +
"_sampling_depth_" + str(config["diversity"]["sampling_depth"]))
"_sampling_depth_" + str(config["diversity"]["sampling_depth"]))

CORE_METRIC_UNRAREFIED_DIR = (DENOISE_DIR + "/core-metrics-unrarefied")

include: "rules/targets/targets.rules"
include: "rules/demux/dnabc.rules"
Expand All @@ -29,7 +29,6 @@ include: "rules/taxonomy/taxonomy.rules"
include: "rules/tree/tree.rules"
include: "rules/diversity/diversity.rules"
include: "rules/unassign/unassign.rules"
include: "rules/report/report.rules"
include: "rules/dada2_species/dada2.rules"
include: "rules/vsearch/vsearch.rules"

Expand Down
10 changes: 1 addition & 9 deletions config.yml
Original file line number Diff line number Diff line change
Expand Up @@ -20,19 +20,11 @@ taxonomy:
classifier_fp: "/home/danielsg/16S_QIIME2/QIIME2_data/gg-13-8-99-nb-classifier.qza"

diversity:
sampling_depth: 30
sampling_depth: 1000

unassign:
unassigner_species_fp: "/home/danielsg/16S_QIIME2/unassign_data/unassigner_species.fasta"

report:
study_group_var: "investigator"
min_reads: 5
richness_subsample_size: 20
rmd: "/home/danielsg/16S_QIIME2/scripts/Basic_Bioinformatics_Report.Rmd"
R_helper: "/home/danielsg/16S_QIIME2/scripts/helper_functions.R"
report_rdata: "/home/danielsg/16S_QIIME2/test/report.RData" # rmd can only have one output so have to put this here

dada2:
rscript: "/home/danielsg/16S_QIIME2/scripts/dada2.R"
species_training_set: "/home/danielsg/16S_QIIME2/dada2_data/rdp_species_assignment_16.fa.gz"
Expand Down
4 changes: 2 additions & 2 deletions dryrun_snakemake.bash
Original file line number Diff line number Diff line change
Expand Up @@ -7,8 +7,8 @@
#$ -j y

#Uncomment the next two lines if you want to 'qsub' this script
source ~/.bashrc #needed to make "conda" command to work
conda activate qiime2-snakemake
#source ~/.bashrc #needed to make "conda" command to work
#conda activate qiime2-snakemake

set -xeuo pipefail

Expand Down
15 changes: 14 additions & 1 deletion rules/denoise/denoise.rules
Original file line number Diff line number Diff line change
Expand Up @@ -46,7 +46,7 @@ rule run_dada2:
params:
n_threads = config["denoise"]["n_threads"]
output:
feature_table = DENOISE_DIR + "/table.qza",
feature_table = DENOISE_DIR + "/table_main.qza",
rep_seq = DENOISE_DIR + "/representative-seqs.qza",
stat = DENOISE_DIR + "/denoise_stats.qza"
shell:
Expand All @@ -62,3 +62,16 @@ rule run_dada2:
--o-representative-sequences {output.rep_seq} \
--o-denoising-stats {output.stat}
"""

# Filter the DADA2 feature table by per-sample total frequency.
# Input:  DENOISE_DIR/table_main.qza (the feature table written by run_dada2).
# Output: DENOISE_DIR/table.qza, the table path that the diversity rules read.
# `--p-min-frequency 1` keeps only samples whose total frequency is at least 1,
# i.e. samples left with no reads are dropped (hence the rule name).
rule filter_empty:
input:
DENOISE_DIR + "/table_main.qza"
output:
DENOISE_DIR + "/table.qza"
shell:
"""
qiime feature-table filter-samples \
--i-table {input} \
--p-min-frequency 1 \
--o-filtered-table "{output}"
"""
126 changes: 126 additions & 0 deletions rules/diversity/diversity.rules
Original file line number Diff line number Diff line change
Expand Up @@ -134,3 +134,129 @@ rule get_diversity:
--o-bray-curtis-emperor {output.bc_emperor}
"""

# Compute the unweighted UniFrac distance matrix straight from
# DENOISE_DIR/table.qza; this rule runs no rarefaction step of its own.
# Inputs: rooted tree (rooted-tree.qza) and feature table (table.qza) in DENOISE_DIR.
# Output: unweighted_unifrac_distance_matrix.qza under CORE_METRIC_UNRAREFIED_DIR.
rule get_uu_unrarefied:
input:
rooted_tree = DENOISE_DIR + "/rooted-tree.qza",
table = DENOISE_DIR + "/table.qza"
output:
CORE_METRIC_UNRAREFIED_DIR + "/unweighted_unifrac_distance_matrix.qza"
shell:
"""
qiime diversity beta-phylogenetic \
--i-phylogeny {input.rooted_tree} \
--i-table {input.table} \
--p-metric unweighted_unifrac \
--o-distance-matrix {output}
"""

# Rename the exported unweighted UniFrac table to its final target name.
# Input:  uu/distance-matrix.tsv (the declared output of uu_convert_unrarefied).
# Params: the uu/ export directory, removed after the file has been moved out.
# Output: uu_unrarefied.tsv under CORE_METRIC_UNRAREFIED_DIR.
# Note: rmdir refuses to remove a non-empty directory, so this step errors if
# uu/ contains anything other than the file that was just moved.
rule move_uu_unrarefied:
input:
CORE_METRIC_UNRAREFIED_DIR + "/uu/distance-matrix.tsv"
params:
CORE_METRIC_UNRAREFIED_DIR + "/uu"
output:
CORE_METRIC_UNRAREFIED_DIR + "/uu_unrarefied.tsv"
shell:
"""
mv {input} {output}
rmdir {params}
"""

# Export the unweighted UniFrac .qza artifact to plain files.
# Input:  unweighted_unifrac_distance_matrix.qza (output of get_uu_unrarefied).
# Params: the uu/ directory, passed to `qiime tools export` as --output-path.
# Output: uu/distance-matrix.tsv, the file this rule declares it expects inside
#         the export directory once the command has run.
rule uu_convert_unrarefied:
input:
CORE_METRIC_UNRAREFIED_DIR + "/unweighted_unifrac_distance_matrix.qza"
params:
CORE_METRIC_UNRAREFIED_DIR + "/uu"
output:
CORE_METRIC_UNRAREFIED_DIR + "/uu/distance-matrix.tsv"
shell:
"""
qiime tools export \
--input-path {input} \
--output-path {params}
"""

# Compute the weighted UniFrac distance matrix straight from
# DENOISE_DIR/table.qza; this rule runs no rarefaction step of its own.
# Inputs: rooted tree (rooted-tree.qza) and feature table (table.qza) in DENOISE_DIR.
# Output: weighted_unifrac_distance_matrix.qza under CORE_METRIC_UNRAREFIED_DIR.
rule get_wu_unrarefied:
input:
rooted_tree = DENOISE_DIR + "/rooted-tree.qza",
table = DENOISE_DIR + "/table.qza"
output:
CORE_METRIC_UNRAREFIED_DIR + "/weighted_unifrac_distance_matrix.qza"
shell:
"""
qiime diversity beta-phylogenetic \
--i-phylogeny {input.rooted_tree} \
--i-table {input.table} \
--p-metric weighted_unifrac \
--o-distance-matrix {output}
"""

# Rename the exported weighted UniFrac table to its final target name.
# Input:  wu/distance-matrix.tsv (the declared output of wu_convert_unrarefied).
# Params: the wu/ export directory, removed after the file has been moved out.
# Output: wu_unrarefied.tsv under CORE_METRIC_UNRAREFIED_DIR.
# Note: rmdir refuses to remove a non-empty directory, so this step errors if
# wu/ contains anything other than the file that was just moved.
rule move_wu_unrarefied:
input:
CORE_METRIC_UNRAREFIED_DIR + "/wu/distance-matrix.tsv"
params:
CORE_METRIC_UNRAREFIED_DIR + "/wu"
output:
CORE_METRIC_UNRAREFIED_DIR + "/wu_unrarefied.tsv"
shell:
"""
mv {input} {output}
rmdir {params}
"""

# Export the weighted UniFrac .qza artifact to plain files.
# Input:  weighted_unifrac_distance_matrix.qza (output of get_wu_unrarefied).
# Params: the wu/ directory, passed to `qiime tools export` as --output-path.
# Output: wu/distance-matrix.tsv, the file this rule declares it expects inside
#         the export directory once the command has run.
rule wu_convert_unrarefied:
input:
CORE_METRIC_UNRAREFIED_DIR + "/weighted_unifrac_distance_matrix.qza"
params:
CORE_METRIC_UNRAREFIED_DIR + "/wu"
output:
CORE_METRIC_UNRAREFIED_DIR + "/wu/distance-matrix.tsv"
shell:
"""
qiime tools export \
--input-path {input} \
--output-path {params}
"""


# Compute the faith_pd alpha-diversity metric straight from
# DENOISE_DIR/table.qza; this rule runs no rarefaction step of its own.
# Inputs: rooted tree (rooted-tree.qza) and feature table (table.qza) in DENOISE_DIR.
# Output: faith_pd_vector.qza under CORE_METRIC_UNRAREFIED_DIR.
rule get_faith_unrarefied:
input:
rooted_tree = DENOISE_DIR + "/rooted-tree.qza",
table = DENOISE_DIR + "/table.qza"
output:
CORE_METRIC_UNRAREFIED_DIR + "/faith_pd_vector.qza"
shell:
"""
qiime diversity alpha-phylogenetic \
--i-phylogeny {input.rooted_tree} \
--i-table {input.table} \
--p-metric faith_pd \
--o-alpha-diversity {output}
"""

# Rename the exported faith_pd table to its final target name.
# Input:  faith/alpha-diversity.tsv (the declared output of faith_convert_unrarefied).
# Params: the faith/ export directory, removed after the file has been moved out.
# Output: faith_pd_unrarefied.tsv under CORE_METRIC_UNRAREFIED_DIR.
# Note: rmdir refuses to remove a non-empty directory, so this step errors if
# faith/ contains anything other than the file that was just moved.
rule move_faith_unrarefied:
input:
CORE_METRIC_UNRAREFIED_DIR + "/faith/alpha-diversity.tsv"
params:
CORE_METRIC_UNRAREFIED_DIR + "/faith"
output:
CORE_METRIC_UNRAREFIED_DIR + "/faith_pd_unrarefied.tsv"
shell:
"""
mv {input} {output}
rmdir {params}
"""

# Export the faith_pd .qza artifact to plain files.
# Input:  faith_pd_vector.qza (output of get_faith_unrarefied).
# Params: the faith/ directory, passed to `qiime tools export` as --output-path.
# Output: faith/alpha-diversity.tsv, the file this rule declares it expects
#         inside the export directory once the command has run.
rule faith_convert_unrarefied:
input:
CORE_METRIC_UNRAREFIED_DIR + "/faith_pd_vector.qza"
params:
CORE_METRIC_UNRAREFIED_DIR + "/faith"
output:
CORE_METRIC_UNRAREFIED_DIR + "/faith/alpha-diversity.tsv"
shell:
"""
qiime tools export \
--input-path {input} \
--output-path {params}
"""
18 changes: 0 additions & 18 deletions rules/report/report.rules

This file was deleted.

14 changes: 7 additions & 7 deletions rules/targets/targets.rules
Original file line number Diff line number Diff line change
Expand Up @@ -14,12 +14,13 @@ TARGET_TREE = [str(DENOISE_DIR + "/rooted-tree.qza")]

TARGET_DIVERSITY = [str(CORE_METRIC_DIR + "/faith_pd.tsv"),
str(CORE_METRIC_DIR + "/uu.tsv"),
str(CORE_METRIC_DIR + "/wu.tsv")]
str(CORE_METRIC_DIR + "/wu.tsv"),
str(CORE_METRIC_UNRAREFIED_DIR + "/faith_pd_unrarefied.tsv"),
str(CORE_METRIC_UNRAREFIED_DIR + "/uu_unrarefied.tsv"),
str(CORE_METRIC_UNRAREFIED_DIR + "/wu_unrarefied.tsv")]

TARGET_UNASSIGN = [str(DENOISE_DIR + "/representative_seq_fasta/unassigned/unassigner_output.tsv")]

TARGET_REPORT = [str(CORE_METRIC_DIR + "/basic_bioinformatics_report/Basic_Bioinformatics_Report.html")]

TARGET_DADA2SP = [str(DENOISE_DIR + "/representative_seq_fasta/dada2/dada2_species_assignments.tsv"),
str(DENOISE_DIR + "/representative_seq_fasta/dada2/dada2.RData")]

Expand All @@ -37,8 +38,7 @@ TARGET_ALL = (
TARGET_TAXONOMY +
TARGET_TREE +
TARGET_DIVERSITY +
TARGET_UNASSIGN +
TARGET_REPORT +
TARGET_DADA2SP +
TARGET_VSEARCH
TARGET_UNASSIGN #+
#TARGET_DADA2SP +
#TARGET_VSEARCH
)
19 changes: 11 additions & 8 deletions run_snakemake.bash
Original file line number Diff line number Diff line change
@@ -1,14 +1,17 @@
#!/usr/bin/env bash

#$ -cwd
#$ -r n
#$ -V
#$ -l h_vmem=2G
#$ -j y
#SBATCH --mem=2G
#SBATCH -n 1
#SBATCH --export=ALL
#SBATCH --mail-user=YOUR_EMAIL@example.com
#SBATCH --mail-type=END,FAIL
#SBATCH --no-requeue
#SBATCH -t 12:00:00
#SBATCH --output=slurm_%x_%j.out

#Uncomment the next two lines if you want to 'sbatch' this script
source ~/.bashrc #needed to make "conda" command to work
conda activate qiime2-snakemake
#source ~/.bashrc #needed to make "conda" command to work
#conda activate qiime2-snakemake

set -xeuo pipefail

Expand All @@ -28,4 +31,4 @@ snakemake \
--notemp \
--printshellcmds \
--cluster \
"qsub -cwd -r n -V -l h_vmem={cluster.h_vmem} -l mem_free={cluster.mem_free} -pe smp {threads}"
"sbatch --mem=10G -t 12:00:00 -n 4 --export=ALL --no-requeue"
Loading

0 comments on commit 0a4f21b

Please sign in to comment.