Skip to content

Commit

Permalink
Merge pull request junglee0713#18 from scottdaniel/master
Browse files Browse the repository at this point in the history
added dada2 + more
  • Loading branch information
junglee0713 authored Jun 29, 2020
2 parents 8bbba53 + df1dbe9 commit ba036be
Show file tree
Hide file tree
Showing 80 changed files with 106 additions and 8,187 deletions.
8 changes: 8 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -51,3 +51,11 @@ test/demultiplexed_fastq_gz
#cached python files
scripts/__pycache__/*

config_361.yml
config_499.yml
.DS_Store
test/QIIME_output/
test/cluster_logs/
test/manifest.csv
test/total_read_counts.tsv
test/report.RData
3 changes: 2 additions & 1 deletion Snakefile
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@ DENOISE_DIR = (QIIME_OUTPUT_DIR + "/denoise" +
"_fwd_" + str(trim_left_f) + "-" + str(trunc_len_f) +
"_rev_" + str(trim_left_r) + "-" + str(trunc_len_r)
)
CORE_METRIC_DIR = (DENOISE_DIR + "/core-metrics" +
CORE_METRIC_DIR = (DENOISE_DIR + "/core-metrics" +
"_sampling_depth_" + str(config["diversity"]["sampling_depth"]))

include: "rules/targets/targets.rules"
Expand All @@ -30,6 +30,7 @@ include: "rules/tree/tree.rules"
include: "rules/diversity/diversity.rules"
include: "rules/unassign/unassign.rules"
include: "rules/report/report.rules"
include: "rules/dada2_species/dada2.rules"

workdir: PROJECT_DIR

Expand Down
16 changes: 11 additions & 5 deletions config.yml
Original file line number Diff line number Diff line change
@@ -1,9 +1,10 @@
all:
project_dir: "/home/danielsg/16S_QIIME2/test"
mux_dir: "/home/danielsg/16S_QIIME2/test/multiplexed_fastq"
project_dir: "/home/danielsg/16S_QIIME2/test"
mux_dir: "multiplexed_fastq"
mapping: "test_mapping_file.tsv"
admin_email: "[email protected]"

#set revcomp to true if demultiplexing yields 0 reads
demux:
mismatch: 0
revcomp: false
Expand All @@ -16,7 +17,7 @@ denoise:
n_threads: 6

taxonomy:
classifier_fp: "/home/danielsg/16S_QIIME2/QIIME2_data/gg-13-8-99-nb-classifier.qza"
classifier_fp: "/home/danielsg/16S_QIIME2/QIIME2_data/gg-13-8-99-nb-classifier.qza"

diversity:
sampling_depth: 30
Expand All @@ -25,8 +26,13 @@ unassign:
unassigner_species_fp: "/home/danielsg/16S_QIIME2/unassign_data/unassigner_species.fasta"

report:
study_group_var: "Investigator"
study_group_var: "investigator"
min_reads: 5
richness_subsample_size: 20
rmd: "/home/danielsg/16S_QIIME2/scripts/Basic_Bioinformatics_Report.Rmd"
R_helper: "/home/danielsg/16S_QIIME2/scripts/helper_functions.R"
R_helper: "/home/danielsg/16S_QIIME2/scripts/helper_functions.R"
report_rdata: "/home/danielsg/16S_QIIME2/test/report.RData" # rmd can only have one output so have to put this here

dada2:
rscript: "/home/danielsg/16S_QIIME2/scripts/dada2.R"
species_training_set: "/home/danielsg/16S_QIIME2/dada2_data/rdp_species_assignment_16.fa.gz"
Binary file added dada2_data/rdp_species_assignment_16.fa.gz
Binary file not shown.
13 changes: 12 additions & 1 deletion dryrun_snakemake.bash
Original file line number Diff line number Diff line change
@@ -1,4 +1,15 @@
#!/bin/bash
#!/usr/bin/env bash

#$ -cwd
#$ -r n
#$ -V
#$ -l h_vmem=2G
#$ -j y

#Uncomment the next two lines if you want to 'qsub' this script
#source ~/.bashrc #needed to make "conda" command to work
#conda activate qiime2-snakemake

set -xeuo pipefail

if [ $# -ne 1 ]; then
Expand Down
12 changes: 12 additions & 0 deletions rules/dada2_species/dada2.rules
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
# Aggregate target rule: it has no command of its own and only lists
# TARGET_DADA2SP as input, so requesting this rule asks Snakemake to
# produce every path in that list.
rule all_dada2_species:
input:
TARGET_DADA2SP

# Runs the script configured at config["dada2"]["rscript"] on the
# representative-sequence FASTA under DENOISE_DIR and declares two outputs:
# a species-assignment TSV and an RData file.
rule dada2_assignSpecies:
input:
# Single unnamed input: the representative sequences in FASTA format.
DENOISE_DIR + "/representative_seq_fasta/dna-sequences.fasta"
output:
# Named outputs, so the script can look them up by name
# (dada2_output, dada2_rdata) instead of by position.
dada2_output = DENOISE_DIR + "/representative_seq_fasta/dada2/dada2_species_assignments.tsv",
dada2_rdata = DENOISE_DIR + "/representative_seq_fasta/dada2/dada2.RData"
script:
# The script path is taken from the config rather than hard-coded here.
str(config["dada2"]["rscript"])
4 changes: 2 additions & 2 deletions rules/report/report.rules
Original file line number Diff line number Diff line change
Expand Up @@ -6,13 +6,13 @@ rule generate_report:
input:
mapping_fp = MAPPING_FP,
demux_count_fp = QIIME_OUTPUT_DIR + "/demux_stat/per-sample-fastq-counts.csv",
feature_table_fp = DENOISE_DIR + "/feature_table/feature-table.tsv",
feature_table_fp = DENOISE_DIR + "/feature_table/feature-table.tsv",
taxo_assignment_fp = DENOISE_DIR + "/taxonomy/taxonomy.tsv",
faith_fp = CORE_METRIC_DIR + "/faith_pd.tsv",
uu_fp = CORE_METRIC_DIR + "/uu.tsv",
wu_fp = CORE_METRIC_DIR + "/wu.tsv"
output:
CORE_METRIC_DIR + "/basic_bioinformatics_report/Basic_Bioinformatics_Report.html"
script:
str(config["report"]["rmd"])
str(config["report"]["rmd"])

6 changes: 5 additions & 1 deletion rules/targets/targets.rules
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,9 @@ TARGET_UNASSIGN = [str(DENOISE_DIR + "/representative_seq_fasta/unassigned/unass

TARGET_REPORT = [str(CORE_METRIC_DIR + "/basic_bioinformatics_report/Basic_Bioinformatics_Report.html")]

# Target paths for the dada2 species step, both under
# DENOISE_DIR/representative_seq_fasta/dada2: the species-assignment TSV
# and the RData file.
TARGET_DADA2SP = [str(DENOISE_DIR + "/representative_seq_fasta/dada2/dada2_species_assignments.tsv"),
str(DENOISE_DIR + "/representative_seq_fasta/dada2/dada2.RData")]

TARGET_ALL = (
TARGET_DNABC +
TARGET_QIIME_IMPORT +
Expand All @@ -30,5 +33,6 @@ TARGET_ALL = (
TARGET_TREE +
TARGET_DIVERSITY +
TARGET_UNASSIGN +
TARGET_REPORT
TARGET_REPORT +
TARGET_DADA2SP
)
13 changes: 12 additions & 1 deletion run_snakemake.bash
Original file line number Diff line number Diff line change
@@ -1,4 +1,15 @@
#!/bin/bash
#!/usr/bin/env bash

#$ -cwd
#$ -r n
#$ -V
#$ -l h_vmem=2G
#$ -j y

#Uncomment the next two lines if you want to 'qsub' this script
#source ~/.bashrc #needed to make "conda" command to work
#conda activate qiime2-snakemake

set -xeuo pipefail

if [ $# -ne 1 ]; then
Expand Down
7 changes: 7 additions & 0 deletions scripts/Basic_Bioinformatics_Report.Rmd
Original file line number Diff line number Diff line change
Expand Up @@ -422,3 +422,10 @@ ReadCounts <- s1 %>% select(SampleID, demux_Read_Counts, denoise_Read_Counts, QC
arrange(QC_Read_Counts)
datatable(ReadCounts, fillContainer = FALSE, options = list(pageLength = 10))
```

```{r save environment}
# Save the R workspace to the file path stored in the Snakemake config
# under report -> report_rdata.
save.image(file = snakemake@config[["report"]][["report_rdata"]])
```

Binary file removed scripts/__pycache__/__init__.cpython-36.pyc
Binary file not shown.
Binary file removed scripts/__pycache__/util_functions.cpython-36.pyc
Binary file not shown.
35 changes: 35 additions & 0 deletions scripts/dada2.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,35 @@
#!/usr/bin/env Rscript

# Assign species to a set of DNA sequences with dada2's assignSpecies().
#
# This script relies on a `snakemake` object being present in the session
# (it is referenced below but never created here).
#
# Reads:
#   snakemake@config[["dada2"]][["species_training_set"]]
#     reference passed as the second argument of assignSpecies()
#   snakemake@input[[1]]
#     sequence file read with readDNAStringSet()
# Writes:
#   snakemake@output[["dada2_rdata"]]
#     workspace image (save.image)
#   snakemake@output[["dada2_output"]]
#     tab-separated table with columns query_id, species, sequence

library(tidyverse)
library(Biostrings)
library(dada2)

# from config.yml

rdp_species <- snakemake@config[["dada2"]][["species_training_set"]]

# from dada2.rules

my_seqs <- readDNAStringSet(snakemake@input[[1]])

# main

# One row per input record: its name and its sequence as plain text.
seqs_df <- tibble(
  query_id = names(my_seqs),
  sequence = as.character(my_seqs)
)

set.seed(100) # Initialize random number generator for reproducibility

# Object names are kept as they are on purpose: save.image() below stores
# them in the RData file, so renaming would change that file's contents.
genus.species <- assignSpecies(seqs_df$sequence, rdp_species)

# The row names of the result are moved into a `sequence` column and used
# as the join key to recover each sequence's query_id.
# NOTE(review): paste0() turns a missing Genus/Species into the literal
# text "NA", so rows without an assignment read "NA NA" -- confirm that is
# what consumers of the TSV expect.
genspec_df <- genus.species %>%
  as.data.frame() %>%
  rownames_to_column(var = "sequence") %>%
  inner_join(seqs_df, by = "sequence") %>%
  mutate(species = paste0(Genus, " ", Species)) %>% # making same as unassigner output
  select(query_id, species, sequence)

# from dada2.rules and targets.rules

save.image(file = snakemake@output[["dada2_rdata"]])

# The destination is passed positionally: readr 1.4.0 renamed this argument
# from `path` to `file` (and deprecated `path`), so naming either one ties
# the script to a readr version range. The second positional argument is
# the output file in both old and new readr.
write_tsv(genspec_df, snakemake@output[["dada2_output"]])
Binary file removed test/QIIME_output/demux.qza
Binary file not shown.
Binary file removed test/QIIME_output/demux.qzv
Binary file not shown.
1 change: 0 additions & 1 deletion test/QIIME_output/demux_stat/data.jsonp

This file was deleted.

Binary file removed test/QIIME_output/demux_stat/demultiplex-summary.pdf
Binary file not shown.
Binary file removed test/QIIME_output/demux_stat/demultiplex-summary.png
Binary file not shown.
1 change: 0 additions & 1 deletion test/QIIME_output/demux_stat/dist/bundle.js

This file was deleted.

27 changes: 0 additions & 27 deletions test/QIIME_output/demux_stat/dist/d3-license.txt

This file was deleted.

7 changes: 0 additions & 7 deletions test/QIIME_output/demux_stat/dist/vendor.bundle.js

This file was deleted.

Loading

0 comments on commit ba036be

Please sign in to comment.