From 69076a9f2f33e024ad51cda19dc1d618255ae3f3 Mon Sep 17 00:00:00 2001 From: Yan Gao Date: Tue, 20 Feb 2018 13:52:44 -0800 Subject: [PATCH] polish Snakefile --- Snakefile | 13 ++++++++----- src/update_gtf.c | 6 ++++-- 2 files changed, 12 insertions(+), 7 deletions(-) diff --git a/Snakefile b/Snakefile index 74d1499..a8d742c 100644 --- a/Snakefile +++ b/Snakefile @@ -63,7 +63,8 @@ rule sam_novel_gtf: rRNA=config["genome"]["rRNA"], gtf=config["genome"]["gtf"] output: - "gtf/{sample}_sam_novel.gtf" + filtered_bam="alignment/{sample}.filtered.bam", + sam_gtf="gtf/{sample}_sam_novel.gtf" threads: config["novel_gtf"]["threads"] log: @@ -74,7 +75,8 @@ rule sam_novel_gtf: lr2rmats=config["exe_files"]["lr2rmats"], samtools=config["exe_files"]["samtools"] shell: - "{params.lr2rmats} filter {input.sam} -r {input.rRNA} 2> {log} | {params.samtools} sort -@ {threads} 2>> {log} | {params.lr2rmats} update-gtf - {input.gtf} 2>> {log} > {output}" + "{params.lr2rmats} filter {input.sam} -r {input.rRNA} 2> {log} | {params.samtools} sort -@ {threads} > {output.filtered_bam} 2>> {log}; " + "{params.lr2rmats} update-gtf {output.filtered_bam} {input.gtf} 2>> {log} > {output.sam_gtf}" # merge and sort gtf rule new_gtf: @@ -126,8 +128,9 @@ rule star_map: rule gtf_novel_gtf: input: gtf=config["genome"]["gtf"], - novel_gtf="gtf/{sample}_sam_novel.gtf", - bam="alignment/{sample}.STARAligned.out.bam", + #novel_gtf="gtf/{sample}_sam_novel.gtf", + filtered_bam="alignment/{sample}.filtered.bam", + #bam="alignment/{sample}.STARAligned.out.bam", SJ="alignment/{sample}.STARSJ.out.tab" output: update_gtf="gtf/{sample}_gtf_novel.gtf", @@ -145,7 +148,7 @@ rule gtf_novel_gtf: sort_gtf=config["exe_files"]["sort_gtf"], samtools=config["exe_files"]["samtools"] shell: - "{params.lr2rmats} update-gtf -mg -b {input.bam} -j {input.SJ} {input.novel_gtf} {input.gtf} -y {output.summary} -A {output.detail} -k {output.known_gtf} -v {output.novel_gtf} -u {output.unrecog_gtf} > {output.update_gtf} 2> {log}" + "{params.lr2rmats} update-gtf -j {input.SJ} {input.filtered_bam} {input.gtf} -y {output.summary} -A {output.detail} -k {output.known_gtf} -v {output.novel_gtf} -u {output.unrecog_gtf} > {output.update_gtf} 2> {log}" rule update_gtf: input: diff --git a/src/update_gtf.c b/src/update_gtf.c index b2d2382..ca2fd4d 100644 --- a/src/update_gtf.c +++ b/src/update_gtf.c @@ -912,9 +912,11 @@ void check_trans(read_trans_t *bam_T, read_trans_t *anno_T, sj_t *sj_group, int } else if (ugp->split_trans) { // has unreliable novel splice junction // split into short transcripts read_trans_t *split_read_trans = split_trans(bam_t); - for (j = 0; j < split_read_trans->trans_n; ++j) + for (j = 0; j < split_read_trans->trans_n; ++j) { add_read_trans(novel_T, split_read_trans->t[j]); - if (merge_trans(split_read_trans->t+j, updated_T, ugp->ss_dis, ugp->single_exon_ovlp_frac) == 0) add_read_trans(updated_T, split_read_trans->t[j]); + if (merge_trans(split_read_trans->t+j, updated_T, ugp->ss_dis, ugp->single_exon_ovlp_frac) == 0) + add_read_trans(updated_T, split_read_trans->t[j]); + } read_trans_free(split_read_trans); } } else { // novel and no_known_site