forked from galaxyproject/tools-iuc
-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Add tool for BREW3R.r bioconductor package (galaxyproject#6058)
* add tool for BREW3R.r bioconductor package * lintr * fix tests * added the help section and lint * simplify brew3r docker url Co-authored-by: Björn Grüning <[email protected]> * use required_files * fix single quote thanks to @bernt-matthias * fix exclude pattern * exclude \ from exclude_pattern --------- Co-authored-by: Björn Grüning <[email protected]>
- Loading branch information
Showing
7 changed files
with
635 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,11 @@ | ||
categories: | ||
- Transcriptomics | ||
- RNA | ||
description: Extend 3' end of a GTF using another GTF as a template | ||
homepage_url: https://bioconductor.org/packages/release/bioc/html/BREW3R.r.html | ||
long_description: | | ||
This tool uses the BREW3R.r package the way it is used in the BREW3R workflow. It extends a GTF using information from another GTF. The process extends gene annotations without increasing the overlap between gene ids. | ||
name: brew3r_r | ||
owner: iuc | ||
remote_repository_url: https://github.com/galaxyproject/tools-iuc/tree/master/tools/brew3r_r | ||
type: unrestricted |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,122 @@ | ||
library("getopt") | ||
suppressPackageStartupMessages(library("rtracklayer")) | ||
library(GenomicRanges) | ||
library("BREW3R.r") | ||
|
||
# Script-wide R options:
# - stringsAsFactors = FALSE keeps character data as character when data
#   frames are built (the original spelling `stringAsFactors` set an unused
#   option and therefore had no effect).
# - useFancyQuotes = FALSE makes sQuote()/dQuote() emit plain ASCII quotes.
options(stringsAsFactors = FALSE, useFancyQuotes = FALSE)

# Command-line arguments that follow the script name.
args <- commandArgs(trailingOnly = TRUE)

# getopt specification, one row per option:
# - Column 1: the long flag name. A multi-character string.
# - Column 2: short flag alias of Column 1. A single-character string.
# - Column 3: Argument mask of the flag. An integer.
#   Possible values: 0=no argument, 1=required argument, 2=optional argument.
# - Column 4: Data type to which the flag's argument shall be cast using
#   storage.mode(). A multi-character string. This is only considered for
#   same-row Column 3 values of 1,2. Possible values: logical, integer,
#   double, complex, character. If numeric is encountered then it will be
#   converted to double.
# - Column 5 (optional): A brief description of the purpose of the option.
spec <- matrix(c(
  "help", "h", 0, "logical", "display help",
  "gtf_to_extend", "i", 1, "character", "input gtf file to be extended on 3'",
  "gtf_to_overlap", "g", 1, "character",
  "input gtf file that will be used to extend",
  "output", "o", 1, "character", "output extended gtf",
  "sup_output", "s", 1, "character",
  "supplementary output file with resolution of overlaps",
  "no_add", "n", 0, "logical", "do not add new exons",
  "exclude_pattern", "e", 1, "character",
  "do not extend genes with names matching this pattern",
  "filter_unstranded", "f", 0, "logical",
  "remove unstranded intervals from gtf_to_overlap which overlap intervals from gtf_to_extend of both strands",
  "quiet", "q", 0, "logical", "decrease verbosity",
  "verbose", "v", 0, "logical", "increase verbosity"
), byrow = TRUE, ncol = 5)

# Parse the command line; options that were not supplied are absent from
# `opt`, so the rest of the script tests them with is.null().
opt <- getopt(spec, opt = args)
|
||
# When --help is given, print the usage text generated from `spec`
# and leave with a non-zero exit status.
wants_help <- !is.null(opt$help)
if (wants_help) {
  cat(getopt(spec, usage = TRUE))
  q(status = 1)
}

# Every mandatory option must be present; stop at the first missing one.
for (required_flag in c("gtf_to_extend", "gtf_to_overlap", "output")) {
  if (is.null(opt[[required_flag]])) {
    stop("--", required_flag, " is required")
  }
}

# --quiet and --verbose cannot be combined.
be_quiet <- !is.null(opt$quiet)
be_verbose <- !is.null(opt$verbose)
if (be_quiet && be_verbose) {
  stop("quiet and verbose are mutually exclusive options")
}

# Translate the verbosity flags into the corresponding R options.
if (be_quiet) {
  options(rlib_message_verbosity = "quiet")
}
if (be_verbose) {
  options(BREW3R.r.verbose = "progression")
}
|
||
# Load both GTF files as GenomicRanges objects.
input_gr_to_extend <- rtracklayer::import(opt$gtf_to_extend, format = "gtf")
input_gr_template <- rtracklayer::import(opt$gtf_to_overlap, format = "gtf")

# Keep the CDS records of the annotation to extend in their own object.
input_gr_CDS <- subset(input_gr_to_extend, type == "CDS")

# Optionally drop template intervals that carry no strand information and
# overlap intervals of the annotation to extend with more than one strand.
if (!is.null(opt$filter_unstranded)) {
  # Positions (in the template) of intervals without strand information
  unstranded_idx <- which(strand(input_gr_template) == "*")
  if (length(unstranded_idx) > 0) {
    # Overlaps between the unstranded template intervals (query) and the
    # annotation to extend (subject).
    # NOTE(review): suppressWarnings() presumably hides warnings about
    # sequence names not shared by the two files - confirm.
    ov <- suppressWarnings(
      as.data.frame(findOverlaps(
        input_gr_template[unstranded_idx],
        input_gr_to_extend
      ))
    )
    # Attach the strand of each overlapped interval
    ov$strand <- as.factor(strand(input_gr_to_extend))[ov$subjectHits]
    # One row per distinct (query, strand) combination
    ov_simple <- unique(ov[, c("queryHits", "strand")])
    # A query that still appears more than once overlaps several strands
    multi_strand_query <- ov_simple$queryHits[duplicated(ov_simple$queryHits)]
    # queryHits index into `unstranded_idx`; map back to template positions
    to_remove <- unstranded_idx[multi_strand_query]
    # Only subset when there is something to remove: x[-integer(0)] selects
    # nothing, which would silently empty the whole template.
    if (length(to_remove) > 0) {
      input_gr_template <- input_gr_template[-to_remove]
    }
  }
}
|
||
# Extend the annotation with the BREW3R.r entry point. New exons are added
# unless --no_add was given; the overlap-resolution table is written to the
# --sup_output path when one was provided (NULL otherwise).
extended_exons <- extend_granges(
  input_gr_to_extend = input_gr_to_extend,
  input_gr_to_overlap = input_gr_template,
  add_new_exons = is.null(opt$no_add),
  overlap_resolution_fn = opt$sup_output
)

# For genes whose name matches --exclude_pattern, discard the extended
# records and put the original exons back instead.
if (!is.null(opt$exclude_pattern)) {
  extended_exons <- c(
    subset(
      extended_exons,
      !grepl(opt$exclude_pattern, gene_name)
    ),
    subset(
      input_gr_to_extend,
      type == "exon" & grepl(opt$exclude_pattern, gene_name)
    )
  )
}

# Append the CDS records saved from the input, sort without regard to
# strand, and write the result to the requested output file.
final_gr <- sort(c(extended_exons, input_gr_CDS), ignore.strand = TRUE)
rtracklayer::export.gff(final_gr, opt$output)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,152 @@ | ||
<tool id="brew3r_r" name="BREW3R.r" version="@TOOL_VERSION@+galaxy@VERSION_SUFFIX@" profile="20.01" license="MIT"> | ||
<description>Extend GTF</description> | ||
<macros> | ||
<token name="@TOOL_VERSION@">1.0.1</token> | ||
<token name="@VERSION_SUFFIX@">0</token> | ||
</macros> | ||
<edam_topics> | ||
<edam_topic>topic_3308</edam_topic> | ||
</edam_topics> | ||
<edam_operations> | ||
<edam_operation>operation_0362</edam_operation> | ||
</edam_operations> | ||
<xrefs> | ||
<!-- <xref type="bio.tools">BREW3R.r</xref> --> | ||
<xref type="bioconductor">BREW3R.r</xref> | ||
</xrefs> | ||
<requirements> | ||
<!-- <requirement type="package" version="@TOOL_VERSION@">bioconductor-brew3r.r</requirement> | ||
<requirement type="package" version="1.64.0">bioconductor-rtracklayer</requirement> | ||
<requirement type="package" version="1.20.4">r-getopt</requirement> --> | ||
<container type="docker">lldelisle/brew3r:v2</container> | ||
</requirements> | ||
<required_files> | ||
<include path="brew3r.r_script.R" /> | ||
</required_files> | ||
<version_command><![CDATA[ | ||
echo $(R --version | grep version | grep -v GNU)", BREW3R.r version" $(R --vanilla --slave -e "library(BREW3R.r); cat(sessionInfo()\$otherPkgs\$BREW3R.r\$Version)" 2> /dev/null | grep -v -i "WARNING: ") | ||
]]></version_command> | ||
<command detect_errors="exit_code"><![CDATA[ | ||
Rscript '${__tool_directory__}/brew3r.r_script.R' | ||
--gtf_to_extend '$gtf_to_extend' | ||
--gtf_to_overlap '$gtf_to_overlap' | ||
#if '$sup_output' == 'true': | ||
--sup_output '$output_table' | ||
#end if | ||
#if str($no_add) != '': | ||
'$no_add' | ||
#end if | ||
#if str($exclude_pattern) != '': | ||
--exclude_pattern '$exclude_pattern' | ||
#end if | ||
#if str($filter_unstranded) != '': | ||
'$filter_unstranded' | ||
#end if | ||
-o output.gtf | ||
]]></command> | ||
<inputs> | ||
<param argument="--gtf_to_extend" type="data" format="gtf" label="Input gtf file to be extended on 3'" help="Usually coming from public resource." /> | ||
<param argument="--gtf_to_overlap" type="data" format="gtf" label="Input gtf file that will be used to extend" help="Coming from StringTie or another public resource." /> | ||
<param argument="--sup_output" type="boolean" truevalue="true" falsevalue="false" checked="false" label="Get a supplementary output table with resolution of overlaps" /> | ||
<param argument="--no_add" type="boolean" truevalue="--no_add" falsevalue="" checked="false" label="Do not add new exons" /> | ||
<param argument="--exclude_pattern" type="text" value="" label="Do not extend genes with names matching this pattern" help="Leave empty if you want to extend all genes."> | ||
<sanitizer> | ||
<valid initial="string.printable"> | ||
<remove value="'"/> | ||
<remove value="\"/> | ||
</valid> | ||
</sanitizer> | ||
</param> | ||
<param argument="--filter_unstranded" type="boolean" truevalue="--filter_unstranded" falsevalue="" checked="false" label="Filter unstranded intervals that overlap genes of both strands" help="Recommended if you used StringTie on unstranded libraries." /> | ||
</inputs> | ||
<outputs> | ||
<data name="output" format="gtf" from_work_dir="output.gtf" label="${tool.name} on ${gtf_to_extend.name} and ${gtf_to_overlap.name}: GTF" /> | ||
<data name="output_table" format="tabular" label="${tool.name} on ${gtf_to_extend.name} and ${gtf_to_overlap.name}: overlap resolution"> | ||
<filter>sup_output == True</filter> | ||
</data> | ||
</outputs> | ||
<tests> | ||
<test expect_num_outputs="1"> | ||
<param name="gtf_to_extend" value="input.gtf"/> | ||
<param name="gtf_to_overlap" value="second_input.gtf"/> | ||
<output name="output" value="output.gtf" compare="diff" lines_diff="2"/> | ||
</test> | ||
<test expect_num_outputs="1"> | ||
<param name="gtf_to_extend" value="input.gtf"/> | ||
<param name="gtf_to_overlap" value="second_input.gtf"/> | ||
<param name="no_add" value="true"/> | ||
<output name="output"> | ||
<assert_contents> | ||
<has_n_lines n="31"/> | ||
<not_has_text text="BREW3R"/> | ||
</assert_contents> | ||
</output> | ||
</test> | ||
<test expect_num_outputs="1"> | ||
<param name="gtf_to_extend" value="input.gtf"/> | ||
<param name="gtf_to_overlap" value="second_input.gtf"/> | ||
<param name="exclude_pattern" value="^Gm"/> | ||
<output name="output"> | ||
<assert_contents> | ||
<has_n_lines n="34"/> | ||
<not_has_text text="exon111.ext"/> | ||
</assert_contents> | ||
</output> | ||
</test> | ||
<test expect_num_outputs="1"> | ||
<param name="gtf_to_extend" value="input.gtf"/> | ||
<param name="gtf_to_overlap" value="second_input.gtf"/> | ||
<param name="exclude_pattern" value="Gm$"/> | ||
<output name="output" value="output.gtf" compare="diff" lines_diff="2"/> | ||
<assert_command> | ||
<has_text text="--exclude_pattern 'Gm$'"/> | ||
</assert_command> | ||
</test> | ||
<test expect_num_outputs="1"> | ||
<param name="gtf_to_extend" value="input.gtf"/> | ||
<param name="gtf_to_overlap" value="second_input.gtf"/> | ||
<param name="filter_unstranded" value="true"/> | ||
<output name="output"> | ||
<assert_contents> | ||
<has_n_lines n="36"/> | ||
<not_has_text text="exon121.ext"/> | ||
</assert_contents> | ||
</output> | ||
</test> | ||
</tests> | ||
<help><![CDATA[ | ||
.. class:: infomark | ||
**What it does** | ||
This tool extends the annotations of an input GTF file at their 3' end using annotations from another input GTF. During the process, it makes sure that no new overlaps between different genes are created. | ||
Usage | ||
..... | ||
**Input** | ||
2 GTF files: | ||
- First one to extend usually comes from a public resource. | ||
- Second one that is used as template may come from a public resource or from StringTie. | ||
**Output** | ||
1 GTF file with all exons from the input GTF where some of them have been extended (the exon_id ends with '.ext') and potentially new exons (the exon_id contains BREW3R). | ||
]]></help> | ||
<citations> | ||
<citation type="bibtex"> | ||
@unpublished{None, | ||
author = {Lucille Lopez-Delisle}, | ||
title = {None}, | ||
year = {None}, | ||
eprint = {None}, | ||
url = {https://github.com/lldelisle/BREW3R.r} | ||
}</citation> | ||
</citations> | ||
</tool> |
Oops, something went wrong.