Skip to content

Commit

Permalink
Merge pull request galaxyproject#6665 from MaraBesemer/main
Browse files Browse the repository at this point in the history
add: variables topX and normalize to Phyloseq Bar Plot
  • Loading branch information
bgruening authored Jan 10, 2025
2 parents 637a6eb + 8282568 commit 26f87cc
Show file tree
Hide file tree
Showing 2 changed files with 140 additions and 40 deletions.
99 changes: 88 additions & 11 deletions tools/phyloseq/phyloseq_plot_bar.R
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,34 @@ option_list <- list(
make_option(c("--output"),
action = "store", dest = "output",
help = "Output file (PDF)"
),
make_option(c("--topX"),
action = "store", dest = "topX", default = NULL,
help = "Show only the top X taxa based on abundance (e.g., '10') (optional)"
),
make_option(c("--keepOthers"),
action = "store_true", dest = "keepOthers", default = FALSE,
help = "Keep taxa not in topX and label them as 'Others' (optional)"
),
make_option(c("--keepNonAssigned"),
action = "store_true", dest = "keepNonAssigned", default = FALSE,
help = "Keep taxa labeled as 'Not Assigned' (optional)"
),
make_option(c("--normalize"),
action = "store_true", dest = "normalize", default = FALSE,
help = "Normalize abundances to sum to 100% (optional)"
),
make_option(c("--width"),
action = "store", dest = "width", default = 10,
type = "numeric", help = "Width of the output plot in inches"
),
make_option(c("--height"),
action = "store", dest = "height", default = 8,
type = "numeric", help = "Height of the output plot in inches"
),
make_option(c("--device"),
action = "store", dest = "device", default = "pdf",
help = "Output format (e.g., 'pdf', 'png', 'jpeg')"
)
)

Expand All @@ -38,9 +66,6 @@ opt <- args$options
if (is.null(opt$input) || opt$input == "") {
stop("Error: Input file is required.")
}
if (is.null(opt$x) || opt$x == "") {
stop("Error: X-axis variable is required.")
}
if (is.null(opt$output) || opt$output == "") {
stop("Error: Output file is required.")
}
Expand All @@ -49,14 +74,63 @@ if (is.null(opt$output) || opt$output == "") {
print(paste("Trying to read:", opt$input))
physeq <- readRDS(opt$input)

# Normalize to relative abundances if requested
if (opt$normalize) {
print("Normalizing abundances to sum to 100%...")
physeq <- transform_sample_counts(physeq, function(x) 100 * x / sum(x))
}

if (opt$keepNonAssigned) {
# Add synthetic "Not Assigned" for missing/NA taxa
tax_table(physeq) <- apply(tax_table(physeq), c(1, 2), function(x) ifelse(is.na(x) | x == "", "Not Assigned", x))
}
# Check if the 'x' and 'fill' variables are valid
sample_vars <- colnames(sample_data(physeq))
if (!opt$x %in% sample_vars) {
stop(paste("Error: X-axis variable", opt$x, "does not exist in the sample data."))

# If topX is provided, filter the phyloseq object to show only top X taxa
if (!is.null(opt$topX) && opt$topX != "") {
topX <- as.numeric(opt$topX)
if (is.na(topX) || topX <= 0) {
stop("Error: topX should be a positive number.")
}

# Aggregate the data at the selected rank (e.g., Phylum)
tax_rank <- opt$fill # Adjust as necessary
physeq_agg <- tax_glom(physeq, taxrank = tax_rank)

# Get the abundance of each taxon at the selected rank
taxa_abundance <- taxa_sums(physeq_agg)

# Summarize the abundance at each taxonomic rank (grouping by taxonomic name)
tax_table_agg <- tax_table(physeq_agg)
taxa_abundance_by_rank <- tapply(taxa_abundance, tax_table_agg[, tax_rank], sum)

# Identify the top X taxa by summed abundance
top_taxa <- names(sort(taxa_abundance_by_rank, decreasing = TRUE))[1:topX]

print("Only plotting taxa in TopX taxa group:")
print(top_taxa)

# Get the OTUs corresponding to the top taxa
otus_in_top_taxa <- rownames(tax_table_agg)[tax_table_agg[, tax_rank] %in% top_taxa]

if (opt$keepOthers) {
# Label taxa not in top_taxa as "Others"
tax_table(physeq_agg)[, tax_rank][!rownames(tax_table(physeq_agg)) %in% otus_in_top_taxa] <- "Others"
physeq <- physeq_agg
} else {
# Subset the phyloseq object to keep only the top X taxa
physeq_filtered <- prune_taxa(otus_in_top_taxa, physeq_agg)
physeq <- physeq_filtered
}
}

# Generate bar plot
p <- plot_bar(physeq, x = opt$x, fill = opt$fill)
if (!is.null(opt$x) && opt$x != "") {
p <- plot_bar(physeq, x = opt$x, fill = opt$fill)
} else {
p <- plot_bar(physeq, fill = opt$fill) # If no x is provided, don't include x
}

# Only facet if the facet variable is provided and exists in the sample data
if (!is.null(opt$facet) && opt$facet != "") {
Expand All @@ -67,8 +141,11 @@ if (!is.null(opt$facet) && opt$facet != "") {
}
}

# Save to output file using PDF device
print(paste("Saving plot to:", opt$output))
pdf(file = opt$output, width = 10, height = 8)
print(p)
dev.off()
# Save to output file
ggsave(
filename = opt$output,
plot = p,
width = opt$width,
height = opt$height,
device = opt$device
)
81 changes: 52 additions & 29 deletions tools/phyloseq/phyloseq_plot_bar.xml
Original file line number Diff line number Diff line change
Expand Up @@ -12,44 +12,66 @@ Rscript '${__tool_directory__}/phyloseq_plot_bar.R'
--fill '$fill'
--facet '${facet}'
--output '$output'
--topX '${topX}'
--keepOthers '${keepOthers}'
--keepNonAssigned '${keepNonAssigned}'
--normalize '${normalize}'
--width '${width}'
--height '${height}'
--device '${device}'
]]></command>
<inputs>
<expand macro="phyloseq_input"/>
<param name="x" type="text" label="X-axis variable" help="Variable for the x-axis (e.g., Sample, Phylum)" />
<param name="fill" type="text" label="Fill variable" help="Variable to color the bars (e.g., Genus, Order)" />
<param name="facet" type="text" optional="true" label="Facet by variable" help="Optional: Variable to facet the chart by (e.g., SampleType)" />
<param name="x" type="text" optional="true" label="X-axis variable" help="Variable for the x-axis (e.g., Sample, Phylum). If not specified, the Samples are taken." />
<param name="fill" type="text" label="Fill variable" help="Variable to color the bars (e.g., Genus, Order)." />
<param name="facet" type="text" optional="true" label="Facet by variable" help="Variable to facet the chart by (e.g., SampleType)." />
<param name="topX" value="10" type="integer" optional="true" label="Top X" help="Only show the ranks with the top X abundance." />
<param name="keepOthers" type="boolean" label="Keep 'Others'" help="Keep OTUs which are not in top X as 'Others'." />
<param name="keepNonAssigned" type="boolean" label="Keep Non Assigned" help="Keep OTUs that are not assigned at this rank and label as 'Not Assigned'." />
<param name="normalize" type="boolean" label="Normalize" help="Normalize abundances to sum to 100%. Normalization is performed before Top X selection." />
<param name="width" type="float" value="10" optional="true" label="Plot Width" help="Width of the output plot in inches." />
<param name="height" type="float" value="8" optional="true" label="Plot Height" help="Height of the output plot in inches." />
<param name="device" type="select" value="pdf" label="Output Device" help="Device to use for the output file. Options include pdf, png, and others.">
<option value="pdf">PDF</option>
<option value="png">PNG</option>
<option value="jpeg">JPEG</option>
<option value="tiff">TIFF</option>
</param>
</inputs>
<outputs>
<data name="output" format="pdf" label="Bar Chart (PDF)" />
</outputs>
<tests>
<!-- Test 1: Default parameters -->
<tests>
<!-- Test 1: Basic functionality with x and fill variables -->
<test>
<param name="input" value="output.phyloseq" ftype="phyloseq"/>
<param name="x" value="Property"/>
<param name="fill" value="Number"/>
<param name="facet" value="Property"/>
<output name="output" ftype="pdf">
<assert_contents>
<has_text text="%PDF"/>
<has_text text="%%EOF"/>
</assert_contents>
</output>
<param name="input" value="output.phyloseq" ftype="phyloseq"/>
<param name="x" value="Property"/>
<param name="fill" value="Phylum"/>
<param name="device" value="pdf"/>
<output name="output" ftype="pdf">
<assert_contents>
<has_text text="%PDF"/>
<has_text text="%%EOF"/>
</assert_contents>
</output>
</test>
<!-- Test 2: Valid parameters without facet -->

<!-- Test 2: TopX filtering and normalization -->
<test>
<param name="input" value="output.phyloseq" ftype="phyloseq"/>
<param name="x" value="Property"/>
<param name="fill" value="Number"/>
<param name="facet" value=""/>
<output name="output" ftype="pdf">
<assert_contents>
<has_text text="%PDF"/>
<has_text text="%%EOF"/>
</assert_contents>
</output>
<param name="input" value="output.phyloseq" ftype="phyloseq"/>
<param name="x" value="Property"/>
<param name="fill" value="Genus"/>
<param name="facet" value="SampleType"/>
<param name="topX" value="10"/>
<param name="normalize" value="true"/>
<output name="output" ftype="pdf">
<assert_contents>
<has_text text="%PDF"/>
<has_text text="%%EOF"/>
</assert_contents>
</output>
</test>
</tests>
</tests>

<help>
**Description**
Expand All @@ -62,15 +84,16 @@ Rscript '${__tool_directory__}/phyloseq_plot_bar.R'
- **X-axis variable**: The variable to use for the x-axis (e.g., Sample, Phylum).
- **Fill variable**: (Optional) The variable to use for the bar fill colors (e.g., Genus, Order).
- **Facet by variable**: (Optional) A variable to facet the bar chart (e.g., SampleType).
- **Width and Height**: Dimensions of the plot in inches (default: 10x8).
- **Device**: Output format (e.g., pdf, png, jpeg, tiff).

**Outputs**

- A PDF file containing the bar chart.
- A file containing the bar chart in the specified format.

**Usage Notes**

Ensure that the input file is a valid phyloseq object in RDS format.
</help>
<expand macro="citations"/>
</tool>

0 comments on commit 26f87cc

Please sign in to comment.