Skip to content

Commit

Permalink
Fix: comp_barplot and tax_sort checks/warnings about unavailable coun…
Browse files Browse the repository at this point in the history
…ts can be disabled with counts_warn = FALSE

Identified unnecessary attempts to retain count data in temporary psExtra objects created by tax_transform. These were removed from ps_seriate and comp_barplot, by setting tax_transform keep_counts to FALSE in these calls.
  • Loading branch information
david-barnett committed Sep 14, 2024
1 parent 73eec7a commit 072965d
Show file tree
Hide file tree
Showing 7 changed files with 81 additions and 55 deletions.
1 change: 1 addition & 0 deletions NEWS.md
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
# microViz 0.12.5

- Fix: tax_fix no longer allows min_length of 0, as empty strings are (and were) always replaced
- Fix/Feat: comp_barplot and tax_sort checks/warnings about unavailable counts can be disabled with counts_warn = FALSE

# microViz 0.12.4

Expand Down
46 changes: 32 additions & 14 deletions R/comp_barplot.R
Original file line number Diff line number Diff line change
Expand Up @@ -16,8 +16,10 @@
#' - merge_other: controls whether bar outlines can be drawn around individual (lower abundance) taxa that are grouped in "other" category. If you want to see the diversity of taxa in "other" use merge_taxa = FALSE, or use TRUE if you prefer the cleaner merged look
#' - palette: Default colouring is consistent across multiple plots if created with the group_by argument, and the defaults scheme retains the colouring of the most abundant taxa irrespective of n_taxa
#'
#' @param ps phyloseq object
#' @param tax_level taxonomic aggregation level (from rank_names(ps))
#' @param ps
#' phyloseq object
#' @param tax_level
#' taxonomic aggregation level (from rank_names(ps))
#' @param n_taxa
#' how many taxa to show distinct colours for (all others grouped into "Other")
#' @param tax_order
Expand Down Expand Up @@ -62,16 +64,23 @@
#' @param keep_all_vars
#' FALSE may speed up internal melting with ps_melt for large phyloseq objects
#' but TRUE is required for some post-hoc plot customisation
#' @param interactive creates plot suitable for use with ggiraph
#' @param max_taxa maximum distinct taxa groups to show
#' @param interactive
#' creates plot suitable for use with ggiraph
#' @param max_taxa
#' maximum distinct taxa groups to show
#' (only really useful for limiting complexity of interactive plots
#' e.g. within ord_explore)
#' @param other_name name for other taxa after N
#' @param ... extra arguments passed to facet_wrap() (if facet_by is not NA)
#' @param other_name
#' name for other taxa after N
#' @param ...
#' extra arguments passed to facet_wrap() (if facet_by is not NA)
#' @param x
#' name of variable to use as x aesthetic:
#' it probably only makes sense to change this when also using facets
#' (check only one sample is represented per bar!)
#' @param counts_warn
#' should a warning be issued if counts are unavailable?
#' TRUE, FALSE, or "error" (passed to ps_get)
#'
#' @return ggplot or list of harmonised ggplots
#' @export
Expand Down Expand Up @@ -215,21 +224,26 @@ comp_barplot <- function(ps,
max_taxa = 10000,
other_name = "Other",
x = "SAMPLE",
counts_warn = TRUE,
...) {
ps <- ps_get(ps)

if (identical(tax_level, ".Taxon")) {
stop("'.Taxon' cannot be used as a rank name! You must rename that rank.")
}
if (!rlang::is_scalar_integerish(n_taxa) || n_taxa < 1) {
stop("n_taxa must a positive integer")
stop("n_taxa must be a positive integer")
}
if (!rlang::is_scalar_integerish(max_taxa) || max_taxa < n_taxa) {
stop("max_taxa must be a positive integer, and not lower than n_taxa")
}
if (!rlang::is_character(sample_order)) stop("sample_order")
if (!rlang::is_character(sample_order)) {
stop("sample_order must be character")
}

# check phyloseq for common problems (and fix or message about this)
ps <- phyloseq_validate(ps, remove_undetected = FALSE, verbose = TRUE)
ps <- psCheckSamdat(ps, verbose = TRUE)
ps <- psCheckTaxTable(ps, verbose = TRUE, min_tax_length = 3)

# create a sample names variable if this will be used for labelling
if (identical(label, "SAMPLE") || identical(x, "SAMPLE")) {
Expand All @@ -245,11 +259,12 @@ comp_barplot <- function(ps,
# taxa: aggregate and order for bar ordering and plotting ------------------

# include "unique" rank when aggregating
ps <- tax_agg(ps, rank = tax_level, add_unique = TRUE) %>% ps_get()
ps <- tax_agg(ps, rank = tax_level, add_unique = TRUE)
ps <- ps_get(ps)

if (length(tax_order) == 1) {
# reorder taxa if tax_order given as a length one rule
ps <- tax_sort(ps, by = tax_order)
ps <- tax_sort(ps, by = tax_order, use_counts = TRUE, counts_warn = counts_warn)
} else {
# reorder taxa if tax_order given as presumably a vector of taxa names
ps <- tax_reorder(ps, tax_order = tax_order, tree_warn = FALSE, ignore = other_name)
Expand Down Expand Up @@ -309,7 +324,8 @@ comp_barplot <- function(ps,
# merge ".top" rank's `other_name` category into one taxon to allow drawing
# bar outlines everywhere except within `other_name` category bars
if (isTRUE(merge_other)) {
ps <- tax_agg(ps, rank = ".top", force = TRUE, add_unique = TRUE) %>% ps_get()
ps <- tax_agg(ps, rank = ".top", force = TRUE, add_unique = TRUE)
ps <- ps_get(ps)
} else {
ps <- taxMaxEnforce(ps = ps, maxTaxa = max_taxa, otherName = other_name)
}
Expand Down Expand Up @@ -374,7 +390,8 @@ comp_barplotFixed <- function(ps, interactive,
if (is.null(label)) LABELLER <- NULL # will remove labels

# transform taxa values for display on plot
ps <- tax_transform(ps, trans = tax_transform_for_plot) %>% ps_get()
ps <- tax_transform(ps, trans = tax_transform_for_plot, keep_counts = FALSE)
ps <- ps_get(ps, warn = FALSE)

# prepare dataframe for plot ----------------------------------------------
# create long dataframe from (compositional) phyloseq
Expand Down Expand Up @@ -514,7 +531,8 @@ taxMaxEnforce <- function(ps, maxTaxa, otherName) {
tt = phyloseq::tax_table(ps), N = maxTaxa - 1,
other = otherName, varname = "..sep" # temp variable, can't be "unique"
)
ps <- tax_agg(ps, rank = "..sep", force = TRUE, add_unique = TRUE) %>% ps_get()
ps <- tax_agg(ps, rank = "..sep", force = TRUE, add_unique = TRUE)
ps <- ps_get(ps)
ps <- tax_mutate(ps = ps, ..sep = NULL)
return(ps)
}
3 changes: 2 additions & 1 deletion R/ps_seriate.R
Original file line number Diff line number Diff line change
Expand Up @@ -41,7 +41,8 @@ ps_seriate <- function(ps,
# aggregate taxa for ordering (possibly, as NA is no aggregation)
psX <- tax_agg(ps, rank = rank)
# transform taxa for ordering (facilitated primarily for clr for PCA methods)
ps_transformed <- tax_transform(psX, trans = tax_transform) %>% ps_get()
ps_transformed <- tax_transform(psX, trans = tax_transform, keep_counts = FALSE)
ps_transformed <- ps_get(ps_transformed)

if (method %in% seriation::list_seriation_methods(kind = "matrix")) {
# directly seriate the otu matrix
Expand Down
21 changes: 15 additions & 6 deletions R/tax_sort.R
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,7 @@
#' @param trans
#' name of transformation to apply to taxa before sorting (taxa are returned un-transformed)
#' @param use_counts use count data if available, instead of transformed data
#' @param counts_warn warn if count data are not available?
#'
#' @return sorted phyloseq or psExtra
#' @export
Expand Down Expand Up @@ -116,7 +117,8 @@ tax_sort <- function(data,
tree_warn = TRUE,
verbose = TRUE,
trans = "identity",
use_counts = TRUE) {
use_counts = TRUE,
counts_warn = TRUE) {
# checks, and get phyloseq--------------------------------------------------
stopifnot(rlang::is_bool(use_counts))
stopifnot(rlang::is_bool(tree_warn))
Expand All @@ -134,9 +136,12 @@ tax_sort <- function(data,
if (!rlang::is_string(by) && !rlang::is_function(by)) stop(byIsInvalidError)
# TODO ?allow numeric or character vector sorting by subsetting?

# get and check phyloseq (of counts?)
ps <- if (use_counts) ps_counts(data, warn = TRUE) else ps_get(data)
ps <- phyloseq_validate(ps, remove_undetected = FALSE, verbose = verbose)
# get phyloseq (of counts or whatever else)
ps <- ps_get(data, counts = use_counts, warn = counts_warn)

# check phyloseq for common problems (and fix or message about this)
ps <- psCheckSamdat(ps, verbose = verbose)
ps <- psCheckTaxTable(ps, verbose = verbose, min_tax_length = 3)

# check `by` string options including sample names
if (rlang::is_string(by)) {
Expand Down Expand Up @@ -185,8 +190,12 @@ tax_sort <- function(data,
} else {
# it was checked already that it must otherwise be a rank name
# --> aggregation required
psAg <- tax_agg(ps = ps, rank = at) %>% ps_get()
psAg <- tax_sort(data = psAg, by = by, at = "names", trans = trans, ...)
psAg <- tax_agg(ps = ps, rank = at)
psAg <- ps_get(psAg)
psAg <- tax_sort(
data = psAg, by = by, at = "names", trans = trans,
use_counts = use_counts, counts_warn = counts_warn, ...
)

# the aggregated tax_table has different dimensions to the un-aggregated
# make tax table of un-aggregated phyloseq as dataframe
Expand Down
56 changes: 23 additions & 33 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@ date)](https://img.shields.io/docker/v/barnettdavid/microviz-rocker-verse?color=
badge](https://david-barnett.r-universe.dev/badges/microViz)](https://david-barnett.r-universe.dev/microViz)
[![JOSS
article](https://joss.theoj.org/papers/4547b492f224a26d96938ada81fee3fa/status.svg)](https://joss.theoj.org/papers/4547b492f224a26d96938ada81fee3fa)
[![Citations](https://img.shields.io/badge/Citations-~135-blueviolet)](https://scholar.google.com/scholar?hl=en&as_sdt=2005&sciodt=0,5&cites=5439940108464463894&scipsc=&q=&scisbd=1)
[![Citations](https://img.shields.io/badge/Citations-~157-blueviolet)](https://scholar.google.com/scholar?hl=en&as_sdt=2005&sciodt=0,5&cites=5439940108464463894&scipsc=&q=&scisbd=1)
[![Zenodo
DOI](https://zenodo.org/badge/307119750.svg)](https://zenodo.org/badge/latestdoi/307119750)
<!-- badges: end -->
Expand Down Expand Up @@ -150,7 +150,7 @@ test this.

``` r
library(microViz)
#> microViz version 0.12.3.9000 - Copyright (C) 2021-2024 David Barnett
#> microViz version 0.12.4 - Copyright (C) 2021-2024 David Barnett
#> ! Website: https://david-barnett.github.io/microViz
#> ✔ Useful? For citation details, run: `citation("microViz")`
#> ✖ Silence? `suppressPackageStartupMessages(library(microViz))`
Expand Down Expand Up @@ -316,21 +316,15 @@ aitchison_perm <- aitchison_dists %>%
n_processes = 1, n_perms = 99, # you should use at least 999!
variables = "bmi_group"
)
#> 2024-06-30 10:55:51.862638 - Starting PERMANOVA with 99 perms with 1 processes
#> 2024-06-30 10:55:51.919099 - Finished PERMANOVA
```

``` r
#> 2024-09-14 16:41:43.803016 - Starting PERMANOVA with 99 perms with 1 processes
#> 2024-09-14 16:41:43.858752 - Finished PERMANOVA

# view the permanova results
perm_get(aitchison_perm) %>% as.data.frame()
#> Df SumOfSqs R2 F Pr(>F)
#> bmi_group 2 109.170 0.04104336 4.686602 0.01
#> Residual 219 2550.700 0.95895664 NA NA
#> Total 221 2659.869 1.00000000 NA NA
```

``` r

# view the info stored about the distance calculation
info_get(aitchison_perm)
Expand All @@ -348,8 +342,8 @@ your permanova directly using the `ord_plot` function with constraints
perm2 <- aitchison_dists %>%
dist_permanova(variables = c("weight", "african", "sex"), seed = 321)
#> Dropping samples with missings: 2
#> 2024-06-30 10:55:51.930319 - Starting PERMANOVA with 999 perms with 1 processes
#> 2024-06-30 10:55:53.574252 - Finished PERMANOVA
#> 2024-09-14 16:41:43.869815 - Starting PERMANOVA with 999 perms with 1 processes
#> 2024-09-14 16:41:45.512262 - Finished PERMANOVA
```

We’ll visualise the effect of nationality and bodyweight on sample
Expand Down Expand Up @@ -398,9 +392,6 @@ psq <- dietswap %>%
) %>%
tax_transform("identity", rank = "Genus")
#> Proportional min_prevalence given: 0.1 --> min 23/222 samples.
```

``` r

# randomly select 30 taxa from the 50 most abundant taxa (just for an example)
set.seed(123)
Expand Down Expand Up @@ -445,7 +436,7 @@ and by participating in this project you agree to abide by its terms.
sessionInfo()
#> R version 4.4.0 (2024-04-24)
#> Platform: aarch64-apple-darwin20
#> Running under: macOS Sonoma 14.5
#> Running under: macOS Sonoma 14.6.1
#>
#> Matrix products: default
#> BLAS: /Library/Frameworks/R.framework/Versions/4.4-arm64/Resources/lib/libRblas.0.dylib
Expand All @@ -461,9 +452,8 @@ sessionInfo()
#> [1] stats graphics grDevices utils datasets methods base
#>
#> other attached packages:
#> [1] ggplot2_3.5.1 dplyr_1.1.4 phyloseq_1.48.0
#> [4] microViz_0.12.3.9000 testthat_3.2.1.1 devtools_2.4.5
#> [7] usethis_2.2.3
#> [1] ggplot2_3.5.1 dplyr_1.1.4 phyloseq_1.48.0 microViz_0.12.4
#> [5] testthat_3.2.1.1 devtools_2.4.5 usethis_3.0.0
#>
#> loaded via a namespace (and not attached):
#> [1] RColorBrewer_1.1-3 rstudioapi_0.16.0 jsonlite_1.8.8
Expand All @@ -476,35 +466,35 @@ sessionInfo()
#> [22] commonmark_1.9.1 igraph_2.0.3 mime_0.12
#> [25] lifecycle_1.0.4 iterators_1.0.14 pkgconfig_2.0.3
#> [28] Matrix_1.7-0 R6_2.5.1 fastmap_1.2.0
#> [31] clue_0.3-65 GenomeInfoDbData_1.2.12 shiny_1.8.1.1
#> [34] digest_0.6.36 selectr_0.4-2 colorspace_2.1-0
#> [37] S4Vectors_0.42.0 ps_1.7.6 pkgload_1.3.4
#> [31] clue_0.3-65 GenomeInfoDbData_1.2.12 shiny_1.9.1
#> [34] digest_0.6.36 selectr_0.4-2 colorspace_2.1-1
#> [37] S4Vectors_0.42.1 ps_1.7.7 pkgload_1.4.0
#> [40] seriation_1.5.5 vegan_2.6-6.1 labeling_0.4.3
#> [43] fansi_1.0.6 httr_1.4.7 mgcv_1.9-1
#> [46] compiler_4.4.0 remotes_2.5.0 doParallel_1.0.17
#> [49] withr_3.0.0 viridis_0.6.5 pkgbuild_1.4.4
#> [52] highr_0.11 MASS_7.3-60.2 sessioninfo_1.2.2
#> [49] withr_3.0.1 viridis_0.6.5 pkgbuild_1.4.4
#> [52] highr_0.11 MASS_7.3-61 sessioninfo_1.2.2
#> [55] rjson_0.2.21 biomformat_1.32.0 permute_0.9-7
#> [58] tools_4.4.0 chromote_0.2.0 ape_5.8
#> [61] httpuv_1.6.15 glue_1.7.0 nlme_3.1-164
#> [61] httpuv_1.6.15 glue_1.7.0 nlme_3.1-165
#> [64] rhdf5filters_1.16.0 promises_1.3.0 gridtext_0.1.5
#> [67] grid_4.4.0 Rtsne_0.17 cluster_2.1.6
#> [70] reshape2_1.4.4 ade4_1.7-22 generics_0.1.3
#> [73] gtable_0.3.5 microbiome_1.26.0 ca_0.71.1
#> [76] tidyr_1.3.1 websocket_1.4.1 data.table_1.15.4
#> [76] tidyr_1.3.1 websocket_1.4.2 data.table_1.15.4
#> [79] xml2_1.3.6 utf8_1.2.4 XVector_0.44.0
#> [82] BiocGenerics_0.50.0 markdown_1.12 foreach_1.5.2
#> [82] BiocGenerics_0.50.0 markdown_1.13 foreach_1.5.2
#> [85] pillar_1.9.0 stringr_1.5.1 later_1.3.2
#> [88] circlize_0.4.16 splines_4.4.0 ggtext_0.1.2
#> [91] lattice_0.22-6 survival_3.5-8 tidyselect_1.2.1
#> [94] registry_0.5-1 ComplexHeatmap_2.20.0 Biostrings_2.72.0
#> [97] miniUI_0.1.1.1 knitr_1.47 gridExtra_2.3
#> [100] IRanges_2.38.0 stats4_4.4.0 xfun_0.45
#> [91] lattice_0.22-6 survival_3.7-0 tidyselect_1.2.1
#> [94] registry_0.5-1 ComplexHeatmap_2.20.0 Biostrings_2.72.1
#> [97] miniUI_0.1.1.1 knitr_1.48 gridExtra_2.3
#> [100] IRanges_2.38.1 stats4_4.4.0 xfun_0.46
#> [103] Biobase_2.64.0 matrixStats_1.3.0 brio_1.1.5
#> [106] stringi_1.8.4 UCSC.utils_1.0.0 yaml_2.3.8
#> [106] stringi_1.8.4 UCSC.utils_1.0.0 yaml_2.3.10
#> [109] evaluate_0.24.0 codetools_0.2-20 tibble_3.2.1
#> [112] cli_3.6.3 xtable_1.8-4 munsell_0.5.1
#> [115] processx_3.8.4 Rcpp_1.0.12 GenomeInfoDb_1.40.1
#> [115] processx_3.8.4 Rcpp_1.0.13 GenomeInfoDb_1.40.1
#> [118] png_0.1-8 parallel_4.4.0 ellipsis_0.3.2
#> [121] profvis_0.3.8 urlchecker_1.0.1 viridisLite_0.4.2
#> [124] scales_1.3.0 purrr_1.0.2 crayon_1.5.3
Expand Down
4 changes: 4 additions & 0 deletions man/comp_barplot.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

5 changes: 4 additions & 1 deletion man/tax_sort.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

0 comments on commit 072965d

Please sign in to comment.