Fix: comp_barplot and tax_sort checks/warnings about unavailable coun…

…ts can be disabled with counts_warn = FALSE Identified unnecessary attempts to retain count data in temporary psExtra objects created by tax_transform. These were removed from ps_seriate and comp_barplot, by setting tax_transform keep_counts to FALSE in these calls.
david-barnett · Sep 14, 2024 · 072965d · 072965d
1 parent 73eec7a
commit 072965d
Show file tree

Hide file tree

Showing 7 changed files with 81 additions and 55 deletions.
diff --git a/NEWS.md b/NEWS.md
@@ -1,6 +1,7 @@
 # microViz 0.12.5
 
 - Fix: tax_fix no longer allows min_length of 0, as empty strings are (and were) always replaced
+- Fix/Feat: comp_barplot and tax_sort checks/warnings about unavailable counts can be disabled with counts_warn = FALSE
 
 # microViz 0.12.4
 

diff --git a/R/comp_barplot.R b/R/comp_barplot.R
@@ -16,8 +16,10 @@
 #' - merge_other: controls whether bar outlines can be drawn around individual (lower abundance) taxa that are grouped in "other" category. If you want to see the diversity of taxa in "other" use merge_taxa = FALSE, or use TRUE if you prefer the cleaner merged look
 #' - palette: Default colouring is consistent across multiple plots if created with the group_by argument, and the defaults scheme retains the colouring of the most abundant taxa irrespective of n_taxa
 #'
-#' @param ps phyloseq object
-#' @param tax_level taxonomic aggregation level (from rank_names(ps))
+#' @param ps
+#' phyloseq object
+#' @param tax_level
+#' taxonomic aggregation level (from rank_names(ps))
 #' @param n_taxa
 #' how many taxa to show distinct colours for (all others grouped into "Other")
 #' @param tax_order
@@ -62,16 +64,23 @@
 #' @param keep_all_vars
 #' FALSE may speed up internal melting with ps_melt for large phyloseq objects
 #' but TRUE is required for some post-hoc plot customisation
-#' @param interactive creates plot suitable for use with ggiraph
-#' @param max_taxa maximum distinct taxa groups to show
+#' @param interactive
+#' creates plot suitable for use with ggiraph
+#' @param max_taxa
+#' maximum distinct taxa groups to show
 #' (only really useful for limiting complexity of interactive plots
 #' e.g. within ord_explore)
-#' @param other_name name for other taxa after N
-#' @param ... extra arguments passed to facet_wrap() (if facet_by is not NA)
+#' @param other_name
+#' name for other taxa after N
+#' @param ...
+#' extra arguments passed to facet_wrap() (if facet_by is not NA)
 #' @param x
 #' name of variable to use as x aesthetic:
 #' it probably only makes sense to change this when also using facets
 #' (check only one sample is represented per bar!)
+#' @param counts_warn
+#' should a warning be issued if counts are unavailable?
+#' TRUE, FALSE, or "error" (passed to ps_get)
 #'
 #' @return ggplot or list of harmonised ggplots
 #' @export
@@ -215,21 +224,26 @@ comp_barplot <- function(ps,
                          max_taxa = 10000,
                          other_name = "Other",
                          x = "SAMPLE",
+                         counts_warn = TRUE,
                          ...) {
   ps <- ps_get(ps)
+
   if (identical(tax_level, ".Taxon")) {
     stop("'.Taxon' cannot be used as a rank name! You must rename that rank.")
   }
   if (!rlang::is_scalar_integerish(n_taxa) || n_taxa < 1) {
-    stop("n_taxa must a positive integer")
+    stop("n_taxa must be a positive integer")
   }
   if (!rlang::is_scalar_integerish(max_taxa) || max_taxa < n_taxa) {
     stop("max_taxa must be a positive integer, and not lower than n_taxa")
   }
-  if (!rlang::is_character(sample_order)) stop("sample_order")
+  if (!rlang::is_character(sample_order)) {
+    stop("sample_order must be character")
+  }
 
   # check phyloseq for common problems (and fix or message about this)
-  ps <- phyloseq_validate(ps, remove_undetected = FALSE, verbose = TRUE)
+  ps <- psCheckSamdat(ps, verbose = TRUE)
+  ps <- psCheckTaxTable(ps, verbose = TRUE, min_tax_length = 3)
 
   # create a sample names variable if this will be used for labelling
   if (identical(label, "SAMPLE") || identical(x, "SAMPLE")) {
@@ -245,11 +259,12 @@ comp_barplot <- function(ps,
   # taxa: aggregate and order for bar ordering and plotting ------------------
 
   # include "unique" rank when aggregating
-  ps <- tax_agg(ps, rank = tax_level, add_unique = TRUE) %>% ps_get()
+  ps <- tax_agg(ps, rank = tax_level, add_unique = TRUE)
+  ps <- ps_get(ps)
 
   if (length(tax_order) == 1) {
     # reorder taxa if tax_order given as a length one rule
-    ps <- tax_sort(ps, by = tax_order)
+    ps <- tax_sort(ps, by = tax_order, use_counts = TRUE, counts_warn = counts_warn)
   } else {
     # reorder taxa if tax_order given as presumably a vector of taxa names
     ps <- tax_reorder(ps, tax_order = tax_order, tree_warn = FALSE, ignore = other_name)
@@ -309,7 +324,8 @@ comp_barplot <- function(ps,
     # merge ".top" rank's `other_name` category into one taxon to allow drawing
     # bar outlines everywhere except within `other_name` category bars
     if (isTRUE(merge_other)) {
-      ps <- tax_agg(ps, rank = ".top", force = TRUE, add_unique = TRUE) %>% ps_get()
+      ps <- tax_agg(ps, rank = ".top", force = TRUE, add_unique = TRUE)
+      ps <- ps_get(ps)
     } else {
       ps <- taxMaxEnforce(ps = ps, maxTaxa = max_taxa, otherName = other_name)
     }
@@ -374,7 +390,8 @@ comp_barplotFixed <- function(ps, interactive,
   if (is.null(label)) LABELLER <- NULL # will remove labels
 
   # transform taxa values for display on plot
-  ps <- tax_transform(ps, trans = tax_transform_for_plot) %>% ps_get()
+  ps <- tax_transform(ps, trans = tax_transform_for_plot, keep_counts = FALSE)
+  ps <- ps_get(ps, warn = FALSE)
 
   # prepare dataframe for plot ----------------------------------------------
   # create long dataframe from (compositional) phyloseq
@@ -514,7 +531,8 @@ taxMaxEnforce <- function(ps, maxTaxa, otherName) {
     tt = phyloseq::tax_table(ps), N = maxTaxa - 1,
     other = otherName, varname = "..sep" # temp variable, can't be "unique"
   )
-  ps <- tax_agg(ps, rank = "..sep", force = TRUE, add_unique = TRUE) %>% ps_get()
+  ps <- tax_agg(ps, rank = "..sep", force = TRUE, add_unique = TRUE)
+  ps <- ps_get(ps)
   ps <- tax_mutate(ps = ps, ..sep = NULL)
   return(ps)
 }
diff --git a/R/ps_seriate.R b/R/ps_seriate.R
@@ -41,7 +41,8 @@ ps_seriate <- function(ps,
   # aggregate taxa for ordering (possibly, as NA is no aggregation)
   psX <- tax_agg(ps, rank = rank)
   # transform taxa for ordering (facilitated primarily for clr for PCA methods)
-  ps_transformed <- tax_transform(psX, trans = tax_transform) %>% ps_get()
+  ps_transformed <- tax_transform(psX, trans = tax_transform, keep_counts = FALSE)
+  ps_transformed <- ps_get(ps_transformed)
 
   if (method %in% seriation::list_seriation_methods(kind = "matrix")) {
     # directly seriate the otu matrix

diff --git a/R/tax_sort.R b/R/tax_sort.R
@@ -33,6 +33,7 @@
 #' @param trans
 #' name of transformation to apply to taxa before sorting (taxa are returned un-transformed)
 #' @param use_counts use count data if available, instead of transformed data
+#' @param counts_warn warn if count data are not available?
 #'
 #' @return sorted phyloseq or psExtra
 #' @export
@@ -116,7 +117,8 @@ tax_sort <- function(data,
                      tree_warn = TRUE,
                      verbose = TRUE,
                      trans = "identity",
-                     use_counts = TRUE) {
+                     use_counts = TRUE,
+                     counts_warn = TRUE) {
   # checks, and get phyloseq--------------------------------------------------
   stopifnot(rlang::is_bool(use_counts))
   stopifnot(rlang::is_bool(tree_warn))
@@ -134,9 +136,12 @@ tax_sort <- function(data,
   if (!rlang::is_string(by) && !rlang::is_function(by)) stop(byIsInvalidError)
   # TODO ?allow numeric or character vector sorting by subsetting?
 
-  # get and check phyloseq (of counts?)
-  ps <- if (use_counts) ps_counts(data, warn = TRUE) else ps_get(data)
-  ps <- phyloseq_validate(ps, remove_undetected = FALSE, verbose = verbose)
+  # get phyloseq (of counts or whatever else)
+  ps <- ps_get(data, counts = use_counts, warn = counts_warn)
+
+  # check phyloseq for common problems (and fix or message about this)
+  ps <- psCheckSamdat(ps, verbose = verbose)
+  ps <- psCheckTaxTable(ps, verbose = verbose, min_tax_length = 3)
 
   # check `by` string options including sample names
   if (rlang::is_string(by)) {
@@ -185,8 +190,12 @@ tax_sort <- function(data,
     } else {
       # it was checked already that it must otherwise be a rank name
       # --> aggregation required
-      psAg <- tax_agg(ps = ps, rank = at) %>% ps_get()
-      psAg <- tax_sort(data = psAg, by = by, at = "names", trans = trans, ...)
+      psAg <- tax_agg(ps = ps, rank = at)
+      psAg <- ps_get(psAg)
+      psAg <- tax_sort(
+        data = psAg, by = by, at = "names", trans = trans,
+        use_counts = use_counts, counts_warn = counts_warn, ...
+      )
 
       # the aggregated tax_table has different dimensions to the un-aggregated
       # make tax table of un-aggregated phyloseq as dataframe

diff --git a/README.md b/README.md
@@ -17,7 +17,7 @@ date)](https://img.shields.io/docker/v/barnettdavid/microviz-rocker-verse?color=
 badge](https://david-barnett.r-universe.dev/badges/microViz)](https://david-barnett.r-universe.dev/microViz)
 [![JOSS
 article](https://joss.theoj.org/papers/4547b492f224a26d96938ada81fee3fa/status.svg)](https://joss.theoj.org/papers/4547b492f224a26d96938ada81fee3fa)
-[![Citations](https://img.shields.io/badge/Citations-~135-blueviolet)](https://scholar.google.com/scholar?hl=en&as_sdt=2005&sciodt=0,5&cites=5439940108464463894&scipsc=&q=&scisbd=1)
+[![Citations](https://img.shields.io/badge/Citations-~157-blueviolet)](https://scholar.google.com/scholar?hl=en&as_sdt=2005&sciodt=0,5&cites=5439940108464463894&scipsc=&q=&scisbd=1)
 [![Zenodo
 DOI](https://zenodo.org/badge/307119750.svg)](https://zenodo.org/badge/latestdoi/307119750)
 <!-- badges: end -->
@@ -150,7 +150,7 @@ test this.
 
 ``` r
 library(microViz)
-#> microViz version 0.12.3.9000 - Copyright (C) 2021-2024 David Barnett
+#> microViz version 0.12.4 - Copyright (C) 2021-2024 David Barnett
 #> ! Website: https://david-barnett.github.io/microViz
 #> ✔ Useful?  For citation details, run: `citation("microViz")`
 #> ✖ Silence? `suppressPackageStartupMessages(library(microViz))`
@@ -316,21 +316,15 @@ aitchison_perm <- aitchison_dists %>%
     n_processes = 1, n_perms = 99, # you should use at least 999!
     variables = "bmi_group"
   )
-#> 2024-06-30 10:55:51.862638 - Starting PERMANOVA with 99 perms with 1 processes
-#> 2024-06-30 10:55:51.919099 - Finished PERMANOVA
-```
-
-``` r
+#> 2024-09-14 16:41:43.803016 - Starting PERMANOVA with 99 perms with 1 processes
+#> 2024-09-14 16:41:43.858752 - Finished PERMANOVA
 
 # view the permanova results
 perm_get(aitchison_perm) %>% as.data.frame()
 #>            Df SumOfSqs         R2        F Pr(>F)
 #> bmi_group   2  109.170 0.04104336 4.686602   0.01
 #> Residual  219 2550.700 0.95895664       NA     NA
 #> Total     221 2659.869 1.00000000       NA     NA
-```
-
-``` r
 
 # view the info stored about the distance calculation
 info_get(aitchison_perm)
@@ -348,8 +342,8 @@ your permanova directly using the `ord_plot` function with constraints
 perm2 <- aitchison_dists %>%
   dist_permanova(variables = c("weight", "african", "sex"), seed = 321)
 #> Dropping samples with missings: 2
-#> 2024-06-30 10:55:51.930319 - Starting PERMANOVA with 999 perms with 1 processes
-#> 2024-06-30 10:55:53.574252 - Finished PERMANOVA
+#> 2024-09-14 16:41:43.869815 - Starting PERMANOVA with 999 perms with 1 processes
+#> 2024-09-14 16:41:45.512262 - Finished PERMANOVA
 ```
 
 We’ll visualise the effect of nationality and bodyweight on sample
@@ -398,9 +392,6 @@ psq <- dietswap %>%
   ) %>%
   tax_transform("identity", rank = "Genus")
 #> Proportional min_prevalence given: 0.1 --> min 23/222 samples.
-```
-
-``` r
 
 # randomly select 30 taxa from the 50 most abundant taxa (just for an example)
 set.seed(123)
@@ -445,7 +436,7 @@ and by participating in this project you agree to abide by its terms.
 sessionInfo()
 #> R version 4.4.0 (2024-04-24)
 #> Platform: aarch64-apple-darwin20
-#> Running under: macOS Sonoma 14.5
+#> Running under: macOS Sonoma 14.6.1
 #> 
 #> Matrix products: default
 #> BLAS:   /Library/Frameworks/R.framework/Versions/4.4-arm64/Resources/lib/libRblas.0.dylib 
@@ -461,9 +452,8 @@ sessionInfo()
 #> [1] stats     graphics  grDevices utils     datasets  methods   base     
 #> 
 #> other attached packages:
-#> [1] ggplot2_3.5.1        dplyr_1.1.4          phyloseq_1.48.0     
-#> [4] microViz_0.12.3.9000 testthat_3.2.1.1     devtools_2.4.5      
-#> [7] usethis_2.2.3       
+#> [1] ggplot2_3.5.1    dplyr_1.1.4      phyloseq_1.48.0  microViz_0.12.4 
+#> [5] testthat_3.2.1.1 devtools_2.4.5   usethis_3.0.0   
 #> 
 #> loaded via a namespace (and not attached):
 #>   [1] RColorBrewer_1.1-3      rstudioapi_0.16.0       jsonlite_1.8.8         
@@ -476,35 +466,35 @@ sessionInfo()
 #>  [22] commonmark_1.9.1        igraph_2.0.3            mime_0.12              
 #>  [25] lifecycle_1.0.4         iterators_1.0.14        pkgconfig_2.0.3        
 #>  [28] Matrix_1.7-0            R6_2.5.1                fastmap_1.2.0          
-#>  [31] clue_0.3-65             GenomeInfoDbData_1.2.12 shiny_1.8.1.1          
-#>  [34] digest_0.6.36           selectr_0.4-2           colorspace_2.1-0       
-#>  [37] S4Vectors_0.42.0        ps_1.7.6                pkgload_1.3.4          
+#>  [31] clue_0.3-65             GenomeInfoDbData_1.2.12 shiny_1.9.1            
+#>  [34] digest_0.6.36           selectr_0.4-2           colorspace_2.1-1       
+#>  [37] S4Vectors_0.42.1        ps_1.7.7                pkgload_1.4.0          
 #>  [40] seriation_1.5.5         vegan_2.6-6.1           labeling_0.4.3         
 #>  [43] fansi_1.0.6             httr_1.4.7              mgcv_1.9-1             
 #>  [46] compiler_4.4.0          remotes_2.5.0           doParallel_1.0.17      
-#>  [49] withr_3.0.0             viridis_0.6.5           pkgbuild_1.4.4         
-#>  [52] highr_0.11              MASS_7.3-60.2           sessioninfo_1.2.2      
+#>  [49] withr_3.0.1             viridis_0.6.5           pkgbuild_1.4.4         
+#>  [52] highr_0.11              MASS_7.3-61             sessioninfo_1.2.2      
 #>  [55] rjson_0.2.21            biomformat_1.32.0       permute_0.9-7          
 #>  [58] tools_4.4.0             chromote_0.2.0          ape_5.8                
-#>  [61] httpuv_1.6.15           glue_1.7.0              nlme_3.1-164           
+#>  [61] httpuv_1.6.15           glue_1.7.0              nlme_3.1-165           
 #>  [64] rhdf5filters_1.16.0     promises_1.3.0          gridtext_0.1.5         
 #>  [67] grid_4.4.0              Rtsne_0.17              cluster_2.1.6          
 #>  [70] reshape2_1.4.4          ade4_1.7-22             generics_0.1.3         
 #>  [73] gtable_0.3.5            microbiome_1.26.0       ca_0.71.1              
-#>  [76] tidyr_1.3.1             websocket_1.4.1         data.table_1.15.4      
+#>  [76] tidyr_1.3.1             websocket_1.4.2         data.table_1.15.4      
 #>  [79] xml2_1.3.6              utf8_1.2.4              XVector_0.44.0         
-#>  [82] BiocGenerics_0.50.0     markdown_1.12           foreach_1.5.2          
+#>  [82] BiocGenerics_0.50.0     markdown_1.13           foreach_1.5.2          
 #>  [85] pillar_1.9.0            stringr_1.5.1           later_1.3.2            
 #>  [88] circlize_0.4.16         splines_4.4.0           ggtext_0.1.2           
-#>  [91] lattice_0.22-6          survival_3.5-8          tidyselect_1.2.1       
-#>  [94] registry_0.5-1          ComplexHeatmap_2.20.0   Biostrings_2.72.0      
-#>  [97] miniUI_0.1.1.1          knitr_1.47              gridExtra_2.3          
-#> [100] IRanges_2.38.0          stats4_4.4.0            xfun_0.45              
+#>  [91] lattice_0.22-6          survival_3.7-0          tidyselect_1.2.1       
+#>  [94] registry_0.5-1          ComplexHeatmap_2.20.0   Biostrings_2.72.1      
+#>  [97] miniUI_0.1.1.1          knitr_1.48              gridExtra_2.3          
+#> [100] IRanges_2.38.1          stats4_4.4.0            xfun_0.46              
 #> [103] Biobase_2.64.0          matrixStats_1.3.0       brio_1.1.5             
-#> [106] stringi_1.8.4           UCSC.utils_1.0.0        yaml_2.3.8             
+#> [106] stringi_1.8.4           UCSC.utils_1.0.0        yaml_2.3.10            
 #> [109] evaluate_0.24.0         codetools_0.2-20        tibble_3.2.1           
 #> [112] cli_3.6.3               xtable_1.8-4            munsell_0.5.1          
-#> [115] processx_3.8.4          Rcpp_1.0.12             GenomeInfoDb_1.40.1    
+#> [115] processx_3.8.4          Rcpp_1.0.13             GenomeInfoDb_1.40.1    
 #> [118] png_0.1-8               parallel_4.4.0          ellipsis_0.3.2         
 #> [121] profvis_0.3.8           urlchecker_1.0.1        viridisLite_0.4.2      
 #> [124] scales_1.3.0            purrr_1.0.2             crayon_1.5.3           

diff --git a/man/comp_barplot.Rd b/man/comp_barplot.Rd
diff --git a/man/tax_sort.Rd b/man/tax_sort.Rd