From 625a66c1b92cb588fe0ce8ac2c53110b2f4ede73 Mon Sep 17 00:00:00 2001 From: SkylarMarvel Date: Thu, 21 Dec 2023 09:49:18 -0700 Subject: [PATCH 1/5] Step 00 of sensitivity replication --- script-replication/sensitivity.R | 211 ++++++++++++++++++++++++++++++- 1 file changed, 210 insertions(+), 1 deletion(-) diff --git a/script-replication/sensitivity.R b/script-replication/sensitivity.R index c533b66..b301041 100644 --- a/script-replication/sensitivity.R +++ b/script-replication/sensitivity.R @@ -7,7 +7,216 @@ library(tidyverse) ################################################################################ ################################################################################ -# TODO +library(httk) + +rm(list = ls()) + +set.seed(2345) + +MC_iter <- 10 +n_chem <- 2 +n_county <- 5 + +########## +# Data +load("~/dev/GeoTox/data/age_by_county_20220228.RData") +load("~/dev/GeoTox/data/obesity_by_county_20220228.RData") + +age.by.county <- lapply(age.by.county[1:n_county], \(x) x[1:MC_iter]) +obesity.by.county <- lapply(obesity.by.county[1:n_county], \(x) x[1:MC_iter]) + +in_chems <- c( + "98-86-2", "92-87-5", "92-52-4", "117-81-7", "133-06-2", "532-27-4", + "133-90-4", "57-74-9", "510-15-6", "94-75-7", "64-67-5", "132-64-9", + "106-46-7", "111-44-4", "79-44-7", "131-11-3", "77-78-1", "119-90-4", + "121-14-2", "534-52-1", "51-28-5", "121-69-7", "107-21-1", "51-79-6", + "76-44-8", "822-06-0", "77-47-4", "123-31-9", "72-43-5", "101-77-9", + "56-38-2", "82-68-8", "87-86-5", "1120-71-4", "114-26-1", "91-22-5", + "96-09-3", "95-80-7", "584-84-9", "95-95-4", "1582-09-8" +)[1:n_chem] + +######################################## +# Define population demographics for httk simulation +pop_demo <- cross_join( + tibble( + age_group = list( + c(0, 2), c(3, 5), c(6, 10), c(11, 15), c(16, 20), c(21, 30), + c(31, 40), c(41, 50), c(51, 60), c(61, 70), c(71, 100) + ) + ), + tibble( + weight = c("Normal", "Obese") + ) +) %>% + rowwise() %>% + mutate(age_min = age_group[1]) %>% + ungroup() + +######################################## +# Create wrapper function around httk steps +simulate_css <- function(chem.cas, agelim_years, weight_category, samples) { + + cat( + chem.cas, + paste0("(", paste(agelim_years, collapse = ", "), ")"), + weight_category, + "\n" + ) + + httkpop <- list( + method = "vi", + gendernum = NULL, + agelim_years = agelim_years, + agelim_months = NULL, + weight_category = weight_category, + reths = c( + "Mexican American", + "Other Hispanic", + "Non-Hispanic White", + "Non-Hispanic Black", + "Other" + ) + ) + + mcs <- create_mc_samples( + chem.cas = chem.cas, + samples = samples, + httkpop.generate.arg.list = httkpop, + suppress.messages = TRUE + ) + + css <- calc_analytic_css( + chem.cas = chem.cas, + parameters = mcs, + model = "3compartmentss", + suppress.messages = TRUE + ) + + list(css) +} + +######################################## +# Simulate Css values +simulated_css <- lapply(in_chems, function(casrn) { + pop_demo %>% + rowwise() %>% + mutate( + css = simulate_css(.env$casrn, age_group, weight, .env$MC_iter) + ) %>% + ungroup() +}) +simulated_css <- setNames(simulated_css, in_chems) + +######################################## +# Compute median Css values for different strata + +# Get median Css values for each age_group +simulated_css <- lapply( + simulated_css, + function(cas_df) { + cas_df %>% + nest(.by = age_group) %>% + mutate( + age_median_css = sapply(data, function(df) median(unlist(df$css))) + ) %>% + unnest(data) + } +) + +# Get median Css values for each weight +simulated_css <- lapply( + simulated_css, + function(cas_df) { + cas_df %>% + nest(.by = weight) %>% + mutate( + weight_median_css = sapply(data, function(df) median(unlist(df$css))) + ) %>% + unnest(data) %>% + arrange(age_min, weight) + } +) + +######################################## +# Create sensitivity data objects + +css_sensitivity_age <- lapply(age.by.county, function(county_age) { + do.call(cbind, lapply(simulated_css, function(cas_df) { + # Get age_median_css for corresponding county_age + age_df <- cas_df %>% distinct(age_min, age_median_css) + idx <- sapply( + county_age, + function(age) tail(which(age >= age_df$age_min), 1) + ) + age_df$age_median_css[idx] + })) +}) + +css_sensitivity_obesity <- lapply(obesity.by.county, function(county_weight) { + do.call(cbind, lapply(simulated_css, function(cas_df) { + # Get weight_median_css for corresponding county_weight + weight_df <- cas_df %>% distinct(weight, weight_median_css) + weight_df$weight_median_css[match(county_weight, weight_df$weight)] + })) +}) + +css_sensitivity_httk <- lapply(age.by.county, function(county_age) { + # TODO why round the median? + median_county_age <- round(median(county_age)) + do.call(cbind, lapply(simulated_css, function(cas_df) { + # Sample from "Normal" weight css values + css <- cas_df %>% + filter( + weight == "Normal", + median_county_age >= age_min + ) %>% + slice_tail(n = 1) %>% + pull(css) %>% unlist() + sample(css, length(county_age), replace = TRUE) + })) +}) + +#=============================================================================== +# Compare to GeoToxMIE +#=============================================================================== + +# modify lines 134-136 from "ncol = 41" to "ncol = length(in.chems)" +# modify line 159 from "j in 1:41" to "j in 1:length(in.chem)" + +set.seed(2345) + +MC.iter <- MC_iter +in.chems <- in_chems + +# Run lines 37-60, 66-120 + +all.equal( + css.list, + lapply( + lapply(simulated_css, "[[", "css"), + function(css) do.call(cbind, css) + ) +) + +# Run lines 128-200 + +all.equal( + css.sensitivity.age, + css_sensitivity_age, + check.attributes = FALSE +) + +all.equal( + css.sensitivity.obesity, + css_sensitivity_obesity, + check.attributes = FALSE +) + +all.equal( + css.sensitivity.httk, + css_sensitivity_httk, + check.attributes = FALSE +) ################################################################################ ################################################################################ From 87633a3e79ef167d223f515724b631d99d2527fa Mon Sep 17 00:00:00 2001 From: SkylarMarvel Date: Fri, 22 Dec 2023 10:04:17 -0700 Subject: [PATCH 2/5] Step 06 of sensitivity replication --- script-replication/sensitivity.R | 277 ++++++++++++++++++++++++++++++- 1 file changed, 276 insertions(+), 1 deletion(-) diff --git a/script-replication/sensitivity.R b/script-replication/sensitivity.R index b301041..30cdfbf 100644 --- a/script-replication/sensitivity.R +++ b/script-replication/sensitivity.R @@ -176,6 +176,8 @@ css_sensitivity_httk <- lapply(age.by.county, function(county_age) { })) }) +# TODO setNames to county FIPS for css_sensitivity_* + #=============================================================================== # Compare to GeoToxMIE #=============================================================================== @@ -512,4 +514,277 @@ all.equal(final.response.by.county, concentration_response) ################################################################################ ################################################################################ -# TODO +library(ggridges) +library(ggpubr) + +rm(list = ls()) + +########## +# Data +if (FALSE) { + + MC_iter <- 50 + n_county <- 20 + + load("~/dev/GeoTox/data/sensitivity_results_age_20220901.RData") + sensitivity.age <- final.response.by.county + load("~/dev/GeoTox/data/sensitivity_results_obesity_20220901.RData") + sensitivity.obesity <- final.response.by.county + load("~/dev/GeoTox/data/sensitivity_results_httk_20220901.RData") + sensitivity.httk <- final.response.by.county + load("~/dev/GeoTox/data/sensitivity_results_conc_resp_20220901.RData") + sensitivity.conc.resp <- final.response.by.county + load("~/dev/GeoTox/data/sensitivity_results_ext_conc_20220901.RData") + sensitivity.ext.conc <- final.response.by.county + load("~/dev/GeoTox/data/final_response_by_county_20220901.RData") + baseline <- final.response.by.county + + rm(final.response.by.county) + + get_subset <- function(x) { + lapply(x[1:n_county], \(df) df[1:MC_iter, ]) + } + + sensitivity.age <- get_subset(sensitivity.age) + sensitivity.obesity <- get_subset(sensitivity.obesity) + sensitivity.httk <- get_subset(sensitivity.httk) + sensitivity.conc.resp <- get_subset(sensitivity.conc.resp) + sensitivity.ext.conc <- get_subset(sensitivity.ext.conc) + baseline <- get_subset(baseline) + + save( + MC_iter, + n_county, + sensitivity.age, + sensitivity.obesity, + sensitivity.httk, + sensitivity.conc.resp, + sensitivity.ext.conc, + baseline, + file = "~/dev/GeoTox/outputs/sensitivity.RData" + ) +} else { + load("~/dev/GeoTox/outputs/sensitivity.RData") +} + +######################################## +# Create useful functions + +gather_results <- function(param) { + baseline_param <- switch( + param, + "GCA.Eff" = "GCA", + "IA.eff" = "IA", + "IA.HQ.10" = "HQ.10" + ) + colnames <- c( + "External Concentration", + "Toxicokinetic Parameters", + "Obesity", + "Age", + "Concentration-Response", + "Baseline" + ) + out <- cbind( + unlist(lapply(sensitivity.ext.conc, "[[", param)), + unlist(lapply(sensitivity.httk, "[[", param)), + unlist(lapply(sensitivity.obesity, "[[", param)), + unlist(lapply(sensitivity.age, "[[", param)), + unlist(lapply(sensitivity.conc.resp, "[[", param)), + unlist(lapply(baseline, "[[", baseline_param)) + ) + colnames(out) <- colnames + as.data.frame(out) %>% + pivot_longer(cols = everything()) %>% + mutate(name = factor(name, levels = colnames)) +} + +plot_gathered_results <- function(df, xlab = "", ylab = "", scale_x = TRUE) { + p <- df %>% + ggplot(aes(x = value, y = name, fill = name)) + + stat_density_ridges( + geom = "density_ridges_gradient", + calc_ecdf = TRUE, + quantiles = 4, + quantile_lines = FALSE + ) + + scale_fill_viridis_d(option = "C") + + theme(legend.position = "none") + + xlab(xlab) + + ylab(ylab) + + theme_minimal() + + coord_cartesian(clip = "off") + + theme( + text = element_text(size = 14), + legend.position="none", + axis.text = element_text(size = 14), + axis.title = element_text(size = 14) + ) + if (scale_x) { + p + scale_x_log10(labels = scales::label_math(10^.x, format = log10)) + } else { + p + } +} + +plot_gathered_results2 <- function(df, y = "y", xlab = "", ylab = "") { + df %>% + ggplot(aes(x = value, y = .env$y, fill = NA, color = name)) + + stat_density_ridges( + calc_ecdf = TRUE, + quantiles = 4, + quantile_lines = FALSE, + fill = NA, + linewidth = 1 + ) + + scale_x_log10(labels = scales::label_math(10^.x, format = log10)) + + scale_color_brewer(palette="Set2") + + theme(legend.position = "none") + + xlab(xlab) + + ylab(ylab) + + labs(color = 'Varying Parameter') + + theme_minimal() + + coord_cartesian(clip = "off") + + theme( + text = element_text(size = 14), + axis.text = element_text(size = 14), + axis.title = element_text(size = 14) + ) +} + +######################################## +# First set of plots + +df1 <- gather_results("GCA.Eff") +p1 <- plot_gathered_results( + df1, + xlab = paste( + "Z-score of Median Predicted Log2 Fold", + "Change mRNA Expression CYP1A1", + sep = "\n" + ), + ylab = "Varying Parameter" +) + +df2 <- gather_results("IA.eff") +p2 <- plot_gathered_results( + df2, + xlab = paste( + "Z-score of Median Predicted Log2 Fold", + "Change mRNA Expression CYP1A1", + sep = "\n" + ) +) + theme(axis.text.y = element_blank()) + +df3 <- gather_results("IA.HQ.10") +p3 <- plot_gathered_results( + df3, + xlab = paste( + "Meidan CYP1A1", + "Summed Risk Quotient", + sep = "\n" + ), + scale_x = FALSE +) + theme(axis.text.y = element_blank()) + +p1to3 <- ggarrange( + p1, p2, p3, + labels = c( "A", "B", "C"), + vjust = 1, + align = "h", + ncol = 3, nrow = 1, + widths = c(1, 0.5, 0.5), + font.label = list(size = 20, color = "black", face = "bold"), + common.legend = FALSE +) + +######################################## +# Second set of plots + +p4 <- plot_gathered_results2( + df2, + y = "CA/IA", + xlab = paste( + "Median Predicted Log2 Fold Change", + "mRNA Expression CYP1A1", + sep = "\n" + ) +) + +p5 <- plot_gathered_results2( + df3, + y = "RQ", + xlab = paste( + "Median CYP1A1", + "Summed Risk Quotient", + sep = "\n" + ) +) + +p4to5 <- ggarrange( + p4, p5, + labels = c("A", "B"), + vjust = 1, + align = "h", + ncol = 2, nrow = 1, + widths = c(0.5, 0.5), + font.label = list(size = 20, color = "black", face = "bold"), + common.legend = TRUE, + legend = "right" +) + +#=============================================================================== +# Compare to GeoToxMIE +#=============================================================================== + +library(reshape2) + +# Change scale_x_log10 +# comment out: +# scale_x_log10(labels = trans_format("log10", math_format(10^.x)))+ +# add line: +# scale_x_log10(labels = scales::label_math(10^.x, format = log10)) + +# +# Comment out any lines referencing "$X2" + +# Run lines 32-173 + +compare_gathered_data <- function(df_orig, df_new) { + all.equal( + df_orig %>% + mutate(name = as.character(Var2), value) %>% + select(name, value) %>% + arrange(name, value), + df_new %>% + mutate(name = as.character(name), value) %>% + arrange(name, value), + check.attributes = FALSE + ) +} + +compare_gathered_data(CR.melt, df1) +compare_gathered_data(CR.IA.melt, df2) +compare_gathered_data(HQ.IA.melt, df3) + +pdf("~/dev/GeoTox/outputs/temp1.1.pdf"); conc.resp.plot.GCA; invisible(dev.off()) +pdf("~/dev/GeoTox/outputs/temp1.2.pdf"); p1; invisible(dev.off()) + +pdf("~/dev/GeoTox/outputs/temp2.1.pdf"); conc.resp.plot.IA; invisible(dev.off()) +pdf("~/dev/GeoTox/outputs/temp2.2.pdf"); p2; invisible(dev.off()) + +pdf("~/dev/GeoTox/outputs/temp3.1.pdf"); HQ.plot.IA; invisible(dev.off()) +pdf("~/dev/GeoTox/outputs/temp3.2.pdf"); p3; invisible(dev.off()) + +pdf("~/dev/GeoTox/outputs/temp4.1.pdf"); composite; invisible(dev.off()) +pdf("~/dev/GeoTox/outputs/temp4.2.pdf"); p1to3; invisible(dev.off()) + +# Run lines 180-245 + +pdf("~/dev/GeoTox/outputs/temp5.1.pdf"); combined_plot; invisible(dev.off()) +pdf("~/dev/GeoTox/outputs/temp5.2.pdf"); p4; invisible(dev.off()) + +pdf("~/dev/GeoTox/outputs/temp6.1.pdf"); HQ_plot; invisible(dev.off()) +pdf("~/dev/GeoTox/outputs/temp6.2.pdf"); p5; invisible(dev.off()) + +pdf("~/dev/GeoTox/outputs/temp7.1.pdf"); composite2; invisible(dev.off()) +pdf("~/dev/GeoTox/outputs/temp7.2.pdf"); p4to5; invisible(dev.off()) From 8f5ba4bfca023cdf18989c452284731d9af48714 Mon Sep 17 00:00:00 2001 From: SkylarMarvel Date: Wed, 27 Dec 2023 11:20:27 -0700 Subject: [PATCH 3/5] Step 06 of cyp1a1-pipeline replication --- script-replication/cyp1a1-pipeline.R | 228 ++++++++++++++++++++++++++- 1 file changed, 227 insertions(+), 1 deletion(-) diff --git a/script-replication/cyp1a1-pipeline.R b/script-replication/cyp1a1-pipeline.R index dd903e7..3dae71c 100644 --- a/script-replication/cyp1a1-pipeline.R +++ b/script-replication/cyp1a1-pipeline.R @@ -577,4 +577,230 @@ all.equal( ################################################################################ ################################################################################ -# TODO +library(sf) +library(ggpubr) + +rm(list = ls()) + +########## +# Data +load("~/dev/GeoTox/data/final_response_by_county_20220901.RData") +load("~/dev/GeoTox/data/FIPS_by_county.RData") # FIPS shouldn't be needed +county_2014 <- st_read( + "~/dev/GeoTox/data/cb_2014_us_county_5m/cb_2014_us_county_5m.shp", + quiet = TRUE +) + +states <- st_as_sf(maps::map("state", plot = FALSE, fill = TRUE)) + +county <- county_2014 %>% + filter(!(STATEFP %in% c("02", "15", "60", "66", "69", "72", "78"))) %>% + mutate(FIPS = str_c(STATEFP, COUNTYFP), .before = 1) + +######################################## +# response_by_county + +# TODO list should already have names() == FIPS +# Make sure leading 0s are there for joining with county +names(final.response.by.county) <- sprintf("%05d", FIPS) + +response_by_county <- tibble( + FIPS = names(final.response.by.county), + data = final.response.by.county +) %>% + unnest(cols = data) %>% + pivot_longer(-FIPS, names_to = "health_measure") %>% + mutate( + health_measure = factor( + health_measure, + levels = c("GCA", "IA", "HQ.10") + ) + ) + +######################################## +# IVIVE summary + +# Summarize within counties +ivive_summary_df <- response_by_county %>% + summarize( + median = median(value , na.rm = TRUE), + x95_quantile = quantile(value, 0.95, na.rm = TRUE, names = FALSE), + x5_quantile = quantile(value, 0.05, na.rm = TRUE, names = FALSE), + .by = c(FIPS, health_measure) + ) + +ivive_summary_df_stack <- ivive_summary_df %>% + pivot_longer( + cols = c(median, x95_quantile, x5_quantile), + names_to = "variable" + ) %>% + mutate( + variable = factor( + variable, + levels = c("x5_quantile", "median", "x95_quantile") + ) + ) + +# Summarize across counties +ivive_summary_stats <- ivive_summary_df_stack %>% + summarize( + min = min(value), + median = median(value), + max = max(value), + .by = c(variable, health_measure)) + +######################################## +# Health measure histogram + +hist_HM <- ggplot(ivive_summary_df_stack, aes(x = log10(value))) + + geom_histogram(bins = 500) + + facet_grid( + health_measure ~ variable, + labeller = labeller( + health_measure = c( + "GCA" = "CA Response", + "IA" = "IA Response", + "HQ.10" = "RQ" + ), + variable = c( + "x5_quantile" = "5th Percentile", + "median" = "Median", + "x95_quantile" = "95th Percentile" + ) + ) + ) + + theme_minimal() + + ylab("Count") + + xlab("Log10 Risk Metric Value") + +######################################## +# County heatmaps + +ivive_county_sf <- ivive_summary_df_stack %>% + left_join(county, by = join_by(FIPS), keep = FALSE) %>% + st_as_sf() %>% + st_zm() # Was having errors until adding this + +make_county_heatmap <- function(df, legend_name) { + ggplot(df, aes(fill = value)) + + # Plot fill, hide county borders + # geom_sf(lwd = 0) + # This still showed up in .pdf images + geom_sf(color = NA) + + # Add state borders + geom_sf(data = states, fill = NA, size = 0.15) + + # Create separate plots for each variable + facet_wrap( + ~variable, + ncol = 3, + labeller = labeller( + variable = c( + "x5_quantile" = "5th Percentile", + "median" = "Median", + "x95_quantile" = "95th Percentile" + ) + ) + ) + + # Add fill scale + scale_fill_viridis_c( + name = legend_name, + direction = -1, + option = "A", + trans = "sqrt" + ) + + # Theme + theme_bw() + + theme( + text = element_text(size = 12), + legend.text = element_text(size = 8) + ) +} + +GCA_Eff_plot <- make_county_heatmap( + ivive_county_sf %>% filter(health_measure == "GCA"), + paste("Predicted Response", "Log2 Fold Change", "mRNA Expression",sep = "\n") +) + +IA_Eff_plot <- make_county_heatmap( + ivive_county_sf %>% filter(health_measure == "IA"), + paste("Predicted Response", "Log2 Fold Change", "mRNA Expression",sep = "\n") +) + +HQ_10_plot <- make_county_heatmap( + ivive_county_sf %>% filter(health_measure == "HQ.10"), + "Risk Quotient" +) + +all_plots <- ggarrange( + GCA_Eff_plot , IA_Eff_plot, HQ_10_plot, + labels = c( "A", "B", "C"), + vjust = 1, + align = "v", + nrow = 3, + font.label = list(size = 20, color = "black", face = "bold"), + common.legend = FALSE +) + +#=============================================================================== +# Compare to GeoToxMIE +#=============================================================================== + +library(reshape2) + +load("~/dev/GeoTox/data/final_response_by_county_20220901.RData") + +# Run 25-27, 35-80, 85-97 + +all.equal( + response.by.county %>% + select(health_measure, value), + response_by_county %>% + arrange(as.numeric(FIPS), health_measure) %>% + select(health_measure, value), + check.attributes = FALSE +) + +all.equal( + summary_stats, + ivive_summary_stats %>% + arrange(variable, health_measure) %>% + select(variable, health_measure, median, min, max), + check.attributes = FALSE +) + +pdf("~/dev/GeoTox/outputs/cyp1a1_hist_orig.pdf") +histogram_HM +invisible(dev.off()) + +pdf("~/dev/GeoTox/outputs/cyp1a1_hist_new.pdf") +hist_HM +invisible(dev.off()) + +# Run lines 107-108 + +all.equal( + ivive_county_cyp1a1_up_sf %>% + arrange(STATEFP, COUNTYFP, health_measure, variable) %>% + pull(value), + ivive_county_sf %>% + arrange(STATEFP, COUNTYFP, health_measure, variable) %>% + pull(value) +) + +# Needed to add st_zm(), otherwise plot would have error +ivive_county_cyp1a1_up_sf <- ivive_county_cyp1a1_up_sf %>% st_zm() + +# Run lines 110-112, 130, 157, 170 + +ggsave( + "~/dev/GeoTox/outputs/cyp1a1_heatmap_orig.tiff", + composite_plot, width = 40, height = 25, units = "cm", dpi = 300 +) + +ggsave( + "~/dev/GeoTox/outputs/cyp1a1_heatmap_new.tiff", + all_plots, width = 40, height = 25, units = "cm", dpi = 300 +) + +pdf("~/dev/GeoTox/outputs/cyp1a1_heatmap_new.pdf", width = 15.75, height = 10) +all_plots +invisible(dev.off()) From 5626f75e51d85e5b9064d2adccd001193a798968 Mon Sep 17 00:00:00 2001 From: SkylarMarvel Date: Fri, 29 Dec 2023 11:44:47 -0700 Subject: [PATCH 4/5] Step 01 of cyp1a1-figures replication --- script-replication/cyp1a1-figures.R | 323 ++++++++++++++++++++++++++++ 1 file changed, 323 insertions(+) create mode 100644 script-replication/cyp1a1-figures.R diff --git a/script-replication/cyp1a1-figures.R b/script-replication/cyp1a1-figures.R new file mode 100644 index 0000000..16c8d67 --- /dev/null +++ b/script-replication/cyp1a1-figures.R @@ -0,0 +1,323 @@ +library(devtools) +library(tidyverse) + +################################################################################ +################################################################################ +# 01-CYP1A1-figures.R +################################################################################ +################################################################################ + +library(sf) +library(ggpubr) + +rm(list = ls()) + +########## +# Data + +# TODO should the 112 be kept (i.e. "Benzidine 112")? It is in the MIE script +# fit_params is from step 2 of conc-resp.R +fit_params <- readRDS("~/dev/GeoTox/outputs/fit_params.rds") %>% + mutate( + chnm = case_when( + chnm == "C.I. Disperse Black 6" ~ "3,3'-Dimethoxybenzidine", + chnm == "C.I. Azoic Diazo Component 112" ~ "Benzidine", + .default = chnm + ) + ) + +nata_df <- read.csv("~/dev/GeoTox/data/2014_NATA_CONCS.csv") +nata_chemicals <- read.csv("~/dev/GeoTox/data/NATA_pollutant_names_casrn.csv") + +# TODO different ice data source files? +# The "ice_data" in [01-02]-Conc-Response-Fit.R is from: +# load("~/dev/GeoTox/data/LTEA_HepaRG_CYP1A1_up 41 chems for Kyle 220131.RData") +load("~/dev/GeoTox/data/210105_ICE_cHTS_invitrodbv33.Rdata") +ice_data <- subset(ice_data, new_hitc == 1) + +states <- st_as_sf(maps::map("state", plot = FALSE, fill = TRUE)) + +county_2014 <- st_read( + "~/dev/GeoTox/data/cb_2014_us_county_5m/cb_2014_us_county_5m.shp", + quiet = TRUE +) + +county <- county_2014 %>% + filter(!(STATEFP %in% c("02", "15", "60", "66", "69", "72", "78"))) %>% + mutate(FIPS = str_c(STATEFP, COUNTYFP), .before = 1) %>% + st_zm() %>% + # TODO why simplify here but not in cyp1a1-pipeline? + # "preserveTopology = FALSE" was dropped + st_simplify(dTolerance = 1000) + +nata_chemicals_2 <- nata_chemicals %>% + mutate( + web_name = str_to_title(web_name), + web_name = str_replace(web_name, "\\s+\\([^\\)]+\\)", "") + ) + +######################################## +# Optional, the objects aren't used later on + +# TODO same casrn but different names +x <- nata_chemicals_2 %>% distinct(casrn, web_name) %>% filter(!is.na(casrn)) +(x <- x$casrn[duplicated(x$casrn)]) +nata_chemicals_2 %>% filter(casrn %in% x) + +nata_tox21_2 <- inner_join( + ice_data, + nata_chemicals_2, + join_by(casn == casrn), + relationship = "many-to-many" # due to duplicate casrn +) + +nata_tox21_count_2 <- nata_tox21_2 %>% + summarize(chemical_count = n(), .by = aenm) %>% + rename("assay name" = aenm) + +######################################## +# Aggregate NATA data + +norm_fn = function(x) {(x - min(x)) / (max(x) - min(x))} + +nata_county_stack_2 <- nata_df %>% + # Aggregate by county + summarize(across(ACETALD:XYLENES, mean), .by = STCOFIPS) %>% + pivot_longer(-STCOFIPS) %>% + rename(FIPS = STCOFIPS, chemical = name, concentration = value) %>% + # Add casrn and web_name + left_join(nata_chemicals_2, by = join_by(chemical == smoke_name)) %>% + # Limit to chemicals with fit data (MIE line 106) + filter(casrn %in% fit_params$casn) %>% + # Remove any missing data + na.omit() %>% + # Normalize air concentrations + # This was very convoluted in the MIE script (MIE lines 89-95) + group_by(chemical) %>% + mutate(concentration_norm = norm_fn(concentration)) %>% + ungroup() + +nata_county_cyp1a1_up_2 <- nata_county_stack_2 %>% + left_join( + ice_data %>% filter(aenm == "LTEA_HepaRG_CYP1A1_up"), + by = join_by(casrn == casn) + ) %>% + mutate(conc_gt_0 = as.integer(concentration > 0)) + +######################################## +# heatmap_cyp1a1_up_plot + +heatmap_df_2 <- nata_county_cyp1a1_up_2 %>% + left_join(fit_params, by = join_by(casrn == casn, chnm)) %>% + summarize(mean = mean(10^logAC50), .by = c(aenm, chnm)) + +heatmap_cyp1a1_up_plot <- ggplot( + heatmap_df_2, + aes(x = fct_reorder(chnm, mean), y = aenm, fill = mean) +) + + geom_tile() + + theme_bw() + + scale_fill_viridis_c(direction = 1, option = "A") + + labs(y = "Assay", x = "Chemical", fill = "EC50 (μM)") + + theme( + axis.text.x = element_text(angle = 45, hjust = 1), + axis.text.y = element_blank(), + axis.title.y = element_blank(), + axis.ticks.y = element_blank(), + legend.title=element_text(size = 14), + text = element_text(size = 18), + aspect.ratio = 1.5 / 10, + plot.margin = margin(10, 10, 10, 100) + ) + +######################################## +# cyp1a1_up_all_plot + +cyp1a1_up_nata_county_sf_2 <- nata_county_cyp1a1_up_2 %>% + left_join(fit_params, by = join_by(casrn == casn, chnm)) %>% + right_join(county %>% mutate(FIPS = as.numeric(FIPS)), by = join_by(FIPS)) %>% + st_as_sf() + +cyp1a1_up_all_plot <- ggplot( + cyp1a1_up_nata_county_sf_2, + aes(fill = concentration_norm) +) + + geom_sf(color = NA) + + facet_wrap(vars(fct_reorder(chnm, logAC50)), ncol = 7) + + # Recolor subset as light grey + geom_sf( + data = cyp1a1_up_nata_county_sf_2 %>% filter(concentration == 0), + aes(fill = concentration), + fill = "light grey", + color = "light grey", + lwd = 0.01 + ) + + # State borders + geom_sf(data = states, fill = NA, lwd = 0.15) + + scale_fill_viridis_c( + name = "Normalized Concentration", + direction = -1, + option = "A" + ) + + theme_bw() + + theme( + axis.ticks.x = element_blank(), + axis.title.x = element_blank(), + axis.text.x = element_blank(), + axis.ticks.y = element_blank(), + axis.title.y = element_blank(), + axis.text.y = element_blank(), + legend.text = element_text(angle = 45), + legend.position = "bottom", + strip.text = element_text(size = 10), + text = element_text(size = 18) + ) + +######################################## +# cyp1a1_up_all_plot with relative concentration scales + +cyp1a1_up_all_relative_plot <- lapply( + unique(cyp1a1_up_nata_county_sf_2$chnm), + function(x) cyp1a1_up_nata_county_sf_2 %>% + filter(web_name == x) %>% + ggplot(aes(fill = concentration)) + + geom_sf(color = NA) + + theme_bw() + + geom_sf(data = states, fill = NA, size = 0.15) + + scale_fill_viridis_c( + name = "Conc. (ug/m3)", + direction = -1, + option = "A" + ) + + theme_bw() + + theme(axis.text = element_blank(), axis.ticks = element_blank()) + + ggtitle(x) +) + +plot2_2 <- cowplot::plot_grid( + plotlist = cyp1a1_up_all_relative_plot, label_size = 12, ncol = 8 +) + +pdf("~/dev/GeoTox/outputs/cyp1a1_figures_1_facet.pdf", width = 24, height = 20) +plot2_2 +invisible(dev.off()) + +######################################## +# count_cyp1a1_up_map + +count_county_cyp1a1_up_sf_2 <- nata_county_cyp1a1_up_2 %>% + summarize(count = sum(conc_gt_0), .by = FIPS) %>% + right_join(county %>% mutate(FIPS = as.numeric(FIPS)), by = join_by(FIPS)) %>% + st_as_sf() + +count_cyp1a1_up_map_2 <- ggplot( + count_county_cyp1a1_up_sf_2, + aes(fill = count) +) + + geom_sf(color = NA) + + theme_bw() + + geom_sf(data = states, fill = NA, size=0.15) + + scale_fill_viridis_c(name = "# Chemicals", direction = -1, option = "A") + + theme( + axis.ticks.x = element_blank(), + axis.text.x = element_blank(), + axis.ticks.y = element_blank(), + axis.text.y = element_blank(), + legend.position = "right", + text = element_text(size = 18), + aspect.ratio = 5 / 10 + ) + +######################################## +# combo + +cyp1a1_combo <- ggarrange( + cyp1a1_up_all_plot, count_cyp1a1_up_map_2, heatmap_cyp1a1_up_plot, + labels = c("A", "B", "C"), + widths = c(1, 0.65, 0.5), + ncol = 1, nrow = 3, + heights = c(1, 0.65, 0.5), + align = "v", + font.label = list(size = 20, color = "black", face = "bold"), + common.legend = FALSE +) + +pdf("~/dev/GeoTox/outputs/cyp1a1_figures_1.pdf", width = 17.7, height = 21.7) +cyp1a1_combo +invisible(dev.off()) + +ggsave( + "~/dev/GeoTox/outputs/cyp1a1_figures_1.tiff", + cyp1a1_combo, width = 45, height = 55, units = "cm", dpi = 300 +) + +# TODO failes to create plot +sjPlot::save_plot( + "~/dev/GeoTox/outputs/cyp1a1_figures_1_sjPlot.tif", + cyp1a1_combo, width = 45, height = 55, dpi = 300 +) + +#=============================================================================== +# Compare to GeoToxMIE +#=============================================================================== + +library(reshape2) + +load("~/dev/GeoTox/data/Hill_2param_model_fit.RData") +hill2.fit <- df.params; rm(df.params) + +# Run lines 24-27 + +hill2.fit$chnm <- str_replace_all(hill2.fit$chnm, "Benzidine 112", "Benzidine") + +all.equal( + hill2.fit, + fit_params %>% select(!ends_with("imputed")), + check.attributes = FALSE +) + +# Run lines 49, 56-57 + +# Optional run line 59-71, these objects are not used later + +all.equal( + nata_tox21 %>% arrange(casn, aeid, smoke_name) %>% select(-presence), + nata_tox21_2 %>% arrange(casn, aeid, smoke_name), + check.attributes = FALSE +) + +all.equal( + nata_tox21_count %>% arrange(`assay name`), + nata_tox21_count_2 %>% arrange(`assay name`) +) + +# Run lines 76-106 + +all.equal( + nata_county_stack %>% arrange(FIPS, casrn), + nata_county_stack_2 %>% arrange(FIPS, casrn), + check.attributes = FALSE +) + +# Run lines 109-125 + +all.equal( + summary(heatmap_df$mean), + summary(heatmap_df_2$mean) +) + +# Skip remaining lines + +# Compare +# +# "~/dev/GeoTox/outputs/cyp1a1_figures_1.pdf" +# vs +# "Figure3_v20220420.tif" + +################################################################################ +################################################################################ +# 02-CYP1A1-figures.R +################################################################################ +################################################################################ + +# TODO From 21932ed4039bd9fd5295604c4e56bd8fd31ffcf6 Mon Sep 17 00:00:00 2001 From: SkylarMarvel Date: Fri, 5 Jan 2024 13:43:31 -0700 Subject: [PATCH 5/5] Step 02 of cyp1a1-figures replication --- script-replication/cyp1a1-figures.R | 99 +++++++++++++++++++++++++++-- 1 file changed, 94 insertions(+), 5 deletions(-) diff --git a/script-replication/cyp1a1-figures.R b/script-replication/cyp1a1-figures.R index 16c8d67..374ed8b 100644 --- a/script-replication/cyp1a1-figures.R +++ b/script-replication/cyp1a1-figures.R @@ -45,10 +45,7 @@ county_2014 <- st_read( county <- county_2014 %>% filter(!(STATEFP %in% c("02", "15", "60", "66", "69", "72", "78"))) %>% mutate(FIPS = str_c(STATEFP, COUNTYFP), .before = 1) %>% - st_zm() %>% - # TODO why simplify here but not in cyp1a1-pipeline? - # "preserveTopology = FALSE" was dropped - st_simplify(dTolerance = 1000) + st_zm() nata_chemicals_2 <- nata_chemicals %>% mutate( @@ -320,4 +317,96 @@ all.equal( ################################################################################ ################################################################################ -# TODO +rm(list = ls()) + +########## +# Data +load("~/dev/GeoTox/data/LTEA_HepaRG_CYP1A1_up 41 chems for Kyle 220131.RData") +ice_data <- cdat; rm(cdat) +fit_params <- readRDS("~/dev/GeoTox/outputs/fit_params.rds") + + +######################################## +# Create plot + +# TODO export tcplHillVal? +tcplHillVal <- function(conc, tp, ga, gw, bt = 0) { + bt + (tp - bt) / (1 + (ga / conc)^gw) +} + +x <- 10^seq(-3, 3, length.out = 100) + +df <- fit_params %>% + rowwise() %>% + mutate( + curves = list( + data.frame( + x = .env$x, + y_mean = tcplHillVal(.env$x, tp, 10^logAC50, 1), + # TODO both upper and lower had "-" for AC50 + # Is there a standard approach for computing fitted value confidence + # intervals from optim outputs? + y_lower = tcplHillVal( + .env$x, tp - tp.sd * 1.96, 10^(logAC50 + logAC50.sd * 1.96), 1 + ), + y_upper = tcplHillVal( + .env$x, tp + tp.sd * 1.96, 10^(logAC50 - logAC50.sd * 1.96), 1 + ) + ) + ) + ) %>% + ungroup() %>% + select(chnm, curves) %>% + unnest(cols = curves) + +p <- ggplot(df, aes(x = x)) + + geom_line(aes(y = y_lower), color = "red") + + geom_line(aes(y = y_upper), color = "red") + + geom_line(aes(y = y_mean), color = "black", lwd = 1) + + geom_point( + data = ice_data %>% mutate(x = 10^logc, y = resp), + aes(x = x, y = y) + ) + + facet_wrap(~chnm, scales = "free") + + scale_x_log10(labels = scales::label_math(10^.x, format = log10)) + + theme_bw() + + theme( + legend.position = "none", + text = element_text(size = 16) + )+ + ylab("Response") + + xlab("Concentration") + +#=============================================================================== +# Compare to GeoToxMIE +#=============================================================================== + +library(reshape2) + +hill2 <- fit_params + +# Run lines 36, 51-162, 172 + +SI_figure <- ggplot()+ + geom_line(data = m, aes(x = x, y = lower), color="red")+ + geom_line(data = m, aes(x = x, y = upper), color="red")+ + geom_line(data = m, aes(x = x, y = mean), color="black", lwd = 1)+ + geom_point( + data = ice_data %>% mutate(x = 10^logc, y = resp), + aes(x = x, y = y) + ) + + facet_wrap(vars(chnm), scales= "free")+ + scale_x_log10(labels = scales::label_math(10^.x, format = log10)) + + theme_bw()+ + theme(legend.position = "none", + text = element_text(size = 16))+ + ylab("Response")+ + xlab("Concentration") + +pdf("~/dev/GeoTox/outputs/SI_dr.pdf", width = 20, heigh = 16) +SI_figure +invisible(dev.off()) + +pdf("~/dev/GeoTox/outputs/SI_dr_new.pdf", width = 20, heigh = 16) +p +invisible(dev.off())