From 625a66c1b92cb588fe0ce8ac2c53110b2f4ede73 Mon Sep 17 00:00:00 2001
From: SkylarMarvel <skylar.marvel@gmail.com>
Date: Thu, 21 Dec 2023 09:49:18 -0700
Subject: [PATCH 1/5] Step 00 of sensitivity replication

---
 script-replication/sensitivity.R | 211 ++++++++++++++++++++++++++++++-
 1 file changed, 210 insertions(+), 1 deletion(-)

diff --git a/script-replication/sensitivity.R b/script-replication/sensitivity.R
index c533b66..b301041 100644
--- a/script-replication/sensitivity.R
+++ b/script-replication/sensitivity.R
@@ -7,7 +7,216 @@ library(tidyverse)
 ################################################################################
 ################################################################################
 
-# TODO
+library(httk)
+
+rm(list = ls())
+
+set.seed(2345)
+
+MC_iter <- 10
+n_chem <- 2
+n_county <- 5
+
+##########
+# Data
+load("~/dev/GeoTox/data/age_by_county_20220228.RData")
+load("~/dev/GeoTox/data/obesity_by_county_20220228.RData")
+
+age.by.county <- lapply(age.by.county[1:n_county], \(x) x[1:MC_iter])
+obesity.by.county <- lapply(obesity.by.county[1:n_county], \(x) x[1:MC_iter])
+
+in_chems <- c(
+  "98-86-2", "92-87-5", "92-52-4", "117-81-7", "133-06-2", "532-27-4",
+  "133-90-4", "57-74-9", "510-15-6", "94-75-7", "64-67-5", "132-64-9",
+  "106-46-7", "111-44-4", "79-44-7", "131-11-3", "77-78-1", "119-90-4",
+  "121-14-2", "534-52-1", "51-28-5", "121-69-7", "107-21-1", "51-79-6",
+  "76-44-8", "822-06-0", "77-47-4", "123-31-9", "72-43-5", "101-77-9",
+  "56-38-2", "82-68-8", "87-86-5", "1120-71-4", "114-26-1", "91-22-5",
+  "96-09-3", "95-80-7", "584-84-9", "95-95-4", "1582-09-8"
+)[1:n_chem]
+
+########################################
+# Define population demographics for httk simulation
+pop_demo <- cross_join(
+  tibble(
+    age_group = list(
+      c(0, 2), c(3, 5), c(6, 10), c(11, 15), c(16, 20), c(21, 30),
+      c(31, 40), c(41, 50), c(51, 60), c(61, 70), c(71, 100)
+    )
+  ),
+  tibble(
+    weight = c("Normal", "Obese")
+  )
+) %>%
+  rowwise() %>%
+  mutate(age_min = age_group[1]) %>%
+  ungroup()
+
+########################################
+# Create wrapper function around httk steps
+simulate_css <- function(chem.cas, agelim_years, weight_category, samples) {
+
+  cat(
+    chem.cas,
+    paste0("(", paste(agelim_years, collapse = ", "), ")"),
+    weight_category,
+    "\n"
+  )
+
+  httkpop <- list(
+    method = "vi",
+    gendernum = NULL,
+    agelim_years = agelim_years,
+    agelim_months = NULL,
+    weight_category = weight_category,
+    reths = c(
+      "Mexican American",
+      "Other Hispanic",
+      "Non-Hispanic White",
+      "Non-Hispanic Black",
+      "Other"
+    )
+  )
+
+  mcs <- create_mc_samples(
+    chem.cas = chem.cas,
+    samples = samples,
+    httkpop.generate.arg.list = httkpop,
+    suppress.messages = TRUE
+  )
+
+  css <- calc_analytic_css(
+    chem.cas = chem.cas,
+    parameters = mcs,
+    model = "3compartmentss",
+    suppress.messages = TRUE
+  )
+
+  list(css)
+}
+
+########################################
+# Simulate Css values
+simulated_css <- lapply(in_chems, function(casrn) {
+  pop_demo %>%
+    rowwise() %>%
+    mutate(
+      css = simulate_css(.env$casrn, age_group, weight, .env$MC_iter)
+    ) %>%
+    ungroup()
+})
+simulated_css <- setNames(simulated_css, in_chems)
+
+########################################
+# Compute median Css values for different strata
+
+# Get median Css values for each age_group
+simulated_css <- lapply(
+  simulated_css,
+  function(cas_df) {
+    cas_df %>%
+      nest(.by = age_group) %>%
+      mutate(
+        age_median_css = sapply(data, function(df) median(unlist(df$css)))
+      ) %>%
+      unnest(data)
+  }
+)
+
+# Get median Css values for each weight
+simulated_css <- lapply(
+  simulated_css,
+  function(cas_df) {
+    cas_df %>%
+      nest(.by = weight) %>%
+      mutate(
+        weight_median_css = sapply(data, function(df) median(unlist(df$css)))
+      ) %>%
+      unnest(data) %>%
+      arrange(age_min, weight)
+  }
+)
+
+########################################
+# Create sensitivity data objects
+
+css_sensitivity_age <- lapply(age.by.county, function(county_age) {
+  do.call(cbind, lapply(simulated_css, function(cas_df) {
+    # Get age_median_css for corresponding county_age
+    age_df <- cas_df %>% distinct(age_min, age_median_css)
+    idx <- sapply(
+      county_age,
+      function(age) tail(which(age >= age_df$age_min), 1)
+    )
+    age_df$age_median_css[idx]
+  }))
+})
+
+css_sensitivity_obesity <- lapply(obesity.by.county, function(county_weight) {
+  do.call(cbind, lapply(simulated_css, function(cas_df) {
+    # Get weight_median_css for corresponding county_weight
+    weight_df <- cas_df %>% distinct(weight, weight_median_css)
+    weight_df$weight_median_css[match(county_weight, weight_df$weight)]
+  }))
+})
+
+css_sensitivity_httk <- lapply(age.by.county, function(county_age) {
+  # TODO why round the median?
+  median_county_age <- round(median(county_age))
+  do.call(cbind, lapply(simulated_css, function(cas_df) {
+    # Sample from "Normal" weight css values
+    css <- cas_df %>%
+      filter(
+        weight == "Normal",
+        median_county_age >= age_min
+      ) %>%
+      slice_tail(n = 1) %>%
+      pull(css) %>% unlist()
+    sample(css, length(county_age), replace = TRUE)
+  }))
+})
+
+#===============================================================================
+# Compare to GeoToxMIE
+#===============================================================================
+
+# modify lines 134-136 from "ncol = 41" to "ncol = length(in.chems)"
+# modify line 159 from "j in 1:41" to "j in 1:length(in.chem)"
+
+set.seed(2345)
+
+MC.iter <- MC_iter
+in.chems <- in_chems
+
+# Run lines 37-60, 66-120
+
+all.equal(
+  css.list,
+  lapply(
+    lapply(simulated_css, "[[", "css"),
+    function(css) do.call(cbind, css)
+  )
+)
+
+# Run lines 128-200
+
+all.equal(
+  css.sensitivity.age,
+  css_sensitivity_age,
+  check.attributes = FALSE
+)
+
+all.equal(
+  css.sensitivity.obesity,
+  css_sensitivity_obesity,
+  check.attributes = FALSE
+)
+
+all.equal(
+  css.sensitivity.httk,
+  css_sensitivity_httk,
+  check.attributes = FALSE
+)
 
 ################################################################################
 ################################################################################

From 87633a3e79ef167d223f515724b631d99d2527fa Mon Sep 17 00:00:00 2001
From: SkylarMarvel <skylar.marvel@gmail.com>
Date: Fri, 22 Dec 2023 10:04:17 -0700
Subject: [PATCH 2/5] Step 06 of sensitivity replication

---
 script-replication/sensitivity.R | 277 ++++++++++++++++++++++++++++++-
 1 file changed, 276 insertions(+), 1 deletion(-)

diff --git a/script-replication/sensitivity.R b/script-replication/sensitivity.R
index b301041..30cdfbf 100644
--- a/script-replication/sensitivity.R
+++ b/script-replication/sensitivity.R
@@ -176,6 +176,8 @@ css_sensitivity_httk <- lapply(age.by.county, function(county_age) {
   }))
 })
 
+# TODO setNames to county FIPS for css_sensitivity_*
+
 #===============================================================================
 # Compare to GeoToxMIE
 #===============================================================================
@@ -512,4 +514,277 @@ all.equal(final.response.by.county, concentration_response)
 ################################################################################
 ################################################################################
 
-# TODO
+library(ggridges)
+library(ggpubr)
+
+rm(list = ls())
+
+##########
+# Data
+if (FALSE) {
+
+  MC_iter <- 50
+  n_county <- 20
+
+  load("~/dev/GeoTox/data/sensitivity_results_age_20220901.RData")
+  sensitivity.age <- final.response.by.county
+  load("~/dev/GeoTox/data/sensitivity_results_obesity_20220901.RData")
+  sensitivity.obesity <- final.response.by.county
+  load("~/dev/GeoTox/data/sensitivity_results_httk_20220901.RData")
+  sensitivity.httk <- final.response.by.county
+  load("~/dev/GeoTox/data/sensitivity_results_conc_resp_20220901.RData")
+  sensitivity.conc.resp <- final.response.by.county
+  load("~/dev/GeoTox/data/sensitivity_results_ext_conc_20220901.RData")
+  sensitivity.ext.conc <- final.response.by.county
+  load("~/dev/GeoTox/data/final_response_by_county_20220901.RData")
+  baseline <- final.response.by.county
+
+  rm(final.response.by.county)
+
+  get_subset <- function(x) {
+    lapply(x[1:n_county], \(df) df[1:MC_iter, ])
+  }
+
+  sensitivity.age       <- get_subset(sensitivity.age)
+  sensitivity.obesity   <- get_subset(sensitivity.obesity)
+  sensitivity.httk      <- get_subset(sensitivity.httk)
+  sensitivity.conc.resp <- get_subset(sensitivity.conc.resp)
+  sensitivity.ext.conc  <- get_subset(sensitivity.ext.conc)
+  baseline              <- get_subset(baseline)
+
+  save(
+    MC_iter,
+    n_county,
+    sensitivity.age,
+    sensitivity.obesity,
+    sensitivity.httk,
+    sensitivity.conc.resp,
+    sensitivity.ext.conc,
+    baseline,
+    file = "~/dev/GeoTox/outputs/sensitivity.RData"
+  )
+} else {
+  load("~/dev/GeoTox/outputs/sensitivity.RData")
+}
+
+########################################
+# Create useful functions
+
+gather_results <- function(param) {
+  baseline_param <- switch(
+    param,
+    "GCA.Eff"  = "GCA",
+    "IA.eff"   = "IA",
+    "IA.HQ.10" = "HQ.10"
+  )
+  colnames <- c(
+    "External Concentration",
+    "Toxicokinetic Parameters",
+    "Obesity",
+    "Age",
+    "Concentration-Response",
+    "Baseline"
+  )
+  out <- cbind(
+    unlist(lapply(sensitivity.ext.conc, "[[", param)),
+    unlist(lapply(sensitivity.httk, "[[", param)),
+    unlist(lapply(sensitivity.obesity, "[[", param)),
+    unlist(lapply(sensitivity.age, "[[", param)),
+    unlist(lapply(sensitivity.conc.resp, "[[", param)),
+    unlist(lapply(baseline, "[[", baseline_param))
+  )
+  colnames(out) <- colnames
+  as.data.frame(out) %>%
+    pivot_longer(cols = everything()) %>%
+    mutate(name = factor(name, levels = colnames))
+}
+
+plot_gathered_results <- function(df, xlab = "", ylab = "", scale_x = TRUE) {
+  p <- df %>%
+    ggplot(aes(x = value, y = name, fill = name)) +
+    stat_density_ridges(
+      geom = "density_ridges_gradient",
+      calc_ecdf = TRUE,
+      quantiles = 4,
+      quantile_lines = FALSE
+    ) +
+    scale_fill_viridis_d(option = "C") +
+    theme(legend.position = "none") +
+    xlab(xlab) +
+    ylab(ylab) +
+    theme_minimal() +
+    coord_cartesian(clip = "off") +
+    theme(
+      text = element_text(size = 14),
+      legend.position="none",
+      axis.text = element_text(size = 14),
+      axis.title = element_text(size = 14)
+    )
+  if (scale_x) {
+    p + scale_x_log10(labels = scales::label_math(10^.x, format = log10))
+  } else {
+    p
+  }
+}
+
+plot_gathered_results2 <- function(df, y = "y", xlab = "", ylab = "") {
+  df %>%
+    ggplot(aes(x = value, y = .env$y, fill = NA, color = name)) +
+    stat_density_ridges(
+      calc_ecdf = TRUE,
+      quantiles = 4,
+      quantile_lines = FALSE,
+      fill = NA,
+      linewidth = 1
+    ) +
+    scale_x_log10(labels = scales::label_math(10^.x, format = log10)) +
+    scale_color_brewer(palette="Set2") +
+    theme(legend.position = "none") +
+    xlab(xlab) +
+    ylab(ylab) +
+    labs(color = 'Varying Parameter') +
+    theme_minimal() +
+    coord_cartesian(clip = "off") +
+    theme(
+      text = element_text(size = 14),
+      axis.text = element_text(size = 14),
+      axis.title = element_text(size = 14)
+    )
+}
+
+########################################
+# First set of plots
+
+df1 <- gather_results("GCA.Eff")
+p1 <- plot_gathered_results(
+  df1,
+  xlab = paste(
+    "Z-score of Median Predicted Log2 Fold",
+    "Change mRNA Expression CYP1A1",
+    sep = "\n"
+  ),
+  ylab = "Varying Parameter"
+)
+
+df2 <- gather_results("IA.eff")
+p2 <- plot_gathered_results(
+  df2,
+  xlab = paste(
+    "Z-score of Median Predicted Log2 Fold",
+    "Change mRNA Expression CYP1A1",
+    sep = "\n"
+  )
+) + theme(axis.text.y = element_blank())
+
+df3 <- gather_results("IA.HQ.10")
+p3 <- plot_gathered_results(
+  df3,
+  xlab = paste(
+    "Meidan CYP1A1",
+    "Summed Risk Quotient",
+    sep = "\n"
+  ),
+  scale_x = FALSE
+) + theme(axis.text.y = element_blank())
+
+p1to3 <- ggarrange(
+  p1, p2, p3,
+  labels = c( "A", "B", "C"),
+  vjust = 1,
+  align = "h",
+  ncol = 3, nrow = 1,
+  widths = c(1, 0.5, 0.5),
+  font.label = list(size = 20, color = "black", face = "bold"),
+  common.legend = FALSE
+)
+
+########################################
+# Second set of plots
+
+p4 <- plot_gathered_results2(
+  df2,
+  y = "CA/IA",
+  xlab = paste(
+    "Median Predicted Log2 Fold Change",
+    "mRNA Expression CYP1A1",
+    sep = "\n"
+  )
+)
+
+p5 <- plot_gathered_results2(
+  df3,
+  y = "RQ",
+  xlab = paste(
+    "Median CYP1A1",
+    "Summed Risk Quotient",
+    sep = "\n"
+  )
+)
+
+p4to5 <- ggarrange(
+  p4, p5,
+  labels = c("A", "B"),
+  vjust = 1,
+  align = "h",
+  ncol = 2, nrow = 1,
+  widths = c(0.5, 0.5),
+  font.label = list(size = 20, color = "black", face = "bold"),
+  common.legend = TRUE,
+  legend = "right"
+)
+
+#===============================================================================
+# Compare to GeoToxMIE
+#===============================================================================
+
+library(reshape2)
+
+# Change scale_x_log10
+# comment out:
+# scale_x_log10(labels = trans_format("log10", math_format(10^.x)))+
+# add line:
+# scale_x_log10(labels = scales::label_math(10^.x, format = log10)) +
+#
+# Comment out any lines referencing "$X2"
+
+# Run lines 32-173
+
+compare_gathered_data <- function(df_orig, df_new) {
+  all.equal(
+    df_orig %>%
+      mutate(name = as.character(Var2), value) %>%
+      select(name, value) %>%
+      arrange(name, value),
+    df_new %>%
+      mutate(name = as.character(name), value) %>%
+      arrange(name, value),
+    check.attributes = FALSE
+  )
+}
+
+compare_gathered_data(CR.melt, df1)
+compare_gathered_data(CR.IA.melt, df2)
+compare_gathered_data(HQ.IA.melt, df3)
+
+pdf("~/dev/GeoTox/outputs/temp1.1.pdf"); conc.resp.plot.GCA; invisible(dev.off())
+pdf("~/dev/GeoTox/outputs/temp1.2.pdf"); p1; invisible(dev.off())
+
+pdf("~/dev/GeoTox/outputs/temp2.1.pdf"); conc.resp.plot.IA; invisible(dev.off())
+pdf("~/dev/GeoTox/outputs/temp2.2.pdf"); p2; invisible(dev.off())
+
+pdf("~/dev/GeoTox/outputs/temp3.1.pdf"); HQ.plot.IA; invisible(dev.off())
+pdf("~/dev/GeoTox/outputs/temp3.2.pdf"); p3; invisible(dev.off())
+
+pdf("~/dev/GeoTox/outputs/temp4.1.pdf"); composite; invisible(dev.off())
+pdf("~/dev/GeoTox/outputs/temp4.2.pdf"); p1to3; invisible(dev.off())
+
+# Run lines 180-245
+
+pdf("~/dev/GeoTox/outputs/temp5.1.pdf"); combined_plot; invisible(dev.off())
+pdf("~/dev/GeoTox/outputs/temp5.2.pdf"); p4; invisible(dev.off())
+
+pdf("~/dev/GeoTox/outputs/temp6.1.pdf"); HQ_plot; invisible(dev.off())
+pdf("~/dev/GeoTox/outputs/temp6.2.pdf"); p5; invisible(dev.off())
+
+pdf("~/dev/GeoTox/outputs/temp7.1.pdf"); composite2; invisible(dev.off())
+pdf("~/dev/GeoTox/outputs/temp7.2.pdf"); p4to5; invisible(dev.off())

From 8f5ba4bfca023cdf18989c452284731d9af48714 Mon Sep 17 00:00:00 2001
From: SkylarMarvel <skylar.marvel@gmail.com>
Date: Wed, 27 Dec 2023 11:20:27 -0700
Subject: [PATCH 3/5] Step 06 of cyp1a1-pipeline replication

---
 script-replication/cyp1a1-pipeline.R | 228 ++++++++++++++++++++++++++-
 1 file changed, 227 insertions(+), 1 deletion(-)

diff --git a/script-replication/cyp1a1-pipeline.R b/script-replication/cyp1a1-pipeline.R
index dd903e7..3dae71c 100644
--- a/script-replication/cyp1a1-pipeline.R
+++ b/script-replication/cyp1a1-pipeline.R
@@ -577,4 +577,230 @@ all.equal(
 ################################################################################
 ################################################################################
 
-# TODO
+library(sf)
+library(ggpubr)
+
+rm(list = ls())
+
+##########
+# Data
+load("~/dev/GeoTox/data/final_response_by_county_20220901.RData")
+load("~/dev/GeoTox/data/FIPS_by_county.RData") # FIPS shouldn't be needed
+county_2014 <- st_read(
+  "~/dev/GeoTox/data/cb_2014_us_county_5m/cb_2014_us_county_5m.shp",
+  quiet = TRUE
+)
+
+states <- st_as_sf(maps::map("state", plot = FALSE, fill = TRUE))
+
+county <- county_2014 %>%
+  filter(!(STATEFP %in% c("02", "15", "60", "66", "69", "72", "78"))) %>%
+  mutate(FIPS = str_c(STATEFP, COUNTYFP), .before = 1)
+
+########################################
+# response_by_county
+
+# TODO list should already have names() == FIPS
+# Make sure leading 0s are there for joining with county
+names(final.response.by.county) <- sprintf("%05d", FIPS)
+
+response_by_county <- tibble(
+  FIPS = names(final.response.by.county),
+  data = final.response.by.county
+) %>%
+  unnest(cols = data) %>%
+  pivot_longer(-FIPS, names_to = "health_measure") %>%
+  mutate(
+    health_measure = factor(
+      health_measure,
+      levels = c("GCA", "IA", "HQ.10")
+    )
+  )
+
+########################################
+# IVIVE summary
+
+# Summarize within counties
+ivive_summary_df <- response_by_county %>%
+  summarize(
+    median = median(value , na.rm = TRUE),
+    x95_quantile = quantile(value, 0.95, na.rm = TRUE, names = FALSE),
+    x5_quantile = quantile(value, 0.05, na.rm = TRUE, names = FALSE),
+    .by = c(FIPS, health_measure)
+  )
+
+ivive_summary_df_stack <- ivive_summary_df %>%
+  pivot_longer(
+    cols = c(median, x95_quantile, x5_quantile),
+    names_to = "variable"
+  ) %>%
+  mutate(
+    variable = factor(
+      variable,
+      levels = c("x5_quantile", "median", "x95_quantile")
+    )
+  )
+
+# Summarize across counties
+ivive_summary_stats <- ivive_summary_df_stack %>%
+  summarize(
+    min = min(value),
+    median = median(value),
+    max = max(value),
+    .by = c(variable, health_measure))
+
+########################################
+# Health measure histogram
+
+hist_HM <- ggplot(ivive_summary_df_stack, aes(x = log10(value))) +
+  geom_histogram(bins = 500) +
+  facet_grid(
+    health_measure ~ variable,
+    labeller = labeller(
+      health_measure = c(
+        "GCA"   = "CA Response",
+        "IA"    = "IA Response",
+        "HQ.10" = "RQ"
+      ),
+      variable = c(
+        "x5_quantile"  = "5th Percentile",
+        "median"       = "Median",
+        "x95_quantile" = "95th Percentile"
+      )
+    )
+  ) +
+  theme_minimal() +
+  ylab("Count") +
+  xlab("Log10 Risk Metric Value")
+
+########################################
+# County heatmaps
+
+ivive_county_sf <- ivive_summary_df_stack %>%
+  left_join(county, by = join_by(FIPS), keep = FALSE) %>%
+  st_as_sf() %>%
+  st_zm() # Was having errors until adding this
+
+make_county_heatmap <- function(df, legend_name) {
+  ggplot(df, aes(fill = value)) +
+    # Plot fill, hide county borders
+    # geom_sf(lwd = 0) + # This still showed up in .pdf images
+    geom_sf(color = NA) +
+    # Add state borders
+    geom_sf(data = states, fill = NA, size = 0.15) +
+    # Create separate plots for each variable
+    facet_wrap(
+      ~variable,
+      ncol = 3,
+      labeller = labeller(
+        variable = c(
+          "x5_quantile" = "5th Percentile",
+          "median" = "Median",
+          "x95_quantile" = "95th Percentile"
+        )
+      )
+    ) +
+    # Add fill scale
+    scale_fill_viridis_c(
+      name = legend_name,
+      direction = -1,
+      option = "A",
+      trans = "sqrt"
+    ) +
+    # Theme
+    theme_bw() +
+    theme(
+      text = element_text(size = 12),
+      legend.text = element_text(size = 8)
+    )
+}
+
+GCA_Eff_plot <- make_county_heatmap(
+  ivive_county_sf %>% filter(health_measure == "GCA"),
+  paste("Predicted Response", "Log2 Fold Change", "mRNA Expression",sep = "\n")
+)
+
+IA_Eff_plot <- make_county_heatmap(
+  ivive_county_sf %>% filter(health_measure == "IA"),
+  paste("Predicted Response", "Log2 Fold Change", "mRNA Expression",sep = "\n")
+)
+
+HQ_10_plot <- make_county_heatmap(
+  ivive_county_sf %>% filter(health_measure == "HQ.10"),
+  "Risk Quotient"
+)
+
+all_plots <- ggarrange(
+  GCA_Eff_plot , IA_Eff_plot, HQ_10_plot,
+  labels = c( "A", "B", "C"),
+  vjust = 1,
+  align = "v",
+  nrow = 3,
+  font.label = list(size = 20, color = "black", face = "bold"),
+  common.legend = FALSE
+)
+
+#===============================================================================
+# Compare to GeoToxMIE
+#===============================================================================
+
+library(reshape2)
+
+load("~/dev/GeoTox/data/final_response_by_county_20220901.RData")
+
+# Run 25-27, 35-80, 85-97
+
+all.equal(
+  response.by.county %>%
+    select(health_measure, value),
+  response_by_county %>%
+    arrange(as.numeric(FIPS), health_measure) %>%
+    select(health_measure, value),
+  check.attributes = FALSE
+)
+
+all.equal(
+  summary_stats,
+  ivive_summary_stats %>%
+    arrange(variable, health_measure) %>%
+    select(variable, health_measure, median, min, max),
+  check.attributes = FALSE
+)
+
+pdf("~/dev/GeoTox/outputs/cyp1a1_hist_orig.pdf")
+histogram_HM
+invisible(dev.off())
+
+pdf("~/dev/GeoTox/outputs/cyp1a1_hist_new.pdf")
+hist_HM
+invisible(dev.off())
+
+# Run lines 107-108
+
+all.equal(
+  ivive_county_cyp1a1_up_sf %>%
+    arrange(STATEFP, COUNTYFP, health_measure, variable) %>%
+    pull(value),
+  ivive_county_sf %>%
+    arrange(STATEFP, COUNTYFP, health_measure, variable) %>%
+    pull(value)
+)
+
+# Needed to add st_zm(), otherwise plot would have error
+ivive_county_cyp1a1_up_sf <- ivive_county_cyp1a1_up_sf %>% st_zm()
+
+# Run lines 110-112, 130, 157, 170
+
+ggsave(
+  "~/dev/GeoTox/outputs/cyp1a1_heatmap_orig.tiff",
+  composite_plot, width = 40, height = 25, units = "cm", dpi = 300
+)
+
+ggsave(
+  "~/dev/GeoTox/outputs/cyp1a1_heatmap_new.tiff",
+  all_plots, width = 40, height = 25, units = "cm", dpi = 300
+)
+
+pdf("~/dev/GeoTox/outputs/cyp1a1_heatmap_new.pdf", width = 15.75, height = 10)
+all_plots
+invisible(dev.off())

From 5626f75e51d85e5b9064d2adccd001193a798968 Mon Sep 17 00:00:00 2001
From: SkylarMarvel <skylar.marvel@gmail.com>
Date: Fri, 29 Dec 2023 11:44:47 -0700
Subject: [PATCH 4/5] Step 01 of cyp1a1-figures replication

---
 script-replication/cyp1a1-figures.R | 323 ++++++++++++++++++++++++++++
 1 file changed, 323 insertions(+)
 create mode 100644 script-replication/cyp1a1-figures.R

diff --git a/script-replication/cyp1a1-figures.R b/script-replication/cyp1a1-figures.R
new file mode 100644
index 0000000..16c8d67
--- /dev/null
+++ b/script-replication/cyp1a1-figures.R
@@ -0,0 +1,323 @@
+library(devtools)
+library(tidyverse)
+
+################################################################################
+################################################################################
+# 01-CYP1A1-figures.R
+################################################################################
+################################################################################
+
+library(sf)
+library(ggpubr)
+
+rm(list = ls())
+
+##########
+# Data
+
+# TODO should the 112 be kept (i.e. "Benzidine 112")? It is in the MIE script
+# fit_params is from step 2 of conc-resp.R
+fit_params <- readRDS("~/dev/GeoTox/outputs/fit_params.rds") %>%
+  mutate(
+    chnm = case_when(
+      chnm == "C.I. Disperse Black 6" ~ "3,3'-Dimethoxybenzidine",
+      chnm == "C.I. Azoic Diazo Component 112" ~ "Benzidine",
+      .default = chnm
+    )
+  )
+
+nata_df <- read.csv("~/dev/GeoTox/data/2014_NATA_CONCS.csv")
+nata_chemicals <- read.csv("~/dev/GeoTox/data/NATA_pollutant_names_casrn.csv")
+
+# TODO different ice data source files?
+# The "ice_data" in [01-02]-Conc-Response-Fit.R is from:
+# load("~/dev/GeoTox/data/LTEA_HepaRG_CYP1A1_up 41 chems for Kyle 220131.RData")
+load("~/dev/GeoTox/data/210105_ICE_cHTS_invitrodbv33.Rdata")
+ice_data <- subset(ice_data, new_hitc == 1)
+
+states <- st_as_sf(maps::map("state", plot = FALSE, fill = TRUE))
+
+county_2014 <- st_read(
+  "~/dev/GeoTox/data/cb_2014_us_county_5m/cb_2014_us_county_5m.shp",
+  quiet = TRUE
+)
+
+county <- county_2014 %>%
+  filter(!(STATEFP %in% c("02", "15", "60", "66", "69", "72", "78"))) %>%
+  mutate(FIPS = str_c(STATEFP, COUNTYFP), .before = 1) %>%
+  st_zm() %>%
+  # TODO why simplify here but not in cyp1a1-pipeline?
+  # "preserveTopology = FALSE" was dropped
+  st_simplify(dTolerance = 1000)
+
+nata_chemicals_2 <- nata_chemicals %>%
+  mutate(
+    web_name = str_to_title(web_name),
+    web_name = str_replace(web_name, "\\s+\\([^\\)]+\\)", "")
+  )
+
+########################################
+# Optional, the objects aren't used later on
+
+# TODO same casrn but different names
+x <- nata_chemicals_2 %>% distinct(casrn, web_name) %>% filter(!is.na(casrn))
+(x <- x$casrn[duplicated(x$casrn)])
+nata_chemicals_2 %>% filter(casrn %in% x)
+
+nata_tox21_2 <- inner_join(
+  ice_data,
+  nata_chemicals_2,
+  join_by(casn == casrn),
+  relationship = "many-to-many" # due to duplicate casrn
+)
+
+nata_tox21_count_2 <- nata_tox21_2 %>%
+  summarize(chemical_count = n(), .by = aenm) %>%
+  rename("assay name" = aenm)
+
+########################################
+# Aggregate NATA data
+
+norm_fn = function(x) {(x - min(x)) / (max(x) - min(x))}
+
+nata_county_stack_2 <- nata_df %>%
+  # Aggregate by county
+  summarize(across(ACETALD:XYLENES, mean), .by = STCOFIPS) %>%
+  pivot_longer(-STCOFIPS) %>%
+  rename(FIPS = STCOFIPS, chemical = name, concentration = value) %>%
+  # Add casrn and web_name
+  left_join(nata_chemicals_2, by = join_by(chemical == smoke_name)) %>%
+  # Limit to chemicals with fit data (MIE line 106)
+  filter(casrn %in% fit_params$casn) %>%
+  # Remove any missing data
+  na.omit() %>%
+  # Normalize air concentrations
+  # This was very convoluted in the MIE script (MIE lines 89-95)
+  group_by(chemical) %>%
+  mutate(concentration_norm = norm_fn(concentration)) %>%
+  ungroup()
+
+nata_county_cyp1a1_up_2 <- nata_county_stack_2 %>%
+  left_join(
+    ice_data %>% filter(aenm == "LTEA_HepaRG_CYP1A1_up"),
+    by = join_by(casrn == casn)
+  ) %>%
+  mutate(conc_gt_0 = as.integer(concentration > 0))
+
+########################################
+# heatmap_cyp1a1_up_plot
+
+heatmap_df_2 <- nata_county_cyp1a1_up_2 %>%
+  left_join(fit_params, by = join_by(casrn == casn, chnm)) %>%
+  summarize(mean = mean(10^logAC50), .by = c(aenm, chnm))
+
+heatmap_cyp1a1_up_plot <- ggplot(
+  heatmap_df_2,
+  aes(x = fct_reorder(chnm, mean), y = aenm,  fill = mean)
+) +
+  geom_tile() +
+  theme_bw() +
+  scale_fill_viridis_c(direction = 1, option = "A") +
+  labs(y = "Assay", x = "Chemical", fill = "EC50 (μM)") +
+  theme(
+    axis.text.x = element_text(angle = 45, hjust = 1),
+    axis.text.y = element_blank(),
+    axis.title.y = element_blank(),
+    axis.ticks.y = element_blank(),
+    legend.title=element_text(size = 14),
+    text = element_text(size = 18),
+    aspect.ratio = 1.5 / 10,
+    plot.margin = margin(10, 10, 10, 100)
+  )
+
+########################################
+# cyp1a1_up_all_plot
+
+cyp1a1_up_nata_county_sf_2 <- nata_county_cyp1a1_up_2 %>%
+  left_join(fit_params, by = join_by(casrn == casn, chnm)) %>%
+  right_join(county %>% mutate(FIPS = as.numeric(FIPS)), by = join_by(FIPS)) %>%
+  st_as_sf()
+
+cyp1a1_up_all_plot <- ggplot(
+  cyp1a1_up_nata_county_sf_2,
+  aes(fill = concentration_norm)
+) +
+  geom_sf(color = NA) +
+  facet_wrap(vars(fct_reorder(chnm, logAC50)), ncol = 7) +
+  # Recolor subset as light grey
+  geom_sf(
+    data = cyp1a1_up_nata_county_sf_2 %>% filter(concentration == 0),
+    aes(fill = concentration),
+    fill = "light grey",
+    color = "light grey",
+    lwd = 0.01
+  ) +
+  # State borders
+  geom_sf(data = states, fill = NA, lwd = 0.15) +
+  scale_fill_viridis_c(
+    name = "Normalized Concentration",
+    direction = -1,
+    option = "A"
+  ) +
+  theme_bw() +
+  theme(
+    axis.ticks.x = element_blank(),
+    axis.title.x = element_blank(),
+    axis.text.x = element_blank(),
+    axis.ticks.y = element_blank(),
+    axis.title.y = element_blank(),
+    axis.text.y = element_blank(),
+    legend.text = element_text(angle = 45),
+    legend.position = "bottom",
+    strip.text = element_text(size = 10),
+    text = element_text(size = 18)
+  )
+
+########################################
+# cyp1a1_up_all_plot with relative concentration scales
+
+cyp1a1_up_all_relative_plot <- lapply(
+  unique(cyp1a1_up_nata_county_sf_2$chnm),
+  function(x) cyp1a1_up_nata_county_sf_2 %>%
+    filter(web_name == x) %>%
+    ggplot(aes(fill = concentration)) +
+    geom_sf(color = NA) +
+    theme_bw() +
+    geom_sf(data = states, fill = NA, size = 0.15) +
+    scale_fill_viridis_c(
+      name = "Conc. (ug/m3)",
+      direction = -1,
+      option = "A"
+    ) +
+    theme_bw() +
+    theme(axis.text = element_blank(), axis.ticks = element_blank()) +
+    ggtitle(x)
+)
+
+plot2_2 <- cowplot::plot_grid(
+  plotlist = cyp1a1_up_all_relative_plot, label_size = 12, ncol = 8
+)
+
+pdf("~/dev/GeoTox/outputs/cyp1a1_figures_1_facet.pdf", width = 24, height = 20)
+plot2_2
+invisible(dev.off())
+
+########################################
+# count_cyp1a1_up_map
+
+count_county_cyp1a1_up_sf_2 <- nata_county_cyp1a1_up_2 %>%
+  summarize(count = sum(conc_gt_0), .by = FIPS) %>%
+  right_join(county %>% mutate(FIPS = as.numeric(FIPS)), by = join_by(FIPS)) %>%
+  st_as_sf()
+
+count_cyp1a1_up_map_2 <- ggplot(
+  count_county_cyp1a1_up_sf_2,
+  aes(fill = count)
+) +
+  geom_sf(color = NA) +
+  theme_bw() +
+  geom_sf(data = states, fill = NA, size=0.15) +
+  scale_fill_viridis_c(name = "# Chemicals", direction = -1, option = "A") +
+  theme(
+    axis.ticks.x = element_blank(),
+    axis.text.x = element_blank(),
+    axis.ticks.y = element_blank(),
+    axis.text.y = element_blank(),
+    legend.position = "right",
+    text = element_text(size = 18),
+    aspect.ratio = 5 / 10
+  )
+
+########################################
+# combo
+
+cyp1a1_combo <- ggarrange(
+  cyp1a1_up_all_plot, count_cyp1a1_up_map_2, heatmap_cyp1a1_up_plot,
+  labels = c("A", "B", "C"),
+  widths = c(1, 0.65, 0.5),
+  ncol = 1, nrow = 3,
+  heights = c(1, 0.65, 0.5),
+  align = "v",
+  font.label = list(size = 20, color = "black", face = "bold"),
+  common.legend = FALSE
+)
+
+pdf("~/dev/GeoTox/outputs/cyp1a1_figures_1.pdf", width = 17.7, height = 21.7)
+cyp1a1_combo
+invisible(dev.off())
+
+ggsave(
+  "~/dev/GeoTox/outputs/cyp1a1_figures_1.tiff",
+  cyp1a1_combo, width = 45, height = 55, units = "cm", dpi = 300
+)
+
+# TODO failes to create plot
+sjPlot::save_plot(
+  "~/dev/GeoTox/outputs/cyp1a1_figures_1_sjPlot.tif",
+  cyp1a1_combo, width = 45, height = 55, dpi = 300
+)
+
+#===============================================================================
+# Compare to GeoToxMIE
+#===============================================================================
+
+library(reshape2)
+
+load("~/dev/GeoTox/data/Hill_2param_model_fit.RData")
+hill2.fit <- df.params; rm(df.params)
+
+# Run lines 24-27
+
+hill2.fit$chnm <- str_replace_all(hill2.fit$chnm, "Benzidine 112", "Benzidine")
+
+all.equal(
+  hill2.fit,
+  fit_params %>% select(!ends_with("imputed")),
+  check.attributes = FALSE
+)
+
+# Run lines 49, 56-57
+
+# Optional run line 59-71, these objects are not used later
+
+all.equal(
+  nata_tox21   %>% arrange(casn, aeid, smoke_name) %>% select(-presence),
+  nata_tox21_2 %>% arrange(casn, aeid, smoke_name),
+  check.attributes = FALSE
+)
+
+all.equal(
+  nata_tox21_count   %>% arrange(`assay name`),
+  nata_tox21_count_2 %>% arrange(`assay name`)
+)
+
+# Run lines 76-106
+
+all.equal(
+  nata_county_stack   %>% arrange(FIPS, casrn),
+  nata_county_stack_2 %>% arrange(FIPS, casrn),
+  check.attributes = FALSE
+)
+
+# Run lines 109-125
+
+all.equal(
+  summary(heatmap_df$mean),
+  summary(heatmap_df_2$mean)
+)
+
+# Skip remaining lines
+
+# Compare
+#
+# "~/dev/GeoTox/outputs/cyp1a1_figures_1.pdf"
+# vs
+# "Figure3_v20220420.tif"
+
+################################################################################
+################################################################################
+# 02-CYP1A1-figures.R
+################################################################################
+################################################################################
+
+# TODO

From 21932ed4039bd9fd5295604c4e56bd8fd31ffcf6 Mon Sep 17 00:00:00 2001
From: SkylarMarvel <skylar.marvel@gmail.com>
Date: Fri, 5 Jan 2024 13:43:31 -0700
Subject: [PATCH 5/5] Step 02 of cyp1a1-figures replication

---
 script-replication/cyp1a1-figures.R | 99 +++++++++++++++++++++++++++--
 1 file changed, 94 insertions(+), 5 deletions(-)

diff --git a/script-replication/cyp1a1-figures.R b/script-replication/cyp1a1-figures.R
index 16c8d67..374ed8b 100644
--- a/script-replication/cyp1a1-figures.R
+++ b/script-replication/cyp1a1-figures.R
@@ -45,10 +45,7 @@ county_2014 <- st_read(
 county <- county_2014 %>%
   filter(!(STATEFP %in% c("02", "15", "60", "66", "69", "72", "78"))) %>%
   mutate(FIPS = str_c(STATEFP, COUNTYFP), .before = 1) %>%
-  st_zm() %>%
-  # TODO why simplify here but not in cyp1a1-pipeline?
-  # "preserveTopology = FALSE" was dropped
-  st_simplify(dTolerance = 1000)
+  st_zm()
 
 nata_chemicals_2 <- nata_chemicals %>%
   mutate(
@@ -320,4 +317,96 @@ all.equal(
 ################################################################################
 ################################################################################
 
-# TODO
+rm(list = ls())
+
+##########
+# Data
+load("~/dev/GeoTox/data/LTEA_HepaRG_CYP1A1_up 41 chems for Kyle 220131.RData")
+ice_data <- cdat; rm(cdat)
+fit_params <- readRDS("~/dev/GeoTox/outputs/fit_params.rds")
+
+
+########################################
+# Create plot
+
+# TODO export tcplHillVal?
+tcplHillVal <- function(conc, tp, ga, gw, bt = 0) {
+  bt + (tp - bt) / (1 + (ga / conc)^gw)
+}
+
+x <- 10^seq(-3, 3, length.out = 100)
+
+df <- fit_params %>%
+  rowwise() %>%
+  mutate(
+    curves = list(
+      data.frame(
+        x = .env$x,
+        y_mean = tcplHillVal(.env$x, tp, 10^logAC50, 1),
+        # TODO both upper and lower had "-" for AC50
+        # Is there a standard approach for computing fitted value confidence
+        # intervals from optim outputs?
+        y_lower = tcplHillVal(
+          .env$x, tp - tp.sd * 1.96, 10^(logAC50 + logAC50.sd * 1.96), 1
+        ),
+        y_upper = tcplHillVal(
+          .env$x, tp + tp.sd * 1.96, 10^(logAC50 - logAC50.sd * 1.96), 1
+        )
+      )
+    )
+  ) %>%
+  ungroup() %>%
+  select(chnm, curves) %>%
+  unnest(cols = curves)
+
+p <- ggplot(df, aes(x = x)) +
+  geom_line(aes(y = y_lower), color = "red") +
+  geom_line(aes(y = y_upper), color = "red") +
+  geom_line(aes(y = y_mean), color = "black", lwd = 1) +
+  geom_point(
+    data = ice_data %>% mutate(x = 10^logc, y = resp),
+    aes(x = x, y = y)
+  ) +
+  facet_wrap(~chnm, scales = "free") +
+  scale_x_log10(labels = scales::label_math(10^.x, format = log10)) +
+  theme_bw() +
+  theme(
+    legend.position = "none",
+    text = element_text(size = 16)
+  )+
+  ylab("Response") +
+  xlab("Concentration")
+
+#===============================================================================
+# Compare to GeoToxMIE
+#===============================================================================
+
+library(reshape2)
+
+hill2 <- fit_params
+
+# Run lines 36, 51-162, 172
+
+SI_figure <- ggplot()+
+  geom_line(data = m, aes(x = x, y = lower), color="red")+
+  geom_line(data = m, aes(x = x, y = upper), color="red")+
+  geom_line(data = m, aes(x = x, y = mean), color="black", lwd = 1)+
+  geom_point(
+    data = ice_data %>% mutate(x = 10^logc, y = resp),
+    aes(x = x, y = y)
+  ) +
+  facet_wrap(vars(chnm), scales= "free")+
+  scale_x_log10(labels = scales::label_math(10^.x, format = log10)) +
+  theme_bw()+
+  theme(legend.position = "none",
+        text = element_text(size = 16))+
+  ylab("Response")+
+  xlab("Concentration")
+
+pdf("~/dev/GeoTox/outputs/SI_dr.pdf", width = 20, heigh = 16)
+SI_figure
+invisible(dev.off())
+
+pdf("~/dev/GeoTox/outputs/SI_dr_new.pdf", width = 20, heigh = 16)
+p
+invisible(dev.off())