
Commit

adding vignette, removing snow
pedroliman committed Apr 2, 2024
1 parent 0171ff6 commit e423686
Showing 14 changed files with 250 additions and 265 deletions.
10 changes: 3 additions & 7 deletions DESCRIPTION
@@ -5,25 +5,20 @@ Version: 1.0.0
Authors@R:
person("Pedro", "Nascimento de Lima", , "[email protected]", role = c("cre", "aut"),
comment = c(ORCID = "0000-0001-9057-198X"))
Description: Provides an R6-based encapsulated object-oriented programming (OOP)
Description: Provides an R6-based encapsulated object-oriented programming
framework for simulation modeling studies in R.
License: GPL (>= 3)
Imports:
assertthat,
data.table,
doSNOW,
dplyr,
foreach,
imabc,
jsonlite,
lhs,
magrittr,
parallel,
progress,
purrr,
R6,
readxl,
snow,
stats,
tidyr,
utils,
@@ -32,8 +27,9 @@ Suggests:
knitr,
rmarkdown,
testthat (>= 3.0.0)
VignetteBuilder:
knitr
Config/testthat/edition: 3
Encoding: UTF-8
LazyData: true
RoxygenNote: 7.2.3
VignetteBuilder: knitr
6 changes: 0 additions & 6 deletions NAMESPACE
@@ -5,17 +5,11 @@ export(R6Sim)
export(is.R6Experiment)
export(is.R6Sim)
import(R6)
import(doSNOW)
import(dplyr)
import(foreach)
import(parallel)
import(progress)
import(tidyr)
importFrom(data.table,as.data.table)
importFrom(data.table,data.table)
importFrom(imabc,as.priors)
importFrom(imabc,as.targets)
importFrom(imabc,imabc)
importFrom(lhs,randomLHS)
importFrom(magrittr,"%>%")
importFrom(readxl,excel_sheets)
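
For reference, this NAMESPACE file is generated by roxygen2 from tags in the package's R files rather than edited by hand; a minimal, hedged sketch of tags that would produce entries like those shown above:

#' @import R6
#' @import dplyr
#' @importFrom data.table as.data.table data.table
#' @importFrom lhs randomLHS
#' @importFrom magrittr %>%
NULL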
17 changes: 15 additions & 2 deletions R/R6Experiment.R
@@ -173,8 +173,21 @@ R6Experiment <- R6::R6Class(
)
}
},
run = function(n_cores = 3, parallel = F, cluster_eval_script, model_from_cluster_eval = T) {
R6Experiment_run(self = self, n_cores = n_cores, parallel = parallel, cluster_eval_script = cluster_eval_script, model_from_cluster_eval = model_from_cluster_eval)

#' Runs R6Experiment in parallel
#'
#'
#' This function is most useful for simulating the posterior distribution of a single model in parallel on one machine. It is not used when calibrating the model and is not suitable for parallelization across multiple nodes.
#'
#' @param self experiment object
#' @param n_cores number of cores to use
#' @param parallel whether to evaluate run in parallel
#' @param cluster_eval_script path to a script that instantiates the necessary functions. This will often mean sourcing functions external to the package and loading the model's dependencies. Needed if parallel = T.
#' @param model_from_cluster_eval T if the model is instantiated in the cluster eval script, F otherwise. Use T for models that need compilation (like odin) and F otherwise.
#' @param ... additional parameters to be passed down to the `model$simulate()` function
#' @return results data.frame from all simulations in parallel
run = function(n_cores = 3, parallel = F, cluster_eval_script, model_from_cluster_eval = T, ...) {
R6Experiment_run(self = self, n_cores = n_cores, parallel = parallel, cluster_eval_script = cluster_eval_script, model_from_cluster_eval = model_from_cluster_eval, ...)
}
),

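
For illustration, a minimal sketch of how the updated run() signature might be called; the experiment object, script path, and extra scenario argument below are hypothetical, not part of the package:

# experiment is assumed to be an R6Experiment with its designs already set up
results <- experiment$run(
  n_cores = 4,
  parallel = TRUE,
  cluster_eval_script = "cluster_setup.R", # hypothetical script sourced on each worker
  model_from_cluster_eval = FALSE,
  scenario = "baseline" # illustrative extra argument forwarded to model$simulate() via ...
)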
86 changes: 47 additions & 39 deletions R/R6Experiment_run.R
@@ -36,73 +36,81 @@
#' @param parallel whether to evaluate run in parallel
#' @param cluster_eval_script path to a script that instantiates the necessary functions. This will often mean sourcing functions external to the package and loading the model's dependencies. Needed if parallel = T.
#' @param model_from_cluster_eval T if the model is instantiated in the cluster eval script, F otherwise. Use T for models that need compilation (like odin) and F otherwise.
#' @param ... additional parameters to be passed to the model simulation function.
#' @return results data.frame from all simulations in parallel
#'
#' @import parallel
#' @import doSNOW
#' @import foreach
#' @import progress
R6Experiment_run <- function(self, n_cores, parallel, cluster_eval_script, model_from_cluster_eval) {

R6Experiment_run <- function(self, n_cores, parallel, cluster_eval_script, model_from_cluster_eval, ...) {

# If we are running the experiment in parallel, we need to instantiate the models explicitly within each "node".
# if the model is self-contained, that should be enough.
if (parallel) {

# Make cluster and evaluate the cluster eval script.
cl <- snow::makeCluster(n_cores)
doSNOW::registerDoSNOW(cl)
snow::clusterExport(cl, "cluster_eval_script", envir = environment())
snow::clusterEvalQ(cl, source(cluster_eval_script))
# With parallel
cl <- parallel::makeCluster(n_cores)
parallel::clusterExport(cl, "cluster_eval_script", envir = environment())
parallel::clusterEvalQ(cl, source(cluster_eval_script))

}

# progress bar ------------------------------------------------------------
# Progress bar setup adapted from:
# https://stackoverflow.com/questions/5423760/how-do-you-create-a-progress-bar-when-using-the-foreach-function-in-r
pb <- progress_bar$new(
format = "R6Sim: experiment = :experiment [:bar] :elapsed | eta: :eta",
total = nrow(self$policy_design), # 100
width = 80
)
# pb <- progress_bar$new(
# format = "R6Sim: experiment = :experiment [:bar] :elapsed | eta: :eta",
# total = nrow(self$policy_design), # 100
# width = 80
# )

# allowing progress bar to be used in foreach -----------------------------
progress <- function(n) {
pb$tick(tokens = list(experiment = n))
}
# progress <- function(n) {
# pb$tick(tokens = list(experiment = n))
# }

opts <- list(progress = progress)
# opts <- list(progress = progress)

# foreach loop ------------------------------------------------------------
results <- foreach(i = 1:nrow(self$policy_design), .combine = rbind, .options.snow = opts) %dopar% {
if (!model_from_cluster_eval) {
model <- self$models[[self$policy_design$model.id[i]]]
} else {
stopifnot("cluster_experiment object not defined. Create an R6Experiment object called cluster_experiment in your cluster_eval_script file, containing the models used in this analysis." = exists("cluster_experiment"))

stopifnot("cluster_experiment object is not an R6Experiment. Make sure to use R6Experiment to create the cluster_experiment object." = is.R6Experiment(cluster_experiment))
if (parallel) {

model <- cluster_experiment$models[[self$policy_design$model.id[i]]]
}
# Using parLapply and not foreach due to licensing issues.
results <- parLapply(cl = cl, X = 1:nrow(self$policy_design), fun = run_single_experiment, model_from_cluster_eval = model_from_cluster_eval, self = self, parallel = parallel, ...)

id_cols <- c("grid.id", "lhs.id", "params_design.id", "param.id", "model.id", "all.params.id", "policy.exp.id")
parallel::stopCluster(cl)
} else {
results <- lapply(X = 1:nrow(self$policy_design), FUN = run_single_experiment, model_from_cluster_eval = model_from_cluster_eval, self = self, parallel = parallel, ...)
}

return(do.call(rbind, results))
}

scenario_inputs <- self$policy_design[i, ] %>%
select(-any_of(id_cols)) %>%
as.data.frame()
run_single_experiment <- function(policy_design_id, self, model_from_cluster_eval, parallel, ...) {

# Set each input
for (var in names(scenario_inputs)) {
model$set_input(var, scenario_inputs[, var])
}
if (!model_from_cluster_eval | !parallel) {
model <- self$models[[self$policy_design$model.id[policy_design_id]]]
} else {
stopifnot("cluster_experiment object not defined. Create an R6Experiment object called cluster_experiment in your cluster_eval_script file, containing the models used in this analysis." = exists("cluster_experiment"))

res <- model$simulate()
stopifnot("cluster_experiment object is not an R6Experiment. Make sure to use R6Experiment to create the cluster_experiment object." = is.R6Experiment(cluster_experiment))

return(cbind(self$policy_design[i, ], res))
model <- cluster_experiment$models[[self$policy_design$model.id[policy_design_id]]]
}

if (parallel) {
snow::stopCluster(cl)
id_cols <- c("grid.id", "lhs.id", "params_design.id", "param.id", "model.id", "all.params.id", "policy.exp.id")

scenario_inputs <- self$policy_design[policy_design_id, ] %>%
select(-any_of(id_cols)) %>%
as.data.frame()

# Set each input
for (var in names(scenario_inputs)) {
model$set_input(var, scenario_inputs[, var])
}

return(results)
res <- model$simulate(...)

return(cbind(self$policy_design[policy_design_id, ], res))

}
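
As a self-contained illustration of the base-R parallel pattern that replaces the snow/doSNOW/foreach machinery above, with a toy worker function standing in for run_single_experiment:

library(parallel)

cl <- makeCluster(2)
out <- parLapply(cl = cl, X = 1:10, fun = function(i) data.frame(id = i, value = i^2))
stopCluster(cl)

# Combine the per-run data.frames, as R6Experiment_run() now does with do.call(rbind, results)
results <- do.call(rbind, out)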

92 changes: 2 additions & 90 deletions R/R6Sim.R
@@ -186,97 +186,9 @@ R6Sim <- R6::R6Class(
#' @param ... any set of parameters passed to this function will be passed along to the user natural history function.
setup_run = function(...) {
stop("Setup_run method must be implemented by your class.")
},

#' @description
#' make priors for calibration (using the IMABC format)
#'
#' return an imabc priors object
#'
#' @param priors_file csv file name where priors should be saved, if any
make_priors = function(priors_file) {
priors <- self$inputs$priors %>%
# clean up fixed parameters
mutate(
sd = ifelse(dist_base_name == "fixed", NA, sd),
min = ifelse(dist_base_name == "fixed", NA, min),
max = ifelse(dist_base_name == "fixed", NA, max),
a = ifelse(dist_base_name == "fixed", NA, a),
b = ifelse(dist_base_name == "fixed", NA, b)
) %>%
# clean up uniform parameters:
mutate(mean = ifelse(dist_base_name == "unif", NA, mean)) %>%
# add a and b parameters for the truncated normal:
mutate(a = ifelse(dist_base_name == "truncnorm", min, NA)) %>%
mutate(b = ifelse(dist_base_name == "truncnorm", max, NA)) %>%
# clean up additional columns:
select(-any_of(c("ideal_dist_base_name", "reported_prior"))) %>%
imabc::as.priors()

if (!missing(priors_file)) {
write.csv(as.data.frame(priors), file = priors_file)
}

return(priors)
},

#' @description
#' make targets for calibration
#'
#' return an imabc targets object
#'
#' @param targets_file csv file name where targets should be saved, if any
#' @importFrom imabc as.priors
#' @importFrom imabc as.targets
#' @importFrom imabc imabc
make_targets = function(targets_file) {

# this name can't change.
time_variable <- "week"
target_variables <- names(self$inputs$t_tol)[-c(1, 2)]

targets_df <- get_long_target_df(self$inputs$t_series, target_variables)

tolerances_df <- get_long_target_df(self$inputs$t_tol, target_variables)

init_tolerances_df <- get_long_target_df(self$inputs$t_ini_tol, target_variables)

# create imabc target object:
imabc_targets <- data.frame(
target_names = targets_df$target_names,
target_groups = targets_df$target_groups,
targets = targets_df$value,
current_lower_bounds = targets_df$value - init_tolerances_df$value,
current_upper_bounds = targets_df$value + init_tolerances_df$value,
stopping_lower_bounds = targets_df$value - tolerances_df$value,
stopping_upper_bounds = targets_df$value + tolerances_df$value
) %>%
imabc::as.targets(.)

# Write csv data for imabc:
if (!missing(targets_file)) {
write.csv(x = as.data.frame(imabc_targets), file = targets_file, row.names = F)
}

return(imabc_targets)
},

#' @description
#' Simulate Model Posterior Distribution of parameters
#'
#' @details
#' This function is used to simulate the model over the posterior distribution
#' Before running this function, the user should run the set_posterior.
#' @seealso simulate
#' @param parallel if T, the model will run in parallel
#' @param n_cores number of CPUs to use in the simulation. defaults to detectCores() - 2
#' be passed along to the user natural history function.
simulate_posterior = function(parallel = T, n_cores = detectCores() - 2, cluster_eval_script) {

# Simulate posterior distribution for a specific model
R6Sim_simulate_posterior(self = self, parallel = parallel, n_cores = n_cores, cluster_eval_script = cluster_eval_script)
}
),

),

# Use private to hold data that will not be accessed by the user directly.
private = list(
6 changes: 3 additions & 3 deletions R/R6Sim_set_param_dist.R
@@ -150,10 +150,10 @@ calculate_weighted_averages <- function(df, param_dist_weights) {
# Calculate the weighted average for every numeric variable:
df %>%
# Multiply value by the normalized weights:
mutate(across(where(is.numeric) & !c(.data$param_dist.df.id, .data$param_dist.df.name), ~ .x * df$normalized_weights)) %>%
select(-.data$normalized_weights) %>%
mutate(across(where(is.numeric) & !c(param_dist.df.id, param_dist.df.name), ~ .x * df$normalized_weights)) %>%
select(-normalized_weights) %>%
# Add them up:
group_by(.data$param_dist.df.id, .data$param_dist.df.name) %>%
group_by(param_dist.df.id, param_dist.df.name) %>%
summarise(across(where(is.numeric), sum), .groups = "drop") %>%
as.data.frame()
}
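
A small self-contained sketch of the weighted-average pattern used above, on a toy data frame with illustrative column names:

library(dplyr)

toy <- data.frame(
  name = c("a", "a", "b"),
  x = c(1, 2, 3),
  normalized_weights = c(0.25, 0.75, 1)
)

toy %>%
  # Multiply every numeric column (except the weights) by the normalized weights:
  mutate(across(where(is.numeric) & !normalized_weights, ~ .x * toy$normalized_weights)) %>%
  select(-normalized_weights) %>%
  # Add the weighted values up within each group:
  group_by(name) %>%
  summarise(across(where(is.numeric), sum), .groups = "drop")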
2 changes: 1 addition & 1 deletion README.Rmd
@@ -54,4 +54,4 @@ Reach out to [Pedro Nascimento de Lima](https://www.rand.org/about/people/l/lima

## License

Copyright (C) 2023 by The [RAND Corporation](https://www.rand.org). This repository is released as open-source software under an MIT + license. See the LICENSE.md file.
Copyright (C) 2024 by The [RAND Corporation](https://www.rand.org). This repository is released as open-source software under a GPL-3 license. See the LICENSE.md file.
4 changes: 2 additions & 2 deletions README.md
@@ -61,6 +61,6 @@ for questions related to this repository.

## License

Copyright (C) 2023 by The [RAND Corporation](https://www.rand.org). This
repository is released as open-source software under an MIT + license.
Copyright (C) 2024 by The [RAND Corporation](https://www.rand.org). This
repository is released as open-source software under a GPL-3 license.
See the LICENSE.md file.