
Commit

adding vignette, removing snow
pedroliman committed Apr 2, 2024
1 parent 0171ff6 commit e423686
Showing 14 changed files with 250 additions and 265 deletions.
10 changes: 3 additions & 7 deletions DESCRIPTION
@@ -5,25 +5,20 @@ Version: 1.0.0
Authors@R:
person("Pedro", "Nascimento de Lima", , "[email protected]", role = c("cre", "aut"),
comment = c(ORCID = "0000-0001-9057-198X"))
Description: Provides an R6-based encapsulated object-oriented programming (OOP)
Description: Provides an R6-based encapsulated object-oriented programming
framework for simulation modeling studies in R.
License: GPL (>= 3)
Imports:
assertthat,
data.table,
doSNOW,
dplyr,
foreach,
imabc,
jsonlite,
lhs,
magrittr,
parallel,
progress,
purrr,
R6,
readxl,
snow,
stats,
tidyr,
utils,
@@ -32,8 +27,9 @@ Suggests:
knitr,
rmarkdown,
testthat (>= 3.0.0)
VignetteBuilder:
knitr
Config/testthat/edition: 3
Encoding: UTF-8
LazyData: true
RoxygenNote: 7.2.3
VignetteBuilder: knitr
6 changes: 0 additions & 6 deletions NAMESPACE
@@ -5,17 +5,11 @@ export(R6Sim)
export(is.R6Experiment)
export(is.R6Sim)
import(R6)
import(doSNOW)
import(dplyr)
import(foreach)
import(parallel)
import(progress)
import(tidyr)
importFrom(data.table,as.data.table)
importFrom(data.table,data.table)
importFrom(imabc,as.priors)
importFrom(imabc,as.targets)
importFrom(imabc,imabc)
importFrom(lhs,randomLHS)
importFrom(magrittr,"%>%")
importFrom(readxl,excel_sheets)
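
For reference, this NAMESPACE file is generated by roxygen2 from tags in the package's R files rather than edited by hand; a minimal, hedged sketch of tags that would produce entries like those shown above:

#' @import R6
#' @import dplyr
#' @importFrom data.table as.data.table data.table
#' @importFrom lhs randomLHS
#' @importFrom magrittr %>%
NULL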
17 changes: 15 additions & 2 deletions R/R6Experiment.R
@@ -173,8 +173,21 @@ R6Experiment <- R6::R6Class(
)
}
},
run = function(n_cores = 3, parallel = F, cluster_eval_script, model_from_cluster_eval = T) {
R6Experiment_run(self = self, n_cores = n_cores, parallel = parallel, cluster_eval_script = cluster_eval_script, model_from_cluster_eval = model_from_cluster_eval)

#' Runs R6Experiment in parallel
#'
#'
#' This function is most useful for simulating the posterior distribution of a single model in parallel on one machine. It is not used when calibrating the model and is not suitable for parallelization across multiple nodes.
#'
#' @param self experiment object
#' @param n_cores number of cores to use
#' @param parallel whether to evaluate run in parallel
#' @param cluster_eval_script path to a script that instantiates the necessary functions. This will often mean sourcing functions external to the package and loading the model's dependencies. Needed if parallel = T.
#' @param model_from_cluster_eval T if the model is instantiated in the cluster eval script, F otherwise. Use T for models that need compilation (like odin) and F otherwise.
#' @param ... additional parameters to be passed down to the `model$simulate()` function
#' @return results data.frame from all simulations in parallel
run = function(n_cores = 3, parallel = F, cluster_eval_script, model_from_cluster_eval = T, ...) {
R6Experiment_run(self = self, n_cores = n_cores, parallel = parallel, cluster_eval_script = cluster_eval_script, model_from_cluster_eval = model_from_cluster_eval, ...)
}
),

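
For illustration, a minimal sketch of how the updated run() signature might be called; the experiment object, script path, and extra scenario argument below are hypothetical, not part of the package:

# experiment is assumed to be an R6Experiment with its designs already set up
results <- experiment$run(
  n_cores = 4,
  parallel = TRUE,
  cluster_eval_script = "cluster_setup.R", # hypothetical script sourced on each worker
  model_from_cluster_eval = FALSE,
  scenario = "baseline" # illustrative extra argument forwarded to model$simulate() via ...
)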
86 changes: 47 additions & 39 deletions R/R6Experiment_run.R
@@ -36,73 +36,81 @@
#' @param parallel whether to evaluate run in parallel
#' @param cluster_eval_script path to a script that instantiates the necessary functions. This will often mean sourcing functions external to the package and loading the model's dependencies. Needed if parallel = T.
#' @param model_from_cluster_eval T if the model is instantiated in the cluster eval script, F otherwise. Use T for models that need compilation (like odin) and F otherwise.
#' @param ... additional parameters to be passed to the model simulation function.
#' @return results data.frame from all simulations in parallel
#'
#' @import parallel
#' @import doSNOW
#' @import foreach
#' @import progress
R6Experiment_run <- function(self, n_cores, parallel, cluster_eval_script, model_from_cluster_eval) {

R6Experiment_run <- function(self, n_cores, parallel, cluster_eval_script, model_from_cluster_eval, ...) {

# If we are running the experiment in parallel, we need to instantiate the models explicitly within each "node".
# if the model is self-contained, that should be enough.
if (parallel) {

# Make cluster and evaluate the cluster eval script.
cl <- snow::makeCluster(n_cores)
doSNOW::registerDoSNOW(cl)
snow::clusterExport(cl, "cluster_eval_script", envir = environment())
snow::clusterEvalQ(cl, source(cluster_eval_script))
# With parallel
cl <- parallel::makeCluster(n_cores)
parallel::clusterExport(cl, "cluster_eval_script", envir = environment())
parallel::clusterEvalQ(cl, source(cluster_eval_script))

}

# progress bar ------------------------------------------------------------
# Progress bar setup adapted from:
# https://stackoverflow.com/questions/5423760/how-do-you-create-a-progress-bar-when-using-the-foreach-function-in-r
pb <- progress_bar$new(
format = "R6Sim: experiment = :experiment [:bar] :elapsed | eta: :eta",
total = nrow(self$policy_design), # 100
width = 80
)
# pb <- progress_bar$new(
# format = "R6Sim: experiment = :experiment [:bar] :elapsed | eta: :eta",
# total = nrow(self$policy_design), # 100
# width = 80
# )

# allowing progress bar to be used in foreach -----------------------------
progress <- function(n) {
pb$tick(tokens = list(experiment = n))
}
# progress <- function(n) {
# pb$tick(tokens = list(experiment = n))
# }

opts <- list(progress = progress)
# opts <- list(progress = progress)

# foreach loop ------------------------------------------------------------
results <- foreach(i = 1:nrow(self$policy_design), .combine = rbind, .options.snow = opts) %dopar% {
if (!model_from_cluster_eval) {
model <- self$models[[self$policy_design$model.id[i]]]
} else {
stopifnot("cluster_experiment object not defined. Create an R6Experiment object called cluster_experiment in your cluster_eval_script file, containing the models used in this analysis." = exists("cluster_experiment"))

stopifnot("cluster_experiment object is not an R6Experiment. Make sure to use R6Experiment to create the cluster_experiment object." = is.R6Experiment(cluster_experiment))
if (parallel) {

model <- cluster_experiment$models[[self$policy_design$model.id[i]]]
}
# Using parLapply and not foreach due to licensing issues.
results <- parLapply(cl = cl, X = 1:nrow(self$policy_design), fun = run_single_experiment, model_from_cluster_eval = model_from_cluster_eval, self = self, parallel = parallel, ...)

id_cols <- c("grid.id", "lhs.id", "params_design.id", "param.id", "model.id", "all.params.id", "policy.exp.id")
parallel::stopCluster(cl)
} else {
results <- lapply(X = 1:nrow(self$policy_design), FUN = run_single_experiment, model_from_cluster_eval = model_from_cluster_eval, self = self, parallel = parallel, ...)
}

return(do.call(rbind, results))
}

scenario_inputs <- self$policy_design[i, ] %>%
select(-any_of(id_cols)) %>%
as.data.frame()
run_single_experiment <- function(policy_design_id, self, model_from_cluster_eval, parallel, ...) {

# Set each input
for (var in names(scenario_inputs)) {
model$set_input(var, scenario_inputs[, var])
}
if (!model_from_cluster_eval | !parallel) {
model <- self$models[[self$policy_design$model.id[policy_design_id]]]
} else {
stopifnot("cluster_experiment object not defined. Create an R6Experiment object called cluster_experiment in your cluster_eval_script file, containing the models used in this analysis." = exists("cluster_experiment"))

res <- model$simulate()
stopifnot("cluster_experiment object is not an R6Experiment. Make sure to use R6Experiment to create the cluster_experiment object." = is.R6Experiment(cluster_experiment))

return(cbind(self$policy_design[i, ], res))
model <- cluster_experiment$models[[self$policy_design$model.id[policy_design_id]]]
}

if (parallel) {
snow::stopCluster(cl)
id_cols <- c("grid.id", "lhs.id", "params_design.id", "param.id", "model.id", "all.params.id", "policy.exp.id")

scenario_inputs <- self$policy_design[policy_design_id, ] %>%
select(-any_of(id_cols)) %>%
as.data.frame()

# Set each input
for (var in names(scenario_inputs)) {
model$set_input(var, scenario_inputs[, var])
}

return(results)
res <- model$simulate(...)

return(cbind(self$policy_design[policy_design_id, ], res))

}
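
As a self-contained illustration of the base-R parallel pattern that replaces the snow/doSNOW/foreach machinery above, with a toy worker function standing in for run_single_experiment:

library(parallel)

cl <- makeCluster(2)
out <- parLapply(cl = cl, X = 1:10, fun = function(i) data.frame(id = i, value = i^2))
stopCluster(cl)

# Combine the per-run data.frames, as R6Experiment_run() now does with do.call(rbind, results)
results <- do.call(rbind, out)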

92 changes: 2 additions & 90 deletions R/R6Sim.R
@@ -186,97 +186,9 @@ R6Sim <- R6::R6Class(
#' @param ... any set of parameters passed to this function will be passed along to the user natural history function.
setup_run = function(...) {
stop("Setup_run method must be implemented by your class.")
},

#' @description
#' make priors for calibration (using the IMABC format)
#'
#' return an imabc priors object
#'
#' @param priors_file csv file name where priors should be saved, if any
make_priors = function(priors_file) {
priors <- self$inputs$priors %>%
# clean up fixed parameters
mutate(
sd = ifelse(dist_base_name == "fixed", NA, sd),
min = ifelse(dist_base_name == "fixed", NA, min),
max = ifelse(dist_base_name == "fixed", NA, max),
a = ifelse(dist_base_name == "fixed", NA, a),
b = ifelse(dist_base_name == "fixed", NA, b)
) %>%
# clean up uniform parameters:
mutate(mean = ifelse(dist_base_name == "unif", NA, mean)) %>%
# add a and b parameters for the truncated normal:
mutate(a = ifelse(dist_base_name == "truncnorm", min, NA)) %>%
mutate(b = ifelse(dist_base_name == "truncnorm", max, NA)) %>%
# clean up additional columns:
select(-any_of(c("ideal_dist_base_name", "reported_prior"))) %>%
imabc::as.priors()

if (!missing(priors_file)) {
write.csv(as.data.frame(priors), file = priors_file)
}

return(priors)
},

#' @description
#' make targets for calibration
#'
#' return an imabc targets object
#'
#' @param targets_file csv file name where targets should be saved, if any
#' @importFrom imabc as.priors
#' @importFrom imabc as.targets
#' @importFrom imabc imabc
make_targets = function(targets_file) {

# this name can't change.
time_variable <- "week"
target_variables <- names(self$inputs$t_tol)[-c(1, 2)]

targets_df <- get_long_target_df(self$inputs$t_series, target_variables)

tolerances_df <- get_long_target_df(self$inputs$t_tol, target_variables)

init_tolerances_df <- get_long_target_df(self$inputs$t_ini_tol, target_variables)

# create imabc target object:
imabc_targets <- data.frame(
target_names = targets_df$target_names,
target_groups = targets_df$target_groups,
targets = targets_df$value,
current_lower_bounds = targets_df$value - init_tolerances_df$value,
current_upper_bounds = targets_df$value + init_tolerances_df$value,
stopping_lower_bounds = targets_df$value - tolerances_df$value,
stopping_upper_bounds = targets_df$value + tolerances_df$value
) %>%
imabc::as.targets(.)

# Write csv data for imabc:
if (!missing(targets_file)) {
write.csv(x = as.data.frame(imabc_targets), file = targets_file, row.names = F)
}

return(imabc_targets)
},

#' @description
#' Simulate Model Posterior Distribution of parameters
#'
#' @details
#' This function is used to simulate the model over the posterior distribution
#' Before running this function, the user should run the set_posterior.
#' @seealso simulate
#' @param parallel if T, the model will run in parallel
#' @param n_cores number of CPUs to use in the simulation. defaults to detectCores() - 2
#' be passed along to the user natural history function.
simulate_posterior = function(parallel = T, n_cores = detectCores() - 2, cluster_eval_script) {

# Simulate posterior distribution for a specific model
R6Sim_simulate_posterior(self = self, parallel = parallel, n_cores = n_cores, cluster_eval_script = cluster_eval_script)
}
),

),

# Use private to hold data that will not be accessed by the user directly.
private = list(
6 changes: 3 additions & 3 deletions R/R6Sim_set_param_dist.R
@@ -150,10 +150,10 @@ calculate_weighted_averages <- function(df, param_dist_weights) {
# Calculate the weighted average for every numeric variable:
df %>%
# Multiply value by the normalized weights:
mutate(across(where(is.numeric) & !c(.data$param_dist.df.id, .data$param_dist.df.name), ~ .x * df$normalized_weights)) %>%
select(-.data$normalized_weights) %>%
mutate(across(where(is.numeric) & !c(param_dist.df.id, param_dist.df.name), ~ .x * df$normalized_weights)) %>%
select(-normalized_weights) %>%
# Add them up:
group_by(.data$param_dist.df.id, .data$param_dist.df.name) %>%
group_by(param_dist.df.id, param_dist.df.name) %>%
summarise(across(where(is.numeric), sum), .groups = "drop") %>%
as.data.frame()
}
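
A small self-contained sketch of the weighted-average pattern used above, on a toy data frame with illustrative column names:

library(dplyr)

toy <- data.frame(
  name = c("a", "a", "b"),
  x = c(1, 2, 3),
  normalized_weights = c(0.25, 0.75, 1)
)

toy %>%
  # Multiply every numeric column (except the weights) by the normalized weights:
  mutate(across(where(is.numeric) & !normalized_weights, ~ .x * toy$normalized_weights)) %>%
  select(-normalized_weights) %>%
  # Add the weighted values up within each group:
  group_by(name) %>%
  summarise(across(where(is.numeric), sum), .groups = "drop")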
2 changes: 1 addition & 1 deletion README.Rmd
@@ -54,4 +54,4 @@ Reach out to [Pedro Nascimento de Lima](https://www.rand.org/about/people/l/lima

## License

Copyright (C) 2023 by The [RAND Corporation](https://www.rand.org). This repository is released as open-source software under an MIT + license. See the LICENSE.md file.
Copyright (C) 2024 by The [RAND Corporation](https://www.rand.org). This repository is released as open-source software under a GPL-3 license. See the LICENSE.md file.
4 changes: 2 additions & 2 deletions README.md
@@ -61,6 +61,6 @@ for questions related to this repository.

## License

Copyright (C) 2023 by The [RAND Corporation](https://www.rand.org). This
repository is released as open-source software under an MIT + license.
Copyright (C) 2024 by The [RAND Corporation](https://www.rand.org). This
repository is released as open-source software under a GPL-3 license.
See the LICENSE.md file.