implementing R6Experiment$run() method

RANDCorporation · Sep 14, 2023 · 9c2950a · 9c2950a
1 parent ea2eac3
commit 9c2950a
Show file tree

Hide file tree

Showing 9 changed files with 133 additions and 32 deletions.
diff --git a/DESCRIPTION b/DESCRIPTION
@@ -5,8 +5,8 @@ Version: 0.1.0
 Authors@R: 
     person("Pedro", "Nascimento de Lima", , "[email protected]", role = c("cre", "aut"),
            comment = c(ORCID = "0000-0001-9057-198X"))
-Description: Provides an encapsulated object-oriented programming (OOP)
-    framework for simulation modeling studies.
+Description: Provides an R6-based encapsulated object-oriented programming (OOP)
+    framework for simulation modeling studies in R.
 License: MIT + file LICENSE
 Imports: 
     assertthat,

diff --git a/R/R6Experiment.R b/R/R6Experiment.R
@@ -9,7 +9,7 @@
 #------------------------------------------------------------------------------#
 
 #------------------------------------------------------------------------------#
-# c19 experiment Class
+# R6Experiment Class
 # Purpose: The R6Experiment contains one or more models...
 #------------------------------------------------------------------------------#
 
@@ -25,7 +25,7 @@ R6Experiment <- R6::R6Class(
   # Use public to expose methods of this class:
   public = list(
 
-    #' @field models is a list containing c19model objects.
+    #' @field models is a list containing R6Sim objects.
     models = NULL,
 
     #' @field blocks number of population blocks for cases when we want to paralellize individual-level simulations.
@@ -43,7 +43,7 @@ R6Experiment <- R6::R6Class(
     #' @field params is a data.frame containing one row per parameter set defined in the params object of each model included in the experiment.
     params = NULL,
 
-    # Note from crcrdm: params design is the old natural history design, and policy design is the screening desing.
+    # Note: params design is the old natural history design, and policy design is the policy desing.
     #' @field params_design is a data.frame containing one row per parameter set defined in the params object of each model included in the experiment.
     params_design = NULL,
 
@@ -55,7 +55,7 @@ R6Experiment <- R6::R6Class(
 
     #' @description
     #' This function is used to initialize a `R6Experiment` object. This object represents an experiment that will be run and can encompass multiple models.
-    #' @param ... set of c19models to be included in the experiment. One `R6Experiment` can contain multiple models of the `c19model` class.
+    #' @param ... set of R6Sim to be included in the experiment. One `R6Experiment` can contain multiple models of the `c19model` class.
     #' @return a new `R6Experiment` object.
     initialize = function(...) {
       self$models <- list(...)
@@ -82,9 +82,9 @@ R6Experiment <- R6::R6Class(
     #' Set Experimental Design
     #'
     #' @details
-    #' Creates two data.frames that represent the experimental design" the `exp_design` for natural history experiments and the `screening_design` for screening experiments. These experimental designs are created based on the parameters defined by the set_parameter functions. The experimental design created by this function is useful to run a typical RDM analysis where each policy is evaluated across a LHS of deep uncertainties. To achieve that, define each policy lever as a grid parameter, and each uncertainty as an "lhs" uncertainty. Natural history uncertainties are often already defined in the model's posterior file and are also considered.
+    #' Creates two data.frames that represent the experimental design" the `exp_design` for natural history experiments and the `policy_design` for policy experiments. These experimental designs are created based on the parameters defined by the set_parameter functions. The experimental design created by this function is useful to run a typical RDM analysis where each policy is evaluated across a LHS of deep uncertainties. To achieve that, define each policy lever as a grid parameter, and each uncertainty as an "lhs" uncertainty. Natural history uncertainties are often already defined in the model's posterior file and are also considered.
     #' The natural history design will have `n_posterior` runs for each model in the experimental design.
-    #' The screening experimental design will have `blocks` \* `n_lhs` \* `n_grid_points` \* `n_posterior` for each model in the experimental design.
+    #' The policy experimental design will have `blocks` \* `n_lhs` \* `n_grid_points` \* `n_posterior` for each model in the experimental design.
     #'
     #' @param n_lhs The number of points in the Latin Hypercube Sample to be created.
     #' @param blocks is the number of population blocks to use to parallelize the runs across nodes.
@@ -103,7 +103,7 @@ R6Experiment <- R6::R6Class(
     #'
     #' @param path folder where json experimental designs should be saved. Do not specify a file name. If missing, the function will return the design specified below.
     #' @param write_inputs if TRUE (default), writes model inputs to json. Might be unnecessary when inputs are set in the model run script.
-    #' @param format "json" or "csv". the natural history design must be written to json, whereas the screening design can be written to json or csv.
+    #' @param format "json" or "csv". the natural history design must be written to json, whereas the policy design can be written to json or csv.
     write_design = function(path, write_inputs = T, format = c("json", "csv")) {
 
       # Checking arguments, selecting defaults:
@@ -160,8 +160,13 @@ R6Experiment <- R6::R6Class(
           row.names = F, col.names = F, append = F, sep = ","
         )
       }
+    },
+
+    run = function(n_cores = 3, parallel = T, cluster_eval_script) {
+      R6Experiment_run(self = self, n_cores = n_cores, parallel = parallel, cluster_eval_script = cluster_eval_script)
     }
   ),
+
   # Use private to hold data that will not be accessed by the user directly.
   private = list(
     # Private objects are not documented and exported as R6 fields:

diff --git a/R/R6Experiment_run.R b/R/R6Experiment_run.R
@@ -0,0 +1,96 @@
+
+
+#------------------------------------------------------------------------------#
+#
+# R6Sim: R6-based Simulation Modeling Toolkit
+#
+# Author: Pedro Nascimento de Lima
+# See LICENSE.txt and README.txt for information on usage and licensing
+#------------------------------------------------------------------------------#
+
+#------------------------------------------------------------------------------#
+# Run an R6Experiment
+# Purpose: Runs an R6Experiment in parallel, assuming each input assigned
+# maps to one model input
+#------------------------------------------------------------------------------#
+
+#' Runs R6Experiment in parallel
+#'
+#'
+#' This function is most useful to simulate the posterior distribution for a single model in parallel in one machine. This function is not used when calibrating the model and not useful for parallelization across multiple nodes.
+#'
+#' @param self experiment object
+#' @param n_cores number of cores to use
+#' @param parallel whether to evaluate run in parallel
+#' @param cluster_eval_script path to script that instantiates necessary functions. this will often mean sourcing functions external to the package and loading dependencies for the model. needed if parallel = T
+#'
+#' @return results data.frame from all simulations in parallel
+#'
+#' @import parallel
+#' @import doSNOW
+#' @import foreach
+#' @import progress
+R6Experiment_run <- function(self, n_cores, parallel, cluster_eval_script) {
+
+  # TODO: Continue here.
+  # browser()
+
+  if (parallel) {
+    # PNL note: Decide whether to use snow:: or parallel:: makeCluster
+
+    browser()
+
+    cl <- parallel::makeCluster()
+
+    cl <- snow::makeCluster(n_cores)
+    doSNOW::registerDoSNOW(cl)
+    #snow::clusterExport(cl, "cluster_eval_script")
+    #snow::clusterEvalQ(cl, source(cluster_eval_script))
+    snow::clusterEvalQ(cl, source("./R/scripts/cluster_eval.R"))
+  }
+
+  # progress bar ------------------------------------------------------------
+  # Progress bar setup adapted from:
+  # https://stackoverflow.com/questions/5423760/how-do-you-create-a-progress-bar-when-using-the-foreach-function-in-r
+  pb <- progress_bar$new(
+    format = "R6Sim: experiment = :experiment [:bar] :elapsed | eta: :eta",
+    total = nrow(self$policy_design), # 100
+    width = 80
+  )
+
+  # allowing progress bar to be used in foreach -----------------------------
+  progress <- function(n) {
+    pb$tick(tokens = list(experiment = n))
+  }
+
+  opts <- list(progress = progress)
+
+  # foreach loop ------------------------------------------------------------
+  results <- foreach(i = 1:nrow(self$policy_design), .combine = rbind, .options.snow = opts) %dopar% {
+
+    # Assign model inputs:
+    model <- self$models[[self$policy_design$model.id[i]]]
+
+    id_cols <- c("grid.id", "lhs.id", "params_design.id", "param.id", "model.id", "all.params.id", "policy.exp.id")
+
+    scenario_inputs <- self$policy_design[i,] %>%
+      select(-any_of(id_cols)) %>%
+      as.data.frame()
+
+    # Set each input
+    for(var in names(scenario_inputs)) {
+      model$set_input(var, scenario_inputs[,var])
+    }
+
+    res <- model$simulate()
+
+    return(cbind(self$policy_design[i,],res))
+
+  }
+
+  if (parallel) {
+    stopCluster(cl)
+  }
+
+  return(results)
+}
diff --git a/R/R6Experiment_set_design.R b/R/R6Experiment_set_design.R
@@ -145,14 +145,14 @@ R6Experiment_set_design <- function(self, n_lhs, blocks, grid_design_df, convert
   policy_design <- policy_design %>%
     left_join(grid_params, by = "grid.id") %>%
     left_join(lhs_experiments, by = "lhs.id") %>%
-    mutate(screening.exp.id = row_number())
+    mutate(policy.exp.id = row_number())
 
   # Save Experimental Design as a data.tables and json objects:
 
   # For the Natural history design:
   self$params_design <- data.table::as.data.table(params_design)
 
-  # For the Screening design
+  # For the policy design
   self$policy_design <- data.table::as.data.table(policy_design)
 
   invisible(self)

diff --git a/R/R6Sim.R b/R/R6Sim.R
@@ -161,8 +161,8 @@ R6Sim <- R6::R6Class(
     #' Runs the simulation model for a single parameter set.
     #'
     #' @details
-    #' This function is a wrapper around the user natural history function. It passes the `self` object and any other parameters provided to the function.
-    #' @param ... any set of parameters passed to this function will be passed along to the user natural history function.
+    #' The simulate method should be used to simulate a model run. All inputs used by the model should already have been defined before this function is called.
+    #' @param ... any set of parameters passed to this function will be passed along to the model simulate function.
     simulate = function(...) {
       stop("Simulate method must be implemented by your class.")
     },
@@ -174,7 +174,7 @@ R6Sim <- R6::R6Class(
     #' This function is a wrapper around the user natural history function. It passes the `self` object and any other parameters provided to the function.
     #' @param ... any set of parameters passed to this function will be passed along to the user natural history function.
     setup_run = function(...) {
-      stop("Simulate method must be implemented by your class.")
+      stop("Setup_run method must be implemented by your class.")
     },
 
     #' @description
@@ -260,10 +260,10 @@ R6Sim <- R6::R6Class(
     #' @param parallel if T, the model will run in parallel
     #' @param n_cores number of CPUs to use in the simulation. defaults to detectCores() - 2
     #' be passed along to the user natural history function.
-    simulate_posterior = function(parallel = T, n_cores = detectCores() - 2) {
+    simulate_posterior = function(parallel = T, n_cores = detectCores() - 2, cluster_eval_script) {
 
       # Simulate posterior distribution for a specific model
-      R6Sim_simulate_posterior(self = self, parallel = parallel, n_cores = n_cores)
+      R6Sim_simulate_posterior(self = self, parallel = parallel, n_cores = n_cores, cluster_eval_script = cluster_eval_script)
     }
   ),
 

diff --git a/R/R6Sim_json.R b/R/R6Sim_json.R
@@ -49,7 +49,7 @@ R6Sim_to_json <- function(self, private, types) {
   return(json_model)
 }
 
-# Converts a JSON Object to a crcspin model object
+# Converts a JSON Object to a model object
 R6Sim_set_inputs_from_json <- function(self, json) {
   json_list <- jsonlite::unserializeJSON(json)
 

diff --git a/R/R6Sim_set_param_dist.R b/R/R6Sim_set_param_dist.R
@@ -2,7 +2,7 @@
 
 #------------------------------------------------------------------------------#
 #
-# CRCRDM: Robust Decision Making tools for Colorectal Cancer Screening Models
+# R6Sim: R6-based Simulation Modeling Toolkit
 #
 # Author: Pedro Nascimento de Lima
 # See LICENSE.txt and README.txt for information on usage and licensing

diff --git a/README.Rmd b/README.Rmd
@@ -19,7 +19,9 @@ knitr::opts_chunk$set(
 [![R-CMD-check](https://github.com/RANDCorporation/R6Sim/actions/workflows/R-CMD-check.yaml/badge.svg)](https://github.com/RANDCorporation/R6Sim/actions/workflows/R-CMD-check.yaml)
 <!-- badges: end -->
 
-`R6Sim` provides an `R6`-based framework for simulation modeling in R for those who would like the encapsulated object-oriented programming approach provided by `R6`. This package allows one to create a model that inherits`R6Sim`, and specify experimental designs using multiple models.
+`R6Sim` provides an `R6` base class for simulation modeling for those who appreciate the structure of encapsulated object-oriented programming provided by `R6`. This package allows one to create a model that inherits `R6Sim`, and specify experimental designs that potentially use multiple models using the `R6Experiment` class.
+
+We use `R6Sim` across multiple projects, and the purpose of releasing it publicly is to facilitate reproducibility and enhance the quality, agility and reliability of code used in simulation-based policy research projects.
 
 ## Installation
 
@@ -30,8 +32,6 @@ You can install the development version of `R6Sim` like so:
 # install.packages("remotes")
 # install from github:
 remotes::install_github("randcorporation/r6sim")
-# or install from CRAN (in the near future)
-install.packages("R6Sim")
 ```
 
 ## Contact

diff --git a/tests/testthat/test-test_R6Sim.R b/tests/testthat/test-test_R6Sim.R
@@ -35,7 +35,7 @@ Mymodel <- R6::R6Class(
 
 # Creating a model object -------------------------------------------------
 
-# Creates a CRC model object and gives it a name.
+# Creates a model object and gives it a name.
 model = Mymodel$new(name = "test")
 
 test_that("R6Sim was created", {
@@ -49,9 +49,9 @@ model$
   set_input(name = "risk.mean",value = 0.15,type = "natural_history")$
   set_input(name = "risk.sd",value = 0.02,type = "natural_history")$
   set_input(name = "trials",value = 10,type = "natural_history")$
-  set_input(name = "strategy.id",value = 1,type = "screening")$
-  set_input(name = "some_date",value = "2020-01-01",type = "screening")$
-  set_input(name = "det.ratios",value = seq.default(from = 0, to = 1, length.out = 101),type = "screening")
+  set_input(name = "strategy.id",value = 1,type = "policy")$
+  set_input(name = "some_date",value = "2020-01-01",type = "policy")$
+  set_input(name = "det.ratios",value = seq.default(from = 0, to = 1, length.out = 101),type = "policy")
 
 # Setting an input twice:
 model$set_input(name = "risk.sd",value = 0.02,type = "natural_history")
@@ -210,19 +210,19 @@ test_that("R6Experiment works with pre-existing design", {
 # test_that("write_design can write to a file", {
 #   experiment$write_design(path = "./json-test")
 #
-#   expect_true(file.exists("./json-test/screening_design.txt"))
+#   expect_true(file.exists("./json-test/policy_design.txt"))
 #   expect_true(file.exists("./json-test/nh_design.txt"))
-#   file.remove("./json-test/screening_design.txt")
+#   file.remove("./json-test/policy_design.txt")
 #   file.remove("./json-test/nh_design.txt")
 #   file.remove("./json-test/")
 # })
 #
 # test_that("write_design also can write to csv", {
-#   experiment$write_design(path = "./json-test", design = "screening", format = "csv")
-#   expect_true(file.exists("./json-test/screening_design.txt"))
-#   expect_true(file.exists("./json-test/screening_design_col_names.txt"))
-#   file.remove("./json-test/screening_design.txt")
-#   file.remove("./json-test/screening_design_col_names.txt")
+#   experiment$write_design(path = "./json-test", design = "policy", format = "csv")
+#   expect_true(file.exists("./json-test/policy_design.txt"))
+#   expect_true(file.exists("./json-test/policy_design_col_names.txt"))
+#   file.remove("./json-test/policy_design.txt")
+#   file.remove("./json-test/policy_design_col_names.txt")
 #   file.remove("./json-test/")
 # })
 #