Merge cabp_install into GH-191/longleaf-batch-submission

HopkinsIDD · Oct 3, 2024 · 3ab547f · 3ab547f
2 parents 8af3698 + 814e607
commit 3ab547f
Show file tree

Hide file tree

Showing 20 changed files with 159 additions and 43 deletions.
diff --git a/README.md b/README.md
@@ -1,3 +1,19 @@
+
+# FlepiMoP
+
+The *Fle*xible *Epi*demic *Mo*deling *P*ipeline, `FlepiMoP`, makes it easy to build an infectious disease model, infer that model's parameters, and project scenario outcomes.
+
+# Quickstart
+
+```bash
+mkdir myflepimopworkspace && cd $_
+git clone git@github.com:HopkinsIDD/flepiMoP.git --depth 1
+./flepiMoP/install_ubuntu.sh
+cp -r ./flepiMoP/examples/tutorial_two_subpops test_model && cd $_
+gempyor-simulate -c config_sample_2pop.yml
+flepimop-inference-main -c config_sample_2pop_inference.yml
+```
+
 # flepiMoP
 
 Welcome to the Johns Hopkins University Infectious Disease Dynamics COVID-19 Working Group's `Flexible Epidemic Modeling Pipeline`(“FlepiMoP”, formerly the COVID Scenario Pipeline, “CSP”), a flexible modeling framework that projects epidemic trajectories and healthcare impacts under different suites of interventions in order to aid in scenario planning. The model is generic enough to be applied to different spatial scales given shapefiles, population data, and COVID-19 confirmed case data. There are multiple components to the pipeline, which may be characterized as follows: 1) epidemic seeding; 2) disease transmission and non-pharmaceutical intervention scenarios; 3) calculation of health outcomes (hospital and ICU admissions and bed use, ventilator use, and deaths); and 4) summarization of model outputs.

diff --git a/batch/AWS_inference_runner.sh b/batch/AWS_inference_runner.sh
@@ -145,14 +145,14 @@ echo "---"
 find data
 echo "==="
 
-echo "***************** RUNNING inference_slot.R *****************"
+echo "***************** RUNNING flepimop-inference-slot *****************"
 
-Rscript flepiMoP/flepimop/main_scripts/inference_slot.R -p flepiMoP
+flepimop-inference-slot -p flepiMoP
 dvc_ret=$?
 if [ $dvc_ret -ne 0 ]; then
-        error_handler "Error code returned from inference_main.R: $dvc_ret"
+        error_handler "Error code returned from flepimop-inference-slot: $dvc_ret"
 fi
-echo "***************** DONE RUNNING inference_slot.R *****************"
+echo "***************** DONE RUNNING flepimop-inference-slot *****************"
 
 echo "***************** UPLOADING RESULT TO S3 *****************"
 for type in "seir" "hosp" "llik" "spar" "snpi" "hnpi" "hpar"

diff --git a/batch/SLURM_inference_job.run b/batch/SLURM_inference_job.run
@@ -136,9 +136,9 @@ fi
 ls -ltr model_output
 echo "***************** DONE FETCHING RESUME FILES *****************"
 
-echo "***************** RUNNING INFERENCE_MAIN.R *****************"
+echo "***************** RUNNING flepimop-inference-slot *****************"
 export LOG_FILE="$FS_RESULTS_PATH/log_${FLEPI_RUN_INDEX}_${FLEPI_SLOT_INDEX}.txt"
-echo "Rscript $FLEPI_PATH/flepimop/main_scripts/inference_slot.R --config $CONFIG_PATH   # path to the config file
+echo "flepimop-inference-slot --config $CONFIG_PATH   # path to the config file
                                                                  --run_id $FLEPI_RUN_INDEX  # Unique identifier for this run
                                                                  --seir_modifiers_scenarios $FLEPI_SEIR_SCENARIOS  # name of the intervention to run, or 'all'
                                                                  --outcome_modifiers_scenarios $FLEPI_OUTCOME_SCENARIOS  # name of the outcome scenarios to run, or 'all'
@@ -155,12 +155,12 @@ echo "Rscript $FLEPI_PATH/flepimop/main_scripts/inference_slot.R --config $CONFI
                                                                  --is-resume $RESUME_RUN # Is this run a resume
                                                                  --is-interactive FALSE # Is this run an interactive run" #> $LOG_FILE 2>&1 &
 
-Rscript $FLEPI_PATH/flepimop/main_scripts/inference_slot.R -p $FLEPI_PATH --config $CONFIG_PATH --run_id $FLEPI_RUN_INDEX --seir_modifiers_scenarios $FLEPI_SEIR_SCENARIOS --outcome_modifiers_scenarios $FLEPI_OUTCOME_SCENARIOS --jobs 1 --iterations_per_slot $FLEPI_ITERATIONS_PER_SLOT --this_slot $FLEPI_SLOT_INDEX --this_block 1 --stoch_traj_flag $FLEPI_STOCHASTIC_RUN --is-resume $RESUME_RUN --is-interactive FALSE #> $LOG_FILE 2>&1
+flepimop-inference-slot -p $FLEPI_PATH --config $CONFIG_PATH --run_id $FLEPI_RUN_INDEX --seir_modifiers_scenarios $FLEPI_SEIR_SCENARIOS --outcome_modifiers_scenarios $FLEPI_OUTCOME_SCENARIOS --jobs 1 --iterations_per_slot $FLEPI_ITERATIONS_PER_SLOT --this_slot $FLEPI_SLOT_INDEX --this_block 1 --stoch_traj_flag $FLEPI_STOCHASTIC_RUN --is-resume $RESUME_RUN --is-interactive FALSE #> $LOG_FILE 2>&1
 dvc_ret=$?
 if [[ $dvc_ret -ne 0 ]]; then
-        echo "Error code returned from inference_slot.R: $dvc_ret"
+        echo "Error code returned from flepimop-inference-slot: $dvc_ret"
 fi
-echo "***************** DONE RUNNING INFERENCE_SLOT.R *****************"
+echo "***************** DONE flepimop-inference-slot *****************"
 
 
 echo "***************** UPLOADING RESULT TO S3 (OR NOT) *****************"

diff --git a/build/setup.R b/build/setup.R
@@ -0,0 +1,33 @@
+#!/usr/bin/env Rscript
+
+# Installs the R side of flepiMoP:
+#   1. the `remotes` package (if missing),
+#   2. every package found under <flepimop-path>/flepimop/R_packages,
+#   3. extra CRAN packages used by the analysis scripts (if missing),
+#   4. the `flepimop-*` command line scripts, via inference::install_cli().
+#
+# Usage: setup.R <flepimop-path>
+
+.args <- commandArgs(trailingOnly = TRUE)
+
+if (length(.args) != 1) {
+  stop("Usage: setup.R <flepimop-path>")
+}
+
+# TODO sniff for upgrade mode
+
+# Repositories used for CRAN installs. A session with no mirror configured
+# reports the placeholder "@CRAN@", which makes install.packages() abort when
+# R is not interactive, so drop it and always append a concrete mirror.
+cran_repos <- getOption("repos")
+cran_repos <- unique(
+  c(cran_repos[cran_repos != "@CRAN@"], "https://cran.r-project.org")
+)
+
+# requireNamespace() is enough here: nothing below calls `remotes` unqualified.
+if (!requireNamespace("remotes", quietly = TRUE)) {
+  install.packages("remotes", repos = cran_repos)
+  stopifnot(
+    "Could not load `remotes` package." =
+      requireNamespace("remotes", quietly = TRUE)
+  )
+}
+
+# local flepiMoP packages, installed from source
+rpkg_dir <- file.path(.args[1], "flepimop", "R_packages")
+if (!dir.exists(rpkg_dir)) {
+  stop(sprintf("Could not find the R packages directory `%s`.", rpkg_dir))
+}
+rpkgs <- list.files(rpkg_dir, full.names = TRUE)
+
+for (pkg in rpkgs) {
+  install.packages(pkg, repos = NULL, type = "source")
+}
+
+# other dependencies for analysis scripts
+for (pkg in c("ggfortify", "flextable", "optparse", "cowplot")) {
+  if (!requireNamespace(pkg, quietly = TRUE)) {
+    install.packages(pkg, repos = cran_repos)
+    if (!requireNamespace(pkg)) {
+      stop(sprintf("Could not install and/or load `%s` package.", pkg))
+    }
+  }
+}
+
+# install.packages() only warns when a source install fails, so confirm that
+# `inference` is really available before asking it to install the scripts.
+if (!requireNamespace("inference", quietly = TRUE)) {
+  stop(
+    "The `inference` package is not installed; ",
+    "cannot install the flepimop command line scripts."
+  )
+}
+
+# install the R scripts as executables
+inference::install_cli()
diff --git a/documentation/gitbook/how-to-run/advanced-run-guides/quick-start-guide-conda.md b/documentation/gitbook/how-to-run/advanced-run-guides/quick-start-guide-conda.md
@@ -162,7 +162,7 @@ rm -r model_output/ # delete the outputs of past run if there are
 An inference run requires a configuration file that has an `inference` section. Stay in the `$DATA_PATH` folder, and run the inference script, providing the name of the configuration file you want to run (ex. `config.yml`). In the example data folder (flepimop\_sample), try out the example config XXX.
 
 ```bash
-Rscript  $FLEPI_PATH/flepimop/main_scripts/inference_main.R -c config.yml
+flepimop-inference-main -c config.yml
 ```
 
 This will run the model and create [a lot of output files](../../gempyor/output-files.md) in `$DATA_PATH/model_output/`.
@@ -179,14 +179,14 @@ The last few lines visible on the command prompt should be:
 
 If you want to quickly do runs with options different from those encoded in the configuration file, you can do that from the command line, for example
 
-```
-Rscript $FLEPI_PATH/flepimop/main_scripts/inference_main.R -j 1 -n 1 -k 1 -c config.yml
+```bash
+flepimop-inference-main -j 1 -n 1 -k 1 -c config.yml
 ```
 
 where:
 
 * `n` is the number of parallel inference slots,
-* `j` is the number of CPU cores to use on your machine (if `j` > `n`, only `n` cores will actually be used. If `j` <`n`, some cores will run multiple slots in sequence)
+* `j` is the number of CPU cores to use on your machine (if `j` > `n`, only `n` cores will actually be used. If `j` < `n`, some cores will run multiple slots in sequence)
 * `k` is the number of iterations per slot.
 
 #### Non-inference run
@@ -220,6 +220,6 @@ cd $DATA_PATH
 rm -rf model_output
 export CONFIG_PATH=config.yml # set your configuration file path
 
-Rscript $FLEPI_PATH/flepimop/main_scripts/inference_main.R.R -j 1 -n 1 -k 1
+flepimop-inference-main -j 1 -n 1 -k 1
 ```
 {% endcode %}
diff --git a/documentation/gitbook/how-to-run/advanced-run-guides/running-on-aws.md b/documentation/gitbook/how-to-run/advanced-run-guides/running-on-aws.md
@@ -199,7 +199,7 @@ Rscript $FLEPI_PATH/datasetup/build_flu_data.R
 Now you may want to test that it works :
 
 ```bash
-Rscript $FLEPI_PATH/flepimop/main_scripts/inference_main.R -c $CONFIG_PATH -j 1 -n 1 -k 1 
+flepimop-inference-main -c $CONFIG_PATH -j 1 -n 1 -k 1 
 ```
 
 If this fails, you may want to investigate this error. In case this succeeds, then you can proceed by first deleting the model\_output:

diff --git a/...mentation/gitbook/how-to-run/advanced-run-guides/running-with-docker-locally.md b/...mentation/gitbook/how-to-run/advanced-run-guides/running-with-docker-locally.md
@@ -146,7 +146,7 @@ rm -r model_output/ # delete the outputs of past run if there are
 An inference run requires a configuration file that has the `inference` section. Stay in the `$DATA_PATH` folder, and run the inference script, providing the name of the configuration file you want to run (ex. `config.yml`)&#x20;
 
 ```bash
-Rscript  $FLEPI_PATH/flepimop/main_scripts/inference_main.R -c config.yml
+flepimop-inference-main -c config.yml
 ```
 
 This will run the model and create a lot of output files in `$DATA_PATH/model_output/`.&#x20;
@@ -163,8 +163,8 @@ The last few lines visible on the command prompt should be:
 
 If you want to quickly do runs with options different from those encoded in the configuration file, you can do that from the command line, for example
 
-```
-Rscript $FLEPI_PATH/flepimop/main_scripts/inference_main.R -j 1 -n 1 -k 1 -c config.yml
+```bash
+flepimop-inference-main -j 1 -n 1 -k 1 -c config.yml
 ```
 
 where:
@@ -187,7 +187,7 @@ Rscript build/local_install.R
 pip install --no-deps -e flepimop/gempyor_pkg/
 cd $DATA_PATH
 rm -rf model_output
-Rscript $FLEPI_PATH/flepimop/main_scripts/inference_main.R -j 1 -n 1 -k 1 -c config.yml
+flepimop-inference-main -j 1 -n 1 -k 1 -c config.yml
 </code></pre>
 
 ### Non-inference run

diff --git a/documentation/gitbook/how-to-run/advanced-run-guides/slurm-submission-on-marcc.md b/documentation/gitbook/how-to-run/advanced-run-guides/slurm-submission-on-marcc.md
@@ -184,7 +184,7 @@ export CONFIG_PATH=config_example.yml # TO DO: ADD AN EXAMPLE
 You may want to test that it works before launching a full batch:
 
 ```bash
-Rscript $FLEPI_PATH/flepimop/main_scripts/inference_main.R -c $CONFIG_PATH -j 1 -n 1 -k 1 
+flepimop-inference-main -c $CONFIG_PATH -j 1 -n 1 -k 1 
 ```
 
 If this fails, you may want to investigate this error. In case this succeeds, then you can proceed (but remember to delete the existing model output).

diff --git a/documentation/gitbook/how-to-run/quick-start-guide.md b/documentation/gitbook/how-to-run/quick-start-guide.md
@@ -172,7 +172,7 @@ An inference run requires a configuration file that has the `inference` section.
 
 {% code overflow="wrap" %}
 ```bash
-Rscript  $FLEPI_PATH/flepimop/main_scripts/inference_main.R -c config_sample_2pop_inference.yml
+flepimop-inference-main -c config_sample_2pop_inference.yml
 ```
 {% endcode %}
 
@@ -190,8 +190,8 @@ The last few lines visible on the command prompt should be:
 
 If you want to quickly do runs with options different from those encoded in the configuration file, you can do that from the command line, for example
 
-```
-Rscript $FLEPI_PATH/flepimop/main_scripts/inference_main.R -j 1 -n 1 -k 1 -c config_inference.yml
+```bash
+flepimop-inference-main -j 1 -n 1 -k 1 -c config_inference.yml
 ```
 
 where:
@@ -204,15 +204,15 @@ Again, it is helpful to run the model output notebook (`model_output_notebook.Rm
 
 The first time you run all this, it's better to run each command individually as described above to be sure each exits successfully. However, eventually you can **put all these steps together in a script**, like below
 
-```
+```bash
 export FLEPI_PATH=/Users/YourName/Github/flepiMoP
 export PROJECT_PATH=/Users/YourName/Github/flepiMoP_sample
 cd $FLEPI_PATH
 pip install --no-deps -e flepimop/gempyor_pkg/
 Rscript build/local_install.R
 cd $PROJECT_PATH
 rm -rf model_output
-Rscript $FLEPI_PATH/flepimop/main_scripts/inference_main.R -c config_inference.yml
+flepimop-inference-main -c config_inference.yml
 ```
 
 Note that you only have to re-run the installation steps once each time you update any of the files in the flepimop repository (either by pulling changes made by the developers and stored on Github, or by changing them yourself). If you're just running the same or different configuration file, just repeat the final steps

diff --git a/...tion/gitbook/jhu-internal/us-specific-how-to-run/running-with-docker-locally.md b/...tion/gitbook/jhu-internal/us-specific-how-to-run/running-with-docker-locally.md
@@ -99,7 +99,7 @@ Stay in `$DATA_PATH`, select a config, and build the setup. The setup creates th
 export CONFIG_PATH=config_SMH_R1_lowVac_optImm_2022.yml
 Rscript $FLEPI_PATH/datasetup/build_US_setup.R
 Rscript $FLEPI_PATH/datasetup/build_flu_data.R
-Rscript $FLEPI_PATH/flepimop/main_scripts/inference_main.R.R -j 1 -n 1 -k 1
+flepimop-inference-main -j 1 -n 1 -k 1
 ```
 
 where:

diff --git a/...tation/gitbook/jhu-internal/us-specific-how-to-run/slurm-submission-on-marcc.md b/...tation/gitbook/jhu-internal/us-specific-how-to-run/slurm-submission-on-marcc.md
@@ -248,7 +248,7 @@ export FLEPI_MEM_PROF_ITERS=50
 Now you may want to test that it works :
 
 ```bash
-Rscript $FLEPI_PATH/flepimop/main_scripts/inference_main.R -c $CONFIG_PATH -j 1 -n 1 -k 1 
+flepimop-inference-main -c $CONFIG_PATH -j 1 -n 1 -k 1 
 ```
 
 If this fails, you may want to investigate this error. In case this succeeds, then you can proceed by first deleting the model\_output:

diff --git a/flepimop/R_packages/inference/DESCRIPTION b/flepimop/R_packages/inference/DESCRIPTION
@@ -21,6 +21,6 @@ Imports:
   reticulate,
   truncnorm,
   arrow
-RoxygenNote: 7.2.3
+RoxygenNote: 7.3.2
 Suggests: 
     testthat
diff --git a/flepimop/R_packages/inference/NAMESPACE b/flepimop/R_packages/inference/NAMESPACE
@@ -15,6 +15,7 @@ export(getStats)
 export(get_ground_truth)
 export(get_ground_truth_file)
 export(initialize_mcmc_first_block)
+export(install_cli)
 export(iterateAccept)
 export(logLikStat)
 export(perform_MCMC_step_copies_chimeric)
@@ -25,6 +26,5 @@ export(perturb_hnpi_from_file)
 export(perturb_hpar)
 export(perturb_seeding)
 export(perturb_snpi)
-export(perturb_init)
 export(perturb_snpi_from_file)
 importFrom(magrittr,"%>%")
diff --git a/flepimop/R_packages/inference/R/install_cli.R b/flepimop/R_packages/inference/R/install_cli.R
@@ -0,0 +1,21 @@
+
+#' @title Install Inference Scripts
+#'
+#' @description
+#' Installs the scripts for R-based FlepiMoP inference by symlinking every
+#' `flepimop-*` file in this package's `scripts` directory into `path`. A
+#' trailing `.R` extension is dropped from the link name.
+#'
+#' @param path The directory to create the links in. Default is
+#'   `/usr/local/bin` (unix-like); on other platforms the default raises an
+#'   "Unsupported OS" error.
+#'
+#' @param overwrite Whether to replace existing entries of the same name in
+#'   `path`. Default is `TRUE`. When `FALSE`, nothing is removed first and
+#'   linking is left to [base::file.symlink()], which does not replace
+#'   existing files.
+#'
+#' @return The logical vector returned by [base::file.symlink()], one element
+#'   per script.
+#'
+#' @export
+install_cli <- function(
+  path = if (.Platform$OS.type == "unix") normalizePath(file.path("/usr", "local", "bin")) else stop("Unsupported OS"),
+  overwrite = TRUE
+) {
+  scriptfiles <- list.files(
+    system.file("scripts", package = utils::packageName()),
+    pattern = "flepimop-.*", full.names = TRUE
+  )
+  to <- file.path(path, gsub("\\.R$", "", basename(scriptfiles)))
+  if (overwrite) {
+    # file.symlink() will not replace an existing entry, so clear the
+    # destinations first. unlink() removes a link itself rather than its
+    # target and silently skips paths that do not exist.
+    unlink(to)
+  }
+  file.symlink(scriptfiles, to)
+}
diff --git a/flepimop/main_scripts/inference_main.R → ...ce/inst/scripts/flepimop-inference-main.R b/flepimop/main_scripts/inference_main.R → ...ce/inst/scripts/flepimop-inference-main.R
@@ -1,3 +1,5 @@
+#!/usr/bin/env Rscript
+
 # About ------------------------------------------------------------------------
 
 ## This script processes the options for an inference run and then creates a separate parallel processing job for each combination of SEIR parameter modification scenario, outcome parameter modification scenario, and independent MCMC chain ("slot")
@@ -11,8 +13,8 @@ suppressMessages(library(parallel))
 suppressMessages(library(doParallel))
 options(readr.num_columns = 0)
 
-# There are multiple ways to specify options when inference_main.R is run, which take the following precedence:
-#  1) (optional) options called along with the script at the command line (ie > Rscript inference_main.R -c my_config.yml)
+# There are multiple ways to specify options when flepimop-inference-main is run, which take the following precedence:
+#  1) (optional) options called along with the script at the command line (ie > flepimop-inference-main -c my_config.yml)
 #  2) (optional) environmental variables set by the user (ie user could set > export CONFIG_PATH = ~/flepimop_sample/my_config.yml to not have to specify it each time the script is run)
 # If neither are specified, then a default value is used, given by the second argument of Sys.getenv() commands below. 
 #  *3) For some options, a default doesn't exist, and the value specified in the config will be used if the option is not specified at the command line or by an environmental variable (iterations_per_slot, slots)
@@ -125,8 +127,7 @@ foreach(seir_modifiers_scenario = seir_modifiers_scenarios) %:%
 
     err <- system(
       paste(
-        opt$rpath,
-        file.path(opt$flepi_path, "flepimop", "main_scripts","inference_slot.R"),
+        "flepimop-inference-slot",
         "-c", opt$config,
         "-u", opt$run_id,
         "-s", opt$seir_modifiers_scenarios,

diff --git a/flepimop/main_scripts/inference_slot.R → ...ce/inst/scripts/flepimop-inference-slot.R b/flepimop/main_scripts/inference_slot.R → ...ce/inst/scripts/flepimop-inference-slot.R
@@ -1,6 +1,8 @@
+#!/usr/bin/env Rscript
+
 # About
 
-## This script runs a single slot (MCMC chain) of an inference run. It can be called directly, but is often called from inference_main.R if multiple slots are run.
+## This script runs a single slot (MCMC chain) of an inference run. It can be called directly, but is often called from flepimop-inference-main if multiple slots are run.
 
 # Run Options ---------------------------------------------------------------------
 
@@ -24,8 +26,8 @@ required_packages <- c("dplyr", "magrittr", "xts", "zoo", "stringr")
 #set.seed(1) # set within R
 #reticulate::py_run_string(paste0("rng_seed = ", 1)) #set within Python
 
-# There are multiple ways to specify options when inference_slot.R is run, which take the following precedence:
-#  1) (optional) options called along with the script at the command line (ie > Rscript inference_main.R -c my_config.yml)
+# There are multiple ways to specify options when flepimop-inference-slot is run, which take the following precedence:
+#  1) (optional) options called along with the script at the command line (ie > flepimop-inference-slot -c my_config.yml)
 #  2) (optional) environmental variables set by the user (ie user could set > export CONFIG_PATH = ~/flepimop_sample/my_config.yml to not have to specify it each time the script is run)
 # If neither are specified, then a default value is used, given by the second argument of Sys.getenv() commands below.
 #  *3) For some options, a default doesn't exist, and the value specified in the config will be used if the option is not specified at the command line or by an environmental variable (iterations_per_slot, slots)
@@ -451,7 +453,7 @@ for(seir_modifiers_scenario in seir_modifiers_scenarios) {
         autowrite_seir = TRUE
       )
     }, error = function(e) {
-      print("GempyorInference failed to run (call on l. 443 of inference_slot.R).")
+      print("GempyorInference failed to run (call on l. 443 of flepimop-inference-slot).")
       print("Here is all the debug information I could find:")
       for(m in reticulate::py_last_error()) print(m)
       stop("GempyorInference failed to run... stopping")
@@ -622,7 +624,7 @@ for(seir_modifiers_scenario in seir_modifiers_scenarios) {
           load_ID=TRUE,
           sim_id2load=this_index)
       }, error = function(e) {
-        print("GempyorInference failed to run (call on l. 575 of inference_slot.R).")
+        print("GempyorInference failed to run (call on l. 575 of flepimop-inference-slot).")
         print("Here is all the debug information I could find:")
         for(m in reticulate::py_last_error()) print(m)
         stop("GempyorInference failed to run... stopping")

diff --git a/flepimop/gempyor_pkg/docs/Rinterface.Rmd b/flepimop/gempyor_pkg/docs/Rinterface.Rmd
@@ -175,7 +175,7 @@ here if source_filters is [["age0to17"], ["OMICRON", "WILD"]], it means filter (
 ![while readable_graph look like this](./readable_graph.pdf){width=100% height=200}
 
 ## Simulate
-The code in this section is very much like what is present in inference_slot.R for the inference runs: it runs the full gempyor (parameters, npi, simulation, outcomes).
+The code in this section is very much like what is present in flepimop-inference-slot for the inference runs: it runs the full gempyor (parameters, npi, simulation, outcomes).
 These functions take as input a `seed` file with sim_id `sim_id2write`, and outputs files hpar, spar, snpi, hnpi, seir, hosp with sim_id `sim_id2write`. In case the flag load_ID is set as true, the function will also load from file the seeding, parameters and the NPIs (seed, hnpi, snpi, hpar, spar) from the simulation with sim_id `sim_id2load`. 
 
 Here we simulate once from config, then a second time using the same values as generated by the first run:
@@ -197,7 +197,7 @@ npi_outcome = gempyor_inference$get_outcome_npi(bypass_FN = 'model_output/hnpi/t
 
 
 ## Also...
-Additional methods are provided, one of which updates the prefix, which is done by e.g inference_slot.R to indicate the slot
+Additional methods are provided, one of which updates the prefix, which is done by e.g flepimop-inference-slot to indicate the slot
 ```{r}
 gempyor_inference$update_prefix(new_prefix="my new prefix")
 ```