diff --git a/NAMESPACE b/NAMESPACE
index 4ca3f7e2..3093718a 100644
--- a/NAMESPACE
+++ b/NAMESPACE
@@ -26,7 +26,6 @@ importFrom(data.table,setorder)
 importFrom(glmnet,cv.glmnet)
 importFrom(glmnet,glmnet)
 importFrom(methods,is)
-importFrom(origami,cross_validate)
 importFrom(origami,folds2foldvec)
 importFrom(origami,make_folds)
 importFrom(stats,aggregate)
@@ -36,7 +35,6 @@ importFrom(stats,median)
 importFrom(stats,plogis)
 importFrom(stats,predict)
 importFrom(stats,quantile)
-importFrom(stats,sd)
 importFrom(stringr,str_detect)
 importFrom(stringr,str_extract)
 importFrom(stringr,str_match)
diff --git a/R/cv_lasso.R b/R/cv_lasso.R
deleted file mode 100644
index a7fece86..00000000
--- a/R/cv_lasso.R
+++ /dev/null
@@ -1,72 +0,0 @@
-#' Cross-validated Lasso on Indicator Bases
-#'
-#' Fits Lasso regression using a customized procedure, with cross-validation
-#' based on \pkg{origami}
-#'
-#' @param x_basis A \code{dgCMatrix} object corresponding to a sparse matrix of
-#' the basis functions generated for the HAL algorithm.
-#' @param y A \code{numeric} vector of the observed outcome variable values.
-#' @param n_lambda A \code{numeric} scalar indicating the number of values of
-#' the L1 regularization parameter (lambda) to be obtained from fitting the
-#' Lasso to the full data. Cross-validation is used to select an optimal
-#' lambda (that minimizes the risk) from among these.
-#' @param n_folds A \code{numeric} scalar for the number of folds to be used in
-#' the cross-validation procedure to select an optimal value of lambda.
-#' @param center binary. If \code{TRUE}, covariates are centered. This is much
-#' slower, but matches the \code{glmnet} implementation. Default \code{FALSE}.
-#'
-#' @importFrom origami make_folds cross_validate
-#' @importFrom stats sd
-cv_lasso <- function(x_basis, y, n_lambda = 100, n_folds = 10,
-                     center = FALSE) {
-  # first, need to run lasso on the full data to get a sequence of lambdas
-  lasso_init <- lassi(y = y, x = x_basis, nlambda = n_lambda)
-  lambdas_init <- lasso_init$lambdas
-
-  # next, set up a cross-validated lasso using the sequence of lambdas
-  full_data_mat <- cbind(y, x_basis)
-  folds <- origami::make_folds(full_data_mat, V = n_folds)
-
-  # run the cross-validated lasso procedure to find the optimal lambda
-  cv_lasso_out <- origami::cross_validate(
-    cv_fun = lassi_origami,
-    folds = folds,
-    data = full_data_mat,
-    lambdas = lambdas_init,
-    center = center
-  )
-
-  # compute cv-mean of MSEs for each lambda
-  lambdas_cvmse <- colMeans(cv_lasso_out$mses)
-
-  # find the lambda that minimizes the MSE
-  lambda_optim_index <- which.min(lambdas_cvmse)
-  lambda_minmse <- lambdas_init[lambda_optim_index]
-
-  # also need the adjusted CV standard deviation for each lambda
-  lambdas_cvsd <- apply(cv_lasso_out$mses, 2, sd) / sqrt(n_folds)
-
-  # find the maximum lambda among those 1 standard error above the minimum
-  lambda_min_1se <- (lambdas_cvmse + lambdas_cvsd)[lambda_optim_index - 1]
-  lambda_1se <- max(lambdas_init[lambdas_cvmse <= lambda_min_1se],
-    na.rm = TRUE
-  )
-  lambda_1se_index <- which.min(abs(lambdas_init - lambda_1se))
-
-  # create output object
-  get_lambda_indices <- c(lambda_1se_index, lambda_optim_index)
-  betas_out <- lasso_init$beta_mat[, get_lambda_indices]
-  colnames(betas_out) <- c("lambda_1se", "lambda_min")
-
-  # add in intercept term to coefs matrix and convert to sparse matrix output
-  betas_out <- rbind(rep(mean(y), ncol(betas_out)), betas_out)
-  betas_out <- as_dgCMatrix(betas_out * 1.0)
-
-  # create output object
-  cv_lasso_out <-
-    list(betas_out, lambda_minmse, lambda_1se, lambdas_cvmse)
-  names(cv_lasso_out) <- c(
-    "betas_mat", "lambda_min", "lambda_1se",
-    "lambdas_cvmse"
-  )
-  return(cv_lasso_out)
-}
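For context on the removal above: the custom origami-based `cv_lasso()` returned two selections, the CV-MSE minimizer (`lambda_min`) and the largest lambda within one standard error of it (`lambda_1se`). A minimal sketch of obtaining the same two selections directly from `glmnet::cv.glmnet`, which the package still imports per the NAMESPACE hunk above (illustrative only; the simulated `x` and `y` are placeholders standing in for the HAL basis matrix and outcome):

```r
library(glmnet)

# placeholder design matrix and outcome
set.seed(1)
x <- matrix(rnorm(100 * 10), nrow = 100)
y <- rnorm(100)

# cross-validated lasso over a 100-lambda sequence with 10 folds,
# matching the removed helper's defaults
cv_fit <- cv.glmnet(x = x, y = y, nlambda = 100, nfolds = 10)

# the two selections the removed cv_lasso() used to return
c(lambda_min = cv_fit$lambda.min, lambda_1se = cv_fit$lambda.1se)
```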
diff --git a/R/formula_hal9001.R b/R/formula_hal9001.R
index 9d8e3d68..fe3b85a2 100644
--- a/R/formula_hal9001.R
+++ b/R/formula_hal9001.R
@@ -253,7 +253,7 @@ print.formula_hal9001 <- function(x, ...) {
 #'
 #' @param var_names A \code{character} vector of variable names representing a single type of interaction
-#" (e.g. var_names = c("W1", "W2", "W3") encodes three way interactions between W1, W2 and W3.
+#' (e.g. var_names = c("W1", "W2", "W3") encodes three-way interactions between W1, W2 and W3).
 #' var_names may include the wildcard variable "." in which case the argument `.` must be specified
 #' so that all interactions matching the form of var_names are generated.
 #' @param . Specification of variables for use in the formula.
@@ -276,7 +276,7 @@ fill_dots <- function(var_names, .) {
     return(out)
   })
   is_nested <- is.list(all_items[[1]])
-  while(is_nested) {
+  while (is_nested) {
     all_items <- unlist(all_items, recursive = FALSE)
     is_nested <- is.list(all_items[[1]])
   }
@@ -292,4 +292,3 @@ fill_dots <- function(var_names, .) {
 
   return(unique(all_items))
 }
-
diff --git a/R/hal.R b/R/hal.R
index e3fdedbf..e73d6d90 100644
--- a/R/hal.R
+++ b/R/hal.R
@@ -264,8 +264,8 @@ fit_hal <- function(X,
     )
   }
 
-  if(!is.character(fit_control$prediction_bounds)){
-    if(fam == "mgaussian"){
+  if (!is.character(fit_control$prediction_bounds)) {
+    if (fam == "mgaussian") {
       assertthat::assert_that(
         is.list(fit_control$prediction_bounds) &
           length(fit_control$prediction_bounds) == ncol(Y),
@@ -342,14 +342,14 @@ fit_hal <- function(X,
 
   # NOTE: keep only basis functions with some (or higher) proportion of 1's
   if (all(smoothness_orders == 0)) {
-    if(is.null(reduce_basis)){
+    if (is.null(reduce_basis)) {
       reduce_basis <- 1 / sqrt(n_Y)
     }
     reduced_basis_map <- make_reduced_basis_map(x_basis, reduce_basis)
     x_basis <- x_basis[, reduced_basis_map]
     basis_list <- basis_list[reduced_basis_map]
   } else {
-    if(!is.null(reduce_basis)){
+    if (!is.null(reduce_basis)) {
       warning("Dropping reduce_basis; only applies if smoothness_orders = 0")
     }
   }
@@ -467,9 +467,9 @@ fit_hal <- function(X,
   # Bounds for prediction on new data (to prevent extrapolation for linear HAL)
   if (is.character(fit_control$prediction_bounds) &&
     fit_control$prediction_bounds == "default") {
-    if(fam == "mgaussian") {
-      fit_control$prediction_bounds <- lapply(seq(ncol(Y)), function(i){
-        c(min(Y[,i]) - 2 * stats::sd(Y[,i]), max(Y[,i]) + 2 * stats::sd(Y[,i]))
+    if (fam == "mgaussian") {
+      fit_control$prediction_bounds <- lapply(seq(ncol(Y)), function(i) {
+        c(min(Y[, i]) - 2 * stats::sd(Y[, i]), max(Y[, i]) + 2 * stats::sd(Y[, i]))
      })
     } else if (fam == "cox") {
       fit_control$prediction_bounds <- NULL
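For context on the last hunk: the reformatted default computes, for each outcome column, prediction bounds equal to the observed range widened by two standard deviations. A standalone sketch of that computation (illustrative only; `Y` here is a placeholder multivariate outcome, not package data):

```r
# placeholder multivariate outcome matrix (100 observations, 2 outcomes)
Y <- matrix(rnorm(100 * 2), ncol = 2)

# default bounds per outcome column: observed min/max widened by 2 SDs,
# mirroring the lapply() in the hunk above
prediction_bounds <- lapply(seq(ncol(Y)), function(i) {
  c(min(Y[, i]) - 2 * stats::sd(Y[, i]), max(Y[, i]) + 2 * stats::sd(Y[, i]))
})
```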
diff --git a/R/predict.R b/R/predict.R
index d9a12a32..41895867 100644
--- a/R/predict.R
+++ b/R/predict.R
@@ -41,7 +41,6 @@ predict.hal9001 <- function(object,
                             offset = NULL,
                             type = c("response", "link"),
                             ...) {
-
   family <- ifelse(inherits(object$family, "family"), object$family$family, object$family)
 
   type <- match.arg(type)
@@ -80,25 +79,28 @@ predict.hal9001 <- function(object,
   # generate predictions
   if (!family %in% c("cox", "mgaussian")) {
     if (ncol(object$coefs) > 1) {
-      preds <- pred_x_basis%*%object$coefs[-1,]+
-        matrix(object$coefs[1,], nrow=nrow(pred_x_basis),
-               ncol=ncol(object$coefs), byrow = TRUE)
+      preds <- pred_x_basis %*% object$coefs[-1, ] +
+        matrix(object$coefs[1, ],
+          nrow = nrow(pred_x_basis),
+          ncol = ncol(object$coefs), byrow = TRUE
+        )
     } else {
       preds <- as.vector(Matrix::tcrossprod(
         x = pred_x_basis,
-        y = matrix(object$coefs[-1],nrow=1)
+        y = matrix(object$coefs[-1], nrow = 1)
       ) + object$coefs[1])
     }
   } else {
-    if(family == "cox") {
+    if (family == "cox") {
       # Note: there is no intercept in the Cox model (built into the baseline
       # hazard and would cancel in the partial likelihood).
-      preds <- pred_x_basis%*%object$coefs
+      preds <- pred_x_basis %*% object$coefs
     } else if (family == "mgaussian") {
       preds <- stats::predict(
-        object$lasso_fit, newx = pred_x_basis, s = object$lambda_star
+        object$lasso_fit,
+        newx = pred_x_basis, s = object$lambda_star
       )
     }
   }
@@ -123,13 +125,13 @@ predict.hal9001 <- function(object,
     transform <- stats::plogis
   } else if (family %in% c("poisson", "cox")) {
     transform <- exp
-  } else if(family%in%c("gaussian","mgaussian")){
+  } else if (family %in% c("gaussian", "mgaussian")) {
     transform <- identity
-  } else{
+  } else {
     stop("unsupported family")
   }
-
-  if(length(ncol(preds))){
+
+  if (length(ncol(preds))) {
     # apply along only the first dimension (to handle n-d arrays)
     margin <- seq(length(dim(preds)))[-1]
     preds <- apply(preds, margin, transform)
@@ -141,10 +143,10 @@ predict.hal9001 <- function(object,
   # bound predictions within observed outcome bounds if on response scale
   if (!is.null(object$prediction_bounds)) {
     bounds <- object$prediction_bounds
-    if(family == "mgaussian") {
-      preds <- do.call(cbind, lapply(seq(ncol(preds)), function(i){
+    if (family == "mgaussian") {
+      preds <- do.call(cbind, lapply(seq(ncol(preds)), function(i) {
         bounds_y <- sort(bounds[[i]])
-        preds_y <- preds[,i,]
+        preds_y <- preds[, i, ]
         preds_y <- pmax(bounds_y[1], preds_y)
         preds_y <- pmin(preds_y, bounds_y[2])
         return(preds_y)
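For context on the first predict.R hunk above: the reformatting preserves a broadcast, multiplying the sparse basis matrix by the coefficient matrix without its intercept row, then adding an intercept matrix recycled by row (one column per lambda). A small self-contained sketch of that idiom (illustrative only; the sparse matrix and coefficients are placeholders, not package objects):

```r
library(Matrix)

# placeholder sparse basis matrix: 5 observations, 4 basis functions
pred_x_basis <- Matrix(rbinom(20, 1, 0.5), nrow = 5, sparse = TRUE)
# coefficients for 3 lambdas; first row holds the intercept, as in hal9001
coefs <- matrix(rnorm(15), nrow = 5)

# basis-times-slopes plus a row-recycled intercept, as in the hunk above
preds <- pred_x_basis %*% coefs[-1, ] +
  matrix(coefs[1, ], nrow = nrow(pred_x_basis), ncol = ncol(coefs), byrow = TRUE)
```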
diff --git a/R/sl_hal9001.R b/R/sl_hal9001.R
index 3bf5662c..0fe18b08 100644
--- a/R/sl_hal9001.R
+++ b/R/sl_hal9001.R
@@ -40,7 +40,6 @@ SL.hal9001 <- function(Y,
                        smoothness_orders = 1,
                        num_knots = 5,
                        ...) {
-
   # create matrix version of X and newX for use with hal9001::fit_hal
   if (!is.matrix(X)) X <- as.matrix(X)
   if (!is.null(newX) & !is.matrix(newX)) newX <- as.matrix(newX)
diff --git a/R/summary.R b/R/summary.R
index b8c12975..5193e7cd 100644
--- a/R/summary.R
+++ b/R/summary.R
@@ -178,7 +178,7 @@ summary.hal9001 <- function(object,
     } else {
       # add col indicating whether or not there is a duplicate
       coef_summ[, dup := (duplicated(dups_tbl) |
-                            duplicated(dups_tbl, fromLast = TRUE))]
+        duplicated(dups_tbl, fromLast = TRUE))]
 
       # if basis_list_idx contains redundant duplicates, remove them
       redundant_dups <- coef_summ[dup == TRUE, "basis_list_idx"]
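For context on the summary.R hunk: the re-indented idiom flags every member of a duplicate group, not just the later copies that `duplicated()` alone would mark. A one-line illustration with a toy vector (not from the package):

```r
# duplicated() alone marks only later copies; OR-ing with fromLast = TRUE
# marks all copies of a duplicated value
v <- c("a", "b", "a", "c")
duplicated(v) #  FALSE FALSE  TRUE FALSE
duplicated(v) | duplicated(v, fromLast = TRUE) #  TRUE FALSE  TRUE FALSE
```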
diff --git a/man/cv_lasso.Rd b/man/cv_lasso.Rd
deleted file mode 100644
index 26aa3dff..00000000
--- a/man/cv_lasso.Rd
+++ /dev/null
@@ -1,29 +0,0 @@
-% Generated by roxygen2: do not edit by hand
-% Please edit documentation in R/cv_lasso.R
-\name{cv_lasso}
-\alias{cv_lasso}
-\title{Cross-validated Lasso on Indicator Bases}
-\usage{
-cv_lasso(x_basis, y, n_lambda = 100, n_folds = 10, center = FALSE)
-}
-\arguments{
-\item{x_basis}{A \code{dgCMatrix} object corresponding to a sparse matrix of
-the basis functions generated for the HAL algorithm.}
-
-\item{y}{A \code{numeric} vector of the observed outcome variable values.}
-
-\item{n_lambda}{A \code{numeric} scalar indicating the number of values of
-the L1 regularization parameter (lambda) to be obtained from fitting the
-Lasso to the full data. Cross-validation is used to select an optimal
-lambda (that minimizes the risk) from among these.}
-
-\item{n_folds}{A \code{numeric} scalar for the number of folds to be used in
-the cross-validation procedure to select an optimal value of lambda.}
-
-\item{center}{binary. If \code{TRUE}, covariates are centered. This is much
-slower, but matches the \code{glmnet} implementation. Default \code{FALSE}.}
-}
-\description{
-Fits Lasso regression using a customized procedure, with cross-validation
-based on \pkg{origami}
-}
diff --git a/tests/testthat/test-formula.R b/tests/testthat/test-formula.R
index 7e4a4fbd..ba901a94 100644
--- a/tests/testthat/test-formula.R
+++ b/tests/testthat/test-formula.R
@@ -1,4 +1,3 @@
-
 context("check formula function")
diff --git a/tests/testthat/test-hal_multivariate.R b/tests/testthat/test-hal_multivariate.R
index c530a67f..2593466b 100644
--- a/tests/testthat/test-hal_multivariate.R
+++ b/tests/testthat/test-hal_multivariate.R
@@ -16,10 +16,12 @@ test_that("HAL and glmnet predictions match for multivariate outcome", {
   hal_pred <- predict(hal_fit, new_data = MultiGaussianExample$x)
   # get glmnet preds
   set.seed(74296)
-  glmnet_fit <- cv.glmnet(x = hal_fit$x_basis, y = MultiGaussianExample$y,
-                          family = "mgaussian", standardize = FALSE,
-                          lambda.min.ratio = 1e-4)
-  glmnet_pred <- predict(glmnet_fit, hal_fit$x_basis, s = hal_fit$lambda_star)[,,1]
+  glmnet_fit <- cv.glmnet(
+    x = hal_fit$x_basis, y = MultiGaussianExample$y,
+    family = "mgaussian", standardize = FALSE,
+    lambda.min.ratio = 1e-4
+  )
+  glmnet_pred <- predict(glmnet_fit, hal_fit$x_basis, s = hal_fit$lambda_star)[, , 1]
   # test equivalence
   colnames(glmnet_pred) <- colnames(hal_pred)
   expect_equivalent(glmnet_pred, hal_pred)
@@ -31,15 +33,17 @@ test_that("HAL summarizes coefs for each multivariate outcome prediction", {
 
 test_that("HAL summarizes coefs appropriately for multivariate outcome", {
   # this checks intercept matches
-  lapply(seq_along(hal_summary), function(i){
-    expect_equal(hal_fit$coefs[[i]][1,], as.numeric(hal_summary[[i]]$table[1,1]))
+  lapply(seq_along(hal_summary), function(i) {
+    expect_equal(hal_fit$coefs[[i]][1, ], as.numeric(hal_summary[[i]]$table[1, 1]))
   })
 })
 
 test_that("Error when prediction_bounds is incorrectly formatted", {
   fit_control <- list(prediction_bounds = 9)
-  expect_error(fit_hal(X = MultiGaussianExample$x, Y = MultiGaussianExample$y,
-                       family = "mgaussian", fit_control = fit_control))
+  expect_error(fit_hal(
+    X = MultiGaussianExample$x, Y = MultiGaussianExample$y,
+    family = "mgaussian", fit_control = fit_control
+  ))
 })
 
 test_that("HAL summary for multivariate outcome predictions prints", {
diff --git a/tests/testthat/test-reduce_basis_filter.R b/tests/testthat/test-reduce_basis_filter.R
index e8da97d2..07c5ed53 100644
--- a/tests/testthat/test-reduce_basis_filter.R
+++ b/tests/testthat/test-reduce_basis_filter.R
@@ -50,7 +50,7 @@ system.time({
     mse <- mean(se)
     se[c(current_i, new_i)] <- 0
     new_i <- which.max(se)
-    #print(sprintf("%f, %f", old_mse, mse))
+    # print(sprintf("%f, %f", old_mse, mse))
     continue <- mse < 1.1 * old_mse
     if (mse < old_mse) {
       good_i <- unique(c(good_i, new_i))
@@ -58,7 +58,7 @@ system.time({
       old_mse <- mse
       coefs <- as.vector(coef(screen_glmnet, s = "lambda.min"))[-1]
       # old_basis <- union(old_basis,c(old_basis,b1)[which(coefs!=0)])
-      #print(length(old_basis))
+      # print(length(old_basis))
       old_basis <- unique(c(old_basis, b1))
     }
@@ -164,4 +164,3 @@ test_that("Predictions are not too different when reducing basis functions", {
   # ensure hal fit with reduce_basis works with new data for prediction
   newx <- matrix(rnorm(n * p), n, p)
   hal_pred_reduced_newx <- predict(hal_fit_reduced, new_data = newx)
-
diff --git a/vignettes/intro_hal9001.Rmd b/vignettes/intro_hal9001.Rmd
index a7a1bc7a..46b2c3ee 100644
--- a/vignettes/intro_hal9001.Rmd
+++ b/vignettes/intro_hal9001.Rmd
@@ -94,7 +94,7 @@ LaTeX with the `xtable` R package. Here's an example:
 
 ```{r eval-mse}
 # training sample prediction for HAL vs HAL9000
 mse <- function(preds, y) {
-    mean((preds - y)^2)
+  mean((preds - y)^2)
 }
 
 preds_hal <- predict(object = hal_fit, new_data = x)
@@ -188,7 +188,7 @@ hal_fit_smooth_1 <- fit_hal(
 )
 
 hal_fit_smooth_2_all <- fit_hal(
-  X = x, Y = y, smoothness_orders = 2, num_knots = num_knots, 
+  X = x, Y = y, smoothness_orders = 2, num_knots = num_knots,
   fit_control = list(cv_select = FALSE)
 )
@@ -204,7 +204,8 @@ pred_smooth_2_all <- predict(hal_fit_smooth_2_all, new_data = x)
 dt <- data.table(x = as.vector(x))
 dt <- cbind(dt, pred_smooth_2_all)
 long <- melt(dt, id = "x")
-ggplot(long, aes(x = x, y = value, group = variable)) + geom_line()
+ggplot(long, aes(x = x, y = value, group = variable)) +
+  geom_line()
 ```
 
 Comparing the mean squared error (MSE) between the predictions and the true
@@ -218,17 +219,20 @@ to `num_knots` for a big decrease in runtime without sacrificing performance.
 
 ```{r}
 mean((pred_0 - ytrue)^2)
-mean((pred_smooth_1- ytrue)^2)
+mean((pred_smooth_1 - ytrue)^2)
 mean((pred_smooth_2 - ytrue)^2)
 
-dt <- data.table(x = as.vector(x),
-                 ytrue = ytrue,
-                 y = y,
-                 pred0 = pred_0,
-                 pred1 = pred_smooth_1,
-                 pred2 = pred_smooth_2)
+dt <- data.table(
+  x = as.vector(x),
+  ytrue = ytrue,
+  y = y,
+  pred0 = pred_0,
+  pred1 = pred_smooth_1,
+  pred2 = pred_smooth_2
+)
 long <- melt(dt, id = "x")
-ggplot(long, aes(x = x, y = value, color = variable)) + geom_line()
+ggplot(long, aes(x = x, y = value, color = variable)) +
+  geom_line()
 plot(x, pred_0, main = "Zero order smoothness fit")
 plot(x, pred_smooth_1, main = "First order smoothness fit")
 plot(x, pred_smooth_2, main = "Second order smoothness fit")
@@ -270,7 +274,7 @@ pred_smooth_2 <- predict(hal_fit_smooth_2, new_data = x)
 
 ```{r}
 mean((pred_0 - ytrue)^2)
-mean((pred_smooth_1- ytrue)^2)
+mean((pred_smooth_1 - ytrue)^2)
 mean((pred_smooth_2 - ytrue)^2)
 plot(x, pred_0, main = "Zero order smoothness fit")
 plot(x, pred_smooth_1, main = "First order smoothness fit")
@@ -295,7 +299,7 @@ set.seed(98109)
 num_knots <- 100
 
 n_obs <- 500
-x1 <- runif(n_obs, min = -4, max = 4) 
+x1 <- runif(n_obs, min = -4, max = 4)
 x2 <- runif(n_obs, min = -4, max = 4)
 A <- runif(n_obs, min = -4, max = 4)
 X <- data.frame(x1 = x1, x2 = x2, A = A)
@@ -314,8 +318,8 @@ and without "y" in the character string.
 # The `h` function is used to specify the basis functions for a given term
 # h(x1) generates one-way basis functions for the variable x1
 # This is an additive model:
-formula <- ~h(x1) + h(x2) + h(A)
-#We can actually evaluate the h function as well. We need to specify some tuning parameters in the current environment:
+formula <- ~ h(x1) + h(x2) + h(A)
+# We can actually evaluate the h function as well. We need to specify some tuning parameters in the current environment:
 smoothness_orders <- 0
 num_knots <- 10
 # It will look in the parent environment for `X` and the above tuning parameters
@@ -324,25 +328,23 @@ form_term$basis_list[[1]]
 # We don't need the variables in the parent environment if we specify them directly:
 rm(smoothness_orders)
 rm(num_knots)
-# `h` excepts the arguments `s` and `k`. `s` stands for smoothness and is equivalent to smoothness_orders in use. `k` specifies the number of knots. `
+# `h` accepts the arguments `s` and `k`. `s` stands for smoothness and is equivalent to smoothness_orders in use. `k` specifies the number of knots.
 form_term_new <- h(x1, s = 0, k = 10) + h(x2, s = 0, k = 10) + h(A, s = 0, k = 10)
 # They are the same!
-length( form_term_new$basis_list) == length(form_term$basis_list)
+length(form_term_new$basis_list) == length(form_term$basis_list)
 
-#To evaluate a unevaluated formula object like:
-formula <- ~h(x1) + h(x2) + h(A)
+# To evaluate an unevaluated formula object like:
+formula <- ~ h(x1) + h(x2) + h(A)
 # we can use the formula_hal function:
 formula <- formula_hal(
-  ~ h(x1) + h(x2) + h(A), X = X, smoothness_orders = 1, num_knots = 10
+  ~ h(x1) + h(x2) + h(A),
+  X = X, smoothness_orders = 1, num_knots = 10
 )
 # Note that the arguments smoothness_orders and/or num_knots will not be used if `s` and/or `k` are specified in `h`.
 formula <- formula_hal(
-  Y ~ h(x1, k=1) + h(x2, k=1) + h(A, k=1), X = X, smoothness_orders = 1, num_knots = 10
+  Y ~ h(x1, k = 1) + h(x2, k = 1) + h(A, k = 1),
+  X = X, smoothness_orders = 1, num_knots = 10
 )
-
-
-
-
 ```
 
 The `.` argument. We can generate an additive model for all or a subset of variables using the `.` variable and `.` argument of `h`. By default, `.` in `h(.)` is treated as a wildcard and basis functions are generated by replacing the `.` with all variables in `X`.
@@ -356,33 +358,30 @@ formula1 <- h(.)
 # Longcut:
 formula2 <- h(x1) + h(x2) + h(A)
 # Same number of basis functions
-length(formula1$basis_list ) == length(formula2$basis_list)
+length(formula1$basis_list) == length(formula2$basis_list)
 
 # Maybe we only want an additive model for x1 and x2
 # Use the `.` argument
 formula1 <- h(., . = c("x1", "x2"))
-formula2 <- h(x1) + h(x2) 
-length(formula1$basis_list ) == length(formula2$basis_list)
-
+formula2 <- h(x1) + h(x2)
+length(formula1$basis_list) == length(formula2$basis_list)
 ```
 
 We can specify interactions as follows.
 
 ```{r}
 # Two way interactions
-formula1 <- h(x1) + h(x2) + h(A) + h(x1, x2) 
-formula2 <- h(.) + h(x1, x2) 
-length(formula1$basis_list ) == length(formula2$basis_list)
+formula1 <- h(x1) + h(x2) + h(A) + h(x1, x2)
+formula2 <- h(.) + h(x1, x2)
+length(formula1$basis_list) == length(formula2$basis_list)
 
 #
-formula1 <- h(.) + h(x1, x2) + h(x1,A) + h(x2,A)
-formula2 <- h(.) + h(., .)
-length(formula1$basis_list ) == length(formula2$basis_list)
+formula1 <- h(.) + h(x1, x2) + h(x1, A) + h(x2, A)
+formula2 <- h(.) + h(., .)
+length(formula1$basis_list) == length(formula2$basis_list)
 
 # Three way interactions
-formula1 <- h(.) + h(.,.) + h(x1,A,x2)
-formula2 <- h(.) + h(., .)+ h(.,.,.)
-length(formula1$basis_list ) == length(formula2$basis_list)
-
-
+formula1 <- h(.) + h(., .) + h(x1, A, x2)
+formula2 <- h(.) + h(., .) + h(., ., .)
+length(formula1$basis_list) == length(formula2$basis_list)
 ```
 
 Sometimes, one might want to build an additive model, but include all two-way
@@ -391,23 +390,23 @@ variety of ways. The `.` argument allows you to specify a subset of variables.
 
 ```{r}
 # Write it all out
-formula <- h(x1) + h(x2) + h(A) + h(A, x1) + h(A,x2) 
-
+formula <- h(x1) + h(x2) + h(A) + h(A, x1) + h(A, x2)
+
 # Use the "h(.)" which stands for add all additive terms and then manually add
 # interactions
-formula <- y ~ h(.) + h(A,x1) + h(A,x2)
+formula <- y ~ h(.) + h(A, x1) + h(A, x2)
 
-
 # Use the "wildcard" feature for when "." is included in the "h()" term. This
-# useful when you have many variables and do not want to write out every term.
-formula <- h(.) + h(A,.)
+# is useful when you have many variables and do not want to write out every term.
+formula <- h(.) + h(A, .)
 
-formula1 <- h(A,x1)
-formula2 <- h(A,., . = c("x1"))
- length(formula1$basis_list) == length(formula2$basis_list)
+formula1 <- h(A, x1)
+formula2 <- h(A, ., . = c("x1"))
+length(formula1$basis_list) == length(formula2$basis_list)
 ```
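For context on the hunk above: combining the vignette's two wildcard examples, the `.` argument restricts which variables the wildcard expands over. A brief sketch (illustrative only; it assumes the vignette's `X` with columns `x1`, `x2`, `A` is in scope, and that the `.`, `s`, and `k` arguments of `h` combine as the vignette's separate examples suggest):

```r
# "." expands over the variables named in the `.` argument, so these two
# specifications should generate the same basis functions
formula_wild <- h(A, ., . = c("x1", "x2"), s = 1, k = 10)
formula_full <- h(A, x1, s = 1, k = 10) + h(A, x2, s = 1, k = 10)
length(formula_wild$basis_list) == length(formula_full$basis_list) # expect TRUE
```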
= c("x1")) +length(formula1$basis_list) == length(formula2$basis_list) ``` @@ -419,29 +418,29 @@ these constraints is achieved by specifying the `monotone` argument of `h`. Note ```{r} # An additive monotone increasing model formula <- formula_hal( - y ~ h(., monotone = "i"), X, smoothness_orders = 0, num_knots = 100 + y ~ h(., monotone = "i"), X, + smoothness_orders = 0, num_knots = 100 ) # An additive unpenalized monotone increasing model (NPMLE isotonic regressio) # Set the penalty factor argument `pf` to remove L1 penalization formula <- formula_hal( - y ~ h(., monotone = "i", pf = 0), X, smoothness_orders = 0, num_knots = 100 + y ~ h(., monotone = "i", pf = 0), X, + smoothness_orders = 0, num_knots = 100 ) # An additive unpenalized convex model (NPMLE convex regressio) # Set the penalty factor argument `pf` to remove L1 penalization # Note the second term is equivalent to adding unpenalized and unconstrained main-terms (e.g. main-term glm) formula <- formula_hal( - ~ h(., monotone = "i", pf = 0, k=200, s=1) + h(., monotone = "none", pf = 0, k=1, s=1), X) - + ~ h(., monotone = "i", pf = 0, k = 200, s = 1) + h(., monotone = "none", pf = 0, k = 1, s = 1), X +) + # A bi-additive monotone decreasing model formula <- formula_hal( - ~ h(., monotone = "d") + h(.,., monotone = "d"), X, smoothness_orders = 1, num_knots = 100 + ~ h(., monotone = "d") + h(., ., monotone = "d"), X, + smoothness_orders = 1, num_knots = 100 ) - - - - ``` @@ -456,7 +455,6 @@ formula <- h(., s = 1, k = 1, pf = 0) # intraction glm formula <- h(., ., s = 1, k = 1, pf = 0) + h(., s = 1, k = 1, pf = 0) # Running HAL with this formula will be equivalent to running glm with the formula Y ~ .^2 - ```