diff --git a/NAMESPACE b/NAMESPACE index f5c33f1f..4ca3f7e2 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -16,7 +16,6 @@ export(make_copy_map) export(make_design_matrix) export(make_reduced_basis_map) export(squash_hal_fit) -import(Rcpp) importFrom(Matrix,tcrossprod) importFrom(Rcpp,sourceCpp) importFrom(assertthat,assert_that) @@ -27,13 +26,9 @@ importFrom(data.table,setorder) importFrom(glmnet,cv.glmnet) importFrom(glmnet,glmnet) importFrom(methods,is) -importFrom(methods,new) importFrom(origami,cross_validate) -importFrom(origami,fold_index) importFrom(origami,folds2foldvec) importFrom(origami,make_folds) -importFrom(origami,training) -importFrom(origami,validation) importFrom(stats,aggregate) importFrom(stats,as.formula) importFrom(stats,coef) diff --git a/R/RcppExports.R b/R/RcppExports.R index bd17cee2..aaedb7ca 100644 --- a/R/RcppExports.R +++ b/R/RcppExports.R @@ -51,16 +51,6 @@ apply_copy_map <- function(X, copy_map) { .Call('_hal9001_apply_copy_map', PACKAGE = 'hal9001', X, copy_map) } -#' Prediction from a Lassi Model -#' -#' @param X A sparse matrix of HAL basis functions. -#' @param beta A vector of coefficient values for the HAL basis functions. -#' @param intercept A numeric value giving the intercept of the HAL model. -#' -lassi_predict <- function(X, beta, intercept) { - .Call('_hal9001_lassi_predict', PACKAGE = 'hal9001', X, beta, intercept) -} - #' Sort Basis Functions #' #' Build a sorted list of unique basis functions based on columns, where each diff --git a/R/cv_lasso.R b/R/cv_lasso.R index 44233825..a7fece86 100644 --- a/R/cv_lasso.R +++ b/R/cv_lasso.R @@ -1,55 +1,3 @@ -#' Single Lasso estimation for cross-validation with Origami -#' -#' Fits Lasso regression over a single fold of a cross-validated data set. This -#' is meant to be called using \code{\link[origami]{cross_validate}}, which is -#' done through \code{\link{cv_lasso}}. Note that this procedure is NOT meant -#' to be invoked by itself. INTERNAL USE ONLY. -#' -#' @param fold A \code{fold} object produced by a call to \code{make_folds} -#' from the \pkg{origami}. -#' @param data A \code{dgCMatrix} object containing the outcome values (Y) in -#' its first column and vectors corresponding to the basis functions of HAL in -#' all other columns. Consult the description of HAL regression for details. -#' @param lambdas A \code{numeric} vector corresponding to a sequence of lambda -#' values obtained by fitting the Lasso on the full data. -#' @param center binary. If \code{TRUE}, covariates are centered. This is much -#' slower, but matches the \code{glmnet} implementation. Default \code{FALSE}. 
-#' -#' @importFrom origami training validation fold_index -lassi_origami <- function(fold, data, lambdas, center = FALSE) { - # make sure data is an (augmented) sparse matrix of basis functions - stopifnot(class(data) == "dgCMatrix") - - # split data for V-fold cross-validation - train_data <- origami::training(data) - valid_data <- origami::validation(data) - - # wrangle objects to clearer forms - train_x_basis <- train_data[, -1] - valid_x_basis <- valid_data[, -1] - train_y <- train_data[, 1] - valid_y <- valid_data[, 1] - - # compute the predicted betas for the given training and validation sets - lassi_fit <- lassi( - x = train_x_basis, y = train_y, lambdas = lambdas, - center = center - ) - pred_mat <- predict(lassi_fit, valid_x_basis) - - # compute the MSE for the given training and validation sets - mses <- apply(pred_mat, 2, function(preds) { - mean((preds - valid_y)^2) - }) - - # the only output needed is the lambda-wise MSE over each fold - mses_out <- matrix(mses, nrow = 1) - out <- list(mses = mses_out) - return(out) -} - -############################################################################### - #' Cross-validated Lasso on Indicator Bases #' #' Fits Lasso regression using a customized procedure, with cross-validation diff --git a/R/cv_lasso_early_stopping.R b/R/cv_lasso_early_stopping.R deleted file mode 100644 index a8423f63..00000000 --- a/R/cv_lasso_early_stopping.R +++ /dev/null @@ -1,97 +0,0 @@ -#' Cross-validated LASSO on Indicator Bases -#' -#' Fits the LASSO regression using a customized procedure with cross-validation -#' based on \pkg{origami} -#' -#' @param x_basis A \code{dgCMatrix} object corresponding to a sparse matrix of -#' the basis functions generated for the HAL algorithm. -#' @param y A \code{numeric} vector of the observed outcome variable values. -#' @param n_lambda A \code{numeric} scalar indicating the number of values of -#' the L1 regularization parameter (lambda) to be obtained from fitting the -#' LASSO to the full data. Cross-validation is used to select an optimal -#' lambda (that minimizes the risk) from among these. -#' @param n_folds A \code{numeric} scalar for the number of folds to be used in -#' the cross-validation procedure to select an optimal value of lambda. 
-#' -#' @importFrom origami make_folds cross_validate -#' @importFrom stats sd -cv_lasso_early_stopping <- function(x_basis, y, n_lambda = 100, n_folds = 10) { - # first, need to run lasso on the full data to get a sequence of lambdas - lasso_init <- lassi(y = y, x = x_basis, nlambda = n_lambda, center = FALSE) - lambdas_init <- lasso_init$lambdas - - # next, set up a cross-validated lasso using the sequence of lambdas - folds <- origami::make_folds(x_basis, V = n_folds) - - # track separately for folds = xscale, beta, resid, intercept - fold <- folds[[1]] - setup_fold_data <- function(fold, x_basis, y) { - x_train <- training(x_basis) - y_train <- training(y) - x_valid <- validation(x_basis) - y_valid <- validation(y) - - intercept <- mean(y_train) - resid <- y_train - intercept - xcenter <- rep(0, ncol(x_basis)) - xscale <- calc_xscale(x_train, xcenter) - beta <- rep(0, ncol(x_basis)) - - fold_data <- list( - x_train = x_train, x_valid = x_valid, y_valid = y_valid, - intercept = intercept, resid = resid, xscale = xscale, xcenter = xcenter, - beta = beta - ) - - return(list(fold_data = fold_data)) - } - - all_fold_data <- cross_validate(setup_fold_data, folds, x_basis, y, - .combine = FALSE - )$fold_data - - cv_lassi_step <- function(fold, all_fold_data, lambda) { - fold_data <- fold_index(all_fold_data)[[1]] - n_updates <- with(fold_data, fit_lassi_step( - x_train, resid, beta, lambda, - xscale, xcenter, intercept, - FALSE - )) - preds <- with(fold_data, as.vector(x_valid %*% (beta / xscale)) + - intercept) - mse <- with(fold_data, mean((preds - y_valid)^2)) - return(list(fold_data = fold_data, mse = mse)) - } - - null_mse <- NULL - min_mse <- Inf - step_mses <- rep(Inf, n_lambda) - for (lambda_step in seq_along(lambdas_init)) { - lambda <- lambdas_init[lambda_step] - step_results <- lapply(folds, cv_lassi_step, all_fold_data, lambda) - all_fold_data <- lapply(step_results, `[[`, "fold_data") - step_mse <- mean(sapply(step_results, `[[`, "mse")) - # step_results <- cross_validate(cv_lassi_step, folds, all_fold_data, - # lambda, .combine = FALSE) - # all_fold_data <- step_results$fold_data - - if (is.null(null_mse)) { - # null_mse is the first mse (i.e. for the null model) - null_mse <- step_mse - } - - if (step_mse < min_mse) { - min_mse <- step_mse - } - - # compute increase above minimum as percentage of total range - ratio <- (step_mse - min_mse) / (null_mse - min_mse) - - # cat(sprintf("lambda: %f, mse: %f, ratio: %f\n", lambda, step_mse, ratio)) - if (is.finite(ratio) && (ratio > 0.1)) { - break - } - step_mses[lambda_step] <- step_mse - } - return(step_mses) -} diff --git a/R/lassi.R b/R/lassi.R deleted file mode 100644 index 6fa3986d..00000000 --- a/R/lassi.R +++ /dev/null @@ -1,97 +0,0 @@ -#' Rcpp module: lassi_fit_module -#' @import Rcpp -#' @name lassi_fit_module -NULL -loadModule("lassi_module", TRUE) - -#' Custom Lasso implementation for matrices of indicator functions -#' -#' @param x The covariate matrix -#' @param y The outcome vector -#' @param lambdas A sequence of values for the L1 regularization parameter -#' (lambda) to be used in fitting the LASSO. Defaults to \code{NULL}. -#' @param nlambda number of lambdas to fit. -#' @param lambda_min_ratio ratio of largest to smallest lambda to fit. -#' @param center ... 
-#'
-#' @importFrom methods new
-#'
-#' @keywords internal
-lassi <- function(x, y, lambdas = NULL, nlambda = 100,
-                  lambda_min_ratio = 0.01, center = FALSE) {
-  if (!is.null(lambdas)) {
-    nlambda <- length(lambdas)
-  }
-
-  # initialize object
-  lassi_object <- methods::new(Lassi, x, y, nlambda, lambda_min_ratio, center)
-
-  if (!is.null(lambdas)) {
-    lassi_object$lambdas <- lambdas
-  }
-
-  # initialize step counter
-  step_counts <- rep(0, nlambda)
-
-  # iterative procedure for active step convergence
-  for (i in (seq_len(nlambda) - 1)) {
-    full_steps <- lassi_object$lassi_fit_cd(i, FALSE, 1)
-    if (full_steps > 0) {
-      active_steps <- lassi_object$lassi_fit_cd(i, TRUE, 1000)
-    } else {
-      active_steps <- 0
-    }
-    step_counts[i + 1] <- active_steps
-  }
-
-  beta_mat <- as.matrix(lassi_object$beta_mat)
-  intercepts <- lassi_object$intercepts
-  beta_mat <- beta_mat / lassi_object$xscale
-  if (center) {
-    intercepts <- intercepts - crossprod(lassi_object$xcenter, beta_mat)
-  }
-
-  chichignoud_criterion <- NULL
-
-  # create output object
-  out <- list(beta_mat, intercepts,
-    lambdas = lassi_object$lambdas,
-    step_counts, chichignoud_criterion
-  )
-  names(out) <- c(
-    "beta_mat", "intercepts", "lambdas", "steps",
-    "chichignoud_criterion"
-  )
-  class(out) <- "lassi"
-  return(out)
-}
-
-#' Predict Method for Lasso on Indicator Bases
-#'
-#' @param fit ...
-#' @param new_x_basis ...
-#' @param lambdas ...
-#'
-#' @keywords internal
-predict.lassi <- function(fit, new_x_basis, lambdas = NULL) {
-  if (is.null(lambdas)) {
-    lambdas <- fit$lambdas
-  }
-
-  if (!all(lambdas %in% fit$lambdas)) {
-    stop("Attempting to predict for a lambda that was not fit.")
-  }
-
-  preds <- matrix(0, nrow = nrow(new_x_basis), ncol = length(lambdas))
-
-  for (i in seq_along(lambdas)) {
-    lambda <- lambdas[i]
-    beta_col <- which(lambda == fit$lambdas)
-    beta <- fit$beta_mat[, beta_col]
-    intercept <- fit$intercepts[beta_col]
-    pred_col <- lassi_predict(new_x_basis, beta, intercept)
-    preds[, i] <- pred_col
-    # find corresponding betas
-  }
-  return(preds)
-}
diff --git a/README.md b/README.md
index 7e359316..728a247b 100644
--- a/README.md
+++ b/README.md
@@ -85,7 +85,11 @@ predictions via Highly Adaptive Lasso regression:
 # load the package and set a seed
 library(hal9001)
 #> Loading required package: Rcpp
-#> hal9001 v0.4.4: The Scalable Highly Adaptive Lasso
+#> hal9001 v0.4.5: The Scalable Highly Adaptive Lasso
 #> note: fit_hal defaults have changed. See ?fit_hal for details
 set.seed(385971)
diff --git a/docs/articles/intro_hal9001.html b/docs/articles/intro_hal9001.html
index b59b537b..d5262436 100644
--- a/docs/articles/intro_hal9001.html
+++ b/docs/articles/intro_hal9001.html
@@ -85,7 +85,11 @@

Nima Hejazi, Jeremy Coyle, Rachael Phillips, Lars van der Laan

-2022-07-12
+2022-11-04

Source: vignettes/intro_hal9001.Rmd

@@ -165,12 +169,21 @@

Fitting the model

hal_fit <- fit_hal(X = x, Y = y)
hal_fit$times
##                   user.self sys.self elapsed user.child sys.child
-## enumerate_basis       0.027    0.003   0.053          0         0
-## design_matrix         0.129    0.021   0.340          0         0
-## reduce_basis          0.000    0.000   0.000          0         0
-## remove_duplicates     0.000    0.000   0.000          0         0
-## lasso                 3.577    0.436   9.466          0         0
-## total                 3.734    0.460   9.861          0         0
+## enumerate_basis       0.017    0.000   0.018          0         0
+## design_matrix         0.082    0.003   0.086          0         0
+## reduce_basis          0.000    0.000   0.000          0         0
+## remove_duplicates     0.000    0.000   0.000          0         0
+## lasso                 2.284    0.072   2.399          0         0
+## total                 2.384    0.075   2.503          0         0

Summarizing the model

@@ -345,6 +358,7 @@

Reducing basis functions
 hal_fit_reduced$times

##                   user.self sys.self elapsed user.child sys.child
-## enumerate_basis       0.028    0.007   0.122          0         0
-## design_matrix         0.131    0.017   0.463          0         0
-## reduce_basis          0.000    0.000   0.000          0         0
@@ -355,6 +369,18 @@

Reducing basis functions

+## enumerate_basis       0.026    0.005   0.031          0         0
+## design_matrix         0.080    0.002   0.082          0         0
+## reduce_basis          0.000    0.000   0.000          0         0
+## remove_duplicates     0.000    0.000   0.000          0         0
+## lasso                 1.951    0.063   2.030          0         0
+## total                 2.057    0.070   2.143          0         0
+

In the above, all basis functions for which fewer than 10% of observations meet the criterion imposed are automatically removed prior to the Lasso step of fitting the HAL regression. The results appear below.

+
##              coef
 ##  1: -1.424003e+00
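For orientation, the kind of call that produces such a reduced fit looks roughly like the sketch below. This is hypothetical code, not the vignette's elided chunk: it assumes the reduce_basis argument of fit_hal (interpreted as a proportion-of-observations threshold) and reuses the simulated x and y from earlier.

# sketch: screen out basis functions active in fewer than 10% of observations
hal_fit_reduced <- fit_hal(X = x, Y = y, reduce_basis = 0.1)
preds_reduced <- predict(hal_fit_reduced, new_data = x)
mean((preds_reduced - y)^2)  # in-sample MSE of the reduced fit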
@@ -513,7 +539,11 @@ 

Specifying smoothness of the HAL

for a near-optimal fit. Therefore, one can safely pass a smaller value to num_knots for a large decrease in runtime without sacrificing performance.
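A rough sketch of such a call follows. It assumes the scalar form of the num_knots argument documented in ?fit_hal; the vignette's own code defining pred_0 is elided here, so this is a plausible shape rather than the original.

# a smaller knot budget shrinks the basis and speeds up the lasso step
hal_fit_fast <- fit_hal(X = x, Y = y, smoothness_orders = 1, num_knots = 50)
pred_0 <- predict(hal_fit_fast, new_data = x)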

+
 mean((pred_0 - ytrue)^2)
## [1] 0.00732315
@@ -552,7 +582,11 @@ 

Specifying smoothness of the HAL

Comparing the following simulation with the previous one, the HAL fit with second-order smoothness performed better when there were fewer knot points.
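As a sketch of the settings being compared (a hypothetical call, with x and y as in the surrounding simulation):

# second-order smoothness with deliberately few knot points
hal_fit_smooth <- fit_hal(X = x, Y = y, smoothness_orders = 2, num_knots = 5)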

+
 set.seed(98109)
 n_covars <- 1
 n_obs <- 250
@@ -609,7 +643,11 @@ 

Formula interface
+
 set.seed(98109)
 num_knots <- 100
 
@@ -627,7 +665,11 @@ 

Formula interface
+
 # The `h` function is used to specify the basis functions for a given term
 # h(x1) generates one-way basis functions for the variable x1
 # This is an additive model:
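The call itself is cut off by the hunk boundary; a plausible reconstruction (reusing the covariate names x1, x2, and A that appear later in this section, not the vignette's exact line) is:

# one-way (additive) basis functions only
formula <- h(x1) + h(x2) + h(A)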
@@ -672,7 +714,11 @@ 

Formula interface
+
 smoothness_orders <- 1
 num_knots <- 5
 # An additive model
@@ -716,7 +762,11 @@ 

Formula interface
+
 # Write it all out
 formula <- h(x1) + h(x2) + h(A) + h(A, x1) + h(A, x2)
  
@@ -745,7 +795,11 @@ 

Formula interface
+
 # An additive monotone increasing model
 formula <- formula_hal(
   y ~ h(., monotone = "i"), X, smoothness_orders = 0, num_knots = 100
@@ -780,7 +834,11 @@ 

Formula interface
+
 # get formula object
 fit <- fit_hal(
   X = X, Y = Y, formula = ~ h(.), smoothness_orders = 1, num_knots = 100
diff --git a/docs/authors.html b/docs/authors.html
index f9048826..9f3e40fb 100644
--- a/docs/authors.html
+++ b/docs/authors.html
@@ -103,7 +103,11 @@ 

Citation

Coyle J, Hejazi N, Phillips R, van der Laan L, van der Laan M (2022). hal9001: The scalable highly adaptive lasso. doi:10.5281/zenodo.3558313, R package version 0.4.5, https://github.com/tlverse/hal9001.

@Manual{,
   title = {{hal9001}: The scalable highly adaptive lasso},
diff --git a/docs/index.html b/docs/index.html
index 99aca6de..b68b1ec1 100644
--- a/docs/index.html
+++ b/docs/index.html
@@ -128,7 +128,11 @@ 

Example

# load the package and set a seed
library(hal9001)
#> Loading required package: Rcpp
-#> hal9001 v0.4.4: The Scalable Highly Adaptive Lasso
+#> hal9001 v0.4.5: The Scalable Highly Adaptive Lasso
#> note: fit_hal defaults have changed. See ?fit_hal for details

set.seed(385971)
@@ -143,12 +147,21 @@

Example

#> [1] "I'm sorry, Dave. I'm afraid I can't do that."

hal_fit$times
#>                   user.self sys.self elapsed user.child sys.child
-#> enumerate_basis       0.014    0.003   0.059          0         0
-#> design_matrix         0.004    0.001   0.005          0         0
-#> reduce_basis          0.000    0.000   0.000          0         0
-#> remove_duplicates     0.000    0.000   0.000          0         0
-#> lasso                 2.684    0.343   6.583          0         0
-#> total                 2.703    0.348   6.655          0         0
+#> enumerate_basis       0.008    0.001   0.009          0         0
+#> design_matrix         0.003    0.000   0.003          0         0
+#> reduce_basis          0.000    0.000   0.000          0         0
+#> remove_duplicates     0.000    0.000   0.001          0         0
+#> lasso                 1.690    0.036   1.764          0         0
+#> total                 1.702    0.037   1.778          0         0

# training sample prediction
preds <- predict(hal_fit, new_data = x)
diff --git a/docs/pkgdown.yml b/docs/pkgdown.yml
index cc24eab0..57535cfb 100644
--- a/docs/pkgdown.yml
+++ b/docs/pkgdown.yml
@@ -3,7 +3,11 @@ pkgdown: 2.0.3
 pkgdown_sha: ~
 articles:
   intro_hal9001: intro_hal9001.html
-last_built: 2022-07-13T01:48Z
+last_built: 2022-11-04T20:06Z
 urls:
   reference: https://tlverse.org/hal9001/reference
   article: https://tlverse.org/hal9001/articles
diff --git a/docs/reference/index.html b/docs/reference/index.html
index a4183ecc..ad92e617 100644
--- a/docs/reference/index.html
+++ b/docs/reference/index.html
@@ -86,10 +86,6 @@

All functions

 cv_lasso()
 Cross-validated Lasso on Indicator Bases

-cv_lasso_early_stopping()
-Cross-validated LASSO on Indicator Bases

 enumerate_basis()

@@ -130,18 +126,6 @@

All functions

 index_first_copy()
 Find Copies of Columns

-lassi_fit_module
-Rcpp module: lassi_fit_module

-lassi_origami()
-Single Lasso estimation for cross-validation with Origami

-lassi_predict()
-Prediction from a Lassi Model

 make_basis_list()

diff --git a/man/cv_lasso_early_stopping.Rd b/man/cv_lasso_early_stopping.Rd deleted file mode 100644 index cea940fa..00000000 --- a/man/cv_lasso_early_stopping.Rd +++ /dev/null @@ -1,26 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/cv_lasso_early_stopping.R -\name{cv_lasso_early_stopping} -\alias{cv_lasso_early_stopping} -\title{Cross-validated LASSO on Indicator Bases} -\usage{ -cv_lasso_early_stopping(x_basis, y, n_lambda = 100, n_folds = 10) -} -\arguments{ -\item{x_basis}{A \code{dgCMatrix} object corresponding to a sparse matrix of -the basis functions generated for the HAL algorithm.} - -\item{y}{A \code{numeric} vector of the observed outcome variable values.} - -\item{n_lambda}{A \code{numeric} scalar indicating the number of values of -the L1 regularization parameter (lambda) to be obtained from fitting the -LASSO to the full data. Cross-validation is used to select an optimal -lambda (that minimizes the risk) from among these.} - -\item{n_folds}{A \code{numeric} scalar for the number of folds to be used in -the cross-validation procedure to select an optimal value of lambda.} -} -\description{ -Fits the LASSO regression using a customized procedure with cross-validation -based on \pkg{origami} -} diff --git a/man/lassi.Rd b/man/lassi.Rd deleted file mode 100644 index 2803067e..00000000 --- a/man/lassi.Rd +++ /dev/null @@ -1,33 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/lassi.R -\name{lassi} -\alias{lassi} -\title{Custom Lasso implementation for matrices of indicator functions} -\usage{ -lassi( - x, - y, - lambdas = NULL, - nlambda = 100, - lambda_min_ratio = 0.01, - center = FALSE -) -} -\arguments{ -\item{x}{The covariate matrix} - -\item{y}{The outcome vector} - -\item{lambdas}{A sequence of values for the L1 regularization parameter -(lambda) to be used in fitting the LASSO. Defaults to \code{NULL}.} - -\item{nlambda}{number of lambdas to fit.} - -\item{lambda_min_ratio}{ratio of largest to smallest lambda to fit.} - -\item{center}{...} -} -\description{ -Custom Lasso implementation for matrices of indicator functions -} -\keyword{internal} diff --git a/man/lassi_fit_module.Rd b/man/lassi_fit_module.Rd deleted file mode 100644 index 35929eae..00000000 --- a/man/lassi_fit_module.Rd +++ /dev/null @@ -1,8 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/lassi.R -\name{lassi_fit_module} -\alias{lassi_fit_module} -\title{Rcpp module: lassi_fit_module} -\description{ -Rcpp module: lassi_fit_module -} diff --git a/man/lassi_origami.Rd b/man/lassi_origami.Rd deleted file mode 100644 index dbfa5899..00000000 --- a/man/lassi_origami.Rd +++ /dev/null @@ -1,28 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/cv_lasso.R -\name{lassi_origami} -\alias{lassi_origami} -\title{Single Lasso estimation for cross-validation with Origami} -\usage{ -lassi_origami(fold, data, lambdas, center = FALSE) -} -\arguments{ -\item{fold}{A \code{fold} object produced by a call to \code{make_folds} -from the \pkg{origami}.} - -\item{data}{A \code{dgCMatrix} object containing the outcome values (Y) in -its first column and vectors corresponding to the basis functions of HAL in -all other columns. Consult the description of HAL regression for details.} - -\item{lambdas}{A \code{numeric} vector corresponding to a sequence of lambda -values obtained by fitting the Lasso on the full data.} - -\item{center}{binary. 
If \code{TRUE}, covariates are centered. This is much -slower, but matches the \code{glmnet} implementation. Default \code{FALSE}.} -} -\description{ -Fits Lasso regression over a single fold of a cross-validated data set. This -is meant to be called using \code{\link[origami]{cross_validate}}, which is -done through \code{\link{cv_lasso}}. Note that this procedure is NOT meant -to be invoked by itself. INTERNAL USE ONLY. -} diff --git a/man/lassi_predict.Rd b/man/lassi_predict.Rd deleted file mode 100644 index 6737dfba..00000000 --- a/man/lassi_predict.Rd +++ /dev/null @@ -1,18 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/RcppExports.R -\name{lassi_predict} -\alias{lassi_predict} -\title{Prediction from a Lassi Model} -\usage{ -lassi_predict(X, beta, intercept) -} -\arguments{ -\item{X}{A sparse matrix of HAL basis functions.} - -\item{beta}{A vector of coefficient values for the HAL basis functions.} - -\item{intercept}{A numeric value giving the intercept of the HAL model.} -} -\description{ -Prediction from a Lassi Model -} diff --git a/man/predict.lassi.Rd b/man/predict.lassi.Rd deleted file mode 100644 index aede8220..00000000 --- a/man/predict.lassi.Rd +++ /dev/null @@ -1,19 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/lassi.R -\name{predict.lassi} -\alias{predict.lassi} -\title{Predict Method for Lasso on Indicator Bases} -\usage{ -\method{predict}{lassi}(fit, new_x_basis, lambdas = NULL) -} -\arguments{ -\item{fit}{...} - -\item{new_x_basis}{...} - -\item{lambdas}{...} -} -\description{ -Predict Method for Lasso on Indicator Bases -} -\keyword{internal} diff --git a/src/RcppExports.cpp b/src/RcppExports.cpp index 9ab10913..f47ac63b 100644 --- a/src/RcppExports.cpp +++ b/src/RcppExports.cpp @@ -35,19 +35,6 @@ BEGIN_RCPP return rcpp_result_gen; END_RCPP } -// lassi_predict -NumericVector lassi_predict(const MSpMat X, const NumericVector beta, double intercept); -RcppExport SEXP _hal9001_lassi_predict(SEXP XSEXP, SEXP betaSEXP, SEXP interceptSEXP) { -BEGIN_RCPP - Rcpp::RObject rcpp_result_gen; - Rcpp::RNGScope rcpp_rngScope_gen; - Rcpp::traits::input_parameter< const MSpMat >::type X(XSEXP); - Rcpp::traits::input_parameter< const NumericVector >::type beta(betaSEXP); - Rcpp::traits::input_parameter< double >::type intercept(interceptSEXP); - rcpp_result_gen = Rcpp::wrap(lassi_predict(X, beta, intercept)); - return rcpp_result_gen; -END_RCPP -} // make_basis_list List make_basis_list(const NumericMatrix& X_sub, const NumericVector& cols, const IntegerVector& order_map); RcppExport SEXP _hal9001_make_basis_list(SEXP X_subSEXP, SEXP colsSEXP, SEXP order_mapSEXP) { @@ -137,12 +124,9 @@ BEGIN_RCPP END_RCPP } -RcppExport SEXP _rcpp_module_boot_lassi_module(); - static const R_CallMethodDef CallEntries[] = { {"_hal9001_index_first_copy", (DL_FUNC) &_hal9001_index_first_copy, 1}, {"_hal9001_apply_copy_map", (DL_FUNC) &_hal9001_apply_copy_map, 2}, - {"_hal9001_lassi_predict", (DL_FUNC) &_hal9001_lassi_predict, 3}, {"_hal9001_make_basis_list", (DL_FUNC) &_hal9001_make_basis_list, 3}, {"_hal9001_meets_basis", (DL_FUNC) &_hal9001_meets_basis, 5}, {"_hal9001_evaluate_basis", (DL_FUNC) &_hal9001_evaluate_basis, 4}, @@ -150,7 +134,6 @@ static const R_CallMethodDef CallEntries[] = { {"_hal9001_as_dgCMatrix", (DL_FUNC) &_hal9001_as_dgCMatrix, 1}, {"_hal9001_calc_pnz", (DL_FUNC) &_hal9001_calc_pnz, 1}, {"_hal9001_calc_xscale", (DL_FUNC) &_hal9001_calc_xscale, 2}, - {"_rcpp_module_boot_lassi_module", (DL_FUNC) 
&_rcpp_module_boot_lassi_module, 0}, {NULL, NULL, 0} }; diff --git a/src/lassi_fit.cpp b/src/lassi_fit.cpp deleted file mode 100644 index 9509bda5..00000000 --- a/src/lassi_fit.cpp +++ /dev/null @@ -1,446 +0,0 @@ -// [[Rcpp::depends(RcppEigen)]] -#include -#include "hal9001_types.h" -#include "utils.h" -#include -#include - -using namespace Rcpp; - - -class Lassi { - const MSpMat X; - int n; - int p; - bool center; - double intercept; - NumericVector resids; - double resid_sum; - double rss; - double null_rss; - NumericVector beta; - NumericVector xcenter; - NumericVector xscale; - NumericVector lambdas; - SpMat beta_mat; - NumericVector intercepts; - /* - * variable_state is a vector of a lazy-person's enumerated type - * state=2 is the active_set (possibly also the strong_set) - * state=1 is the strong_set, but not the active_set - * state=0 is neither the strong_set nor the active_set - */ - IntegerVector variable_state; - NumericVector safe_lambda; - double lambda_max; -public: - Lassi(const MSpMat X_init, NumericVector y, int nlambda, - double lambda_min_ratio, bool center): - X(X_init), - n(X_init.rows()), - p(X_init.cols()), - center(center), - intercept(mean(y)), - resids(y-intercept), - resid_sum(sum(resids)), - rss(sum(resids * resids)), - null_rss(rss), - beta(NumericVector(p,0.0)), - lambdas(NumericVector(nlambda, 0.0)), - beta_mat(SpMat(p, nlambda)), - intercepts(NumericVector(nlambda,0.0)), - variable_state(IntegerVector(p, 0)) - { - // get centering and scaling vectors if applicable - if(center){ - xcenter = calc_pnz(X); - } else { - xcenter = NumericVector(p, 0.0); - } - - xscale = calc_xscale(X, xcenter); - - // initialize lambda_max and lambda vector - // X_t_resid is used for lots of things: - // strong rule - // kkt violation - // beta update - // so we compute beta updates - // then we check strong filtering - // then we check for kkt violations (all at once, but why) - - lambda_max = 0; - double new_beta; - for (int j = 0; j < p; ++j) { - new_beta = X_t_resid(j) / n; - new_beta = std::abs(new_beta); - if (new_beta > lambda_max) { - - lambda_max = new_beta; - } - } - - double log_lambda_max = log(lambda_max); - double log_lambda_min = log(lambda_min_ratio*lambda_max); - double lambda_step_size = (log_lambda_max - log_lambda_min) / (nlambda - 1); - - for(int i = 0; i < nlambda; i++){ - lambdas[i] = exp(log_lambda_max - i*lambda_step_size); - } - - //below this lambda, we must check if variable is now active. 
- safe_lambda = NumericVector(p, lambda_max); - - beta_mat.reserve(0.2 * p * nlambda); - } - - const MSpMat& get_X(){ - return(X); - } - - NumericVector get_beta(){ - return(beta); - } - - SpMat get_beta_mat(){ - beta_mat.makeCompressed(); - return(beta_mat); - } - - NumericVector get_intercepts(){ - return(intercepts); - } - - NumericVector get_lambdas(){ - return(lambdas); - } - - void set_lambdas(NumericVector new_lambdas){ - if(new_lambdas.length()!=lambdas.length()){ - stop("length(lambdas) must match nlambda passed on construction"); - } - - lambdas=new_lambdas; - } - - NumericVector get_resids(){ - return(resids); - } - - NumericVector get_xscale(){ - return(xscale); - } - - NumericVector get_xcenter(){ - return(xcenter); - } - - double X_t_resid(int j) { - double crossprod_sum = 0; - for (MInIterMat i_(X, j); i_; ++i_) { - crossprod_sum += resids[i_.index()]; - } - - if(center){ - crossprod_sum = (crossprod_sum - xcenter[j] * resid_sum) / xscale[j]; - } else { - crossprod_sum = crossprod_sum / xscale[j]; - } - - return(crossprod_sum); - } - - double get_new_beta(int j) { - double crossprod_sum = X_t_resid(j); - double new_beta = crossprod_sum / n + beta[j]; - return(new_beta); - } - - double find_lambda_max(){ - return(lambda_max); - } - - void update_resid(int j, double beta_diff) { - - double scaled_diff = beta_diff / xscale[j]; - - // if(center){ - // for (int i=0; i 1e-7) { - - update_resid(j, beta_diff); - beta[j] = new_beta; - } else { - beta_diff = 0; - } - return(beta_diff); - } - - int update_coords(double lambda, bool active_set) { - // update coordinates one-by-one - int j; - double old_rss = rss; - int updates = 0; - double update; - for (j = 0; j < X.outerSize(); ++j) { - if(!(active_set) || beta[j]!=0){ - update = update_coord(j, lambda); - - // see if we decreased the rss - // todo: should be relative to null deviance - if(update!=0){ - if((old_rss-rss)/null_rss > 1e-7){ - updates++; - } - old_rss = rss; - } - } - } - - // update intercept - double mean_resid = mean(resids); - resids = resids - mean_resid; - intercept += mean_resid; - - // Rcout << "Updated " << updated << " coords" << std::endl; - return(updates); - } - - int check_kkt(double lambda) { - return(0); - } - - int lassi_fit_cd(int lambda_step, bool active_set, int nsteps) { - double lambda = lambdas[lambda_step]; - - int step_num = 0; - double last_rss = rss; - double ratio = 0; - int updated=0; - // Rcout << "Starting mse " << mse << std::endl; - for (step_num = 0; step_num < nsteps; step_num++) { - last_rss = rss; - - updated = update_coords(lambda, active_set); - // rss = sum(resids*resids); - // we failed to substantially improve any coords - if (updated == 0) { - break; - } - - ratio = (last_rss - rss) / last_rss; - - if (ratio < 1e-2) { - break; - } - } - - //copy nz betas into beta_mat col - for (int j = 0; j < p; j++) { - if(beta[j]!=0){ - beta_mat.insert(j, lambda_step) = beta[j]; - } - } - - intercepts[lambda_step] = intercept; - return(step_num); - } - - NumericVector complex_approach(int lambda_step){ - // use active set - // update_coords until convergence - // check kkt for strong set, if violations, add to active, continue iterating - // check kkt for all preds, if violations, add to active, recompute strong, continue iterating - // basically, prioritize strong set before other preds when activating vars - - // kkt violations in non strong preds are very rare - // step 2 is active set - // step 1 is strong set - // step 0 is full set - int step_num=2; - int steps=0; - int max_steps=1000; 
- // double old_rss = rss; - double loop_rss = rss; - double lambda = lambdas[lambda_step]; - double old_lambda; - double thresh = 1e-7 * null_rss / n; - if(lambda_step==0){ - old_lambda = lambda; - } else { - old_lambda = lambdas[lambda_step-1]; - } - double strong_criterion = 2*lambda - old_lambda; - - Timer timer; - timer.step("start"); - while((steps=0)){ - int updates=0; - double update; - int checked=0; - double max_update=0.0; - for(int j=0; j 1e-7) { - - update_resid(j, beta_diff); - beta[j] = new_beta; - updates++; - } - - double something = beta_diff * beta_diff; - if(something>max_update){ - max_update=something; - } - - if(std::abs(update) > lambda){ - if(step_num<2){ - // if not already, put in active set - variable_state[j]=2; - } - - } else { - //put in strong if not currently and criteria met - if(step_num==0){ - - //update strong - if(std::abs(update) > strong_criterion){ - variable_state[j]=1; - } - - //update safe - //we need to start checking this predictor again - //when lambda gets smaller than safe_lambda - double rnorm=std::sqrt(rss)/n; - safe_lambda[j]=lambda*((rnorm+std::abs(update))/(rnorm+lambda)); - // Rcout << "rnorm: " << rnorm << " update: " << update << " current: " - // << lambda << " next_safe: "<< safe_lambda[j] << std::endl; - } - } - } - } - - if(max_update("%d, %d, %d, %d, %f", steps, step_num, checked, updates, max_update)); - // Rcout << "step: " << steps << " step_num: " << step_num << " updates: " << updates - // << " loop_rss: "<< loop_rss << " old_rss: "<< old_rss << " ratio: " << (loop_rss-old_rss)/loop_rss << std::endl; - loop_rss = rss; - - if(updates==0){ - //if we failed to update, move on to the next step (lower numbered) - step_num--; - } else{ - //if we updated anything, we should go back to the active set step - step_num=2; - } - steps++; - } - //copy nz betas into beta_mat col - for (int j = 0; j < p; j++) { - if(beta[j]!=0){ - beta_mat.insert(j, lambda_step) = beta[j]; - } - } - intercepts[lambda_step] = intercept; - NumericVector res(timer); - return(res); - } -}; - -RCPP_MODULE(lassi_module) { - class_( "Lassi" ) - .constructor() - .method( "update_coord", &Lassi::update_coord) - .method( "update_coords", &Lassi::update_coords) - - .method( "lassi_fit_cd", &Lassi::lassi_fit_cd ) - .method( "complex_approach", &Lassi::complex_approach) - .method( "X_t_resid", &Lassi::X_t_resid) - .property( "beta", &Lassi::get_beta) - .property( "beta_mat", &Lassi::get_beta_mat) - .property( "intercepts", &Lassi::get_intercepts) - - .property( "resids", &Lassi::get_resids) - .property( "xscale", &Lassi::get_xscale) - .property( "xcenter", &Lassi::get_xcenter) - .property( "X", &Lassi::get_X) - .property( "lambdas", &Lassi::get_lambdas, &Lassi::set_lambdas) - .property( "lambda_max", &Lassi::find_lambda_max) - ; -} - -//' Prediction from a Lassi Model -//' -//' @param X A sparse matrix of HAL basis functions. -//' @param beta A vector of coefficient values for the HAL basis functions. -//' @param intercept A numeric value giving the intercept of the HAL model. 
-//' -// [[Rcpp::export]] -NumericVector lassi_predict(const MSpMat X, const NumericVector beta, - double intercept) { - int n = X.rows(); - NumericVector pred(n, intercept); - int k = 0; - double current_beta; - - for (k = 0; k < X.outerSize(); ++k) { - current_beta = beta[k]; - - for (MInIterMat it_(X, k); it_; ++it_) { - pred[it_.row()] += current_beta; - } - } - return(pred); -} diff --git a/tests/testthat/test-cv_lasso.R b/tests/testthat/test-cv_lasso.R index 8054e4d3..70e34775 100644 --- a/tests/testthat/test-cv_lasso.R +++ b/tests/testthat/test-cv_lasso.R @@ -33,7 +33,6 @@ x_basis <- x_basis[, unique_columns] # cv.glmnet reference ################################################################################ - # create fold ID object for using the same folds between cv.glmnet and origami folds <- make_folds(n) fold_id <- origami:::folds2foldvec(folds) @@ -43,114 +42,8 @@ lasso_glmnet <- glmnet::cv.glmnet( x = x_basis, y = y, nfolds = n_folds, foldid = fold_id ) -glmnet_nlambda <- length(lasso_glmnet$lambda) - - -################################################################################ -# CV-LASSO custom implementation -################################################################################ - -# first, need to run lasso on the full data to get a sequence of lambdas -lasso_init <- hal9001:::lassi(y = y, x = x_basis, center = TRUE) -lambdas_init <- lasso_init$lambdas - -test_that("Check that procedure to generate lambdas matches that from glmnet", { - expect_equal(lambdas_init[seq_len(glmnet_nlambda)], lasso_glmnet$lambda) -}) -lambdas_init <- lambdas_init[seq_len(glmnet_nlambda)] - -# next, set up a cross-validated lasso using the sequence of lambdas -full_data_mat <- cbind(y, x_basis) -folds <- origami::make_folds(full_data_mat, V = n_folds) - -# run the cross-validated lasso procedure to find the optimal lambda -cv_lasso_out <- origami::cross_validate( - cv_fun = lassi_origami, - folds = folds, - data = full_data_mat, - lambdas = lambdas_init, - center = TRUE -) - -# compute cv-mean of MSEs for each lambda -lambdas_cvmse <- colMeans(cv_lasso_out$mses) - -# NOTE: there is an off-by-one error occurring in the computation of the optimal -# lambda and the lambda 1 standard deviation above it. Based on manual -# inspection, the custom CV-Lasso routine consistently selects an optimal -# lambda that is slightly too large and a 1se-lambda slightly too small. 
- -# find the lambda that minimizes the MSE -lambda_optim_index <- which.min(lambdas_cvmse) + 1 -lambda_minmse_origami <- lambdas_init[lambda_optim_index] - -# also need the adjusted CV standard for each lambda -lambdas_cvsd <- apply(cv_lasso_out$mses, 2, sd) / sqrt(n_folds) - -# find the maximum lambda among those 1 standard error above the minimum -lambda_min_1se <- (lambdas_cvmse + lambdas_cvsd)[lambda_optim_index] -lambda_1se_origami <- max(lambdas_init[lambdas_cvmse <= lambda_min_1se], - na.rm = TRUE -) -lambda_1se_index <- which.min(abs(lambdas_init - lambda_1se_origami)) - -# create output object -get_lambda_indices <- c(lambda_1se_index, lambda_optim_index) -betas_out <- lasso_init$beta_mat[, get_lambda_indices] -colnames(betas_out) <- c("lambda_1se", "lambda_min") - -# add in intercept term to coefs matrix and convert to sparse matrix output -betas_out <- rbind(rep(mean(y), ncol(betas_out)), betas_out) -betas_out <- as_dgCMatrix(betas_out * 1.0) -coef_minmse_origami <- as.numeric(betas_out[, 2]) -coef_1se_origami <- as.numeric(betas_out[, 1]) - - -################################################################################ -# test set performance -################################################################################ - -# create fold ID object for using the same folds between cv.glmnet and origami -fold_id <- origami::folds2foldvec(folds) - -# just use the standard implementation available in glmnet with origami folds -lasso_glmnet <- glmnet::cv.glmnet( - x = x_basis, y = y, nfolds = n_folds, - foldid = fold_id -) lambda_minmse_cvglmnet <- lasso_glmnet$lambda.min lambda_1se_cvglmnet <- lasso_glmnet$lambda.1se coef_minmse_cvglmnet <- as.numeric(coef(lasso_glmnet, "lambda.min")) coef_1se_cvglmnet <- as.numeric(coef(lasso_glmnet, "lambda.1se")) betas_cvglmnet <- cbind(coef_1se_cvglmnet, coef_minmse_cvglmnet) - -# now use the glmnet implementation with origami folds and lassi lambdas -lassi_glmnet <- glmnet::cv.glmnet( - x = x_basis, y = y, nfolds = n_folds, - foldid = fold_id, lambda = lambdas_init -) -lambda_minmse_cvglmnet_lassi <- lassi_glmnet$lambda.min -lambda_1se_cvglmnet_lassi <- lassi_glmnet$lambda.1se -coef_minmse_cvglmnet_lassi <- coef(lassi_glmnet, "lambda.min") -coef_1se_cvglmnet_lassi <- coef(lassi_glmnet, "lambda.1se") -betas_cvglmnet_lassi <- cbind( - coef_1se_cvglmnet_lassi, - coef_minmse_cvglmnet_lassi -) - -################################################################################ -# TEST THAT ORIGAMI AND CV-GLMNET IMPLEMENTATIONS MATCH -################################################################################ -test_that("lambda-min difference between cv.glmnet, cv_lasso within 0.5%.", { - expect_lte( - lambda_minmse_origami - lambda_minmse_cvglmnet, - lambda_minmse_cvglmnet * 0.005 - ) -}) - -test_that("lambda-1se difference between cv.glmnet and cv_lasso within 1%.", { - expect_lte( - lambda_minmse_origami - lambda_minmse_cvglmnet, - lambda_minmse_cvglmnet * 0.01 - ) -}) diff --git a/tests/testthat/test-lasso.R b/tests/testthat/test-lasso.R index c8ac8c34..9d6dc756 100644 --- a/tests/testthat/test-lasso.R +++ b/tests/testthat/test-lasso.R @@ -37,80 +37,6 @@ system.time({ ) }) -################################################# -# test scaling and centering -lassi_fit <- methods::new(hal9001:::Lassi, x_basis, y, 100, 0.01, TRUE) -xcenter <- lassi_fit$xcenter -xscale <- lassi_fit$xscale - -# apply scaling and centering -xcentered <- sweep(x_basis, 2, xcenter, "-") - -# xscale_r <- apply(xcentered, 2, sd) * sqrt((n-1)/n) # bessel correction 
-xcenter_scaled <- sweep(xcentered, 2, xscale, "/") - -cs_means <- colMeans(as.matrix(xcenter_scaled)) -# bessel correction -cs_sd <- apply(xcenter_scaled, 2, sd) * sqrt((n - 1) / n) - -test_that("centering and scaling x works", { - expect_lt(max(abs(cs_means)), 1e-8) - expect_lt(max(abs(cs_sd[cs_sd != 0] - 1)), 1e-8) -}) - - -################################################# -# test generating the sequence of lambdas -test_that("lambda sequence matches glmnet", { - expect_equal(lassi_fit$lambdas, glmnet_fit$lambda) -}) -lambda_max <- lassi_fit$lambdas[1] - -# verify that lambda max zeros out coefficients -lassi_fit$update_coords(lambda_max, FALSE) -test_that("lambda_max results in zero beta vector", { - expect_true(all(lassi_fit$beta == 0)) -}) - -# verify that a slightly smaller lambda does not -delta <- 1 - 1e-3 -lambda_not_quite_max <- lambda_max * delta -lassi_fit$update_coords(lambda_not_quite_max, FALSE) -test_that( - "a slightly smaller lambda results in nonzero beta vector", - expect_true(!all(lassi_fit$beta == 0)) -) - -################################################# -# test a single coordinate descent update -lassi_fit <- new(hal9001:::Lassi, x_basis, y, 100, 0.01, FALSE) -n <- length(y) - -# which beta to update (1 - indexed) -i <- 1 -xvar <- lassi_fit$X[, i] -xscale_i <- lassi_fit$xscale[i] -resid <- lassi_fit$resids - -ls_beta <- coef(lm(resid ~ xvar - 1)) * xscale_i -# cd_beta <- mean(xvar/xscale_i * resid) - -lassi_fit$update_coord(i - 1, 0) -beta_new <- lassi_fit$beta[i] - - -test_that("coordinate descent works as it does in R", { - expect_equivalent(ls_beta, beta_new) -}) - -new_resid <- lassi_fit$resids -verify_new_resid <- y - mean(y) - beta_new * xvar / xscale_i - -test_that("the updated residuals are as expected", { - expect_equal(new_resid, verify_new_resid) -}) - - ################################################################################ # PREDICTION ################################################################################ @@ -118,32 +44,7 @@ test_that("the updated residuals are as expected", { new_data <- as.matrix(test_x) pred_x_basis <- hal9001:::make_design_matrix(new_data, basis_list) pred_x_basis <- hal9001:::apply_copy_map(pred_x_basis, copy_map) - -# lassi prediction and mses -system.time({ - lassi_fit <- hal9001:::lassi(x_basis, y, center = FALSE) -}) - -system.time({ - lassi_fit <- hal9001:::lassi(x_basis, y, center = TRUE) -}) - -pred_mat <- predict(lassi_fit, pred_x_basis) -mses <- apply(pred_mat, 2, function(preds) { - mean((preds - test_y)^2) -}) - - gpred_mat <- predict(glmnet_fit, pred_x_basis) gmses <- apply(gpred_mat, 2, function(preds) { mean((preds - test_y)^2) }) - -test_that("lassi isn't doing much worse in terms of MSE", { - expect_lt((min(mses) - min(gmses)) / min(gmses), 0.05) -}) - -# library(ggplot2) -# combined = data.frame(lambda = seq_len(100), mse = c(mses, gmses), -# type = rep(c("lassi", "glmnet"), each = 100)) -# ggplot(combined, aes(x = lambda, y = mse, color = type)) + geom_line()