diff --git a/NAMESPACE b/NAMESPACE
index f5c33f1f..4ca3f7e2 100644
--- a/NAMESPACE
+++ b/NAMESPACE
@@ -16,7 +16,6 @@ export(make_copy_map)
export(make_design_matrix)
export(make_reduced_basis_map)
export(squash_hal_fit)
-import(Rcpp)
importFrom(Matrix,tcrossprod)
importFrom(Rcpp,sourceCpp)
importFrom(assertthat,assert_that)
@@ -27,13 +26,9 @@ importFrom(data.table,setorder)
importFrom(glmnet,cv.glmnet)
importFrom(glmnet,glmnet)
importFrom(methods,is)
-importFrom(methods,new)
importFrom(origami,cross_validate)
-importFrom(origami,fold_index)
importFrom(origami,folds2foldvec)
importFrom(origami,make_folds)
-importFrom(origami,training)
-importFrom(origami,validation)
importFrom(stats,aggregate)
importFrom(stats,as.formula)
importFrom(stats,coef)
diff --git a/R/RcppExports.R b/R/RcppExports.R
index bd17cee2..aaedb7ca 100644
--- a/R/RcppExports.R
+++ b/R/RcppExports.R
@@ -51,16 +51,6 @@ apply_copy_map <- function(X, copy_map) {
  .Call('_hal9001_apply_copy_map', PACKAGE = 'hal9001', X, copy_map)
}

-#' Prediction from a Lassi Model
-#'
-#' @param X A sparse matrix of HAL basis functions.
-#' @param beta A vector of coefficient values for the HAL basis functions.
-#' @param intercept A numeric value giving the intercept of the HAL model.
-#'
-lassi_predict <- function(X, beta, intercept) {
-  .Call('_hal9001_lassi_predict', PACKAGE = 'hal9001', X, beta, intercept)
-}
-
#' Sort Basis Functions
#'
#' Build a sorted list of unique basis functions based on columns, where each
diff --git a/R/cv_lasso.R b/R/cv_lasso.R
index 44233825..a7fece86 100644
--- a/R/cv_lasso.R
+++ b/R/cv_lasso.R
@@ -1,55 +1,3 @@
-#' Single Lasso estimation for cross-validation with Origami
-#'
-#' Fits Lasso regression over a single fold of a cross-validated data set. This
-#' is meant to be called using \code{\link[origami]{cross_validate}}, which is
-#' done through \code{\link{cv_lasso}}. Note that this procedure is NOT meant
-#' to be invoked by itself. INTERNAL USE ONLY.
-#'
-#' @param fold A \code{fold} object produced by a call to \code{make_folds}
-#'  from the \pkg{origami}.
-#' @param data A \code{dgCMatrix} object containing the outcome values (Y) in
-#'  its first column and vectors corresponding to the basis functions of HAL in
-#'  all other columns. Consult the description of HAL regression for details.
-#' @param lambdas A \code{numeric} vector corresponding to a sequence of lambda
-#'  values obtained by fitting the Lasso on the full data.
-#' @param center binary. If \code{TRUE}, covariates are centered. This is much
-#'  slower, but matches the \code{glmnet} implementation. Default \code{FALSE}.
-#'
-#' @importFrom origami training validation fold_index
-lassi_origami <- function(fold, data, lambdas, center = FALSE) {
-  # make sure data is an (augmented) sparse matrix of basis functions
-  stopifnot(class(data) == "dgCMatrix")
-
-  # split data for V-fold cross-validation
-  train_data <- origami::training(data)
-  valid_data <- origami::validation(data)
-
-  # wrangle objects to clearer forms
-  train_x_basis <- train_data[, -1]
-  valid_x_basis <- valid_data[, -1]
-  train_y <- train_data[, 1]
-  valid_y <- valid_data[, 1]
-
-  # compute the predicted betas for the given training and validation sets
-  lassi_fit <- lassi(
-    x = train_x_basis, y = train_y, lambdas = lambdas,
-    center = center
-  )
-  pred_mat <- predict(lassi_fit, valid_x_basis)
-
-  # compute the MSE for the given training and validation sets
-  mses <- apply(pred_mat, 2, function(preds) {
-    mean((preds - valid_y)^2)
-  })
-
-  # the only output needed is the lambda-wise MSE over each fold
-  mses_out <- matrix(mses, nrow = 1)
-  out <- list(mses = mses_out)
-  return(out)
-}
-
-###############################################################################
-
#' Cross-validated Lasso on Indicator Bases
#'
#' Fits Lasso regression using a customized procedure, with cross-validation
diff --git a/R/cv_lasso_early_stopping.R b/R/cv_lasso_early_stopping.R
deleted file mode 100644
index a8423f63..00000000
--- a/R/cv_lasso_early_stopping.R
+++ /dev/null
@@ -1,97 +0,0 @@
-#' Cross-validated LASSO on Indicator Bases
-#'
-#' Fits the LASSO regression using a customized procedure with cross-validation
-#' based on \pkg{origami}
-#'
-#' @param x_basis A \code{dgCMatrix} object corresponding to a sparse matrix of
-#'  the basis functions generated for the HAL algorithm.
-#' @param y A \code{numeric} vector of the observed outcome variable values.
-#' @param n_lambda A \code{numeric} scalar indicating the number of values of
-#'  the L1 regularization parameter (lambda) to be obtained from fitting the
-#'  LASSO to the full data. Cross-validation is used to select an optimal
-#'  lambda (that minimizes the risk) from among these.
-#' @param n_folds A \code{numeric} scalar for the number of folds to be used in
-#'  the cross-validation procedure to select an optimal value of lambda.
-#'
-#' @importFrom origami make_folds cross_validate
-#' @importFrom stats sd
-cv_lasso_early_stopping <- function(x_basis, y, n_lambda = 100, n_folds = 10) {
-  # first, need to run lasso on the full data to get a sequence of lambdas
-  lasso_init <- lassi(y = y, x = x_basis, nlambda = n_lambda, center = FALSE)
-  lambdas_init <- lasso_init$lambdas
-
-  # next, set up a cross-validated lasso using the sequence of lambdas
-  folds <- origami::make_folds(x_basis, V = n_folds)
-
-  # track separately for folds = xscale, beta, resid, intercept
-  fold <- folds[[1]]
-  setup_fold_data <- function(fold, x_basis, y) {
-    x_train <- training(x_basis)
-    y_train <- training(y)
-    x_valid <- validation(x_basis)
-    y_valid <- validation(y)
-
-    intercept <- mean(y_train)
-    resid <- y_train - intercept
-    xcenter <- rep(0, ncol(x_basis))
-    xscale <- calc_xscale(x_train, xcenter)
-    beta <- rep(0, ncol(x_basis))
-
-    fold_data <- list(
-      x_train = x_train, x_valid = x_valid, y_valid = y_valid,
-      intercept = intercept, resid = resid, xscale = xscale, xcenter = xcenter,
-      beta = beta
-    )
-
-    return(list(fold_data = fold_data))
-  }
-
-  all_fold_data <- cross_validate(setup_fold_data, folds, x_basis, y,
-    .combine = FALSE
-  )$fold_data
-
-  cv_lassi_step <- function(fold, all_fold_data, lambda) {
-    fold_data <- fold_index(all_fold_data)[[1]]
-    n_updates <- with(fold_data, fit_lassi_step(
-      x_train, resid, beta, lambda,
-      xscale, xcenter, intercept,
-      FALSE
-    ))
-    preds <- with(fold_data, as.vector(x_valid %*% (beta / xscale)) +
-      intercept)
-    mse <- with(fold_data, mean((preds - y_valid)^2))
-    return(list(fold_data = fold_data, mse = mse))
-  }
-
-  null_mse <- NULL
-  min_mse <- Inf
-  step_mses <- rep(Inf, n_lambda)
-  for (lambda_step in seq_along(lambdas_init)) {
-    lambda <- lambdas_init[lambda_step]
-    step_results <- lapply(folds, cv_lassi_step, all_fold_data, lambda)
-    all_fold_data <- lapply(step_results, `[[`, "fold_data")
-    step_mse <- mean(sapply(step_results, `[[`, "mse"))
-    # step_results <- cross_validate(cv_lassi_step, folds, all_fold_data,
-    #   lambda, .combine = FALSE)
-    # all_fold_data <- step_results$fold_data
-
-    if (is.null(null_mse)) {
-      # null_mse is the first mse (i.e. for the null model)
-      null_mse <- step_mse
-    }
-
-    if (step_mse < min_mse) {
-      min_mse <- step_mse
-    }
-
-    # compute increase above minimum as percentage of total range
-    ratio <- (step_mse - min_mse) / (null_mse - min_mse)
-
-    # cat(sprintf("lambda: %f, mse: %f, ratio: %f\n", lambda, step_mse, ratio))
-    if (is.finite(ratio) && (ratio > 0.1)) {
-      break
-    }
-    step_mses[lambda_step] <- step_mse
-  }
-  return(step_mses)
-}
diff --git a/R/lassi.R b/R/lassi.R
deleted file mode 100644
index 6fa3986d..00000000
--- a/R/lassi.R
+++ /dev/null
@@ -1,97 +0,0 @@
-#' Rcpp module: lassi_fit_module
-#' @import Rcpp
-#' @name lassi_fit_module
-NULL
-loadModule("lassi_module", TRUE)
-
-#' Custom Lasso implementation for matrices of indicator functions
-#'
-#' @param x The covariate matrix
-#' @param y The outcome vector
-#' @param lambdas A sequence of values for the L1 regularization parameter
-#'  (lambda) to be used in fitting the LASSO. Defaults to \code{NULL}.
-#' @param nlambda number of lambdas to fit.
-#' @param lambda_min_ratio ratio of largest to smallest lambda to fit.
-#' @param center ...
-#'
-#' @importFrom methods new
-#'
-#' @keywords internal
-lassi <- function(x, y, lambdas = NULL, nlambda = 100,
-                  lambda_min_ratio = 0.01, center = FALSE) {
-  if (!is.null(lambdas)) {
-    nlambda <- length(lambdas)
-  }
-
-  # initialize object
-  lassi_object <- methods::new(Lassi, x, y, nlambda, lambda_min_ratio, center)
-
-  if (!is.null(lambdas)) {
-    lassi_object$lambdas <- lambdas
-  }
-
-  # initialize step counter
-  step_counts <- rep(0, nlambda)
-
-  # iterative procedure for active step convergence
-  for (i in (seq_len(nlambda) - 1)) {
-    full_steps <- lassi_object$lassi_fit_cd(i, FALSE, 1)
-    if (full_steps > 0) {
-      active_steps <- lassi_object$lassi_fit_cd(i, TRUE, 1000)
-    } else {
-      active_steps <- 0
-    }
-    step_counts[i + 1] <- active_steps
-  }
-
-  beta_mat <- as.matrix(lassi_object$beta_mat)
-  intercepts <- lassi_object$intercepts
-  beta_mat <- beta_mat / lassi_object$xscale
-  if (center) {
-    intercepts <- intercepts - crossprod(lassi_object$xcenter, beta_mat)
-  }
-
-  chichignoud_criterion <- NULL
-
-  # create output object
-  out <- list(beta_mat, intercepts,
-    lambdas = lassi_object$lambdas,
-    step_counts, chichignoud_criterion
-  )
-  names(out) <- c(
-    "beta_mat", "intercepts", "lambdas", "steps",
-    "chichignoud_criterion"
-  )
-  class(out) <- "lassi"
-  return(out)
-}
-
-#' Predict Method for Lasso on Indicator Bases
-#'
-#' @param fit ...
-#' @param new_x_basis ...
-#' @param lambdas ...
-#'
-#' @keywords internal
-predict.lassi <- function(fit, new_x_basis, lambdas = NULL) {
-  if (is.null(lambdas)) {
-    lambdas <- fit$lambdas
-  }
-
-  if (!all(lambdas %in% fit$lambdas)) {
-    stop("Attempting to predict for a lambda that was not fit.")
-  }
-
-  preds <- matrix(0, nrow = nrow(new_x_basis), ncol = length(lambdas))
-
-  for (i in seq_along(lambdas)) {
-    lambda <- lambdas[i]
-    beta_col <- which(lambda == fit$lambdas)
-    beta <- fit$beta_mat[, beta_col]
-    intercept <- fit$intercepts[beta_col]
-    pred_col <- lassi_predict(new_x_basis, beta, intercept)
-    preds[, i] <- pred_col
-    # find corresponding betas
-  }
-  return(preds)
-}
diff --git a/README.md b/README.md
index 7e359316..728a247b 100644
--- a/README.md
+++ b/README.md
@@ -85,7 +85,7 @@ predictions via Highly Adaptive Lasso regression:
# load the package and set a seed
library(hal9001)
#> Loading required package: Rcpp
-#> hal9001 v0.4.4: The Scalable Highly Adaptive Lasso
+#> hal9001 v0.4.5: The Scalable Highly Adaptive Lasso
#> note: fit_hal defaults have changed. See ?fit_hal for details

set.seed(385971)
diff --git a/docs/articles/intro_hal9001.html b/docs/articles/intro_hal9001.html
index b59b537b..d5262436 100644
--- a/docs/articles/intro_hal9001.html
+++ b/docs/articles/intro_hal9001.html
@@ -85,7 +85,11 @@
vignettes/intro_hal9001.Rmd
intro_hal9001.Rmd
## user.self sys.self elapsed user.child sys.child
-## enumerate_basis 0.027 0.003 0.053 0 0
-## design_matrix 0.129 0.021 0.340 0 0
-## reduce_basis 0.000 0.000 0.000 0 0
-## remove_duplicates 0.000 0.000 0.000 0 0
-## lasso 3.577 0.436 9.466 0 0
-## total 3.734 0.460 9.861 0 0
+## enumerate_basis 0.017 0.000 0.018 0 0
+## design_matrix 0.082 0.003 0.086 0 0
+## reduce_basis 0.000 0.000 0.000 0 0
+## remove_duplicates 0.000 0.000 0.000 0 0
+## lasso 2.284 0.072 2.399 0 0
+## total 2.384 0.075 2.503 0 0
hal_fit_reduced$times
## user.self sys.self elapsed user.child sys.child
-## enumerate_basis 0.028 0.007 0.122 0 0
-## design_matrix 0.131 0.017 0.463 0 0
-## reduce_basis 0.000 0.000 0.000 0 0
+## enumerate_basis 0.026 0.005 0.031 0 0
+## design_matrix 0.080 0.002 0.082 0 0
+## reduce_basis 0.000 0.000 0.000 0 0
+## remove_duplicates 0.000 0.000 0.000 0 0
+## lasso 1.951 0.063 2.030 0 0
+## total 2.057 0.070 2.143 0 0
@@ -355,6 +369,18 @@ Reducing basis functions
+In the above, all basis functions with fewer than 10% of observations
+meeting the criterion imposed are automatically removed prior to the
+Lasso step of fitting the HAL regression. The results appear below.
+
summary(hal_fit_reduced)$table
## coef
## 1: -1.424003e+00
@@ -513,7 +539,11 @@ Specifying smoothness of the HAL
for a near-optimal fit. Therefore, one can safely pass a smaller value
to num_knots
for a big decrease in runtime without
sacrificing performance.
+
mean((pred_0 - ytrue)^2)
## [1] 0.00732315
@@ -552,7 +582,11 @@ Specifying smoothness of the HAL
Comparing the following simulation and the previous one, the HAL with
second-order smoothness performed better when there were fewer knot
points.
+
set.seed(98109)
n_covars <- 1
n_obs <- 250
@@ -609,7 +643,11 @@ Formula interface
+
set.seed(98109)
num_knots <- 100
@@ -627,7 +665,11 @@ Formula interface
+
# The `h` function is used to specify the basis functions for a given term
# h(x1) generates one-way basis functions for the variable x1
# This is an additive model:
@@ -672,7 +714,11 @@ Formula interface
+
smoothness_orders <- 1
num_knots <- 5
# A additive model
@@ -716,7 +762,11 @@ Formula interface
+
# Write it all out
formula <- h(x1) + h(x2) + h(A) + h(A, x1) + h(A,x2)
@@ -745,7 +795,11 @@ Formula interface
+
# An additive monotone increasing model
formula <- formula_hal(
y ~ h(., monotone = "i"), X, smoothness_orders = 0, num_knots = 100
@@ -780,7 +834,11 @@ Formula interface
+
# get formula object
fit <- fit_hal(
X = X, Y = Y, formula = ~ h(.), smoothness_orders = 1, num_knots = 100
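The `reduce_basis` screening described in the vignette text above can be exercised directly through the exported interface. A minimal sketch on toy data (the 0.1 threshold mirrors the 10% criterion mentioned above; timings will differ by machine):

```r
library(hal9001)
set.seed(27158)
x <- matrix(rnorm(200 * 3), nrow = 200)
y <- sin(x[, 1]) + rnorm(200, sd = 0.1)
# drop basis functions met by fewer than 10% of observations
# before the Lasso step of the HAL fit
fit_reduced <- fit_hal(X = x, Y = y, reduce_basis = 0.1)
fit_reduced$times
```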
diff --git a/docs/index.html b/docs/index.html
index 99aca6de..b68b1ec1 100644
--- a/docs/index.html
+++ b/docs/index.html
@@ -128,7 +128,7 @@ Example
# load the package and set a seed
library(hal9001)
#> Loading required package: Rcpp
-#> hal9001 v0.4.4: The Scalable Highly Adaptive Lasso
+#> hal9001 v0.4.5: The Scalable Highly Adaptive Lasso
#> note: fit_hal defaults have changed. See ?fit_hal for details
set.seed(385971)
@@ -143,12 +143,12 @@ Example
#> [1] "I'm sorry, Dave. I'm afraid I can't do that."
hal_fit$times
#> user.self sys.self elapsed user.child sys.child
-#> enumerate_basis 0.014 0.003 0.059 0 0
-#> design_matrix 0.004 0.001 0.005 0 0
-#> reduce_basis 0.000 0.000 0.000 0 0
-#> remove_duplicates 0.000 0.000 0.000 0 0
-#> lasso 2.684 0.343 6.583 0 0
-#> total 2.703 0.348 6.655 0 0
+#> enumerate_basis 0.008 0.001 0.009 0 0
+#> design_matrix 0.003 0.000 0.003 0 0
+#> reduce_basis 0.000 0.000 0.000 0 0
+#> remove_duplicates 0.000 0.000 0.001 0 0
+#> lasso 1.690 0.036 1.764 0 0
+#> total 1.702 0.037 1.778 0 0
# training sample prediction
preds <- predict(hal_fit, new_data = x)
diff --git a/docs/pkgdown.yml b/docs/pkgdown.yml
index cc24eab0..57535cfb 100644
--- a/docs/pkgdown.yml
+++ b/docs/pkgdown.yml
@@ -3,7 +3,7 @@ pkgdown: 2.0.3
pkgdown_sha: ~
articles:
intro_hal9001: intro_hal9001.html
-last_built: 2022-07-13T01:48Z
+last_built: 2022-11-04T20:06Z
urls:
reference: https://tlverse.org/hal9001/reference
article: https://tlverse.org/hal9001/articles
diff --git a/docs/reference/index.html b/docs/reference/index.html
index a4183ecc..ad92e617 100644
--- a/docs/reference/index.html
+++ b/docs/reference/index.html
@@ -86,10 +86,6 @@ All functions cv_lasso()
Cross-validated Lasso on Indicator Bases
-
-
-
- Cross-validated LASSO on Indicator Bases
@@ -130,18 +126,6 @@ All functions index_first_copy()
Find Copies of Columns
-
-
-
- Rcpp module: lassi_fit_module
-
-
-
- Single Lasso estimation for cross-validation with Origami
-
-
-
- Prediction from a Lassi Model
diff --git a/man/cv_lasso_early_stopping.Rd b/man/cv_lasso_early_stopping.Rd
deleted file mode 100644
index cea940fa..00000000
--- a/man/cv_lasso_early_stopping.Rd
+++ /dev/null
@@ -1,26 +0,0 @@
-% Generated by roxygen2: do not edit by hand
-% Please edit documentation in R/cv_lasso_early_stopping.R
-\name{cv_lasso_early_stopping}
-\alias{cv_lasso_early_stopping}
-\title{Cross-validated LASSO on Indicator Bases}
-\usage{
-cv_lasso_early_stopping(x_basis, y, n_lambda = 100, n_folds = 10)
-}
-\arguments{
-\item{x_basis}{A \code{dgCMatrix} object corresponding to a sparse matrix of
-the basis functions generated for the HAL algorithm.}
-
-\item{y}{A \code{numeric} vector of the observed outcome variable values.}
-
-\item{n_lambda}{A \code{numeric} scalar indicating the number of values of
-the L1 regularization parameter (lambda) to be obtained from fitting the
-LASSO to the full data. Cross-validation is used to select an optimal
-lambda (that minimizes the risk) from among these.}
-
-\item{n_folds}{A \code{numeric} scalar for the number of folds to be used in
-the cross-validation procedure to select an optimal value of lambda.}
-}
-\description{
-Fits the LASSO regression using a customized procedure with cross-validation
-based on \pkg{origami}
-}
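For reference, the early-stopping rule that the deleted `cv_lasso_early_stopping()` implemented fits in a few lines of base R. A standalone sketch (the function name is hypothetical; the 0.1 tolerance matches the hard-coded ratio in the deleted source):

```r
# Walk lambdas from largest to smallest, tracking the CV-MSE minimum, and
# stop once the current MSE exceeds the minimum by more than `tol` of the
# observed null-to-minimum range; return the best lambda seen so far.
select_lambda_early_stop <- function(lambdas, cv_mses, tol = 0.1) {
  null_mse <- cv_mses[1] # MSE at lambda_max, i.e., the null model
  min_mse <- Inf
  for (step in seq_along(lambdas)) {
    min_mse <- min(min_mse, cv_mses[step])
    ratio <- (cv_mses[step] - min_mse) / (null_mse - min_mse)
    if (is.finite(ratio) && ratio > tol) break
  }
  lambdas[which.min(cv_mses[seq_len(step)])]
}
```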
diff --git a/man/lassi.Rd b/man/lassi.Rd
deleted file mode 100644
index 2803067e..00000000
--- a/man/lassi.Rd
+++ /dev/null
@@ -1,33 +0,0 @@
-% Generated by roxygen2: do not edit by hand
-% Please edit documentation in R/lassi.R
-\name{lassi}
-\alias{lassi}
-\title{Custom Lasso implementation for matrices of indicator functions}
-\usage{
-lassi(
- x,
- y,
- lambdas = NULL,
- nlambda = 100,
- lambda_min_ratio = 0.01,
- center = FALSE
-)
-}
-\arguments{
-\item{x}{The covariate matrix}
-
-\item{y}{The outcome vector}
-
-\item{lambdas}{A sequence of values for the L1 regularization parameter
-(lambda) to be used in fitting the LASSO. Defaults to \code{NULL}.}
-
-\item{nlambda}{number of lambdas to fit.}
-
-\item{lambda_min_ratio}{ratio of largest to smallest lambda to fit.}
-
-\item{center}{...}
-}
-\description{
-Custom Lasso implementation for matrices of indicator functions
-}
-\keyword{internal}
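With `lassi()` removed, the package's Lasso step runs through \pkg{glmnet}. A rough sketch of an equivalent call, under the assumption that the removed `nlambda` and `lambda_min_ratio` arguments map onto glmnet's `nlambda` and `lambda.min.ratio`:

```r
library(glmnet)
set.seed(67)
x <- matrix(rbinom(100 * 20, 1, 0.3), nrow = 100) # indicator-style design
y <- rnorm(100)
# roughly the defaults of the removed lassi(): 100 lambdas, min ratio 0.01
fit <- glmnet(x, y, family = "gaussian", nlambda = 100,
              lambda.min.ratio = 0.01, standardize = TRUE)
dim(coef(fit)) # (p + 1) x nlambda coefficient path
```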
diff --git a/man/lassi_fit_module.Rd b/man/lassi_fit_module.Rd
deleted file mode 100644
index 35929eae..00000000
--- a/man/lassi_fit_module.Rd
+++ /dev/null
@@ -1,8 +0,0 @@
-% Generated by roxygen2: do not edit by hand
-% Please edit documentation in R/lassi.R
-\name{lassi_fit_module}
-\alias{lassi_fit_module}
-\title{Rcpp module: lassi_fit_module}
-\description{
-Rcpp module: lassi_fit_module
-}
diff --git a/man/lassi_origami.Rd b/man/lassi_origami.Rd
deleted file mode 100644
index dbfa5899..00000000
--- a/man/lassi_origami.Rd
+++ /dev/null
@@ -1,28 +0,0 @@
-% Generated by roxygen2: do not edit by hand
-% Please edit documentation in R/cv_lasso.R
-\name{lassi_origami}
-\alias{lassi_origami}
-\title{Single Lasso estimation for cross-validation with Origami}
-\usage{
-lassi_origami(fold, data, lambdas, center = FALSE)
-}
-\arguments{
-\item{fold}{A \code{fold} object produced by a call to \code{make_folds}
-from the \pkg{origami}.}
-
-\item{data}{A \code{dgCMatrix} object containing the outcome values (Y) in
-its first column and vectors corresponding to the basis functions of HAL in
-all other columns. Consult the description of HAL regression for details.}
-
-\item{lambdas}{A \code{numeric} vector corresponding to a sequence of lambda
-values obtained by fitting the Lasso on the full data.}
-
-\item{center}{binary. If \code{TRUE}, covariates are centered. This is much
-slower, but matches the \code{glmnet} implementation. Default \code{FALSE}.}
-}
-\description{
-Fits Lasso regression over a single fold of a cross-validated data set. This
-is meant to be called using \code{\link[origami]{cross_validate}}, which is
-done through \code{\link{cv_lasso}}. Note that this procedure is NOT meant
-to be invoked by itself. INTERNAL USE ONLY.
-}
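The per-fold job that `lassi_origami()` performed survives in spirit: fit on the training split over a fixed lambda sequence, then report lambda-wise validation MSE. A sketch with \pkg{glmnet} standing in for the removed `lassi()` (the function name here is hypothetical; `data` carries y in column 1 and the basis functions in the other columns, as in the deleted docs above):

```r
library(origami)
library(glmnet)

cv_lasso_fold <- function(fold, data, lambdas) {
  # origami supplies the fold via training()/validation()
  train <- training(data)
  valid <- validation(data)
  fit <- glmnet(x = train[, -1], y = train[, 1], lambda = lambdas)
  preds <- predict(fit, newx = valid[, -1])
  # lambda-wise MSE on the held-out split, one row per fold
  mses <- colMeans((preds - valid[, 1])^2)
  list(mses = matrix(mses, nrow = 1))
}
```

Run through `origami::cross_validate(cv_lasso_fold, folds, data, lambdas)`, this reproduces the fold-by-lambda MSE matrix the deleted tests consumed.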
diff --git a/man/lassi_predict.Rd b/man/lassi_predict.Rd
deleted file mode 100644
index 6737dfba..00000000
--- a/man/lassi_predict.Rd
+++ /dev/null
@@ -1,18 +0,0 @@
-% Generated by roxygen2: do not edit by hand
-% Please edit documentation in R/RcppExports.R
-\name{lassi_predict}
-\alias{lassi_predict}
-\title{Prediction from a Lassi Model}
-\usage{
-lassi_predict(X, beta, intercept)
-}
-\arguments{
-\item{X}{A sparse matrix of HAL basis functions.}
-
-\item{beta}{A vector of coefficient values for the HAL basis functions.}
-
-\item{intercept}{A numeric value giving the intercept of the HAL model.}
-}
-\description{
-Prediction from a Lassi Model
-}
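The removed `lassi_predict()` exploited the 0/1 structure of the HAL basis: it adds `beta[k]` to every row in which column k has a stored entry. For an indicator basis this coincides with an ordinary sparse product, so a one-line R equivalent exists (`lassi_predict_r` is a hypothetical name):

```r
library(Matrix)
# Equivalent of the removed routine for a 0/1 dgCMatrix of basis functions
lassi_predict_r <- function(X, beta, intercept) {
  as.vector(X %*% beta) + intercept
}
```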
diff --git a/man/predict.lassi.Rd b/man/predict.lassi.Rd
deleted file mode 100644
index aede8220..00000000
--- a/man/predict.lassi.Rd
+++ /dev/null
@@ -1,19 +0,0 @@
-% Generated by roxygen2: do not edit by hand
-% Please edit documentation in R/lassi.R
-\name{predict.lassi}
-\alias{predict.lassi}
-\title{Predict Method for Lasso on Indicator Bases}
-\usage{
-\method{predict}{lassi}(fit, new_x_basis, lambdas = NULL)
-}
-\arguments{
-\item{fit}{...}
-
-\item{new_x_basis}{...}
-
-\item{lambdas}{...}
-}
-\description{
-Predict Method for Lasso on Indicator Bases
-}
-\keyword{internal}
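The deleted method looped over requested lambdas one at a time; vectorized, the same lookup-and-predict step is just column matching. A sketch under the same indicator-basis assumption as above (hypothetical helper name):

```r
predict_at_lambdas <- function(beta_mat, intercepts, fit_lambdas, new_x,
                               lambdas = fit_lambdas) {
  stopifnot(all(lambdas %in% fit_lambdas)) # same guard as the deleted method
  cols <- match(lambdas, fit_lambdas)
  preds <- as.matrix(new_x %*% beta_mat[, cols, drop = FALSE])
  sweep(preds, 2, intercepts[cols], `+`) # add per-lambda intercepts
}
```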
diff --git a/src/RcppExports.cpp b/src/RcppExports.cpp
index 9ab10913..f47ac63b 100644
--- a/src/RcppExports.cpp
+++ b/src/RcppExports.cpp
@@ -35,19 +35,6 @@ BEGIN_RCPP
return rcpp_result_gen;
END_RCPP
}
-// lassi_predict
-NumericVector lassi_predict(const MSpMat X, const NumericVector beta, double intercept);
-RcppExport SEXP _hal9001_lassi_predict(SEXP XSEXP, SEXP betaSEXP, SEXP interceptSEXP) {
-BEGIN_RCPP
- Rcpp::RObject rcpp_result_gen;
- Rcpp::RNGScope rcpp_rngScope_gen;
- Rcpp::traits::input_parameter< const MSpMat >::type X(XSEXP);
- Rcpp::traits::input_parameter< const NumericVector >::type beta(betaSEXP);
- Rcpp::traits::input_parameter< double >::type intercept(interceptSEXP);
- rcpp_result_gen = Rcpp::wrap(lassi_predict(X, beta, intercept));
- return rcpp_result_gen;
-END_RCPP
-}
// make_basis_list
List make_basis_list(const NumericMatrix& X_sub, const NumericVector& cols, const IntegerVector& order_map);
RcppExport SEXP _hal9001_make_basis_list(SEXP X_subSEXP, SEXP colsSEXP, SEXP order_mapSEXP) {
@@ -137,12 +124,9 @@ BEGIN_RCPP
END_RCPP
}
-RcppExport SEXP _rcpp_module_boot_lassi_module();
-
static const R_CallMethodDef CallEntries[] = {
{"_hal9001_index_first_copy", (DL_FUNC) &_hal9001_index_first_copy, 1},
{"_hal9001_apply_copy_map", (DL_FUNC) &_hal9001_apply_copy_map, 2},
- {"_hal9001_lassi_predict", (DL_FUNC) &_hal9001_lassi_predict, 3},
{"_hal9001_make_basis_list", (DL_FUNC) &_hal9001_make_basis_list, 3},
{"_hal9001_meets_basis", (DL_FUNC) &_hal9001_meets_basis, 5},
{"_hal9001_evaluate_basis", (DL_FUNC) &_hal9001_evaluate_basis, 4},
@@ -150,7 +134,6 @@ static const R_CallMethodDef CallEntries[] = {
{"_hal9001_as_dgCMatrix", (DL_FUNC) &_hal9001_as_dgCMatrix, 1},
{"_hal9001_calc_pnz", (DL_FUNC) &_hal9001_calc_pnz, 1},
{"_hal9001_calc_xscale", (DL_FUNC) &_hal9001_calc_xscale, 2},
- {"_rcpp_module_boot_lassi_module", (DL_FUNC) &_rcpp_module_boot_lassi_module, 0},
{NULL, NULL, 0}
};
diff --git a/src/lassi_fit.cpp b/src/lassi_fit.cpp
deleted file mode 100644
index 9509bda5..00000000
--- a/src/lassi_fit.cpp
+++ /dev/null
@@ -1,446 +0,0 @@
-// [[Rcpp::depends(RcppEigen)]]
-#include <RcppEigen.h>
-#include "hal9001_types.h"
-#include "utils.h"
-#include <Rcpp/Benchmark/Timer.h>
-#include <cmath>
-
-using namespace Rcpp;
-
-
-class Lassi {
- const MSpMat X;
- int n;
- int p;
- bool center;
- double intercept;
- NumericVector resids;
- double resid_sum;
- double rss;
- double null_rss;
- NumericVector beta;
- NumericVector xcenter;
- NumericVector xscale;
- NumericVector lambdas;
- SpMat beta_mat;
- NumericVector intercepts;
- /*
- * variable_state is a vector of a lazy-person's enumerated type
- * state=2 is the active_set (possibly also the strong_set)
- * state=1 is the strong_set, but not the active_set
- * state=0 is neither the strong_set nor the active_set
- */
- IntegerVector variable_state;
- NumericVector safe_lambda;
- double lambda_max;
-public:
- Lassi(const MSpMat X_init, NumericVector y, int nlambda,
- double lambda_min_ratio, bool center):
- X(X_init),
- n(X_init.rows()),
- p(X_init.cols()),
- center(center),
- intercept(mean(y)),
- resids(y-intercept),
- resid_sum(sum(resids)),
- rss(sum(resids * resids)),
- null_rss(rss),
- beta(NumericVector(p,0.0)),
- lambdas(NumericVector(nlambda, 0.0)),
- beta_mat(SpMat(p, nlambda)),
- intercepts(NumericVector(nlambda,0.0)),
- variable_state(IntegerVector(p, 0))
- {
- // get centering and scaling vectors if applicable
- if(center){
- xcenter = calc_pnz(X);
- } else {
- xcenter = NumericVector(p, 0.0);
- }
-
- xscale = calc_xscale(X, xcenter);
-
- // initialize lambda_max and lambda vector
- // X_t_resid is used for lots of things:
- // strong rule
- // kkt violation
- // beta update
- // so we compute beta updates
- // then we check strong filtering
- // then we check for kkt violations (all at once, but why)
-
- lambda_max = 0;
- double new_beta;
- for (int j = 0; j < p; ++j) {
- new_beta = X_t_resid(j) / n;
- new_beta = std::abs(new_beta);
- if (new_beta > lambda_max) {
-
- lambda_max = new_beta;
- }
- }
-
- double log_lambda_max = log(lambda_max);
- double log_lambda_min = log(lambda_min_ratio*lambda_max);
- double lambda_step_size = (log_lambda_max - log_lambda_min) / (nlambda - 1);
-
- for(int i = 0; i < nlambda; i++){
- lambdas[i] = exp(log_lambda_max - i*lambda_step_size);
- }
-
- //below this lambda, we must check if variable is now active.
- safe_lambda = NumericVector(p, lambda_max);
-
- beta_mat.reserve(0.2 * p * nlambda);
- }
-
- const MSpMat& get_X(){
- return(X);
- }
-
- NumericVector get_beta(){
- return(beta);
- }
-
- SpMat get_beta_mat(){
- beta_mat.makeCompressed();
- return(beta_mat);
- }
-
- NumericVector get_intercepts(){
- return(intercepts);
- }
-
- NumericVector get_lambdas(){
- return(lambdas);
- }
-
- void set_lambdas(NumericVector new_lambdas){
- if(new_lambdas.length()!=lambdas.length()){
- stop("length(lambdas) must match nlambda passed on construction");
- }
-
- lambdas=new_lambdas;
- }
-
- NumericVector get_resids(){
- return(resids);
- }
-
- NumericVector get_xscale(){
- return(xscale);
- }
-
- NumericVector get_xcenter(){
- return(xcenter);
- }
-
- double X_t_resid(int j) {
- double crossprod_sum = 0;
- for (MInIterMat i_(X, j); i_; ++i_) {
- crossprod_sum += resids[i_.index()];
- }
-
- if(center){
- crossprod_sum = (crossprod_sum - xcenter[j] * resid_sum) / xscale[j];
- } else {
- crossprod_sum = crossprod_sum / xscale[j];
- }
-
- return(crossprod_sum);
- }
-
- double get_new_beta(int j) {
- double crossprod_sum = X_t_resid(j);
- double new_beta = crossprod_sum / n + beta[j];
- return(new_beta);
- }
-
- double find_lambda_max(){
- return(lambda_max);
- }
-
- void update_resid(int j, double beta_diff) {
-
- double scaled_diff = beta_diff / xscale[j];
-
- // if(center){
-  //   for (int i = 0; i < n; i++) {
-  //     resids[i] += scaled_diff * xcenter[j];
-  //   }
-  // }
-  for (MInIterMat i_(X, j); i_; ++i_) {
-    resids[i_.index()] -= scaled_diff;
-  }
-  resid_sum = sum(resids);
-  rss = sum(resids * resids);
-  }
-
-  double update_coord(int j, double lambda) {
-  double new_beta = get_new_beta(j);
-  new_beta = soft_threshold(new_beta, lambda);
-  double beta_diff = new_beta - beta[j];
-
-  if (std::abs(beta_diff) > 1e-7) {
-
- update_resid(j, beta_diff);
- beta[j] = new_beta;
- } else {
- beta_diff = 0;
- }
- return(beta_diff);
- }
-
- int update_coords(double lambda, bool active_set) {
- // update coordinates one-by-one
- int j;
- double old_rss = rss;
- int updates = 0;
- double update;
- for (j = 0; j < X.outerSize(); ++j) {
- if(!(active_set) || beta[j]!=0){
- update = update_coord(j, lambda);
-
- // see if we decreased the rss
- // todo: should be relative to null deviance
- if(update!=0){
- if((old_rss-rss)/null_rss > 1e-7){
- updates++;
- }
- old_rss = rss;
- }
- }
- }
-
- // update intercept
- double mean_resid = mean(resids);
- resids = resids - mean_resid;
- intercept += mean_resid;
-
- // Rcout << "Updated " << updated << " coords" << std::endl;
- return(updates);
- }
-
- int check_kkt(double lambda) {
- return(0);
- }
-
- int lassi_fit_cd(int lambda_step, bool active_set, int nsteps) {
- double lambda = lambdas[lambda_step];
-
- int step_num = 0;
- double last_rss = rss;
- double ratio = 0;
- int updated=0;
- // Rcout << "Starting mse " << mse << std::endl;
- for (step_num = 0; step_num < nsteps; step_num++) {
- last_rss = rss;
-
- updated = update_coords(lambda, active_set);
- // rss = sum(resids*resids);
- // we failed to substantially improve any coords
- if (updated == 0) {
- break;
- }
-
- ratio = (last_rss - rss) / last_rss;
-
- if (ratio < 1e-2) {
- break;
- }
- }
-
- //copy nz betas into beta_mat col
- for (int j = 0; j < p; j++) {
- if(beta[j]!=0){
- beta_mat.insert(j, lambda_step) = beta[j];
- }
- }
-
- intercepts[lambda_step] = intercept;
- return(step_num);
- }
-
- NumericVector complex_approach(int lambda_step){
- // use active set
- // update_coords until convergence
- // check kkt for strong set, if violations, add to active, continue iterating
- // check kkt for all preds, if violations, add to active, recompute strong, continue iterating
- // basically, prioritize strong set before other preds when activating vars
-
- // kkt violations in non strong preds are very rare
- // step 2 is active set
- // step 1 is strong set
- // step 0 is full set
- int step_num=2;
- int steps=0;
- int max_steps=1000;
- // double old_rss = rss;
- double loop_rss = rss;
- double lambda = lambdas[lambda_step];
- double old_lambda;
- double thresh = 1e-7 * null_rss / n;
- if(lambda_step==0){
- old_lambda = lambda;
- } else {
- old_lambda = lambdas[lambda_step-1];
- }
- double strong_criterion = 2*lambda - old_lambda;
-
- Timer timer;
- timer.step("start");
- while((steps<max_steps) && (step_num>=0)){
- int updates=0;
- double update;
- int checked=0;
- double max_update=0.0;
-    for(int j=0; j<p; j++){
-      // skip predictors outside the set for the current step
-      if(step_num==2 && variable_state[j]<2){ continue; }
-      if(step_num==1 && variable_state[j]<1){ continue; }
-      checked++;
-
-      double update = X_t_resid(j) / n;
-      double new_beta = soft_threshold(update + beta[j], lambda);
-      double beta_diff = new_beta - beta[j];
-
-      if (std::abs(beta_diff) > 1e-7) {
-
- update_resid(j, beta_diff);
- beta[j] = new_beta;
- updates++;
- }
-
- double something = beta_diff * beta_diff;
- if(something>max_update){
- max_update=something;
- }
-
- if(std::abs(update) > lambda){
- if(step_num<2){
- // if not already, put in active set
- variable_state[j]=2;
- }
-
- } else {
- //put in strong if not currently and criteria met
- if(step_num==0){
-
- //update strong
- if(std::abs(update) > strong_criterion){
- variable_state[j]=1;
- }
-
- //update safe
- //we need to start checking this predictor again
- //when lambda gets smaller than safe_lambda
- double rnorm=std::sqrt(rss)/n;
- safe_lambda[j]=lambda*((rnorm+std::abs(update))/(rnorm+lambda));
- // Rcout << "rnorm: " << rnorm << " update: " << update << " current: "
- // << lambda << " next_safe: "<< safe_lambda[j] << std::endl;
- }
- }
- }
- }
-
-    if(max_update<thresh){
-      break;
-    }
-    timer.step(tfm::format("%d, %d, %d, %d, %f", steps, step_num, checked, updates, max_update));
- // Rcout << "step: " << steps << " step_num: " << step_num << " updates: " << updates
- // << " loop_rss: "<< loop_rss << " old_rss: "<< old_rss << " ratio: " << (loop_rss-old_rss)/loop_rss << std::endl;
- loop_rss = rss;
-
- if(updates==0){
- //if we failed to update, move on to the next step (lower numbered)
- step_num--;
- } else{
- //if we updated anything, we should go back to the active set step
- step_num=2;
- }
- steps++;
- }
- //copy nz betas into beta_mat col
- for (int j = 0; j < p; j++) {
- if(beta[j]!=0){
- beta_mat.insert(j, lambda_step) = beta[j];
- }
- }
- intercepts[lambda_step] = intercept;
- NumericVector res(timer);
- return(res);
- }
-};
-
-RCPP_MODULE(lassi_module) {
- class_<Lassi>( "Lassi" )
- .constructor<MSpMat, NumericVector, int, double, bool>()
- .method( "update_coord", &Lassi::update_coord)
- .method( "update_coords", &Lassi::update_coords)
-
- .method( "lassi_fit_cd", &Lassi::lassi_fit_cd )
- .method( "complex_approach", &Lassi::complex_approach)
- .method( "X_t_resid", &Lassi::X_t_resid)
- .property( "beta", &Lassi::get_beta)
- .property( "beta_mat", &Lassi::get_beta_mat)
- .property( "intercepts", &Lassi::get_intercepts)
-
- .property( "resids", &Lassi::get_resids)
- .property( "xscale", &Lassi::get_xscale)
- .property( "xcenter", &Lassi::get_xcenter)
- .property( "X", &Lassi::get_X)
- .property( "lambdas", &Lassi::get_lambdas, &Lassi::set_lambdas)
- .property( "lambda_max", &Lassi::find_lambda_max)
- ;
-}
-
-//' Prediction from a Lassi Model
-//'
-//' @param X A sparse matrix of HAL basis functions.
-//' @param beta A vector of coefficient values for the HAL basis functions.
-//' @param intercept A numeric value giving the intercept of the HAL model.
-//'
-// [[Rcpp::export]]
-NumericVector lassi_predict(const MSpMat X, const NumericVector beta,
- double intercept) {
- int n = X.rows();
- NumericVector pred(n, intercept);
- int k = 0;
- double current_beta;
-
- for (k = 0; k < X.outerSize(); ++k) {
- current_beta = beta[k];
-
- for (MInIterMat it_(X, k); it_; ++it_) {
- pred[it_.row()] += current_beta;
- }
- }
- return(pred);
-}
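The heart of the deleted class is a penalized coordinate update on standardized columns. A plain-R rendering of one such update follows — standard Lasso coordinate descent with soft-thresholding, offered as a sketch of the mechanism rather than a line-for-line port (assumes columns of `x` have unit scale, as the xscale member arranged):

```r
soft_threshold <- function(z, lambda) sign(z) * pmax(abs(z) - lambda, 0)

# One coordinate-descent update: partial-residual gradient, soft-threshold,
# then keep the residual vector in sync with the changed coefficient.
update_coord_r <- function(x, resid, beta, j, lambda) {
  n <- length(resid)
  new_beta <- soft_threshold(sum(x[, j] * resid) / n + beta[j], lambda)
  resid <- resid - x[, j] * (new_beta - beta[j])
  beta[j] <- new_beta
  list(beta = beta, resid = resid)
}
```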
diff --git a/tests/testthat/test-cv_lasso.R b/tests/testthat/test-cv_lasso.R
index 8054e4d3..70e34775 100644
--- a/tests/testthat/test-cv_lasso.R
+++ b/tests/testthat/test-cv_lasso.R
@@ -33,7 +33,6 @@ x_basis <- x_basis[, unique_columns]
# cv.glmnet reference
################################################################################
-
# create fold ID object for using the same folds between cv.glmnet and origami
folds <- make_folds(n)
fold_id <- origami:::folds2foldvec(folds)
@@ -43,114 +42,8 @@ lasso_glmnet <- glmnet::cv.glmnet(
x = x_basis, y = y, nfolds = n_folds,
foldid = fold_id
)
-glmnet_nlambda <- length(lasso_glmnet$lambda)
-
-
-################################################################################
-# CV-LASSO custom implementation
-################################################################################
-
-# first, need to run lasso on the full data to get a sequence of lambdas
-lasso_init <- hal9001:::lassi(y = y, x = x_basis, center = TRUE)
-lambdas_init <- lasso_init$lambdas
-
-test_that("Check that procedure to generate lambdas matches that from glmnet", {
- expect_equal(lambdas_init[seq_len(glmnet_nlambda)], lasso_glmnet$lambda)
-})
-lambdas_init <- lambdas_init[seq_len(glmnet_nlambda)]
-
-# next, set up a cross-validated lasso using the sequence of lambdas
-full_data_mat <- cbind(y, x_basis)
-folds <- origami::make_folds(full_data_mat, V = n_folds)
-
-# run the cross-validated lasso procedure to find the optimal lambda
-cv_lasso_out <- origami::cross_validate(
- cv_fun = lassi_origami,
- folds = folds,
- data = full_data_mat,
- lambdas = lambdas_init,
- center = TRUE
-)
-
-# compute cv-mean of MSEs for each lambda
-lambdas_cvmse <- colMeans(cv_lasso_out$mses)
-
-# NOTE: there is an off-by-one error occurring in the computation of the optimal
-# lambda and the lambda 1 standard deviation above it. Based on manual
-# inspection, the custom CV-Lasso routine consistently selects an optimal
-# lambda that is slightly too large and a 1se-lambda slightly too small.
-
-# find the lambda that minimizes the MSE
-lambda_optim_index <- which.min(lambdas_cvmse) + 1
-lambda_minmse_origami <- lambdas_init[lambda_optim_index]
-
-# also need the adjusted CV standard for each lambda
-lambdas_cvsd <- apply(cv_lasso_out$mses, 2, sd) / sqrt(n_folds)
-
-# find the maximum lambda among those 1 standard error above the minimum
-lambda_min_1se <- (lambdas_cvmse + lambdas_cvsd)[lambda_optim_index]
-lambda_1se_origami <- max(lambdas_init[lambdas_cvmse <= lambda_min_1se],
- na.rm = TRUE
-)
-lambda_1se_index <- which.min(abs(lambdas_init - lambda_1se_origami))
-
-# create output object
-get_lambda_indices <- c(lambda_1se_index, lambda_optim_index)
-betas_out <- lasso_init$beta_mat[, get_lambda_indices]
-colnames(betas_out) <- c("lambda_1se", "lambda_min")
-
-# add in intercept term to coefs matrix and convert to sparse matrix output
-betas_out <- rbind(rep(mean(y), ncol(betas_out)), betas_out)
-betas_out <- as_dgCMatrix(betas_out * 1.0)
-coef_minmse_origami <- as.numeric(betas_out[, 2])
-coef_1se_origami <- as.numeric(betas_out[, 1])
-
-
-################################################################################
-# test set performance
-################################################################################
-
-# create fold ID object for using the same folds between cv.glmnet and origami
-fold_id <- origami::folds2foldvec(folds)
-
-# just use the standard implementation available in glmnet with origami folds
-lasso_glmnet <- glmnet::cv.glmnet(
- x = x_basis, y = y, nfolds = n_folds,
- foldid = fold_id
-)
lambda_minmse_cvglmnet <- lasso_glmnet$lambda.min
lambda_1se_cvglmnet <- lasso_glmnet$lambda.1se
coef_minmse_cvglmnet <- as.numeric(coef(lasso_glmnet, "lambda.min"))
coef_1se_cvglmnet <- as.numeric(coef(lasso_glmnet, "lambda.1se"))
betas_cvglmnet <- cbind(coef_1se_cvglmnet, coef_minmse_cvglmnet)
-
-# now use the glmnet implementation with origami folds and lassi lambdas
-lassi_glmnet <- glmnet::cv.glmnet(
- x = x_basis, y = y, nfolds = n_folds,
- foldid = fold_id, lambda = lambdas_init
-)
-lambda_minmse_cvglmnet_lassi <- lassi_glmnet$lambda.min
-lambda_1se_cvglmnet_lassi <- lassi_glmnet$lambda.1se
-coef_minmse_cvglmnet_lassi <- coef(lassi_glmnet, "lambda.min")
-coef_1se_cvglmnet_lassi <- coef(lassi_glmnet, "lambda.1se")
-betas_cvglmnet_lassi <- cbind(
- coef_1se_cvglmnet_lassi,
- coef_minmse_cvglmnet_lassi
-)
-
-################################################################################
-# TEST THAT ORIGAMI AND CV-GLMNET IMPLEMENTATIONS MATCH
-################################################################################
-test_that("lambda-min difference between cv.glmnet, cv_lasso within 0.5%.", {
- expect_lte(
- lambda_minmse_origami - lambda_minmse_cvglmnet,
- lambda_minmse_cvglmnet * 0.005
- )
-})
-
-test_that("lambda-1se difference between cv.glmnet and cv_lasso within 1%.", {
- expect_lte(
- lambda_minmse_origami - lambda_minmse_cvglmnet,
- lambda_minmse_cvglmnet * 0.01
- )
-})
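The deleted block recomputed the selected lambdas by hand; the same quantities are recoverable from the `cv.glmnet` object kept above, which is a useful check if a manual computation ever returns. The `cvm` and `cvsd` fields are part of the documented `cv.glmnet` return value:

```r
min_idx <- which.min(lasso_glmnet$cvm)
lambda_min_manual <- lasso_glmnet$lambda[min_idx]
# largest lambda whose CV-MSE is within one standard error of the minimum
lambda_1se_manual <- max(lasso_glmnet$lambda[
  lasso_glmnet$cvm <= lasso_glmnet$cvm[min_idx] + lasso_glmnet$cvsd[min_idx]
])
stopifnot(
  all.equal(lambda_min_manual, lasso_glmnet$lambda.min),
  all.equal(lambda_1se_manual, lasso_glmnet$lambda.1se)
)
```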
diff --git a/tests/testthat/test-lasso.R b/tests/testthat/test-lasso.R
index c8ac8c34..9d6dc756 100644
--- a/tests/testthat/test-lasso.R
+++ b/tests/testthat/test-lasso.R
@@ -37,80 +37,6 @@ system.time({
)
})
-#################################################
-# test scaling and centering
-lassi_fit <- methods::new(hal9001:::Lassi, x_basis, y, 100, 0.01, TRUE)
-xcenter <- lassi_fit$xcenter
-xscale <- lassi_fit$xscale
-
-# apply scaling and centering
-xcentered <- sweep(x_basis, 2, xcenter, "-")
-
-# xscale_r <- apply(xcentered, 2, sd) * sqrt((n-1)/n) # bessel correction
-xcenter_scaled <- sweep(xcentered, 2, xscale, "/")
-
-cs_means <- colMeans(as.matrix(xcenter_scaled))
-# bessel correction
-cs_sd <- apply(xcenter_scaled, 2, sd) * sqrt((n - 1) / n)
-
-test_that("centering and scaling x works", {
- expect_lt(max(abs(cs_means)), 1e-8)
- expect_lt(max(abs(cs_sd[cs_sd != 0] - 1)), 1e-8)
-})
-
-
-#################################################
-# test generating the sequence of lambdas
-test_that("lambda sequence matches glmnet", {
- expect_equal(lassi_fit$lambdas, glmnet_fit$lambda)
-})
-lambda_max <- lassi_fit$lambdas[1]
-
-# verify that lambda max zeros out coefficients
-lassi_fit$update_coords(lambda_max, FALSE)
-test_that("lambda_max results in zero beta vector", {
- expect_true(all(lassi_fit$beta == 0))
-})
-
-# verify that a slightly smaller lambda does not
-delta <- 1 - 1e-3
-lambda_not_quite_max <- lambda_max * delta
-lassi_fit$update_coords(lambda_not_quite_max, FALSE)
-test_that(
- "a slightly smaller lambda results in nonzero beta vector",
- expect_true(!all(lassi_fit$beta == 0))
-)
-
-#################################################
-# test a single coordinate descent update
-lassi_fit <- new(hal9001:::Lassi, x_basis, y, 100, 0.01, FALSE)
-n <- length(y)
-
-# which beta to update (1 - indexed)
-i <- 1
-xvar <- lassi_fit$X[, i]
-xscale_i <- lassi_fit$xscale[i]
-resid <- lassi_fit$resids
-
-ls_beta <- coef(lm(resid ~ xvar - 1)) * xscale_i
-# cd_beta <- mean(xvar/xscale_i * resid)
-
-lassi_fit$update_coord(i - 1, 0)
-beta_new <- lassi_fit$beta[i]
-
-
-test_that("coordinate descent works as it does in R", {
- expect_equivalent(ls_beta, beta_new)
-})
-
-new_resid <- lassi_fit$resids
-verify_new_resid <- y - mean(y) - beta_new * xvar / xscale_i
-
-test_that("the updated residuals are as expected", {
- expect_equal(new_resid, verify_new_resid)
-})
-
-
################################################################################
# PREDICTION
################################################################################
@@ -118,32 +44,7 @@ test_that("the updated residuals are as expected", {
new_data <- as.matrix(test_x)
pred_x_basis <- hal9001:::make_design_matrix(new_data, basis_list)
pred_x_basis <- hal9001:::apply_copy_map(pred_x_basis, copy_map)
-
-# lassi prediction and mses
-system.time({
- lassi_fit <- hal9001:::lassi(x_basis, y, center = FALSE)
-})
-
-system.time({
- lassi_fit <- hal9001:::lassi(x_basis, y, center = TRUE)
-})
-
-pred_mat <- predict(lassi_fit, pred_x_basis)
-mses <- apply(pred_mat, 2, function(preds) {
- mean((preds - test_y)^2)
-})
-
-
gpred_mat <- predict(glmnet_fit, pred_x_basis)
gmses <- apply(gpred_mat, 2, function(preds) {
mean((preds - test_y)^2)
})
-
-test_that("lassi isn't doing much worse in terms of MSE", {
- expect_lt((min(mses) - min(gmses)) / min(gmses), 0.05)
-})
-
-# library(ggplot2)
-# combined = data.frame(lambda = seq_len(100), mse = c(mses, gmses),
-# type = rep(c("lassi", "glmnet"), each = 100))
-# ggplot(combined, aes(x = lambda, y = mse, color = type)) + geom_line()
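The deleted lambda-sequence test checked agreement with glmnet's largest lambda. If that check is ever wanted again without `lassi`, it can be phrased directly against `glmnet_fit`. A sketch assuming a gaussian family, default weights, and glmnet's population-SD standardization convention (`x_basis`, `y`, and `glmnet_fit` as constructed earlier in this file):

```r
n <- length(y)
# rescale R's sample-SD standardization to glmnet's population-SD convention
x_std <- scale(as.matrix(x_basis)) * sqrt(n / (n - 1))
x_std[is.nan(x_std)] <- 0 # constant columns standardize to zero
lambda_max_manual <- max(abs(crossprod(x_std, y - mean(y)))) / n
test_that("manual lambda_max matches the head of glmnet's lambda sequence", {
  expect_equal(lambda_max_manual, max(glmnet_fit$lambda))
})
```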