From 990bd40360159df9daf833156c9a6a7659dd15a5 Mon Sep 17 00:00:00 2001 From: Oliver Hines Date: Sat, 6 Apr 2024 15:26:39 +0200 Subject: [PATCH] Some attention is all you need (#1) --- .github/CODEOWNERS | 1 + .github/PULL_REQUEST_TEMPLATE.md | 7 + .github/workflows/ci.yml | 24 +++ .gitignore | 4 + DESCRIPTION | 6 +- LICENSE | 21 +++ NAMESPACE | 3 + R/IFglm.R | 20 ++- R/fit_aipw.R | 182 ++++++++++++----------- R/fit_ipw.R | 109 +++++++------- R/fit_psmatch.R | 116 ++++++++------- R/fit_ra.R | 145 +++++++++--------- R/teffects.R | 247 ++++++++++++++++++------------- man/IF.glm.Rd | 14 +- man/fit_aipw.Rd | 47 +++--- man/fit_ipw.Rd | 33 +++-- man/fit_psmatch.Rd | 44 +++--- man/fit_ra.Rd | 38 +++-- man/teffect.Rd | 98 +++++++----- tests/testthat.R | 4 + tests/testthat/test-teffects.R | 27 ++++ 21 files changed, 711 insertions(+), 479 deletions(-) create mode 100644 .github/CODEOWNERS create mode 100644 .github/PULL_REQUEST_TEMPLATE.md create mode 100644 .github/workflows/ci.yml create mode 100644 .gitignore create mode 100644 LICENSE create mode 100644 tests/testthat.R create mode 100644 tests/testthat/test-teffects.R diff --git a/.github/CODEOWNERS b/.github/CODEOWNERS new file mode 100644 index 0000000..5230727 --- /dev/null +++ b/.github/CODEOWNERS @@ -0,0 +1 @@ +* @ohines \ No newline at end of file diff --git a/.github/PULL_REQUEST_TEMPLATE.md b/.github/PULL_REQUEST_TEMPLATE.md new file mode 100644 index 0000000..18173bb --- /dev/null +++ b/.github/PULL_REQUEST_TEMPLATE.md @@ -0,0 +1,7 @@ +# Motivation + + + +# Changes + + \ No newline at end of file diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml new file mode 100644 index 0000000..23eb0b3 --- /dev/null +++ b/.github/workflows/ci.yml @@ -0,0 +1,24 @@ +# Workflow derived from https://github.com/r-lib/actions/tree/v2/examples +# Need help debugging build failures? Start at https://github.com/r-lib/actions#where-to-find-help +name: R-CMD-check +on: "push" + +jobs: + R-CMD-check: + runs-on: ubuntu-latest + env: + GITHUB_PAT: ${{ secrets.GITHUB_TOKEN }} + R_KEEP_PKG_SOURCE: yes + steps: + - uses: actions/checkout@v4 + + - uses: r-lib/actions/setup-r@v2 + with: + use-public-rspm: true + + - uses: r-lib/actions/setup-r-dependencies@v2 + with: + extra-packages: any::rcmdcheck + needs: check + + - uses: r-lib/actions/check-r-package@v2 \ No newline at end of file diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..ba7a4ba --- /dev/null +++ b/.gitignore @@ -0,0 +1,4 @@ +.DS_Store +.Rproj.user +.lintr +.Rhistory \ No newline at end of file diff --git a/DESCRIPTION b/DESCRIPTION index 08d53d0..a18e504 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -5,9 +5,11 @@ Version: 1.0 Author: Oliver Hines Maintainer: Oliver Hines Description: Fit causal treatment-effect estimands using regression adjusment and methods based on the propensity score. -License: TBC +License: MIT + file LICENSE Encoding: UTF-8 LazyData: true Imports: Matching -RoxygenNote: 7.1.0 +Suggests: + testthat +RoxygenNote: 7.3.1 diff --git a/LICENSE b/LICENSE new file mode 100644 index 0000000..4f63719 --- /dev/null +++ b/LICENSE @@ -0,0 +1,21 @@ +MIT License + +Copyright (c) 2024 Oliver Hines + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in all +copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +SOFTWARE. \ No newline at end of file diff --git a/NAMESPACE b/NAMESPACE index d8214b7..0ce6af9 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -7,3 +7,6 @@ export(fit_ipw) export(fit_psmatch) export(fit_ra) export(teffect) +importFrom(stats,family) +importFrom(stats,gaussian) +importFrom(stats,quasibinomial) diff --git a/R/IFglm.R b/R/IFglm.R index 3d041fe..d5473ce 100644 --- a/R/IFglm.R +++ b/R/IFglm.R @@ -1,14 +1,18 @@ #' Influence Functions from Generalized Linear Models #' -#' \code{IF.glm} returns a matrix of influence functions from either a \code{glm} model object (run with \code{x=TRUE}), -#' or a \code{glm.fit} object (in which case the user must parse the design matrix, \code{x}). -#' The resulting matrix has one row for each observation, and one column for each GLM model parameter. +#' \code{IF.glm} returns a matrix of influence functions from either a +#' \code{glm} model object (run with \code{x=TRUE}), or a \code{glm.fit} +#' object (in which case the user must parse the design matrix, \code{x}). +#' The resulting matrix has one row for each observation, and one column +#' for each GLM model parameter. #' -#' @param object A \code{glm} model object (run with \code{x=TRUE}), or a \code{glm.fit} object -#' @param x The model matrix. Not required for a \code{glm} model object (run with \code{x=TRUE}). +#' @param object A \code{glm} model object (run with \code{x=TRUE}), +#' or a \code{glm.fit} object +#' @param x The model matrix. +#' Not required for a \code{glm} model object (run with \code{x=TRUE}). #' @return Matrix of influence function values. #' @export -IF.glm <- function(object,x=object$x){ - NROW(object$y)*(x*object$prior.weights*(object$y-object$fitted.values))%*%chol2inv(object$qr$qr) -} \ No newline at end of file +IF.glm <- function(object, x = object$x) { + NROW(object$y) * (x * object$prior.weights * (object$y - object$fitted.values)) %*% chol2inv(object$qr$qr) +} diff --git a/R/fit_aipw.R b/R/fit_aipw.R index 9669eb3..97e720a 100644 --- a/R/fit_aipw.R +++ b/R/fit_aipw.R @@ -1,105 +1,117 @@ #' Fit Augmented Inverse Probability Weighting #' -#' Fitting function called by \link[teffectsR]{teffect} when the method is set to \code{'AIPW'}. -#' Augmented Inverse Probability Weighting combines the IPW method and the RA method. -#' A (binomial \link[stats]{glm}) exposure model is fitted which provides observation weights. -#' Also a (\link[stats]{glm}) outcome model is fitted to each treatment subgroup, and used to predict potential outcomes. +#' Fitting function called by \link[teffectsR]{teffect} when the method is set +#' to \code{'AIPW'}. +#' Augmented Inverse Probability Weighting combines the IPW method and the RA +#' method. +#' A (binomial \link[stats]{glm}) exposure model is fitted which provides +#' observation weights. +#' Also a (\link[stats]{glm}) outcome model is fitted to each treatment +#' subgroup, and used to predict potential outcomes. #' #' @param X numeric vector of (0,1) specifiying the treatment variable. #' @param Y numeric vector sepcifying the outcome variable #' @param Zx design matrix for the exposure (propensity score) model. -#' @param Zy design matrix for the outcome model to be fitted to each treatment subgroup. -#' @param Ofam family function for the outcome model. See \link[stats]{family} for details of family functions. -#' @param treatment.effect the treament effect estimand of interest. +#' @param Zy design matrix for the outcome model to be fitted to each treatment +#' subgroup. +#' @param Ofam family function for the outcome model. +#' See \link[stats]{family} for details of family functions. +#' @param treatment.effect the treament effect estimand of interest. #' Can be either \code{"ATE","ATT","ATC"} or \code{"All"} to fit all three. -#' @param weights an optional numeric vector of ‘observation weights’ to be used in the fitting process. -#' -#' @return List of fit parameters, which is used to derive an object of class \code{teffects} when called by \link[teffectsR]{teffects}. +#' @param weights an optional numeric vector of ‘observation weights’ to be used +#' in the fitting process. +#' +#' @return List of fit parameters, which is used to derive an object of class +#' \code{teffects} when called by \link[teffectsR]{teffect}. #' @examples -#' #generate some data -#' N = 50 -#' X = rnorm(N) #confounder -#' A = rbinom(N,1,plogis(X)) #treatment variable -#' Y = X+0.5*A #continuous outcome -#' Z = rbinom(N,1,plogis(Y)) #binary outcome -#' df = data.frame(X=X,A=A,Y=Y,Z=Z) +#' # generate some data +#' N <- 50 +#' X <- rnorm(N) # confounder +#' A <- rbinom(N, 1, plogis(X)) # treatment variable +#' Y <- X + 0.5 * A # continuous outcome +#' Z <- rbinom(N, 1, plogis(Y)) # binary outcome +#' df <- data.frame(X = X, A = A, Y = Y, Z = Z) #' -#' teffect(A~X,Y~X,data=df,method="AIPW") -#' teffect(A~X,Z~X,data=df,outcome.family="binomial",method="AIPW") +#' teffect(A ~ X, Y ~ X, data = df, method = "AIPW") +#' teffect(A ~ X, Z ~ X, data = df, outcome.family = "binomial", method = "AIPW") +#' +#' @importFrom stats gaussian quasibinomial family #' @export -fit_aipw <- function(X,Y,Zx,Zy,Ofam=gaussian(),treatment.effect = "ATE", weights=rep(1,N)){ - link <- Ofam$linkinv #invlink function for outcome model - dlink <- Ofam$mu.eta #derivative of mean wrt to linear predictor - N = NROW(X) - if (is.null(weights)){ +fit_aipw <- function(X, Y, Zx, Zy, Ofam = gaussian(), treatment.effect = "ATE", weights = rep(1, N)) { + link <- Ofam$linkinv # invlink function for outcome model + dlink <- Ofam$mu.eta # derivative of mean wrt to linear predictor + N <- NROW(X) + if (is.null(weights)) { weights <- rep.int(1, N) } - - X.mod = glm.fit(Zx,X,family=quasibinomial(),weights=weights) - Y.mod1 = glm.fit(Zy,Y,family=Ofam,weights = weights*X) - Y.mod0 = glm.fit(Zy,Y,family=Ofam,weights = weights*(1-X)) - - IFa = IF.glm(X.mod,Zx) #Influence function of ps model - IFb1 = IF.glm(Y.mod1,Zy) #Influence function of outcome model - IFb0 = IF.glm(Y.mod0,Zy) #Influence function of outcome model - - ps = X.mod$fitted.values - eta1 = Y.mod1$linear.predictor - eta0 = Y.mod0$linear.predictor - - mu1 = link(eta1) - mu0 = link(eta0) - - X_ps = X/ps - cX_ps = (1-X)/(1-ps) - dX_ps = -X_ps*(1-ps) #derivative of weights - dcX_ps = cX_ps*ps - - #Get potential outcomes - Po1 = X_ps*Y + (1-X_ps)*mu1 - Po0 = cX_ps*Y + (1-cX_ps)*mu0 - - P.wts <- list(ATE = rep.int(1, N), ATT = X, ATC = 1-X) - - if (!treatment.effect %in% c("ATE","ATT","ATC","All")){ + + X.mod <- stats::glm.fit(Zx, X, family = quasibinomial(), weights = weights) + Y.mod1 <- stats::glm.fit(Zy, Y, family = Ofam, weights = weights * X) + Y.mod0 <- stats::glm.fit(Zy, Y, family = Ofam, weights = weights * (1 - X)) + + IFa <- teffectsR::IF.glm(X.mod, Zx) # Influence function of ps model + IFb1 <- teffectsR::IF.glm(Y.mod1, Zy) # Influence function of outcome model + IFb0 <- teffectsR::IF.glm(Y.mod0, Zy) # Influence function of outcome model + + ps <- X.mod$fitted.values + eta1 <- Y.mod1$linear.predictor + eta0 <- Y.mod0$linear.predictor + + mu1 <- link(eta1) + mu0 <- link(eta0) + + X_ps <- X / ps + cX_ps <- (1 - X) / (1 - ps) + dX_ps <- -X_ps * (1 - ps) # derivative of weights + dcX_ps <- cX_ps * ps + + # Get potential outcomes + Po1 <- X_ps * Y + (1 - X_ps) * mu1 + Po0 <- cX_ps * Y + (1 - cX_ps) * mu0 + + P.wts <- list(ATE = rep.int(1, N), ATT = X, ATC = 1 - X) + + if (!treatment.effect %in% c("ATE", "ATT", "ATC", "All")) { stop("'treatment.effect' not recognized") - } else if(treatment.effect != "All") P.wts <- P.wts[treatment.effect] - - - run <- lapply(P.wts, function(wt){ - wt <- as.vector(weights)*wt - N.wt = sum(wt) - - Po1.mean = sum(Po1*wt)/N.wt - Po0.mean = sum(Po0*wt)/N.wt - - xbar1a = ((Y-mu1)*dX_ps*wt)%*%Zx/N.wt - xbar0a = ((Y-mu0)*dcX_ps*wt)%*%Zx/N.wt - xbar1b = ((1-X_ps)*dlink(eta1)*wt)%*%Zy/N.wt - xbar0b = ((1-cX_ps)*dlink(eta0)*wt)%*%Zy/N.wt - - IF1 = (Po1 - Po1.mean)*wt*N/N.wt + IFa%*%t(xbar1a) + IFb1%*%t(xbar1b) - IF0 = (Po0 - Po0.mean)*wt*N/N.wt + IFa%*%t(xbar0a) + IFb0%*%t(xbar0b) - Var = sum((IF1-IF0)^2)/N^2 - TE = Po1.mean-Po0.mean - - a = list() + } + + if (treatment.effect != "All") P.wts <- P.wts[treatment.effect] + + run <- lapply(P.wts, function(wt) { + wt <- as.vector(weights) * wt + N.wt <- sum(wt) + + Po1.mean <- sum(Po1 * wt) / N.wt + Po0.mean <- sum(Po0 * wt) / N.wt + + xbar1a <- ((Y - mu1) * dX_ps * wt) %*% Zx / N.wt + xbar0a <- ((Y - mu0) * dcX_ps * wt) %*% Zx / N.wt + xbar1b <- ((1 - X_ps) * dlink(eta1) * wt) %*% Zy / N.wt + xbar0b <- ((1 - cX_ps) * dlink(eta0) * wt) %*% Zy / N.wt + + IF1 <- (Po1 - Po1.mean) * wt * N / N.wt + IFa %*% t(xbar1a) + IFb1 %*% t(xbar1b) + IF0 <- (Po0 - Po0.mean) * wt * N / N.wt + IFa %*% t(xbar0a) + IFb0 %*% t(xbar0b) + Var <- sum((IF1 - IF0)^2) / N^2 + TE <- Po1.mean - Po0.mean + + a <- list() a$coefs <- TE a$std.err <- sqrt(Var) - a$Wald <- TE^2/Var + a$Wald <- TE^2 / Var a$PO.means <- c(Po1 = Po1.mean, Po0 = Po0.mean) - a$PO.std.err<- c(Po1 = sqrt(sum(IF1^2))/N, Po0 = sqrt(sum(IF0^2))/N ) - return(a) + a$PO.std.err <- c(Po1 = sqrt(sum(IF1^2)) / N, Po0 = sqrt(sum(IF0^2)) / N) + a }) - + run <- simplify2array(run) - - a <- list(coef = as.double(run["coefs",]), - std.err = as.double(run["std.err",]), - Wald = as.double(run["Wald",]), - Po.means = run["PO.means",], - Po.std.err = run["PO.std.err",]) + + a <- list( + coef = as.double(run["coefs", ]), + std.err = as.double(run["std.err", ]), + Wald = as.double(run["Wald", ]), + Po.means = run["PO.means", ], + Po.std.err = run["PO.std.err", ] + ) names(a$coef) <- names(a$std.err) <- names(a$Wald) <- names(P.wts) - return(a) - + a } diff --git a/R/fit_ipw.R b/R/fit_ipw.R index e9ed065..d291988 100644 --- a/R/fit_ipw.R +++ b/R/fit_ipw.R @@ -1,66 +1,71 @@ #' Fit Inverse Probability Weighting #' -#' Fitting function called by \link[teffectsR]{teffect} when the method is set to \code{'IPW'}. -#' Inverse Probability Weighting fits a (binomial \link[stats]{glm}) model for the exposure (propensity score model). -#' Predictions from this model are used to weight the observations and derive potential outcome means. +#' Fitting function called by \link[teffectsR]{teffect} when the method is +#' set to \code{'IPW'}. +#' Inverse Probability Weighting fits a (binomial \link[stats]{glm}) model +#' for the exposure (propensity score model). +#' Predictions from this model are used to weight the observations and derive +#' potential outcome means. #' This implementation normalizes the propesnity score weights. #' This is sometimes called the "sample bounded" IPW estimator. -#' +#' #' @param X numeric vector of (0,1) specifiying the treatment variable. #' @param Y numeric vector sepcifying the outcome variable #' @param Zx design matrix for the exposure (propensity score) model. -#' @param weights an optional numeric vector of ‘observation weights’ to be used in the fitting process. -#' -#' @return List of fit parameters, which is used to derive an object of class \code{teffects} when called by \link[teffectsR]{teffects}. -#' @examples -#' #generate some data -#' N = 50 -#' X = rnorm(N) #confounder -#' A = rbinom(N,1,plogis(X)) #treatment variable -#' Y = X+0.5*A #continuous outcome -#' Z = rbinom(N,1,plogis(Y)) #binary outcome -#' df = data.frame(X=X,A=A,Y=Y,Z=Z) +#' @param weights an optional numeric vector of ‘observation weights’ to be used +#' in the fitting process. #' -#' teffect(A~X,Y~1,data=df,method="IPW") -#' teffect(A~X,Z~1,data=df,method="IPW") +#' @return List of fit parameters, which is used to derive an object of class +#' \code{teffects} when called by \link[teffectsR]{teffect}. +#' @examples +#' # generate some data +#' N <- 50 +#' X <- rnorm(N) # confounder +#' A <- rbinom(N, 1, plogis(X)) # treatment variable +#' Y <- X + 0.5 * A # continuous outcome +#' Z <- rbinom(N, 1, plogis(Y)) # binary outcome +#' df <- data.frame(X = X, A = A, Y = Y, Z = Z) #' +#' teffect(A ~ X, Y ~ 1, data = df, method = "IPW") +#' teffect(A ~ X, Z ~ 1, data = df, method = "IPW") +#' +#' @importFrom stats quasibinomial family #' @export -fit_ipw <- function(X,Y,Zx,weights=rep(1,N)){ - N = NROW(X) - if (is.null(weights)){ +fit_ipw <- function(X, Y, Zx, weights = rep(1, N)) { + N <- NROW(X) + if (is.null(weights)) { weights <- rep.int(1, N) } - wt = as.vector(weights) - N.wt = sum(wt) - - X.mod = glm.fit(Zx,X,family=quasibinomial(),weights=wt) - IFa = IF.glm(X.mod,Zx) #Influence function of ps model - - ps = X.mod$fitted.values - X_ps = X/ps - cX_ps = (1-X)/(1-ps) - dX_ps = -X_ps*(1-ps) #derivative of weights - dcX_ps = cX_ps*ps - N.wt1 = sum(wt*X_ps) - N.wt0 = sum(wt*cX_ps) - - Po1.mean = sum(Y*X_ps*wt)/N.wt1 - Po0.mean = sum(Y*cX_ps*wt)/N.wt0 - - xbar1a = ((Y-Po1.mean)*dX_ps*wt)%*%Zx/N.wt1 - xbar0a = ((Y-Po0.mean)*dcX_ps*wt)%*%Zx/N.wt0 - - IF1 = (Y - Po1.mean)*wt*X_ps*N/N.wt1 + IFa%*%t(xbar1a) - IF0 = (Y - Po0.mean)*wt*cX_ps*N/N.wt0 + IFa%*%t(xbar0a) - - TE = Po1.mean-Po0.mean - Var = sum((IF1-IF0)^2)/N^2 - - a = list() - a$coefs <- c(ATE=TE) - a$std.err <- c(ATE=sqrt(Var)) - a$Wald <- c(ATE= TE^2/Var) + wt <- as.vector(weights) + + X.mod <- stats::glm.fit(Zx, X, family = quasibinomial(), weights = wt) + IFa <- teffectsR::IF.glm(X.mod, Zx) # Influence function of ps model + + ps <- X.mod$fitted.values + X_ps <- X / ps + cX_ps <- (1 - X) / (1 - ps) + dX_ps <- -X_ps * (1 - ps) # derivative of weights + dcX_ps <- cX_ps * ps + N.wt1 <- sum(wt * X_ps) + N.wt0 <- sum(wt * cX_ps) + + Po1.mean <- sum(Y * X_ps * wt) / N.wt1 + Po0.mean <- sum(Y * cX_ps * wt) / N.wt0 + + xbar1a <- ((Y - Po1.mean) * dX_ps * wt) %*% Zx / N.wt1 + xbar0a <- ((Y - Po0.mean) * dcX_ps * wt) %*% Zx / N.wt0 + + IF1 <- (Y - Po1.mean) * wt * X_ps * N / N.wt1 + IFa %*% t(xbar1a) + IF0 <- (Y - Po0.mean) * wt * cX_ps * N / N.wt0 + IFa %*% t(xbar0a) + + TE <- Po1.mean - Po0.mean + Var <- sum((IF1 - IF0)^2) / N^2 + + a <- list() + a$coefs <- c(ATE = TE) + a$std.err <- c(ATE = sqrt(Var)) + a$Wald <- c(ATE = TE^2 / Var) a$PO.means <- c(Po1 = Po1.mean, Po0 = Po0.mean) - a$PO.std.err<- c(Po1 = sqrt(sum(IF1^2))/N, Po0 = sqrt(sum(IF0^2))/N ) - return(a) + a$PO.std.err <- c(Po1 = sqrt(sum(IF1^2)) / N, Po0 = sqrt(sum(IF0^2)) / N) + a } diff --git a/R/fit_psmatch.R b/R/fit_psmatch.R index aedd9fb..2de092a 100644 --- a/R/fit_psmatch.R +++ b/R/fit_psmatch.R @@ -1,79 +1,91 @@ #' Fit Propensity Score Matching #' -#' Fitting function called by \link[teffectsR]{teffect} when the method is set to \code{'PSMatch'}. -#' Propensity Score Matching fits a (binomial \link[stats]{glm}) model for the exposure (propensity score model). -#' Observations from each treatment group are matched based on predictions from this model. +#' Fitting function called by \link[teffectsR]{teffect} when the method is set +#' to \code{'PSMatch'}. +#' Propensity Score Matching fits a (binomial \link[stats]{glm}) model for the +#' exposure (propensity score model). +#' Observations from each treatment group are matched based on predictions from +#' this model. #' See \code{\link[Matching]{Match}} for details on Matching. #' #' @param X numeric vector of (0,1) specifiying the treatment variable. #' @param Y numeric vector sepcifying the outcome variable #' @param Zx design matrix for the exposure (propensity score) model. -#' @param Zy design matrix for the outcome model to be fitted to each treatment subgroup. -#' @param Ofam family function for the outcome model. See \link[stats]{family} for details of family functions. -#' @param treatment.effect the treament effect estimand of interest. +#' @param treatment.effect the treament effect estimand of interest. #' Can be either \code{"ATE","ATT"} or \code{"ATC"}. -#' @param weights an optional numeric vector of ‘observation weights’ to be used in the fitting process. -#' @param ... arguments to be passed to the matching function. See \link[Matching]{Match} for details. +#' @param weights an optional numeric vector of ‘observation weights’ to be used +#' in the fitting process. +#' @param ... arguments to be passed to the matching function. +#' See\ link[Matching]{Match} for details. #' #' -#' @return List of fit parameters, which is used to derive an object of class \code{teffects} when called by \link[teffectsR]{teffects}. +#' @return List of fit parameters, which is used to derive an object of class +#' \code{teffects} when called by \link[teffectsR]{teffect}. #' @examples -#' #generate some data -#' N = 50 -#' X = rnorm(N) #confounder -#' A = rbinom(N,1,plogis(X)) #treatment variable -#' Y = X+0.5*A #continuous outcome -#' Z = rbinom(N,1,plogis(Y)) #binary outcome -#' df = data.frame(X=X,A=A,Y=Y,Z=Z) +#' # generate some data +#' N <- 50 +#' X <- rnorm(N) # confounder +#' A <- rbinom(N, 1, plogis(X)) # treatment variable +#' Y <- X + 0.5 * A # continuous outcome +#' Z <- rbinom(N, 1, plogis(Y)) # binary outcome +#' df <- data.frame(X = X, A = A, Y = Y, Z = Z) #' -#' teffect(A~X,Y~1,data=df,method="PSMatch") -#' teffect(A~X,Z~1,data=df,method="PSMatch") +#' teffect(A ~ X, Y ~ 1, data = df, method = "PSMatch") +#' teffect(A ~ X, Z ~ 1, data = df, method = "PSMatch") #' @export -fit_psmatch<- function(X,Y,Zx,treatment.effect="ATE",weights=rep(1,N),...){ - - if(treatment.effect == "All"){ - P.wts = list(ATE="ATE",ATT="ATT",ATC="ATC") - }else if(treatment.effect %in% c("ATE","ATT","ATC")){ - P.wts = list(TE=treatment.effect) +fit_psmatch <- function(X, Y, Zx, treatment.effect = "ATE", weights = rep(1, N), ...) { + if (treatment.effect == "All") { + P.wts <- list(ATE = "ATE", ATT = "ATT", ATC = "ATC") + } else if (treatment.effect %in% c("ATE", "ATT", "ATC")) { + P.wts <- list(TE = treatment.effect) names(P.wts) <- treatment.effect - }else{ + } else { stop(gettextf("treatment.effect must be 'ATE', 'ATT', 'ATC' or 'All'.")) } - - if (is.null(weights)){ + + if (is.null(weights)) { + N <- NROW(X) weights <- rep.int(1, N) } - - ps = glm.fit(Zx,X,family=quasibinomial(),weights=weights)$fitted.values - run <- lapply(P.wts, function(TE){ - Mod_match = Matching::Match(Y=Y,Tr=X,X=ps, - estimand = TE, - weights = weights, - ...) - - if(Mod_match$ndrops.matches>0){ - warning(gettextf("Observations dropped while estimating '%s' due to caliper = '%i'",TE, - Mod_match$ndrops.matches)) + ps <- stats::glm.fit( + Zx, X, + family = quasibinomial(), weights = weights + )$fitted.values + + run <- lapply(P.wts, function(TE) { + Mod_match <- Matching::Match( + Y = Y, Tr = X, X = ps, + estimand = TE, + weights = weights, + ... + ) + + if (Mod_match$ndrops.matches > 0) { + warning(gettextf( + "Observations dropped while estimating '%s' due to caliper = '%i'", TE, + Mod_match$ndrops.matches + )) } - a = list() - a$coefs <- c(TE=Mod_match$est) + a <- list() + a$coefs <- c(TE = Mod_match$est) a$std.err <- c(Mod_match$se) - a$Wald <- c(TE=(Mod_match$est/Mod_match$se)^2) + a$Wald <- c(TE = (Mod_match$est / Mod_match$se)^2) names(a$coefs) <- names(a$std.err) <- names(a$Wald) <- treatment.effect - + a$PO.means <- c(Po1 = NA, Po0 = NA) - a$PO.std.err<- c(Po1 = NA, Po0 = NA ) - (a) + a$PO.std.err <- c(Po1 = NA, Po0 = NA) + a }) - + run <- simplify2array(run) - a <- list(coef = as.double(run["coefs",]), - std.err = as.double(run["std.err",]), - Wald = as.double(run["Wald",]), - Po.means = run["PO.means",], - Po.std.err = run["PO.std.err",]) + a <- list( + coef = as.double(run["coefs", ]), + std.err = as.double(run["std.err", ]), + Wald = as.double(run["Wald", ]), + Po.means = run["PO.means", ], + Po.std.err = run["PO.std.err", ] + ) names(a$coef) <- names(a$std.err) <- names(a$Wald) <- names(P.wts) - return(a) - + a } diff --git a/R/fit_ra.R b/R/fit_ra.R index 95b4078..057b055 100644 --- a/R/fit_ra.R +++ b/R/fit_ra.R @@ -1,89 +1,100 @@ #' Fit Regression Adjusment #' -#' Fitting function called by \link[teffectsR]{teffect} when the method is set to \code{'RA'}. -#' Regression adjusment fits a \link[stats]{glm} model for the outcome to each treatment subgroup. +#' Fitting function called by \link[teffectsR]{teffect} when the method is set +#' to \code{'RA'}. +#' Regression adjusment fits a \link[stats]{glm} model for the outcome to each +#' treatment subgroup. #' Potential outcomes are derived using these outcome models. #' #' @param X numeric vector of (0,1) specifiying the treatment variable. #' @param Y numeric vector sepcifying the outcome variable -#' @param Zy design matrix for the outcome model to be fitted to each treatment subgroup. -#' @param Ofam family function for the outcome model. See \link[stats]{family} for details of family functions. -#' @param treatment.effect the treament effect estimand of interest. +#' @param Zy design matrix for the outcome model to be fitted to each treatment +#' subgroup. +#' @param Ofam family function for the outcome model. +#' See \link[stats]{family} for details of family functions. +#' @param treatment.effect the treament effect estimand of interest. #' Can be either \code{"ATE","ATT","ATC"} or \code{"All"} to fit all three. -#' @param weights an optional numeric vector of ‘observation weights’ to be used in the fitting process. -#' -#' @return List of fit parameters, which is used to derive an object of class \code{teffects} when called by \link[teffectsR]{teffects}. +#' @param weights an optional numeric vector of ‘observation weights’ to be used +#' in the fitting process. +#' +#' @return List of fit parameters, which is used to derive an object of class +#' \code{teffects} when called by \link[teffectsR]{teffect}. #' @examples -#' #generate some data -#' N = 50 -#' X = rnorm(N) #confounder -#' A = rbinom(N,1,plogis(X)) #treatment variable -#' Y = X+0.5*A #continuous outcome -#' Z = rbinom(N,1,plogis(Y)) #binary outcome -#' df = data.frame(X=X,A=A,Y=Y,Z=Z) +#' # generate some data +#' N <- 50 +#' X <- rnorm(N) # confounder +#' A <- rbinom(N, 1, plogis(X)) # treatment variable +#' Y <- X + 0.5 * A # continuous outcome +#' Z <- rbinom(N, 1, plogis(Y)) # binary outcome +#' df <- data.frame(X = X, A = A, Y = Y, Z = Z) #' -#' teffect(A~1,Y~X,data=df,method="RA") -#' teffect(A~1,Z~X,data=df,outcome.family="binomial",method="RA") +#' teffect(A ~ 1, Y ~ X, data = df, method = "RA") +#' teffect(A ~ 1, Z ~ X, data = df, outcome.family = "binomial", method = "RA") +#' @importFrom stats gaussian family #' @export -fit_ra<- function(X,Y,Zy,Ofam=gaussian(),treatment.effect="ATE",weights=rep(1,N)){ +fit_ra <- function(X, Y, Zy, Ofam = gaussian(), treatment.effect = "ATE", weights = rep(1, N)) { ## Using math formulas similar to https://boris.unibe.ch/130362/1/jann-2019-influencefunctions.pdf - - link <- Ofam$linkinv #invlink function for outcome model - dlink <- Ofam$mu.eta #derivative of mean wrt to linear predictor - N = NROW(X) - if (is.null(weights)){ + + link <- Ofam$linkinv # invlink function for outcome model + dlink <- Ofam$mu.eta # derivative of mean wrt to linear predictor + N <- NROW(X) + if (is.null(weights)) { weights <- rep.int(1, N) } - Y.mod1 = glm.fit(Zy,Y,family=Ofam,weights = weights*X) - Y.mod0 = glm.fit(Zy,Y,family=Ofam,weights = weights*(1-X)) - - IFb1 = IF.glm(Y.mod1,Zy) #Influence function of outcome model - IFb0 = IF.glm(Y.mod0,Zy) #Influence function of outcome model - - eta1 = Y.mod1$linear.predictor - eta0 = Y.mod0$linear.predictor - - Po1 = link(eta1) - Po0 = link(eta0) - - P.wts <- list(ATE = rep.int(1, N), ATT = X, ATC = 1-X) - - if (!treatment.effect %in% c("ATE","ATT","ATC","All")){ + Y.mod1 <- stats::glm.fit(Zy, Y, family = Ofam, weights = weights * X) + Y.mod0 <- stats::glm.fit(Zy, Y, family = Ofam, weights = weights * (1 - X)) + + IFb1 <- teffectsR::IF.glm(Y.mod1, Zy) # Influence function of outcome model + IFb0 <- teffectsR::IF.glm(Y.mod0, Zy) # Influence function of outcome model + + eta1 <- Y.mod1$linear.predictor + eta0 <- Y.mod0$linear.predictor + + Po1 <- link(eta1) + Po0 <- link(eta0) + + P.wts <- list(ATE = rep.int(1, N), ATT = X, ATC = 1 - X) + + if (!treatment.effect %in% c("ATE", "ATT", "ATC", "All")) { stop("'treatment.effect' not recognized") - } else if(treatment.effect != "All") P.wts <- P.wts[treatment.effect] - - run <- lapply(P.wts, function(wt){ - wt <- as.vector(weights)*wt - N.wt = sum(wt) - - Po1.mean = sum(Po1*wt)/N.wt - Po0.mean = sum(Po0*wt)/N.wt - - xbar1 = (dlink(eta1)*wt)%*%Zy/N.wt - xbar0 = (dlink(eta0)*wt)%*%Zy/N.wt - - IF1 = (Po1 - Po1.mean)*wt*N/N.wt + IFb1%*%t(xbar1) - IF0 = (Po0 - Po0.mean)*wt*N/N.wt + IFb0%*%t(xbar0) - - TE = Po1.mean-Po0.mean - Var = sum((IF1-IF0)^2)/N^2 - - a = list() + } + + if (treatment.effect != "All") P.wts <- P.wts[treatment.effect] + + run <- lapply(P.wts, function(wt) { + wt <- as.vector(weights) * wt + N.wt <- sum(wt) + + Po1.mean <- sum(Po1 * wt) / N.wt + Po0.mean <- sum(Po0 * wt) / N.wt + + xbar1 <- (dlink(eta1) * wt) %*% Zy / N.wt + xbar0 <- (dlink(eta0) * wt) %*% Zy / N.wt + + IF1 <- (Po1 - Po1.mean) * wt * N / N.wt + IFb1 %*% t(xbar1) + IF0 <- (Po0 - Po0.mean) * wt * N / N.wt + IFb0 %*% t(xbar0) + + TE <- Po1.mean - Po0.mean + Var <- sum((IF1 - IF0)^2) / N^2 + + a <- list() a$coefs <- TE a$std.err <- sqrt(Var) - a$Wald <- TE^2/Var + a$Wald <- TE^2 / Var a$PO.means <- c(Po1 = Po1.mean, Po0 = Po0.mean) - a$PO.std.err<- c(Po1 = sqrt(sum(IF1^2))/N, Po0 = sqrt(sum(IF0^2))/N ) - return(a) + a$PO.std.err <- c(Po1 = sqrt(sum(IF1^2)) / N, Po0 = sqrt(sum(IF0^2)) / N) + a }) - + run <- simplify2array(run) - a <- list(coef = as.double(run["coefs",]), - std.err = as.double(run["std.err",]), - Wald = as.double(run["Wald",]), - Po.means = run["PO.means",], - Po.std.err = run["PO.std.err",]) + a <- list( + coef = as.double(run["coefs", ]), + std.err = as.double(run["std.err", ]), + Wald = as.double(run["Wald", ]), + Po.means = run["PO.means", ], + Po.std.err = run["PO.std.err", ] + ) names(a$coef) <- names(a$std.err) <- names(a$Wald) <- names(P.wts) - return(a) + a } diff --git a/R/teffects.R b/R/teffects.R index 040ae48..e662b9c 100644 --- a/R/teffects.R +++ b/R/teffects.R @@ -1,156 +1,205 @@ #' Treatment Effects for Causal Inference #' -#' \code{teffects} is used to fit treatment effect estimands of a binary exposure, on an outcome, -#' given a set of variables which adjust for confounding. Estimands included are: the Average Treatment Effect (\code{"ATE"}), -#' the Average Treatment Effect in the Treated (\code{"ATT"}), and the Average Treatment Effect in the Controls (\code{"ATC"}). -#' -#' Several methods are implemented to adjust for confounding. Methods based on regression adjustment (\code{"\link[=fit_ra]{RA}","\link[=fit_aipw]{AIPW}"}) require +#' \code{teffects} is used to fit treatment effect estimands of a binary +#' exposure, on an outcome, given a set of variables which adjust for +#' confounding. Estimands included are: the Average Treatment Effect +#' (\code{"ATE"}), the Average Treatment Effect in the Treated +#' (\code{"ATT"}), and the Average Treatment Effect in the Controls +#' (\code{"ATC"}). +#' +#' Several methods are implemented to adjust for confounding. +#' Methods based on regression adjustment +#' (\code{"\link[=fit_ra]{RA}","\link[=fit_aipw]{AIPW}"}) require #' that a glm outcome model is specified (See \code{\link[stats]{glm}}). -#' Methods base on the propensity score (\code{"\link[=fit_ipw]{IPW}","\link[=fit_aipw]{AIPW}","\link[=fit_psmatch]{PSMatch}"}) require that a binomial glm model is sepcified for the exposure (propensity score model). -#' When models are not required, dummy models must speficied, to parse the appropriate exposure/ outcome variable. -#' E.g. when using the method \code{"RA"}, then the exposure model might be \code{A~1}, where \code{A} is the binary exposure variable of interest. +#' Methods base on the propensity score +#' (\code{"\link[=fit_ipw]{IPW}","\link[=fit_aipw]{AIPW}","\link[=fit_psmatch]{PSMatch}"}) +#' require that a binomial glm model is sepcified for the exposure +#' (propensity score model). +#' When models are not required, dummy models must speficied, to parse +#' the appropriate exposure/ outcome variable. +#' E.g. when using the method \code{"RA"}, then the exposure model might be +#' \code{A~1}, where \code{A} is the binary exposure variable of interest. #' The right hand side of these dummy models is ignored. #' -#' @param exposure.formula an object of class "\code{\link[stats]{formula}}" (or one that can be coerced to -#' that class) where the left hand side of the formula contains the binary exposure variable of interest. -#' @param outcome.formula an object of class "\code{\link[stats]{formula}}" (or one that can be coerced to -#' that class) where the left hand side of the formula contains the outcome variable of interest. The right hand side should -#' not contain the exposure. -#' @param outcome.family family function for the outcome model, can be can be a character string naming a family function, -#' a family function or the result of a call to a family function. (See \link[stats]{family} for details of family functions.) -#' For methods \code{"IPW"} and \code{"PSMatching"} no outcome model is required. -#' @param treatment.effect The treatment effect estimand. Can be either \code{"ATE","ATT","ATC"} or \code{"All"} to fit all three. +#' @param exposure.formula an object of class "\code{\link[stats]{formula}}" +#' (or one that can be coerced to that class) where the left hand side of the +#' formula contains the binary exposure variable of interest. +#' @param outcome.formula an object of class "\code{\link[stats]{formula}}" +#' (or one that can be coerced to that class) where the left hand side of the +#' formula contains the outcome variable of interest. +#' The right hand side should not contain the exposure. +#' @param outcome.family family function for the outcome model, can be can be a +#' character string naming a family function, a family function or the result of +#' a call to a family function. (See \link[stats]{family} for details of family +#' functions.) +#' For methods \code{"IPW"} and \code{"PSMatching"} no outcome model is +#' required. +#' @param treatment.effect The treatment effect estimand. Can be either +#' \code{"ATE","ATT","ATC"} or \code{"All"} to fit all three. #' The method, \code{"PSMatch"}, can only fit the ATE estimand. #' @param data an optional data frame, list or environment #' (or object coercible by \code{\link{as.data.frame}} to a data frame) #' containing the variables in the model. If not found in data, the #' variables are taken from environment(formula), typically the environment from #' which \code{teffects} is called. -#' @param weights an optional vector of ‘observation weights’ to be used in +#' @param weights an optional vector of ‘observation weights’ to be used in #' the fitting process. Should be \code{NULL} or a numeric vector. -#' @param method The mediation fitting method to be used. Can be either \code{"AIPW","IPW","RA"} or \code{"PSMatch"} -#' @param ... arguments to be passed to the matching function when using the \code{"PSMatch"} method. See \link[Matching]{Match} for details. +#' @param method The mediation fitting method to be used. Can be either +#' \code{"AIPW","IPW","RA"} or \code{"PSMatch"} +#' @param ... arguments to be passed to the matching function when using the +#' \code{"PSMatch"} method. +#' See \link[Matching]{Match} for details. #' #' @return An object of class \code{teffect} with effect estimates, -#' estimated standard errors, Wald based test statistics and estimated potnetial outcome means. +#' estimated standard errors, Wald based test statistics and estimated +#' potential outcome means. #' @examples -#' #Example on Generated data -#' N = 50 -#' X = rnorm(N) #confounder -#' A = rbinom(N,1,plogis(X)) #treatment variable -#' Y = X+0.5*A #continuous outcome -#' Z = rbinom(N,1,plogis(Y)) #binary outcome -#' df = data.frame(X=X,A=A,Y=Y,Z=Z) +#' # Example on Generated data +#' N <- 50 +#' X <- rnorm(N) # confounder +#' A <- rbinom(N, 1, plogis(X)) # treatment variable +#' Y <- X + 0.5 * A # continuous outcome +#' Z <- rbinom(N, 1, plogis(Y)) # binary outcome +#' df <- data.frame(X = X, A = A, Y = Y, Z = Z) +#' +#' # Fit AIPW by default +#' teffect(A ~ X, Y ~ X, data = df) +#' teffect(A ~ X, Z ~ X, data = df, outcome.family = "binomial") #' -#' #Fit AIPW by default -#' teffect(A~X,Y~X,data=df) -#' teffect(A~X,Z~X,data=df,outcome.family="binomial") -#' -#' #Fit IPW -#' teffect(A~X,Y~1,data=df,method="IPW") -#' teffect(A~X,Z~1,data=df,method="IPW") -#' -#' #Fit RA -#' teffect(A~1,Y~X,data=df,method="RA") -#' teffect(A~1,Z~X,data=df,outcome.family="binomial",method="RA") +#' # Fit IPW +#' teffect(A ~ X, Y ~ 1, data = df, method = "IPW") +#' teffect(A ~ X, Z ~ 1, data = df, method = "IPW") #' -#' #Fit PSMatch -#' teffect(A~X,Y~1,data=df,method="PSMatch") -#' teffect(A~X,Z~1,data=df,method="PSMatch") +#' # Fit RA +#' teffect(A ~ 1, Y ~ X, data = df, method = "RA") +#' teffect(A ~ 1, Z ~ X, data = df, outcome.family = "binomial", method = "RA") +#' +#' # Fit PSMatch +#' teffect(A ~ X, Y ~ 1, data = df, method = "PSMatch") +#' teffect(A ~ X, Z ~ 1, data = df, method = "PSMatch") #' @export -teffect <- function(exposure.formula,outcome.formula, - outcome.family="gaussian", +teffect <- function(exposure.formula, outcome.formula, + outcome.family = "gaussian", treatment.effect = "ATE", - data,weights,method="AIPW",...){ + data, weights, method = "AIPW", ...) { mf <- match.call(expand.dots = FALSE) - m <- match(c("exposure.formula","outcome.formula","data","weights"),names(mf), 0L) + m <- match( + c("exposure.formula", "outcome.formula", "data", "weights"), names(mf), 0L + ) - #Get exposure model frame if required - xf <- mf[c(1,m[c(1,3,4)])] + # Get exposure model frame if required + xf <- mf[c(1, m[c(1, 3, 4)])] xf[[1L]] <- quote(stats::model.frame) xf$drop.unused.levels <- TRUE names(xf)[2] <- "formula" xf <- eval(xf, parent.frame()) - X <- model.response(xf, "any") - weights <- as.vector(model.weights(xf)) + X <- stats::model.response(xf, "any") + weights <- as.vector(stats::model.weights(xf)) xt <- attr(xf, "terms") # allow model.frame to have updated it - Zx <- model.matrix(xt, xf) - - yf <- mf[c(1,m[c(2,3,4)])] + Zx <- stats::model.matrix(xt, xf) + + yf <- mf[c(1, m[c(2, 3, 4)])] yf[[1L]] <- quote(stats::model.frame) yf$drop.unused.levels <- TRUE names(yf)[2] <- "formula" yf <- eval(yf, parent.frame()) - Y <- model.response(yf, "any") + Y <- stats::model.response(yf, "any") yt <- attr(yf, "terms") # allow model.frame to have updated it - Zy <- model.matrix(yt, yf) - + Zy <- stats::model.matrix(yt, yf) + ## Check exposure and outcome families - if (method %in% c("IPW","AIPW","PSMatch") ){ - xfam = "binomial" - }else xfam <- "none" - if (method %in% c("RA","AIPW") ){ + if (method %in% c("IPW", "AIPW", "PSMatch")) { + xfam <- "binomial" + } else { + xfam <- "none" + } + if (method %in% c("RA", "AIPW")) { ## family - if(is.character(outcome.family)) - outcome.family <- get(outcome.family, mode = "function", envir = parent.frame()) - if(is.function(outcome.family)) Ofam <- outcome.family() + if (is.character(outcome.family)) { + outcome.family <- get( + outcome.family, + mode = "function", envir = parent.frame() + ) + } + if (is.function(outcome.family)) Ofam <- outcome.family() ofam <- Ofam$family - if(is.null(ofam)) { + if (is.null(ofam)) { print(family) stop("'family' not recognized") } - }else ofam <- "none" + } else { + ofam <- "none" + } ## Check Weights - if(!is.null(weights) && !is.numeric(weights)) + if (!is.null(weights) && !is.numeric(weights)) { stop("'weights' must be a numeric vector") - if( !is.null(weights) && any(weights < 0) ) + } + if (!is.null(weights) && any(weights < 0)) { stop("negative weights not allowed") - + } ## exposure family - if (method=="AIPW"){ - a <- fit_aipw(X,Y,Zx,Zy,Ofam,treatment.effect = treatment.effect, weights=weights) - }else if (method == "IPW"){ - if(treatment.effect != "ATE"){ + if (method == "AIPW") { + a <- teffectsR::fit_aipw( + X, Y, Zx, Zy, Ofam, + treatment.effect = treatment.effect, weights = weights + ) + } else if (method == "IPW") { + if (treatment.effect != "ATE") { warning("Method: 'IPW' only compatible with ATE. Estimating ATE. ") } - a <- fit_ipw(X,Y,Zx,weights=weights) - }else if (method == "RA"){ - a <- fit_ra(X,Y,Zy,Ofam,treatment.effect = treatment.effect,weights=weights) - } else if (method == "PSMatch"){ - a <- fit_psmatch(X,Y,Zx, - treatment.effect= treatment.effect, - weights=weights,...) - }else{ + a <- teffectsR::fit_ipw(X, Y, Zx, weights = weights) + } else if (method == "RA") { + a <- teffectsR::fit_ra( + X, Y, Zy, Ofam, + treatment.effect = treatment.effect, weights = weights + ) + } else if (method == "PSMatch") { + a <- teffectsR::fit_psmatch(X, Y, Zx, + treatment.effect = treatment.effect, + weights = weights, ... + ) + } else { stop("Method not recognized") } - - + + a$Method <- method a$call <- mf - a$exposure.family = xfam - a$outcome.family = ofam + a$exposure.family <- xfam + a$outcome.family <- ofam class(a) <- "teffect" a } #' @export -print.teffect <- function(object){ - a <- object +print.teffect <- function(x, ...) { + cat( + "\nCall:\n", + paste(deparse(x$call), + sep = "\n", + collapse = "\n" + ), "\n\n", + sep = "" + ) + cat( + "Fitting Method: \t", x$Method, + "\nExposure GLM family:\t", x$exposure.family, + "\nOutcome GLM family:\t", x$outcome.family + ) - cat("\nCall:\n", paste(deparse(a$call), sep = "\n", collapse = "\n"), "\n\n", sep = "") - cat("Fitting Method: \t",a$Method, - "\nExposure GLM family:\t",a$exposure.family, - "\nOutcome GLM family:\t",a$outcome.family) - cat("\n\nTreatment Effects:\n") - - df <- data.frame(Estimate = a$coef, Std.Error = a$std.err , - Wald.value = a$Wald, - Wald.pval = pchisq(a$Wald,df=1,lower.tail = F) ) - - printCoefmat(df, digits = 6, signif.stars = T,na.print = "NA", - tst.ind = 3,P.values=T,has.Pvalue=T) -} \ No newline at end of file + + df <- data.frame( + Estimate = x$coef, Std.Error = x$std.err, + Wald.value = x$Wald, + Wald.pval = stats::pchisq(x$Wald, df = 1, lower.tail = FALSE) + ) + + stats::printCoefmat(df, + digits = 6, signif.stars = TRUE, na.print = "NA", + tst.ind = 3, P.values = TRUE, has.Pvalue = TRUE + ) +} diff --git a/man/IF.glm.Rd b/man/IF.glm.Rd index 539b82d..eceb521 100644 --- a/man/IF.glm.Rd +++ b/man/IF.glm.Rd @@ -7,15 +7,19 @@ IF.glm(object, x = object$x) } \arguments{ -\item{object}{A \code{glm} model object (run with \code{x=TRUE}), or a \code{glm.fit} object} +\item{object}{A \code{glm} model object (run with \code{x=TRUE}), +or a \code{glm.fit} object} -\item{x}{The model matrix. Not required for a \code{glm} model object (run with \code{x=TRUE}).} +\item{x}{The model matrix. +Not required for a \code{glm} model object (run with \code{x=TRUE}).} } \value{ Matrix of influence function values. } \description{ -\code{IF.glm} returns a matrix of influence functions from either a \code{glm} model object (run with \code{x=TRUE}), - or a \code{glm.fit} object (in which case the user must parse the design matrix, \code{x}). - The resulting matrix has one row for each observation, and one column for each GLM model parameter. +\code{IF.glm} returns a matrix of influence functions from either a +\code{glm} model object (run with \code{x=TRUE}), or a \code{glm.fit} +object (in which case the user must parse the design matrix, \code{x}). + The resulting matrix has one row for each observation, and one column +for each GLM model parameter. } diff --git a/man/fit_aipw.Rd b/man/fit_aipw.Rd index ff48e89..42cf3fe 100644 --- a/man/fit_aipw.Rd +++ b/man/fit_aipw.Rd @@ -21,33 +21,42 @@ fit_aipw( \item{Zx}{design matrix for the exposure (propensity score) model.} -\item{Zy}{design matrix for the outcome model to be fitted to each treatment subgroup.} +\item{Zy}{design matrix for the outcome model to be fitted to each treatment +subgroup.} -\item{Ofam}{family function for the outcome model. See \link[stats]{family} for details of family functions.} +\item{Ofam}{family function for the outcome model. +See \link[stats]{family} for details of family functions.} -\item{treatment.effect}{the treament effect estimand of interest. +\item{treatment.effect}{the treament effect estimand of interest. Can be either \code{"ATE","ATT","ATC"} or \code{"All"} to fit all three.} -\item{weights}{an optional numeric vector of ‘observation weights’ to be used in the fitting process.} +\item{weights}{an optional numeric vector of ‘observation weights’ to be used +in the fitting process.} } \value{ -List of fit parameters, which is used to derive an object of class \code{teffects} when called by \link[teffectsR]{teffects}. +List of fit parameters, which is used to derive an object of class +\code{teffects} when called by \link[teffectsR]{teffect}. } \description{ -Fitting function called by \link[teffectsR]{teffect} when the method is set to \code{'AIPW'}. -Augmented Inverse Probability Weighting combines the IPW method and the RA method. -A (binomial \link[stats]{glm}) exposure model is fitted which provides observation weights. -Also a (\link[stats]{glm}) outcome model is fitted to each treatment subgroup, and used to predict potential outcomes. +Fitting function called by \link[teffectsR]{teffect} when the method is set +to \code{'AIPW'}. +Augmented Inverse Probability Weighting combines the IPW method and the RA +method. +A (binomial \link[stats]{glm}) exposure model is fitted which provides +observation weights. +Also a (\link[stats]{glm}) outcome model is fitted to each treatment +subgroup, and used to predict potential outcomes. } \examples{ -#generate some data -N = 50 -X = rnorm(N) #confounder -A = rbinom(N,1,plogis(X)) #treatment variable -Y = X+0.5*A #continuous outcome -Z = rbinom(N,1,plogis(Y)) #binary outcome -df = data.frame(X=X,A=A,Y=Y,Z=Z) - -teffect(A~X,Y~X,data=df,method="AIPW") -teffect(A~X,Z~X,data=df,outcome.family="binomial",method="AIPW") +# generate some data +N <- 50 +X <- rnorm(N) # confounder +A <- rbinom(N, 1, plogis(X)) # treatment variable +Y <- X + 0.5 * A # continuous outcome +Z <- rbinom(N, 1, plogis(Y)) # binary outcome +df <- data.frame(X = X, A = A, Y = Y, Z = Z) + +teffect(A ~ X, Y ~ X, data = df, method = "AIPW") +teffect(A ~ X, Z ~ X, data = df, outcome.family = "binomial", method = "AIPW") + } diff --git a/man/fit_ipw.Rd b/man/fit_ipw.Rd index fd3e7ce..4b235b6 100644 --- a/man/fit_ipw.Rd +++ b/man/fit_ipw.Rd @@ -13,28 +13,33 @@ fit_ipw(X, Y, Zx, weights = rep(1, N)) \item{Zx}{design matrix for the exposure (propensity score) model.} -\item{weights}{an optional numeric vector of ‘observation weights’ to be used in the fitting process.} +\item{weights}{an optional numeric vector of ‘observation weights’ to be used +in the fitting process.} } \value{ -List of fit parameters, which is used to derive an object of class \code{teffects} when called by \link[teffectsR]{teffects}. +List of fit parameters, which is used to derive an object of class +\code{teffects} when called by \link[teffectsR]{teffect}. } \description{ -Fitting function called by \link[teffectsR]{teffect} when the method is set to \code{'IPW'}. -Inverse Probability Weighting fits a (binomial \link[stats]{glm}) model for the exposure (propensity score model). -Predictions from this model are used to weight the observations and derive potential outcome means. +Fitting function called by \link[teffectsR]{teffect} when the method is +set to \code{'IPW'}. +Inverse Probability Weighting fits a (binomial \link[stats]{glm}) model +for the exposure (propensity score model). +Predictions from this model are used to weight the observations and derive + potential outcome means. This implementation normalizes the propesnity score weights. This is sometimes called the "sample bounded" IPW estimator. } \examples{ -#generate some data -N = 50 -X = rnorm(N) #confounder -A = rbinom(N,1,plogis(X)) #treatment variable -Y = X+0.5*A #continuous outcome -Z = rbinom(N,1,plogis(Y)) #binary outcome -df = data.frame(X=X,A=A,Y=Y,Z=Z) +# generate some data +N <- 50 +X <- rnorm(N) # confounder +A <- rbinom(N, 1, plogis(X)) # treatment variable +Y <- X + 0.5 * A # continuous outcome +Z <- rbinom(N, 1, plogis(Y)) # binary outcome +df <- data.frame(X = X, A = A, Y = Y, Z = Z) -teffect(A~X,Y~1,data=df,method="IPW") -teffect(A~X,Z~1,data=df,method="IPW") +teffect(A ~ X, Y ~ 1, data = df, method = "IPW") +teffect(A ~ X, Z ~ 1, data = df, method = "IPW") } diff --git a/man/fit_psmatch.Rd b/man/fit_psmatch.Rd index ade4d99..29bb2ba 100644 --- a/man/fit_psmatch.Rd +++ b/man/fit_psmatch.Rd @@ -13,35 +13,37 @@ fit_psmatch(X, Y, Zx, treatment.effect = "ATE", weights = rep(1, N), ...) \item{Zx}{design matrix for the exposure (propensity score) model.} -\item{treatment.effect}{the treament effect estimand of interest. +\item{treatment.effect}{the treament effect estimand of interest. Can be either \code{"ATE","ATT"} or \code{"ATC"}.} -\item{weights}{an optional numeric vector of ‘observation weights’ to be used in the fitting process.} +\item{weights}{an optional numeric vector of ‘observation weights’ to be used +in the fitting process.} -\item{...}{arguments to be passed to the matching function. See \link[Matching]{Match} for details.} - -\item{Zy}{design matrix for the outcome model to be fitted to each treatment subgroup.} - -\item{Ofam}{family function for the outcome model. See \link[stats]{family} for details of family functions.} +\item{...}{arguments to be passed to the matching function. +See\ link[Matching]{Match} for details.} } \value{ -List of fit parameters, which is used to derive an object of class \code{teffects} when called by \link[teffectsR]{teffects}. +List of fit parameters, which is used to derive an object of class +\code{teffects} when called by \link[teffectsR]{teffect}. } \description{ -Fitting function called by \link[teffectsR]{teffect} when the method is set to \code{'PSMatch'}. -Propensity Score Matching fits a (binomial \link[stats]{glm}) model for the exposure (propensity score model). -Observations from each treatment group are matched based on predictions from this model. +Fitting function called by \link[teffectsR]{teffect} when the method is set +to \code{'PSMatch'}. +Propensity Score Matching fits a (binomial \link[stats]{glm}) model for the +exposure (propensity score model). +Observations from each treatment group are matched based on predictions from +this model. See \code{\link[Matching]{Match}} for details on Matching. } \examples{ -#generate some data -N = 50 -X = rnorm(N) #confounder -A = rbinom(N,1,plogis(X)) #treatment variable -Y = X+0.5*A #continuous outcome -Z = rbinom(N,1,plogis(Y)) #binary outcome -df = data.frame(X=X,A=A,Y=Y,Z=Z) - -teffect(A~X,Y~1,data=df,method="PSMatch") -teffect(A~X,Z~1,data=df,method="PSMatch") +# generate some data +N <- 50 +X <- rnorm(N) # confounder +A <- rbinom(N, 1, plogis(X)) # treatment variable +Y <- X + 0.5 * A # continuous outcome +Z <- rbinom(N, 1, plogis(Y)) # binary outcome +df <- data.frame(X = X, A = A, Y = Y, Z = Z) + +teffect(A ~ X, Y ~ 1, data = df, method = "PSMatch") +teffect(A ~ X, Z ~ 1, data = df, method = "PSMatch") } diff --git a/man/fit_ra.Rd b/man/fit_ra.Rd index 91645f2..2aff184 100644 --- a/man/fit_ra.Rd +++ b/man/fit_ra.Rd @@ -18,32 +18,38 @@ fit_ra( \item{Y}{numeric vector sepcifying the outcome variable} -\item{Zy}{design matrix for the outcome model to be fitted to each treatment subgroup.} +\item{Zy}{design matrix for the outcome model to be fitted to each treatment +subgroup.} -\item{Ofam}{family function for the outcome model. See \link[stats]{family} for details of family functions.} +\item{Ofam}{family function for the outcome model. +See \link[stats]{family} for details of family functions.} -\item{treatment.effect}{the treament effect estimand of interest. +\item{treatment.effect}{the treament effect estimand of interest. Can be either \code{"ATE","ATT","ATC"} or \code{"All"} to fit all three.} -\item{weights}{an optional numeric vector of ‘observation weights’ to be used in the fitting process.} +\item{weights}{an optional numeric vector of ‘observation weights’ to be used +in the fitting process.} } \value{ -List of fit parameters, which is used to derive an object of class \code{teffects} when called by \link[teffectsR]{teffects}. +List of fit parameters, which is used to derive an object of class +\code{teffects} when called by \link[teffectsR]{teffect}. } \description{ -Fitting function called by \link[teffectsR]{teffect} when the method is set to \code{'RA'}. -Regression adjusment fits a \link[stats]{glm} model for the outcome to each treatment subgroup. +Fitting function called by \link[teffectsR]{teffect} when the method is set +to \code{'RA'}. +Regression adjusment fits a \link[stats]{glm} model for the outcome to each +treatment subgroup. Potential outcomes are derived using these outcome models. } \examples{ -#generate some data -N = 50 -X = rnorm(N) #confounder -A = rbinom(N,1,plogis(X)) #treatment variable -Y = X+0.5*A #continuous outcome -Z = rbinom(N,1,plogis(Y)) #binary outcome -df = data.frame(X=X,A=A,Y=Y,Z=Z) +# generate some data +N <- 50 +X <- rnorm(N) # confounder +A <- rbinom(N, 1, plogis(X)) # treatment variable +Y <- X + 0.5 * A # continuous outcome +Z <- rbinom(N, 1, plogis(Y)) # binary outcome +df <- data.frame(X = X, A = A, Y = Y, Z = Z) -teffect(A~1,Y~X,data=df,method="RA") -teffect(A~1,Z~X,data=df,outcome.family="binomial",method="RA") +teffect(A ~ 1, Y ~ X, data = df, method = "RA") +teffect(A ~ 1, Z ~ X, data = df, outcome.family = "binomial", method = "RA") } diff --git a/man/teffect.Rd b/man/teffect.Rd index dbedfb3..aef8fe1 100644 --- a/man/teffect.Rd +++ b/man/teffect.Rd @@ -16,18 +16,24 @@ teffect( ) } \arguments{ -\item{exposure.formula}{an object of class "\code{\link[stats]{formula}}" (or one that can be coerced to -that class) where the left hand side of the formula contains the binary exposure variable of interest.} +\item{exposure.formula}{an object of class "\code{\link[stats]{formula}}" +(or one that can be coerced to that class) where the left hand side of the +formula contains the binary exposure variable of interest.} -\item{outcome.formula}{an object of class "\code{\link[stats]{formula}}" (or one that can be coerced to -that class) where the left hand side of the formula contains the outcome variable of interest. The right hand side should -not contain the exposure.} +\item{outcome.formula}{an object of class "\code{\link[stats]{formula}}" +(or one that can be coerced to that class) where the left hand side of the +formula contains the outcome variable of interest. +The right hand side should not contain the exposure.} -\item{outcome.family}{family function for the outcome model, can be can be a character string naming a family function, -a family function or the result of a call to a family function. (See \link[stats]{family} for details of family functions.) -For methods \code{"IPW"} and \code{"PSMatching"} no outcome model is required.} +\item{outcome.family}{family function for the outcome model, can be can be a +character string naming a family function, a family function or the result of +a call to a family function. (See \link[stats]{family} for details of family +functions.) +For methods \code{"IPW"} and \code{"PSMatching"} no outcome model is +required.} -\item{treatment.effect}{The treatment effect estimand. Can be either \code{"ATE","ATT","ATC"} or \code{"All"} to fit all three. +\item{treatment.effect}{The treatment effect estimand. Can be either +\code{"ATE","ATT","ATC"} or \code{"All"} to fit all three. The method, \code{"PSMatch"}, can only fit the ATE estimand.} \item{data}{an optional data frame, list or environment @@ -36,52 +42,66 @@ containing the variables in the model. If not found in data, the variables are taken from environment(formula), typically the environment from which \code{teffects} is called.} -\item{weights}{an optional vector of ‘observation weights’ to be used in +\item{weights}{an optional vector of ‘observation weights’ to be used in the fitting process. Should be \code{NULL} or a numeric vector.} -\item{method}{The mediation fitting method to be used. Can be either \code{"AIPW","IPW","RA"} or \code{"PSMatch"}} +\item{method}{The mediation fitting method to be used. Can be either +\code{"AIPW","IPW","RA"} or \code{"PSMatch"}} -\item{...}{arguments to be passed to the matching function when using the \code{"PSMatch"} method. See \link[Matching]{Match} for details.} +\item{...}{arguments to be passed to the matching function when using the +\code{"PSMatch"} method. +See \link[Matching]{Match} for details.} } \value{ An object of class \code{teffect} with effect estimates, - estimated standard errors, Wald based test statistics and estimated potnetial outcome means. +estimated standard errors, Wald based test statistics and estimated +potential outcome means. } \description{ -\code{teffects} is used to fit treatment effect estimands of a binary exposure, on an outcome, -given a set of variables which adjust for confounding. Estimands included are: the Average Treatment Effect (\code{"ATE"}), -the Average Treatment Effect in the Treated (\code{"ATT"}), and the Average Treatment Effect in the Controls (\code{"ATC"}). +\code{teffects} is used to fit treatment effect estimands of a binary +exposure, on an outcome, given a set of variables which adjust for +confounding. Estimands included are: the Average Treatment Effect +(\code{"ATE"}), the Average Treatment Effect in the Treated +(\code{"ATT"}), and the Average Treatment Effect in the Controls +(\code{"ATC"}). } \details{ -Several methods are implemented to adjust for confounding. Methods based on regression adjustment (\code{"\link[=fit_ra]{RA}","\link[=fit_aipw]{AIPW}"}) require +Several methods are implemented to adjust for confounding. +Methods based on regression adjustment +(\code{"\link[=fit_ra]{RA}","\link[=fit_aipw]{AIPW}"}) require that a glm outcome model is specified (See \code{\link[stats]{glm}}). -Methods base on the propensity score (\code{"\link[=fit_ipw]{IPW}","\link[=fit_aipw]{AIPW}","\link[=fit_psmatch]{PSMatch}"}) require that a binomial glm model is sepcified for the exposure (propensity score model). -When models are not required, dummy models must speficied, to parse the appropriate exposure/ outcome variable. -E.g. when using the method \code{"RA"}, then the exposure model might be \code{A~1}, where \code{A} is the binary exposure variable of interest. +Methods base on the propensity score +(\code{"\link[=fit_ipw]{IPW}","\link[=fit_aipw]{AIPW}","\link[=fit_psmatch]{PSMatch}"}) +require that a binomial glm model is sepcified for the exposure +(propensity score model). +When models are not required, dummy models must speficied, to parse +the appropriate exposure/ outcome variable. +E.g. when using the method \code{"RA"}, then the exposure model might be +\code{A~1}, where \code{A} is the binary exposure variable of interest. The right hand side of these dummy models is ignored. } \examples{ -#Example on Generated data -N = 50 -X = rnorm(N) #confounder -A = rbinom(N,1,plogis(X)) #treatment variable -Y = X+0.5*A #continuous outcome -Z = rbinom(N,1,plogis(Y)) #binary outcome -df = data.frame(X=X,A=A,Y=Y,Z=Z) +# Example on Generated data +N <- 50 +X <- rnorm(N) # confounder +A <- rbinom(N, 1, plogis(X)) # treatment variable +Y <- X + 0.5 * A # continuous outcome +Z <- rbinom(N, 1, plogis(Y)) # binary outcome +df <- data.frame(X = X, A = A, Y = Y, Z = Z) -#Fit AIPW by default -teffect(A~X,Y~X,data=df) -teffect(A~X,Z~X,data=df,outcome.family="binomial") +# Fit AIPW by default +teffect(A ~ X, Y ~ X, data = df) +teffect(A ~ X, Z ~ X, data = df, outcome.family = "binomial") -#Fit IPW -teffect(A~X,Y~1,data=df,method="IPW") -teffect(A~X,Z~1,data=df,method="IPW") +# Fit IPW +teffect(A ~ X, Y ~ 1, data = df, method = "IPW") +teffect(A ~ X, Z ~ 1, data = df, method = "IPW") -#Fit RA -teffect(A~1,Y~X,data=df,method="RA") -teffect(A~1,Z~X,data=df,outcome.family="binomial",method="RA") +# Fit RA +teffect(A ~ 1, Y ~ X, data = df, method = "RA") +teffect(A ~ 1, Z ~ X, data = df, outcome.family = "binomial", method = "RA") -#Fit PSMatch -teffect(A~X,Y~1,data=df,method="PSMatch") -teffect(A~X,Z~1,data=df,method="PSMatch") +# Fit PSMatch +teffect(A ~ X, Y ~ 1, data = df, method = "PSMatch") +teffect(A ~ X, Z ~ 1, data = df, method = "PSMatch") } diff --git a/tests/testthat.R b/tests/testthat.R new file mode 100644 index 0000000..6839208 --- /dev/null +++ b/tests/testthat.R @@ -0,0 +1,4 @@ +library(testthat) +library(teffectsR) + +test_check("teffectsR") diff --git a/tests/testthat/test-teffects.R b/tests/testthat/test-teffects.R new file mode 100644 index 0000000..0a2a704 --- /dev/null +++ b/tests/testthat/test-teffects.R @@ -0,0 +1,27 @@ +N <- 50 +X <- rnorm(N) # confounder +A <- rbinom(N, 1, plogis(X)) # treatment variable +Y <- X + 0.5 * A # continuous outcome +Z <- rbinom(N, 1, plogis(Y)) # binary outcome +df <- data.frame(X = X, A = A, Y = Y, Z = Z) + +test_that("AIPW runs without errors", { + # Fits AIPW by default + teffect(A ~ X, Y ~ X, data = df) + teffect(A ~ X, Z ~ X, data = df, outcome.family = "binomial") +}) + +test_that("IPW runs without errors", { + teffect(A ~ X, Y ~ 1, data = df, method = "IPW") + teffect(A ~ X, Z ~ 1, data = df, method = "IPW") +}) + +test_that("RA runs without errors", { + teffect(A ~ 1, Y ~ X, data = df, method = "RA") + teffect(A ~ 1, Z ~ X, data = df, outcome.family = "binomial", method = "RA") +}) + +test_that("PSMatch runs without errors", { + teffect(A ~ X, Y ~ 1, data = df, method = "PSMatch") + teffect(A ~ X, Z ~ 1, data = df, method = "PSMatch") +})