Skip to content

Commit

Permalink
Some attention is all you need (#1)
Browse files Browse the repository at this point in the history
  • Loading branch information
ohines authored Apr 6, 2024
1 parent d98ac02 commit 990bd40
Show file tree
Hide file tree
Showing 21 changed files with 711 additions and 479 deletions.
1 change: 1 addition & 0 deletions .github/CODEOWNERS
Validating CODEOWNERS rules …
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
* @ohines
7 changes: 7 additions & 0 deletions .github/PULL_REQUEST_TEMPLATE.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
# Motivation

<!-- Why is this change necessary? Link issues here if applicable. -->

# Changes

<!-- What changes have been performed? -->
24 changes: 24 additions & 0 deletions .github/workflows/ci.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,24 @@
# Workflow derived from https://github.com/r-lib/actions/tree/v2/examples
# Need help debugging build failures? Start at https://github.com/r-lib/actions#where-to-find-help
name: R-CMD-check
on: "push"

jobs:
R-CMD-check:
runs-on: ubuntu-latest
env:
GITHUB_PAT: ${{ secrets.GITHUB_TOKEN }}
R_KEEP_PKG_SOURCE: yes
steps:
- uses: actions/checkout@v4

- uses: r-lib/actions/setup-r@v2
with:
use-public-rspm: true

- uses: r-lib/actions/setup-r-dependencies@v2
with:
extra-packages: any::rcmdcheck
needs: check

- uses: r-lib/actions/check-r-package@v2
4 changes: 4 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
.DS_Store
.Rproj.user
.lintr
.Rhistory
6 changes: 4 additions & 2 deletions DESCRIPTION
Original file line number Diff line number Diff line change
Expand Up @@ -5,9 +5,11 @@ Version: 1.0
Author: Oliver Hines
Maintainer: Oliver Hines <[email protected]>
Description: Fit causal treatment-effect estimands using regression adjusment and methods based on the propensity score.
License: TBC
License: MIT + file LICENSE
Encoding: UTF-8
LazyData: true
Imports:
Matching
RoxygenNote: 7.1.0
Suggests:
testthat
RoxygenNote: 7.3.1
21 changes: 21 additions & 0 deletions LICENSE
Original file line number Diff line number Diff line change
@@ -0,0 +1,21 @@
MIT License

Copyright (c) 2024 Oliver Hines

Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:

The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.

THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.
3 changes: 3 additions & 0 deletions NAMESPACE
Original file line number Diff line number Diff line change
Expand Up @@ -7,3 +7,6 @@ export(fit_ipw)
export(fit_psmatch)
export(fit_ra)
export(teffect)
importFrom(stats,family)
importFrom(stats,gaussian)
importFrom(stats,quasibinomial)
20 changes: 12 additions & 8 deletions R/IFglm.R
Original file line number Diff line number Diff line change
@@ -1,14 +1,18 @@
#' Influence Functions from Generalized Linear Models
#'
#' \code{IF.glm} returns a matrix of influence functions from either a \code{glm} model object (run with \code{x=TRUE}),
#' or a \code{glm.fit} object (in which case the user must parse the design matrix, \code{x}).
#' The resulting matrix has one row for each observation, and one column for each GLM model parameter.
#' \code{IF.glm} returns a matrix of influence functions from either a
#' \code{glm} model object (run with \code{x=TRUE}), or a \code{glm.fit}
#' object (in which case the user must parse the design matrix, \code{x}).
#' The resulting matrix has one row for each observation, and one column
#' for each GLM model parameter.
#'
#' @param object A \code{glm} model object (run with \code{x=TRUE}), or a \code{glm.fit} object
#' @param x The model matrix. Not required for a \code{glm} model object (run with \code{x=TRUE}).
#' @param object A \code{glm} model object (run with \code{x=TRUE}),
#' or a \code{glm.fit} object
#' @param x The model matrix.
#' Not required for a \code{glm} model object (run with \code{x=TRUE}).
#' @return Matrix of influence function values.
#' @export

IF.glm <- function(object,x=object$x){
NROW(object$y)*(x*object$prior.weights*(object$y-object$fitted.values))%*%chol2inv(object$qr$qr)
}
IF.glm <- function(object, x = object$x) {
NROW(object$y) * (x * object$prior.weights * (object$y - object$fitted.values)) %*% chol2inv(object$qr$qr)
}
182 changes: 97 additions & 85 deletions R/fit_aipw.R
Original file line number Diff line number Diff line change
@@ -1,105 +1,117 @@
#' Fit Augmented Inverse Probability Weighting
#'
#' Fitting function called by \link[teffectsR]{teffect} when the method is set to \code{'AIPW'}.
#' Augmented Inverse Probability Weighting combines the IPW method and the RA method.
#' A (binomial \link[stats]{glm}) exposure model is fitted which provides observation weights.
#' Also a (\link[stats]{glm}) outcome model is fitted to each treatment subgroup, and used to predict potential outcomes.
#' Fitting function called by \link[teffectsR]{teffect} when the method is set
#' to \code{'AIPW'}.
#' Augmented Inverse Probability Weighting combines the IPW method and the RA
#' method.
#' A (binomial \link[stats]{glm}) exposure model is fitted which provides
#' observation weights.
#' Also a (\link[stats]{glm}) outcome model is fitted to each treatment
#' subgroup, and used to predict potential outcomes.
#'
#' @param X numeric vector of (0,1) specifiying the treatment variable.
#' @param Y numeric vector sepcifying the outcome variable
#' @param Zx design matrix for the exposure (propensity score) model.
#' @param Zy design matrix for the outcome model to be fitted to each treatment subgroup.
#' @param Ofam family function for the outcome model. See \link[stats]{family} for details of family functions.
#' @param treatment.effect the treament effect estimand of interest.
#' @param Zy design matrix for the outcome model to be fitted to each treatment
#' subgroup.
#' @param Ofam family function for the outcome model.
#' See \link[stats]{family} for details of family functions.
#' @param treatment.effect the treament effect estimand of interest.
#' Can be either \code{"ATE","ATT","ATC"} or \code{"All"} to fit all three.
#' @param weights an optional numeric vector of ‘observation weights’ to be used in the fitting process.
#'
#' @return List of fit parameters, which is used to derive an object of class \code{teffects} when called by \link[teffectsR]{teffects}.
#' @param weights an optional numeric vector of ‘observation weights’ to be used
#' in the fitting process.
#'
#' @return List of fit parameters, which is used to derive an object of class
#' \code{teffects} when called by \link[teffectsR]{teffect}.
#' @examples
#' #generate some data
#' N = 50
#' X = rnorm(N) #confounder
#' A = rbinom(N,1,plogis(X)) #treatment variable
#' Y = X+0.5*A #continuous outcome
#' Z = rbinom(N,1,plogis(Y)) #binary outcome
#' df = data.frame(X=X,A=A,Y=Y,Z=Z)
#' # generate some data
#' N <- 50
#' X <- rnorm(N) # confounder
#' A <- rbinom(N, 1, plogis(X)) # treatment variable
#' Y <- X + 0.5 * A # continuous outcome
#' Z <- rbinom(N, 1, plogis(Y)) # binary outcome
#' df <- data.frame(X = X, A = A, Y = Y, Z = Z)
#'
#' teffect(A~X,Y~X,data=df,method="AIPW")
#' teffect(A~X,Z~X,data=df,outcome.family="binomial",method="AIPW")
#' teffect(A ~ X, Y ~ X, data = df, method = "AIPW")
#' teffect(A ~ X, Z ~ X, data = df, outcome.family = "binomial", method = "AIPW")
#'
#' @importFrom stats gaussian quasibinomial family
#' @export
fit_aipw <- function(X,Y,Zx,Zy,Ofam=gaussian(),treatment.effect = "ATE", weights=rep(1,N)){
link <- Ofam$linkinv #invlink function for outcome model
dlink <- Ofam$mu.eta #derivative of mean wrt to linear predictor
N = NROW(X)
if (is.null(weights)){
fit_aipw <- function(X, Y, Zx, Zy, Ofam = gaussian(), treatment.effect = "ATE", weights = rep(1, N)) {
link <- Ofam$linkinv # invlink function for outcome model
dlink <- Ofam$mu.eta # derivative of mean wrt to linear predictor
N <- NROW(X)
if (is.null(weights)) {
weights <- rep.int(1, N)
}
X.mod = glm.fit(Zx,X,family=quasibinomial(),weights=weights)
Y.mod1 = glm.fit(Zy,Y,family=Ofam,weights = weights*X)
Y.mod0 = glm.fit(Zy,Y,family=Ofam,weights = weights*(1-X))
IFa = IF.glm(X.mod,Zx) #Influence function of ps model
IFb1 = IF.glm(Y.mod1,Zy) #Influence function of outcome model
IFb0 = IF.glm(Y.mod0,Zy) #Influence function of outcome model

ps = X.mod$fitted.values
eta1 = Y.mod1$linear.predictor
eta0 = Y.mod0$linear.predictor
mu1 = link(eta1)
mu0 = link(eta0)
X_ps = X/ps
cX_ps = (1-X)/(1-ps)
dX_ps = -X_ps*(1-ps) #derivative of weights
dcX_ps = cX_ps*ps
#Get potential outcomes
Po1 = X_ps*Y + (1-X_ps)*mu1
Po0 = cX_ps*Y + (1-cX_ps)*mu0
P.wts <- list(ATE = rep.int(1, N), ATT = X, ATC = 1-X)
if (!treatment.effect %in% c("ATE","ATT","ATC","All")){

X.mod <- stats::glm.fit(Zx, X, family = quasibinomial(), weights = weights)
Y.mod1 <- stats::glm.fit(Zy, Y, family = Ofam, weights = weights * X)
Y.mod0 <- stats::glm.fit(Zy, Y, family = Ofam, weights = weights * (1 - X))

IFa <- teffectsR::IF.glm(X.mod, Zx) # Influence function of ps model
IFb1 <- teffectsR::IF.glm(Y.mod1, Zy) # Influence function of outcome model
IFb0 <- teffectsR::IF.glm(Y.mod0, Zy) # Influence function of outcome model

ps <- X.mod$fitted.values
eta1 <- Y.mod1$linear.predictor
eta0 <- Y.mod0$linear.predictor

mu1 <- link(eta1)
mu0 <- link(eta0)

X_ps <- X / ps
cX_ps <- (1 - X) / (1 - ps)
dX_ps <- -X_ps * (1 - ps) # derivative of weights
dcX_ps <- cX_ps * ps

# Get potential outcomes
Po1 <- X_ps * Y + (1 - X_ps) * mu1
Po0 <- cX_ps * Y + (1 - cX_ps) * mu0

P.wts <- list(ATE = rep.int(1, N), ATT = X, ATC = 1 - X)

if (!treatment.effect %in% c("ATE", "ATT", "ATC", "All")) {
stop("'treatment.effect' not recognized")
} else if(treatment.effect != "All") P.wts <- P.wts[treatment.effect]


run <- lapply(P.wts, function(wt){
wt <- as.vector(weights)*wt
N.wt = sum(wt)

Po1.mean = sum(Po1*wt)/N.wt
Po0.mean = sum(Po0*wt)/N.wt

xbar1a = ((Y-mu1)*dX_ps*wt)%*%Zx/N.wt
xbar0a = ((Y-mu0)*dcX_ps*wt)%*%Zx/N.wt
xbar1b = ((1-X_ps)*dlink(eta1)*wt)%*%Zy/N.wt
xbar0b = ((1-cX_ps)*dlink(eta0)*wt)%*%Zy/N.wt

IF1 = (Po1 - Po1.mean)*wt*N/N.wt + IFa%*%t(xbar1a) + IFb1%*%t(xbar1b)
IF0 = (Po0 - Po0.mean)*wt*N/N.wt + IFa%*%t(xbar0a) + IFb0%*%t(xbar0b)
Var = sum((IF1-IF0)^2)/N^2
TE = Po1.mean-Po0.mean

a = list()
}

if (treatment.effect != "All") P.wts <- P.wts[treatment.effect]

run <- lapply(P.wts, function(wt) {
wt <- as.vector(weights) * wt
N.wt <- sum(wt)

Po1.mean <- sum(Po1 * wt) / N.wt
Po0.mean <- sum(Po0 * wt) / N.wt

xbar1a <- ((Y - mu1) * dX_ps * wt) %*% Zx / N.wt
xbar0a <- ((Y - mu0) * dcX_ps * wt) %*% Zx / N.wt
xbar1b <- ((1 - X_ps) * dlink(eta1) * wt) %*% Zy / N.wt
xbar0b <- ((1 - cX_ps) * dlink(eta0) * wt) %*% Zy / N.wt

IF1 <- (Po1 - Po1.mean) * wt * N / N.wt + IFa %*% t(xbar1a) + IFb1 %*% t(xbar1b)
IF0 <- (Po0 - Po0.mean) * wt * N / N.wt + IFa %*% t(xbar0a) + IFb0 %*% t(xbar0b)
Var <- sum((IF1 - IF0)^2) / N^2
TE <- Po1.mean - Po0.mean

a <- list()
a$coefs <- TE
a$std.err <- sqrt(Var)
a$Wald <- TE^2/Var
a$Wald <- TE^2 / Var
a$PO.means <- c(Po1 = Po1.mean, Po0 = Po0.mean)
a$PO.std.err<- c(Po1 = sqrt(sum(IF1^2))/N, Po0 = sqrt(sum(IF0^2))/N )
return(a)
a$PO.std.err <- c(Po1 = sqrt(sum(IF1^2)) / N, Po0 = sqrt(sum(IF0^2)) / N)
a
})

run <- simplify2array(run)

a <- list(coef = as.double(run["coefs",]),
std.err = as.double(run["std.err",]),
Wald = as.double(run["Wald",]),
Po.means = run["PO.means",],
Po.std.err = run["PO.std.err",])

a <- list(
coef = as.double(run["coefs", ]),
std.err = as.double(run["std.err", ]),
Wald = as.double(run["Wald", ]),
Po.means = run["PO.means", ],
Po.std.err = run["PO.std.err", ]
)
names(a$coef) <- names(a$std.err) <- names(a$Wald) <- names(P.wts)
return(a)

a
}
Loading

0 comments on commit 990bd40

Please sign in to comment.