alpha0 <- 0.5 # Exposure / A model (intercept)
alpha1 <- 0.5 # Exposure / A model (slope for L)
beta0 <- 0 # Time-dependent variable / L model (intercept)
beta1 <- -5 # Time-dependent variable / L model (slope for Alag)
theta0 <- -3 # Outcome / Y model (intercept)
theta1 <- -2 # Outcome / Y model (slope for L)
theta2 <- -0.8 # Outcome / Y model (slope for A)
100 simulation iterations were performed:
followup <- 10 # followup
n <- 2500 # subjects per simulation
NSim <- 100 # number of simulations
saved.data <- vector(mode = "list", length = NSim)
set.seed(112) # Seed for randomization
Codes ported to R from Young, J. G., & Tchetgen Tchetgen, E. J. (2014)
for (k in 1:NSim){ # simulation index k
aggregate.data <- NULL
for (j in 1:n){ # j = subject index
id <- j
# Data generation in follow-up time 1
tpoint <- 1
Alag <- 0
pL <- 0.5
uL <- runif(1)
L <- ifelse(uL <= pL, 1, 0)
logitA <- alpha0
pA <- 1/(1+exp(-logitA))
uA <- runif(1)
A <- ifelse(uA <= pA, 1, 0)
logitY <- theta0 + (theta1 * L) + (theta2 * A)
pY <- 1/(1+exp(-logitY))
uY <- runif(1)
Y <- ifelse(uY <= pY, 1, 0)
Ylag <- Y
long.form.data <- data.frame(id, tpoint, Alag, L, A, Y)
if (Ylag == 0){
# Stop if event (death) already occurred
for (i in 2:followup){ # i followup index
Alag <- A
tpoint <- i
logitL <- beta0 + beta1 * Alag
pL <- 1/(1+exp(-logitL))
uL <- runif(1)
L <- ifelse(uL <= pL, 1, 0)
logitA <- alpha0 + alpha1 * L
pA <- 1/(1+exp(-logitA))
uA <- runif(1)
A <- ifelse(uA <= pA, 1, 0)
logitY <- theta0 + (theta1 * L) + (theta2 * A)
pY <- 1/(1+exp(-logitY))
uY <- runif(1)
Y <- ifelse(uY <= pY, 1, 0)
long.form.data <- rbind(long.form.data, c(id, tpoint, Alag, L, A, Y, Ylag))
if (Y == 1) {break}
}
}
aggregate.data <- rbind(aggregate.data,long.form.data)
# aggregated data for 1 patient (id = j for all followups)
}
library(dplyr)
aggregate.data <-
aggregate.data %>%
group_by(id) %>%
mutate(Llag = dplyr::lag(L, n = 1, default = 0))
aggregate.data <-
aggregate.data %>%
group_by(id) %>%
mutate(tpoint0 = dplyr::lag(tpoint, n = 1, default = 0))
#write.csv(aggregate.data, file = paste0("data/file", k, ".csv"))
saved.data[[k]] <- aggregate.data
# aggregated data for all patients (in simulation iteration = k)
#print(k)
}
saveRDS(saved.data, file = "data/savedData.rds")
Function to extract results
ext.cox <- function(fit){
x <- summary(fit)
b.se <- ifelse(x$used.robust == TRUE, x$coef["A","robust se"], x$coef["A","se(coef)"])
if (is.null(fit$weights)) {
res <- as.numeric(c(x$n, x$nevent, x$coef["A","coef"], b.se,
confint(fit)["A",],x$coef["A", "Pr(>|z|)"], x$used.robust,
rep(NA,3) ))
} else {
res <- as.numeric(c(x$n, x$nevent, x$coef["A","coef"], b.se,
confint(fit)["A",],x$coef["A", "Pr(>|z|)"], x$used.robust,
mean(fit$weights), range(fit$weights) ))
}
names(res) <- c("n", "events", "coef", "se", "lowerci", "upperci", "pval", "robust", "meanW", "minW", "maxW")
return(res)
}
Fitting modes and obtaining results
saved.data <- readRDS(file = "data/savedData.rds")
mat.cox.adj.all <- mat.cox.all <- mat.msm.w.all <- mat.msm.all <- NULL
for (k in 1:NSim){
#aggregate.data <- read.csv(file = paste0("data/file", k, ".csv"))
aggregate.data <- saved.data[[k]]
# Step 1: Weight denominator model
ww <- glm(A ~ tpoint + L + Alag + Llag, family = binomial(logit), data = aggregate.data)
# Step 2: Weight numerator model
ww0 <- glm(A ~ tpoint + Alag, family = binomial(logit), data = aggregate.data)
# Step 3: Obtain fir from the weight models
aggregate.data$wwp <- with(aggregate.data, ifelse(A == 0, 1 - fitted(ww), fitted(ww)))
aggregate.data$wwp0 <- with(aggregate.data, ifelse(A == 0, 1 - fitted(ww0),fitted(ww0)))
# Step 4: time-dependent weights
aggregate.data$w <- unlist(tapply(1/aggregate.data$wwp, aggregate.data$id, cumprod))
aggregate.data$sw <- unlist(tapply(aggregate.data$wwp0/aggregate.data$wwp, aggregate.data$id, cumprod))
summary(aggregate.data$sw)
require(survival)
# Step 5: Weighted outcome model
fit.msm.w <- coxph(Surv(tpoint0, tpoint, Y) ~ A + cluster(id), data = aggregate.data, weight = w, robust =TRUE)
mat.msm.w <- ext.cox(fit.msm.w)
fit.msm <- coxph(Surv(tpoint0, tpoint, Y) ~ A + cluster(id), data = aggregate.data, weight = sw, robust =TRUE)
mat.msm <- ext.cox(fit.msm)
# Comparison with unweighted models
fit.cox <- coxph(Surv(tpoint0, tpoint, Y) ~ A + cluster(id), data = aggregate.data, robust =TRUE)
mat.cox <- ext.cox(fit.cox)
fit.cox.adj <- coxph(Surv(tpoint0, tpoint, Y) ~ A + L + cluster(id), data = aggregate.data, robust =TRUE)
mat.cox.adj <- ext.cox(fit.cox.adj)
# Results
mat.cox.all <- as.data.frame(rbind(mat.cox.all, mat.cox))
mat.cox.adj.all <- as.data.frame(rbind(mat.cox.adj.all, mat.cox.adj))
mat.msm.w.all <- as.data.frame(rbind(mat.msm.w.all, mat.msm.w))
mat.msm.all <- as.data.frame(rbind(mat.msm.all, mat.msm))
# print(k)
}
Saving results locally
saveRDS(mat.cox.all, file = "data/cox.rds")
saveRDS(mat.cox.adj.all, file = "data/coxL.rds")
saveRDS(mat.msm.w.all, file = "data/msmw.rds")
saveRDS(mat.msm.all, file = "data/msm.rds")
True value for theta2
theta2
## [1] -0.8
Coefs from unadjusted Cox regression, blue line is the reference theta2=-0.8. Red line is the mean of coefs.
require(ggplot2)
p <- ggplot(mat.cox.all, aes(x=coef)) +
geom_density()
p+ geom_vline(aes(xintercept=mean(coef)),
color="red", linetype="dashed", size=1)+
geom_vline(xintercept=-0.8, color ="blue")
Coefs from adjusted Cox regression (adjusted by L)
p <- ggplot(mat.cox.adj.all, aes(x=coef)) +
geom_density()
p+ geom_vline(aes(xintercept=mean(coef)),
color="red", linetype="dashed", size=1) +
geom_vline(xintercept=-0.8, color ="blue")
Coefs from MSM (unstabilized weights used)
p <- ggplot(mat.msm.w.all, aes(x=coef)) +
geom_density()
p+ geom_vline(aes(xintercept=mean(coef)),
color="red", linetype="dashed", size=1) +
geom_vline(xintercept=-0.8, color ="blue")
Coefs from MSM (stabilized weights used)
p <- ggplot(mat.msm.all, aes(x=coef)) +
geom_density()
p+ geom_vline(aes(xintercept=mean(coef)),
color="red", linetype="dashed", size=1) +
geom_vline(xintercept=-0.8, color ="blue")
Percent bias from unadjusted Cox regression
cat(round((mean(mat.cox.all$coef)-theta2)*100/theta2,2), "%")
## 5.78 %
Percent bias from adjusted Cox regression (adjusted by L)
cat(round((mean(mat.cox.adj.all$coef)-theta2)*100/theta2,2), "%")
## -2.31 %
Percent bias from MSM (unstabilized weights used)
cat(round((mean(mat.msm.w.all$coef)-theta2)*100/theta2,2), "%")
## 3.47 %
Investigating unstabilized weights (large weights)
round(range(mat.msm.w.all$maxW),2)
## [1] 10720.82 134166.54
Percent bias from MSM (stabilized weights used)
cat(round((mean(mat.msm.all$coef)-theta2)*100/theta2,2), "%")
## 0.58 %
Investigating stabilized weights (reasonable weights)
round(range(mat.msm.all$maxW),2)
## [1] 1.68 3.14