# Load required packages
library(survey)
require(interactions)
require(mitools)
require(mice)
require(miceadds)
library(modelsummary)
tidy.pool_mi <- function(x, ...) {
msg <- capture.output(out <- summary(x, ...))
out$term <- row.names(out)
colnames(out) <- c("estimate", "std.error", "statistic", "p.value",
"conf.low", "conf.high", "miss", "term")
return(out)
}
Effect modification
In this tutorial, we delve into the concept of effect modification.
We discussed about effect modification in an earlier discussion.
We start by loading the necessary packages that will aid in the analysis. We also define a function tidy.pool_mi
to streamline the pooling process for multiple imputation results.
Data
We load a dataset named smi
. This dataset is a list of multiple imputed datasets, which is evident from the structure and the way we access its elements.
require(mitools)
data(smi)
length(smi)
#> [1] 2
length(smi[[1]])
#> [1] 5
head(smi[[1]][[1]])
#> id wave mmetro parsmk drkfre alcdos alcdhi smk
#> 1 920006 1 1 0 Non drinker Non drinker 0 non/ex-smoker
#> 2 920006 2 1 0 Non drinker Non drinker 0 non/ex-smoker
#> 3 920006 3 1 0 Non drinker Non drinker 0 non/ex-smoker
#> 4 920006 4 1 0 Non drinker Non drinker 0 non/ex-smoker
#> 5 920006 5 1 0 Non drinker Non drinker 0 non/ex-smoker
#> 6 920006 6 1 0 not in last wk not in last wk 0 non/ex-smoker
#> cistot mdrkfre sex drinkreg
#> 1 0 0 1 FALSE
#> 2 0 0 1 FALSE
#> 3 0 0 1 FALSE
#> 4 1 0 1 FALSE
#> 5 4 0 1 FALSE
#> 6 3 0 1 FALSE
Model with interaction and ORs
We’re interested in understanding how the variable wave
interacts with sex
in predicting drinkreg
. We fit two logistic regression models, one for each level of the sex
variable (0 males, 1 females), to understand this interaction. wave
is the exposure variable here.
For effect modifier sex = 0 (males)
models <- with(smi, glm(drinkreg~ wave + sex + wave*sex, family = binomial()))
summary(pool(models, rule = "rubin1987"), conf.int = TRUE, exponentiate = TRUE)[2,]
#> term estimate std.error statistic df p.value 2.5 % 97.5 %
#> 2 wave 1.271952 0.06587423 3.651693 234.4974 0.0003212903 1.11714 1.448217
For effect modifier sex = 1 (females) (just changing reference)
models2<-with(smi, glm(drinkreg~ wave + I(sex==0) + wave*I(sex==0),family=binomial()))
summary(pool(models2, rule = "rubin1987"),conf.int = TRUE, exponentiate = TRUE)[2,]
#> term estimate std.error statistic df p.value 2.5 % 97.5 %
#> 2 wave 1.225438 0.05876369 3.45959 240.3053 0.0006399352 1.091487 1.375828
- Notice the ORs for
wave
in the above 2 analyses. These are basically our target. - For proper survey data analysis, you will have to work with design and make sure you subset your subpopulation (those eligible) appropriately.
Simple slopes analyses
We perform a simple slopes analysis for each imputed dataset. This analysis helps in understanding the relationship between the predictor and the outcome at specific levels of the moderator.
After obtaining the results from each imputed dataset, we pool them to get a consolidated result. This is done separately for each level of the sex
variable.
Pooled results for sex = 0
# For sex = 0
ef.lev <- 1
est <- c(a1$slopes$Est.[ef.lev],
a2$slopes$Est.[ef.lev],
a3$slopes$Est.[ef.lev],
a4$slopes$Est.[ef.lev],
a5$slopes$Est.[ef.lev])
se <- c(a1$slopes$S.E.[ef.lev],
a2$slopes$S.E.[ef.lev],
a3$slopes$S.E.[ef.lev],
a4$slopes$S.E.[ef.lev],
a5$slopes$S.E.[ef.lev])
vr <- se^2
OR <- exp(est)
OR.se <- OR * se
OR.v <- OR.se^2
mod_pooled <- miceadds::pool_mi(qhat=OR, u=OR.v)
tidy.pool_mi(mod_pooled)
#> estimate std.error statistic p.value conf.low conf.high miss term
#> 1 1.272164 0.08386552 15.16909 6.987679e-39 1.107118 1.43721 12.2 % 1
summary(MIcombine(as.list(OR), as.list(OR.v)))
#> Multiple imputation results:
#> MIcombine.default(as.list(OR), as.list(OR.v))
#> results se (lower upper) missInfo
#> 1 1.272164 0.08386552 1.107118 1.43721 12 %
Pooled results for sex = 1
# For sex = 1
ef.lev <- 2
est <- c(a1$slopes$Est.[ef.lev],
a2$slopes$Est.[ef.lev],
a3$slopes$Est.[ef.lev],
a4$slopes$Est.[ef.lev],
a5$slopes$Est.[ef.lev])
se <- c(a1$slopes$S.E.[ef.lev],
a2$slopes$S.E.[ef.lev],
a3$slopes$S.E.[ef.lev],
a4$slopes$S.E.[ef.lev],
a5$slopes$S.E.[ef.lev])
vr <- se^2
OR <- exp(est)
OR.se <- OR * se
OR.v <- OR.se^2
mod_pooled <- miceadds::pool_mi(qhat=OR, u=OR.v)
tidy.pool_mi(mod_pooled)
#> estimate std.error statistic p.value conf.low conf.high miss term
#> 1 1.225598 0.07204679 17.01113 2.282427e-46 1.083838 1.367357 11.9 % 1
summary(MIcombine(as.list(OR), as.list(OR.v)))
#> Multiple imputation results:
#> MIcombine.default(as.list(OR), as.list(OR.v))
#> results se (lower upper) missInfo
#> 1 1.225598 0.07204679 1.083838 1.367357 12 %