# Dimensions of the analytic data
dim(hdps.data)
#> [1] 3000 410
# Investigator-specified covariates
investigator.vars
#> [1] "age" "sex" "comorbidity"
# Top 200 empirical covariates selected based on Cox-LASSO
head(empirical.vars.lasso)
#> [1] "rec_diag_V03_once" "rec_diag_S24_once" "rec_diag_W61_once"
#> [4] "rec_diag_C81_once" "rec_diag_M85_once" "rec_diag_H18_once"
# Investigator-specified and empirical covariates
head(vars.hsps)
#> [1] "age" "sex" "comorbidity"
#> [4] "rec_diag_V03_once" "rec_diag_S24_once" "rec_diag_W61_once"
23 hdDRS with a survival outcome
To demonstrate the high-dimensional disease risk score (hdDRS) with a survival/time-to-event outcome, we will use the same simulated data that we used for the hdPS demonstration with a survival outcome.
23.1 Step 0: Analytic data
23.2 Step 6: Disease risk score (DRS)
There are at least eight approaches to estimate the disease risk score (DRS):
hdDRS-Full-Logistic: On the full cohort (both exposed and unexposed), fit logistic regression without considering the follow-up time. This model includes the exposure, investigator-specified measured confounders, and the recurrence covariates. The DRS is calculated as the probability of the outcome by setting everyone as unexposed.
hdDRS-Full-Survival: On the full cohort, fit the Cox-PH model with the exposure, investigator-specified measured confounders and the recurrence covariates. The DRS is calculated as the survival probability of the outcome by setting everyone as unexposed.
hdDRS-Full-Hazard: On the full cohort, fit the Cox-PH model with the exposure, investigator-specified measured confounders and the recurrence covariates. The DRS is calculated as the hazard of the outcome by setting everyone as unexposed.
hdDRS-Full-Rate: On the full cohort, fit the modified Poisson regression with the exposure, an offset by the natural logarithm of follow-up time, investigator-specified measured confounders and the recurrence covariates. The DRS is calculated as the rate of the outcome by setting everyone as unexposed.
hdDRS-Unexposed-Logistic: On the cohort with only unexposed, fit the logistic regression with the investigator-specified measured confounders and the recurrence covariates. The DRS is calculated as the probability of the outcome on the full cohort.
hdDRS-Unexposed-Survival: On the cohort with only unexposed, fit the Cox-PH model with the investigator-specified measured confounders and the recurrence covariates. The DRS is calculated as the survival probability of the outcome on the full cohort.
hdDRS-Unexposed-Hazard: On the cohort with only unexposed, fit the Cox-PH model with the investigator-specified measured confounders and the recurrence covariates. The DRS is calculated as the hazard of the outcome on the full cohort.
hdDRS-Unexposed-Rate: On the cohort with only unexposed, fit the modified Poisson regression with an offset by the natural logarithm of follow-up time, investigator-specified measured confounders and the recurrence covariates. The DRS is calculated as the rate of the outcome on the full cohort.
In this example, we will focus on the rate-based approach using the unexposed cohort (hdDRS-Unexposed-Rate). To demonstrate how to apply all eight methods in a given scenario, reproducible R code on a simulated dataset is provided in the GitHub folder.
23.2.1 Unexposed cohort
# Unexposed cohort: rows of hdps.data where arthritis is "No"
dat.unexposed <- subset(hdps.data, arthritis == "No")

# Offset
# NOTE(review): log(1) is 0, so this offset is the same constant for every
# row; the accompanying text describes an offset of log follow-up time --
# confirm that a constant offset is intended here.
dat.unexposed$log.offset <- log(1)

# Full cohort (all rows, regardless of arthritis status)
dat.full <- hdps.data

# Offset (same constant as used for the unexposed cohort)
dat.full$log.offset <- log(1)
23.2.2 Create DRS formula
# Covariates: investigator-specified plus LASSO-selected empirical covariates
vars.hsdrs <- c(investigator.vars, empirical.vars.lasso)

# Formula: cvd regressed on the offset column plus every covariate
drs.formula <- as.formula(paste0("cvd ~ offset(log.offset) + ",
                                 paste(vars.hsdrs, collapse = "+")))
23.2.3 Fit DRS model
# Fit the Poisson DRS model on the unexposed cohort only
fit.drs <- glm(drs.formula, data = dat.unexposed, family = poisson)

# Replace coefficients that glm could not estimate (reported as NA) with 0
fit.drs$coefficients[is.na(fit.drs$coefficients)] <- 0
23.2.4 Obtain DRS
# Predict the DRS for the full cohort from the model fitted on the unexposed
dat.full$drs <- predict(fit.drs, type = "response", newdata = dat.full)

# Summary
summary(dat.full$drs)
#> Min. 1st Qu. Median Mean 3rd Qu. Max.
#> 0.004198 0.089762 0.232163 0.324619 0.466890 4.085130
# Summary by exposure status
tapply(dat.full$drs, dat.full$arthritis, summary)
#> $No
#> Min. 1st Qu. Median Mean 3rd Qu. Max.
#> 0.004198 0.081598 0.206067 0.293047 0.433756 1.776528
#>
#> $Yes
#> Min. 1st Qu. Median Mean 3rd Qu. Max.
#> 0.006824 0.129409 0.298506 0.403565 0.568061 4.085130
23.3 Step 7: Association
# Deciles of DRS (ten groups, stored as a factor)
dat.full$drs.decile <- as.factor(dplyr::ntile(dat.full$drs, 10))

# Outcome analysis: Cox-PH model for the exposure, adjusted for DRS decile
# and the investigator-specified covariates
fit.hddrs <- coxph(Surv(follow_up, cvd) ~ arthritis + drs.decile + age + sex +
                     comorbidity, data = dat.full)

# Report the first two rows of the regression table with robust p-values
# and confidence intervals
publish(fit.hddrs, pvalue.method = "robust", confint.method = "robust",
        print = FALSE)$regressionTable[1:2, ]