dim(hdps.data)
#> [1] 3839 119
21 hdDRS with a binary outcome
Similar to hdPS, there are seven steps in High-dimensional disease risk score (hdDRS). However, the sixth step is different. In the sixth step, we fit a propensity score model in the hdPS analysis, while we fit a disease risk score model in the hdDRS analysis. Moreover in the seventh step, estimating the treatment/exposure effect with the matched or inverse probability weighted data is a popular choice in the hdPS analysis, while adjusting for deciles of the disease risk score is a popular choice in the hdDRS analysis.
# | hdPS | hdDRS |
---|---|---|
Step 1 | Proxy sources | Proxy sources |
Step 2 | Empirical | Empirical |
Step 3 | Recurrence | Recurrence |
Step 4 | Prioritize | Prioritize |
Step 5 | Covariates | Covariates |
Step 6 | Propensity score | Disease risk score |
Step 7 | Association | Association |
- Utilizing these empirically identified proxies with investigator-specified measured confounders, the hdPS and hdDRS are proposed to reduce residual confounding bias.
- Both methods have seven steps.
- The sixth step is different: in the hdPS analysis, we we model the exposure (propensity score model), while in the hdDRS analysis, we model the outcome (disease risk score model).
21.1 Step 0: Analytic data
We will use the same NHANES data for this hdDRS demonstration as we did for the hdPS with a binary exposure and a binary outcome demonstration.
21.2 Step 6: Disease risk score (DRS)
Hansen (2008) shows that the DRS has a balancing property, called prognostic balance. Individuals sharing a similar DRS value can be regarded as having the same risk/prognosis for the outcome. In contrast, the propensity score has a covariate balance property.
21.2.1 Create DRS formula
With a binary outcome, there are two approaches to estimate the disease risk score (DRS):
- Fitting DRS model on unexposed individuals: Covariates include the investigator-specified and empirical covariates
- Fitting DRS model on the full cohort: Covariates include the exposure, investigator-specified and empirical covariates.
In this example, we will focus on fitting the DRS model on unexposed individuals:
$outcome <- as.numeric(I(hdps.data$diabetes=='Yes'))
hdps.data<- names(out3$autoselected_covariate_df[,-1])
proxy.list.sel <- paste0(proxy.list.sel, collapse = "+")
proxyform <- paste0(investigator.specified.covariates, collapse = " + ") covform
<- paste0(c(covform, proxyform), collapse = "+")
rhsformula <- as.formula(paste0("outcome", "~", rhsformula))
drs.formula
drs.formula#> outcome ~ age.cat + sex + education + race + marital + income +
#> born + year + diabetes.family.history + medical.access +
#> smoking + diet.healthy + physical.activity + sleep + uric.acid +
#> protein.total + bilirubin.total + phosphorus + sodium + potassium +
#> globulin + calcium.total + systolicBP + diastolicBP + high.cholesterol +
#> rec_dx_I10_once + rec_dx_R73_once + rec_dx_I10_frequent +
#> rec_dx_R60_once + rec_dx_E78_once + rec_dx_M79_once + rec_dx_I51_once +
#> rec_dx_M10_once + rec_dx_I50_once + rec_dx_K21_once + rec_dx_D75_once +
#> rec_dx_Z79_once + rec_dx_F41_once + rec_dx_M1A_once + rec_dx_E87_once +
#> rec_dx_R12_once + rec_dx_R51_once + rec_dx_J45_once + rec_dx_I50_frequent +
#> rec_dx_L70_once + rec_dx_M25_once + rec_dx_I63_once + rec_dx_R39_once +
#> rec_dx_N28_once + rec_dx_K25_once + rec_dx_F90_once + rec_dx_B00_once +
#> rec_dx_J42_once + rec_dx_R41_once + rec_dx_I20_once + rec_dx_M54_once +
#> rec_dx_J44_once + rec_dx_K08_once + rec_dx_I21_once + rec_dx_F32_once +
#> rec_dx_J30_once + rec_dx_F43_once + rec_dx_R06_once + rec_dx_I48_once +
#> rec_dx_R32_once + rec_dx_R42_once + rec_dx_N92_once + rec_dx_N95_once +
#> rec_dx_M19_once + rec_dx_E07_once + rec_dx_R25_once + rec_dx_G43_once +
#> rec_dx_R52_once + rec_dx_M81_once + rec_dx_T78_once + rec_dx_G47_once +
#> rec_dx_R11_once + rec_dx_B35_once + rec_dx_M06_once + rec_dx_E04_once +
#> rec_dx_H40_frequent + rec_dx_T14_once + rec_dx_C50_once +
#> rec_dx_H40_once + rec_dx_N32_once + rec_dx_A49_once + rec_dx_J45_frequent +
#> rec_dx_N39_once + rec_dx_R09_once + rec_dx_N94_once + rec_dx_F39_once +
#> rec_dx_E03_once + rec_dx_K59_once + rec_dx_F31_once + rec_dx_L40_once +
#> rec_dx_M06_frequent + rec_dx_M13_once + rec_dx_I48_frequent +
#> rec_dx_G25_once + rec_dx_F31_frequent + rec_dx_M62_once +
#> rec_dx_K92_once + rec_dx_R10_once + rec_dx_G40_once + rec_dx_G40_frequent +
#> rec_dx_J44_frequent + rec_dx_K04_once + rec_dx_I49_once +
#> rec_dx_R00_once + rec_dx_R07_once + rec_dx_H04_once + rec_dx_K27_once +
#> rec_dx_R05_once + rec_dx_K30_once + rec_dx_N40_once + rec_dx_R35_once
21.2.2 Unexposed cohort
# Unexposed cohort
<- subset(hdps.data, obese == "No") dat.unexposed
21.2.3 Fit DRS model
Here we fit a logistic regression model on the unexposed individuals. Similar to the hdPS example, we are adding only the main effects in the non-transformed form.
<- glm(drs.formula, data = dat.unexposed, family = binomial) fit.drs
21.2.4 Obtain DRS
Now we can obtain the DRS for the full cohort and check the summary. Unlike in hdPS, balance checking is difficult in hdDRS analysis. There are some proposed matching and weighting techniques with hdDRS where balance checking is possible. These techniques are developed/proposed with only investigator-specified covariates. In this demonstration, we adjust our outcome model for deciles of the DRS, which is a common practice in the hdDRS literature.
$drs <- predict(fit.drs, type = "response", newdata = hdps.data)
hdps.data
# Sumamry
summary(hdps.data$drs)
#> Min. 1st Qu. Median Mean 3rd Qu. Max.
#> 0.00000 0.01255 0.06931 0.19362 0.26619 0.99998
# Summary by exposure status
tapply(hdps.data$drs, hdps.data$obese, summary)
#> $No
#> Min. 1st Qu. Median Mean 3rd Qu. Max.
#> 0.00000 0.01281 0.06271 0.17814 0.23135 0.99989
#>
#> $Yes
#> Min. 1st Qu. Median Mean 3rd Qu. Max.
#> 0.00000 0.01186 0.07603 0.21491 0.31354 0.99998
21.3 Step 7: Association
21.3.1 Adjtsing for deciles of DRS
# Deciles of DRS
$drs.decile <- as.factor(dplyr::ntile(hdps.data$drs, 10))
hdps.data
# Outcome analysis
<- glm(outcome ~ obese + drs.decile,
fit.hddrs data = hdps.data,
family = binomial)
publish(fit.hddrs, pvalue.method = "robust", confint.method = "robust",
print = F)$regressionTable[1:2,]
21.3.2 Adjtsing for deciles of DRS and covariates
Another popular approach is to adjust for deciles of the DRS as well as for the investigator-specified covariates:
# Outcome analysis
<- glm(outcome ~ obese + drs.decile + age.cat + sex + education +
fit.hddrs1 + marital + income + born + year +
race + medical.access + smoking +
diabetes.family.history + physical.activity + sleep + uric.acid +
diet.healthy + bilirubin.total + phosphorus + sodium +
protein.total + globulin + calcium.total + systolicBP +
potassium + high.cholesterol, data = hdps.data,
diastolicBP family = binomial)
publish(fit.hddrs1, pvalue.method = "robust", confint.method = "robust",
print = F)$regressionTable[1:2,]