Methods talk

TI Methods Speaker Series in collaboration with the Student and Recent Graduate Committee (SARGC) of the Statistical Society of Canada.

About the topic

Propensity score-based methods are widely used in analyzing observational datasets to reduce the impact of confounding due to observed covariates. This Webinar will provide a basic overview of popular Propensity Score approaches, analysis steps explained in R, best practices, and reporting guidelines. The prerequisites are knowledge of multiple regression analysis, and working knowledge of R. Background in propensity score analysis or causal inference is not required.

Software Requirements

It is assumed that you have the following software packages installed. Webinar does not provide any installation support. Note that, working on software during the webinar is not mandatory. But if the participant like, they are welcome to browse through the webinar slides in their own laptop.

References

Sample Data Source

rhc <- read.csv("http://biostat.mc.vanderbilt.edu/wiki/pub/Main/DataSets/rhc.csv")
# Change the Age variable into categories  below 50, [50,60), [60,70), [70,80), above 80
# categorizing a continuous variable is not recommended.
rhc$age <- cut(rhc$age,breaks=c(-Inf, 50, 60, 70, 80, Inf),right=FALSE)
# Re-order the levels of race to white, black and other
rhc$race <- factor(rhc$race, levels=c("white","black","other"))
# merging disease categories
rhc$cat1 <- as.character(rhc$cat1)
rhc$cat1[rhc$cat1 == "Lung Cancer"] <- "Other"
rhc$cat1[rhc$cat1 == "COPD"] <- "Other"
rhc$cat1[rhc$cat1 == "Coma"] <- "Other"
rhc$cat1[rhc$cat1 == "Cirrhosis"] <- "Other"
rhc$cat1[rhc$cat1 == "Colon Cancer"] <- "Other"
rhc$cat1[rhc$cat1 == "MOSF w/Malignancy"] <- "MOSF"
rhc$cat1[rhc$cat1 == "MOSF w/Sepsis"] <- "MOSF"
rhc$cat1 <- as.factor(rhc$cat1)
# Change the baseline for gender to Male
rhc$sex <- as.factor(rhc$sex)
rhc$sex <- relevel(rhc$sex, ref = "Male")
# Regroup the levels for disease categories to "ARF","CHF","MOSF","Other".
levels(rhc$ca) <- c("Metastatic","None","Localized (Yes)")
# Rename the levels of "ca" (Cancer) to "Metastatic","None" and "Localized (Yes)" 
rhc$ca <- factor(rhc$ca, levels=c("None","Localized (Yes)","Metastatic"))
# re-order the levels to "None","Localized (Yes)" and "Metastatic"
rhc$ca <- factor(rhc$ca, levels=c("None","Localized (Yes)","Metastatic"))
# create a new variable called "numcom" to count number of comorbidities illness for each person  (12 categories)
rhc$numcom <- rhc$cardiohx + rhc$chfhx + rhc$dementhx + rhc$psychhx + 
  rhc$chrpulhx + rhc$renalhx + rhc$liverhx + rhc$gibledhx + rhc$malighx + 
  rhc$immunhx + rhc$transhx +rhc$amihx
rhc2 <- rhc[c("age","sex", "race","cat1", "ca", "dnr1", "aps1",
              "surv2md1","numcom","adld3p","das2d3pc","temp1",
              "hrt1","meanbp1","resp1","wblc1","pafi1","paco21",
              "ph1","crea1","alb1","scoma1","swang1", "death")]
names(rhc2) <- c("age","sex", "race","Disease.category", "Cancer", 
                 "DNR.status", "APACHE.III.score", "Pr.2mo.survival",
                 "No.of.comorbidity","ADLs.2wk.prior","DASI.2wk.prior",
                 "Temperature","Heart.rate","Blood.pressure",
                 "Respiratory.rate","WBC.count","PaO2.by.FIO2","PaCO2",
                 "pH","Creatinine","Albumin","GComa.Score","RHC", "Death")
dim(rhc2)
rhc2$age <- factor(rhc2$age, levels = c("[-Inf,50)","[50,60)","[60,70)",
                                        "[70,80)","[80, Inf)"), 
                   ordered = TRUE)
levels(rhc2$age)
# Assess missing values
require(DataExplorer)
plot_missing(rhc2) 
# simplifying
rhc2$ADLs.2wk.prior <- NULL
rhc2$Cancer <- NULL
analytic.data0 <- rhc2 
rm(rhc2)
dim(analytic.data0)
table(analytic.data0$RHC)
table(analytic.data0$Death)
# inducing some bias in the study!!
analytic.data0$ID <- 1:nrow(analytic.data0)
# Younger age and no treated and did not survive
id1 <- analytic.data0$ID[analytic.data0$RHC!="RHC" & analytic.data0$age =="[-Inf,50)" & analytic.data0$Death=="Yes"]
# Female and not treated and did not survive
id2 <- analytic.data0$ID[analytic.data0$RHC!="RHC" & analytic.data0$sex !="Male" & analytic.data0$Death=="Yes"]
# Other race (other than white and black) and not treated and did not survive
id3 <- analytic.data0$ID[analytic.data0$RHC!="RHC" & analytic.data0$race =="other" & analytic.data0$Death=="Yes"]
# Abnormal heart rate and not treated and did not survive
id4 <- analytic.data0$ID[analytic.data0$RHC!="RHC" & analytic.data0$Heart.rate < 70 & analytic.data0$Heart.rate > 110 & analytic.data0$Death=="Yes"]
idx <- unique(c(id1,id2,id3,id4))
length(idx)
set.seed(123)
# take a random sample of the above group
exclude.id <- sample(idx, 
                     size = round(length(idx)*3/4), 
                     replace = FALSE)
head(sort(exclude.id))
# exclude the selected sample from the analytic data
analytic.data <- analytic.data0[ !analytic.data0$ID %in% exclude.id, ]
head(sort(analytic.data$ID))
table(analytic.data$RHC)
table(analytic.data$Death)
dim(analytic.data)
# Saving the data for later use
saveRDS(analytic.data, "/data/RHC.Rds")