Section 2 A Real data: Box 14: Initial data setup

# Load the RHC data and derive the outcome Y (length of stay):
#   Y = date of discharge - study admission date, or
#   Y = date of death     - study admission date when the discharge-based
#       value is missing.
ObsData <- read.csv("https://hbiostat.org/data/repo/rhc.csv", header = TRUE)
# ObsData <- read.csv("rhc.csv", header = TRUE) # replace with your file path
ObsData$Y <- with(ObsData, {
  stay_to_discharge <- dschdte - sadmdte
  stay_to_death <- dthdte - sadmdte
  # keep the discharge-based stay where it exists, otherwise use the
  # death-based one (which may itself be NA)
  ifelse(is.na(stay_to_discharge), stay_to_death, stay_to_discharge)
})

# Drop, in a single pass, every column that is not used in this example.
# The remaining columns keep their original relative order.
ObsData <- dplyr::select(
  ObsData,
  -c(
    # outcomes we are not examining in this example
    dthdte, lstctdte, dschdte, death, t3d30, dth30, surv2md1,
    # unnecessary and problematic variables
    sadmdte, ptid, X, adld3p, urin1, cat2
  )
)

# Names of the columns that hold categorical information; each of them is
# turned into a factor below.
factors <- c(
  "cat1", "ca", "cardiohx", "chfhx", "dementhx", "psychhx",
  "chrpulhx", "renalhx", "liverhx", "gibledhx", "malighx",
  "immunhx", "transhx", "amihx", "sex", "dnr1", "ninsclas",
  "resp", "card", "neuro", "gastr", "renal", "meta", "hema",
  "seps", "trauma", "ortho", "race", "income"
)
ObsData[factors] <- lapply(
  factors,
  function(column) as.factor(ObsData[[column]])
)
# Treatment indicator A: 1 when swang1 is "RHC", 0 otherwise (No RHC).
ObsData$A <- as.numeric(ObsData$swang1 == "RHC")
# The original treatment column is no longer needed once A exists.
ObsData <- dplyr::select(ObsData, -swang1)
# Recode selected covariates so their categories match the original paper.

# age: five left-closed bands, i.e. [a, b)
ObsData$age <- cut(
  ObsData$age,
  breaks = c(-Inf, 50, 60, 70, 80, Inf),
  right = FALSE
)

# race: fix the level order to white, black, other
ObsData$race <- factor(ObsData$race, levels = c("white", "black", "other"))

# sex: make "Male" the reference (first) level
ObsData$sex <- relevel(as.factor(ObsData$sex), ref = "Male")

# cat1: relabel the nine existing levels BY POSITION (in the factor's
# current level order); levels given the same label are merged, leaving
# ARF, CHF, Other and MOSF.
ObsData$cat1 <- local({
  disease <- as.factor(ObsData$cat1)
  levels(disease) <- c(
    "ARF", "CHF", "Other", "Other", "Other",
    "Other", "Other", "MOSF", "MOSF"
  )
  disease
})

# ca: relabel the three existing levels by position, then reorder them so
# that "None" comes first.
ObsData$ca <- local({
  cancer <- as.factor(ObsData$ca)
  levels(cancer) <- c("Metastatic", "None", "Localized (Yes)")
  factor(cancer, levels = c("None", "Localized (Yes)", "Metastatic"))
})
# Give every column a descriptive name.
# The names are applied by POSITION, so this vector must list exactly one
# name per column, in the current column order of ObsData.
ObsData <- setNames(
  ObsData,
  c(
    "Disease.category", "Cancer",
    "Cardiovascular", "Congestive.HF", "Dementia", "Psychiatric",
    "Pulmonary", "Renal", "Hepatic", "GI.Bleed", "Tumor",
    "Immunosuppression", "Transfer.hx", "MI",
    "age", "sex", "edu",
    "DASIndex", "APACHE.score", "Glasgow.Coma.Score",
    "blood.pressure", "WBC", "Heart.rate", "Respiratory.rate",
    "Temperature", "PaO2vs.FIO2", "Albumin", "Hematocrit",
    "Bilirubin", "Creatinine", "Sodium", "Potassium", "PaCo2", "PH",
    "Weight", "DNR.status", "Medical.insurance",
    "Respiratory.Diag", "Cardiovascular.Diag", "Neurological.Diag",
    "Gastrointestinal.Diag", "Renal.Diag", "Metabolic.Diag",
    "Hematologic.Diag", "Sepsis.Diag", "Trauma.Diag", "Orthopedic.Diag",
    "race", "income",
    "Y", "A"
  )
)
# Attach tableone with library() rather than require(): if the package is
# not installed, library() stops here with a clear error, whereas
# require() only warns and returns FALSE, so the failure would surface on
# the next line as a confusing "could not find function" error.
library(tableone)
# Descriptive table of every column in ObsData, stratified by treatment A
# (0 = No RHC, 1 = RHC). No `vars` argument is given, so the table also
# contains a row for A itself.
CreateTableOne(data = ObsData, strata = "A")
FALSE                                  Stratified by A
FALSE                                   0               1               p      test
FALSE   n                                 3551            2184                     
FALSE   Disease.category (%)                                            <0.001     
FALSE      ARF                            1581 (44.5)      909 (41.6)              
FALSE      CHF                             247 ( 7.0)      209 ( 9.6)              
FALSE      Other                           955 (26.9)      208 ( 9.5)              
FALSE      MOSF                            768 (21.6)      858 (39.3)              
FALSE   Cancer (%)                                                       0.001     
FALSE      None                           2652 (74.7)     1727 (79.1)              
FALSE      Localized (Yes)                 638 (18.0)      334 (15.3)              
FALSE      Metastatic                      261 ( 7.4)      123 ( 5.6)              
FALSE   Cardiovascular = 1 (%)             567 (16.0)      446 (20.4)   <0.001     
FALSE   Congestive.HF = 1 (%)              596 (16.8)      425 (19.5)    0.011     
FALSE   Dementia = 1 (%)                   413 (11.6)      151 ( 6.9)   <0.001     
FALSE   Psychiatric = 1 (%)                286 ( 8.1)      100 ( 4.6)   <0.001     
FALSE   Pulmonary = 1 (%)                  774 (21.8)      315 (14.4)   <0.001     
FALSE   Renal = 1 (%)                      149 ( 4.2)      106 ( 4.9)    0.268     
FALSE   Hepatic = 1 (%)                    265 ( 7.5)      136 ( 6.2)    0.084     
FALSE   GI.Bleed = 1 (%)                   131 ( 3.7)       54 ( 2.5)    0.014     
FALSE   Tumor = 1 (%)                      872 (24.6)      444 (20.3)   <0.001     
FALSE   Immunosuppression = 1 (%)          907 (25.5)      636 (29.1)    0.003     
FALSE   Transfer.hx = 1 (%)                335 ( 9.4)      327 (15.0)   <0.001     
FALSE   MI = 1 (%)                         105 ( 3.0)       95 ( 4.3)    0.007     
FALSE   age (%)                                                         <0.001     
FALSE      [-Inf,50)                       884 (24.9)      540 (24.7)              
FALSE      [50,60)                         546 (15.4)      371 (17.0)              
FALSE      [60,70)                         812 (22.9)      577 (26.4)              
FALSE      [70,80)                         809 (22.8)      529 (24.2)              
FALSE      [80, Inf)                       500 (14.1)      167 ( 7.6)              
FALSE   sex = Female (%)                  1637 (46.1)      906 (41.5)    0.001     
FALSE   edu (mean (SD))                  11.57 (3.13)    11.86 (3.16)    0.001     
FALSE   DASIndex (mean (SD))             20.37 (5.48)    20.70 (5.03)    0.023     
FALSE   APACHE.score (mean (SD))         50.93 (18.81)   60.74 (20.27)  <0.001     
FALSE   Glasgow.Coma.Score (mean (SD))   22.25 (31.37)   18.97 (28.26)  <0.001     
FALSE   blood.pressure (mean (SD))       84.87 (38.87)   68.20 (34.24)  <0.001     
FALSE   WBC (mean (SD))                  15.26 (11.41)   16.27 (12.55)   0.002     
FALSE   Heart.rate (mean (SD))          112.87 (40.94)  118.93 (41.47)  <0.001     
FALSE   Respiratory.rate (mean (SD))     28.98 (13.95)   26.65 (14.17)  <0.001     
FALSE   Temperature (mean (SD))          37.63 (1.74)    37.59 (1.83)    0.429     
FALSE   PaO2vs.FIO2 (mean (SD))         240.63 (116.66) 192.43 (105.54) <0.001     
FALSE   Albumin (mean (SD))               3.16 (0.67)     2.98 (0.93)   <0.001     
FALSE   Hematocrit (mean (SD))           32.70 (8.79)    30.51 (7.42)   <0.001     
FALSE   Bilirubin (mean (SD))             2.00 (4.43)     2.71 (5.33)   <0.001     
FALSE   Creatinine (mean (SD))            1.92 (2.03)     2.47 (2.05)   <0.001     
FALSE   Sodium (mean (SD))              137.04 (7.68)   136.33 (7.60)    0.001     
FALSE   Potassium (mean (SD))             4.08 (1.04)     4.05 (1.01)    0.321     
FALSE   PaCo2 (mean (SD))                39.95 (14.24)   36.79 (10.97)  <0.001     
FALSE   PH (mean (SD))                    7.39 (0.11)     7.38 (0.11)   <0.001     
FALSE   Weight (mean (SD))               65.04 (29.50)   72.36 (27.73)  <0.001     
FALSE   DNR.status = Yes (%)               499 (14.1)      155 ( 7.1)   <0.001     
FALSE   Medical.insurance (%)                                           <0.001     
FALSE      Medicaid                        454 (12.8)      193 ( 8.8)              
FALSE      Medicare                        947 (26.7)      511 (23.4)              
FALSE      Medicare & Medicaid             251 ( 7.1)      123 ( 5.6)              
FALSE      No insurance                    186 ( 5.2)      136 ( 6.2)              
FALSE      Private                         967 (27.2)      731 (33.5)              
FALSE      Private & Medicare              746 (21.0)      490 (22.4)              
FALSE   Respiratory.Diag = Yes (%)        1481 (41.7)      632 (28.9)   <0.001     
FALSE   Cardiovascular.Diag = Yes (%)     1007 (28.4)      924 (42.3)   <0.001     
FALSE   Neurological.Diag = Yes (%)        575 (16.2)      118 ( 5.4)   <0.001     
FALSE   Gastrointestinal.Diag = Yes (%)    522 (14.7)      420 (19.2)   <0.001     
FALSE   Renal.Diag = Yes (%)               147 ( 4.1)      148 ( 6.8)   <0.001     
FALSE   Metabolic.Diag = Yes (%)           172 ( 4.8)       93 ( 4.3)    0.337     
FALSE   Hematologic.Diag = Yes (%)         239 ( 6.7)      115 ( 5.3)    0.029     
FALSE   Sepsis.Diag = Yes (%)              515 (14.5)      516 (23.6)   <0.001     
FALSE   Trauma.Diag = Yes (%)               18 ( 0.5)       34 ( 1.6)   <0.001     
FALSE   Orthopedic.Diag = Yes (%)            3 ( 0.1)        4 ( 0.2)    0.516     
FALSE   race (%)                                                         0.425     
FALSE      white                          2753 (77.5)     1707 (78.2)              
FALSE      black                           585 (16.5)      335 (15.3)              
FALSE      other                           213 ( 6.0)      142 ( 6.5)              
FALSE   income (%)                                                      <0.001     
FALSE      $11-$25k                        713 (20.1)      452 (20.7)              
FALSE      $25-$50k                        500 (14.1)      393 (18.0)              
FALSE      > $50k                          257 ( 7.2)      194 ( 8.9)              
FALSE      Under $11k                     2081 (58.6)     1145 (52.4)              
FALSE   Y (mean (SD))                    19.53 (23.59)   24.86 (28.90)  <0.001     
FALSE   A (mean (SD))                     0.00 (0.00)     1.00 (0.00)   <0.001