CCHS: Regression

This tutorial is for a complex data analysis, specifically using regression techniques to analyze survey data.

Let us load necessary R packages for the analysis:

# Load required packages
library(survey)
library(knitr)
library(car)
library(tableone)
library(DataExplorer)
library(Publish)
library(ROCR)
library(WeightedROC)
library(jtools)
library(MASS)

Load data

Loads a dataset and provides some quick data checks, like the dimensions and summary of weights.

load("Data/surveydata/cchs123w.RData")
ls()
#> [1] "analytic.miss"   "analytic2"       "has_annotations" "w.design"
dim(analytic.miss)
#> [1] 397173     23
dim(analytic2)
#> [1] 185613     22
summary(weights(w.design))
#>    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
#>    0.39   23.85   45.98   71.54   87.30 2384.98

Logistic for complex survey

Performs a simple logistic regression using the complex survey data, focusing on the relationship between cardiovascular disease and osteoarthritis.

formula0 <- as.formula(I(CVD=="event") ~ OA)

## Crude regression
fit2 <- svyglm(formula0, 
              design = w.design, 
              family = binomial(logit))
require(Publish)
publish(fit2)
#>  Variable   Units OddsRatio       CI.95 p-value 
#>        OA Control       Ref                     
#>                OA      4.52 [4.19;4.87]  <1e-04

Multivariable analysis

Runs a more complex logistic regression model, adding multiple covariates to better understand the relationship.

formula1 <- as.formula(I(CVD=="event") ~ OA + age + sex + married + race + 
              edu + income + bmi + phyact + doctor + stress + 
              smoke + drink + fruit + bp + diab + province + immigrate)

fit3 <- svyglm(formula1, 
              design = w.design, 
              family = binomial(logit))
publish(fit3)
#>   Variable             Units OddsRatio         CI.95     p-value 
#>         OA           Control       Ref                           
#>                           OA      1.52   [1.40;1.66]     < 1e-04 
#>        age       20-29 years       Ref                           
#>                  30-39 years      1.29   [0.98;1.69]   0.0707636 
#>                  40-49 years      2.74   [2.17;3.47]     < 1e-04 
#>                  50-59 years      6.24   [4.97;7.83]     < 1e-04 
#>                  60-64 years      9.71  [7.68;12.29]     < 1e-04 
#>            65 years and over     15.85 [12.57;20.00]     < 1e-04 
#>                         teen      0.46   [0.20;1.07]   0.0707295 
#>        sex            Female       Ref                           
#>                         Male      1.73   [1.60;1.88]     < 1e-04 
#>    married        not single       Ref                           
#>                       single      0.97   [0.90;1.05]   0.5209444 
#>       race         Non-white       Ref                           
#>                        White      1.44   [1.19;1.75]   0.0002219 
#>        edu          < 2ndary       Ref                           
#>                    2nd grad.      0.90   [0.80;1.00]   0.0512330 
#>              Other 2nd grad.      0.97   [0.83;1.13]   0.6737665 
#>               Post-2nd grad.      0.93   [0.85;1.02]   0.1016562 
#>     income   $29,999 or less       Ref                           
#>              $30,000-$49,999      0.74   [0.67;0.81]     < 1e-04 
#>              $50,000-$79,999      0.64   [0.58;0.72]     < 1e-04 
#>              $80,000 or more      0.58   [0.51;0.66]     < 1e-04 
#>        bmi       Underweight       Ref                           
#>               healthy weight      0.86   [0.67;1.10]   0.2350526 
#>                   Overweight      0.88   [0.69;1.12]   0.3033213 
#>     phyact            Active       Ref                           
#>                     Inactive      1.21   [1.10;1.34]   0.0001345 
#>                     Moderate      1.08   [0.97;1.21]   0.1771985 
#>     doctor                No       Ref                           
#>                          Yes      1.75   [1.49;2.06]     < 1e-04 
#>     stress  Not too stressed       Ref                           
#>                     stressed      1.30   [1.18;1.42]     < 1e-04 
#>      smoke      Never smoker       Ref                           
#>               Current smoker      1.18   [1.05;1.32]   0.0050518 
#>                Former smoker      1.21   [1.11;1.33]     < 1e-04 
#>      drink       Never drank       Ref                           
#>              Current drinker      0.82   [0.68;0.98]   0.0290605 
#>                Former driker      1.13   [0.93;1.36]   0.2133779 
#>      fruit 0-3 daily serving       Ref                           
#>            4-6 daily serving      0.94   [0.86;1.03]   0.1758214 
#>             6+ daily serving      1.09   [0.97;1.23]   0.1311029 
#>         bp                No       Ref                           
#>                          Yes      2.35   [2.16;2.55]     < 1e-04 
#>       diab                No       Ref                           
#>                          Yes      1.86   [1.66;2.07]     < 1e-04 
#>   province             South       Ref                           
#>                        North      1.21   [0.90;1.62]   0.2103030 
#>  immigrate     not immigrant       Ref                           
#>                   > 10 years      1.02   [0.89;1.16]   0.8057243 
#>                       recent      1.06   [0.73;1.53]   0.7651069

Model fit assessment

Variability explained

Pseudo-R-square values indicate how much of the total variability in the outcomes is explainable by the fitted model (analogous to R-square). For a continuous outcome, we can compute R-square, while R-square cannot be calculated when the outcome variable is categorical, nominal or ordinal. We use pseudo-R-squared measures when the dependent variable is not continuous but a likelihood function is used to fit a model. Popular Pseudo-R-square measures are:

  1. Cox/Snell (never reaches max 1)
  2. Nagelkerke R-square (scaled to max 1)
  • The larger Cox & Snell estimate is the better the model.
  • These Pseudo-R-square values should be interpreted with caution (if not ignored).
  • They offer little confidence in interpreting the model fit.
  • Survey weighted version of them are available.
  • Not trivial to decide which statistic to use under complex surveys.

Evaluates the model fit using Akaike Information Criterion (AIC) and pseudo R-squared metrics.

fit3 <- svyglm(formula1, 
              design = w.design, 
              family = quasibinomial(logit)) # publish does not work
AIC(fit3) 
#>        eff.p          AIC     deltabar 
#>    67.752064 45362.706032     2.053093

# AIC for survey weighted regressions
psrsq(fit3, method = "Cox-Snell")
#> [1] 0.06091896
psrsq(fit3, method = "Nagelkerke")
#> [1] 0.2307586
# Nagelkerke and Cox-Snell pseudo-rsquared statistics

Backward Elimination

  • Model comparisons
    • LRT-aprroximation
    • Wald-based

Checking one by one

Checks the significance of each variable one by one and removes those that are not statistically significant.

round(sort(summary(fit3)$coef[,"Pr(>|t|)"]),2)
#>            (Intercept)   age65 years and over                  bpYes 
#>                   0.00                   0.00                   0.00 
#>         age60-64 years         age50-59 years                sexMale 
#>                   0.00                   0.00                   0.00 
#>                diabYes                   OAOA  income$80,000 or more 
#>                   0.00                   0.00                   0.00 
#>         age40-49 years  income$50,000-$79,999              doctorYes 
#>                   0.00                   0.00                   0.00 
#>  income$30,000-$49,999         stressstressed     smokeFormer smoker 
#>                   0.00                   0.00                   0.00 
#>         phyactInactive              raceWhite    smokeCurrent smoker 
#>                   0.00                   0.00                   0.01 
#>   drinkCurrent drinker           edu2nd grad.                ageteen 
#>                   0.03                   0.05                   0.07 
#>         age30-39 years      eduPost-2nd grad.  fruit6+ daily serving 
#>                   0.07                   0.10                   0.13 
#> fruit4-6 daily serving         phyactModerate          provinceNorth 
#>                   0.18                   0.18                   0.21 
#>     drinkFormer driker      bmihealthy weight          bmiOverweight 
#>                   0.21                   0.24                   0.30 
#>          marriedsingle     eduOther 2nd grad.        immigraterecent 
#>                   0.52                   0.67                   0.77 
#>    immigrate> 10 years 
#>                   0.81
# bmiOverweight is associated with largest p-value
# but what about other categories?

regTermTest(fit3,~bmi) # coef of all bmi cat = 0
#> Wald test for bmi
#>  in svyglm(formula = formula1, design = w.design, family = quasibinomial(logit))
#> F =  0.7591291  on  2  and  185579  df: p= 0.46808
fit4 <- update(fit3, .~. -bmi) 

anova(fit3, fit4)
#> Working (Rao-Scott+F) LRT for bmi
#>  in svyglm(formula = formula1, design = w.design, family = quasibinomial(logit))
#> Working 2logLR =  1.424634 p= 0.49071 
#> (scale factors:  1.1 0.93 );  denominator df= 185579
# high p-value (in both wald and Anova) makes it more likely that you should exclude bmi
AIC(fit3,fit4) 
#>         eff.p      AIC deltabar
#> [1,] 67.75206 45362.71 2.053093
#> [2,] 64.30460 45358.26 2.074342
round(sort(summary(fit4)$coef[,"Pr(>|t|)"]),2)
#>            (Intercept)   age65 years and over                  bpYes 
#>                   0.00                   0.00                   0.00 
#>         age60-64 years         age50-59 years                sexMale 
#>                   0.00                   0.00                   0.00 
#>                diabYes                   OAOA  income$80,000 or more 
#>                   0.00                   0.00                   0.00 
#>         age40-49 years  income$50,000-$79,999              doctorYes 
#>                   0.00                   0.00                   0.00 
#>  income$30,000-$49,999         stressstressed     smokeFormer smoker 
#>                   0.00                   0.00                   0.00 
#>         phyactInactive              raceWhite    smokeCurrent smoker 
#>                   0.00                   0.00                   0.00 
#>   drinkCurrent drinker           edu2nd grad.         age30-39 years 
#>                   0.03                   0.05                   0.07 
#>                ageteen      eduPost-2nd grad.  fruit6+ daily serving 
#>                   0.07                   0.10                   0.13 
#> fruit4-6 daily serving         phyactModerate          provinceNorth 
#>                   0.17                   0.17                   0.21 
#>     drinkFormer driker          marriedsingle     eduOther 2nd grad. 
#>                   0.21                   0.53                   0.67 
#>        immigraterecent    immigrate> 10 years 
#>                   0.76                   0.81

Using AIC to automate

Uses stepwise regression guided by AIC to automatically select the most important variables.

require(MASS)
formula1b <- as.formula(I(CVD=="event") ~ OA + age + sex)
fit1b <- svyglm(formula1b, 
              design = w.design, 
              family = binomial(logit))
fit5 <- stepAIC(fit1b, direction = "backward")
#> Start:  AIC=47384.51
#> I(CVD == "event") ~ OA + age + sex
#> 
#>        Df Deviance   AIC
#> <none>       47353 47385
#> - sex   1    47634 47658
#> - OA    1    47679 47702
#> - age   6    54414 54291
publish(fit5)
#>  Variable             Units OddsRatio         CI.95   p-value 
#>        OA           Control       Ref                         
#>                          OA      1.81   [1.66;1.97]   < 1e-04 
#>       age       20-29 years       Ref                         
#>                 30-39 years      1.40   [1.07;1.84]   0.01374 
#>                 40-49 years      3.39   [2.69;4.27]   < 1e-04 
#>                 50-59 years      9.42  [7.55;11.76]   < 1e-04 
#>                 60-64 years     17.78 [14.22;22.23]   < 1e-04 
#>           65 years and over     33.82 [27.26;41.97]   < 1e-04 
#>                        teen      0.45   [0.20;1.02]   0.05462 
#>       sex            Female       Ref                         
#>                        Male      1.57   [1.46;1.69]   < 1e-04
round(sort(summary(fit5)$coef[,"Pr(>|t|)"]),2)
#>          (Intercept) age65 years and over       age60-64 years 
#>                 0.00                 0.00                 0.00 
#>       age50-59 years                 OAOA              sexMale 
#>                 0.00                 0.00                 0.00 
#>       age40-49 years       age30-39 years              ageteen 
#>                 0.00                 0.01                 0.05

Using AIC, but keeping importants

Similar to the previous step but ensures certain important variables remain in the model.

formula1c <- as.formula(I(CVD=="event") ~ OA + age + sex + married + race +
                         edu + income + bmi + phyact + fruit + bp + diab + 
                         doctor + stress + smoke + drink + province + immigrate)
scope <- list(upper = ~ OA + age + sex + married + race +
                         edu + income + bmi + phyact + fruit + bp + diab + 
                         doctor + stress + smoke + drink + province + immigrate,
              lower = ~ OA + age + sex + married + race +
                         edu + income + bmi + phyact + fruit + bp + diab)

fit1c <- svyglm(formula1c, design = w.design, family = binomial(logit))

fitstep <- step(fit1c, scope = scope, trace = FALSE, k = 2, direction = "backward")
# k = 2 gives the genuine AIC
publish(fitstep)
#>  Variable             Units OddsRatio         CI.95     p-value 
#>        OA           Control       Ref                           
#>                          OA      1.52   [1.40;1.66]     < 1e-04 
#>       age       20-29 years       Ref                           
#>                 30-39 years      1.29   [0.98;1.70]   0.0697699 
#>                 40-49 years      2.74   [2.17;3.47]     < 1e-04 
#>                 50-59 years      6.24   [4.97;7.83]     < 1e-04 
#>                 60-64 years      9.71  [7.68;12.28]     < 1e-04 
#>           65 years and over     15.85 [12.57;19.98]     < 1e-04 
#>                        teen      0.46   [0.20;1.07]   0.0705660 
#>       sex            Female       Ref                           
#>                        Male      1.73   [1.60;1.88]     < 1e-04 
#>   married        not single       Ref                           
#>                      single      0.97   [0.90;1.05]   0.5042496 
#>      race         Non-white       Ref                           
#>                       White      1.41   [1.18;1.70]   0.0001986 
#>       edu          < 2ndary       Ref                           
#>                   2nd grad.      0.90   [0.80;1.00]   0.0524940 
#>             Other 2nd grad.      0.97   [0.83;1.13]   0.6732446 
#>              Post-2nd grad.      0.93   [0.85;1.02]   0.1045975 
#>    income   $29,999 or less       Ref                           
#>             $30,000-$49,999      0.74   [0.67;0.81]     < 1e-04 
#>             $50,000-$79,999      0.64   [0.58;0.72]     < 1e-04 
#>             $80,000 or more      0.58   [0.51;0.66]     < 1e-04 
#>       bmi       Underweight       Ref                           
#>              healthy weight      0.86   [0.67;1.10]   0.2316915 
#>                  Overweight      0.88   [0.69;1.12]   0.2982852 
#>    phyact            Active       Ref                           
#>                    Inactive      1.22   [1.10;1.34]   0.0001227 
#>                    Moderate      1.08   [0.97;1.21]   0.1754422 
#>     fruit 0-3 daily serving       Ref                           
#>           4-6 daily serving      0.94   [0.86;1.03]   0.1807666 
#>            6+ daily serving      1.09   [0.97;1.23]   0.1295281 
#>        bp                No       Ref                           
#>                         Yes      2.35   [2.16;2.55]     < 1e-04 
#>      diab                No       Ref                           
#>                         Yes      1.85   [1.66;2.07]     < 1e-04 
#>    doctor                No       Ref                           
#>                         Yes      1.75   [1.49;2.05]     < 1e-04 
#>    stress  Not too stressed       Ref                           
#>                    stressed      1.30   [1.18;1.42]     < 1e-04 
#>     smoke      Never smoker       Ref                           
#>              Current smoker      1.17   [1.05;1.31]   0.0053412 
#>               Former smoker      1.21   [1.10;1.33]     < 1e-04 
#>     drink       Never drank       Ref                           
#>             Current drinker      0.82   [0.68;0.98]   0.0254942 
#>               Former driker      1.12   [0.93;1.36]   0.2205315
round(sort(summary(fitstep)$coef[,"Pr(>|t|)"]),2)
#>            (Intercept)   age65 years and over                  bpYes 
#>                   0.00                   0.00                   0.00 
#>         age60-64 years         age50-59 years                sexMale 
#>                   0.00                   0.00                   0.00 
#>                diabYes                   OAOA  income$80,000 or more 
#>                   0.00                   0.00                   0.00 
#>         age40-49 years  income$50,000-$79,999              doctorYes 
#>                   0.00                   0.00                   0.00 
#>  income$30,000-$49,999         stressstressed     smokeFormer smoker 
#>                   0.00                   0.00                   0.00 
#>         phyactInactive              raceWhite    smokeCurrent smoker 
#>                   0.00                   0.00                   0.01 
#>   drinkCurrent drinker           edu2nd grad.         age30-39 years 
#>                   0.03                   0.05                   0.07 
#>                ageteen      eduPost-2nd grad.  fruit6+ daily serving 
#>                   0.07                   0.10                   0.13 
#>         phyactModerate fruit4-6 daily serving     drinkFormer driker 
#>                   0.18                   0.18                   0.22 
#>      bmihealthy weight          bmiOverweight          marriedsingle 
#>                   0.23                   0.30                   0.50 
#>     eduOther 2nd grad. 
#>                   0.67

Assess interactions

Check biologically interesting ones.

Check one by one

Checks if there is a significant interaction effect between ‘age’ and ‘sex’.

fit8a <- update(fitstep, .~. + interaction(age,sex))
anova(fitstep, fit8a) # keep interaction
#> Working (Rao-Scott+F) LRT for interaction(age, sex)
#>  in svyglm(formula = I(CVD == "event") ~ OA + age + sex + married + 
#>     race + edu + income + bmi + phyact + fruit + bp + diab + 
#>     doctor + stress + smoke + drink + interaction(age, sex), 
#>     design = w.design, family = binomial(logit))
#> Working 2logLR =  40.16528 p= 1.2167e-06 
#> (scale factors:  1.3 1.2 1.2 0.93 0.78 0.71 );  denominator df= 185576

Checks if there is a significant interaction effect between ‘sex’ and ‘diabetes’.

fit8b <- update(fitstep, .~. + interaction(sex,diab))
anova(fitstep, fit8b) # Do not keep this interaction
#> Working (Rao-Scott+F) LRT for interaction(sex, diab)
#>  in svyglm(formula = I(CVD == "event") ~ OA + age + sex + married + 
#>     race + edu + income + bmi + phyact + fruit + bp + diab + 
#>     doctor + stress + smoke + drink + interaction(sex, diab), 
#>     design = w.design, family = binomial(logit))
#> Working 2logLR =  0.4591456 p= 0.49597 
#> df=1;  denominator df= 185581

Checks if there is a significant interaction effect between ‘BMI’ and ‘diabetes’.

fit8c <- update(fitstep, .~. + interaction(bmi,diab))
anova(fitstep, fit8c) # keep this interaction
#> Working (Rao-Scott+F) LRT for interaction(bmi, diab)
#>  in svyglm(formula = I(CVD == "event") ~ OA + age + sex + married + 
#>     race + edu + income + bmi + phyact + fruit + bp + diab + 
#>     doctor + stress + smoke + drink + interaction(bmi, diab), 
#>     design = w.design, family = binomial(logit))
#> Working 2logLR =  7.92727 p= 0.02533 
#> (scale factors:  1.4 0.6 );  denominator df= 185580
Add all significant interactions in 1 model

Updates the model to include significant interaction terms.

Note that we have 0 effect modifier, 2 interactions

fit9 <- update(fitstep, .~. + interaction(age,sex) + interaction(bmi,diab))
require(jtools)
summ(fit9, confint = TRUE, digits = 3)
Observations 185613
Dependent variable I(CVD == "event")
Type Survey-weighted generalized linear model
Family binomial
Link logit
Pseudo-R² (Cragg-Uhler) 0.233
Pseudo-R² (McFadden) 0.207
AIC 41548.160
Est. 2.5% 97.5% t val. p
(Intercept) -5.590 -6.046 -5.135 -24.069 0.000
OAOA 0.438 0.352 0.523 10.043 0.000
age30-39 years 0.299 -0.118 0.717 1.405 0.160
age40-49 years 1.158 0.794 1.522 6.236 0.000
age50-59 years 2.181 1.831 2.531 12.212 0.000
age60-64 years 2.619 2.263 2.976 14.395 0.000
age65 years and over 3.033 2.680 3.387 16.833 0.000
ageteen -0.695 -1.704 0.314 -1.350 0.177
sexMale -0.028 -0.447 0.390 -0.133 0.895
marriedsingle -0.016 -0.096 0.064 -0.382 0.703
raceWhite 0.348 0.165 0.531 3.724 0.000
edu2nd grad. -0.107 -0.217 0.003 -1.906 0.057
eduOther 2nd grad. -0.039 -0.192 0.114 -0.498 0.618
eduPost-2nd grad. -0.081 -0.173 0.010 -1.746 0.081
income$30,000-$49,999 -0.304 -0.400 -0.208 -6.215 0.000
income$50,000-$79,999 -0.444 -0.552 -0.336 -8.034 0.000
income$80,000 or more -0.555 -0.684 -0.426 -8.424 0.000
bmihealthy weight -1.406 -2.677 -0.135 -2.167 0.030
bmiOverweight -1.110 -2.375 0.154 -1.721 0.085
phyactInactive 0.194 0.094 0.293 3.817 0.000
phyactModerate 0.077 -0.034 0.187 1.353 0.176
fruit4-6 daily serving -0.062 -0.155 0.031 -1.313 0.189
fruit6+ daily serving 0.094 -0.023 0.211 1.579 0.114
bpYes 0.861 0.778 0.944 20.354 0.000
diabYes 1.745 0.462 3.027 2.667 0.008
doctorYes 0.535 0.372 0.697 6.447 0.000
stressstressed 0.256 0.164 0.347 5.483 0.000
smokeCurrent smoker 0.152 0.039 0.266 2.628 0.009
smokeFormer smoker 0.177 0.082 0.272 3.654 0.000
drinkCurrent drinker -0.205 -0.381 -0.029 -2.277 0.023
drinkFormer driker 0.116 -0.072 0.303 1.210 0.226
interaction(age, sex)30-39 years.Female -0.083 -0.622 0.457 -0.300 0.764
interaction(age, sex)40-49 years.Female -0.302 -0.766 0.161 -1.278 0.201
interaction(age, sex)50-59 years.Female -0.810 -1.258 -0.362 -3.543 0.000
interaction(age, sex)60-64 years.Female -0.787 -1.237 -0.337 -3.428 0.001
interaction(age, sex)65 years and over.Female -0.603 -1.035 -0.170 -2.733 0.006
interaction(age, sex)teen.Female -0.129 -1.806 1.548 -0.151 0.880
interaction(bmi, diab)healthy weight.No 1.380 0.085 2.675 2.089 0.037
interaction(bmi, diab)Overweight.No 1.085 -0.204 2.373 1.650 0.099
Standard errors: Robust
fit9 <- update(fitstep, .~. + age:sex + bmi:diab)
publish(fit9)
#>                                            Variable             Units OddsRatio         CI.95     p-value 
#>                                                  OA           Control       Ref                           
#>                                                                    OA      1.55   [1.42;1.69]     < 1e-04 
#>                                             married        not single       Ref                           
#>                                                                single      0.98   [0.91;1.07]   0.7026897 
#>                                                race         Non-white       Ref                           
#>                                                                 White      1.42   [1.18;1.70]   0.0001960 
#>                                                 edu          < 2ndary       Ref                           
#>                                                             2nd grad.      0.90   [0.81;1.00]   0.0566536 
#>                                                       Other 2nd grad.      0.96   [0.83;1.12]   0.6183383 
#>                                                        Post-2nd grad.      0.92   [0.84;1.01]   0.0807393 
#>                                              income   $29,999 or less       Ref                           
#>                                                       $30,000-$49,999      0.74   [0.67;0.81]     < 1e-04 
#>                                                       $50,000-$79,999      0.64   [0.58;0.71]     < 1e-04 
#>                                                       $80,000 or more      0.57   [0.50;0.65]     < 1e-04 
#>                                              phyact            Active       Ref                           
#>                                                              Inactive      1.21   [1.10;1.34]   0.0001349 
#>                                                              Moderate      1.08   [0.97;1.21]   0.1760803 
#>                                               fruit 0-3 daily serving       Ref                           
#>                                                     4-6 daily serving      0.94   [0.86;1.03]   0.1892549 
#>                                                      6+ daily serving      1.10   [0.98;1.23]   0.1142759 
#>                                                  bp                No       Ref                           
#>                                                                   Yes      2.37   [2.18;2.57]     < 1e-04 
#>                                              doctor                No       Ref                           
#>                                                                   Yes      1.71   [1.45;2.01]     < 1e-04 
#>                                              stress  Not too stressed       Ref                           
#>                                                              stressed      1.29   [1.18;1.42]     < 1e-04 
#>                                               smoke      Never smoker       Ref                           
#>                                                        Current smoker      1.16   [1.04;1.30]   0.0085960 
#>                                                         Former smoker      1.19   [1.09;1.31]   0.0002579 
#>                                               drink       Never drank       Ref                           
#>                                                       Current drinker      0.81   [0.68;0.97]   0.0227934 
#>                                                         Former driker      1.12   [0.93;1.35]   0.2264702 
#>               age(20-29 years): sex(Male vs Female)                        0.97   [0.64;1.48]   0.8945322 
#>               age(30-39 years): sex(Male vs Female)                        1.06   [0.75;1.49]   0.7583565 
#>               age(40-49 years): sex(Male vs Female)                        1.32   [1.07;1.62]   0.0091527 
#>               age(50-59 years): sex(Male vs Female)                        2.18   [1.85;2.58]     < 1e-04 
#>               age(60-64 years): sex(Male vs Female)                        2.14   [1.80;2.53]     < 1e-04 
#>         age(65 years and over): sex(Male vs Female)                        1.78   [1.58;1.99]     < 1e-04 
#>                      age(teen): sex(Male vs Female)                        1.11   [0.22;5.62]   0.9033924 
#>        sex(Female): age(30-39 years vs 20-29 years)                        1.24   [0.87;1.77]   0.2276609 
#>        sex(Female): age(40-49 years vs 20-29 years)                        2.35   [1.75;3.16]     < 1e-04 
#>        sex(Female): age(50-59 years vs 20-29 years)                        3.94   [2.95;5.27]     < 1e-04 
#>        sex(Female): age(60-64 years vs 20-29 years)                        6.25   [4.66;8.39]     < 1e-04 
#>  sex(Female): age(65 years and over vs 20-29 years)                       11.37  [8.60;15.02]     < 1e-04 
#>               sex(Female): age(teen vs 20-29 years)                        0.44   [0.11;1.69]   0.2320840 
#>          sex(Male): age(30-39 years vs 20-29 years)                        1.35   [0.89;2.05]   0.1599938 
#>          sex(Male): age(40-49 years vs 20-29 years)                        3.18   [2.21;4.58]     < 1e-04 
#>          sex(Male): age(50-59 years vs 20-29 years)                        8.85  [6.24;12.56]     < 1e-04 
#>          sex(Male): age(60-64 years vs 20-29 years)                       13.73  [9.61;19.61]     < 1e-04 
#>    sex(Male): age(65 years and over vs 20-29 years)                       20.77 [14.59;29.57]     < 1e-04 
#>                 sex(Male): age(teen vs 20-29 years)                        0.50   [0.18;1.37]   0.1769155 
#>                   bmi(Underweight): diab(Yes vs No)                        5.72  [1.59;20.63]   0.0076561 
#>                bmi(healthy weight): diab(Yes vs No)                        1.44   [1.19;1.75]   0.0002221 
#>                    bmi(Overweight): diab(Yes vs No)                        1.93   [1.70;2.20]     < 1e-04 
#>        diab(No): bmi(healthy weight vs Underweight)                        0.97   [0.76;1.25]   0.8394972 
#>            diab(No): bmi(Overweight vs Underweight)                        0.97   [0.76;1.25]   0.8400722 
#>       diab(Yes): bmi(healthy weight vs Underweight)                        0.25   [0.07;0.87]   0.0302066 
#>           diab(Yes): bmi(Overweight vs Underweight)                        0.33   [0.09;1.17]   0.0852377
basic.model <- eval(fit5$call[[2]])
basic.model
#> I(CVD == "event") ~ OA + age + sex
#> attr(,"variables")
#> list(I(CVD == "event"), OA, age, sex)
#> attr(,"factors")
#>                   OA age sex
#> I(CVD == "event")  0   0   0
#> OA                 1   0   0
#> age                0   1   0
#> sex                0   0   1
#> attr(,"term.labels")
#> [1] "OA"  "age" "sex"
#> attr(,"order")
#> [1] 1 1 1
#> attr(,"intercept")
#> [1] 1
#> attr(,"response")
#> [1] 1
#> attr(,".Environment")
#> <environment: R_GlobalEnv>
#> attr(,"predvars")
#> list(I(CVD == "event"), OA, age, sex)
#> attr(,"dataClasses")
#> I(CVD == "event")                OA               age               sex 
#>         "logical"          "factor"          "factor"          "factor" 
#>         (weights) 
#>         "numeric"

aic.int.model <- eval(fit9$call[[2]])
aic.int.model
#> I(CVD == "event") ~ OA + age + sex + married + race + edu + income + 
#>     bmi + phyact + fruit + bp + diab + doctor + stress + smoke + 
#>     drink + age:sex + bmi:diab

Saving data

Saves the final regression models for future use.

save(basic.model, aic.int.model, file = "Data/surveydata/cchs123w2.RData")

Video content (optional)

Tip

For those who prefer a video walkthrough, feel free to watch the video below, which offers a description of an earlier version of the above content.