Polytomous and ordinal

Let us load the packages:

# Load required packages
require(Publish)
require(survey)
require(svyVGAM)
require(car)
library(knitr)

In the code chunk below, we create a function called invalid.exclude. This function can be used to exclude invalid responses from the dataset, where don’t know, refusal, and not stated are considered invalid responses.

# Function to exclude invalid responses
invalid.exclude <- function(Data, Var){
  subset.data <- subset(Data, eval(parse(text = Var)) != "DON'T KNOW" & 
                          eval(parse(text = Var)) != "REFUSAL" & 
                          eval(parse(text = Var)) != "NOT STATED")
  x1 <- dim(Data)[1]
  x2 <- dim(subset.data)[1]
  cat( format(x1-x2, big.mark = ","),
       "subjects deleted, and current N =" , format(x2, big.mark = ",") , "\n")
  return(subset.data)
}

Data

# Data
analytic <- readRDS("Data/nonbinary/cmh.Rds")
str(analytic)
#> 'data.frame':    2628 obs. of  9 variables:
#>  $ MHcondition       : Factor w/ 3 levels "Good","Poor or Fair",..: 2 2 3 3 3 2 3 1 3 3 ...
#>  $ CommunityBelonging: Factor w/ 4 levels "SOMEWHAT STRONG",..: 1 1 3 3 3 2 1 1 3 1 ...
#>  $ Age               : Factor w/ 6 levels "15 TO 24 YEARS",..: 4 1 1 4 3 5 2 5 3 1 ...
#>  $ Sex               : Factor w/ 2 levels "FEMALE","MALE": 1 1 2 2 2 2 2 1 1 2 ...
#>  $ RaceEthnicity     : Factor w/ 2 levels "NON-WHITE","WHITE": 2 1 2 2 1 2 2 2 2 1 ...
#>  $ MainIncome        : Factor w/ 5 levels "EI/WORKER'S COMP",..: 2 2 2 2 2 5 2 2 2 2 ...
#>  $ ReceivedHelp      : Factor w/ 4 levels "DON'T KNOW","NO",..: 4 4 4 2 2 4 2 2 4 2 ...
#>  $ Weight            : num  678 1298 196 917 2384 ...
#>  $ Disorder          : Factor w/ 1 level "YES": 1 1 1 1 1 1 1 1 1 1 ...

Let us drop invalid responses

# Drop invalid responses
analytic <- invalid.exclude(analytic, Var = c("CommunityBelonging", "Age", "Sex", "RaceEthnicity", 
                                              "MainIncome", "MHcondition"))
#> 0 subjects deleted, and current N = 2,628

Multinomial logistic

Unweighted Tables

Let us see the summary statistic of the variables, stratified by mental health condition:

require("tableone")
tab1 <- CreateTableOne(vars = c("CommunityBelonging", "Age", "Sex", "RaceEthnicity", "MainIncome"),
                       strata = "MHcondition", data = analytic, test = F)
print(tab1, showAllLevels = TRUE, smd = T)
#>                         Stratified by MHcondition
#>                          level             Good        Poor or Fair
#>   n                                        885         1002        
#>   CommunityBelonging (%) SOMEWHAT STRONG   362 (40.9)   288 (28.7) 
#>                          SOMEWHAT WEAK     309 (34.9)   358 (35.7) 
#>                          VERY STRONG        96 (10.8)    74 ( 7.4) 
#>                          VERY WEAK         118 (13.3)   282 (28.1) 
#>   Age (%)                15 TO 24 YEARS    264 (29.8)   191 (19.1) 
#>                          25 TO 34 YEARS    167 (18.9)   141 (14.1) 
#>                          35 TO 44 YEARS    119 (13.4)   185 (18.5) 
#>                          35 TO 54 YEARS    139 (15.7)   220 (22.0) 
#>                          55 TO 64 YEARS    113 (12.8)   198 (19.8) 
#>                          65 years or older  83 ( 9.4)    67 ( 6.7) 
#>   Sex (%)                FEMALE            487 (55.0)   616 (61.5) 
#>                          MALE              398 (45.0)   386 (38.5) 
#>   RaceEthnicity (%)      NON-WHITE         140 (15.8)   184 (18.4) 
#>                          WHITE             745 (84.2)   818 (81.6) 
#>   MainIncome (%)         EI/WORKER'S COMP   78 ( 8.8)   195 (19.5) 
#>                          EMPLOYMENT INC.   641 (72.4)   560 (55.9) 
#>                          NOT STATED         23 ( 2.6)    25 ( 2.5) 
#>                          OTHER              36 ( 4.1)    66 ( 6.6) 
#>                          SENIOR BENEFITS   107 (12.1)   156 (15.6) 
#>                         Stratified by MHcondition
#>                          Very good/excellent SMD   
#>   n                      741                       
#>   CommunityBelonging (%) 355 (47.9)           0.425
#>                          190 (25.6)                
#>                          116 (15.7)                
#>                           80 (10.8)                
#>   Age (%)                285 (38.5)           0.420
#>                          167 (22.5)                
#>                           89 (12.0)                
#>                           79 (10.7)                
#>                           68 ( 9.2)                
#>                           53 ( 7.2)                
#>   Sex (%)                304 (41.0)           0.277
#>                          437 (59.0)                
#>   RaceEthnicity (%)      134 (18.1)           0.045
#>                          607 (81.9)                
#>   MainIncome (%)          36 ( 4.9)           0.400
#>                          598 (80.7)                
#>                            9 ( 1.2)                
#>                           22 ( 3.0)                
#>                           76 (10.3)

Multinomial Model fitting

Before fitting the multinomial regression, let us redefine the reference categories of the variables.

analytic$MHcondition2 <- relevel(analytic$MHcondition, ref = "Poor or Fair")
analytic$CommunityBelonging2 <- relevel(analytic$CommunityBelonging, ref = "VERY WEAK")
analytic$Age2 <- relevel(analytic$Age, ref = "65 years or older")
analytic$Sex2 <- relevel(analytic$Sex, ref = "FEMALE")
analytic$RaceEthnicity2 <- relevel(analytic$RaceEthnicity, ref = "NON-WHITE")

Now we will fit the multinomial logistic regression model using the multinom function from the nnet package:

require(nnet)
fit4 <- multinom(MHcondition2 ~ CommunityBelonging2 + Sex2 + Age2 + RaceEthnicity2 + MainIncome, 
                    data = analytic)
#> # weights:  48 (30 variable)
#> initial  value 2887.153095 
#> iter  10 value 2640.386436
#> iter  20 value 2625.127331
#> iter  30 value 2622.465409
#> final  value 2622.328038 
#> converged
kable(round(exp(cbind(coef(fit4), confint(fit4))),2))
#> Warning in cbind(coef(fit4), confint(fit4)): number of rows of result is not a
#> multiple of vector length (arg 2)
(Intercept) CommunityBelonging2SOMEWHAT STRONG CommunityBelonging2SOMEWHAT WEAK CommunityBelonging2VERY STRONG Sex2MALE Age215 TO 24 YEARS Age225 TO 34 YEARS Age235 TO 44 YEARS Age235 TO 54 YEARS Age255 TO 64 YEARS RaceEthnicity2WHITE MainIncomeEMPLOYMENT INC. MainIncomeNOT STATED MainIncomeOTHER MainIncomeSENIOR BENEFITS
Good 0.38 2.72 1.84 3.08 1.29 0.70 0.62 0.32 0.37 0.36 1.25 2.29 1.57 1.14 1.11 0.21
Very good/excellent 0.09 3.92 1.64 5.65 2.19 1.27 1.08 0.39 0.37 0.37 1.15 3.89 1.25 1.34 2.11 2.07

Multinomial logistic for complex survey

Survey-weighted Tables

Now, we will set up the survey design with the survey weights and then see design-adjusted summary statistics.

require(survey)
# Design
svy.analytic <- svydesign(ids = ~ 1, weights = ~ Weight, data = analytic)

# Table 1
tab1a <- svyCreateTableOne(vars = c("CommunityBelonging", "Age", "Sex", "RaceEthnicity", "MainIncome"), 
                  data = svy.analytic)
print(tab1a, showAllLevels = TRUE)
#>                         
#>                          level             Overall          
#>   n                                        2734564.0        
#>   CommunityBelonging (%) SOMEWHAT STRONG   1015381.0 (37.1) 
#>                          SOMEWHAT WEAK      948838.7 (34.7) 
#>                          VERY STRONG        297141.6 (10.9) 
#>                          VERY WEAK          473202.7 (17.3) 
#>   Age (%)                15 TO 24 YEARS     795568.6 (29.1) 
#>                          25 TO 34 YEARS     564720.6 (20.7) 
#>                          35 TO 44 YEARS     457821.1 (16.7) 
#>                          35 TO 54 YEARS     447324.8 (16.4) 
#>                          55 TO 64 YEARS     319374.3 (11.7) 
#>                          65 years or older  149754.6 ( 5.5) 
#>   Sex (%)                FEMALE            1312605.0 (48.0) 
#>                          MALE              1421958.9 (52.0) 
#>   RaceEthnicity (%)      NON-WHITE          565784.5 (20.7) 
#>                          WHITE             2168779.4 (79.3) 
#>   MainIncome (%)         EI/WORKER'S COMP   245758.5 ( 9.0) 
#>                          EMPLOYMENT INC.   2059689.1 (75.3) 
#>                          NOT STATED          60101.5 ( 2.2) 
#>                          OTHER              116260.6 ( 4.3) 
#>                          SENIOR BENEFITS    252754.4 ( 9.2)

# table 1 stratified by MHcondition
tab1b <- svyCreateTableOne(vars = c("CommunityBelonging", "Age", "Sex", "RaceEthnicity", "MainIncome"), 
                           strata = "MHcondition", data = svy.analytic)
print(tab1b, showAllLevels = TRUE)
#>                         Stratified by MHcondition
#>                          level             Good             Poor or Fair    
#>   n                                        949208.1         973112.1        
#>   CommunityBelonging (%) SOMEWHAT STRONG   346216.2 (36.5)  273163.9 (28.1) 
#>                          SOMEWHAT WEAK     378431.4 (39.9)  365731.0 (37.6) 
#>                          VERY STRONG        97528.8 (10.3)   69438.0 ( 7.1) 
#>                          VERY WEAK         127031.7 (13.4)  264779.1 (27.2) 
#>   Age (%)                15 TO 24 YEARS    286103.0 (30.1)  187610.9 (19.3) 
#>                          25 TO 34 YEARS    189084.8 (19.9)  184528.6 (19.0) 
#>                          35 TO 44 YEARS    140796.4 (14.8)  199172.7 (20.5) 
#>                          35 TO 54 YEARS    171968.1 (18.1)  194689.1 (20.0) 
#>                          55 TO 64 YEARS    109114.5 (11.5)  148432.2 (15.3) 
#>                          65 years or older  52141.3 ( 5.5)   58678.6 ( 6.0) 
#>   Sex (%)                FEMALE            436830.4 (46.0)  582311.1 (59.8) 
#>                          MALE              512377.7 (54.0)  390801.0 (40.2) 
#>   RaceEthnicity (%)      NON-WHITE         167887.9 (17.7)  220525.5 (22.7) 
#>                          WHITE             781320.2 (82.3)  752586.6 (77.3) 
#>   MainIncome (%)         EI/WORKER'S COMP   66003.9 ( 7.0)  151095.4 (15.5) 
#>                          EMPLOYMENT INC.   752208.0 (79.2)  608703.4 (62.6) 
#>                          NOT STATED         23589.0 ( 2.5)   28371.0 ( 2.9) 
#>                          OTHER              32247.9 ( 3.4)   68453.1 ( 7.0) 
#>                          SENIOR BENEFITS    75159.3 ( 7.9)  116489.2 (12.0) 
#>                         Stratified by MHcondition
#>                          Very good/excellent p      test
#>   n                      812243.7                       
#>   CommunityBelonging (%) 396000.8 (48.8)     <0.001     
#>                          204676.3 (25.2)                
#>                          130174.7 (16.0)                
#>                           81391.9 (10.0)                
#>   Age (%)                321854.6 (39.6)     <0.001     
#>                          191107.2 (23.5)                
#>                          117852.1 (14.5)                
#>                           80667.6 ( 9.9)                
#>                           61827.6 ( 7.6)                
#>                           38934.6 ( 4.8)                
#>   Sex (%)                293463.5 (36.1)     <0.001     
#>                          518780.2 (63.9)                
#>   RaceEthnicity (%)      177371.1 (21.8)      0.219     
#>                          634872.7 (78.2)                
#>   MainIncome (%)          28659.2 ( 3.5)     <0.001     
#>                          698777.7 (86.0)                
#>                            8141.5 ( 1.0)                
#>                           15559.6 ( 1.9)                
#>                           61105.8 ( 7.5)

Setting up the design

Let us redefine the reference categories within the survey design and convert the design to use replicate weights.

w.design <- svydesign(id=~1, weights=~Weight, data=analytic)
w.design <- update(w.design , MHcondition2 = relevel(MHcondition, ref = "Poor or Fair"),
                  CommunityBelonging2 = relevel(CommunityBelonging, ref = "VERY WEAK"),
                  Age2 = relevel(Age, ref = "65 years or older"),
                  Sex2 = relevel(Sex, ref = "FEMALE"),
                  RaceEthnicity2 = relevel(RaceEthnicity, ref = "NON-WHITE"))

# Convert a survey design to use replicate weights
w.design2 <- as.svrepdesign(w.design, type = "bootstrap" , replicates = 50)

Multinomial Model fitting

Now, we will fit the design-adjusted multinomial logistic regression:

require(svyVGAM)
fit5 <- svy_vglm(MHcondition2 ~ CommunityBelonging2 + Sex2 + Age2 + RaceEthnicity2 + MainIncome,
                    design = w.design2, family = multinomial)
#> Warning in vglm.fitter(x = x, y = y, w = w, offset = offset, Xm2 = Xm2, :
#> iterations terminated because half-step sizes are very small
#> Warning in vglm.fitter(x = x, y = y, w = w, offset = offset, Xm2 = Xm2, : some
#> quantities such as z, residuals, SEs may be inaccurate due to convergence at a
#> half-step
kable(round(exp(cbind(coef(fit5), confint(fit5))),2))
2.5 % 97.5 %
(Intercept):1 16.57 7.53 36.46
(Intercept):2 3.24 1.21 8.69
CommunityBelonging2SOMEWHAT STRONG:1 0.22 0.13 0.37
CommunityBelonging2SOMEWHAT STRONG:2 0.56 0.37 0.85
CommunityBelonging2SOMEWHAT WEAK:1 0.58 0.35 0.95
CommunityBelonging2SOMEWHAT WEAK:2 1.18 0.73 1.90
CommunityBelonging2VERY STRONG:1 0.15 0.08 0.29
CommunityBelonging2VERY STRONG:2 0.46 0.25 0.84
Sex2MALE:1 0.39 0.29 0.51
Sex2MALE:2 0.68 0.50 0.93
Age215 TO 24 YEARS:1 0.56 0.28 1.12
Age215 TO 24 YEARS:2 0.65 0.37 1.12
Age225 TO 34 YEARS:1 0.83 0.39 1.76
Age225 TO 34 YEARS:2 0.68 0.37 1.28
Age235 TO 44 YEARS:1 1.74 0.77 3.90
Age235 TO 44 YEARS:2 0.91 0.48 1.73
Age235 TO 54 YEARS:1 2.02 0.96 4.23
Age235 TO 54 YEARS:2 1.48 0.72 3.01
Age255 TO 64 YEARS:1 1.91 0.83 4.41
Age255 TO 64 YEARS:2 1.33 0.67 2.64
RaceEthnicity2WHITE:1 0.91 0.64 1.29
RaceEthnicity2WHITE:2 1.22 0.83 1.81
MainIncomeEMPLOYMENT INC.:1 0.25 0.14 0.44
MainIncomeEMPLOYMENT INC.:2 0.58 0.30 1.15
MainIncomeNOT STATED:1 0.91 0.23 3.59
MainIncomeNOT STATED:2 1.37 0.30 6.25
MainIncomeOTHER:1 1.43 0.47 4.39
MainIncomeOTHER:2 1.30 0.49 3.47
MainIncomeSENIOR BENEFITS:1 0.39 0.18 0.83
MainIncomeSENIOR BENEFITS:2 0.49 0.21 1.13

Ordinal Regression

Ordering outcome

Let’s define MHcondition as an ordinal outcome. We can do it by using ordered = TRUE in the factor function.

# Ordinal outcome
analytic$MHcondition3 <- factor(analytic$MHcondition, 
                               levels = c("Poor or Fair", "Good", "Very good or excellent"), 
                      ordered = TRUE)

Ordinal logistic

Let’s fit the ordinal logistic using the polr function:

require(MASS)
fit5o1 <- polr(MHcondition3 ~CommunityBelonging2 + 
                  Sex2 + Age2 + RaceEthnicity2 + MainIncome, 
                data=analytic)
kable(round(exp(cbind(coef(fit5o1), confint(fit5o1))),2))
#> Waiting for profiling to be done...
#> 
#> Re-fitting to get Hessian
2.5 % 97.5 %
CommunityBelonging2SOMEWHAT STRONG 2.69 2.05 3.55
CommunityBelonging2SOMEWHAT WEAK 1.83 1.40 2.41
CommunityBelonging2VERY STRONG 3.15 2.15 4.66
Sex2MALE 1.30 1.07 1.58
Age215 TO 24 YEARS 0.69 0.42 1.11
Age225 TO 34 YEARS 0.60 0.36 0.98
Age235 TO 44 YEARS 0.32 0.19 0.53
Age235 TO 54 YEARS 0.37 0.23 0.59
Age255 TO 64 YEARS 0.36 0.22 0.56
RaceEthnicity2WHITE 1.26 0.97 1.63
MainIncomeEMPLOYMENT INC. 2.26 1.68 3.06
MainIncomeNOT STATED 1.53 0.79 2.94
MainIncomeOTHER 1.15 0.70 1.90
MainIncomeSENIOR BENEFITS 1.10 0.70 1.70

Ordinal logistic for complex survey

The same as before, we can set up the design, relevel variables within the design, define MHcondition as an ordinal variable, and then fit the design-adjusted ordinal logistic regression.

w.design <- svydesign(id=~1, weights=~Weight, data=analytic)
w.design<-update(w.design , 
                  CommunityBelonging2=relevel(CommunityBelonging, ref="VERY WEAK"),
                  Age2=relevel(Age, ref="65 years or older"),
                  Sex2=relevel(Sex, ref="FEMALE"),
                  RaceEthnicity2=relevel(RaceEthnicity, ref="NON-WHITE"),
                  MHcondition3 = factor(MHcondition, levels=c("Poor or Fair", "Good", 
                                                              "Very good or excellent"), 
                                        ordered=TRUE))

# Design-adjusted Ordinal logistic
fit5o <- svyolr(MHcondition3 ~CommunityBelonging2 + Sex2 + Age2 + RaceEthnicity2 + MainIncome, 
                design=w.design)
kable(round(exp(cbind(coef(fit5o), confint(fit5o))),2))
2.5 % 97.5 %
CommunityBelonging2SOMEWHAT STRONG 2.49 1.65 3.75
CommunityBelonging2SOMEWHAT WEAK 2.04 1.37 3.05
CommunityBelonging2VERY STRONG 3.02 1.74 5.26
Sex2MALE 1.76 1.30 2.38
Age215 TO 24 YEARS 1.13 0.52 2.44
Age225 TO 34 YEARS 0.77 0.34 1.74
Age235 TO 44 YEARS 0.52 0.24 1.13
Age235 TO 54 YEARS 0.70 0.32 1.52
Age255 TO 64 YEARS 0.69 0.32 1.46
RaceEthnicity2WHITE 1.31 0.90 1.91
MainIncomeEMPLOYMENT INC. 2.37 1.53 3.67
MainIncomeNOT STATED 1.42 0.52 3.86
MainIncomeOTHER 0.88 0.39 2.00
MainIncomeSENIOR BENEFITS 1.26 0.66 2.41
Poor or Fair|Good 4.79 1.92 11.91
Good|Very good or excellent 2636603.90 996905.04 6973262.13

Assessing model fit

We can use the regTermTest function from the survey package to do the Wald test of a regression coefficient.

regTermTest(fit5o, ~CommunityBelonging2 , df = Inf) 
#> Wald test for CommunityBelonging2
#>  in svyolr(MHcondition3 ~ CommunityBelonging2 + Sex2 + Age2 + RaceEthnicity2 + 
#>     MainIncome, design = w.design)
#> Chisq =  24.53941  on  3  df: p= 1.9272e-05
regTermTest(fit5o, ~Age2 , df = Inf) 
#> Wald test for Age2
#>  in svyolr(MHcondition3 ~ CommunityBelonging2 + Sex2 + Age2 + RaceEthnicity2 + 
#>     MainIncome, design = w.design)
#> Chisq =  14.00491  on  5  df: p= 0.015578
regTermTest(fit5o, ~Sex2 , df = Inf)
#> Wald test for Sex2
#>  in svyolr(MHcondition3 ~ CommunityBelonging2 + Sex2 + Age2 + RaceEthnicity2 + 
#>     MainIncome, design = w.design)
#> Chisq =  13.46032  on  1  df: p= 0.00024366
regTermTest(fit5o, ~RaceEthnicity2 , df = Inf)
#> Wald test for RaceEthnicity2
#>  in svyolr(MHcondition3 ~ CommunityBelonging2 + Sex2 + Age2 + RaceEthnicity2 + 
#>     MainIncome, design = w.design)
#> Chisq =  2.002794  on  1  df: p= 0.15701
regTermTest(fit5o, ~MainIncome , df = Inf)
#> Wald test for MainIncome
#>  in svyolr(MHcondition3 ~ CommunityBelonging2 + Sex2 + Age2 + RaceEthnicity2 + 
#>     MainIncome, design = w.design)
#> Chisq =  22.13656  on  4  df: p= 0.00018826

Video content (optional)

Tip

For those who prefer a video walkthrough, feel free to watch the video below, which offers a description of an earlier version of the above content.