Performance with NA

This is a tutorial of how to estimate model performance while analyzing survey data with missing values (for predictive goals).

# Load required packages
library(survey)
library(ROCR)
library(WeightedROC)

Useful functions

The functions below could be helpful in

  • svyROCw3: calculating area under the ROC curve (AUC) value with survey data with logistic regression
  • AL.gof3: testing Archer-Lemeshow goodness of fit for survey data with logistic regression
knitr::opts_chunk$set(echo = TRUE)

# ROC curve for Survey Data with Logistic Regression
svyROCw3 <- function(fit=fit7,outcome=analytic2$CVD=="event", weight = NULL,plot=FALSE){
  if (is.null(weight)){ # require(ROCR)
    prob <- predict(fit, type = "response")
  pred <- prediction(as.vector(prob), outcome)
  perf <- performance(pred, "tpr", "fpr")
  auc <- performance(pred, measure = "auc")
  auc <- auc@y.values[[1]]
  if (plot == TRUE){
    roc.data <- data.frame(fpr = unlist(perf@x.values), tpr = unlist(perf@y.values), 
      model = "Logistic")
    with(data = roc.data,plot(fpr, tpr, type="l", xlim=c(0,1), ylim=c(0,1), lwd=1,
     xlab="1 - specificity", ylab="Sensitivity",
     main = paste("AUC = ", round(auc,3))))
  mtext("Unweighted ROC")
  abline(0,1, lty=2)
  }
  } else { # library(WeightedROC)
    outcome <- as.numeric(outcome)
  pred <- predict(fit, type = "response")
  tp.fp <- WeightedROC(pred, outcome, weight)
  auc <- WeightedAUC(tp.fp)
  if (plot == TRUE){
    with(data = tp.fp,plot(FPR, TPR, type="l", xlim=c(0,1), ylim=c(0,1), lwd=1,
     xlab="1 - specificity", ylab="Sensitivity",
     main = paste("AUC = ", round(auc,3))))
  abline(0,1, lty=2)
  mtext("Weighted ROC")
  }
  }
  return(auc)
}

# Archer-Lemeshow Goodness of Fit Test for Survey Data with Logistic Regression
AL.gof3 <- function(fit=fit7, data = analytic2, weight = "weight", psu = "psu", 
                    strata= "strata"){
  r <- residuals(fit, type="response") 
  f<-fitted(fit) 
  breaks.g <- c(-Inf, quantile(f,  (1:9)/10), Inf)
  breaks.g <- breaks.g + seq_along(breaks.g) * .Machine$double.eps
  g<- cut(f, breaks.g)
  data2g <- cbind(data,r,g)
  if (is.null(psu)){
    newdesign <- svydesign(id=~1,
                         weights=as.formula(paste0("~",weight)), 
                        data=data2g, nest = TRUE)
  }
  if (!is.null(psu)) {
    newdesign <- svydesign(id=as.formula(paste0("~",psu)),
                         strata=as.formula(paste0("~",strata)),
                         weights=as.formula(paste0("~",weight)), 
                        data=data2g, nest = TRUE)
  }
  decilemodel<- svyglm(r~g, design=newdesign) 
  res <- regTermTest(decilemodel, ~g)
  return(as.numeric(res$p)) 
}

Load imputed 5 sets of data

Saved at the end of Lab 6 part 2.

# Saved from last lab
load("Data/missingdata/missOA123CVDnorth.RData")
str(allImputations)
#> List of 2
#>  $ imputations:List of 5
#>   ..$ :'data.frame': 135448 obs. of  19 variables:
#>   .. ..$ .imp   : int [1:135448] 1 1 1 1 1 1 1 1 1 1 ...
#>   .. ..$ .id    : int [1:135448] 1 2 3 4 5 6 7 8 9 10 ...
#>   .. ..$ CVD    : Factor w/ 2 levels "0 event","event": 1 1 1 1 1 1 1 1 1 1 ...
#>   .. ..$ age    : Factor w/ 4 levels "20-39 years",..: 1 2 1 2 3 1 3 2 1 2 ...
#>   .. ..$ sex    : Factor w/ 2 levels "Female","Male": 2 2 1 2 2 2 1 1 1 1 ...
#>   .. ..$ income : Factor w/ 4 levels "$29,999 or less",..: 4 1 3 4 2 2 1 4 4 3 ...
#>   .. ..$ race   : Factor w/ 2 levels "Non-white","White": 2 2 2 2 2 2 2 2 2 2 ...
#>   .. ..$ bmicat : Factor w/ 3 levels "Normal","Overweight",..: 1 2 1 1 2 1 2 2 3 2 ...
#>   .. ..$ phyact : Factor w/ 3 levels "Active","Inactive",..: 2 1 2 2 3 3 2 1 2 2 ...
#>   .. ..$ smoke  : Factor w/ 3 levels "Current smoker",..: 2 2 2 2 2 3 2 2 3 3 ...
#>   .. ..$ fruit  : Factor w/ 3 levels "0-3 daily serving",..: 2 2 2 2 2 1 2 3 3 2 ...
#>   .. ..$ painmed: Factor w/ 2 levels "No","Yes": 2 1 2 1 1 2 2 2 2 2 ...
#>   .. ..$ ht     : Factor w/ 2 levels "No","Yes": 1 1 1 1 1 1 2 1 1 1 ...
#>   .. ..$ copd   : Factor w/ 2 levels "No","Yes": 1 1 1 1 1 1 1 1 1 1 ...
#>   .. ..$ diab   : Factor w/ 2 levels "No","Yes": 1 1 1 1 1 1 1 1 1 1 ...
#>   .. ..$ edu    : Factor w/ 4 levels "< 2ndary","2nd grad.",..: 4 4 4 4 2 4 1 4 4 4 ...
#>   .. ..$ weight : num [1:135448] 56.1 56.1 39.2 44 59.9 ...
#>   .. ..$ OA     : Factor w/ 2 levels "Control","OA": 1 1 1 1 1 1 1 1 1 1 ...
#>   .. ..$ ID     : int [1:135448] 1 3 6 7 12 13 14 16 20 21 ...
#>   ..$ :'data.frame': 135448 obs. of  19 variables:
#>   .. ..$ .imp   : int [1:135448] 2 2 2 2 2 2 2 2 2 2 ...
#>   .. ..$ .id    : int [1:135448] 1 2 3 4 5 6 7 8 9 10 ...
#>   .. ..$ CVD    : Factor w/ 2 levels "0 event","event": 1 1 1 1 1 1 1 1 1 1 ...
#>   .. ..$ age    : Factor w/ 4 levels "20-39 years",..: 1 2 1 2 3 1 3 2 1 2 ...
#>   .. ..$ sex    : Factor w/ 2 levels "Female","Male": 2 2 1 2 2 2 1 1 1 1 ...
#>   .. ..$ income : Factor w/ 4 levels "$29,999 or less",..: 4 1 3 4 2 2 1 4 4 3 ...
#>   .. ..$ race   : Factor w/ 2 levels "Non-white","White": 2 2 2 2 2 2 2 2 2 2 ...
#>   .. ..$ bmicat : Factor w/ 3 levels "Normal","Overweight",..: 1 2 1 1 2 1 2 2 3 2 ...
#>   .. ..$ phyact : Factor w/ 3 levels "Active","Inactive",..: 2 1 2 2 3 3 2 1 2 2 ...
#>   .. ..$ smoke  : Factor w/ 3 levels "Current smoker",..: 2 2 2 2 2 3 2 2 3 3 ...
#>   .. ..$ fruit  : Factor w/ 3 levels "0-3 daily serving",..: 2 2 2 2 2 1 2 3 3 2 ...
#>   .. ..$ painmed: Factor w/ 2 levels "No","Yes": 2 2 2 2 2 1 2 2 2 2 ...
#>   .. ..$ ht     : Factor w/ 2 levels "No","Yes": 1 1 1 1 1 1 2 1 1 1 ...
#>   .. ..$ copd   : Factor w/ 2 levels "No","Yes": 1 1 1 1 1 1 1 1 1 1 ...
#>   .. ..$ diab   : Factor w/ 2 levels "No","Yes": 1 1 1 1 1 1 1 1 1 1 ...
#>   .. ..$ edu    : Factor w/ 4 levels "< 2ndary","2nd grad.",..: 4 4 4 4 2 4 1 4 4 4 ...
#>   .. ..$ weight : num [1:135448] 56.1 56.1 39.2 44 59.9 ...
#>   .. ..$ OA     : Factor w/ 2 levels "Control","OA": 1 1 1 1 1 1 1 1 1 1 ...
#>   .. ..$ ID     : int [1:135448] 1 3 6 7 12 13 14 16 20 21 ...
#>   ..$ :'data.frame': 135448 obs. of  19 variables:
#>   .. ..$ .imp   : int [1:135448] 3 3 3 3 3 3 3 3 3 3 ...
#>   .. ..$ .id    : int [1:135448] 1 2 3 4 5 6 7 8 9 10 ...
#>   .. ..$ CVD    : Factor w/ 2 levels "0 event","event": 1 1 1 1 1 1 1 1 1 1 ...
#>   .. ..$ age    : Factor w/ 4 levels "20-39 years",..: 1 2 1 2 3 1 3 2 1 2 ...
#>   .. ..$ sex    : Factor w/ 2 levels "Female","Male": 2 2 1 2 2 2 1 1 1 1 ...
#>   .. ..$ income : Factor w/ 4 levels "$29,999 or less",..: 4 1 3 4 2 2 1 4 4 3 ...
#>   .. ..$ race   : Factor w/ 2 levels "Non-white","White": 2 2 2 2 2 2 2 2 2 2 ...
#>   .. ..$ bmicat : Factor w/ 3 levels "Normal","Overweight",..: 1 2 1 1 2 1 2 2 3 2 ...
#>   .. ..$ phyact : Factor w/ 3 levels "Active","Inactive",..: 2 1 2 2 3 3 2 1 2 2 ...
#>   .. ..$ smoke  : Factor w/ 3 levels "Current smoker",..: 2 2 2 2 2 3 2 2 3 3 ...
#>   .. ..$ fruit  : Factor w/ 3 levels "0-3 daily serving",..: 2 2 2 2 2 1 2 3 3 2 ...
#>   .. ..$ painmed: Factor w/ 2 levels "No","Yes": 2 2 2 1 2 2 2 2 2 2 ...
#>   .. ..$ ht     : Factor w/ 2 levels "No","Yes": 1 1 1 1 1 1 2 1 1 1 ...
#>   .. ..$ copd   : Factor w/ 2 levels "No","Yes": 1 1 1 1 1 1 1 1 1 1 ...
#>   .. ..$ diab   : Factor w/ 2 levels "No","Yes": 1 1 1 1 1 1 1 1 1 1 ...
#>   .. ..$ edu    : Factor w/ 4 levels "< 2ndary","2nd grad.",..: 4 4 4 4 2 4 1 4 4 4 ...
#>   .. ..$ weight : num [1:135448] 56.1 56.1 39.2 44 59.9 ...
#>   .. ..$ OA     : Factor w/ 2 levels "Control","OA": 1 1 1 1 1 1 1 1 1 1 ...
#>   .. ..$ ID     : int [1:135448] 1 3 6 7 12 13 14 16 20 21 ...
#>   ..$ :'data.frame': 135448 obs. of  19 variables:
#>   .. ..$ .imp   : int [1:135448] 4 4 4 4 4 4 4 4 4 4 ...
#>   .. ..$ .id    : int [1:135448] 1 2 3 4 5 6 7 8 9 10 ...
#>   .. ..$ CVD    : Factor w/ 2 levels "0 event","event": 1 1 1 1 1 1 1 1 1 1 ...
#>   .. ..$ age    : Factor w/ 4 levels "20-39 years",..: 1 2 1 2 3 1 3 2 1 2 ...
#>   .. ..$ sex    : Factor w/ 2 levels "Female","Male": 2 2 1 2 2 2 1 1 1 1 ...
#>   .. ..$ income : Factor w/ 4 levels "$29,999 or less",..: 4 1 3 4 2 2 1 4 4 3 ...
#>   .. ..$ race   : Factor w/ 2 levels "Non-white","White": 2 2 2 2 2 2 2 2 2 2 ...
#>   .. ..$ bmicat : Factor w/ 3 levels "Normal","Overweight",..: 1 2 1 1 2 1 2 2 3 2 ...
#>   .. ..$ phyact : Factor w/ 3 levels "Active","Inactive",..: 2 1 2 2 3 3 2 1 2 2 ...
#>   .. ..$ smoke  : Factor w/ 3 levels "Current smoker",..: 2 2 2 2 2 3 2 2 3 3 ...
#>   .. ..$ fruit  : Factor w/ 3 levels "0-3 daily serving",..: 2 2 2 2 2 1 2 3 3 2 ...
#>   .. ..$ painmed: Factor w/ 2 levels "No","Yes": 2 2 2 2 1 1 2 1 2 2 ...
#>   .. ..$ ht     : Factor w/ 2 levels "No","Yes": 1 1 1 1 1 1 2 1 1 1 ...
#>   .. ..$ copd   : Factor w/ 2 levels "No","Yes": 1 1 1 1 1 1 1 1 1 1 ...
#>   .. ..$ diab   : Factor w/ 2 levels "No","Yes": 1 1 1 1 1 1 1 1 1 1 ...
#>   .. ..$ edu    : Factor w/ 4 levels "< 2ndary","2nd grad.",..: 4 4 4 4 2 4 1 4 4 4 ...
#>   .. ..$ weight : num [1:135448] 56.1 56.1 39.2 44 59.9 ...
#>   .. ..$ OA     : Factor w/ 2 levels "Control","OA": 1 1 1 1 1 1 1 1 1 1 ...
#>   .. ..$ ID     : int [1:135448] 1 3 6 7 12 13 14 16 20 21 ...
#>   ..$ :'data.frame': 135448 obs. of  19 variables:
#>   .. ..$ .imp   : int [1:135448] 5 5 5 5 5 5 5 5 5 5 ...
#>   .. ..$ .id    : int [1:135448] 1 2 3 4 5 6 7 8 9 10 ...
#>   .. ..$ CVD    : Factor w/ 2 levels "0 event","event": 1 1 1 1 1 1 1 1 1 1 ...
#>   .. ..$ age    : Factor w/ 4 levels "20-39 years",..: 1 2 1 2 3 1 3 2 1 2 ...
#>   .. ..$ sex    : Factor w/ 2 levels "Female","Male": 2 2 1 2 2 2 1 1 1 1 ...
#>   .. ..$ income : Factor w/ 4 levels "$29,999 or less",..: 4 1 3 4 2 2 1 4 4 3 ...
#>   .. ..$ race   : Factor w/ 2 levels "Non-white","White": 2 2 2 2 2 2 2 2 2 2 ...
#>   .. ..$ bmicat : Factor w/ 3 levels "Normal","Overweight",..: 1 2 1 1 2 1 2 2 3 2 ...
#>   .. ..$ phyact : Factor w/ 3 levels "Active","Inactive",..: 2 1 2 2 3 3 2 1 2 2 ...
#>   .. ..$ smoke  : Factor w/ 3 levels "Current smoker",..: 2 2 2 2 2 3 2 2 3 3 ...
#>   .. ..$ fruit  : Factor w/ 3 levels "0-3 daily serving",..: 2 2 2 2 2 1 2 3 3 2 ...
#>   .. ..$ painmed: Factor w/ 2 levels "No","Yes": 1 1 2 2 2 2 1 2 2 2 ...
#>   .. ..$ ht     : Factor w/ 2 levels "No","Yes": 1 1 1 1 1 1 2 1 1 1 ...
#>   .. ..$ copd   : Factor w/ 2 levels "No","Yes": 1 1 1 1 1 1 1 1 1 1 ...
#>   .. ..$ diab   : Factor w/ 2 levels "No","Yes": 1 1 1 1 1 1 1 1 1 1 ...
#>   .. ..$ edu    : Factor w/ 4 levels "< 2ndary","2nd grad.",..: 4 4 4 4 2 4 1 4 4 4 ...
#>   .. ..$ weight : num [1:135448] 56.1 56.1 39.2 44 59.9 ...
#>   .. ..$ OA     : Factor w/ 2 levels "Control","OA": 1 1 1 1 1 1 1 1 1 1 ...
#>   .. ..$ ID     : int [1:135448] 1 3 6 7 12 13 14 16 20 21 ...
#>  $ call       : language imputationList(list(subset(impdata, subset = .imp == 1), subset(impdata,      subset = .imp == 2), subset(impdata| __truncated__ ...
#>  - attr(*, "class")= chr "imputationList"

Estimating treatment effect

Individual beta estimates

library(survey)
w.design <- svydesign(ids=~1, weights=~weight,
                           data = allImputations)
model.formula <- as.formula("I(CVD=='event') ~ OA + age + sex +
                            income + race + painmed + ht +
                            copd + diab + OA:painmed +
                            age:copd + sex:copd")
estimates <- with(w.design, svyglm(model.formula, family=quasibinomial))
estimates
#> [[1]]
#> Independent Sampling design (with replacement)
#> svydesign(ids = ids, probs = probs, strata = strata, variables = variables, 
#>     fpc = fpc, nest = nest, check.strata = check.strata, weights = weights, 
#>     data = d, pps = pps, ...)
#> 
#> Call:  svyglm(formula = model.formula, design = .design, family = quasibinomial)
#> 
#> Coefficients:
#>            (Intercept)                    OAOA          age40-49 years  
#>                -5.6809                  1.1063                  0.7911  
#>         age50-59 years          age60-64 years                 sexMale  
#>                 1.6233                  2.0076                  0.6278  
#>  income$30,000-$49,999   income$50,000-$79,999   income$80,000 or more  
#>                -0.4347                 -0.6031                 -0.6805  
#>              raceWhite              painmedYes                   htYes  
#>                 0.2136                  0.8277                  1.0344  
#>                copdYes                 diabYes         OAOA:painmedYes  
#>                 1.9143                  0.8166                 -0.8183  
#> age40-49 years:copdYes  age50-59 years:copdYes  age60-64 years:copdYes  
#>                -1.1404                 -0.7286                 -0.9360  
#>        sexMale:copdYes  
#>                 0.6167  
#> 
#> Degrees of Freedom: 135447 Total (i.e. Null);  135429 Residual
#> Null Deviance:       36680 
#> Residual Deviance: 31100     AIC: NA
#> 
#> [[2]]
#> Independent Sampling design (with replacement)
#> svydesign(ids = ids, probs = probs, strata = strata, variables = variables, 
#>     fpc = fpc, nest = nest, check.strata = check.strata, weights = weights, 
#>     data = d, pps = pps, ...)
#> 
#> Call:  svyglm(formula = model.formula, design = .design, family = quasibinomial)
#> 
#> Coefficients:
#>            (Intercept)                    OAOA          age40-49 years  
#>                -5.4837                  0.8766                  0.7781  
#>         age50-59 years          age60-64 years                 sexMale  
#>                 1.6090                  1.9901                  0.6083  
#>  income$30,000-$49,999   income$50,000-$79,999   income$80,000 or more  
#>                -0.4415                 -0.5969                 -0.6840  
#>              raceWhite              painmedYes                   htYes  
#>                 0.2374                  0.5877                  1.0413  
#>                copdYes                 diabYes         OAOA:painmedYes  
#>                 1.8837                  0.8054                 -0.5388  
#> age40-49 years:copdYes  age50-59 years:copdYes  age60-64 years:copdYes  
#>                -1.1303                 -0.6944                 -0.8545  
#>        sexMale:copdYes  
#>                 0.5857  
#> 
#> Degrees of Freedom: 135447 Total (i.e. Null);  135429 Residual
#> Null Deviance:       36680 
#> Residual Deviance: 31260     AIC: NA
#> 
#> [[3]]
#> Independent Sampling design (with replacement)
#> svydesign(ids = ids, probs = probs, strata = strata, variables = variables, 
#>     fpc = fpc, nest = nest, check.strata = check.strata, weights = weights, 
#>     data = d, pps = pps, ...)
#> 
#> Call:  svyglm(formula = model.formula, design = .design, family = quasibinomial)
#> 
#> Coefficients:
#>            (Intercept)                    OAOA          age40-49 years  
#>                -5.6365                  1.0389                  0.7815  
#>         age50-59 years          age60-64 years                 sexMale  
#>                 1.6140                  1.9986                  0.6180  
#>  income$30,000-$49,999   income$50,000-$79,999   income$80,000 or more  
#>                -0.4334                 -0.5945                 -0.6843  
#>              raceWhite              painmedYes                   htYes  
#>                 0.2042                  0.7963                  1.0386  
#>                copdYes                 diabYes         OAOA:painmedYes  
#>                 1.9468                  0.8020                 -0.7330  
#> age40-49 years:copdYes  age50-59 years:copdYes  age60-64 years:copdYes  
#>                -1.1185                 -0.7775                 -0.9414  
#>        sexMale:copdYes  
#>                 0.5548  
#> 
#> Degrees of Freedom: 135447 Total (i.e. Null);  135429 Residual
#> Null Deviance:       36680 
#> Residual Deviance: 31130     AIC: NA
#> 
#> [[4]]
#> Independent Sampling design (with replacement)
#> svydesign(ids = ids, probs = probs, strata = strata, variables = variables, 
#>     fpc = fpc, nest = nest, check.strata = check.strata, weights = weights, 
#>     data = d, pps = pps, ...)
#> 
#> Call:  svyglm(formula = model.formula, design = .design, family = quasibinomial)
#> 
#> Coefficients:
#>            (Intercept)                    OAOA          age40-49 years  
#>                -5.6811                  1.2937                  0.7838  
#>         age50-59 years          age60-64 years                 sexMale  
#>                 1.6248                  2.0097                  0.6285  
#>  income$30,000-$49,999   income$50,000-$79,999   income$80,000 or more  
#>                -0.4431                 -0.6012                 -0.6877  
#>              raceWhite              painmedYes                   htYes  
#>                 0.2325                  0.8179                  1.0401  
#>                copdYes                 diabYes         OAOA:painmedYes  
#>                 1.9368                  0.8022                 -1.0624  
#> age40-49 years:copdYes  age50-59 years:copdYes  age60-64 years:copdYes  
#>                -1.0381                 -0.7407                 -0.9356  
#>        sexMale:copdYes  
#>                 0.5422  
#> 
#> Degrees of Freedom: 135447 Total (i.e. Null);  135429 Residual
#> Null Deviance:       36680 
#> Residual Deviance: 31100     AIC: NA
#> 
#> [[5]]
#> Independent Sampling design (with replacement)
#> svydesign(ids = ids, probs = probs, strata = strata, variables = variables, 
#>     fpc = fpc, nest = nest, check.strata = check.strata, weights = weights, 
#>     data = d, pps = pps, ...)
#> 
#> Call:  svyglm(formula = model.formula, design = .design, family = quasibinomial)
#> 
#> Coefficients:
#>            (Intercept)                    OAOA          age40-49 years  
#>                -5.4487                  0.7569                  0.7752  
#>         age50-59 years          age60-64 years                 sexMale  
#>                 1.6021                  1.9847                  0.6107  
#>  income$30,000-$49,999   income$50,000-$79,999   income$80,000 or more  
#>                -0.4393                 -0.5977                 -0.6788  
#>              raceWhite              painmedYes                   htYes  
#>                 0.2351                  0.5457                  1.0421  
#>                copdYes                 diabYes         OAOA:painmedYes  
#>                 1.9235                  0.8057                 -0.3884  
#> age40-49 years:copdYes  age50-59 years:copdYes  age60-64 years:copdYes  
#>                -1.1275                 -0.7370                 -0.9152  
#>        sexMale:copdYes  
#>                 0.5871  
#> 
#> Degrees of Freedom: 135447 Total (i.e. Null);  135429 Residual
#> Null Deviance:       36680 
#> Residual Deviance: 31290     AIC: NA
#> 
#> attr(,"call")
#> with(w.design, svyglm(model.formula, family = quasibinomial))

Pooled / averaged estimates for beta and OR

library("mitools")
pooled.estimates <- MIcombine(estimates)
pooled.estimates
#> Multiple imputation results:
#>       with(w.design, svyglm(model.formula, family = quasibinomial))
#>       MIcombine.default(estimates)
#>                           results         se
#> (Intercept)            -5.5861842 0.19239878
#> OAOA                    1.0144758 0.27097789
#> age40-49 years          0.7819429 0.10165218
#> age50-59 years          1.6146502 0.09905167
#> age60-64 years          1.9981358 0.10380099
#> sexMale                 0.6186653 0.05484262
#> income$30,000-$49,999  -0.4383959 0.06787453
#> income$50,000-$79,999  -0.5987066 0.06593903
#> income$80,000 or more  -0.6830429 0.06907307
#> raceWhite               0.2245784 0.10567063
#> painmedYes              0.7150386 0.16508180
#> htYes                   1.0393147 0.05522682
#> copdYes                 1.9210219 0.65240500
#> diabYes                 0.8063590 0.08169615
#> OAOA:painmedYes        -0.7081707 0.32575000
#> age40-49 years:copdYes -1.1109564 0.69441469
#> age50-59 years:copdYes -0.7356335 0.67352566
#> age60-64 years:copdYes -0.9165474 0.65626790
#> sexMale:copdYes         0.5772949 0.30888502

sum.pooled <- summary(pooled.estimates,logeffect=TRUE, digits = 2)
#> Multiple imputation results:
#>       with(w.design, svyglm(model.formula, family = quasibinomial))
#>       MIcombine.default(estimates)
#>                        results      se (lower  upper) missInfo
#> (Intercept)             0.0037 0.00072 0.0025  0.0056     45 %
#> OAOA                    2.7579 0.74733 1.4778  5.1469     76 %
#> age40-49 years          2.1857 0.22218 1.7909  2.6676      0 %
#> age50-59 years          5.0261 0.49785 4.1392  6.1031      1 %
#> age60-64 years          7.3753 0.76556 6.0175  9.0394      1 %
#> sexMale                 1.8564 0.10181 1.6672  2.0672      4 %
#> income$30,000-$49,999   0.6451 0.04378 0.5647  0.7369      0 %
#> income$50,000-$79,999   0.5495 0.03623 0.4829  0.6253      0 %
#> income$80,000 or more   0.5051 0.03489 0.4411  0.5783      0 %
#> raceWhite               1.2518 0.13228 1.0176  1.5399      2 %
#> painmedYes              2.0443 0.33747 1.3628  3.0664     86 %
#> htYes                   2.8273 0.15614 2.5372  3.1505      0 %
#> copdYes                 6.8279 4.45458 1.9009 24.5255      0 %
#> diabYes                 2.2397 0.18298 1.9083  2.6287      1 %
#> OAOA:painmedYes         0.4925 0.16045 0.2275  1.0663     81 %
#> age40-49 years:copdYes  0.3292 0.22863 0.0844  1.2841      0 %
#> age50-59 years:copdYes  0.4792 0.32275 0.1280  1.7940      0 %
#> age60-64 years:copdYes  0.3999 0.26244 0.1105  1.4473      0 %
#> sexMale:copdYes         1.7812 0.55019 0.9723  3.2632      1 %
sum.pooled

Estimating model performance (AUC and AL)

Individual AUC estimates (with interactions)

library(ROCR)
library(WeightedROC)
model.formula <- as.formula("I(CVD=='event') ~ OA + age + sex +
                            income + race + painmed + ht +
                            copd + diab + OA:painmed +
                            age:copd + sex:copd")
AL.scalar <- AUC.scalar <- vector("list", 5)
for (i in 1:5) {
  analytic.i <- allImputations$imputations[[i]]
  w.design <- svydesign(id=~1, weights=~weight,
                        data=analytic.i)
  model.fit <- svyglm(model.formula, design=w.design, family=quasibinomial)
  auc <- svyROCw3(fit=model.fit,outcome=w.design$variables$CVD=='event', 
                  weight = w.design$variables$weight, plot = FALSE)
  
  AL <- AL.gof3(fit=model.fit, data = analytic.i, 
                   weight = "weight", 
                   psu = NULL, 
                   strata= NULL)
  AL.scalar[[i]] <- AL
  AUC.scalar[[i]] <- auc 
  cat("AUC calculated for data", i, "\n")
}
#> AUC calculated for data 1 
#> AUC calculated for data 2 
#> AUC calculated for data 3 
#> AUC calculated for data 4 
#> AUC calculated for data 5
str(AUC.scalar)
#> List of 5
#>  $ : num 0.8
#>  $ : num 0.795
#>  $ : num 0.798
#>  $ : num 0.8
#>  $ : num 0.794
AL.scalar
#> [[1]]
#> [1] 0.01243738
#> 
#> [[2]]
#> [1] 0.5471927
#> 
#> [[3]]
#> [1] 0.13081
#> 
#> [[4]]
#> [1] 0.644286
#> 
#> [[5]]
#> [1] 0.6049137

Averaged estimates for AUC (with interactions)

# summary of AUC
mean(unlist(AUC.scalar))
#> [1] 0.7973746
sd(unlist(AUC.scalar))
#> [1] 0.002688964
round(range(unlist(AUC.scalar)),3)
#> [1] 0.794 0.800
# p-values (from AL) by majority
sum(AL.scalar>0.05)
#> [1] 4

Model performance without interactions

Individual AUC estimates / AL p-values

library(ROCR)
library(WeightedROC)
AL.scalar <- AUC.scalar <- vector("list", 5)
model.formula <- as.formula("I(CVD=='event') ~ OA + age + sex +
                            income + race + painmed + ht +
                            copd + diab")
for (i in 1:5) {
  analytic.i <- allImputations$imputations[[i]]
  w.design <- svydesign(id=~1, weights=~weight,
                        data=analytic.i)
  model.fit <- svyglm(model.formula, design=w.design, family=quasibinomial)
  auc <- svyROCw3(fit=model.fit,outcome=w.design$variables$CVD=='event', 
                  weight = w.design$variables$weight, plot = FALSE)
  
  AL <- AL.gof3(fit=model.fit, data = analytic.i, 
                   weight = "weight", 
                   psu = NULL, 
                   strata= NULL)
  AL.scalar[[i]] <- AL
  AUC.scalar[[i]] <- auc 
  cat("AUC calculated for data", i, "\n")
}
#> AUC calculated for data 1 
#> AUC calculated for data 2 
#> AUC calculated for data 3 
#> AUC calculated for data 4 
#> AUC calculated for data 5
str(AUC.scalar)
#> List of 5
#>  $ : num 0.798
#>  $ : num 0.794
#>  $ : num 0.797
#>  $ : num 0.798
#>  $ : num 0.794
AL.scalar
#> [[1]]
#> [1] 0.01839624
#> 
#> [[2]]
#> [1] 0.2836256
#> 
#> [[3]]
#> [1] 0.02439659
#> 
#> [[4]]
#> [1] 0.8333214
#> 
#> [[5]]
#> [1] 0.2050434

Averaged estimates for AUC / majority of AL p-values

# summary of AUC
mean(unlist(AUC.scalar))
#> [1] 0.7964061
sd(unlist(AUC.scalar))
#> [1] 0.002002636
round(range(unlist(AUC.scalar)),3)
#> [1] 0.794 0.798
# p-values (from AL) by majority
sum(AL.scalar>0.05)
#> [1] 3

Appendix

User-written svyROCw3 and AL.gof3 functions

svyROCw3
#> function(fit=fit7,outcome=analytic2$CVD=="event", weight = NULL,plot=FALSE){
#>   if (is.null(weight)){ # require(ROCR)
#>     prob <- predict(fit, type = "response")
#>   pred <- prediction(as.vector(prob), outcome)
#>   perf <- performance(pred, "tpr", "fpr")
#>   auc <- performance(pred, measure = "auc")
#>   auc <- auc@y.values[[1]]
#>   if (plot == TRUE){
#>     roc.data <- data.frame(fpr = unlist(perf@x.values), tpr = unlist(perf@y.values), 
#>       model = "Logistic")
#>     with(data = roc.data,plot(fpr, tpr, type="l", xlim=c(0,1), ylim=c(0,1), lwd=1,
#>      xlab="1 - specificity", ylab="Sensitivity",
#>      main = paste("AUC = ", round(auc,3))))
#>   mtext("Unweighted ROC")
#>   abline(0,1, lty=2)
#>   }
#>   } else { # library(WeightedROC)
#>     outcome <- as.numeric(outcome)
#>   pred <- predict(fit, type = "response")
#>   tp.fp <- WeightedROC(pred, outcome, weight)
#>   auc <- WeightedAUC(tp.fp)
#>   if (plot == TRUE){
#>     with(data = tp.fp,plot(FPR, TPR, type="l", xlim=c(0,1), ylim=c(0,1), lwd=1,
#>      xlab="1 - specificity", ylab="Sensitivity",
#>      main = paste("AUC = ", round(auc,3))))
#>   abline(0,1, lty=2)
#>   mtext("Weighted ROC")
#>   }
#>   }
#>   return(auc)
#> }
#> <bytecode: 0x000002492d933ec0>
AL.gof3
#> function(fit=fit7, data = analytic2, weight = "weight", psu = "psu", 
#>                     strata= "strata"){
#>   r <- residuals(fit, type="response") 
#>   f<-fitted(fit) 
#>   breaks.g <- c(-Inf, quantile(f,  (1:9)/10), Inf)
#>   breaks.g <- breaks.g + seq_along(breaks.g) * .Machine$double.eps
#>   g<- cut(f, breaks.g)
#>   data2g <- cbind(data,r,g)
#>   if (is.null(psu)){
#>     newdesign <- svydesign(id=~1,
#>                          weights=as.formula(paste0("~",weight)), 
#>                         data=data2g, nest = TRUE)
#>   }
#>   if (!is.null(psu)) {
#>     newdesign <- svydesign(id=as.formula(paste0("~",psu)),
#>                          strata=as.formula(paste0("~",strata)),
#>                          weights=as.formula(paste0("~",weight)), 
#>                         data=data2g, nest = TRUE)
#>   }
#>   decilemodel<- svyglm(r~g, design=newdesign) 
#>   res <- regTermTest(decilemodel, ~g)
#>   return(as.numeric(res$p)) 
#> }
#> <bytecode: 0x000002492dae1c48>