Performance with NA
This is a tutorial of how to estimate model performance while analyzing survey data with missing values (for predictive goals).
Useful functions
The functions below could be helpful in
-
svyROCw3
: calculating area under the ROC curve (AUC) value with survey data with logistic regression
-
AL.gof3
: testing Archer-Lemeshow goodness of fit for survey data with logistic regression
knitr::opts_chunk$set(echo = TRUE)
# ROC curve for Survey Data with Logistic Regression
svyROCw3 <- function(fit=fit7,outcome=analytic2$CVD=="event", weight = NULL,plot=FALSE){
if (is.null(weight)){ # require(ROCR)
prob <- predict(fit, type = "response")
pred <- prediction(as.vector(prob), outcome)
perf <- performance(pred, "tpr", "fpr")
auc <- performance(pred, measure = "auc")
auc <- auc@y.values[[1]]
if (plot == TRUE){
roc.data <- data.frame(fpr = unlist(perf@x.values), tpr = unlist(perf@y.values),
model = "Logistic")
with(data = roc.data,plot(fpr, tpr, type="l", xlim=c(0,1), ylim=c(0,1), lwd=1,
xlab="1 - specificity", ylab="Sensitivity",
main = paste("AUC = ", round(auc,3))))
mtext("Unweighted ROC")
abline(0,1, lty=2)
}
} else { # library(WeightedROC)
outcome <- as.numeric(outcome)
pred <- predict(fit, type = "response")
tp.fp <- WeightedROC(pred, outcome, weight)
auc <- WeightedAUC(tp.fp)
if (plot == TRUE){
with(data = tp.fp,plot(FPR, TPR, type="l", xlim=c(0,1), ylim=c(0,1), lwd=1,
xlab="1 - specificity", ylab="Sensitivity",
main = paste("AUC = ", round(auc,3))))
abline(0,1, lty=2)
mtext("Weighted ROC")
}
}
return(auc)
}
# Archer-Lemeshow Goodness of Fit Test for Survey Data with Logistic Regression
AL.gof3 <- function(fit=fit7, data = analytic2, weight = "weight", psu = "psu",
strata= "strata"){
r <- residuals(fit, type="response")
f<-fitted(fit)
breaks.g <- c(-Inf, quantile(f, (1:9)/10), Inf)
breaks.g <- breaks.g + seq_along(breaks.g) * .Machine$double.eps
g<- cut(f, breaks.g)
data2g <- cbind(data,r,g)
if (is.null(psu)){
newdesign <- svydesign(id=~1,
weights=as.formula(paste0("~",weight)),
data=data2g, nest = TRUE)
}
if (!is.null(psu)) {
newdesign <- svydesign(id=as.formula(paste0("~",psu)),
strata=as.formula(paste0("~",strata)),
weights=as.formula(paste0("~",weight)),
data=data2g, nest = TRUE)
}
decilemodel<- svyglm(r~g, design=newdesign)
res <- regTermTest(decilemodel, ~g)
return(as.numeric(res$p))
}
Load imputed 5 sets of data
Saved at the end of Lab 6 part 2.
# Saved from last lab
load("Data/missingdata/missOA123CVDnorth.RData")
str(allImputations)
#> List of 2
#> $ imputations:List of 5
#> ..$ :'data.frame': 135448 obs. of 19 variables:
#> .. ..$ .imp : int [1:135448] 1 1 1 1 1 1 1 1 1 1 ...
#> .. ..$ .id : int [1:135448] 1 2 3 4 5 6 7 8 9 10 ...
#> .. ..$ CVD : Factor w/ 2 levels "0 event","event": 1 1 1 1 1 1 1 1 1 1 ...
#> .. ..$ age : Factor w/ 4 levels "20-39 years",..: 1 2 1 2 3 1 3 2 1 2 ...
#> .. ..$ sex : Factor w/ 2 levels "Female","Male": 2 2 1 2 2 2 1 1 1 1 ...
#> .. ..$ income : Factor w/ 4 levels "$29,999 or less",..: 4 1 3 4 2 2 1 4 4 3 ...
#> .. ..$ race : Factor w/ 2 levels "Non-white","White": 2 2 2 2 2 2 2 2 2 2 ...
#> .. ..$ bmicat : Factor w/ 3 levels "Normal","Overweight",..: 1 2 1 1 2 1 2 2 3 2 ...
#> .. ..$ phyact : Factor w/ 3 levels "Active","Inactive",..: 2 1 2 2 3 3 2 1 2 2 ...
#> .. ..$ smoke : Factor w/ 3 levels "Current smoker",..: 2 2 2 2 2 3 2 2 3 3 ...
#> .. ..$ fruit : Factor w/ 3 levels "0-3 daily serving",..: 2 2 2 2 2 1 2 3 3 2 ...
#> .. ..$ painmed: Factor w/ 2 levels "No","Yes": 2 1 2 1 1 2 2 2 2 2 ...
#> .. ..$ ht : Factor w/ 2 levels "No","Yes": 1 1 1 1 1 1 2 1 1 1 ...
#> .. ..$ copd : Factor w/ 2 levels "No","Yes": 1 1 1 1 1 1 1 1 1 1 ...
#> .. ..$ diab : Factor w/ 2 levels "No","Yes": 1 1 1 1 1 1 1 1 1 1 ...
#> .. ..$ edu : Factor w/ 4 levels "< 2ndary","2nd grad.",..: 4 4 4 4 2 4 1 4 4 4 ...
#> .. ..$ weight : num [1:135448] 56.1 56.1 39.2 44 59.9 ...
#> .. ..$ OA : Factor w/ 2 levels "Control","OA": 1 1 1 1 1 1 1 1 1 1 ...
#> .. ..$ ID : int [1:135448] 1 3 6 7 12 13 14 16 20 21 ...
#> ..$ :'data.frame': 135448 obs. of 19 variables:
#> .. ..$ .imp : int [1:135448] 2 2 2 2 2 2 2 2 2 2 ...
#> .. ..$ .id : int [1:135448] 1 2 3 4 5 6 7 8 9 10 ...
#> .. ..$ CVD : Factor w/ 2 levels "0 event","event": 1 1 1 1 1 1 1 1 1 1 ...
#> .. ..$ age : Factor w/ 4 levels "20-39 years",..: 1 2 1 2 3 1 3 2 1 2 ...
#> .. ..$ sex : Factor w/ 2 levels "Female","Male": 2 2 1 2 2 2 1 1 1 1 ...
#> .. ..$ income : Factor w/ 4 levels "$29,999 or less",..: 4 1 3 4 2 2 1 4 4 3 ...
#> .. ..$ race : Factor w/ 2 levels "Non-white","White": 2 2 2 2 2 2 2 2 2 2 ...
#> .. ..$ bmicat : Factor w/ 3 levels "Normal","Overweight",..: 1 2 1 1 2 1 2 2 3 2 ...
#> .. ..$ phyact : Factor w/ 3 levels "Active","Inactive",..: 2 1 2 2 3 3 2 1 2 2 ...
#> .. ..$ smoke : Factor w/ 3 levels "Current smoker",..: 2 2 2 2 2 3 2 2 3 3 ...
#> .. ..$ fruit : Factor w/ 3 levels "0-3 daily serving",..: 2 2 2 2 2 1 2 3 3 2 ...
#> .. ..$ painmed: Factor w/ 2 levels "No","Yes": 2 2 2 2 2 1 2 2 2 2 ...
#> .. ..$ ht : Factor w/ 2 levels "No","Yes": 1 1 1 1 1 1 2 1 1 1 ...
#> .. ..$ copd : Factor w/ 2 levels "No","Yes": 1 1 1 1 1 1 1 1 1 1 ...
#> .. ..$ diab : Factor w/ 2 levels "No","Yes": 1 1 1 1 1 1 1 1 1 1 ...
#> .. ..$ edu : Factor w/ 4 levels "< 2ndary","2nd grad.",..: 4 4 4 4 2 4 1 4 4 4 ...
#> .. ..$ weight : num [1:135448] 56.1 56.1 39.2 44 59.9 ...
#> .. ..$ OA : Factor w/ 2 levels "Control","OA": 1 1 1 1 1 1 1 1 1 1 ...
#> .. ..$ ID : int [1:135448] 1 3 6 7 12 13 14 16 20 21 ...
#> ..$ :'data.frame': 135448 obs. of 19 variables:
#> .. ..$ .imp : int [1:135448] 3 3 3 3 3 3 3 3 3 3 ...
#> .. ..$ .id : int [1:135448] 1 2 3 4 5 6 7 8 9 10 ...
#> .. ..$ CVD : Factor w/ 2 levels "0 event","event": 1 1 1 1 1 1 1 1 1 1 ...
#> .. ..$ age : Factor w/ 4 levels "20-39 years",..: 1 2 1 2 3 1 3 2 1 2 ...
#> .. ..$ sex : Factor w/ 2 levels "Female","Male": 2 2 1 2 2 2 1 1 1 1 ...
#> .. ..$ income : Factor w/ 4 levels "$29,999 or less",..: 4 1 3 4 2 2 1 4 4 3 ...
#> .. ..$ race : Factor w/ 2 levels "Non-white","White": 2 2 2 2 2 2 2 2 2 2 ...
#> .. ..$ bmicat : Factor w/ 3 levels "Normal","Overweight",..: 1 2 1 1 2 1 2 2 3 2 ...
#> .. ..$ phyact : Factor w/ 3 levels "Active","Inactive",..: 2 1 2 2 3 3 2 1 2 2 ...
#> .. ..$ smoke : Factor w/ 3 levels "Current smoker",..: 2 2 2 2 2 3 2 2 3 3 ...
#> .. ..$ fruit : Factor w/ 3 levels "0-3 daily serving",..: 2 2 2 2 2 1 2 3 3 2 ...
#> .. ..$ painmed: Factor w/ 2 levels "No","Yes": 2 2 2 1 2 2 2 2 2 2 ...
#> .. ..$ ht : Factor w/ 2 levels "No","Yes": 1 1 1 1 1 1 2 1 1 1 ...
#> .. ..$ copd : Factor w/ 2 levels "No","Yes": 1 1 1 1 1 1 1 1 1 1 ...
#> .. ..$ diab : Factor w/ 2 levels "No","Yes": 1 1 1 1 1 1 1 1 1 1 ...
#> .. ..$ edu : Factor w/ 4 levels "< 2ndary","2nd grad.",..: 4 4 4 4 2 4 1 4 4 4 ...
#> .. ..$ weight : num [1:135448] 56.1 56.1 39.2 44 59.9 ...
#> .. ..$ OA : Factor w/ 2 levels "Control","OA": 1 1 1 1 1 1 1 1 1 1 ...
#> .. ..$ ID : int [1:135448] 1 3 6 7 12 13 14 16 20 21 ...
#> ..$ :'data.frame': 135448 obs. of 19 variables:
#> .. ..$ .imp : int [1:135448] 4 4 4 4 4 4 4 4 4 4 ...
#> .. ..$ .id : int [1:135448] 1 2 3 4 5 6 7 8 9 10 ...
#> .. ..$ CVD : Factor w/ 2 levels "0 event","event": 1 1 1 1 1 1 1 1 1 1 ...
#> .. ..$ age : Factor w/ 4 levels "20-39 years",..: 1 2 1 2 3 1 3 2 1 2 ...
#> .. ..$ sex : Factor w/ 2 levels "Female","Male": 2 2 1 2 2 2 1 1 1 1 ...
#> .. ..$ income : Factor w/ 4 levels "$29,999 or less",..: 4 1 3 4 2 2 1 4 4 3 ...
#> .. ..$ race : Factor w/ 2 levels "Non-white","White": 2 2 2 2 2 2 2 2 2 2 ...
#> .. ..$ bmicat : Factor w/ 3 levels "Normal","Overweight",..: 1 2 1 1 2 1 2 2 3 2 ...
#> .. ..$ phyact : Factor w/ 3 levels "Active","Inactive",..: 2 1 2 2 3 3 2 1 2 2 ...
#> .. ..$ smoke : Factor w/ 3 levels "Current smoker",..: 2 2 2 2 2 3 2 2 3 3 ...
#> .. ..$ fruit : Factor w/ 3 levels "0-3 daily serving",..: 2 2 2 2 2 1 2 3 3 2 ...
#> .. ..$ painmed: Factor w/ 2 levels "No","Yes": 2 2 2 2 1 1 2 1 2 2 ...
#> .. ..$ ht : Factor w/ 2 levels "No","Yes": 1 1 1 1 1 1 2 1 1 1 ...
#> .. ..$ copd : Factor w/ 2 levels "No","Yes": 1 1 1 1 1 1 1 1 1 1 ...
#> .. ..$ diab : Factor w/ 2 levels "No","Yes": 1 1 1 1 1 1 1 1 1 1 ...
#> .. ..$ edu : Factor w/ 4 levels "< 2ndary","2nd grad.",..: 4 4 4 4 2 4 1 4 4 4 ...
#> .. ..$ weight : num [1:135448] 56.1 56.1 39.2 44 59.9 ...
#> .. ..$ OA : Factor w/ 2 levels "Control","OA": 1 1 1 1 1 1 1 1 1 1 ...
#> .. ..$ ID : int [1:135448] 1 3 6 7 12 13 14 16 20 21 ...
#> ..$ :'data.frame': 135448 obs. of 19 variables:
#> .. ..$ .imp : int [1:135448] 5 5 5 5 5 5 5 5 5 5 ...
#> .. ..$ .id : int [1:135448] 1 2 3 4 5 6 7 8 9 10 ...
#> .. ..$ CVD : Factor w/ 2 levels "0 event","event": 1 1 1 1 1 1 1 1 1 1 ...
#> .. ..$ age : Factor w/ 4 levels "20-39 years",..: 1 2 1 2 3 1 3 2 1 2 ...
#> .. ..$ sex : Factor w/ 2 levels "Female","Male": 2 2 1 2 2 2 1 1 1 1 ...
#> .. ..$ income : Factor w/ 4 levels "$29,999 or less",..: 4 1 3 4 2 2 1 4 4 3 ...
#> .. ..$ race : Factor w/ 2 levels "Non-white","White": 2 2 2 2 2 2 2 2 2 2 ...
#> .. ..$ bmicat : Factor w/ 3 levels "Normal","Overweight",..: 1 2 1 1 2 1 2 2 3 2 ...
#> .. ..$ phyact : Factor w/ 3 levels "Active","Inactive",..: 2 1 2 2 3 3 2 1 2 2 ...
#> .. ..$ smoke : Factor w/ 3 levels "Current smoker",..: 2 2 2 2 2 3 2 2 3 3 ...
#> .. ..$ fruit : Factor w/ 3 levels "0-3 daily serving",..: 2 2 2 2 2 1 2 3 3 2 ...
#> .. ..$ painmed: Factor w/ 2 levels "No","Yes": 1 1 2 2 2 2 1 2 2 2 ...
#> .. ..$ ht : Factor w/ 2 levels "No","Yes": 1 1 1 1 1 1 2 1 1 1 ...
#> .. ..$ copd : Factor w/ 2 levels "No","Yes": 1 1 1 1 1 1 1 1 1 1 ...
#> .. ..$ diab : Factor w/ 2 levels "No","Yes": 1 1 1 1 1 1 1 1 1 1 ...
#> .. ..$ edu : Factor w/ 4 levels "< 2ndary","2nd grad.",..: 4 4 4 4 2 4 1 4 4 4 ...
#> .. ..$ weight : num [1:135448] 56.1 56.1 39.2 44 59.9 ...
#> .. ..$ OA : Factor w/ 2 levels "Control","OA": 1 1 1 1 1 1 1 1 1 1 ...
#> .. ..$ ID : int [1:135448] 1 3 6 7 12 13 14 16 20 21 ...
#> $ call : language imputationList(list(subset(impdata, subset = .imp == 1), subset(impdata, subset = .imp == 2), subset(impdata| __truncated__ ...
#> - attr(*, "class")= chr "imputationList"
Estimating treatment effect
Individual beta estimates
library(survey)
w.design <- svydesign(ids=~1, weights=~weight,
data = allImputations)
model.formula <- as.formula("I(CVD=='event') ~ OA + age + sex +
income + race + painmed + ht +
copd + diab + OA:painmed +
age:copd + sex:copd")
estimates <- with(w.design, svyglm(model.formula, family=quasibinomial))
estimates
#> [[1]]
#> Independent Sampling design (with replacement)
#> svydesign(ids = ids, probs = probs, strata = strata, variables = variables,
#> fpc = fpc, nest = nest, check.strata = check.strata, weights = weights,
#> data = d, pps = pps, ...)
#>
#> Call: svyglm(formula = model.formula, design = .design, family = quasibinomial)
#>
#> Coefficients:
#> (Intercept) OAOA age40-49 years
#> -5.6809 1.1063 0.7911
#> age50-59 years age60-64 years sexMale
#> 1.6233 2.0076 0.6278
#> income$30,000-$49,999 income$50,000-$79,999 income$80,000 or more
#> -0.4347 -0.6031 -0.6805
#> raceWhite painmedYes htYes
#> 0.2136 0.8277 1.0344
#> copdYes diabYes OAOA:painmedYes
#> 1.9143 0.8166 -0.8183
#> age40-49 years:copdYes age50-59 years:copdYes age60-64 years:copdYes
#> -1.1404 -0.7286 -0.9360
#> sexMale:copdYes
#> 0.6167
#>
#> Degrees of Freedom: 135447 Total (i.e. Null); 135429 Residual
#> Null Deviance: 36680
#> Residual Deviance: 31100 AIC: NA
#>
#> [[2]]
#> Independent Sampling design (with replacement)
#> svydesign(ids = ids, probs = probs, strata = strata, variables = variables,
#> fpc = fpc, nest = nest, check.strata = check.strata, weights = weights,
#> data = d, pps = pps, ...)
#>
#> Call: svyglm(formula = model.formula, design = .design, family = quasibinomial)
#>
#> Coefficients:
#> (Intercept) OAOA age40-49 years
#> -5.4837 0.8766 0.7781
#> age50-59 years age60-64 years sexMale
#> 1.6090 1.9901 0.6083
#> income$30,000-$49,999 income$50,000-$79,999 income$80,000 or more
#> -0.4415 -0.5969 -0.6840
#> raceWhite painmedYes htYes
#> 0.2374 0.5877 1.0413
#> copdYes diabYes OAOA:painmedYes
#> 1.8837 0.8054 -0.5388
#> age40-49 years:copdYes age50-59 years:copdYes age60-64 years:copdYes
#> -1.1303 -0.6944 -0.8545
#> sexMale:copdYes
#> 0.5857
#>
#> Degrees of Freedom: 135447 Total (i.e. Null); 135429 Residual
#> Null Deviance: 36680
#> Residual Deviance: 31260 AIC: NA
#>
#> [[3]]
#> Independent Sampling design (with replacement)
#> svydesign(ids = ids, probs = probs, strata = strata, variables = variables,
#> fpc = fpc, nest = nest, check.strata = check.strata, weights = weights,
#> data = d, pps = pps, ...)
#>
#> Call: svyglm(formula = model.formula, design = .design, family = quasibinomial)
#>
#> Coefficients:
#> (Intercept) OAOA age40-49 years
#> -5.6365 1.0389 0.7815
#> age50-59 years age60-64 years sexMale
#> 1.6140 1.9986 0.6180
#> income$30,000-$49,999 income$50,000-$79,999 income$80,000 or more
#> -0.4334 -0.5945 -0.6843
#> raceWhite painmedYes htYes
#> 0.2042 0.7963 1.0386
#> copdYes diabYes OAOA:painmedYes
#> 1.9468 0.8020 -0.7330
#> age40-49 years:copdYes age50-59 years:copdYes age60-64 years:copdYes
#> -1.1185 -0.7775 -0.9414
#> sexMale:copdYes
#> 0.5548
#>
#> Degrees of Freedom: 135447 Total (i.e. Null); 135429 Residual
#> Null Deviance: 36680
#> Residual Deviance: 31130 AIC: NA
#>
#> [[4]]
#> Independent Sampling design (with replacement)
#> svydesign(ids = ids, probs = probs, strata = strata, variables = variables,
#> fpc = fpc, nest = nest, check.strata = check.strata, weights = weights,
#> data = d, pps = pps, ...)
#>
#> Call: svyglm(formula = model.formula, design = .design, family = quasibinomial)
#>
#> Coefficients:
#> (Intercept) OAOA age40-49 years
#> -5.6811 1.2937 0.7838
#> age50-59 years age60-64 years sexMale
#> 1.6248 2.0097 0.6285
#> income$30,000-$49,999 income$50,000-$79,999 income$80,000 or more
#> -0.4431 -0.6012 -0.6877
#> raceWhite painmedYes htYes
#> 0.2325 0.8179 1.0401
#> copdYes diabYes OAOA:painmedYes
#> 1.9368 0.8022 -1.0624
#> age40-49 years:copdYes age50-59 years:copdYes age60-64 years:copdYes
#> -1.0381 -0.7407 -0.9356
#> sexMale:copdYes
#> 0.5422
#>
#> Degrees of Freedom: 135447 Total (i.e. Null); 135429 Residual
#> Null Deviance: 36680
#> Residual Deviance: 31100 AIC: NA
#>
#> [[5]]
#> Independent Sampling design (with replacement)
#> svydesign(ids = ids, probs = probs, strata = strata, variables = variables,
#> fpc = fpc, nest = nest, check.strata = check.strata, weights = weights,
#> data = d, pps = pps, ...)
#>
#> Call: svyglm(formula = model.formula, design = .design, family = quasibinomial)
#>
#> Coefficients:
#> (Intercept) OAOA age40-49 years
#> -5.4487 0.7569 0.7752
#> age50-59 years age60-64 years sexMale
#> 1.6021 1.9847 0.6107
#> income$30,000-$49,999 income$50,000-$79,999 income$80,000 or more
#> -0.4393 -0.5977 -0.6788
#> raceWhite painmedYes htYes
#> 0.2351 0.5457 1.0421
#> copdYes diabYes OAOA:painmedYes
#> 1.9235 0.8057 -0.3884
#> age40-49 years:copdYes age50-59 years:copdYes age60-64 years:copdYes
#> -1.1275 -0.7370 -0.9152
#> sexMale:copdYes
#> 0.5871
#>
#> Degrees of Freedom: 135447 Total (i.e. Null); 135429 Residual
#> Null Deviance: 36680
#> Residual Deviance: 31290 AIC: NA
#>
#> attr(,"call")
#> with(w.design, svyglm(model.formula, family = quasibinomial))
Pooled / averaged estimates for beta and OR
library("mitools")
pooled.estimates <- MIcombine(estimates)
pooled.estimates
#> Multiple imputation results:
#> with(w.design, svyglm(model.formula, family = quasibinomial))
#> MIcombine.default(estimates)
#> results se
#> (Intercept) -5.5861842 0.19239878
#> OAOA 1.0144758 0.27097789
#> age40-49 years 0.7819429 0.10165218
#> age50-59 years 1.6146502 0.09905167
#> age60-64 years 1.9981358 0.10380099
#> sexMale 0.6186653 0.05484262
#> income$30,000-$49,999 -0.4383959 0.06787453
#> income$50,000-$79,999 -0.5987066 0.06593903
#> income$80,000 or more -0.6830429 0.06907307
#> raceWhite 0.2245784 0.10567063
#> painmedYes 0.7150386 0.16508180
#> htYes 1.0393147 0.05522682
#> copdYes 1.9210219 0.65240500
#> diabYes 0.8063590 0.08169615
#> OAOA:painmedYes -0.7081707 0.32575000
#> age40-49 years:copdYes -1.1109564 0.69441469
#> age50-59 years:copdYes -0.7356335 0.67352566
#> age60-64 years:copdYes -0.9165474 0.65626790
#> sexMale:copdYes 0.5772949 0.30888502
sum.pooled <- summary(pooled.estimates,logeffect=TRUE, digits = 2)
#> Multiple imputation results:
#> with(w.design, svyglm(model.formula, family = quasibinomial))
#> MIcombine.default(estimates)
#> results se (lower upper) missInfo
#> (Intercept) 0.0037 0.00072 0.0025 0.0056 45 %
#> OAOA 2.7579 0.74733 1.4778 5.1469 76 %
#> age40-49 years 2.1857 0.22218 1.7909 2.6676 0 %
#> age50-59 years 5.0261 0.49785 4.1392 6.1031 1 %
#> age60-64 years 7.3753 0.76556 6.0175 9.0394 1 %
#> sexMale 1.8564 0.10181 1.6672 2.0672 4 %
#> income$30,000-$49,999 0.6451 0.04378 0.5647 0.7369 0 %
#> income$50,000-$79,999 0.5495 0.03623 0.4829 0.6253 0 %
#> income$80,000 or more 0.5051 0.03489 0.4411 0.5783 0 %
#> raceWhite 1.2518 0.13228 1.0176 1.5399 2 %
#> painmedYes 2.0443 0.33747 1.3628 3.0664 86 %
#> htYes 2.8273 0.15614 2.5372 3.1505 0 %
#> copdYes 6.8279 4.45458 1.9009 24.5255 0 %
#> diabYes 2.2397 0.18298 1.9083 2.6287 1 %
#> OAOA:painmedYes 0.4925 0.16045 0.2275 1.0663 81 %
#> age40-49 years:copdYes 0.3292 0.22863 0.0844 1.2841 0 %
#> age50-59 years:copdYes 0.4792 0.32275 0.1280 1.7940 0 %
#> age60-64 years:copdYes 0.3999 0.26244 0.1105 1.4473 0 %
#> sexMale:copdYes 1.7812 0.55019 0.9723 3.2632 1 %
sum.pooled
Estimating model performance (AUC and AL)
Individual AUC estimates (with interactions)
library(ROCR)
library(WeightedROC)
model.formula <- as.formula("I(CVD=='event') ~ OA + age + sex +
income + race + painmed + ht +
copd + diab + OA:painmed +
age:copd + sex:copd")
AL.scalar <- AUC.scalar <- vector("list", 5)
for (i in 1:5) {
analytic.i <- allImputations$imputations[[i]]
w.design <- svydesign(id=~1, weights=~weight,
data=analytic.i)
model.fit <- svyglm(model.formula, design=w.design, family=quasibinomial)
auc <- svyROCw3(fit=model.fit,outcome=w.design$variables$CVD=='event',
weight = w.design$variables$weight, plot = FALSE)
AL <- AL.gof3(fit=model.fit, data = analytic.i,
weight = "weight",
psu = NULL,
strata= NULL)
AL.scalar[[i]] <- AL
AUC.scalar[[i]] <- auc
cat("AUC calculated for data", i, "\n")
}
#> AUC calculated for data 1
#> AUC calculated for data 2
#> AUC calculated for data 3
#> AUC calculated for data 4
#> AUC calculated for data 5
str(AUC.scalar)
#> List of 5
#> $ : num 0.8
#> $ : num 0.795
#> $ : num 0.798
#> $ : num 0.8
#> $ : num 0.794
AL.scalar
#> [[1]]
#> [1] 0.01243738
#>
#> [[2]]
#> [1] 0.5471927
#>
#> [[3]]
#> [1] 0.13081
#>
#> [[4]]
#> [1] 0.644286
#>
#> [[5]]
#> [1] 0.6049137
Averaged estimates for AUC (with interactions)
Model performance without interactions
Individual AUC estimates / AL p-values
library(ROCR)
library(WeightedROC)
AL.scalar <- AUC.scalar <- vector("list", 5)
model.formula <- as.formula("I(CVD=='event') ~ OA + age + sex +
income + race + painmed + ht +
copd + diab")
for (i in 1:5) {
analytic.i <- allImputations$imputations[[i]]
w.design <- svydesign(id=~1, weights=~weight,
data=analytic.i)
model.fit <- svyglm(model.formula, design=w.design, family=quasibinomial)
auc <- svyROCw3(fit=model.fit,outcome=w.design$variables$CVD=='event',
weight = w.design$variables$weight, plot = FALSE)
AL <- AL.gof3(fit=model.fit, data = analytic.i,
weight = "weight",
psu = NULL,
strata= NULL)
AL.scalar[[i]] <- AL
AUC.scalar[[i]] <- auc
cat("AUC calculated for data", i, "\n")
}
#> AUC calculated for data 1
#> AUC calculated for data 2
#> AUC calculated for data 3
#> AUC calculated for data 4
#> AUC calculated for data 5
str(AUC.scalar)
#> List of 5
#> $ : num 0.798
#> $ : num 0.794
#> $ : num 0.797
#> $ : num 0.798
#> $ : num 0.794
AL.scalar
#> [[1]]
#> [1] 0.01839624
#>
#> [[2]]
#> [1] 0.2836256
#>
#> [[3]]
#> [1] 0.02439659
#>
#> [[4]]
#> [1] 0.8333214
#>
#> [[5]]
#> [1] 0.2050434
Averaged estimates for AUC / majority of AL p-values
Appendix
User-written svyROCw3
and AL.gof3
functions
svyROCw3
#> function(fit=fit7,outcome=analytic2$CVD=="event", weight = NULL,plot=FALSE){
#> if (is.null(weight)){ # require(ROCR)
#> prob <- predict(fit, type = "response")
#> pred <- prediction(as.vector(prob), outcome)
#> perf <- performance(pred, "tpr", "fpr")
#> auc <- performance(pred, measure = "auc")
#> auc <- auc@y.values[[1]]
#> if (plot == TRUE){
#> roc.data <- data.frame(fpr = unlist(perf@x.values), tpr = unlist(perf@y.values),
#> model = "Logistic")
#> with(data = roc.data,plot(fpr, tpr, type="l", xlim=c(0,1), ylim=c(0,1), lwd=1,
#> xlab="1 - specificity", ylab="Sensitivity",
#> main = paste("AUC = ", round(auc,3))))
#> mtext("Unweighted ROC")
#> abline(0,1, lty=2)
#> }
#> } else { # library(WeightedROC)
#> outcome <- as.numeric(outcome)
#> pred <- predict(fit, type = "response")
#> tp.fp <- WeightedROC(pred, outcome, weight)
#> auc <- WeightedAUC(tp.fp)
#> if (plot == TRUE){
#> with(data = tp.fp,plot(FPR, TPR, type="l", xlim=c(0,1), ylim=c(0,1), lwd=1,
#> xlab="1 - specificity", ylab="Sensitivity",
#> main = paste("AUC = ", round(auc,3))))
#> abline(0,1, lty=2)
#> mtext("Weighted ROC")
#> }
#> }
#> return(auc)
#> }
#> <bytecode: 0x00000224e3ffdcb8>
AL.gof3
#> function(fit=fit7, data = analytic2, weight = "weight", psu = "psu",
#> strata= "strata"){
#> r <- residuals(fit, type="response")
#> f<-fitted(fit)
#> breaks.g <- c(-Inf, quantile(f, (1:9)/10), Inf)
#> breaks.g <- breaks.g + seq_along(breaks.g) * .Machine$double.eps
#> g<- cut(f, breaks.g)
#> data2g <- cbind(data,r,g)
#> if (is.null(psu)){
#> newdesign <- svydesign(id=~1,
#> weights=as.formula(paste0("~",weight)),
#> data=data2g, nest = TRUE)
#> }
#> if (!is.null(psu)) {
#> newdesign <- svydesign(id=as.formula(paste0("~",psu)),
#> strata=as.formula(paste0("~",strata)),
#> weights=as.formula(paste0("~",weight)),
#> data=data2g, nest = TRUE)
#> }
#> decilemodel<- svyglm(r~g, design=newdesign)
#> res <- regTermTest(decilemodel, ~g)
#> return(as.numeric(res$p))
#> }
#> <bytecode: 0x00000224e41a6d68>