# Load the previously saved objects from disk into the workspace.
# NOTE(review): presumably this file supplies `nhanes17` and the
# `rxq12`/`rxq22`/`rxq32` tables used further down -- confirm.
load(file = "data/analytic17.RData")
25 Recoding cycle 10
Creating analytic dataset from 2017-18 cycle
25.1 Load downloaded dataset
25.2 Recoding
25.2.1 ID
# Work on a copy of the downloaded data so the raw object stays untouched,
# and expose the respondent sequence number under the shorter name `id`.
dat2 <- nhanes17
dat2$id <- dat2$SEQN
25.2.2 Demographic
25.2.2.1 Age
# Age in years, plus a four-level age group. Values outside 0-80 (and
# missing values) fall through to `else = NA`.
dat2$age <- dat2$RIDAGEYR
dat2$age.cat <- car::recode(
  dat2$age,
  " 0:19 = '<20'; 20:49 = '20-49'; 50:64 = '50-64';
    65:80 = '65+'; else = NA "
)
# Fix the level order so tables and models list groups from youngest to oldest.
dat2$age.cat <- factor(dat2$age.cat, levels = c("<20", "20-49", "50-64", "65+"))
table(dat2$age.cat, useNA = "always")
#>
#> <20 20-49 50-64 65+ <NA>
#> 3685 2500 1569 1500 0
25.2.2.2 Sex
# Sex is taken as-is from RIAGENDR; tabulate to check for missing values.
dat2$sex <- dat2$RIAGENDR
table(dat2$sex, useNA = "always")
#>
#> Male Female <NA>
#> 4557 4697 0
25.2.2.3 Education
# Education collapsed to three ordered groups. Any label not listed below
# (including missing values) becomes NA.
dat2$education <- dat2$DMDEDUC2
dat2$education <- as.factor(dat2$education)
dat2$education <- car::recode(
  dat2$education,
  recodes = " c('College graduate or above') =
      'College graduate or above';
    c('Some college or AA degree', 'High school graduate/GED or equi') =
      'High school';
    c('Less than 9th grade', '9-11th grade (Includes 12th grad') =
      'Less than high school';
    else = NA "
)
# Re-level explicitly so the categories run from lowest to highest attainment.
dat2$education <- factor(
  dat2$education,
  levels = c("Less than high school", "High school",
             "College graduate or above")
)
table(dat2$education, useNA = "always")
#>
#> Less than high school High school College graduate or above
#> 1117 3103 1336
#> <NA>
#> 3698
25.2.2.4 Race/ethnicity
# Race/ethnicity collapsed to four groups; every label not named in the
# recode (via `else`) is assigned to "Others".
dat2$race <- dat2$RIDRETH1
dat2$race <- car::recode(
  dat2$race,
  recodes = " 'Non-Hispanic White'='White';
    'Non-Hispanic Black'='Black';
    c('Mexican American', 'Other Hispanic')= 'Hispanic';
    else='Others' "
)
dat2$race <- factor(dat2$race, levels = c("White", "Black", "Hispanic", "Others"))
table(dat2$race, useNA = "always")
#>
#> White Black Hispanic Others <NA>
#> 3150 2115 2187 1802 0
25.2.2.5 Marital status
# Marital status collapsed to three groups; unlisted labels and missing
# values become NA.
dat2$marital <- dat2$DMDMARTL
dat2$marital <- car::recode(
  dat2$marital,
  recodes = " 'Never married'='Never married';
    c('Married', 'Living with partner') = 'Married/with partner';
    c('Widowed', 'Divorced', 'Separated')='Other';
    else=NA "
)
dat2$marital <- factor(
  dat2$marital,
  levels = c("Never married", "Married/with partner", "Other")
)
table(dat2$marital, useNA = "always")
#>
#> Never married Married/with partner Other
#> 1006 3252 1305
#> <NA>
#> 3691
25.2.2.6 Income
# Household income collapsed to three bands. The source labels (including
# their internal spacing) must match the raw factor labels exactly; anything
# not listed becomes NA.
dat2$income <- dat2$INDHHIN2
dat2$income <- car::recode(
  dat2$income,
  recodes = " c('$ 0 to $ 4,999', '$ 5,000 to $ 9,999',
      '$10,000 to $14,999', '$15,000 to $19,999',
      'Under $20,000')='less than $20,000';
    c('Over $20,000','$20,000 and Over', '$20,000 to $24,999',
      '$25,000 to $34,999', '$35,000 to $44,999', '$45,000 to $54,999',
      '$55,000 to $64,999', '$65,000 to $74,999')='$20,000 to $74,999';
    c('$75,000 to $99,999','$100,000 and Over')='$75,000 and Over';
    else=NA "
)
# Order the bands from lowest to highest income.
dat2$income <- factor(
  dat2$income,
  levels = c("less than $20,000", "$20,000 to $74,999", "$75,000 and Over")
)
table(dat2$income, useNA = "always")
#>
#> less than $20,000 $20,000 to $74,999 $75,000 and Over <NA>
#> 1589 4331 2453 881
25.2.2.7 Where born / citizenship
# Country of birth as a two-level factor; labels other than the two listed
# (and missing values) become NA.
dat2$born <- dat2$DMDBORN4
dat2$born <- car::recode(
  dat2$born,
  recodes = " 'Others'='Other place';
    'Born in 50 US states or Washingt'= 'Born in US';
    else=NA"
)
dat2$born <- factor(dat2$born, levels = c("Born in US", "Other place"))
table(dat2$born, useNA = "always")
#>
#> Born in US Other place <NA>
#> 7303 1948 3
25.2.2.8 Pregnancy
# Pregnancy status. Rows whose value is not one of the three listed labels
# (including missing values) are labelled 'outside of target population'
# rather than NA, so this variable has no missing values afterwards.
dat2$pregnancy <- dat2$RIDEXPRG
dat2$pregnancy <- car::recode(
  dat2$pregnancy,
  recodes = " 'Yes, positive lab pregnancy test' = 'Yes';
    'The participant was not pregnant' = 'No';
    'Cannot ascertain if the particip' = 'inconclusive';
    else= 'outside of target population' "
)
table(dat2$pregnancy, useNA = "always")
#>
#> inconclusive No
#> 89 966
#> outside of target population Yes
#> 8144 55
#> <NA>
#> 0
25.2.3 BMI
25.2.3.1 BMI and Obesity
# Continuous BMI copied from BMXBMI.
dat2$bmi <- dat2$BMXBMI
summary(dat2$bmi)
#> Min. 1st Qu. Median Mean 3rd Qu. Max. NA's
#> 12.30 20.40 25.80 26.58 31.30 86.20 1249
# Obesity indicator: BMI of 30 or more. A missing BMI gives a missing
# indicator because ifelse() propagates NA.
dat2$obese <- ifelse(dat2$BMXBMI >= 30, "Yes", "No")
dat2$obese <- factor(dat2$obese, levels = c("No", "Yes"))
table(dat2$obese, useNA = "always")
#>
#> No Yes <NA>
#> 5597 2408 1249
25.2.4 Diabetes
# Diabetes status from DIQ010: 'Borderline' is grouped with 'No'; any other
# answer becomes NA.
dat2$diabetes <- dat2$DIQ010
dat2$diabetes <- car::recode(
  dat2$diabetes,
  " 'Yes'='Yes'; c('No','Borderline')='No';
    else=NA "
)
# Taking insulin now or diabetic pills to lower blood sugar - they have diabetes
dat2$diabetes[dat2$DIQ050 == "Yes"] <- "Yes"
dat2$diabetes[dat2$DIQ070 == "Yes"] <- "Yes"
table(dat2$diabetes, useNA = "always")
#>
#> No Yes <NA>
#> 7927 966 361
25.2.5 Family history of diabetes
# Inspect the raw variable first.
table(dat2$DIQ175A, useNA = "always")
#>
#> Family history Don't know <NA>
#> 1143 2 8109
# Family history of diabetes as a No/Yes factor.
# NOTE(review): `else = 'No'` also converts rows where DIQ175A is missing
# into "No" -- confirm that treating missing as "No" is intended.
dat2$diabetes.family.history <- dat2$DIQ175A
dat2$diabetes.family.history <- car::recode(
  dat2$diabetes.family.history,
  " 'Family history' = 'Yes';
    else = 'No' "
)
dat2$diabetes.family.history <- factor(
  dat2$diabetes.family.history,
  levels = c("No", "Yes")
)
# "Don't know" answers are set back to NA after the recode.
dat2$diabetes.family.history[dat2$DIQ175A == "Don't know"] <- NA
table(dat2$diabetes.family.history, useNA = "always")
#>
#> No Yes <NA>
#> 8109 1143 2
25.2.6 Smoking
# Smoking status in three levels. SMQ020 gives a first Yes/No split; the
# "Previous smoker" level is declared up front so that it can be assigned
# afterwards to respondents whose SMQ040 answer is "Not at all".
dat2$smoking <- dat2$SMQ020
dat2$smoking <- car::recode(
  dat2$smoking,
  " 'Yes' = 'Current smoker'; 'No' = 'Never smoker'; else=NA "
)
dat2$smoking <- factor(
  dat2$smoking,
  levels = c("Never smoker", "Previous smoker", "Current smoker")
)
dat2$smoking[dat2$SMQ040 == "Not at all"] <- "Previous smoker"
table(dat2$smoking, useNA = "always")
#>
#> Never smoker Previous smoker Current smoker <NA>
#> 3497 1338 1021 3398
25.2.7 Diet
25.2.7.1 How healthy is the diet
# Self-rated diet quality collapsed from five answers to three ordered
# levels; any other answer becomes NA.
dat2$diet.healthy <- dat2$DBQ700
dat2$diet.healthy <- car::recode(
  dat2$diet.healthy,
  recodes = " c('Excellent', 'Very good')= 'Very good or excellent';
    'Good'='Good';
    c('Fair', 'Poor')= 'Poor or fair';
    else = NA "
)
dat2$diet.healthy <- factor(
  dat2$diet.healthy,
  levels = c("Poor or fair", "Good", "Very good or excellent")
)
table(dat2$diet.healthy, useNA = "always")
#>
#> Poor or fair Good Very good or excellent
#> 2036 2411 1712
#> <NA>
#> 3095
25.2.8 Vigorous physical activity
# Vigorous physical activity: keep only explicit Yes/No answers; everything
# else becomes NA.
dat2$physical.activity <- dat2$PAQ605
dat2$physical.activity <- car::recode(
  dat2$physical.activity,
  recodes = " 'No' = 'No';
    'Yes' = 'Yes'; else=NA"
)
dat2$physical.activity <- factor(dat2$physical.activity, levels = c("No", "Yes"))
table(dat2$physical.activity, useNA = "always")
#>
#> No Yes <NA>
#> 4461 1389 3404
25.2.9 Access to medical services
# Access to a routine place for health care: "more than one place" counts
# as Yes; any other answer becomes NA.
dat2$medical.access <- dat2$HUQ030
dat2$medical.access <- car::recode(
  dat2$medical.access,
  recodes = " c('Yes', 'There is more than one place')='Yes';
    'There is no place'= 'No';
    else=NA"
)
table(dat2$medical.access, useNA = "always")
#>
#> No Yes <NA>
#> 1398 7854 2
25.2.10 Hypertension/high blood pressure
25.2.10.1 Systolic BP
# Copy the four systolic readings under shorter names.
dat2$systolic1 <- dat2$BPXSY1
dat2$systolic2 <- dat2$BPXSY2
dat2$systolic3 <- dat2$BPXSY3
dat2$systolic4 <- dat2$BPXSY4

# Per-person mean of the available readings. With na.rm = TRUE a row is
# averaged over whatever readings exist; a row with no readings yields NaN,
# which summary() counts among the NA's. Plain column assignment is used
# (as for diastolic BP) so the chunk does not depend on dplyr being attached.
dat2$systolicBP <- rowMeans(
  dat2[, c("systolic1", "systolic2", "systolic3", "systolic4")],
  na.rm = TRUE
)
summary(dat2$systolicBP)
#> Min. 1st Qu. Median Mean 3rd Qu. Max. NA's
#> 72.67 106.67 118.00 121.68 132.67 238.00 2537
25.2.10.2 Diastolic BP
# Copy the four diastolic readings under shorter names.
dat2$diastolic1 <- dat2$BPXDI1
dat2$diastolic2 <- dat2$BPXDI2
dat2$diastolic3 <- dat2$BPXDI3
dat2$diastolic4 <- dat2$BPXDI4

# Work on a temporary copy so the stored individual readings keep their raw
# values; readings recorded as 0 are treated as missing before averaging.
datX <- dat2[, c("diastolic1", "diastolic2", "diastolic3", "diastolic4")]
datX[datX == 0] <- NA

# Per-person mean of the remaining readings (NaN when none are available).
dat2$diastolicBP <- rowMeans(
  datX[, c("diastolic1", "diastolic2", "diastolic3", "diastolic4")],
  na.rm = TRUE
)
summary(dat2$diastolicBP)
#> Min. 1st Qu. Median Mean 3rd Qu. Max. NA's
#> 8.00 61.33 70.00 69.54 77.33 135.33 2618
25.2.11 Sleep (daily in hours)
# Daily sleep in hours, copied from SLD012; summary() shows spread and NA count.
dat2$sleep <- dat2$SLD012
summary(dat2$sleep)
#> Min. 1st Qu. Median Mean 3rd Qu. Max. NA's
#> 2.000 7.000 8.000 7.659 8.500 14.000 3141
25.2.12 Laboratory data
25.2.12.1 Uric acid (mg/dL)
# Uric acid (mg/dL), copied from LBXSUA.
dat2$uric.acid <- dat2$LBXSUA
summary(dat2$uric.acid)
#> Min. 1st Qu. Median Mean 3rd Qu. Max. NA's
#> 0.800 4.300 5.300 5.402 6.300 15.100 3353
25.2.12.2 Total protein (g/dL)
# Total protein (g/dL), copied from LBXSTP.
dat2$protein.total <- dat2$LBXSTP
summary(dat2$protein.total)
#> Min. 1st Qu. Median Mean 3rd Qu. Max. NA's
#> 5.300 6.900 7.200 7.166 7.400 10.000 3353
25.2.12.3 Total bilirubin (mg/dL)
# Total bilirubin (mg/dL), copied from LBXSTB.
dat2$bilirubin.total <- dat2$LBXSTB
summary(dat2$bilirubin.total)
#> Min. 1st Qu. Median Mean 3rd Qu. Max. NA's
#> 0.10 0.30 0.40 0.46 0.60 3.70 3351
25.2.12.4 Phosphorus (mg/dL)
# Phosphorus (mg/dL), copied from LBXSPH.
dat2$phosphorus <- dat2$LBXSPH
summary(dat2$phosphorus)
#> Min. 1st Qu. Median Mean 3rd Qu. Max. NA's
#> 1.900 3.300 3.600 3.665 4.000 9.600 3353
25.2.12.5 Sodium (mmol/L)
# Sodium (mmol/L), copied from LBXSNASI.
dat2$sodium <- dat2$LBXSNASI
summary(dat2$sodium)
#> Min. 1st Qu. Median Mean 3rd Qu. Max. NA's
#> 121.0 138.0 140.0 140.3 142.0 151.0 3350
25.2.12.6 Potassium (mmol/L)
# Potassium (mmol/L), copied from LBXSKSI.
dat2$potassium <- dat2$LBXSKSI
summary(dat2$potassium)
#> Min. 1st Qu. Median Mean 3rd Qu. Max. NA's
#> 2.800 3.900 4.100 4.094 4.300 6.600 3355
25.2.12.7 Globulin (g/dL)
# Globulin (g/dL), copied from LBXSGB.
dat2$globulin <- dat2$LBXSGB
summary(dat2$globulin)
#> Min. 1st Qu. Median Mean 3rd Qu. Max. NA's
#> 1.800 2.800 3.100 3.087 3.300 6.000 3353
25.2.12.8 Total calcium (mg/dL)
# Total calcium (mg/dL), copied from LBXSCA.
dat2$calcium.total <- dat2$LBXSCA
summary(dat2$calcium.total)
#> Min. 1st Qu. Median Mean 3rd Qu. Max. NA's
#> 6.40 9.10 9.30 9.32 9.60 11.70 3353
25.2.12.9 High cholesterol
# High cholesterol from BPQ080: keep only explicit Yes/No answers; any other
# answer becomes NA.
dat2$high.cholesterol <- dat2$BPQ080
dat2$high.cholesterol <- car::recode(
  dat2$high.cholesterol,
  recodes = " 'Yes'='Yes';
    'No'='No'; else = NA"
)
table(dat2$high.cholesterol, useNA = "always")
#>
#> No Yes <NA>
#> 4153 1968 3133
25.2.13 Survey features
25.2.13.1 Weight
# Copy the two survey weight variables under descriptive names and check
# their ranges.
dat2$survey.weight <- dat2$WTINT2YR
summary(dat2$survey.weight)
#> Min. 1st Qu. Median Mean 3rd Qu. Max.
#> 2571 13074 21099 34671 36923 433085
dat2$survey.weight.mec <- dat2$WTMEC2YR
summary(dat2$survey.weight.mec)
#> Min. 1st Qu. Median Mean 3rd Qu. Max.
#> 0 12347 21060 34671 37562 419763
25.2.13.2 PSU
# PSU identifier stored as a factor (it is a label, not a quantity).
dat2$psu <- as.factor(dat2$SDMVPSU)
table(dat2$psu)
#>
#> 1 2
#> 4464 4790
25.2.13.3 Strata
# Stratum identifier stored as a factor (it is a label, not a quantity).
dat2$strata <- as.factor(dat2$SDMVSTRA)
table(dat2$strata)
#>
#> 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148
#> 510 638 695 554 605 653 612 693 735 551 689 609 604 596 510
25.2.14 Survey year
# Survey cycle indicator, copied from SDDSRVYR; tabulate to confirm that a
# single cycle is present.
dat2$year <- dat2$SDDSRVYR
table(dat2$year, useNA = "always")
#>
#> 10 <NA>
#> 9254 0
25.2.15 ICD-10-CM codes
# Give the three ICD-10 tables identical column names so they can be stacked.
colnames(rxq12) <- c("id", "icd10")
colnames(rxq22) <- c("id", "icd10")
colnames(rxq32) <- c("id", "icd10")

# Stack the three tables into one long table and sort it by respondent id.
rx2017 <- rbind(rxq12, rxq22, rxq32)
rx2017 <- rx2017[order(rx2017$id), ]

# Non-informative codes are treated as missing. %in% never returns NA, so a
# single mask replaces the four separate comparisons.
rx2017$icd10[rx2017$icd10 %in% c("Unknown", "Refused", "Don't know", "")] <- NA

# Keep the first three characters of each code as a coarser grouping.
rx2017$icd10.new <- substr(rx2017$icd10, start = 1, stop = 3)

# Drop every row that has a missing value in any column.
rx2017 <- na.omit(rx2017)
25.3 Analytic data
25.3.1 Full dataset
# Keep the fully recoded data (all rows, all columns) under its own name.
nhanes17r <- dat2
25.3.2 Analytic dataset - adults 20 years or more
# Names of the recoded columns carried into the analytic dataset.
vars <- c(
  # ID
  "id",
  # Demographic
  "age", "age.cat", "sex", "education", "race",
  "marital", "income", "born", "pregnancy",
  # obesity
  "obese",
  # Diabetes
  "diabetes", "diabetes.family.history",
  # Smoking
  "smoking",
  # Diet
  "diet.healthy",
  # Physical activity
  "physical.activity",
  # Access to routine healthcare
  "medical.access",
  # Blood pressure and Hypertension
  "systolicBP", "diastolicBP",
  # Sleep
  "sleep",
  # Laboratory
  "uric.acid", "protein.total", "bilirubin.total", "phosphorus",
  "sodium", "potassium", "globulin", "calcium.total",
  "high.cholesterol",
  # Survey features
  "survey.weight", "survey.weight.mec", "psu", "strata",
  # Survey year
  "year"
)
# Keep all rows but only the selected columns.
nhanes17r.sel <- nhanes17r[, vars]
# Adults 20 years or more and not pregnant
dim(nhanes17r.sel)
#> [1] 9254 34
# NOTE(review): `pregnancy` was recoded to 'Yes' (capital Y) earlier in this
# script, so the comparison against 'yes' never matches and no pregnant
# participant is actually removed; only the age filter takes effect. The
# condition is kept unchanged so the saved dataset stays the same -- confirm
# whether `pregnancy != 'Yes'` was intended.
analytic17 <- subset(nhanes17r.sel, age >= 20 & pregnancy != 'yes')
dim(analytic17)
#> [1] 5569 34
25.3.3 Save dataset for later use
# Final size check on both objects, then write them to one .RData file.
dim(analytic17)
#> [1] 5569 34
dim(rx2017)
#> [1] 15025 3
save(
  list = c("analytic17", "rx2017"),
  file = "data/analytic17recoded.RData"
)