# Restore the objects stored in the saved workspace file.
# NOTE(review): later code uses nhanes13, rxq12, rxq22 and rxq32, which are
# presumably provided by this file -- confirm.
load("data/analytic13.RData")
23 Recoding cycle 8
Creating analytic dataset from 2013-14 cycle
23.1 Load downloaded dataset
23.2 Recoding
23.2.1 ID
# Start the recoded dataset from the downloaded `nhanes13` object.
dat2 <- nhanes13
# Use SEQN as the participant identifier.
dat2$id <- dat2$SEQN
23.2.2 Demographic
23.2.2.1 Age
# Age in years, plus a four-level age group; values outside 0-80 become NA.
dat2$age <- dat2$RIDAGEYR
dat2$age.cat <- car::recode(
  dat2$age,
  " 0:19 = '<20'; 20:49 = '20-49'; 50:64 = '50-64'; 65:80 = '65+'; else = NA "
)
# Fix the level order so tables and models list the groups youngest first.
dat2$age.cat <- factor(
  dat2$age.cat,
  levels = c("<20", "20-49", "50-64", "65+")
)
table(dat2$age.cat, useNA = "always")
#>
#> <20 20-49 50-64 65+ <NA>
#> 4406 2989 1474 1306 0
23.2.2.2 Sex
# Sex is taken from RIAGENDR without recoding.
dat2$sex <- dat2$RIAGENDR
table(dat2$sex, useNA = "always")
#>
#> Male Female <NA>
#> 5003 5172 0
23.2.2.3 Education
# Collapse DMDEDUC2 into three education levels; any other response
# (including missing) becomes NA.
dat2$education <- car::recode(
  as.factor(dat2$DMDEDUC2),
  recodes = "
    c('College graduate or above') = 'College graduate or above';
    c('Some college or AA degree', 'High school graduate/GED or equi') =
      'High school';
    c('Less than 9th grade', '9-11th grade (Includes 12th grad') =
      'Less than high school';
    else = NA
  "
)
# Order the levels from lowest to highest attainment.
dat2$education <- factor(
  dat2$education,
  levels = c("Less than high school", "High school",
             "College graduate or above")
)
table(dat2$education, useNA = "always")
#>
#> Less than high school High school College graduate or above
#> 1246 3073 1443
#> <NA>
#> 4413
23.2.2.4 Race/ethnicity
# Collapse RIDRETH1 into four race/ethnicity groups; everything that is not
# explicitly listed (including missing) falls into 'Others'.
dat2$race <- car::recode(
  dat2$RIDRETH1,
  recodes = "
    'Non-Hispanic White' = 'White';
    'Non-Hispanic Black' = 'Black';
    c('Mexican American', 'Other Hispanic') = 'Hispanic';
    else = 'Others'
  "
)
dat2$race <- factor(
  dat2$race,
  levels = c("White", "Black", "Hispanic", "Others")
)
table(dat2$race, useNA = "always")
#>
#> White Black Hispanic Others <NA>
#> 3674 2267 2690 1544 0
23.2.2.5 Marital status
# Collapse DMDMARTL into three marital-status groups; other responses -> NA.
dat2$marital <- car::recode(
  dat2$DMDMARTL,
  recodes = "
    'Never married' = 'Never married';
    c('Married', 'Living with partner') = 'Married/with partner';
    c('Widowed', 'Divorced', 'Separated') = 'Other';
    else = NA
  "
)
dat2$marital <- factor(
  dat2$marital,
  levels = c("Never married", "Married/with partner", "Other")
)
table(dat2$marital, useNA = "always")
#>
#> Never married Married/with partner Other
#> 1112 3382 1272
#> <NA>
#> 4409
23.2.2.6 Income
# Collapse INDHHIN2 (household income brackets) into three groups; responses
# not listed below become NA.
# NOTE(review): the source labels must match the INDHHIN2 values exactly,
# including the internal spacing of e.g. '$ 0 to $ 4,999' -- confirm against
# the data before relying on the lowest bracket.
dat2$income <- car::recode(
  dat2$INDHHIN2,
  recodes = "
    c('$ 0 to $ 4,999', '$ 5,000 to $ 9,999', '$10,000 to $14,999',
      '$15,000 to $19,999', 'Under $20,000') = 'less than $20,000';
    c('Over $20,000', '$20,000 and Over', '$20,000 to $24,999',
      '$25,000 to $34,999', '$35,000 to $44,999', '$45,000 to $54,999',
      '$55,000 to $64,999', '$65,000 to $74,999') = '$20,000 to $74,999';
    c('$75,000 to $99,999', '$100,000 and Over') = '$75,000 and Over';
    else = NA
  "
)
dat2$income <- factor(
  dat2$income,
  levels = c("less than $20,000", "$20,000 to $74,999", "$75,000 and Over")
)
table(dat2$income, useNA = "always")
#>
#> less than $20,000 $20,000 to $74,999 $75,000 and Over <NA>
#> 2110 4964 2641 460
23.2.2.7 Where born / citizenship
# Place of birth from DMDBORN4: US-born vs. elsewhere; other responses -> NA.
dat2$born <- car::recode(
  dat2$DMDBORN4,
  recodes = "
    'Others' = 'Other place';
    'Born in 50 US states or Washingt' = 'Born in US';
    else = NA
  "
)
dat2$born <- factor(dat2$born, levels = c("Born in US", "Other place"))
table(dat2$born, useNA = "always")
#>
#> Born in US Other place <NA>
#> 8262 1908 5
23.2.2.8 Pregnancy
# Pregnancy status from RIDEXPRG. Note the capitalised 'Yes'/'No' labels;
# every value not listed (including missing) is labelled
# 'outside of target population', so this variable has no NA.
dat2$pregnancy <- car::recode(
  dat2$RIDEXPRG,
  recodes = "
    'Yes, positive lab pregnancy test' = 'Yes';
    'The participant was not pregnant' = 'No';
    'Cannot ascertain if the particip' = 'inconclusive';
    else = 'outside of target population'
  "
)
table(dat2$pregnancy, useNA = "always")
#>
#> inconclusive No
#> 94 1150
#> outside of target population Yes
#> 8866 65
#> <NA>
#> 0
23.2.3 BMI
23.2.3.1 BMI and Obesity
# Body mass index, copied from BMXBMI.
dat2$bmi <- dat2$BMXBMI
summary(dat2$bmi)
#> Min. 1st Qu. Median Mean 3rd Qu. Max. NA's
#> 12.10 19.70 24.70 25.68 30.20 82.90 1120
# Obesity indicator: BMI of 30 or more. Missing BMI stays NA.
dat2$obese <- factor(
  dat2$BMXBMI >= 30,
  levels = c(FALSE, TRUE),
  labels = c("No", "Yes")
)
table(dat2$obese, useNA = "always")
#>
#> No Yes <NA>
#> 6708 2347 1120
23.2.4 Diabetes
# Diabetes status from DIQ010; 'Borderline' is counted as 'No' and any other
# response becomes NA.
dat2$diabetes <- car::recode(
  dat2$DIQ010,
  " 'Yes' = 'Yes'; c('No', 'Borderline') = 'No'; else = NA "
)
# Taking insulin now or diabetic pills to lower blood sugar - they have
# diabetes. `%in%` keeps missing DIQ050/DIQ070 answers out of the index.
dat2$diabetes[dat2$DIQ050 %in% "Yes"] <- "Yes"
dat2$diabetes[dat2$DIQ070 %in% "Yes"] <- "Yes"
table(dat2$diabetes, useNA = "always")
#>
#> No Yes <NA>
#> 8981 782 412
23.2.5 Family history of diabetes
# Family history of diabetes from DIQ175A: 'Family history' -> 'Yes', every
# other value (including missing) -> 'No'.
dat2$diabetes.family.history <- car::recode(
  dat2$DIQ175A,
  " 'Family history' = 'Yes'; else = 'No' "
)
dat2$diabetes.family.history <- factor(
  dat2$diabetes.family.history,
  levels = c("No", "Yes")
)
# An explicit "Don't know" is treated as missing rather than 'No'.
dat2$diabetes.family.history[dat2$DIQ175A %in% "Don't know"] <- NA
table(dat2$diabetes.family.history, useNA = "always")
#>
#> No Yes <NA>
#> 8837 1337 1
23.2.6 Smoking
# Smoking status. SMQ020 'Yes'/'No' gives a first split into current vs.
# never smokers; other responses become NA.
dat2$smoking <- car::recode(
  dat2$SMQ020,
  " 'Yes' = 'Current smoker'; 'No' = 'Never smoker'; else=NA "
)
# Add the 'Previous smoker' level before assigning it below.
dat2$smoking <- factor(
  dat2$smoking,
  levels = c("Never smoker", "Previous smoker", "Current smoker")
)
# Respondents with SMQ040 == 'Not at all' are reclassified as previous
# smokers.
dat2$smoking[dat2$SMQ040 %in% "Not at all"] <- "Previous smoker"
table(dat2$smoking, useNA = "always")
#>
#> Never smoker Previous smoker Current smoker <NA>
#> 3532 1347 1232 4064
23.2.7 Diet
23.2.7.1 How healthy is the diet
# Self-rated diet quality from DBQ700, collapsed to three ordered groups;
# other responses become NA.
dat2$diet.healthy <- car::recode(
  dat2$DBQ700,
  recodes = "
    c('Excellent', 'Very good') = 'Very good or excellent';
    'Good' = 'Good';
    c('Fair', 'Poor') = 'Poor or fair';
    else = NA
  "
)
dat2$diet.healthy <- factor(
  dat2$diet.healthy,
  levels = c("Poor or fair", "Good", "Very good or excellent")
)
table(dat2$diet.healthy, useNA = "always")
#>
#> Poor or fair Good Very good or excellent
#> 1824 2743 1896
#> <NA>
#> 3712
23.2.8 Vigorous physical activity
# Vigorous physical activity from PAQ605: keep 'Yes'/'No', everything else NA.
dat2$physical.activity <- car::recode(
  dat2$PAQ605,
  recodes = " 'No' = 'No'; 'Yes' = 'Yes'; else=NA"
)
dat2$physical.activity <- factor(
  dat2$physical.activity,
  levels = c("No", "Yes")
)
table(dat2$physical.activity, useNA = "always")
#>
#> No Yes <NA>
#> 5975 1172 3028
23.2.9 Access to medical services
# Access to a routine place for health care, from HUQ030. Having one or
# several places counts as 'Yes'; other responses become NA.
dat2$medical.access <- car::recode(
  dat2$HUQ030,
  recodes = "
    c('Yes', 'There is more than one place') = 'Yes';
    'There is no place' = 'No';
    else = NA
  "
)
table(dat2$medical.access, useNA = "always")
#>
#> No Yes <NA>
#> 1194 8981 0
23.2.10 Hypertension/high blood pressure
23.2.10.1 Systolic BP
# Copy the four systolic readings under readable names.
dat2$systolic1 <- dat2$BPXSY1
dat2$systolic2 <- dat2$BPXSY2
dat2$systolic3 <- dat2$BPXSY3
dat2$systolic4 <- dat2$BPXSY4

# Per-participant mean of the available readings. Base R is used (as in the
# diastolic step) so the chunk does not depend on dplyr/magrittr being
# attached. Rows with no reading at all yield NaN, which summary() counts
# among the NA's.
dat2$systolicBP <- rowMeans(
  dat2[, c("systolic1", "systolic2", "systolic3", "systolic4")],
  na.rm = TRUE
)
summary(dat2$systolicBP)
#> Min. 1st Qu. Median Mean 3rd Qu. Max. NA's
#> 64.67 106.00 115.33 118.31 128.00 228.67 2644
23.2.10.2 Diastolic BP
# Copy the four diastolic readings under readable names.
dat2$diastolic1 <- dat2$BPXDI1
dat2$diastolic2 <- dat2$BPXDI2
dat2$diastolic3 <- dat2$BPXDI3
dat2$diastolic4 <- dat2$BPXDI4

# Work on a copy of the four readings so that zeros can be blanked out
# without altering the stored diastolic1-4 columns.
datX <- dat2[, c("diastolic1", "diastolic2", "diastolic3", "diastolic4")]
# A reading of exactly 0 is treated as missing before averaging.
datX[datX == 0] <- NA
# Per-participant mean of the remaining readings; rows with no usable
# reading yield NaN, which summary() counts among the NA's.
dat2$diastolicBP <- rowMeans(datX, na.rm = TRUE)
summary(dat2$diastolicBP)
#> Min. 1st Qu. Median Mean 3rd Qu. Max. NA's
#> 3.333 58.000 66.667 66.329 74.667 128.000 2688
23.2.11 Sleep (daily in hours)
# Hours of sleep from SLD010H; the value 99 is treated as missing.
dat2$sleep <- dat2$SLD010H
dat2$sleep[dat2$sleep %in% 99] <- NA
summary(dat2$sleep)
#> Min. 1st Qu. Median Mean 3rd Qu. Max. NA's
#> 2.000 6.000 7.000 6.951 8.000 12.000 3721
23.2.12 Laboratory data
23.2.12.1 Uric acid (mg/dL)
# Uric acid, copied from LBXSUA.
dat2$uric.acid <- dat2$LBXSUA
summary(dat2$uric.acid)
#> Min. 1st Qu. Median Mean 3rd Qu. Max. NA's
#> 0.70 4.30 5.20 5.35 6.20 13.30 3624
23.2.12.2 Total protein (g/dL)
# Total protein, copied from LBXSTP.
dat2$protein.total <- dat2$LBXSTP
summary(dat2$protein.total)
#> Min. 1st Qu. Median Mean 3rd Qu. Max. NA's
#> 4.700 6.800 7.100 7.108 7.400 10.200 3631
23.2.12.3 Total bilirubin (mg/dL)
# Total bilirubin, copied from LBXSTB.
dat2$bilirubin.total <- dat2$LBXSTB
summary(dat2$bilirubin.total)
#> Min. 1st Qu. Median Mean 3rd Qu. Max. NA's
#> 0.100 0.400 0.600 0.639 0.800 7.100 3626
23.2.12.4 Phosphorus (mg/dL)
# Phosphorus, copied from LBXSPH.
dat2$phosphorus <- dat2$LBXSPH
summary(dat2$phosphorus)
#> Min. 1st Qu. Median Mean 3rd Qu. Max. NA's
#> 1.800 3.500 3.900 3.929 4.300 10.900 3623
23.2.12.5 Sodium (mmol/L)
# Sodium, copied from LBXSNASI.
dat2$sodium <- dat2$LBXSNASI
summary(dat2$sodium)
#> Min. 1st Qu. Median Mean 3rd Qu. Max. NA's
#> 119.0 139.0 140.0 139.8 141.0 154.0 3622
23.2.12.6 Potassium (mmol/L)
# Potassium, copied from LBXSKSI.
dat2$potassium <- dat2$LBXSKSI
summary(dat2$potassium)
#> Min. 1st Qu. Median Mean 3rd Qu. Max. NA's
#> 2.800 3.800 4.000 4.027 4.200 5.800 3623
23.2.12.7 Globulin (g/dL)
# Globulin, copied from LBXSGB.
dat2$globulin <- dat2$LBXSGB
summary(dat2$globulin)
#> Min. 1st Qu. Median Mean 3rd Qu. Max. NA's
#> 1.400 2.500 2.800 2.826 3.100 6.500 3631
23.2.12.8 Total calcium (mg/dL)
# Total calcium, copied from LBXSCA.
dat2$calcium.total <- dat2$LBXSCA
summary(dat2$calcium.total)
#> Min. 1st Qu. Median Mean 3rd Qu. Max. NA's
#> 7.600 9.200 9.500 9.486 9.700 14.800 3664
23.2.12.9 High cholesterol
# High-cholesterol indicator from BPQ080: keep 'Yes'/'No', everything else NA.
dat2$high.cholesterol <- car::recode(
  dat2$BPQ080,
  recodes = " 'Yes'='Yes'; 'No'='No'; else = NA"
)
table(dat2$high.cholesterol, useNA = "always")
#>
#> No Yes <NA>
#> 4391 2037 3747
23.2.13 Survey features
23.2.13.1 Weight
# Survey weight copied from WTINT2YR.
# NOTE(review): presumably the 2-year interview weight -- confirm against the
# NHANES codebook.
dat2$survey.weight <- dat2$WTINT2YR
summary(dat2$survey.weight)
#> Min. 1st Qu. Median Mean 3rd Qu. Max.
#> 3698 12754 20233 30585 36280 167885
# Survey weight copied from WTMEC2YR.
# NOTE(review): presumably the 2-year examination (MEC) weight -- confirm
# against the NHANES codebook.
dat2$survey.weight.mec <- dat2$WTMEC2YR
summary(dat2$survey.weight.mec)
#> Min. 1st Qu. Median Mean 3rd Qu. Max.
#> 0 12562 20175 30585 36748 171395
23.2.13.2 PSU
# Sampling unit identifier from SDMVPSU, stored as a factor.
dat2$psu <- as.factor(dat2$SDMVPSU)
table(dat2$psu)
#>
#> 1 2
#> 5249 4926
23.2.13.3 Strata
# Stratum identifier from SDMVSTRA, stored as a factor.
dat2$strata <- as.factor(dat2$SDMVSTRA)
table(dat2$strata)
#>
#> 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118
#> 674 646 671 732 674 752 664 663 723 665 741 681 700 500 689
23.2.14 Survey year
# Survey cycle indicator, copied from SDDSRVYR.
dat2$year <- dat2$SDDSRVYR
table(dat2$year, useNA = "always")
#>
#> 8 <NA>
#> 10175 0
23.2.15 ICD-10-CM codes
# Give the three prescription tables identical column names so they can be
# stacked with rbind().
colnames(rxq12) <- c("id", "icd10")
colnames(rxq22) <- c("id", "icd10")
colnames(rxq32) <- c("id", "icd10")

# Stack the tables and sort by participant ID.
rx2013 <- rbind(rxq12, rxq22, rxq32)
rx2013 <- rx2013[order(rx2013$id), ]

# Non-informative codes are treated as missing. `%in%` handles all four in
# one step and is unaffected by values that are already NA.
rx2013$icd10[
  rx2013$icd10 %in% c("Unknown", "Refused", "Don't know", "")
] <- NA

# Keep the first three characters of each code as a coarser category.
rx2013$icd10.new <- substr(rx2013$icd10, start = 1, stop = 3)

# Drop rows with any missing value.
rx2013 <- na.omit(rx2013)
23.3 Analytic data
23.3.1 Full dataset
# Full recoded dataset (all participants, all original and derived columns).
nhanes13r <- dat2
23.3.2 Analytic dataset - adults 20 years or more
# Names of the derived columns to carry into the analytic dataset.
vars <- c(
  # ID
  "id",
  # Demographic
  "age", "age.cat", "sex", "education", "race",
  "marital", "income", "born", "pregnancy",
  # obesity
  "obese",
  # Diabetes
  "diabetes", "diabetes.family.history",
  # Smoking
  "smoking",
  # Diet
  "diet.healthy",
  # Physical activity
  "physical.activity",
  # Access to routine healthcare
  "medical.access",
  # Blood pressure and Hypertension
  "systolicBP", "diastolicBP",
  # Sleep
  "sleep",
  # Laboratory
  "uric.acid", "protein.total", "bilirubin.total", "phosphorus",
  "sodium", "potassium", "globulin", "calcium.total",
  "high.cholesterol",
  # Survey features
  "survey.weight", "survey.weight.mec", "psu", "strata",
  # Survey year
  "year"
)
# Keep only the derived analysis variables listed in `vars`.
nhanes13r.sel <- nhanes13r[, vars]
dim(nhanes13r.sel)
#> [1] 10175 34

# Adults 20 years or more and not pregnant.
# The recoded pregnancy values are 'Yes', 'No', 'inconclusive' and
# 'outside of target population', so the exclusion must compare against
# "Yes". The earlier lowercase 'yes' never matched any row and therefore kept
# every adult aged 20+ (5769 rows), pregnant or not.
analytic13 <- subset(nhanes13r.sel, age >= 20 & pregnancy != "Yes")
# With the corrected filter the row count drops below 5769, by at most the
# 65 participants tabulated as pregnancy == 'Yes'.
dim(analytic13)

# 23.3.3 Save dataset for later use
dim(analytic13)
dim(rx2013)
#> [1] 14474 3
# Store the analytic dataset together with the ICD-10 prescription table.
save(analytic13, rx2013, file = "data/analytic13recoded.RData")