24  Recoding cycle 9

Creating analytic dataset from 2015-16 cycle

24.1 Load downloaded dataset

# Restore the objects saved in the downloaded 2015-16 file into the workspace.
load("data/analytic15.RData")

24.2 Recoding

24.2.1 ID

# Work on a copy of the loaded data and expose SEQN under the name `id`.
dat2 <- nhanes15
dat2[["id"]] <- dat2[["SEQN"]]

24.2.2 Demographic

24.2.2.1 Age

# Age (copied from RIDAGEYR) and a four-level age-group factor.
# Values outside 0-80 fall through to `else = NA`.
dat2[["age"]] <- dat2[["RIDAGEYR"]]
dat2[["age.cat"]] <- factor(
  car::recode(dat2[["age"]], " 0:19 = '<20'; 20:49 = '20-49'; 50:64 = '50-64'; 
                            65:80 = '65+'; else = NA "),
  levels = c("<20", "20-49", "50-64", "65+")
)
table(dat2[["age.cat"]], useNA = "always")
#> 
#>   <20 20-49 50-64   65+  <NA> 
#>  4252  2894  1447  1378     0

24.2.2.2 Sex

# Sex is taken from RIAGENDR without recoding.
dat2[["sex"]] <- dat2[["RIAGENDR"]]
table(dat2[["sex"]], useNA = "always")
#> 
#>   Male Female   <NA> 
#>   4892   5079      0

24.2.2.3 Education

# Collapse DMDEDUC2 into three ordered-by-level education groups; any other
# response becomes NA.
dat2[["education"]] <- as.factor(dat2[["DMDEDUC2"]])
dat2[["education"]] <- factor(
  car::recode(dat2[["education"]], recodes = " c('College graduate or above') = 
'College graduate or above'; c('Some college or AA degree', 'High school graduate/GED or equi') = 
'High school'; c('Less than 9th grade', '9-11th grade (Includes 12th grad') = 
'Less than high school'; else = NA "),
  levels = c(
    "Less than high school",
    "High school",
    "College graduate or above"
  )
)
table(dat2[["education"]], useNA = "always")
#> 
#>     Less than high school               High school College graduate or above 
#>                      1364                      2928                      1422 
#>                      <NA> 
#>                      4257

24.2.2.4 Race/ethnicity

# Four-level race/ethnicity from RIDRETH1; everything not listed explicitly
# is grouped as 'Others'.
dat2[["race"]] <- dat2[["RIDRETH1"]]
dat2[["race"]] <- factor(
  car::recode(dat2[["race"]], recodes = " 'Non-Hispanic White'='White';
                    'Non-Hispanic Black'='Black'; c('Mexican American',
                    'Other Hispanic')= 'Hispanic'; else='Others' "),
  levels = c("White", "Black", "Hispanic", "Others")
)
table(dat2[["race"]], useNA = "always")
#> 
#>    White    Black Hispanic   Others     <NA> 
#>     3066     2129     3229     1547        0

24.2.2.5 Marital status

# Three-level marital status from DMDMARTL; unlisted responses become NA.
dat2[["marital"]] <- dat2[["DMDMARTL"]]
dat2[["marital"]] <- factor(
  car::recode(dat2[["marital"]], recodes = " 'Never married'='Never married';
c('Married', 'Living with partner') = 'Married/with partner'; 
                            c('Widowed', 'Divorced', 'Separated')='Other'; else=NA "),
  levels = c("Never married", "Married/with partner", "Other")
)
table(dat2[["marital"]], useNA = "always")
#> 
#>        Never married Married/with partner                Other 
#>                 1048                 3441                 1227 
#>                 <NA> 
#>                 4255

24.2.2.6 Income

# Household income (INDHHIN2) collapsed into three bands; unlisted responses
# become NA. Note that the open-ended '$20,000 and Over' answers are placed in
# the middle band by the recode specification.
dat2[["income"]] <- dat2[["INDHHIN2"]]
dat2[["income"]] <- factor(
  car::recode(dat2[["income"]], recodes = " c('$ 0 to $ 4,999', '$ 5,000 to $ 9,999',
'$10,000 to $14,999', '$15,000 to $19,999', 'Under $20,000')='less than $20,000';
                       c('Over $20,000','$20,000 and Over', '$20,000 to $24,999', 
                       '$25,000 to $34,999', '$35,000 to $44,999', '$45,000 to $54,999', 
                       '$55,000 to $64,999', '$65,000 to $74,999')='$20,000 to $74,999';
                       c('$75,000 to $99,999','$100,000 and Over')='$75,000 and Over'; 
                            else=NA "),
  levels = c("less than $20,000", "$20,000 to $74,999", "$75,000 and Over")
)
table(dat2[["income"]], useNA = "always")
#> 
#>  less than $20,000 $20,000 to $74,999   $75,000 and Over               <NA> 
#>               1906               4812               2554                699

24.2.2.7 Where born / citizenship

# Two-level place of birth from DMDBORN4; anything else becomes NA.
dat2[["born"]] <- dat2[["DMDBORN4"]]
dat2[["born"]] <- factor(
  car::recode(dat2[["born"]], recodes = " 'Others'='Other place';
                       'Born in 50 US states or Washingt'= 'Born in US'; else=NA"),
  levels = c("Born in US", "Other place")
)
table(dat2[["born"]], useNA = "always")
#> 
#>  Born in US Other place        <NA> 
#>        7733        2236           2

24.2.2.8 Pregnancy

# Pregnancy status from RIDEXPRG. Because of the `else` clause, every value
# not listed (including missing) is labelled 'outside of target population',
# so the result has no NA.
dat2[["pregnancy"]] <- car::recode(
  dat2[["RIDEXPRG"]],
  recodes = " 'Yes, positive lab pregnancy test' = 'Yes';
                       'The participant was not pregnant' = 'No'; 
                       'Cannot ascertain if the particip' = 'inconclusive';
                       else= 'outside of target population'  "
)
table(dat2[["pregnancy"]], useNA = "always")
#> 
#>                 inconclusive                           No 
#>                           93                         1125 
#> outside of target population                          Yes 
#>                         8683                           70 
#>                         <NA> 
#>                            0

24.2.3 BMI

24.2.3.1 BMI and Obesity

# BMI is copied from BMXBMI; obesity is flagged at BMI >= 30.
dat2[["bmi"]] <- dat2[["BMXBMI"]]
summary(dat2[["bmi"]])
#>    Min. 1st Qu.  Median    Mean 3rd Qu.    Max.    NA's 
#>   11.50   19.90   25.20   26.02   30.60   67.30    1215
# Missing BMI stays NA in the factor.
dat2[["obese"]] <- factor(
  dat2[["BMXBMI"]] >= 30,
  levels = c(FALSE, TRUE),
  labels = c("No", "Yes")
)
table(dat2[["obese"]], useNA = "always")
#> 
#>   No  Yes <NA> 
#> 6346 2410 1215

24.2.4 Diabetes

# Self-reported diabetes from DIQ010: 'Borderline' counts as 'No', any other
# response becomes NA.
dat2[["diabetes"]] <- car::recode(dat2[["DIQ010"]], " 'Yes'='Yes'; c('No','Borderline')='No';
                             else=NA ")

# Anyone answering 'Yes' to DIQ050 or DIQ070 (insulin / diabetic pills) is
# treated as having diabetes, overriding the answer above.
dat2[["diabetes"]][
  which(dat2[["DIQ050"]] == "Yes" | dat2[["DIQ070"]] == "Yes")
] <- "Yes"
table(dat2[["diabetes"]], useNA = "always")
#> 
#>   No  Yes <NA> 
#> 8648  923  400

24.2.5 Family history of diabetes

# Family history of diabetes from DIQ175A. In this download the column is
# numeric-coded (only 10 and NA are present, see the table below).
table(dat2$DIQ175A, useNA = "always")
#> 
#>   10 <NA> 
#> 1186 8785
# 10 means the item was selected; everything else (including missing) is 'No'.
dat2$diabetes.family.history <- car::recode(dat2$DIQ175A, " 10 = 'Yes'; 
                             else = 'No' ")
dat2$diabetes.family.history <- factor(
  dat2$diabetes.family.history,
  levels = c("No", "Yes")
)
# Non-informative answers must be NA rather than 'No'. The earlier check
# compared the numeric column with the label "Don't know" only, so it could
# never match. The test now covers the label and the numeric codes.
# NOTE(review): 77 = Refused and 99 = Don't know are assumed from the usual
# NHANES coding; confirm against the DIQ codebook for this cycle.
# %in% never returns NA, so the index is safe for rows where DIQ175A is missing.
dat2$diabetes.family.history[
  dat2$DIQ175A %in% c(77, 99, "Don't know")
] <- NA
table(dat2$diabetes.family.history, useNA = "always")
#> 
#>   No  Yes <NA> 
#> 8785 1186    0

24.2.6 Smoking

# Smoking status: start from SMQ020 ('Yes' -> current, 'No' -> never), then
# move respondents whose SMQ040 answer is 'Not at all' to 'Previous smoker'.
dat2[["smoking"]] <- factor(
  car::recode(dat2[["SMQ020"]], " 'Yes' = 'Current smoker'; 'No' = 'Never smoker'; else=NA  "),
  levels = c("Never smoker", "Previous smoker", "Current smoker")
)
dat2[["smoking"]][which(dat2[["SMQ040"]] == "Not at all")] <- "Previous smoker"
table(dat2[["smoking"]], useNA = "always")
#> 
#>    Never smoker Previous smoker  Current smoker            <NA> 
#>            3559            1322            1100            3990

24.2.7 Diet

24.2.7.1 How healthy is the diet

# Self-rated diet quality (DBQ700) collapsed to three levels; other answers
# become NA.
dat2[["diet.healthy"]] <- dat2[["DBQ700"]]
dat2[["diet.healthy"]] <- factor(
  car::recode(dat2[["diet.healthy"]], recodes = " c('Excellent', 'Very good')=
                    'Very good or excellent'; 'Good'='Good'; c('Fair', 'Poor')=
                    'Poor or fair'; else = NA "),
  levels = c("Poor or fair", "Good", "Very good or excellent")
)
table(dat2[["diet.healthy"]], useNA = "always")
#> 
#>           Poor or fair                   Good Very good or excellent 
#>                   2105                   2524                   1697 
#>                   <NA> 
#>                   3645

24.2.8 Vigorous physical activity

# Vigorous activity indicator from PAQ605; only 'No'/'Yes' are kept, the rest
# becomes NA.
dat2[["physical.activity"]] <- dat2[["PAQ605"]]
dat2[["physical.activity"]] <- factor(
  car::recode(dat2[["physical.activity"]], recodes = " 'No' = 'No'; 
                                      'Yes' = 'Yes'; else=NA"),
  levels = c("No", "Yes")
)
table(dat2[["physical.activity"]], useNA = "always")
#> 
#>   No  Yes <NA> 
#> 5596 1366 3009

24.2.9 Access to medical services

# Access to a place for routine care (HUQ030): one or several places count as
# 'Yes', no place as 'No', anything else as NA.
dat2[["medical.access"]] <- car::recode(
  dat2[["HUQ030"]],
  recodes = " c('Yes',
                              'There is more than one place')='Yes'; 'There is no place'=
                              'No'; else=NA"
)
table(dat2[["medical.access"]], useNA = "always")
#> 
#>   No  Yes <NA> 
#> 1340 8631    0

24.2.10 Hypertension/high blood pressure

24.2.10.1 Systolic BP

# Copy the four systolic readings to analysis-friendly names.
dat2$systolic1 <- dat2$BPXSY1
dat2$systolic2 <- dat2$BPXSY2
dat2$systolic3 <- dat2$BPXSY3
dat2$systolic4 <- dat2$BPXSY4

# Per-person mean of whichever readings are available. Plain base R is used
# (no `%>%` / `mutate()`), so this step does not depend on dplyr/magrittr being
# attached, which the script never does explicitly.
# A row with no reading at all yields NaN, which summary() reports under NA's.
dat2$systolicBP <- rowMeans(
  dat2[, c("systolic1", "systolic2", "systolic3", "systolic4")],
  na.rm = TRUE
)
summary(dat2$systolicBP)
#>    Min. 1st Qu.  Median    Mean 3rd Qu.    Max.    NA's 
#>    74.0   107.3   117.3   120.4   130.0   231.3    2608

24.2.10.2 Diastolic BP

# Copy the four diastolic readings to analysis-friendly names.
dat2[["diastolic1"]] <- dat2[["BPXDI1"]]
dat2[["diastolic2"]] <- dat2[["BPXDI2"]]
dat2[["diastolic3"]] <- dat2[["BPXDI3"]]
dat2[["diastolic4"]] <- dat2[["BPXDI4"]]
# Work on a scratch copy so that readings of exactly 0 can be blanked out
# before averaging, without touching the columns stored in dat2.
datX <- dat2[, paste0("diastolic", 1:4)]
datX[datX ==0] <- NA
# Per-person mean of the remaining readings.
dat2[["diastolicBP"]] <- rowMeans(datX, na.rm = TRUE)
summary(dat2[["diastolicBP"]])
#>    Min. 1st Qu.  Median    Mean 3rd Qu.    Max.    NA's 
#>    2.00   58.00   66.67   66.64   74.67  138.67    2636

24.2.11 Sleep (daily in hours)

# Sleep duration is copied from SLD012 unchanged.
dat2[["sleep"]] <- dat2[["SLD012"]]
summary(dat2[["sleep"]])
#>    Min. 1st Qu.  Median    Mean 3rd Qu.    Max.    NA's 
#>   2.000   7.000   8.000   7.753   8.500  14.500    3677

24.2.12 Laboratory data

24.2.12.1 Uric acid (mg/dL)

# Uric acid is copied from LBXSUA unchanged.
dat2[["uric.acid"]] <- dat2[["LBXSUA"]]
summary(dat2[["uric.acid"]])
#>    Min. 1st Qu.  Median    Mean 3rd Qu.    Max.    NA's 
#>   1.600   4.300   5.200   5.335   6.200  18.000    3717

24.2.12.2 Total protein (g/dL)

# Total protein is copied from LBXSTP unchanged.
dat2[["protein.total"]] <- dat2[["LBXSTP"]]
summary(dat2[["protein.total"]])
#>    Min. 1st Qu.  Median    Mean 3rd Qu.    Max.    NA's 
#>   5.200   6.900   7.200   7.201   7.500  10.100    3718

24.2.12.3 Total bilirubin (mg/dL)

# Total bilirubin is copied from LBXSTB unchanged.
dat2[["bilirubin.total"]] <- dat2[["LBXSTB"]]
summary(dat2[["bilirubin.total"]])
#>    Min. 1st Qu.  Median    Mean 3rd Qu.    Max.    NA's 
#>   0.000   0.400   0.500   0.552   0.700   3.500    3717

24.2.12.4 Phosphorus (mg/dL)

# Phosphorus is copied from LBXSPH unchanged.
dat2[["phosphorus"]] <- dat2[["LBXSPH"]]
summary(dat2[["phosphorus"]])
#>    Min. 1st Qu.  Median    Mean 3rd Qu.    Max.    NA's 
#>   1.000   3.400   3.800   3.796   4.200   9.700    3715

24.2.12.5 Sodium (mmol/L)

# Sodium is copied from LBXSNASI unchanged.
dat2[["sodium"]] <- dat2[["LBXSNASI"]]
summary(dat2[["sodium"]])
#>    Min. 1st Qu.  Median    Mean 3rd Qu.    Max.    NA's 
#>   124.0   137.0   139.0   138.7   140.0   161.0    3714

24.2.12.6 Potassium (mmol/L)

# Potassium is copied from LBXSKSI unchanged.
dat2[["potassium"]] <- dat2[["LBXSKSI"]]
summary(dat2[["potassium"]])
#>    Min. 1st Qu.  Median    Mean 3rd Qu.    Max.    NA's 
#>   2.600   3.740   3.930   3.952   4.150   5.860    3714

24.2.12.7 Globulin (g/dL)

# Globulin is copied from LBXSGB unchanged.
dat2[["globulin"]] <- dat2[["LBXSGB"]]
summary(dat2[["globulin"]])
#>    Min. 1st Qu.  Median    Mean 3rd Qu.    Max.    NA's 
#>   0.600   2.600   2.800   2.857   3.100   7.000    3719

24.2.12.8 Total calcium (mg/dL)

# Total calcium is copied from LBXSCA unchanged.
dat2[["calcium.total"]] <- dat2[["LBXSCA"]]
summary(dat2[["calcium.total"]])
#>    Min. 1st Qu.  Median    Mean 3rd Qu.    Max.    NA's 
#>   7.300   9.100   9.400   9.375   9.600  11.500    3714

24.2.12.9 High cholesterol

# High-cholesterol indicator from BPQ080; only 'Yes'/'No' are kept, the rest
# becomes NA.
dat2[["high.cholesterol"]] <- car::recode(
  dat2[["BPQ080"]],
  recodes = " 'Yes'='Yes';
                                     'No'='No'; else = NA"
)
table(dat2[["high.cholesterol"]], useNA = "always")
#> 
#>   No  Yes <NA> 
#> 4323 1960 3688

24.2.13 Survey features

24.2.13.1 Weight

# Two survey weights are kept: WTINT2YR and WTMEC2YR.
dat2[["survey.weight"]] <- dat2[["WTINT2YR"]]
summary(dat2[["survey.weight"]])
#>    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
#>    3294   12878   20160   31740   33257  233756
dat2[["survey.weight.mec"]] <- dat2[["WTMEC2YR"]]
summary(dat2[["survey.weight.mec"]])
#>    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
#>       0   12551   20281   31740   33708  242387

24.2.13.2 PSU

# SDMVPSU stored as a factor under the name `psu`.
dat2[["psu"]] <- as.factor(dat2[["SDMVPSU"]])
table(dat2[["psu"]])
#> 
#>    1    2 
#> 5127 4844

24.2.13.3 Strata

# SDMVSTRA stored as a factor under the name `strata`.
dat2[["strata"]] <- as.factor(dat2[["SDMVSTRA"]])
table(dat2[["strata"]])
#> 
#> 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 
#> 462 685 694 629 612 571 673 723 665 688 681 759 805 773 551

24.2.14 Survey year

# Survey cycle indicator, copied from SDDSRVYR.
dat2[["year"]] <- dat2[["SDDSRVYR"]]
table(dat2[["year"]], useNA = "always")
#> 
#>    9 <NA> 
#> 9971    0

24.2.15 ICD-10-CM codes

# Give the three prescription tables identical column names so they can be
# stacked.
colnames(rxq12) <- c("id", "icd10")
colnames(rxq22) <- c("id", "icd10")
colnames(rxq32) <- c("id", "icd10")

# Stack the tables and sort the result by participant id.
rx2015 <- do.call(rbind, list(rxq12, rxq22, rxq32))
rx2015 <- rx2015[order(rx2015$id), ]

# Non-informative codes (and empty strings) are treated as missing.
rx2015$icd10[rx2015$icd10 %in% c("Unknown", "Refused", "Don't know", "")] <- NA
# Keep the first three characters of each code in a separate column.
rx2015$icd10.new <- substr(rx2015$icd10, 1, 3)

# Drop every row that has a missing value in any column.
rx2015 <- na.omit(rx2015)

24.3 Analytic data

24.3.1 Full dataset

# Keep the fully recoded data (all rows, raw and derived columns) as nhanes15r.
nhanes15r <- dat2

24.3.2 Analytic dataset - adults 20 years or more

# Names of the derived columns that make up the analytic dataset, one
# thematic group per line. The order here is the column order of the subset.
vars <- c(
  "id",                                                   # ID
  "age", "age.cat", "sex", "education", "race",           # Demographic
  "marital", "income", "born", "pregnancy",
  "obese",                                                # Obesity
  "diabetes", "diabetes.family.history",                  # Diabetes
  "smoking",                                              # Smoking
  "diet.healthy",                                         # Diet
  "physical.activity",                                    # Physical activity
  "medical.access",                                       # Access to routine healthcare
  "systolicBP", "diastolicBP",                            # Blood pressure and hypertension
  "sleep",                                                # Sleep
  "uric.acid", "protein.total", "bilirubin.total",        # Laboratory
  "phosphorus", "sodium", "potassium", "globulin",
  "calcium.total", "high.cholesterol",
  "survey.weight", "survey.weight.mec", "psu", "strata",  # Survey features
  "year"                                                  # Survey year
)

# Keep only the analytic columns listed in `vars`.
nhanes15r.sel <- nhanes15r[, vars]
dim(nhanes15r.sel)
#> [1] 9971   34
# Adults aged 20 years or more who are not pregnant.
# The recoded pregnancy levels are 'Yes', 'No', 'inconclusive' and
# 'outside of target population'. The comparison must therefore use "Yes";
# the earlier lowercase 'yes' matched no level, so nobody was excluded.
analytic15 <- subset(nhanes15r.sel, age >= 20 & pregnancy != "Yes")
dim(analytic15)
# NOTE: the previously rendered output here was "[1] 5719   34", which is the
# count of all participants aged 20+ (pregnant participants were not dropped).
# Re-run to refresh; the row count is expected to be smaller after this fix.

24.3.3 Save dataset for later use

# Final size check of both objects, then write them to one .RData file.
dim(analytic15)
#> [1] 5719   34
dim(rx2015)
#> [1] 14084     3
save(
  list = c("analytic15", "rx2015"),
  file = "data/analytic15recoded.RData"
)