25  Recoding cycle 10

Creating analytic dataset from 2017-18 cycle

25.1 Load downloaded dataset

# Restore the objects saved for the 2017-18 cycle into the workspace.
# NOTE(review): presumably this provides nhanes17 and the rxq12/rxq22/rxq32
# tables used further down - confirm against the script that wrote the file.
load("data/analytic17.RData")

25.2 Recoding

25.2.1 ID

# Work on a copy of the loaded data; SEQN becomes the id column.
dat2 <- nhanes17
dat2[["id"]] <- dat2[["SEQN"]]

25.2.2 Demographic

25.2.2.1 Age

# Age in years (RIDAGEYR) plus a four-level age group.
dat2[["age"]] <- dat2[["RIDAGEYR"]]
# Values not covered by one of the listed ranges become NA.
dat2[["age.cat"]] <- factor(
  car::recode(var = dat2[["age"]], recodes = " 0:19 = '<20'; 20:49 = '20-49'; 50:64 = '50-64'; 
                            65:80 = '65+'; else = NA "),
  levels = c("<20", "20-49", "50-64", "65+")
)
table(dat2[["age.cat"]], useNA = "always")
#> 
#>   <20 20-49 50-64   65+  <NA> 
#>  3685  2500  1569  1500     0

25.2.2.2 Sex

# Sex is taken from RIAGENDR without recoding.
dat2[["sex"]] <- dat2[["RIAGENDR"]]
table(dat2[["sex"]], useNA = "always")
#> 
#>   Male Female   <NA> 
#>   4557   4697      0

25.2.2.3 Education

# Education: collapse the DMDEDUC2 labels into three groups.
# 'Some college or AA degree' is grouped with 'High school'; any label not
# listed in the recode specification becomes NA.
dat2[["education"]] <- car::recode(var = as.factor(dat2[["DMDEDUC2"]]), recodes = " c('College graduate or above') = 
'College graduate or above'; c('Some college or AA degree', 'High school graduate/GED or equi') = 
'High school'; c('Less than 9th grade', '9-11th grade (Includes 12th grad') = 
'Less than high school'; else = NA ")
# Fix the level order from lowest to highest attainment.
dat2[["education"]] <- factor(
  dat2[["education"]],
  levels = c("Less than high school", "High school", "College graduate or above")
)
table(dat2[["education"]], useNA = "always")
#> 
#>     Less than high school               High school College graduate or above 
#>                      1117                      3103                      1336 
#>                      <NA> 
#>                      3698

25.2.2.4 Race/ethnicity

# Race/ethnicity: four groups built from RIDRETH1.
# The two Hispanic labels are merged; every remaining value goes to 'Others'.
dat2[["race"]] <- factor(
  car::recode(var = dat2[["RIDRETH1"]], recodes = " 'Non-Hispanic White'='White';
                    'Non-Hispanic Black'='Black'; c('Mexican American',
                    'Other Hispanic')= 'Hispanic'; else='Others' "),
  levels = c("White", "Black", "Hispanic", "Others")
)
table(dat2[["race"]], useNA = "always")
#> 
#>    White    Black Hispanic   Others     <NA> 
#>     3150     2115     2187     1802        0

25.2.2.5 Marital status

# Marital status: three groups built from DMDMARTL; unlisted answers become NA.
dat2[["marital"]] <- factor(
  car::recode(var = dat2[["DMDMARTL"]], recodes = " 'Never married'='Never married';
c('Married', 'Living with partner') = 'Married/with partner'; 
                            c('Widowed', 'Divorced', 'Separated')='Other'; else=NA "),
  levels = c("Never married", "Married/with partner", "Other")
)
table(dat2[["marital"]], useNA = "always")
#> 
#>        Never married Married/with partner                Other 
#>                 1006                 3252                 1305 
#>                 <NA> 
#>                 3691

25.2.2.6 Income

# Household income: collapse the INDHHIN2 brackets into three bands.
# Labels that match none of the listed brackets become NA.
dat2[["income"]] <- factor(
  car::recode(var = dat2[["INDHHIN2"]], recodes = " c('$ 0 to $ 4,999', '$ 5,000 to $ 9,999',
'$10,000 to $14,999', '$15,000 to $19,999', 'Under $20,000')='less than $20,000';
                       c('Over $20,000','$20,000 and Over', '$20,000 to $24,999', 
                       '$25,000 to $34,999', '$35,000 to $44,999', '$45,000 to $54,999', 
                       '$55,000 to $64,999', '$65,000 to $74,999')='$20,000 to $74,999';
                       c('$75,000 to $99,999','$100,000 and Over')='$75,000 and Over'; 
                            else=NA "),
  levels = c("less than $20,000", "$20,000 to $74,999", "$75,000 and Over")
)
table(dat2[["income"]], useNA = "always")
#> 
#>  less than $20,000 $20,000 to $74,999   $75,000 and Over               <NA> 
#>               1589               4331               2453                881

25.2.2.7 Where born / citizenship

# Place of birth: two groups built from DMDBORN4; anything else becomes NA.
dat2[["born"]] <- factor(
  car::recode(var = dat2[["DMDBORN4"]], recodes = " 'Others'='Other place';
                       'Born in 50 US states or Washingt'= 'Born in US'; else=NA"),
  levels = c("Born in US", "Other place")
)
table(dat2[["born"]], useNA = "always")
#> 
#>  Born in US Other place        <NA> 
#>        7303        1948           3

25.2.2.8 Pregnancy

# Pregnancy status from RIDEXPRG, shortened to 'Yes' / 'No' / 'inconclusive'.
# Every other value, missing ones included, is labelled
# 'outside of target population', so the result has no NA.
dat2[["pregnancy"]] <- car::recode(
  var = dat2[["RIDEXPRG"]],
                      recodes = " 'Yes, positive lab pregnancy test' = 'Yes';
                       'The participant was not pregnant' = 'No'; 
                       'Cannot ascertain if the particip' = 'inconclusive';
                       else= 'outside of target population'  ")
table(dat2[["pregnancy"]], useNA = "always")
#> 
#>                 inconclusive                           No 
#>                           89                          966 
#> outside of target population                          Yes 
#>                         8144                           55 
#>                         <NA> 
#>                            0

25.2.3 BMI

25.2.3.1 BMI and Obesity

# Body mass index is copied from BMXBMI.
dat2[["bmi"]] <- dat2[["BMXBMI"]]
summary(dat2[["bmi"]])
#>    Min. 1st Qu.  Median    Mean 3rd Qu.    Max.    NA's 
#>   12.30   20.40   25.80   26.58   31.30   86.20    1249
# Obesity flag: BMI of 30 or more; a missing BMI stays missing.
dat2[["obese"]] <- factor(
  ifelse(dat2[["BMXBMI"]] >= 30, "Yes", "No"),
  levels = c("No", "Yes")
)
table(dat2[["obese"]], useNA = "always")
#> 
#>   No  Yes <NA> 
#> 5597 2408 1249

25.2.4 Diabetes

# Diabetes from DIQ010: 'Borderline' is counted as 'No'; other answers become NA.
dat2[["diabetes"]] <- car::recode(var = dat2[["DIQ010"]], recodes = " 'Yes'='Yes'; c('No','Borderline')='No';
                             else=NA ")

# A "Yes" on DIQ050 or DIQ070 (taking insulin now, or diabetic pills to lower
# blood sugar) overrides the answer above: those participants have diabetes.
dat2[["diabetes"]][which(dat2[["DIQ050"]] == "Yes")] <- "Yes"
dat2[["diabetes"]][which(dat2[["DIQ070"]] == "Yes")] <- "Yes"
table(dat2[["diabetes"]], useNA = "always")
#> 
#>   No  Yes <NA> 
#> 7927  966  361

25.2.5 Family history of diabetes

# Raw distribution of DIQ175A before recoding.
table(dat2[["DIQ175A"]], useNA = "always")
#> 
#> Family history     Don't know           <NA> 
#>           1143              2           8109
# 'Family history' becomes 'Yes'; everything else, missing values included,
# starts out as 'No'.
dat2[["diabetes.family.history"]] <- factor(
  car::recode(var = dat2[["DIQ175A"]], recodes = " 'Family history' = 'Yes'; 
                             else = 'No' "),
  levels = c("No", "Yes")
)
# "Don't know" answers are then set back to missing.
dat2[["diabetes.family.history"]][which(dat2[["DIQ175A"]] == "Don't know")] <- NA
table(dat2[["diabetes.family.history"]], useNA = "always")
#> 
#>   No  Yes <NA> 
#> 8109 1143    2

25.2.6 Smoking

# Smoking status: SMQ020 gives the initial never/current split.
dat2[["smoking"]] <- factor(
  car::recode(var = dat2[["SMQ020"]], recodes = " 'Yes' = 'Current smoker'; 'No' = 'Never smoker'; else=NA  "),
  levels = c("Never smoker", "Previous smoker", "Current smoker")
)
# Anyone whose SMQ040 answer is "Not at all" is reclassified as a previous smoker.
dat2[["smoking"]][which(dat2[["SMQ040"]] == "Not at all")] <- "Previous smoker"
table(dat2[["smoking"]], useNA = "always")
#> 
#>    Never smoker Previous smoker  Current smoker            <NA> 
#>            3497            1338            1021            3398

25.2.7 Diet

25.2.7.1 How healthy is the diet

# Self-rated diet quality (DBQ700) collapsed from five answers to three;
# any other answer becomes NA.
dat2[["diet.healthy"]] <- factor(
  car::recode(var = dat2[["DBQ700"]], recodes = " c('Excellent', 'Very good')=
                    'Very good or excellent'; 'Good'='Good'; c('Fair', 'Poor')=
                    'Poor or fair'; else = NA "),
  levels = c("Poor or fair", "Good", "Very good or excellent")
)
table(dat2[["diet.healthy"]], useNA = "always")
#> 
#>           Poor or fair                   Good Very good or excellent 
#>                   2036                   2411                   1712 
#>                   <NA> 
#>                   3095

25.2.8 Vigorous physical activity

# Vigorous physical activity (PAQ605): keep Yes/No, everything else becomes NA.
dat2[["physical.activity"]] <- factor(
  car::recode(var = dat2[["PAQ605"]], recodes = " 'No' = 'No'; 
                                      'Yes' = 'Yes'; else=NA"),
  levels = c("No", "Yes")
)
table(dat2[["physical.activity"]], useNA = "always")
#> 
#>   No  Yes <NA> 
#> 4461 1389 3404

25.2.9 Access to medical services

# Access to medical services (HUQ030): having one place or more than one place
# both count as 'Yes'; 'There is no place' is 'No'; other answers become NA.
dat2[["medical.access"]] <- car::recode(var = dat2[["HUQ030"]], recodes = " c('Yes',
                              'There is more than one place')='Yes'; 'There is no place'=
                              'No'; else=NA")
table(dat2[["medical.access"]], useNA = "always")
#> 
#>   No  Yes <NA> 
#> 1398 7854    2

25.2.10 Hypertension/high blood pressure

25.2.10.1 Systolic BP

# Copy the four systolic readings (BPXSY1-BPXSY4) under analysis-friendly names.
dat2$systolic1 <- dat2$BPXSY1
dat2$systolic2 <- dat2$BPXSY2
dat2$systolic3 <- dat2$BPXSY3
dat2$systolic4 <- dat2$BPXSY4

# Per-participant mean of whichever readings are present.
# Base rowMeans() is assigned directly, as in the diastolic section, so this
# step does not depend on dplyr/magrittr being attached. The values are the
# same as before. Rows with no reading at all yield NaN, which summary()
# counts under NA's.
dat2$systolicBP <- rowMeans(dat2[, c("systolic1", "systolic2",
                                     "systolic3", "systolic4")],
                            na.rm = TRUE)
summary(dat2$systolicBP)
#>    Min. 1st Qu.  Median    Mean 3rd Qu.    Max.    NA's 
#>   72.67  106.67  118.00  121.68  132.67  238.00    2537

25.2.10.2 Diastolic BP

# Copy the four diastolic readings (BPXDI1-BPXDI4) under analysis-friendly names.
dat2[["diastolic1"]] <- dat2[["BPXDI1"]]
dat2[["diastolic2"]] <- dat2[["BPXDI2"]]
dat2[["diastolic3"]] <- dat2[["BPXDI3"]]
dat2[["diastolic4"]] <- dat2[["BPXDI4"]]
# Work on a side copy so readings of exactly 0 can be blanked out before
# averaging; the diastolic1-4 columns in dat2 keep their original values.
datX <- dat2[, paste0("diastolic", 1:4)]
datX[datX == 0] <- NA
# Mean of the remaining readings for each participant.
dat2[["diastolicBP"]] <- rowMeans(datX, na.rm = TRUE)
summary(dat2[["diastolicBP"]])
#>    Min. 1st Qu.  Median    Mean 3rd Qu.    Max.    NA's 
#>    8.00   61.33   70.00   69.54   77.33  135.33    2618

25.2.11 Sleep (daily in hours)

# Daily sleep in hours, copied from SLD012.
dat2[["sleep"]] <- dat2[["SLD012"]]
summary(dat2[["sleep"]])
#>    Min. 1st Qu.  Median    Mean 3rd Qu.    Max.    NA's 
#>   2.000   7.000   8.000   7.659   8.500  14.000    3141

25.2.12 Laboratory data

25.2.12.1 Uric acid (mg/dL)

# Uric acid (mg/dL), copied from LBXSUA.
dat2[["uric.acid"]] <- dat2[["LBXSUA"]]
summary(dat2[["uric.acid"]])
#>    Min. 1st Qu.  Median    Mean 3rd Qu.    Max.    NA's 
#>   0.800   4.300   5.300   5.402   6.300  15.100    3353

25.2.12.2 Total protein (g/dL)

# Total protein (g/dL), copied from LBXSTP.
dat2[["protein.total"]] <- dat2[["LBXSTP"]]
summary(dat2[["protein.total"]])
#>    Min. 1st Qu.  Median    Mean 3rd Qu.    Max.    NA's 
#>   5.300   6.900   7.200   7.166   7.400  10.000    3353

25.2.12.3 Total bilirubin (mg/dL)

# Total bilirubin (mg/dL), copied from LBXSTB.
dat2[["bilirubin.total"]] <- dat2[["LBXSTB"]]
summary(dat2[["bilirubin.total"]])
#>    Min. 1st Qu.  Median    Mean 3rd Qu.    Max.    NA's 
#>    0.10    0.30    0.40    0.46    0.60    3.70    3351

25.2.12.4 Phosphorus (mg/dL)

# Phosphorus (mg/dL), copied from LBXSPH.
dat2[["phosphorus"]] <- dat2[["LBXSPH"]]
summary(dat2[["phosphorus"]])
#>    Min. 1st Qu.  Median    Mean 3rd Qu.    Max.    NA's 
#>   1.900   3.300   3.600   3.665   4.000   9.600    3353

25.2.12.5 Sodium (mmol/L)

# Sodium (mmol/L), copied from LBXSNASI.
dat2[["sodium"]] <- dat2[["LBXSNASI"]]
summary(dat2[["sodium"]])
#>    Min. 1st Qu.  Median    Mean 3rd Qu.    Max.    NA's 
#>   121.0   138.0   140.0   140.3   142.0   151.0    3350

25.2.12.6 Potassium (mmol/L)

# Potassium (mmol/L), copied from LBXSKSI.
dat2[["potassium"]] <- dat2[["LBXSKSI"]]
summary(dat2[["potassium"]])
#>    Min. 1st Qu.  Median    Mean 3rd Qu.    Max.    NA's 
#>   2.800   3.900   4.100   4.094   4.300   6.600    3355

25.2.12.7 Globulin (g/dL)

# Globulin (g/dL), copied from LBXSGB.
dat2[["globulin"]] <- dat2[["LBXSGB"]]
summary(dat2[["globulin"]])
#>    Min. 1st Qu.  Median    Mean 3rd Qu.    Max.    NA's 
#>   1.800   2.800   3.100   3.087   3.300   6.000    3353

25.2.12.8 Total calcium (mg/dL)

# Total calcium (mg/dL), copied from LBXSCA.
dat2[["calcium.total"]] <- dat2[["LBXSCA"]]
summary(dat2[["calcium.total"]])
#>    Min. 1st Qu.  Median    Mean 3rd Qu.    Max.    NA's 
#>    6.40    9.10    9.30    9.32    9.60   11.70    3353

25.2.12.9 High cholesterol

# High cholesterol (BPQ080): keep Yes/No, everything else becomes NA.
dat2[["high.cholesterol"]] <- car::recode(var = dat2[["BPQ080"]], recodes = " 'Yes'='Yes';
                                     'No'='No'; else = NA")
table(dat2[["high.cholesterol"]], useNA = "always")
#> 
#>   No  Yes <NA> 
#> 4153 1968 3133

25.2.13 Survey features

25.2.13.1 Weight

# Survey weight copied from WTINT2YR.
dat2[["survey.weight"]] <- dat2[["WTINT2YR"]]
summary(dat2[["survey.weight"]])
#>    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
#>    2571   13074   21099   34671   36923  433085
# Second weight copied from WTMEC2YR; its minimum is 0 in the summary below.
dat2[["survey.weight.mec"]] <- dat2[["WTMEC2YR"]]
summary(dat2[["survey.weight.mec"]])
#>    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
#>       0   12347   21060   34671   37562  419763

25.2.13.2 PSU

# PSU identifier (SDMVPSU) stored as a factor.
dat2[["psu"]] <- as.factor(dat2[["SDMVPSU"]])
table(dat2[["psu"]])
#> 
#>    1    2 
#> 4464 4790

25.2.13.3 Strata

# Stratum identifier (SDMVSTRA) stored as a factor.
dat2[["strata"]] <- as.factor(dat2[["SDMVSTRA"]])
table(dat2[["strata"]])
#> 
#> 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 
#> 510 638 695 554 605 653 612 693 735 551 689 609 604 596 510

25.2.14 Survey year

# Survey cycle indicator, copied from SDDSRVYR.
dat2[["year"]] <- dat2[["SDDSRVYR"]]
table(dat2[["year"]], useNA = "always")
#> 
#>   10 <NA> 
#> 9254    0

25.2.15 ICD-10-CM codes

# Give the three rxq tables the same two column names so they can be stacked.
colnames(rxq12) <- colnames(rxq22) <- colnames(rxq32) <- c("id", "icd10")

# Stack them and sort the rows by participant id.
rx2017 <- rbind(rxq12, rxq22, rxq32)
rx2017 <- rx2017[order(rx2017$id), ]

# Non-informative codes and empty strings are treated as missing.
rx2017$icd10[rx2017$icd10 %in% c("Unknown", "Refused", "Don't know", "")] <- NA
# Shortened code: the first three characters of icd10.
rx2017$icd10.new <- substr(rx2017$icd10, start = 1, stop = 3)

# Drop every row that contains a missing value.
rx2017 <- na.omit(rx2017)

25.3 Analytic data

25.3.1 Full dataset

# Keep the fully recoded data (all rows, all original and derived columns)
# under its own name before any subsetting.
nhanes17r <- dat2

25.3.2 Analytic dataset - adults 20 years or more

# Names of the recoded columns kept in the analytic dataset, in output order.
vars <- c(
  "id",                                                     # ID
  "age", "age.cat", "sex", "education", "race",             # Demographic
  "marital", "income", "born", "pregnancy",
  "obese",                                                  # Obesity
  "diabetes", "diabetes.family.history",                    # Diabetes
  "smoking",                                                # Smoking
  "diet.healthy",                                           # Diet
  "physical.activity",                                      # Physical activity
  "medical.access",                                         # Access to routine healthcare
  "systolicBP", "diastolicBP",                              # Blood pressure and hypertension
  "sleep",                                                  # Sleep
  "uric.acid", "protein.total", "bilirubin.total",          # Laboratory
  "phosphorus", "sodium", "potassium", "globulin",
  "calcium.total", "high.cholesterol",
  "survey.weight", "survey.weight.mec", "psu", "strata",    # Survey features
  "year"                                                    # Survey year
)

# Keep only the recoded analysis variables.
nhanes17r.sel <- nhanes17r[, vars]
dim(nhanes17r.sel)
#> [1] 9254   34
# Adults 20 years or more and not pregnant.
# The recoded pregnancy variable uses the label 'Yes' (capital Y). String
# comparison is case-sensitive, so the earlier test against 'yes' was TRUE for
# every row and excluded nobody.
analytic17 <- subset(nhanes17r.sel, age >= 20 &
                       pregnancy != "Yes")
dim(analytic17)
# NOTE(review): the output rendered before this fix was `[1] 5569   34`, which
# equals all participants aged 20+ (2500 + 1569 + 1500), i.e. pregnant
# participants were still included. Re-run to refresh this count and any row
# counts printed further down; at most the 55 participants coded 'Yes' drop out.

25.3.3 Save dataset for later use

# Final size check of both objects, then write them to one .RData file.
dim(analytic17)
#> [1] 5569   34
dim(rx2017)
#> [1] 15025     3
save(list = c("analytic17", "rx2017"), file = "data/analytic17recoded.RData")