23  Recoding cycle 8

Creating the analytic dataset from the 2013-14 cycle

23.1 Load downloaded dataset

# Restore the objects stored in the downloaded 2013-14 .RData file.
load("data/analytic13.RData")

23.2 Recoding

23.2.1 ID

# All recoding is done on `dat2`; SEQN is exposed under the name `id`.
dat2 <- nhanes13
dat2[["id"]] <- dat2[["SEQN"]]

23.2.2 Demographic

23.2.2.1 Age

# Age in years (RIDAGEYR) and a four-level age group built from it.
# Ages outside 0-80 would become NA.
dat2[["age"]] <- dat2[["RIDAGEYR"]]
dat2[["age.cat"]] <- factor(
  car::recode(dat2[["age"]], " 0:19 = '<20'; 20:49 = '20-49'; 50:64 = '50-64'; 
                            65:80 = '65+'; else = NA "),
  levels = c("<20", "20-49", "50-64", "65+")
)
table(dat2[["age.cat"]], useNA = "always")
#> 
#>   <20 20-49 50-64   65+  <NA> 
#>  4406  2989  1474  1306     0

23.2.2.2 Sex

# Sex is taken from RIAGENDR without any recoding.
dat2[["sex"]] <- dat2[["RIAGENDR"]]
table(dat2[["sex"]], useNA = "always")
#> 
#>   Male Female   <NA> 
#>   5003   5172      0

23.2.2.3 Education

# Education (DMDEDUC2) collapsed to three levels. "Some college or AA degree"
# is grouped with high-school graduates; every unlisted value becomes NA.
dat2[["education"]] <- car::recode(
  as.factor(dat2[["DMDEDUC2"]]),
  recodes = " c('College graduate or above') = 
'College graduate or above'; c('Some college or AA degree', 'High school graduate/GED or equi') = 
'High school'; c('Less than 9th grade', '9-11th grade (Includes 12th grad') = 
'Less than high school'; else = NA "
)
# Fix the level order explicitly instead of relying on alphabetical order.
dat2[["education"]] <- factor(
  dat2[["education"]],
  levels = c("Less than high school", "High school", "College graduate or above")
)
table(dat2[["education"]], useNA = "always")
#> 
#>     Less than high school               High school College graduate or above 
#>                      1246                      3073                      1443 
#>                      <NA> 
#>                      4413

23.2.2.4 Race/ethnicity

# Race/ethnicity (RIDRETH1) in four groups; both Hispanic categories are
# merged and anything not listed falls into "Others".
dat2[["race"]] <- factor(
  car::recode(dat2[["RIDRETH1"]], recodes = " 'Non-Hispanic White'='White';
                    'Non-Hispanic Black'='Black'; c('Mexican American',
                    'Other Hispanic')= 'Hispanic'; else='Others' "),
  levels = c("White", "Black", "Hispanic", "Others")
)
table(dat2[["race"]], useNA = "always")
#> 
#>    White    Black Hispanic   Others     <NA> 
#>     3674     2267     2690     1544        0

23.2.2.5 Marital status

# Marital status (DMDMARTL) in three groups; unlisted values become NA.
dat2[["marital"]] <- factor(
  car::recode(dat2[["DMDMARTL"]], recodes = " 'Never married'='Never married';
c('Married', 'Living with partner') = 'Married/with partner'; 
                            c('Widowed', 'Divorced', 'Separated')='Other'; else=NA "),
  levels = c("Never married", "Married/with partner", "Other")
)
table(dat2[["marital"]], useNA = "always")
#> 
#>        Never married Married/with partner                Other 
#>                 1112                 3382                 1272 
#>                 <NA> 
#>                 4409

23.2.2.6 Income

# Household income (INDHHIN2) collapsed to three brackets; any label not
# listed in the recode specification becomes NA.
dat2[["income"]] <- factor(
  car::recode(dat2[["INDHHIN2"]], recodes = " c('$ 0 to $ 4,999', '$ 5,000 to $ 9,999',
'$10,000 to $14,999', '$15,000 to $19,999', 'Under $20,000')='less than $20,000';
                       c('Over $20,000','$20,000 and Over', '$20,000 to $24,999', 
                       '$25,000 to $34,999', '$35,000 to $44,999', '$45,000 to $54,999', 
                       '$55,000 to $64,999', '$65,000 to $74,999')='$20,000 to $74,999';
                       c('$75,000 to $99,999','$100,000 and Over')='$75,000 and Over'; 
                            else=NA "),
  levels = c("less than $20,000", "$20,000 to $74,999", "$75,000 and Over")
)
table(dat2[["income"]], useNA = "always")
#> 
#>  less than $20,000 $20,000 to $74,999   $75,000 and Over               <NA> 
#>               2110               4964               2641                460

23.2.2.7 Where born / citizenship

# Country of birth (DMDBORN4): US-born versus elsewhere; other answers are NA.
dat2[["born"]] <- factor(
  car::recode(dat2[["DMDBORN4"]], recodes = " 'Others'='Other place';
                       'Born in 50 US states or Washingt'= 'Born in US'; else=NA"),
  levels = c("Born in US", "Other place")
)
table(dat2[["born"]], useNA = "always")
#> 
#>  Born in US Other place        <NA> 
#>        8262        1908           5

23.2.2.8 Pregnancy

# Pregnancy status (RIDEXPRG). Values not matched by the first three rules,
# missing ones included, are labelled 'outside of target population', so the
# result has no NA.
dat2[["pregnancy"]] <- car::recode(
  dat2[["RIDEXPRG"]],
  recodes = " 'Yes, positive lab pregnancy test' = 'Yes';
                       'The participant was not pregnant' = 'No'; 
                       'Cannot ascertain if the particip' = 'inconclusive';
                       else= 'outside of target population'  "
)
table(dat2[["pregnancy"]], useNA = "always")
#> 
#>                 inconclusive                           No 
#>                           94                         1150 
#> outside of target population                          Yes 
#>                         8866                           65 
#>                         <NA> 
#>                            0

23.2.3 BMI

23.2.3.1 BMI and Obesity

# BMI is copied from BMXBMI.
dat2[["bmi"]] <- dat2[["BMXBMI"]]
summary(dat2[["bmi"]])
#>    Min. 1st Qu.  Median    Mean 3rd Qu.    Max.    NA's 
#>   12.10   19.70   24.70   25.68   30.20   82.90    1120
# Obesity flag: BMI of 30 or more. A missing BMI yields a missing flag.
dat2[["obese"]] <- factor(
  dat2[["BMXBMI"]] >= 30,
  levels = c(FALSE, TRUE),
  labels = c("No", "Yes")
)
table(dat2[["obese"]], useNA = "always")
#> 
#>   No  Yes <NA> 
#> 6708 2347 1120

23.2.4 Diabetes

# Self-reported diabetes (DIQ010): 'Borderline' counts as 'No'; other answers
# become NA.
dat2[["diabetes"]] <- car::recode(dat2[["DIQ010"]], " 'Yes'='Yes'; c('No','Borderline')='No';
                             else=NA ")

# Participants currently on insulin or diabetic pills (DIQ050 / DIQ070 answered
# "Yes") are set to "Yes" regardless of their DIQ010 answer.
dat2[["diabetes"]][which(dat2[["DIQ050"]] == "Yes" | dat2[["DIQ070"]] == "Yes")] <- "Yes"
table(dat2[["diabetes"]], useNA = "always")
#> 
#>   No  Yes <NA> 
#> 8981  782  412

23.2.5 Family history of diabetes

# Family history of diabetes (DIQ175A): 'Family history' -> "Yes"; every other
# value, missing ones included, -> "No".
dat2[["diabetes.family.history"]] <- factor(
  car::recode(dat2[["DIQ175A"]], " 'Family history' = 'Yes'; 
                             else = 'No' "),
  levels = c("No", "Yes")
)
# "Don't know" answers are the only ones set back to missing.
dat2[["diabetes.family.history"]][which(dat2[["DIQ175A"]] == "Don't know")] <- NA
table(dat2[["diabetes.family.history"]], useNA = "always")
#> 
#>   No  Yes <NA> 
#> 8837 1337    1

23.2.6 Smoking

# Smoking status. SMQ020 "Yes" starts out as 'Current smoker' and "No" as
# 'Never smoker'; the 'Previous smoker' level is declared up front so it can
# be assigned afterwards.
dat2[["smoking"]] <- factor(
  car::recode(dat2[["SMQ020"]], " 'Yes' = 'Current smoker'; 'No' = 'Never smoker'; else=NA  "),
  levels = c("Never smoker", "Previous smoker", "Current smoker")
)
# Anyone whose SMQ040 answer is "Not at all" is reclassified as a previous smoker.
dat2[["smoking"]][which(dat2[["SMQ040"]] == "Not at all")] <- "Previous smoker"
table(dat2[["smoking"]], useNA = "always")
#> 
#>    Never smoker Previous smoker  Current smoker            <NA> 
#>            3532            1347            1232            4064

23.2.7 Diet

23.2.7.1 How healthy is the diet

# Self-rated diet quality (DBQ700) collapsed from five answers to three levels,
# ordered from worst to best; other answers become NA.
dat2[["diet.healthy"]] <- factor(
  car::recode(dat2[["DBQ700"]], recodes = " c('Excellent', 'Very good')=
                    'Very good or excellent'; 'Good'='Good'; c('Fair', 'Poor')=
                    'Poor or fair'; else = NA "),
  levels = c("Poor or fair", "Good", "Very good or excellent")
)
table(dat2[["diet.healthy"]], useNA = "always")
#> 
#>           Poor or fair                   Good Very good or excellent 
#>                   1824                   2743                   1896 
#>                   <NA> 
#>                   3712

23.2.8 Vigorous physical activity

# Vigorous physical activity (PAQ605): keep Yes/No, everything else is NA.
dat2[["physical.activity"]] <- factor(
  car::recode(dat2[["PAQ605"]], recodes = " 'No' = 'No'; 
                                      'Yes' = 'Yes'; else=NA"),
  levels = c("No", "Yes")
)
table(dat2[["physical.activity"]], useNA = "always")
#> 
#>   No  Yes <NA> 
#> 5975 1172 3028

23.2.9 Access to medical services

# Access to medical services (HUQ030): having one place or more than one place
# both count as "Yes"; 'There is no place' is "No"; other answers become NA.
dat2[["medical.access"]] <- car::recode(
  dat2[["HUQ030"]],
  recodes = " c('Yes',
                              'There is more than one place')='Yes'; 'There is no place'=
                              'No'; else=NA"
)
table(dat2[["medical.access"]], useNA = "always")
#> 
#>   No  Yes <NA> 
#> 1194 8981    0

23.2.10 Hypertension/high blood pressure

23.2.10.1 Systolic BP

# Copy the four systolic readings (BPXSY1-4) to systolic1-4.
dat2[paste0("systolic", 1:4)] <- dat2[paste0("BPXSY", 1:4)]

# Per-participant mean of the available readings. Rows with no reading at all
# yield NaN, which summary() reports among the NA's.
dat2 <- mutate(
  dat2,
  systolicBP = rowMeans(dat2[, paste0("systolic", 1:4)], na.rm = TRUE)
)
summary(dat2[["systolicBP"]])
#>    Min. 1st Qu.  Median    Mean 3rd Qu.    Max.    NA's 
#>   64.67  106.00  115.33  118.31  128.00  228.67    2644

23.2.10.2 Diastolic BP

# Copy the four diastolic readings (BPXDI1-4) to diastolic1-4.
dat2[paste0("diastolic", 1:4)] <- dat2[paste0("BPXDI", 1:4)]

# Work on a side copy so the stored readings keep their zeros; in the copy a
# reading of 0 is treated as missing before averaging.
datX <- dat2[, paste0("diastolic", 1:4)]
datX[datX == 0] <- NA
# Mean of the remaining readings per participant (NaN when none remain).
dat2[["diastolicBP"]] <- rowMeans(datX, na.rm = TRUE)
summary(dat2[["diastolicBP"]])
#>    Min. 1st Qu.  Median    Mean 3rd Qu.    Max.    NA's 
#>   3.333  58.000  66.667  66.329  74.667 128.000    2688

23.2.11 Sleep (daily in hours)

# Hours of sleep (SLD010H); the value 99 is treated as missing.
dat2[["sleep"]] <- replace(
  dat2[["SLD010H"]],
  which(dat2[["SLD010H"]] == 99),
  NA
)
summary(dat2[["sleep"]])
#>    Min. 1st Qu.  Median    Mean 3rd Qu.    Max.    NA's 
#>   2.000   6.000   7.000   6.951   8.000  12.000    3721

23.2.12 Laboratory data

23.2.12.1 Uric acid (mg/dL)

# Uric acid is copied unchanged from LBXSUA.
dat2[["uric.acid"]] <- dat2[["LBXSUA"]]
summary(dat2[["uric.acid"]])
#>    Min. 1st Qu.  Median    Mean 3rd Qu.    Max.    NA's 
#>    0.70    4.30    5.20    5.35    6.20   13.30    3624

23.2.12.2 Total protein (g/dL)

# Total protein is copied unchanged from LBXSTP.
dat2[["protein.total"]] <- dat2[["LBXSTP"]]
summary(dat2[["protein.total"]])
#>    Min. 1st Qu.  Median    Mean 3rd Qu.    Max.    NA's 
#>   4.700   6.800   7.100   7.108   7.400  10.200    3631

23.2.12.3 Total bilirubin (mg/dL)

# Total bilirubin is copied unchanged from LBXSTB.
dat2[["bilirubin.total"]] <- dat2[["LBXSTB"]]
summary(dat2[["bilirubin.total"]])
#>    Min. 1st Qu.  Median    Mean 3rd Qu.    Max.    NA's 
#>   0.100   0.400   0.600   0.639   0.800   7.100    3626

23.2.12.4 Phosphorus (mg/dL)

# Phosphorus is copied unchanged from LBXSPH.
dat2[["phosphorus"]] <- dat2[["LBXSPH"]]
summary(dat2[["phosphorus"]])
#>    Min. 1st Qu.  Median    Mean 3rd Qu.    Max.    NA's 
#>   1.800   3.500   3.900   3.929   4.300  10.900    3623

23.2.12.5 Sodium (mmol/L)

# Sodium is copied unchanged from LBXSNASI.
dat2[["sodium"]] <- dat2[["LBXSNASI"]]
summary(dat2[["sodium"]])
#>    Min. 1st Qu.  Median    Mean 3rd Qu.    Max.    NA's 
#>   119.0   139.0   140.0   139.8   141.0   154.0    3622

23.2.12.6 Potassium (mmol/L)

# Potassium is copied unchanged from LBXSKSI.
dat2[["potassium"]] <- dat2[["LBXSKSI"]]
summary(dat2[["potassium"]])
#>    Min. 1st Qu.  Median    Mean 3rd Qu.    Max.    NA's 
#>   2.800   3.800   4.000   4.027   4.200   5.800    3623

23.2.12.7 Globulin (g/dL)

# Globulin is copied unchanged from LBXSGB.
dat2[["globulin"]] <- dat2[["LBXSGB"]]
summary(dat2[["globulin"]])
#>    Min. 1st Qu.  Median    Mean 3rd Qu.    Max.    NA's 
#>   1.400   2.500   2.800   2.826   3.100   6.500    3631

23.2.12.8 Total calcium (mg/dL)

# Total calcium is copied unchanged from LBXSCA.
dat2[["calcium.total"]] <- dat2[["LBXSCA"]]
summary(dat2[["calcium.total"]])
#>    Min. 1st Qu.  Median    Mean 3rd Qu.    Max.    NA's 
#>   7.600   9.200   9.500   9.486   9.700  14.800    3664

23.2.12.9 High cholesterol

# High cholesterol (BPQ080): keep Yes/No, everything else becomes NA.
dat2[["high.cholesterol"]] <- car::recode(
  dat2[["BPQ080"]],
  recodes = " 'Yes'='Yes';
                                     'No'='No'; else = NA"
)
table(dat2[["high.cholesterol"]], useNA = "always")
#> 
#>   No  Yes <NA> 
#> 4391 2037 3747

23.2.13 Survey features

23.2.13.1 Weight

# Survey weight taken from WTINT2YR.
dat2[["survey.weight"]] <- dat2[["WTINT2YR"]]
summary(dat2[["survey.weight"]])
#>    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
#>    3698   12754   20233   30585   36280  167885
# MEC survey weight taken from WTMEC2YR.
dat2[["survey.weight.mec"]] <- dat2[["WTMEC2YR"]]
summary(dat2[["survey.weight.mec"]])
#>    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
#>       0   12562   20175   30585   36748  171395

23.2.13.2 PSU

# PSU identifier (SDMVPSU) stored as a factor.
dat2[["psu"]] <- as.factor(dat2[["SDMVPSU"]])
table(dat2[["psu"]])
#> 
#>    1    2 
#> 5249 4926

23.2.13.3 Strata

# Stratum identifier (SDMVSTRA) stored as a factor.
dat2[["strata"]] <- as.factor(dat2[["SDMVSTRA"]])
table(dat2[["strata"]])
#> 
#> 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 
#> 674 646 671 732 674 752 664 663 723 665 741 681 700 500 689

23.2.14 Survey year

# Survey cycle indicator copied from SDDSRVYR.
dat2[["year"]] <- dat2[["SDDSRVYR"]]
table(dat2[["year"]], useNA = "always")
#> 
#>     8  <NA> 
#> 10175     0

23.2.15 ICD-10-CM codes

# Give the three tables the same two column names so they can be stacked.
colnames(rxq12) <- colnames(rxq22) <- colnames(rxq32) <- c("id", "icd10")

# Stack them and sort the rows by participant id.
rx2013 <- rbind(rxq12, rxq22, rxq32)
rx2013 <- rx2013[order(rx2013[["id"]]), ]

# Non-informative answers and empty strings are treated as missing codes.
rx2013[["icd10"]][rx2013[["icd10"]] %in% c("Unknown", "Refused", "Don't know", "")] <- NA
# Keep the first three characters of each code as a separate column.
rx2013[["icd10.new"]] <- substr(rx2013[["icd10"]], start = 1, stop = 3)

# Drop every row that has a missing value in any column.
rx2013 <- na.omit(rx2013)

23.3 Analytic data

23.3.1 Full dataset

# Keep the fully recoded data (all original and derived columns) as nhanes13r.
nhanes13r <- dat2

23.3.2 Analytic dataset - adults 20 years or more

# Columns carried into the analytic dataset, in output order.
vars <- c(
  "id",                                             # ID
  "age", "age.cat", "sex", "education", "race",     # Demographic
  "marital", "income", "born", "pregnancy",
  "obese",                                          # Obesity
  "diabetes", "diabetes.family.history",            # Diabetes
  "smoking",                                        # Smoking
  "diet.healthy",                                   # Diet
  "physical.activity",                              # Physical activity
  "medical.access",                                 # Access to routine healthcare
  "systolicBP", "diastolicBP",                      # Blood pressure
  "sleep",                                          # Sleep
  "uric.acid", "protein.total", "bilirubin.total",  # Laboratory
  "phosphorus", "sodium", "potassium", "globulin",
  "calcium.total", "high.cholesterol",
  "survey.weight", "survey.weight.mec",             # Survey features
  "psu", "strata",
  "year"                                            # Survey year
)

# Keep only the selected analysis columns.
nhanes13r.sel <- nhanes13r[, vars]
# Adults 20 years or more and not pregnant
dim(nhanes13r.sel)
#> [1] 10175    34
# `pregnancy` was recoded to 'Yes' / 'No' / 'inconclusive' /
# 'outside of target population', so the exclusion must compare against "Yes"
# with a capital Y. The former lower-case 'yes' never matched any row, which
# meant that no pregnant participant was removed.
analytic13 <- subset(nhanes13r.sel, age >= 20 & pregnancy != "Yes")
dim(analytic13)
# NOTE(review): re-run to refresh the printed dimensions. The 5769 rows shown
# before this fix (and repeated where the data are saved) equal the full 20+
# count (2989 + 1474 + 1306), i.e. nobody had been excluded. With the 65 'Yes'
# rows removed the expected size is 5704 x 34, assuming all of them are aged
# 20 or more -- confirm on re-knit.

23.3.3 Save dataset for later use

# Final size check of both objects before they are written to disk.
dim(analytic13)
#> [1] 5769   34
dim(rx2013)
#> [1] 14474     3
# Store the analytic data and the ICD-10 table together in one .RData file.
save(list = c("analytic13", "rx2013"), file = "data/analytic13recoded.RData")