Test Constant Effect (PH) Assumption for Pooled svycoxph Models

This function provides a valid alternative to cox.zph() for complex survey designs with multiple imputation. It takes a multiply-imputed survey design object that has been split into time intervals (using survival::survSplit) and fits a separate pooled model for each interval. It generates a plot of the coefficients over time and provides tidy summary tables to guide model selection.

Usage

svycoxph_CE_mi(
  formula_rhs,
  design_split,
  var_to_test,
  tgroup_var = "tgroup",
  time_var = "followup",
  status_var = "death",
  main_model_fit = NULL,
  print_split_summary = TRUE,
  ...
)

Arguments

formula_rhs: A string for the RHS of the Cox model (e.g., "var + age").
design_split: A svyimputationList object, created from data that has *already been split* using survSplit().
var_to_test: A string for the exact coefficient name to plot (e.g., "active_asthma" or "age").
tgroup_var: A string for the time-interval variable created by survSplit (default is "tgroup").
time_var: A string for the original 'stop' time variable in the split data (default is "followup" or "stime").
status_var: A string for the event status variable (default is "death" or "status").
main_model_fit: A pooled svycoxph fit object (class mipo). This is the main, un-split model. It is optional, but highly recommended, as the function will print its tidy summary for you.
print_split_summary: A logical. If TRUE, prints a tidy summary table of all pooled coefficients from all interval models.
...: Other arguments passed to plotting, such as title, xlab, ylab, add_smoother (TRUE/FALSE), use_classic_theme (TRUE/FALSE), or show_null_effect (TRUE/FALSE).

Value

A ggplot object showing the coefficient trend over time.

Details

How to Interpret the Results:

This function helps you decide which model to report in your paper.

1. Examine the "Time-Varying Effect Plot": * If the plot is flat: The confidence intervals for all time intervals should substantially overlap. This indicates that the effect of your variable is constant over time. * If the plot is sloped: A clear, non-random trend (upward or downward) suggests the effect is not constant. The statistical test for this is to check if the confidence intervals from different time points (e.g., the first and last) do *not* overlap.

2. Choose Your Final Model: * If the assumption is MET (plot is flat): You should report the "Tidy Table for Final Paper (if Constant Effect Assumption is Met)". This single, pooled model is valid, more precise, and the correct model to report. * If the assumption is VIOLATED (plot is sloped): You must not report the main model. It is statistically invalid. The correct model to report is the time-split model, which is summarized in the "Tidy Summary of All Time-Interval Models" table.

Examples

if (FALSE) { # \dontrun{

# --- 1. Load Libraries ---
library(svyTable1)
library(dplyr)
library(survival)
library(survey)
library(mitools)
library(mice)
library(ggplot2)
library(knitr)

# --- 2. Load data and create base design ---
data(nhanes_mortality)
analytic_design <- svydesign(
  strata = ~strata,
  id = ~psu,
  weights = ~survey_weight,
  data = nhanes_mortality,
  nest = TRUE
)

# --- 3. Create analysis data.frame and INDUCE MISSINGNESS in COVARIATES ---
set.seed(123)
data_with_miss <- analytic_design$variables %>%
  filter(stime > 0) %>%
  mutate(
    # Create the exposure variable (complete)
    caff_bin = factor(
      ifelse(caff == "No consumption", "No", "Yes"),
      levels = c("No", "Yes")
    ),
    # Induce 10% missingness in 'age' (a confounder)
    age = ifelse(runif(n()) < 0.10, NA, age),

    # Induce 15% MAR missingness in 'bmi_cat' (a confounder)
    bmi_cat = factor(ifelse(age > 50 & runif(n()) < 0.15, NA, as.character(bmi.cat)))
  )

print("--- Missingness Induced in Covariates ---")
print(mice::md.pattern(data_with_miss[, c("age", "bmi_cat", "caff_bin", "stime", "status")],
                       plot=FALSE))

# --- 4. Add Nelson-Aalen hazard (auxiliary variable for MICE) ---
data_with_miss$nelson_aalen <- nelsonaalen(
  data_with_miss,
  time = stime,
  status = status
)

# --- 5. Run MICE ---
print("--- Starting MICE ---")
M_IMPUTATIONS <- 5
MAX_ITERATIONS <- 5

pred_matrix <- make.predictorMatrix(data_with_miss)
vars_to_keep_as_is <- c("id", "survey.weight", "psu", "strata",
                        "stime", "status", "nelson_aalen", "caff_bin", "sex")
pred_matrix[, vars_to_keep_as_is] <- 0
pred_matrix[vars_to_keep_as_is, ] <- 0

imputed_data <- mice(
  data_with_miss,
  m = M_IMPUTATIONS,
  maxit = MAX_ITERATIONS,
  predictorMatrix = pred_matrix,
  method = 'pmm',
  seed = 123,
  printFlag = FALSE
)
print("--- MICE Complete ---")

# --- 6. Create Stacked Long-Format Data ---
impdata_long <- mice::complete(imputed_data, "long", include = FALSE)

# --- 7. Fit the MAIN (Constant Effect) Model ---
print("--- Fitting Main Pooled (Constant Effect) Model ---")

allImputations_main <- imputationList(split(impdata_long, impdata_long$.imp))
design_main <- svydesign(
  strata = ~strata,
  id = ~psu,
  weights = ~survey_weight,
  data = allImputations_main,
  nest = TRUE
)

my_formula <- "caff_bin + sex + age + bmi_cat"
main_formula <- as.formula(paste0("Surv(stime, status) ~ ", my_formula))

main_fit_list <- with(design_main, svycoxph(main_formula))
main_fit_pooled <- pool(main_fit_list)

# --- 8. Create the SPLIT-TIME Design ---
print("--- Creating Split-Time Design ---")
event_times <- data_with_miss$stime[data_with_miss$status == 1]
cuts <- quantile(event_times, probs = c(0.2, 0.4, 0.6, 0.8), na.rm = TRUE)

impdata_long_split <- survSplit(Surv(stime, status) ~ .,
                                data = impdata_long,
                                cut = cuts,
                                episode = "tgroup",
                                id = "split_id")

allImputations_split <- imputationList(split(impdata_long_split,
                                             impdata_long_split$.imp))

design_split <- svydesign(
  strata = ~strata,
  id = ~psu,
  weights = ~survey_weight,
  data = allImputations_split,
  nest = TRUE
)

# --- 9. Run the PH Test Function and Print the Plot ---
print("--- Calling svycoxph_CE_mi function to test 'caff_binYes' ---")

my_ph_plot <- svycoxph_CE_mi(
  formula_rhs = my_formula,
  design_split = design_split,
  var_to_test = "caff_binYes", # Test the exposure
  tgroup_var = "tgroup",
  time_var = "stime",
  status_var = "status",
  main_model_fit = main_fit_pooled, # Pass the main model here
  print_split_summary = TRUE,
  show_null_effect = TRUE
)

# Explicitly print the plot
print(my_ph_plot)

} # } # End \dontrun{}