# Function to perform Bayesian nowcasting # # Author: ___UITZONDERINGSGROND_2___ - RIVM # # The nowcast() function requires: # case.data = dataframe with two columns named admission.date and report.date in Date format # # Optionally it requires: # start.date = Starting date of the epidemic curve in Date format # If not provided, the first report.date is taken # nowcast.date = Nowcast date in Date format # If not provided, the last report.date is taken # probs = Vector with quantile points of the predictive distribution # Defaults to 10, 25, 50, 75 and 90 percent point nowcast <- function(case.data = NULL, start.date = NULL, nowcast.date = NULL, probs = c(0.1, 0.25, 0.5, 0.75, 0.9)) { # # Init ---- # # Checks if (is.null(case.data) | !all(c("admission.date", "report.date") %in% names(case.data))) stop("Provide dataframe with two columns named admission.date and report.date") if (is.null(start.date)) { warning("Starting date not provided. Start date is set to first report date") start.date <- min(case.data$report.date) } if (is.null(nowcast.date)) { warning("Nowcast date not provided. Nowcast date is set to last report date") nowcast.date <- max(case.data$report.date) } # Number of cores n.cores <- parallel::detectCores()-6 # Day of the week sequence, starting on Monday (happens to be on 2018-01-01) # We use this for the factor levels of the day-of-the week covariate day.seq <- seq( from = as.Date("2018-01-01"), to = as.Date("2018-01-07"), by = "day") %>% weekdays() # # Data preparation ---- # # Date sequence date.seq <- seq(from = start.date, to = nowcast.date, by = "day") # Delays case.data <- case.data %>% # Calculate delays mutate(delay = (report.date - admission.date) %>% as.numeric) %>% # Remove unrealistic delays (not outside 0 - 10 weeks) filter(delay %>% between(0, 70)) # Apply right truncation # (Occurs naturally in real-time setting) case.data <- case.data %>% filter(report.date %>% between(start.date, nowcast.date)) # # Construct reporting trapezoid ---- # # Length of the date sequence T <- length(date.seq) # Maximum delay (99% point of all reported delays) D <- case.data$delay %>% quantile(probs = 0.99) %>% round() %>% max(15) # Apply maximum delay to case.data # All cases with delay d >= D are set to delay D case.data <- case.data %>% mutate( delay = if_else( condition = delay >= D, true = D, false = delay)) # Add factor versions for admission.date and delay for tabulation of cases # to create the reporting trapezoid case.data <- case.data %>% mutate( admission.date.cat = admission.date %>% factor(levels = date.seq %>% as.character), delay.cat = delay %>% factor(levels = 0:D)) # Setup the reporting trapezoid data as a grid by admission.date and delay rep.data <- expand.grid( admission.date = date.seq, delay = 0:D) %>% as_tibble() %>% mutate( # Add reporting date rep.date = admission.date + delay, # Add day of reporting rep.day = rep.date %>% weekdays() %>% factor(levels = day.seq), # Add t and d, to assist in the calculations t = (admission.date - start.date + 1) %>% as.integer, d = delay, # Add reporting indicator r # Cases with t + d <= T have been reported r = t + d <= T, # Add tabulated cases by onset date and delay # Except the ones that have not been repported yet cases = with(case.data, table( admission.date.cat, delay.cat)) %>% as.vector, cases = if_else( condition = !r, true = NA_integer_, false = cases)) # # Plot reporting trapezoid # image( # x = date.seq, # y = 0:D, # z = matrix(rep.data$cases, nrow = T, ncol = D + 1) %>% log1p, # asp = 1) # # Model setup ---- # # We do not need the estimates and predictions for the entire time period t = 1:T # because we are only intersted in the (expected) epicurve near the present # and things get really slower if T gets larger # Therefore, we introduce a time window t_window = max(1, T - m):T, where m = 2*D # The number of time points in the window is T_window m <- 2*D t_window <- max(1, T - m):T T_window <- length(t_window) rep.data_window <- rep.data %>% filter(t %in% t_window) # Model matrices for P-splines # Use default settings mm_epi <- ortho_Pspline(x = t_window, k = max(4, round(T_window/3))) mm_delay_t <- ortho_Pspline(x = t_window, k = max(4, round(T_window/3))) mm_delay_d <- ortho_Pspline(x = 0:D, k = max(4, round(D/3))) # Model matrix for day-of-the week effect # This is an T_window x (D + 1) x 6 array mm_delay_c <- model.matrix(~ rep.day, data = rep.data_window)[, -1] %>% array(dim = c(T_window, D + 1, 6)) # Data data.list <- list( T_window = T_window, D = D, y_rep = matrix(rep.data_window$cases, nrow = T_window, ncol = D + 1), X_epi = mm_epi$X, X_delay_t = mm_delay_t$X, X_delay_d = mm_delay_d$X, X_delay_c = mm_delay_c, Z_epi = mm_epi$Z, Z_delay_t = mm_delay_t$Z, Z_delay_d = mm_delay_d$Z) data.list <- within(data.list, { n_beta_epi = ncol(X_epi) + 1 n_beta_delay_t = ncol(X_delay_t) n_beta_delay_d = ncol(X_delay_d) n_beta_delay_c = dim(X_delay_c)[3] n_b_epi = ncol(Z_epi) n_b_delay_t = ncol(Z_delay_t) n_b_delay_d = ncol(Z_delay_d) }) # Inits inits.fun <- function() list( .RNG.name = "base::Mersenne-Twister", .RNG.seed = sample.int(n = 10000, size = 1)) # Run Model model.string <- paste("model", jags_nowcast %>% body %>% paste(collapse = "\n"), "}") post.runjags <- run.jags(model = model.string, data = data.list, inits = inits.fun, n.chains = n.cores, adapt = 100, burnin = 0, sample = 200, thin = 1, method = "parallel", modules = "glm", monitor = c("beta_epi", "theta_rep", "sigma_b_epi", "sigma_b_delay_d", "sigma_b_delay_t", "logit_h_delay_t", "logit_h_delay_d", "mu_epi", "y_epi")) # # Traceplots # post.runjags %>% plot( # vars = c("beta_epi", "theta_rep", "sigma_b_epi", "sigma_b_delay_d", "sigma_b_delay_t"), # plot.type = "trace", # layout = c(3, 2)) # # Post processing ---- # # Turn runjags object into matrix post.mat <- post.runjags %>% as.mcmc %>% as.matrix n <- nrow(post.mat) parnames <- colnames(post.mat) # Parameters of interest logit_h_delay_d.sim <- post.mat[, grep( "logit_h_delay_d", parnames)] # log(OR) hazard delay logit_h_delay_t.sim <- post.mat[, grep( "logit_h_delay_t", parnames)] # log(OR) hazard time mu_epi.sim <- post.mat[, grep("mu_epi", parnames)] # Trend y_epi.sim <- post.mat[, grep( "y_epi", parnames)] # Nowcast # logit_h_delay_d.sim %>% apply(2, quantile, prob = probs) %>% t %>% matplot(x = 0:(D - 1), type = "l", lty = 1, col = 1) # logit_h_delay_t.sim %>% apply(2, quantile, prob = probs) %>% t %>% matplot(x = 1:T_window, type = "l", lty = 1, col = 1) # mu_epi.sim %>% apply(2, quantile, prob = probs) %>% t %>% matplot(x = 1:T_window, type = "l", lty = 1, col = 1) # Generate statistics mu_epi_stat <- mu_epi.sim %>% apply(MARGIN = 2, FUN = quantile, probs = c(0.025, 0.5, 0.975)) %>% t() y_epi_stat <- y_epi.sim %>% apply(MARGIN = 2, FUN = quantile, probs = probs) %>% t() # # Output ---- # # The trend and nowcast are calculated for t in t_window # Add NA for the timepoints before min(t_window) mu_epi_stat <- rbind( matrix(NA, nrow = min(t_window) - 1, ncol = 3), mu_epi_stat) y_epi_stat <- rbind( matrix(NA, nrow = min(t_window) - 1, ncol = length(probs)), y_epi_stat) # Convert mu_epi_stat and y_epi_stat into tibbles # and add informative column names mu_epi_stat <- mu_epi_stat %>% as_tibble() y_epi_stat <- y_epi_stat %>% as_tibble() colnames(mu_epi_stat) <- c("trend_lwr", "trend_med", "trend_upr") colnames(y_epi_stat) <- paste0("nowcast_p", formatC(probs*100, width = 2, flag = 0)) # Return output nowcast_data <- bind_cols(tibble(admission.date = date.seq), mu_epi_stat, y_epi_stat) nowcast_data }