########################################################### # Epicurve symptom onset of hospitalised cases in Osiris data # Rt development over time, corrected for SO to reporting delay # 01-04-2020 ___UITZONDERINGSGROND_2___ ########################################################### library(tidyverse) library(lubridate) # get all case data files from Osiris files <- list.files("/___UITZONDERINGSGROND_6___Previous", full.names = TRUE) files <- c(files, list.files("/___UITZONDERINGSGROND_6___Data_OSIRIS_geschoond", full.names = TRUE)) files <- files[grepl(files, pattern = "rds")] report_dates <- seq(as.Date("2020-03-13"), as.Date("2020-03-27"), by = "day") %>% as.character report_dates <- c("2020-03-24", "2020-03-28") report_dates <- c("2020-03-31") report_date <- report_dates epicurves <- tibble() for(report_date in report_dates) { pattern_date <- report_date %>% str_split(pattern = "-") %>% unlist %>% paste0(collapse = "") tmpfiles <- files[grep(files, pattern = pattern_date)] file.name <- tmpfiles[tmpfiles %>% file.info %>% pull(ctime) %>% which.max] dataOsiris <- read_rds(file.name) data <- dataOsiris %>% #filter(Leeftijd >= 70) %>% mutate(CREATIEDT = CREATIEDT %>% as.Date(format = "%d-%m-%Y"), NCOVdat1ezkhopn = NCOVdat1ezkhopn %>% as.Date(format = "%Y-%m-&d")) #%>% #filter(NCOVpatZhs == "J") epicurve <- extract_epicurve(data) epicurve <- calculate_Ru_uncertainty(epicurve) epicurve <- epicurve %>% mutate(report_date = report_date) epicurves <- bind_rows(epicurves, epicurve) } plot_Ru(epicurve) ggplot(data = epicurve, mapping = aes(x = dates, y = incidenceTOT/p)) + geom_bar(stat = "identity", fill = "grey") + geom_bar(mapping = aes(y = incidenceTOT), stat = "identity", fill = "#01689b") + scale_x_date( limits = c(as.Date("2020-02-27"), NA), breaks = as.Date("2020-02-17") + seq(0, 50, 5), labels = format(as.Date("2020-02-17") + seq(0, 50, 5), "%d %b")) + #scale_y_continuous(trans = "log10") + labs(x = "eerste ziektedag", y = "aantal per dag") + theme_minimal() plot_Ru(epicurves) + labs(x = "eerste ziektedag", y = "effectieve R", subtitle = "effectieve R op basis van eerste ziektedag van gehospitaliseerde cases\n(Osiris data t/m 28 Mar 2020)") ggsave("Reff_SOhosp_28mrt2020.tiff", width = 6, height = 4, dpi = 150) ggplot(data = epicurves %>% filter(report_date == "2020-03-31") %>% mutate(incidenceEXTRA = round(incidenceTOT/p) - incidenceTOT) %>% pivot_longer(c(incidenceTOT, incidenceEXTRA), names_to = "incidence"), mapping = aes(x = dates, y = value, fill = factor(incidence, labels = c("verwachte gevallen op basis\nvan rapportagevertraging", "gemelde gevallen")))) + geom_bar(stat = "identity") + scale_x_date(limits = c(as.Date("2020-02-14"), NA), breaks = as.Date("2020-02-14") + seq(0, 40, 4), labels = format(as.Date("2020-02-14") + seq(0, 40, 4), "%d %b")) + scale_fill_manual( values = c("lightgrey", "#01689b")) + #scale_y_continuous(trans = "log10") + labs(x = "eerste ziektedag", y = "aantal per dag", fill = NULL, subtitle = "eerste ziektedag van gemelde ziekenhuisopnames t/m 28 maart 2020") + theme_minimal() + theme(legend.position = c(0.2,0.8)) epicurve <- all_epicurves %>% filter(extracted == "2020-03-26") epicurve %>% filter(Ru > 1) %>% slice(n()) ggplot(data = all_epicurves %>% group_by(extracted) %>% filter(Ru > 1) %>% slice(n()), mapping = aes(x = as.Date(extracted), y = dates)) + geom_point() + geom_line(aes(x = as.Date(extracted), y = as.Date(extracted) - 10)) + labs(x = "analysis date", y = "last date Ru > 1") + theme_minimal() ggplot(data = all_epicurves %>% filter(dates == "2020-03-13"), mapping = aes(x = as.Date(extracted), y = Ru, ymin = Rulower, ymax = Ruupper)) + geom_ribbon(fill = "grey", col = NA) + geom_line() + geom_line(y = 1, lty = 2) + # scale_x_date( # limits = c(as.Date("2020-02-14"), NA), # breaks = as.Date("2020-02-17") + seq(0, 35, 7), # labels = format(as.Date("2020-02-17") + seq(0, 35, 7), "%d %b")) + labs(x = "analysis date", y = "Ru at 13 March") + theme_minimal() # stabilisatie plot (nog netter maken) plot(1, type = "n", xlim = c(as.Date("2020-03-13"), as.Date("2020-03-27")), ylim = c(0,38)) for(d in 6:24) { abline(h = 39 - 2*(d - 5), col = "grey") x <- epicurves %>% filter(dates == paste0("2020-03-",d)) %>% pull(report_date) y <- epicurves %>% filter(dates == paste0("2020-03-",d)) %>% pull(Ru) lines(as.Date(x), y + 38 - 2*(d - 5)) } # functions extract_epicurve <- function(data) { data <- data %>% mutate(bron = if("Bron" %in% names(data)) Bron else Bron_land) epicurve <- data %>% filter(!is.na(ZIE1eZiekteDt)) %>% filter(ZIE1eZiekteDt <= today()) %>% # omit symptom onset dates in future group_by(ZIE1eZiekteDt) %>% summarise(incidenceABROAD = sum(bron == "Buitenland", na.rm = TRUE), incidenceTOT = n()) %>% mutate(incidenceNL = incidenceTOT - incidenceABROAD) epicurve <- epicurve %>% full_join(tibble(ZIE1eZiekteDt = min(epicurve$ZIE1eZiekteDt) + (0:as.integer(max(data$CREATIEDT) - min(epicurve$ZIE1eZiekteDt))))) %>% arrange(ZIE1eZiekteDt) %>% replace_na(list(incidenceNL = 0, incidenceTOT = 0, incidenceABROAD = 0)) %>% mutate(dates = ZIE1eZiekteDt) %>% select(dates, incidenceNL, incidenceABROAD, incidenceTOT) # inflating reports to account for reporting delay interval <- as.integer(data$CREATIEDT - data$ZIE1eZiekteDt) interval <- interval[interval <= nrow(epicurve)] interval <- interval[!is.na(interval) & interval >= 0] reporting_delay <- ecdf(interval) reporting_delay <- rev(reporting_delay(0:(nrow(epicurve)-1))) epicurve <- epicurve %>% mutate(p = reporting_delay) # if most recent days are zero, give them one case and a p to reflect total number of last non-zero day max_nonzero_inc_day <- max(which(epicurve$incidenceTOT != 0)) if(max_nonzero_inc_day < nrow(epicurve)) { epicurve$incidenceTOT[(max_nonzero_inc_day + 1):nrow(epicurve)] <- 1 epicurve$incidenceNL[(max_nonzero_inc_day + 1):nrow(epicurve)] <- 1 epicurve$p[(max_nonzero_inc_day + 1):nrow(epicurve)] <- epicurve$p[max_nonzero_inc_day]/epicurve$incidenceTOT[max_nonzero_inc_day] } return(epicurve) } # to be extended with SI dist as input argument calculate_Ru_uncertainty <- function(epicurve, nsample = 1000) { pdfSI <- dnorm(x = 1:7, mean = 6.8/2, sd = 1.27) pdfSI <- pdfSI/sum(pdfSI) g <- pdfSI # sample non-observed cases due to reporting delay from neg binomial incidence <- matrix(0, nrow = nrow(epicurve), ncol = nsample) for(j in 1:nrow(epicurve)) { if(epicurve$incidenceTOT[j] != 0) { incidence[j, ] <- rnbinom(n = nsample, size = epicurve$incidenceTOT[j], prob = epicurve$p[j]) + epicurve$incidenceTOT[j] } } # construct instantaneous Rt from incidence Rt <- matrix(0, nrow = nrow(epicurve), ncol = nsample) for(t in 2:nrow(epicurve)) { g2 <- g[1:min(length(g), t-1)] Rt[t,] <- as.numeric((incidence[t, ] - epicurve$incidenceABROAD[t])/colSums(incidence[t - (1:length(g2)), , drop = FALSE]*g2)) } # construct case Ru from instantaneous Rt Ru <- matrix(0, nrow = nrow(epicurve), ncol = nsample) for(u in 1:(nrow(Rt)-1)) { g2 <- g[1:min(length(g), nrow(Rt)-u)] Ru[u,] <- colSums(g2*Rt[u+1:length(g2), ,drop = FALSE])/sum(g2) # new: divide by sum(g2) to account for not observed Rt's in future } # construct confidence bounds by sampling from poisson Rtupper <- rep(0, nrow(incidence)) Rtlower <- rep(0, nrow(incidence)) for(j in 1:nrow(incidence)) { tmp <- sapply(1:nsample, function(i) if(incidence[j,i] != 0 & Rt[j,i] !=Inf) rpois(100, incidence[j,i]*Rt[j,i])/incidence[j,i]) Rtlower[j] <- quantile(unlist(tmp), probs = 0.025) Rtupper[j] <- quantile(unlist(tmp), probs = 0.975) } # construct confidence bounds by sampling from poisson Ruupper <- rep(0, nrow(incidence)) Rulower <- rep(0, nrow(incidence)) for(j in 1:nrow(incidence)) { tmp <- sapply(1:nsample, function(i) if(incidence[j,i] != 0 & Ru[j,i] !=Inf) rpois(100, incidence[j,i]*Ru[j,i])/incidence[j,i]) Rulower[j] <- quantile(unlist(tmp), probs = 0.025) Ruupper[j] <- quantile(unlist(tmp), probs = 0.975) } epicurve <- epicurve %>% mutate(Rt = apply(Rt, MARGIN = 1, mean), Ru = apply(Ru, MARGIN = 1, mean), Rtlower = Rtlower, Rtupper = Rtupper, Rulower = Rulower, Ruupper = Ruupper) return(epicurve) } plot_Ru <- function(epicurve) { p <- ggplot(data = epicurve %>% slice(-nrow(.)), mapping = aes(x = dates, y = Ru, ymin = Rulower, ymax = Ruupper)) + geom_ribbon(data = epicurve %>% filter(Ruupper != 0), fill = "grey", col = NA) + geom_line() + geom_line(y = 1, lty = 2) + scale_x_date( limits = c(as.Date("2020-02-14"), NA), breaks = as.Date("2020-02-17") + seq(0, 50, 4), labels = format(as.Date("2020-02-17") + seq(0, 50, 4), "%d %b")) + labs(x = NULL) + theme_minimal() return(p) }