########################################################### # Epicurve symptom onset of hospitalised cases in Osiris data # Rt development over time # 28-03-2020 ___UITZONDERINGSGROND_2___ ########################################################### library(tidyverse) library(lubridate) # get all case data files from Osiris files <- list.files("/___UITZONDERINGSGROND_6___Previous", full.names = TRUE) files <- c(files, list.files("/___UITZONDERINGSGROND_6___Data_OSIRIS_geschoond", full.names = TRUE)) files <- files[grepl(files, pattern = "rds")] report_dates <- seq(as.Date("2020-03-13"), as.Date("2020-03-27"), by = "day") %>% as.character report_dates <- c("2020-03-24", "2020-03-28") report_dates <- c("2020-03-28") report_date <- report_dates epicurves <- tibble() for(report_date in report_dates) { pattern_date <- report_date %>% str_split(pattern = "-") %>% unlist %>% paste0(collapse = "") tmpfiles <- files[grep(files, pattern = pattern_date)] file.name <- tmpfiles[tmpfiles %>% file.info %>% pull(ctime) %>% which.max] dataOsiris <- read_rds(file.name) data <- dataOsiris %>% #filter(Leeftijd >= 70) %>% mutate(CREATIEDT = CREATIEDT %>% as.Date(format = "%d-%m-%Y"), NCOVdat1ezkhopn = NCOVdat1ezkhopn %>% as.Date(format = "%Y-%m-&d")) %>% filter(NCOVpatZhs == "J") epicurve <- extract_epicurve(data) epicurve <- calculate_Ru_uncertainty(epicurve) epicurve <- epicurve %>% mutate(report_date = report_date) epicurves <- bind_rows(epicurves, epicurve) } p <- epicurves %>% filter(report_date == "2020-03-27") %>% plot_Ru p <- plot_Ru(epicurves) + facet_wrap(facets = vars(report_date), ncol = 1) ggsave(filename = "Rt_series_28mrt2020.tiff", plot = p, width = 4, height = 12, dpi = 300) plot_Ru(epicurves) + labs(x = "eerste ziektedag", y = "effectieve R", subtitle = "effectieve R op basis van eerste ziektedag van gehospitaliseerde cases\n(Osiris data t/m 28 Mar 2020)") ggsave("Reff_SOhosp_28mrt2020.tiff", width = 6, height = 4, dpi = 150) ggplot(data = epicurves %>% filter(report_date == "2020-03-28"), mapping = aes(x = dates, y = incidenceTOT/p)) + geom_bar(stat = "identity", fill = "lightgrey") + geom_bar(mapping = aes(x = dates, y = incidenceTOT), inherit.aes = FALSE, stat = "identity", fill = "#01689b") + scale_x_date(limits = c(as.Date("2020-02-14"), as.Date("2020-03-26")), breaks = as.Date("2020-02-14") + seq(0, 40, 4), labels = format(as.Date("2020-02-14") + seq(0, 40, 4), "%d %b")) + #scale_y_continuous(trans = "log10") + labs(x = "eerste ziektedag", y = "aantal per dag", subtitle = "eerste ziektedag van gemelde ziekenhuisopnames t/m 28 maart 2020") + theme_minimal() ggsave(filename = "eersteziektedag_hospcases_28mrt2020.tiff", width = 6, height = 4, dpi = 300) ggplot(data = epicurves %>% filter(report_date == "2020-03-28") %>% mutate(incidenceEXTRA = round(incidenceTOT/p) - incidenceTOT) %>% pivot_longer(c(incidenceTOT, incidenceEXTRA), names_to = "incidence"), mapping = aes(x = dates, y = value, fill = factor(incidence, labels = c("verwachte gevallen op basis\nvan rapportagevertraging", "gemelde gevallen")))) + geom_bar(stat = "identity") + scale_x_date(limits = c(as.Date("2020-02-14"), as.Date("2020-03-26")), breaks = as.Date("2020-02-14") + seq(0, 40, 4), labels = format(as.Date("2020-02-14") + seq(0, 40, 4), "%d %b")) + scale_fill_manual( values = c("lightgrey", "#01689b")) + #scale_y_continuous(trans = "log10") + labs(x = "eerste ziektedag", y = "aantal per dag", fill = NULL, subtitle = "eerste ziektedag van gemelde ziekenhuisopnames t/m 28 maart 2020") + theme_minimal() + theme(legend.position = c(0.2,0.8)) ggsave(filename = "eersteziektedag_hospcases_28mrt2020.tiff", width = 6, height = 4, dpi = 300) sum(!is.na(data$ZIE1eZiekteDt))/nrow(data) ggsave(filename = "eersteziektedag_hospcases_28mrt2020.tiff", width = 6, height = 4, dpi = 300) epicurve <- all_epicurves %>% filter(extracted == "2020-03-26") epicurve %>% filter(Ru > 1) %>% slice(n()) ggplot(data = all_epicurves %>% group_by(extracted) %>% filter(Ru > 1) %>% slice(n()), mapping = aes(x = as.Date(extracted), y = dates)) + geom_point() + geom_line(aes(x = as.Date(extracted), y = as.Date(extracted) - 10)) + labs(x = "analysis date", y = "last date Ru > 1") + theme_minimal() ggplot(data = all_epicurves %>% filter(dates == "2020-03-13"), mapping = aes(x = as.Date(extracted), y = Ru, ymin = Rulower, ymax = Ruupper)) + geom_ribbon(fill = "grey", col = NA) + geom_line() + geom_line(y = 1, lty = 2) + # scale_x_date( # limits = c(as.Date("2020-02-14"), NA), # breaks = as.Date("2020-02-17") + seq(0, 35, 7), # labels = format(as.Date("2020-02-17") + seq(0, 35, 7), "%d %b")) + labs(x = "analysis date", y = "Ru at 13 March") + theme_minimal() # stabilisatie plot (nog netter maken) plot(1, type = "n", xlim = c(as.Date("2020-03-13"), as.Date("2020-03-27")), ylim = c(0,38)) for(d in 6:24) { abline(h = 39 - 2*(d - 5), col = "grey") x <- epicurves %>% filter(dates == paste0("2020-03-",d)) %>% pull(report_date) y <- epicurves %>% filter(dates == paste0("2020-03-",d)) %>% pull(Ru) lines(as.Date(x), y + 38 - 2*(d - 5)) } # functions extract_epicurve <- function(data) { bron <- if("Bron" %in% names(data)) "Bron" else "Bron_land" epicurve_raw <- data %>% filter(!is.na(ZIE1eZiekteDt)) %>% filter(ZIE1eZiekteDt <= today()) %>% # omit symptom onset dates in future group_by(ZIE1eZiekteDt) %>% summarise(incidenceABROAD = sum(bron == "Buitenland", na.rm = TRUE), incidenceTOT = n()) %>% mutate(day = as.integer(ZIE1eZiekteDt-min(ZIE1eZiekteDt)+1), incidenceNL = incidenceTOT - incidenceABROAD) epicurve_raw <- epicurve_raw %>% full_join(tibble(day = 1:max(epicurve_raw$day))) %>% arrange(day) %>% replace_na(list(incidenceNL = 0, incidenceTOT = 0, incidenceABROAD = 0)) %>% mutate(dates = epicurve_raw$ZIE1eZiekteDt[1] + day - 1) %>% select(dates, day, incidenceNL, incidenceABROAD, incidenceTOT) # inflating reports to account for reporting delay interval <- data$CREATIEDT - data$ZIE1eZiekteDt interval <- interval[interval < nrow(epicurve_raw)] reporting_delay <- ecdf(interval) reporting_delay <- rev(reporting_delay(1:nrow(epicurve_raw))) epicurve <- epicurve_raw %>% mutate(p = reporting_delay) # epicurve <- epicurve_raw %>% # mutate(incidenceTOToriginal = incidenceTOT, # incidenceNL = round(incidenceNL/reporting_delay), # incidenceTOT = round(incidenceTOT/reporting_delay), # incidenceABROAD = incidenceTOT - incidenceNL) return(epicurve) } # to be extended with SI dist as input argument calculate_Ru_uncertainty <- function(epicurve, nsample = 1000) { pdfSI <- dnorm(x = 1:7, mean = 6.8/2, sd = 1.27) pdfSI <- pdfSI/sum(pdfSI) g <- pdfSI # sample non-observed cases due to reporting delay from neg binomial incidence <- matrix(0, nrow = nrow(epicurve), ncol = nsample) for(j in 1:nrow(epicurve)) { if(epicurve$incidenceTOT[j] != 0) { incidence[j, ] <- rnbinom(n = nsample, size = epicurve$incidenceTOT[j], prob = epicurve$p[j]) + epicurve$incidenceTOT[j] } } # construct instantaneous Rt from incidence Rt <- matrix(0, nrow = nrow(epicurve), ncol = nsample) for(t in 2:nrow(epicurve)) { g2 <- g[1:min(length(g), t-1)] Rt[t,] <- as.numeric((incidence[t, ] - epicurve$incidenceABROAD[t])/colSums(incidence[t - (1:length(g2)), , drop = FALSE]*g2)) } # construct case Ru from instantaneous Rt Ru <- matrix(0, nrow = nrow(epicurve), ncol = nsample) for(u in 1:(nrow(Rt)-1)) { g2 <- g[1:min(length(g), nrow(Rt)-u-1)] Ru[u,] <- colSums(g2*Rt[u+1:length(g2), ,drop = FALSE])/sum(g2) # new: divide by sum(g2) to account for not observed Rt's in future } # construct confidence bounds by sampling from poisson Ruupper <- rep(0, nrow(epicurve)) Rulower <- rep(0, nrow(epicurve)) for(j in 1:nrow(epicurve)) { if(incidence[j,i] != 0) { tmp <- sapply(1:nsample, function(i) rpois(100, incidence[j,i]*Ru[j,i])/incidence[j,i]) Rulower[j] <- quantile(tmp, probs = 0.025) Ruupper[j] <- quantile(tmp, probs = 0.975) } } epicurve <- epicurve %>% mutate(Ru = apply(Ru, MARGIN = 1, mean), Rulower = Rulower, Ruupper = Ruupper) return(epicurve) } plot_Ru <- function(epicurve) { p <- ggplot(data = epicurve %>% slice(-nrow(.)), mapping = aes(x = dates, y = Ru, ymin = Rulower, ymax = Ruupper)) + geom_ribbon(data = epicurve %>% filter(Ruupper != 0), fill = "grey", col = NA) + geom_line() + geom_line(y = 1, lty = 2) + scale_x_date( limits = c(as.Date("2020-02-14"), NA), breaks = as.Date("2020-02-17") + seq(0, 50, 4), labels = format(as.Date("2020-02-17") + seq(0, 50, 4), "%d %b")) + labs(x = NULL) + theme_minimal() return(p) }