########################################################### # Epicurve symptom onset of hospitalised cases in Osiris data # Rt development over time, corrected for SO to hosp and hosp to reporting delay # not finished: does not seem to be much difference with SO to reporting # 01-04-2020 ___UITZONDERINGSGROND_2___ ########################################################### library(tidyverse) library(lubridate) # get all case data files from Osiris files <- list.files("/___UITZONDERINGSGROND_6___Previous", full.names = TRUE) files <- c(files, list.files("/___UITZONDERINGSGROND_6___Data_OSIRIS_geschoond", full.names = TRUE)) files <- files[grepl(files, pattern = "rds")] report_date <- c("2020-03-31") pattern_date <- report_date %>% str_split(pattern = "-") %>% unlist %>% paste0(collapse = "") tmpfiles <- files[grep(files, pattern = pattern_date)] file.name <- tmpfiles[tmpfiles %>% file.info %>% pull(ctime) %>% which.max] dataOsiris <- read_rds(file.name) IC <- TRUE data <- dataOsiris %>% #filter(Leeftijd >= 70) %>% mutate(CREATIEDT = CREATIEDT %>% as.Date(format = "%d-%m-%Y"), LAATSTEWIJZIGINGDT = LAATSTEWIJZIGINGDT %>% as.Date(format = "%d-%m-%Y"), NCOVopnamedatumICU = NCOVopnamedatumICU %>% as.Date(format = "%d-%m-%Y"), NCOVdat1ezkhopn = NCOVdat1ezkhopn %>% as.Date(format = "%Y-%m-&d")) %>% {if (IC) filter(., NCOVopnameICU == "J") else filter(., NCOVpatZhs == "J")} %>% {if (IC) mutate(., adm_dates = NCOVopnamedatumICU) else mutate(., adm_dates = NCOVdat1ezkhopn)} epicurve <- extract_epicurve(data) epicurve <- calculate_Ru_uncertainty(epicurve) which(lapply(dataOsiris, class) == "Date") as.integer(as.Date(dataOsiris$LAATSTEWIJZIGINGDT) - dataOsiris$CREATIEDT) %>% hist(breaks = -1:35) names(dataOsiris) ggplot(data = epicurve, mapping = aes(x = dates, y = admissionTOT/p)) + geom_bar(stat = "identity", fill = "grey") + geom_bar(mapping = aes(y = admissionTOT), stat = "identity", fill = "#01689b") + scale_x_date( limits = c(as.Date("2020-02-27"), NA), breaks = as.Date("2020-02-17") + seq(0, 50, 5), labels = format(as.Date("2020-02-17") + seq(0, 50, 5), "%d %b")) + #scale_y_continuous(trans = "log10") + labs(x = "opnamedatum ziekenhuis", y = "aantal per dag") + theme_minimal() epicurve <- all_epicurves %>% filter(extracted == "2020-03-26") epicurve %>% filter(Ru > 1) %>% slice(n()) ggplot(data = all_epicurves %>% group_by(extracted) %>% filter(Ru > 1) %>% slice(n()), mapping = aes(x = as.Date(extracted), y = dates)) + geom_point() + geom_line(aes(x = as.Date(extracted), y = as.Date(extracted) - 10)) + labs(x = "analysis date", y = "last date Ru > 1") + theme_minimal() ggplot(data = all_epicurves %>% filter(dates == "2020-03-13"), mapping = aes(x = as.Date(extracted), y = Ru, ymin = Rulower, ymax = Ruupper)) + geom_ribbon(fill = "grey", col = NA) + geom_line() + geom_line(y = 1, lty = 2) + # scale_x_date( # limits = c(as.Date("2020-02-14"), NA), # breaks = as.Date("2020-02-17") + seq(0, 35, 7), # labels = format(as.Date("2020-02-17") + seq(0, 35, 7), "%d %b")) + labs(x = "analysis date", y = "Ru at 13 March") + theme_minimal() # stabilisatie plot (nog netter maken) plot(1, type = "n", xlim = c(as.Date("2020-03-13"), as.Date("2020-03-27")), ylim = c(0,38)) for(d in 6:24) { abline(h = 39 - 2*(d - 5), col = "grey") x <- epicurves %>% filter(dates == paste0("2020-03-",d)) %>% pull(report_date) y <- epicurves %>% filter(dates == paste0("2020-03-",d)) %>% pull(Ru) lines(as.Date(x), y + 38 - 2*(d - 5)) } # functions extract_epicurve <- function(data) { data <- data %>% filter(is.na(adm_dates) | (adm_dates >= as.Date("2020-02-01") & adm_dates <= today())) %>% # omit admission dates in future filter(is.na(ZIE1eZiekteDt) | (ZIE1eZiekteDt >= as.Date("2020-02-01") & ZIE1eZiekteDt <= today())) %>% # omit symptom onset dates in future mutate(bron = if("Bron" %in% names(data)) Bron else Bron_land) admcurve <- data %>% filter(!is.na(adm_dates)) %>% group_by(adm_dates) %>% summarise(admissionABROAD = sum(bron == "Buitenland", na.rm = TRUE), admissionTOT = n()) %>% mutate(admissionNL = admissionTOT - admissionABROAD) %>% rename(dates = adm_dates) admcurve_knownSO <- data %>% filter(!is.na(adm_dates) & !is.na(ZIE1eZiekteDt)) %>% group_by(adm_dates) %>% summarise(admissionABROAD_SO = sum(bron == "Buitenland", na.rm = TRUE), admissionTOT_SO = n()) %>% mutate(admissionNL_SO = admissionTOT_SO - admissionABROAD_SO) %>% rename(dates = adm_dates) epicurve <- data %>% filter(!is.na(adm_dates) & !is.na(ZIE1eZiekteDt)) %>% group_by(ZIE1eZiekteDt) %>% summarise(incidenceABROAD = sum(bron == "Buitenland", na.rm = TRUE), incidenceTOT = n()) %>% mutate(incidenceNL = incidenceTOT - incidenceABROAD) %>% rename(dates = ZIE1eZiekteDt) epicurve <- admcurve %>% full_join(admcurve_knownSO) %>% full_join(epicurve) epicurve <- epicurve %>% full_join(tibble(dates = min(epicurve$dates) + (0:as.integer(max(data$CREATIEDT) - min(epicurve$dates))))) %>% arrange(dates) %>% replace_na(list(incidenceNL = 0, incidenceTOT = 0, incidenceABROAD = 0, admissionNL = 0, admissionTOT = 0, admissionABROAD = 0, admissionNL_SO = 0, admissionTOT_SO = 0, admissionABROAD_SO = 0)) # inflating reports to account for reporting delay interval <- as.integer(data$LAATSTEWIJZIGINGDT - data$adm_dates) # interval <- as.integer(data$CREATIEDT - data$adm_dates) interval <- interval[interval <= nrow(epicurve)] interval <- interval[!is.na(interval) & interval >= 0] reporting_delay <- ecdf(interval) reporting_delay <- rev(reporting_delay(0:(nrow(epicurve)-1))) epicurve <- epicurve %>% mutate(p = reporting_delay) # if most recent days are zero, give them one case and a p to reflect total number of last non-zero day max_nonzero_inc_day <- max(which(epicurve$admissionTOT != 0)) if(max_nonzero_inc_day < nrow(epicurve)) { epicurve$admissionTOT[(max_nonzero_inc_day + 1):nrow(epicurve)] <- 1 epicurve$admissionNL[(max_nonzero_inc_day + 1):nrow(epicurve)] <- 1 epicurve$p[(max_nonzero_inc_day + 1):nrow(epicurve)] <- epicurve$p[max_nonzero_inc_day]/epicurve$admissionTOT[max_nonzero_inc_day] } return(epicurve) } # to be extended with SI dist as input argument calculate_Ru_uncertainty <- function(epicurve, nsample = 1000) { pdfSI <- dnorm(x = 1:7, mean = 6.8/2, sd = 1.27) pdfSI <- pdfSI/sum(pdfSI) g <- pdfSI # sample non-observed cases due to reporting delay from neg binomial incidence <- matrix(0, nrow = nrow(epicurve), ncol = nsample) for(j in 1:nrow(epicurve)) { if(epicurve$incidenceTOT[j] != 0) { incidence[j, ] <- rnbinom(n = nsample, size = epicurve$incidenceTOT[j], prob = epicurve$p[j]) + epicurve$incidenceTOT[j] } } # construct instantaneous Rt from incidence Rt <- matrix(0, nrow = nrow(epicurve), ncol = nsample) for(t in 2:nrow(epicurve)) { g2 <- g[1:min(length(g), t-1)] Rt[t,] <- as.numeric((incidence[t, ] - epicurve$incidenceABROAD[t])/colSums(incidence[t - (1:length(g2)), , drop = FALSE]*g2)) } # construct case Ru from instantaneous Rt Ru <- matrix(0, nrow = nrow(epicurve), ncol = nsample) for(u in 1:(nrow(Rt)-1)) { g2 <- g[1:min(length(g), nrow(Rt)-u)] Ru[u,] <- colSums(g2*Rt[u+1:length(g2), ,drop = FALSE])/sum(g2) # new: divide by sum(g2) to account for not observed Rt's in future } # construct confidence bounds by sampling from poisson Rtupper <- rep(0, nrow(incidence)) Rtlower <- rep(0, nrow(incidence)) for(j in 1:nrow(incidence)) { tmp <- sapply(1:nsample, function(i) if(incidence[j,i] != 0 & Rt[j,i] !=Inf) rpois(100, incidence[j,i]*Rt[j,i])/incidence[j,i]) Rtlower[j] <- quantile(unlist(tmp), probs = 0.025) Rtupper[j] <- quantile(unlist(tmp), probs = 0.975) } # construct confidence bounds by sampling from poisson Ruupper <- rep(0, nrow(incidence)) Rulower <- rep(0, nrow(incidence)) for(j in 1:nrow(incidence)) { tmp <- sapply(1:nsample, function(i) if(incidence[j,i] != 0 & Ru[j,i] !=Inf) rpois(100, incidence[j,i]*Ru[j,i])/incidence[j,i]) Rulower[j] <- quantile(unlist(tmp), probs = 0.025) Ruupper[j] <- quantile(unlist(tmp), probs = 0.975) } epicurve <- epicurve %>% mutate(Rt = apply(Rt, MARGIN = 1, mean), Ru = apply(Ru, MARGIN = 1, mean), Rtlower = Rtlower, Rtupper = Rtupper, Rulower = Rulower, Ruupper = Ruupper) return(epicurve) } plot_Ru <- function(epicurve) { p <- ggplot(data = epicurve %>% slice(-nrow(.)), mapping = aes(x = dates, y = Ru, ymin = Rulower, ymax = Ruupper)) + geom_ribbon(data = epicurve %>% filter(Ruupper != 0), fill = "grey", col = NA) + geom_line() + geom_line(y = 1, lty = 2) + scale_x_date( limits = c(as.Date("2020-02-14"), NA), breaks = as.Date("2020-02-17") + seq(0, 50, 4), labels = format(as.Date("2020-02-17") + seq(0, 50, 4), "%d %b")) + labs(x = NULL) + theme_minimal() return(p) }