################################################################### # Functions to use with estimationRt_OSIRIS script ################################################################### library(tidyverse) library(lubridate) get_symptomonset2reporting <- function(start_date = as.Date("2020-03-13"), end_date = today(), IC = FALSE) { # get all case data files from Osiris files <- list.files("/___UITZONDERINGSGROND_6___Previous", full.names = TRUE) files <- c(files, list.files("/___UITZONDERINGSGROND_6___Geschoond", full.names = TRUE)) files <- files[grepl(files, pattern = "rds")] report_dates <- seq(start_date, end_date, by = "day") %>% as.character SO2rep <- rep(0,40) for(report_date in report_dates) { pattern_date <- report_date %>% str_split(pattern = "-") %>% unlist %>% paste0(collapse = "") tmpfiles <- files[grep(files, pattern = pattern_date)] file.name <- tmpfiles[tmpfiles %>% file.info %>% pull(ctime) %>% which.max] # if today's data is not yet available if(length(file.name)==0) { print(paste("only files up to ", report_date, "used")) return(cumsum(rev(SO2rep)/sum(SO2rep))) } newdata <- read_rds(file.name) %>% {if(IC) filter(., NCOVopnameICU == "J") else filter(., NCOVpatZhs == "J")} %>% filter(!is.na(ZIE1eZiekteDt) & ZIE1eZiekteDt <= end_date) %>% group_by(ZIE1eZiekteDt) %>% summarise(incidence = n()) %>% full_join(tibble(ZIE1eZiekteDt = seq(min(min(.$ZIE1eZiekteDt), as.Date(report_date)-length(SO2rep)+1), as.Date(report_date), by = "day"))) %>% replace_na(list(incidence = 0)) %>% arrange(ZIE1eZiekteDt) if(as.Date(report_date) > start_date) { newdata <- full_join(prevdata, newdata, by = "ZIE1eZiekteDt") %>% replace_na(list(incidence.x = 0, incidence.y = 0)) %>% arrange(ZIE1eZiekteDt) %>% mutate(newentries = incidence.y - incidence.x) SO2rep <- SO2rep + (newdata %>% pull(newentries) %>% tail(40)) newdata <- newdata %>% rename(incidence = incidence.y) %>% select(ZIE1eZiekteDt, incidence) } prevdata <- newdata } cdf <- cumsum(rev(SO2rep)/sum(SO2rep)) cdf[cdf > 1] <- 1 return(cdf) } get_symptomonset2reporting_perprov <- function(provincie, start_date = as.Date("2020-03-13"), end_date = today(), IC = FALSE) { # get all case data files from Osiris files <- list.files("/___UITZONDERINGSGROND_6___Previous", full.names = TRUE) files <- c(files, list.files("/___UITZONDERINGSGROND_6___Geschoond", full.names = TRUE)) files <- files[grepl(files, pattern = "rds")] report_dates <- seq(start_date, end_date, by = "day") %>% as.character SO2rep <- rep(0,40) for(report_date in report_dates) { pattern_date <- report_date %>% str_split(pattern = "-") %>% unlist %>% paste0(collapse = "") tmpfiles <- files[grep(files, pattern = pattern_date)] file.name <- tmpfiles[tmpfiles %>% file.info %>% pull(ctime) %>% which.max] # if today's data is not yet available if(length(file.name)==0) { print(paste("only files up to ", report_date, "used")) return(cumsum(rev(SO2rep)/sum(SO2rep))) } newdata <- read_rds(file.name) %>% filter(Provincie == provincie) %>% {if(IC) filter(., NCOVopnameICU == "J") else filter(., NCOVpatZhs == "J")} %>% filter(!is.na(ZIE1eZiekteDt) & ZIE1eZiekteDt <= end_date) %>% group_by(ZIE1eZiekteDt) %>% summarise(incidence = n()) %>% full_join(tibble(ZIE1eZiekteDt = seq(min(min(.$ZIE1eZiekteDt), as.Date(report_date)-length(SO2rep)+1), as.Date(report_date), by = "day"))) %>% replace_na(list(incidence = 0)) %>% arrange(ZIE1eZiekteDt) if(as.Date(report_date) > start_date) { newdata <- full_join(prevdata, newdata, by = "ZIE1eZiekteDt") %>% replace_na(list(incidence.x = 0, incidence.y = 0)) %>% arrange(ZIE1eZiekteDt) %>% mutate(newentries = incidence.y - incidence.x) SO2rep <- SO2rep + (newdata %>% pull(newentries) %>% tail(40)) newdata <- newdata %>% rename(incidence = incidence.y) %>% select(ZIE1eZiekteDt, incidence) } prevdata <- newdata } cdf <- cumsum(rev(SO2rep)/sum(SO2rep)) cdf[cdf > 1] <- 1 return(cdf) } extract_epicurve <- function(data, IC = FALSE, report_date = today(), SOtoRep = NULL) { data <- data %>% {if(IC) filter(., NCOVopnameICU == "J") else filter(., NCOVpatZhs == "J")} %>% mutate(bron = if("Bron" %in% names(data)) Bron else Bron_land) epicurve <- data %>% filter(!is.na(ZIE1eZiekteDt) & ZIE1eZiekteDt <= today()) %>% # omit symptom onset dates in future group_by(ZIE1eZiekteDt) %>% summarise(incidenceABROAD = sum(bron == "Buitenland", na.rm = TRUE), incidenceTOT = n()) %>% mutate(incidenceNL = incidenceTOT - incidenceABROAD) %>% full_join(tibble(ZIE1eZiekteDt = seq(min(.$ZIE1eZiekteDt), as.Date(report_date), by = "day"))) %>% replace_na(list(incidenceNL = 0, incidenceTOT = 0, incidenceABROAD = 0)) %>% arrange(ZIE1eZiekteDt) %>% mutate(dates = ZIE1eZiekteDt) %>% select(dates, incidenceNL, incidenceABROAD, incidenceTOT) if(length(SOtoRep) == 0) { # default SO to reporting delay over last 7 days reporting_delay <- get_symptomonset2reporting(start_date = as.Date(report_date) - 7, end_date = as.Date(report_date), IC = IC) } else { reporting_delay = SOtoRep } if(nrow(epicurve) > length(reporting_delay)) { reporting_delay <- c(rep(1, nrow(epicurve)-length(reporting_delay)), rev(reporting_delay)) } else { reporting_delay <- rev(reporting_delay[1:nrow(epicurve)]) } epicurve <- epicurve %>% mutate(p = reporting_delay) # if most recent days are zero, give them one case and a p to reflect total number of last non-zero day max_nonzero_inc_day <- max(which(epicurve$incidenceTOT != 0)) if(max_nonzero_inc_day < nrow(epicurve)) { epicurve$incidenceTOT[(max_nonzero_inc_day + 1):nrow(epicurve)] <- 1 epicurve$incidenceNL[(max_nonzero_inc_day + 1):nrow(epicurve)] <- 1 epicurve$p[(max_nonzero_inc_day + 1):nrow(epicurve)] <- epicurve$p[max_nonzero_inc_day]/epicurve$incidenceTOT[max_nonzero_inc_day] } return(epicurve) } calculate_Ru_Osiris <- function(epicurve, nsample = 1000) { pdfSI <- dnorm(x = 1:7, mean = 6.8/2, sd = 1.27) pdfSI <- pdfSI/sum(pdfSI) g <- pdfSI # sample non-observed cases due to reporting delay from neg binomial incidence <- matrix(0, nrow = nrow(epicurve), ncol = nsample) for(j in 1:nrow(epicurve)) { if(epicurve$incidenceTOT[j] != 0) { incidence[j, ] <- rnbinom(n = nsample, size = epicurve$incidenceTOT[j], prob = epicurve$p[j]) + epicurve$incidenceTOT[j] } } # construct instantaneous Rt from incidence Rt <- matrix(0, nrow = nrow(epicurve), ncol = nsample) for(t in 2:nrow(epicurve)) { g2 <- g[1:min(length(g), t-1)] Rt[t,] <- as.numeric((incidence[t, ] - epicurve$incidenceABROAD[t])/colSums(incidence[t - (1:length(g2)), , drop = FALSE]*g2)) } # construct case Ru from instantaneous Rt Ru <- matrix(0, nrow = nrow(epicurve), ncol = nsample) for(u in 1:(nrow(Rt)-1)) { g2 <- g[1:min(length(g), nrow(Rt)-u)] Ru[u,] <- colSums(g2*Rt[u+1:length(g2), ,drop = FALSE])/sum(g2) # divide by sum(g2) to account for not observed Rt's in future } # construct confidence bounds by sampling from poisson Rtupper <- rep(0, nrow(incidence)) Rtlower <- rep(0, nrow(incidence)) for(j in 1:nrow(incidence)) { tmp <- sapply(1:nsample, function(i) if(incidence[j,i] != 0 & Rt[j,i] !=Inf) rpois(100, incidence[j,i]*Rt[j,i])/incidence[j,i]) Rtlower[j] <- quantile(unlist(tmp), probs = 0.025) Rtupper[j] <- quantile(unlist(tmp), probs = 0.975) } # construct confidence bounds by sampling from poisson Ruupper <- rep(0, nrow(incidence)) Rulower <- rep(0, nrow(incidence)) for(j in 1:nrow(incidence)) { tmp <- sapply(1:nsample, function(i) if(incidence[j,i] != 0 & Ru[j,i] !=Inf) rpois(100, incidence[j,i]*Ru[j,i])/incidence[j,i]) Rulower[j] <- quantile(unlist(tmp), probs = 0.025) Ruupper[j] <- quantile(unlist(tmp), probs = 0.975) } epicurve <- epicurve %>% mutate(mean_incidence = apply(incidence, MARGIN = 1, mean), instR = apply(Rt, MARGIN = 1, mean), caseR = apply(Ru, MARGIN = 1, mean), instRlower = Rtlower, instRupper = Rtupper, caseRlower = Rulower, caseRupper = Ruupper) return(epicurve) } plot_epicurve_Osiris <- function(epicurve, IC = FALSE) { p <- ggplot(data = epicurve, mapping = aes(x = dates, y = incidenceTOT/p)) + geom_bar(stat = "identity", fill = "grey") + geom_bar(mapping = aes(y = incidenceTOT), stat = "identity", fill = "#01689b") + geom_bar(mapping = aes(y = incidenceABROAD), stat = "identity", fill = 2) + scale_x_date( limits = c(as.Date("2020-02-27"), NA), breaks = as.Date("2020-02-17") + seq(0, 50, 7), labels = format(as.Date("2020-02-17") + seq(0, 50, 7), "%d %b")) + labs(x = "eerste ziektedag", y = "aantal per dag", subtitle = if(IC) paste("gebaseerd op IC opnames uit OSIRIS data", last(epicurve$dates)) else paste("gebaseerd op ziekenhuisopnames uit OSIRIS data", last(epicurve$dates))) + theme_minimal() return(p) } plot_Reff <- function(epicurve, caseReff = TRUE, IC = TRUE) { p <- ggplot(data = epicurve, # %>% {if(caseReff) slice(., -nrow(.)) else .}, mapping = aes(x = dates, y = if(caseReff) caseR else instR, ymin = if(caseReff) caseRlower else instRlower, ymax = if(caseReff) caseRupper else instRupper)) + geom_ribbon(fill = "grey", col = NA) + geom_line(col = 1) + geom_line(y = 1, lty = 2) + coord_cartesian( ylim = c(0,4)) + scale_x_date( limits = c(as.Date("2020-02-27"), if(caseReff) max(epicurve$dates)-1 else max(epicurve$dates)), breaks = as.Date("2020-02-17") + seq(0, 50, 7), labels = format(as.Date("2020-02-17") + seq(0, 50, 7), "%d %b")) + labs(x = "eerste ziektedag", y = if(caseReff) "effectieve R" else "instantane R", subtitle = if(IC) paste("gebaseerd op IC opnames uit OSIRIS data", last(epicurve$dates)) else paste("gebaseerd op ziekenhuisopnames uit OSIRIS data", last(epicurve$dates))) + theme_minimal() return(p) }