################################################################### # Functions to use with estimationRt_OSIRIS script ################################################################### library(tidyverse) library(lubridate) library(odbc) library(runjags) library(coda) # Source nowcast functions list.files(path = "/___UITZONDERINGSGROND_6___Functies", full.names = TRUE) %>% walk(.f = source) get_nowcast_Osiris <- function(var_name = NULL, type = NULL, start_date = NULL, end_date = today()) { if(length(var_name) == 0) stop("provide Osiris name to analyse") if(length(type) == 0) stop("provide type of reports (\"reported\", \"hosp\" or \"IC\")") # get all case data files from Osiris files <- list.files("/___UITZONDERINGSGROND_6___Previous", full.names = TRUE) files <- c(files, list.files("/___UITZONDERINGSGROND_6___Geschoond", full.names = TRUE)) files <- files[grepl(files, pattern = "rds")] files <- tibble(file_name = files, date = files %>% str_extract_all(pattern = "(Data_[0-9]+)") %>% unlist %>% gsub(pattern = "Data_", replacement = "") %>% as.Date(format = "%Y%m%d"), time = files %>% file.info %>% pull(ctime)) if(length(start_date) == 0) start_date <- min(files$date) report_dates <- seq(start_date, end_date, by = "day") %>% as.character case_data <- tibble(var_date = end_date, rep_date = end_date) for(report_date in report_dates) { file_name <- files %>% filter(date == report_date) %>% filter(time == max(time)) %>% # get most recent file (if multiple files exist) pull(file_name) # if today's data is not yet available if(length(file_name)==0) { print(paste("file ", report_date, "not available")) return(0) } case_data <- read_rds(file_name) %>% {if(type == "IC") filter(., NCOVopnameICU == "J") else if(type == "hosp") filter(., NCOVpatZhs == "J") else .} %>% mutate(var_date = pull(., var_name)) %>% mutate(var_date = as.Date(var_date, tryFormats = c("%d-%m-%Y", "%Y-%m-%d"))) %>% filter(!is.na(var_date) & var_date <= end_date) %>% select(OSIRISNR, var_date) %>% left_join(case_data) %>% replace_na(replace = list(rep_date = report_date)) } nowcast_date <- end_date nowcast_data <- nowcast( case.data = case_data %>% transmute(onset.date = var_date, report.date = rep_date) %>% drop_na, start.date = start_date, nowcast.date = nowcast_date) tmp <- case_data %>% filter(report_date <= nowcast_date & var_date <= nowcast_date) %>% group_by(var_date) %>% summarise(incidence = n()) %>% full_join(nowcast_data, by = c("var_date" = "onset.date")) %>% replace_na(replace = list(incidence = 0, nowcast_p50 = 0)) %>% arrange(var_date) %>% mutate(nowcast_p50 = ifelse(incidence > nowcast_p50, incidence, nowcast_p50)) # get cdf reporting delay from case data interval <- case_data %>% mutate(interval = as.integer(rep_date - var_date)) %>% pull(interval) interval_cdf <- ecdf(interval[interval >= 0]) # admission can only be reported after admission interval_cdf <- interval_cdf(0:(nrow(tmp)-1)) tmp <- tmp %>% mutate(p = rev(interval_cdf)) return(tmp %>% select(var_date, nowcast_p50, p)) } get_nowcast_Osiris_province <- function(province = NULL, var_name = NULL, type = NULL, start_date = as.Date("2020-03-13"), end_date = today()) { if(length(province) == 0) stop("provide province name to analyse") if(length(var_name) == 0) stop("provide Osiris name to analyse") if(length(type) == 0) stop("provide type of reports (\"reported\", \"hosp\" or \"IC\")") # get filenames of all Osiris downloads files <- list.files("/___UITZONDERINGSGROND_6___Previous", full.names = TRUE) files <- c(files, list.files("/___UITZONDERINGSGROND_6___Geschoond", full.names = TRUE)) files <- files[grepl(files, pattern = "rds")] files <- tibble(file_name = files, date = files %>% str_extract_all(pattern = "(Data_[0-9]+)") %>% unlist %>% gsub(pattern = "Data_", replacement = "") %>% as.Date(format = "%Y%m%d"), time = files %>% file.info %>% pull(ctime)) if(length(start_date) == 0) start_date <- min(files$date) report_dates <- seq(start_date, end_date, by = "day") %>% as.character case_data <- tibble(var_date = end_date, rep_date = end_date) for(report_date in report_dates) { file_name <- files %>% filter(date == report_date) %>% filter(time == max(time)) %>% # get most recent file (if multiple files exist) pull(file_name) # if today's data is not yet available if(length(file_name)==0) { print(paste("file ", report_date, "not available")) return(0) } case_data <- read_rds(file_name) %>% filter(Provincie == as.character(province)) %>% {if(type == "IC") filter(., NCOVopnameICU == "J") else if(type == "hosp") filter(., NCOVpatZhs == "J") else .} %>% mutate(var_date = pull(., var_name)) %>% mutate(var_date = as.Date(var_date, tryFormats = c("%d-%m-%Y", "%Y-%m-%d"))) %>% filter(!is.na(var_date) & var_date <= end_date) %>% select(OSIRISNR, var_date) %>% left_join(case_data) %>% replace_na(replace = list(rep_date = report_date)) } nowcast_date <- end_date nowcast_data <- nowcast( case.data = case_data %>% transmute(onset.date = var_date, report.date = rep_date) %>% drop_na, start.date = start_date, nowcast.date = nowcast_date) tmp <- case_data %>% filter(report_date <= nowcast_date & var_date <= nowcast_date) %>% group_by(var_date) %>% summarise(incidence = n()) %>% full_join(nowcast_data, by = c("var_date" = "onset.date")) %>% replace_na(replace = list(incidence = 0, nowcast_p50 = 0)) %>% arrange(var_date) %>% mutate(nowcast_p50 = ifelse(incidence > nowcast_p50, incidence, nowcast_p50)) # get cdf reporting delay from case data interval <- case_data %>% mutate(interval = as.integer(rep_date - var_date)) %>% pull(interval) interval_cdf <- ecdf(interval[interval >= 0]) # admission can only be reported after admission interval_cdf <- interval_cdf(0:(nrow(tmp)-1)) tmp <- tmp %>% mutate(p = rev(interval_cdf)) return(tmp %>% select(var_date, nowcast_p50, p)) } get_symptomonset2admission_Osiris <- function(IC = TRUE, report_date = today()) { files <- list.files("/___UITZONDERINGSGROND_6___Geschoond", full.names = TRUE) files <- c(files, list.files("/___UITZONDERINGSGROND_6___Previous", full.names = TRUE)) files <- grep(files, pattern = "rds", value = TRUE) file_name <- grep(files, pattern = report_date %>% gsub(pattern = "-", replacement = ""), value = TRUE) file_name <- file_name[file_name %>% file.info %>% pull(ctime) %>% which.max] data <- read_rds(file_name) data <- data %>% mutate(NCOVdat1ezkhopn = NCOVdat1ezkhopn %>% as.Date(format = "%Y-%m-%d")) %>% filter((if(IC) NCOVopnameICU else NCOVpatZhs) == "J") %>% {if(IC) mutate(., adm_date = NCOVopnamedatumICU) else mutate(., adm_date = NCOVdat1ezkhopn)} interval <- data %>% filter(ZIE1eZiekteDt >= as.Date("2020-02-01") & ZIE1eZiekteDt <= report_date & adm_date >= as.Date("2020-02-27") & adm_date <= report_date) %>% mutate(interval = as.integer(adm_date - ZIE1eZiekteDt)) %>% pull(interval) interval_cdf <- ecdf(interval[interval >= 0 & interval <= 30]) # don't trust symptom onset after day of admission and admission nor admission more than 30 days after symptom onset interval_cdf <- interval_cdf(0:30) interval_pdf <- diff(c(0, interval_cdf)) return(interval_pdf) } extract_totcurve_Osiris <- function(data, IC = FALSE, report_date = today(), now_cast = NULL) { data <- data %>% {if(IC) filter(., NCOVopnameICU == "J") else filter(., NCOVpatZhs == "J")} %>% mutate(bron = if("Bron" %in% names(data)) Bron else Bron_land, adm_date = if(IC) NCOVopnamedatumICU else NCOVdat1ezkhopn) # construct admission curve: number of admissions per day, distinguishing between infection in NL or abroad admcurve <- data %>% filter(!is.na(adm_date) & adm_date >= as.Date("2020-02-01") & adm_date <= report_date) %>% # omit symptom onset dates in future group_by(adm_date) %>% summarise(adm_incidenceABROAD = sum(bron == "Buitenland", na.rm = TRUE), adm_incidenceTOT = n()) %>% mutate(adm_incidenceNL = adm_incidenceTOT - adm_incidenceABROAD) %>% full_join(tibble(adm_date = seq(min(.$adm_date), as.Date(report_date), by = "day"))) %>% replace_na(list(adm_incidenceNL = 0, adm_incidenceTOT = 0, adm_incidenceABROAD = 0)) %>% arrange(adm_date) %>% select(adm_date, adm_incidenceNL, adm_incidenceABROAD, adm_incidenceTOT) # construct admission curve with known symptom onset date: number of admissions per day # choose to ignore whether case is infected abroad or not (in total 13 hospital admissions without symptom onset date, infected abroad (data Osiris 2020-05-15)) admcurve_SO <- data %>% filter(!is.na(adm_date) & adm_date >= as.Date("2020-02-01") & adm_date <= report_date) %>% # omit symptom onset dates in future filter(!is.na(ZIE1eZiekteDt)) %>% group_by(adm_date) %>% summarise(adm_SO_incidenceTOT = n()) %>% full_join(tibble(adm_date = seq(min(.$adm_date), as.Date(report_date), by = "day"))) %>% replace_na(list(adm_SO_incidenceTOT = 0)) %>% arrange(adm_date) %>% select(adm_date, adm_SO_incidenceTOT) admcurve <- full_join(admcurve, admcurve_SO) if(length(now_cast) == 0) { now_cast <- get_nowcast_Osiris(var_name = if(IC) "NCOVopnamedatumICU" else "NCOVdat1ezkhopn", type = if(IC) "IC" else "hosp", end_date = as.Date(report_date)) } admcurve <- admcurve %>% left_join(now_cast, by = c("adm_date" = "var_date")) %>% select(-p) epicurve <- data %>% filter(!is.na(ZIE1eZiekteDt) & ZIE1eZiekteDt <= today()) %>% # omit symptom onset dates in future group_by(ZIE1eZiekteDt) %>% summarise(incidenceABROAD = sum(bron == "Buitenland", na.rm = TRUE), incidenceTOT = n()) %>% mutate(incidenceNL = incidenceTOT - incidenceABROAD) %>% full_join(tibble(ZIE1eZiekteDt = seq(min(.$ZIE1eZiekteDt), as.Date(report_date), by = "day"))) %>% replace_na(list(incidenceNL = 0, incidenceTOT = 0, incidenceABROAD = 0)) %>% arrange(ZIE1eZiekteDt) %>% mutate(dates = ZIE1eZiekteDt) %>% select(dates, incidenceNL, incidenceABROAD, incidenceTOT) totcurve <- full_join(admcurve, epicurve, by = c("adm_date" = "dates")) %>% rename(dates = adm_date) %>% arrange(dates) %>% replace_na(list(adm_incidenceNL = 0, adm_incidenceTOT = 0, adm_incidenceABROAD = 0, adm_SO_incidenceTOT = 0, nowcast_p50 = 0, incidenceNL = 0, incidenceTOT = 0, incidenceABROAD = 0)) %>% rename(adm_incidenceEXPECTED = nowcast_p50) return(totcurve) } # able to handle negative serial intervals calculate_Ru_Osiris <- function(epicurve, SOtoAdm, nsample = 1000) { #pdfSI <- dnorm(x = 1:7, mean = 6.8/2, sd = 1.27) # pdfSI <- dgamma(x = 1:10, scale = 1.0, shape = 4) # discretized pdf: probability density for serial interval of 1 day by integrating from 0.5 to 1.5 (note: 0 to 0.5 omitted) pdfSI <- diff(pgamma(q = seq(0.5, 10.5, 1), scale = 1.0, shape = 4)) pdfSI <- pdfSI/sum(pdfSI) g <- pdfSI names(g) <- 1:10 #g <- readRDS("/___UITZONDERINGSGROND_6___negSI.rds") # sample non-observed cases due to reporting delay from neg binomial incidence_adm <- matrix(0, nrow = nrow(epicurve), ncol = nsample) for(j in 1:nrow(epicurve)) { if(epicurve$adm_incidenceEXPECTED[j] != 0) { inctot <- if(epicurve$adm_incidenceTOT[j] == 0) 1 else epicurve$adm_incidenceTOT[j] incidence_adm[j, ] <- rnbinom(n = nsample, size = inctot , prob = inctot/epicurve$adm_incidenceEXPECTED[j]) + inctot } } # known SO make up fixed part of incidence matrix incidence <- matrix(rep(epicurve$incidenceTOT, nsample), ncol = nsample) # sampling symtpom onsets for(j in 1:nrow(incidence)) { for(i in 1:nsample) { tmp <- table(j - sample(0:(length(SOtoAdm)-1), size = incidence_adm[j,i] - epicurve$adm_SO_incidenceTOT[j] , prob = SOtoAdm, replace = TRUE)) tmp <- tmp[names(tmp) > 0] if(length(tmp) > 0) incidence[as.integer(names(tmp)), i] <- incidence[as.integer(names(tmp)), i] + tmp } } # for most recent dates divide by sum(SOtoAdm2) to correct for symptom onset of admissions in future (implicitly assuming they remain constant) for(j in (nrow(incidence)-length(SOtoAdm)+2):nrow(incidence)) { SOtoAdm2 <- SOtoAdm[1:min(length(SOtoAdm), nrow(incidence)-j+1)] incidence[j,] <- incidence[j,]/sum(SOtoAdm2) } # construct instantaneous Rt from incidence Rt <- matrix(0, nrow = nrow(incidence), ncol = nsample) for(t in 1:nrow(incidence)) { g2 <- g[as.integer(names(g)) < t & as.integer(names(g)) >= t - nrow(incidence)] Rt[t,] <- as.numeric((incidence[t, ] - epicurve$incidenceABROAD[t])/colSums(incidence[t - as.integer(names(g2)), , drop = FALSE]*as.vector(g2)))/sum(g2) } # construct case Ru from instantaneous Rt Ru <- matrix(0, nrow = nrow(incidence), ncol = nsample) for(u in 1:nrow(Rt)) { g2 <- g[as.integer(names(g)) > - u & as.integer(names(g)) <= nrow(incidence) - u] Ru[u,] <- colSums(as.vector(g2)*Rt[u + as.integer(names(g2)), ,drop = FALSE])/sum(g2) # divide by sum(g2) to account for not observed Rt's in future } Ru[is.na(Ru)] <- 0 # only necessary when SI comprise strictly positive values # construct confidence bounds by sampling from poisson Rtupper <- rep(0, nrow(incidence)) Rtlower <- rep(0, nrow(incidence)) for(j in 1:nrow(incidence)) { tmp <- sapply(1:nsample, function(i) if(incidence[j,i] != 0 & Rt[j,i] !=Inf & !is.na(Rt[j,i])) rpois(100, incidence[j,i]*Rt[j,i])/incidence[j,i]) Rtlower[j] <- quantile(unlist(tmp), probs = 0.025) Rtupper[j] <- quantile(unlist(tmp), probs = 0.975) } # construct confidence bounds by sampling from poisson Ruupper <- rep(0, nrow(incidence)) Rulower <- rep(0, nrow(incidence)) for(j in 1:nrow(incidence)) { tmp <- sapply(1:nsample, function(i) if(incidence[j,i] != 0 & Ru[j,i] !=Inf) rpois(100, incidence[j,i]*Ru[j,i])/incidence[j,i]) Rulower[j] <- quantile(unlist(tmp), probs = 0.025) Ruupper[j] <- quantile(unlist(tmp), probs = 0.975) } # last Ru's are 0, change to NA to prevent plotting them Ru[nrow(incidence),] <- NA Rulower[nrow(incidence)] <- NA Ruupper[nrow(incidence)] <- NA epicurve <- epicurve %>% mutate(incidence_mean = apply(incidence, MARGIN = 1, mean), incidence_lower = apply(incidence, MARGIN = 1, quantile, probs = 0.025), incidence_upper = apply(incidence, MARGIN = 1, quantile, probs = 0.975), instR = apply(Rt, MARGIN = 1, mean), instRlower = Rtlower, instRupper = Rtupper, caseR = apply(Ru, MARGIN = 1, mean), caseRlower = Rulower, caseRupper = Ruupper) return(epicurve) } plot_totcurve_Osiris <- function(epicurve, IC = FALSE) { perc_reportedSO <- round(100*sum(epicurve$incidenceTOT)/sum(epicurve$incidence_mean)) perc_estimatedSO <- 100 - perc_reportedSO p <- ggplot(data = epicurve, mapping = aes(x = dates)) + geom_bar(data = . %>% mutate(adm_incidenceEXTRA = adm_incidenceEXPECTED - adm_incidenceTOT) %>% pivot_longer(cols = c(adm_incidenceABROAD, adm_incidenceNL, adm_incidenceEXTRA), names_to = "incidenceType", values_to = "incidence") %>% mutate(incidenceType = factor(incidenceType) %>% fct_relevel("adm_incidenceEXTRA", "adm_incidenceNL", "adm_incidenceABROAD")), mapping = aes(y = incidence, fill = incidenceType), stat = "identity") + geom_ribbon(mapping = aes(ymin = incidence_lower, ymax = incidence_upper), fill = adjustcolor("#d52b1e", alpha = 0.5)) + geom_line(mapping = aes(y = incidence_mean, col = cut(incidence_lower, breaks = c(-1, 1000, 2000)))) + scale_fill_manual( values = c("grey", "#01689b", "#ffb612"), labels = c("verwacht op basis van\nrapportagevertraging", paste0("opnamedatum ", if(IC) "IC" else "ziekenhuis", ", geïnfecteerd\nin Nederland"), paste0("opnamedatum ", if(IC) "IC" else "ziekenhuis", ", geïnfecteerd\nbuiten Nederland"))) + scale_color_manual( values = c("#d52b1e", 1), labels = c(paste0("eerste ziektedag\n (", perc_reportedSO, "% gerapporteerd, ", perc_estimatedSO, "% geschat)"), "")) + scale_x_date( limits = c(as.Date("2020-02-14"), last(epicurve$dates)+1), expand = expansion(add = -0.5), breaks = "4 days", minor_breaks = "1 day", date_labels = "%e %b") + labs(x = NULL, y = "aantal per dag", subtitle = paste("gebaseerd op", if(IC) "IC" else "ziekenhuis", "opnames uit OSIRIS data", last(epicurve$dates))) + theme_minimal() + theme(legend.position = c(1, 1), legend.justification = c(1, 1), legend.title = element_blank(), axis.text.x = element_text(angle = 90, hjust = 1, vjust = 0.5), panel.grid.minor = element_blank()) + guides(fill = guide_legend(order = 1), color = guide_legend(order = 2)) return(p) } plot_Reff_Osiris <- function(epicurve, caseReff = TRUE, IC = TRUE) { p <- ggplot(data = epicurve, mapping = aes(x = dates, y = if(caseReff) caseR else instR, ymin = if(caseReff) caseRlower else instRlower, ymax = if(caseReff) caseRupper else instRupper)) + geom_ribbon(fill = "#c6b8cf", col = NA) + geom_line(col = "#42145f") + geom_line(y = 1, lty = 2) + coord_cartesian( ylim = c(0,4)) + scale_x_date( limits = c(as.Date("2020-02-14"), last(epicurve$dates)+1), expand = expansion(add = -0.5), breaks = "4 days", minor_breaks = "1 day", date_labels = "%e %b") + labs(x = NULL, y = if(caseReff) "effectieve R" else "instantane R", subtitle = paste("gebaseerd op", if(IC) "IC" else "ziekenhuis", "opnames uit OSIRIS data", last(epicurve$dates))) + theme_minimal() + theme(axis.text.x = element_text(angle = 90, hjust = 1, vjust = 0.5), panel.grid.minor = element_blank()) return(p) }