library(tidyverse)
library(lubridate)

#' Estimate the cumulative delay distribution from symptom onset to reporting
#'
#' Walks through the daily case-data snapshots between `start_date` and
#' `end_date`. For every report date the newest snapshot file whose path
#' contains that date (as `yyyymmdd`) is read, cases are counted per
#' symptom-onset date (`ZIE1eZiekteDt`), and the counts are compared with
#' those of the previous report date. The day-over-day increments are
#' accumulated per delay (report date minus onset date) and converted into
#' a cumulative distribution.
#'
#' @param start_date First report date to use (a `Date`, or anything
#'   `as.Date()` can convert). Its snapshot only serves as the baseline.
#' @param end_date Last report date to use; onset dates after it are dropped.
#' @param IC If `TRUE`, keep records with `NCOVopnameICU == "J"`; otherwise
#'   keep records with `NCOVpatZhs == "J"`.
#' @param max_delay Number of delay days tracked (delays `0` to
#'   `max_delay - 1`). Defaults to 40, the value previously hard-coded.
#' @return Numeric vector of length `max_delay`; element `k` is the fraction
#'   of accumulated new entries with a delay of at most `k - 1` days, capped
#'   at 1. If no new entries were accumulated the vector is all `NaN` (and a
#'   warning is raised). If a snapshot is missing for some report date, the
#'   distribution based on the report dates before it is returned.
get_symptomonset2reporting <- function(start_date = as.Date("2020-03-13"),
                                       end_date = today(),
                                       IC = FALSE,
                                       max_delay = 40) {
  start_date <- as.Date(start_date)
  end_date <- as.Date(end_date)
  stopifnot(
    length(start_date) == 1L, !is.na(start_date),
    length(end_date) == 1L, !is.na(end_date),
    end_date >= start_date,
    is.numeric(max_delay), length(max_delay) == 1L,
    max_delay >= 1, max_delay == round(max_delay)
  )

  # Turn accumulated counts per delay into a CDF. The counts are ordered by
  # onset date (oldest first), so they are reversed to start at delay 0.
  # Increments can be negative (counts for an onset date may go down between
  # snapshots), so partial sums can exceed the total; cap them at 1.
  finalise_cdf <- function(counts_by_delay) {
    total <- sum(counts_by_delay)
    if (total == 0) {
      warning(
        "no new entries accumulated; delay distribution is undefined (NaN)",
        call. = FALSE
      )
    }
    pmin(cumsum(rev(counts_by_delay) / total), 1)
  }

  # get all case data files from Osiris
  files <- c(
    list.files("/___UITZONDERINGSGROND_6___Previous", full.names = TRUE),
    list.files("/___UITZONDERINGSGROND_6___Geschoond", full.names = TRUE)
  )
  files <- files[grepl(files, pattern = "rds")]

  report_dates <- seq(start_date, end_date, by = "day")

  SO2rep <- rep(0, max_delay)
  prevdata <- NULL

  # Index loop: iterating over a Date vector directly would drop its class.
  for (i in seq_along(report_dates)) {
    day <- report_dates[i]

    # Newest (by ctime) file whose path contains the report date as yyyymmdd.
    tmpfiles <- files[grep(files, pattern = format(day, "%Y%m%d"))]
    file.name <- tmpfiles[which.max(file.info(tmpfiles)$ctime)]

    # if today's data is not yet available
    if (length(file.name) == 0) {
      message(paste("only files up to ", format(day), "used"))
      return(finalise_cdf(SO2rep))
    }

    snapshot <- read_rds(file.name)
    snapshot <- if (IC) {
      filter(snapshot, NCOVopnameICU == "J")
    } else {
      filter(snapshot, NCOVpatZhs == "J")
    }

    counts <- snapshot %>%
      filter(!is.na(ZIE1eZiekteDt) & ZIE1eZiekteDt <= end_date) %>%
      group_by(ZIE1eZiekteDt) %>%
      summarise(incidence = n())

    # Complete the onset dates with zero counts so the table covers at least
    # the last `max_delay` days up to the report date. c() keeps this safe
    # when the filtered snapshot is empty.
    first_day <- min(c(counts$ZIE1eZiekteDt, day - max_delay + 1))
    all_days <- tibble(ZIE1eZiekteDt = seq(first_day, day, by = "day"))

    newdata <- counts %>%
      full_join(all_days, by = "ZIE1eZiekteDt") %>%
      replace_na(list(incidence = 0)) %>%
      arrange(ZIE1eZiekteDt)

    # The first report date only provides the baseline to diff against.
    if (!is.null(prevdata)) {
      changes <- full_join(prevdata, newdata, by = "ZIE1eZiekteDt") %>%
        replace_na(list(incidence.x = 0, incidence.y = 0)) %>%
        arrange(ZIE1eZiekteDt) %>%
        mutate(newentries = incidence.y - incidence.x)

      # Select the delay window by date rather than by position: if a
      # snapshot contains onset dates after its report date, taking the last
      # `max_delay` rows would shift every delay. Without such rows this is
      # exactly the last `max_delay` rows.
      in_window <- changes$ZIE1eZiekteDt > day - max_delay &
        changes$ZIE1eZiekteDt <= day
      SO2rep <- SO2rep + changes$newentries[in_window]

      newdata <- changes %>%
        select(ZIE1eZiekteDt, incidence = incidence.y)
    }

    prevdata <- newdata
  }

  finalise_cdf(SO2rep)
}