report_date <- as.Date("2020-04-29") var_name <- "NCOVdat1ezkhopn" type <- "hosp" start_date <- as.Date("2020-03-13") end_date <- report_date # get filenames of all Osiris downloads files <- list.files("/___UITZONDERINGSGROND_6___Previous", full.names = TRUE) files <- c(files, list.files("/___UITZONDERINGSGROND_6___Geschoond", full.names = TRUE)) files <- files[grepl(files, pattern = "rds")] files <- tibble(file_name = files, date = files %>% str_extract_all(pattern = "(Data_[0-9]+)") %>% unlist %>% gsub(pattern = "Data_", replacement = "") %>% as.Date(format = "%Y%m%d"), time = files %>% file.info %>% pull(ctime)) report_dates <- seq(start_date, end_date, by = "day") %>% as.character report_dates <- seq(as.Date("2020-03-13"), today()-1, by = "day") %>% as.character report_date <- report_dates[30] data <- tibble(var_date = as.Date(report_dates)) for(report_date in report_dates) { file_name <- files %>% filter(date == report_date) %>% filter(time == max(time)) %>% # get most recent file (if multiple files exist) pull(file_name) newdata <- read_rds(file_name) %>% {if(type == "IC") filter(., NCOVopnameICU == "J") else if(type == "hosp") filter(., NCOVpatZhs == "J") else .} %>% mutate(var_date = pull(., var_name)) %>% mutate(var_date = as.Date(var_date, tryFormats = c("%d-%m-%Y", "%Y-%m-%d"))) %>% filter(!is.na(var_date) & var_date >= as.Date("2020-02-01") & var_date <= report_date) %>% group_by(var_date) %>% summarise(incidence = n()) %>% #full_join(tibble(var_date = seq(min(min(.$var_date), as.Date(report_date)-length(rep_delay)+1), as.Date(report_date), by = "day"))) %>% full_join(tibble(var_date = seq(min(.$var_date), as.Date(report_date), by = "day"))) %>% replace_na(list(incidence = 0)) %>% #rename(!!paste0("rep_", report_date) := incidence) rename(!!report_date := incidence) data <- full_join(data, newdata) %>% arrange(var_date) } incidence <- c(round(exp(0.18*(1:40))), round(2000*exp(-0.1*(1:(80-40))))) rep_matrix <- matrix(0, nrow = length(incidence), ncol = length(incidence) + 10) for(i in 1:length(incidence)) { tmp <- sample(0:10, size = incidence[i], prob = c(0, 0.05, 0.08, 0.15, 0.2, 0.18, 0.14, 0.1, 0.05, 0.03, 0.02), replace = TRUE) %>% table rep_matrix[i, as.integer(names(tmp)) + i] <- tmp rep_matrix[i,] <- cumsum(rep_matrix[i,]) } rep_matrix <- rep_matrix[, 1:length(incidence)] colnames(rep_matrix) <- as.character(data$var_date) dummy_data <- as_tibble(rep_matrix) %>% mutate(var_date = data$var_date, realincidence = incidence) incidence <- c(round(exp(0.2*(1:30))), round(500*exp(-0.1*(1:30)))) test_data <- c() for(i in 1:length(incidence)) { tmp <- sample(0:10, size = incidence[i], prob = c(0, 0.05, 0.08, 0.15, 0.2, 0.18, 0.14, 0.1, 0.05, 0.03, 0.02), replace = TRUE) %>% table test_data <- bind_rows(test_data, tibble(onset = as.Date("2020-01-01") + rep(i, incidence[i]), reported = as.Date("2020-01-01") + rep(as.integer(names(tmp)) + i, times = tmp))) } write_csv(test_data, path = "./___UITZONDERINGSGROND_6___test_data.csv") report_date <- "2020-04-10" repdelay_retrospective <- tibble() for(report_date in report_dates[2:length(report_dates)]) { #for(report_date in as.character(data$var_date)[-1]) { repdelay_retrospective <- bind_rows(repdelay_retrospective, data %>% select(var_date, report_date, as.character(as.Date(report_date)-1)) %>% filter(var_date > as.Date("2020-03-07") & var_date <= report_date) %>% mutate(cumincidence = pull(., report_date), cumincidence_yesterday = pull(., as.character(as.Date(report_date)-1)), incidence = cumincidence - cumincidence_yesterday, incidence = ifelse(incidence < 0, 0, incidence), day = as.integer(as.Date(report_date) - var_date)) %>% replace_na(replace = list(incidence = 0, cumincidence = 0)) %>% arrange(day) %>% mutate(prep = cumsum(incidence)/sum(incidence), ratio = incidence/cumincidence, reported = report_date) %>% select(var_date, reported, cumincidence, incidence, day, prep, ratio) ) } repdelay_retrospective %>% filter(day > 20 & day <= 30 & cumincidence < 20 & incidence > 0) # zorgen voor minimaal aantal observaties per day (10 of length(dates_seq)) # lage cumincidence eruit filteren? -> min 20 tmp <- repdelay_retrospective %>% filter(reported > as.Date("2020-04-24") & reported < as.Date("2020-05-01")) %>% #filter(day > 15) %>% View # group_by(var_date) %>% # mutate(tot = max(cumincidence)) %>% # ungroup() %>% # filter(tot >= 20) %>% group_by(day) %>% mutate(tot = n()) %>% ungroup() %>% filter(tot >= min(0, max(tot))) %>% group_by(day) %>% summarise(ratio = sum(incidence, na.rm = TRUE)/sum(cumincidence, na.rm = TRUE), sum_incidence = sum(incidence, na.rm = TRUE), sum_cumincidence = sum(cumincidence, na.rm = TRUE)) %>% mutate(prep = calculate_cdf_repdelay(ratio), p = diff(c(0, prep))) plot(diff(c(0,tmp$prep)), type = "l") lines(tmp$p, col = 4) tmp$prep[1:40] tmp %>% View as.Date("2020-04-04") - as.Date("2020-03-07") ratio <- tmp$ratio calculate_cdf_repdelay <- function(ratio, maxdelay = 40) { tmpratio <- c(ratio[1:min(maxdelay, length(ratio))], rep(0, max(0, maxdelay - length(ratio)))) p <- rep(0, maxdelay) for(i in rev(1:maxdelay)) { p[i] <- tmpratio[i]*(1-sum(p)) } p[is.na(p)] <- 0 # first element can be NaN when cumincidence and incidence are 0 for all days 0 (only in start of epidemic) p <- c(p[1:min(maxdelay, length(ratio))], rep(0, max(0, length(ratio)- maxdelay))) return(cumsum(p)/max(cumsum(p))) } calculate_cdf_repdelay(tmp$ratio) repdelay_retrospective %>% filter(reported == as.Date("2020-04-29")) %>% #filter(day > 15) %>% View filter(tot >= 20) %>% View group_by(day) %>% mutate(tot = n()) %>% ungroup() %>% filter(tot >= min(10, max(tot))) %>% group_by(day) %>% summarise(ratio = sum(incidence, na.rm = TRUE)/sum(cumincidence, na.rm = TRUE)) %>% mutate(prep = calculate_cdf_repdelay(ratio)) ggplot(data = repdelay_retrospective %>% filter(reported > as.Date("2020-03-20")) %>% #filter(day > 15) %>% View group_by(reported, var_date) %>% mutate(tot = max(cumincidence)) %>% ungroup() %>% filter(tot >= 20) %>% group_by(reported, day) %>% mutate(tot = n()) %>% ungroup() %>% filter(tot >= min(10, max(tot))) %>% group_by(reported, day) %>% summarise(ratio = sum(incidence, na.rm = TRUE)/sum(cumincidence, na.rm = TRUE)) %>% mutate(prep = calculate_cdf_repdelay(ratio)), mapping = aes(x = day, y = prep, col = factor(reported))) + geom_line() + # geom_line(data = . %>% group_by(day) %>% # summarise(incidence = sum(incidence)) %>% # mutate(prep = cumsum(incidence)/sum(incidence)), # col = 1, # lwd = 1.2) + # geom_line(data = tibble(day = 0:39, # prep = cumsum(p)), # col = 1, # lty = 2, # lwd = 1.2) + #coord_cartesian(xlim = c(0,11)) + #guides(col = FALSE) + theme_minimal() p <- rep(0, length(tmp)) for(i in rev(1:length(tmp))) { p[i] <- tmp[i]*(1-sum(p)) } p[is.na(p)] <- 0 probs <- c(0, 0.05, 0.08, 0.15, 0.2, 0.18, 0.14, 0.1, 0.05, 0.03, 0.02) probs/cumsum(probs) plot(cumsum(p), type = "l") lines(cumsum(p), col = 2) p[1] <- 0 repdelay_retrospective %>% arrange(var_date) %>% View repdelay_prospective <- dummy_data %>% # filter(var_date == report_date) %>% group_by(var_date) %>% pivot_longer(grep(names(.), pattern = "2020", value = TRUE), names_to = "reported", values_to = "cumincidence") %>% filter(reported >= var_date) %>% mutate(incidence = diff(c(0, cumincidence)), incidence = ifelse(incidence < 0, 0, incidence), day = as.integer(as.Date(reported) - var_date)) %>% mutate(prep = cumsum(incidence)/sum(incidence)) #%>% select(var_date, day, prep) %>% pivot_wider(names_from = day, names_prefix = "D", values_from = prep) ggplot(data = repdelay_prospective %>% filter(var_date > as.Date("2020-04-13") & var_date < as.Date("2020-04-27")), mapping = aes(x = day, y = prep, col = factor(var_date))) + geom_line() + geom_line(data = . %>% group_by(day) %>% summarise(incidence = sum(incidence)) %>% mutate(prep = cumsum(incidence)/sum(incidence)), col = 1, lwd = 1.5) + geom_line(data = tibble(day = 0:10, prep = cumsum(c(0, 0.05, 0.08, 0.15, 0.2, 0.18, 0.14, 0.1, 0.05, 0.03, 0.02))), col = 1, lty = 2, lwd = 1.2) + coord_cartesian(xlim = c(0,11)) + guides(col = FALSE) + theme_minimal() ggplot(data = repdelay_retrospective %>% filter(reported > as.Date("2020-04-13") & reported < as.Date("2020-04-27")), mapping = aes(x = day, y = prep, col = factor(reported))) + geom_line() + geom_line(data = . %>% group_by(day) %>% summarise(incidence = sum(incidence)) %>% mutate(prep = cumsum(incidence)/sum(incidence)), col = 1, lwd = 1.2) + geom_line(data = tibble(day = 0:39, prep = cumsum(p)), col = 1, lty = 2, lwd = 1.2) + #coord_cartesian(xlim = c(0,11)) + guides(col = FALSE) + theme_minimal() repdelay_retrospective %>% filter(day == 1) %>% pull(prep) %>% mean(na.rm = TRUE) c(0, 0.05, 0.08, 0.15, 0.2, 0.18, 0.14, 0.1, 0.05, 0.03, 0.02)/cumsum(c(0, 0.05, 0.08, 0.15, 0.2, 0.18, 0.14, 0.1, 0.05, 0.03, 0.02))