##################################################################
# Generate case data file for hospitalised patients
# - date of symptom onset
# - date of admission
# - date admission reported
# - infected abroad (yes/no)
##################################################################

library(tidyverse)
library(lubridate)

# Remove records whose EIGENAARDesc is one of the islands listed below.
# Data without an EIGENAARDesc column is returned unchanged.
drop_excluded_eigenaar <- function(data) {
  excluded <- c("Sint Eustatius", "Bonaire", "Saba", "Sint Maarten",
                "Aruba", "Curaçao", "Curacao")
  if ("EIGENAARDesc" %in% names(data)) {
    filter(data, !(EIGENAARDesc %in% excluded))
  } else {
    data
  }
}

# Path of the data file dated `report_day` in the `files` index.
# When several files exist for that day the one with the latest ctime is used
# (the first one on a tie); character(0) is returned when there is none.
latest_file_for_day <- function(files, report_day) {
  candidates <- filter(files, date == report_day)
  if (nrow(candidates) == 0) {
    return(character(0))
  }
  candidates$file_name[which.max(as.numeric(candidates$time))]
}

# Earliest non-missing date in `x`, or an NA Date when every value is missing.
# min(x, na.rm = TRUE) would warn in that case and return an infinite Date,
# which prints as NA but is not detected by is.na().
earliest_date_or_na <- function(x) {
  if (all(is.na(x))) {
    as.Date(NA)
  } else {
    min(x, na.rm = TRUE)
  }
}

# Build (or incrementally update) the case data for hospitalised patients.
#
# The daily "Data_<yyyymmdd>" rds files are processed one report day at a
# time, starting at the last report day already present in the cached result,
# so that for every OSIRISNR the first day on which the record / its admission
# date showed up in a file is tracked.
#
# Args:
#   date: last report day to process (default: today).
#
# Returns:
#   A tibble with one row per OSIRISNR of the most recent file, holding
#   symptom_onset, no_symptoms, infected_abroad, Provincie, Veiligheidsregio,
#   admission_date, admission_date_reported and admission_reported.
#   The cached tibble is returned unchanged when `date` is not later than the
#   last report day it contains.
#   The number 0 is returned (after printing a message) when the file of one
#   of the report days is missing.
#
# Side effect: the result is saved to the case_data_reports.rds cache file.
generate_case_data_reports <- function(date = today()) {
  # Hold the argument under a name that cannot be confused with the `date`
  # column of `files` inside dplyr verbs.
  cutoff_date <- as.Date(date)
  first_valid_date <- as.Date("2020-02-01")

  # Index of available data files: path, date taken from the file name and
  # ctime (used to pick the most recent file of a day).
  files <- c(
    list.files("/___UITZONDERINGSGROND_6___Previous", full.names = TRUE),
    list.files("/___UITZONDERINGSGROND_6___Geschoond", full.names = TRUE)
  )
  # NOTE: matches "rds" anywhere in the path, not only as file extension.
  files <- files[grepl(files, pattern = "rds")]
  files <- tibble(
    file_name = files,
    # str_extract() yields exactly one value (or NA) per path, so the dates
    # stay aligned with file_name even for paths without a "Data_<digits>" tag.
    date = files %>%
      str_extract(pattern = "Data_[0-9]+") %>%
      str_remove(pattern = "Data_") %>%
      as.Date(format = "%Y%m%d"),
    time = file.info(files)$ctime
  )

  case_data_filename <- "/___UITZONDERINGSGROND_6___case_data_reports.rds"
  case_data <- if (file.exists(case_data_filename)) {
    readRDS(case_data_filename)
  } else {
    # No cache yet: seed with the date of the oldest file so that processing
    # starts there.
    first_file_date <- min(files$date, na.rm = TRUE)
    tibble(admission_date = first_file_date,
           admission_date_reported = first_file_date,
           admission_reported = first_file_date)
  }

  last_reported <- max(case_data$admission_reported)
  if (cutoff_date <= last_reported) {
    # remember to cut-off the case data after date when using it in main
    # scripts
    return(case_data)
  }
  report_dates <- seq(last_reported, cutoff_date, by = "day")

  # Iterate by index: looping over a Date vector directly strips its class.
  for (i in seq_along(report_dates)) {
    report_date <- report_dates[i]
    file_name <- latest_file_for_day(files, report_date)

    # if data is not available
    if (length(file_name) == 0) {
      print(paste("file ", report_date, "not available"))
      return(0)
    }

    case_data <- read_rds(file_name) %>%
      drop_excluded_eigenaar() %>%
      mutate(admission_date = as.Date(MELGGDOntvDt,
                                      tryFormats = c("%d-%m-%Y",
                                                     "%Y-%m-%d"))) %>%
      select(OSIRISNR, admission_date) %>%
      # Natural join on the shared columns (OSIRISNR and admission_date once
      # the accumulated data has an OSIRISNR column): a record that is new, or
      # whose admission date changed, yields a row without reported dates ...
      full_join(case_data) %>%
      # ... which is stamped with the current report day. The replacement is a
      # Date, matching the column type.
      replace_na(list(admission_date_reported = report_date,
                      admission_reported = report_date)) %>%
      # Admission dates outside [2020-02-01, cutoff] are treated as missing,
      # and a missing admission date has no reporting date.
      mutate(
        admission_date = if_else(
          admission_date >= first_valid_date & admission_date <= cutoff_date,
          admission_date,
          as.Date(NA)
        ),
        admission_date_reported = if_else(is.na(admission_date),
                                          as.Date(NA),
                                          admission_date_reported)
      )
  }

  # One row per case: the most recently reported admission date, the first
  # day an admission date was reported and the first day the case was seen.
  case_data <- case_data %>%
    group_by(OSIRISNR) %>%
    summarise(
      admission_date = admission_date[which.max(admission_reported)],
      admission_date_reported = earliest_date_or_na(admission_date_reported),
      admission_reported = min(admission_reported)
    )

  # Remaining case characteristics are taken from the most recent file.
  file_name <- latest_file_for_day(files, last(report_dates))

  case_data <- read_rds(file_name) %>%
    drop_excluded_eigenaar() %>%
    mutate(
      # earlier definition:
      # infected_abroad = ifelse(Bron_land == "Buitenland", "Yes", "No"),
      infected_abroad = ifelse(
        Bron_buitenland %in% c("Waarschijnlijk buitenland",
                               "Mogelijk buitenland"),
        "Yes", "No"
      ),
      symptom_onset = ZIE1eZiekteDt,
      no_symptoms = if_else(NCOVVast1eziektedag == "NVT", 1, 0, missing = 0)
    ) %>%
    select(OSIRISNR, symptom_onset, no_symptoms, infected_abroad,
           Provincie, Veiligheidsregio) %>%
    left_join(case_data, by = "OSIRISNR") %>%
    # Keep cases with at least one of the two dates known, and no known date
    # before 2020-02-01.
    filter(
      !(is.na(admission_date) & is.na(symptom_onset)),
      is.na(symptom_onset) | symptom_onset >= first_valid_date,
      is.na(admission_date) | admission_date >= first_valid_date
    )

  saveRDS(case_data, case_data_filename)
  case_data
}