###################################################################
# Functions to use with estimationRt_OSIRIS script
###################################################################

library(tidyverse)
library(lubridate)
library(odbc)
library(runjags)
library(coda)

# Source nowcast functions
list.files(path = "/___UITZONDERINGSGROND_6___Functies", full.names = TRUE) %>% 
  walk(.f = source)

get_nowcast_Osiris <- function(var_name = NULL, type = NULL, start_date = NULL, end_date = today()) {
  
  if(length(var_name) == 0) stop("provide Osiris name to analyse")
  if(length(type) == 0) stop("provide type of reports (\"reported\", \"hosp\" or \"IC\")")

  # get all case data files from Osiris
  files <- list.files("/___UITZONDERINGSGROND_6___Previous", full.names = TRUE)
  files <- c(files, list.files("/___UITZONDERINGSGROND_6___Geschoond", full.names = TRUE))
  files <- files[grepl(files, pattern = "rds")]
  files <- tibble(file_name = files, 
                  date = files %>% str_extract_all(pattern = "(Data_[0-9]+)") %>% unlist %>% gsub(pattern = "Data_", replacement = "") %>% as.Date(format = "%Y%m%d"),
                  time = files %>% file.info %>% pull(ctime))
  
  if(length(start_date) == 0) start_date <- min(files$date)

  report_dates <- seq(start_date, end_date, by = "day") %>% as.character

  case_data <- tibble(var_date = end_date, rep_date = end_date)
  for(report_date in report_dates) {
    file_name <- files %>% 
      filter(date == report_date) %>%
      filter(time == max(time)) %>% # get most recent file (if multiple files exist) 
      pull(file_name)
    
    # if today's data is not yet available
    if(length(file_name)==0) {
      print(paste("file ", report_date, "not available"))
      return(0)
    }
    
    case_data <- read_rds(file_name) %>% 
    {if(type == "IC") filter(., NCOVopnameICU == "J") else if(type == "hosp") filter(., NCOVpatZhs == "J") else .} %>% 
      mutate(var_date = pull(., var_name)) %>%
      mutate(var_date = as.Date(var_date, tryFormats = c("%d-%m-%Y", "%Y-%m-%d"))) %>% 
      filter(!is.na(var_date) & var_date <= end_date) %>%
      select(OSIRISNR, var_date) %>%
      left_join(case_data) %>%
      replace_na(replace = list(rep_date = report_date))
  }
  
  nowcast_date <- end_date
  
  nowcast_data <- nowcast(
    case.data = case_data %>%
      transmute(onset.date = var_date, report.date = rep_date) %>% 
      drop_na,
    start.date   = start_date,
    nowcast.date = nowcast_date)
  
  tmp <- case_data %>% 
    filter(report_date <= nowcast_date & var_date <= nowcast_date) %>%
    group_by(var_date) %>% 
    summarise(incidence = n()) %>% 
    full_join(nowcast_data, by = c("var_date" = "onset.date")) %>%
    replace_na(replace = list(incidence = 0, nowcast_p50 = 0)) %>%
    arrange(var_date) %>%
    mutate(nowcast_p50 = ifelse(incidence > nowcast_p50, incidence, nowcast_p50))
  
  # get cdf reporting delay from case data
  interval <- case_data %>% mutate(interval = as.integer(rep_date - var_date)) %>% pull(interval) 
  interval_cdf <- ecdf(interval[interval >= 0]) # admission can only be reported after admission
  interval_cdf <- interval_cdf(0:(nrow(tmp)-1))
  
  tmp <- tmp %>% mutate(p = rev(interval_cdf))
  
  return(tmp %>% select(var_date, nowcast_p50, p))
}

get_nowcast_Osiris_province <- function(province = NULL, var_name = NULL, type = NULL, start_date = as.Date("2020-03-13"), end_date = today()) {
  
  if(length(province) == 0) stop("provide province name to analyse")
  if(length(var_name) == 0) stop("provide Osiris name to analyse")
  if(length(type) == 0) stop("provide type of reports (\"reported\", \"hosp\" or \"IC\")")
  
  # get filenames of all Osiris downloads
  files <- list.files("/___UITZONDERINGSGROND_6___Previous", full.names = TRUE)
  files <- c(files, list.files("/___UITZONDERINGSGROND_6___Geschoond", full.names = TRUE))
  files <- files[grepl(files, pattern = "rds")]
  files <- tibble(file_name = files, 
                  date = files %>% str_extract_all(pattern = "(Data_[0-9]+)") %>% unlist %>% gsub(pattern = "Data_", replacement = "") %>% as.Date(format = "%Y%m%d"),
                  time = files %>% file.info %>% pull(ctime))
  
  if(length(start_date) == 0) start_date <- min(files$date)
  report_dates <- seq(start_date, end_date, by = "day") %>% as.character

  case_data <- tibble(var_date = end_date, rep_date = end_date)
  for(report_date in report_dates) {
    file_name <- files %>% 
      filter(date == report_date) %>%
      filter(time == max(time)) %>% # get most recent file (if multiple files exist) 
      pull(file_name)
    
    # if today's data is not yet available
    if(length(file_name)==0) {
      print(paste("file ", report_date, "not available"))
      return(0)
    }

    case_data <- read_rds(file_name) %>% 
      filter(Provincie == as.character(province)) %>%
      {if(type == "IC") filter(., NCOVopnameICU == "J") else if(type == "hosp") filter(., NCOVpatZhs == "J") else .} %>% 
      mutate(var_date = pull(., var_name)) %>%
      mutate(var_date = as.Date(var_date, tryFormats = c("%d-%m-%Y", "%Y-%m-%d"))) %>% 
      filter(!is.na(var_date) & var_date <= end_date) %>%
      select(OSIRISNR, var_date) %>%
      left_join(case_data) %>%
      replace_na(replace = list(rep_date = report_date))
  }
  
  nowcast_date <- end_date
  
  nowcast_data <- nowcast(
    case.data = case_data %>%
      transmute(onset.date = var_date, report.date = rep_date) %>% 
      drop_na,
    start.date   = start_date,
    nowcast.date = nowcast_date)
  
  tmp <- case_data %>% 
    filter(report_date <= nowcast_date & var_date <= nowcast_date) %>%
    group_by(var_date) %>% 
    summarise(incidence = n()) %>% 
    full_join(nowcast_data, by = c("var_date" = "onset.date")) %>%
    replace_na(replace = list(incidence = 0, nowcast_p50 = 0)) %>%
    arrange(var_date) %>%
    mutate(nowcast_p50 = ifelse(incidence > nowcast_p50, incidence, nowcast_p50))
  
  # get cdf reporting delay from case data
  interval <- case_data %>% mutate(interval = as.integer(rep_date - var_date)) %>% pull(interval) 
  interval_cdf <- ecdf(interval[interval >= 0]) # admission can only be reported after admission
  interval_cdf <- interval_cdf(0:(nrow(tmp)-1))
  
  tmp <- tmp %>% mutate(p = rev(interval_cdf))
  
  return(tmp %>% select(var_date, nowcast_p50, p))
}

get_symptomonset2admission_Osiris <- function(IC = TRUE, report_date = today()) {
  files <- list.files("/___UITZONDERINGSGROND_6___Geschoond", full.names = TRUE)
  files <- c(files, list.files("/___UITZONDERINGSGROND_6___Previous", full.names = TRUE))
  files <- grep(files, pattern = "rds", value = TRUE)
  file_name <- grep(files, pattern = report_date %>% gsub(pattern = "-", replacement = ""), value = TRUE)
  file_name <- file_name[file_name %>% file.info %>% pull(ctime) %>% which.max]
  data <- read_rds(file_name)
  
  data <- data %>% 
    mutate(NCOVdat1ezkhopn = NCOVdat1ezkhopn %>% as.Date(format = "%Y-%m-%d")) %>% 
    filter((if(IC) NCOVopnameICU else NCOVpatZhs) == "J") %>%
    {if(IC) mutate(., adm_date = NCOVopnamedatumICU) else mutate(., adm_date = NCOVdat1ezkhopn)}
  
  interval <- data %>% filter(ZIE1eZiekteDt >= as.Date("2020-02-01") & ZIE1eZiekteDt <= report_date &
                                adm_date >= as.Date("2020-02-27") & adm_date <= report_date) %>%
    mutate(interval = as.integer(adm_date - ZIE1eZiekteDt)) %>%
    pull(interval)
  
  interval_cdf <- ecdf(interval[interval >= 0 & interval <= 30]) # don't trust symptom onset after day of admission and admission nor admission more than 30 days after symptom onset
  interval_cdf <- interval_cdf(0:30)
  interval_pdf <- diff(c(0, interval_cdf))

  return(interval_pdf)
}

extract_totcurve_Osiris <- function(data, IC = FALSE, report_date = today(), now_cast = NULL) {
  data <- data %>%
    {if(IC) filter(., NCOVopnameICU == "J") else filter(., NCOVpatZhs == "J")} %>%
    mutate(bron = if("Bron" %in% names(data)) Bron else Bron_land,
           adm_date = if(IC) NCOVopnamedatumICU else NCOVdat1ezkhopn)

  # construct admission curve: number of admissions per day, distinguishing between infection in NL or abroad
  admcurve <- data %>% 
    filter(!is.na(adm_date) & adm_date >= as.Date("2020-02-01") & adm_date <= report_date) %>% # omit symptom onset dates in future
    group_by(adm_date) %>%
    summarise(adm_incidenceABROAD = sum(bron == "Buitenland", na.rm = TRUE),
              adm_incidenceTOT = n()) %>%
    mutate(adm_incidenceNL = adm_incidenceTOT - adm_incidenceABROAD) %>% 
    full_join(tibble(adm_date = seq(min(.$adm_date), as.Date(report_date), by = "day"))) %>%
    replace_na(list(adm_incidenceNL = 0, adm_incidenceTOT = 0, adm_incidenceABROAD = 0)) %>%
    arrange(adm_date) %>%
    select(adm_date, adm_incidenceNL, adm_incidenceABROAD, adm_incidenceTOT)
 
  
  # construct admission curve with known symptom onset date: number of admissions per day
  # choose to ignore whether case is infected abroad or not (in total 13 hospital admissions without symptom onset date, infected abroad (data Osiris 2020-05-15))
  admcurve_SO <- data %>% 
    filter(!is.na(adm_date) & adm_date >= as.Date("2020-02-01") & adm_date <= report_date) %>% # omit symptom onset dates in future
    filter(!is.na(ZIE1eZiekteDt)) %>%
    group_by(adm_date) %>%
    summarise(adm_SO_incidenceTOT = n()) %>%
    full_join(tibble(adm_date = seq(min(.$adm_date), as.Date(report_date), by = "day"))) %>%
    replace_na(list(adm_SO_incidenceTOT = 0)) %>%
    arrange(adm_date) %>%
    select(adm_date, adm_SO_incidenceTOT)
  
  admcurve <- full_join(admcurve, admcurve_SO) 
  
  if(length(now_cast) == 0) {
    now_cast <- get_nowcast_Osiris(var_name = if(IC) "NCOVopnamedatumICU" else "NCOVdat1ezkhopn", type = if(IC) "IC" else "hosp", end_date = as.Date(report_date))
  }
  
  admcurve <- admcurve %>% 
    left_join(now_cast, by = c("adm_date" = "var_date")) %>% select(-p)

  epicurve <- data %>% 
    filter(!is.na(ZIE1eZiekteDt) & ZIE1eZiekteDt <= today()) %>% # omit symptom onset dates in future
    group_by(ZIE1eZiekteDt) %>%
    summarise(incidenceABROAD = sum(bron == "Buitenland", na.rm = TRUE),
              incidenceTOT = n()) %>%
    mutate(incidenceNL = incidenceTOT - incidenceABROAD) %>% 
    full_join(tibble(ZIE1eZiekteDt = seq(min(.$ZIE1eZiekteDt), as.Date(report_date), by = "day"))) %>%
    replace_na(list(incidenceNL = 0, incidenceTOT = 0, incidenceABROAD = 0)) %>%
    arrange(ZIE1eZiekteDt) %>%
    mutate(dates = ZIE1eZiekteDt) %>%
    select(dates, incidenceNL, incidenceABROAD, incidenceTOT)
  
  totcurve <- full_join(admcurve, epicurve, by = c("adm_date" = "dates")) %>%
    rename(dates = adm_date) %>%
    arrange(dates) %>%
    replace_na(list(adm_incidenceNL = 0, adm_incidenceTOT = 0, adm_incidenceABROAD = 0, adm_SO_incidenceTOT = 0, nowcast_p50 = 0, incidenceNL = 0, incidenceTOT = 0, incidenceABROAD = 0)) %>%
    rename(adm_incidenceEXPECTED = nowcast_p50)

  return(totcurve)
}

# able to handle negative serial intervals
calculate_Ru_Osiris <- function(epicurve, SOtoAdm, nsample = 1000) {
  
  #pdfSI <- dnorm(x = 1:7, mean = 6.8/2, sd = 1.27)
  # pdfSI <- dgamma(x = 1:10, scale = 1.0, shape = 4)
  # discretized pdf: probability density for serial interval of 1 day by integrating from 0.5 to 1.5 (note: 0 to 0.5 omitted)
  pdfSI <- diff(pgamma(q = seq(0.5, 10.5, 1), scale = 1.0, shape = 4))
  pdfSI <- pdfSI/sum(pdfSI)
  g <- pdfSI
  names(g) <- 1:10
  
  #g <- readRDS("/___UITZONDERINGSGROND_6___negSI.rds")

  # sample non-observed cases due to reporting delay from neg binomial
  incidence_adm <- matrix(0, nrow = nrow(epicurve), ncol = nsample)
  for(j in 1:nrow(epicurve)) {
    if(epicurve$adm_incidenceEXPECTED[j] != 0) {
      inctot <-  if(epicurve$adm_incidenceTOT[j] == 0) 1 else epicurve$adm_incidenceTOT[j]
      incidence_adm[j, ] <- rnbinom(n = nsample, size = inctot , prob = inctot/epicurve$adm_incidenceEXPECTED[j]) + inctot
    }
  }
  
  # known SO make up fixed part of incidence matrix
  incidence <- matrix(rep(epicurve$incidenceTOT, nsample), ncol = nsample)
  
  # sampling symtpom onsets
  for(j in 1:nrow(incidence)) {
    for(i in 1:nsample) {
      tmp <- table(j - sample(0:(length(SOtoAdm)-1), size = incidence_adm[j,i] - epicurve$adm_SO_incidenceTOT[j] , prob = SOtoAdm, replace = TRUE))
      tmp <- tmp[names(tmp) > 0]
      if(length(tmp) > 0) incidence[as.integer(names(tmp)), i] <- incidence[as.integer(names(tmp)), i] + tmp
    }
  }
  # for most recent dates divide by sum(SOtoAdm2) to correct for symptom onset of admissions in future (implicitly assuming they remain constant)
  for(j in (nrow(incidence)-length(SOtoAdm)+2):nrow(incidence)) {
    SOtoAdm2 <- SOtoAdm[1:min(length(SOtoAdm), nrow(incidence)-j+1)]
    incidence[j,] <- incidence[j,]/sum(SOtoAdm2)
  }
  
  # construct instantaneous Rt from incidence
  Rt <- matrix(0, nrow = nrow(incidence), ncol = nsample)
  for(t in 1:nrow(incidence)) {
    g2 <- g[as.integer(names(g)) < t & as.integer(names(g)) >= t - nrow(incidence)]
    Rt[t,] <- as.numeric((incidence[t, ] - epicurve$incidenceABROAD[t])/colSums(incidence[t - as.integer(names(g2)), , drop = FALSE]*as.vector(g2)))/sum(g2)
  }
  
  # construct case Ru from instantaneous Rt 
  Ru <- matrix(0, nrow = nrow(incidence), ncol = nsample)
  for(u in 1:nrow(Rt)) {
    g2 <- g[as.integer(names(g)) > - u & as.integer(names(g)) <= nrow(incidence) - u]
    Ru[u,] <- colSums(as.vector(g2)*Rt[u + as.integer(names(g2)), ,drop = FALSE])/sum(g2) # divide by sum(g2) to account for not observed Rt's in future
  }
  Ru[is.na(Ru)] <- 0 # only necessary when SI comprise strictly positive values

  # construct confidence bounds by sampling from poisson
  Rtupper <- rep(0, nrow(incidence))
  Rtlower <- rep(0, nrow(incidence))
  for(j in 1:nrow(incidence)) {
    tmp <- sapply(1:nsample, function(i) if(incidence[j,i] != 0 & Rt[j,i] !=Inf & !is.na(Rt[j,i])) rpois(100, incidence[j,i]*Rt[j,i])/incidence[j,i])
    Rtlower[j] <- quantile(unlist(tmp), probs = 0.025)
    Rtupper[j] <- quantile(unlist(tmp), probs = 0.975)
  }
  
  # construct confidence bounds by sampling from poisson
  Ruupper <- rep(0, nrow(incidence))
  Rulower <- rep(0, nrow(incidence))
  for(j in 1:nrow(incidence)) {
    tmp <- sapply(1:nsample, function(i) if(incidence[j,i] != 0 & Ru[j,i] !=Inf) rpois(100, incidence[j,i]*Ru[j,i])/incidence[j,i])
    Rulower[j] <- quantile(unlist(tmp), probs = 0.025)
    Ruupper[j] <- quantile(unlist(tmp), probs = 0.975)
  }

  # last Ru's are 0, change to NA to prevent plotting them
  Ru[nrow(incidence),] <- NA
  Rulower[nrow(incidence)] <- NA
  Ruupper[nrow(incidence)] <- NA
  
  epicurve <- epicurve %>%
    mutate(incidence_mean = apply(incidence, MARGIN = 1, mean),
           incidence_lower = apply(incidence, MARGIN = 1, quantile, probs = 0.025),
           incidence_upper = apply(incidence, MARGIN = 1, quantile, probs = 0.975),
           instR = apply(Rt, MARGIN = 1, mean),
           instRlower = Rtlower,
           instRupper = Rtupper,
           caseR = apply(Ru, MARGIN = 1, mean),
           caseRlower = Rulower,
           caseRupper = Ruupper)
  
  return(epicurve)
}


plot_totcurve_Osiris <- function(epicurve, IC = FALSE) {
  
  perc_reportedSO <- round(100*sum(epicurve$incidenceTOT)/sum(epicurve$incidence_mean))
  perc_estimatedSO <- 100 - perc_reportedSO
  
  p <- ggplot(data = epicurve,
              mapping = aes(x = dates)) + 
    geom_bar(data = . %>% mutate(adm_incidenceEXTRA = adm_incidenceEXPECTED - adm_incidenceTOT) %>%
               pivot_longer(cols = c(adm_incidenceABROAD, adm_incidenceNL, adm_incidenceEXTRA), names_to = "incidenceType", values_to = "incidence") %>%
               mutate(incidenceType = factor(incidenceType) %>% fct_relevel("adm_incidenceEXTRA", "adm_incidenceNL", "adm_incidenceABROAD")),
             mapping = aes(y = incidence, fill = incidenceType),
             stat = "identity") +
    geom_ribbon(mapping = aes(ymin = incidence_lower, ymax = incidence_upper),
                fill = adjustcolor("#d52b1e", alpha = 0.5)) +
    geom_line(mapping = aes(y = incidence_mean, col = cut(incidence_lower, breaks = c(-1, 1000, 2000)))) +
    scale_fill_manual(
      values = c("grey", "#01689b", "#ffb612"),
      labels = c("verwacht op basis van\nrapportagevertraging", paste0("opnamedatum ", if(IC) "IC" else "ziekenhuis", ", geïnfecteerd\nin Nederland"), paste0("opnamedatum ", if(IC) "IC" else "ziekenhuis", ", geïnfecteerd\nbuiten Nederland"))) +
    scale_color_manual(
      values = c("#d52b1e", 1),
      labels = c(paste0("eerste ziektedag\n (", perc_reportedSO, "% gerapporteerd, ", perc_estimatedSO, "% geschat)"), "")) +
    scale_x_date(
      limits = c(as.Date("2020-02-14"), last(epicurve$dates)+1),
      expand = expansion(add = -0.5),
      breaks = "4 days",
      minor_breaks = "1 day",
      date_labels = "%e %b") +
    labs(x = NULL,
         y = "aantal per dag",
         subtitle = paste("gebaseerd op", if(IC) "IC" else "ziekenhuis", "opnames uit OSIRIS data", last(epicurve$dates))) +
    theme_minimal() +
    theme(legend.position = c(1, 1),
          legend.justification = c(1, 1),
          legend.title = element_blank(),   
          axis.text.x = element_text(angle = 90, hjust = 1, vjust = 0.5),
          panel.grid.minor = element_blank()) +
    guides(fill = guide_legend(order = 1),
           color = guide_legend(order = 2))
  
  return(p)
}

plot_Reff_Osiris <- function(epicurve, caseReff = TRUE, IC = TRUE) {
  p <- 
    ggplot(data = epicurve,
           mapping = aes(x = dates, 
                         y = if(caseReff) caseR else instR, 
                         ymin = if(caseReff) caseRlower else instRlower, 
                         ymax = if(caseReff) caseRupper else instRupper)) + 
    geom_ribbon(fill = "#c6b8cf", col = NA) +
    geom_line(col = "#42145f") +
    geom_line(y = 1, lty = 2) +
    coord_cartesian(
      ylim = c(0,4)) +
    scale_x_date(
      limits = c(as.Date("2020-02-14"), last(epicurve$dates)+1),
      expand = expansion(add = -0.5),
      breaks = "4 days",
      minor_breaks = "1 day",
      date_labels = "%e %b") +
    labs(x = NULL,
         y = if(caseReff) "effectieve R" else "instantane R",
         subtitle = paste("gebaseerd op", if(IC) "IC" else "ziekenhuis", "opnames uit OSIRIS data", last(epicurve$dates))) +
    theme_minimal() +
    theme(axis.text.x = element_text(angle = 90, hjust = 1, vjust = 0.5),
          panel.grid.minor = element_blank())
  
  return(p)
}