library(tidyverse)
library(fixest)
####SPLIT OUT THE DATA MANAGEMENT  PULL IN ARIMA
################################Create the data needed to model the age-sex specific death rates
# Single-year-of-age mortality, 1999-2020 extract: keep the identifying
# columns, convert the crude rate to a number with parse_number(), and drop
# rows whose rate did not yield a number.
DF1999 <- read_csv("Data/Raw_Data/Mortality_Rates_New/US_Single_Age_1999-2020.csv") %>%
  select(
    Year,
    Sex,
    Age = `Single-Year Ages Code`,
    Mortality_Rate = `Crude Rate`
  ) %>%
  mutate(Mortality_Rate = parse_number(Mortality_Rate)) %>%
  filter(!is.na(Mortality_Rate)) %>%
  mutate(Mortality_Rate = as.numeric(Mortality_Rate))

# Single-year-of-age mortality, 2018-2023 extract: same treatment, with an
# additional pass that removes rows whose raw rate is already missing before
# it is parsed.
DF2018 <- read_csv("Data/Raw_Data/Mortality_Rates_New/US_Single_Age_2018-2023.csv") %>%
  select(
    Year,
    Sex,
    Age = `Single-Year Ages Code`,
    Mortality_Rate = `Crude Rate`
  ) %>%
  filter(!is.na(Mortality_Rate)) %>%
  mutate(Mortality_Rate = parse_number(Mortality_Rate)) %>%
  filter(!is.na(Mortality_Rate)) %>%
  mutate(Mortality_Rate = as.numeric(Mortality_Rate))

# Open-ended "85+" group taken from the ten-year age-group extracts and
# recoded as Age 85. Unlike the single-year tables above, rows whose rate
# fails to parse are NOT removed here.
OLDER1 <- read_csv("Data/Raw_Data/Mortality_Rates_New/US_10_Year_Age_Groups_1999-2020.csv") %>%
  rename(
    Age = `Ten-Year Age Groups Code`,
    Mortality_Rate = `Crude Rate`
  ) %>%
  filter(Age == "85+") %>%
  mutate(
    Age = 85,
    Year = as.numeric(Year),
    Mortality_Rate = parse_number(Mortality_Rate)
  ) %>%
  select(Year, Sex, Age, Mortality_Rate) %>%
  mutate(
    Mortality_Rate = as.numeric(Mortality_Rate),
    Age = as.numeric(Age)
  )
OLDER2 <- read_csv("Data/Raw_Data/Mortality_Rates_New/US_10_Year_Age_Groups_2018-2023.csv") %>%
  rename(
    Age = `Ten-Year Age Groups Code`,
    Mortality_Rate = `Crude Rate`
  ) %>%
  filter(Age == "85+") %>%
  mutate(
    Age = 85,
    Year = as.numeric(Year),
    Mortality_Rate = parse_number(Mortality_Rate)
  ) %>%
  select(Year, Sex, Age, Mortality_Rate) %>%
  mutate(
    Mortality_Rate = as.numeric(Mortality_Rate),
    Age = as.numeric(Age)
  )

# Stack the four tables into one long Year/Sex/Age table. unique() removes
# rows that are exact duplicates (the file names indicate the extracts overlap
# in 2018-2020), rows are ordered by Year, Sex, Age, and Age is forced to
# numeric.
DF <- rbind(DF1999, DF2018, OLDER1, OLDER2) %>%
  unique() %>%
  group_by(Year, Sex, Age) %>%
  arrange(Year, Sex, Age) %>%
  mutate(Age = as.numeric(Age)) %>%
  ungroup()
#hist(US_CAUSES$Death_Rate,breaks=150)
# Overall US death rates by Year and Sex, stacked from the three extracts
# (1979-1998, 1999-2020, 2018-2023). unique() removes rows that are exact
# duplicates across the files.
# NOTE(review): the column is named US_Adj_Death_Rate but is read from
# `Crude Rate` in all three files -- confirm this is the intended field.
US_AGE_ADJ <- rbind(
  read_csv("Data/Raw_Data/Mortality_Rates_New/US_Age_Adjusted_1979-1998.csv") %>%
    select(Year, Sex, US_Adj_Death_Rate = `Crude Rate`),
  read_csv("Data/Raw_Data/Mortality_Rates_New/US_Age_Adjusted_1999-2020.csv") %>%
    select(Year, Sex, US_Adj_Death_Rate = `Crude Rate`),
  read_csv("Data/Raw_Data/Mortality_Rates_New/US_Age_Adjusted_2018-2023.csv") %>%
    select(Year, Sex, US_Adj_Death_Rate = `Crude Rate`)
) %>%
  unique()

# Regression table: attach the overall US rate to every age-specific row, then
# spread the ages into columns named Age_<age> so each age can be used as its
# own response variable.
# The join keys are spelled out (Year and Sex are the only columns the two
# tables share) so the join cannot silently pick up extra keys if either table
# gains a column.
REG_DATA <- DF %>%
  left_join(US_AGE_ADJ, by = c("Year", "Sex")) %>%
  pivot_wider(
    values_from = Mortality_Rate,
    names_from = Age,
    names_prefix = "Age_"
  )
# Model all ages and both sexes in a single call. The left-hand side
# Age_.[0:85] uses fixest's dot-square-bracket expansion to name the columns
# Age_0 ... Age_85, so MOD holds one fit per age; each age's rate is regressed
# on the overall US rate plus a Sex-by-Year interaction.
MOD <- feols(
  fml = Age_.[0:85] ~ US_Adj_Death_Rate + Sex * Year,
  data = REG_DATA
)

### Simulate each age-sex death rate over time with the models
# When projecting far into the future some predicted death rates become
# negative. Bounds limit the forecast to a reasonable range: for each age the
# lower bound is half of the historic minimum and the upper bound is double the
# historic maximum observed in the study period.
# na.rm = TRUE: the 85+ rows are not filtered for missing rates when they are
# loaded, so a single unparsable value would otherwise turn that age's bounds
# into NA.
BOUNDS <- DF %>%
  group_by(Age) %>%
  summarize(
    MAX_RATE = 2 * max(Mortality_Rate, na.rm = TRUE),
    MIN_RATE = min(Mortality_Rate, na.rm = TRUE) / 2
  )
# One entry per age, in the ascending Age order produced by the grouped summary.
MAX_BOUND <- BOUNDS %>% pull(MAX_RATE)
MIN_BOUND <- BOUNDS %>% pull(MIN_RATE)
# Proxy predictor table for the simulation: the Year, Sex and overall US rate
# columns of the regression data, with every Year moved forward by
# (2025 - 1999) = 26 years so that 1999 lines up with the 2025 start of the
# forecast.
C_VAL <- REG_DATA %>%
  select(Year, Sex, US_Adj_Death_Rate) %>%
  mutate(Year = Year + (2025 - 1999))
### Mostly working: pass in a data frame with year, sex, and US age adjusted
# mortality rate. The years should go from the simulation start (2025) to the
# end (roughly 2045).
# TODO: what is missing is to pass the ARIMA results of the US age adjusted
# mortality rates as applied in Lincoln to replace the age adjusted mortality
# term. Once that is done, a new simulation will give the age specific
# mortality rates based on the forecasted Lincoln average rates.

# Predict every row of C_VAL (one year and sex combination each) with each of
# the 86 age models (ages 0-85) and bind the predictions by row: ages run down
# the rows, year/sex combinations across the columns.
RES <- do.call(
  rbind,
  lapply(seq_len(86), function(i) predict(MOD[[i]], newdata = C_VAL))
)

# The bounds hold one value per age and are recycled down each column of RES,
# which only lines up when there is exactly one bound per row. Fail loudly
# instead of recycling silently out of step.
stopifnot(
  length(MIN_BOUND) == nrow(RES),
  length(MAX_BOUND) == nrow(RES)
)

# Clamp the predictions to the per-age bounds. Both steps operate on RES
# itself (the earlier version read an undefined TEMP object, and its second
# step discarded the result of the first).
RES <- pmax(RES, MIN_BOUND) # Make sure the values are not too low to be reasonable estimates
RES <- pmin(RES, MAX_BOUND) # Make sure the values are not too high to be reasonable estimates

RES <- RES / 10^5 # Chance of death per person