32 lines
4.1 KiB
R
32 lines
4.1 KiB
R
library(tidyverse)
|
|
library(fixest)
|
|
####SPLIT OUT THE DATA MANAGEMENT PULL IN ARIMA
|
|
################################Create the data needed to model the age-sex specific death rates
|
|
# Single-year-of-age mortality, 1999-2020 extract.
# Keep year, sex, the single-year age code and the crude rate; turn the rate
# text into a plain numeric column and drop rows with no parsable rate.
DF1999 <- read_csv("Data/Raw_Data/Mortality_Rates_New/US_Single_Age_1999-2020.csv") %>%
  select(
    Year,
    Sex,
    Age = `Single-Year Ages Code`,
    Mortality_Rate = `Crude Rate`
  ) %>%
  mutate(Mortality_Rate = as.numeric(parse_number(Mortality_Rate))) %>%
  filter(!is.na(Mortality_Rate))

# Single-year-of-age mortality, 2018-2023 extract.
# Same layout as above; rows with a missing raw rate are removed before parsing
# and rows whose rate cannot be parsed are removed afterwards.
DF2018 <- read_csv("Data/Raw_Data/Mortality_Rates_New/US_Single_Age_2018-2023.csv") %>%
  select(
    Year,
    Sex,
    Age = `Single-Year Ages Code`,
    Mortality_Rate = `Crude Rate`
  ) %>%
  filter(!is.na(Mortality_Rate)) %>%
  mutate(Mortality_Rate = as.numeric(parse_number(Mortality_Rate))) %>%
  filter(!is.na(Mortality_Rate))
|
|
|
|
# Open-ended "85+" age group from the ten-year age group tables, 1999-2020.
# Only the '85+' rows are kept and the group is recoded to the numeric age 85
# so it can be stacked with the single-year-of-age tables.
OLDER1 <- read_csv("Data/Raw_Data/Mortality_Rates_New/US_10_Year_Age_Groups_1999-2020.csv") %>%
  select(
    Year,
    Sex,
    Age = `Ten-Year Age Groups Code`,
    Mortality_Rate = `Crude Rate`
  ) %>%
  filter(Age == '85+') %>%
  mutate(
    Year = as.numeric(Year),
    Age = 85,
    Mortality_Rate = as.numeric(parse_number(Mortality_Rate))
  )

# Open-ended "85+" age group from the ten-year age group tables, 2018-2023.
# Processed exactly like the 1999-2020 extract above.
OLDER2 <- read_csv("Data/Raw_Data/Mortality_Rates_New/US_10_Year_Age_Groups_2018-2023.csv") %>%
  select(
    Year,
    Sex,
    Age = `Ten-Year Age Groups Code`,
    Mortality_Rate = `Crude Rate`
  ) %>%
  filter(Age == '85+') %>%
  mutate(
    Year = as.numeric(Year),
    Age = 85,
    Mortality_Rate = as.numeric(parse_number(Mortality_Rate))
  )
|
|
# Stack the four mortality tables into one long table with one row per
# Year / Sex / Age, dropping rows that are exact duplicates (the extracts
# overlap in time).
# No group_by() is used: arrange() and the type conversion are row-wise, and
# dplyr does not let a grouped mutate() overwrite a grouping column.
# Age is converted to numeric before sorting so the rows are ordered by
# numeric age rather than by text (e.g. "10" before "2").
# NOTE(review): rows for the same Year/Sex/Age whose rates differ between
# extracts are NOT removed by unique(); confirm the overlapping years agree.
DF <- rbind(DF1999, DF2018, OLDER1, OLDER2) %>%
  unique() %>%
  mutate(Age = as.numeric(Age)) %>%
  arrange(Year, Sex, Age)
|
|
#hist(US_CAUSES$Death_Rate,breaks=150)
|
|
#Overall US death rates
|
|
# Overall US death rate by year and sex, assembled from three extracts that
# are read the same way, stacked, and de-duplicated.
# NOTE(review): the value stored as US_Adj_Death_Rate is taken from the
# `Crude Rate` column of each file - confirm this is the intended column.
age_adjusted_files <- c(
  "Data/Raw_Data/Mortality_Rates_New/US_Age_Adjusted_1979-1998.csv",
  "Data/Raw_Data/Mortality_Rates_New/US_Age_Adjusted_1999-2020.csv",
  "Data/Raw_Data/Mortality_Rates_New/US_Age_Adjusted_2018-2023.csv"
)

US_AGE_ADJ <- do.call(
  rbind,
  lapply(age_adjusted_files, function(csv_path) {
    read_csv(csv_path) %>%
      select(Year, Sex, US_Adj_Death_Rate = `Crude Rate`)
  })
) %>%
  unique()
|
|
# Regression table: attach the overall US rate to every Year/Sex row of the
# age-specific data, then spread ages into columns (Age_0, Age_1, ...), giving
# one row per Year/Sex with one mortality-rate column per age.
# The join keys are spelled out so the join cannot silently change if either
# table gains another column with a shared name.
REG_DATA <- DF %>%
  left_join(US_AGE_ADJ, by = c("Year", "Sex")) %>%
  pivot_wider(
    names_from = Age,
    values_from = Mortality_Rate,
    names_prefix = "Age_"
  )
|
|
#####################Model all ages and sex
|
|
# Fit one linear model per age column (Age_0 ... Age_85) in a single call:
# each age-specific rate is regressed on the overall US rate plus a
# sex-by-year interaction. MOD holds the fitted models and is indexed by
# position further down the script.
MOD <- feols(
  Age_.[0:85] ~ US_Adj_Death_Rate + Sex * Year,
  data = REG_DATA
)
|
|
|
|
###Simulate each age-sex death rate over time with the models
|
|
#########When projecting far into the future some death rate values become negative. Set bounds to limit the forecast to a reasonable range. In this case I select half of the historic minimum and double the historic maximum in the study period as the lower and upper bounds.
|
|
# Per-age plausibility limits for the forecast: the upper limit is twice the
# largest observed rate for that age and the lower limit is half the smallest.
BOUNDS <- DF %>%
  group_by(Age) %>%
  summarize(
    MAX_RATE = 2 * max(Mortality_Rate),
    MIN_RATE = min(Mortality_Rate) / 2
  )

# Plain numeric vectors of the limits, one element per age in BOUNDS row order.
MAX_BOUND <- BOUNDS[["MAX_RATE"]]

MIN_BOUND <- BOUNDS[["MIN_RATE"]]
|
|
#Create a proxy data set to simulate with
|
|
# Proxy prediction input: reuse the historical Year/Sex/overall-rate rows but
# move every year forward by (2025 - 1999) years.
C_VAL <- REG_DATA %>%
  select(Year, Sex, US_Adj_Death_Rate) %>%
  mutate(Year = Year + (2025 - 1999))
|
|
#################NOTE YOU NEED TO ADJUST THE SINGLE AGE DEATH RATE DOWN TO MATCH LINCOLN IN SOME WAY
|
|
###Mostly Working: Pass in a data frame, with year, sex, and US age adjusted mortality rate. The years should go from the simulation start 2025, to the end roughly 2045. WHAT IS MISSING is to pass the arima results of the US age adjusted mortality rates as applied in Lincoln to replace the age adjusted mortality term. Once that is done, a new simulation will give the age specific mortality rates based on the forecasted Lincoln average rates.
|
|
# Predict with each of the 86 fitted models on the proxy data and stack the
# prediction vectors by row: row i holds the predictions of model i (ages in
# the order the models were fitted), and each column corresponds to one row
# (Year/Sex combination) of C_VAL.
RES <- do.call(
  rbind,
  lapply(seq_len(86), function(age_index) {
    predict(MOD[[age_index]], C_VAL)
  })
)

# The bounds are recycled down the rows of RES, so they must supply exactly
# one value per row for each bound to line up with its age.
stopifnot(
  length(MIN_BOUND) == nrow(RES),
  length(MAX_BOUND) == nrow(RES)
)

# Clamp every prediction into [MIN_BOUND, MAX_BOUND] for its age. Both limits
# are applied to RES itself; the earlier version read from an undefined TEMP
# object and the second step discarded the result of the first.
RES <- pmin(pmax(RES, MIN_BOUND), MAX_BOUND)

RES <- RES / 10^5 # Chance of death per person
|
|
|