Working on arima sim with age splits

This commit is contained in:
Alex Gebben Work 2025-11-25 16:03:08 -07:00
parent 3bf85ce48b
commit d1d4682b72
5 changed files with 106 additions and 173 deletions

View File

@ -10,4 +10,4 @@ Data is manually gathered from CDC WONDER data queries.
4) The world pandemic uncertainty index as collected from FRED which is used to account for pandemics in the regression, making the age time series stationary. 4) The world pandemic uncertainty index as collected from FRED which is used to account for pandemics in the regression, making the age time series stationary.
These are used to project mortality trends over time. In the case of the age adjusted data, this has local trends that can be compared to the national average. The single age-sex data is only at a national level but can be imparted to local levels as a general trend in the distribution of deaths These are used to project mortality trends over time. In the case of the age adjusted data, this has local trends that can be compared to the national average. The single age-sex data is only at a national level but can be imparted to local levels as a general trend in the distribution of deaths
--- Run Date: 2025-11-23 15:59:28 --- --- Run Date: 2025-11-25 12:04:27 ---

View File

@ -3,183 +3,76 @@ library(fixest)
library(forecast) library(forecast)
########################################################ARIMA ########################################################ARIMA
DATA_WOMEN <- readRDS("Data/Cleaned_Data/Mortality_Data/RDS/Mortality_Rate_and_Pandemic_Data_for_Regression.Rds") %>% filter(Sex=='Female')
DATA_MEN <- readRDS("Data/Cleaned_Data/Mortality_Data/RDS/Mortality_Rate_and_Pandemic_Data_for_Regression.Rds") %>% filter(Sex=='Male')
MOD_WOMEN <- feols(Mort_Rate_US~L_Mort_Rate_US+Year+WUPI,REG_DATA %>% filter(Sex=='Female')) #Create time series data
acf(resid(MOD_WOMEN))
MOD_WOMEN
MOD_MEN <- feols(Mort_Rate_US~L_Mort_Rate_US+Year+WUPI,REG_DATA %>% filter(Sex=='Male'))
###Lincoln
MOD_WOMEN <- feols(Mort_Rate_Lincoln~Mort_Rate_US,REG_DATA %>% filter(Sex=='Female'))
acf(resid(MOD_WOMEN))
MOD_WOMEN
MOD_MEN <- feols(Mort_Rate_US~L_Mort_Rate_US+Year+WUPI,REG_DATA %>% filter(Sex=='Male'))
acf(resid(MOD_MEN))
plot(resid(MOD_MEN))
plot(predict(MOD_MEN))
DATA_MEN <- REG_DATA %>% filter(Sex=='Male')
DATA_WOMEN <- REG_DATA %>% filter(Sex=='Female')
ST_YEAR <- DATA_MEN %>% pull(Year) %>% min ST_YEAR <- DATA_MEN %>% pull(Year) %>% min
TS_MEN_US <- DATA_MEN %>% select(Mort_Rate_US) %>% ts(start=ST_YEAR,frequency=1) TS_MEN_US <- DATA_MEN %>% select(Mort_Rate_US) %>% ts(start=ST_YEAR,frequency=1)
TS_MEN_LIN <- DATA_MEN %>% select(Mort_Rate_Lincoln) %>% ts(start=ST_YEAR,frequency=1) TS_MEN_LIN <- DATA_MEN %>% select(Mort_Rate_Lincoln) %>% ts(start=ST_YEAR,frequency=1)
TS_PANDEMIC <- DATA_MEN %>% select(WUPI,L_WUPI) %>% ts(start=ST_YEAR,frequency=1)
TS_WOMEN_US <- DATA_WOMEN %>% select(Mort_Rate_US) %>% ts(start=ST_YEAR,frequency=1) TS_WOMEN_US <- DATA_WOMEN %>% select(Mort_Rate_US) %>% ts(start=ST_YEAR,frequency=1)
TS_WOMEN_LIN <- DATA_WOMEN %>% select(Mort_Rate_Lincoln) %>% ts(start=ST_YEAR,frequency=1) TS_WOMEN_LIN <- DATA_WOMEN %>% select(Mort_Rate_Lincoln) %>% ts(start=ST_YEAR,frequency=1)
TS_MEN_US_INV <- TS_MEN_US TS_PANDEMIC <- DATA_MEN %>% select(WUPI,L_WUPI) %>% ts(start=ST_YEAR,frequency=1)
TS_WOMEN_US_INV <- TS_WOMEN_US TS_WOMEN_US_INV <- TS_WOMEN_US
FORECAST_XREG <- TS_PANDEMIC FORECAST_XREG <- TS_PANDEMIC
FORECAST_XREG[,] <- 0 FORECAST_XREG[,] <- 0
MOD_US_MEN <- auto.arima(TS_MEN_US,lambda=0,biasadj=TRUE,xreg=TS_PANDEMIC) MOD_US_MEN <- auto.arima(TS_MEN_US,lambda=0,biasadj=TRUE,xreg=TS_PANDEMIC)
checkresiduals(MOD_US_MEN) #checkresiduals(MOD_US_MEN)
plot(forecast(MOD_US_MEN,xreg=FORECAST_XREG)) #plot(forecast(MOD_US_MEN,xreg=FORECAST_XREG))
MOD_US_WOMEN <- auto.arima(TS_WOMEN_US,lambda=0,biasadj=TRUE,xreg=TS_PANDEMIC) MOD_US_WOMEN <- auto.arima(TS_WOMEN_US,lambda=0,biasadj=TRUE,xreg=TS_PANDEMIC)
checkresiduals(MOD_US_WOMEN) #checkresiduals(MOD_US_WOMEN)
plot(forecast(MOD_US_WOMEN,xreg=FORECAST_XREG)) #plot(forecast(MOD_US_WOMEN,xreg=FORECAST_XREG))
MOD_LIN <- auto.arima(TS_MEN_LIN,biasadj=TRUE,xreg=TS_MEN_US) MOD_LIN_MEN <- auto.arima(TS_MEN_LIN,biasadj=TRUE,xreg=TS_MEN_US)
simulate(MOD_US_MEN,xreg=FORECAST_XREG) MOD_LIN_WOMEN <- auto.arima(TS_WOMEN_LIN,biasadj=TRUE,xreg=TS_WOMEN_US)
plot(simulate(MOD_LIN,xreg=simulate(MOD_US_MEN,xreg=FORECAST_XREG))) ############################################Start Simualtion work
plot(forecast(MOD_LIN,xreg=simulate(MOD_US_MEN,xreg=FORECAST_XREG))) SINGLE_MODS <- readRDS("Data/Intermediate_Inputs/Mortality_Regression_Data/Single_Sex_Age_Time_Series_Regression.Rds")
################################Other work MIN_VALUES <- readRDS("Data/Intermediate_Inputs/Mortality_Regression_Data/Single_Sex_Age_Min_Values_for_Bounding_Predictions.Rds")
SINGLE_DATA <- read_csv("Data/Raw_Data/Mortality_Rates_New/US_Single_Age_1999-2020.csv") %>% select(Year,Sex,Age=`Single-Year Ages Code`,Mortality_Rate=`Crude Rate`) %>% mutate(Mortality_Rate=parse_number(Mortality_Rate)) %>% filter(!is.na(Mortality_Rate)) MAX_VALUES <- readRDS("Data/Intermediate_Inputs/Mortality_Regression_Data/Single_Sex_Age_Max_Values_for_Bounding_Predictions.Rds")
BASELINE_AGE_ADJUST_MEN <- readRDS("Data/Cleaned_Data/Mortality_Data/RDS/Single_Sex_Age_Population_in_2000.Rds") %>% filter(Sex=='Male') %>% pull(Percent_of_Population)
OLDER <- read_csv("Data/Raw_Data/Mortality_Rates_New/US_10_Year_Age_Groups_1999-2020.csv")%>% rename(Age=`Ten-Year Age Groups Code`,Mortality_Rate=`Crude Rate`) %>% filter(Age=='85+')%>% mutate(Age=85,Year=as.numeric(Year),Mortality_Rate=parse_number(Mortality_Rate)) %>% select(Year,Sex,Age,Mortality_Rate)%>% select(colnames(SINGLE_DATA)) BASELINE_AGE_ADJUST_WOMEN <- readRDS("Data/Cleaned_Data/Mortality_Data/RDS/Single_Sex_Age_Population_in_2000.Rds") %>% filter(Sex=='Female') %>% pull(Percent_of_Population)
SINGLE_DATA <- rbind(SINGLE_DATA,OLDER) %>% left_join(REG_DATA %>% select(Year,Sex,US_Rate=Mort_Rate_US)) #Adjust to just women popualtion (Not all population percent
SINGLE_DATA_PLAIN <- SINGLE_DATA BASELINE_AGE_ADJUST_WOMEN <- BASELINE_AGE_ADJUST_WOMEN/ sum(BASELINE_AGE_ADJUST_WOMEN )
BASELINE_AGE_ADJUST_MEN <- BASELINE_AGE_ADJUST_MEN/ sum(BASELINE_AGE_ADJUST_MEN )
SINGLE_DATA <- SINGLE_DATA %>% group_by(Age,Sex) %>% mutate(INDEX=Mortality_Rate/sum(ifelse(Year==min(Year),Mortality_Rate,0)),US_INDEX=US_Rate/sum(ifelse(Year==min(Year),US_Rate,0))) %>% ungroup
ggplot(SINGLE_DATA,aes(x=Year,y=INDEX,group=Age,color=Age))+geom_point()+geom_smooth(aes(y=US_INDEX),se=FALSE,color="black",linetype=2,linewidth=2) + facet_grid(rows= vars(Sex))
ggplot(SINGLE_DATA,aes(x=Year,y=INDEX,group=Age,color=Age))+geom_point(size=0.5)+geom_line(aes(y=US_INDEX),color="black",linetype=2,linewidth=1) + facet_grid(rows= vars(Sex))
ggplot(SINGLE_DATA,aes(x=Year,y=Mortality_Rate,group=Age,color=Age))+geom_point() + scale_y_log10() + facet_grid(. ~ Sex)
ggplot(SINGLE_DATA,aes(x=Year,y=INDEX,group=Age,color=Age))+geom_smooth(se=FALSE,method="lm")+ scale_y_log10() + facet_grid(. ~ Sex)
COR_DAT <- SINGLE_DATA %>% arrange(Sex,Age) %>% select(-US_Rate,-US_INDEX,-INDEX) %>% pivot_wider(values_from=c(Mortality_Rate),names_from=c(Sex,Age)) %>% arrange(Year) %>% select(-Year) %>% as.matrix
rownames(COR_DAT) <- 1999:2020
TEST <- matrix(as.numeric(COR_DAT[1,]),nrow(COR_DAT),nrow=nrow(COR_DAT),ncol=ncol(COR_DAT))
COR_DAT <- COR_DAT/TEST
#COR_DAT <- t(COR_DAT)
#US_COR_DATA <- SINGLE_DATA %>% select(Year,Sex,US_Rate) %>% unique %>% pivot_wider(values_from=US_Rate,names_from=Sex) %>% arrange(Year)
#COR_DAT <- cbind(US_COR_DATA,COR_DAT) %>% as.matrix
library(factoextra)
COR_DAT
corrplot(cor(COR_DAT[,1:86]),type="lower")
corrplot(cor(COR_DAT[,87:172]),type="lower")
COR_DAT <- t(COR_DAT)
COR_DAT <- COR_DAT[,-1]
fviz_nbclust(COR_DAT,kmeans,"gap_stat")
fviz_nbclust(COR_DAT,kmeans,"wss",nboot=1000)
fviz_nbclust(COR_DAT,kmeans,nboot=1000)
km.res <- kmeans(COR_DAT, 3, nstart = 1)
fviz_cluster(km.res,COR_DAT)
print(km.res)
COR_DAT
SINGLE_DATA %>% filter(!is.numeric(Mortality_Rate))
SINGLE_DATA <- SINGLE_DATA %>% mutate(Age=as.numeric(Age),US_Rate=as.numeric(US_Rate),Year=as.numeric(Year),Mortality_Rate=as.numeric(Mortality_Rate))
SINGLE_DATA %>% filter(is.numeric(Age),is.numeric(Mortality_Rate),is.numeric(US_Rate))
DATA <- SINGLE_DATA %>% left_join(PANDIMIC_INDEX ) %>% ungroup %>% group_by(Sex,Year) %>% mutate(Rank=rank(Mortality_Rate)) %>% ungroup %>% group_by(Year,Sex) %>% mutate(PER_MORT_MAX=Mortality_Rate/max(Mortality_Rate))
feols(Mortality_Rate~factor(Rank)+Sex+Year,DATA)
MOD <- feols(log(Mortality_Rate)~log(US_Rate)*Sex+factor(Rank)+Sex|Year,DATA)
MOD
plot(resid(MOD))
DATA$RESID <- resid(MOD)
acf((DATA %>% filter(Age==85,Sex=='Female') %>% pull(RESID)))
DATA
ggplot(SINGLE_DATA,aes(x=Year,y=Mortality_Rate,group=Age,color=Age))+geom_point() + scale_y_log10() + facet_grid(. ~ Sex)
TEST <- SINGLE_DATA %>% group_by(Year,Sex) %>% mutate(TEST=rank(Mortality_Rate)) %>%ungroup
ggplot(TEST,aes(x=Year,y=TEST,group=Age,color=Age)) +geom_point()+ geom_smooth()
TEMP <- SINGLE_DATA %>% group_by(Sex,Year) %>% summarize(GAP=max(Mortality_Rate)-min(Mortality_Rate))
ggplot(TEMP,aes(x=Year,y=GAP,group=Sex,color=Sex))+geom_line()
MOD <- feols(log(Mortality_Rate)~factor(Age)*(log(US_Rate)+WUPI+L_WUPI)+Year,data=as_tibble(SINGLE_DATA))
SINGLE_DATA
plot(resid(MOD))
TEMP <- SINGLE_DATA
TEMP$RESID <- resid(MOD)
acf(TEMP %>% filter(Age==80) %>% pull(RESID))
etable(MOD,group=list("Single Age"="factor"))
SINGLE_DATA[87,]
resid(MOD)
predict
REG_SINGLE_DATA <- SINGLE_DATA_PLAIN %>% mutate( %>% pivot_wider(values_from="Mortality_Rate",names_from=c("Age")) ST_YEAR <- 2025
MALE <- REG_SINGLE_DATA %>% filter(Sex=='Male') END_YEAR <- 2025+40
FEMALE <- REG_SINGLE_DATA %>% filter(Sex=='Female') GAP <- END_YEAR-ST_YEAR
library(corrplot) NUM_SIMS <- END_YEAR-ST_YEAR+1
corrplot(cor(MALE %>% select(-Sex)))
US_CAUSES <- read_csv("Data/Raw_Data/Mortality_Rates_New/US_Cause_of_Death_1999-2020.csv") %>% select(Year,ICD=`ICD Sub-Chapter Code`,Death_Rate=`Crude Rate`) %>% filter(!is.na(Death_Rate)) %>% mutate(Death_Rate=ifelse(Death_Rate=='Suppressed' |Death_Rate=='Unreliable',NA,Death_Rate)) %>% rbind(read_csv("Data/Raw_Data/Mortality_Rates_New/US_Cause_of_Death_2018-2023.csv") %>% select(Year,ICD=`ICD Sub-Chapter Code`,Death_Rate=`Crude Rate`) %>% filter(!is.na(Death_Rate)) %>% mutate(Death_Rate=ifelse(Death_Rate=='Suppressed' |Death_Rate=='Unreliable',NA,Death_Rate))) %>% mutate(Death_Rate=parse_number(Death_Rate)) %>% arrange(Year,ICD) %>% group_by(ICD) %>% filter(max(is.na(Death_Rate))==0,min(Death_Rate)!=max(Death_Rate)) %>% ungroup %>% unique
BIND <- read_csv("Data/Raw_Data/Mortality_Rates_New/US_Cause_of_Death_1999-2020.csv") %>% select(ICD=`ICD Sub-Chapter Code`,NAME=`ICD Sub-Chapter`) %>% unique
US_CAUSES <- US_CAUSES %>% left_join(BIND) %>% select(-ICD) %>% rename(ICD=NAME)
US_CAUSES %>% group_by(ICD) %>% summarize(Rate=mean(Death_Rate)) %>% summarize(ICD,Rate, Rank=rank(desc(Rate))) %>% arrange(Rank)
ggplot(US_CAUSES,aes(x=Year,y=scale(Death_Rate),group=ICD,color=ICD,fill=ICD)) +geom_point() +geom_smooth()+theme(legend.position="bottom")
US_CAUSES
parse_number(REG_SINGLE_DATA[,3:89])
US_CAUSES <- US_CAUSES %>% pivot_wider(values_from=Death_Rate,names_from=ICD)
CAUS_MAT <- US_CAUSES %>% select(-Year)
sd(t(CAUS_MAT)[,118])
CAUS_MAT <- scale(CAUS_MAT) XREG <- cbind(rep(0,NUM_SIMS),rep(0,NUM_SIMS))
#colnames(XREG) <- c("WUPI","L_WUPI")
XREG <- ts(XREG,start=ST_YEAR,frequency=1)
fviz_nbclust(CAUS_MAT,kmeans,"gap_stat") #6 SIM_LIN_WOMEN <-simulate(MOD_LIN_WOMEN,xreg=simulate(MOD_US_WOMEN,xreg=XREG))
fviz_nbclust(CAUS_MAT,kmeans,"wss") #5 SIM_LIN_MEN <- simulate(MOD_LIN_MEN,xreg=simulate(MOD_US_MEN,xreg=XREG))
fviz_nbclust(CAUS_MAT,kmeans) #2 C_VAL <- rbind(cbind(ST_YEAR:END_YEAR,rep("Female",NUM_SIMS),as.vector(SIM_LIN_MEN)), cbind(ST_YEAR:END_YEAR,rep("Male",NUM_SIMS),as.vector(SIM_LIN_WOMEN))) %>% as_tibble
colnames(C_VAL) <- c("Year","Sex","US_Adj_Death_Rate")
km.res <- kmeans(CAUS_MAT, 6, nstart = 1) C_VAL$Year <- as.numeric(pull(C_VAL,Year))
fviz_cluster(km.res,CAUS_MAT) C_VAL$US_Adj_Death_Rate <- as.numeric(pull(C_VAL,US_Adj_Death_Rate))
summary(km.res C_VAL
print(km.res) ###Pedict
km.res$cluster RES <- do.call(rbind,lapply(1:86,function(x){return(predict(SINGLE_MODS[[x]],C_VAL))}))#For each data frame containing each year and sex combination of the forecast, predict the data for each age 0-85. Bind these by row to create a result with ages by row, and year by column
ncol(RES)
corrplot(cor(US_CAUSES)) #RES1 <- RES
MALE <- US_CAUSES %>% left_join(MALE) %>% select(Year,Sex,US_Rate,everything()) #Rows Year, Column Age
MALE %>% tail MIN_MAT <- matrix(rep(MIN_VALUES,ncol(RES)),ncol=ncol(RES))
COR_MALE <- MALE %>% select(-Sex) %>% as.matrix RES <- ifelse(RES<MIN_VALUES,MIN_VALUES,RES)
corrplot(cor(COR_MALE,use="pairwise.complete"),type="lower",diag=FALSE,) RES <- ifelse(RES>MAX_VALUES,MIN_VALUES,RES)
?corrplot FEMALE_RES <- t(RES[,1:NUM_SIMS])
MALE_RES <- t(RES[,(NUM_SIMS+1):(2*NUM_SIMS)])
COR_MALE PRED_ADJ_RATE_WOMEN <- rowSums(FEMALE_RES*BASELINE_AGE_ADJUST_WOMEN)
corrplot(cor(cbind(MALE[,1],MALE[,4:ncol(MALE)]/t(MALE[,3])))) PRED_ADJ_RATE_MEN <- rowSums(MALE_RES*BASELINE_AGE_ADJUST_MEN)
corrplot(cor(log(FEMALE %>% select(-Sex)))) MALE_RES <- MALE_RES*C_VAL[1:(nrow(C_VAL)/2),]$US_Adj_Death_Rate/PRED_ADJ_RATE_MEN
TEMP <- MALE %>% select(RATE=`36`,US_Rate,Year) %>% as.data.frame FEMALE_RES <- MALE_RES*C_VAL[(nrow(C_VAL)/2+1):(nrow(C_VAL)),]$US_Adj_Death_Rate/PRED_ADJ_RATE_WOMEN
TEST <- feols((RATE)~US_Rate+Year,TEMP) #Testing looks good so far
TEST <- feols(RATE/US_Rate~Year,TEMP) FEMALE_RES[,20:30]
acf(as.numeric((TEMP[,'RATE']-predict(TEST)) %>% unlist)) MALE_RES[,20:30]
acf
TEST <- lm(log(`22`)~log(US_Rate)+Year,data=MALE)
predict(TEST
resid(TEST)
lm(`20`~Year+
plot(as.vector(t(FEMALE %>% dplyr::select(`20`))))
%>% pull(`22`)
?corrplot
plot(cor(FEMALE %>% select(-Sex))[2,])
plot(cor(FEMALE %>% select(-Sex))[1,])
MAT <- (cbind(abs(cor(FEMALE %>% select(-Sex))[2,]),abs(cor(FEMALE %>% select(-Sex))[1,])))
plot(apply(MAT,1,min))
MALE
library(fixest)
MOD
MOD <- feols(Age_85~US_Rate,data=MALE)
acf(MALE[,"Age_85"]-predict(MOD))
residuals(MOD)
MOD0
resid(MOD0)

View File

@ -1,9 +1,11 @@
#Clean and collect data sets used in later code. #Clean and collect data sets used in later code.
Rscript "./Scripts/1A_Download_and_Process_Population_Data.r" Rscript "./Scripts/1A_Download_and_Process_Population_Data.r"
Rscript "./Scripts/1B_Process_Existing_NIH_Mortality_Data.r" Rscript "./Scripts/1B_Process_Existing_NIH_Mortality_Data.r" #Somewhat outdated, could use only the data in 1E and 1F but speeds up completion time to not change
Rscript "./Scripts/1C_Download_and_Process_Demographic_Data.r" Rscript "./Scripts/1C_Download_and_Process_Demographic_Data.r"
Rscript "./Scripts/1D_Use_ACS_Census_Data_to_Estimate_Kemmerer_Demographics.r" Rscript "./Scripts/1D_Use_ACS_Census_Data_to_Estimate_Kemmerer_Demographics.r"
Rscript "./Scripts/1E_Process_WONDER_Mortality_Data.r" Rscript "./Scripts/1E_Process_WONDER_Mortality_Data.r"
Rscript "./Scripts/1F_Process_WONDER_Single_Age_Sex_Mortality_Data.r"
#Create data sets used in later simulations, produce some results for the report when related to this process. #Create data sets used in later simulations, produce some results for the report when related to this process.
Rscript "./Scripts/2A_Birth_Rate_Regression_and_Impart_Kemmerer_Births.r" Rscript "./Scripts/2A_Birth_Rate_Regression_and_Impart_Kemmerer_Births.r"
Rscript "./Scripts/2B_Impart_Deaths_and_Migration_to_Subregions.r" Rscript "./Scripts/2B_Impart_Deaths_and_Migration_to_Subregions.r"

View File

@ -2,24 +2,25 @@ library(tidyverse)
library(fixest) library(fixest)
####SPLIT OUT THE DATA MANAGEMENT PULL IN ARIMA ####SPLIT OUT THE DATA MANAGEMENT PULL IN ARIMA
################################Create the data need to model the age-sex specific death rates ################################Create the data need to model the age-sex specific death rates
DF1999 <- read_csv("Data/Raw_Data/Mortality_Rates_New/US_Single_Age_1999-2020.csv") %>% select(Year,Sex,Age=`Single-Year Ages Code`,Mortality_Rate=`Crude Rate`) %>% mutate(Mortality_Rate=parse_number(Mortality_Rate)) %>% filter(!is.na(Mortality_Rate)) %>% mutate(Mortality_Rate=as.numeric(Mortality_Rate)) RAW_DATA_LOC <- "Data/Cleaned_Data/Mortality_Data/RDS/"
DF2018 <- read_csv("Data/Raw_Data/Mortality_Rates_New/US_Single_Age_2018-2023.csv") %>% select(Year,Sex,Age=`Single-Year Ages Code`,Mortality_Rate=`Crude Rate`) %>% filter(!is.na(Mortality_Rate))%>% mutate(Mortality_Rate=parse_number(Mortality_Rate)) %>% filter(!is.na(Mortality_Rate)) %>% mutate(Mortality_Rate=as.numeric(Mortality_Rate)) REG_DATA <- readRDS(paste0(RAW_DATA_LOC,"Single_Sex_Age_US_Mortality_Rate_Data_Wide.Rds"))
if(!exists("SAVE_DATA_LOC")){SAVE_DATA_LOC<- "Data/Intermediate_Inputs/Mortality_Regression_Data/"}
dir.create(SAVE_DATA_LOC, recursive = TRUE, showWarnings = FALSE)
OLDER1 <- read_csv("Data/Raw_Data/Mortality_Rates_New/US_10_Year_Age_Groups_1999-2020.csv")%>% rename(Age=`Ten-Year Age Groups Code`,Mortality_Rate=`Crude Rate`) %>% filter(Age=='85+')%>% mutate(Age=85,Year=as.numeric(Year),Mortality_Rate=parse_number(Mortality_Rate)) %>% select(Year,Sex,Age,Mortality_Rate) %>% mutate(Mortality_Rate=as.numeric(Mortality_Rate),Age=as.numeric(Age))
OLDER2 <- read_csv("Data/Raw_Data/Mortality_Rates_New/US_10_Year_Age_Groups_2018-2023.csv")%>% rename(Age=`Ten-Year Age Groups Code`,Mortality_Rate=`Crude Rate`) %>% filter(Age=='85+')%>% mutate(Age=85,Year=as.numeric(Year),Mortality_Rate=parse_number(Mortality_Rate)) %>% select(Year,Sex,Age,Mortality_Rate)%>% mutate(Mortality_Rate=as.numeric(Mortality_Rate),Age=as.numeric(Age))
DF <- rbind(DF1999,DF2018,OLDER1,OLDER2) %>% unique %>% group_by(Year,Sex,Age) %>% arrange(Year,Sex,Age) %>% mutate(Age=as.numeric(Age)) %>% ungroup
#hist(US_CAUSES$Death_Rate,breaks=150)
#Overall US death rates
US_AGE_ADJ <- rbind(read_csv("Data/Raw_Data/Mortality_Rates_New/US_Age_Adjusted_1979-1998.csv") %>% select(Year,Sex,US_Adj_Death_Rate=`Crude Rate`),read_csv("Data/Raw_Data/Mortality_Rates_New/US_Age_Adjusted_1999-2020.csv") %>% select(Year,Sex,US_Adj_Death_Rate=`Crude Rate`),read_csv("Data/Raw_Data/Mortality_Rates_New/US_Age_Adjusted_2018-2023.csv") %>% select(Year,Sex,US_Adj_Death_Rate=`Crude Rate`)) %>% unique
REG_DATA <- DF %>% left_join(US_AGE_ADJ) %>% pivot_wider(values_from=Mortality_Rate,names_from=Age,names_prefix="Age_")
#####################Model all ages and sex #####################Model all ages and sex
MOD <- feols(Age_.[0:85]~US_Adj_Death_Rate+Sex*Year,REG_DATA) MOD <- feols(Age_.[0:85]~US_Adj_Death_Rate+Sex*Year,REG_DATA)
###Simulate each age-sex death rate over time with the models ###Simulate each age-sex death rate over time with the models
#########When project far into the future some death rate values become negative. Make bounds to limit the forecast to a reasonable range. In this case I select half of the historic minimum, or double the historic maximum as upper an lower bounds in the study period. #########When project far into the future some death rate values become negative. Make bounds to limit the forecast to a reasonable range. In this case I select half of the historic minimum, or double the historic maximum as upper an lower bounds in the study period.
BOUNDS <- DF %>% group_by(Age) %>% summarize(MAX_RATE=2*max(Mortality_Rate),MIN_RATE=min(Mortality_Rate)/2) BOUNDS <- readRDS("Data/Cleaned_Data/Mortality_Data/RDS/Single_Sex_Age_US_Mortality_Rate_Data_Long.Rds") %>% group_by(Age) %>% summarize(MAX_RATE=2*max(Mortality_Rate),MIN_RATE=min(Mortality_Rate)/2)
MAX_BOUND <- BOUNDS %>% pull(MAX_RATE) MAX_BOUND <- BOUNDS %>% pull(MAX_RATE)
MIN_BOUND <- BOUNDS %>% pull(MIN_RATE) MIN_BOUND <- BOUNDS %>% pull(MIN_RATE)
saveRDS(MOD,paste0(SAVE_DATA_LOC,"Single_Sex_Age_Time_Series_Regression.Rds"))
saveRDS(MAX_BOUND,paste0(SAVE_DATA_LOC,"Single_Sex_Age_Max_Values_for_Bounding_Predictions.Rds"))
saveRDS(MIN_BOUND,paste0(SAVE_DATA_LOC,"Single_Sex_Age_Min_Values_for_Bounding_Predictions.Rds"))
#Create a proxy data set to simulate with #Create a proxy data set to simulate with
C_VAL <- REG_DATA %>% mutate(Year=Year+(2025-1999)) %>% select(Year,Sex,US_Adj_Death_Rate) C_VAL <- REG_DATA %>% mutate(Year=Year+(2025-1999)) %>% select(Year,Sex,US_Adj_Death_Rate)
#################NOTE YOU NEED TO ADJUST THE SINGLE AGE DEATH RATE DOWN TO MATCH LINCOLN IN SOME WAY #################NOTE YOU NEED TO ADJUST THE SINGLE AGE DEATH RATE DOWN TO MATCH LINCOLN IN SOME WAY

View File

@ -0,0 +1,37 @@
library(tidyverse)
#setwd("../")
#Define and set all working directories for reading or saving files
if(!exists("DATA_LOC_RAW")){DATA_LOC_RAW <- "./Data/Raw_Data/Mortality_Rates_Over_Time/"}
if(!exists("DATA_SAVE_LOC_RDS")){DATA_SAVE_LOC_RDS <- "./Data/Cleaned_Data/Mortality_Data/RDS/"}
if(!exists("DATA_SAVE_LOC_CSV")){DATA_SAVE_LOC_CSV <- "./Data/Cleaned_Data/Mortality_Data/CSV/"}
dir.create(DATA_SAVE_LOC_RDS, recursive = TRUE, showWarnings = FALSE)
dir.create(DATA_SAVE_LOC_CSV, recursive = TRUE, showWarnings = FALSE)
#get the 2000 age distribution for use to reverse engineer age adjusted values to later use when lining the age adjusted regression with single year-sex mortality estiamtes.
AGE_ADJUST_REF_DATA <- read_csv(paste0(DATA_LOC_RAW,"US_Single_Age_1999-2020.csv")) %>% select(Year,Sex,Age=`Single-Year Ages Code`,Population) %>% mutate(Population=parse_number(Population)) %>% filter(!is.na(Population)) %>% filter(Year==2000,!is.na(Population))
OLD_ADJUST_REF_DATA <- read_csv(paste0(DATA_LOC_RAW,"US_10_Year_Age_Groups_1999-2020.csv"))%>% rename(Age=`Ten-Year Age Groups Code`) %>% filter(Age=='85+',Year==2000)%>% mutate(Age=85,Year=as.numeric(Year),Population=parse_number(Population)) %>% select(Year,Sex,Age,Population) %>% mutate(Population=as.numeric(Population),Age=as.numeric(Age))
AGE_ADJUST_REF_DATA <- full_join(AGE_ADJUST_REF_DATA,OLD_ADJUST_REF_DATA) %>% arrange(Sex,Age) %>% ungroup %>% mutate(Population=as.numeric(Population))
AGE_ADJUST_REF_DATA$Percent_of_Population <- AGE_ADJUST_REF_DATA$Population/sum(AGE_ADJUST_REF_DATA$Population)
saveRDS(AGE_ADJUST_REF_DATA,paste0(DATA_SAVE_LOC_RDS,"Single_Sex_Age_Population_in_2000.Rds" ))
write_csv(AGE_ADJUST_REF_DATA,paste0(DATA_SAVE_LOC_RDS,"Single_Sex_Age_Population_in_2000.Rds.csv" ))
####
DF1999 <- read_csv(paste0(DATA_LOC_RAW,"US_Single_Age_1999-2020.csv")) %>% select(Year,Sex,Age=`Single-Year Ages Code`,Mortality_Rate=`Crude Rate`) %>% mutate(Mortality_Rate=parse_number(Mortality_Rate)) %>% filter(!is.na(Mortality_Rate)) %>% mutate(Mortality_Rate=as.numeric(Mortality_Rate))
sum(AGE_ADJUST_REF_DATA$Population,na.rm=TRUE)/10^6
DF2018 <- read_csv(paste0(DATA_LOC_RAW,"US_Single_Age_2018-2023.csv")) %>% select(Year,Sex,Age=`Single-Year Ages Code`,Mortality_Rate=`Crude Rate`) %>% filter(!is.na(Mortality_Rate))%>% mutate(Mortality_Rate=parse_number(Mortality_Rate)) %>% filter(!is.na(Mortality_Rate)) %>% mutate(Mortality_Rate=as.numeric(Mortality_Rate))
OLDER1 <- read_csv(paste0(DATA_LOC_RAW,"US_10_Year_Age_Groups_1999-2020.csv"))%>% rename(Age=`Ten-Year Age Groups Code`,Mortality_Rate=`Crude Rate`) %>% filter(Age=='85+')%>% mutate(Age=85,Year=as.numeric(Year),Mortality_Rate=parse_number(Mortality_Rate)) %>% select(Year,Sex,Age,Mortality_Rate) %>% mutate(Mortality_Rate=as.numeric(Mortality_Rate),Age=as.numeric(Age))
OLDER2 <- read_csv(paste0(DATA_LOC_RAW,"US_10_Year_Age_Groups_2018-2023.csv"))%>% rename(Age=`Ten-Year Age Groups Code`,Mortality_Rate=`Crude Rate`) %>% filter(Age=='85+')%>% mutate(Age=85,Year=as.numeric(Year),Mortality_Rate=parse_number(Mortality_Rate)) %>% select(Year,Sex,Age,Mortality_Rate)%>% mutate(Mortality_Rate=as.numeric(Mortality_Rate),Age=as.numeric(Age))
DF <- rbind(DF1999,DF2018,OLDER1,OLDER2) %>% unique %>% group_by(Year,Sex,Age) %>% arrange(Year,Sex,Age) %>% mutate(Age=as.numeric(Age)) %>% ungroup
#hist(US_CAUSES$Death_Rate,breaks=150)
#Overall US death rates
DF <- rbind(DF1999,DF2018,OLDER1,OLDER2) %>% unique %>% group_by(Year,Sex,Age) %>% arrange(Year,Sex,Age) %>% mutate(Age=as.numeric(Age)) %>% ungroup
US_AGE_ADJ <- rbind(read_csv(paste0(DATA_LOC_RAW,"US_Age_Adjusted_1979-1998.csv")) %>% select(Year,Sex,US_Adj_Death_Rate=`Crude Rate`),read_csv(paste0(DATA_LOC_RAW,"US_Age_Adjusted_1999-2020.csv")) %>% select(Year,Sex,US_Adj_Death_Rate=`Crude Rate`),read_csv(paste0(DATA_LOC_RAW,"US_Age_Adjusted_2018-2023.csv")) %>% select(Year,Sex,US_Adj_Death_Rate=`Crude Rate`)) %>% unique
US_AGE_ADJ <- rbind(read_csv(paste0(DATA_LOC_RAW,"US_Age_Adjusted_1979-1998.csv")) %>% select(Year,Sex,US_Adj_Death_Rate=`Crude Rate`),read_csv(paste0(DATA_LOC_RAW,"US_Age_Adjusted_1999-2020.csv")) %>% select(Year,Sex,US_Adj_Death_Rate=`Crude Rate`),read_csv(paste0(DATA_LOC_RAW,"US_Age_Adjusted_2018-2023.csv")) %>% select(Year,Sex,US_Adj_Death_Rate=`Crude Rate`)) %>% unique
REG_DATA <- DF %>% left_join(US_AGE_ADJ) %>% pivot_wider(values_from=Mortality_Rate,names_from=Age,names_prefix="Age_")
saveRDS(DF,paste0(DATA_SAVE_LOC_RDS,"Single_Sex_Age_US_Mortality_Rate_Data_Long.Rds" ))
write_csv(DF,paste0(DATA_SAVE_LOC_RDS,"Single_Sex_Age_US_Mortality_Rate_Data_Long.csv" ))
saveRDS(REG_DATA,paste0(DATA_SAVE_LOC_RDS,"Single_Sex_Age_US_Mortality_Rate_Data_Wide.Rds" ))
write_csv(REG_DATA,paste0(DATA_SAVE_LOC_RDS,"Single_Sex_Age_US_Mortality_Rate_Data_Wide.csv" ))