Population_Study/Mortality_Rate_Analysis.r
2025-11-25 16:03:08 -07:00

79 lines
4.0 KiB
R

library(tidyverse)
library(fixest)
library(forecast)
########################################################ARIMA
DATA_WOMEN <- readRDS("Data/Cleaned_Data/Mortality_Data/RDS/Mortality_Rate_and_Pandemic_Data_for_Regression.Rds") %>% filter(Sex=='Female')
DATA_MEN <- readRDS("Data/Cleaned_Data/Mortality_Data/RDS/Mortality_Rate_and_Pandemic_Data_for_Regression.Rds") %>% filter(Sex=='Male')
#Create time series data
ST_YEAR <- DATA_MEN %>% pull(Year) %>% min
TS_MEN_US <- DATA_MEN %>% select(Mort_Rate_US) %>% ts(start=ST_YEAR,frequency=1)
TS_MEN_LIN <- DATA_MEN %>% select(Mort_Rate_Lincoln) %>% ts(start=ST_YEAR,frequency=1)
TS_WOMEN_US <- DATA_WOMEN %>% select(Mort_Rate_US) %>% ts(start=ST_YEAR,frequency=1)
TS_WOMEN_LIN <- DATA_WOMEN %>% select(Mort_Rate_Lincoln) %>% ts(start=ST_YEAR,frequency=1)
TS_PANDEMIC <- DATA_MEN %>% select(WUPI,L_WUPI) %>% ts(start=ST_YEAR,frequency=1)
TS_WOMEN_US_INV <- TS_WOMEN_US
FORECAST_XREG <- TS_PANDEMIC
FORECAST_XREG[,] <- 0
MOD_US_MEN <- auto.arima(TS_MEN_US,lambda=0,biasadj=TRUE,xreg=TS_PANDEMIC)
#checkresiduals(MOD_US_MEN)
#plot(forecast(MOD_US_MEN,xreg=FORECAST_XREG))
MOD_US_WOMEN <- auto.arima(TS_WOMEN_US,lambda=0,biasadj=TRUE,xreg=TS_PANDEMIC)
#checkresiduals(MOD_US_WOMEN)
#plot(forecast(MOD_US_WOMEN,xreg=FORECAST_XREG))
MOD_LIN_MEN <- auto.arima(TS_MEN_LIN,biasadj=TRUE,xreg=TS_MEN_US)
MOD_LIN_WOMEN <- auto.arima(TS_WOMEN_LIN,biasadj=TRUE,xreg=TS_WOMEN_US)
############################################Start Simualtion work
SINGLE_MODS <- readRDS("Data/Intermediate_Inputs/Mortality_Regression_Data/Single_Sex_Age_Time_Series_Regression.Rds")
MIN_VALUES <- readRDS("Data/Intermediate_Inputs/Mortality_Regression_Data/Single_Sex_Age_Min_Values_for_Bounding_Predictions.Rds")
MAX_VALUES <- readRDS("Data/Intermediate_Inputs/Mortality_Regression_Data/Single_Sex_Age_Max_Values_for_Bounding_Predictions.Rds")
BASELINE_AGE_ADJUST_MEN <- readRDS("Data/Cleaned_Data/Mortality_Data/RDS/Single_Sex_Age_Population_in_2000.Rds") %>% filter(Sex=='Male') %>% pull(Percent_of_Population)
BASELINE_AGE_ADJUST_WOMEN <- readRDS("Data/Cleaned_Data/Mortality_Data/RDS/Single_Sex_Age_Population_in_2000.Rds") %>% filter(Sex=='Female') %>% pull(Percent_of_Population)
#Adjust to just women popualtion (Not all population percent
BASELINE_AGE_ADJUST_WOMEN <- BASELINE_AGE_ADJUST_WOMEN/ sum(BASELINE_AGE_ADJUST_WOMEN )
BASELINE_AGE_ADJUST_MEN <- BASELINE_AGE_ADJUST_MEN/ sum(BASELINE_AGE_ADJUST_MEN )
ST_YEAR <- 2025
END_YEAR <- 2025+40
GAP <- END_YEAR-ST_YEAR
NUM_SIMS <- END_YEAR-ST_YEAR+1
XREG <- cbind(rep(0,NUM_SIMS),rep(0,NUM_SIMS))
#colnames(XREG) <- c("WUPI","L_WUPI")
XREG <- ts(XREG,start=ST_YEAR,frequency=1)
SIM_LIN_WOMEN <-simulate(MOD_LIN_WOMEN,xreg=simulate(MOD_US_WOMEN,xreg=XREG))
SIM_LIN_MEN <- simulate(MOD_LIN_MEN,xreg=simulate(MOD_US_MEN,xreg=XREG))
C_VAL <- rbind(cbind(ST_YEAR:END_YEAR,rep("Female",NUM_SIMS),as.vector(SIM_LIN_MEN)), cbind(ST_YEAR:END_YEAR,rep("Male",NUM_SIMS),as.vector(SIM_LIN_WOMEN))) %>% as_tibble
colnames(C_VAL) <- c("Year","Sex","US_Adj_Death_Rate")
C_VAL$Year <- as.numeric(pull(C_VAL,Year))
C_VAL$US_Adj_Death_Rate <- as.numeric(pull(C_VAL,US_Adj_Death_Rate))
C_VAL
###Pedict
RES <- do.call(rbind,lapply(1:86,function(x){return(predict(SINGLE_MODS[[x]],C_VAL))}))#For each data frame containing each year and sex combination of the forecast, predict the data for each age 0-85. Bind these by row to create a result with ages by row, and year by column
ncol(RES)
#RES1 <- RES
#Rows Year, Column Age
MIN_MAT <- matrix(rep(MIN_VALUES,ncol(RES)),ncol=ncol(RES))
RES <- ifelse(RES<MIN_VALUES,MIN_VALUES,RES)
RES <- ifelse(RES>MAX_VALUES,MIN_VALUES,RES)
FEMALE_RES <- t(RES[,1:NUM_SIMS])
MALE_RES <- t(RES[,(NUM_SIMS+1):(2*NUM_SIMS)])
PRED_ADJ_RATE_WOMEN <- rowSums(FEMALE_RES*BASELINE_AGE_ADJUST_WOMEN)
PRED_ADJ_RATE_MEN <- rowSums(MALE_RES*BASELINE_AGE_ADJUST_MEN)
MALE_RES <- MALE_RES*C_VAL[1:(nrow(C_VAL)/2),]$US_Adj_Death_Rate/PRED_ADJ_RATE_MEN
FEMALE_RES <- MALE_RES*C_VAL[(nrow(C_VAL)/2+1):(nrow(C_VAL)),]$US_Adj_Death_Rate/PRED_ADJ_RATE_WOMEN
#Testing looks good so far
FEMALE_RES[,20:30]
MALE_RES[,20:30]