Population_Study/Mortality_Rate_Analysis.r
2025-11-26 21:48:29 -07:00

77 lines
4.0 KiB
R

library(tidyverse)
library(fixest)
library(forecast)
########################################################ARIMA
DATA_WOMEN <- readRDS("Data/Cleaned_Data/Mortality_Data/RDS/Mortality_Rate_and_Pandemic_Data_for_Regression.Rds") %>% filter(Sex=='Female')
DATA_MEN <- readRDS("Data/Cleaned_Data/Mortality_Data/RDS/Mortality_Rate_and_Pandemic_Data_for_Regression.Rds") %>% filter(Sex=='Male')
#Create time series data
ST_YEAR <- DATA_MEN %>% pull(Year) %>% min
TS_MEN_US <- DATA_MEN %>% select(Mort_Rate_US) %>% ts(start=ST_YEAR,frequency=1)
TS_MEN_LIN <- DATA_MEN %>% select(Mort_Rate_Lincoln) %>% ts(start=ST_YEAR,frequency=1)
TS_WOMEN_US <- DATA_WOMEN %>% select(Mort_Rate_US) %>% ts(start=ST_YEAR,frequency=1)
TS_WOMEN_LIN <- DATA_WOMEN %>% select(Mort_Rate_Lincoln) %>% ts(start=ST_YEAR,frequency=1)
TS_PANDEMIC <- DATA_MEN %>% select(WUPI,L_WUPI) %>% ts(start=ST_YEAR,frequency=1)
TS_WOMEN_US_INV <- TS_WOMEN_US
FORECAST_XREG <- TS_PANDEMIC
FORECAST_XREG[,] <- 0
MOD_US_MEN <- auto.arima(TS_MEN_US,lambda=0,biasadj=TRUE,xreg=TS_PANDEMIC)
#checkresiduals(MOD_US_MEN)
#plot(forecast(MOD_US_MEN,xreg=FORECAST_XREG))
MOD_US_WOMEN <- auto.arima(TS_WOMEN_US,lambda=0,biasadj=TRUE,xreg=TS_PANDEMIC)
#checkresiduals(MOD_US_WOMEN)
#plot(forecast(MOD_US_WOMEN,xreg=FORECAST_XREG))
MOD_LIN_MEN <- auto.arima(TS_MEN_LIN,biasadj=TRUE,xreg=TS_MEN_US)
MOD_LIN_WOMEN <- auto.arima(TS_WOMEN_LIN,biasadj=TRUE,xreg=TS_WOMEN_US)
############################################Start Simualtion work
SINGLE_MODS <- readRDS("Data/Intermediate_Inputs/Mortality_Regression_Data/Single_Sex_Age_Time_Series_Regression.Rds")
MIN_VALUES <- readRDS("Data/Intermediate_Inputs/Mortality_Regression_Data/Single_Sex_Age_Min_Values_for_Bounding_Predictions.Rds")
MAX_VALUES <- readRDS("Data/Intermediate_Inputs/Mortality_Regression_Data/Single_Sex_Age_Max_Values_for_Bounding_Predictions.Rds")
BASELINE_AGE_ADJUST_MEN <- readRDS("Data/Cleaned_Data/Mortality_Data/RDS/Single_Sex_Age_Population_in_2000.Rds") %>% filter(Sex=='Male') %>% pull(Percent_of_Population)
BASELINE_AGE_ADJUST_MEN
BASELINE_AGE_ADJUST_WOMEN <- readRDS("Data/Cleaned_Data/Mortality_Data/RDS/Single_Sex_Age_Population_in_2000.Rds") %>% filter(Sex=='Female') %>% pull(Percent_of_Population)
#Adjust to just women popualtion (Not all population percent
BASELINE_AGE_ADJUST_WOMEN <- BASELINE_AGE_ADJUST_WOMEN/ sum(BASELINE_AGE_ADJUST_WOMEN )
BASELINE_AGE_ADJUST_MEN <- BASELINE_AGE_ADJUST_MEN/ sum(BASELINE_AGE_ADJUST_MEN )
ST_YEAR <- 2025
END_YEAR <- 2025+40
GAP <- END_YEAR-ST_YEAR
NUM_SIMS <- END_YEAR-ST_YEAR+1
XREG <- cbind(rep(0,NUM_SIMS),rep(0,NUM_SIMS))
#colnames(XREG) <- c("WUPI","L_WUPI")
XREG <- ts(XREG,start=ST_YEAR,frequency=1)
SIM_LIN_WOMEN <-simulate(MOD_LIN_WOMEN,xreg=simulate(MOD_US_WOMEN,xreg=XREG))
SIM_LIN_MEN <- simulate(MOD_LIN_MEN,xreg=simulate(MOD_US_MEN,xreg=XREG))
C_VAL <- rbind(cbind(ST_YEAR:END_YEAR,rep("Female",NUM_SIMS),as.vector(SIM_LIN_MEN)), cbind(ST_YEAR:END_YEAR,rep("Male",NUM_SIMS),as.vector(SIM_LIN_WOMEN))) %>% as_tibble
colnames(C_VAL) <- c("Year","Sex","US_Adj_Death_Rate")
C_VAL$Year <- as.numeric(pull(C_VAL,Year))
C_VAL$US_Adj_Death_Rate <- as.numeric(pull(C_VAL,US_Adj_Death_Rate))
as.numeric(pull(C_VAL,US_Adj_Death_Rate))
###Pedict
RES <- do.call(rbind,lapply(1:86,function(x){return(predict(SINGLE_MODS[[x]],newdata=C_VAL))}))#For each data frame containing each year and sex combination of the forecast, predict the data for each age 0-85. Bind these by row to create a result with ages by row, and year by column
FEMALE <-RES[,1:(ncol(RES)/2)]
FEMALE <- ifelse(FEMALE<MIN_VALUES[1:86],MIN_VALUES[1:86],FEMALE)
MALE <- ifelse(MALE<MIN_VALUES[87:(86*2)],MIN_VALUES[87:(86*2)],MALE)
FEMALE <- ifelse(FEMALE>MAX_VALUES[1:86],MAX_VALUES[1:86],FEMALE)
MALE <- ifelse(MALE>MAX_VALUES[87:(86*2)],MAX_VALUES[87:(86*2)],MALE)
MALE_PRED <- pull(C_VAL[C_VAL$Sex=='Male',],US_Adj_Death_Rate)
FEMALE_PRED <- pull(C_VAL[C_VAL$Sex=='Female',],US_Adj_Death_Rate)
MALE <- MALE*(MALE_PRED/colSums(MALE*BASELINE_AGE_ADJUST_MEN))
FEMALE <- FEMALE*(FEMALE_PRED/colSums(FEMALE*BASELINE_AGE_ADJUST_WOMEN))
RES <- list(MALE,FEMALE)
MALE
MALE[,1]
qbinom(MALE
?qbinom