diff --git a/1_Run_Full_Simulation.r b/1_Run_Full_Simulation.r index ddf396a..acac409 100644 --- a/1_Run_Full_Simulation.r +++ b/1_Run_Full_Simulation.r @@ -12,6 +12,8 @@ if(RELOAD_DATA){system("bash Prelim_Process.sh")} source("Scripts/Load_Custom_Functions/Migration_Simulation_Functions.r") source("Scripts/Load_Custom_Functions/Birth_Simulation_Functions.r") source("Scripts/Load_Custom_Functions/Increment_Data_Year.r") +source("Scripts/Load_Custom_Functions/Single_Age_Mortality_Trend_Simulation.r") + #######Preliminary Model Inputs YEARS_AHEAD <- 10 NUM_SIMULATIONS <- 10^4 @@ -29,11 +31,36 @@ MIGRATION_MATRIX <- simulate(nsim=NUM_SIMULATIONS,MIGRATION_ARIMA,n=YEARS_AHEAD) MIGRATION_MATRIX <- do.call(cbind, mclapply(1:NUM_SIMULATIONS,function(x)(as.vector(simulate(nsim=YEARS_AHEAD,MIGRATION_ARIMA) )),mc.cores = detectCores()-1)) rownames(MIGRATION_MATRIX) <- ST_YEAR:(ST_YEAR+YEARS_AHEAD-1) colnames(MIGRATION_MATRIX) <- 1:NUM_SIMULATIONS - + #Data for death rate trends + SINGLE_AGE_MODS <- readRDS("Data/Intermediate_Inputs/Mortality_Regression_Data/Single_Sex_Age_Time_Series_Regression_2016.Rds") + BOUNDS <- readRDS("Data/Intermediate_Inputs/Mortality_Regression_Data/Single_Sex_Age_Bounds_for_Predictions.Rds") + MAX_MALE <- BOUNDS %>% filter(Sex=='Male') %>% pull(MAX_RATE) + MIN_MALE <- BOUNDS %>% filter(Sex=='Male') %>% pull(MIN_RATE) + MAX_FEMALE <- BOUNDS %>% filter(Sex=='Female') %>% pull(MAX_RATE) + MIN_FEMALE <- BOUNDS %>% filter(Sex=='Female') %>% pull(MIN_RATE) + MIN_GAP <- BOUNDS %>% filter(Sex=='Male') %>% pull(MIN_MALE_FEMALE_GAP) + MAX_GAP <- BOUNDS %>% filter(Sex=='Male') %>% pull(MAX_MALE_FEMALE_GAP) + #Adjusted for 2016 by filtering REMOVE LATER + BASELINE_AGE_ADJUST_MEN <- readRDS("Data/Cleaned_Data/Mortality_Data/RDS/Single_Sex_Age_Population_in_2000.Rds") %>% filter(Sex=='Male',Year<=2016) %>% pull(Percent_of_Population) + BASELINE_AGE_ADJUST_WOMEN <- readRDS("Data/Cleaned_Data/Mortality_Data/RDS/Single_Sex_Age_Population_in_2000.Rds") %>% filter(Sex=='Female',Year<=2016) %>% pull(Percent_of_Population) + #Adjust to just women popualtion (Not all population percent + BASELINE_AGE_ADJUST_WOMEN <- BASELINE_AGE_ADJUST_WOMEN/sum(BASELINE_AGE_ADJUST_WOMEN ) + BASELINE_AGE_ADJUST_MEN <- BASELINE_AGE_ADJUST_MEN/ sum(BASELINE_AGE_ADJUST_MEN ) + MOD_MEN_ALL <- readRDS("Data/Intermediate_Inputs/Age_Mortality_ARIMA_Models/ARIMA_US_Men_Mortality_by_Age_2016.Rds") + MOD_WOMEN_ALL <- readRDS("Data/Intermediate_Inputs/Age_Mortality_ARIMA_Models/ARIMA_US_Women_Mortality_by_Age_2016.Rds") + MOD_LIN_MEN <- readRDS("Data/Intermediate_Inputs/Age_Mortality_ARIMA_Models/ARIMA_Lincoln_Men_Mortality_by_Age_2016.Rds") + MOD_LIN_WOMEN <- readRDS("Data/Intermediate_Inputs/Age_Mortality_ARIMA_Models/ARIMA_Lincoln_Women_Mortality_by_Age_2016.Rds") + XREG <- cbind(rep(0.0001,YEARS_AHEAD+1),rep(0.0001,YEARS_AHEAD+1)) #Empty data set to simulate in the future + XREG <- ts(XREG,start=ST_YEAR,frequency=1) + SIMULATED_MORTALITY_DATA_SET <- MAKE_EMPTY(ST_YEAR,ST_YEAR+YEARS_AHEAD,MOD_LIN_MEN,MOD_LIN_WOMEN,MOD_MEN_ALL,MOD_WOMEN_ALL,XREG) +MORTALITY_SIMULATION <- AGE_DIST(SINGLE_AGE_MODS,SIMULATED_MORTALITY_DATA_SET ,MAX_MALE,MAX_FEMALE,MIN_MALE,MIN_FEMALE,MAX_GAP,MIN_GAP,BASELINE_AGE_ADJUST_MEN,BASELINE_AGE_ADJUST_WOMEN) + #####################START YEAR BY SIMULATIONS CURRENT_YEARS_AHEAD <- 1 CURRENT_SIM_NUM <- 58 +ORIG_DEMO <- DEMO DEMO <- DEMOGRAPHICS_AFTER_MIGRATION(DEMO, MIGRATION_MATRIX[CURRENT_YEARS_AHEAD ,CURRENT_SIM_NUM],MIGRATION_ODDS ) +TOTAL_MIGRATION <- sum(DEMO-ORIG_DEMO) BIRTH_DATA$Year <- BIRTH_DATA$Year+1 BIRTH_DATA$Lag_Two_Births <- BIRTH_DATA$Lag_Births BIRTH_DATA$Lag_Births <- BIRTH_DATA$Births @@ -41,8 +68,19 @@ BIRTH_DATA$Births <- NA ##We grab one year earlier than the window because they are one year older this year. Because the ages are from 0-85, row 18 is year 17, but one year is added making it 18 years in the current year. The birth windows are 18-28 for women and 18-30 for men. BIRTH_DATA$Min_Birth_Group <- min(sum(DEMO[18:30,1]),sum(DEMO[18:28,2])) NEW_BORNS <- BIRTH_SIM(BIRTH_MOD,BIRTH_DATA) +TOTAL_BIRTHS <- sum(NEW_BORNS) +BIRTH_DATA[,"Births"] <-TOTAL_BIRTHS DEMO <- INCREMENT_AGES(DEMO,NEW_BORNS) -DEMO +MALE_DEATHS <- sapply(1:86,function(x){rbinom(1,DEMO[x,1],MORTALITY_SIMULATION[[1]][x,CURRENT_YEARS_AHEAD])}) +FEMALE_DEATHS <- sapply(1:86,function(x){rbinom(1,DEMO[x,2],MORTALITY_SIMULATION[[2]][x,CURRENT_YEARS_AHEAD])}) +TOTAL_DEATHS <- sum(MALE_DEATHS+FEMALE_DEATHS) +DEMO[,"Num_Male"] <- DEMO[,"Num_Male"] -MALE_DEATHS +DEMO[,"Num_Female"] <- DEMO[,"Num_Female"] -MALE_DEATHS +#List of values needed for the next run or for reporting a result +list(DEMO,BIRTH_DATA,c(TOTAL_BIRTHS,TOTAL_DEATHS,TOTAL_MIGRATION)) + + + diff --git a/Data/Raw_Data/Mortality_Rates_Over_Time/README_MORTALITY_DATA.txt b/Data/Raw_Data/Mortality_Rates_Over_Time/README_MORTALITY_DATA.txt index 7bdcec0..4c05cce 100644 --- a/Data/Raw_Data/Mortality_Rates_Over_Time/README_MORTALITY_DATA.txt +++ b/Data/Raw_Data/Mortality_Rates_Over_Time/README_MORTALITY_DATA.txt @@ -10,4 +10,4 @@ Data is manually gathered from CDC WONDER data queries. 4) The world pandemic uncertainty index as collected from FRED which is used to account for pandemics in the regression, making the age time series stationary. These are used to project mortality trends over time. In the case of the age adjusted data, this has local trends that can be compared to the national average. The single age-sex data is only at a national level but can be imparted to local levels as a general trend in the distribution of deaths ---- Run Date: 2025-11-25 12:04:27 --- +--- Run Date: 2025-12-01 16:10:03 --- diff --git a/Mortality_Rate_Analysis.r b/Mortality_Rate_Analysis.r deleted file mode 100644 index 5be75c2..0000000 --- a/Mortality_Rate_Analysis.r +++ /dev/null @@ -1,76 +0,0 @@ -library(tidyverse) -library(fixest) -library(forecast) - -########################################################ARIMA -DATA_WOMEN <- readRDS("Data/Cleaned_Data/Mortality_Data/RDS/Mortality_Rate_and_Pandemic_Data_for_Regression.Rds") %>% filter(Sex=='Female') -DATA_MEN <- readRDS("Data/Cleaned_Data/Mortality_Data/RDS/Mortality_Rate_and_Pandemic_Data_for_Regression.Rds") %>% filter(Sex=='Male') - -#Create time series data -ST_YEAR <- DATA_MEN %>% pull(Year) %>% min - -TS_MEN_US <- DATA_MEN %>% select(Mort_Rate_US) %>% ts(start=ST_YEAR,frequency=1) -TS_MEN_LIN <- DATA_MEN %>% select(Mort_Rate_Lincoln) %>% ts(start=ST_YEAR,frequency=1) - -TS_WOMEN_US <- DATA_WOMEN %>% select(Mort_Rate_US) %>% ts(start=ST_YEAR,frequency=1) -TS_WOMEN_LIN <- DATA_WOMEN %>% select(Mort_Rate_Lincoln) %>% ts(start=ST_YEAR,frequency=1) - -TS_PANDEMIC <- DATA_MEN %>% select(WUPI,L_WUPI) %>% ts(start=ST_YEAR,frequency=1) - -TS_WOMEN_US_INV <- TS_WOMEN_US -FORECAST_XREG <- TS_PANDEMIC -FORECAST_XREG[,] <- 0 -MOD_US_MEN <- auto.arima(TS_MEN_US,lambda=0,biasadj=TRUE,xreg=TS_PANDEMIC) -#checkresiduals(MOD_US_MEN) - -#plot(forecast(MOD_US_MEN,xreg=FORECAST_XREG)) -MOD_US_WOMEN <- auto.arima(TS_WOMEN_US,lambda=0,biasadj=TRUE,xreg=TS_PANDEMIC) -#checkresiduals(MOD_US_WOMEN) -#plot(forecast(MOD_US_WOMEN,xreg=FORECAST_XREG)) - -MOD_LIN_MEN <- auto.arima(TS_MEN_LIN,biasadj=TRUE,xreg=TS_MEN_US) -MOD_LIN_WOMEN <- auto.arima(TS_WOMEN_LIN,biasadj=TRUE,xreg=TS_WOMEN_US) -############################################Start Simualtion work -SINGLE_MODS <- readRDS("Data/Intermediate_Inputs/Mortality_Regression_Data/Single_Sex_Age_Time_Series_Regression.Rds") -MIN_VALUES <- readRDS("Data/Intermediate_Inputs/Mortality_Regression_Data/Single_Sex_Age_Min_Values_for_Bounding_Predictions.Rds") -MAX_VALUES <- readRDS("Data/Intermediate_Inputs/Mortality_Regression_Data/Single_Sex_Age_Max_Values_for_Bounding_Predictions.Rds") -BASELINE_AGE_ADJUST_MEN <- readRDS("Data/Cleaned_Data/Mortality_Data/RDS/Single_Sex_Age_Population_in_2000.Rds") %>% filter(Sex=='Male') %>% pull(Percent_of_Population) -BASELINE_AGE_ADJUST_MEN -BASELINE_AGE_ADJUST_WOMEN <- readRDS("Data/Cleaned_Data/Mortality_Data/RDS/Single_Sex_Age_Population_in_2000.Rds") %>% filter(Sex=='Female') %>% pull(Percent_of_Population) -#Adjust to just women popualtion (Not all population percent -BASELINE_AGE_ADJUST_WOMEN <- BASELINE_AGE_ADJUST_WOMEN/ sum(BASELINE_AGE_ADJUST_WOMEN ) -BASELINE_AGE_ADJUST_MEN <- BASELINE_AGE_ADJUST_MEN/ sum(BASELINE_AGE_ADJUST_MEN ) - -ST_YEAR <- 2025 -END_YEAR <- 2025+40 -GAP <- END_YEAR-ST_YEAR -NUM_SIMS <- END_YEAR-ST_YEAR+1 - -XREG <- cbind(rep(0,NUM_SIMS),rep(0,NUM_SIMS)) -#colnames(XREG) <- c("WUPI","L_WUPI") -XREG <- ts(XREG,start=ST_YEAR,frequency=1) -SIM_LIN_WOMEN <-simulate(MOD_LIN_WOMEN,xreg=simulate(MOD_US_WOMEN,xreg=XREG)) -SIM_LIN_MEN <- simulate(MOD_LIN_MEN,xreg=simulate(MOD_US_MEN,xreg=XREG)) -C_VAL <- rbind(cbind(ST_YEAR:END_YEAR,rep("Female",NUM_SIMS),as.vector(SIM_LIN_MEN)), cbind(ST_YEAR:END_YEAR,rep("Male",NUM_SIMS),as.vector(SIM_LIN_WOMEN))) %>% as_tibble -colnames(C_VAL) <- c("Year","Sex","US_Adj_Death_Rate") -C_VAL$Year <- as.numeric(pull(C_VAL,Year)) -C_VAL$US_Adj_Death_Rate <- as.numeric(pull(C_VAL,US_Adj_Death_Rate)) -as.numeric(pull(C_VAL,US_Adj_Death_Rate)) -###Pedict -RES <- do.call(rbind,lapply(1:86,function(x){return(predict(SINGLE_MODS[[x]],newdata=C_VAL))}))#For each data frame containing each year and sex combination of the forecast, predict the data for each age 0-85. Bind these by row to create a result with ages by row, and year by column -FEMALE <-RES[,1:(ncol(RES)/2)] -FEMALE <- ifelse(FEMALEMAX_VALUES[1:86],MAX_VALUES[1:86],FEMALE) -MALE <- ifelse(MALE>MAX_VALUES[87:(86*2)],MAX_VALUES[87:(86*2)],MALE) - -MALE_PRED <- pull(C_VAL[C_VAL$Sex=='Male',],US_Adj_Death_Rate) -FEMALE_PRED <- pull(C_VAL[C_VAL$Sex=='Female',],US_Adj_Death_Rate) -MALE <- MALE*(MALE_PRED/colSums(MALE*BASELINE_AGE_ADJUST_MEN)) -FEMALE <- FEMALE*(FEMALE_PRED/colSums(FEMALE*BASELINE_AGE_ADJUST_WOMEN)) -RES <- list(MALE,FEMALE) -MALE -MALE[,1] -qbinom(MALE - -?qbinom diff --git a/Prelim_Process.sh b/Prelim_Process.sh index e520825..d7080e0 100644 --- a/Prelim_Process.sh +++ b/Prelim_Process.sh @@ -12,5 +12,7 @@ Rscript "./Scripts/2B_Impart_Deaths_and_Migration_to_Subregions.r" Rscript "./Scripts/2C_Migration_by_Age_Regression.r" Rscript "./Scripts/2D_Overall_Migration_ARIMA.r" Rscript "./Scripts/2E_Move_Current_Demographic_Data_to_Current_Year.r" +Rscript "./Scripts/2F_Create_Age_Sex_Specfic_Mortality_Trends.r" +Rscript "./Scripts/2G_Single_Age_Sex_ARIMA_Models.r" #Produce final results for either the simulation, or information for the report, but not anything used in later stages of the simulation Rscript "./Scripts/3A_Population_Pyramid.r" diff --git a/START_HERE_Causes.r b/START_HERE_Causes.r deleted file mode 100644 index 0e45dfd..0000000 --- a/START_HERE_Causes.r +++ /dev/null @@ -1,24 +0,0 @@ -library(tidyverse) -library(fixest) -####SPLIT OUT THE DATA MANAGEMENT PULL IN ARIMA -################################Create the data need to model the age-sex specific death rates -RAW_DATA_LOC <- "Data/Cleaned_Data/Mortality_Data/RDS/" -REG_DATA <- readRDS(paste0(RAW_DATA_LOC,"Single_Sex_Age_US_Mortality_Rate_Data_Wide.Rds")) -if(!exists("SAVE_DATA_LOC")){SAVE_DATA_LOC<- "Data/Intermediate_Inputs/Mortality_Regression_Data/"} -dir.create(SAVE_DATA_LOC, recursive = TRUE, showWarnings = FALSE) - - - -#####################Model all ages and sex -MOD <- feols(Age_.[0:85]~US_Adj_Death_Rate+Sex*Year,REG_DATA, data.save = TRUE) -###Simulate each age-sex death rate over time with the models -#########When project far into the future some death rate values become negative. Make bounds to limit the forecast to a reasonable range. In this case I select half of the historic minimum, or double the historic maximum as upper an lower bounds in the study period. -#BOUNDS <- readRDS("Data/Cleaned_Data/Mortality_Data/RDS/Single_Sex_Age_US_Mortality_Rate_Data_Long.Rds") %>% group_by(Age) %>% summarize(MAX_RATE=2*max(Mortality_Rate),MIN_RATE=min(Mortality_Rate)/2) -BOUNDS <- readRDS("Data/Cleaned_Data/Mortality_Data/RDS/Single_Sex_Age_US_Mortality_Rate_Data_Long.Rds") %>% group_by(Sex,Age) %>% summarize(MAX_RATE=2*max(Mortality_Rate),MIN_RATE=min(Mortality_Rate)/2) %>% arrange(Sex,Age) - -MAX_BOUND <- BOUNDS %>% pull(MAX_RATE) -MIN_BOUND <- BOUNDS %>% pull(MIN_RATE) -saveRDS(MOD,paste0(SAVE_DATA_LOC,"Single_Sex_Age_Time_Series_Regression.Rds")) -saveRDS(MAX_BOUND,paste0(SAVE_DATA_LOC,"Single_Sex_Age_Max_Values_for_Bounding_Predictions.Rds")) -saveRDS(MIN_BOUND,paste0(SAVE_DATA_LOC,"Single_Sex_Age_Min_Values_for_Bounding_Predictions.Rds")) - diff --git a/Scripts/2F_Create_Age_Sex_Specfic_Mortality_Trends.r b/Scripts/2F_Create_Age_Sex_Specfic_Mortality_Trends.r new file mode 100644 index 0000000..9a71ee6 --- /dev/null +++ b/Scripts/2F_Create_Age_Sex_Specfic_Mortality_Trends.r @@ -0,0 +1,30 @@ +library(tidyverse) +library(fixest) +################################Create the data need to model the age-sex specific death rates +RAW_DATA_LOC <- "Data/Cleaned_Data/Mortality_Data/RDS/" +REG_DATA <- readRDS(paste0(RAW_DATA_LOC,"Single_Sex_Age_US_Mortality_Rate_Data_Wide.Rds")) + +if(!exists("SAVE_DATA_LOC")){SAVE_DATA_LOC<- "Data/Intermediate_Inputs/Mortality_Regression_Data/"} +dir.create(SAVE_DATA_LOC, recursive = TRUE, showWarnings = FALSE) +#####################Model all ages and sex +REG_DATA <- REG_DATA %>% left_join(readRDS("Data/Cleaned_Data/Mortality_Data/RDS/Mortality_Rate_and_Pandemic_Data_for_Regression.Rds")) %>% mutate(WUPI=ifelse(WUPI==0,0.0001,WUPI),L_WUPI=ifelse(L_WUPI==0,0.0001,L_WUPI),L_TWO_WUPI=ifelse(L_TWO_WUPI==0,0.0001,L_TWO_WUPI)) +#Convert to log base +REG_DATA[,-1:-2] <- mutate_all(REG_DATA[,-1:-2],log) +REG_DATA[,c("WUPI","L_WUPI")] <- exp(REG_DATA[,c("WUPI","L_WUPI")]) +#Run for all ages + +REG_DATA_2016 <- REG_DATA %>% filter(Year>=2016) +MOD <- feols(Age_.[0:85]~US_Adj_Death_Rate+Sex*Year+WUPI+L_WUPI,REG_DATA, data.save = TRUE) +MOD_2016 <- feols(Age_.[0:85]~US_Adj_Death_Rate+Sex*Year+WUPI+L_WUPI,REG_DATA_2016, data.save = TRUE) + + +###Simulate each age-sex death rate over time with the models +#########When project far into the future some death rate values become negative. Make bounds to limit the forecast to a reasonable range. In this case I select half of the historic minimum, or double the historic maximum as upper an lower bounds in the study period. +BOUNDS <- readRDS("Data/Cleaned_Data/Mortality_Data/RDS/Single_Sex_Age_US_Mortality_Rate_Data_Long.Rds") %>% group_by(Sex,Age) %>% summarize(MAX_RATE=2*max(Mortality_Rate),MIN_RATE=min(Mortality_Rate)/2,.groups="drop") %>% arrange(Sex,Age) +BOUNDS <- BOUNDS %>% left_join( readRDS("Data/Cleaned_Data/Mortality_Data/RDS/Single_Sex_Age_US_Mortality_Rate_Data_Long.Rds") %>%pivot_wider(values_from=Mortality_Rate,names_from=Sex) %>% mutate(DIFF=Male/Female) %>% group_by(Age) %>% summarize(MIN_MALE_FEMALE_GAP=min(DIFF)/2,MAX_MALE_FEMALE_GAP=2*max(DIFF))) + +##Save Results +saveRDS(MOD,paste0(SAVE_DATA_LOC,"Single_Sex_Age_Time_Series_Regression.Rds")) +saveRDS(MOD_2016,paste0(SAVE_DATA_LOC,"Single_Sex_Age_Time_Series_Regression_2016.Rds")) +saveRDS(BOUNDS,paste0(SAVE_DATA_LOC,"Single_Sex_Age_Bounds_for_Predictions.Rds")) + diff --git a/Scripts/2G_Single_Age_Sex_ARIMA_Models.r b/Scripts/2G_Single_Age_Sex_ARIMA_Models.r new file mode 100644 index 0000000..cc41c37 --- /dev/null +++ b/Scripts/2G_Single_Age_Sex_ARIMA_Models.r @@ -0,0 +1,75 @@ +library(tidyverse) +library(forecast) +library(lmtest) +#################### +DATA_WOMEN <- readRDS("Data/Cleaned_Data/Mortality_Data/RDS/Mortality_Rate_and_Pandemic_Data_for_Regression.Rds") %>% filter(Sex=='Female') +DATA_MEN <- readRDS("Data/Cleaned_Data/Mortality_Data/RDS/Mortality_Rate_and_Pandemic_Data_for_Regression.Rds") %>% filter(Sex=='Male') + +DATA_WOMEN_2016 <- readRDS("Data/Cleaned_Data/Mortality_Data/RDS/Mortality_Rate_and_Pandemic_Data_for_Regression.Rds") %>% filter(Sex=='Female',Year>=2016) +DATA_MEN_2016 <- readRDS("Data/Cleaned_Data/Mortality_Data/RDS/Mortality_Rate_and_Pandemic_Data_for_Regression.Rds") %>% filter(Sex=='Male',Year>=2016) + + +#Create time series data +ST_YEAR <- DATA_MEN %>% pull(Year) %>% min + + +TS_MEN_US <- DATA_MEN %>% select(Mort_Rate_US) %>% ts(start=ST_YEAR,frequency=1) +TS_MEN_US_2016 <- DATA_MEN_2016 %>% select(Mort_Rate_US) %>% ts(start=ST_YEAR,frequency=1) + +TS_MEN_LIN <- DATA_MEN %>% select(Mort_Rate_Lincoln) %>% ts(start=ST_YEAR,frequency=1) +TS_MEN_LIN_2016 <- DATA_MEN_2016 %>% select(Mort_Rate_Lincoln) %>% ts(start=ST_YEAR,frequency=1) + + +TS_WOMEN_US <- DATA_WOMEN %>% select(Mort_Rate_US) %>% ts(start=ST_YEAR,frequency=1) +TS_WOMEN_US_2016 <- DATA_WOMEN_2016 %>% select(Mort_Rate_US) %>% ts(start=ST_YEAR,frequency=1) + +TS_WOMEN_LIN <- DATA_WOMEN %>% select(Mort_Rate_Lincoln) %>% ts(start=ST_YEAR,frequency=1) +TS_WOMEN_LIN_2016 <- DATA_WOMEN_2016 %>% select(Mort_Rate_Lincoln) %>% ts(start=ST_YEAR,frequency=1) + + +TS_PANDEMIC <- DATA_MEN %>% select(WUPI,L_WUPI) %>% ts(start=ST_YEAR,frequency=1) +TS_PANDEMIC_2016 <- DATA_MEN_2016 %>% select(WUPI,L_WUPI) %>% ts(start=ST_YEAR,frequency=1) + + +FORECAST_XREG <- TS_PANDEMIC +FORECAST_XREG_2016 <- TS_PANDEMIC_2016 + +FORECAST_XREG[,] <- 0 +FORECAST_XREG_2016[,] <- 0 + + +MOD_US_WOMEN <- auto.arima(TS_WOMEN_US,lambda=0,biasadj=TRUE,xreg=TS_PANDEMIC) +MOD_US_WOMEN_2016 <- auto.arima(TS_WOMEN_US_2016,lambda=0,biasadj=TRUE,xreg=TS_PANDEMIC_2016) +#checkresiduals(MOD_US_WOMEN) +MEN_XREG <- cbind(TS_WOMEN_US,TS_PANDEMIC) +MEN_XREG_2016 <- cbind(TS_WOMEN_US_2016,TS_PANDEMIC_2016) + +MOD_US_MEN <- auto.arima(TS_MEN_US,lambda=0,biasadj=TRUE,xreg=MEN_XREG) +MOD_US_MEN_2016 <- auto.arima(TS_MEN_US_2016,lambda=0,biasadj=TRUE,xreg=MEN_XREG_2016) + +#checkresiduals(MOD_US_MEN) +#plot(forecast(MOD_US_MEN,xreg=FORECAST_XREG)) + +#plot(forecast(MOD_US_WOMEN,xreg=FORECAST_XREG)) +MOD_LIN_WOMEN <- auto.arima(TS_WOMEN_LIN,biasadj=TRUE,xreg=TS_WOMEN_US) +MOD_LIN_WOMEN_2016 <- auto.arima(TS_WOMEN_LIN_2016,biasadj=TRUE,xreg=TS_WOMEN_US_2016) + +MOD_LIN_MEN <- auto.arima(TS_MEN_LIN,biasadj=TRUE,xreg=TS_MEN_US) +MOD_LIN_MEN_2016 <- auto.arima(TS_MEN_LIN_2016,biasadj=TRUE,xreg=TS_MEN_US_2016) + +#checkresiduals(MOD_LIN_MEN) +#coeftest(MOD_LIN_WOMEN) +###Create Location to save models + if(!exists("SAVE_LOC_MOD")){SAVE_LOC_MOD <-"./Data/Intermediate_Inputs/Age_Mortality_ARIMA_Models/"} + dir.create(SAVE_LOC_MOD, recursive = TRUE, showWarnings = FALSE) + +saveRDS(MOD_US_WOMEN,paste0(SAVE_LOC_MOD,"ARIMA_US_Women_Mortality_by_Age.Rds")) +saveRDS(MOD_US_MEN,paste0(SAVE_LOC_MOD,"ARIMA_US_Men_Mortality_by_Age.Rds")) +saveRDS(MOD_LIN_WOMEN,paste0(SAVE_LOC_MOD,"ARIMA_Lincoln_Women_Mortality_by_Age.Rds")) +saveRDS(MOD_LIN_MEN,paste0(SAVE_LOC_MOD,"ARIMA_Lincoln_Men_Mortality_by_Age.Rds")) +####2016 censured data for validity check +saveRDS(MOD_US_WOMEN_2016,paste0(SAVE_LOC_MOD,"ARIMA_US_Women_Mortality_by_Age_2016.Rds")) +saveRDS(MOD_US_MEN_2016,paste0(SAVE_LOC_MOD,"ARIMA_US_Men_Mortality_by_Age_2016.Rds")) +saveRDS(MOD_LIN_WOMEN_2016,paste0(SAVE_LOC_MOD,"ARIMA_Lincoln_Women_Mortality_by_Age_2016.Rds")) +saveRDS(MOD_LIN_MEN_2016,paste0(SAVE_LOC_MOD,"ARIMA_Lincoln_Men_Mortality_by_Age_2016.Rds")) + diff --git a/Scripts/Load_Custom_Functions/Single_Age_Mortality_Trend_Simulation.r b/Scripts/Load_Custom_Functions/Single_Age_Mortality_Trend_Simulation.r new file mode 100644 index 0000000..507d602 --- /dev/null +++ b/Scripts/Load_Custom_Functions/Single_Age_Mortality_Trend_Simulation.r @@ -0,0 +1,42 @@ +MAKE_EMPTY <- function(ST_YEAR,END_YEAR,MOD_MEN_LOCAL,MOD_WOMEN_LOCAL,MOD_MEN_US,MOD_WOMEN_US,XREG){ + NUM_SIMS <- END_YEAR-ST_YEAR+1 + WOMEN_SIM_US <- ts(simulate(MOD_WOMEN_US,xreg=XREG),start=ST_YEAR,frequency=1) + MALE_XREG_US <- cbind(XREG,WOMEN_SIM_US) + MALE_SIM_US <- simulate(MOD_MEN_US,xreg=MALE_XREG_US) + + SIM_WOMEN <-simulate(MOD_WOMEN_LOCAL,xreg=WOMEN_SIM_US) + SIM_MEN <- simulate(MOD_MEN_LOCAL,xreg=MALE_SIM_US) + SIM_MEN <- ifelse(SIM_MEN<0,0.0001,SIM_MEN) + SIM_WOMEN <- ifelse(SIM_WOMEN<0,0.0001,SIM_WOMEN) + + C_VAL <- rbind(cbind(ST_YEAR:END_YEAR,rep("Female",NUM_SIMS),as.vector(SIM_MEN)), cbind(ST_YEAR:END_YEAR,rep("Male",NUM_SIMS),as.vector(SIM_WOMEN))) %>% as_tibble + colnames(C_VAL) <- c("Year","Sex","US_Adj_Death_Rate") + C_VAL$Year <- as.numeric(pull(C_VAL,Year)) + C_VAL$US_Adj_Death_Rate <- log(as.numeric(pull(C_VAL,US_Adj_Death_Rate))) + C_VAL$WUPI <- 0.0001 + C_VAL$L_WUPI <- 0.0001 + return(C_VAL) +} + + +AGE_DIST <- function(SINGLE_MODS,EMPTY_SET,MAX_MALE,MAX_FEMALE,MIN_MALE,MIN_FEMALE,MAX_GAP,MIN_GAP,BASELINE_AGE_ADJUST_MEN,BASELINE_AGE_ADJUST_WOMEN){ + RES <- do.call(rbind,lapply(1:86,function(x){return(predict(SINGLE_MODS[[x]],newdata=EMPTY_SET))}))#For each data frame containing each year and sex combination of the forecast, predict the data for each age 0-85. Bind these by row to create a result with ages by row, and year by column + RES <- exp(RES) + FEMALE <-RES[,1:(ncol(RES)/2)] + MALE <-RES[,(ncol(RES)/2+1):ncol(RES)] + FEMALE <- ifelse(FEMALEMAX_FEMALE,MAX_FEMALE,FEMALE) + MALE <- ifelse(MALE>MAX_MALE,MAX_MALE,MALE) + MALE[MALE/FEMALEMAX_GAP] <- (MAX_GAP*FEMALE+0.00001)[MALE/FEMALE>MAX_GAP] + MALE_PRED <- exp(pull(EMPTY_SET[EMPTY_SET$Sex=='Male',],US_Adj_Death_Rate)) + FEMALE_PRED <- exp(pull(EMPTY_SET[EMPTY_SET$Sex=='Female',],US_Adj_Death_Rate)) + MALE <- MALE*(MALE_PRED/colSums(MALE*BASELINE_AGE_ADJUST_MEN)) + FEMALE <- FEMALE*(FEMALE_PRED/colSums(FEMALE*BASELINE_AGE_ADJUST_WOMEN)) + MALE <- MALE/10^5 + FEMALE <- FEMALE/10^5 + + RES <- list(MALE,FEMALE) + return(RES) +}