Made single age-year sim work

This commit is contained in:
Alex Gebben Work 2025-12-01 17:21:19 -07:00
parent 09cead096e
commit e3764b0c37
8 changed files with 190 additions and 103 deletions

View File

@ -12,6 +12,8 @@ if(RELOAD_DATA){system("bash Prelim_Process.sh")}
source("Scripts/Load_Custom_Functions/Migration_Simulation_Functions.r")
source("Scripts/Load_Custom_Functions/Birth_Simulation_Functions.r")
source("Scripts/Load_Custom_Functions/Increment_Data_Year.r")
source("Scripts/Load_Custom_Functions/Single_Age_Mortality_Trend_Simulation.r")
#######Preliminary Model Inputs
YEARS_AHEAD <- 10
NUM_SIMULATIONS <- 10^4
@ -29,11 +31,36 @@ MIGRATION_MATRIX <- simulate(nsim=NUM_SIMULATIONS,MIGRATION_ARIMA,n=YEARS_AHEAD)
MIGRATION_MATRIX <- do.call(cbind, mclapply(1:NUM_SIMULATIONS,function(x)(as.vector(simulate(nsim=YEARS_AHEAD,MIGRATION_ARIMA) )),mc.cores = detectCores()-1))
rownames(MIGRATION_MATRIX) <- ST_YEAR:(ST_YEAR+YEARS_AHEAD-1)
colnames(MIGRATION_MATRIX) <- 1:NUM_SIMULATIONS
#Data for death rate trends
SINGLE_AGE_MODS <- readRDS("Data/Intermediate_Inputs/Mortality_Regression_Data/Single_Sex_Age_Time_Series_Regression_2016.Rds")
BOUNDS <- readRDS("Data/Intermediate_Inputs/Mortality_Regression_Data/Single_Sex_Age_Bounds_for_Predictions.Rds")
MAX_MALE <- BOUNDS %>% filter(Sex=='Male') %>% pull(MAX_RATE)
MIN_MALE <- BOUNDS %>% filter(Sex=='Male') %>% pull(MIN_RATE)
MAX_FEMALE <- BOUNDS %>% filter(Sex=='Female') %>% pull(MAX_RATE)
MIN_FEMALE <- BOUNDS %>% filter(Sex=='Female') %>% pull(MIN_RATE)
MIN_GAP <- BOUNDS %>% filter(Sex=='Male') %>% pull(MIN_MALE_FEMALE_GAP)
MAX_GAP <- BOUNDS %>% filter(Sex=='Male') %>% pull(MAX_MALE_FEMALE_GAP)
#Adjusted for 2016 by filtering REMOVE LATER
BASELINE_AGE_ADJUST_MEN <- readRDS("Data/Cleaned_Data/Mortality_Data/RDS/Single_Sex_Age_Population_in_2000.Rds") %>% filter(Sex=='Male',Year<=2016) %>% pull(Percent_of_Population)
BASELINE_AGE_ADJUST_WOMEN <- readRDS("Data/Cleaned_Data/Mortality_Data/RDS/Single_Sex_Age_Population_in_2000.Rds") %>% filter(Sex=='Female',Year<=2016) %>% pull(Percent_of_Population)
#Adjust to just women popualtion (Not all population percent
BASELINE_AGE_ADJUST_WOMEN <- BASELINE_AGE_ADJUST_WOMEN/sum(BASELINE_AGE_ADJUST_WOMEN )
BASELINE_AGE_ADJUST_MEN <- BASELINE_AGE_ADJUST_MEN/ sum(BASELINE_AGE_ADJUST_MEN )
MOD_MEN_ALL <- readRDS("Data/Intermediate_Inputs/Age_Mortality_ARIMA_Models/ARIMA_US_Men_Mortality_by_Age_2016.Rds")
MOD_WOMEN_ALL <- readRDS("Data/Intermediate_Inputs/Age_Mortality_ARIMA_Models/ARIMA_US_Women_Mortality_by_Age_2016.Rds")
MOD_LIN_MEN <- readRDS("Data/Intermediate_Inputs/Age_Mortality_ARIMA_Models/ARIMA_Lincoln_Men_Mortality_by_Age_2016.Rds")
MOD_LIN_WOMEN <- readRDS("Data/Intermediate_Inputs/Age_Mortality_ARIMA_Models/ARIMA_Lincoln_Women_Mortality_by_Age_2016.Rds")
XREG <- cbind(rep(0.0001,YEARS_AHEAD+1),rep(0.0001,YEARS_AHEAD+1)) #Empty data set to simulate in the future
XREG <- ts(XREG,start=ST_YEAR,frequency=1)
SIMULATED_MORTALITY_DATA_SET <- MAKE_EMPTY(ST_YEAR,ST_YEAR+YEARS_AHEAD,MOD_LIN_MEN,MOD_LIN_WOMEN,MOD_MEN_ALL,MOD_WOMEN_ALL,XREG)
MORTALITY_SIMULATION <- AGE_DIST(SINGLE_AGE_MODS,SIMULATED_MORTALITY_DATA_SET ,MAX_MALE,MAX_FEMALE,MIN_MALE,MIN_FEMALE,MAX_GAP,MIN_GAP,BASELINE_AGE_ADJUST_MEN,BASELINE_AGE_ADJUST_WOMEN)
#####################START YEAR BY SIMULATIONS
CURRENT_YEARS_AHEAD <- 1
CURRENT_SIM_NUM <- 58
ORIG_DEMO <- DEMO
DEMO <- DEMOGRAPHICS_AFTER_MIGRATION(DEMO, MIGRATION_MATRIX[CURRENT_YEARS_AHEAD ,CURRENT_SIM_NUM],MIGRATION_ODDS )
TOTAL_MIGRATION <- sum(DEMO-ORIG_DEMO)
BIRTH_DATA$Year <- BIRTH_DATA$Year+1
BIRTH_DATA$Lag_Two_Births <- BIRTH_DATA$Lag_Births
BIRTH_DATA$Lag_Births <- BIRTH_DATA$Births
@ -41,8 +68,19 @@ BIRTH_DATA$Births <- NA
##We grab one year earlier than the window because they are one year older this year. Because the ages are from 0-85, row 18 is year 17, but one year is added making it 18 years in the current year. The birth windows are 18-28 for women and 18-30 for men.
BIRTH_DATA$Min_Birth_Group <- min(sum(DEMO[18:30,1]),sum(DEMO[18:28,2]))
NEW_BORNS <- BIRTH_SIM(BIRTH_MOD,BIRTH_DATA)
TOTAL_BIRTHS <- sum(NEW_BORNS)
BIRTH_DATA[,"Births"] <-TOTAL_BIRTHS
DEMO <- INCREMENT_AGES(DEMO,NEW_BORNS)
DEMO
MALE_DEATHS <- sapply(1:86,function(x){rbinom(1,DEMO[x,1],MORTALITY_SIMULATION[[1]][x,CURRENT_YEARS_AHEAD])})
FEMALE_DEATHS <- sapply(1:86,function(x){rbinom(1,DEMO[x,2],MORTALITY_SIMULATION[[2]][x,CURRENT_YEARS_AHEAD])})
TOTAL_DEATHS <- sum(MALE_DEATHS+FEMALE_DEATHS)
DEMO[,"Num_Male"] <- DEMO[,"Num_Male"] -MALE_DEATHS
DEMO[,"Num_Female"] <- DEMO[,"Num_Female"] -MALE_DEATHS
#List of values needed for the next run or for reporting a result
list(DEMO,BIRTH_DATA,c(TOTAL_BIRTHS,TOTAL_DEATHS,TOTAL_MIGRATION))

View File

@ -10,4 +10,4 @@ Data is manually gathered from CDC WONDER data queries.
4) The world pandemic uncertainty index as collected from FRED which is used to account for pandemics in the regression, making the age time series stationary.
These are used to project mortality trends over time. In the case of the age adjusted data, this has local trends that can be compared to the national average. The single age-sex data is only at a national level but can be imparted to local levels as a general trend in the distribution of deaths
--- Run Date: 2025-11-25 12:04:27 ---
--- Run Date: 2025-12-01 16:10:03 ---

View File

@ -1,76 +0,0 @@
library(tidyverse)
library(fixest)
library(forecast)
########################################################ARIMA
DATA_WOMEN <- readRDS("Data/Cleaned_Data/Mortality_Data/RDS/Mortality_Rate_and_Pandemic_Data_for_Regression.Rds") %>% filter(Sex=='Female')
DATA_MEN <- readRDS("Data/Cleaned_Data/Mortality_Data/RDS/Mortality_Rate_and_Pandemic_Data_for_Regression.Rds") %>% filter(Sex=='Male')
#Create time series data
ST_YEAR <- DATA_MEN %>% pull(Year) %>% min
TS_MEN_US <- DATA_MEN %>% select(Mort_Rate_US) %>% ts(start=ST_YEAR,frequency=1)
TS_MEN_LIN <- DATA_MEN %>% select(Mort_Rate_Lincoln) %>% ts(start=ST_YEAR,frequency=1)
TS_WOMEN_US <- DATA_WOMEN %>% select(Mort_Rate_US) %>% ts(start=ST_YEAR,frequency=1)
TS_WOMEN_LIN <- DATA_WOMEN %>% select(Mort_Rate_Lincoln) %>% ts(start=ST_YEAR,frequency=1)
TS_PANDEMIC <- DATA_MEN %>% select(WUPI,L_WUPI) %>% ts(start=ST_YEAR,frequency=1)
TS_WOMEN_US_INV <- TS_WOMEN_US
FORECAST_XREG <- TS_PANDEMIC
FORECAST_XREG[,] <- 0
MOD_US_MEN <- auto.arima(TS_MEN_US,lambda=0,biasadj=TRUE,xreg=TS_PANDEMIC)
#checkresiduals(MOD_US_MEN)
#plot(forecast(MOD_US_MEN,xreg=FORECAST_XREG))
MOD_US_WOMEN <- auto.arima(TS_WOMEN_US,lambda=0,biasadj=TRUE,xreg=TS_PANDEMIC)
#checkresiduals(MOD_US_WOMEN)
#plot(forecast(MOD_US_WOMEN,xreg=FORECAST_XREG))
MOD_LIN_MEN <- auto.arima(TS_MEN_LIN,biasadj=TRUE,xreg=TS_MEN_US)
MOD_LIN_WOMEN <- auto.arima(TS_WOMEN_LIN,biasadj=TRUE,xreg=TS_WOMEN_US)
############################################Start Simualtion work
SINGLE_MODS <- readRDS("Data/Intermediate_Inputs/Mortality_Regression_Data/Single_Sex_Age_Time_Series_Regression.Rds")
MIN_VALUES <- readRDS("Data/Intermediate_Inputs/Mortality_Regression_Data/Single_Sex_Age_Min_Values_for_Bounding_Predictions.Rds")
MAX_VALUES <- readRDS("Data/Intermediate_Inputs/Mortality_Regression_Data/Single_Sex_Age_Max_Values_for_Bounding_Predictions.Rds")
BASELINE_AGE_ADJUST_MEN <- readRDS("Data/Cleaned_Data/Mortality_Data/RDS/Single_Sex_Age_Population_in_2000.Rds") %>% filter(Sex=='Male') %>% pull(Percent_of_Population)
BASELINE_AGE_ADJUST_MEN
BASELINE_AGE_ADJUST_WOMEN <- readRDS("Data/Cleaned_Data/Mortality_Data/RDS/Single_Sex_Age_Population_in_2000.Rds") %>% filter(Sex=='Female') %>% pull(Percent_of_Population)
#Adjust to just women popualtion (Not all population percent
BASELINE_AGE_ADJUST_WOMEN <- BASELINE_AGE_ADJUST_WOMEN/ sum(BASELINE_AGE_ADJUST_WOMEN )
BASELINE_AGE_ADJUST_MEN <- BASELINE_AGE_ADJUST_MEN/ sum(BASELINE_AGE_ADJUST_MEN )
ST_YEAR <- 2025
END_YEAR <- 2025+40
GAP <- END_YEAR-ST_YEAR
NUM_SIMS <- END_YEAR-ST_YEAR+1
XREG <- cbind(rep(0,NUM_SIMS),rep(0,NUM_SIMS))
#colnames(XREG) <- c("WUPI","L_WUPI")
XREG <- ts(XREG,start=ST_YEAR,frequency=1)
SIM_LIN_WOMEN <-simulate(MOD_LIN_WOMEN,xreg=simulate(MOD_US_WOMEN,xreg=XREG))
SIM_LIN_MEN <- simulate(MOD_LIN_MEN,xreg=simulate(MOD_US_MEN,xreg=XREG))
C_VAL <- rbind(cbind(ST_YEAR:END_YEAR,rep("Female",NUM_SIMS),as.vector(SIM_LIN_MEN)), cbind(ST_YEAR:END_YEAR,rep("Male",NUM_SIMS),as.vector(SIM_LIN_WOMEN))) %>% as_tibble
colnames(C_VAL) <- c("Year","Sex","US_Adj_Death_Rate")
C_VAL$Year <- as.numeric(pull(C_VAL,Year))
C_VAL$US_Adj_Death_Rate <- as.numeric(pull(C_VAL,US_Adj_Death_Rate))
as.numeric(pull(C_VAL,US_Adj_Death_Rate))
###Pedict
RES <- do.call(rbind,lapply(1:86,function(x){return(predict(SINGLE_MODS[[x]],newdata=C_VAL))}))#For each data frame containing each year and sex combination of the forecast, predict the data for each age 0-85. Bind these by row to create a result with ages by row, and year by column
FEMALE <-RES[,1:(ncol(RES)/2)]
FEMALE <- ifelse(FEMALE<MIN_VALUES[1:86],MIN_VALUES[1:86],FEMALE)
MALE <- ifelse(MALE<MIN_VALUES[87:(86*2)],MIN_VALUES[87:(86*2)],MALE)
FEMALE <- ifelse(FEMALE>MAX_VALUES[1:86],MAX_VALUES[1:86],FEMALE)
MALE <- ifelse(MALE>MAX_VALUES[87:(86*2)],MAX_VALUES[87:(86*2)],MALE)
MALE_PRED <- pull(C_VAL[C_VAL$Sex=='Male',],US_Adj_Death_Rate)
FEMALE_PRED <- pull(C_VAL[C_VAL$Sex=='Female',],US_Adj_Death_Rate)
MALE <- MALE*(MALE_PRED/colSums(MALE*BASELINE_AGE_ADJUST_MEN))
FEMALE <- FEMALE*(FEMALE_PRED/colSums(FEMALE*BASELINE_AGE_ADJUST_WOMEN))
RES <- list(MALE,FEMALE)
MALE
MALE[,1]
qbinom(MALE
?qbinom

View File

@ -12,5 +12,7 @@ Rscript "./Scripts/2B_Impart_Deaths_and_Migration_to_Subregions.r"
Rscript "./Scripts/2C_Migration_by_Age_Regression.r"
Rscript "./Scripts/2D_Overall_Migration_ARIMA.r"
Rscript "./Scripts/2E_Move_Current_Demographic_Data_to_Current_Year.r"
Rscript "./Scripts/2F_Create_Age_Sex_Specfic_Mortality_Trends.r"
Rscript "./Scripts/2G_Single_Age_Sex_ARIMA_Models.r"
#Produce final results for either the simulation, or information for the report, but not anything used in later stages of the simulation
Rscript "./Scripts/3A_Population_Pyramid.r"

View File

@ -1,24 +0,0 @@
library(tidyverse)
library(fixest)
####SPLIT OUT THE DATA MANAGEMENT PULL IN ARIMA
################################Create the data need to model the age-sex specific death rates
RAW_DATA_LOC <- "Data/Cleaned_Data/Mortality_Data/RDS/"
REG_DATA <- readRDS(paste0(RAW_DATA_LOC,"Single_Sex_Age_US_Mortality_Rate_Data_Wide.Rds"))
if(!exists("SAVE_DATA_LOC")){SAVE_DATA_LOC<- "Data/Intermediate_Inputs/Mortality_Regression_Data/"}
dir.create(SAVE_DATA_LOC, recursive = TRUE, showWarnings = FALSE)
#####################Model all ages and sex
MOD <- feols(Age_.[0:85]~US_Adj_Death_Rate+Sex*Year,REG_DATA, data.save = TRUE)
###Simulate each age-sex death rate over time with the models
#########When project far into the future some death rate values become negative. Make bounds to limit the forecast to a reasonable range. In this case I select half of the historic minimum, or double the historic maximum as upper an lower bounds in the study period.
#BOUNDS <- readRDS("Data/Cleaned_Data/Mortality_Data/RDS/Single_Sex_Age_US_Mortality_Rate_Data_Long.Rds") %>% group_by(Age) %>% summarize(MAX_RATE=2*max(Mortality_Rate),MIN_RATE=min(Mortality_Rate)/2)
BOUNDS <- readRDS("Data/Cleaned_Data/Mortality_Data/RDS/Single_Sex_Age_US_Mortality_Rate_Data_Long.Rds") %>% group_by(Sex,Age) %>% summarize(MAX_RATE=2*max(Mortality_Rate),MIN_RATE=min(Mortality_Rate)/2) %>% arrange(Sex,Age)
MAX_BOUND <- BOUNDS %>% pull(MAX_RATE)
MIN_BOUND <- BOUNDS %>% pull(MIN_RATE)
saveRDS(MOD,paste0(SAVE_DATA_LOC,"Single_Sex_Age_Time_Series_Regression.Rds"))
saveRDS(MAX_BOUND,paste0(SAVE_DATA_LOC,"Single_Sex_Age_Max_Values_for_Bounding_Predictions.Rds"))
saveRDS(MIN_BOUND,paste0(SAVE_DATA_LOC,"Single_Sex_Age_Min_Values_for_Bounding_Predictions.Rds"))

View File

@ -0,0 +1,30 @@
library(tidyverse)
library(fixest)
################################Create the data need to model the age-sex specific death rates
RAW_DATA_LOC <- "Data/Cleaned_Data/Mortality_Data/RDS/"
REG_DATA <- readRDS(paste0(RAW_DATA_LOC,"Single_Sex_Age_US_Mortality_Rate_Data_Wide.Rds"))
if(!exists("SAVE_DATA_LOC")){SAVE_DATA_LOC<- "Data/Intermediate_Inputs/Mortality_Regression_Data/"}
dir.create(SAVE_DATA_LOC, recursive = TRUE, showWarnings = FALSE)
#####################Model all ages and sex
REG_DATA <- REG_DATA %>% left_join(readRDS("Data/Cleaned_Data/Mortality_Data/RDS/Mortality_Rate_and_Pandemic_Data_for_Regression.Rds")) %>% mutate(WUPI=ifelse(WUPI==0,0.0001,WUPI),L_WUPI=ifelse(L_WUPI==0,0.0001,L_WUPI),L_TWO_WUPI=ifelse(L_TWO_WUPI==0,0.0001,L_TWO_WUPI))
#Convert to log base
REG_DATA[,-1:-2] <- mutate_all(REG_DATA[,-1:-2],log)
REG_DATA[,c("WUPI","L_WUPI")] <- exp(REG_DATA[,c("WUPI","L_WUPI")])
#Run for all ages
REG_DATA_2016 <- REG_DATA %>% filter(Year>=2016)
MOD <- feols(Age_.[0:85]~US_Adj_Death_Rate+Sex*Year+WUPI+L_WUPI,REG_DATA, data.save = TRUE)
MOD_2016 <- feols(Age_.[0:85]~US_Adj_Death_Rate+Sex*Year+WUPI+L_WUPI,REG_DATA_2016, data.save = TRUE)
###Simulate each age-sex death rate over time with the models
#########When project far into the future some death rate values become negative. Make bounds to limit the forecast to a reasonable range. In this case I select half of the historic minimum, or double the historic maximum as upper an lower bounds in the study period.
BOUNDS <- readRDS("Data/Cleaned_Data/Mortality_Data/RDS/Single_Sex_Age_US_Mortality_Rate_Data_Long.Rds") %>% group_by(Sex,Age) %>% summarize(MAX_RATE=2*max(Mortality_Rate),MIN_RATE=min(Mortality_Rate)/2,.groups="drop") %>% arrange(Sex,Age)
BOUNDS <- BOUNDS %>% left_join( readRDS("Data/Cleaned_Data/Mortality_Data/RDS/Single_Sex_Age_US_Mortality_Rate_Data_Long.Rds") %>%pivot_wider(values_from=Mortality_Rate,names_from=Sex) %>% mutate(DIFF=Male/Female) %>% group_by(Age) %>% summarize(MIN_MALE_FEMALE_GAP=min(DIFF)/2,MAX_MALE_FEMALE_GAP=2*max(DIFF)))
##Save Results
saveRDS(MOD,paste0(SAVE_DATA_LOC,"Single_Sex_Age_Time_Series_Regression.Rds"))
saveRDS(MOD_2016,paste0(SAVE_DATA_LOC,"Single_Sex_Age_Time_Series_Regression_2016.Rds"))
saveRDS(BOUNDS,paste0(SAVE_DATA_LOC,"Single_Sex_Age_Bounds_for_Predictions.Rds"))

View File

@ -0,0 +1,75 @@
library(tidyverse)
library(forecast)
library(lmtest)
####################
DATA_WOMEN <- readRDS("Data/Cleaned_Data/Mortality_Data/RDS/Mortality_Rate_and_Pandemic_Data_for_Regression.Rds") %>% filter(Sex=='Female')
DATA_MEN <- readRDS("Data/Cleaned_Data/Mortality_Data/RDS/Mortality_Rate_and_Pandemic_Data_for_Regression.Rds") %>% filter(Sex=='Male')
DATA_WOMEN_2016 <- readRDS("Data/Cleaned_Data/Mortality_Data/RDS/Mortality_Rate_and_Pandemic_Data_for_Regression.Rds") %>% filter(Sex=='Female',Year>=2016)
DATA_MEN_2016 <- readRDS("Data/Cleaned_Data/Mortality_Data/RDS/Mortality_Rate_and_Pandemic_Data_for_Regression.Rds") %>% filter(Sex=='Male',Year>=2016)
#Create time series data
ST_YEAR <- DATA_MEN %>% pull(Year) %>% min
TS_MEN_US <- DATA_MEN %>% select(Mort_Rate_US) %>% ts(start=ST_YEAR,frequency=1)
TS_MEN_US_2016 <- DATA_MEN_2016 %>% select(Mort_Rate_US) %>% ts(start=ST_YEAR,frequency=1)
TS_MEN_LIN <- DATA_MEN %>% select(Mort_Rate_Lincoln) %>% ts(start=ST_YEAR,frequency=1)
TS_MEN_LIN_2016 <- DATA_MEN_2016 %>% select(Mort_Rate_Lincoln) %>% ts(start=ST_YEAR,frequency=1)
TS_WOMEN_US <- DATA_WOMEN %>% select(Mort_Rate_US) %>% ts(start=ST_YEAR,frequency=1)
TS_WOMEN_US_2016 <- DATA_WOMEN_2016 %>% select(Mort_Rate_US) %>% ts(start=ST_YEAR,frequency=1)
TS_WOMEN_LIN <- DATA_WOMEN %>% select(Mort_Rate_Lincoln) %>% ts(start=ST_YEAR,frequency=1)
TS_WOMEN_LIN_2016 <- DATA_WOMEN_2016 %>% select(Mort_Rate_Lincoln) %>% ts(start=ST_YEAR,frequency=1)
TS_PANDEMIC <- DATA_MEN %>% select(WUPI,L_WUPI) %>% ts(start=ST_YEAR,frequency=1)
TS_PANDEMIC_2016 <- DATA_MEN_2016 %>% select(WUPI,L_WUPI) %>% ts(start=ST_YEAR,frequency=1)
FORECAST_XREG <- TS_PANDEMIC
FORECAST_XREG_2016 <- TS_PANDEMIC_2016
FORECAST_XREG[,] <- 0
FORECAST_XREG_2016[,] <- 0
MOD_US_WOMEN <- auto.arima(TS_WOMEN_US,lambda=0,biasadj=TRUE,xreg=TS_PANDEMIC)
MOD_US_WOMEN_2016 <- auto.arima(TS_WOMEN_US_2016,lambda=0,biasadj=TRUE,xreg=TS_PANDEMIC_2016)
#checkresiduals(MOD_US_WOMEN)
MEN_XREG <- cbind(TS_WOMEN_US,TS_PANDEMIC)
MEN_XREG_2016 <- cbind(TS_WOMEN_US_2016,TS_PANDEMIC_2016)
MOD_US_MEN <- auto.arima(TS_MEN_US,lambda=0,biasadj=TRUE,xreg=MEN_XREG)
MOD_US_MEN_2016 <- auto.arima(TS_MEN_US_2016,lambda=0,biasadj=TRUE,xreg=MEN_XREG_2016)
#checkresiduals(MOD_US_MEN)
#plot(forecast(MOD_US_MEN,xreg=FORECAST_XREG))
#plot(forecast(MOD_US_WOMEN,xreg=FORECAST_XREG))
MOD_LIN_WOMEN <- auto.arima(TS_WOMEN_LIN,biasadj=TRUE,xreg=TS_WOMEN_US)
MOD_LIN_WOMEN_2016 <- auto.arima(TS_WOMEN_LIN_2016,biasadj=TRUE,xreg=TS_WOMEN_US_2016)
MOD_LIN_MEN <- auto.arima(TS_MEN_LIN,biasadj=TRUE,xreg=TS_MEN_US)
MOD_LIN_MEN_2016 <- auto.arima(TS_MEN_LIN_2016,biasadj=TRUE,xreg=TS_MEN_US_2016)
#checkresiduals(MOD_LIN_MEN)
#coeftest(MOD_LIN_WOMEN)
###Create Location to save models
if(!exists("SAVE_LOC_MOD")){SAVE_LOC_MOD <-"./Data/Intermediate_Inputs/Age_Mortality_ARIMA_Models/"}
dir.create(SAVE_LOC_MOD, recursive = TRUE, showWarnings = FALSE)
saveRDS(MOD_US_WOMEN,paste0(SAVE_LOC_MOD,"ARIMA_US_Women_Mortality_by_Age.Rds"))
saveRDS(MOD_US_MEN,paste0(SAVE_LOC_MOD,"ARIMA_US_Men_Mortality_by_Age.Rds"))
saveRDS(MOD_LIN_WOMEN,paste0(SAVE_LOC_MOD,"ARIMA_Lincoln_Women_Mortality_by_Age.Rds"))
saveRDS(MOD_LIN_MEN,paste0(SAVE_LOC_MOD,"ARIMA_Lincoln_Men_Mortality_by_Age.Rds"))
####2016 censured data for validity check
saveRDS(MOD_US_WOMEN_2016,paste0(SAVE_LOC_MOD,"ARIMA_US_Women_Mortality_by_Age_2016.Rds"))
saveRDS(MOD_US_MEN_2016,paste0(SAVE_LOC_MOD,"ARIMA_US_Men_Mortality_by_Age_2016.Rds"))
saveRDS(MOD_LIN_WOMEN_2016,paste0(SAVE_LOC_MOD,"ARIMA_Lincoln_Women_Mortality_by_Age_2016.Rds"))
saveRDS(MOD_LIN_MEN_2016,paste0(SAVE_LOC_MOD,"ARIMA_Lincoln_Men_Mortality_by_Age_2016.Rds"))

View File

@ -0,0 +1,42 @@
MAKE_EMPTY <- function(ST_YEAR,END_YEAR,MOD_MEN_LOCAL,MOD_WOMEN_LOCAL,MOD_MEN_US,MOD_WOMEN_US,XREG){
NUM_SIMS <- END_YEAR-ST_YEAR+1
WOMEN_SIM_US <- ts(simulate(MOD_WOMEN_US,xreg=XREG),start=ST_YEAR,frequency=1)
MALE_XREG_US <- cbind(XREG,WOMEN_SIM_US)
MALE_SIM_US <- simulate(MOD_MEN_US,xreg=MALE_XREG_US)
SIM_WOMEN <-simulate(MOD_WOMEN_LOCAL,xreg=WOMEN_SIM_US)
SIM_MEN <- simulate(MOD_MEN_LOCAL,xreg=MALE_SIM_US)
SIM_MEN <- ifelse(SIM_MEN<0,0.0001,SIM_MEN)
SIM_WOMEN <- ifelse(SIM_WOMEN<0,0.0001,SIM_WOMEN)
C_VAL <- rbind(cbind(ST_YEAR:END_YEAR,rep("Female",NUM_SIMS),as.vector(SIM_MEN)), cbind(ST_YEAR:END_YEAR,rep("Male",NUM_SIMS),as.vector(SIM_WOMEN))) %>% as_tibble
colnames(C_VAL) <- c("Year","Sex","US_Adj_Death_Rate")
C_VAL$Year <- as.numeric(pull(C_VAL,Year))
C_VAL$US_Adj_Death_Rate <- log(as.numeric(pull(C_VAL,US_Adj_Death_Rate)))
C_VAL$WUPI <- 0.0001
C_VAL$L_WUPI <- 0.0001
return(C_VAL)
}
AGE_DIST <- function(SINGLE_MODS,EMPTY_SET,MAX_MALE,MAX_FEMALE,MIN_MALE,MIN_FEMALE,MAX_GAP,MIN_GAP,BASELINE_AGE_ADJUST_MEN,BASELINE_AGE_ADJUST_WOMEN){
RES <- do.call(rbind,lapply(1:86,function(x){return(predict(SINGLE_MODS[[x]],newdata=EMPTY_SET))}))#For each data frame containing each year and sex combination of the forecast, predict the data for each age 0-85. Bind these by row to create a result with ages by row, and year by column
RES <- exp(RES)
FEMALE <-RES[,1:(ncol(RES)/2)]
MALE <-RES[,(ncol(RES)/2+1):ncol(RES)]
FEMALE <- ifelse(FEMALE<MIN_FEMALE,MIN_FEMALE,FEMALE)
MALE <- ifelse(MALE<MIN_MALE,MIN_MALE,MALE)
FEMALE <- ifelse(FEMALE>MAX_FEMALE,MAX_FEMALE,FEMALE)
MALE <- ifelse(MALE>MAX_MALE,MAX_MALE,MALE)
MALE[MALE/FEMALE<MIN_GAP] <- (MIN_GAP*FEMALE+0.00001)[MALE/FEMALE<MIN_GAP]
MALE[MALE/FEMALE>MAX_GAP] <- (MAX_GAP*FEMALE+0.00001)[MALE/FEMALE>MAX_GAP]
MALE_PRED <- exp(pull(EMPTY_SET[EMPTY_SET$Sex=='Male',],US_Adj_Death_Rate))
FEMALE_PRED <- exp(pull(EMPTY_SET[EMPTY_SET$Sex=='Female',],US_Adj_Death_Rate))
MALE <- MALE*(MALE_PRED/colSums(MALE*BASELINE_AGE_ADJUST_MEN))
FEMALE <- FEMALE*(FEMALE_PRED/colSums(FEMALE*BASELINE_AGE_ADJUST_WOMEN))
MALE <- MALE/10^5
FEMALE <- FEMALE/10^5
RES <- list(MALE,FEMALE)
return(RES)
}