Population_Study/1_Run_Full_Simulation.r
2025-12-02 17:22:14 -07:00

130 lines
7.9 KiB
R

#####Packages
library(tidyverse) #Cleaning data
library(fixest) #Estimating a model of birth rates, to provide variance in the birth rate Monte Carlo using a fixed effect model.
library(forecast) #Fore ARIMA migration simulations
library(parallel)
library(uuid) #To add a index to each batch
####If the prelimnary data needs to be reloaded run the supplied bash script to download, process, and generate all needed data sets for the Monte Carlo population Simulation. Otherwise skip this step to save time
RELOAD_DATA <- FALSE
if(RELOAD_DATA){system("bash Prelim_Process.sh")}
#Load custom functions needed for the simulation
source("Scripts/Load_Custom_Functions/Migration_Simulation_Functions.r")
source("Scripts/Load_Custom_Functions/Birth_Simulation_Functions.r")
source("Scripts/Load_Custom_Functions/Increment_Data_Year.r")
source("Scripts/Load_Custom_Functions/Single_Age_Mortality_Trend_Simulation.r")
#######Preliminary Model Inputs
YEARS_AHEAD <- 10
NUM_SIMULATIONS <- 10^4
ST_YEAR <- 2017
################################Load Data
DEMO <- readRDS("Data/Intermediate_Inputs/Starting_Demographic_Data_Sets_of_Monte_Carlo/2016_Starting_Kemmerer_Diamondville_Demographics_Matrix.Rds")
sum(readRDS("Data/Intermediate_Inputs/Starting_Demographic_Data_Sets_of_Monte_Carlo/2023_Starting_Kemmerer_Diamondville_Demographics_Matrix.Rds"))
BIRTH_MOD <- readRDS("Data/Intermediate_Inputs/Birth_Regressions/Birth_Regression_2016.Rds")
#Must add region as a factor with multiple levels for predict to work. Seems to check for multiple levels although that is not needed econometrics.
BIRTH_DATA <- readRDS("Data/Intermediate_Inputs/Birth_Regressions/Regression_Data/Birth_Simulation_Key_Starting_Points.Rds") %>% mutate(Region=factor(Region)) %>% filter(Region=='Kemmerer & Diamondville',Year==2016)
MIGRATION_ARIMA <- readRDS("Data/Intermediate_Inputs/Migration_ARIMA_Models/Kemmerer_Diamondville_Net_Migration_ARIMA_2016.Rds")
MIGRATION_ODDS <- readRDS("Data/Intermediate_Inputs/Migration_Trends/Migration_Age_Probability_Zero_to_85.Rds")
##############
#Data for death rate trends
SINGLE_AGE_MODS <- readRDS("Data/Intermediate_Inputs/Mortality_Regression_Data/Single_Sex_Age_Time_Series_Regression_2016.Rds")
BOUNDS <- readRDS("Data/Intermediate_Inputs/Mortality_Regression_Data/Single_Sex_Age_Bounds_for_Predictions.Rds")
MAX_MALE <- BOUNDS %>% filter(Sex=='Male') %>% pull(MAX_RATE)
MIN_MALE <- BOUNDS %>% filter(Sex=='Male') %>% pull(MIN_RATE)
MAX_FEMALE <- BOUNDS %>% filter(Sex=='Female') %>% pull(MAX_RATE)
MIN_FEMALE <- BOUNDS %>% filter(Sex=='Female') %>% pull(MIN_RATE)
MIN_GAP <- BOUNDS %>% filter(Sex=='Male') %>% pull(MIN_MALE_FEMALE_GAP)
MAX_GAP <- BOUNDS %>% filter(Sex=='Male') %>% pull(MAX_MALE_FEMALE_GAP)
#Adjusted for 2016 by filtering REMOVE LATER
BASELINE_AGE_ADJUST_MEN <- readRDS("Data/Cleaned_Data/Mortality_Data/RDS/Single_Sex_Age_Population_in_2000.Rds") %>% filter(Sex=='Male',Year<=2016) %>% pull(Percent_of_Population)
BASELINE_AGE_ADJUST_WOMEN <- readRDS("Data/Cleaned_Data/Mortality_Data/RDS/Single_Sex_Age_Population_in_2000.Rds") %>% filter(Sex=='Female',Year<=2016) %>% pull(Percent_of_Population)
#Adjust to just women popualtion (Not all population percent
BASELINE_AGE_ADJUST_WOMEN <- BASELINE_AGE_ADJUST_WOMEN/sum(BASELINE_AGE_ADJUST_WOMEN )
BASELINE_AGE_ADJUST_MEN <- BASELINE_AGE_ADJUST_MEN/ sum(BASELINE_AGE_ADJUST_MEN )
MOD_MEN_ALL <- readRDS("Data/Intermediate_Inputs/Age_Mortality_ARIMA_Models/ARIMA_US_Men_Mortality_by_Age_2016.Rds")
MOD_WOMEN_ALL <- readRDS("Data/Intermediate_Inputs/Age_Mortality_ARIMA_Models/ARIMA_US_Women_Mortality_by_Age_2016.Rds")
MOD_LIN_MEN <- readRDS("Data/Intermediate_Inputs/Age_Mortality_ARIMA_Models/ARIMA_Lincoln_Men_Mortality_by_Age_2016.Rds")
MOD_LIN_WOMEN <- readRDS("Data/Intermediate_Inputs/Age_Mortality_ARIMA_Models/ARIMA_Lincoln_Women_Mortality_by_Age_2016.Rds")
XREG <- cbind(rep(0.0001,YEARS_AHEAD+1),rep(0.0001,YEARS_AHEAD+1)) #Empty data set to simulate in the future
XREG <- ts(XREG,start=ST_YEAR,frequency=1)
SIMULATE_MORTALITY_RATE_TRENDS <- function(){
SIMULATED_MORTALITY_DATA_SET <- MAKE_EMPTY(ST_YEAR,ST_YEAR+YEARS_AHEAD,MOD_LIN_MEN,MOD_LIN_WOMEN,MOD_MEN_ALL,MOD_WOMEN_ALL,XREG)
MORTALITY_SIMULATION <- AGE_DIST(SINGLE_AGE_MODS,SIMULATED_MORTALITY_DATA_SET ,MAX_MALE,MAX_FEMALE,MIN_MALE,MIN_FEMALE,MAX_GAP,MIN_GAP,BASELINE_AGE_ADJUST_MEN,BASELINE_AGE_ADJUST_WOMEN)
return(MORTALITY_SIMULATION )
}
#####################START YEAR BY SIMULATIONS
#CURRENT_YEARS_AHEAD=1;CURRENT_SIM_NUM <- 1;MORTALITY_SIMULATION <- SIMULATE_MORTALITY_RATE_TRENDS()
SINGLE_YEAR_SIM <- function(DEMO,BIRTH_DATA,CURRENT_YEARS_AHEAD,MORTALITY_SIMULATION,NET_MIGRATION){
ORIG_DEMO <- DEMO
DEMO <- DEMOGRAPHICS_AFTER_MIGRATION(DEMO, NET_MIGRATION,MIGRATION_ODDS )
TOTAL_MIGRATION <- sum(DEMO-ORIG_DEMO)
BIRTH_DATA$Year <- BIRTH_DATA$Year+1
BIRTH_DATA$Lag_Two_Births <- BIRTH_DATA$Lag_Births
BIRTH_DATA$Lag_Births <- BIRTH_DATA$Births
BIRTH_DATA$Births <- NA
##We grab one year earlier than the window because they are one year older this year. Because the ages are from 0-85, row 18 is year 17, but one year is added making it 18 years in the current year. The birth windows are 18-28 for women and 18-30 for men.
BIRTH_DATA$Min_Birth_Group <- min(sum(DEMO[18:30,1]),sum(DEMO[18:28,2]))
NEW_BORNS <- BIRTH_SIM(BIRTH_MOD,BIRTH_DATA)
TOTAL_BIRTHS <- sum(NEW_BORNS)
BIRTH_DATA[,"Births"] <- TOTAL_BIRTHS
DEMO <- INCREMENT_AGES(DEMO,NEW_BORNS)
MORTALITY_SIMULATION
MALE_DEATHS <- sapply(1:86,function(x){rbinom(1,DEMO[x,1],MORTALITY_SIMULATION[[1]][x,CURRENT_YEARS_AHEAD])})
FEMALE_DEATHS <- sapply(1:86,function(x){rbinom(1,DEMO[x,2],MORTALITY_SIMULATION[[2]][x,CURRENT_YEARS_AHEAD])})
MALE_DEATHS <- ifelse(MALE_DEATHS>=DEMO[,1],DEMO[,1],MALE_DEATHS)
FEMALE_DEATHS <- ifelse(FEMALE_DEATHS>=DEMO[,1],DEMO[,1],FEMALE_DEATHS)
TOTAL_DEATHS <- sum(MALE_DEATHS+FEMALE_DEATHS)
DEMO[,"Num_Male"] <- DEMO[,"Num_Male"] -MALE_DEATHS
DEMO[,"Num_Female"] <- DEMO[,"Num_Female"] -FEMALE_DEATHS
#List of values needed for the next run or for reporting a result
TOTAL_POP <- sum(DEMO)
return(list(DEMO,BIRTH_DATA,c(TOTAL_POP,TOTAL_BIRTHS,TOTAL_DEATHS,TOTAL_MIGRATION)))
}
MIGRATION_ARIMA_MODEL <- MIGRATION_ARIMA
SINGLE_SIM <- function(DEMO,BIRTH_DATA,ST_YEAR,YEARS_AHEAD,MIGRATION_ARIMA_MODEL){
MIGRATION_SIM_VALUES <- round(as.vector(simulate(nsim=YEARS_AHEAD,MIGRATION_ARIMA_MODEL) ))
FINAL_REPORT_VALUES <- matrix(NA,ncol=6,nrow=YEARS_AHEAD)
colnames(FINAL_REPORT_VALUES ) <- c("Sim_UUID","Year","Population","Births","Deaths","Net_Migration")
FINAL_REPORT_VALUES[,1] <- UUIDgenerate()
for(i in 1:YEARS_AHEAD){
C_YEAR <- ST_YEAR+i-1
C_RES <-SINGLE_YEAR_SIM(DEMO,BIRTH_DATA,i,SIMULATE_MORTALITY_RATE_TRENDS(),MIGRATION_SIM_VALUES[i])
DEMO <- C_RES[[1]]
BIRTH_DATA <- C_RES[[2]]
FINAL_REPORT_VALUES[i,-1] <- c(C_YEAR,C_RES[[3]])
}
return(FINAL_REPORT_VALUES)
}
if(!exists("RES_DIR")){RES_DIR<- "./Results/"}
if(!exists("RES_SIM_DIR")){RES_SIM_DIR <- paste0(RES_DIR,"Simulations/")}
dir.create(RES_SIM_DIR, recursive = TRUE, showWarnings = FALSE)
NCORES <- detectCores()-1
BATCH_SIZE <- NCORES*10
TOTAL_SIMULATIONS <- 10^5
N_RUNS <-ceiling(TOTAL_SIMULATIONS/BATCH_SIZE )
SIM_RES_FILE <- paste0(RES_SIM_DIR,"Kemmerer_2016_Simulation.csv")
NEW_RES_FILE <- !file.exists(SIM_RES_FILE)
for(i in 1:N_RUNS){
# MIGRATION_MATRIX <- simulate(nsim=BATCH_SIZE,MIGRATION_ARIMA,n=YEARS_AHEAD)
MIGRATION_MATRIX <- do.call(cbind, mclapply(1:BATCH_SIZE,function(x)(as.vector(simulate(nsim=YEARS_AHEAD,MIGRATION_ARIMA) )),mc.cores = detectCores()-1))
# rownames(MIGRATION_MATRIX) <- ST_YEAR:(ST_YEAR+YEARS_AHEAD-1)
# colnames(MIGRATION_MATRIX) <- 1:BATCH_SIZE
try(RES <- do.call(rbind,mclapply(1:BATCH_SIZE,function(x){SINGLE_SIM(DEMO,BIRTH_DATA,ST_YEAR,YEARS_AHEAD,MIGRATION_ARIMA)},mc.cores=NCORES)))
if(exists("RES")){
RES <- as.data.frame(RES)
RES[,-1] <- as.numeric(as.matrix(RES[,-1]))
if(NEW_RES_FILE){write_csv(RES,SIM_RES_FILE)}else {write_csv(RES,SIM_RES_FILE,col_names=FALSE,append=TRUE)}
rm(RES)
}
}