68 lines
5.0 KiB
R
68 lines
5.0 KiB
R
#####Packages
|
|
library(tidyverse) #Cleaning data
|
|
library(fixest) #Estimating a model of birth rates, to provide variance in the birth rate Monte Carlo using a fixed effect model.
|
|
library(parallel)
|
|
library(uuid) #To add a index to each batch
|
|
#Load custom functions needed for the simulation
|
|
source("Scripts/Birth_Simulation_Functions.r")
|
|
source("Scripts/Monte_Carlo_Functions.r")
|
|
source("Scripts/Migration_Simulation_Functions.r")
|
|
|
|
#####User Configuration Values
|
|
NUM_SIMULATIONS <- 10^5 #Number of Monte Carlo Simulations to run
|
|
RERUN_MORTALITY_SIMULATION <- TRUE #Rerun the Monte Carlo simulation of future mortality rates (not actual deaths) even if a Rds file of a mortality rates exists. This can be used to speed up runs when FALSE
|
|
RERUN_MIGRATION_SIMULATION <- TRUE #Rerun the ARIMA simulations that predict total migration in any year even if a Rds file of a mortality rates exists. This can be used to speed up runs when FALSE
|
|
|
|
NUM_YEARS_PROJECTED <- 50 #How many years into the future should each Monte Carlo run project to. For example 25 years if starting from 2025 and ending in 2050.
|
|
BIRTH_RATE_REG_RESULTS <- "Data/Regression_Results/Birth_Rate_Model.Rds" #Location of the regression used to model variance in birth rates in each Monte Carlo simulation.
|
|
START_DEMOGRAPHIC_DATA <- "Data/Cleaned_Data/Start_Year_Demographic_Data_With_Fertility_Groups.Rds" #Location of the data for the first year needing a forecasted birth rate, which aggregates the yearly splits of populations, into a single, year-county data set with variables for key birth prediction (total number of men and women in ages with high fertility rates), and then combines with the data set including births, deaths, migration and population.
|
|
AGE_OF_MIGRANT_PROBABILITY <- "Data/Other_Intermediate_Outputs/Migreation_Age_Probablity_One_to_Ninety.csv" #Location of the data which is the result of regression analysis of the age of migrants. That is to say 18 year olds may migrate more than 70 year olds, and this is the distribution by age. Sex was not found to be a major factor
|
|
####Run any scirpts required before main Monte Carlo
|
|
source("Survival_Simulation.r") #Populate a table with a simulation of future mortality rates, for quick recall during the simulation.
|
|
#A script contains the code needed to create a feols (fixest) regression of the birth rate given age distribution. Load this saved result or else create it to use in each simulation for gathering variance of births in any given age distribution path of the Monte Carlo.
|
|
if(file.exists(BIRTH_RATE_REG_RESULTS)){MOD_BIRTHS <- readRDS(BIRTH_RATE_REG_RESULTS);FIRST_PREDICT_YEAR_POPULATION_DATA <- readRDS(START_DEMOGRAPHIC_DATA)} else{source("Birth_Rate_Regression.r")}
|
|
if(file.exists(AGE_OF_MIGRANT_PROBABILITY)){MIG_AGE_DIST <- read.csv(AGE_OF_MIGRANT_PROBABILITY)} else{source("Migration_Regression.r")}
|
|
|
|
#Rerun the migration simulation if requested
|
|
if(RERUN_MIGRATION_SIMULATION ){source("Migration_Regression.r")}
|
|
|
|
#######################################################Main Monte Carlo
|
|
START_DEM_DATA <- readRDS("Data/Cleaned_Data/Lincoln_Demographic_Data.Rds") %>% group_by(County) %>% filter(Year==2023) %>% ungroup %>% select(-County)
|
|
MORTALITY_SIMULATION <- readRDS("./Data/Simulated_Data_Sets/MORTALITY_MONTE_CARLO.Rds") #Load the Mortality simulation to speed up simulation
|
|
MIGRATION_ARIMA_SIMULATION <- readRDS("./Data/Simulated_Data_Sets/Migration_ARIMA.Rds") #Load the Migration simulation to speed up simulation
|
|
|
|
#Second run, work on making into loop and saving the output to file (check if that will slow the results). Maybe use a predifined matrix, so that the results can be stored quirckly
|
|
SINGLE_PATH_SIM <- function(j){
|
|
C_RES <- RUN_SINGLE_SIM(MOD_BIRTHS,FIRST_PREDICT_YEAR_POPULATION_DATA,START_DEM_DATA,MORTALITY_SIMULATION,SIM_NUMBER=j,START_OF_SIM=2023,MIGRATION_ARIMA_SIMULATION,MIG_AGE_DIST)
|
|
RES <- C_RES[[1]]
|
|
for(i in 1:NUM_YEARS_PROJECTED){
|
|
C_RES <- RUN_SINGLE_SIM(MOD_BIRTHS,C_RES[[3]],C_RES[[2]],MORTALITY_SIMULATION,SIM_NUMBER=j,START_OF_SIM=2023,MIGRATION_ARIMA_SIMULATION,MIG_AGE_DIST)
|
|
RES <- rbind(RES,C_RES[[1]])
|
|
}
|
|
return(RES)
|
|
}
|
|
#Run the full simulation across all simulations simulating changes in demographic, and mortality data.
|
|
#Setup save results
|
|
RES_DIR <- "./Results"
|
|
RAW_SIM_FILE <- paste0(RES_DIR,"/Raw_Simulations.csv")
|
|
PERCENTILE_DATA <- paste0(RES_DIR,"/Percentile_Clean_Results.csv")
|
|
#Loop all results saving in batches
|
|
dir.create(RES_DIR,showWarnings=FALSE)
|
|
BATCH_SIZE <- 1000
|
|
NUM_RUNS <- ceiling(NUM_SIMULATIONS/BATCH_SIZE)
|
|
#Run the loop
|
|
for(x in 1:NUM_RUNS) {
|
|
BATCH_GUID <- UUIDgenerate()
|
|
try(FULL_RESULTS <- mclapply(1:BATCH_SIZE,function(x){try(SINGLE_PATH_SIM(x))},mc.cores = detectCores()-1))
|
|
if(exists("FULL_RESULTS")){
|
|
FULL_RESULTS <- do.call(rbind,lapply(1:BATCH_SIZE,function(x){FULL_RESULTS[[x]] %>% mutate(SIM_ID=UUIDgenerate())}))
|
|
FULL_RESULTS$BATCH_ID <- BATCH_GUID
|
|
FULL_RESULTS <- FULL_RESULTS%>% select(BATCH_ID,SIM_ID,everything())
|
|
if(x==1){write_csv(FULL_RESULTS,RAW_SIM_FILE)}else {write_csv(FULL_RESULTS,RAW_SIM_FILE,append=TRUE)}
|
|
rm(FULL_RESULTS)
|
|
gc()
|
|
}
|
|
}
|
|
|
|
|