From 747f752f61ff654f36bdf487bf1cf74b86005374 Mon Sep 17 00:00:00 2001 From: Alex Gebben Work Date: Thu, 30 Oct 2025 12:31:23 -0600 Subject: [PATCH] Lunch save --- 1_Run_Full_Simulation.r | 38 +++++++++++++++++++----- Scripts/Migration_Simulation_Functions.r | 13 +++++--- Scripts/Monte_Carlo_Functions.r | 9 ++++-- 3 files changed, 46 insertions(+), 14 deletions(-) diff --git a/1_Run_Full_Simulation.r b/1_Run_Full_Simulation.r index 3c311f7..5e593a5 100644 --- a/1_Run_Full_Simulation.r +++ b/1_Run_Full_Simulation.r @@ -2,13 +2,14 @@ library(tidyverse) #Cleaning data library(fixest) #Estimating a model of birth rates, to provide variance in the birth rate Monte Carlo using a fixed effect model. library(parallel) +library(uuid) #To add a index to each batch #Load custom functions needed for the simulation source("Scripts/Birth_Simulation_Functions.r") source("Scripts/Monte_Carlo_Functions.r") source("Scripts/Migration_Simulation_Functions.r") #####User Configuration Values - NUM_SIMULATIONS <- 10^6 #Number of Monte Carlo Simulations to run + NUM_SIMULATIONS <- 10^5 #Number of Monte Carlo Simulations to run RERUN_MORTALITY_SIMULATION <- TRUE #Rerun the Monte Carlo simulation of future mortality rates (not actual deaths) even if a Rds file of a mortality rates exists. This can be used to speed up runs when FALSE RERUN_MIGRATION_SIMULATION <- TRUE #Rerun the ARIMA simulations that predict total migration in any year even if a Rds file of a mortality rates exists. This can be used to speed up runs when FALSE @@ -30,9 +31,6 @@ START_DEM_DATA <- readRDS("Data/Cleaned_Data/Lincoln_Demographic_Data.Rds") %>% MORTALITY_SIMULATION <- readRDS("./Data/Simulated_Data_Sets/MORTALITY_MONTE_CARLO.Rds") #Load the Mortality simulation to speed up simulation MIGRATION_ARIMA_SIMULATION <- readRDS("./Data/Simulated_Data_Sets/Migration_ARIMA.Rds") #Load the Migration simulation to speed up simulation -#First run -C_RES <-RUN_SINGLE_SIM(MOD_BIRTHS,FIRST_PREDICT_YEAR_POPULATION_DATA,START_DEM_DATA,MORTALITY_SIMULATION,SIM_NUMBER=1,START_OF_SIM=2023,MIGRATION_ARIMA_SIMULATION,MIG_AGE_DIST ) -# Year County Population Births Deaths Migration Min_Birth_Group PREV_BIRTH PREV_TWO_BIRTH Male_Birth_Group #Second run, work on making into loop and saving the output to file (check if that will slow the results). Maybe use a predifined matrix, so that the results can be stored quirckly SINGLE_PATH_SIM <- function(j){ C_RES <- RUN_SINGLE_SIM(MOD_BIRTHS,FIRST_PREDICT_YEAR_POPULATION_DATA,START_DEM_DATA,MORTALITY_SIMULATION,SIM_NUMBER=j,START_OF_SIM=2023,MIGRATION_ARIMA_SIMULATION,MIG_AGE_DIST) @@ -44,9 +42,33 @@ SINGLE_PATH_SIM <- function(j){ return(RES) } #Run the full simulation across all simulations simulating changes in demographic, and mortality data. -system.time({ -FULL_RESULTS <- mclapply(1:NUM_SIMULATIONS,SINGLE_PATH_SIM,mc.cores = detectCores()-1) + #Setup save results + RES_DIR <- "./Results" + RAW_SIM_FILE <- paste0(RES_DIR,"/Raw_Simulations.csv") + PERCENTILE_DATA <- paste0(RES_DIR,"/Percentile_Clean_Results.csv") + #Loop all results saving in batches + dir.create(RES_DIR,showWarnings=FALSE) + BATCH_SIZE <- 1000 + NUM_RUNS <- ceiling(NUM_SIMULATIONS/BATCH_SIZE) + #Run the loop +for(x in 1:NUM_RUNS) { + BATCH_GUID <- UUIDgenerate() + FULL_RESULTS <- mclapply(1:BATCH_SIZE,SINGLE_PATH_SIM,mc.cores = detectCores()-1) + FULL_RESULTS <- do.call(rbind,lapply(1:BATCH_SIZE,function(x){FULL_RESULTS[[x]] %>% mutate(SIM_ID=UUIDgenerate())})) + FULL_RESULTS$BATCH_ID <- BATCH_GUID + FULL_RESULTS <- FULL_RESULTS%>% select(BATCH_ID,SIM_ID,everything()) + if(x==1){write_csv(FULL_RESULTS,RAW_SIM_FILE)}else {write_csv(FULL_RESULTS,RAW_SIM_FILE,append=TRUE)} + rm(FULL_RESULTS) + gc() +} + +###Process the simulations and save the main percentile results by year + FULL_RESULTS <- read_csv(RAW_SIM_FILE) + GRAPH_DATA <- do.call(rbind,lapply(YEARS,function(x){quantile(RES %>% filter(Year==x) %>% pull(Population),c(0.05,0.1,0.25,0.5,0.75,0.9,0.95))})) %>% as_tibble + YEARS <- 2023:(2023+NUM_YEARS_PROJECTED) + GRAPH_DATA$Year <- YEARS + GRAPH_DATA <- GRAPH_DATA %>% pivot_longer(cols=!Year,names_to=c("Percentile"),values_to="Population") + write_csv(GRAPH_DATA,PERCENTILE_DATA) + ggplot(aes(x=Year,y=Population,group=Percentile,color=Percentile),data=GRAPH_DATA)+geom_line() -}) -plot(FULL_RESULTS[[1]][3]) diff --git a/Scripts/Migration_Simulation_Functions.r b/Scripts/Migration_Simulation_Functions.r index 0a351e2..f4c888c 100644 --- a/Scripts/Migration_Simulation_Functions.r +++ b/Scripts/Migration_Simulation_Functions.r @@ -1,7 +1,7 @@ #################################Simulate data #A function used only inside the OUT_MIGRATION_SIMULATION function (see below). This creates a data set which has one entry for every individual in the county, with a assigned probability of moving out of the county. This allows the migration simulation to extract individuals based on true population distribution. When migration is net positive this does not matter, as anyone age-sex group can move in. -#DEMOGRAPHIC_DATA <- DEMO_DATA -#DEMOGRAPHIC_DATA <- START_DEM_DATA +#PROB_AGE_DIST <- Migration_Age_Distribution +#DEMOGRAPHIC_DATA <- START_DETAILED_DATA MAKE_SET <- function(PROB_AGE_DIST,DEMOGRAPHIC_DATA){ # if(nrow(DEMOGRAPHIC_DATA)==86){DEMOGRAPHIC_DATA<- DEMOGRAPHIC_DATA[-1,]} #Drop age zero if it was included @@ -24,7 +24,11 @@ MAKE_SET <- function(PROB_AGE_DIST,DEMOGRAPHIC_DATA){ return(FINAL_OUT) } #A function to find the number of migrants leaving the county (net out), accounting for the fact that fewer or more people in any one age-sex bracket will decrease the odds of being the person to leave even if they are 18-19 and likely to leave. +#DEMO_DATA <- START_DETAILED_DATA +#NUM_MIGRATED <- 29 +#MIG_AGE_DIST <- Migration_Age_Distribution OUT_MIGRATION_SIMULATION <- function(NUM_MIGRATED,MIG_AGE_DIST,DEMO_DATA){ + MIG_AGE_DIST CURRENT_POP <- MAKE_SET(MIG_AGE_DIST,DEMO_DATA) NUM_POP <- nrow(CURRENT_POP) #Rank all individuals to easily combine with the ordinal data set @@ -33,12 +37,13 @@ OUT_MIGRATION_SIMULATION <- function(NUM_MIGRATED,MIG_AGE_DIST,DEMO_DATA){ CURRENT_POP$Migrated <- 0 #The people drawn first are assumed to migrate up to the point where all migration is filled. CURRENT_POP[ORD[1:NUM_MIGRATED],"Migrated"] <- -1 - MIG_STATUS <- CURRENT_POP %>% group_by(Age,Sex) %>% summarize(Migrated=sum(Migrated)) %>% arrange(desc(Sex),Age) %>% ungroup + MIG_STATUS <- CURRENT_POP %>% group_by(Age,Sex) %>% summarize(Migrated=sum(Migrated), .groups = 'drop')%>% arrange(desc(Sex),Age) return(MIG_STATUS) } #Function to find the number of migrants to a county, by age-sex when coming from outside the county. #NUM_MIGRATED <- TOTAL_MIGRATION; +#NUM_MIGRATED <- 0 IN_MIGRATION_SIMULATION <- function(NUM_MIGRATED,MIG_AGE_DIST){ NUM_MIGRATED <- abs(NUM_MIGRATED) NUM_AGES <- 85 @@ -52,7 +57,7 @@ IN_MIGRATION_SIMULATION <- function(NUM_MIGRATED,MIG_AGE_DIST){ colnames(MIGRATED_DATA) <- c("Age","Sex","Migrated") MIGRATED_DATA$Age <- as.numeric(MIGRATED_DATA$Age) MIGRATED_DATA$Migrated <- as.numeric(MIGRATED_DATA$Migrated) - MIGRATED_DATA <- MIGRATED_DATA %>% group_by(Age,Sex) %>% summarize(Migrated=sum(Migrated)) %>% ungroup %>% arrange(desc(Sex),Age) + MIGRATED_DATA <- MIGRATED_DATA %>% group_by(Age,Sex) %>% summarize(Migrated=sum(Migrated),.groups = 'drop') %>% arrange(desc(Sex),Age) return(MIGRATED_DATA) } # MIG_AGE_DIST <- Migration_Age_Distribution;DEMO_DATA <- START_DETAILED_DATA;NET_MIGRATION <- TOTAL_MIGRATION diff --git a/Scripts/Monte_Carlo_Functions.r b/Scripts/Monte_Carlo_Functions.r index a044090..79ca9ac 100644 --- a/Scripts/Monte_Carlo_Functions.r +++ b/Scripts/Monte_Carlo_Functions.r @@ -1,5 +1,8 @@ #Uncomment to check the function line by line # REG_BIRTH_MODEL=MOD_BIRTHS;START_BASIC_DATA=FIRST_PREDICT_YEAR_POPULATION_DATA;START_DETAILED_DATA=START_DEM_DATA;Mortality_Rate_Sim=MORTALITY_SIMULATION;SIM_NUMBER=1;START_OF_SIM_YEAR=2023;Migration_ARIMA_Sim=MIGRATION_ARIMA_SIMULATION;Migration_Age_Distribution=MIG_AGE_DIST + #FIRST_PREDICT_YEAR_POPULATION_DATA + #START_BASIC_DATA <- C_RES[[3]] + #START_DETAILED_DATA <- C_RES[[2]] RUN_SINGLE_SIM <- function(REG_BIRTH_MODEL,START_BASIC_DATA,START_DETAILED_DATA,Mortality_Rate_Sim,SIM_NUMBER,START_OF_SIM_YEAR=2023,Migration_ARIMA_Sim,Migration_Age_Distribution ){ #REG_BIRTH_MODEL: Feols regression object of population model. @@ -28,9 +31,11 @@ RUN_SINGLE_SIM <- function(REG_BIRTH_MODEL,START_BASIC_DATA,START_DETAILED_DATA, YEARS_AHEAD <- max(START_BASIC_DATA[,'Year']-START_OF_SIM_YEAR,1) #Define the number of years forward from the simulation start based on the current year of analysis, and the user provided first year. TOTAL_MIGRATION <- Migration_ARIMA_Sim[YEARS_AHEAD,SIM_NUMBER] START_BASIC_DATA[,"Migration"] <- TOTAL_MIGRATION - MIG <- MIGRATION_SIMULATION(Migration_Age_Distribution,START_DETAILED_DATA,TOTAL_MIGRATION) MORTALITY_INPUT_DETAILED_DATA <- START_DETAILED_DATA +if(TOTAL_MIGRATION!=0){ + MIG <- MIGRATION_SIMULATION(Migration_Age_Distribution,START_DETAILED_DATA,TOTAL_MIGRATION) MORTALITY_INPUT_DETAILED_DATA[-1,c("Num_Male","Num_Female")] <- MORTALITY_INPUT_DETAILED_DATA[-1,c("Num_Male","Num_Female")]+MIG[,c("Num_Male","Num_Female")] +} MORTALITY_C_RES <- MORTALITY_SIM(YEARS_AHEAD,SIM_NUMBER,MORTALITY_INPUT_DETAILED_DATA,FALSE,Mortality_Rate_Sim ) #Update number of deaths in the current run (which should be blank when supplied to the function) START_BASIC_DATA[,"Deaths"] <- sum(MORTALITY_C_RES[,c("Male_Deaths","Female_Deaths")] ) @@ -45,5 +50,5 @@ RUN_SINGLE_SIM <- function(REG_BIRTH_MODEL,START_BASIC_DATA,START_DETAILED_DATA, NEXT_BASIC_DATA[,"Female_Birth_Group"] <- sum(NEXT_DETAILED_DATA[NEXT_DETAILED_DATA$Age>=18 & NEXT_DETAILED_DATA$Age<=28,"Num_Female"]) NEXT_BASIC_DATA[,"Min_Birth_Group"] <- min(NEXT_BASIC_DATA[,c("Female_Birth_Group","Male_Birth_Group")]) NEXT_BASIC_DATA <- NEXT_BASIC_DATA[,-10:-11] - return(list(START_BASIC_DATA,NEXT_DETAILED_DATA,NEXT_BASIC_DATA)) + return(list(START_BASIC_DATA,NEXT_DETAILED_DATA %>% ungroup,NEXT_BASIC_DATA)) }