Lunch save

2025-10-30 12:31:23 -06:00 · 2025-10-30 12:31:23 -06:00 · 747f752f61
commit 747f752f61
parent c66d260d41
3 changed files with 46 additions and 14 deletions
--- a/1_Run_Full_Simulation.r
+++ b/1_Run_Full_Simulation.r
@ -2,13 +2,14 @@
 library(tidyverse) #Cleaning data
 library(fixest) #Estimating a model of birth rates, to provide variance in the birth rate Monte Carlo using a fixed effect model.
 library(parallel)
 library(uuid) #To add a index to each batch
 #Load custom functions needed for the simulation
 source("Scripts/Birth_Simulation_Functions.r")
 source("Scripts/Monte_Carlo_Functions.r")
 source("Scripts/Migration_Simulation_Functions.r")
 #####User Configuration Values
-	NUM_SIMULATIONS <- 10^6 #Number of Monte Carlo Simulations to run
+	NUM_SIMULATIONS <- 10^5 #Number of Monte Carlo Simulations to run
 	RERUN_MORTALITY_SIMULATION <- TRUE #Rerun the Monte Carlo simulation of future mortality rates (not actual deaths) even if a Rds file of a mortality rates exists. This can be used to speed up runs when FALSE
 	RERUN_MIGRATION_SIMULATION <- TRUE #Rerun the ARIMA simulations that predict total migration in any year even if a Rds file of a mortality rates exists. This can be used to speed up runs when FALSE
@ -30,9 +31,6 @@ START_DEM_DATA <- readRDS("Data/Cleaned_Data/Lincoln_Demographic_Data.Rds") %>%
 MORTALITY_SIMULATION <- readRDS("./Data/Simulated_Data_Sets/MORTALITY_MONTE_CARLO.Rds") #Load the Mortality simulation to speed up simulation
 MIGRATION_ARIMA_SIMULATION <- readRDS("./Data/Simulated_Data_Sets/Migration_ARIMA.Rds") #Load the Migration simulation to speed up simulation
 #First run
 C_RES <-RUN_SINGLE_SIM(MOD_BIRTHS,FIRST_PREDICT_YEAR_POPULATION_DATA,START_DEM_DATA,MORTALITY_SIMULATION,SIM_NUMBER=1,START_OF_SIM=2023,MIGRATION_ARIMA_SIMULATION,MIG_AGE_DIST )
 # Year County  Population Births Deaths Migration Min_Birth_Group PREV_BIRTH PREV_TWO_BIRTH Male_Birth_Group
 #Second run, work on making into loop and saving the output to file (check if that will slow the results). Maybe use a predifined matrix, so that the results can be stored quirckly
 SINGLE_PATH_SIM <- function(j){
  C_RES <- RUN_SINGLE_SIM(MOD_BIRTHS,FIRST_PREDICT_YEAR_POPULATION_DATA,START_DEM_DATA,MORTALITY_SIMULATION,SIM_NUMBER=j,START_OF_SIM=2023,MIGRATION_ARIMA_SIMULATION,MIG_AGE_DIST)
@ -44,9 +42,33 @@ SINGLE_PATH_SIM <- function(j){
  return(RES)
 }
 #Run the full simulation across all simulations simulating changes in demographic, and mortality data.
-system.time({
+	#Setup save results
-FULL_RESULTS <- mclapply(1:NUM_SIMULATIONS,SINGLE_PATH_SIM,mc.cores = detectCores()-1)
+	RES_DIR <- "./Results"
 	RAW_SIM_FILE <- paste0(RES_DIR,"/Raw_Simulations.csv")
 	PERCENTILE_DATA <- paste0(RES_DIR,"/Percentile_Clean_Results.csv")
 	#Loop all results saving in batches
 	dir.create(RES_DIR,showWarnings=FALSE)
 	BATCH_SIZE <- 1000
 	NUM_RUNS <- ceiling(NUM_SIMULATIONS/BATCH_SIZE)
 	#Run the loop
 for(x in 1:NUM_RUNS) {
 	BATCH_GUID <- UUIDgenerate()
 	FULL_RESULTS <- mclapply(1:BATCH_SIZE,SINGLE_PATH_SIM,mc.cores = detectCores()-1)
 	FULL_RESULTS <- do.call(rbind,lapply(1:BATCH_SIZE,function(x){FULL_RESULTS[[x]] %>% mutate(SIM_ID=UUIDgenerate())}))
 	FULL_RESULTS$BATCH_ID <- BATCH_GUID
 	FULL_RESULTS <- FULL_RESULTS%>% select(BATCH_ID,SIM_ID,everything())
 	if(x==1){write_csv(FULL_RESULTS,RAW_SIM_FILE)}else {write_csv(FULL_RESULTS,RAW_SIM_FILE,append=TRUE)}
 	rm(FULL_RESULTS)
 	gc()
 }
 ###Process the simulations and save the main percentile results by year
 	FULL_RESULTS <- read_csv(RAW_SIM_FILE)
 	GRAPH_DATA <- do.call(rbind,lapply(YEARS,function(x){quantile(RES %>% filter(Year==x) %>% pull(Population),c(0.05,0.1,0.25,0.5,0.75,0.9,0.95))}))  %>% as_tibble
 	YEARS <- 2023:(2023+NUM_YEARS_PROJECTED)
 	GRAPH_DATA$Year <- YEARS 
 	GRAPH_DATA <- GRAPH_DATA %>% pivot_longer(cols=!Year,names_to=c("Percentile"),values_to="Population")
 	write_csv(GRAPH_DATA,PERCENTILE_DATA)
 	ggplot(aes(x=Year,y=Population,group=Percentile,color=Percentile),data=GRAPH_DATA)+geom_line()
 })
 plot(FULL_RESULTS[[1]][3])
--- a/Scripts/Migration_Simulation_Functions.r
+++ b/Scripts/Migration_Simulation_Functions.r
@ -1,7 +1,7 @@
 #################################Simulate data
 #A function used only inside the OUT_MIGRATION_SIMULATION function (see below). This creates a data set which has one entry for every individual in the county, with a assigned probability of moving out of the county. This allows the migration simulation to extract individuals based on true population distribution. When migration is net positive this does not matter, as anyone age-sex group can move in.
-#DEMOGRAPHIC_DATA <- DEMO_DATA 
+#PROB_AGE_DIST <- Migration_Age_Distribution 
-#DEMOGRAPHIC_DATA <- START_DEM_DATA
+#DEMOGRAPHIC_DATA <- START_DETAILED_DATA 
 MAKE_SET <- function(PROB_AGE_DIST,DEMOGRAPHIC_DATA){
 #		if(nrow(DEMOGRAPHIC_DATA)==86){DEMOGRAPHIC_DATA<- DEMOGRAPHIC_DATA[-1,]} #Drop age zero if it was included
@ -24,7 +24,11 @@ MAKE_SET <- function(PROB_AGE_DIST,DEMOGRAPHIC_DATA){
 	return(FINAL_OUT)
 }
 #A function to find the number of migrants leaving the county (net out), accounting for the fact that fewer or more people in any one age-sex bracket will decrease the odds of being the person to leave even if they are 18-19 and likely to leave.
 #DEMO_DATA <- START_DETAILED_DATA
 #NUM_MIGRATED <- 29
 #MIG_AGE_DIST <- Migration_Age_Distribution
 OUT_MIGRATION_SIMULATION <- function(NUM_MIGRATED,MIG_AGE_DIST,DEMO_DATA){
 	MIG_AGE_DIST
 	CURRENT_POP <- MAKE_SET(MIG_AGE_DIST,DEMO_DATA)
 	NUM_POP <- nrow(CURRENT_POP)
 	#Rank all individuals to easily combine with the ordinal data set
@ -33,12 +37,13 @@ OUT_MIGRATION_SIMULATION <- function(NUM_MIGRATED,MIG_AGE_DIST,DEMO_DATA){
 	CURRENT_POP$Migrated <- 0 
 	#The people drawn first are assumed to migrate up to the point where all migration is filled.
 	CURRENT_POP[ORD[1:NUM_MIGRATED],"Migrated"] <- -1
-	MIG_STATUS <- CURRENT_POP %>% group_by(Age,Sex) %>% summarize(Migrated=sum(Migrated)) %>% arrange(desc(Sex),Age) %>% ungroup
+	MIG_STATUS <- CURRENT_POP %>% group_by(Age,Sex) %>% summarize(Migrated=sum(Migrated), .groups = 'drop')%>% arrange(desc(Sex),Age) 
 	return(MIG_STATUS)
 }
 #Function to find the number of migrants to a county, by age-sex when coming from outside the county.
 #NUM_MIGRATED <- TOTAL_MIGRATION;
 #NUM_MIGRATED <- 0
 IN_MIGRATION_SIMULATION <- function(NUM_MIGRATED,MIG_AGE_DIST){
 	NUM_MIGRATED <-	abs(NUM_MIGRATED)
 	NUM_AGES <- 85
@ -52,7 +57,7 @@ IN_MIGRATION_SIMULATION <- function(NUM_MIGRATED,MIG_AGE_DIST){
 	colnames(MIGRATED_DATA) <- c("Age","Sex","Migrated")
 	MIGRATED_DATA$Age <-  as.numeric(MIGRATED_DATA$Age)
 	MIGRATED_DATA$Migrated <-  as.numeric(MIGRATED_DATA$Migrated)
-	MIGRATED_DATA <- MIGRATED_DATA %>% group_by(Age,Sex) %>% summarize(Migrated=sum(Migrated)) %>% ungroup %>% arrange(desc(Sex),Age)
+	MIGRATED_DATA <- MIGRATED_DATA %>% group_by(Age,Sex) %>% summarize(Migrated=sum(Migrated),.groups = 'drop')  %>% arrange(desc(Sex),Age)
 	return(MIGRATED_DATA)
 }
 #	MIG_AGE_DIST <- Migration_Age_Distribution;DEMO_DATA <- START_DETAILED_DATA;NET_MIGRATION <- TOTAL_MIGRATION
--- a/Scripts/Monte_Carlo_Functions.r
+++ b/Scripts/Monte_Carlo_Functions.r
@ -1,5 +1,8 @@
 #Uncomment to check the function line by line
 #	REG_BIRTH_MODEL=MOD_BIRTHS;START_BASIC_DATA=FIRST_PREDICT_YEAR_POPULATION_DATA;START_DETAILED_DATA=START_DEM_DATA;Mortality_Rate_Sim=MORTALITY_SIMULATION;SIM_NUMBER=1;START_OF_SIM_YEAR=2023;Migration_ARIMA_Sim=MIGRATION_ARIMA_SIMULATION;Migration_Age_Distribution=MIG_AGE_DIST
 	#FIRST_PREDICT_YEAR_POPULATION_DATA
 	#START_BASIC_DATA <- C_RES[[3]]
 	#START_DETAILED_DATA <- C_RES[[2]]
 RUN_SINGLE_SIM <- function(REG_BIRTH_MODEL,START_BASIC_DATA,START_DETAILED_DATA,Mortality_Rate_Sim,SIM_NUMBER,START_OF_SIM_YEAR=2023,Migration_ARIMA_Sim,Migration_Age_Distribution ){
 	#REG_BIRTH_MODEL: Feols regression object of population model.
@ -28,9 +31,11 @@ RUN_SINGLE_SIM <- function(REG_BIRTH_MODEL,START_BASIC_DATA,START_DETAILED_DATA,
 		YEARS_AHEAD <- max(START_BASIC_DATA[,'Year']-START_OF_SIM_YEAR,1) #Define the number of years forward from the simulation start based on the current year of analysis, and the user provided first year.
 	TOTAL_MIGRATION <- Migration_ARIMA_Sim[YEARS_AHEAD,SIM_NUMBER]
 	START_BASIC_DATA[,"Migration"] <- TOTAL_MIGRATION
 	MIG <- MIGRATION_SIMULATION(Migration_Age_Distribution,START_DETAILED_DATA,TOTAL_MIGRATION)
 	MORTALITY_INPUT_DETAILED_DATA <- START_DETAILED_DATA
 if(TOTAL_MIGRATION!=0){
 	MIG <- MIGRATION_SIMULATION(Migration_Age_Distribution,START_DETAILED_DATA,TOTAL_MIGRATION)
 	MORTALITY_INPUT_DETAILED_DATA[-1,c("Num_Male","Num_Female")] <- MORTALITY_INPUT_DETAILED_DATA[-1,c("Num_Male","Num_Female")]+MIG[,c("Num_Male","Num_Female")]
 }
 	MORTALITY_C_RES <- MORTALITY_SIM(YEARS_AHEAD,SIM_NUMBER,MORTALITY_INPUT_DETAILED_DATA,FALSE,Mortality_Rate_Sim )
 	#Update number of deaths in the current run (which should be blank when supplied to the function)
        START_BASIC_DATA[,"Deaths"] <- sum(MORTALITY_C_RES[,c("Male_Deaths","Female_Deaths")] )
@ -45,5 +50,5 @@ RUN_SINGLE_SIM <- function(REG_BIRTH_MODEL,START_BASIC_DATA,START_DETAILED_DATA,
 	NEXT_BASIC_DATA[,"Female_Birth_Group"] <-  sum(NEXT_DETAILED_DATA[NEXT_DETAILED_DATA$Age>=18 & NEXT_DETAILED_DATA$Age<=28,"Num_Female"])
 	NEXT_BASIC_DATA[,"Min_Birth_Group"] <- min(NEXT_BASIC_DATA[,c("Female_Birth_Group","Male_Birth_Group")])
 	NEXT_BASIC_DATA <-	NEXT_BASIC_DATA[,-10:-11]
-	return(list(START_BASIC_DATA,NEXT_DETAILED_DATA,NEXT_BASIC_DATA))
+	return(list(START_BASIC_DATA,NEXT_DETAILED_DATA %>% ungroup,NEXT_BASIC_DATA))
 }