Lunch save

This commit is contained in:
Alex Gebben Work 2025-10-30 12:31:23 -06:00
parent c66d260d41
commit 747f752f61
3 changed files with 46 additions and 14 deletions

View File

@ -2,13 +2,14 @@
library(tidyverse) #Cleaning data library(tidyverse) #Cleaning data
library(fixest) #Estimating a model of birth rates, to provide variance in the birth rate Monte Carlo using a fixed effect model. library(fixest) #Estimating a model of birth rates, to provide variance in the birth rate Monte Carlo using a fixed effect model.
library(parallel) library(parallel)
library(uuid) #To add a index to each batch
#Load custom functions needed for the simulation #Load custom functions needed for the simulation
source("Scripts/Birth_Simulation_Functions.r") source("Scripts/Birth_Simulation_Functions.r")
source("Scripts/Monte_Carlo_Functions.r") source("Scripts/Monte_Carlo_Functions.r")
source("Scripts/Migration_Simulation_Functions.r") source("Scripts/Migration_Simulation_Functions.r")
#####User Configuration Values #####User Configuration Values
NUM_SIMULATIONS <- 10^6 #Number of Monte Carlo Simulations to run NUM_SIMULATIONS <- 10^5 #Number of Monte Carlo Simulations to run
RERUN_MORTALITY_SIMULATION <- TRUE #Rerun the Monte Carlo simulation of future mortality rates (not actual deaths) even if a Rds file of a mortality rates exists. This can be used to speed up runs when FALSE RERUN_MORTALITY_SIMULATION <- TRUE #Rerun the Monte Carlo simulation of future mortality rates (not actual deaths) even if a Rds file of a mortality rates exists. This can be used to speed up runs when FALSE
RERUN_MIGRATION_SIMULATION <- TRUE #Rerun the ARIMA simulations that predict total migration in any year even if a Rds file of a mortality rates exists. This can be used to speed up runs when FALSE RERUN_MIGRATION_SIMULATION <- TRUE #Rerun the ARIMA simulations that predict total migration in any year even if a Rds file of a mortality rates exists. This can be used to speed up runs when FALSE
@ -30,9 +31,6 @@ START_DEM_DATA <- readRDS("Data/Cleaned_Data/Lincoln_Demographic_Data.Rds") %>%
MORTALITY_SIMULATION <- readRDS("./Data/Simulated_Data_Sets/MORTALITY_MONTE_CARLO.Rds") #Load the Mortality simulation to speed up simulation MORTALITY_SIMULATION <- readRDS("./Data/Simulated_Data_Sets/MORTALITY_MONTE_CARLO.Rds") #Load the Mortality simulation to speed up simulation
MIGRATION_ARIMA_SIMULATION <- readRDS("./Data/Simulated_Data_Sets/Migration_ARIMA.Rds") #Load the Migration simulation to speed up simulation MIGRATION_ARIMA_SIMULATION <- readRDS("./Data/Simulated_Data_Sets/Migration_ARIMA.Rds") #Load the Migration simulation to speed up simulation
#First run
C_RES <-RUN_SINGLE_SIM(MOD_BIRTHS,FIRST_PREDICT_YEAR_POPULATION_DATA,START_DEM_DATA,MORTALITY_SIMULATION,SIM_NUMBER=1,START_OF_SIM=2023,MIGRATION_ARIMA_SIMULATION,MIG_AGE_DIST )
# Year County Population Births Deaths Migration Min_Birth_Group PREV_BIRTH PREV_TWO_BIRTH Male_Birth_Group
#Second run, work on making into loop and saving the output to file (check if that will slow the results). Maybe use a predifined matrix, so that the results can be stored quirckly #Second run, work on making into loop and saving the output to file (check if that will slow the results). Maybe use a predifined matrix, so that the results can be stored quirckly
SINGLE_PATH_SIM <- function(j){ SINGLE_PATH_SIM <- function(j){
C_RES <- RUN_SINGLE_SIM(MOD_BIRTHS,FIRST_PREDICT_YEAR_POPULATION_DATA,START_DEM_DATA,MORTALITY_SIMULATION,SIM_NUMBER=j,START_OF_SIM=2023,MIGRATION_ARIMA_SIMULATION,MIG_AGE_DIST) C_RES <- RUN_SINGLE_SIM(MOD_BIRTHS,FIRST_PREDICT_YEAR_POPULATION_DATA,START_DEM_DATA,MORTALITY_SIMULATION,SIM_NUMBER=j,START_OF_SIM=2023,MIGRATION_ARIMA_SIMULATION,MIG_AGE_DIST)
@ -44,9 +42,33 @@ SINGLE_PATH_SIM <- function(j){
return(RES) return(RES)
} }
#Run the full simulation across all simulations simulating changes in demographic, and mortality data. #Run the full simulation across all simulations simulating changes in demographic, and mortality data.
system.time({ #Setup save results
FULL_RESULTS <- mclapply(1:NUM_SIMULATIONS,SINGLE_PATH_SIM,mc.cores = detectCores()-1) RES_DIR <- "./Results"
RAW_SIM_FILE <- paste0(RES_DIR,"/Raw_Simulations.csv")
PERCENTILE_DATA <- paste0(RES_DIR,"/Percentile_Clean_Results.csv")
#Loop all results saving in batches
dir.create(RES_DIR,showWarnings=FALSE)
BATCH_SIZE <- 1000
NUM_RUNS <- ceiling(NUM_SIMULATIONS/BATCH_SIZE)
#Run the loop
for(x in 1:NUM_RUNS) {
BATCH_GUID <- UUIDgenerate()
FULL_RESULTS <- mclapply(1:BATCH_SIZE,SINGLE_PATH_SIM,mc.cores = detectCores()-1)
FULL_RESULTS <- do.call(rbind,lapply(1:BATCH_SIZE,function(x){FULL_RESULTS[[x]] %>% mutate(SIM_ID=UUIDgenerate())}))
FULL_RESULTS$BATCH_ID <- BATCH_GUID
FULL_RESULTS <- FULL_RESULTS%>% select(BATCH_ID,SIM_ID,everything())
if(x==1){write_csv(FULL_RESULTS,RAW_SIM_FILE)}else {write_csv(FULL_RESULTS,RAW_SIM_FILE,append=TRUE)}
rm(FULL_RESULTS)
gc()
}
###Process the simulations and save the main percentile results by year
FULL_RESULTS <- read_csv(RAW_SIM_FILE)
GRAPH_DATA <- do.call(rbind,lapply(YEARS,function(x){quantile(RES %>% filter(Year==x) %>% pull(Population),c(0.05,0.1,0.25,0.5,0.75,0.9,0.95))})) %>% as_tibble
YEARS <- 2023:(2023+NUM_YEARS_PROJECTED)
GRAPH_DATA$Year <- YEARS
GRAPH_DATA <- GRAPH_DATA %>% pivot_longer(cols=!Year,names_to=c("Percentile"),values_to="Population")
write_csv(GRAPH_DATA,PERCENTILE_DATA)
ggplot(aes(x=Year,y=Population,group=Percentile,color=Percentile),data=GRAPH_DATA)+geom_line()
})
plot(FULL_RESULTS[[1]][3])

View File

@ -1,7 +1,7 @@
#################################Simulate data #################################Simulate data
#A function used only inside the OUT_MIGRATION_SIMULATION function (see below). This creates a data set which has one entry for every individual in the county, with a assigned probability of moving out of the county. This allows the migration simulation to extract individuals based on true population distribution. When migration is net positive this does not matter, as anyone age-sex group can move in. #A function used only inside the OUT_MIGRATION_SIMULATION function (see below). This creates a data set which has one entry for every individual in the county, with a assigned probability of moving out of the county. This allows the migration simulation to extract individuals based on true population distribution. When migration is net positive this does not matter, as anyone age-sex group can move in.
#DEMOGRAPHIC_DATA <- DEMO_DATA #PROB_AGE_DIST <- Migration_Age_Distribution
#DEMOGRAPHIC_DATA <- START_DEM_DATA #DEMOGRAPHIC_DATA <- START_DETAILED_DATA
MAKE_SET <- function(PROB_AGE_DIST,DEMOGRAPHIC_DATA){ MAKE_SET <- function(PROB_AGE_DIST,DEMOGRAPHIC_DATA){
# if(nrow(DEMOGRAPHIC_DATA)==86){DEMOGRAPHIC_DATA<- DEMOGRAPHIC_DATA[-1,]} #Drop age zero if it was included # if(nrow(DEMOGRAPHIC_DATA)==86){DEMOGRAPHIC_DATA<- DEMOGRAPHIC_DATA[-1,]} #Drop age zero if it was included
@ -24,7 +24,11 @@ MAKE_SET <- function(PROB_AGE_DIST,DEMOGRAPHIC_DATA){
return(FINAL_OUT) return(FINAL_OUT)
} }
#A function to find the number of migrants leaving the county (net out), accounting for the fact that fewer or more people in any one age-sex bracket will decrease the odds of being the person to leave even if they are 18-19 and likely to leave. #A function to find the number of migrants leaving the county (net out), accounting for the fact that fewer or more people in any one age-sex bracket will decrease the odds of being the person to leave even if they are 18-19 and likely to leave.
#DEMO_DATA <- START_DETAILED_DATA
#NUM_MIGRATED <- 29
#MIG_AGE_DIST <- Migration_Age_Distribution
OUT_MIGRATION_SIMULATION <- function(NUM_MIGRATED,MIG_AGE_DIST,DEMO_DATA){ OUT_MIGRATION_SIMULATION <- function(NUM_MIGRATED,MIG_AGE_DIST,DEMO_DATA){
MIG_AGE_DIST
CURRENT_POP <- MAKE_SET(MIG_AGE_DIST,DEMO_DATA) CURRENT_POP <- MAKE_SET(MIG_AGE_DIST,DEMO_DATA)
NUM_POP <- nrow(CURRENT_POP) NUM_POP <- nrow(CURRENT_POP)
#Rank all individuals to easily combine with the ordinal data set #Rank all individuals to easily combine with the ordinal data set
@ -33,12 +37,13 @@ OUT_MIGRATION_SIMULATION <- function(NUM_MIGRATED,MIG_AGE_DIST,DEMO_DATA){
CURRENT_POP$Migrated <- 0 CURRENT_POP$Migrated <- 0
#The people drawn first are assumed to migrate up to the point where all migration is filled. #The people drawn first are assumed to migrate up to the point where all migration is filled.
CURRENT_POP[ORD[1:NUM_MIGRATED],"Migrated"] <- -1 CURRENT_POP[ORD[1:NUM_MIGRATED],"Migrated"] <- -1
MIG_STATUS <- CURRENT_POP %>% group_by(Age,Sex) %>% summarize(Migrated=sum(Migrated)) %>% arrange(desc(Sex),Age) %>% ungroup MIG_STATUS <- CURRENT_POP %>% group_by(Age,Sex) %>% summarize(Migrated=sum(Migrated), .groups = 'drop')%>% arrange(desc(Sex),Age)
return(MIG_STATUS) return(MIG_STATUS)
} }
#Function to find the number of migrants to a county, by age-sex when coming from outside the county. #Function to find the number of migrants to a county, by age-sex when coming from outside the county.
#NUM_MIGRATED <- TOTAL_MIGRATION; #NUM_MIGRATED <- TOTAL_MIGRATION;
#NUM_MIGRATED <- 0
IN_MIGRATION_SIMULATION <- function(NUM_MIGRATED,MIG_AGE_DIST){ IN_MIGRATION_SIMULATION <- function(NUM_MIGRATED,MIG_AGE_DIST){
NUM_MIGRATED <- abs(NUM_MIGRATED) NUM_MIGRATED <- abs(NUM_MIGRATED)
NUM_AGES <- 85 NUM_AGES <- 85
@ -52,7 +57,7 @@ IN_MIGRATION_SIMULATION <- function(NUM_MIGRATED,MIG_AGE_DIST){
colnames(MIGRATED_DATA) <- c("Age","Sex","Migrated") colnames(MIGRATED_DATA) <- c("Age","Sex","Migrated")
MIGRATED_DATA$Age <- as.numeric(MIGRATED_DATA$Age) MIGRATED_DATA$Age <- as.numeric(MIGRATED_DATA$Age)
MIGRATED_DATA$Migrated <- as.numeric(MIGRATED_DATA$Migrated) MIGRATED_DATA$Migrated <- as.numeric(MIGRATED_DATA$Migrated)
MIGRATED_DATA <- MIGRATED_DATA %>% group_by(Age,Sex) %>% summarize(Migrated=sum(Migrated)) %>% ungroup %>% arrange(desc(Sex),Age) MIGRATED_DATA <- MIGRATED_DATA %>% group_by(Age,Sex) %>% summarize(Migrated=sum(Migrated),.groups = 'drop') %>% arrange(desc(Sex),Age)
return(MIGRATED_DATA) return(MIGRATED_DATA)
} }
# MIG_AGE_DIST <- Migration_Age_Distribution;DEMO_DATA <- START_DETAILED_DATA;NET_MIGRATION <- TOTAL_MIGRATION # MIG_AGE_DIST <- Migration_Age_Distribution;DEMO_DATA <- START_DETAILED_DATA;NET_MIGRATION <- TOTAL_MIGRATION

View File

@ -1,5 +1,8 @@
#Uncomment to check the function line by line #Uncomment to check the function line by line
# REG_BIRTH_MODEL=MOD_BIRTHS;START_BASIC_DATA=FIRST_PREDICT_YEAR_POPULATION_DATA;START_DETAILED_DATA=START_DEM_DATA;Mortality_Rate_Sim=MORTALITY_SIMULATION;SIM_NUMBER=1;START_OF_SIM_YEAR=2023;Migration_ARIMA_Sim=MIGRATION_ARIMA_SIMULATION;Migration_Age_Distribution=MIG_AGE_DIST # REG_BIRTH_MODEL=MOD_BIRTHS;START_BASIC_DATA=FIRST_PREDICT_YEAR_POPULATION_DATA;START_DETAILED_DATA=START_DEM_DATA;Mortality_Rate_Sim=MORTALITY_SIMULATION;SIM_NUMBER=1;START_OF_SIM_YEAR=2023;Migration_ARIMA_Sim=MIGRATION_ARIMA_SIMULATION;Migration_Age_Distribution=MIG_AGE_DIST
#FIRST_PREDICT_YEAR_POPULATION_DATA
#START_BASIC_DATA <- C_RES[[3]]
#START_DETAILED_DATA <- C_RES[[2]]
RUN_SINGLE_SIM <- function(REG_BIRTH_MODEL,START_BASIC_DATA,START_DETAILED_DATA,Mortality_Rate_Sim,SIM_NUMBER,START_OF_SIM_YEAR=2023,Migration_ARIMA_Sim,Migration_Age_Distribution ){ RUN_SINGLE_SIM <- function(REG_BIRTH_MODEL,START_BASIC_DATA,START_DETAILED_DATA,Mortality_Rate_Sim,SIM_NUMBER,START_OF_SIM_YEAR=2023,Migration_ARIMA_Sim,Migration_Age_Distribution ){
#REG_BIRTH_MODEL: Feols regression object of population model. #REG_BIRTH_MODEL: Feols regression object of population model.
@ -28,9 +31,11 @@ RUN_SINGLE_SIM <- function(REG_BIRTH_MODEL,START_BASIC_DATA,START_DETAILED_DATA,
YEARS_AHEAD <- max(START_BASIC_DATA[,'Year']-START_OF_SIM_YEAR,1) #Define the number of years forward from the simulation start based on the current year of analysis, and the user provided first year. YEARS_AHEAD <- max(START_BASIC_DATA[,'Year']-START_OF_SIM_YEAR,1) #Define the number of years forward from the simulation start based on the current year of analysis, and the user provided first year.
TOTAL_MIGRATION <- Migration_ARIMA_Sim[YEARS_AHEAD,SIM_NUMBER] TOTAL_MIGRATION <- Migration_ARIMA_Sim[YEARS_AHEAD,SIM_NUMBER]
START_BASIC_DATA[,"Migration"] <- TOTAL_MIGRATION START_BASIC_DATA[,"Migration"] <- TOTAL_MIGRATION
MIG <- MIGRATION_SIMULATION(Migration_Age_Distribution,START_DETAILED_DATA,TOTAL_MIGRATION)
MORTALITY_INPUT_DETAILED_DATA <- START_DETAILED_DATA MORTALITY_INPUT_DETAILED_DATA <- START_DETAILED_DATA
if(TOTAL_MIGRATION!=0){
MIG <- MIGRATION_SIMULATION(Migration_Age_Distribution,START_DETAILED_DATA,TOTAL_MIGRATION)
MORTALITY_INPUT_DETAILED_DATA[-1,c("Num_Male","Num_Female")] <- MORTALITY_INPUT_DETAILED_DATA[-1,c("Num_Male","Num_Female")]+MIG[,c("Num_Male","Num_Female")] MORTALITY_INPUT_DETAILED_DATA[-1,c("Num_Male","Num_Female")] <- MORTALITY_INPUT_DETAILED_DATA[-1,c("Num_Male","Num_Female")]+MIG[,c("Num_Male","Num_Female")]
}
MORTALITY_C_RES <- MORTALITY_SIM(YEARS_AHEAD,SIM_NUMBER,MORTALITY_INPUT_DETAILED_DATA,FALSE,Mortality_Rate_Sim ) MORTALITY_C_RES <- MORTALITY_SIM(YEARS_AHEAD,SIM_NUMBER,MORTALITY_INPUT_DETAILED_DATA,FALSE,Mortality_Rate_Sim )
#Update number of deaths in the current run (which should be blank when supplied to the function) #Update number of deaths in the current run (which should be blank when supplied to the function)
START_BASIC_DATA[,"Deaths"] <- sum(MORTALITY_C_RES[,c("Male_Deaths","Female_Deaths")] ) START_BASIC_DATA[,"Deaths"] <- sum(MORTALITY_C_RES[,c("Male_Deaths","Female_Deaths")] )
@ -45,5 +50,5 @@ RUN_SINGLE_SIM <- function(REG_BIRTH_MODEL,START_BASIC_DATA,START_DETAILED_DATA,
NEXT_BASIC_DATA[,"Female_Birth_Group"] <- sum(NEXT_DETAILED_DATA[NEXT_DETAILED_DATA$Age>=18 & NEXT_DETAILED_DATA$Age<=28,"Num_Female"]) NEXT_BASIC_DATA[,"Female_Birth_Group"] <- sum(NEXT_DETAILED_DATA[NEXT_DETAILED_DATA$Age>=18 & NEXT_DETAILED_DATA$Age<=28,"Num_Female"])
NEXT_BASIC_DATA[,"Min_Birth_Group"] <- min(NEXT_BASIC_DATA[,c("Female_Birth_Group","Male_Birth_Group")]) NEXT_BASIC_DATA[,"Min_Birth_Group"] <- min(NEXT_BASIC_DATA[,c("Female_Birth_Group","Male_Birth_Group")])
NEXT_BASIC_DATA <- NEXT_BASIC_DATA[,-10:-11] NEXT_BASIC_DATA <- NEXT_BASIC_DATA[,-10:-11]
return(list(START_BASIC_DATA,NEXT_DETAILED_DATA,NEXT_BASIC_DATA)) return(list(START_BASIC_DATA,NEXT_DETAILED_DATA %>% ungroup,NEXT_BASIC_DATA))
} }