From fa11049040c4a6c4e1668f3bc00988adcbe70024 Mon Sep 17 00:00:00 2001 From: Alex Gebben Work Date: Thu, 30 Oct 2025 15:59:42 -0600 Subject: [PATCH] Running Lincoln sim --- .gitignore | 3 +++ 1_Run_Full_Simulation.r | 25 +++++++++---------------- 2_Result_Analysis.r | 28 ++++++++++++++++++++++++++++ Migration_Regression.r | 3 ++- 4 files changed, 42 insertions(+), 17 deletions(-) create mode 100644 2_Result_Analysis.r diff --git a/.gitignore b/.gitignore index d66191e..5be186d 100644 --- a/.gitignore +++ b/.gitignore @@ -1,4 +1,7 @@ # ---> R +# +*.png +Results/ #Don't save any major data files on the server, can be regenerated after pulling *.Rds # ##Very large simulation should not be saved diff --git a/1_Run_Full_Simulation.r b/1_Run_Full_Simulation.r index 5e593a5..f871d47 100644 --- a/1_Run_Full_Simulation.r +++ b/1_Run_Full_Simulation.r @@ -53,22 +53,15 @@ SINGLE_PATH_SIM <- function(j){ #Run the loop for(x in 1:NUM_RUNS) { BATCH_GUID <- UUIDgenerate() - FULL_RESULTS <- mclapply(1:BATCH_SIZE,SINGLE_PATH_SIM,mc.cores = detectCores()-1) - FULL_RESULTS <- do.call(rbind,lapply(1:BATCH_SIZE,function(x){FULL_RESULTS[[x]] %>% mutate(SIM_ID=UUIDgenerate())})) - FULL_RESULTS$BATCH_ID <- BATCH_GUID - FULL_RESULTS <- FULL_RESULTS%>% select(BATCH_ID,SIM_ID,everything()) - if(x==1){write_csv(FULL_RESULTS,RAW_SIM_FILE)}else {write_csv(FULL_RESULTS,RAW_SIM_FILE,append=TRUE)} - rm(FULL_RESULTS) - gc() + try(FULL_RESULTS <- mclapply(1:BATCH_SIZE,function(x){try(SINGLE_PATH_SIM(x))},mc.cores = detectCores()-1)) + if(exists("FULL_RESULTS")){ + FULL_RESULTS <- do.call(rbind,lapply(1:BATCH_SIZE,function(x){FULL_RESULTS[[x]] %>% mutate(SIM_ID=UUIDgenerate())})) + FULL_RESULTS$BATCH_ID <- BATCH_GUID + FULL_RESULTS <- FULL_RESULTS%>% select(BATCH_ID,SIM_ID,everything()) + if(x==1){write_csv(FULL_RESULTS,RAW_SIM_FILE)}else {write_csv(FULL_RESULTS,RAW_SIM_FILE,append=TRUE)} + rm(FULL_RESULTS) + gc() +} } -###Process the simulations and save the main percentile results by year - FULL_RESULTS <- read_csv(RAW_SIM_FILE) - GRAPH_DATA <- do.call(rbind,lapply(YEARS,function(x){quantile(RES %>% filter(Year==x) %>% pull(Population),c(0.05,0.1,0.25,0.5,0.75,0.9,0.95))})) %>% as_tibble - YEARS <- 2023:(2023+NUM_YEARS_PROJECTED) - GRAPH_DATA$Year <- YEARS - GRAPH_DATA <- GRAPH_DATA %>% pivot_longer(cols=!Year,names_to=c("Percentile"),values_to="Population") - write_csv(GRAPH_DATA,PERCENTILE_DATA) - ggplot(aes(x=Year,y=Population,group=Percentile,color=Percentile),data=GRAPH_DATA)+geom_line() - diff --git a/2_Result_Analysis.r b/2_Result_Analysis.r new file mode 100644 index 0000000..56397a5 --- /dev/null +++ b/2_Result_Analysis.r @@ -0,0 +1,28 @@ +library(tidyverse) + NUM_YEARS_PROJECTED <- 50 #How many years into the future should each Monte Carlo run project to. For example 25 years if starting from 2025 and ending in 2050. +YEARS <- 2023:(2023+NUM_YEARS_PROJECTED) +#Setup save results + RES_DIR <- "./Results" + RAW_SIM_FILE <- paste0(RES_DIR,"/Raw_Simulations.csv") + PERCENTILE_DATA <- paste0(RES_DIR,"/Percentile_Clean_Results.csv") + +###Process the simulations and save the main percentile results by year + RES <- read_csv(RAW_SIM_FILE) + GRAPH_DATA <- do.call(rbind,lapply(YEARS,function(x){quantile(RES %>% filter(Year==x) %>% pull(Population),c(0.025,0.05,0.1,0.25,0.4,0.5,0.6,0.75,0.9,0.95,0.975))})) %>% as_tibble + YEARS <- 2023:(2023+NUM_YEARS_PROJECTED) + GRAPH_DATA$Year <- YEARS + FAN_DATA <- GRAPH_DATA + GRAPH_DATA <- GRAPH_DATA %>% pivot_longer(cols=!Year,names_to=c("Percentile"),values_to="Population") + write_csv(GRAPH_DATA,PERCENTILE_DATA) + #Add historic + MEDIAN_PRED <- GRAPH_DATA %>% filter(Percentile=='50%') + GRAPH_DATA <- GRAPH_DATA %>% filter(Percentile!='50%') + + HIST <- readRDS("Data/Cleaned_Data/Wyoming_County_Population.Rds") %>% filter(County=='Lincoln') %>% mutate(Percentile="Actual Population") %>% filter(Year>1930) + ALPHA=0.2 + COLOR <- 'black' +GRAPH_DATA$Percentile <- factor(GRAPH_DATA$Percentile,levels=rev(c('2.5%','5%','10%','25%','40%','60%','75%','90%','95%','97.5%'))) +GRAPH <- ggplot(data=GRAPH_DATA)+geom_ribbon(data=FAN_DATA,aes(x=Year,ymin=`2.5%`,ymax=`97.5%`),alpha=ALPHA,fill=COLOR)+geom_ribbon(data=FAN_DATA,aes(x=Year,ymin=`5%`,ymax=`95%`),alpha=ALPHA,fill=COLOR)+geom_ribbon(data=FAN_DATA,aes(x=Year,ymin=`10%`,ymax=`90%`),alpha=ALPHA,fill=COLOR)+geom_ribbon(data=FAN_DATA,aes(x=Year,ymin=`25%`,ymax=`75%`),alpha=ALPHA,fill=COLOR)+geom_ribbon(data=FAN_DATA,aes(x=Year,ymin=`40%`,ymax=`60%`),alpha=ALPHA,fill=COLOR)+geom_line(aes(x=Year,y=Population,group=Percentile,color=Percentile))+geom_line(data=HIST,aes(x=Year,y=Population),color='black',size=0.75)+geom_line(data=MEDIAN_PRED,aes(x=Year,y=Population),color='black',linetype=4,size=0.75)+ scale_x_continuous(breaks = c(seq(1940, 2070, by = 10)))+ scale_y_continuous(breaks = seq(0, 35000, by = 5000))+theme_bw()+ggtitle("Lincoln County, Wyoming Population Forecast") +GRAPH +ggsave("Lincoln_Forecast.png",GRAPH) + diff --git a/Migration_Regression.r b/Migration_Regression.r index 8f1ee39..58f5ea8 100644 --- a/Migration_Regression.r +++ b/Migration_Regression.r @@ -60,7 +60,8 @@ GRAPH_DATA <- RES %>% filter(abs(MIGRATION_COEF)% filter(Age!=2 ##Graph when using log scales and grouping by child/adult. Looks pretty linear #ggplot(GRAPH_DATA,aes(x=Age,y=MIGRATION_COEF,group=Group,color=Group)) +geom_point()+geom_smooth(method="lm")+ scale_y_continuous(trans = scales::log_trans()) #Graph when not using log scale but including a geom_smooth to show the actual trend. - #ggplot(GRAPH_DATA,aes(x=Age,y=MIGRATION_COEF,group=Group,color=Group)) +geom_point()+geom_smooth(span=0.9) +#GRAPH <-ggplot(GRAPH_DATA,aes(x=Age,y=MIGRATION_COEF,group=Group,color=Group)) +geom_point()+geom_smooth(span=0.9)+ylab("Migration Coefficient (Pop. Change Per Added Immigrant)") +#ggsave("Migration_Age_Distribution.png",GRAPH) ####Create results which find a functional form for the probability that a migrant is in a certain age bracket, so that the probability of any age can be drawn from in the Monte Carlo for net migration numbers. Note that a function is used, because point estimates will have large variably, but the overall trend looks VERY clean. CHILD_MOD <- lm(log(MIGRATION_COEF)~Age,data=GRAPH_DATA %>% filter(Group=='Child')) #The childhood range (1-18), has a great exponential fit with age, but has a different trend than adults. Because there are fewer data points we prefer a exponential fit, compared to a smoothed fit as the variance changes the end points, yet in both cases a exponential fit looks good.