Running Lincoln sim

This commit is contained in:
Alex Gebben Work 2025-10-30 15:59:42 -06:00
parent 747f752f61
commit fa11049040
4 changed files with 42 additions and 17 deletions

3
.gitignore vendored
View File

@ -1,4 +1,7 @@
# ---> R
#
*.png
Results/
#Don't save any major data files on the server, can be regenerated after pulling
*.Rds
# ##Very large simulation should not be saved

View File

@ -53,22 +53,15 @@ SINGLE_PATH_SIM <- function(j){
#Run the loop
for(x in 1:NUM_RUNS) {
BATCH_GUID <- UUIDgenerate()
FULL_RESULTS <- mclapply(1:BATCH_SIZE,SINGLE_PATH_SIM,mc.cores = detectCores()-1)
FULL_RESULTS <- do.call(rbind,lapply(1:BATCH_SIZE,function(x){FULL_RESULTS[[x]] %>% mutate(SIM_ID=UUIDgenerate())}))
FULL_RESULTS$BATCH_ID <- BATCH_GUID
FULL_RESULTS <- FULL_RESULTS%>% select(BATCH_ID,SIM_ID,everything())
if(x==1){write_csv(FULL_RESULTS,RAW_SIM_FILE)}else {write_csv(FULL_RESULTS,RAW_SIM_FILE,append=TRUE)}
rm(FULL_RESULTS)
gc()
try(FULL_RESULTS <- mclapply(1:BATCH_SIZE,function(x){try(SINGLE_PATH_SIM(x))},mc.cores = detectCores()-1))
if(exists("FULL_RESULTS")){
FULL_RESULTS <- do.call(rbind,lapply(1:BATCH_SIZE,function(x){FULL_RESULTS[[x]] %>% mutate(SIM_ID=UUIDgenerate())}))
FULL_RESULTS$BATCH_ID <- BATCH_GUID
FULL_RESULTS <- FULL_RESULTS%>% select(BATCH_ID,SIM_ID,everything())
if(x==1){write_csv(FULL_RESULTS,RAW_SIM_FILE)}else {write_csv(FULL_RESULTS,RAW_SIM_FILE,append=TRUE)}
rm(FULL_RESULTS)
gc()
}
}
###Process the simulations and save the main percentile results by year
FULL_RESULTS <- read_csv(RAW_SIM_FILE)
GRAPH_DATA <- do.call(rbind,lapply(YEARS,function(x){quantile(RES %>% filter(Year==x) %>% pull(Population),c(0.05,0.1,0.25,0.5,0.75,0.9,0.95))})) %>% as_tibble
YEARS <- 2023:(2023+NUM_YEARS_PROJECTED)
GRAPH_DATA$Year <- YEARS
GRAPH_DATA <- GRAPH_DATA %>% pivot_longer(cols=!Year,names_to=c("Percentile"),values_to="Population")
write_csv(GRAPH_DATA,PERCENTILE_DATA)
ggplot(aes(x=Year,y=Population,group=Percentile,color=Percentile),data=GRAPH_DATA)+geom_line()

28
2_Result_Analysis.r Normal file
View File

@ -0,0 +1,28 @@
library(tidyverse)
NUM_YEARS_PROJECTED <- 50 #How many years into the future should each Monte Carlo run project to. For example 25 years if starting from 2025 and ending in 2050.
YEARS <- 2023:(2023+NUM_YEARS_PROJECTED)
#Setup save results
RES_DIR <- "./Results"
RAW_SIM_FILE <- paste0(RES_DIR,"/Raw_Simulations.csv")
PERCENTILE_DATA <- paste0(RES_DIR,"/Percentile_Clean_Results.csv")
###Process the simulations and save the main percentile results by year
RES <- read_csv(RAW_SIM_FILE)
GRAPH_DATA <- do.call(rbind,lapply(YEARS,function(x){quantile(RES %>% filter(Year==x) %>% pull(Population),c(0.025,0.05,0.1,0.25,0.4,0.5,0.6,0.75,0.9,0.95,0.975))})) %>% as_tibble
YEARS <- 2023:(2023+NUM_YEARS_PROJECTED)
GRAPH_DATA$Year <- YEARS
FAN_DATA <- GRAPH_DATA
GRAPH_DATA <- GRAPH_DATA %>% pivot_longer(cols=!Year,names_to=c("Percentile"),values_to="Population")
write_csv(GRAPH_DATA,PERCENTILE_DATA)
#Add historic
MEDIAN_PRED <- GRAPH_DATA %>% filter(Percentile=='50%')
GRAPH_DATA <- GRAPH_DATA %>% filter(Percentile!='50%')
HIST <- readRDS("Data/Cleaned_Data/Wyoming_County_Population.Rds") %>% filter(County=='Lincoln') %>% mutate(Percentile="Actual Population") %>% filter(Year>1930)
ALPHA=0.2
COLOR <- 'black'
GRAPH_DATA$Percentile <- factor(GRAPH_DATA$Percentile,levels=rev(c('2.5%','5%','10%','25%','40%','60%','75%','90%','95%','97.5%')))
GRAPH <- ggplot(data=GRAPH_DATA)+geom_ribbon(data=FAN_DATA,aes(x=Year,ymin=`2.5%`,ymax=`97.5%`),alpha=ALPHA,fill=COLOR)+geom_ribbon(data=FAN_DATA,aes(x=Year,ymin=`5%`,ymax=`95%`),alpha=ALPHA,fill=COLOR)+geom_ribbon(data=FAN_DATA,aes(x=Year,ymin=`10%`,ymax=`90%`),alpha=ALPHA,fill=COLOR)+geom_ribbon(data=FAN_DATA,aes(x=Year,ymin=`25%`,ymax=`75%`),alpha=ALPHA,fill=COLOR)+geom_ribbon(data=FAN_DATA,aes(x=Year,ymin=`40%`,ymax=`60%`),alpha=ALPHA,fill=COLOR)+geom_line(aes(x=Year,y=Population,group=Percentile,color=Percentile))+geom_line(data=HIST,aes(x=Year,y=Population),color='black',size=0.75)+geom_line(data=MEDIAN_PRED,aes(x=Year,y=Population),color='black',linetype=4,size=0.75)+ scale_x_continuous(breaks = c(seq(1940, 2070, by = 10)))+ scale_y_continuous(breaks = seq(0, 35000, by = 5000))+theme_bw()+ggtitle("Lincoln County, Wyoming Population Forecast")
GRAPH
ggsave("Lincoln_Forecast.png",GRAPH)

View File

@ -60,7 +60,8 @@ GRAPH_DATA <- RES %>% filter(abs(MIGRATION_COEF)<Inf,Age<100) %>% filter(Age!=2
##Graph when using log scales and grouping by child/adult. Looks pretty linear
#ggplot(GRAPH_DATA,aes(x=Age,y=MIGRATION_COEF,group=Group,color=Group)) +geom_point()+geom_smooth(method="lm")+ scale_y_continuous(trans = scales::log_trans())
#Graph when not using log scale but including a geom_smooth to show the actual trend.
#ggplot(GRAPH_DATA,aes(x=Age,y=MIGRATION_COEF,group=Group,color=Group)) +geom_point()+geom_smooth(span=0.9)
#GRAPH <-ggplot(GRAPH_DATA,aes(x=Age,y=MIGRATION_COEF,group=Group,color=Group)) +geom_point()+geom_smooth(span=0.9)+ylab("Migration Coefficient (Pop. Change Per Added Immigrant)")
#ggsave("Migration_Age_Distribution.png",GRAPH)
####Create results which find a functional form for the probability that a migrant is in a certain age bracket, so that the probability of any age can be drawn from in the Monte Carlo for net migration numbers. Note that a function is used, because point estimates will have large variably, but the overall trend looks VERY clean.
CHILD_MOD <- lm(log(MIGRATION_COEF)~Age,data=GRAPH_DATA %>% filter(Group=='Child')) #The childhood range (1-18), has a great exponential fit with age, but has a different trend than adults. Because there are fewer data points we prefer a exponential fit, compared to a smoothed fit as the variance changes the end points, yet in both cases a exponential fit looks good.