Running Lincoln sim

2025-10-30 15:59:42 -06:00 · 2025-10-30 15:59:42 -06:00 · fa11049040
commit fa11049040
parent 747f752f61
4 changed files with 42 additions and 17 deletions
--- a/.gitignore
+++ b/.gitignore
@ -1,4 +1,7 @@
 # ---> R
+# 
+*.png
+Results/
 #Don't save any major data files on the server, can be regenerated after pulling
 *.Rds
 # ##Very large simulation should not be saved
--- a/1_Run_Full_Simulation.r
+++ b/1_Run_Full_Simulation.r
@ -53,22 +53,15 @@ SINGLE_PATH_SIM <- function(j){
 	#Run the loop
 for(x in 1:NUM_RUNS) {
 	BATCH_GUID <- UUIDgenerate()
-	FULL_RESULTS <- mclapply(1:BATCH_SIZE,SINGLE_PATH_SIM,mc.cores = detectCores()-1)
-	FULL_RESULTS <- do.call(rbind,lapply(1:BATCH_SIZE,function(x){FULL_RESULTS[[x]] %>% mutate(SIM_ID=UUIDgenerate())}))
-	FULL_RESULTS$BATCH_ID <- BATCH_GUID
-	FULL_RESULTS <- FULL_RESULTS%>% select(BATCH_ID,SIM_ID,everything())
-	if(x==1){write_csv(FULL_RESULTS,RAW_SIM_FILE)}else {write_csv(FULL_RESULTS,RAW_SIM_FILE,append=TRUE)}
-	rm(FULL_RESULTS)
-	gc()
+	try(FULL_RESULTS <- mclapply(1:BATCH_SIZE,function(x){try(SINGLE_PATH_SIM(x))},mc.cores = detectCores()-1))
+	if(exists("FULL_RESULTS")){
+		FULL_RESULTS <- do.call(rbind,lapply(1:BATCH_SIZE,function(x){FULL_RESULTS[[x]] %>% mutate(SIM_ID=UUIDgenerate())}))
+		FULL_RESULTS$BATCH_ID <- BATCH_GUID
+		FULL_RESULTS <- FULL_RESULTS%>% select(BATCH_ID,SIM_ID,everything())
+		if(x==1){write_csv(FULL_RESULTS,RAW_SIM_FILE)}else {write_csv(FULL_RESULTS,RAW_SIM_FILE,append=TRUE)}
+		rm(FULL_RESULTS)
+		gc()
+}
 }

-###Process the simulations and save the main percentile results by year
-	FULL_RESULTS <- read_csv(RAW_SIM_FILE)
-	GRAPH_DATA <- do.call(rbind,lapply(YEARS,function(x){quantile(RES %>% filter(Year==x) %>% pull(Population),c(0.05,0.1,0.25,0.5,0.75,0.9,0.95))}))  %>% as_tibble
-	YEARS <- 2023:(2023+NUM_YEARS_PROJECTED)
-	GRAPH_DATA$Year <- YEARS 
-	GRAPH_DATA <- GRAPH_DATA %>% pivot_longer(cols=!Year,names_to=c("Percentile"),values_to="Population")
-	write_csv(GRAPH_DATA,PERCENTILE_DATA)
-	ggplot(aes(x=Year,y=Population,group=Percentile,color=Percentile),data=GRAPH_DATA)+geom_line()
-

--- a/2_Result_Analysis.r
+++ b/2_Result_Analysis.r
@ -0,0 +1,28 @@
+library(tidyverse)
+	NUM_YEARS_PROJECTED <-  50 #How many years into the future should each Monte Carlo run project to. For example 25 years if starting from 2025 and ending in 2050.
+YEARS <- 2023:(2023+NUM_YEARS_PROJECTED)
+#Setup save results
+	RES_DIR <- "./Results"
+	RAW_SIM_FILE <- paste0(RES_DIR,"/Raw_Simulations.csv")
+	PERCENTILE_DATA <- paste0(RES_DIR,"/Percentile_Clean_Results.csv")
+
+###Process the simulations and save the main percentile results by year
+	RES <- read_csv(RAW_SIM_FILE)
+	GRAPH_DATA <- do.call(rbind,lapply(YEARS,function(x){quantile(RES %>% filter(Year==x) %>% pull(Population),c(0.025,0.05,0.1,0.25,0.4,0.5,0.6,0.75,0.9,0.95,0.975))}))  %>% as_tibble
+	YEARS <- 2023:(2023+NUM_YEARS_PROJECTED)
+	GRAPH_DATA$Year <- YEARS 
+	FAN_DATA <- GRAPH_DATA
+	GRAPH_DATA <- GRAPH_DATA %>% pivot_longer(cols=!Year,names_to=c("Percentile"),values_to="Population")
+	write_csv(GRAPH_DATA,PERCENTILE_DATA)
+	#Add historic
+	MEDIAN_PRED <- 	GRAPH_DATA %>% filter(Percentile=='50%')
+	GRAPH_DATA <- 	GRAPH_DATA %>% filter(Percentile!='50%')
+
+	HIST <-	readRDS("Data/Cleaned_Data/Wyoming_County_Population.Rds") %>% filter(County=='Lincoln') %>% mutate(Percentile="Actual Population") %>% filter(Year>1930)
+	ALPHA=0.2
+	COLOR <- 'black'
+GRAPH_DATA$Percentile <-	factor(GRAPH_DATA$Percentile,levels=rev(c('2.5%','5%','10%','25%','40%','60%','75%','90%','95%','97.5%')))
+GRAPH <-	ggplot(data=GRAPH_DATA)+geom_ribbon(data=FAN_DATA,aes(x=Year,ymin=`2.5%`,ymax=`97.5%`),alpha=ALPHA,fill=COLOR)+geom_ribbon(data=FAN_DATA,aes(x=Year,ymin=`5%`,ymax=`95%`),alpha=ALPHA,fill=COLOR)+geom_ribbon(data=FAN_DATA,aes(x=Year,ymin=`10%`,ymax=`90%`),alpha=ALPHA,fill=COLOR)+geom_ribbon(data=FAN_DATA,aes(x=Year,ymin=`25%`,ymax=`75%`),alpha=ALPHA,fill=COLOR)+geom_ribbon(data=FAN_DATA,aes(x=Year,ymin=`40%`,ymax=`60%`),alpha=ALPHA,fill=COLOR)+geom_line(aes(x=Year,y=Population,group=Percentile,color=Percentile))+geom_line(data=HIST,aes(x=Year,y=Population),color='black',size=0.75)+geom_line(data=MEDIAN_PRED,aes(x=Year,y=Population),color='black',linetype=4,size=0.75)+ scale_x_continuous(breaks = c(seq(1940, 2070, by = 10)))+ scale_y_continuous(breaks = seq(0, 35000, by = 5000))+theme_bw()+ggtitle("Lincoln County, Wyoming Population Forecast")
+GRAPH 
+ggsave("Lincoln_Forecast.png",GRAPH)
+
--- a/Migration_Regression.r
+++ b/Migration_Regression.r
@ -60,7 +60,8 @@ GRAPH_DATA  <- RES %>% filter(abs(MIGRATION_COEF)<Inf,Age<100) %>% filter(Age!=2
 ##Graph when using log scales and grouping by child/adult. Looks pretty linear
 	#ggplot(GRAPH_DATA,aes(x=Age,y=MIGRATION_COEF,group=Group,color=Group)) +geom_point()+geom_smooth(method="lm")+ scale_y_continuous(trans = scales::log_trans())
 #Graph when not using log scale but including a geom_smooth to show the actual trend.
-	#ggplot(GRAPH_DATA,aes(x=Age,y=MIGRATION_COEF,group=Group,color=Group)) +geom_point()+geom_smooth(span=0.9)
+#GRAPH <-ggplot(GRAPH_DATA,aes(x=Age,y=MIGRATION_COEF,group=Group,color=Group)) +geom_point()+geom_smooth(span=0.9)+ylab("Migration Coefficient (Pop. Change Per Added Immigrant)")
+#ggsave("Migration_Age_Distribution.png",GRAPH)

 ####Create results which find a functional form for the probability that a migrant is in a certain age bracket, so that the probability of any age can be drawn from in the Monte Carlo for net migration numbers. Note that a function is used, because point estimates will have large variably, but the overall trend looks VERY clean.
 	CHILD_MOD <- lm(log(MIGRATION_COEF)~Age,data=GRAPH_DATA %>% filter(Group=='Child')) #The childhood range (1-18), has a great exponential fit with age, but has a different trend than adults. Because there are fewer data points we prefer a exponential fit, compared to a smoothed fit as the variance changes the end points, yet in both cases a exponential fit looks good.