Working on Kemmerer Conversion form Lincoln data

2025-11-03 17:04:15 -07:00 · 2025-11-03 17:04:15 -07:00 · e60fa6fd53
commit e60fa6fd53
parent 628a601d6b
5 changed files with 57 additions and 52 deletions
--- a/1_Run_Full_Simulation.r
+++ b/1_Run_Full_Simulation.r
@ -9,6 +9,8 @@ source("Scripts/Monte_Carlo_Functions.r")
 source("Scripts/Migration_Simulation_Functions.r")
 #####User Configuration Values
 KEMMER_SIM <- TRUE #Wether the simulation should predict Kemmerer (and Diamondville) or Lincoln County as a whole. TRUE, is Kemmerer False is Lincoln
 START_SIM_YEAR <- 2025 #The first year to simulate
 	NUM_SIMULATIONS <- 10^5 #Number of Monte Carlo Simulations to run
 	RERUN_MORTALITY_SIMULATION <- TRUE #Rerun the Monte Carlo simulation of future mortality rates (not actual deaths) even if a Rds file of a mortality rates exists. This can be used to speed up runs when FALSE
 	RERUN_MIGRATION_SIMULATION <- TRUE #Rerun the ARIMA simulations that predict total migration in any year even if a Rds file of a mortality rates exists. This can be used to speed up runs when FALSE
@ -17,7 +19,7 @@ source("Scripts/Migration_Simulation_Functions.r")
 	BIRTH_RATE_REG_RESULTS <- "Data/Regression_Results/Birth_Rate_Model.Rds" #Location of the regression used to model variance in birth rates in each Monte Carlo simulation.
 	START_DEMOGRAPHIC_DATA <- "Data/Cleaned_Data/Start_Year_Demographic_Data_With_Fertility_Groups.Rds" #Location of the data for the first year needing a forecasted birth rate, which aggregates the yearly splits of populations, into a single, year-county data set with variables for key birth prediction (total number of men and women in ages with high fertility rates), and then combines with the data set including births, deaths, migration and population.
 	AGE_OF_MIGRANT_PROBABILITY <- "Data/Other_Intermediate_Outputs/Migreation_Age_Probablity_One_to_Ninety.csv" #Location of the data which is the result of regression analysis of the age of migrants. That is to say 18 year olds may migrate more than 70 year olds, and this is the distribution by age. Sex was not found to be a major factor
-####Run any scirpts required before main Monte Carlo 
+####Run any scripts required before main Monte Carlo 
 	source("Survival_Simulation.r") #Populate a table with a simulation of future mortality rates, for quick recall during the simulation.
 	#A script contains the code needed to create a feols (fixest) regression of the birth rate given age distribution. Load this saved result or else create it to use in each simulation for gathering variance of births in any given age distribution path of the Monte Carlo.
 	if(file.exists(BIRTH_RATE_REG_RESULTS)){MOD_BIRTHS <- readRDS(BIRTH_RATE_REG_RESULTS);FIRST_PREDICT_YEAR_POPULATION_DATA <- readRDS(START_DEMOGRAPHIC_DATA)} else{source("Birth_Rate_Regression.r")}
@ -25,18 +27,25 @@ source("Scripts/Migration_Simulation_Functions.r")
 	#Rerun the migration simulation if requested
 if(RERUN_MIGRATION_SIMULATION ){source("Migration_Regression.r")}
 #######################################################Main Monte Carlo
 START_DEM_DATA <- readRDS("Data/Cleaned_Data/Lincoln_Demographic_Data.Rds") %>% group_by(County) %>% filter(Year==2023) %>% ungroup %>% select(-County)
 MORTALITY_SIMULATION <- readRDS("./Data/Simulated_Data_Sets/MORTALITY_MONTE_CARLO.Rds") #Load the Mortality simulation to speed up simulation
 MIGRATION_ARIMA_SIMULATION <- readRDS("./Data/Simulated_Data_Sets/Migration_ARIMA.Rds") #Load the Migration simulation to speed up simulation
 if(KEMMER_SIM){
 	LN_POP_ST <- FIRST_PREDICT_YEAR_POPULATION_DATA$Population #Population of Lincoln County
 	START_DEM_DATA <- readRDS("Data/Cleaned_Data/Kemmerer_Demographic_Data.Rds") %>% select(-County) %>% mutate(County='Lincoln')
 	FIRST_PREDICT_YEAR_POPULATION_DATA <-  readRDS("Data/Cleaned_Data/Kemmerer_Summary_Start_Data.Rds")
 	KEM_POP_ST <- FIRST_PREDICT_YEAR_POPULATION_DATA$Population
 	POP_ST_RATIO <- LN_POP_ST/KEM_POP_ST
 	MIGRATION_ARIMA_SIMS <- round(MIGRATION_ARIMA_SIMS*POP_ST_RATIO) #Downscale County migreation to the city level based on average popualtion 
 }
 #Second run, work on making into loop and saving the output to file (check if that will slow the results). Maybe use a predefined matrix, so that the results can be stored quickly
 #Second run, work on making into loop and saving the output to file (check if that will slow the results). Maybe use a predifined matrix, so that the results can be stored quirckly
 SINGLE_PATH_SIM <- function(j){
-  C_RES <- RUN_SINGLE_SIM(MOD_BIRTHS,FIRST_PREDICT_YEAR_POPULATION_DATA,START_DEM_DATA,MORTALITY_SIMULATION,SIM_NUMBER=j,START_OF_SIM=2023,MIGRATION_ARIMA_SIMULATION,MIG_AGE_DIST)
+  C_RES <- RUN_SINGLE_SIM(MOD_BIRTHS,FIRST_PREDICT_YEAR_POPULATION_DATA,START_DEM_DATA,MORTALITY_SIMULATION,SIM_NUMBER=j,START_OF_SIM=START_SIM_YEAR,MIGRATION_ARIMA_SIMULATION,MIG_AGE_DIST)
  RES <- C_RES[[1]]
    for(i in 1:NUM_YEARS_PROJECTED){
-      C_RES <- RUN_SINGLE_SIM(MOD_BIRTHS,C_RES[[3]],C_RES[[2]],MORTALITY_SIMULATION,SIM_NUMBER=j,START_OF_SIM=2023,MIGRATION_ARIMA_SIMULATION,MIG_AGE_DIST)
+      C_RES <- RUN_SINGLE_SIM(MOD_BIRTHS,C_RES[[3]],C_RES[[2]],MORTALITY_SIMULATION,SIM_NUMBER=j,START_OF_SIM=START_SIM_YEAR,MIGRATION_ARIMA_SIMULATION,MIG_AGE_DIST)
      RES <- rbind(RES,C_RES[[1]])
    }
  return(RES)
--- a/Birth_Rate_Regression.r
+++ b/Birth_Rate_Regression.r
@ -24,7 +24,7 @@ MOD_BIRTHS <- feols(log(Births)~log(PREV_BIRTH)+log(PREV_TWO_BIRTH)+log(Min_Birt
  RES_DATA$RESID <- resid(MOD_BIRTHS)
  acf(RES_DATA %>% pull(RESID))
  pacf(RES_DATA %>% pull(RESID))
-
+saveRDS(RES_DATA,"Data/Regression_Results/Birth_Regression_Data_Set.Rds")
  saveRDS(MOD_BIRTHS,BIRTH_RATE_REG_RESULTS)
  saveRDS(FIRST_PREDICT_YEAR_POPULATION_DATA,START_DEMOGRAPHIC_DATA) #Save the cleaned data set for later use when starting the simulation.
  #Cleanup data no longer needed, and save some RAM
--- a/Scripts/Birth_Simulation_Functions.r
+++ b/Scripts/Birth_Simulation_Functions.r
@ -2,7 +2,6 @@
 	#Uncomment to test the function step by step
 #REG_MODEL <- MOD_BIRTHS;REG_DATA <- FIRST_PREDICT_YEAR_POPULATION_DATA;NUM_SIMS=1
 BIRTH_SIM <- function(REG_MODEL,REG_DATA,NUM_SIMS=1){
 	predict(REG_MODEL,newdata=REG_DATA)
 	C_PREDICT <- predict(REG_MODEL,REG_DATA,interval = "prediction",level=0.95)
 	PRED_MEAN <- C_PREDICT$fit
 	SE_PRED <- (C_PREDICT$ci_high-C_PREDICT$ci_low)/3.92
--- a/Scripts/Monte_Carlo_Functions.r
+++ b/Scripts/Monte_Carlo_Functions.r
@ -1,5 +1,5 @@
 #Uncomment to check the function line by line
-#	REG_BIRTH_MODEL=MOD_BIRTHS;START_BASIC_DATA=FIRST_PREDICT_YEAR_POPULATION_DATA;START_DETAILED_DATA=START_DEM_DATA;Mortality_Rate_Sim=MORTALITY_SIMULATION;SIM_NUMBER=1;START_OF_SIM_YEAR=2023;Migration_ARIMA_Sim=MIGRATION_ARIMA_SIMULATION;Migration_Age_Distribution=MIG_AGE_DIST
+#	REG_BIRTH_MODEL=MOD_BIRTHS;START_BASIC_DATA=FIRST_PREDICT_YEAR_POPULATION_DATA;START_DETAILED_DATA=START_DEM_DATA;Mortality_Rate_Sim=MORTALITY_SIMULATION;SIM_NUMBER=1;START_OF_SIM_YEAR=2025;Migration_ARIMA_Sim=MIGRATION_ARIMA_SIMULATION;Migration_Age_Distribution=MIG_AGE_DIST
 	#FIRST_PREDICT_YEAR_POPULATION_DATA
 	#START_BASIC_DATA <- C_RES[[3]]
 	#START_DETAILED_DATA <- C_RES[[2]]
@ -25,6 +25,7 @@ RUN_SINGLE_SIM <- function(REG_BIRTH_MODEL,START_BASIC_DATA,START_DETAILED_DATA,
 	START_DETAILED_DATA[,"Age"] <- START_DETAILED_DATA[,"Age"]+1
 	START_DETAILED_DATA[START_DETAILED_DATA$Age==85,c("Num_Male","Num_Female")] <- START_DETAILED_DATA[START_DETAILED_DATA$Age==86,c("Num_Male","Num_Female")]+START_DETAILED_DATA[START_DETAILED_DATA$Age==85,c("Num_Male","Num_Female")] #Sum the 85 and 86 ages into one row for age 85
 	START_DETAILED_DATA <- START_DETAILED_DATA[START_DETAILED_DATA$Age!=86,] #Anyone older than 85 is lumped into one group remove the 86 group
 	####CURRENT ERROR MISSING COUNTY IN ONE COLUMN
 	START_DETAILED_DATA <- rbind(C_BIRTHS,START_DETAILED_DATA) #Add the new simulated births
 	#Run a preliminary Monte Carlo which estimates the future mortality rate, for each simulation and year of of Monte Carlo Simulation
--- a/Zip_Code.r
+++ b/Zip_Code.r
@ -1,6 +1,7 @@
 library(tidyverse)
 library(tidycensus)
 library(zipcodeR)
 library(scales) #For a pretty population Pyramid
 source("Scripts/Downshift_Population_Functions.r")
 #Packages to instal on computer if zipcodeR won't install
 #Sudo apt install libssl-dev libudunits2-dev libabsl-dev libcurl4-openssl-dev libgdal-dev cmake libfontconfig1-dev libharfbuzz-dev libfribidi-dev
@ -47,54 +48,49 @@ AGE_DATA <- AGE_DATA %>%  mutate(Per_Pop=Population/sum(Population)) %>% group_b
 AGE_DATA <- AGE_DATA %>% mutate(Region=ifelse(IN_KEM==1,'Kemmerer','Lincoln')) 
 #AGE_DATA <- AGE_DATA %>% group_by(IN_KEM) %>% mutate(MORE_KEMMER=ifelse(sum(IN_KEM*Per_Pop_Region)>sum((1-IN_KEM)*Per_Pop_Region),1,0)) %>% ungroup 
-PLOT <- ggplot(AGE_DATA, aes(x =Ages, y = Per_Pop_Region)) +  geom_line() +  geom_point(aes(color = Region ), size = 5) +  scale_color_brewer(palette = "Set1", direction = 1) +  theme(legend.position = "bottom")+facet_wrap(~Sex,ncol=1)
+#PLOT <- ggplot(AGE_DATA, aes(x =Ages, y = Per_Pop_Region)) +  geom_line() +  geom_point(aes(color = Region ), size = 5) +  scale_color_brewer(palette = "Set1", direction = 1) +  theme(legend.position = "bottom")+facet_wrap(~Sex,ncol=1)
 #PLOT
-LIN_TO_KEMMER_CONVERSION_RATIOS <- INTERPOLATE_COUNTY_AGE_DEMOGRAPHIC_DATA_TO_CITY_LEVEL(AGE_DATA,YEARS_FORWARD=5)
+LIN_TO_KEMMER_CONVERSION_RATIOS <- INTERPOLATE_COUNTY_AGE_DEMOGRAPHIC_DATA_TO_CITY_LEVEL(AGE_DATA,YEARS_FORWARD=1) %>% pivot_wider(names_from=Sex,values_from=Conversion_Ratio,names_prefix="Convert_")
-LIN_TO_KEMMER_CONVERSION_RATIOS 
+
 KEM_DEMO_DATA <- readRDS("Data/Cleaned_Data/Lincoln_Demographic_Data.Rds")  %>% filter(Year==max(Year)) %>% left_join(LIN_TO_KEMMER_CONVERSION_RATIOS ) %>% mutate(Num_Male=Num_Male*Convert_Male,Num_Female=Num_Female*Convert_Female) %>% mutate(County='Kemmerer') %>% select(-Convert_Male,-Convert_Female)
 NUM_KEM <- sum(KEM_DEMO_DATA[,4:5] )
 CURRENT_KEM_POP <- readRDS("Data/Cleaned_Data/Wyoming_City_Population.Rds") %>% filter(City=='Kemmerer' | City=='Diamondville') %>% group_by(City) %>% filter(Year==max(Year)) %>% ungroup %>% group_by(Year) %>% summarize(Population=sum(as.numeric(Population)),County='Kemmerer')
 SCALE_KEM_FACTOR <- CURRENT_KEM_POP$Population/NUM_KEM
 KEM_NEW_DEMO_DATA <- KEM_DEMO_DATA
 KEM_NEW_DEMO_DATA[,c("Num_Male","Num_Female")] <- round(SCALE_KEM_FACTOR*KEM_NEW_DEMO_DATA[,c("Num_Male","Num_Female")])
 ##Create a simplfied summary of the Kemmerer/Diamondville population estimates, to start the Monte Carlo
 	ST_BIRTH_KEM <- sum((KEM_NEW_DEMO_DATA %>% filter(Age==0))[4:5])
 	ST_BIRTH_TWO_PREV_KEM <- sum((KEM_NEW_DEMO_DATA %>% filter(Age==1))[4:5])
 KEM_DEMO_DATA %>% filter(Age==0)
 	ST_POP <- sum((KEM_NEW_DEMO_DATA)[4:5])
 	INTIATE_KEMMER <- KEM_NEW_DEMO_DATA%>% mutate(Male_Window=Age>=18 & Age<=30,Female_Window=Age>=18 & Age<=28) %>% group_by(County,Year) %>% summarize(Female_Birth_Group=sum(Num_Female*Female_Window),Male_Birth_Group=sum(Num_Male*Male_Window),Min_Birth_Group=ifelse(Female_Birth_Group<Male_Birth_Group,Female_Birth_Group,Male_Birth_Group),PREV_BIRTH=ST_BIRTH_KEM,PREV_TWO_BIRTH=ST_BIRTH_TWO_PREV_KEM,Population=ST_POP) %>% mutate(Births=NA,Deaths=NA,Migration=NA) %>% select(Year,County,Population,Births,Deaths,Migration,Min_Birth_Group,PREV_BIRTH,PREV_TWO_BIRTH) %>% ungroup
 INTIATE_KEMMER$County <- (readRDS("Data/Regression_Results/Birth_Regression_Data_Set.Rds") %>% filter(County=='Lincoln',Year==2020))$County #Making the factor the same for the regression
 	saveRDS(INTIATE_KEMMER ,"Data/Cleaned_Data/Kemmerer_Summary_Start_Data.Rds")
 	write_csv(INTIATE_KEMMER ,"Data/Cleaned_Data/Kemmerer_Summary_Start_Data.csv")
 	saveRDS(KEM_NEW_DEMO_DATA,"Data/Cleaned_Data/Kemmerer_Demographic_Data.Rds")
 	write_csv(KEM_NEW_DEMO_DATA,"Data/Cleaned_Data/Kemmerer_Demographic_Data.csv")
 #sum(KEM_NEW_DEMO_DATA[,4:5])-CURRENT_KEM_POP$Population
 ###Make a population Pyramid Graph
 PY_KEM_DATA <- KEM_NEW_DEMO_DATA %>% pivot_longer(cols=c(Num_Male,Num_Female),names_to="Sex",values_to="Population") %>% mutate(Sex=ifelse(Sex=='Num_Male','Male','Female'),Population=ifelse(Sex=='Male',-Population,Population))
 LN_CURRENT_DEMO <- readRDS("Data/Cleaned_Data/Wyoming_County_Demographic_Data.Rds") %>% filter(County=='Lincoln',Year==max(Year))
 PY_LN_DATA <- LN_CURRENT_DEMO %>% pivot_longer(cols=c(Num_Male,Num_Female),names_to="Sex",values_to="Population") %>% mutate(Sex=ifelse(Sex=='Num_Male','Male','Female'),Population=ifelse(Sex=='Male',-Population,Population))
 PY_LN_DATA <- PY_LN_DATA %>% left_join(PY_KEM_DATA %>% rename(POP_SHIFT=Population) %>% select(-County)) %>% mutate(Population=Population-POP_SHIFT) %>% select(-POP_SHIFT)
 PY_LN_DATA$Population <- PY_LN_DATA$Population/sum(abs(PY_LN_DATA$Population ))
 PY_KEM_PER <- PY_KEM_DATA
 PY_KEM_PER$Population <- PY_KEM_PER$Population/sum(abs(PY_KEM_PER$Population ))
 GRAPH_DATA <- full_join(PY_KEM_PER ,PY_LN_DATA)   %>% mutate(Population=ifelse(County=='Kemmerer',abs(Population),-abs(Population)))
-
+RANGE <- c(-0.015,pretty(range(GRAPH_DATA$Population),n=8))
-AGE_FORWARD <- AGE_DATA %>% filter(!(Min_Age==0 & Max_Age==Inf))%>% mutate(POP_TOTAL=POP_OUT+POP_KEM,KEM_RATIO=POP_KEM/POP_TOTAL) %>% select(Sex,Min_Age,Max_Age,KEM_RATIO)
+LAB <- 	percent(abs(RANGE),accuracy=0.1 )
-STORE <- AGE_FORWARD %>% filter(Min_Age==0,Max_Age==4)
+ POP_PYRAMID <- ggplot(GRAPH_DATA,aes(y=factor(Age),x=Population,fill=County))+geom_col() +scale_x_continuous(breaks  = RANGE,labels = LAB)+facet_grid(~Sex)
-AGE_FORWARD <- STORE %>% full_join(AGE_FORWARD %>% mutate(Min_Age=Min_Age+4,Max_Age=Max_Age+4))
+POP_PYRAMID 
 MALE_FORWARD <- AGE_FORWARD %>% filter(Sex=='Male') %>% arrange(Min_Age) %>% filter(KEM_RATIO!=0) %>% mutate(Med_Age=(Min_Age+Max_Age)/2)
 NUM_IN_GROUP <- MALE_FORWARD$Max_Age- MALE_FORWARD$Min_Age+1
 NUM_IN_GROUP[23] <- 1
 MALE_FORWARD$KEM_RATIO
 ggplot(MALE_FORWARD,aes(x=Med_Age,y=KEM_RATIO)) +geom_point()+geom_smooth(span=0.25)
 loess(KEM_RATIO ~ Med_Age,data=MALE_FORWARD,span=0.3 )
 ?loess
 FEMALE_FORWARD <- AGE_FORWARD %>% filter(Sex=='Female') %>% arrange(Min_Age)
 MALE_FORWARD 
 plot(AGE_FORWARD$KEM_RATIO)
 gg <- ggplot(GRAPH_DATA, aes(x=PER_OUT, xend=PER_KEM, y=MED_AGE, group=Sex)) +
        geom_dumbbell(color="#a3c4dc",
                      size=0.75,
                      point.colour.l="#0e668b") +
        scale_x_continuous(label=percent) +
        labs(x=NULL,
             y=NULL,
             title="Dumbbell Chart",
             subtitle="Pct Change: 2013 vs 2014",
             caption="Source: https://github.com/hrbrmstr/ggalt") +
        theme(plot.title = element_text(hjust=0.5, face="bold"),
              plot.background=element_rect(fill="#f7f7f7"),
              panel.background=element_rect(fill="#f7f7f7"),
              panel.grid.minor=element_blank(),
              panel.grid.major.y=element_blank(),
              panel.grid.major.x=element_line(),
              axis.ticks=element_blank(),
              legend.position="top",
 	       panel.border=element_blank())
 ggplot(GRAPH_DATA %>% filter(Sex=='Male')) +geom_point(aes(x=Med_Age,y=PER_KEM),color='blue') +geom_point(aes(x=Med_Age,y=PER_OUT)) +geom_smooth(aes(x=Med_Age,y=PER_OUT),color='black',span=SPAN)+geom_smooth(aes(x=Med_Age,y=PER_KEM),span=SPAN)
 ggplot(GRAPH_DATA %>% filter(Sex=='Female')) +geom_point(aes(x=Med_Age,y=PER_KEM),color='blue') +geom_point(aes(x=Med_Age,y=PER_OUT)) +geom_smooth(aes(x=Med_Age,y=PER_OUT),color='black',span=SPAN)+geom_smooth(aes(x=Med_Age,y=PER_KEM),span=SPAN)
 AGE_DATA %>% pivot_wider(names_from=c(Sex,Min_Age,Max_Age,Med_Age,IN_KEM),values_from=Population)