Finishing model, finished test 2016 data

This commit is contained in:
Alex Gebben Work 2025-12-03 17:27:42 -07:00
parent fce74d225f
commit d8257b8fc3
9 changed files with 321 additions and 35 deletions

View File

@ -20,12 +20,12 @@ NUM_SIMULATIONS <- 10^4
ST_YEAR <- 2017
################################Load Data
DEMO <- readRDS("Data/Intermediate_Inputs/Starting_Demographic_Data_Sets_of_Monte_Carlo/2016_Starting_Kemmerer_Diamondville_Demographics_Matrix.Rds")
sum(readRDS("Data/Intermediate_Inputs/Starting_Demographic_Data_Sets_of_Monte_Carlo/2023_Starting_Kemmerer_Diamondville_Demographics_Matrix.Rds"))
BIRTH_MOD <- readRDS("Data/Intermediate_Inputs/Birth_Regressions/Birth_Regression_2016.Rds")
#Must add region as a factor with multiple levels for predict to work. Seems to check for multiple levels although that is not needed econometrics.
BIRTH_DATA <- readRDS("Data/Intermediate_Inputs/Birth_Regressions/Regression_Data/Birth_Simulation_Key_Starting_Points.Rds") %>% mutate(Region=factor(Region)) %>% filter(Region=='Kemmerer & Diamondville',Year==2016)
MIGRATION_ARIMA <- readRDS("Data/Intermediate_Inputs/Migration_ARIMA_Models/Kemmerer_Diamondville_Net_Migration_ARIMA_2016.Rds")
BIRTH_DATA <- readRDS("Data/Intermediate_Inputs/Birth_Regressions/Regression_Data/Birth_Simulation_Key_Starting_Points.Rds") %>% mutate(Region=factor(Region)) %>% filter(KEM==1,Year==2016)
MIGRATION_ARIMA <- readRDS("Data/Intermediate_Inputs/Migration_ARIMA_Models/Kemmerer_Diamondville_Adjusted_to_Lincoln_Model_Net_Migration_ARIMA_2016.Rds")
MIGRATION_ODDS <- readRDS("Data/Intermediate_Inputs/Migration_Trends/Migration_Age_Probability_Zero_to_85.Rds")
##############
#Data for death rate trends
@ -108,7 +108,7 @@ SINGLE_SIM <- function(DEMO,BIRTH_DATA,ST_YEAR,YEARS_AHEAD,MIGRATION_ARIMA_MODEL
NCORES <- detectCores()-1
BATCH_SIZE <- NCORES*10
TOTAL_SIMULATIONS <- 10^5
TOTAL_SIMULATIONS <- 10^6
N_RUNS <-ceiling(TOTAL_SIMULATIONS/BATCH_SIZE )
SIM_RES_FILE <- paste0(RES_SIM_DIR,"Kemmerer_2016_Simulation.csv")
@ -122,7 +122,7 @@ NEW_RES_FILE <- !file.exists(SIM_RES_FILE)
if(exists("RES")){
RES <- as.data.frame(RES)
RES[,-1] <- as.numeric(as.matrix(RES[,-1]))
if(NEW_RES_FILE){write_csv(RES,SIM_RES_FILE)}else {write_csv(RES,SIM_RES_FILE,col_names=FALSE,append=TRUE)}
if(NEW_RES_FILE & i==1){write_csv(RES,SIM_RES_FILE)}else {write_csv(RES,SIM_RES_FILE,col_names=FALSE,append=TRUE)}
rm(RES)
}
}

129
1B_Run_Full_Simulation.r Normal file
View File

@ -0,0 +1,129 @@
#####Packages
library(tidyverse) #Cleaning data
library(fixest) #Estimating a model of birth rates, to provide variance in the birth rate Monte Carlo using a fixed effect model.
library(forecast) #Fore ARIMA migration simulations
library(parallel)
library(uuid) #To add a index to each batch
####If the prelimnary data needs to be reloaded run the supplied bash script to download, process, and generate all needed data sets for the Monte Carlo population Simulation. Otherwise skip this step to save time
RELOAD_DATA <- FALSE
if(RELOAD_DATA){system("bash Prelim_Process.sh")}
#Load custom functions needed for the simulation
source("Scripts/Load_Custom_Functions/Migration_Simulation_Functions.r")
source("Scripts/Load_Custom_Functions/Birth_Simulation_Functions.r")
source("Scripts/Load_Custom_Functions/Increment_Data_Year.r")
source("Scripts/Load_Custom_Functions/Single_Age_Mortality_Trend_Simulation.r")
#######Preliminary Model Inputs
YEARS_AHEAD <- 10
NUM_SIMULATIONS <- 10^4
ST_YEAR <- 2017
################################Load Data
DEMO <- readRDS("Data/Intermediate_Inputs/Starting_Demographic_Data_Sets_of_Monte_Carlo/2016_Starting_Kemmerer_Diamondville_Demographics_Matrix.Rds")
BIRTH_MOD <- readRDS("Data/Intermediate_Inputs/Birth_Regressions/Birth_Regression_2016.Rds")
#Must add region as a factor with multiple levels for predict to work. Seems to check for multiple levels although that is not needed econometrics.
BIRTH_DATA <- readRDS("Data/Intermediate_Inputs/Birth_Regressions/Regression_Data/Birth_Simulation_Key_Starting_Points.Rds") %>% mutate(Region=factor(Region)) %>% filter(KEM==1,Year==2016)
MIGRATION_ARIMA <- readRDS("Data/Intermediate_Inputs/Migration_ARIMA_Models/Kemmerer_Diamondville_Adjusted_to_Lincoln_Model_Net_Migration_ARIMA_2016.Rds")
MIGRATION_ODDS <- readRDS("Data/Intermediate_Inputs/Migration_Trends/Migration_Age_Probability_Zero_to_85.Rds")
##############
#Data for death rate trends
SINGLE_AGE_MODS <- readRDS("Data/Intermediate_Inputs/Mortality_Regression_Data/Single_Sex_Age_Time_Series_Regression_2016.Rds")
BOUNDS <- readRDS("Data/Intermediate_Inputs/Mortality_Regression_Data/Single_Sex_Age_Bounds_for_Predictions.Rds")
MAX_MALE <- BOUNDS %>% filter(Sex=='Male') %>% pull(MAX_RATE)
MIN_MALE <- BOUNDS %>% filter(Sex=='Male') %>% pull(MIN_RATE)
MAX_FEMALE <- BOUNDS %>% filter(Sex=='Female') %>% pull(MAX_RATE)
MIN_FEMALE <- BOUNDS %>% filter(Sex=='Female') %>% pull(MIN_RATE)
MIN_GAP <- BOUNDS %>% filter(Sex=='Male') %>% pull(MIN_MALE_FEMALE_GAP)
MAX_GAP <- BOUNDS %>% filter(Sex=='Male') %>% pull(MAX_MALE_FEMALE_GAP)
#Adjusted for 2016 by filtering REMOVE LATER
BASELINE_AGE_ADJUST_MEN <- readRDS("Data/Cleaned_Data/Mortality_Data/RDS/Single_Sex_Age_Population_in_2000.Rds") %>% filter(Sex=='Male',Year<=2016) %>% pull(Percent_of_Population)
BASELINE_AGE_ADJUST_WOMEN <- readRDS("Data/Cleaned_Data/Mortality_Data/RDS/Single_Sex_Age_Population_in_2000.Rds") %>% filter(Sex=='Female',Year<=2016) %>% pull(Percent_of_Population)
#Adjust to just women popualtion (Not all population percent
BASELINE_AGE_ADJUST_WOMEN <- BASELINE_AGE_ADJUST_WOMEN/sum(BASELINE_AGE_ADJUST_WOMEN )
BASELINE_AGE_ADJUST_MEN <- BASELINE_AGE_ADJUST_MEN/ sum(BASELINE_AGE_ADJUST_MEN )
MOD_MEN_ALL <- readRDS("Data/Intermediate_Inputs/Age_Mortality_ARIMA_Models/ARIMA_US_Men_Mortality_by_Age_2016.Rds")
MOD_WOMEN_ALL <- readRDS("Data/Intermediate_Inputs/Age_Mortality_ARIMA_Models/ARIMA_US_Women_Mortality_by_Age_2016.Rds")
MOD_LIN_MEN <- readRDS("Data/Intermediate_Inputs/Age_Mortality_ARIMA_Models/ARIMA_Lincoln_Men_Mortality_by_Age_2016.Rds")
MOD_LIN_WOMEN <- readRDS("Data/Intermediate_Inputs/Age_Mortality_ARIMA_Models/ARIMA_Lincoln_Women_Mortality_by_Age_2016.Rds")
XREG <- cbind(rep(0.0001,YEARS_AHEAD+1),rep(0.0001,YEARS_AHEAD+1)) #Empty data set to simulate in the future
XREG <- ts(XREG,start=ST_YEAR,frequency=1)
SIMULATE_MORTALITY_RATE_TRENDS <- function(){
SIMULATED_MORTALITY_DATA_SET <- MAKE_EMPTY(ST_YEAR,ST_YEAR+YEARS_AHEAD,MOD_LIN_MEN,MOD_LIN_WOMEN,MOD_MEN_ALL,MOD_WOMEN_ALL,XREG)
MORTALITY_SIMULATION <- AGE_DIST(SINGLE_AGE_MODS,SIMULATED_MORTALITY_DATA_SET ,MAX_MALE,MAX_FEMALE,MIN_MALE,MIN_FEMALE,MAX_GAP,MIN_GAP,BASELINE_AGE_ADJUST_MEN,BASELINE_AGE_ADJUST_WOMEN)
return(MORTALITY_SIMULATION )
}
#####################START YEAR BY SIMULATIONS
#CURRENT_YEARS_AHEAD=1;CURRENT_SIM_NUM <- 1;MORTALITY_SIMULATION <- SIMULATE_MORTALITY_RATE_TRENDS()
SINGLE_YEAR_SIM <- function(DEMO,BIRTH_DATA,CURRENT_YEARS_AHEAD,MORTALITY_SIMULATION,NET_MIGRATION){
ORIG_DEMO <- DEMO
DEMO <- DEMOGRAPHICS_AFTER_MIGRATION(DEMO, NET_MIGRATION,MIGRATION_ODDS )
TOTAL_MIGRATION <- sum(DEMO-ORIG_DEMO)
BIRTH_DATA$Year <- BIRTH_DATA$Year+1
BIRTH_DATA$Lag_Two_Births <- BIRTH_DATA$Lag_Births
BIRTH_DATA$Lag_Births <- BIRTH_DATA$Births
BIRTH_DATA$Births <- NA
##We grab one year earlier than the window because they are one year older this year. Because the ages are from 0-85, row 18 is year 17, but one year is added making it 18 years in the current year. The birth windows are 18-28 for women and 18-30 for men.
BIRTH_DATA$Min_Birth_Group <- min(sum(DEMO[18:30,1]),sum(DEMO[18:28,2]))
NEW_BORNS <- BIRTH_SIM(BIRTH_MOD,BIRTH_DATA)
TOTAL_BIRTHS <- sum(NEW_BORNS)
BIRTH_DATA[,"Births"] <- TOTAL_BIRTHS
DEMO <- INCREMENT_AGES(DEMO,NEW_BORNS)
MORTALITY_SIMULATION
MALE_DEATHS <- sapply(1:86,function(x){rbinom(1,DEMO[x,1],MORTALITY_SIMULATION[[1]][x,CURRENT_YEARS_AHEAD])})
FEMALE_DEATHS <- sapply(1:86,function(x){rbinom(1,DEMO[x,2],MORTALITY_SIMULATION[[2]][x,CURRENT_YEARS_AHEAD])})
MALE_DEATHS <- ifelse(MALE_DEATHS>=DEMO[,1],DEMO[,1],MALE_DEATHS)
FEMALE_DEATHS <- ifelse(FEMALE_DEATHS>=DEMO[,1],DEMO[,1],FEMALE_DEATHS)
TOTAL_DEATHS <- sum(MALE_DEATHS+FEMALE_DEATHS)
DEMO[,"Num_Male"] <- DEMO[,"Num_Male"] -MALE_DEATHS
DEMO[,"Num_Female"] <- DEMO[,"Num_Female"] -FEMALE_DEATHS
#List of values needed for the next run or for reporting a result
TOTAL_POP <- sum(DEMO)
return(list(DEMO,BIRTH_DATA,c(TOTAL_POP,TOTAL_BIRTHS,TOTAL_DEATHS,TOTAL_MIGRATION)))
}
MIGRATION_ARIMA_MODEL <- MIGRATION_ARIMA
SINGLE_SIM <- function(DEMO,BIRTH_DATA,ST_YEAR,YEARS_AHEAD,MIGRATION_ARIMA_MODEL){
MIGRATION_SIM_VALUES <- round(as.vector(simulate(nsim=YEARS_AHEAD,MIGRATION_ARIMA_MODEL) ))
FINAL_REPORT_VALUES <- matrix(NA,ncol=6,nrow=YEARS_AHEAD)
colnames(FINAL_REPORT_VALUES ) <- c("Sim_UUID","Year","Population","Births","Deaths","Net_Migration")
FINAL_REPORT_VALUES[,1] <- UUIDgenerate()
for(i in 1:YEARS_AHEAD){
C_YEAR <- ST_YEAR+i-1
C_RES <-SINGLE_YEAR_SIM(DEMO,BIRTH_DATA,i,SIMULATE_MORTALITY_RATE_TRENDS(),MIGRATION_SIM_VALUES[i])
DEMO <- C_RES[[1]]
BIRTH_DATA <- C_RES[[2]]
FINAL_REPORT_VALUES[i,-1] <- c(C_YEAR,C_RES[[3]])
}
return(FINAL_REPORT_VALUES)
}
if(!exists("RES_DIR")){RES_DIR<- "./Results/"}
if(!exists("RES_SIM_DIR")){RES_SIM_DIR <- paste0(RES_DIR,"Simulations/")}
dir.create(RES_SIM_DIR, recursive = TRUE, showWarnings = FALSE)
NCORES <- detectCores()-1
BATCH_SIZE <- NCORES*10
TOTAL_SIMULATIONS <- 10^6
N_RUNS <-ceiling(TOTAL_SIMULATIONS/BATCH_SIZE )
SIM_RES_FILE <- paste0(RES_SIM_DIR,"Kemmerer_2016_Simulation.csv")
NEW_RES_FILE <- !file.exists(SIM_RES_FILE)
for(i in 1:N_RUNS){
# MIGRATION_MATRIX <- simulate(nsim=BATCH_SIZE,MIGRATION_ARIMA,n=YEARS_AHEAD)
MIGRATION_MATRIX <- do.call(cbind, mclapply(1:BATCH_SIZE,function(x)(as.vector(simulate(nsim=YEARS_AHEAD,MIGRATION_ARIMA) )),mc.cores = detectCores()-1))
# rownames(MIGRATION_MATRIX) <- ST_YEAR:(ST_YEAR+YEARS_AHEAD-1)
# colnames(MIGRATION_MATRIX) <- 1:BATCH_SIZE
try(RES <- do.call(rbind,mclapply(1:BATCH_SIZE,function(x){SINGLE_SIM(DEMO,BIRTH_DATA,ST_YEAR,YEARS_AHEAD,MIGRATION_ARIMA)},mc.cores=NCORES)))
if(exists("RES")){
RES <- as.data.frame(RES)
RES[,-1] <- as.numeric(as.matrix(RES[,-1]))
if(NEW_RES_FILE & i==1){write_csv(RES,SIM_RES_FILE)}else {write_csv(RES,SIM_RES_FILE,col_names=FALSE,append=TRUE)}
rm(RES)
}
}

65
2A_Result_Analysis_2016.r Normal file
View File

@ -0,0 +1,65 @@
library(tidyverse)
###Process the simulations and save the main percentile results by year
RES <- read_csv("Results/Simulations/Kemmerer_2016_Simulation.csv")
HIST <- readRDS("Data/Cleaned_Data/Population_Data/RDS/Kemmerer_Diamondville_Population_Data.Rds") %>% filter(County=='Lincoln') %>% mutate(Percentile="Actual Population") %>% filter(Year>=1940)
YEARS <- min(RES$Year):max(RES$Year)
######Population
START_POP <- HIST %>% filter(Year==2016) %>% pull(Population)
GRAPH_DATA <- do.call(rbind,lapply(YEARS,function(x){quantile(RES %>% filter(Year==x) %>% pull(Population),c(0.025,0.05,0.1,0.25,0.4,0.5,0.6,0.75,0.9,0.95,0.975))})) %>% as_tibble %>% mutate(Year=YEARS)
#Add 2016 as a starting value
GRAPH_DATA <- rbind(GRAPH_DATA[1,],GRAPH_DATA)
GRAPH_DATA[1,] <-t(c(rep(START_POP,ncol(GRAPH_DATA)-1),min(GRAPH_DATA$Year)-1))
FAN_DATA <- GRAPH_DATA
GRAPH_DATA <- GRAPH_DATA %>% pivot_longer(cols=!Year,names_to=c("Percentile"),values_to="Population")
GRAPH_DATA$Percentile <- factor(GRAPH_DATA$Percentile,levels=rev(c('2.5%','5%','10%','25%','40%','50%','60%','75%','90%','95%','97.5%')))
START_POP <- HIST %>% filter(Year==2016) %>% pull(Population)
GRAPH_DATA <- rbind(GRAPH_DATA,GRAPH_DATA %>% filter(Year==2017) %>% mutate(Population=START_POP,Year=2016))
MEDIAN_PRED <- GRAPH_DATA %>% filter(Percentile=='50%')
######Migration
GRAPH_DATA_MIGRATION <- do.call(rbind,lapply(YEARS,function(x){quantile(RES %>% filter(Year==x) %>% pull(Net_Migration),c(0.025,0.05,0.1,0.25,0.4,0.5,0.6,0.75,0.9,0.95,0.975))})) %>% as_tibble %>% mutate(Year=YEARS)
FAN_DATA_MIGRATION <- GRAPH_DATA_MIGRATION
GRAPH_DATA_MIGRATION <- GRAPH_DATA_MIGRATION %>% pivot_longer(cols=!Year,names_to=c("Percentile"),values_to="Migration")
GRAPH_DATA_MIGRATION$Percentile <- factor(GRAPH_DATA_MIGRATION$Percentile,levels=rev(c('2.5%','5%','10%','25%','40%','60%','75%','90%','95%','97.5%')))
MEDIAN_PRED_MIGRATION <- GRAPH_DATA_MIGRATION %>% filter(Percentile=='50%')
######Mortalities
GRAPH_DATA_MORTALITY <- do.call(rbind,lapply(YEARS,function(x){quantile(RES %>% filter(Year==x) %>% pull(Deaths),c(0.025,0.05,0.1,0.25,0.4,0.5,0.6,0.75,0.9,0.95,0.975))})) %>% as_tibble %>% mutate(Year=YEARS)
FAN_DATA_MORTALITY <- GRAPH_DATA_MORTALITY
GRAPH_DATA_MORTALITY <- GRAPH_DATA_MORTALITY %>% pivot_longer(cols=!Year,names_to=c("Percentile"),values_to="Deaths")
GRAPH_DATA_MORTALITY$Percentile <- factor(GRAPH_DATA_MORTALITY$Percentile,levels=rev(c('2.5%','5%','10%','25%','40%','60%','75%','90%','95%','97.5%')))
MEDIAN_PRED_MORTALITY<- GRAPH_DATA_MORTALITY %>% filter(Percentile=='50%')
######Births
START_BIRTHS <- HIST %>% filter(Year==2016) %>% pull(Births)
GRAPH_DATA_BIRTHS <- do.call(rbind,lapply(YEARS,function(x){quantile(RES %>% filter(Year==x) %>% pull(Births),c(0.025,0.05,0.1,0.25,0.4,0.5,0.6,0.75,0.9,0.95,0.975))})) %>% as_tibble %>% mutate(Year=YEARS)
GRAPH_DATA_BIRTHS <- rbind(GRAPH_DATA_BIRTHS[1,],GRAPH_DATA_BIRTHS)
GRAPH_DATA_BIRTHS[1,] <-t(c(rep(START_BIRTHS,ncol(GRAPH_DATA_BIRTHS)-1),min(GRAPH_DATA_BIRTHS$Year)-1))
FAN_DATA_BIRTHS <- GRAPH_DATA_BIRTHS
GRAPH_DATA_BIRTHS<- GRAPH_DATA_BIRTHS %>% pivot_longer(cols=!Year,names_to=c("Percentile"),values_to="Births")
GRAPH_DATA_BIRTHS$Percentile <- factor(GRAPH_DATA_BIRTHS$Percentile,levels=rev(c('2.5%','5%','10%','25%','40%','60%','75%','90%','95%','97.5%')))
MEDIAN_PRED_BIRTHS<- GRAPH_DATA_BIRTHS %>% filter(Percentile=='50%')
#write_csv(GRAPH_DATA,PERCENTILE_DATA)
#Add historic
MEDIAN_PRED <- GRAPH_DATA %>% filter(Percentile=='50%')
GRAPH_DATA <- GRAPH_DATA %>% filter(Percentile!='50%')
ALPHA=0.2
COLOR <- 'black'
#GRAPH <-
nrow(RES)/10
ggplot(data=GRAPH_DATA)+geom_ribbon(data=FAN_DATA,aes(x=Year,ymin=`2.5%`,ymax=`97.5%`),alpha=ALPHA,fill=COLOR)+geom_ribbon(data=FAN_DATA,aes(x=Year,ymin=`5%`,ymax=`95%`),alpha=ALPHA,fill=COLOR)+geom_ribbon(data=FAN_DATA,aes(x=Year,ymin=`10%`,ymax=`90%`),alpha=ALPHA,fill=COLOR)+geom_ribbon(data=FAN_DATA,aes(x=Year,ymin=`25%`,ymax=`75%`),alpha=ALPHA,fill=COLOR)+geom_ribbon(data=FAN_DATA,aes(x=Year,ymin=`40%`,ymax=`60%`),alpha=ALPHA,fill=COLOR)+geom_line(aes(x=Year,y=Population,group=Percentile,color=Percentile))+geom_line(data=HIST,aes(x=Year,y=Population),color='black',linewidth=0.75)+geom_line(data=MEDIAN_PRED,aes(x=Year,y=Population),color='black',linetype=4,linewidth=0.75)+ scale_x_continuous(breaks = c(seq(1940, 2030, by = 5)))+ scale_y_continuous(breaks = seq(0, 35000, by = 250))+theme_bw()+ggtitle("Kemmerer & Diamondville, Population Forecast")+ expand_limits( y = 0)
ggplot(data=GRAPH_DATA_MIGRATION)+geom_ribbon(data=FAN_DATA_MIGRATION,aes(x=Year,ymin=`2.5%`,ymax=`97.5%`),alpha=ALPHA,fill=COLOR)+geom_ribbon(data=FAN_DATA_MIGRATION,aes(x=Year,ymin=`5%`,ymax=`95%`),alpha=ALPHA,fill=COLOR)+geom_ribbon(data=FAN_DATA_MIGRATION,aes(x=Year,ymin=`10%`,ymax=`90%`),alpha=ALPHA,fill=COLOR)+geom_ribbon(data=FAN_DATA_MIGRATION,aes(x=Year,ymin=`25%`,ymax=`75%`),alpha=ALPHA,fill=COLOR)+geom_ribbon(data=FAN_DATA_MIGRATION,aes(x=Year,ymin=`40%`,ymax=`60%`),alpha=ALPHA,fill=COLOR)+geom_line(aes(x=Year,y=Migration,group=Percentile,color=Percentile))+geom_line(data=HIST %>% filter(Year>=2009),aes(x=Year,y=Migration),color='black',linewidth=0.75)
+geom_line(data=MEDIAN_PRED_MIGRATION,aes(x=Year,y=Migration),color='black',linetype=4,linewidth=0.75)+ scale_x_continuous(breaks = c(seq(1940, 2030, by = 5)))+ scale_y_continuous(breaks = seq(0, 35000, by = 250))+theme_bw()+ggtitle("Kemmerer & Diamondville, Migration Forecast")+ expand_limits( y = 0)
ggplot(data=GRAPH_DATA_MORTALITY)+geom_ribbon(data=FAN_DATA_MORTALITY,aes(x=Year,ymin=`2.5%`,ymax=`97.5%`),alpha=ALPHA,fill=COLOR)+geom_ribbon(data=FAN_DATA_MORTALITY,aes(x=Year,ymin=`5%`,ymax=`95%`),alpha=ALPHA,fill=COLOR)+geom_ribbon(data=FAN_DATA_MORTALITY,aes(x=Year,ymin=`10%`,ymax=`90%`),alpha=ALPHA,fill=COLOR)+geom_ribbon(data=FAN_DATA_MORTALITY,aes(x=Year,ymin=`25%`,ymax=`75%`),alpha=ALPHA,fill=COLOR)+geom_ribbon(data=FAN_DATA_MORTALITY,aes(x=Year,ymin=`40%`,ymax=`60%`),alpha=ALPHA,fill=COLOR)+geom_line(aes(x=Year,y=Deaths,group=Percentile,color=Percentile))+geom_line(data=HIST %>% filter(Year>=2009),aes(x=Year,y=Deaths),color='black',linewidth=0.75)+geom_line(data=MEDIAN_PRED_MORTALITY,aes(x=Year,y=Deaths),color='black',linetype=4,linewidth=0.75)+ scale_x_continuous(breaks = c(seq(2000, 2030, by = 5)))+ scale_y_continuous(breaks = seq(0, 50, by = 5))+theme_bw()+ggtitle("Kemmerer, Wyoming Death Forecast")+ expand_limits( y = 0)
ggplot(data=GRAPH_DATA_BIRTHS)+geom_ribbon(data=FAN_DATA_BIRTHS,aes(x=Year,ymin=`2.5%`,ymax=`97.5%`),alpha=ALPHA,fill=COLOR)+geom_ribbon(data=FAN_DATA_BIRTHS,aes(x=Year,ymin=`5%`,ymax=`95%`),alpha=ALPHA,fill=COLOR)+geom_ribbon(data=FAN_DATA_BIRTHS,aes(x=Year,ymin=`10%`,ymax=`90%`),alpha=ALPHA,fill=COLOR)+geom_ribbon(data=FAN_DATA_BIRTHS,aes(x=Year,ymin=`25%`,ymax=`75%`),alpha=ALPHA,fill=COLOR)+geom_ribbon(data=FAN_DATA_BIRTHS,aes(x=Year,ymin=`40%`,ymax=`60%`),alpha=ALPHA,fill=COLOR)+geom_line(aes(x=Year,y=Births,group=Percentile,color=Percentile))+geom_line(data=HIST %>% filter(Year>=2009),aes(x=Year,y=Births),color='black',linewidth=0.75)+geom_line(data=MEDIAN_PRED_BIRTHS,aes(x=Year,y=Births),color='black',linetype=4,linewidth=0.75)+ scale_x_continuous(breaks = c(seq(2000, 2030, by = 5)))+ scale_y_continuous(breaks = seq(0, 200, by = 5))+theme_bw()+ggtitle("Kemmerer, Wyoming Birth Forecast")+ expand_limits( y = 0)

63
2B_Result_Analysis.r Normal file
View File

@ -0,0 +1,63 @@
library(tidyverse)
###Process the simulations and save the main percentile results by year
RES <- read_csv("Results/Simulations/Kemmerer_2023_Simulation.csv")
RES[,"Year"] <- RES[,"Year"]+1
HIST <- readRDS("Data/Cleaned_Data/Population_Data/RDS/Kemmerer_Diamondville_Population_Data.Rds") %>% filter(County=='Lincoln') %>% mutate(Percentile="Actual Population") %>% filter(Year>=1940)
YEARS <- min(RES$Year,na.rm=TRUE):max(RES$Year,na.rm=TRUE)
######Population
START_POP <- HIST %>% filter(Year==2023) %>% pull(Population)
GRAPH_DATA <- do.call(rbind,lapply(YEARS,function(x){quantile(RES %>% filter(Year==x) %>% pull(Population),c(0.025,0.05,0.1,0.25,0.4,0.5,0.6,0.75,0.9,0.95,0.975))})) %>% as_tibble %>% mutate(Year=YEARS)
GRAPH_DATA <- rbind(GRAPH_DATA[1,],GRAPH_DATA)
GRAPH_DATA[1,] <-t(c(rep(START_POP,ncol(GRAPH_DATA)-1),min(GRAPH_DATA$Year)-1))
FAN_DATA <- GRAPH_DATA
GRAPH_DATA <- GRAPH_DATA %>% pivot_longer(cols=!Year,names_to=c("Percentile"),values_to="Population")
GRAPH_DATA$Percentile <- factor(GRAPH_DATA$Percentile,levels=rev(c('2.5%','5%','10%','25%','40%','50%','60%','75%','90%','95%','97.5%')))
START_POP <- HIST %>% filter(Year==2025) %>% pull(Population)
MEDIAN_PRED <- GRAPH_DATA %>% filter(Percentile=='50%')
######Migration
GRAPH_DATA_MIGRATION <- do.call(rbind,lapply(YEARS,function(x){quantile(RES %>% filter(Year==x) %>% pull(Net_Migration),c(0.025,0.05,0.1,0.25,0.4,0.5,0.6,0.75,0.9,0.95,0.975))})) %>% as_tibble %>% mutate(Year=YEARS)
FAN_DATA_MIGRATION <- GRAPH_DATA_MIGRATION
GRAPH_DATA_MIGRATION <- GRAPH_DATA_MIGRATION %>% pivot_longer(cols=!Year,names_to=c("Percentile"),values_to="Migration")
GRAPH_DATA_MIGRATION$Percentile <- factor(GRAPH_DATA_MIGRATION$Percentile,levels=rev(c('2.5%','5%','10%','25%','40%','60%','75%','90%','95%','97.5%')))
MEDIAN_PRED_MIGRATION <- GRAPH_DATA_MIGRATION %>% filter(Percentile=='50%')
######Mortalities
GRAPH_DATA_MORTALITY <- do.call(rbind,lapply(YEARS,function(x){quantile(RES %>% filter(Year==x) %>% pull(Deaths),c(0.025,0.05,0.1,0.25,0.4,0.5,0.6,0.75,0.9,0.95,0.975))})) %>% as_tibble %>% mutate(Year=YEARS)
FAN_DATA_MORTALITY <- GRAPH_DATA_MORTALITY
GRAPH_DATA_MORTALITY <- GRAPH_DATA_MORTALITY %>% pivot_longer(cols=!Year,names_to=c("Percentile"),values_to="Deaths")
GRAPH_DATA_MORTALITY$Percentile <- factor(GRAPH_DATA_MORTALITY$Percentile,levels=rev(c('2.5%','5%','10%','25%','40%','60%','75%','90%','95%','97.5%')))
MEDIAN_PRED_MORTALITY<- GRAPH_DATA_MORTALITY %>% filter(Percentile=='50%')
######Births
START_BIRTHS <- HIST %>% filter(Year==2024) %>% pull(Births)
GRAPH_DATA_BIRTHS <- do.call(rbind,lapply(YEARS,function(x){quantile(RES %>% filter(Year==x) %>% pull(Births),c(0.025,0.05,0.1,0.25,0.4,0.5,0.6,0.75,0.9,0.95,0.975))})) %>% as_tibble %>% mutate(Year=YEARS)
GRAPH_DATA_BIRTHS <- rbind(GRAPH_DATA_BIRTHS[1,],GRAPH_DATA_BIRTHS)
GRAPH_DATA_BIRTHS[1,] <-t(c(rep(START_BIRTHS,ncol(GRAPH_DATA_BIRTHS)-1),min(GRAPH_DATA_BIRTHS$Year)-1))
GRAPH_DATA_BIRTHS$Year <- GRAPH_DATA_BIRTHS$Year+1
FAN_DATA_BIRTHS <- GRAPH_DATA_BIRTHS
GRAPH_DATA_BIRTHS<- GRAPH_DATA_BIRTHS %>% pivot_longer(cols=!Year,names_to=c("Percentile"),values_to="Births")
GRAPH_DATA_BIRTHS$Percentile <- factor(GRAPH_DATA_BIRTHS$Percentile,levels=rev(c('2.5%','5%','10%','25%','40%','60%','75%','90%','95%','97.5%')))
MEDIAN_PRED_BIRTHS<- GRAPH_DATA_BIRTHS %>% filter(Percentile=='50%')
#write_csv(GRAPH_DATA,PERCENTILE_DATA)
#Add historic
MEDIAN_PRED <- GRAPH_DATA %>% filter(Percentile=='50%')
GRAPH_DATA <- GRAPH_DATA %>% filter(Percentile!='50%')
ALPHA=0.2
COLOR <- 'black'
#GRAPH <-
nrow(RES)/10
ggplot(data=GRAPH_DATA)+geom_ribbon(data=FAN_DATA,aes(x=Year,ymin=`2.5%`,ymax=`97.5%`),alpha=ALPHA,fill=COLOR)+geom_ribbon(data=FAN_DATA,aes(x=Year,ymin=`5%`,ymax=`95%`),alpha=ALPHA,fill=COLOR)+geom_ribbon(data=FAN_DATA,aes(x=Year,ymin=`10%`,ymax=`90%`),alpha=ALPHA,fill=COLOR)+geom_ribbon(data=FAN_DATA,aes(x=Year,ymin=`25%`,ymax=`75%`),alpha=ALPHA,fill=COLOR)+geom_ribbon(data=FAN_DATA,aes(x=Year,ymin=`40%`,ymax=`60%`),alpha=ALPHA,fill=COLOR)+geom_line(aes(x=Year,y=Population,group=Percentile,color=Percentile))+geom_line(data=HIST,aes(x=Year,y=Population),color='black',linewidth=0.75)+geom_line(data=MEDIAN_PRED,aes(x=Year,y=Population),color='black',linetype=4,linewidth=0.75)+ scale_x_continuous(breaks = c(seq(1940, 2065, by = 5)))+ scale_y_continuous(breaks = seq(0, 35000, by = 250))+theme_bw()+ggtitle("Kemmerer & Diamondville, Population Forecast")+ expand_limits( y = 0)
ggplot(data=GRAPH_DATA_MIGRATION)+geom_ribbon(data=FAN_DATA_MIGRATION,aes(x=Year,ymin=`2.5%`,ymax=`97.5%`),alpha=ALPHA,fill=COLOR)+geom_ribbon(data=FAN_DATA_MIGRATION,aes(x=Year,ymin=`5%`,ymax=`95%`),alpha=ALPHA,fill=COLOR)+geom_ribbon(data=FAN_DATA_MIGRATION,aes(x=Year,ymin=`10%`,ymax=`90%`),alpha=ALPHA,fill=COLOR)+geom_ribbon(data=FAN_DATA_MIGRATION,aes(x=Year,ymin=`25%`,ymax=`75%`),alpha=ALPHA,fill=COLOR)+geom_ribbon(data=FAN_DATA_MIGRATION,aes(x=Year,ymin=`40%`,ymax=`60%`),alpha=ALPHA,fill=COLOR)+geom_line(aes(x=Year,y=Migration,group=Percentile,color=Percentile))+geom_line(data=HIST %>% filter(Year>=2009),aes(x=Year,y=Migration),color='black',linewidth=0.75) +geom_line(data=MEDIAN_PRED_MIGRATION,aes(x=Year,y=Migration),color='black',linetype=4,linewidth=0.75)+ scale_x_continuous(breaks = c(seq(1940, 2065, by = 5)))+ scale_y_continuous(breaks = seq(0, 35000, by = 250))+theme_bw()+ggtitle("Kemmerer & Diamondville, Migration Forecast")+ expand_limits( y = 0)
ggplot(data=GRAPH_DATA_MORTALITY)+geom_ribbon(data=FAN_DATA_MORTALITY,aes(x=Year,ymin=`2.5%`,ymax=`97.5%`),alpha=ALPHA,fill=COLOR)+geom_ribbon(data=FAN_DATA_MORTALITY,aes(x=Year,ymin=`5%`,ymax=`95%`),alpha=ALPHA,fill=COLOR)+geom_ribbon(data=FAN_DATA_MORTALITY,aes(x=Year,ymin=`10%`,ymax=`90%`),alpha=ALPHA,fill=COLOR)+geom_ribbon(data=FAN_DATA_MORTALITY,aes(x=Year,ymin=`25%`,ymax=`75%`),alpha=ALPHA,fill=COLOR)+geom_ribbon(data=FAN_DATA_MORTALITY,aes(x=Year,ymin=`40%`,ymax=`60%`),alpha=ALPHA,fill=COLOR)+geom_line(aes(x=Year,y=Deaths,group=Percentile,color=Percentile))+geom_line(data=HIST %>% filter(Year>=2009),aes(x=Year,y=Deaths),color='black',linewidth=0.75)+geom_line(data=MEDIAN_PRED_MORTALITY,aes(x=Year,y=Deaths),color='black',linetype=4,linewidth=0.75)+ scale_x_continuous(breaks = c(seq(2000, 2065, by = 5)))+ scale_y_continuous(breaks = seq(0, 50, by = 5))+theme_bw()+ggtitle("Kemmerer, Wyoming Death Forecast")+ expand_limits( y = 0)
ggplot(data=GRAPH_DATA_BIRTHS)+geom_ribbon(data=FAN_DATA_BIRTHS,aes(x=Year,ymin=`2.5%`,ymax=`97.5%`),alpha=ALPHA,fill=COLOR)+geom_ribbon(data=FAN_DATA_BIRTHS,aes(x=Year,ymin=`5%`,ymax=`95%`),alpha=ALPHA,fill=COLOR)+geom_ribbon(data=FAN_DATA_BIRTHS,aes(x=Year,ymin=`10%`,ymax=`90%`),alpha=ALPHA,fill=COLOR)+geom_ribbon(data=FAN_DATA_BIRTHS,aes(x=Year,ymin=`25%`,ymax=`75%`),alpha=ALPHA,fill=COLOR)+geom_ribbon(data=FAN_DATA_BIRTHS,aes(x=Year,ymin=`40%`,ymax=`60%`),alpha=ALPHA,fill=COLOR)+geom_line(aes(x=Year,y=Births,group=Percentile,color=Percentile))+geom_line(data=HIST %>% filter(Year>=2009),aes(x=Year,y=Births),color='black',linewidth=0.75)+geom_line(data=MEDIAN_PRED_BIRTHS,aes(x=Year,y=Births),color='black',linetype=4,linewidth=0.75)+ scale_x_continuous(breaks = c(seq(2000, 2065, by = 5)))+ scale_y_continuous(breaks = seq(0, 200, by = 5))+theme_bw()+ggtitle("Kemmerer, Wyoming Birth Forecast")+ expand_limits( y = 0)

View File

@ -4,10 +4,14 @@ library(tidyverse)
HIST <- readRDS("Data/Cleaned_Data/Population_Data/RDS/Kemmerer_Diamondville_Population_Data.Rds") %>% filter(County=='Lincoln') %>% mutate(Percentile="Actual Population") %>% filter(Year>=1940)
YEARS <- min(RES$Year):max(RES$Year)
######Population
START_POP <- HIST %>% filter(Year==2016) %>% pull(Population)
GRAPH_DATA <- do.call(rbind,lapply(YEARS,function(x){quantile(RES %>% filter(Year==x) %>% pull(Population),c(0.025,0.05,0.1,0.25,0.4,0.5,0.6,0.75,0.9,0.95,0.975))})) %>% as_tibble %>% mutate(Year=YEARS)
#Add 2016 as a starting value
GRAPH_DATA <- rbind(GRAPH_DATA[1,],GRAPH_DATA)
GRAPH_DATA[1,] <-t(c(rep(START_POP,ncol(GRAPH_DATA)-1),min(GRAPH_DATA$Year)-1))
FAN_DATA <- GRAPH_DATA
GRAPH_DATA <- GRAPH_DATA %>% pivot_longer(cols=!Year,names_to=c("Percentile"),values_to="Population")
GRAPH_DATA$Percentile <- factor(GRAPH_DATA$Percentile,levels=rev(c('2.5%','5%','10%','25%','40%','60%','75%','90%','95%','97.5%')))
GRAPH_DATA$Percentile <- factor(GRAPH_DATA$Percentile,levels=rev(c('2.5%','5%','10%','25%','40%','50%','60%','75%','90%','95%','97.5%')))
START_POP <- HIST %>% filter(Year==2016) %>% pull(Population)
GRAPH_DATA <- rbind(GRAPH_DATA,GRAPH_DATA %>% filter(Year==2017) %>% mutate(Population=START_POP,Year=2016))
MEDIAN_PRED <- GRAPH_DATA %>% filter(Percentile=='50%')
@ -17,7 +21,23 @@ library(tidyverse)
GRAPH_DATA_MIGRATION <- GRAPH_DATA_MIGRATION %>% pivot_longer(cols=!Year,names_to=c("Percentile"),values_to="Migration")
GRAPH_DATA_MIGRATION$Percentile <- factor(GRAPH_DATA_MIGRATION$Percentile,levels=rev(c('2.5%','5%','10%','25%','40%','60%','75%','90%','95%','97.5%')))
MEDIAN_PRED_MIGRATION <- GRAPH_DATA_MIGRATION %>% filter(Percentile=='50%')
######Mortalities
GRAPH_DATA_MORTALITY <- do.call(rbind,lapply(YEARS,function(x){quantile(RES %>% filter(Year==x) %>% pull(Deaths),c(0.025,0.05,0.1,0.25,0.4,0.5,0.6,0.75,0.9,0.95,0.975))})) %>% as_tibble %>% mutate(Year=YEARS)
FAN_DATA_MORTALITY <- GRAPH_DATA_MORTALITY
GRAPH_DATA_MORTALITY <- GRAPH_DATA_MORTALITY %>% pivot_longer(cols=!Year,names_to=c("Percentile"),values_to="Deaths")
GRAPH_DATA_MORTALITY$Percentile <- factor(GRAPH_DATA_MORTALITY$Percentile,levels=rev(c('2.5%','5%','10%','25%','40%','60%','75%','90%','95%','97.5%')))
MEDIAN_PRED_MORTALITY<- GRAPH_DATA_MORTALITY %>% filter(Percentile=='50%')
######Births
START_BIRTHS <- HIST %>% filter(Year==2016) %>% pull(Births)
GRAPH_DATA_BIRTHS <- do.call(rbind,lapply(YEARS,function(x){quantile(RES %>% filter(Year==x) %>% pull(Births),c(0.025,0.05,0.1,0.25,0.4,0.5,0.6,0.75,0.9,0.95,0.975))})) %>% as_tibble %>% mutate(Year=YEARS)
GRAPH_DATA_BIRTHS <- rbind(GRAPH_DATA_BIRTHS[1,],GRAPH_DATA_BIRTHS)
GRAPH_DATA_BIRTHS[1,] <-t(c(rep(START_BIRTHS,ncol(GRAPH_DATA_BIRTHS)-1),min(GRAPH_DATA_BIRTHS$Year)-1))
FAN_DATA_BIRTHS <- GRAPH_DATA_BIRTHS
GRAPH_DATA_BIRTHS<- GRAPH_DATA_BIRTHS %>% pivot_longer(cols=!Year,names_to=c("Percentile"),values_to="Births")
GRAPH_DATA_BIRTHS$Percentile <- factor(GRAPH_DATA_BIRTHS$Percentile,levels=rev(c('2.5%','5%','10%','25%','40%','60%','75%','90%','95%','97.5%')))
MEDIAN_PRED_BIRTHS<- GRAPH_DATA_BIRTHS %>% filter(Percentile=='50%')
@ -29,7 +49,17 @@ library(tidyverse)
ALPHA=0.2
COLOR <- 'black'
#GRAPH <-
nrow(RES)
ggplot(data=GRAPH_DATA)+geom_ribbon(data=FAN_DATA,aes(x=Year,ymin=`2.5%`,ymax=`97.5%`),alpha=ALPHA,fill=COLOR)+geom_ribbon(data=FAN_DATA,aes(x=Year,ymin=`5%`,ymax=`95%`),alpha=ALPHA,fill=COLOR)+geom_ribbon(data=FAN_DATA,aes(x=Year,ymin=`10%`,ymax=`90%`),alpha=ALPHA,fill=COLOR)+geom_ribbon(data=FAN_DATA,aes(x=Year,ymin=`25%`,ymax=`75%`),alpha=ALPHA,fill=COLOR)+geom_ribbon(data=FAN_DATA,aes(x=Year,ymin=`40%`,ymax=`60%`),alpha=ALPHA,fill=COLOR)+geom_line(aes(x=Year,y=Population,group=Percentile,color=Percentile))+geom_line(data=HIST,aes(x=Year,y=Population),color='black',linewidth=0.75)+geom_line(data=MEDIAN_PRED,aes(x=Year,y=Population),color='black',linetype=4,linewidth=0.75)+ scale_x_continuous(breaks = c(seq(1940, 2070, by = 10)))+ scale_y_continuous(breaks = seq(0, 35000, by = 250))+theme_bw()+ggtitle("Kemmerer & Diamondville, Population Forecast")+ expand_limits( y = 0)
HIST %>% filter(!is.na(Migration))
ggplot(data=GRAPH_DATA_MIGRATION)+geom_ribbon(data=FAN_DATA_MIGRATION,aes(x=Year,ymin=`2.5%`,ymax=`97.5%`),alpha=ALPHA,fill=COLOR)+geom_ribbon(data=FAN_DATA_MIGRATION,aes(x=Year,ymin=`5%`,ymax=`95%`),alpha=ALPHA,fill=COLOR)+geom_ribbon(data=FAN_DATA_MIGRATION,aes(x=Year,ymin=`10%`,ymax=`90%`),alpha=ALPHA,fill=COLOR)+geom_ribbon(data=FAN_DATA_MIGRATION,aes(x=Year,ymin=`25%`,ymax=`75%`),alpha=ALPHA,fill=COLOR)+geom_ribbon(data=FAN_DATA_MIGRATION,aes(x=Year,ymin=`40%`,ymax=`60%`),alpha=ALPHA,fill=COLOR)+geom_line(aes(x=Year,y=Migration,group=Percentile,color=Percentile))+geom_line(data=HIST %>% filter(Year>=2008),aes(x=Year,y=Migration),color='black',linewidth=0.75)+geom_line(data=MEDIAN_PRED_MIGRATION,aes(x=Year,y=Migration),color='black',linetype=4,linewidth=0.75)+ scale_x_continuous(breaks = c(seq(2010, 2070, by = 10)))+ scale_y_continuous(breaks = seq(-500, 100, by = 100))+theme_bw()+ggtitle("Kemmerer, Wyoming Migration Forecast")
nrow(RES)/10
ggplot(data=GRAPH_DATA)+geom_ribbon(data=FAN_DATA,aes(x=Year,ymin=`2.5%`,ymax=`97.5%`),alpha=ALPHA,fill=COLOR)+geom_ribbon(data=FAN_DATA,aes(x=Year,ymin=`5%`,ymax=`95%`),alpha=ALPHA,fill=COLOR)+geom_ribbon(data=FAN_DATA,aes(x=Year,ymin=`10%`,ymax=`90%`),alpha=ALPHA,fill=COLOR)+geom_ribbon(data=FAN_DATA,aes(x=Year,ymin=`25%`,ymax=`75%`),alpha=ALPHA,fill=COLOR)+geom_ribbon(data=FAN_DATA,aes(x=Year,ymin=`40%`,ymax=`60%`),alpha=ALPHA,fill=COLOR)+geom_line(aes(x=Year,y=Population,group=Percentile,color=Percentile))+geom_line(data=HIST,aes(x=Year,y=Population),color='black',linewidth=0.75)+geom_line(data=MEDIAN_PRED,aes(x=Year,y=Population),color='black',linetype=4,linewidth=0.75)+ scale_x_continuous(breaks = c(seq(1940, 2030, by = 5)))+ scale_y_continuous(breaks = seq(0, 35000, by = 250))+theme_bw()+ggtitle("Kemmerer & Diamondville, Population Forecast")+ expand_limits( y = 0)
ggplot(data=GRAPH_DATA_MIGRATION)+geom_ribbon(data=FAN_DATA_MIGRATION,aes(x=Year,ymin=`2.5%`,ymax=`97.5%`),alpha=ALPHA,fill=COLOR)+geom_ribbon(data=FAN_DATA_MIGRATION,aes(x=Year,ymin=`5%`,ymax=`95%`),alpha=ALPHA,fill=COLOR)+geom_ribbon(data=FAN_DATA_MIGRATION,aes(x=Year,ymin=`10%`,ymax=`90%`),alpha=ALPHA,fill=COLOR)+geom_ribbon(data=FAN_DATA_MIGRATION,aes(x=Year,ymin=`25%`,ymax=`75%`),alpha=ALPHA,fill=COLOR)+geom_ribbon(data=FAN_DATA_MIGRATION,aes(x=Year,ymin=`40%`,ymax=`60%`),alpha=ALPHA,fill=COLOR)+geom_line(aes(x=Year,y=Migration,group=Percentile,color=Percentile))+geom_line(data=HIST %>% filter(Year>=2009),aes(x=Year,y=Migration),color='black',linewidth=0.75)
+geom_line(data=MEDIAN_PRED_MIGRATION,aes(x=Year,y=Migration),color='black',linetype=4,linewidth=0.75)+ scale_x_continuous(breaks = c(seq(1940, 2030, by = 5)))+ scale_y_continuous(breaks = seq(0, 35000, by = 250))+theme_bw()+ggtitle("Kemmerer & Diamondville, Migration Forecast")+ expand_limits( y = 0)
ggplot(data=GRAPH_DATA_MORTALITY)+geom_ribbon(data=FAN_DATA_MORTALITY,aes(x=Year,ymin=`2.5%`,ymax=`97.5%`),alpha=ALPHA,fill=COLOR)+geom_ribbon(data=FAN_DATA_MORTALITY,aes(x=Year,ymin=`5%`,ymax=`95%`),alpha=ALPHA,fill=COLOR)+geom_ribbon(data=FAN_DATA_MORTALITY,aes(x=Year,ymin=`10%`,ymax=`90%`),alpha=ALPHA,fill=COLOR)+geom_ribbon(data=FAN_DATA_MORTALITY,aes(x=Year,ymin=`25%`,ymax=`75%`),alpha=ALPHA,fill=COLOR)+geom_ribbon(data=FAN_DATA_MORTALITY,aes(x=Year,ymin=`40%`,ymax=`60%`),alpha=ALPHA,fill=COLOR)+geom_line(aes(x=Year,y=Deaths,group=Percentile,color=Percentile))+geom_line(data=HIST %>% filter(Year>=2009),aes(x=Year,y=Deaths),color='black',linewidth=0.75)+geom_line(data=MEDIAN_PRED_MORTALITY,aes(x=Year,y=Deaths),color='black',linetype=4,linewidth=0.75)+ scale_x_continuous(breaks = c(seq(2000, 2030, by = 5)))+ scale_y_continuous(breaks = seq(0, 50, by = 5))+theme_bw()+ggtitle("Kemmerer, Wyoming Death Forecast")+ expand_limits( y = 0)
ggplot(data=GRAPH_DATA_BIRTHS)+geom_ribbon(data=FAN_DATA_BIRTHS,aes(x=Year,ymin=`2.5%`,ymax=`97.5%`),alpha=ALPHA,fill=COLOR)+geom_ribbon(data=FAN_DATA_BIRTHS,aes(x=Year,ymin=`5%`,ymax=`95%`),alpha=ALPHA,fill=COLOR)+geom_ribbon(data=FAN_DATA_BIRTHS,aes(x=Year,ymin=`10%`,ymax=`90%`),alpha=ALPHA,fill=COLOR)+geom_ribbon(data=FAN_DATA_BIRTHS,aes(x=Year,ymin=`25%`,ymax=`75%`),alpha=ALPHA,fill=COLOR)+geom_ribbon(data=FAN_DATA_BIRTHS,aes(x=Year,ymin=`40%`,ymax=`60%`),alpha=ALPHA,fill=COLOR)+geom_line(aes(x=Year,y=Births,group=Percentile,color=Percentile))+geom_line(data=HIST %>% filter(Year>=2009),aes(x=Year,y=Births),color='black',linewidth=0.75)+geom_line(data=MEDIAN_PRED_BIRTHS,aes(x=Year,y=Births),color='black',linetype=4,linewidth=0.75)+ scale_x_continuous(breaks = c(seq(2000, 2030, by = 5)))+ scale_y_continuous(breaks = seq(0, 200, by = 5))+theme_bw()+ggtitle("Kemmerer, Wyoming Birth Forecast")+ expand_limits( y = 0)

View File

@ -10,4 +10,4 @@ Data is manually gathered from CDC WONDER data queries.
4) The world pandemic uncertainty index as collected from FRED which is used to account for pandemics in the regression, making the age time series stationary.
These are used to project mortality trends over time. In the case of the age adjusted data, this has local trends that can be compared to the national average. The single age-sex data is only at a national level but can be imparted to local levels as a general trend in the distribution of deaths
--- Run Date: 2025-12-01 16:10:03 ---
--- Run Date: 2025-12-03 14:11:57 ---

View File

@ -49,32 +49,32 @@ OTHER_BIRTH_DATA <- OTHER_BIRTH_DATA %>% select(-Births) %>% left_join(ADJUSTED_
REG_DATA <- rbind(COUNTY_BIRTH_DATA,rbind(KEM_BIRTH_DATA,OTHER_BIRTH_DATA) %>% mutate(Deaths=NA,Migration=NA) %>% select(colnames(COUNTY_BIRTH_DATA)))
REG_DATA <- REG_DATA %>% group_by(Region) %>% arrange(Year) %>% mutate(Lag_Births=lag(Births),Lag_Two_Births=lag(Births,2)) %>% ungroup %>% arrange(County,Region,Year)
REG_DATA <- REG_DATA %>% group_by(Region) %>% arrange(Year) %>% mutate(Lag_Births=lag(Births),Lag_Two_Births=lag(Births,2)) %>% ungroup %>% arrange(County,Region,Year) %>% mutate(KEM=ifelse(Region=='Kemmerer & Diamondville',1,0),Region=ifelse(Region=='Kemmerer & Diamondville','Lincoln',Region))
REG_REDUCED_DATA <- REG_DATA %>% filter(!is.na(Births),!is.na(Lag_Two_Births),!is.na(Min_Birth_Group),!is.na(Lag_Births),!is.na(Region)) %>% select(Year,Region,KEM,Min_Birth_Group,Births,Lag_Births,Lag_Two_Births)
REG_REDUCED_DATA <- REG_DATA %>% filter(!is.na(Births),!is.na(Lag_Two_Births),!is.na(Min_Birth_Group),!is.na(Lag_Births),!is.na(Region)) %>% select(Year,Region,Min_Birth_Group,Births,Lag_Births,Lag_Two_Births)
###Predict the number of Births
MOD_BIRTHS <- feols(log(Births)~log(Lag_Births)+log(Lag_Two_Births)+log(Min_Birth_Group)+Year*Region,cluster=~Year+Region, data=REG_REDUCED_DATA ) #Higher AIC but worse acf
MOD_BIRTHS <- feols(log(Births)~log(Lag_Births)+log(Lag_Two_Births)+log(Min_Birth_Group)+Year*KEM+Year*Region,cluster=~Year+Region, data=REG_REDUCED_DATA,data.save = TRUE) #Higher AIC but worse acf
####Alternate models with different years of data to test to model against a counterfactual
REG_DATA_2016 <- REG_REDUCED_DATA %>% filter(Year<=2016)
MOD_BIRTHS_2016 <- feols(log(Births)~log(Lag_Births)+log(Lag_Two_Births)+log(Min_Birth_Group)+Year*Region,cluster=~Year+Region, data=REG_DATA_2016 )
MOD_BIRTHS_2016 <- feols(log(Births)~log(Lag_Births)+log(Lag_Two_Births)+log(Min_Birth_Group)+KEM+Year*Region,cluster=~Year+Region, data=REG_DATA_2016,data.save = TRUE)
REG_DATA_1985 <- REG_REDUCED_DATA %>% filter(Year<=1985)
MOD_BIRTHS_1985 <- feols(log(Births)~log(Lag_Births)+log(Lag_Two_Births)+log(Min_Birth_Group)+Year*Region,cluster=~Year+Region, data=REG_DATA_1985 )
MOD_BIRTHS_1985 <- feols(log(Births)~log(Lag_Births)+log(Lag_Two_Births)+log(Min_Birth_Group)+Year*Region,cluster=~Year+Region, data=REG_DATA_1985 ,data.save = TRUE)
###Models to more easily show the results
#Easier to read for a LaTex Table
DICT <- c("log(Lag_Births)"="Births Last Years (log)","log(Lag_Two_Births)"="Births Two Years Ago (log)","LN"='Kemmerer Area','log(Births)'='Births (log)','log(Min_Birth_Group)'='Child Rearing Aged Adults (log)','Region'="County" )
REG_VIEW_DATA <- REG_REDUCED_DATA %>% mutate(LN=ifelse(Region=='Kemmerer & Diamondville',1,0))
MOD_VIEW_BIRTHS <- feols(log(Births)~log(Lag_Births)+log(Lag_Two_Births)+log(Min_Birth_Group)+LN*Year+Year*Region|Region,cluster=~Year+Region, data=REG_VIEW_DATA ) #Higher AIC but worse acf
DICT <- c("log(Lag_Births)"="Births Last Years (log)","log(Lag_Two_Births)"="Births Two Years Ago (log)","LN"='Lincoln',"KEM"='Kemmerer Area','log(Births)'='Births (log)','log(Min_Birth_Group)'='Child Rearing Aged Adults (log)','Region'="County" )
REG_VIEW_DATA <- REG_REDUCED_DATA %>% mutate(KEM=ifelse(Region=='Kemmerer & Diamondville',1,0),LN=ifelse(Region=='Kemmerer & Diamondville' |Region=='Lincoln' ,1,0),Region=ifelse(Region=='Kemmerer & Diamondville' ,'Lincoln',Region))
MOD_VIEW_BIRTHS <- feols(log(Births)~log(Lag_Births)+log(Lag_Two_Births)+log(Min_Birth_Group)+KEM*Year+Year*Region|Region,cluster=~Year+Region, data=REG_VIEW_DATA ) #Higher AIC but worse acf
REG_VIEW_DATA_2016 <- REG_VIEW_DATA %>% filter(Year<=2016)
MOD_VIEW_BIRTHS_2016 <- feols(log(Births)~log(Lag_Births)+log(Lag_Two_Births)+log(Min_Birth_Group)+LN*Year+Year*Region|Region,cluster=~Year+Region, data=REG_VIEW_DATA_2016 )
REG_DATA_2016 <- REG_REDUCED_DATA %>% filter(Year<=2016)
MOD_VIEW_BIRTHS_2016 <- feols(log(Births)~log(Lag_Births)+log(Lag_Two_Births)+log(Min_Birth_Group)+KEM+LN*Year+Year*Region|Region,cluster=~Year+Region, data=REG_VIEW_DATA_2016 )
MOD_VIEW_BIRTHS_2016
REG_VIEW_DATA_1985 <- REG_VIEW_DATA%>% filter(Year<=1985)
MOD_VIEW_BIRTHS_1985 <- feols(log(Births)~log(Lag_Births)+log(Lag_Two_Births)+log(Min_Birth_Group)+LN*Year+Year*Region|Region,cluster=~Year+Region, data=REG_VIEW_DATA_1985 )
1
###Prelim information for fixest table
NOTES <- c("Natural log used for all variables besides years, and counties","Kemmerer Area: Includes both Kemmerer and Diamondville","Child Rearing Adults: Minimum of all women aged 18-28 or men aged 18-30.","Kemmerer data from the American Community Survey Data starts in 2009")
LAB <- c("Kem.","Kem. Pre 2016","Lincoln Pre 1985")
@ -98,8 +98,6 @@ try(etable(MOD_VIEW_BIRTHS,MOD_VIEW_BIRTHS_2016,MOD_VIEW_BIRTHS_1985,headers=HEA
TEMP <- REG_REDUCED_DATA
TEMP$RESID <- resid(MOD_BIRTHS)
#Kemmerer ACF/PACF
C_TEMP <- TEMP %>% filter(Region=='Kemmerer & Diamondville') %>% arrange(Year)
png(paste0(SAVE_FIG_LOC,"/Kemmerer_ACF.png"), width = 12, height = 8, units = "in", res = 600)
@ -126,8 +124,7 @@ C_TEMP <- TEMP %>% filter(Region=='Lincoln_Other') %>% arrange(Year)
dev.off()
####Create data stubs to start a simulation. That is predict the births from this most recent year. Include records from various years of potential interest
ST_REG_DATA <- REG_REDUCED_DATA %>% filter(Region=='Lincoln') %>% filter(Year==max(Year)) %>% rbind(REG_REDUCED_DATA %>% filter(Region=='Kemmerer & Diamondville') %>% filter(Year==max(Year))) %>% rbind(REG_REDUCED_DATA %>% filter(Region=='Lincoln_Other') %>% filter(Year==max(Year))) %>% rbind(REG_REDUCED_DATA %>% filter(Region=='Lincoln') %>% filter(Year==2016)) %>% rbind(REG_REDUCED_DATA %>% filter(Region=='Kemmerer & Diamondville') %>% filter(Year==2016)) %>% rbind(REG_REDUCED_DATA %>% filter(Region=='Lincoln_Other') %>% filter(Year==2016)) %>% rbind(REG_REDUCED_DATA %>% filter(Region=='Lincoln') %>% filter(Year==1985))
ST_REG_DATA <- REG_REDUCED_DATA %>% filter(Region=='Lincoln') %>% filter(Year==max(Year)) %>% rbind(REG_REDUCED_DATA %>% filter(Region=='Kemmerer & Diamondville') %>% filter(Year==max(Year))) %>% rbind(REG_REDUCED_DATA %>% filter(Region=='Lincoln_Other') %>% filter(Year==max(Year))) %>% rbind(REG_REDUCED_DATA %>% filter(Region=='Lincoln') %>% filter(Year==2016)) %>% rbind(REG_REDUCED_DATA %>% filter(Region=='Kemmerer & Diamondville') %>% filter(Year==2016)) %>% rbind(REG_REDUCED_DATA %>% filter(Region=='Lincoln_Other') %>% filter(Year==2016)) %>% rbind(REG_REDUCED_DATA %>% filter(Region=='Lincoln') %>% filter(Year==1985)) %>% rbind(REG_REDUCED_DATA %>% filter(KEM==1) %>% filter(Year==max(Year)))
if(!exists("SAVE_REG_LOC")){SAVE_REG_LOC <- "Data/Intermediate_Inputs/Birth_Regressions"}
dir.create(SAVE_REG_LOC , recursive = TRUE, showWarnings = FALSE)
@ -156,7 +153,7 @@ if(!exists("POPULATION_SAVE_RDS")){POPULATION_SAVE_RDS <- "./Data/Cleaned_Data/P
dir.create(POPULATION_SAVE_RDS, recursive = TRUE, showWarnings = FALSE)
if(!exists("POPULATION_SAVE_CSV")){POPULATION_SAVE_CSV <- "./Data/Cleaned_Data/Population_Data/CSV/"}
dir.create(POPULATION_SAVE_CSV, recursive = TRUE, showWarnings = FALSE)
dir.create(POPULATION_SAVE_CSV, recursive = TRUE, showWarnings = FALSE)
saveRDS(LIN_DATA_NEW,paste0(POPULATION_SAVE_RDS,"Full_Lincoln_County_Population_Data.Rds"))
write_csv(LIN_DATA_NEW,paste0(POPULATION_SAVE_CSV,"Full_Lincoln_County_Population_Data.csv"))
saveRDS(KEM_DATA_NEW,paste0(POPULATION_SAVE_RDS,"Kemmerer_Diamondville_Population_Data.Rds"))

View File

@ -62,9 +62,12 @@ OTHER_NEW <- LN/LN_ADJ_OTHER
OTHER_2016_NEW <- LN_2016/LN_ADJ_OTHER
OTHER_1985_NEW <- LN_1985/LN_ADJ_OTHER
#Create new models for migration forecasts
MOD_KEM_ADJ <- auto.arima(KEM_NEW ,stationary=TRUE)
MOD_KEM_ADJ <- auto.arima(KEM_NEW )
MOD_KEM_ADJ_SHIFT <- auto.arima(KEM_NEW +as.numeric(coef(MOD_KEM)["intercept"]))
median(simulate(MOD_KEM_ADJ,100000 ))
MOD_KEM_ADJ_2016 <- auto.arima(KEM_2016_NEW ,stationary=TRUE)
median(abs(simulate(MOD_KEM_ADJ,100000 )))
#MOD_KEM_ADJ_2016 <- auto.arima(KEM_2016_NEW ,stationary=TRUE)
MOD_KEM_ADJ_2016 <- auto.arima(KEM_2016_NEW+as.numeric(coef(MOD_KEM_2016)["intercept"]))
MOD_KEM_ADJ_1985 <- auto.arima(KEM_1985_NEW ,stationary=TRUE)
MOD_OTHER_ADJ <- auto.arima(OTHER_NEW,stationary=TRUE)
@ -75,6 +78,7 @@ MOD_OTHER_ADJ_1985 <- auto.arima(OTHER_1985_NEW,stationary=TRUE)
dir.create(SAVE_LOC_ARIMA_MODELS, recursive = TRUE, showWarnings = FALSE)
saveRDS(MOD,paste0(SAVE_LOC_ARIMA_MODELS,"Full_Lincoln_County_Net_Migration_ARIMA.Rds"))
saveRDS(MOD_KEM_ADJ,paste0(SAVE_LOC_ARIMA_MODELS,"Kemmerer_Diamondville_Net_Migration_ARIMA.Rds"))
saveRDS(MOD_KEM_ADJ_SHIFT,paste0(SAVE_LOC_ARIMA_MODELS,"Kemmerer_Diamondville_Net_Migration_ARIMA_With_Downward_Shift.Rds"))
saveRDS(MOD_OTHER_ADJ,paste0(SAVE_LOC_ARIMA_MODELS,"Other_Lincoln_Net_Migration_ARIMA.Rds"))
saveRDS(MOD_2016,paste0(SAVE_LOC_ARIMA_MODELS,"Full_Lincoln_County_Net_Migration_ARIMA_2016.Rds"))

View File

@ -1,6 +1,6 @@
#Births,PREV_BIRTH,PREV_TWO_BIRTH,Min_Birth_Group,Year,County
#Uncomment to test the function step by step
#REG_MODEL <- MOD_BIRTHS;REG_DATA <- FIRST_PREDICT_YEAR_POPULATION_DATA;NUM_SIMS=1
#REG_MODEL <- BIRTH_MOD;REG_DATA <- BIRTH_DATA;NUM_SIMS=1
BIRTH_SIM <- function(REG_MODEL,REG_DATA,NUM_SIMS=1){
C_PREDICT <- predict(REG_MODEL,REG_DATA,interval = "prediction",level=0.95)
PRED_MEAN <- C_PREDICT$fit
@ -14,7 +14,5 @@ BIRTH_SIM <- function(REG_MODEL,REG_DATA,NUM_SIMS=1){
#%>% as_tibble
#colnames(RES) <- c("Num_Male","Num_Female")
#"0"
return(RES)
}