diff --git a/1_Run_Full_Simulation.r b/1A_Run_Full_Simulation_2016.r similarity index 94% rename from 1_Run_Full_Simulation.r rename to 1A_Run_Full_Simulation_2016.r index 9a6c09f..160642c 100644 --- a/1_Run_Full_Simulation.r +++ b/1A_Run_Full_Simulation_2016.r @@ -20,12 +20,12 @@ NUM_SIMULATIONS <- 10^4 ST_YEAR <- 2017 ################################Load Data DEMO <- readRDS("Data/Intermediate_Inputs/Starting_Demographic_Data_Sets_of_Monte_Carlo/2016_Starting_Kemmerer_Diamondville_Demographics_Matrix.Rds") -sum(readRDS("Data/Intermediate_Inputs/Starting_Demographic_Data_Sets_of_Monte_Carlo/2023_Starting_Kemmerer_Diamondville_Demographics_Matrix.Rds")) BIRTH_MOD <- readRDS("Data/Intermediate_Inputs/Birth_Regressions/Birth_Regression_2016.Rds") #Must add region as a factor with multiple levels for predict to work. Seems to check for multiple levels although that is not needed econometrics. -BIRTH_DATA <- readRDS("Data/Intermediate_Inputs/Birth_Regressions/Regression_Data/Birth_Simulation_Key_Starting_Points.Rds") %>% mutate(Region=factor(Region)) %>% filter(Region=='Kemmerer & Diamondville',Year==2016) -MIGRATION_ARIMA <- readRDS("Data/Intermediate_Inputs/Migration_ARIMA_Models/Kemmerer_Diamondville_Net_Migration_ARIMA_2016.Rds") +BIRTH_DATA <- readRDS("Data/Intermediate_Inputs/Birth_Regressions/Regression_Data/Birth_Simulation_Key_Starting_Points.Rds") %>% mutate(Region=factor(Region)) %>% filter(KEM==1,Year==2016) + +MIGRATION_ARIMA <- readRDS("Data/Intermediate_Inputs/Migration_ARIMA_Models/Kemmerer_Diamondville_Adjusted_to_Lincoln_Model_Net_Migration_ARIMA_2016.Rds") MIGRATION_ODDS <- readRDS("Data/Intermediate_Inputs/Migration_Trends/Migration_Age_Probability_Zero_to_85.Rds") ############## #Data for death rate trends @@ -108,7 +108,7 @@ SINGLE_SIM <- function(DEMO,BIRTH_DATA,ST_YEAR,YEARS_AHEAD,MIGRATION_ARIMA_MODEL NCORES <- detectCores()-1 BATCH_SIZE <- NCORES*10 - TOTAL_SIMULATIONS <- 10^5 + TOTAL_SIMULATIONS <- 10^6 N_RUNS <-ceiling(TOTAL_SIMULATIONS/BATCH_SIZE ) SIM_RES_FILE <- paste0(RES_SIM_DIR,"Kemmerer_2016_Simulation.csv") @@ -122,7 +122,7 @@ NEW_RES_FILE <- !file.exists(SIM_RES_FILE) if(exists("RES")){ RES <- as.data.frame(RES) RES[,-1] <- as.numeric(as.matrix(RES[,-1])) - if(NEW_RES_FILE){write_csv(RES,SIM_RES_FILE)}else {write_csv(RES,SIM_RES_FILE,col_names=FALSE,append=TRUE)} + if(NEW_RES_FILE & i==1){write_csv(RES,SIM_RES_FILE)}else {write_csv(RES,SIM_RES_FILE,col_names=FALSE,append=TRUE)} rm(RES) } } diff --git a/1B_Run_Full_Simulation.r b/1B_Run_Full_Simulation.r new file mode 100644 index 0000000..160642c --- /dev/null +++ b/1B_Run_Full_Simulation.r @@ -0,0 +1,129 @@ +#####Packages +library(tidyverse) #Cleaning data +library(fixest) #Estimating a model of birth rates, to provide variance in the birth rate Monte Carlo using a fixed effect model. +library(forecast) #Fore ARIMA migration simulations +library(parallel) +library(uuid) #To add a index to each batch +####If the prelimnary data needs to be reloaded run the supplied bash script to download, process, and generate all needed data sets for the Monte Carlo population Simulation. Otherwise skip this step to save time +RELOAD_DATA <- FALSE +if(RELOAD_DATA){system("bash Prelim_Process.sh")} + +#Load custom functions needed for the simulation +source("Scripts/Load_Custom_Functions/Migration_Simulation_Functions.r") +source("Scripts/Load_Custom_Functions/Birth_Simulation_Functions.r") +source("Scripts/Load_Custom_Functions/Increment_Data_Year.r") +source("Scripts/Load_Custom_Functions/Single_Age_Mortality_Trend_Simulation.r") + +#######Preliminary Model Inputs +YEARS_AHEAD <- 10 +NUM_SIMULATIONS <- 10^4 +ST_YEAR <- 2017 +################################Load Data +DEMO <- readRDS("Data/Intermediate_Inputs/Starting_Demographic_Data_Sets_of_Monte_Carlo/2016_Starting_Kemmerer_Diamondville_Demographics_Matrix.Rds") + +BIRTH_MOD <- readRDS("Data/Intermediate_Inputs/Birth_Regressions/Birth_Regression_2016.Rds") +#Must add region as a factor with multiple levels for predict to work. Seems to check for multiple levels although that is not needed econometrics. +BIRTH_DATA <- readRDS("Data/Intermediate_Inputs/Birth_Regressions/Regression_Data/Birth_Simulation_Key_Starting_Points.Rds") %>% mutate(Region=factor(Region)) %>% filter(KEM==1,Year==2016) + +MIGRATION_ARIMA <- readRDS("Data/Intermediate_Inputs/Migration_ARIMA_Models/Kemmerer_Diamondville_Adjusted_to_Lincoln_Model_Net_Migration_ARIMA_2016.Rds") +MIGRATION_ODDS <- readRDS("Data/Intermediate_Inputs/Migration_Trends/Migration_Age_Probability_Zero_to_85.Rds") +############## + #Data for death rate trends + SINGLE_AGE_MODS <- readRDS("Data/Intermediate_Inputs/Mortality_Regression_Data/Single_Sex_Age_Time_Series_Regression_2016.Rds") + BOUNDS <- readRDS("Data/Intermediate_Inputs/Mortality_Regression_Data/Single_Sex_Age_Bounds_for_Predictions.Rds") + MAX_MALE <- BOUNDS %>% filter(Sex=='Male') %>% pull(MAX_RATE) + MIN_MALE <- BOUNDS %>% filter(Sex=='Male') %>% pull(MIN_RATE) + MAX_FEMALE <- BOUNDS %>% filter(Sex=='Female') %>% pull(MAX_RATE) + MIN_FEMALE <- BOUNDS %>% filter(Sex=='Female') %>% pull(MIN_RATE) + MIN_GAP <- BOUNDS %>% filter(Sex=='Male') %>% pull(MIN_MALE_FEMALE_GAP) + MAX_GAP <- BOUNDS %>% filter(Sex=='Male') %>% pull(MAX_MALE_FEMALE_GAP) + #Adjusted for 2016 by filtering REMOVE LATER + BASELINE_AGE_ADJUST_MEN <- readRDS("Data/Cleaned_Data/Mortality_Data/RDS/Single_Sex_Age_Population_in_2000.Rds") %>% filter(Sex=='Male',Year<=2016) %>% pull(Percent_of_Population) + BASELINE_AGE_ADJUST_WOMEN <- readRDS("Data/Cleaned_Data/Mortality_Data/RDS/Single_Sex_Age_Population_in_2000.Rds") %>% filter(Sex=='Female',Year<=2016) %>% pull(Percent_of_Population) + #Adjust to just women popualtion (Not all population percent + BASELINE_AGE_ADJUST_WOMEN <- BASELINE_AGE_ADJUST_WOMEN/sum(BASELINE_AGE_ADJUST_WOMEN ) + BASELINE_AGE_ADJUST_MEN <- BASELINE_AGE_ADJUST_MEN/ sum(BASELINE_AGE_ADJUST_MEN ) + MOD_MEN_ALL <- readRDS("Data/Intermediate_Inputs/Age_Mortality_ARIMA_Models/ARIMA_US_Men_Mortality_by_Age_2016.Rds") + MOD_WOMEN_ALL <- readRDS("Data/Intermediate_Inputs/Age_Mortality_ARIMA_Models/ARIMA_US_Women_Mortality_by_Age_2016.Rds") + MOD_LIN_MEN <- readRDS("Data/Intermediate_Inputs/Age_Mortality_ARIMA_Models/ARIMA_Lincoln_Men_Mortality_by_Age_2016.Rds") + MOD_LIN_WOMEN <- readRDS("Data/Intermediate_Inputs/Age_Mortality_ARIMA_Models/ARIMA_Lincoln_Women_Mortality_by_Age_2016.Rds") + XREG <- cbind(rep(0.0001,YEARS_AHEAD+1),rep(0.0001,YEARS_AHEAD+1)) #Empty data set to simulate in the future + XREG <- ts(XREG,start=ST_YEAR,frequency=1) +SIMULATE_MORTALITY_RATE_TRENDS <- function(){ + SIMULATED_MORTALITY_DATA_SET <- MAKE_EMPTY(ST_YEAR,ST_YEAR+YEARS_AHEAD,MOD_LIN_MEN,MOD_LIN_WOMEN,MOD_MEN_ALL,MOD_WOMEN_ALL,XREG) + MORTALITY_SIMULATION <- AGE_DIST(SINGLE_AGE_MODS,SIMULATED_MORTALITY_DATA_SET ,MAX_MALE,MAX_FEMALE,MIN_MALE,MIN_FEMALE,MAX_GAP,MIN_GAP,BASELINE_AGE_ADJUST_MEN,BASELINE_AGE_ADJUST_WOMEN) + return(MORTALITY_SIMULATION ) + } + +#####################START YEAR BY SIMULATIONS +#CURRENT_YEARS_AHEAD=1;CURRENT_SIM_NUM <- 1;MORTALITY_SIMULATION <- SIMULATE_MORTALITY_RATE_TRENDS() +SINGLE_YEAR_SIM <- function(DEMO,BIRTH_DATA,CURRENT_YEARS_AHEAD,MORTALITY_SIMULATION,NET_MIGRATION){ +ORIG_DEMO <- DEMO +DEMO <- DEMOGRAPHICS_AFTER_MIGRATION(DEMO, NET_MIGRATION,MIGRATION_ODDS ) +TOTAL_MIGRATION <- sum(DEMO-ORIG_DEMO) +BIRTH_DATA$Year <- BIRTH_DATA$Year+1 +BIRTH_DATA$Lag_Two_Births <- BIRTH_DATA$Lag_Births +BIRTH_DATA$Lag_Births <- BIRTH_DATA$Births +BIRTH_DATA$Births <- NA +##We grab one year earlier than the window because they are one year older this year. Because the ages are from 0-85, row 18 is year 17, but one year is added making it 18 years in the current year. The birth windows are 18-28 for women and 18-30 for men. +BIRTH_DATA$Min_Birth_Group <- min(sum(DEMO[18:30,1]),sum(DEMO[18:28,2])) +NEW_BORNS <- BIRTH_SIM(BIRTH_MOD,BIRTH_DATA) +TOTAL_BIRTHS <- sum(NEW_BORNS) +BIRTH_DATA[,"Births"] <- TOTAL_BIRTHS +DEMO <- INCREMENT_AGES(DEMO,NEW_BORNS) +MORTALITY_SIMULATION +MALE_DEATHS <- sapply(1:86,function(x){rbinom(1,DEMO[x,1],MORTALITY_SIMULATION[[1]][x,CURRENT_YEARS_AHEAD])}) +FEMALE_DEATHS <- sapply(1:86,function(x){rbinom(1,DEMO[x,2],MORTALITY_SIMULATION[[2]][x,CURRENT_YEARS_AHEAD])}) +MALE_DEATHS <- ifelse(MALE_DEATHS>=DEMO[,1],DEMO[,1],MALE_DEATHS) +FEMALE_DEATHS <- ifelse(FEMALE_DEATHS>=DEMO[,1],DEMO[,1],FEMALE_DEATHS) + +TOTAL_DEATHS <- sum(MALE_DEATHS+FEMALE_DEATHS) +DEMO[,"Num_Male"] <- DEMO[,"Num_Male"] -MALE_DEATHS +DEMO[,"Num_Female"] <- DEMO[,"Num_Female"] -FEMALE_DEATHS +#List of values needed for the next run or for reporting a result +TOTAL_POP <- sum(DEMO) +return(list(DEMO,BIRTH_DATA,c(TOTAL_POP,TOTAL_BIRTHS,TOTAL_DEATHS,TOTAL_MIGRATION))) +} +MIGRATION_ARIMA_MODEL <- MIGRATION_ARIMA +SINGLE_SIM <- function(DEMO,BIRTH_DATA,ST_YEAR,YEARS_AHEAD,MIGRATION_ARIMA_MODEL){ + MIGRATION_SIM_VALUES <- round(as.vector(simulate(nsim=YEARS_AHEAD,MIGRATION_ARIMA_MODEL) )) + + FINAL_REPORT_VALUES <- matrix(NA,ncol=6,nrow=YEARS_AHEAD) + colnames(FINAL_REPORT_VALUES ) <- c("Sim_UUID","Year","Population","Births","Deaths","Net_Migration") + FINAL_REPORT_VALUES[,1] <- UUIDgenerate() + for(i in 1:YEARS_AHEAD){ + C_YEAR <- ST_YEAR+i-1 + C_RES <-SINGLE_YEAR_SIM(DEMO,BIRTH_DATA,i,SIMULATE_MORTALITY_RATE_TRENDS(),MIGRATION_SIM_VALUES[i]) + DEMO <- C_RES[[1]] + BIRTH_DATA <- C_RES[[2]] + FINAL_REPORT_VALUES[i,-1] <- c(C_YEAR,C_RES[[3]]) + } + return(FINAL_REPORT_VALUES) +} + + if(!exists("RES_DIR")){RES_DIR<- "./Results/"} + if(!exists("RES_SIM_DIR")){RES_SIM_DIR <- paste0(RES_DIR,"Simulations/")} + dir.create(RES_SIM_DIR, recursive = TRUE, showWarnings = FALSE) + + + NCORES <- detectCores()-1 + BATCH_SIZE <- NCORES*10 + TOTAL_SIMULATIONS <- 10^6 + N_RUNS <-ceiling(TOTAL_SIMULATIONS/BATCH_SIZE ) + + SIM_RES_FILE <- paste0(RES_SIM_DIR,"Kemmerer_2016_Simulation.csv") +NEW_RES_FILE <- !file.exists(SIM_RES_FILE) + for(i in 1:N_RUNS){ +# MIGRATION_MATRIX <- simulate(nsim=BATCH_SIZE,MIGRATION_ARIMA,n=YEARS_AHEAD) + MIGRATION_MATRIX <- do.call(cbind, mclapply(1:BATCH_SIZE,function(x)(as.vector(simulate(nsim=YEARS_AHEAD,MIGRATION_ARIMA) )),mc.cores = detectCores()-1)) +# rownames(MIGRATION_MATRIX) <- ST_YEAR:(ST_YEAR+YEARS_AHEAD-1) +# colnames(MIGRATION_MATRIX) <- 1:BATCH_SIZE + try(RES <- do.call(rbind,mclapply(1:BATCH_SIZE,function(x){SINGLE_SIM(DEMO,BIRTH_DATA,ST_YEAR,YEARS_AHEAD,MIGRATION_ARIMA)},mc.cores=NCORES))) + if(exists("RES")){ + RES <- as.data.frame(RES) + RES[,-1] <- as.numeric(as.matrix(RES[,-1])) + if(NEW_RES_FILE & i==1){write_csv(RES,SIM_RES_FILE)}else {write_csv(RES,SIM_RES_FILE,col_names=FALSE,append=TRUE)} + rm(RES) + } + } + diff --git a/2A_Result_Analysis_2016.r b/2A_Result_Analysis_2016.r new file mode 100644 index 0000000..fb6c507 --- /dev/null +++ b/2A_Result_Analysis_2016.r @@ -0,0 +1,65 @@ +library(tidyverse) +###Process the simulations and save the main percentile results by year + RES <- read_csv("Results/Simulations/Kemmerer_2016_Simulation.csv") + HIST <- readRDS("Data/Cleaned_Data/Population_Data/RDS/Kemmerer_Diamondville_Population_Data.Rds") %>% filter(County=='Lincoln') %>% mutate(Percentile="Actual Population") %>% filter(Year>=1940) + YEARS <- min(RES$Year):max(RES$Year) + ######Population + START_POP <- HIST %>% filter(Year==2016) %>% pull(Population) + GRAPH_DATA <- do.call(rbind,lapply(YEARS,function(x){quantile(RES %>% filter(Year==x) %>% pull(Population),c(0.025,0.05,0.1,0.25,0.4,0.5,0.6,0.75,0.9,0.95,0.975))})) %>% as_tibble %>% mutate(Year=YEARS) + #Add 2016 as a starting value + GRAPH_DATA <- rbind(GRAPH_DATA[1,],GRAPH_DATA) + GRAPH_DATA[1,] <-t(c(rep(START_POP,ncol(GRAPH_DATA)-1),min(GRAPH_DATA$Year)-1)) + FAN_DATA <- GRAPH_DATA + GRAPH_DATA <- GRAPH_DATA %>% pivot_longer(cols=!Year,names_to=c("Percentile"),values_to="Population") + GRAPH_DATA$Percentile <- factor(GRAPH_DATA$Percentile,levels=rev(c('2.5%','5%','10%','25%','40%','50%','60%','75%','90%','95%','97.5%'))) + START_POP <- HIST %>% filter(Year==2016) %>% pull(Population) + GRAPH_DATA <- rbind(GRAPH_DATA,GRAPH_DATA %>% filter(Year==2017) %>% mutate(Population=START_POP,Year=2016)) + MEDIAN_PRED <- GRAPH_DATA %>% filter(Percentile=='50%') + ######Migration + GRAPH_DATA_MIGRATION <- do.call(rbind,lapply(YEARS,function(x){quantile(RES %>% filter(Year==x) %>% pull(Net_Migration),c(0.025,0.05,0.1,0.25,0.4,0.5,0.6,0.75,0.9,0.95,0.975))})) %>% as_tibble %>% mutate(Year=YEARS) + FAN_DATA_MIGRATION <- GRAPH_DATA_MIGRATION + GRAPH_DATA_MIGRATION <- GRAPH_DATA_MIGRATION %>% pivot_longer(cols=!Year,names_to=c("Percentile"),values_to="Migration") + GRAPH_DATA_MIGRATION$Percentile <- factor(GRAPH_DATA_MIGRATION$Percentile,levels=rev(c('2.5%','5%','10%','25%','40%','60%','75%','90%','95%','97.5%'))) + MEDIAN_PRED_MIGRATION <- GRAPH_DATA_MIGRATION %>% filter(Percentile=='50%') + ######Mortalities + GRAPH_DATA_MORTALITY <- do.call(rbind,lapply(YEARS,function(x){quantile(RES %>% filter(Year==x) %>% pull(Deaths),c(0.025,0.05,0.1,0.25,0.4,0.5,0.6,0.75,0.9,0.95,0.975))})) %>% as_tibble %>% mutate(Year=YEARS) + FAN_DATA_MORTALITY <- GRAPH_DATA_MORTALITY + GRAPH_DATA_MORTALITY <- GRAPH_DATA_MORTALITY %>% pivot_longer(cols=!Year,names_to=c("Percentile"),values_to="Deaths") + GRAPH_DATA_MORTALITY$Percentile <- factor(GRAPH_DATA_MORTALITY$Percentile,levels=rev(c('2.5%','5%','10%','25%','40%','60%','75%','90%','95%','97.5%'))) + MEDIAN_PRED_MORTALITY<- GRAPH_DATA_MORTALITY %>% filter(Percentile=='50%') + ######Births + START_BIRTHS <- HIST %>% filter(Year==2016) %>% pull(Births) + + GRAPH_DATA_BIRTHS <- do.call(rbind,lapply(YEARS,function(x){quantile(RES %>% filter(Year==x) %>% pull(Births),c(0.025,0.05,0.1,0.25,0.4,0.5,0.6,0.75,0.9,0.95,0.975))})) %>% as_tibble %>% mutate(Year=YEARS) + GRAPH_DATA_BIRTHS <- rbind(GRAPH_DATA_BIRTHS[1,],GRAPH_DATA_BIRTHS) + GRAPH_DATA_BIRTHS[1,] <-t(c(rep(START_BIRTHS,ncol(GRAPH_DATA_BIRTHS)-1),min(GRAPH_DATA_BIRTHS$Year)-1)) + + FAN_DATA_BIRTHS <- GRAPH_DATA_BIRTHS + GRAPH_DATA_BIRTHS<- GRAPH_DATA_BIRTHS %>% pivot_longer(cols=!Year,names_to=c("Percentile"),values_to="Births") + GRAPH_DATA_BIRTHS$Percentile <- factor(GRAPH_DATA_BIRTHS$Percentile,levels=rev(c('2.5%','5%','10%','25%','40%','60%','75%','90%','95%','97.5%'))) + MEDIAN_PRED_BIRTHS<- GRAPH_DATA_BIRTHS %>% filter(Percentile=='50%') + + + + #write_csv(GRAPH_DATA,PERCENTILE_DATA) + #Add historic + MEDIAN_PRED <- GRAPH_DATA %>% filter(Percentile=='50%') + GRAPH_DATA <- GRAPH_DATA %>% filter(Percentile!='50%') + + ALPHA=0.2 + COLOR <- 'black' +#GRAPH <- + nrow(RES)/10 + ggplot(data=GRAPH_DATA)+geom_ribbon(data=FAN_DATA,aes(x=Year,ymin=`2.5%`,ymax=`97.5%`),alpha=ALPHA,fill=COLOR)+geom_ribbon(data=FAN_DATA,aes(x=Year,ymin=`5%`,ymax=`95%`),alpha=ALPHA,fill=COLOR)+geom_ribbon(data=FAN_DATA,aes(x=Year,ymin=`10%`,ymax=`90%`),alpha=ALPHA,fill=COLOR)+geom_ribbon(data=FAN_DATA,aes(x=Year,ymin=`25%`,ymax=`75%`),alpha=ALPHA,fill=COLOR)+geom_ribbon(data=FAN_DATA,aes(x=Year,ymin=`40%`,ymax=`60%`),alpha=ALPHA,fill=COLOR)+geom_line(aes(x=Year,y=Population,group=Percentile,color=Percentile))+geom_line(data=HIST,aes(x=Year,y=Population),color='black',linewidth=0.75)+geom_line(data=MEDIAN_PRED,aes(x=Year,y=Population),color='black',linetype=4,linewidth=0.75)+ scale_x_continuous(breaks = c(seq(1940, 2030, by = 5)))+ scale_y_continuous(breaks = seq(0, 35000, by = 250))+theme_bw()+ggtitle("Kemmerer & Diamondville, Population Forecast")+ expand_limits( y = 0) + + ggplot(data=GRAPH_DATA_MIGRATION)+geom_ribbon(data=FAN_DATA_MIGRATION,aes(x=Year,ymin=`2.5%`,ymax=`97.5%`),alpha=ALPHA,fill=COLOR)+geom_ribbon(data=FAN_DATA_MIGRATION,aes(x=Year,ymin=`5%`,ymax=`95%`),alpha=ALPHA,fill=COLOR)+geom_ribbon(data=FAN_DATA_MIGRATION,aes(x=Year,ymin=`10%`,ymax=`90%`),alpha=ALPHA,fill=COLOR)+geom_ribbon(data=FAN_DATA_MIGRATION,aes(x=Year,ymin=`25%`,ymax=`75%`),alpha=ALPHA,fill=COLOR)+geom_ribbon(data=FAN_DATA_MIGRATION,aes(x=Year,ymin=`40%`,ymax=`60%`),alpha=ALPHA,fill=COLOR)+geom_line(aes(x=Year,y=Migration,group=Percentile,color=Percentile))+geom_line(data=HIST %>% filter(Year>=2009),aes(x=Year,y=Migration),color='black',linewidth=0.75) + +geom_line(data=MEDIAN_PRED_MIGRATION,aes(x=Year,y=Migration),color='black',linetype=4,linewidth=0.75)+ scale_x_continuous(breaks = c(seq(1940, 2030, by = 5)))+ scale_y_continuous(breaks = seq(0, 35000, by = 250))+theme_bw()+ggtitle("Kemmerer & Diamondville, Migration Forecast")+ expand_limits( y = 0) + + + + + ggplot(data=GRAPH_DATA_MORTALITY)+geom_ribbon(data=FAN_DATA_MORTALITY,aes(x=Year,ymin=`2.5%`,ymax=`97.5%`),alpha=ALPHA,fill=COLOR)+geom_ribbon(data=FAN_DATA_MORTALITY,aes(x=Year,ymin=`5%`,ymax=`95%`),alpha=ALPHA,fill=COLOR)+geom_ribbon(data=FAN_DATA_MORTALITY,aes(x=Year,ymin=`10%`,ymax=`90%`),alpha=ALPHA,fill=COLOR)+geom_ribbon(data=FAN_DATA_MORTALITY,aes(x=Year,ymin=`25%`,ymax=`75%`),alpha=ALPHA,fill=COLOR)+geom_ribbon(data=FAN_DATA_MORTALITY,aes(x=Year,ymin=`40%`,ymax=`60%`),alpha=ALPHA,fill=COLOR)+geom_line(aes(x=Year,y=Deaths,group=Percentile,color=Percentile))+geom_line(data=HIST %>% filter(Year>=2009),aes(x=Year,y=Deaths),color='black',linewidth=0.75)+geom_line(data=MEDIAN_PRED_MORTALITY,aes(x=Year,y=Deaths),color='black',linetype=4,linewidth=0.75)+ scale_x_continuous(breaks = c(seq(2000, 2030, by = 5)))+ scale_y_continuous(breaks = seq(0, 50, by = 5))+theme_bw()+ggtitle("Kemmerer, Wyoming Death Forecast")+ expand_limits( y = 0) + + +ggplot(data=GRAPH_DATA_BIRTHS)+geom_ribbon(data=FAN_DATA_BIRTHS,aes(x=Year,ymin=`2.5%`,ymax=`97.5%`),alpha=ALPHA,fill=COLOR)+geom_ribbon(data=FAN_DATA_BIRTHS,aes(x=Year,ymin=`5%`,ymax=`95%`),alpha=ALPHA,fill=COLOR)+geom_ribbon(data=FAN_DATA_BIRTHS,aes(x=Year,ymin=`10%`,ymax=`90%`),alpha=ALPHA,fill=COLOR)+geom_ribbon(data=FAN_DATA_BIRTHS,aes(x=Year,ymin=`25%`,ymax=`75%`),alpha=ALPHA,fill=COLOR)+geom_ribbon(data=FAN_DATA_BIRTHS,aes(x=Year,ymin=`40%`,ymax=`60%`),alpha=ALPHA,fill=COLOR)+geom_line(aes(x=Year,y=Births,group=Percentile,color=Percentile))+geom_line(data=HIST %>% filter(Year>=2009),aes(x=Year,y=Births),color='black',linewidth=0.75)+geom_line(data=MEDIAN_PRED_BIRTHS,aes(x=Year,y=Births),color='black',linetype=4,linewidth=0.75)+ scale_x_continuous(breaks = c(seq(2000, 2030, by = 5)))+ scale_y_continuous(breaks = seq(0, 200, by = 5))+theme_bw()+ggtitle("Kemmerer, Wyoming Birth Forecast")+ expand_limits( y = 0) + diff --git a/2B_Result_Analysis.r b/2B_Result_Analysis.r new file mode 100644 index 0000000..e5fa984 --- /dev/null +++ b/2B_Result_Analysis.r @@ -0,0 +1,63 @@ +library(tidyverse) +###Process the simulations and save the main percentile results by year + RES <- read_csv("Results/Simulations/Kemmerer_2023_Simulation.csv") + RES[,"Year"] <- RES[,"Year"]+1 + HIST <- readRDS("Data/Cleaned_Data/Population_Data/RDS/Kemmerer_Diamondville_Population_Data.Rds") %>% filter(County=='Lincoln') %>% mutate(Percentile="Actual Population") %>% filter(Year>=1940) + YEARS <- min(RES$Year,na.rm=TRUE):max(RES$Year,na.rm=TRUE) + ######Population + START_POP <- HIST %>% filter(Year==2023) %>% pull(Population) + GRAPH_DATA <- do.call(rbind,lapply(YEARS,function(x){quantile(RES %>% filter(Year==x) %>% pull(Population),c(0.025,0.05,0.1,0.25,0.4,0.5,0.6,0.75,0.9,0.95,0.975))})) %>% as_tibble %>% mutate(Year=YEARS) + GRAPH_DATA <- rbind(GRAPH_DATA[1,],GRAPH_DATA) + GRAPH_DATA[1,] <-t(c(rep(START_POP,ncol(GRAPH_DATA)-1),min(GRAPH_DATA$Year)-1)) + FAN_DATA <- GRAPH_DATA + GRAPH_DATA <- GRAPH_DATA %>% pivot_longer(cols=!Year,names_to=c("Percentile"),values_to="Population") + GRAPH_DATA$Percentile <- factor(GRAPH_DATA$Percentile,levels=rev(c('2.5%','5%','10%','25%','40%','50%','60%','75%','90%','95%','97.5%'))) + START_POP <- HIST %>% filter(Year==2025) %>% pull(Population) + MEDIAN_PRED <- GRAPH_DATA %>% filter(Percentile=='50%') + ######Migration + GRAPH_DATA_MIGRATION <- do.call(rbind,lapply(YEARS,function(x){quantile(RES %>% filter(Year==x) %>% pull(Net_Migration),c(0.025,0.05,0.1,0.25,0.4,0.5,0.6,0.75,0.9,0.95,0.975))})) %>% as_tibble %>% mutate(Year=YEARS) + FAN_DATA_MIGRATION <- GRAPH_DATA_MIGRATION + GRAPH_DATA_MIGRATION <- GRAPH_DATA_MIGRATION %>% pivot_longer(cols=!Year,names_to=c("Percentile"),values_to="Migration") + GRAPH_DATA_MIGRATION$Percentile <- factor(GRAPH_DATA_MIGRATION$Percentile,levels=rev(c('2.5%','5%','10%','25%','40%','60%','75%','90%','95%','97.5%'))) + MEDIAN_PRED_MIGRATION <- GRAPH_DATA_MIGRATION %>% filter(Percentile=='50%') + ######Mortalities + GRAPH_DATA_MORTALITY <- do.call(rbind,lapply(YEARS,function(x){quantile(RES %>% filter(Year==x) %>% pull(Deaths),c(0.025,0.05,0.1,0.25,0.4,0.5,0.6,0.75,0.9,0.95,0.975))})) %>% as_tibble %>% mutate(Year=YEARS) + FAN_DATA_MORTALITY <- GRAPH_DATA_MORTALITY + GRAPH_DATA_MORTALITY <- GRAPH_DATA_MORTALITY %>% pivot_longer(cols=!Year,names_to=c("Percentile"),values_to="Deaths") + GRAPH_DATA_MORTALITY$Percentile <- factor(GRAPH_DATA_MORTALITY$Percentile,levels=rev(c('2.5%','5%','10%','25%','40%','60%','75%','90%','95%','97.5%'))) + MEDIAN_PRED_MORTALITY<- GRAPH_DATA_MORTALITY %>% filter(Percentile=='50%') + ######Births + START_BIRTHS <- HIST %>% filter(Year==2024) %>% pull(Births) + GRAPH_DATA_BIRTHS <- do.call(rbind,lapply(YEARS,function(x){quantile(RES %>% filter(Year==x) %>% pull(Births),c(0.025,0.05,0.1,0.25,0.4,0.5,0.6,0.75,0.9,0.95,0.975))})) %>% as_tibble %>% mutate(Year=YEARS) + GRAPH_DATA_BIRTHS <- rbind(GRAPH_DATA_BIRTHS[1,],GRAPH_DATA_BIRTHS) + GRAPH_DATA_BIRTHS[1,] <-t(c(rep(START_BIRTHS,ncol(GRAPH_DATA_BIRTHS)-1),min(GRAPH_DATA_BIRTHS$Year)-1)) + GRAPH_DATA_BIRTHS$Year <- GRAPH_DATA_BIRTHS$Year+1 + + FAN_DATA_BIRTHS <- GRAPH_DATA_BIRTHS + GRAPH_DATA_BIRTHS<- GRAPH_DATA_BIRTHS %>% pivot_longer(cols=!Year,names_to=c("Percentile"),values_to="Births") + GRAPH_DATA_BIRTHS$Percentile <- factor(GRAPH_DATA_BIRTHS$Percentile,levels=rev(c('2.5%','5%','10%','25%','40%','60%','75%','90%','95%','97.5%'))) + MEDIAN_PRED_BIRTHS<- GRAPH_DATA_BIRTHS %>% filter(Percentile=='50%') + + + + #write_csv(GRAPH_DATA,PERCENTILE_DATA) + #Add historic + MEDIAN_PRED <- GRAPH_DATA %>% filter(Percentile=='50%') + GRAPH_DATA <- GRAPH_DATA %>% filter(Percentile!='50%') + + ALPHA=0.2 + COLOR <- 'black' +#GRAPH <- + nrow(RES)/10 + ggplot(data=GRAPH_DATA)+geom_ribbon(data=FAN_DATA,aes(x=Year,ymin=`2.5%`,ymax=`97.5%`),alpha=ALPHA,fill=COLOR)+geom_ribbon(data=FAN_DATA,aes(x=Year,ymin=`5%`,ymax=`95%`),alpha=ALPHA,fill=COLOR)+geom_ribbon(data=FAN_DATA,aes(x=Year,ymin=`10%`,ymax=`90%`),alpha=ALPHA,fill=COLOR)+geom_ribbon(data=FAN_DATA,aes(x=Year,ymin=`25%`,ymax=`75%`),alpha=ALPHA,fill=COLOR)+geom_ribbon(data=FAN_DATA,aes(x=Year,ymin=`40%`,ymax=`60%`),alpha=ALPHA,fill=COLOR)+geom_line(aes(x=Year,y=Population,group=Percentile,color=Percentile))+geom_line(data=HIST,aes(x=Year,y=Population),color='black',linewidth=0.75)+geom_line(data=MEDIAN_PRED,aes(x=Year,y=Population),color='black',linetype=4,linewidth=0.75)+ scale_x_continuous(breaks = c(seq(1940, 2065, by = 5)))+ scale_y_continuous(breaks = seq(0, 35000, by = 250))+theme_bw()+ggtitle("Kemmerer & Diamondville, Population Forecast")+ expand_limits( y = 0) + + ggplot(data=GRAPH_DATA_MIGRATION)+geom_ribbon(data=FAN_DATA_MIGRATION,aes(x=Year,ymin=`2.5%`,ymax=`97.5%`),alpha=ALPHA,fill=COLOR)+geom_ribbon(data=FAN_DATA_MIGRATION,aes(x=Year,ymin=`5%`,ymax=`95%`),alpha=ALPHA,fill=COLOR)+geom_ribbon(data=FAN_DATA_MIGRATION,aes(x=Year,ymin=`10%`,ymax=`90%`),alpha=ALPHA,fill=COLOR)+geom_ribbon(data=FAN_DATA_MIGRATION,aes(x=Year,ymin=`25%`,ymax=`75%`),alpha=ALPHA,fill=COLOR)+geom_ribbon(data=FAN_DATA_MIGRATION,aes(x=Year,ymin=`40%`,ymax=`60%`),alpha=ALPHA,fill=COLOR)+geom_line(aes(x=Year,y=Migration,group=Percentile,color=Percentile))+geom_line(data=HIST %>% filter(Year>=2009),aes(x=Year,y=Migration),color='black',linewidth=0.75) +geom_line(data=MEDIAN_PRED_MIGRATION,aes(x=Year,y=Migration),color='black',linetype=4,linewidth=0.75)+ scale_x_continuous(breaks = c(seq(1940, 2065, by = 5)))+ scale_y_continuous(breaks = seq(0, 35000, by = 250))+theme_bw()+ggtitle("Kemmerer & Diamondville, Migration Forecast")+ expand_limits( y = 0) + + + + + ggplot(data=GRAPH_DATA_MORTALITY)+geom_ribbon(data=FAN_DATA_MORTALITY,aes(x=Year,ymin=`2.5%`,ymax=`97.5%`),alpha=ALPHA,fill=COLOR)+geom_ribbon(data=FAN_DATA_MORTALITY,aes(x=Year,ymin=`5%`,ymax=`95%`),alpha=ALPHA,fill=COLOR)+geom_ribbon(data=FAN_DATA_MORTALITY,aes(x=Year,ymin=`10%`,ymax=`90%`),alpha=ALPHA,fill=COLOR)+geom_ribbon(data=FAN_DATA_MORTALITY,aes(x=Year,ymin=`25%`,ymax=`75%`),alpha=ALPHA,fill=COLOR)+geom_ribbon(data=FAN_DATA_MORTALITY,aes(x=Year,ymin=`40%`,ymax=`60%`),alpha=ALPHA,fill=COLOR)+geom_line(aes(x=Year,y=Deaths,group=Percentile,color=Percentile))+geom_line(data=HIST %>% filter(Year>=2009),aes(x=Year,y=Deaths),color='black',linewidth=0.75)+geom_line(data=MEDIAN_PRED_MORTALITY,aes(x=Year,y=Deaths),color='black',linetype=4,linewidth=0.75)+ scale_x_continuous(breaks = c(seq(2000, 2065, by = 5)))+ scale_y_continuous(breaks = seq(0, 50, by = 5))+theme_bw()+ggtitle("Kemmerer, Wyoming Death Forecast")+ expand_limits( y = 0) + + +ggplot(data=GRAPH_DATA_BIRTHS)+geom_ribbon(data=FAN_DATA_BIRTHS,aes(x=Year,ymin=`2.5%`,ymax=`97.5%`),alpha=ALPHA,fill=COLOR)+geom_ribbon(data=FAN_DATA_BIRTHS,aes(x=Year,ymin=`5%`,ymax=`95%`),alpha=ALPHA,fill=COLOR)+geom_ribbon(data=FAN_DATA_BIRTHS,aes(x=Year,ymin=`10%`,ymax=`90%`),alpha=ALPHA,fill=COLOR)+geom_ribbon(data=FAN_DATA_BIRTHS,aes(x=Year,ymin=`25%`,ymax=`75%`),alpha=ALPHA,fill=COLOR)+geom_ribbon(data=FAN_DATA_BIRTHS,aes(x=Year,ymin=`40%`,ymax=`60%`),alpha=ALPHA,fill=COLOR)+geom_line(aes(x=Year,y=Births,group=Percentile,color=Percentile))+geom_line(data=HIST %>% filter(Year>=2009),aes(x=Year,y=Births),color='black',linewidth=0.75)+geom_line(data=MEDIAN_PRED_BIRTHS,aes(x=Year,y=Births),color='black',linetype=4,linewidth=0.75)+ scale_x_continuous(breaks = c(seq(2000, 2065, by = 5)))+ scale_y_continuous(breaks = seq(0, 200, by = 5))+theme_bw()+ggtitle("Kemmerer, Wyoming Birth Forecast")+ expand_limits( y = 0) + diff --git a/2_Result_Analysis.r b/2_Result_Analysis.r index 2a2623c..fb6c507 100644 --- a/2_Result_Analysis.r +++ b/2_Result_Analysis.r @@ -4,10 +4,14 @@ library(tidyverse) HIST <- readRDS("Data/Cleaned_Data/Population_Data/RDS/Kemmerer_Diamondville_Population_Data.Rds") %>% filter(County=='Lincoln') %>% mutate(Percentile="Actual Population") %>% filter(Year>=1940) YEARS <- min(RES$Year):max(RES$Year) ######Population + START_POP <- HIST %>% filter(Year==2016) %>% pull(Population) GRAPH_DATA <- do.call(rbind,lapply(YEARS,function(x){quantile(RES %>% filter(Year==x) %>% pull(Population),c(0.025,0.05,0.1,0.25,0.4,0.5,0.6,0.75,0.9,0.95,0.975))})) %>% as_tibble %>% mutate(Year=YEARS) + #Add 2016 as a starting value + GRAPH_DATA <- rbind(GRAPH_DATA[1,],GRAPH_DATA) + GRAPH_DATA[1,] <-t(c(rep(START_POP,ncol(GRAPH_DATA)-1),min(GRAPH_DATA$Year)-1)) FAN_DATA <- GRAPH_DATA GRAPH_DATA <- GRAPH_DATA %>% pivot_longer(cols=!Year,names_to=c("Percentile"),values_to="Population") - GRAPH_DATA$Percentile <- factor(GRAPH_DATA$Percentile,levels=rev(c('2.5%','5%','10%','25%','40%','60%','75%','90%','95%','97.5%'))) + GRAPH_DATA$Percentile <- factor(GRAPH_DATA$Percentile,levels=rev(c('2.5%','5%','10%','25%','40%','50%','60%','75%','90%','95%','97.5%'))) START_POP <- HIST %>% filter(Year==2016) %>% pull(Population) GRAPH_DATA <- rbind(GRAPH_DATA,GRAPH_DATA %>% filter(Year==2017) %>% mutate(Population=START_POP,Year=2016)) MEDIAN_PRED <- GRAPH_DATA %>% filter(Percentile=='50%') @@ -17,7 +21,23 @@ library(tidyverse) GRAPH_DATA_MIGRATION <- GRAPH_DATA_MIGRATION %>% pivot_longer(cols=!Year,names_to=c("Percentile"),values_to="Migration") GRAPH_DATA_MIGRATION$Percentile <- factor(GRAPH_DATA_MIGRATION$Percentile,levels=rev(c('2.5%','5%','10%','25%','40%','60%','75%','90%','95%','97.5%'))) MEDIAN_PRED_MIGRATION <- GRAPH_DATA_MIGRATION %>% filter(Percentile=='50%') + ######Mortalities + GRAPH_DATA_MORTALITY <- do.call(rbind,lapply(YEARS,function(x){quantile(RES %>% filter(Year==x) %>% pull(Deaths),c(0.025,0.05,0.1,0.25,0.4,0.5,0.6,0.75,0.9,0.95,0.975))})) %>% as_tibble %>% mutate(Year=YEARS) + FAN_DATA_MORTALITY <- GRAPH_DATA_MORTALITY + GRAPH_DATA_MORTALITY <- GRAPH_DATA_MORTALITY %>% pivot_longer(cols=!Year,names_to=c("Percentile"),values_to="Deaths") + GRAPH_DATA_MORTALITY$Percentile <- factor(GRAPH_DATA_MORTALITY$Percentile,levels=rev(c('2.5%','5%','10%','25%','40%','60%','75%','90%','95%','97.5%'))) + MEDIAN_PRED_MORTALITY<- GRAPH_DATA_MORTALITY %>% filter(Percentile=='50%') + ######Births + START_BIRTHS <- HIST %>% filter(Year==2016) %>% pull(Births) + GRAPH_DATA_BIRTHS <- do.call(rbind,lapply(YEARS,function(x){quantile(RES %>% filter(Year==x) %>% pull(Births),c(0.025,0.05,0.1,0.25,0.4,0.5,0.6,0.75,0.9,0.95,0.975))})) %>% as_tibble %>% mutate(Year=YEARS) + GRAPH_DATA_BIRTHS <- rbind(GRAPH_DATA_BIRTHS[1,],GRAPH_DATA_BIRTHS) + GRAPH_DATA_BIRTHS[1,] <-t(c(rep(START_BIRTHS,ncol(GRAPH_DATA_BIRTHS)-1),min(GRAPH_DATA_BIRTHS$Year)-1)) + + FAN_DATA_BIRTHS <- GRAPH_DATA_BIRTHS + GRAPH_DATA_BIRTHS<- GRAPH_DATA_BIRTHS %>% pivot_longer(cols=!Year,names_to=c("Percentile"),values_to="Births") + GRAPH_DATA_BIRTHS$Percentile <- factor(GRAPH_DATA_BIRTHS$Percentile,levels=rev(c('2.5%','5%','10%','25%','40%','60%','75%','90%','95%','97.5%'))) + MEDIAN_PRED_BIRTHS<- GRAPH_DATA_BIRTHS %>% filter(Percentile=='50%') @@ -29,7 +49,17 @@ library(tidyverse) ALPHA=0.2 COLOR <- 'black' #GRAPH <- - nrow(RES) -ggplot(data=GRAPH_DATA)+geom_ribbon(data=FAN_DATA,aes(x=Year,ymin=`2.5%`,ymax=`97.5%`),alpha=ALPHA,fill=COLOR)+geom_ribbon(data=FAN_DATA,aes(x=Year,ymin=`5%`,ymax=`95%`),alpha=ALPHA,fill=COLOR)+geom_ribbon(data=FAN_DATA,aes(x=Year,ymin=`10%`,ymax=`90%`),alpha=ALPHA,fill=COLOR)+geom_ribbon(data=FAN_DATA,aes(x=Year,ymin=`25%`,ymax=`75%`),alpha=ALPHA,fill=COLOR)+geom_ribbon(data=FAN_DATA,aes(x=Year,ymin=`40%`,ymax=`60%`),alpha=ALPHA,fill=COLOR)+geom_line(aes(x=Year,y=Population,group=Percentile,color=Percentile))+geom_line(data=HIST,aes(x=Year,y=Population),color='black',linewidth=0.75)+geom_line(data=MEDIAN_PRED,aes(x=Year,y=Population),color='black',linetype=4,linewidth=0.75)+ scale_x_continuous(breaks = c(seq(1940, 2070, by = 10)))+ scale_y_continuous(breaks = seq(0, 35000, by = 250))+theme_bw()+ggtitle("Kemmerer & Diamondville, Population Forecast")+ expand_limits( y = 0) -HIST %>% filter(!is.na(Migration)) -ggplot(data=GRAPH_DATA_MIGRATION)+geom_ribbon(data=FAN_DATA_MIGRATION,aes(x=Year,ymin=`2.5%`,ymax=`97.5%`),alpha=ALPHA,fill=COLOR)+geom_ribbon(data=FAN_DATA_MIGRATION,aes(x=Year,ymin=`5%`,ymax=`95%`),alpha=ALPHA,fill=COLOR)+geom_ribbon(data=FAN_DATA_MIGRATION,aes(x=Year,ymin=`10%`,ymax=`90%`),alpha=ALPHA,fill=COLOR)+geom_ribbon(data=FAN_DATA_MIGRATION,aes(x=Year,ymin=`25%`,ymax=`75%`),alpha=ALPHA,fill=COLOR)+geom_ribbon(data=FAN_DATA_MIGRATION,aes(x=Year,ymin=`40%`,ymax=`60%`),alpha=ALPHA,fill=COLOR)+geom_line(aes(x=Year,y=Migration,group=Percentile,color=Percentile))+geom_line(data=HIST %>% filter(Year>=2008),aes(x=Year,y=Migration),color='black',linewidth=0.75)+geom_line(data=MEDIAN_PRED_MIGRATION,aes(x=Year,y=Migration),color='black',linetype=4,linewidth=0.75)+ scale_x_continuous(breaks = c(seq(2010, 2070, by = 10)))+ scale_y_continuous(breaks = seq(-500, 100, by = 100))+theme_bw()+ggtitle("Kemmerer, Wyoming Migration Forecast") + nrow(RES)/10 + ggplot(data=GRAPH_DATA)+geom_ribbon(data=FAN_DATA,aes(x=Year,ymin=`2.5%`,ymax=`97.5%`),alpha=ALPHA,fill=COLOR)+geom_ribbon(data=FAN_DATA,aes(x=Year,ymin=`5%`,ymax=`95%`),alpha=ALPHA,fill=COLOR)+geom_ribbon(data=FAN_DATA,aes(x=Year,ymin=`10%`,ymax=`90%`),alpha=ALPHA,fill=COLOR)+geom_ribbon(data=FAN_DATA,aes(x=Year,ymin=`25%`,ymax=`75%`),alpha=ALPHA,fill=COLOR)+geom_ribbon(data=FAN_DATA,aes(x=Year,ymin=`40%`,ymax=`60%`),alpha=ALPHA,fill=COLOR)+geom_line(aes(x=Year,y=Population,group=Percentile,color=Percentile))+geom_line(data=HIST,aes(x=Year,y=Population),color='black',linewidth=0.75)+geom_line(data=MEDIAN_PRED,aes(x=Year,y=Population),color='black',linetype=4,linewidth=0.75)+ scale_x_continuous(breaks = c(seq(1940, 2030, by = 5)))+ scale_y_continuous(breaks = seq(0, 35000, by = 250))+theme_bw()+ggtitle("Kemmerer & Diamondville, Population Forecast")+ expand_limits( y = 0) + + ggplot(data=GRAPH_DATA_MIGRATION)+geom_ribbon(data=FAN_DATA_MIGRATION,aes(x=Year,ymin=`2.5%`,ymax=`97.5%`),alpha=ALPHA,fill=COLOR)+geom_ribbon(data=FAN_DATA_MIGRATION,aes(x=Year,ymin=`5%`,ymax=`95%`),alpha=ALPHA,fill=COLOR)+geom_ribbon(data=FAN_DATA_MIGRATION,aes(x=Year,ymin=`10%`,ymax=`90%`),alpha=ALPHA,fill=COLOR)+geom_ribbon(data=FAN_DATA_MIGRATION,aes(x=Year,ymin=`25%`,ymax=`75%`),alpha=ALPHA,fill=COLOR)+geom_ribbon(data=FAN_DATA_MIGRATION,aes(x=Year,ymin=`40%`,ymax=`60%`),alpha=ALPHA,fill=COLOR)+geom_line(aes(x=Year,y=Migration,group=Percentile,color=Percentile))+geom_line(data=HIST %>% filter(Year>=2009),aes(x=Year,y=Migration),color='black',linewidth=0.75) + +geom_line(data=MEDIAN_PRED_MIGRATION,aes(x=Year,y=Migration),color='black',linetype=4,linewidth=0.75)+ scale_x_continuous(breaks = c(seq(1940, 2030, by = 5)))+ scale_y_continuous(breaks = seq(0, 35000, by = 250))+theme_bw()+ggtitle("Kemmerer & Diamondville, Migration Forecast")+ expand_limits( y = 0) + + + + + ggplot(data=GRAPH_DATA_MORTALITY)+geom_ribbon(data=FAN_DATA_MORTALITY,aes(x=Year,ymin=`2.5%`,ymax=`97.5%`),alpha=ALPHA,fill=COLOR)+geom_ribbon(data=FAN_DATA_MORTALITY,aes(x=Year,ymin=`5%`,ymax=`95%`),alpha=ALPHA,fill=COLOR)+geom_ribbon(data=FAN_DATA_MORTALITY,aes(x=Year,ymin=`10%`,ymax=`90%`),alpha=ALPHA,fill=COLOR)+geom_ribbon(data=FAN_DATA_MORTALITY,aes(x=Year,ymin=`25%`,ymax=`75%`),alpha=ALPHA,fill=COLOR)+geom_ribbon(data=FAN_DATA_MORTALITY,aes(x=Year,ymin=`40%`,ymax=`60%`),alpha=ALPHA,fill=COLOR)+geom_line(aes(x=Year,y=Deaths,group=Percentile,color=Percentile))+geom_line(data=HIST %>% filter(Year>=2009),aes(x=Year,y=Deaths),color='black',linewidth=0.75)+geom_line(data=MEDIAN_PRED_MORTALITY,aes(x=Year,y=Deaths),color='black',linetype=4,linewidth=0.75)+ scale_x_continuous(breaks = c(seq(2000, 2030, by = 5)))+ scale_y_continuous(breaks = seq(0, 50, by = 5))+theme_bw()+ggtitle("Kemmerer, Wyoming Death Forecast")+ expand_limits( y = 0) + + +ggplot(data=GRAPH_DATA_BIRTHS)+geom_ribbon(data=FAN_DATA_BIRTHS,aes(x=Year,ymin=`2.5%`,ymax=`97.5%`),alpha=ALPHA,fill=COLOR)+geom_ribbon(data=FAN_DATA_BIRTHS,aes(x=Year,ymin=`5%`,ymax=`95%`),alpha=ALPHA,fill=COLOR)+geom_ribbon(data=FAN_DATA_BIRTHS,aes(x=Year,ymin=`10%`,ymax=`90%`),alpha=ALPHA,fill=COLOR)+geom_ribbon(data=FAN_DATA_BIRTHS,aes(x=Year,ymin=`25%`,ymax=`75%`),alpha=ALPHA,fill=COLOR)+geom_ribbon(data=FAN_DATA_BIRTHS,aes(x=Year,ymin=`40%`,ymax=`60%`),alpha=ALPHA,fill=COLOR)+geom_line(aes(x=Year,y=Births,group=Percentile,color=Percentile))+geom_line(data=HIST %>% filter(Year>=2009),aes(x=Year,y=Births),color='black',linewidth=0.75)+geom_line(data=MEDIAN_PRED_BIRTHS,aes(x=Year,y=Births),color='black',linetype=4,linewidth=0.75)+ scale_x_continuous(breaks = c(seq(2000, 2030, by = 5)))+ scale_y_continuous(breaks = seq(0, 200, by = 5))+theme_bw()+ggtitle("Kemmerer, Wyoming Birth Forecast")+ expand_limits( y = 0) + diff --git a/Data/Raw_Data/Mortality_Rates_Over_Time/README_MORTALITY_DATA.txt b/Data/Raw_Data/Mortality_Rates_Over_Time/README_MORTALITY_DATA.txt index 4c05cce..363364a 100644 --- a/Data/Raw_Data/Mortality_Rates_Over_Time/README_MORTALITY_DATA.txt +++ b/Data/Raw_Data/Mortality_Rates_Over_Time/README_MORTALITY_DATA.txt @@ -10,4 +10,4 @@ Data is manually gathered from CDC WONDER data queries. 4) The world pandemic uncertainty index as collected from FRED which is used to account for pandemics in the regression, making the age time series stationary. These are used to project mortality trends over time. In the case of the age adjusted data, this has local trends that can be compared to the national average. The single age-sex data is only at a national level but can be imparted to local levels as a general trend in the distribution of deaths ---- Run Date: 2025-12-01 16:10:03 --- +--- Run Date: 2025-12-03 14:11:57 --- diff --git a/Scripts/2A_Birth_Rate_Regression_and_Impart_Kemmerer_Births.r b/Scripts/2A_Birth_Rate_Regression_and_Impart_Kemmerer_Births.r index 1af5066..f1da6fe 100644 --- a/Scripts/2A_Birth_Rate_Regression_and_Impart_Kemmerer_Births.r +++ b/Scripts/2A_Birth_Rate_Regression_and_Impart_Kemmerer_Births.r @@ -49,32 +49,32 @@ OTHER_BIRTH_DATA <- OTHER_BIRTH_DATA %>% select(-Births) %>% left_join(ADJUSTED_ REG_DATA <- rbind(COUNTY_BIRTH_DATA,rbind(KEM_BIRTH_DATA,OTHER_BIRTH_DATA) %>% mutate(Deaths=NA,Migration=NA) %>% select(colnames(COUNTY_BIRTH_DATA))) -REG_DATA <- REG_DATA %>% group_by(Region) %>% arrange(Year) %>% mutate(Lag_Births=lag(Births),Lag_Two_Births=lag(Births,2)) %>% ungroup %>% arrange(County,Region,Year) +REG_DATA <- REG_DATA %>% group_by(Region) %>% arrange(Year) %>% mutate(Lag_Births=lag(Births),Lag_Two_Births=lag(Births,2)) %>% ungroup %>% arrange(County,Region,Year) %>% mutate(KEM=ifelse(Region=='Kemmerer & Diamondville',1,0),Region=ifelse(Region=='Kemmerer & Diamondville','Lincoln',Region)) + + +REG_REDUCED_DATA <- REG_DATA %>% filter(!is.na(Births),!is.na(Lag_Two_Births),!is.na(Min_Birth_Group),!is.na(Lag_Births),!is.na(Region)) %>% select(Year,Region,KEM,Min_Birth_Group,Births,Lag_Births,Lag_Two_Births) -REG_REDUCED_DATA <- REG_DATA %>% filter(!is.na(Births),!is.na(Lag_Two_Births),!is.na(Min_Birth_Group),!is.na(Lag_Births),!is.na(Region)) %>% select(Year,Region,Min_Birth_Group,Births,Lag_Births,Lag_Two_Births) ###Predict the number of Births -MOD_BIRTHS <- feols(log(Births)~log(Lag_Births)+log(Lag_Two_Births)+log(Min_Birth_Group)+Year*Region,cluster=~Year+Region, data=REG_REDUCED_DATA ) #Higher AIC but worse acf - +MOD_BIRTHS <- feols(log(Births)~log(Lag_Births)+log(Lag_Two_Births)+log(Min_Birth_Group)+Year*KEM+Year*Region,cluster=~Year+Region, data=REG_REDUCED_DATA,data.save = TRUE) #Higher AIC but worse acf ####Alternate models with different years of data to test to model against a counterfactual -REG_DATA_2016 <- REG_REDUCED_DATA %>% filter(Year<=2016) -MOD_BIRTHS_2016 <- feols(log(Births)~log(Lag_Births)+log(Lag_Two_Births)+log(Min_Birth_Group)+Year*Region,cluster=~Year+Region, data=REG_DATA_2016 ) +REG_DATA_2016 <- REG_REDUCED_DATA %>% filter(Year<=2016) +MOD_BIRTHS_2016 <- feols(log(Births)~log(Lag_Births)+log(Lag_Two_Births)+log(Min_Birth_Group)+KEM+Year*Region,cluster=~Year+Region, data=REG_DATA_2016,data.save = TRUE) REG_DATA_1985 <- REG_REDUCED_DATA %>% filter(Year<=1985) -MOD_BIRTHS_1985 <- feols(log(Births)~log(Lag_Births)+log(Lag_Two_Births)+log(Min_Birth_Group)+Year*Region,cluster=~Year+Region, data=REG_DATA_1985 ) - +MOD_BIRTHS_1985 <- feols(log(Births)~log(Lag_Births)+log(Lag_Two_Births)+log(Min_Birth_Group)+Year*Region,cluster=~Year+Region, data=REG_DATA_1985 ,data.save = TRUE) ###Models to more easily show the results #Easier to read for a LaTex Table -DICT <- c("log(Lag_Births)"="Births Last Years (log)","log(Lag_Two_Births)"="Births Two Years Ago (log)","LN"='Kemmerer Area','log(Births)'='Births (log)','log(Min_Birth_Group)'='Child Rearing Aged Adults (log)','Region'="County" ) - -REG_VIEW_DATA <- REG_REDUCED_DATA %>% mutate(LN=ifelse(Region=='Kemmerer & Diamondville',1,0)) - MOD_VIEW_BIRTHS <- feols(log(Births)~log(Lag_Births)+log(Lag_Two_Births)+log(Min_Birth_Group)+LN*Year+Year*Region|Region,cluster=~Year+Region, data=REG_VIEW_DATA ) #Higher AIC but worse acf +DICT <- c("log(Lag_Births)"="Births Last Years (log)","log(Lag_Two_Births)"="Births Two Years Ago (log)","LN"='Lincoln',"KEM"='Kemmerer Area','log(Births)'='Births (log)','log(Min_Birth_Group)'='Child Rearing Aged Adults (log)','Region'="County" ) +REG_VIEW_DATA <- REG_REDUCED_DATA %>% mutate(KEM=ifelse(Region=='Kemmerer & Diamondville',1,0),LN=ifelse(Region=='Kemmerer & Diamondville' |Region=='Lincoln' ,1,0),Region=ifelse(Region=='Kemmerer & Diamondville' ,'Lincoln',Region)) + MOD_VIEW_BIRTHS <- feols(log(Births)~log(Lag_Births)+log(Lag_Two_Births)+log(Min_Birth_Group)+KEM*Year+Year*Region|Region,cluster=~Year+Region, data=REG_VIEW_DATA ) #Higher AIC but worse acf REG_VIEW_DATA_2016 <- REG_VIEW_DATA %>% filter(Year<=2016) -MOD_VIEW_BIRTHS_2016 <- feols(log(Births)~log(Lag_Births)+log(Lag_Two_Births)+log(Min_Birth_Group)+LN*Year+Year*Region|Region,cluster=~Year+Region, data=REG_VIEW_DATA_2016 ) +REG_DATA_2016 <- REG_REDUCED_DATA %>% filter(Year<=2016) +MOD_VIEW_BIRTHS_2016 <- feols(log(Births)~log(Lag_Births)+log(Lag_Two_Births)+log(Min_Birth_Group)+KEM+LN*Year+Year*Region|Region,cluster=~Year+Region, data=REG_VIEW_DATA_2016 ) +MOD_VIEW_BIRTHS_2016 REG_VIEW_DATA_1985 <- REG_VIEW_DATA%>% filter(Year<=1985) MOD_VIEW_BIRTHS_1985 <- feols(log(Births)~log(Lag_Births)+log(Lag_Two_Births)+log(Min_Birth_Group)+LN*Year+Year*Region|Region,cluster=~Year+Region, data=REG_VIEW_DATA_1985 ) -1 ###Prelim information for fixest table NOTES <- c("Natural log used for all variables besides years, and counties","Kemmerer Area: Includes both Kemmerer and Diamondville","Child Rearing Adults: Minimum of all women aged 18-28 or men aged 18-30.","Kemmerer data from the American Community Survey Data starts in 2009") LAB <- c("Kem.","Kem. Pre 2016","Lincoln Pre 1985") @@ -98,8 +98,6 @@ try(etable(MOD_VIEW_BIRTHS,MOD_VIEW_BIRTHS_2016,MOD_VIEW_BIRTHS_1985,headers=HEA -TEMP <- REG_REDUCED_DATA - TEMP$RESID <- resid(MOD_BIRTHS) #Kemmerer ACF/PACF C_TEMP <- TEMP %>% filter(Region=='Kemmerer & Diamondville') %>% arrange(Year) png(paste0(SAVE_FIG_LOC,"/Kemmerer_ACF.png"), width = 12, height = 8, units = "in", res = 600) @@ -126,8 +124,7 @@ C_TEMP <- TEMP %>% filter(Region=='Lincoln_Other') %>% arrange(Year) dev.off() ####Create data stubs to start a simulation. That is predict the births from this most recent year. Include records from various years of potential interest -ST_REG_DATA <- REG_REDUCED_DATA %>% filter(Region=='Lincoln') %>% filter(Year==max(Year)) %>% rbind(REG_REDUCED_DATA %>% filter(Region=='Kemmerer & Diamondville') %>% filter(Year==max(Year))) %>% rbind(REG_REDUCED_DATA %>% filter(Region=='Lincoln_Other') %>% filter(Year==max(Year))) %>% rbind(REG_REDUCED_DATA %>% filter(Region=='Lincoln') %>% filter(Year==2016)) %>% rbind(REG_REDUCED_DATA %>% filter(Region=='Kemmerer & Diamondville') %>% filter(Year==2016)) %>% rbind(REG_REDUCED_DATA %>% filter(Region=='Lincoln_Other') %>% filter(Year==2016)) %>% rbind(REG_REDUCED_DATA %>% filter(Region=='Lincoln') %>% filter(Year==1985)) - +ST_REG_DATA <- REG_REDUCED_DATA %>% filter(Region=='Lincoln') %>% filter(Year==max(Year)) %>% rbind(REG_REDUCED_DATA %>% filter(Region=='Kemmerer & Diamondville') %>% filter(Year==max(Year))) %>% rbind(REG_REDUCED_DATA %>% filter(Region=='Lincoln_Other') %>% filter(Year==max(Year))) %>% rbind(REG_REDUCED_DATA %>% filter(Region=='Lincoln') %>% filter(Year==2016)) %>% rbind(REG_REDUCED_DATA %>% filter(Region=='Kemmerer & Diamondville') %>% filter(Year==2016)) %>% rbind(REG_REDUCED_DATA %>% filter(Region=='Lincoln_Other') %>% filter(Year==2016)) %>% rbind(REG_REDUCED_DATA %>% filter(Region=='Lincoln') %>% filter(Year==1985)) %>% rbind(REG_REDUCED_DATA %>% filter(KEM==1) %>% filter(Year==max(Year))) if(!exists("SAVE_REG_LOC")){SAVE_REG_LOC <- "Data/Intermediate_Inputs/Birth_Regressions"} dir.create(SAVE_REG_LOC , recursive = TRUE, showWarnings = FALSE) @@ -156,7 +153,7 @@ if(!exists("POPULATION_SAVE_RDS")){POPULATION_SAVE_RDS <- "./Data/Cleaned_Data/P dir.create(POPULATION_SAVE_RDS, recursive = TRUE, showWarnings = FALSE) if(!exists("POPULATION_SAVE_CSV")){POPULATION_SAVE_CSV <- "./Data/Cleaned_Data/Population_Data/CSV/"} - dir.create(POPULATION_SAVE_CSV, recursive = TRUE, showWarnings = FALSE) +dir.create(POPULATION_SAVE_CSV, recursive = TRUE, showWarnings = FALSE) saveRDS(LIN_DATA_NEW,paste0(POPULATION_SAVE_RDS,"Full_Lincoln_County_Population_Data.Rds")) write_csv(LIN_DATA_NEW,paste0(POPULATION_SAVE_CSV,"Full_Lincoln_County_Population_Data.csv")) saveRDS(KEM_DATA_NEW,paste0(POPULATION_SAVE_RDS,"Kemmerer_Diamondville_Population_Data.Rds")) diff --git a/Scripts/2D_Overall_Migration_ARIMA.r b/Scripts/2D_Overall_Migration_ARIMA.r index e61d325..96cf912 100644 --- a/Scripts/2D_Overall_Migration_ARIMA.r +++ b/Scripts/2D_Overall_Migration_ARIMA.r @@ -62,9 +62,12 @@ OTHER_NEW <- LN/LN_ADJ_OTHER OTHER_2016_NEW <- LN_2016/LN_ADJ_OTHER OTHER_1985_NEW <- LN_1985/LN_ADJ_OTHER #Create new models for migration forecasts -MOD_KEM_ADJ <- auto.arima(KEM_NEW ,stationary=TRUE) +MOD_KEM_ADJ <- auto.arima(KEM_NEW ) +MOD_KEM_ADJ_SHIFT <- auto.arima(KEM_NEW +as.numeric(coef(MOD_KEM)["intercept"])) median(simulate(MOD_KEM_ADJ,100000 )) -MOD_KEM_ADJ_2016 <- auto.arima(KEM_2016_NEW ,stationary=TRUE) + +median(abs(simulate(MOD_KEM_ADJ,100000 ))) +#MOD_KEM_ADJ_2016 <- auto.arima(KEM_2016_NEW ,stationary=TRUE) MOD_KEM_ADJ_2016 <- auto.arima(KEM_2016_NEW+as.numeric(coef(MOD_KEM_2016)["intercept"])) MOD_KEM_ADJ_1985 <- auto.arima(KEM_1985_NEW ,stationary=TRUE) MOD_OTHER_ADJ <- auto.arima(OTHER_NEW,stationary=TRUE) @@ -75,6 +78,7 @@ MOD_OTHER_ADJ_1985 <- auto.arima(OTHER_1985_NEW,stationary=TRUE) dir.create(SAVE_LOC_ARIMA_MODELS, recursive = TRUE, showWarnings = FALSE) saveRDS(MOD,paste0(SAVE_LOC_ARIMA_MODELS,"Full_Lincoln_County_Net_Migration_ARIMA.Rds")) saveRDS(MOD_KEM_ADJ,paste0(SAVE_LOC_ARIMA_MODELS,"Kemmerer_Diamondville_Net_Migration_ARIMA.Rds")) +saveRDS(MOD_KEM_ADJ_SHIFT,paste0(SAVE_LOC_ARIMA_MODELS,"Kemmerer_Diamondville_Net_Migration_ARIMA_With_Downward_Shift.Rds")) saveRDS(MOD_OTHER_ADJ,paste0(SAVE_LOC_ARIMA_MODELS,"Other_Lincoln_Net_Migration_ARIMA.Rds")) saveRDS(MOD_2016,paste0(SAVE_LOC_ARIMA_MODELS,"Full_Lincoln_County_Net_Migration_ARIMA_2016.Rds")) diff --git a/Scripts/Load_Custom_Functions/Birth_Simulation_Functions.r b/Scripts/Load_Custom_Functions/Birth_Simulation_Functions.r index 1996233..bdeea8d 100644 --- a/Scripts/Load_Custom_Functions/Birth_Simulation_Functions.r +++ b/Scripts/Load_Custom_Functions/Birth_Simulation_Functions.r @@ -1,6 +1,6 @@ #Births,PREV_BIRTH,PREV_TWO_BIRTH,Min_Birth_Group,Year,County #Uncomment to test the function step by step -#REG_MODEL <- MOD_BIRTHS;REG_DATA <- FIRST_PREDICT_YEAR_POPULATION_DATA;NUM_SIMS=1 +#REG_MODEL <- BIRTH_MOD;REG_DATA <- BIRTH_DATA;NUM_SIMS=1 BIRTH_SIM <- function(REG_MODEL,REG_DATA,NUM_SIMS=1){ C_PREDICT <- predict(REG_MODEL,REG_DATA,interval = "prediction",level=0.95) PRED_MEAN <- C_PREDICT$fit @@ -14,7 +14,5 @@ BIRTH_SIM <- function(REG_MODEL,REG_DATA,NUM_SIMS=1){ #%>% as_tibble #colnames(RES) <- c("Num_Male","Num_Female") #"0" - return(RES) } -