From 9d0cf742d867671ce07e7ed25eff3727af923f42 Mon Sep 17 00:00:00 2001 From: Alex Gebben Work Date: Mon, 8 Dec 2025 17:27:40 -0700 Subject: [PATCH] Fixed some bugs, started run --- 1B_Run_Full_Simulation.r | 45 +++++--- 2B_Result_Analysis.r | 108 ++++++++---------- 2_Result_Analysis.r | 65 ----------- .../README_MORTALITY_DATA.txt | 2 +- Prelim_Process.sh | 2 +- ...s_Data_to_Estimate_Kemmerer_Demographics.r | 2 - Scripts/1E_Process_WONDER_Mortality_Data.r | 5 +- ...ess_WONDER_Single_Age_Sex_Mortality_Data.r | 1 - Scripts/1G_Terra_Power_Migration_Rates.r | 1 + ...te_Regression_and_Impart_Kemmerer_Births.r | 12 +- ...mpart_Deaths_and_Migration_to_Subregions.r | 3 +- Scripts/2C_Migration_by_Age_Regression.r | 2 + ...Current_Demographic_Data_to_Current_Year.r | 5 +- .../Induced_Migration_Functions.r | 9 +- 14 files changed, 92 insertions(+), 170 deletions(-) delete mode 100644 2_Result_Analysis.r diff --git a/1B_Run_Full_Simulation.r b/1B_Run_Full_Simulation.r index 5020c05..0e94067 100644 --- a/1B_Run_Full_Simulation.r +++ b/1B_Run_Full_Simulation.r @@ -16,20 +16,24 @@ source("Scripts/Load_Custom_Functions/Single_Age_Mortality_Trend_Simulation.r") source("Scripts/Load_Custom_Functions/Induced_Migration_Functions.r") #######Preliminary Model Inputs -YEARS_AHEAD <- 43 -ST_YEAR <- 2023 +YEARS_AHEAD <- 41 +ST_YEAR <- 2025 ################################Load Data -DEMO <- readRDS("Data/Intermediate_Inputs/Starting_Demographic_Data_Sets_of_Monte_Carlo/2023_Starting_Kemmerer_Diamondville_Demographics_Matrix.Rds") +DEMO <- readRDS("Data/Intermediate_Inputs/Starting_Demographic_Data_Sets_of_Monte_Carlo/2024_Starting_Kemmerer_Diamondville_Demographics_Matrix.Rds") BIRTH_MOD <- readRDS("Data/Intermediate_Inputs/Birth_Regressions/Birth_Regression.Rds") #Must add region as a factor with multiple levels for predict to work. Seems to check for multiple levels although that is not needed econometrics. -BIRTH_DATA <- readRDS("Data/Intermediate_Inputs/Birth_Regressions/Regression_Data/Birth_Simulation_Key_Starting_Points.Rds") %>% mutate(Region=factor(Region)) %>% filter(KEM==1,Year==2023) +BIRTH_DATA <- readRDS("Data/Intermediate_Inputs/Birth_Regressions/Regression_Data/Birth_Simulation_Key_Starting_Points.Rds") %>% mutate(Region=factor(Region)) %>% filter(KEM==1,Year==2023) %>% mutate(Year=2024) + MIGRATION_ARIMA <- readRDS("Data/Intermediate_Inputs/Migration_ARIMA_Models/Kemmerer_Diamondville_Net_Migration_ARIMA.Rds") MIGRATION_ODDS <- readRDS("Data/Intermediate_Inputs/Migration_Trends/Migration_Age_Probability_Zero_to_85.Rds") #### OPERATORS <- readRDS("Data/Cleaned_Data/TerraPower_Impact/Operating_Worker_Related_Migration.Rds") CONSTRUCTION <- readRDS("Data/Cleaned_Data/TerraPower_Impact/Construction_Related_Migration.Rds") +OPERATOR_LIN_MIGRATION <- OPERATORS %>% pull("Operator_Emp_Migrated") +CONSTRUCTION_LIN_MIGRATION <- CONSTRUCTION %>% pull("Construction_Emp_Migrated") + INDUCED_MIGRATION_MULTIPLIERS <- readRDS("Data/Cleaned_Data/TerraPower_Impact/Induced_Jobs.Rds") ############## @@ -89,27 +93,24 @@ TOTAL_POP <- sum(DEMO) return(list(DEMO,BIRTH_DATA,c(TOTAL_POP,TOTAL_BIRTHS,TOTAL_DEATHS,TOTAL_MIGRATION))) } -MIGRATION_ARIMA_MODEL <- MIGRATION_ARIMA -SINGLE_SIM <- function(DEMO,BIRTH_DATA,ST_YEAR,YEARS_AHEAD,MIGRATION_ARIMA_MODEL,OPERATOR_TOTAL,CONSTRUCTION_TOTAL,MIGRATION_MULTIPLIERS ){ + MIGRATION_ARIMA_MODEL <- MIGRATION_ARIMA +#DEMO,BIRTH_DATA,ST_YEAR,YEARS_AHEAD,MIGRATION_ARIMA_MODEL,OPERATOR_TOTAL,CONSTRUCTION_TOTAL,MIGRATION_MULTIPLIERS +CONSTRUCTION_MIGRATION <- CONSTRUCTION_LIN_MIGRATION +MIGRATION_MULTIPLIERS <- INDUCED_MIGRATION_MULTIPLIERS +OPERATOR_MIGRATION <- OPERATOR_LIN_MIGRATION +SINGLE_SIM <- function(DEMO,BIRTH_DATA,ST_YEAR,YEARS_AHEAD,MIGRATION_ARIMA_MODEL,OPERATOR_MIGRATION,CONSTRUCTION_MIGRATION,MIGRATION_MULTIPLIERS ){ TERRA_POWER_EFFECT <- rep(0,YEARS_AHEAD) - OPERATOR_MIGRATION <-OPERATOR_TOTAL%>% pull("Operator_Emp_Migrated") - CONSTRUCTION_MIGRATION <- CONSTRUCTION_TOTAL%>% pull("Construction_Emp_Migrated") OPERATOR_MIGRATION <- LOCAL_WORK_ADJ(OPERATOR_MIGRATION ,0.85) #Assume between 85%-100% operators live in Kemmerer CONSTRUCTION_MIGRATION <- LOCAL_WORK_ADJ(CONSTRUCTION_MIGRATION,0.41) #Assume between 41%-100% operators live in Kemmerer - - OPERATOR_MIGRATION <- OPERATOR_MIGRATION %>% pull("Operator_Emp_Migrated") - CONSTRUCTION_MIGRATION <- CONSTRUCTION_MIGRATION %>% pull("Construction_Emp_Migrated") + CONSTRUCTION_MIGRATION[7] <- CONSTRUCTION_MIGRATION[7] - sum(CONSTRUCTION_MIGRATION ) CONSTRUCTION_POPULATION_ADDED <- cumsum(CONSTRUCTION_MIGRATION) - - PERMANENT_TERRAPOWER_MIGRATION <- INDUCED_SIMULATION(CONSTRUCTION_MIGRATION,OPERATOR_MIGRATION,RES)+OPERATOR_MIGRATION + PERMANENT_TERRAPOWER_MIGRATION <- INDUCED_SIMULATION(CONSTRUCTION_MIGRATION,OPERATOR_MIGRATION,MIGRATION_MULTIPLIERS)+OPERATOR_MIGRATION ###############NOTE NEED TO USE THIS AT END TO ADJUST THE RESULTS WHILE LEAVING THE DEMOGRAPHIC MATRIX - TEMP_TERRAPOWER_MIGRATION_<- TERRA_POWER_EFFECT+CONSTRUCTION_MIGRATION - - - MIGRATION_SIM_VALUES <- round(as.vector(simulate(nsim=YEARS_AHEAD,MIGRATION_ARIMA_MODEL)+runif(1,-55,0))+PERMANENT_TERRAPOWER_MIGRATION) + TERRA_POWER_EFFECT[1:7] <- TERRA_POWER_EFFECT[1:7]+CONSTRUCTION_MIGRATION + MIGRATION_SIM_VALUES <- round(as.vector(simulate(nsim=YEARS_AHEAD,MIGRATION_ARIMA_MODEL))+runif(1,-55,0)+TERRA_POWER_EFFECT) #The runif applies a downshift ranging from the historic decline rate all the way to the Lincoln rate applied in the model FINAL_REPORT_VALUES <- matrix(NA,ncol=6,nrow=YEARS_AHEAD) @@ -122,6 +123,8 @@ SINGLE_SIM <- function(DEMO,BIRTH_DATA,ST_YEAR,YEARS_AHEAD,MIGRATION_ARIMA_MODEL BIRTH_DATA <- C_RES[[2]] FINAL_REPORT_VALUES[i,-1] <- c(C_YEAR,C_RES[[3]]) } + FINAL_REPORT_VALUES[1:7,3] <- as.numeric(FINAL_REPORT_VALUES[1:7,3])+CONSTRUCTION_POPULATION_ADDED + FINAL_REPORT_VALUES[1:7,6] <- as.numeric(FINAL_REPORT_VALUES[1:7,6]) +CONSTRUCTION_MIGRATION return(FINAL_REPORT_VALUES) } @@ -135,14 +138,18 @@ SINGLE_SIM <- function(DEMO,BIRTH_DATA,ST_YEAR,YEARS_AHEAD,MIGRATION_ARIMA_MODEL TOTAL_SIMULATIONS <- 10^6 N_RUNS <-ceiling(TOTAL_SIMULATIONS/BATCH_SIZE ) - SIM_RES_FILE <- paste0(RES_SIM_DIR,"Kemmerer_2023_Simulation.csv") + SIM_RES_FILE <- paste0(RES_SIM_DIR,"Kemmerer_2024_Simulation.csv") NEW_RES_FILE <- !file.exists(SIM_RES_FILE) +OPERATOR_LIN_MIGRATION <- OPERATORS %>% pull("Operator_Emp_Migrated") +CONSTRUCTION_LIN_MIGRATION <- CONSTRUCTION %>% pull("Construction_Emp_Migrated") + for(i in 1:N_RUNS){ # MIGRATION_MATRIX <- simulate(nsim=BATCH_SIZE,MIGRATION_ARIMA,n=YEARS_AHEAD) MIGRATION_MATRIX <- do.call(cbind, mclapply(1:BATCH_SIZE,function(x)(as.vector(simulate(nsim=YEARS_AHEAD,MIGRATION_ARIMA) )),mc.cores = detectCores()-1)) # rownames(MIGRATION_MATRIX) <- ST_YEAR:(ST_YEAR+YEARS_AHEAD-1) # colnames(MIGRATION_MATRIX) <- 1:BATCH_SIZE - try(RES <- do.call(rbind,mclapply(1:BATCH_SIZE,function(x){SINGLE_SIM(DEMO,BIRTH_DATA,ST_YEAR,YEARS_AHEAD,MIGRATION_ARIMA)},mc.cores=NCORES))) +#SINGLE_SIM(DEMO,BIRTH_DATA,ST_YEAR,YEARS_AHEAD,MIGRATION_ARIMA,OPERATOR_LIN_MIGRATION,CONSTRUCTION_LIN_MIGRATION,INDUCED_MIGRATION_MULTIPLIERS) + try(RES <- do.call(rbind,mclapply(1:BATCH_SIZE,function(x){SINGLE_SIM(DEMO,BIRTH_DATA,ST_YEAR,YEARS_AHEAD,MIGRATION_ARIMA,OPERATOR_LIN_MIGRATION,CONSTRUCTION_LIN_MIGRATION,INDUCED_MIGRATION_MULTIPLIERS)},mc.cores=NCORES))) if(exists("RES")){ RES <- as.data.frame(RES) RES[,-1] <- as.numeric(as.matrix(RES[,-1])) diff --git a/2B_Result_Analysis.r b/2B_Result_Analysis.r index 7b7836c..e502462 100644 --- a/2B_Result_Analysis.r +++ b/2B_Result_Analysis.r @@ -1,72 +1,56 @@ library(tidyverse) ###Process the simulations and save the main percentile results by year - RES <- read_csv("Results/Simulations/Kemmerer_2023_Simulation.csv") - RES[,"Year"] <- RES[,"Year"]+1 -RES<- RES %>% filter(!is.na(Year)) -RES <- RES %>% filter(!is.na(Population)) + RES <- read_csv("Results/Simulations/Kemmerer_2024_Simulation.csv") + RES[,"Year"] <- RES[,"Year"] + RES<- RES %>% filter(!is.na(Year)) + RES <- RES %>% filter(!is.na(Population)) HIST <- readRDS("Data/Cleaned_Data/Population_Data/RDS/Kemmerer_Diamondville_Population_Data.Rds") %>% filter(County=='Lincoln') %>% mutate(Percentile="Actual Population") %>% filter(Year>=1940) - YEARS <- min(RES$Year,na.rm=TRUE):max(RES$Year,na.rm=TRUE) ######Population - START_POP <- HIST %>% filter(Year==2023) %>% pull(Population) - GRAPH_DATA <- do.call(rbind,lapply(YEARS,function(x){quantile(RES %>% filter(Year==x) %>% pull(Population),c(0.025,0.05,0.1,0.25,0.4,0.5,0.6,0.75,0.9,0.95,0.975))})) %>% as_tibble %>% mutate(Year=YEARS) - GRAPH_DATA <- rbind(GRAPH_DATA[1,],GRAPH_DATA) - GRAPH_DATA[1,] <-t(c(rep(START_POP,ncol(GRAPH_DATA)-1),min(GRAPH_DATA$Year)-1)) - FAN_DATA <- GRAPH_DATA - GRAPH_DATA <- GRAPH_DATA %>% pivot_longer(cols=!Year,names_to=c("Percentile"),values_to="Population") - GRAPH_DATA$Percentile <- factor(GRAPH_DATA$Percentile,levels=rev(c('2.5%','5%','10%','25%','40%','50%','60%','75%','90%','95%','97.5%'))) - START_POP <- HIST %>% filter(Year==2025) %>% pull(Population) - MEDIAN_PRED <- GRAPH_DATA %>% filter(Percentile=='50%') - ######Migration - GRAPH_DATA_MIGRATION <- do.call(rbind,lapply(YEARS,function(x){quantile(RES %>% filter(Year==x) %>% pull(Net_Migration),c(0.025,0.05,0.1,0.25,0.4,0.5,0.6,0.75,0.9,0.95,0.975))})) %>% as_tibble %>% mutate(Year=YEARS) - FAN_DATA_MIGRATION <- GRAPH_DATA_MIGRATION - GRAPH_DATA_MIGRATION <- GRAPH_DATA_MIGRATION %>% pivot_longer(cols=!Year,names_to=c("Percentile"),values_to="Migration") - GRAPH_DATA_MIGRATION$Percentile <- factor(GRAPH_DATA_MIGRATION$Percentile,levels=rev(c('2.5%','5%','10%','25%','40%','60%','75%','90%','95%','97.5%'))) - MEDIAN_PRED_MIGRATION <- GRAPH_DATA_MIGRATION %>% filter(Percentile=='50%') - ######Mortalities - GRAPH_DATA_MORTALITY <- do.call(rbind,lapply(YEARS,function(x){quantile(RES %>% filter(Year==x) %>% pull(Deaths),c(0.025,0.05,0.1,0.25,0.4,0.5,0.6,0.75,0.9,0.95,0.975))})) %>% as_tibble %>% mutate(Year=YEARS) - FAN_DATA_MORTALITY <- GRAPH_DATA_MORTALITY - GRAPH_DATA_MORTALITY <- GRAPH_DATA_MORTALITY %>% pivot_longer(cols=!Year,names_to=c("Percentile"),values_to="Deaths") - GRAPH_DATA_MORTALITY$Percentile <- factor(GRAPH_DATA_MORTALITY$Percentile,levels=rev(c('2.5%','5%','10%','25%','40%','60%','75%','90%','95%','97.5%'))) - MEDIAN_PRED_MORTALITY<- GRAPH_DATA_MORTALITY %>% filter(Percentile=='50%') - ######Births - START_BIRTHS <- HIST %>% filter(Year==2024) %>% pull(Births) - GRAPH_DATA_BIRTHS <- do.call(rbind,lapply(YEARS,function(x){quantile(RES %>% filter(Year==x) %>% pull(Births),c(0.025,0.05,0.1,0.25,0.4,0.5,0.6,0.75,0.9,0.95,0.975))})) %>% as_tibble %>% mutate(Year=YEARS) - GRAPH_DATA_BIRTHS <- rbind(GRAPH_DATA_BIRTHS[1,],GRAPH_DATA_BIRTHS) - GRAPH_DATA_BIRTHS[1,] <-t(c(rep(START_BIRTHS,ncol(GRAPH_DATA_BIRTHS)-1),min(GRAPH_DATA_BIRTHS$Year)-1)) - GRAPH_DATA_BIRTHS$Year <- GRAPH_DATA_BIRTHS$Year+1 + ####Fan New + GET_DATA <- function(RES,COL_NUM){ - FAN_DATA_BIRTHS <- GRAPH_DATA_BIRTHS - GRAPH_DATA_BIRTHS<- GRAPH_DATA_BIRTHS %>% pivot_longer(cols=!Year,names_to=c("Percentile"),values_to="Births") - GRAPH_DATA_BIRTHS$Percentile <- factor(GRAPH_DATA_BIRTHS$Percentile,levels=rev(c('2.5%','5%','10%','25%','40%','60%','75%','90%','95%','97.5%'))) - MEDIAN_PRED_BIRTHS<- GRAPH_DATA_BIRTHS %>% filter(Percentile=='50%') + YEARS <- min(RES$Year,na.rm=TRUE):max(RES$Year,na.rm=TRUE) + FAN_DATA <- do.call(rbind,lapply(YEARS,function(x){quantile(as.numeric(t((RES %>% filter(Year==x))[,COL_NUM])),seq(0.01,0.99,by=0.01))})) %>% as_tibble %>% mutate(Year=YEARS) + FAN_DATA <- rbind(FAN_DATA[1,],FAN_DATA) + START_VALUE <- (HIST %>% filter(Year==2024))[,COL_NUM+1] %>% as.numeric + FAN_DATA <- FAN_DATA %>% pivot_longer(colnames(FAN_DATA %>% select(-Year)),names_to="Percentile") %>% filter(Year>2024) %>% unique + NUM_YEARS <- length(unique(FAN_DATA$Year) ) + FAN_DATA$Group <- rep(c(1:49,0,rev(1:49)),NUM_YEARS) + FAN_DATA <- FAN_DATA %>% group_by(Year,Group) %>% summarize(MIN=min(value),MAX=max(value)) + TEMP <- FAN_DATA %>% filter(Year==2025) %>% mutate(Year=2024) %>% ungroup + TEMP[,3:4] <- START_VALUE + FAN_DATA <- rbind(TEMP,FAN_DATA %>% ungroup) %>% as_tibble + return(FAN_DATA) + } + RES %>% pull(Sim_UUID) %>% unique %>% length() +MAKE_GRAPH <- function(GRAPH_DATA,ALPHA=0.03,COLOR='cadetblue',LINE_WIDTH=1){ + PLOT <- ggplot(data=GRAPH_DATA) + for(i in 1:49){ + C_DATA <- GRAPH_DATA%>% filter(Group==i) + PLOT <- PLOT +geom_ribbon(data=C_DATA,aes(x=Year,ymin=MIN,ymax=MAX),alpha=ALPHA,fill=COLOR) +} + CI_90 <- rbind(GRAPH_DATA%>% filter(Group==20) %>% mutate(Interval='80%'),GRAPH_DATA%>% filter(Group==5) %>% mutate(Interval='95%'),GRAPH_DATA%>% filter(Group==0) %>% mutate(Interval='Median Prediction')) + PLOT <- PLOT+geom_line(aes(x=Year,y=MIN,linetype=Interval,color=Interval),linewidth=LINE_WIDTH, data=CI_90)+geom_line(aes(x=Year,y=MAX,group=Interval,linetype=Interval,color=Interval),linewidth=LINE_WIDTH ,data=CI_90)+scale_color_manual(values=c("grey50","grey80","black"))+scale_linetype_manual(values = c("solid","solid","dotdash")) + return(PLOT) +} +POP_DATA <- GET_DATA(RES,3) +POP_PLOT <- MAKE_GRAPH(POP_DATA) +POP_PLOT <- POP_PLOT+geom_line(data=HIST,aes(x=Year,y=Population),color='black',linewidth=0.75)+ scale_x_continuous(breaks = c(seq(1940, 2060, by = 10),2065))+ scale_y_continuous(breaks = seq(0, 35000, by = 500))+ggtitle("Kemmerer & Diamondville, Population Forecast")+ expand_limits( y = 0)+labs(color = "Prediction Interval",linetype="Prediction Interval",y="Population")+ theme_bw()+ theme(legend.position = "top",panel.grid.minor = element_blank()) +POP_PLOT +BIRTH_DATA <- GET_DATA(RES,4) +BIRTH_PLOT <- MAKE_GRAPH(BIRTH_DATA) +BIRTH_PLOT <- BIRTH_PLOT+geom_line(data=HIST,aes(x=Year,y=Births),color='black',linewidth=0.75)+ scale_x_continuous(breaks = c(seq(2010, 2060, by = 10),2065),limits=c(2009,2065))+ scale_y_continuous(breaks = seq(0, 35000, by = 10))+ggtitle("Kemmerer & Diamondville, Birth Forecast")+ expand_limits( y = 0)+labs(color = "Prediction Interval",linetype="Prediction Interval",y="Births")+ theme_bw()+ theme(legend.position = "top",panel.grid.minor = element_blank()) +BIRTH_PLOT - #write_csv(GRAPH_DATA,PERCENTILE_DATA) - #Add historic - MEDIAN_PRED <- GRAPH_DATA %>% filter(Percentile=='50%') - GRAPH_DATA <- GRAPH_DATA %>% filter(Percentile!='50%') +DEATH_DATA <- GET_DATA(RES,5) %>% filter(!is.na(MIN)) +DEATH_PLOT <- MAKE_GRAPH(DEATH_DATA) +DEATH_PLOT <- DEATH_PLOT+geom_line(data=HIST,aes(x=Year,y=Deaths),color='black',linewidth=0.75)+ scale_x_continuous(breaks = c(seq(2010, 2060, by = 10),2065),limits=c(2009,2065))+ scale_y_continuous(breaks = seq(0, 35000, by = 10))+ggtitle("Kemmerer & Diamondville, Mortality Forecast")+ expand_limits( y = 0)+labs(color = "Prediction Interval",linetype="Prediction Interval",y="Deaths")+ theme_bw()+ theme(legend.position = "top",panel.grid.minor = element_blank()) +DEATH_PLOT - ALPHA=0.2 - COLOR <- 'black' -#GRAPH <- - nrow(RES)/40 -POP_PLOT <- ggplot(data=GRAPH_DATA)+geom_ribbon(data=FAN_DATA,aes(x=Year,ymin=`2.5%`,ymax=`97.5%`),alpha=ALPHA,fill=COLOR)+geom_ribbon(data=FAN_DATA,aes(x=Year,ymin=`5%`,ymax=`95%`),alpha=ALPHA,fill=COLOR)+geom_ribbon(data=FAN_DATA,aes(x=Year,ymin=`10%`,ymax=`90%`),alpha=ALPHA,fill=COLOR)+geom_ribbon(data=FAN_DATA,aes(x=Year,ymin=`25%`,ymax=`75%`),alpha=ALPHA,fill=COLOR)+geom_ribbon(data=FAN_DATA,aes(x=Year,ymin=`40%`,ymax=`60%`),alpha=ALPHA,fill=COLOR)+geom_line(aes(x=Year,y=Population,group=Percentile,color=Percentile))+geom_line(data=HIST,aes(x=Year,y=Population),color='black',linewidth=0.75)+geom_line(data=MEDIAN_PRED,aes(x=Year,y=Population),color='black',linetype=4,linewidth=0.75)+ scale_x_continuous(breaks = c(seq(1940, 2065, by = 5)))+ scale_y_continuous(breaks = seq(0, 35000, by = 250))+theme_bw()+ggtitle("Kemmerer & Diamondville, Population Forecast")+ expand_limits( y = 0) -POP_PLOT -ggsave("Kemmerer_Population_Fan_Chart.png",POP_PLOT ) - -MIGRATION_PLOT <- ggplot(data=GRAPH_DATA_MIGRATION)+geom_ribbon(data=FAN_DATA_MIGRATION,aes(x=Year,ymin=`2.5%`,ymax=`97.5%`),alpha=ALPHA,fill=COLOR)+geom_ribbon(data=FAN_DATA_MIGRATION,aes(x=Year,ymin=`5%`,ymax=`95%`),alpha=ALPHA,fill=COLOR)+geom_ribbon(data=FAN_DATA_MIGRATION,aes(x=Year,ymin=`10%`,ymax=`90%`),alpha=ALPHA,fill=COLOR)+geom_ribbon(data=FAN_DATA_MIGRATION,aes(x=Year,ymin=`25%`,ymax=`75%`),alpha=ALPHA,fill=COLOR)+geom_ribbon(data=FAN_DATA_MIGRATION,aes(x=Year,ymin=`40%`,ymax=`60%`),alpha=ALPHA,fill=COLOR)+geom_line(aes(x=Year,y=Migration,group=Percentile,color=Percentile))+geom_line(data=HIST %>% filter(Year>=2009),aes(x=Year,y=Migration),color='black',linewidth=0.75) +geom_line(data=MEDIAN_PRED_MIGRATION,aes(x=Year,y=Migration),color='black',linetype=4,linewidth=0.75)+ scale_x_continuous(breaks = c(seq(1940, 2065, by = 5)))+ scale_y_continuous(breaks = seq(-350, 350, by = 10))+theme_bw()+ggtitle("Kemmerer & Diamondville, Migration Forecast")+ expand_limits( y = 0) -MIGRATION_PLOT -ggsave("Kemmerer_Net_Migration_Fan_Chart.png",MIGRATION_PLOT) - - - -DEATHS_PLOT <- ggplot(data=GRAPH_DATA_MORTALITY)+geom_ribbon(data=FAN_DATA_MORTALITY,aes(x=Year,ymin=`2.5%`,ymax=`97.5%`),alpha=ALPHA,fill=COLOR)+geom_ribbon(data=FAN_DATA_MORTALITY,aes(x=Year,ymin=`5%`,ymax=`95%`),alpha=ALPHA,fill=COLOR)+geom_ribbon(data=FAN_DATA_MORTALITY,aes(x=Year,ymin=`10%`,ymax=`90%`),alpha=ALPHA,fill=COLOR)+geom_ribbon(data=FAN_DATA_MORTALITY,aes(x=Year,ymin=`25%`,ymax=`75%`),alpha=ALPHA,fill=COLOR)+geom_ribbon(data=FAN_DATA_MORTALITY,aes(x=Year,ymin=`40%`,ymax=`60%`),alpha=ALPHA,fill=COLOR)+geom_line(aes(x=Year,y=Deaths,group=Percentile,color=Percentile))+geom_line(data=HIST %>% filter(Year>=2009),aes(x=Year,y=Deaths),color='black',linewidth=0.75)+geom_line(data=MEDIAN_PRED_MORTALITY,aes(x=Year,y=Deaths),color='black',linetype=4,linewidth=0.75)+ scale_x_continuous(breaks = c(seq(2000, 2065, by = 5)))+ scale_y_continuous(breaks = seq(0, 50, by = 5))+theme_bw()+ggtitle("Kemmerer, Wyoming Death Forecast")+ expand_limits( y = 0) -DEATHS_PLOT -ggsave("Kemmerer_Mortality_Fan_Chart.png",DEATHS_PLOT ) -2025-1940 - - -BIRTH_PLOT <- ggplot(data=GRAPH_DATA_BIRTHS)+geom_ribbon(data=FAN_DATA_BIRTHS,aes(x=Year,ymin=`2.5%`,ymax=`97.5%`),alpha=ALPHA,fill=COLOR)+geom_ribbon(data=FAN_DATA_BIRTHS,aes(x=Year,ymin=`5%`,ymax=`95%`),alpha=ALPHA,fill=COLOR)+geom_ribbon(data=FAN_DATA_BIRTHS,aes(x=Year,ymin=`10%`,ymax=`90%`),alpha=ALPHA,fill=COLOR)+geom_ribbon(data=FAN_DATA_BIRTHS,aes(x=Year,ymin=`25%`,ymax=`75%`),alpha=ALPHA,fill=COLOR)+geom_ribbon(data=FAN_DATA_BIRTHS,aes(x=Year,ymin=`40%`,ymax=`60%`),alpha=ALPHA,fill=COLOR)+geom_line(aes(x=Year,y=Births,group=Percentile,color=Percentile))+geom_line(data=HIST %>% filter(Year>=2009),aes(x=Year,y=Births),color='black',linewidth=0.75)+geom_line(data=MEDIAN_PRED_BIRTHS,aes(x=Year,y=Births),color='black',linetype=4,linewidth=0.75)+ scale_x_continuous(breaks = c(seq(2000, 2065, by = 5)))+ scale_y_continuous(breaks = seq(0, 200, by = 5))+theme_bw()+ggtitle("Kemmerer, Wyoming Birth Forecast")+ expand_limits( y = 0) -ggsave("Kemmerer_Births_Fan_Chart.png",BIRTH_PLOT ) +MIGRATION_DATA <- GET_DATA(RES,6) %>% filter(!is.na(MIN)) +MIGRATION_PLOT <- MAKE_GRAPH(MIGRATION_DATA) +MIGRATION_PLOT <- MIGRATION_PLOT+geom_line(data=HIST,aes(x=Year,y=Migration),color='black',linewidth=0.75)+ scale_x_continuous(breaks = c(seq(2010, 2060, by = 10),2065),limits=c(2009,2065))+ scale_y_continuous(breaks = seq(-1000, 1000, by = 50))+ggtitle("Kemmerer & Diamondville, Net Migration Forecast")+ expand_limits( y = 0)+labs(color = "Prediction Interval",linetype="Prediction Interval",y="Migration")+ theme_bw()+ theme(legend.position = "top",panel.grid.minor = element_blank()) +MIGRATION_PLOT diff --git a/2_Result_Analysis.r b/2_Result_Analysis.r deleted file mode 100644 index fb6c507..0000000 --- a/2_Result_Analysis.r +++ /dev/null @@ -1,65 +0,0 @@ -library(tidyverse) -###Process the simulations and save the main percentile results by year - RES <- read_csv("Results/Simulations/Kemmerer_2016_Simulation.csv") - HIST <- readRDS("Data/Cleaned_Data/Population_Data/RDS/Kemmerer_Diamondville_Population_Data.Rds") %>% filter(County=='Lincoln') %>% mutate(Percentile="Actual Population") %>% filter(Year>=1940) - YEARS <- min(RES$Year):max(RES$Year) - ######Population - START_POP <- HIST %>% filter(Year==2016) %>% pull(Population) - GRAPH_DATA <- do.call(rbind,lapply(YEARS,function(x){quantile(RES %>% filter(Year==x) %>% pull(Population),c(0.025,0.05,0.1,0.25,0.4,0.5,0.6,0.75,0.9,0.95,0.975))})) %>% as_tibble %>% mutate(Year=YEARS) - #Add 2016 as a starting value - GRAPH_DATA <- rbind(GRAPH_DATA[1,],GRAPH_DATA) - GRAPH_DATA[1,] <-t(c(rep(START_POP,ncol(GRAPH_DATA)-1),min(GRAPH_DATA$Year)-1)) - FAN_DATA <- GRAPH_DATA - GRAPH_DATA <- GRAPH_DATA %>% pivot_longer(cols=!Year,names_to=c("Percentile"),values_to="Population") - GRAPH_DATA$Percentile <- factor(GRAPH_DATA$Percentile,levels=rev(c('2.5%','5%','10%','25%','40%','50%','60%','75%','90%','95%','97.5%'))) - START_POP <- HIST %>% filter(Year==2016) %>% pull(Population) - GRAPH_DATA <- rbind(GRAPH_DATA,GRAPH_DATA %>% filter(Year==2017) %>% mutate(Population=START_POP,Year=2016)) - MEDIAN_PRED <- GRAPH_DATA %>% filter(Percentile=='50%') - ######Migration - GRAPH_DATA_MIGRATION <- do.call(rbind,lapply(YEARS,function(x){quantile(RES %>% filter(Year==x) %>% pull(Net_Migration),c(0.025,0.05,0.1,0.25,0.4,0.5,0.6,0.75,0.9,0.95,0.975))})) %>% as_tibble %>% mutate(Year=YEARS) - FAN_DATA_MIGRATION <- GRAPH_DATA_MIGRATION - GRAPH_DATA_MIGRATION <- GRAPH_DATA_MIGRATION %>% pivot_longer(cols=!Year,names_to=c("Percentile"),values_to="Migration") - GRAPH_DATA_MIGRATION$Percentile <- factor(GRAPH_DATA_MIGRATION$Percentile,levels=rev(c('2.5%','5%','10%','25%','40%','60%','75%','90%','95%','97.5%'))) - MEDIAN_PRED_MIGRATION <- GRAPH_DATA_MIGRATION %>% filter(Percentile=='50%') - ######Mortalities - GRAPH_DATA_MORTALITY <- do.call(rbind,lapply(YEARS,function(x){quantile(RES %>% filter(Year==x) %>% pull(Deaths),c(0.025,0.05,0.1,0.25,0.4,0.5,0.6,0.75,0.9,0.95,0.975))})) %>% as_tibble %>% mutate(Year=YEARS) - FAN_DATA_MORTALITY <- GRAPH_DATA_MORTALITY - GRAPH_DATA_MORTALITY <- GRAPH_DATA_MORTALITY %>% pivot_longer(cols=!Year,names_to=c("Percentile"),values_to="Deaths") - GRAPH_DATA_MORTALITY$Percentile <- factor(GRAPH_DATA_MORTALITY$Percentile,levels=rev(c('2.5%','5%','10%','25%','40%','60%','75%','90%','95%','97.5%'))) - MEDIAN_PRED_MORTALITY<- GRAPH_DATA_MORTALITY %>% filter(Percentile=='50%') - ######Births - START_BIRTHS <- HIST %>% filter(Year==2016) %>% pull(Births) - - GRAPH_DATA_BIRTHS <- do.call(rbind,lapply(YEARS,function(x){quantile(RES %>% filter(Year==x) %>% pull(Births),c(0.025,0.05,0.1,0.25,0.4,0.5,0.6,0.75,0.9,0.95,0.975))})) %>% as_tibble %>% mutate(Year=YEARS) - GRAPH_DATA_BIRTHS <- rbind(GRAPH_DATA_BIRTHS[1,],GRAPH_DATA_BIRTHS) - GRAPH_DATA_BIRTHS[1,] <-t(c(rep(START_BIRTHS,ncol(GRAPH_DATA_BIRTHS)-1),min(GRAPH_DATA_BIRTHS$Year)-1)) - - FAN_DATA_BIRTHS <- GRAPH_DATA_BIRTHS - GRAPH_DATA_BIRTHS<- GRAPH_DATA_BIRTHS %>% pivot_longer(cols=!Year,names_to=c("Percentile"),values_to="Births") - GRAPH_DATA_BIRTHS$Percentile <- factor(GRAPH_DATA_BIRTHS$Percentile,levels=rev(c('2.5%','5%','10%','25%','40%','60%','75%','90%','95%','97.5%'))) - MEDIAN_PRED_BIRTHS<- GRAPH_DATA_BIRTHS %>% filter(Percentile=='50%') - - - - #write_csv(GRAPH_DATA,PERCENTILE_DATA) - #Add historic - MEDIAN_PRED <- GRAPH_DATA %>% filter(Percentile=='50%') - GRAPH_DATA <- GRAPH_DATA %>% filter(Percentile!='50%') - - ALPHA=0.2 - COLOR <- 'black' -#GRAPH <- - nrow(RES)/10 - ggplot(data=GRAPH_DATA)+geom_ribbon(data=FAN_DATA,aes(x=Year,ymin=`2.5%`,ymax=`97.5%`),alpha=ALPHA,fill=COLOR)+geom_ribbon(data=FAN_DATA,aes(x=Year,ymin=`5%`,ymax=`95%`),alpha=ALPHA,fill=COLOR)+geom_ribbon(data=FAN_DATA,aes(x=Year,ymin=`10%`,ymax=`90%`),alpha=ALPHA,fill=COLOR)+geom_ribbon(data=FAN_DATA,aes(x=Year,ymin=`25%`,ymax=`75%`),alpha=ALPHA,fill=COLOR)+geom_ribbon(data=FAN_DATA,aes(x=Year,ymin=`40%`,ymax=`60%`),alpha=ALPHA,fill=COLOR)+geom_line(aes(x=Year,y=Population,group=Percentile,color=Percentile))+geom_line(data=HIST,aes(x=Year,y=Population),color='black',linewidth=0.75)+geom_line(data=MEDIAN_PRED,aes(x=Year,y=Population),color='black',linetype=4,linewidth=0.75)+ scale_x_continuous(breaks = c(seq(1940, 2030, by = 5)))+ scale_y_continuous(breaks = seq(0, 35000, by = 250))+theme_bw()+ggtitle("Kemmerer & Diamondville, Population Forecast")+ expand_limits( y = 0) - - ggplot(data=GRAPH_DATA_MIGRATION)+geom_ribbon(data=FAN_DATA_MIGRATION,aes(x=Year,ymin=`2.5%`,ymax=`97.5%`),alpha=ALPHA,fill=COLOR)+geom_ribbon(data=FAN_DATA_MIGRATION,aes(x=Year,ymin=`5%`,ymax=`95%`),alpha=ALPHA,fill=COLOR)+geom_ribbon(data=FAN_DATA_MIGRATION,aes(x=Year,ymin=`10%`,ymax=`90%`),alpha=ALPHA,fill=COLOR)+geom_ribbon(data=FAN_DATA_MIGRATION,aes(x=Year,ymin=`25%`,ymax=`75%`),alpha=ALPHA,fill=COLOR)+geom_ribbon(data=FAN_DATA_MIGRATION,aes(x=Year,ymin=`40%`,ymax=`60%`),alpha=ALPHA,fill=COLOR)+geom_line(aes(x=Year,y=Migration,group=Percentile,color=Percentile))+geom_line(data=HIST %>% filter(Year>=2009),aes(x=Year,y=Migration),color='black',linewidth=0.75) - +geom_line(data=MEDIAN_PRED_MIGRATION,aes(x=Year,y=Migration),color='black',linetype=4,linewidth=0.75)+ scale_x_continuous(breaks = c(seq(1940, 2030, by = 5)))+ scale_y_continuous(breaks = seq(0, 35000, by = 250))+theme_bw()+ggtitle("Kemmerer & Diamondville, Migration Forecast")+ expand_limits( y = 0) - - - - - ggplot(data=GRAPH_DATA_MORTALITY)+geom_ribbon(data=FAN_DATA_MORTALITY,aes(x=Year,ymin=`2.5%`,ymax=`97.5%`),alpha=ALPHA,fill=COLOR)+geom_ribbon(data=FAN_DATA_MORTALITY,aes(x=Year,ymin=`5%`,ymax=`95%`),alpha=ALPHA,fill=COLOR)+geom_ribbon(data=FAN_DATA_MORTALITY,aes(x=Year,ymin=`10%`,ymax=`90%`),alpha=ALPHA,fill=COLOR)+geom_ribbon(data=FAN_DATA_MORTALITY,aes(x=Year,ymin=`25%`,ymax=`75%`),alpha=ALPHA,fill=COLOR)+geom_ribbon(data=FAN_DATA_MORTALITY,aes(x=Year,ymin=`40%`,ymax=`60%`),alpha=ALPHA,fill=COLOR)+geom_line(aes(x=Year,y=Deaths,group=Percentile,color=Percentile))+geom_line(data=HIST %>% filter(Year>=2009),aes(x=Year,y=Deaths),color='black',linewidth=0.75)+geom_line(data=MEDIAN_PRED_MORTALITY,aes(x=Year,y=Deaths),color='black',linetype=4,linewidth=0.75)+ scale_x_continuous(breaks = c(seq(2000, 2030, by = 5)))+ scale_y_continuous(breaks = seq(0, 50, by = 5))+theme_bw()+ggtitle("Kemmerer, Wyoming Death Forecast")+ expand_limits( y = 0) - - -ggplot(data=GRAPH_DATA_BIRTHS)+geom_ribbon(data=FAN_DATA_BIRTHS,aes(x=Year,ymin=`2.5%`,ymax=`97.5%`),alpha=ALPHA,fill=COLOR)+geom_ribbon(data=FAN_DATA_BIRTHS,aes(x=Year,ymin=`5%`,ymax=`95%`),alpha=ALPHA,fill=COLOR)+geom_ribbon(data=FAN_DATA_BIRTHS,aes(x=Year,ymin=`10%`,ymax=`90%`),alpha=ALPHA,fill=COLOR)+geom_ribbon(data=FAN_DATA_BIRTHS,aes(x=Year,ymin=`25%`,ymax=`75%`),alpha=ALPHA,fill=COLOR)+geom_ribbon(data=FAN_DATA_BIRTHS,aes(x=Year,ymin=`40%`,ymax=`60%`),alpha=ALPHA,fill=COLOR)+geom_line(aes(x=Year,y=Births,group=Percentile,color=Percentile))+geom_line(data=HIST %>% filter(Year>=2009),aes(x=Year,y=Births),color='black',linewidth=0.75)+geom_line(data=MEDIAN_PRED_BIRTHS,aes(x=Year,y=Births),color='black',linetype=4,linewidth=0.75)+ scale_x_continuous(breaks = c(seq(2000, 2030, by = 5)))+ scale_y_continuous(breaks = seq(0, 200, by = 5))+theme_bw()+ggtitle("Kemmerer, Wyoming Birth Forecast")+ expand_limits( y = 0) - diff --git a/Data/Raw_Data/Mortality_Rates_Over_Time/README_MORTALITY_DATA.txt b/Data/Raw_Data/Mortality_Rates_Over_Time/README_MORTALITY_DATA.txt index b83acce..6ec6db2 100644 --- a/Data/Raw_Data/Mortality_Rates_Over_Time/README_MORTALITY_DATA.txt +++ b/Data/Raw_Data/Mortality_Rates_Over_Time/README_MORTALITY_DATA.txt @@ -10,4 +10,4 @@ Data is manually gathered from CDC WONDER data queries. 4) The world pandemic uncertainty index as collected from FRED which is used to account for pandemics in the regression, making the age time series stationary. These are used to project mortality trends over time. In the case of the age adjusted data, this has local trends that can be compared to the national average. The single age-sex data is only at a national level but can be imparted to local levels as a general trend in the distribution of deaths ---- Run Date: 2025-12-05 19:32:50 --- +--- Run Date: 2025-12-08 13:10:13 --- diff --git a/Prelim_Process.sh b/Prelim_Process.sh index 845bccc..05cf832 100644 --- a/Prelim_Process.sh +++ b/Prelim_Process.sh @@ -13,8 +13,8 @@ Rscript "./Scripts/2A_Birth_Rate_Regression_and_Impart_Kemmerer_Births.r" Rscript "./Scripts/2B_Impart_Deaths_and_Migration_to_Subregions.r" Rscript "./Scripts/2C_Migration_by_Age_Regression.r" Rscript "./Scripts/2D_Overall_Migration_ARIMA.r" -Rscript "./Scripts/2E_Move_Current_Demographic_Data_to_Current_Year.r" Rscript "./Scripts/2F_Create_Age_Sex_Specfic_Mortality_Trends.r" Rscript "./Scripts/2G_Single_Age_Sex_ARIMA_Models.r" +Rscript "./Scripts/2E_Move_Current_Demographic_Data_to_Current_Year.r" #Produce final results for either the simulation, or information for the report, but not anything used in later stages of the simulation Rscript "./Scripts/3A_Population_Pyramid.r" diff --git a/Scripts/1D_Use_ACS_Census_Data_to_Estimate_Kemmerer_Demographics.r b/Scripts/1D_Use_ACS_Census_Data_to_Estimate_Kemmerer_Demographics.r index f0e95c2..f231d84 100644 --- a/Scripts/1D_Use_ACS_Census_Data_to_Estimate_Kemmerer_Demographics.r +++ b/Scripts/1D_Use_ACS_Census_Data_to_Estimate_Kemmerer_Demographics.r @@ -57,8 +57,6 @@ rownames(OTHER_LIN_DEM_OLD_MAT ) <- 0:85 - - ####Save results CSV_SAVE <- paste0(SAVE_DEMO_LOC,"/CSV") RDS_SAVE <- paste0(SAVE_DEMO_LOC,"/RDS") diff --git a/Scripts/1E_Process_WONDER_Mortality_Data.r b/Scripts/1E_Process_WONDER_Mortality_Data.r index 329b43a..47066e7 100644 --- a/Scripts/1E_Process_WONDER_Mortality_Data.r +++ b/Scripts/1E_Process_WONDER_Mortality_Data.r @@ -23,15 +23,16 @@ sink() #Load data files which were generated using Wonder Data set manually LIN_1979 <- read_csv(paste0(DATA_LOC_RAW,"Lincoln_Age_Adjusted_1979-1998.csv")) %>% select(Year,Sex,Mort_Rate=`Age Adjusted Rate`)%>% mutate(Region='Lincoln') + WY_1979 <- read_csv(paste0(DATA_LOC_RAW,"Wyoming_Age_Adjusted_1979-1998.csv")) %>% select(Year,Sex,Mort_Rate=`Age Adjusted Rate`)%>% mutate(Region='Wyoming') US_1979 <- read_csv(paste0(DATA_LOC_RAW,"US_Age_Adjusted_1979-1998.csv")) %>% select(Year,Sex,Mort_Rate=`Age Adjusted Rate`) %>% filter(!is.na(Sex),!is.na(Year)) %>% mutate(Region="US")%>% filter(Year<2018) LIN_1999 <- read_csv(paste0(DATA_LOC_RAW,"Lincoln_Age_Adjusted_1999-2020.csv")) %>% select(Year,Sex,Mort_Rate=`Age Adjusted Rate`)%>% mutate(Region='Lincoln') WY_1999 <- read_csv(paste0(DATA_LOC_RAW,"Wyoming_Age_Adjusted_1999-2020.csv")) %>% select(Year,Sex,Mort_Rate=`Age Adjusted Rate`)%>% mutate(Region='Wyoming') %>% filter(Year<2018) US_1999 <- read_csv(paste0(DATA_LOC_RAW,"US_Age_Adjusted_1999-2020.csv"))%>% select(Year,Sex,Mort_Rate=`Age Adjusted Rate`) %>% filter(!is.na(Sex),!is.na(Year)) %>% mutate(Region="US")%>% filter(Year<2018) - WY_2018 <- read_csv(paste0(DATA_LOC_RAW,"Wyoming_Age_Adjusted_2018-2023.csv")) %>% select(Year,Sex,Mort_Rate=`Age Adjusted Rate`) %>% mutate(Region='Wyoming') US_2018 <- read_csv(paste0(DATA_LOC_RAW,"US_Age_Adjusted_2018-2023.csv"))%>% select(Year,Sex,Mort_Rate=`Age Adjusted Rate`) %>% mutate(Region="US") + ##No adjustment for later data allowed in WONDER, so applying an average value for use in the graphs as a reasonable assumption. LIN_2018<- read_csv(paste0(DATA_LOC_RAW,"Lincoln_Not_Age_Adjusted_2018-2023.csv")) %>% select(Year,Sex,Mort_Rate=`Crude Rate`)%>% mutate(Region='Lincoln') ADJUST_TERM <- LIN_2018 %>% rename(UNADJUSTED=Mort_Rate) %>% inner_join(LIN_1999) %>% filter(!is.na(Year)) %>% mutate(Ratio=Mort_Rate/UNADJUSTED) %>% group_by(Sex) %>% summarize(Ratio=mean(Ratio)) @@ -40,7 +41,6 @@ DF <- rbind(LIN_1999,LIN_2018,WY_1999,US_1999,US_2018,WY_2018,WY_1979,US_1979,LI DF <- DF %>% ungroup%>% group_by(Sex,Region) %>% arrange(Sex,Region,Year) %>% mutate(L_Mort_Rate=lag(Mort_Rate)) %>% ungroup #Make a regression table which includes lats of mortality rate for a stationary time series, and pivot such that each year is it's own row with each area of interest (US, Wyoming, Lincoln) REG_DATA <- DF %>% pivot_wider(values_from=c(Mort_Rate,L_Mort_Rate),names_from=Region) - ##Pull the World Pandemic Index from FRED for use in the regression data PANDIMIC_INDEX <- read_csv("https://fred.stlouisfed.org/graph/fredgraph.csv?bgcolor=%23ebf3fb&chart_type=line&drp=0&fo=open%20sans&graph_bgcolor=%23ffffff&height=450&mode=fred&recession_bars=off&txtcolor=%23444444&ts=12&tts=12&width=1320&nt=0&thu=0&trc=0&show_legend=yes&show_axis_titles=yes&show_tooltip=yes&id=WUPI&scale=left&cosd=1996-01-01&coed=2025-07-01&line_color=%230073e6&link_values=false&line_style=solid&mark_type=none&mw=3&lw=3&ost=-99999&oet=99999&mma=0&fml=a&fq=Annual&fam=sum&fgst=lin&fgsnd=2020-02-01&line_index=1&transformation=lin&vintage_date=2025-11-20&revision_date=2025-11-20&nd=1996-01-01") %>% mutate(Year=year(observation_date)) %>% select(Year,WUPI)%>% mutate(L_WUPI=lag(WUPI),L_TWO_WUPI=lag(WUPI,2)) REG_DATA <- REG_DATA %>% left_join(PANDIMIC_INDEX) %>% mutate(WUPI=ifelse(is.na(WUPI),0,WUPI),L_WUPI=ifelse(is.na(L_WUPI),0,L_WUPI)) @@ -59,7 +59,6 @@ write_csv(DF,paste0(DATA_SAVE_LOC_RDS,"Long_Mortality_Rate_Data.csv" )) #Data combined and cleaned for regression save saveRDS(REG_DATA,paste0(DATA_SAVE_LOC_RDS,"Mortality_Rate_and_Pandemic_Data_for_Regression.Rds" )) write_csv(REG_DATA,paste0(DATA_SAVE_LOC_CSV,"Mortality_Rate_and_Pandemic_Data_for_Regression.csv" )) - run_datetime <- format(Sys.time(), "%Y-%m-%d %H:%M:%S") sink(file=paste0(DATA_LOC_RAW,"README_MORTALITY_DATA.txt"),append=TRUE) cat(paste0("\n--- Run Date: ", run_datetime, " ---\n")) diff --git a/Scripts/1F_Process_WONDER_Single_Age_Sex_Mortality_Data.r b/Scripts/1F_Process_WONDER_Single_Age_Sex_Mortality_Data.r index 00b04c2..5f1d354 100644 --- a/Scripts/1F_Process_WONDER_Single_Age_Sex_Mortality_Data.r +++ b/Scripts/1F_Process_WONDER_Single_Age_Sex_Mortality_Data.r @@ -34,4 +34,3 @@ write_csv(DF,paste0(DATA_SAVE_LOC_RDS,"Single_Sex_Age_US_Mortality_Rate_Data_Lon saveRDS(REG_DATA,paste0(DATA_SAVE_LOC_RDS,"Single_Sex_Age_US_Mortality_Rate_Data_Wide.Rds" )) write_csv(REG_DATA,paste0(DATA_SAVE_LOC_RDS,"Single_Sex_Age_US_Mortality_Rate_Data_Wide.csv" )) - diff --git a/Scripts/1G_Terra_Power_Migration_Rates.r b/Scripts/1G_Terra_Power_Migration_Rates.r index 23806d4..78b85ea 100644 --- a/Scripts/1G_Terra_Power_Migration_Rates.r +++ b/Scripts/1G_Terra_Power_Migration_Rates.r @@ -80,3 +80,4 @@ RES*POP_WORK_RATIO #Total family included migration, converted per person (rathe RES$Job_Type <- c("Construction","Operator") RES <- RES[,c(3,1:2)] saveRDS(RES,paste0(SAVE_LOC,"Induced_Jobs.Rds")) + diff --git a/Scripts/2A_Birth_Rate_Regression_and_Impart_Kemmerer_Births.r b/Scripts/2A_Birth_Rate_Regression_and_Impart_Kemmerer_Births.r index 9ede6ad..c50f905 100644 --- a/Scripts/2A_Birth_Rate_Regression_and_Impart_Kemmerer_Births.r +++ b/Scripts/2A_Birth_Rate_Regression_and_Impart_Kemmerer_Births.r @@ -68,11 +68,10 @@ MOD_BIRTHS_1985 <- feols(log(Births)~log(Lag_Births)+log(Lag_Two_Births)+log(Min #Easier to read for a LaTex Table DICT <- c("log(Lag_Births)"="Births Last Years (log)","log(Lag_Two_Births)"="Births Two Years Ago (log)","LN"='Lincoln',"KEM"='Kemmerer Area','log(Births)'='Births (log)','log(Min_Birth_Group)'='Child Rearing Aged Adults (log)','Region'="County" ) REG_VIEW_DATA <- REG_REDUCED_DATA %>% mutate(KEM=ifelse(Region=='Kemmerer & Diamondville',1,0),LN=ifelse(Region=='Kemmerer & Diamondville' |Region=='Lincoln' ,1,0),Region=ifelse(Region=='Kemmerer & Diamondville' ,'Lincoln',Region)) - MOD_VIEW_BIRTHS <- feols(log(Births)~log(Lag_Births)+log(Lag_Two_Births)+log(Min_Birth_Group)+KEM*Year+Year*Region|Region,cluster=~Year+Region, data=REG_VIEW_DATA ) #Higher AIC but worse acf +MOD_VIEW_BIRTHS <- feols(log(Births)~log(Lag_Births)+log(Lag_Two_Births)+log(Min_Birth_Group)+KEM*Year+Year*Region|Region,cluster=~Year+Region, data=REG_VIEW_DATA ) #Higher AIC but worse acf REG_VIEW_DATA_2016 <- REG_VIEW_DATA %>% filter(Year<=2016) REG_DATA_2016 <- REG_REDUCED_DATA %>% filter(Year<=2016) MOD_VIEW_BIRTHS_2016 <- feols(log(Births)~log(Lag_Births)+log(Lag_Two_Births)+log(Min_Birth_Group)+KEM+LN*Year+Year*Region|Region,cluster=~Year+Region, data=REG_VIEW_DATA_2016 ) -MOD_VIEW_BIRTHS_2016 REG_VIEW_DATA_1985 <- REG_VIEW_DATA%>% filter(Year<=1985) MOD_VIEW_BIRTHS_1985 <- feols(log(Births)~log(Lag_Births)+log(Lag_Two_Births)+log(Min_Birth_Group)+LN*Year+Year*Region|Region,cluster=~Year+Region, data=REG_VIEW_DATA_1985 ) ###Prelim information for fixest table @@ -142,15 +141,14 @@ ST_REG_DATA <- REG_REDUCED_DATA %>% filter(Region=='Lincoln') %>% filter(Year==m write_csv(REG_REDUCED_DATA,paste0(SAVE_DATA_LOC,"/Birth_Regression_Input_Data.csv")) ##########Update existing demographic data with new births -KEM_DATA_NEW <- REG_DATA %>% filter(Region=='Kemmerer & Diamondville') %>% select(Year,County,Region,Population,Births,Deaths,Migration) -PRED_KEM_BIRTH_2024 <- REG_DATA %>% filter(Region=='Kemmerer & Diamondville',Year>=2023) %>% mutate(Min_Birth_Group=lag(Min_Birth_Group)) %>% filter(Year==2024) +KEM_DATA_NEW <- REG_DATA %>% filter(KEM==1) %>% select(Year,County,Region,Population,Births,Deaths,Migration) %>% mutate(Region='Kemmerer & Diamondville') +PRED_KEM_BIRTH_2024 <- REG_DATA %>% filter(KEM==1,Year>=2023) %>% mutate(Min_Birth_Group=lag(Min_Birth_Group)) %>% filter(Year==2024) KEM_DATA_NEW[KEM_DATA_NEW$Year==2024,"Births"] <- round(exp(predict(MOD_BIRTHS,PRED_KEM_BIRTH_2024))) - OTHER_DATA_NEW <- REG_DATA %>% filter(Region=='Lincoln_Other') %>% select(Year,County,Region,Population,Births,Deaths,Migration) PRED_OTHER_BIRTH_2024 <- REG_DATA %>% filter(Region=='Lincoln_Other',Year>=2023) %>% mutate(Min_Birth_Group=lag(Min_Birth_Group)) %>% filter(Year==2024) OTHER_DATA_NEW[OTHER_DATA_NEW$Year==2024,"Births"] <- round(exp(predict(MOD_BIRTHS,PRED_OTHER_BIRTH_2024))) -LIN_DATA_NEW <- REG_DATA %>% filter(Region=='Lincoln') %>% select(Year,County,Region,Population,Births,Deaths,Migration) +LIN_DATA_NEW <- REG_DATA %>% filter(Region=='Lincoln',KEM==0) %>% select(Year,County,Region,Population,Births,Deaths,Migration) # if(!exists("POPULATION_SAVE_RDS")){POPULATION_SAVE_RDS <- "./Data/Cleaned_Data/Population_Data/RDS/"} dir.create(POPULATION_SAVE_RDS, recursive = TRUE, showWarnings = FALSE) @@ -163,5 +161,5 @@ saveRDS(KEM_DATA_NEW,paste0(POPULATION_SAVE_RDS,"Kemmerer_Diamondville_Populatio write_csv(KEM_DATA_NEW,paste0(POPULATION_SAVE_CSV,"Kemmerer_Diamondville_Population_Data.csv")) saveRDS(OTHER_DATA_NEW,paste0(POPULATION_SAVE_RDS,"Other_Lincoln_Population_Data.Rds")) write_csv(OTHER_DATA_NEW,paste0(POPULATION_SAVE_CSV,"Other_Lincoln_Population_Data.csv")) - +print("Done") diff --git a/Scripts/2B_Impart_Deaths_and_Migration_to_Subregions.r b/Scripts/2B_Impart_Deaths_and_Migration_to_Subregions.r index 2084d63..125daee 100644 --- a/Scripts/2B_Impart_Deaths_and_Migration_to_Subregions.r +++ b/Scripts/2B_Impart_Deaths_and_Migration_to_Subregions.r @@ -15,8 +15,7 @@ MORTALITY <- MORT PRED_DEATHS <- function(DATA,MORTALITY){ MORTALITY <- MORTALITY%>% pivot_wider(values_from=Death_Rate,names_from=Sex) %>% group_by(County,Min_Age,Max_Age) %>% summarize(Rate_Male=mean(Male,na.rm=TRUE),Rate_Female=mean(Female,na.rm=TRUE)) %>% ungroup RES <- DATA[DATA$Age >= MORTALITY$Min_Age[1] & MORTALITY$Max_Age[1]>= DATA$Age,] %>% group_by(County,Region,Year,Age) %>% summarize(Num_Female=sum(Num_Female),Num_Male=sum(Num_Male)) %>% ungroup %>% mutate(Min_Age=MORTALITY$Min_Age[1],Max_Age= MORTALITY$Max_Age[1]) - for(i in 2:nrow(MORTALITY)){ - RES <- rbind(RES,DATA[DATA$Age >= MORTALITY$Min_Age[i] & MORTALITY$Max_Age[i]>= DATA$Age,] %>% group_by(County,Region,Year,Age) %>% summarize(Num_Female=sum(Num_Female),Num_Male=sum(Num_Male)) %>% ungroup %>% mutate(Min_Age=MORTALITY$Min_Age[i],Max_Age= MORTALITY$Max_Age[i])) + for(i in 2:nrow(MORTALITY)){ RES <- rbind(RES,DATA[DATA$Age >= MORTALITY$Min_Age[i] & MORTALITY$Max_Age[i]>= DATA$Age,] %>% group_by(County,Region,Year,Age) %>% summarize(Num_Female=sum(Num_Female),Num_Male=sum(Num_Male)) %>% ungroup %>% mutate(Min_Age=MORTALITY$Min_Age[i],Max_Age= MORTALITY$Max_Age[i])) } RES <- RES%>% arrange(Year) %>% left_join(MORTALITY)%>% group_by(County,Region,Year) %>% summarize(Predicted_Deaths=sum(Rate_Male*Num_Male)+sum(Rate_Female*Num_Female) ) %>% ungroup %>% select(County,Region,Year,Predicted_Deaths) return(RES) diff --git a/Scripts/2C_Migration_by_Age_Regression.r b/Scripts/2C_Migration_by_Age_Regression.r index 9da97da..b773d07 100644 --- a/Scripts/2C_Migration_by_Age_Regression.r +++ b/Scripts/2C_Migration_by_Age_Regression.r @@ -2,6 +2,7 @@ library(tidyverse) library(fixest) library(scales) +#setwd("../") ######Checking correlations with migration rates DEMOGRAPHIC_DATA <- readRDS("Data/Cleaned_Data/Demographic_Sex_Age_Data/RDS/All_Wyoming_Counties_Demographics.Rds") #Extract the population trend data to connect with demographics (Population,births,deaths) @@ -108,3 +109,4 @@ dir.create(RES_SAVE_LOC , recursive = TRUE, showWarnings = FALSE) write.csv(MIG_AGE_DIST ,paste0(RES_SAVE_LOC,"Migration_Age_Probability_Zero_to_85.csv"),row.names=FALSE) saveRDS(MIG_AGE_DIST ,paste0(RES_SAVE_LOC,"Migration_Age_Probability_Zero_to_85.Rds")) + diff --git a/Scripts/2E_Move_Current_Demographic_Data_to_Current_Year.r b/Scripts/2E_Move_Current_Demographic_Data_to_Current_Year.r index fd8bb8a..c3ea935 100644 --- a/Scripts/2E_Move_Current_Demographic_Data_to_Current_Year.r +++ b/Scripts/2E_Move_Current_Demographic_Data_to_Current_Year.r @@ -59,7 +59,4 @@ KEM_DEMOGRAPHIC <- KEM_DEMOGRAPHIC - DEATH_MAT saveRDS(KEM_DEMOGRAPHIC,"Data/Intermediate_Inputs/Starting_Demographic_Data_Sets_of_Monte_Carlo/2024_Starting_Kemmerer_Diamondville_Demographics_Matrix.Rds") ##Perhaps I can be more clever. I could average the direct simulation estimate as done above, with the Kemmerer values found when estimating the other region, and then subtracting from the Lincoln Total. OTHER_DEMOGRAPHIC_NEW <- readRDS("Data/Intermediate_Inputs/Starting_Demographic_Data_Sets_of_Monte_Carlo/2024_Starting_Lincoln_County_Demographics_Matrix.Rds")-KEM_DEMOGRAPHIC -saveRDS(OTHER_DEMOGRAPHIC_NEW ,"Data/Intermediate_Inputs/Starting_Demographic_Data_Sets_of_Monte_Carlo/2024_Starting_Kemmerer_Diamondville_Demographics_Matrix.Rds") - - - +saveRDS(OTHER_DEMOGRAPHIC_NEW ,"Data/Intermediate_Inputs/Starting_Demographic_Data_Sets_of_Monte_Carlo/2024_Starting_Other_Lincoln_Demographics_Matrix.Rds") diff --git a/Scripts/Load_Custom_Functions/Induced_Migration_Functions.r b/Scripts/Load_Custom_Functions/Induced_Migration_Functions.r index 793b5f5..6cb6964 100644 --- a/Scripts/Load_Custom_Functions/Induced_Migration_Functions.r +++ b/Scripts/Load_Custom_Functions/Induced_Migration_Functions.r @@ -1,8 +1,11 @@ #Takes the added jobs table and downshift for people living outside of Kemmerer but in Lincoln -LOCAL_WORK_ADJ <- function(DF,MIN_LOCAL,MAX_LOCAL=1){ +#DF <- OPERATOR_MIGRATION +#MIN_LOCAL=0.85 +#MAX_LOCAL=1 - DF[,-1]<- runif(1,MIN_LOCAL,MAX_LOCAL)*DF[,-1]#Random range people choosing to live outside of Kemmerer assumed to be between 85% and 100% - DF[,-1:-2] <-round(DF[,-1:-2]) +LOCAL_WORK_ADJ <- function(DF,MIN_LOCAL,MAX_LOCAL=1){ + DF <- runif(1,MIN_LOCAL,MAX_LOCAL)*DF#Random range people choosing to live outside of Kemmerer assumed to be between 85% and 100% + DF <-round(DF) return(DF) } #Find the expected total of new induced jobs from TerraPower (Includes construction entering and leaving, and operators entering)