Population_Study/TESTING.r
2025-12-22 18:08:55 -07:00

125 lines
9.2 KiB
R

library(tidyverse)
library(gt) #For nice color coded capacity limits table.
source("Scripts/Load_Custom_Functions/Fan_Chart_Creation_Functions.r") #Functions created to make nice graphs
if(!exists("SAVE_RES_LOC")){SAVE_RES_LOC <- "./Results/Primary_Simulation_Results/Main_Results/"}
dir.create(SAVE_RES_LOC, recursive = TRUE, showWarnings = FALSE)
###Process the simulations and save the main percentile results by year
RES <- read_csv("Results/Simulations/Kemmerer_2024_With_Data_Center_Simulation.csv")
RES[,"Year"] <- RES[,"Year"]
RES<- RES %>% filter(!is.na(Year))
RES <- RES %>% filter(!is.na(Population))
HIST <- readRDS("Data/Cleaned_Data/Population_Data/RDS/Kemmerer_Diamondville_Population_Data.Rds") %>% filter(County=='Lincoln') %>% mutate(Percentile="Actual Population") %>% filter(Year>=1940)
######Population
####Fan New
#POP_DATA <- GET_DATA(RES,3)
#POP_PLOT <- MAKE_GRAPH(POP_DATA)
########TEST
SIM_START=2025
LAST_HISTORIC <- SIM_START-1
YEARS <- min(RES$Year,na.rm=TRUE):max(RES$Year,na.rm=TRUE)
LEVELS <- seq(0.00,1,by=0.005)
GROUPS <- floor(length(LEVELS)/2)
COL_NUM <-3
RES %>% tail
#C_DATA<- RES %>% filter(Growth_Rate=='HIGH')
#C_DATA <- RES %>% filter(Growth_Rate!='HIGH')
C_DATA <- RES
nrow(RES)
FAN_DATA <- do.call(rbind,lapply(YEARS,function(x){quantile(as.numeric(t((C_DATA %>% filter(Year==x))[,COL_NUM])),LEVELS)})) %>% as_tibble %>% mutate(Year=YEARS)
FAN_DATA <- FAN_DATA %>% pivot_longer(!Year,names_to="Percentile",values_to="Population") %>% group_by(Year) %>% mutate(DIFF=c(NA,diff(Population)))
GRAPH_DATA <- FAN_DATA %>% group_by(Year) %>% mutate(MIN_RANGE=lag(parse_number(Percentile)/100),MAX_RANGE=parse_number(Percentile)/100,MIN_POP=lag(Population),MAX_POP=Population) %>% filter(!is.na(DIFF))
GRAPH_DATA <- GRAPH_DATA %>% mutate(MAX_POP=ifelse(MAX_RANGE!=1,MAX_POP-0.001,MAX_POP))
CI_80 <- GRAPH_DATA%>% filter(Percentile %in%c('10.0%','90.0%')) %>% group_by(Year) %>% summarize(MIN_POP=min(MIN_POP),MAX_POP=max(MAX_POP),Interval='80%')
CI_95 <- GRAPH_DATA%>% filter(Percentile %in%c('2.5%','97.5%')) %>% group_by(Year) %>% summarize(MIN_POP=min(MIN_POP),MAX_POP=max(MAX_POP),Interval='95%')
MEDIAN <- GRAPH_DATA%>% filter(Percentile %in%c('50.0%'))%>% group_by(Year) %>% summarize(MIN_POP=min(MIN_POP),MAX_POP=max(MAX_POP),Interval='Median')
RANGE_DATA <- rbind(MEDIAN ,CI_80,CI_95)
ggplot(GRAPH_DATA %>% filter(MIN_RANGE>=0.015,MAX_RANGE<=0.985) )+geom_ribbon(aes(group=Percentile,x=Year,ymin=MIN_POP,ymax=MAX_POP,alpha=-DIFF),fill='cadetblue')+geom_line(data=GRAPH_DATA %>% filter(Percentile=='50%'),aes(x=Year,y=MIN_POP))+geom_line(data=RANGE_DATA,aes(color=Interval,linetype=Interval,x=Year,y=MIN_POP))+geom_line(data=RANGE_DATA,aes(color=Interval,linetype=Interval,x=Year,y=MAX_POP))+scale_color_manual(values=c("grey50","grey80","black"))+scale_linetype_manual(values = c("solid","solid","dotdash"))+geom_line(data=HIST,aes(x=Year,y=Population),color='black',linewidth=0.75)+ scale_x_continuous(breaks = c(seq(1940, 2060, by = 10),2065))+ scale_y_continuous(breaks = seq(0, 35000, by = 500))+ggtitle("Kemmerer & Diamondville, Population Forecast")+ expand_limits( y = 0)+labs(color = "Prediction Interval",linetype="Prediction Interval",y="Population")+ theme_bw()+ theme(legend.position = "top",panel.grid.minor = element_blank())+scale_alpha(range = c(0, 1),guide="none")
C_DATA
geom_
RANGE_DATA
QUANT <- quantile(as.numeric(t((RES %>% filter(Year==2025))[,3])),LEVELS) %>% as.numeric
RANGE <- QUANT %>% diff
RANGE
RANGE <- RANGE[-1]
RANGE <- RANGE[-length(RANGE)]
RANGE <- 1-RANGE/max(RANGE)
RANGE
scale(RANGE)
FAN_DATA <- do.call(rbind,lapply(YEARS,function(x){quantile(as.numeric(t((RES %>% filter(Year==x))[,COL_NUM])),LEVELS)})) %>% as_tibble %>% mutate(Year=YEARS)
FAN_DATA <- rbind(FAN_DATA[1,],FAN_DATA)
#######
POP_PLOT <- POP_PLOT+geom_line(data=HIST,aes(x=Year,y=Population),color='black',linewidth=0.75)+ scale_x_continuous(breaks = c(seq(1940, 2060, by = 10),2065))+ scale_y_continuous(breaks = seq(0, 35000, by = 500))+ggtitle("Kemmerer & Diamondville, Population Forecast")+ expand_limits( y = 0)+labs(color = "Prediction Interval",linetype="Prediction Interval",y="Population")+ theme_bw()+ theme(legend.position = "top",panel.grid.minor = element_blank())
png(paste0(SAVE_RES_LOC,"Population_Fan_Chart_Main_Results.png"), width = 12, height = 8, units = "in", res = 600)
POP_PLOT
dev.off()
#Population Summary table for the key years
KEY_YEARS <- c(2027,2030,2035,2045,2055,2065)
KEY_YEAR_SUMMARY <- POP_DATA %>% filter(Year %in% KEY_YEARS,Group==5) %>% select(-Group) %>% rename(CI_95_Lower=MIN,CI_95_Upper=MAX) %>% left_join(POP_DATA %>% filter(Year %in% KEY_YEARS,Group==0) %>% select(-Group) %>% select(Year,Median=MIN)) %>% select(Year,CI_95_Lower,Median,CI_95_Upper)
KEY_YEAR_SUMMARY <- round(KEY_YEAR_SUMMARY )
KEY_YEAR_SUMMARY <- t(KEY_YEAR_SUMMARY )
write.table(KEY_YEAR_SUMMARY,paste0(SAVE_RES_LOC,"Key_Year_Summary_Main_Results.csv"), sep=",",col.names=FALSE)
BIRTH_DATA <- GET_DATA(RES,4)
BIRTH_PLOT <- MAKE_GRAPH(BIRTH_DATA)
BIRTH_PLOT <- BIRTH_PLOT+geom_line(data=HIST,aes(x=Year,y=Births),color='black',linewidth=0.75)+ scale_x_continuous(breaks = c(seq(2010, 2060, by = 5),2065),limits=c(2009,2065))+ scale_y_continuous(breaks = seq(0, 35000, by = 10))+ggtitle("Kemmerer & Diamondville, Birth Forecast")+ expand_limits( y = 0)+labs(color = "Prediction Interval",linetype="Prediction Interval",y="Births")+ theme_bw()+ theme(legend.position = "top",panel.grid.minor = element_blank())
png(paste0(SAVE_RES_LOC,"Birth_Fan_Chart_Main_Results.png"), width = 12, height = 8, units = "in", res = 600)
BIRTH_PLOT
dev.off()
DEATH_DATA <- GET_DATA(RES,5) %>% filter(!is.na(MIN))
DEATH_PLOT <- MAKE_GRAPH(DEATH_DATA)
DEATH_PLOT <- DEATH_PLOT+geom_line(data=HIST,aes(x=Year,y=Deaths),color='black',linewidth=0.75)+ scale_x_continuous(breaks = c(seq(2010, 2060, by = 5),2065),limits=c(2009,2065))+ scale_y_continuous(breaks = seq(0, 35000, by = 10))+ggtitle("Kemmerer & Diamondville, Mortality Forecast")+ expand_limits( y = 0)+labs(color = "Prediction Interval",linetype="Prediction Interval",y="Deaths")+ theme_bw()+ theme(legend.position = "top",panel.grid.minor = element_blank())
png(paste0(SAVE_RES_LOC,"Mortality_Fan_Chart_Main_Results.png"), width = 12, height = 8, units = "in", res = 600)
DEATH_PLOT
dev.off()
MIGRATION_DATA <- GET_DATA(RES,6) %>% filter(!is.na(MIN))
MIGRATION_PLOT <- MAKE_GRAPH(MIGRATION_DATA)
MIGRATION_PLOT <- MIGRATION_PLOT+geom_line(data=HIST,aes(x=Year,y=Migration),color='black',linewidth=0.75)+ scale_x_continuous(breaks = c(seq(2010, 2060, by = 5),2065),limits=c(2009,2065))+ scale_y_continuous(breaks = seq(-1000, 1000, by = 50))+ggtitle("Kemmerer & Diamondville, Net Migration Forecast")+ expand_limits( y = 0)+labs(color = "Prediction Interval",linetype="Prediction Interval",y="Migration")+ theme_bw()+ theme(legend.position = "top",panel.grid.minor = element_blank())
png(paste0(SAVE_RES_LOC,"Migration_Fan_Chart_Main_Results.png"), width = 12, height = 8, units = "in", res = 600)
MIGRATION_PLOT
dev.off()
#####Key year table
YEARS <- c(2027,2030,2035,2045,2055,2065)
KEY_YEAR_DATA <- RES %>% filter(Year %in% YEARS)
AVG_VALUES <- KEY_YEAR_DATA %>% group_by(Year) %>% summarize(MED=median(Population),MEAN=mean(Population))
AVG_VALUES <- rbind(AVG_VALUES[,1:2]%>% rename(Value=MED) %>% mutate('Summary Stat.'="Median"),AVG_VALUES[,c(1,3)] %>% rename(Value=MEAN) %>% mutate('Summary Stat.'="Mean"))
HISTOGRAM <- ggplot(KEY_YEAR_DATA, aes(x = Population,group=-Year,Color=Year,fill=Year)) + geom_histogram(alpha=0.3,bins=800)+geom_vline(data = AVG_VALUES, aes(xintercept = Value,group=`Summary Stat.`,color = `Summary Stat.`), linewidth= 0.75)+scale_color_manual(values=c("red","black","black"))+ facet_grid(rows=vars(Year))+ scale_x_continuous(breaks = c(seq(0, 10000, by = 500)))+ theme_bw()+ theme(legend.position = "top",panel.grid.minor = element_blank())+ylab("Number of Simulation")+guides(fill= guide_legend(nrow = 1))
png(paste0(SAVE_RES_LOC,"Population_Histogram_Main_Results.png"), width = 8, height = 12, units = "in", res = 600)
HISTOGRAM
dev.off()
#rm(KEY_YEARS)
POP_LEVELS <- seq(2000,6000,100)
YEARS
if(exists("KEY_YEARS")){rm(KEY_YEARS)}
for(i in YEARS ){
CURRENT_YEAR_DATA <- RES %>% filter(Year==i ) %>% pull(Population)
ECDF <- ecdf(CURRENT_YEAR_DATA)
ECDF_VALUES <- ECDF(POP_LEVELS)
if(!exists("KEY_YEARS")){KEY_YEARS<- ECDF_VALUES} else{KEY_YEARS<- cbind(KEY_YEARS,ECDF_VALUES)}
}
colnames(KEY_YEARS) <- YEARS
rownames(KEY_YEARS) <- POP_LEVELS
PLOT_GREEN <- "forestgreen"
PLOT_YELLOW <- "yellow"
PLOT_RED <- "red"
KEY_YEARS <- KEY_YEARS%>% as.data.frame
Capacity_Risk <- KEY_YEARS%>% gt(rownames_to_stub = TRUE,caption="Year") %>% data_color( fn = scales::col_numeric( palette = c(PLOT_RED, PLOT_YELLOW, PLOT_GREEN), domain = c(0, 1) ) ) %>% fmt_percent( decimals = 1, drop_trailing_zeros = FALSE) %>% tab_stubhead(label =c("Capacity"))
gtsave( data = Capacity_Risk , filename = "./Results/Primary_Simulation_Results/Main_Results/Capacity_Table_Main_Results.html")
system("wkhtmltopdf --disable-smart-shrinking --no-stop-slow-scripts --enable-local-file-access --page-width 99mm --page-height 328mm ./Results/Primary_Simulation_Results/Main_Results/Capacity_Table_Main_Results.html ./Results/Primary_Simulation_Results/Main_Results/Capacity_Table_Main_Results.pdf")