193 lines
12 KiB
R
193 lines
12 KiB
R
#####Packages
|
|
library(tidyverse) #Cleaning data
|
|
library(fixest) #Estimating a model of birth rates, to provide variance in the birth rate Monte Carlo using a fixed effect model.
|
|
library(forecast) #Fore ARIMA migration simulations
|
|
library(parallel)
|
|
library(uuid) #To add a index to each batch
|
|
####If the preliminary data needs to be reloaded run the supplied bash script to download, process, and generate all needed data sets for the Monte Carlo population Simulation. Otherwise skip this step to save time
|
|
RELOAD_DATA <- FALSE
|
|
if(RELOAD_DATA){system("bash Prelim_Process.sh")}
|
|
|
|
#Load custom functions needed for the simulation
|
|
source("Scripts/Load_Custom_Functions/Migration_Simulation_Functions.r")
|
|
source("Scripts/Load_Custom_Functions/Birth_Simulation_Functions.r")
|
|
source("Scripts/Load_Custom_Functions/Increment_Data_Year.r")
|
|
source("Scripts/Load_Custom_Functions/Single_Age_Mortality_Trend_Simulation.r")
|
|
source("Scripts/Load_Custom_Functions/Induced_Migration_Functions.r")
|
|
|
|
#######Preliminary Model Inputs
|
|
YEARS_AHEAD <- 41
|
|
ST_YEAR <- 2025
|
|
################################Load Data
|
|
DEMO <- readRDS("Data/Intermediate_Inputs/Starting_Demographic_Data_Sets_of_Monte_Carlo/2024_Starting_Kemmerer_Diamondville_Demographics_Matrix.Rds")
|
|
|
|
BIRTH_MOD <- readRDS("Data/Intermediate_Inputs/Birth_Regressions/Birth_Regression.Rds")
|
|
#Must add region as a factor with multiple levels for predict to work. Seems to check for multiple levels although that is not needed econometrics.
|
|
BIRTH_DATA <- readRDS("Data/Intermediate_Inputs/Birth_Regressions/Regression_Data/Birth_Simulation_Key_Starting_Points.Rds") %>% mutate(Region=factor(Region)) %>% filter(KEM==1,Year==2023) %>% mutate(Year=2024)
|
|
|
|
|
|
MIGRATION_ARIMA <- readRDS("Data/Intermediate_Inputs/Migration_ARIMA_Models/Kemmerer_Diamondville_Net_Migration_ARIMA.Rds")
|
|
MIGRATION_ODDS <- readRDS("Data/Intermediate_Inputs/Migration_Trends/Migration_Age_Probability_Zero_to_85.Rds")
|
|
####
|
|
OPERATORS <- readRDS("Data/Cleaned_Data/TerraPower_Impact/Operating_Worker_Related_Migration.Rds")
|
|
CONSTRUCTION <- readRDS("Data/Cleaned_Data/TerraPower_Impact/Construction_Related_Migration.Rds")
|
|
OPERATOR_LIN_MIGRATION <- OPERATORS %>% pull("Operator_Emp_Migrated")
|
|
CONSTRUCTION_LIN_MIGRATION <- CONSTRUCTION %>% pull("Construction_Emp_Migrated")
|
|
|
|
INDUCED_MIGRATION_MULTIPLIERS <- readRDS("Data/Cleaned_Data/TerraPower_Impact/Induced_Jobs.Rds")
|
|
|
|
##############
|
|
#Data for death rate trends
|
|
SINGLE_AGE_MODS <- readRDS("Data/Intermediate_Inputs/Mortality_Regression_Data/Single_Sex_Age_Time_Series_Regression.Rds")
|
|
BOUNDS <- readRDS("Data/Intermediate_Inputs/Mortality_Regression_Data/Single_Sex_Age_Bounds_for_Predictions.Rds")
|
|
MAX_MALE <- BOUNDS %>% filter(Sex=='Male') %>% pull(MAX_RATE)
|
|
MIN_MALE <- BOUNDS %>% filter(Sex=='Male') %>% pull(MIN_RATE)
|
|
MAX_FEMALE <- BOUNDS %>% filter(Sex=='Female') %>% pull(MAX_RATE)
|
|
MIN_FEMALE <- BOUNDS %>% filter(Sex=='Female') %>% pull(MIN_RATE)
|
|
MIN_GAP <- BOUNDS %>% filter(Sex=='Male') %>% pull(MIN_MALE_FEMALE_GAP)
|
|
MAX_GAP <- BOUNDS %>% filter(Sex=='Male') %>% pull(MAX_MALE_FEMALE_GAP)
|
|
BASELINE_AGE_ADJUST_MEN <- readRDS("Data/Cleaned_Data/Mortality_Data/RDS/Single_Sex_Age_Population_in_2000.Rds") %>% filter(Sex=='Male') %>% pull(Percent_of_Population)
|
|
BASELINE_AGE_ADJUST_WOMEN <- readRDS("Data/Cleaned_Data/Mortality_Data/RDS/Single_Sex_Age_Population_in_2000.Rds") %>% filter(Sex=='Female') %>% pull(Percent_of_Population)
|
|
#Adjust to just women popualtion (Not all population percent
|
|
BASELINE_AGE_ADJUST_WOMEN <- BASELINE_AGE_ADJUST_WOMEN/sum(BASELINE_AGE_ADJUST_WOMEN )
|
|
BASELINE_AGE_ADJUST_MEN <- BASELINE_AGE_ADJUST_MEN/ sum(BASELINE_AGE_ADJUST_MEN )
|
|
MOD_MEN_ALL <- readRDS("Data/Intermediate_Inputs/Age_Mortality_ARIMA_Models/ARIMA_US_Men_Mortality_by_Age.Rds")
|
|
MOD_WOMEN_ALL <- readRDS("Data/Intermediate_Inputs/Age_Mortality_ARIMA_Models/ARIMA_US_Women_Mortality_by_Age.Rds")
|
|
MOD_LIN_MEN <- readRDS("Data/Intermediate_Inputs/Age_Mortality_ARIMA_Models/ARIMA_Lincoln_Men_Mortality_by_Age.Rds")
|
|
MOD_LIN_WOMEN <- readRDS("Data/Intermediate_Inputs/Age_Mortality_ARIMA_Models/ARIMA_Lincoln_Women_Mortality_by_Age.Rds")
|
|
XREG <- cbind(rep(0.0001,YEARS_AHEAD+1),rep(0.0001,YEARS_AHEAD+1)) #Empty data set to simulate in the future
|
|
XREG <- ts(XREG,start=ST_YEAR,frequency=1)
|
|
SIMULATE_MORTALITY_RATE_TRENDS <- function(){
|
|
SIMULATED_MORTALITY_DATA_SET <- MAKE_EMPTY(ST_YEAR,ST_YEAR+YEARS_AHEAD,MOD_LIN_MEN,MOD_LIN_WOMEN,MOD_MEN_ALL,MOD_WOMEN_ALL,XREG)
|
|
MORTALITY_SIMULATION <- AGE_DIST(SINGLE_AGE_MODS,SIMULATED_MORTALITY_DATA_SET ,MAX_MALE,MAX_FEMALE,MIN_MALE,MIN_FEMALE,MAX_GAP,MIN_GAP,BASELINE_AGE_ADJUST_MEN,BASELINE_AGE_ADJUST_WOMEN)
|
|
return(MORTALITY_SIMULATION )
|
|
}
|
|
|
|
#####################START YEAR BY SIMULATIONS
|
|
#CURRENT_YEARS_AHEAD=1;CURRENT_SIM_NUM <- 1;MORTALITY_SIMULATION <- SIMULATE_MORTALITY_RATE_TRENDS()
|
|
SINGLE_YEAR_SIM <- function(DEMO,BIRTH_DATA,CURRENT_YEARS_AHEAD,MORTALITY_SIMULATION,NET_MIGRATION){
|
|
ORIG_DEMO <- DEMO
|
|
DEMO <- DEMOGRAPHICS_AFTER_MIGRATION(DEMO, NET_MIGRATION,MIGRATION_ODDS )
|
|
TOTAL_MIGRATION <- sum(DEMO-ORIG_DEMO)
|
|
BIRTH_DATA$Year <- BIRTH_DATA$Year+1
|
|
BIRTH_DATA$Lag_Two_Births <- BIRTH_DATA$Lag_Births
|
|
BIRTH_DATA$Lag_Births <- BIRTH_DATA$Births
|
|
BIRTH_DATA$Births <- NA
|
|
##We grab one year earlier than the window because they are one year older this year. Because the ages are from 0-85, row 18 is year 17, but one year is added making it 18 years in the current year. The birth windows are 18-28 for women and 18-30 for men.
|
|
BIRTH_DATA$Min_Birth_Group <- min(sum(DEMO[18:30,1]),sum(DEMO[18:28,2]))
|
|
NEW_BORNS <- BIRTH_SIM(BIRTH_MOD,BIRTH_DATA)
|
|
TOTAL_BIRTHS <- sum(NEW_BORNS)
|
|
BIRTH_DATA[,"Births"] <- TOTAL_BIRTHS
|
|
DEMO <- INCREMENT_AGES(DEMO,NEW_BORNS)
|
|
MORTALITY_SIMULATION
|
|
MALE_DEATHS <- sapply(1:86,function(x){rbinom(1,DEMO[x,1],MORTALITY_SIMULATION[[1]][x,CURRENT_YEARS_AHEAD])})
|
|
FEMALE_DEATHS <- sapply(1:86,function(x){rbinom(1,DEMO[x,2],MORTALITY_SIMULATION[[2]][x,CURRENT_YEARS_AHEAD])})
|
|
MALE_DEATHS <- ifelse(MALE_DEATHS>=DEMO[,1],DEMO[,1],MALE_DEATHS)
|
|
FEMALE_DEATHS <- ifelse(FEMALE_DEATHS>=DEMO[,1],DEMO[,1],FEMALE_DEATHS)
|
|
|
|
TOTAL_DEATHS <- sum(MALE_DEATHS+FEMALE_DEATHS)
|
|
DEMO[,"Num_Male"] <- DEMO[,"Num_Male"] -MALE_DEATHS
|
|
DEMO[,"Num_Female"] <- DEMO[,"Num_Female"] -FEMALE_DEATHS
|
|
#List of values needed for the next run or for reporting a result
|
|
TOTAL_POP <- sum(DEMO)
|
|
return(list(DEMO,BIRTH_DATA,c(TOTAL_POP,TOTAL_BIRTHS,TOTAL_DEATHS,TOTAL_MIGRATION)))
|
|
}
|
|
|
|
MIGRATION_ARIMA_MODEL <- MIGRATION_ARIMA
|
|
#DEMO,BIRTH_DATA,ST_YEAR,YEARS_AHEAD,MIGRATION_ARIMA_MODEL,OPERATOR_TOTAL,CONSTRUCTION_TOTAL,MIGRATION_MULTIPLIERS
|
|
CONSTRUCTION_MIGRATION <- CONSTRUCTION_LIN_MIGRATION
|
|
MIGRATION_MULTIPLIERS <- INDUCED_MIGRATION_MULTIPLIERS
|
|
OPERATOR_MIGRATION <- OPERATOR_LIN_MIGRATION
|
|
|
|
SINGLE_SIM <- function(DEMO,BIRTH_DATA,ST_YEAR,YEARS_AHEAD,MIGRATION_ARIMA_MODEL,OPERATOR_MIGRATION,CONSTRUCTION_MIGRATION,MIGRATION_MULTIPLIERS ){
|
|
|
|
TERRA_POWER_EFFECT <- rep(0,YEARS_AHEAD)
|
|
OTHER_PROJECT_EFFECTS_PERM <- rep(0,YEARS_AHEAD)
|
|
OTHER_PROJECT_EFFECTS_TEMP <- rep(0,YEARS_AHEAD)
|
|
OTHER_PROJECT_EFFECTS_TEMP_POP_ADDED <- rep(0,YEARS_AHEAD)
|
|
OPERATOR_MIGRATION <- LOCAL_WORK_ADJ(OPERATOR_MIGRATION ,0.85) #Assume between 85%-100% operators live in Kemmerer
|
|
CONSTRUCTION_MIGRATION <- LOCAL_WORK_ADJ(CONSTRUCTION_MIGRATION,0.41) #Assume between 41%-100% operators live in Kemmerer
|
|
CONSTRUCTION_MIGRATION[7] <- CONSTRUCTION_MIGRATION[7] - sum(CONSTRUCTION_MIGRATION )
|
|
CONSTRUCTION_POPULATION_ADDED <- cumsum(CONSTRUCTION_MIGRATION)
|
|
ADDED_FROM_BASELINE <- runif(1) #The percentage of the possible growth in industries like restaurants to add compared to the Kemmerer IMPLAN model which understates possible structural growth
|
|
PERMANENT_TERRAPOWER_MIGRATION <- INDUCED_SIMULATION(CONSTRUCTION_MIGRATION,OPERATOR_MIGRATION,MIGRATION_MULTIPLIERS,ADDED_FROM_BASELINE)+OPERATOR_MIGRATION
|
|
|
|
###############NOTE NEED TO USE THIS AT END TO ADJUST THE RESULTS WHILE LEAVING THE DEMOGRAPHIC MATRIX
|
|
TERRA_POWER_EFFECT[1:7] <- TERRA_POWER_EFFECT[1:7]+CONSTRUCTION_MIGRATION
|
|
#Dry Pine Migration
|
|
DRY_PINE <- round(runif(1,0.15,0.45)*c(126,176,-302)*1.15)
|
|
DRY_PINE[3] <- DRY_PINE[3]-sum(DRY_PINE)
|
|
OTHER_PROJECT_EFFECTS_TEMP[2:4] <- DRY_PINE
|
|
OTHER_PROJECT_EFFECTS_TEMP_POP_ADDED[2:4] <- cumsum(DRY_PINE)
|
|
#Data Center Simulation
|
|
if(runif(1)>0.67){
|
|
MW_CAPACITY <- runif(1,20,120)
|
|
PERM_WORKERS <- MW_CAPACITY/8+30 #Seems to be a good ration 32.5 workers at 20 capacity and 45 at max this is a heurstic I came up with
|
|
OTHER_PROJECT_EFFECTS_PERM
|
|
DATA_CENTER_START_YEAR <- which.max(runif(YEARS_AHEAD-6))+3
|
|
PERM_DATA_CENTER_OP <- PERM_WORKERS*(1+(2.515334+19.52286*ADDED_FROM_BASELINE)/40) #Add induced
|
|
#Estimate construction workers on data center
|
|
AVG_CONST_COST_PER_MW <- (10.4+13.2)/2
|
|
RELATIVE_COST_DRAW <- runif(1,10.4*0.75,13.2*1.25)/AVG_CONST_COST_PER_MW #Draw from the range of reported construction costs per MW of capacity in Cheyene. See "DATA CENTER DEVELOPMENT COST GUIDE"
|
|
PERCENT_IN_KEMMERER <- runif(1,0.41,1) #What percent of the in migration will live in Kemmerer vs other areas. Taken from TerraPower assumptions
|
|
POP_PER_YEAR <- (1.15*46.02*MW_CAPACITY)/3 #MW capacity ranges from 18 to 120 based on historic Sabre projects. IMPLAN estimates 46.02 total jobs, per MW of capacity (using average cost per MWh). 1.15 accounts for 5% of workers bringing families
|
|
TEMP_DATA_MIGRATION <- round(PERCENT_IN_KEMMERER*RELATIVE_COST_DRAW*(POP_PER_YEAR))
|
|
|
|
|
|
OTHER_PROJECT_EFFECTS_TEMP[DATA_CENTER_START_YEAR+3] <- OTHER_PROJECT_EFFECTS_TEMP[DATA_CENTER_START_YEAR+3]+TEMP_DATA_MIGRATION #Enter
|
|
OTHER_PROJECT_EFFECTS_TEMP[DATA_CENTER_START_YEAR+6] <- OTHER_PROJECT_EFFECTS_TEMP[DATA_CENTER_START_YEAR+6]-TEMP_DATA_MIGRATION #Leave
|
|
OTHER_PROJECT_EFFECTS_TEMP_POP_ADDED[(DATA_CENTER_START_YEAR+3):(DATA_CENTER_START_YEAR+5)]<- OTHER_PROJECT_EFFECTS_TEMP_POP_ADDED[(DATA_CENTER_START_YEAR+3):(DATA_CENTER_START_YEAR+5)]+TEMP_DATA_MIGRATION
|
|
OTHER_PROJECT_EFFECTS_PERM[DATA_CENTER_START_YEAR+6] <- OTHER_PROJECT_EFFECTS_PERM[DATA_CENTER_START_YEAR+3]+PERM_DATA_CENTER_OP
|
|
}
|
|
MIGRATION_SIM_VALUES <- round(as.vector(simulate(nsim=YEARS_AHEAD,MIGRATION_ARIMA_MODEL))+TERRA_POWER_EFFECT+OTHER_PROJECT_EFFECTS_PERM)
|
|
#The runif applies a downshift ranging from the historic decline rate all the way to the Lincoln rate applied in the model
|
|
|
|
FINAL_REPORT_VALUES <- matrix(NA,ncol=6,nrow=YEARS_AHEAD)
|
|
colnames(FINAL_REPORT_VALUES ) <- c("Sim_UUID","Year","Population","Births","Deaths","Net_Migration")
|
|
FINAL_REPORT_VALUES[,1] <- UUIDgenerate()
|
|
for(i in 1:YEARS_AHEAD){
|
|
C_YEAR <- ST_YEAR+i-1
|
|
C_RES <-SINGLE_YEAR_SIM(DEMO,BIRTH_DATA,i,SIMULATE_MORTALITY_RATE_TRENDS(),MIGRATION_SIM_VALUES[i])
|
|
DEMO <- C_RES[[1]]
|
|
BIRTH_DATA <- C_RES[[2]]
|
|
FINAL_REPORT_VALUES[i,-1] <- c(C_YEAR,C_RES[[3]])
|
|
}
|
|
FINAL_REPORT_VALUES[1:7,3] <- as.numeric(FINAL_REPORT_VALUES[1:7,3])+CONSTRUCTION_POPULATION_ADDED
|
|
FINAL_REPORT_VALUES[1:7,6] <- as.numeric(FINAL_REPORT_VALUES[1:7,6]) +CONSTRUCTION_MIGRATION
|
|
|
|
FINAL_REPORT_VALUES[,6] <- as.numeric(FINAL_REPORT_VALUES[,6]) +OTHER_PROJECT_EFFECTS_TEMP[1:nrow(FINAL_REPORT_VALUES)]
|
|
FINAL_REPORT_VALUES[,3] <- as.numeric(FINAL_REPORT_VALUES[,3]) +OTHER_PROJECT_EFFECTS_TEMP_POP_ADDED[1:nrow(FINAL_REPORT_VALUES)]
|
|
return(FINAL_REPORT_VALUES)
|
|
}
|
|
|
|
if(!exists("RES_DIR")){RES_DIR<- "./Results/"}
|
|
if(!exists("RES_SIM_DIR")){RES_SIM_DIR <- paste0(RES_DIR,"Simulations/")}
|
|
dir.create(RES_SIM_DIR, recursive = TRUE, showWarnings = FALSE)
|
|
|
|
|
|
NCORES <- detectCores()-1
|
|
BATCH_SIZE <- NCORES*10
|
|
TOTAL_SIMULATIONS <- 10^6
|
|
N_RUNS <-ceiling(TOTAL_SIMULATIONS/BATCH_SIZE )
|
|
|
|
SIM_RES_FILE <- paste0(RES_SIM_DIR,"Kemmerer_2024_Upper_Bound_With_Data_Center_Simulation.csv")
|
|
NEW_RES_FILE <- !file.exists(SIM_RES_FILE)
|
|
OPERATOR_LIN_MIGRATION <- OPERATORS %>% pull("Operator_Emp_Migrated")
|
|
CONSTRUCTION_LIN_MIGRATION <- CONSTRUCTION %>% pull("Construction_Emp_Migrated")
|
|
|
|
for(i in 1:N_RUNS){
|
|
# MIGRATION_MATRIX <- simulate(nsim=BATCH_SIZE,MIGRATION_ARIMA,n=YEARS_AHEAD)
|
|
MIGRATION_MATRIX <- do.call(cbind, mclapply(1:BATCH_SIZE,function(x)(as.vector(simulate(nsim=YEARS_AHEAD,MIGRATION_ARIMA) )),mc.cores = detectCores()-1))
|
|
# rownames(MIGRATION_MATRIX) <- ST_YEAR:(ST_YEAR+YEARS_AHEAD-1)
|
|
# colnames(MIGRATION_MATRIX) <- 1:BATCH_SIZE
|
|
#SINGLE_SIM(DEMO,BIRTH_DATA,ST_YEAR,YEARS_AHEAD,MIGRATION_ARIMA,OPERATOR_LIN_MIGRATION,CONSTRUCTION_LIN_MIGRATION,INDUCED_MIGRATION_MULTIPLIERS)
|
|
try(RES <- do.call(rbind,mclapply(1:BATCH_SIZE,function(x){SINGLE_SIM(DEMO,BIRTH_DATA,ST_YEAR,YEARS_AHEAD,MIGRATION_ARIMA,OPERATOR_LIN_MIGRATION,CONSTRUCTION_LIN_MIGRATION,INDUCED_MIGRATION_MULTIPLIERS)},mc.cores=NCORES)))
|
|
if(exists("RES")){
|
|
RES <- as.data.frame(RES)
|
|
RES[,-1] <- as.numeric(as.matrix(RES[,-1]))
|
|
if(NEW_RES_FILE & i==1){write_csv(RES,SIM_RES_FILE)}else {write_csv(RES,SIM_RES_FILE,col_names=FALSE,append=TRUE)}
|
|
rm(RES)
|
|
}
|
|
}
|
|
|