Got test simulation running
This commit is contained in:
parent
e3764b0c37
commit
8e6a4f48c7
2
.gitignore
vendored
2
.gitignore
vendored
@ -2,6 +2,8 @@
|
|||||||
#
|
#
|
||||||
Data/Raw_Data/Population/
|
Data/Raw_Data/Population/
|
||||||
Data/Raw_Data/Demographics/
|
Data/Raw_Data/Demographics/
|
||||||
|
*.swp
|
||||||
|
|
||||||
*.png
|
*.png
|
||||||
*.csv
|
*.csv
|
||||||
Data/Cleaned_Data/
|
Data/Cleaned_Data/
|
||||||
|
|||||||
@ -20,17 +20,14 @@ NUM_SIMULATIONS <- 10^4
|
|||||||
ST_YEAR <- 2017
|
ST_YEAR <- 2017
|
||||||
################################Load Data
|
################################Load Data
|
||||||
DEMO <- readRDS("Data/Intermediate_Inputs/Starting_Demographic_Data_Sets_of_Monte_Carlo/2016_Starting_Kemmerer_Diamondville_Demographics_Matrix.Rds")
|
DEMO <- readRDS("Data/Intermediate_Inputs/Starting_Demographic_Data_Sets_of_Monte_Carlo/2016_Starting_Kemmerer_Diamondville_Demographics_Matrix.Rds")
|
||||||
|
sum(readRDS("Data/Intermediate_Inputs/Starting_Demographic_Data_Sets_of_Monte_Carlo/2023_Starting_Kemmerer_Diamondville_Demographics_Matrix.Rds"))
|
||||||
|
|
||||||
BIRTH_MOD <- readRDS("Data/Intermediate_Inputs/Birth_Regressions/Birth_Regression_2016.Rds")
|
BIRTH_MOD <- readRDS("Data/Intermediate_Inputs/Birth_Regressions/Birth_Regression_2016.Rds")
|
||||||
#Must add region as a factor with multiple levels for predict to work. Seems to check for multiple levels although that is not needed econometrics.
|
#Must add region as a factor with multiple levels for predict to work. Seems to check for multiple levels although that is not needed econometrics.
|
||||||
BIRTH_DATA <- readRDS("Data/Intermediate_Inputs/Birth_Regressions/Regression_Data/Birth_Simulation_Key_Starting_Points.Rds") %>% mutate(Region=factor(Region)) %>% filter(Region=='Kemmerer & Diamondville',Year==2016)
|
BIRTH_DATA <- readRDS("Data/Intermediate_Inputs/Birth_Regressions/Regression_Data/Birth_Simulation_Key_Starting_Points.Rds") %>% mutate(Region=factor(Region)) %>% filter(Region=='Kemmerer & Diamondville',Year==2016)
|
||||||
MIGRATION_ARIMA <- readRDS("Data/Intermediate_Inputs/Migration_ARIMA_Models/Kemmerer_Diamondville_Net_Migration_ARIMA_2016.Rds")
|
MIGRATION_ARIMA <- readRDS("Data/Intermediate_Inputs/Migration_ARIMA_Models/Kemmerer_Diamondville_Net_Migration_ARIMA_2016.Rds")
|
||||||
MIGRATION_ODDS <- readRDS("Data/Intermediate_Inputs/Migration_Trends/Migration_Age_Probability_Zero_to_85.Rds")
|
MIGRATION_ODDS <- readRDS("Data/Intermediate_Inputs/Migration_Trends/Migration_Age_Probability_Zero_to_85.Rds")
|
||||||
|
##############
|
||||||
|
|
||||||
MIGRATION_MATRIX <- simulate(nsim=NUM_SIMULATIONS,MIGRATION_ARIMA,n=YEARS_AHEAD)
|
|
||||||
MIGRATION_MATRIX <- do.call(cbind, mclapply(1:NUM_SIMULATIONS,function(x)(as.vector(simulate(nsim=YEARS_AHEAD,MIGRATION_ARIMA) )),mc.cores = detectCores()-1))
|
|
||||||
rownames(MIGRATION_MATRIX) <- ST_YEAR:(ST_YEAR+YEARS_AHEAD-1)
|
|
||||||
colnames(MIGRATION_MATRIX) <- 1:NUM_SIMULATIONS
|
|
||||||
#Data for death rate trends
|
#Data for death rate trends
|
||||||
SINGLE_AGE_MODS <- readRDS("Data/Intermediate_Inputs/Mortality_Regression_Data/Single_Sex_Age_Time_Series_Regression_2016.Rds")
|
SINGLE_AGE_MODS <- readRDS("Data/Intermediate_Inputs/Mortality_Regression_Data/Single_Sex_Age_Time_Series_Regression_2016.Rds")
|
||||||
BOUNDS <- readRDS("Data/Intermediate_Inputs/Mortality_Regression_Data/Single_Sex_Age_Bounds_for_Predictions.Rds")
|
BOUNDS <- readRDS("Data/Intermediate_Inputs/Mortality_Regression_Data/Single_Sex_Age_Bounds_for_Predictions.Rds")
|
||||||
@ -52,14 +49,17 @@ colnames(MIGRATION_MATRIX) <- 1:NUM_SIMULATIONS
|
|||||||
MOD_LIN_WOMEN <- readRDS("Data/Intermediate_Inputs/Age_Mortality_ARIMA_Models/ARIMA_Lincoln_Women_Mortality_by_Age_2016.Rds")
|
MOD_LIN_WOMEN <- readRDS("Data/Intermediate_Inputs/Age_Mortality_ARIMA_Models/ARIMA_Lincoln_Women_Mortality_by_Age_2016.Rds")
|
||||||
XREG <- cbind(rep(0.0001,YEARS_AHEAD+1),rep(0.0001,YEARS_AHEAD+1)) #Empty data set to simulate in the future
|
XREG <- cbind(rep(0.0001,YEARS_AHEAD+1),rep(0.0001,YEARS_AHEAD+1)) #Empty data set to simulate in the future
|
||||||
XREG <- ts(XREG,start=ST_YEAR,frequency=1)
|
XREG <- ts(XREG,start=ST_YEAR,frequency=1)
|
||||||
SIMULATED_MORTALITY_DATA_SET <- MAKE_EMPTY(ST_YEAR,ST_YEAR+YEARS_AHEAD,MOD_LIN_MEN,MOD_LIN_WOMEN,MOD_MEN_ALL,MOD_WOMEN_ALL,XREG)
|
SIMULATE_MORTALITY_RATE_TRENDS <- function(){
|
||||||
MORTALITY_SIMULATION <- AGE_DIST(SINGLE_AGE_MODS,SIMULATED_MORTALITY_DATA_SET ,MAX_MALE,MAX_FEMALE,MIN_MALE,MIN_FEMALE,MAX_GAP,MIN_GAP,BASELINE_AGE_ADJUST_MEN,BASELINE_AGE_ADJUST_WOMEN)
|
SIMULATED_MORTALITY_DATA_SET <- MAKE_EMPTY(ST_YEAR,ST_YEAR+YEARS_AHEAD,MOD_LIN_MEN,MOD_LIN_WOMEN,MOD_MEN_ALL,MOD_WOMEN_ALL,XREG)
|
||||||
|
MORTALITY_SIMULATION <- AGE_DIST(SINGLE_AGE_MODS,SIMULATED_MORTALITY_DATA_SET ,MAX_MALE,MAX_FEMALE,MIN_MALE,MIN_FEMALE,MAX_GAP,MIN_GAP,BASELINE_AGE_ADJUST_MEN,BASELINE_AGE_ADJUST_WOMEN)
|
||||||
|
return(MORTALITY_SIMULATION )
|
||||||
|
}
|
||||||
|
|
||||||
#####################START YEAR BY SIMULATIONS
|
#####################START YEAR BY SIMULATIONS
|
||||||
CURRENT_YEARS_AHEAD <- 1
|
#CURRENT_YEARS_AHEAD=1;CURRENT_SIM_NUM <- 1;MORTALITY_SIMULATION <- SIMULATE_MORTALITY_RATE_TRENDS()
|
||||||
CURRENT_SIM_NUM <- 58
|
SINGLE_YEAR_SIM <- function(DEMO,BIRTH_DATA,CURRENT_YEARS_AHEAD,MORTALITY_SIMULATION,NET_MIGRATION){
|
||||||
ORIG_DEMO <- DEMO
|
ORIG_DEMO <- DEMO
|
||||||
DEMO <- DEMOGRAPHICS_AFTER_MIGRATION(DEMO, MIGRATION_MATRIX[CURRENT_YEARS_AHEAD ,CURRENT_SIM_NUM],MIGRATION_ODDS )
|
DEMO <- DEMOGRAPHICS_AFTER_MIGRATION(DEMO, NET_MIGRATION,MIGRATION_ODDS )
|
||||||
TOTAL_MIGRATION <- sum(DEMO-ORIG_DEMO)
|
TOTAL_MIGRATION <- sum(DEMO-ORIG_DEMO)
|
||||||
BIRTH_DATA$Year <- BIRTH_DATA$Year+1
|
BIRTH_DATA$Year <- BIRTH_DATA$Year+1
|
||||||
BIRTH_DATA$Lag_Two_Births <- BIRTH_DATA$Lag_Births
|
BIRTH_DATA$Lag_Two_Births <- BIRTH_DATA$Lag_Births
|
||||||
@ -69,86 +69,61 @@ BIRTH_DATA$Births <- NA
|
|||||||
BIRTH_DATA$Min_Birth_Group <- min(sum(DEMO[18:30,1]),sum(DEMO[18:28,2]))
|
BIRTH_DATA$Min_Birth_Group <- min(sum(DEMO[18:30,1]),sum(DEMO[18:28,2]))
|
||||||
NEW_BORNS <- BIRTH_SIM(BIRTH_MOD,BIRTH_DATA)
|
NEW_BORNS <- BIRTH_SIM(BIRTH_MOD,BIRTH_DATA)
|
||||||
TOTAL_BIRTHS <- sum(NEW_BORNS)
|
TOTAL_BIRTHS <- sum(NEW_BORNS)
|
||||||
BIRTH_DATA[,"Births"] <-TOTAL_BIRTHS
|
BIRTH_DATA[,"Births"] <- TOTAL_BIRTHS
|
||||||
DEMO <- INCREMENT_AGES(DEMO,NEW_BORNS)
|
DEMO <- INCREMENT_AGES(DEMO,NEW_BORNS)
|
||||||
|
MORTALITY_SIMULATION
|
||||||
MALE_DEATHS <- sapply(1:86,function(x){rbinom(1,DEMO[x,1],MORTALITY_SIMULATION[[1]][x,CURRENT_YEARS_AHEAD])})
|
MALE_DEATHS <- sapply(1:86,function(x){rbinom(1,DEMO[x,1],MORTALITY_SIMULATION[[1]][x,CURRENT_YEARS_AHEAD])})
|
||||||
FEMALE_DEATHS <- sapply(1:86,function(x){rbinom(1,DEMO[x,2],MORTALITY_SIMULATION[[2]][x,CURRENT_YEARS_AHEAD])})
|
FEMALE_DEATHS <- sapply(1:86,function(x){rbinom(1,DEMO[x,2],MORTALITY_SIMULATION[[2]][x,CURRENT_YEARS_AHEAD])})
|
||||||
|
MALE_DEATHS <- ifelse(MALE_DEATHS>=DEMO[,1],DEMO[,1],MALE_DEATHS)
|
||||||
|
FEMALE_DEATHS <- ifelse(FEMALE_DEATHS>=DEMO[,1],DEMO[,1],FEMALE_DEATHS)
|
||||||
|
|
||||||
TOTAL_DEATHS <- sum(MALE_DEATHS+FEMALE_DEATHS)
|
TOTAL_DEATHS <- sum(MALE_DEATHS+FEMALE_DEATHS)
|
||||||
DEMO[,"Num_Male"] <- DEMO[,"Num_Male"] -MALE_DEATHS
|
DEMO[,"Num_Male"] <- DEMO[,"Num_Male"] -MALE_DEATHS
|
||||||
DEMO[,"Num_Female"] <- DEMO[,"Num_Female"] -MALE_DEATHS
|
DEMO[,"Num_Female"] <- DEMO[,"Num_Female"] -FEMALE_DEATHS
|
||||||
#List of values needed for the next run or for reporting a result
|
#List of values needed for the next run or for reporting a result
|
||||||
list(DEMO,BIRTH_DATA,c(TOTAL_BIRTHS,TOTAL_DEATHS,TOTAL_MIGRATION))
|
TOTAL_POP <- sum(DEMO)
|
||||||
|
return(list(DEMO,BIRTH_DATA,c(TOTAL_POP,TOTAL_BIRTHS,TOTAL_DEATHS,TOTAL_MIGRATION)))
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
#####User Configuration Values
|
|
||||||
KEMMER_SIM <- TRUE #Wether the simulation should predict Kemmerer (and Diamondville) or Lincoln County as a whole. TRUE, is Kemmerer False is Lincoln
|
|
||||||
START_SIM_YEAR <- 2025 #The first year to simulate
|
|
||||||
NUM_SIMULATIONS <- 10^5 #Number of Monte Carlo Simulations to run
|
|
||||||
RERUN_MORTALITY_SIMULATION <- TRUE #Rerun the Monte Carlo simulation of future mortality rates (not actual deaths) even if a Rds file of a mortality rates exists. This can be used to speed up runs when FALSE
|
|
||||||
RERUN_MIGRATION_SIMULATION <- TRUE #Rerun the ARIMA simulations that predict total migration in any year even if a Rds file of a mortality rates exists. This can be used to speed up runs when FALSE
|
|
||||||
|
|
||||||
NUM_YEARS_PROJECTED <- 50 #How many years into the future should each Monte Carlo run project to. For example 25 years if starting from 2025 and ending in 2050.
|
|
||||||
BIRTH_RATE_REG_RESULTS <- "Data/Regression_Results/Birth_Rate_Model.Rds" #Location of the regression used to model variance in birth rates in each Monte Carlo simulation.
|
|
||||||
START_DEMOGRAPHIC_DATA <- "Data/Cleaned_Data/Start_Year_Demographic_Data_With_Fertility_Groups.Rds" #Location of the data for the first year needing a forecasted birth rate, which aggregates the yearly splits of populations, into a single, year-county data set with variables for key birth prediction (total number of men and women in ages with high fertility rates), and then combines with the data set including births, deaths, migration and population.
|
|
||||||
AGE_OF_MIGRANT_PROBABILITY <- "Data/Other_Intermediate_Outputs/Migreation_Age_Probablity_One_to_Ninety.csv" #Location of the data which is the result of regression analysis of the age of migrants. That is to say 18 year olds may migrate more than 70 year olds, and this is the distribution by age. Sex was not found to be a major factor
|
|
||||||
####Run any scripts required before main Monte Carlo
|
|
||||||
source("Survival_Simulation.r") #Populate a table with a simulation of future mortality rates, for quick recall during the simulation.
|
|
||||||
#A script contains the code needed to create a feols (fixest) regression of the birth rate given age distribution. Load this saved result or else create it to use in each simulation for gathering variance of births in any given age distribution path of the Monte Carlo.
|
|
||||||
if(file.exists(BIRTH_RATE_REG_RESULTS)){MOD_BIRTHS <- readRDS(BIRTH_RATE_REG_RESULTS);FIRST_PREDICT_YEAR_POPULATION_DATA <- readRDS(START_DEMOGRAPHIC_DATA)} else{source("Birth_Rate_Regression.r")}
|
|
||||||
if(file.exists(AGE_OF_MIGRANT_PROBABILITY)){MIG_AGE_DIST <- read.csv(AGE_OF_MIGRANT_PROBABILITY)} else{source("Migration_Regression.r")}
|
|
||||||
|
|
||||||
#Rerun the migration simulation if requested
|
|
||||||
if(RERUN_MIGRATION_SIMULATION ){source("Migration_Regression.r")}
|
|
||||||
#######################################################Main Monte Carlo
|
|
||||||
START_DEM_DATA <- readRDS("Data/Cleaned_Data/Lincoln_Demographic_Data.Rds") %>% group_by(County) %>% filter(Year==2023) %>% ungroup %>% select(-County)
|
|
||||||
MORTALITY_SIMULATION <- readRDS("./Data/Simulated_Data_Sets/MORTALITY_MONTE_CARLO.Rds") #Load the Mortality simulation to speed up simulation
|
|
||||||
MIGRATION_ARIMA_SIMULATION <- readRDS("./Data/Simulated_Data_Sets/Migration_ARIMA.Rds") #Load the Migration simulation to speed up simulation
|
|
||||||
|
|
||||||
#####################!!!!!!!!!!!!!!!!!!!Working on pulling in data from the Data/Cleaned_Data/Intiate_Simulation/ directory which stores a starting point for all groups. Make data much more clean
|
|
||||||
if(KEMMER_SIM){
|
|
||||||
LN_POP_ST <- FIRST_PREDICT_YEAR_POPULATION_DATA$Population #Population of Lincoln County
|
|
||||||
START_DEM_DATA <- readRDS("Data/Cleaned_Data/Kemmerer_Demographic_Data.Rds") %>% select(-County) %>% mutate(County='Lincoln')
|
|
||||||
FIRST_PREDICT_YEAR_POPULATION_DATA <- readRDS("Data/Cleaned_Data/Kemmerer_Summary_Start_Data.Rds")
|
|
||||||
KEM_POP_ST <- FIRST_PREDICT_YEAR_POPULATION_DATA$Population
|
|
||||||
POP_ST_RATIO <- LN_POP_ST/KEM_POP_ST
|
|
||||||
MIGRATION_ARIMA_SIMS <- round(MIGRATION_ARIMA_SIMS*POP_ST_RATIO) #Downscale County migreation to the city level based on average popualtion
|
|
||||||
}
|
|
||||||
#Second run, work on making into loop and saving the output to file (check if that will slow the results). Maybe use a predefined matrix, so that the results can be stored quickly
|
|
||||||
|
|
||||||
SINGLE_PATH_SIM <- function(j){
|
|
||||||
C_RES <- RUN_SINGLE_SIM(MOD_BIRTHS,FIRST_PREDICT_YEAR_POPULATION_DATA,START_DEM_DATA,MORTALITY_SIMULATION,SIM_NUMBER=j,START_OF_SIM=START_SIM_YEAR,MIGRATION_ARIMA_SIMULATION,MIG_AGE_DIST)
|
|
||||||
RES <- C_RES[[1]]
|
|
||||||
for(i in 1:NUM_YEARS_PROJECTED){
|
|
||||||
C_RES <- RUN_SINGLE_SIM(MOD_BIRTHS,C_RES[[3]],C_RES[[2]],MORTALITY_SIMULATION,SIM_NUMBER=j,START_OF_SIM=START_SIM_YEAR,MIGRATION_ARIMA_SIMULATION,MIG_AGE_DIST)
|
|
||||||
RES <- rbind(RES,C_RES[[1]])
|
|
||||||
}
|
|
||||||
return(RES)
|
|
||||||
}
|
|
||||||
#Run the full simulation across all simulations simulating changes in demographic, and mortality data.
|
|
||||||
#Setup save results
|
|
||||||
RES_DIR <- "./Results"
|
|
||||||
RAW_SIM_FILE <- paste0(RES_DIR,"/Raw_Simulations.csv")
|
|
||||||
PERCENTILE_DATA <- paste0(RES_DIR,"/Percentile_Clean_Results.csv")
|
|
||||||
#Loop all results saving in batches
|
|
||||||
dir.create(RES_DIR,showWarnings=FALSE)
|
|
||||||
BATCH_SIZE <- 1000
|
|
||||||
NUM_RUNS <- ceiling(NUM_SIMULATIONS/BATCH_SIZE)
|
|
||||||
#Run the loop
|
|
||||||
for(x in 1:NUM_RUNS) {
|
|
||||||
BATCH_GUID <- UUIDgenerate()
|
|
||||||
try(FULL_RESULTS <- mclapply(1:BATCH_SIZE,function(x){try(SINGLE_PATH_SIM(x))},mc.cores = detectCores()-1))
|
|
||||||
if(exists("FULL_RESULTS")){
|
|
||||||
FULL_RESULTS <- do.call(rbind,lapply(1:BATCH_SIZE,function(x){FULL_RESULTS[[x]] %>% mutate(SIM_ID=UUIDgenerate())}))
|
|
||||||
FULL_RESULTS$BATCH_ID <- BATCH_GUID
|
|
||||||
FULL_RESULTS <- FULL_RESULTS%>% select(BATCH_ID,SIM_ID,everything())
|
|
||||||
if(x==1){write_csv(FULL_RESULTS,RAW_SIM_FILE)}else {write_csv(FULL_RESULTS,RAW_SIM_FILE,append=TRUE)}
|
|
||||||
rm(FULL_RESULTS)
|
|
||||||
gc()
|
|
||||||
}
|
}
|
||||||
|
MIGRATION_ARIMA_MODEL <- MIGRATION_ARIMA
|
||||||
|
SINGLE_SIM <- function(DEMO,BIRTH_DATA,ST_YEAR,YEARS_AHEAD,MIGRATION_ARIMA_MODEL){
|
||||||
|
MIGRATION_SIM_VALUES <- round(as.vector(simulate(nsim=YEARS_AHEAD,MIGRATION_ARIMA_MODEL) ))
|
||||||
|
|
||||||
|
FINAL_REPORT_VALUES <- matrix(NA,ncol=6,nrow=YEARS_AHEAD)
|
||||||
|
colnames(FINAL_REPORT_VALUES ) <- c("Sim_UUID","Year","Population","Births","Deaths","Net_Migration")
|
||||||
|
FINAL_REPORT_VALUES[,1] <- UUIDgenerate()
|
||||||
|
for(i in 1:YEARS_AHEAD){
|
||||||
|
C_YEAR <- ST_YEAR+i-1
|
||||||
|
C_RES <-SINGLE_YEAR_SIM(DEMO,BIRTH_DATA,i,SIMULATE_MORTALITY_RATE_TRENDS(),MIGRATION_SIM_VALUES[i])
|
||||||
|
DEMO <- C_RES[[1]]
|
||||||
|
BIRTH_DATA <- C_RES[[2]]
|
||||||
|
FINAL_REPORT_VALUES[i,-1] <- c(C_YEAR,C_RES[[3]])
|
||||||
|
}
|
||||||
|
return(FINAL_REPORT_VALUES)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if(!exists("RES_DIR")){RES_DIR<- "./Results/"}
|
||||||
|
if(!exists("RES_SIM_DIR")){RES_SIM_DIR <- paste0(RES_DIR,"Simulations/")}
|
||||||
|
dir.create(RES_SIM_DIR, recursive = TRUE, showWarnings = FALSE)
|
||||||
|
|
||||||
|
|
||||||
|
NCORES <- detectCores()-1
|
||||||
|
BATCH_SIZE <- NCORES*10
|
||||||
|
TOTAL_SIMULATIONS <- 10^5
|
||||||
|
N_RUNS <-ceiling(TOTAL_SIMULATIONS/BATCH_SIZE )
|
||||||
|
|
||||||
|
SIM_RES_FILE <- paste0(RES_SIM_DIR,"Kemmerer_2016_Simulation.csv")
|
||||||
|
NEW_RES_FILE <- !file.exists(SIM_RES_FILE)
|
||||||
|
for(i in 1:N_RUNS){
|
||||||
|
# MIGRATION_MATRIX <- simulate(nsim=BATCH_SIZE,MIGRATION_ARIMA,n=YEARS_AHEAD)
|
||||||
|
MIGRATION_MATRIX <- do.call(cbind, mclapply(1:BATCH_SIZE,function(x)(as.vector(simulate(nsim=YEARS_AHEAD,MIGRATION_ARIMA) )),mc.cores = detectCores()-1))
|
||||||
|
# rownames(MIGRATION_MATRIX) <- ST_YEAR:(ST_YEAR+YEARS_AHEAD-1)
|
||||||
|
# colnames(MIGRATION_MATRIX) <- 1:BATCH_SIZE
|
||||||
|
try(RES <- do.call(rbind,mclapply(1:BATCH_SIZE,function(x){SINGLE_SIM(DEMO,BIRTH_DATA,ST_YEAR,YEARS_AHEAD,MIGRATION_ARIMA)},mc.cores=NCORES)))
|
||||||
|
if(exists("RES")){
|
||||||
|
RES <- as.data.frame(RES)
|
||||||
|
RES[,-1] <- as.numeric(as.matrix(RES[,-1]))
|
||||||
|
if(NEW_RES_FILE){write_csv(RES,SIM_RES_FILE)}else {write_csv(RES,SIM_RES_FILE,col_names=FALSE,append=TRUE)}
|
||||||
|
rm(RES)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
|||||||
@ -1,29 +1,36 @@
|
|||||||
library(tidyverse)
|
library(tidyverse)
|
||||||
NUM_YEARS_PROJECTED <- 50 #How many years into the future should each Monte Carlo run project to. For example 25 years if starting from 2025 and ending in 2050.
|
|
||||||
YEARS <- 2023:(2023+NUM_YEARS_PROJECTED)
|
|
||||||
#Setup save results
|
|
||||||
RES_DIR <- "./Results"
|
|
||||||
RAW_SIM_FILE <- paste0(RES_DIR,"/Raw_Simulations.csv")
|
|
||||||
PERCENTILE_DATA <- paste0(RES_DIR,"/Percentile_Clean_Results.csv")
|
|
||||||
|
|
||||||
###Process the simulations and save the main percentile results by year
|
###Process the simulations and save the main percentile results by year
|
||||||
RES <- read_csv(RAW_SIM_FILE)
|
RES <- read_csv("Results/Simulations/Kemmerer_2016_Simulation.csv")
|
||||||
GRAPH_DATA <- do.call(rbind,lapply(YEARS,function(x){quantile(RES %>% filter(Year==x) %>% pull(Population),c(0.025,0.05,0.1,0.25,0.4,0.5,0.6,0.75,0.9,0.95,0.975))})) %>% as_tibble
|
YEARS <- min(RES$Year):max(RES$Year)
|
||||||
YEARS <- 2023:(2023+NUM_YEARS_PROJECTED)
|
######Population
|
||||||
GRAPH_DATA$Year <- YEARS
|
GRAPH_DATA <- do.call(rbind,lapply(YEARS,function(x){quantile(RES %>% filter(Year==x) %>% pull(Population),c(0.025,0.05,0.1,0.25,0.4,0.5,0.6,0.75,0.9,0.95,0.975))})) %>% as_tibble %>% mutate(Year=YEARS)
|
||||||
FAN_DATA <- GRAPH_DATA
|
FAN_DATA <- GRAPH_DATA
|
||||||
GRAPH_DATA <- GRAPH_DATA %>% pivot_longer(cols=!Year,names_to=c("Percentile"),values_to="Population")
|
GRAPH_DATA <- GRAPH_DATA %>% pivot_longer(cols=!Year,names_to=c("Percentile"),values_to="Population")
|
||||||
write_csv(GRAPH_DATA,PERCENTILE_DATA)
|
GRAPH_DATA$Percentile <- factor(GRAPH_DATA$Percentile,levels=rev(c('2.5%','5%','10%','25%','40%','60%','75%','90%','95%','97.5%')))
|
||||||
|
START_POP <- HIST %>% filter(Year==2016) %>% pull(Population)
|
||||||
|
GRAPH_DATA <- rbind(GRAPH_DATA,GRAPH_DATA %>% filter(Year==2017) %>% mutate(Population=START_POP,Year=2016))
|
||||||
|
MEDIAN_PRED <- GRAPH_DATA %>% filter(Percentile=='50%')
|
||||||
|
######Migration
|
||||||
|
GRAPH_DATA_MIGRATION <- do.call(rbind,lapply(YEARS,function(x){quantile(RES %>% filter(Year==x) %>% pull(Net_Migration),c(0.025,0.05,0.1,0.25,0.4,0.5,0.6,0.75,0.9,0.95,0.975))})) %>% as_tibble %>% mutate(Year=YEARS)
|
||||||
|
FAN_DATA_MIGRATION <- GRAPH_DATA_MIGRATION
|
||||||
|
GRAPH_DATA_MIGRATION <- GRAPH_DATA_MIGRATION %>% pivot_longer(cols=!Year,names_to=c("Percentile"),values_to="Migration")
|
||||||
|
GRAPH_DATA_MIGRATION$Percentile <- factor(GRAPH_DATA_MIGRATION$Percentile,levels=rev(c('2.5%','5%','10%','25%','40%','60%','75%','90%','95%','97.5%')))
|
||||||
|
MEDIAN_PRED_MIGRATION <- GRAPH_DATA_MIGRATION %>% filter(Percentile=='50%')
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
#write_csv(GRAPH_DATA,PERCENTILE_DATA)
|
||||||
#Add historic
|
#Add historic
|
||||||
MEDIAN_PRED <- GRAPH_DATA %>% filter(Percentile=='50%')
|
MEDIAN_PRED <- GRAPH_DATA %>% filter(Percentile=='50%')
|
||||||
GRAPH_DATA <- GRAPH_DATA %>% filter(Percentile!='50%')
|
GRAPH_DATA <- GRAPH_DATA %>% filter(Percentile!='50%')
|
||||||
|
HIST <- readRDS("Data/Cleaned_Data/Population_Data/RDS/Kemmerer_Diamondville_Population_Data.Rds") %>% filter(County=='Lincoln') %>% mutate(Percentile="Actual Population") %>% filter(Year>=1940)
|
||||||
|
|
||||||
HIST <- readRDS("Data/Cleaned_Data/Wyoming_County_Population.Rds") %>% filter(County=='Lincoln') %>% mutate(Percentile="Actual Population") %>% filter(Year>1930)
|
|
||||||
ALPHA=0.2
|
ALPHA=0.2
|
||||||
COLOR <- 'black'
|
COLOR <- 'black'
|
||||||
GRAPH_DATA$Percentile <- factor(GRAPH_DATA$Percentile,levels=rev(c('2.5%','5%','10%','25%','40%','60%','75%','90%','95%','97.5%')))
|
#GRAPH <-
|
||||||
GRAPH <- ggplot(data=GRAPH_DATA)+geom_ribbon(data=FAN_DATA,aes(x=Year,ymin=`2.5%`,ymax=`97.5%`),alpha=ALPHA,fill=COLOR)+geom_ribbon(data=FAN_DATA,aes(x=Year,ymin=`5%`,ymax=`95%`),alpha=ALPHA,fill=COLOR)+geom_ribbon(data=FAN_DATA,aes(x=Year,ymin=`10%`,ymax=`90%`),alpha=ALPHA,fill=COLOR)+geom_ribbon(data=FAN_DATA,aes(x=Year,ymin=`25%`,ymax=`75%`),alpha=ALPHA,fill=COLOR)+geom_ribbon(data=FAN_DATA,aes(x=Year,ymin=`40%`,ymax=`60%`),alpha=ALPHA,fill=COLOR)+geom_line(aes(x=Year,y=Population,group=Percentile,color=Percentile))+geom_line(data=HIST,aes(x=Year,y=Population),color='black',linewidth=0.75)+geom_line(data=MEDIAN_PRED,aes(x=Year,y=Population),color='black',linetype=4,linewidth=0.75)+ scale_x_continuous(breaks = c(seq(1940, 2070, by = 10)))+ scale_y_continuous(breaks = seq(0, 35000, by = 5000))+theme_bw()+ggtitle("Lincoln County, Wyoming Population Forecast")
|
nrow(RES)
|
||||||
GRAPH
|
ggplot(data=GRAPH_DATA)+geom_ribbon(data=FAN_DATA,aes(x=Year,ymin=`2.5%`,ymax=`97.5%`),alpha=ALPHA,fill=COLOR)+geom_ribbon(data=FAN_DATA,aes(x=Year,ymin=`5%`,ymax=`95%`),alpha=ALPHA,fill=COLOR)+geom_ribbon(data=FAN_DATA,aes(x=Year,ymin=`10%`,ymax=`90%`),alpha=ALPHA,fill=COLOR)+geom_ribbon(data=FAN_DATA,aes(x=Year,ymin=`25%`,ymax=`75%`),alpha=ALPHA,fill=COLOR)+geom_ribbon(data=FAN_DATA,aes(x=Year,ymin=`40%`,ymax=`60%`),alpha=ALPHA,fill=COLOR)+geom_line(aes(x=Year,y=Population,group=Percentile,color=Percentile))+geom_line(data=HIST,aes(x=Year,y=Population),color='black',linewidth=0.75)+geom_line(data=MEDIAN_PRED,aes(x=Year,y=Population),color='black',linetype=4,linewidth=0.75)+ scale_x_continuous(breaks = c(seq(1940, 2070, by = 10)))+ scale_y_continuous(breaks = seq(0, 35000, by = 250))+theme_bw()+ggtitle("Kemmerer & Diamondville, Population Forecast")+ expand_limits( y = 0)
|
||||||
length(RES$SIM_ID %>% unique)
|
HIST %>% filter(!is.na(Migration))
|
||||||
ggsave("Lincoln_Forecast.png",GRAPH)
|
ggplot(data=GRAPH_DATA_MIGRATION)+geom_ribbon(data=FAN_DATA_MIGRATION,aes(x=Year,ymin=`2.5%`,ymax=`97.5%`),alpha=ALPHA,fill=COLOR)+geom_ribbon(data=FAN_DATA_MIGRATION,aes(x=Year,ymin=`5%`,ymax=`95%`),alpha=ALPHA,fill=COLOR)+geom_ribbon(data=FAN_DATA_MIGRATION,aes(x=Year,ymin=`10%`,ymax=`90%`),alpha=ALPHA,fill=COLOR)+geom_ribbon(data=FAN_DATA_MIGRATION,aes(x=Year,ymin=`25%`,ymax=`75%`),alpha=ALPHA,fill=COLOR)+geom_ribbon(data=FAN_DATA_MIGRATION,aes(x=Year,ymin=`40%`,ymax=`60%`),alpha=ALPHA,fill=COLOR)+geom_line(aes(x=Year,y=Migration,group=Percentile,color=Percentile))+geom_line(data=HIST %>% filter(Year>=2008),aes(x=Year,y=Migration),color='black',linewidth=0.75)+geom_line(data=MEDIAN_PRED_MIGRATION,aes(x=Year,y=Migration),color='black',linetype=4,linewidth=0.75)+ scale_x_continuous(breaks = c(seq(2010, 2070, by = 10)))+ scale_y_continuous(breaks = seq(-500, 100, by = 100))+theme_bw()+ggtitle("Kemmerer, Wyoming Migration Forecast")
|
||||||
|
|
||||||
|
|||||||
@ -34,6 +34,8 @@ LN_2016 <- TS_DATA %>% dplyr::select(Year,County,Migration) %>% pivot_wider(val
|
|||||||
LN_1985 <- TS_DATA %>% dplyr::select(Year,County,Migration) %>% pivot_wider(values_from=Migration,names_from=County) %>% arrange(Year) %>% dplyr::select(Lincoln,Year) %>% filter(Year>=ST_YEAR,Year<=1985) %>% dplyr::select(-Year) %>%ts(start=c(ST_YEAR),frequency=1)
|
LN_1985 <- TS_DATA %>% dplyr::select(Year,County,Migration) %>% pivot_wider(values_from=Migration,names_from=County) %>% arrange(Year) %>% dplyr::select(Lincoln,Year) %>% filter(Year>=ST_YEAR,Year<=1985) %>% dplyr::select(-Year) %>%ts(start=c(ST_YEAR),frequency=1)
|
||||||
|
|
||||||
KEM <- TS_KEM_DATA %>% dplyr::select(Year,Region,Migration) %>% pivot_wider(values_from=Migration,names_from=Region) %>% arrange(Year) %>% dplyr::select('Kemmerer & Diamondville',Year) %>% filter(Year>=ST_YEAR_KEM,Year<=END_YEAR_KEM) %>% dplyr::select(-Year) %>%ts(start=c(ST_YEAR_KEM),frequency=1)
|
KEM <- TS_KEM_DATA %>% dplyr::select(Year,Region,Migration) %>% pivot_wider(values_from=Migration,names_from=Region) %>% arrange(Year) %>% dplyr::select('Kemmerer & Diamondville',Year) %>% filter(Year>=ST_YEAR_KEM,Year<=END_YEAR_KEM) %>% dplyr::select(-Year) %>%ts(start=c(ST_YEAR_KEM),frequency=1)
|
||||||
|
KEM_2016 <- TS_KEM_DATA %>% dplyr::select(Year,Region,Migration) %>% filter(Year<=2016) %>% pivot_wider(values_from=Migration,names_from=Region) %>% arrange(Year) %>% dplyr::select('Kemmerer & Diamondville',Year) %>% filter(Year>=ST_YEAR_KEM,Year<=2016) %>% dplyr::select(-Year) %>%ts(start=c(ST_YEAR_KEM),frequency=1)
|
||||||
|
|
||||||
OTHER <- TS_OTHER_DATA %>% dplyr::select(Year,Region,Migration) %>% pivot_wider(values_from=Migration,names_from=Region) %>% arrange(Year) %>% dplyr::select('Lincoln Other'=Lincoln_Other,Year) %>% filter(Year>=ST_YEAR_OTHER,Year<=END_YEAR_OTHER) %>% dplyr::select(-Year) %>%ts(start=c(ST_YEAR_OTHER),frequency=1)
|
OTHER <- TS_OTHER_DATA %>% dplyr::select(Year,Region,Migration) %>% pivot_wider(values_from=Migration,names_from=Region) %>% arrange(Year) %>% dplyr::select('Lincoln Other'=Lincoln_Other,Year) %>% filter(Year>=ST_YEAR_OTHER,Year<=END_YEAR_OTHER) %>% dplyr::select(-Year) %>%ts(start=c(ST_YEAR_OTHER),frequency=1)
|
||||||
|
|
||||||
#Create an ARIMA of Migration so the number of people migrating can be simulated
|
#Create an ARIMA of Migration so the number of people migrating can be simulated
|
||||||
@ -45,6 +47,7 @@ MOD <- auto.arima(LN)
|
|||||||
MOD_2016 <- auto.arima(LN_2016)
|
MOD_2016 <- auto.arima(LN_2016)
|
||||||
MOD_1985 <- auto.arima(LN_1985)
|
MOD_1985 <- auto.arima(LN_1985)
|
||||||
MOD_KEM <- auto.arima(KEM)
|
MOD_KEM <- auto.arima(KEM)
|
||||||
|
MOD_KEM_2016 <- auto.arima(KEM_2016)
|
||||||
MOD_OTHER <- auto.arima(OTHER)
|
MOD_OTHER <- auto.arima(OTHER)
|
||||||
|
|
||||||
##Because the Lincoln county simulation has more data, it will show trends more easily. As a proxy for the subregions adjust the Kemmerer and Diamondville data such that the magnitude of the mean migration is the same portion of the magnitude of the Lincoln County data. Note that the ARIMA for Kemmerer ONLY includes a mean value so this is a reasonable way to model overall migration. In a similar way asses all non Kemmerer net migration to the other regions. This does understate migration in years where the sub-regions have opposite direction of net migrations, but works well when assuming the two regions share a portion of total county migration
|
##Because the Lincoln county simulation has more data, it will show trends more easily. As a proxy for the subregions adjust the Kemmerer and Diamondville data such that the magnitude of the mean migration is the same portion of the magnitude of the Lincoln County data. Note that the ARIMA for Kemmerer ONLY includes a mean value so this is a reasonable way to model overall migration. In a similar way asses all non Kemmerer net migration to the other regions. This does understate migration in years where the sub-regions have opposite direction of net migrations, but works well when assuming the two regions share a portion of total county migration
|
||||||
@ -60,7 +63,9 @@ OTHER_2016_NEW <- LN_2016/LN_ADJ_OTHER
|
|||||||
OTHER_1985_NEW <- LN_1985/LN_ADJ_OTHER
|
OTHER_1985_NEW <- LN_1985/LN_ADJ_OTHER
|
||||||
#Create new models for migration forecasts
|
#Create new models for migration forecasts
|
||||||
MOD_KEM_ADJ <- auto.arima(KEM_NEW ,stationary=TRUE)
|
MOD_KEM_ADJ <- auto.arima(KEM_NEW ,stationary=TRUE)
|
||||||
|
median(simulate(MOD_KEM_ADJ,100000 ))
|
||||||
MOD_KEM_ADJ_2016 <- auto.arima(KEM_2016_NEW ,stationary=TRUE)
|
MOD_KEM_ADJ_2016 <- auto.arima(KEM_2016_NEW ,stationary=TRUE)
|
||||||
|
MOD_KEM_ADJ_2016 <- auto.arima(KEM_2016_NEW+as.numeric(coef(MOD_KEM_2016)["intercept"]))
|
||||||
MOD_KEM_ADJ_1985 <- auto.arima(KEM_1985_NEW ,stationary=TRUE)
|
MOD_KEM_ADJ_1985 <- auto.arima(KEM_1985_NEW ,stationary=TRUE)
|
||||||
MOD_OTHER_ADJ <- auto.arima(OTHER_NEW,stationary=TRUE)
|
MOD_OTHER_ADJ <- auto.arima(OTHER_NEW,stationary=TRUE)
|
||||||
MOD_OTHER_ADJ_2016 <- auto.arima(OTHER_2016_NEW,stationary=TRUE)
|
MOD_OTHER_ADJ_2016 <- auto.arima(OTHER_2016_NEW,stationary=TRUE)
|
||||||
@ -73,7 +78,8 @@ saveRDS(MOD_KEM_ADJ,paste0(SAVE_LOC_ARIMA_MODELS,"Kemmerer_Diamondville_Net_Migr
|
|||||||
saveRDS(MOD_OTHER_ADJ,paste0(SAVE_LOC_ARIMA_MODELS,"Other_Lincoln_Net_Migration_ARIMA.Rds"))
|
saveRDS(MOD_OTHER_ADJ,paste0(SAVE_LOC_ARIMA_MODELS,"Other_Lincoln_Net_Migration_ARIMA.Rds"))
|
||||||
|
|
||||||
saveRDS(MOD_2016,paste0(SAVE_LOC_ARIMA_MODELS,"Full_Lincoln_County_Net_Migration_ARIMA_2016.Rds"))
|
saveRDS(MOD_2016,paste0(SAVE_LOC_ARIMA_MODELS,"Full_Lincoln_County_Net_Migration_ARIMA_2016.Rds"))
|
||||||
saveRDS(MOD_KEM_ADJ_2016,paste0(SAVE_LOC_ARIMA_MODELS,"Kemmerer_Diamondville_Net_Migration_ARIMA_2016.Rds"))
|
saveRDS(MOD_KEM_2016,paste0(SAVE_LOC_ARIMA_MODELS,"Kemmerer_Diamondville_Net_Migration_ARIMA_2016.Rds"))
|
||||||
|
saveRDS(MOD_KEM_ADJ_2016,paste0(SAVE_LOC_ARIMA_MODELS,"Kemmerer_Diamondville_Adjusted_to_Lincoln_Model_Net_Migration_ARIMA_2016.Rds"))
|
||||||
saveRDS(MOD_OTHER_ADJ_2016,paste0(SAVE_LOC_ARIMA_MODELS,"Other_Lincoln_Net_Migration_ARIMA_2016.Rds"))
|
saveRDS(MOD_OTHER_ADJ_2016,paste0(SAVE_LOC_ARIMA_MODELS,"Other_Lincoln_Net_Migration_ARIMA_2016.Rds"))
|
||||||
|
|
||||||
saveRDS(MOD_1985,paste0(SAVE_LOC_ARIMA_MODELS,"Full_Lincoln_County_Net_Migration_ARIMA_1985.Rds"))
|
saveRDS(MOD_1985,paste0(SAVE_LOC_ARIMA_MODELS,"Full_Lincoln_County_Net_Migration_ARIMA_1985.Rds"))
|
||||||
|
|||||||
@ -87,6 +87,26 @@ OUT <- table(sample(0:85,NUM_MIGRATED_IN,prob=ODDS_MIGRATED,replace=TRUE),sample
|
|||||||
}
|
}
|
||||||
#Pull the correct rows of the demographics table using the row names. The names are a character set going from 0-85 ages, while the index is 1:86, so make sure to use the charter names. Add these new immigrants to the existing demographics of age-sex combinations.
|
#Pull the correct rows of the demographics table using the row names. The names are a character set going from 0-85 ages, while the index is 1:86, so make sure to use the charter names. Add these new immigrants to the existing demographics of age-sex combinations.
|
||||||
DEMOGRAPHIC_DATASET[as.character(rownames(OUT)),] <- DEMOGRAPHIC_DATASET[as.character(rownames(OUT)),]+OUT
|
DEMOGRAPHIC_DATASET[as.character(rownames(OUT)),] <- DEMOGRAPHIC_DATASET[as.character(rownames(OUT)),]+OUT
|
||||||
|
#tryCatch(
|
||||||
|
# expr = {
|
||||||
|
# DEMOGRAPHIC_DATASET[as.character(rownames(OUT)),] <- DEMOGRAPHIC_DATASET[as.character(rownames(OUT)),]+OUT
|
||||||
|
# Code to be evaluated
|
||||||
|
# },
|
||||||
|
# error = function(e) {
|
||||||
|
# Code to execute if an error occurs
|
||||||
|
# 'e' contains the error object
|
||||||
|
# sink("~/Gitea/Work/Population_Study/ERROR.txt")
|
||||||
|
# print(DEMOGRAPHIC_DATASET)
|
||||||
|
# saveRDS(list(DEMOGRAPHIC_DATASET,OUT,NUM_MIGRATED_IN),"~/Gitea/Work/Population_Study/ERROR.Rds")
|
||||||
|
# sink()
|
||||||
|
# },
|
||||||
|
# warning = function(w) {
|
||||||
|
# sink("~/Gitea/Work/Population_Study/WARNING.txt")
|
||||||
|
# print(DEMOGRAPHIC_DATASET)
|
||||||
|
# saveRDS(DEMOGRAPHIC_DATASET,"~/Gitea/Work/Population_Study/WARNING.Rds")
|
||||||
|
# sink()
|
||||||
|
# })
|
||||||
|
|
||||||
return(DEMOGRAPHIC_DATASET)
|
return(DEMOGRAPHIC_DATASET)
|
||||||
}
|
}
|
||||||
#Combine both functions for easier coding later on, allowing one function to be called in all scenarios
|
#Combine both functions for easier coding later on, allowing one function to be called in all scenarios
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user