136 lines
9.2 KiB
R
136 lines
9.2 KiB
R
library(forecast)
|
|
library(tidyverse)
|
|
library(texreg)
|
|
#setwd("../")
|
|
####Work on overall migration trends
|
|
#Could use code cleanup after trying things, but have but I have a working ARIMA to model Lincoln county migration
|
|
POP_DATA <- readRDS("Data/Cleaned_Data/Population_Data/RDS/Full_Lincoln_County_Population_Data.Rds")
|
|
POP_DATA_TEST <- readRDS("Data/Cleaned_Data/Population_Data/RDS/Full_Lincoln_County_Population_Data.Rds") %>% mutate(Migration=Migration/Population)
|
|
|
|
POP_KEM_DATA <- readRDS("Data/Cleaned_Data/Population_Data/RDS/Kemmerer_Diamondville_Population_Data.Rds")
|
|
POP_OTHER_DATA <- readRDS("Data/Cleaned_Data/Population_Data/RDS/Other_Lincoln_Population_Data.Rds")
|
|
|
|
|
|
TS_DATA <- POP_DATA %>% mutate(In_Migration=ifelse(Migration>0,1,0)) %>% group_by(County) %>% arrange(County,Year) %>% mutate(Prev_Pop=lag(Population)) %>% ungroup
|
|
TS_DATA <- POP_DATA %>% mutate(In_Migration=ifelse(Migration>0,1,0)) %>% group_by(County) %>% arrange(County,Year) %>% mutate(Prev_Pop=lag(Population)) %>% ungroup
|
|
|
|
TS_KEM_DATA <- POP_KEM_DATA %>% mutate(In_Migration=ifelse(Migration>0,1,0)) %>% group_by(County) %>% arrange(County,Year) %>% mutate(Prev_Pop=lag(Population)) %>% ungroup
|
|
TS_OTHER_DATA <- POP_OTHER_DATA %>% mutate(In_Migration=ifelse(Migration>0,1,0)) %>% group_by(County) %>% arrange(County,Year) %>% mutate(Prev_Pop=lag(Population)) %>% ungroup
|
|
|
|
|
|
ST_YEAR <- min(pull(TS_DATA %>% filter(!is.na(Migration)),Year))
|
|
END_YEAR <- max(pull(TS_DATA %>% filter(!is.na(Migration)),Year))
|
|
|
|
ST_YEAR_KEM <- min(pull(TS_KEM_DATA %>% filter(!is.na(Migration)),Year))
|
|
END_YEAR_KEM <- max(pull(TS_KEM_DATA %>% filter(!is.na(Migration)),Year))
|
|
|
|
ST_YEAR_OTHER <- min(pull(TS_OTHER_DATA %>% filter(!is.na(Migration)),Year))
|
|
END_YEAR_OTHER <- max(pull(TS_OTHER_DATA %>% filter(!is.na(Migration)),Year))
|
|
|
|
|
|
|
|
LN <- TS_DATA %>% dplyr::select(Year,County,Migration) %>% pivot_wider(values_from=Migration,names_from=County) %>% arrange(Year) %>% dplyr::select(Lincoln,Year) %>% filter(Year>=ST_YEAR,Year<=END_YEAR) %>% dplyr::select(-Year) %>%ts(start=c(ST_YEAR),frequency=1)
|
|
LN_2016 <- TS_DATA %>% dplyr::select(Year,County,Migration) %>% pivot_wider(values_from=Migration,names_from=County) %>% arrange(Year) %>% dplyr::select(Lincoln,Year) %>% filter(Year>=ST_YEAR,Year<=2016) %>% dplyr::select(-Year) %>%ts(start=c(ST_YEAR),frequency=1)
|
|
LN_1985 <- TS_DATA %>% dplyr::select(Year,County,Migration) %>% pivot_wider(values_from=Migration,names_from=County) %>% arrange(Year) %>% dplyr::select(Lincoln,Year) %>% filter(Year>=ST_YEAR,Year<=1985) %>% dplyr::select(-Year) %>%ts(start=c(ST_YEAR),frequency=1)
|
|
|
|
KEM <- TS_KEM_DATA %>% dplyr::select(Year,Region,Migration) %>% pivot_wider(values_from=Migration,names_from=Region) %>% arrange(Year) %>% dplyr::select('Kemmerer & Diamondville',Year) %>% filter(Year>=ST_YEAR_KEM,Year<=END_YEAR_KEM) %>% dplyr::select(-Year) %>%ts(start=c(ST_YEAR_KEM),frequency=1)
|
|
KEM_2016 <- TS_KEM_DATA %>% dplyr::select(Year,Region,Migration) %>% filter(Year<=2016) %>% pivot_wider(values_from=Migration,names_from=Region) %>% arrange(Year) %>% dplyr::select('Kemmerer & Diamondville',Year) %>% filter(Year>=ST_YEAR_KEM,Year<=2016) %>% dplyr::select(-Year) %>%ts(start=c(ST_YEAR_KEM),frequency=1)
|
|
|
|
OTHER <- TS_OTHER_DATA %>% dplyr::select(Year,Region,Migration) %>% pivot_wider(values_from=Migration,names_from=Region) %>% arrange(Year) %>% dplyr::select('Lincoln Other'=Lincoln_Other,Year) %>% filter(Year>=ST_YEAR_OTHER,Year<=END_YEAR_OTHER) %>% dplyr::select(-Year) %>%ts(start=c(ST_YEAR_OTHER),frequency=1)
|
|
|
|
#Create an ARIMA of Migration so the number of people migrating can be simulated
|
|
#Time series tests
|
|
#library(tseries)
|
|
#adf.test(LN,k=1) #Stationary with one lag, otherwise not stationary
|
|
#kpss.test(LN) #Stationary,default of program and has some model fit improvements
|
|
MOD <- auto.arima(LN)
|
|
MOD_2016 <- auto.arima(LN_2016)
|
|
MOD_1985 <- auto.arima(LN_1985)
|
|
MOD_KEM <- auto.arima(KEM)
|
|
MOD_KEM_2016 <- auto.arima(KEM_2016)
|
|
MOD_OTHER <- auto.arima(OTHER)
|
|
|
|
##Because the Lincoln county simulation has more data, it will show trends more easily. As a proxy for the subregions adjust the Kemmerer and Diamondville data such that the magnitude of the mean migration is the same portion of the magnitude of the Lincoln County data. Note that the ARIMA for Kemmerer ONLY includes a mean value so this is a reasonable way to model overall migration. In a similar way asses all non Kemmerer net migration to the other regions. This does understate migration in years where the sub-regions have opposite direction of net migrations, but works well when assuming the two regions share a portion of total county migration
|
|
LN_ADJ_KEM <- mean(abs(LN))/mean(abs(KEM))
|
|
LN_ADJ_OTHER <- 1-1/LN_ADJ_KEM
|
|
|
|
#Downshift the sub-region data
|
|
KEM_NEW <- LN/LN_ADJ_KEM
|
|
KEM_2016_NEW <- LN_2016/LN_ADJ_KEM
|
|
KEM_1985_NEW <- LN_1985/LN_ADJ_KEM
|
|
OTHER_NEW <- LN/LN_ADJ_OTHER
|
|
OTHER_2016_NEW <- LN_2016/LN_ADJ_OTHER
|
|
OTHER_1985_NEW <- LN_1985/LN_ADJ_OTHER
|
|
#Create new models for migration forecasts
|
|
MOD_KEM_ADJ <- auto.arima(KEM_NEW ,stationary=TRUE)
|
|
median(simulate(MOD_KEM_ADJ,100000 ))
|
|
MOD_KEM_ADJ_2016 <- auto.arima(KEM_2016_NEW ,stationary=TRUE)
|
|
MOD_KEM_ADJ_2016 <- auto.arima(KEM_2016_NEW+as.numeric(coef(MOD_KEM_2016)["intercept"]))
|
|
MOD_KEM_ADJ_1985 <- auto.arima(KEM_1985_NEW ,stationary=TRUE)
|
|
MOD_OTHER_ADJ <- auto.arima(OTHER_NEW,stationary=TRUE)
|
|
MOD_OTHER_ADJ_2016 <- auto.arima(OTHER_2016_NEW,stationary=TRUE)
|
|
MOD_OTHER_ADJ_1985 <- auto.arima(OTHER_1985_NEW,stationary=TRUE)
|
|
#Save the models for use in the Monte Carlo simulation
|
|
if(!exists("SAVE_LOC_ARIMA_MODELS")){SAVE_LOC_ARIMA_MODELS <-"./Data/Intermediate_Inputs/Migration_ARIMA_Models/"}
|
|
dir.create(SAVE_LOC_ARIMA_MODELS, recursive = TRUE, showWarnings = FALSE)
|
|
saveRDS(MOD,paste0(SAVE_LOC_ARIMA_MODELS,"Full_Lincoln_County_Net_Migration_ARIMA.Rds"))
|
|
saveRDS(MOD_KEM_ADJ,paste0(SAVE_LOC_ARIMA_MODELS,"Kemmerer_Diamondville_Net_Migration_ARIMA.Rds"))
|
|
saveRDS(MOD_OTHER_ADJ,paste0(SAVE_LOC_ARIMA_MODELS,"Other_Lincoln_Net_Migration_ARIMA.Rds"))
|
|
|
|
saveRDS(MOD_2016,paste0(SAVE_LOC_ARIMA_MODELS,"Full_Lincoln_County_Net_Migration_ARIMA_2016.Rds"))
|
|
saveRDS(MOD_KEM_2016,paste0(SAVE_LOC_ARIMA_MODELS,"Kemmerer_Diamondville_Net_Migration_ARIMA_2016.Rds"))
|
|
saveRDS(MOD_KEM_ADJ_2016,paste0(SAVE_LOC_ARIMA_MODELS,"Kemmerer_Diamondville_Adjusted_to_Lincoln_Model_Net_Migration_ARIMA_2016.Rds"))
|
|
saveRDS(MOD_OTHER_ADJ_2016,paste0(SAVE_LOC_ARIMA_MODELS,"Other_Lincoln_Net_Migration_ARIMA_2016.Rds"))
|
|
|
|
saveRDS(MOD_1985,paste0(SAVE_LOC_ARIMA_MODELS,"Full_Lincoln_County_Net_Migration_ARIMA_1985.Rds"))
|
|
saveRDS(MOD_KEM_ADJ_1985,paste0(SAVE_LOC_ARIMA_MODELS,"Kemmerer_Diamondville_Net_Migration_ARIMA_1985.Rds"))
|
|
saveRDS(MOD_OTHER_ADJ_1985,paste0(SAVE_LOC_ARIMA_MODELS,"Other_Lincoln_Net_Migration_ARIMA_1985.Rds"))
|
|
|
|
##Save model summary results and create useful figures
|
|
if(!exists("SAVE_LOC_ARIMA_FIGURES")){SAVE_LOC_ARIMA_FIGURES <-"./Results/Migration_ARIMA/"}
|
|
dir.create(SAVE_LOC_ARIMA_FIGURES, recursive = TRUE, showWarnings = FALSE)
|
|
|
|
|
|
DICT <- list("ar1"="Autoregressive (1 Year)","ma1"="Moving Average (1 Year)","intercept"="Average Migration")
|
|
FILE_NAME <- "Migration_ARIMA_Tables"
|
|
REG_TABLE_LOC <- paste0(SAVE_LOC_ARIMA_FIGURES,FILE_NAME,".tex")
|
|
sink(REG_TABLE_LOC)
|
|
cat("\\documentclass[border=0pt]{article}","\n","\\pagestyle{empty}","\n","\\usepackage{booktabs,dcolumn}","\n","\\begin{document}")
|
|
texreg(l=list(MOD,MOD_KEM),custom.model.names=c("\\textbf{Lincoln County}","\\textbf{Kemmerer \\& Diamondville}"),table=FALSE,use.packages=FALSE,booktabs=TRUE,dcolumn=TRUE,caption.above=TRUE,custom.coef.map=DICT)
|
|
cat("\n","\\end{document}")
|
|
sink()
|
|
system(paste0("pdflatex ",REG_TABLE_LOC))
|
|
system(paste0("pdfcrop --margins '5 5 5 5' ",FILE_NAME,".pdf ",SAVE_LOC_ARIMA_FIGURES,FILE_NAME,".pdf"))
|
|
file.remove(list.files(pattern=FILE_NAME))
|
|
|
|
####Lincoln Total Results
|
|
png(paste0(SAVE_LOC_ARIMA_FIGURES,"Lincoln_County_ARIMA_Migration_Forecast.png"), res = 600, height = 12, width=16, units = "in")
|
|
plot(forecast(MOD,h=2050-2023),xlab="Year","ylab"="Net Migration",main="Lincoln County Net Migration ARIMA Forcast")
|
|
dev.off()
|
|
|
|
png(paste0(SAVE_LOC_ARIMA_FIGURES,"Lincoln_County_Migration_ARIMA_Residual_Checks.png"), res = 600, height = 12, width=16, units = "in")
|
|
checkresiduals(MOD)
|
|
dev.off()
|
|
|
|
#####Kemmerer Results
|
|
png(paste0(SAVE_LOC_ARIMA_FIGURES,"Kemmerer_and_Diamondville_ARIMA_Migration_Forecast.png"), res = 600, height = 12, width=16, units = "in")
|
|
plot(forecast(MOD_KEM,h=2050-2023),xlab="Year","ylab"="Net Migration",main="Lincoln County Net Migration ARIMA Forcast")
|
|
dev.off()
|
|
|
|
png(paste0(SAVE_LOC_ARIMA_FIGURES,"Kemmerer_and_Diamondville_Migration_ARIMA_Residual_Checks.png"), res = 600, height = 12, width=16, units = "in")
|
|
checkresiduals(MOD_KEM)
|
|
dev.off()
|
|
|
|
#####Rest of Lincoln Results
|
|
png(paste0(SAVE_LOC_ARIMA_FIGURES,"Lincoln_Other_Areas_ARIMA_Migration_Forecast.png"), res = 600, height = 12, width=16, units = "in")
|
|
plot(forecast(MOD_OTHER,h=2050-2023),xlab="Year","ylab"="Net Migration",main="Kemmerer & Diamondville Net Migration ARIMA Forcast")
|
|
dev.off()
|
|
|
|
png(paste0(SAVE_LOC_ARIMA_FIGURES,"Lincoln_Other_Areas_Migration_ARIMA_Residual_Checks.png"), res = 600, height = 12, width=16, units = "in")
|
|
checkresiduals(MOD_OTHER)
|
|
dev.off()
|
|
|
|
|
|
|
|
#MIGRATION_ARIMA_SIMS <- (do.call(cbind,mclapply(1:NUM_SIMULATIONS,function(x){as.numeric(round(simulate(MOD,future=TRUE, nsim=NUM_YEARS_PROJECTED)))},mc.cores =detectCores()-1)))#testing a multiple run simulation could use parallel process
|