Population_Study/Scripts/2D_Overall_Migration_ARIMA.r
2025-11-19 17:37:39 -07:00

130 lines
8.6 KiB
R

library(forecast)
library(tidyverse)
library(texreg)
#setwd("../")
####Work on overall migration trends
#Could use code cleanup after trying things, but have but I have a working ARIMA to model Lincoln county migration
POP_DATA <- readRDS("Data/Cleaned_Data/Population_Data/RDS/Full_Lincoln_County_Population_Data.Rds")
POP_DATA_TEST <- readRDS("Data/Cleaned_Data/Population_Data/RDS/Full_Lincoln_County_Population_Data.Rds") %>% mutate(Migration=Migration/Population)
POP_KEM_DATA <- readRDS("Data/Cleaned_Data/Population_Data/RDS/Kemmerer_Diamondville_Population_Data.Rds")
POP_OTHER_DATA <- readRDS("Data/Cleaned_Data/Population_Data/RDS/Other_Lincoln_Population_Data.Rds")
TS_DATA <- POP_DATA %>% mutate(In_Migration=ifelse(Migration>0,1,0)) %>% group_by(County) %>% arrange(County,Year) %>% mutate(Prev_Pop=lag(Population)) %>% ungroup
TS_DATA <- POP_DATA %>% mutate(In_Migration=ifelse(Migration>0,1,0)) %>% group_by(County) %>% arrange(County,Year) %>% mutate(Prev_Pop=lag(Population)) %>% ungroup
TS_KEM_DATA <- POP_KEM_DATA %>% mutate(In_Migration=ifelse(Migration>0,1,0)) %>% group_by(County) %>% arrange(County,Year) %>% mutate(Prev_Pop=lag(Population)) %>% ungroup
TS_OTHER_DATA <- POP_OTHER_DATA %>% mutate(In_Migration=ifelse(Migration>0,1,0)) %>% group_by(County) %>% arrange(County,Year) %>% mutate(Prev_Pop=lag(Population)) %>% ungroup
ST_YEAR <- min(pull(TS_DATA %>% filter(!is.na(Migration)),Year))
END_YEAR <- max(pull(TS_DATA %>% filter(!is.na(Migration)),Year))
ST_YEAR_KEM <- min(pull(TS_KEM_DATA %>% filter(!is.na(Migration)),Year))
END_YEAR_KEM <- max(pull(TS_KEM_DATA %>% filter(!is.na(Migration)),Year))
ST_YEAR_OTHER <- min(pull(TS_OTHER_DATA %>% filter(!is.na(Migration)),Year))
END_YEAR_OTHER <- max(pull(TS_OTHER_DATA %>% filter(!is.na(Migration)),Year))
LN <- TS_DATA %>% dplyr::select(Year,County,Migration) %>% pivot_wider(values_from=Migration,names_from=County) %>% arrange(Year) %>% dplyr::select(Lincoln,Year) %>% filter(Year>=ST_YEAR,Year<=END_YEAR) %>% dplyr::select(-Year) %>%ts(start=c(ST_YEAR),frequency=1)
LN_2016 <- TS_DATA %>% dplyr::select(Year,County,Migration) %>% pivot_wider(values_from=Migration,names_from=County) %>% arrange(Year) %>% dplyr::select(Lincoln,Year) %>% filter(Year>=ST_YEAR,Year<=2016) %>% dplyr::select(-Year) %>%ts(start=c(ST_YEAR),frequency=1)
LN_1985 <- TS_DATA %>% dplyr::select(Year,County,Migration) %>% pivot_wider(values_from=Migration,names_from=County) %>% arrange(Year) %>% dplyr::select(Lincoln,Year) %>% filter(Year>=ST_YEAR,Year<=1985) %>% dplyr::select(-Year) %>%ts(start=c(ST_YEAR),frequency=1)
KEM <- TS_KEM_DATA %>% dplyr::select(Year,Region,Migration) %>% pivot_wider(values_from=Migration,names_from=Region) %>% arrange(Year) %>% dplyr::select('Kemmerer & Diamondville',Year) %>% filter(Year>=ST_YEAR_KEM,Year<=END_YEAR_KEM) %>% dplyr::select(-Year) %>%ts(start=c(ST_YEAR_KEM),frequency=1)
OTHER <- TS_OTHER_DATA %>% dplyr::select(Year,Region,Migration) %>% pivot_wider(values_from=Migration,names_from=Region) %>% arrange(Year) %>% dplyr::select('Lincoln Other'=Lincoln_Other,Year) %>% filter(Year>=ST_YEAR_OTHER,Year<=END_YEAR_OTHER) %>% dplyr::select(-Year) %>%ts(start=c(ST_YEAR_OTHER),frequency=1)
#Create an ARIMA of Migration so the number of people migrating can be simulated
#Time series tests
#library(tseries)
#adf.test(LN,k=1) #Stationary with one lag, otherwise not stationary
#kpss.test(LN) #Stationary,default of program and has some model fit improvements
MOD <- auto.arima(LN)
MOD_2016 <- auto.arima(LN_2016)
MOD_1985 <- auto.arima(LN_1985)
MOD_KEM <- auto.arima(KEM)
MOD_OTHER <- auto.arima(OTHER)
##Because the Lincoln county simulation has more data, it will show trends more easily. As a proxy for the subregions adjust the Kemmerer and Diamondville data such that the magnitude of the mean migration is the same portion of the magnitude of the Lincoln County data. Note that the ARIMA for Kemmerer ONLY includes a mean value so this is a reasonable way to model overall migration. In a similar way asses all non Kemmerer net migration to the other regions. This does understate migration in years where the sub-regions have opposite direction of net migrations, but works well when assuming the two regions share a portion of total county migration
LN_ADJ_KEM <- mean(abs(LN))/mean(abs(KEM))
LN_ADJ_OTHER <- 1-1/LN_ADJ_KEM
#Downshift the sub-region data
KEM_NEW <- LN/LN_ADJ_KEM
KEM_2016_NEW <- LN_2016/LN_ADJ_KEM
KEM_1985_NEW <- LN_1985/LN_ADJ_KEM
OTHER_NEW <- LN/LN_ADJ_OTHER
OTHER_2016_NEW <- LN_2016/LN_ADJ_OTHER
OTHER_1985_NEW <- LN_1985/LN_ADJ_OTHER
#Create new models for migration forecasts
MOD_KEM_ADJ <- auto.arima(KEM_NEW ,stationary=TRUE)
MOD_KEM_ADJ_2016 <- auto.arima(KEM_2016_NEW ,stationary=TRUE)
MOD_KEM_ADJ_1985 <- auto.arima(KEM_1985_NEW ,stationary=TRUE)
MOD_OTHER_ADJ <- auto.arima(OTHER_NEW,stationary=TRUE)
MOD_OTHER_ADJ_2016 <- auto.arima(OTHER_2016_NEW,stationary=TRUE)
MOD_OTHER_ADJ_1985 <- auto.arima(OTHER_1985_NEW,stationary=TRUE)
#Save the models for use in the Monte Carlo simulation
if(!exists("SAVE_LOC_ARIMA_MODELS")){SAVE_LOC_ARIMA_MODELS <-"./Data/Intermediate_Inputs/Migration_ARIMA_Models/"}
dir.create(SAVE_LOC_ARIMA_MODELS, recursive = TRUE, showWarnings = FALSE)
saveRDS(MOD,paste0(SAVE_LOC_ARIMA_MODELS,"Full_Lincoln_County_Net_Migration_ARIMA.Rds"))
saveRDS(MOD_KEM_ADJ,paste0(SAVE_LOC_ARIMA_MODELS,"Kemmerer_Diamondville_Net_Migration_ARIMA.Rds"))
saveRDS(MOD_OTHER_ADJ,paste0(SAVE_LOC_ARIMA_MODELS,"Other_Lincoln_Net_Migration_ARIMA.Rds"))
saveRDS(MOD_2016,paste0(SAVE_LOC_ARIMA_MODELS,"Full_Lincoln_County_Net_Migration_ARIMA_2016.Rds"))
saveRDS(MOD_KEM_ADJ_2016,paste0(SAVE_LOC_ARIMA_MODELS,"Kemmerer_Diamondville_Net_Migration_ARIMA_2016.Rds"))
saveRDS(MOD_OTHER_ADJ_2016,paste0(SAVE_LOC_ARIMA_MODELS,"Other_Lincoln_Net_Migration_ARIMA_2016.Rds"))
saveRDS(MOD_1985,paste0(SAVE_LOC_ARIMA_MODELS,"Full_Lincoln_County_Net_Migration_ARIMA_1985.Rds"))
saveRDS(MOD_KEM_ADJ_1985,paste0(SAVE_LOC_ARIMA_MODELS,"Kemmerer_Diamondville_Net_Migration_ARIMA_1985.Rds"))
saveRDS(MOD_OTHER_ADJ_1985,paste0(SAVE_LOC_ARIMA_MODELS,"Other_Lincoln_Net_Migration_ARIMA_1985.Rds"))
##Save model summary results and create useful figures
if(!exists("SAVE_LOC_ARIMA_FIGURES")){SAVE_LOC_ARIMA_FIGURES <-"./Results/Migration_ARIMA/"}
dir.create(SAVE_LOC_ARIMA_FIGURES, recursive = TRUE, showWarnings = FALSE)
DICT <- list("ar1"="Autoregressive (1 Year)","ma1"="Moving Average (1 Year)","intercept"="Average Migration")
FILE_NAME <- "Migration_ARIMA_Tables"
REG_TABLE_LOC <- paste0(SAVE_LOC_ARIMA_FIGURES,FILE_NAME,".tex")
sink(REG_TABLE_LOC)
cat("\\documentclass[border=0pt]{article}","\n","\\pagestyle{empty}","\n","\\usepackage{booktabs,dcolumn}","\n","\\begin{document}")
texreg(l=list(MOD,MOD_KEM),custom.model.names=c("\\textbf{Lincoln County}","\\textbf{Kemmerer \\& Diamondville}"),table=FALSE,use.packages=FALSE,booktabs=TRUE,dcolumn=TRUE,caption.above=TRUE,custom.coef.map=DICT)
cat("\n","\\end{document}")
sink()
system(paste0("pdflatex ",REG_TABLE_LOC))
system(paste0("pdfcrop --margins '5 5 5 5' ",FILE_NAME,".pdf ",SAVE_LOC_ARIMA_FIGURES,FILE_NAME,".pdf"))
file.remove(list.files(pattern=FILE_NAME))
####Lincoln Total Results
png(paste0(SAVE_LOC_ARIMA_FIGURES,"Lincoln_County_ARIMA_Migration_Forecast.png"), res = 600, height = 12, width=16, units = "in")
plot(forecast(MOD,h=2050-2023),xlab="Year","ylab"="Net Migration",main="Lincoln County Net Migration ARIMA Forcast")
dev.off()
png(paste0(SAVE_LOC_ARIMA_FIGURES,"Lincoln_County_Migration_ARIMA_Residual_Checks.png"), res = 600, height = 12, width=16, units = "in")
checkresiduals(MOD)
dev.off()
#####Kemmerer Results
png(paste0(SAVE_LOC_ARIMA_FIGURES,"Kemmerer_and_Diamondville_ARIMA_Migration_Forecast.png"), res = 600, height = 12, width=16, units = "in")
plot(forecast(MOD_KEM,h=2050-2023),xlab="Year","ylab"="Net Migration",main="Lincoln County Net Migration ARIMA Forcast")
dev.off()
png(paste0(SAVE_LOC_ARIMA_FIGURES,"Kemmerer_and_Diamondville_Migration_ARIMA_Residual_Checks.png"), res = 600, height = 12, width=16, units = "in")
checkresiduals(MOD_KEM)
dev.off()
#####Rest of Lincoln Results
png(paste0(SAVE_LOC_ARIMA_FIGURES,"Lincoln_Other_Areas_ARIMA_Migration_Forecast.png"), res = 600, height = 12, width=16, units = "in")
plot(forecast(MOD_OTHER,h=2050-2023),xlab="Year","ylab"="Net Migration",main="Kemmerer & Diamondville Net Migration ARIMA Forcast")
dev.off()
png(paste0(SAVE_LOC_ARIMA_FIGURES,"Lincoln_Other_Areas_Migration_ARIMA_Residual_Checks.png"), res = 600, height = 12, width=16, units = "in")
checkresiduals(MOD_OTHER)
dev.off()
#MIGRATION_ARIMA_SIMS <- (do.call(cbind,mclapply(1:NUM_SIMULATIONS,function(x){as.numeric(round(simulate(MOD,future=TRUE, nsim=NUM_YEARS_PROJECTED)))},mc.cores =detectCores()-1)))#testing a multiple run simulation could use parallel process