library(forecast)
library(tidyverse)
library(texreg)
#setwd("../")
####Work on overall migration trends
#Could use code cleanup after trying things, but have but I have a working ARIMA to model Lincoln county migration
POP_DATA <- readRDS("Data/Cleaned_Data/Population_Data/RDS/Full_Lincoln_County_Population_Data.Rds") 
POP_DATA_TEST <- readRDS("Data/Cleaned_Data/Population_Data/RDS/Full_Lincoln_County_Population_Data.Rds") %>% mutate(Migration=Migration/Population)

POP_KEM_DATA <- readRDS("Data/Cleaned_Data/Population_Data/RDS/Kemmerer_Diamondville_Population_Data.Rds")
POP_OTHER_DATA <- readRDS("Data/Cleaned_Data/Population_Data/RDS/Other_Lincoln_Population_Data.Rds")


TS_DATA <- POP_DATA %>% mutate(In_Migration=ifelse(Migration>0,1,0)) %>% group_by(County) %>% arrange(County,Year) %>% mutate(Prev_Pop=lag(Population)) %>% ungroup
TS_DATA_TEST <- POP_DATA_TEST %>% mutate(In_Migration=ifelse(Migration>0,1,0)) %>% group_by(County) %>% arrange(County,Year) %>% mutate(Prev_Pop=lag(Population)) %>% ungroup

TS_KEM_DATA <- POP_KEM_DATA %>% mutate(In_Migration=ifelse(Migration>0,1,0)) %>% group_by(County) %>% arrange(County,Year) %>% mutate(Prev_Pop=lag(Population)) %>% ungroup
TS_OTHER_DATA <- POP_OTHER_DATA %>% mutate(In_Migration=ifelse(Migration>0,1,0)) %>% group_by(County) %>% arrange(County,Year) %>% mutate(Prev_Pop=lag(Population)) %>% ungroup


ST_YEAR <- min(pull(TS_DATA %>% filter(!is.na(Migration)),Year))
END_YEAR <- max(pull(TS_DATA %>% filter(!is.na(Migration)),Year))

ST_YEAR_KEM <- min(pull(TS_KEM_DATA %>% filter(!is.na(Migration)),Year))
END_YEAR_KEM <- max(pull(TS_KEM_DATA %>% filter(!is.na(Migration)),Year))

ST_YEAR_OTHER <- min(pull(TS_OTHER_DATA %>% filter(!is.na(Migration)),Year))
END_YEAR_OTHER <- max(pull(TS_OTHER_DATA %>% filter(!is.na(Migration)),Year))


TS_WIDE <- TS_DATA %>% dplyr::select(Year,County,Migration) %>% pivot_wider(values_from=Migration,names_from=County) %>% arrange(Year) %>% filter(Year>ST_YEAR+1,Year<=END_YEAR) %>%ts(start=c(ST_YEAR+1),frequency=1)

TS_KEM_WIDE <- TS_KEM_DATA %>% dplyr::select(Year,County,Migration) %>% pivot_wider(values_from=Migration,names_from=County) %>% arrange(Year) %>% filter(Year>ST_YEAR+1,Year<=END_YEAR) %>%ts(start=c(ST_YEAR+1),frequency=1)
TS_OTHER_WIDE <- TS_OTHER_DATA %>% dplyr::select(Year,County,Migration) %>% pivot_wider(values_from=Migration,names_from=County) %>% arrange(Year) %>% filter(Year>ST_YEAR+1,Year<=END_YEAR) %>%ts(start=c(ST_YEAR+1),frequency=1)


LN <- TS_DATA %>%  dplyr::select(Year,County,Migration) %>% pivot_wider(values_from=Migration,names_from=County) %>% arrange(Year) %>% dplyr::select(Lincoln,Year) %>% filter(Year>=ST_YEAR,Year<=END_YEAR) %>% dplyr::select(-Year) %>%ts(start=c(ST_YEAR),frequency=1)

KEM <- TS_KEM_DATA %>%  dplyr::select(Year,Region,Migration) %>% pivot_wider(values_from=Migration,names_from=Region) %>% arrange(Year) %>% dplyr::select('Kemmerer & Diamondville',Year) %>% filter(Year>=ST_YEAR_KEM,Year<=END_YEAR_KEM) %>% dplyr::select(-Year) %>%ts(start=c(ST_YEAR_KEM),frequency=1)
OTHER <- TS_OTHER_DATA %>%  dplyr::select(Year,Region,Migration) %>% pivot_wider(values_from=Migration,names_from=Region) %>% arrange(Year) %>% dplyr::select('Lincoln Other'=Lincoln_Other,Year) %>% filter(Year>=ST_YEAR_OTHER,Year<=END_YEAR_OTHER) %>% dplyr::select(-Year) %>%ts(start=c(ST_YEAR_OTHER),frequency=1)

#Create an ARIMA of Migration so the number of people migrating can be simulated
#Time series tests
#library(tseries)
#adf.test(LN,k=1) #Stationary with one lag, otherwise not stationary
#kpss.test(LN) #Stationary,default of program and has some model fit improvements
MOD <- auto.arima(LN)
MOD_KEM <- auto.arima(KEM)
MOD_OTHER <- auto.arima(OTHER)

##Because the Lincoln county simulation has more data, it will show trends more easily. As a proxy for the subregions adjust the Kemmerer and Diamondville data such that the magnitude of the mean migration is the same portion of the magnitude of the Lincoln County data. Note that the ARIMA for Kemmerer ONLY includes a mean value so this is a reasonable way to model overall migration. In a similar way asses all non Kemmerer net migration to the other regions. This does understate migration in years where the sub-regions have opposite direction of net migrations, but works well when assuming the two regions share a portion of total county migration 
LN_ADJ_KEM <- mean(abs(LN))/mean(abs(KEM))
LN_ADJ_OTHER <- 1-1/LN_ADJ_KEM

#Downshift the sub-region data
KEM_NEW <- LN/LN_ADJ_KEM 
OTHER_NEW <- LN/LN_ADJ_OTHER
#Create new models for migration forecasts
MOD_KEM_ADJ <- auto.arima(KEM_NEW ,stationary=TRUE)
MOD_OTHER_ADJ <- auto.arima(OTHER_NEW,stationary=TRUE)
#Save the models for use in the Monte Carlo simulation
	if(!exists("SAVE_LOC_ARIMA_MODELS")){SAVE_LOC_ARIMA_MODELS <-"./Data/Intermediate_Inputs/Migration_ARIMA_Models/"}
	dir.create(SAVE_LOC_ARIMA_MODELS, recursive = TRUE, showWarnings = FALSE)
saveRDS(MOD,paste0(SAVE_LOC_ARIMA_MODELS,"Full_Lincoln_County_Net_Migration_ARIMA.Rds"))
saveRDS(MOD_KEM_ADJ,paste0(SAVE_LOC_ARIMA_MODELS,"Kemmerer_Diamondville_Net_Migration_ARIMA.Rds"))
saveRDS(MOD_OTHER_ADJ,paste0(SAVE_LOC_ARIMA_MODELS,"Other_Lincoln_Net_Migration_ARIMA.Rds"))


##Save model summary results and create useful figures
	if(!exists("SAVE_LOC_ARIMA_FIGURES")){SAVE_LOC_ARIMA_FIGURES <-"./Results/Migration_ARIMA/"}
	dir.create(SAVE_LOC_ARIMA_FIGURES, recursive = TRUE, showWarnings = FALSE)


DICT <- list("ar1"="Autoregressive (1 Year)","ma1"="Moving Average (1 Year)","intercept"="Average Migration")
FILE_NAME <- "Migration_ARIMA_Tables"
REG_TABLE_LOC <- paste0(SAVE_LOC_ARIMA_FIGURES,FILE_NAME,".tex")
sink(REG_TABLE_LOC)
cat("\\documentclass[border=0pt]{article}","\n","\\pagestyle{empty}","\n","\\usepackage{booktabs,dcolumn}","\n","\\begin{document}")
texreg(l=list(MOD,MOD_KEM),custom.model.names=c("\\textbf{Lincoln County}","\\textbf{Kemmerer \\& Diamondville}"),table=FALSE,use.packages=FALSE,booktabs=TRUE,dcolumn=TRUE,caption.above=TRUE,custom.coef.map=DICT)
cat("\n","\\end{document}")
sink()
system(paste0("pdflatex ",REG_TABLE_LOC))
system(paste0("pdfcrop --margins '5 5 5 5' ",FILE_NAME,".pdf ",SAVE_LOC_ARIMA_FIGURES,FILE_NAME,".pdf"))
file.remove(list.files(pattern=FILE_NAME))

####Lincoln Total Results
png(paste0(SAVE_LOC_ARIMA_FIGURES,"Lincoln_County_ARIMA_Migration_Forecast.png"), res = 600, height = 12, width=16, units = "in")
	plot(forecast(MOD,h=2050-2023),xlab="Year","ylab"="Net Migration",main="Lincoln County Net Migration ARIMA Forcast")
dev.off()

png(paste0(SAVE_LOC_ARIMA_FIGURES,"Lincoln_County_Migration_ARIMA_Residual_Checks.png"), res = 600, height = 12, width=16, units = "in")
	checkresiduals(MOD)
dev.off()

#####Kemmerer Results
png(paste0(SAVE_LOC_ARIMA_FIGURES,"Kemmerer_and_Diamondville_ARIMA_Migration_Forecast.png"), res = 600, height = 12, width=16, units = "in")
	plot(forecast(MOD_KEM,h=2050-2023),xlab="Year","ylab"="Net Migration",main="Lincoln County Net Migration ARIMA Forcast")
dev.off()

png(paste0(SAVE_LOC_ARIMA_FIGURES,"Kemmerer_and_Diamondville_Migration_ARIMA_Residual_Checks.png"), res = 600, height = 12, width=16, units = "in")
	checkresiduals(MOD_KEM)
dev.off()

#####Rest of Lincoln Results
png(paste0(SAVE_LOC_ARIMA_FIGURES,"Lincoln_Other_Areas_ARIMA_Migration_Forecast.png"), res = 600, height = 12, width=16, units = "in")
	plot(forecast(MOD_OTHER,h=2050-2023),xlab="Year","ylab"="Net Migration",main="Kemmerer & Diamondville Net Migration ARIMA Forcast")
dev.off()

png(paste0(SAVE_LOC_ARIMA_FIGURES,"Lincoln_Other_Areas_Migration_ARIMA_Residual_Checks.png"), res = 600, height = 12, width=16, units = "in")
	checkresiduals(MOD_OTHER)
dev.off()


#MIGRATION_ARIMA_SIMS <- (do.call(cbind,mclapply(1:NUM_SIMULATIONS,function(x){as.numeric(round(simulate(MOD,future=TRUE, nsim=NUM_YEARS_PROJECTED)))},mc.cores =detectCores()-1)))#testing a multiple run simulation could use parallel process