Population_Study/Scripts/2C_Overall_Migration_ARIMA.r
2025-11-13 12:51:29 -07:00

31 lines
2.1 KiB
R

library(forecast)
####Work on overall migration trends
#Could use code cleanup after trying things, but have but I have a working ARIMA to model Lincoln county migration
TS_DATA <- POP_DATA %>% mutate(In_Migration=ifelse(Migration>0,1,0)) %>% group_by(County) %>% arrange(County,Year) %>% mutate(Prev_Pop=lag(Population))
ST_YEAR <- min(pull(TS_DATA %>% filter(!is.na(Migration)),Year))
END_YEAR <- max(pull(TS_DATA %>% filter(!is.na(Migration)),Year))
#GRAPH_DATA <- TS_DATA %>% filter(!is.na(Migration))
#GRAPH_DATA_LN <- TS_DATA %>% filter(!is.na(Migration),County=="Lincoln")
#ggplot(GRAPH_DATA,aes(x=Year,y=Migration/Prev_Pop,group=County,color=County))+geom_point()+geom_line(data=GRAPH_DATA_LN)
TS_WIDE <- TS_DATA %>% dplyr::select(Year,County,Migration) %>% pivot_wider(values_from=Migration,names_from=County) %>% arrange(Year) %>% filter(Year>ST_YEAR+1,Year<=END_YEAR) %>%ts(start=c(ST_YEAR+1),frequency=1)
LN <- TS_DATA %>% dplyr::select(Year,County,Migration) %>% pivot_wider(values_from=Migration,names_from=County) %>% arrange(Year) %>% dplyr::select(Lincoln,Year) %>% filter(Year>=ST_YEAR,Year<=END_YEAR) %>% dplyr::select(-Year) %>%ts(start=c(ST_YEAR),frequency=1)
#Create an ARIMA of Migration so the number of people migrating can be simulated
#Time series tests
#library(tseries)
#adf.test(LN,k=1) #Stationary with one lag, otherwise not stationary
#kpss.test(LN) #Stationary,default of program and has some model fit improvements
MOD <- auto.arima(LN,stationary=TRUE)
#summary(MOD)
#Validity tests
#autoplot(MOD)
#acf(resid(MOD))
#pacf(resid(MOD))
# adf.test(resid(MOD))
#checkresiduals(MOD)
#Save the resulting model outputs, will need to be changed if looking at other counties
saveRDS(MOD,"Data/Regression_Results/LN_ARIMA_MODEL.Rds")
MIGRATION_ARIMA_SIMS <- (do.call(cbind,mclapply(1:NUM_SIMULATIONS,function(x){as.numeric(round(simulate(MOD,future=TRUE, nsim=NUM_YEARS_PROJECTED)))},mc.cores =detectCores()-1)))#testing a multiple run simulation could use parallel process
saveRDS(MIGRATION_ARIMA_SIMS,"Data/Simulated_Data_Sets/Migration_ARIMA.Rds")
write.csv(MIGRATION_ARIMA_SIMS,row.names=FALSE,"Data/Simulated_Data_Sets/Migration_ARIMA.csv")