73 lines
4.8 KiB
R
73 lines
4.8 KiB
R
library(forecast)
|
|
library(tidyverse)
|
|
#setwd("../")
|
|
####Work on overall migration trends
|
|
#Could use code cleanup after trying things, but have but I have a working ARIMA to model Lincoln county migration
|
|
POP_DATA <- readRDS("Data/Cleaned_Data/Population_Data/RDS/Full_Lincoln_County_Population_Data.Rds") %>% mutate(Migration=Migration/Population)
|
|
POP_DATA_TEST <- readRDS("Data/Cleaned_Data/Population_Data/RDS/Full_Lincoln_County_Population_Data.Rds") %>% mutate(Migration=Migration/Population)
|
|
|
|
POP_KEM_DATA <- readRDS("Data/Cleaned_Data/Population_Data/RDS/Kemmerer_Diamondville_Population_Data.Rds")
|
|
POP_OTHER_DATA <- readRDS("Data/Cleaned_Data/Population_Data/RDS/Other_Lincoln_Population_Data.Rds")
|
|
hist(POP_OTHER_DATA$Migration/POP_OTHER_DATA$Population)
|
|
hist(POP_KEM_DATA$Migration/POP_KEM_DATA$Population)
|
|
|
|
|
|
TS_DATA <- POP_DATA %>% mutate(In_Migration=ifelse(Migration>0,1,0)) %>% group_by(County) %>% arrange(County,Year) %>% mutate(Prev_Pop=lag(Population)) %>% ungroup
|
|
TS_DATA_TEST <- POP_DATA_TEST %>% mutate(In_Migration=ifelse(Migration>0,1,0)) %>% group_by(County) %>% arrange(County,Year) %>% mutate(Prev_Pop=lag(Population)) %>% ungroup
|
|
|
|
TS_KEM_DATA <- POP_KEM_DATA %>% mutate(In_Migration=ifelse(Migration>0,1,0)) %>% group_by(County) %>% arrange(County,Year) %>% mutate(Prev_Pop=lag(Population)) %>% ungroup
|
|
TS_OTHER_DATA <- POP_OTHER_DATA %>% mutate(In_Migration=ifelse(Migration>0,1,0)) %>% group_by(County) %>% arrange(County,Year) %>% mutate(Prev_Pop=lag(Population)) %>% ungroup
|
|
|
|
|
|
ST_YEAR <- min(pull(TS_DATA %>% filter(!is.na(Migration)),Year))
|
|
END_YEAR <- max(pull(TS_DATA %>% filter(!is.na(Migration)),Year))
|
|
|
|
ST_YEAR_KEM <- min(pull(TS_KEM_DATA %>% filter(!is.na(Migration)),Year))
|
|
END_YEAR_KEM <- max(pull(TS_KEM_DATA %>% filter(!is.na(Migration)),Year))
|
|
|
|
ST_YEAR_OTHER <- min(pull(TS_OTHER_DATA %>% filter(!is.na(Migration)),Year))
|
|
END_YEAR_OTHER <- max(pull(TS_OTHER_DATA %>% filter(!is.na(Migration)),Year))
|
|
|
|
|
|
#GRAPH_DATA <- TS_DATA %>% filter(!is.na(Migration))
|
|
#GRAPH_DATA_LN <- TS_DATA %>% filter(!is.na(Migration),County=="Lincoln")
|
|
#ggplot(GRAPH_DATA,aes(x=Year,y=Migration/Prev_Pop,group=County,color=County))+geom_point()+geom_line(data=GRAPH_DATA_LN)
|
|
TS_WIDE <- TS_DATA %>% dplyr::select(Year,County,Migration) %>% pivot_wider(values_from=Migration,names_from=County) %>% arrange(Year) %>% filter(Year>ST_YEAR+1,Year<=END_YEAR) %>%ts(start=c(ST_YEAR+1),frequency=1)
|
|
|
|
TS_KEM_WIDE <- TS_KEM_DATA %>% dplyr::select(Year,County,Migration) %>% pivot_wider(values_from=Migration,names_from=County) %>% arrange(Year) %>% filter(Year>ST_YEAR+1,Year<=END_YEAR) %>%ts(start=c(ST_YEAR+1),frequency=1)
|
|
TS_OTHER_WIDE <- TS_OTHER_DATA %>% dplyr::select(Year,County,Migration) %>% pivot_wider(values_from=Migration,names_from=County) %>% arrange(Year) %>% filter(Year>ST_YEAR+1,Year<=END_YEAR) %>%ts(start=c(ST_YEAR+1),frequency=1)
|
|
|
|
|
|
LN <- TS_DATA %>% dplyr::select(Year,County,Migration) %>% pivot_wider(values_from=Migration,names_from=County) %>% arrange(Year) %>% dplyr::select(Lincoln,Year) %>% filter(Year>=ST_YEAR,Year<=END_YEAR) %>% dplyr::select(-Year) %>%ts(start=c(ST_YEAR),frequency=1)
|
|
|
|
KEM <- TS_KEM_DATA %>% dplyr::select(Year,Region,Migration) %>% pivot_wider(values_from=Migration,names_from=Region) %>% arrange(Year) %>% dplyr::select('Kemmerer & Diamondville',Year) %>% filter(Year>=ST_YEAR_KEM,Year<=END_YEAR_KEM) %>% dplyr::select(-Year) %>%ts(start=c(ST_YEAR_KEM),frequency=1)
|
|
TS_OTHER_DATA
|
|
OTHER <- TS_OTHER_DATA %>% dplyr::select(Year,Region,Migration) %>% pivot_wider(values_from=Migration,names_from=Region) %>% arrange(Year) %>% dplyr::select('Lincoln Other'=Lincoln_Other,Year) %>% filter(Year>=ST_YEAR_OTHER,Year<=END_YEAR_OTHER) %>% dplyr::select(-Year) %>%ts(start=c(ST_YEAR_OTHER),frequency=1)
|
|
|
|
#Create an ARIMA of Migration so the number of people migrating can be simulated
|
|
#Time series tests
|
|
#library(tseries)
|
|
#adf.test(LN,k=1) #Stationary with one lag, otherwise not stationary
|
|
#kpss.test(LN) #Stationary,default of program and has some model fit improvements
|
|
MOD <- auto.arima(LN,stationary=TRUE)
|
|
MOD_KEM <- auto.arima(KEM)
|
|
MOD_OTHER <- auto.arima(OTHER)
|
|
plot(forecast(MOD ))
|
|
plot(forecast(MOD_KEM ))
|
|
plot(forecast(MOD_OTHER ))
|
|
plot(forecast(MOD,abs(KEM))
|
|
|
|
#summary(MOD)
|
|
#Validity tests
|
|
#autoplot(MOD)
|
|
#acf(resid(MOD))
|
|
#pacf(resid(MOD))
|
|
# adf.test(resid(MOD))
|
|
#checkresiduals(MOD)
|
|
#Save the resulting model outputs, will need to be changed if looking at other counties
|
|
#saveRDS(MOD,"Data/Regression_Results/LN_ARIMA_MODEL.Rds")
|
|
|
|
MIGRATION_ARIMA_SIMS <- (do.call(cbind,mclapply(1:NUM_SIMULATIONS,function(x){as.numeric(round(simulate(MOD,future=TRUE, nsim=NUM_YEARS_PROJECTED)))},mc.cores =detectCores()-1)))#testing a multiple run simulation could use parallel process
|
|
saveRDS(MIGRATION_ARIMA_SIMS,"Data/Simulated_Data_Sets/Migration_ARIMA.Rds")
|
|
write.csv(MIGRATION_ARIMA_SIMS,row.names=FALSE,"Data/Simulated_Data_Sets/Migration_ARIMA.csv")
|