Finished an ARIMA model

This commit is contained in:
Alex Gebben Work 2025-10-28 17:31:55 -06:00
parent 23cccf13b2
commit 6ebd44fa1f

View File

@ -93,20 +93,9 @@ MIG_AGE_DIST <- PRED_DATA$MIGRATION_COEF/sum(PRED_DATA$MIGRATION_COEF)
write.csv(MIG_AGE_DIST ,"Data/Other_Intermediate_Outputs/Migreation_Age_Probablity_One_to_Ninety.csv",row.names=FALSE) write.csv(MIG_AGE_DIST ,"Data/Other_Intermediate_Outputs/Migreation_Age_Probablity_One_to_Ninety.csv",row.names=FALSE)
####Work on overall migration trends ####Work on overall migration trends
#Could use code cleanup after trying things, but have but I have a working ARIMA to model lincoln county migration
TS_DATA <- POP_DATA TS_DATA <- POP_DATA
TS_DATA <- TS_DATA %>% mutate(In_Migration=ifelse(Migration>0,1,0)) %>% group_by(County) %>% arrange(County,Year) %>% mutate(Prev_Pop=lag(Population),ABS_PER_MIG=abs(Migration)/Prev_Pop,L_MIG=lag(Migration)) TS_DATA <- TS_DATA %>% mutate(In_Migration=ifelse(Migration>0,1,0)) %>% group_by(County) %>% arrange(County,Year) %>% mutate(Prev_Pop=lag(Population),ABS_PER_MIG=abs(Migration)/Prev_Pop,L_MIG=lag(Migration))
TS_DATA
feols(Migration~L_MIG+Prev_Pop|Year+County,data=TS_DATA)
(feols(ABS_PER_MIG~In_Migration+log(Prev_Pop)+Year*County,TS_DATA))
LN_TS_DATA <- TS_DATA %>% filter(County=='Lincoln')
MOD <- feols(Migration~L_MIG+Prev_Pop+Year,data=LN_TS_DATA)
MOD <- feols(Migration~L_MIG+Prev_Pop+Year*County,data=TS_DATA)
acf(resid(MOD))
pacf(resid(MOD))
library("forecast")
ST_YEAR <- min(pull(TS_DATA,Year)) ST_YEAR <- min(pull(TS_DATA,Year))
END_YEAR <- max(pull(TS_DATA,Year)) END_YEAR <- max(pull(TS_DATA,Year))
TS_DATA %>% filter(County=='Lincoln') %>% pull(Migration) %>% plot() TS_DATA %>% filter(County=='Lincoln') %>% pull(Migration) %>% plot()
@ -115,39 +104,62 @@ GRAPH_DATA <- TS_DATA %>% filter(!is.na(Migration))
GRAPH_DATA_LN <- TS_DATA %>% filter(!is.na(Migration),County=="Lincoln") GRAPH_DATA_LN <- TS_DATA %>% filter(!is.na(Migration),County=="Lincoln")
ggplot(GRAPH_DATA,aes(x=Year,y=Migration/Prev_Pop,group=County,color=County))+geom_point()+geom_line(data=GRAPH_DATA_LN) ggplot(GRAPH_DATA,aes(x=Year,y=Migration/Prev_Pop,group=County,color=County))+geom_point()+geom_line(data=GRAPH_DATA_LN)
TS_DATA TS_WIDE <- TS_DATA %>% mutate(Migration=Migration) %>% dplyr::select(Year,County,Migration) %>% pivot_wider(values_from=Migration,names_from=County) %>% arrange(Year) %>% filter(Year>1990,Year<=2021) %>%ts(start=c(1991),frequency=1)
TS_WIDE <- TS_DATA %>% mutate(Migration=Migration/Prev_Pop) %>% select(Year,County,Migration) %>% pivot_wider(values_from=Migration,names_from=County) %>% arrange(Year) %>% filter(Year>1990,Year<=2021) %>%ts(start=c(1991),frequency=1) TS_DATA %>% filter(County=='Lincoln')
ST_YEAR <- 1970
LN <- TS_DATA %>% mutate(Migration=Migration) %>% dplyr::select(Year,County,Migration) %>% pivot_wider(values_from=Migration,names_from=County) %>% arrange(Year) %>% dplyr::select(Lincoln,Year) %>% filter(Year>=ST_YEAR,Year<=2021) %>% dplyr::select(-Year) %>%ts(start=c(ST_YEAR),frequency=1)
#Create an ARIMA of Migration so the number of people migrating can be simulated
library("forecast")
#Time series tests
adf.test(LN,k=1) #Stationary with one lag, otherwise not stationary
kpss.test(LN) #Stationary,default of program and has some model fit improvements
MOD <- auto.arima(LN,test="kpss",stationary=TRUE)
summary(MOD)
#Validity tests
autoplot(MOD)
acf(resid(MOD))
pacf(resid(MOD))
# adf.test(resid(MOD))
checkresiduals(MOD)
#Save the resulting model outputs, will need to be changed if looking at other counties
saveRDS(MOD,"Data/Regression_Results/LN_ARIMA_MODEL.Rds")
LN <- TS_DATA %>% mutate(Migration=Migration) %>% select(Year,County,Migration) %>% pivot_wider(values_from=Migration,names_from=County) %>% arrange(Year) %>% select(Lincoln,Year) %>% filter(Year>1990,Year<=2021) %>% select(-Year) %>%ts(start=c(1991),frequency=1) lapply(1:10^6,function(x){round(simulate(MOD,future=TRUE, nsim=50))})#testing a multiple run simulation could use parallel process
TS_WIDE[,'Lincoln']
LN <- TS_WIDE[,"Lincoln"]
plot(LN )
library(tseries)
adf.test(LN)
TEST <- auto.arima(LN )
plot(TEST)
forecast(TEST)
acf(resid(TEST))
arima.sim(TEST,n=1)
arima.sim(arima(LN,order=c(2,4,3)),n=1)
summary(TEST)
plot(forecast(TEST))
resid(TEST)
acf(resid(TEST))
auto.arima(LN )
plot(forecast(TEST))
TS_WIDE %>% filter(Year>1970) %>% pull(Year)
TS_WIDE
summary(feols(ABS_PER_MIG~Year,LN_TS_DATA)) #################################Simulate data
MIG_AGE_DIST
NUM_SIMS<-34
PROB <- MIG_AGE_DIST
MIG_SIM <- round(simulate(MOD,future=TRUE, nsim=50)) #Round up for whole numbers
NUM_SIMS <- abs(MIG_SIM[[1]])
INCREASE <- MIG_SIM[[1]]/abs(MIG_SIM[[1]]) #Check if positive or negative migration, as these are handled diffrently
if(INCREASE==1){MF_SAMPLE <- sample(x=c("Male","Female"),NUM_SIMS,replace=TRUE)}
sample(x=1:90,size=NUM_SIMS,prob=PROB,replace=TRUE)
C_DEMO_DATA <- DEMOGRAPHIC_DATA %>% filter(County=='Lincoln',Year==max(Year))
NUM_MALE <- pull(C_DEMO_DATA ,"Num_Male")
NUM_FEMALE <- pull(C_DEMO_DATA,"Num_Female")
####WORKING ON THE CASE WHEN WE ARE REMOVING INDIVIDUALS. IF THERE ARE NONE THEY SHOULD NOT MOVE. ON THE OTHER HAND IF MOVING IN THE AVERAGE VALUES WORK
MAKE_SET <- function(x){
C_PROB <- MIG_AGE_DIST[x]
C_LOOP <- C_DEMO_DATA[x+1,]
NUM_MALE <- C_LOOP$Num_Male
NUM_FEMALE <- C_LOOP$Num_Female
C_AGE <- C_LOOP$Age
return(rbind(cbind(rep(C_AGE,NUM_MALE),rep("Male",NUM_MALE),rep(C_PROB,NUM_MALE)),
cbind(rep(C_AGE,NUM_FEMALE),rep("Female",NUM_FEMALE),rep(C_PROB,NUM_FEMALE))))
}
FULL_SET <- sapply(1:85,MAKE_SET) %>% as_tibble
FULL_SET[1]
sample(1:length(FULL_SET),prob=FULL_SET[,3],size=1)
sample(x=0,size=NUM_SIMS,prob=c(NUM_MALE,NUM_FEMALE)*rep(PROB,2),replace=TRUE)
sample(x=c('a','a','b'),size=2,prob=c(2,2,0),replace=FALSE)
mean(sample(x=c(1,0),2000000,prob=c(3,1),replace=TRUE))
#Start of a simulation method with the inputs.
#plot(100*PROB)
#NUM_SIMS <- 1000
#sample(x=1:90,size=NUM_SIMS,prob=PROB,replace=TRUE)
#sample(x=c("Male","Female"),size=NUM_SIMS,replace=TRUE)