Finished an ARIMA model
This commit is contained in:
parent
23cccf13b2
commit
6ebd44fa1f
@ -93,20 +93,9 @@ MIG_AGE_DIST <- PRED_DATA$MIGRATION_COEF/sum(PRED_DATA$MIGRATION_COEF)
|
||||
write.csv(MIG_AGE_DIST ,"Data/Other_Intermediate_Outputs/Migreation_Age_Probablity_One_to_Ninety.csv",row.names=FALSE)
|
||||
|
||||
####Work on overall migration trends
|
||||
#Could use code cleanup after trying things, but have but I have a working ARIMA to model lincoln county migration
|
||||
TS_DATA <- POP_DATA
|
||||
TS_DATA <- TS_DATA %>% mutate(In_Migration=ifelse(Migration>0,1,0)) %>% group_by(County) %>% arrange(County,Year) %>% mutate(Prev_Pop=lag(Population),ABS_PER_MIG=abs(Migration)/Prev_Pop,L_MIG=lag(Migration))
|
||||
TS_DATA
|
||||
feols(Migration~L_MIG+Prev_Pop|Year+County,data=TS_DATA)
|
||||
(feols(ABS_PER_MIG~In_Migration+log(Prev_Pop)+Year*County,TS_DATA))
|
||||
LN_TS_DATA <- TS_DATA %>% filter(County=='Lincoln')
|
||||
|
||||
MOD <- feols(Migration~L_MIG+Prev_Pop+Year,data=LN_TS_DATA)
|
||||
|
||||
MOD <- feols(Migration~L_MIG+Prev_Pop+Year*County,data=TS_DATA)
|
||||
|
||||
acf(resid(MOD))
|
||||
pacf(resid(MOD))
|
||||
library("forecast")
|
||||
ST_YEAR <- min(pull(TS_DATA,Year))
|
||||
END_YEAR <- max(pull(TS_DATA,Year))
|
||||
TS_DATA %>% filter(County=='Lincoln') %>% pull(Migration) %>% plot()
|
||||
@ -115,39 +104,62 @@ GRAPH_DATA <- TS_DATA %>% filter(!is.na(Migration))
|
||||
GRAPH_DATA_LN <- TS_DATA %>% filter(!is.na(Migration),County=="Lincoln")
|
||||
|
||||
ggplot(GRAPH_DATA,aes(x=Year,y=Migration/Prev_Pop,group=County,color=County))+geom_point()+geom_line(data=GRAPH_DATA_LN)
|
||||
TS_DATA
|
||||
TS_WIDE <- TS_DATA %>% mutate(Migration=Migration/Prev_Pop) %>% select(Year,County,Migration) %>% pivot_wider(values_from=Migration,names_from=County) %>% arrange(Year) %>% filter(Year>1990,Year<=2021) %>%ts(start=c(1991),frequency=1)
|
||||
TS_WIDE <- TS_DATA %>% mutate(Migration=Migration) %>% dplyr::select(Year,County,Migration) %>% pivot_wider(values_from=Migration,names_from=County) %>% arrange(Year) %>% filter(Year>1990,Year<=2021) %>%ts(start=c(1991),frequency=1)
|
||||
TS_DATA %>% filter(County=='Lincoln')
|
||||
ST_YEAR <- 1970
|
||||
LN <- TS_DATA %>% mutate(Migration=Migration) %>% dplyr::select(Year,County,Migration) %>% pivot_wider(values_from=Migration,names_from=County) %>% arrange(Year) %>% dplyr::select(Lincoln,Year) %>% filter(Year>=ST_YEAR,Year<=2021) %>% dplyr::select(-Year) %>%ts(start=c(ST_YEAR),frequency=1)
|
||||
#Create an ARIMA of Migration so the number of people migrating can be simulated
|
||||
library("forecast")
|
||||
#Time series tests
|
||||
adf.test(LN,k=1) #Stationary with one lag, otherwise not stationary
|
||||
kpss.test(LN) #Stationary,default of program and has some model fit improvements
|
||||
MOD <- auto.arima(LN,test="kpss",stationary=TRUE)
|
||||
summary(MOD)
|
||||
#Validity tests
|
||||
autoplot(MOD)
|
||||
acf(resid(MOD))
|
||||
pacf(resid(MOD))
|
||||
# adf.test(resid(MOD))
|
||||
checkresiduals(MOD)
|
||||
#Save the resulting model outputs, will need to be changed if looking at other counties
|
||||
saveRDS(MOD,"Data/Regression_Results/LN_ARIMA_MODEL.Rds")
|
||||
|
||||
LN <- TS_DATA %>% mutate(Migration=Migration) %>% select(Year,County,Migration) %>% pivot_wider(values_from=Migration,names_from=County) %>% arrange(Year) %>% select(Lincoln,Year) %>% filter(Year>1990,Year<=2021) %>% select(-Year) %>%ts(start=c(1991),frequency=1)
|
||||
|
||||
TS_WIDE[,'Lincoln']
|
||||
|
||||
LN <- TS_WIDE[,"Lincoln"]
|
||||
plot(LN )
|
||||
library(tseries)
|
||||
adf.test(LN)
|
||||
TEST <- auto.arima(LN )
|
||||
plot(TEST)
|
||||
forecast(TEST)
|
||||
acf(resid(TEST))
|
||||
arima.sim(TEST,n=1)
|
||||
arima.sim(arima(LN,order=c(2,4,3)),n=1)
|
||||
|
||||
summary(TEST)
|
||||
plot(forecast(TEST))
|
||||
resid(TEST)
|
||||
acf(resid(TEST))
|
||||
auto.arima(LN )
|
||||
|
||||
plot(forecast(TEST))
|
||||
TS_WIDE %>% filter(Year>1970) %>% pull(Year)
|
||||
TS_WIDE
|
||||
lapply(1:10^6,function(x){round(simulate(MOD,future=TRUE, nsim=50))})#testing a multiple run simulation could use parallel process
|
||||
|
||||
|
||||
summary(feols(ABS_PER_MIG~Year,LN_TS_DATA))
|
||||
#################################Simulate data
|
||||
MIG_AGE_DIST
|
||||
NUM_SIMS<-34
|
||||
PROB <- MIG_AGE_DIST
|
||||
|
||||
MIG_SIM <- round(simulate(MOD,future=TRUE, nsim=50)) #Round up for whole numbers
|
||||
NUM_SIMS <- abs(MIG_SIM[[1]])
|
||||
INCREASE <- MIG_SIM[[1]]/abs(MIG_SIM[[1]]) #Check if positive or negative migration, as these are handled diffrently
|
||||
if(INCREASE==1){MF_SAMPLE <- sample(x=c("Male","Female"),NUM_SIMS,replace=TRUE)}
|
||||
sample(x=1:90,size=NUM_SIMS,prob=PROB,replace=TRUE)
|
||||
C_DEMO_DATA <- DEMOGRAPHIC_DATA %>% filter(County=='Lincoln',Year==max(Year))
|
||||
NUM_MALE <- pull(C_DEMO_DATA ,"Num_Male")
|
||||
NUM_FEMALE <- pull(C_DEMO_DATA,"Num_Female")
|
||||
####WORKING ON THE CASE WHEN WE ARE REMOVING INDIVIDUALS. IF THERE ARE NONE THEY SHOULD NOT MOVE. ON THE OTHER HAND IF MOVING IN THE AVERAGE VALUES WORK
|
||||
MAKE_SET <- function(x){
|
||||
C_PROB <- MIG_AGE_DIST[x]
|
||||
C_LOOP <- C_DEMO_DATA[x+1,]
|
||||
NUM_MALE <- C_LOOP$Num_Male
|
||||
NUM_FEMALE <- C_LOOP$Num_Female
|
||||
C_AGE <- C_LOOP$Age
|
||||
return(rbind(cbind(rep(C_AGE,NUM_MALE),rep("Male",NUM_MALE),rep(C_PROB,NUM_MALE)),
|
||||
cbind(rep(C_AGE,NUM_FEMALE),rep("Female",NUM_FEMALE),rep(C_PROB,NUM_FEMALE))))
|
||||
}
|
||||
FULL_SET <- sapply(1:85,MAKE_SET) %>% as_tibble
|
||||
FULL_SET[1]
|
||||
|
||||
sample(1:length(FULL_SET),prob=FULL_SET[,3],size=1)
|
||||
|
||||
sample(x=0,size=NUM_SIMS,prob=c(NUM_MALE,NUM_FEMALE)*rep(PROB,2),replace=TRUE)
|
||||
sample(x=c('a','a','b'),size=2,prob=c(2,2,0),replace=FALSE)
|
||||
|
||||
|
||||
|
||||
mean(sample(x=c(1,0),2000000,prob=c(3,1),replace=TRUE))
|
||||
|
||||
|
||||
#Start of a simulation method with the inputs.
|
||||
#plot(100*PROB)
|
||||
#NUM_SIMS <- 1000
|
||||
#sample(x=1:90,size=NUM_SIMS,prob=PROB,replace=TRUE)
|
||||
#sample(x=c("Male","Female"),size=NUM_SIMS,replace=TRUE)
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user