Imparted values to migration, and deaths. ARIMA

This commit is contained in:
Alex 2025-11-14 18:09:18 -07:00
parent 809e75e58d
commit 99063bec79
7 changed files with 157 additions and 41 deletions

View File

@ -4,9 +4,10 @@ Rscript "./Scripts/1B_Process_Existing_NIH_Mortality_Data.r"
Rscript "./Scripts/1C_Download_and_Process_Demographic_Data.r" Rscript "./Scripts/1C_Download_and_Process_Demographic_Data.r"
Rscript "./Scripts/1D_Use_ACS_Census_Data_to_Estimate_Kemmerer_Demographics.r" Rscript "./Scripts/1D_Use_ACS_Census_Data_to_Estimate_Kemmerer_Demographics.r"
#Create data sets used in later simulations, produce some results for the report when related to this process. #Create data sets used in later simulations, produce some results for the report when related to this process.
Rscript "./Scripts/2A_Birth_Rate_Regression.r" Rscript "./Scripts/2A_Birth_Rate_Regression_and_Impart_Kemmerer_Births.r"
Rscript "./Scripts/2B_Migration_by_Age_Regression.r" Rscript "./Scripts/2B_Impart_Deaths_and_Migration_to_Subregions.r"
Rscript "./Scripts/2C_Overall_Migration_ARIMA.r" Rscript "./Scripts/2C_Migration_by_Age_Regression.r"
Rscript "./Scripts/2D_Overall_Migration_ARIMA.r"
#Produce final results for either the simulation, or information for the report, but not anything used in later stages of the simulation #Produce final results for either the simulation, or information for the report, but not anything used in later stages of the simulation
Rscript "./Scripts/3A_Population_Pyramid.r" Rscript "./Scripts/3A_Population_Pyramid.r"

View File

@ -22,7 +22,6 @@ PROJ_TRACTS <- PROJ_TRACTS %>% select(-ZCTA5) %>% filter(GEOID!=56023978200) %>%
PROJ_TRACTS <- PROJ_TRACTS %>% select(GEOID) %>% mutate('IN_KEM'=1) %>% mutate(GEOID=as.character(GEOID)) PROJ_TRACTS <- PROJ_TRACTS %>% select(GEOID) %>% mutate('IN_KEM'=1) %>% mutate(GEOID=as.character(GEOID))
###Load data manually created which links vairable names to sex-age census data ###Load data manually created which links vairable names to sex-age census data
CODES <- read_csv("./Data/Raw_Data/ACS_Demographics/API_CENSUS_CODES.csv",skip=1) %>% mutate(Med_Age=(Min_Age+Max_Age)/2) %>% rename(variable=Code) CODES <- read_csv("./Data/Raw_Data/ACS_Demographics/API_CENSUS_CODES.csv",skip=1) %>% mutate(Med_Age=(Min_Age+Max_Age)/2) %>% rename(variable=Code)
#Testing age Comparison between the two
###Extract census data for all tracts in Lincoln county, clean up the data, and indicate if the tract is in Kemmerer/Diamondvile or not. ###Extract census data for all tracts in Lincoln county, clean up the data, and indicate if the tract is in Kemmerer/Diamondvile or not.
DEMO_DATA_ALL <- do.call(rbind,lapply(2009:ACS_END_YEAR,MAKE_KEM_DEMO_DATA_YEAR)) DEMO_DATA_ALL <- do.call(rbind,lapply(2009:ACS_END_YEAR,MAKE_KEM_DEMO_DATA_YEAR))
ORIG_DEMO_DATA_ALL <- DEMO_DATA_ALL ORIG_DEMO_DATA_ALL <- DEMO_DATA_ALL
@ -36,13 +35,13 @@ KEM_DEMO_DATA <- DEMO_DATA_ALL %>% filter(IN_KEM==1) %>% rename(Region=IN_KEM)
#Ajust the populations to match the total population stastics from the other data sources, since the tracts may spill into other areas #Ajust the populations to match the total population stastics from the other data sources, since the tracts may spill into other areas
POST_ADJUST_DATA <- KEM_DEMO_DATA %>% group_by(Year) %>% summarize(Kem_Demo_Population=sum(Num_Female)+sum(Num_Male)) %>% left_join(OTHER_LIN_DEMO_DATA %>% group_by(Year) %>% summarize(Other_Demo_Population=sum(Num_Female)+sum(Num_Male))) %>% mutate(Total_Lincoln_Demo_Population=Kem_Demo_Population+Other_Demo_Population) POST_ADJUST_DATA <- KEM_DEMO_DATA %>% group_by(Year) %>% summarize(Kem_Demo_Population=sum(Num_Female)+sum(Num_Male)) %>% left_join(OTHER_LIN_DEMO_DATA %>% group_by(Year) %>% summarize(Other_Demo_Population=sum(Num_Female)+sum(Num_Male))) %>% mutate(Total_Lincoln_Demo_Population=Kem_Demo_Population+Other_Demo_Population)
DIRECT_POP <- readRDS("Data/Cleaned_Data/Population_Data/RDS/All_Wyoming_County_Populations.Rds") %>% filter(County=='Lincoln') %>% select(Year,Lin_Direct_Population=Population) %>% full_join(readRDS("Data/Cleaned_Data/Population_Data/RDS/All_Wyoming_City_Populations.Rds") %>% filter(City %in% c('Kemmerer','Diamondville')) %>% group_by(Year) %>% summarize(Kem_Direct_Population=sum(Population,na.rm=TRUE))) %>% mutate(Other_Direct_Population=Lin_Direct_Population-Kem_Direct_Population) DIRECT_POP <- readRDS("Data/Cleaned_Data/Population_Data/RDS/All_Wyoming_County_Populations.Rds") %>% filter(County=='Lincoln') %>% select(Year,Lin_Direct_Population=Population) %>% full_join(readRDS("Data/Cleaned_Data/Population_Data/RDS/All_Wyoming_City_Populations.Rds") %>% filter(City %in% c('Kemmerer','Diamondville')) %>% group_by(Year) %>% summarize(Kem_Direct_Population=sum(Population,na.rm=TRUE))) %>% mutate(Other_Direct_Population=Lin_Direct_Population-Kem_Direct_Population)
ADJUST_TABLE <- DIRECT_POP %>% inner_join(POST_ADJUST_DATA) %>% mutate(KEM_ADJ=Kem_Direct_Population/Kem_Demo_Population,OTHER_ADJ=Lin_Direct_Population/Other_Demo_Population,LIN_ADJ=Lin_Direct_Population/Total_Lincoln_Demo_Population) %>% select(Year,LIN_ADJ,KEM_ADJ,OTHER_ADJ) ADJUST_TABLE <- DIRECT_POP %>% inner_join(POST_ADJUST_DATA) %>% mutate(KEM_ADJ=Kem_Direct_Population/Kem_Demo_Population,OTHER_ADJ=Other_Direct_Population/Other_Demo_Population,LIN_ADJ=Lin_Direct_Population/Total_Lincoln_Demo_Population) %>% select(Year,LIN_ADJ,KEM_ADJ,OTHER_ADJ)
##Checking Total
KEM_DEMO_DATA <- KEM_DEMO_DATA %>% left_join(ADJUST_TABLE %>% select(Year,KEM_ADJ)) %>% mutate(Num_Female=round(KEM_ADJ*Num_Female),Num_Male=round(KEM_ADJ*Num_Male)) %>% select(-KEM_ADJ) KEM_DEMO_DATA <- KEM_DEMO_DATA %>% left_join(ADJUST_TABLE %>% select(Year,KEM_ADJ)) %>% mutate(Num_Female=round(KEM_ADJ*Num_Female),Num_Male=round(KEM_ADJ*Num_Male)) %>% select(-KEM_ADJ)
OTHER_LIN_DEMO_DATA <- OTHER_LIN_DEMO_DATA%>% left_join(ADJUST_TABLE %>% select(Year,OTHER_ADJ)) %>% mutate(Num_Female=round(OTHER_ADJ*Num_Female),Num_Male=round(OTHER_ADJ*Num_Male)) %>% select(-OTHER_ADJ) OTHER_LIN_DEMO_DATA <- OTHER_LIN_DEMO_DATA%>% left_join(ADJUST_TABLE %>% select(Year,OTHER_ADJ)) %>% mutate(Num_Female=round(OTHER_ADJ*Num_Female),Num_Male=round(OTHER_ADJ*Num_Male)) %>% select(-OTHER_ADJ)
#Find the most recent data year #Find the most recent data year
MAX_YEAR <- max(KEM_DEMO_DATA$Year) MAX_YEAR <- max(KEM_DEMO_DATA$Year)
KEM_DEMO_MAT <- KEM_DEMO_DATA %>% filter(Year==MAX_YEAR) %>% select(Num_Male,Num_Female) %>% as.matrix KEM_DEMO_MAT <- KEM_DEMO_DATA %>% filter(Year==MAX_YEAR) %>% select(Num_Male,Num_Female) %>% as.matrix

View File

@ -36,9 +36,7 @@ KEM_BIRTH_DATA <- KEM_BIRTH_DATA %>% left_join(ADD_BIRTH_GROUP_DATA(KEM_RAW_DEMO
####################Create same data set but for only parts of Lincoln not in the Kemmerer Diamondville area ####################Create same data set but for only parts of Lincoln not in the Kemmerer Diamondville area
OTHER_RAW_DEMOGRAPHIC_DATA <- readRDS(DEMOGRAPHIC_OTHER_LIN_LOC) OTHER_RAW_DEMOGRAPHIC_DATA <- readRDS(DEMOGRAPHIC_OTHER_LIN_LOC)
OTHER_DEMO_DATA <- OTHER_RAW_DEMOGRAPHIC_DATA %>% mutate(POP=Num_Male+Num_Female,Births=ifelse(Age==0,POP,0)) %>% group_by(Year,County,Region) %>% summarize(Births=sum(Births)) OTHER_BIRTH_DATA<- OTHER_RAW_DEMOGRAPHIC_DATA %>% mutate(Population=Num_Male+Num_Female,Births=ifelse(Age==0,Population,0)) %>% group_by(Year,County,Region) %>% summarize(Population=sum(Population),Births=sum(Births)) %>% ungroup %>% arrange(desc(Year))
OTHER_POP_DATA <- readRDS(POPULATION_CITY_LOC)%>% rename(Region=City) %>% filter(!(Region %in% c("Kemmerer","Diamondville")),County=='Lincoln') %>% group_by(Year) %>% mutate(Population=sum(Population,na.rm=TRUE)) %>% ungroup %>% mutate(Region='Lincoln_Other') %>% unique %>% ungroup
OTHER_BIRTH_DATA <- OTHER_POP_DATA %>% left_join(OTHER_DEMO_DATA)
OTHER_BIRTH_DATA <- OTHER_BIRTH_DATA %>% left_join(ADD_BIRTH_GROUP_DATA(OTHER_RAW_DEMOGRAPHIC_DATA)) OTHER_BIRTH_DATA <- OTHER_BIRTH_DATA %>% left_join(ADD_BIRTH_GROUP_DATA(OTHER_RAW_DEMOGRAPHIC_DATA))
####################################################3 ####################################################3
@ -49,7 +47,10 @@ ADJUSTED_KEM_STUB_DATA <- COUNTY_BIRTH_DATA %>% filter(Region=='Lincoln') %>% s
KEM_BIRTH_DATA <- KEM_BIRTH_DATA %>% select(-Births) %>% left_join(ADJUSTED_KEM_STUB_DATA ) KEM_BIRTH_DATA <- KEM_BIRTH_DATA %>% select(-Births) %>% left_join(ADJUSTED_KEM_STUB_DATA )
OTHER_BIRTH_DATA <- OTHER_BIRTH_DATA %>% select(-Births) %>% left_join(ADJUSTED_KEM_STUB_DATA) OTHER_BIRTH_DATA <- OTHER_BIRTH_DATA %>% select(-Births) %>% left_join(ADJUSTED_KEM_STUB_DATA)
REG_DATA <- rbind(COUNTY_BIRTH_DATA,rbind(KEM_BIRTH_DATA,OTHER_BIRTH_DATA) %>% mutate(Deaths=NA,Migration=NA) %>% select(colnames(COUNTY_BIRTH_DATA))) REG_DATA <- rbind(COUNTY_BIRTH_DATA,rbind(KEM_BIRTH_DATA,OTHER_BIRTH_DATA) %>% mutate(Deaths=NA,Migration=NA) %>% select(colnames(COUNTY_BIRTH_DATA)))
REG_DATA <- REG_DATA %>% group_by(Region) %>% arrange(Year) %>% mutate(Lag_Births=lag(Births),Lag_Two_Births=lag(Births,2)) %>% ungroup %>% arrange(County,Region,Year) REG_DATA <- REG_DATA %>% group_by(Region) %>% arrange(Year) %>% mutate(Lag_Births=lag(Births),Lag_Two_Births=lag(Births,2)) %>% ungroup %>% arrange(County,Region,Year)
REG_REDUCED_DATA <- REG_DATA %>% filter(!is.na(Births),!is.na(Lag_Two_Births),!is.na(Min_Birth_Group),!is.na(Lag_Births),!is.na(Region)) %>% select(Year,Region,Min_Birth_Group,Births,Lag_Births,Lag_Two_Births) REG_REDUCED_DATA <- REG_DATA %>% filter(!is.na(Births),!is.na(Lag_Two_Births),!is.na(Min_Birth_Group),!is.na(Lag_Births),!is.na(Region)) %>% select(Year,Region,Min_Birth_Group,Births,Lag_Births,Lag_Two_Births)
@ -140,4 +141,27 @@ ST_REG_DATA <- REG_REDUCED_DATA %>% filter(Region=='Lincoln') %>% filter(Year==m
saveRDS(REG_REDUCED_DATA,paste0(SAVE_DATA_LOC,"/Birth_Regression_Input_Data.Rds")) saveRDS(REG_REDUCED_DATA,paste0(SAVE_DATA_LOC,"/Birth_Regression_Input_Data.Rds"))
write_csv(REG_REDUCED_DATA,paste0(SAVE_DATA_LOC,"/Birth_Regression_Input_Data.csv")) write_csv(REG_REDUCED_DATA,paste0(SAVE_DATA_LOC,"/Birth_Regression_Input_Data.csv"))
##########Update existing demogrpahic data with new births
KEM_DATA_NEW <- REG_DATA %>% filter(Region=='Kemmerer & Diamondville') %>% select(Year,County,Region,Population,Births,Deaths,Migration)
PRED_KEM_BIRTH_2024 <- REG_DATA %>% filter(Region=='Kemmerer & Diamondville',Year>=2023) %>% mutate(Min_Birth_Group=lag(Min_Birth_Group)) %>% filter(Year==2024)
KEM_DATA_NEW[KEM_DATA_NEW$Year==2024,"Births"] <- round(exp(predict(MOD_BIRTHS,PRED_KEM_BIRTH_2024)))
OTHER_DATA_NEW <- REG_DATA %>% filter(Region=='Lincoln_Other') %>% select(Year,County,Region,Population,Births,Deaths,Migration)
PRED_OTHER_BIRTH_2024 <- REG_DATA %>% filter(Region=='Lincoln_Other',Year>=2023) %>% mutate(Min_Birth_Group=lag(Min_Birth_Group)) %>% filter(Year==2024)
OTHER_DATA_NEW[OTHER_DATA_NEW$Year==2024,"Births"] <- round(exp(predict(MOD_BIRTHS,PRED_OTHER_BIRTH_2024)))
LIN_DATA_NEW <- REG_DATA %>% filter(Region=='Lincoln') %>% select(Year,County,Region,Population,Births,Deaths,Migration)
#
if(!exists("POPULATION_SAVE_RDS")){POPULATION_SAVE_RDS <- "./Data/Cleaned_Data/Population_Data/RDS/"}
dir.create(POPULATION_SAVE_RDS, recursive = TRUE, showWarnings = FALSE)
if(!exists("POPULATION_SAVE_CSV")){POPULATION_SAVE_CSV <- "./Data/Cleaned_Data/Population_Data/CSV/"}
dir.create(POPULATION_SAVE_CSV, recursive = TRUE, showWarnings = FALSE)
saveRDS(LIN_DATA_NEW,paste0(POPULATION_SAVE_RDS,"Full_Lincoln_County_Population_Data.Rds"))
write_csv(LIN_DATA_NEW,paste0(POPULATION_SAVE_CSV,"Full_Lincoln_County_Population_Data.csv"))
saveRDS(KEM_DATA_NEW,paste0(POPULATION_SAVE_RDS,"Kemmerer_Diamondville_Population_Data.Rds"))
write_csv(KEM_DATA_NEW,paste0(POPULATION_SAVE_CSV,"Kemmerer_Diamondville_Population_Data.csv"))
saveRDS(OTHER_DATA_NEW,paste0(POPULATION_SAVE_RDS,"Other_Lincoln_Population_Data.Rds"))
write_csv(OTHER_DATA_NEW,paste0(POPULATION_SAVE_CSV,"Other_Lincoln_Population_Data.csv"))

View File

@ -0,0 +1,50 @@
###May want to check rounding of demographic data as migration does not line up perfectly (off by about 9 on average)
library(tidyverse)
#setwd("../")
KEM_POP_LOC <- "Data/Cleaned_Data/Population_Data/RDS/Kemmerer_Diamondville_Population_Data.Rds"
OTHER_POP_LOC <- "Data/Cleaned_Data/Population_Data/RDS/Other_Lincoln_Population_Data.Rds"
KEM_POP_LOC_CSV <- "Data/Cleaned_Data/Population_Data/RDS/Kemmerer_Diamondville_Population_Data.csv"
OTHER_POP_LOC_CSV <- "Data/Cleaned_Data/Population_Data/RDS/Other_Lincoln_Population_Data.csv"
MORT <- readRDS("Data/Cleaned_Data/Mortality_Rate_Data/RDS/Lincoln_County_Mortality_Rates.Rds")
KEM_DEM <-readRDS("Data/Cleaned_Data/Demographic_Sex_Age_Data/RDS/Kemmerer_Diamondville_Demographics.Rds")
OTHER_DEM <- readRDS("Data/Cleaned_Data/Demographic_Sex_Age_Data/RDS/Other_Lincoln_Demographics.Rds")
LIN_DEM <- readRDS("Data/Cleaned_Data/Demographic_Sex_Age_Data/RDS/Full_Lincoln_County_Demographics.Rds")
MORTALITY <- MORT
#Estimate split of deaths in county (share of Kemmerer vs other Lincoln)
PRED_DEATHS <- function(DATA,MORTALITY){
MORTALITY <- MORTALITY%>% pivot_wider(values_from=Death_Rate,names_from=Sex) %>% group_by(County,Min_Age,Max_Age) %>% summarize(Rate_Male=mean(Male,na.rm=TRUE),Rate_Female=mean(Female,na.rm=TRUE)) %>% ungroup
RES <- DATA[DATA$Age >= MORTALITY$Min_Age[1] & MORTALITY$Max_Age[1]>= DATA$Age,] %>% group_by(County,Region,Year,Age) %>% summarize(Num_Female=sum(Num_Female),Num_Male=sum(Num_Male)) %>% ungroup %>% mutate(Min_Age=MORTALITY$Min_Age[1],Max_Age= MORTALITY$Max_Age[1])
for(i in 2:nrow(MORTALITY)){
RES <- rbind(RES,DATA[DATA$Age >= MORTALITY$Min_Age[i] & MORTALITY$Max_Age[i]>= DATA$Age,] %>% group_by(County,Region,Year,Age) %>% summarize(Num_Female=sum(Num_Female),Num_Male=sum(Num_Male)) %>% ungroup %>% mutate(Min_Age=MORTALITY$Min_Age[i],Max_Age= MORTALITY$Max_Age[i]))
}
RES <- RES%>% arrange(Year) %>% left_join(MORTALITY)%>% group_by(County,Region,Year) %>% summarize(Predicted_Deaths=sum(Rate_Male*Num_Male)+sum(Rate_Female*Num_Female) ) %>% ungroup %>% select(County,Region,Year,Predicted_Deaths)
return(RES)
}
##Predict all deaths an merge into a table to find the ratios of predicted deaths
LIN_PRED <- PRED_DEATHS(LIN_DEM,MORT) %>% select(Year,Lin_Pred_Deaths=Predicted_Deaths)
KEM_PRED <- PRED_DEATHS(KEM_DEM,MORT) %>% select(Year,Kem_Pred_Deaths=Predicted_Deaths)
OTHER_PRED <- PRED_DEATHS(OTHER_DEM,MORT) %>% select(Year,Other_Pred_Deaths=Predicted_Deaths)
PRED_DATA<- LIN_PRED %>% left_join(KEM_PRED) %>% left_join(OTHER_PRED)
Death_Adj <- PRED_DATA %>% filter(!is.na(Kem_Pred_Deaths)) %>% mutate(Kem_Death_Ratio=Kem_Pred_Deaths/(Kem_Pred_Deaths+Other_Pred_Deaths),Other_Death_Ratio=1-Kem_Death_Ratio) %>% select(Year,Kem_Death_Ratio,Other_Death_Ratio)
#Add deaths to Lincoln were missing (lacking data)
LIN_POP <- readRDS("Data/Cleaned_Data/Population_Data/RDS/Full_Lincoln_County_Population_Data.Rds")
LIN_POP <- LIN_POP %>% left_join(LIN_PRED) %>% mutate(Deaths=ifelse(is.na(Deaths) & !is.na(Lin_Pred_Deaths),Lin_Pred_Deaths,Deaths)) %>% select(-Lin_Pred_Deaths) %>% mutate(BD=Population+Births-Deaths) %>% mutate(Missing=Population-lag(BD),Missing=lead(Missing),Migration=ifelse(is.na(Migration),Missing,Migration)) %>% mutate(NEXT=Population+Births-Deaths+Migration) %>% arrange(desc(Year)) %>% select(colnames(LIN_POP))
###Estimate the number of deaths in Kemmerer based on the total deaths, and the predicted share of deaths
#Find migration based on the remaining missing population (after deaths,and births)
KEM_POP <- readRDS(KEM_POP_LOC)
KEM_DEATHS <- LIN_POP %>% select(Year,Deaths) %>% left_join(Death_Adj) %>% left_join(KEM_PRED) %>% mutate(Deaths=round(Deaths*Kem_Death_Ratio)) %>% select(Year,Deaths)
KEM_POP <- KEM_POP%>% select(-Deaths) %>% left_join(KEM_DEATHS) %>% mutate(BD=Population+Births-Deaths) %>% mutate(Missing=Population-lag(BD),Missing=lead(Missing),Migration=ifelse(is.na(Migration),Missing,Migration))%>% mutate(NEXT=Population+Births-Deaths+Migration)%>% arrange(desc(Year)) %>% select(colnames(LIN_POP))
###Estimate the number of deaths in the other parts of Lincoln (not Kemmerer) based on the total deaths, and the predicted share of deaths
#Find migration based on the remaining missing population (after deaths,and births)
OTHER_POP <- readRDS(OTHER_POP_LOC)
OTHER_DEATHS <- LIN_POP %>% select(Year,Deaths) %>% left_join(Death_Adj) %>% left_join(OTHER_PRED) %>% mutate(Deaths=round(Deaths*Other_Death_Ratio)) %>% select(Year,Deaths)
OTHER_POP <- OTHER_POP %>% select(-Deaths) %>% left_join(OTHER_DEATHS) %>% mutate(BD=Population+Births-Deaths) %>% mutate(Missing=Population-lag(BD),Missing=lead(Missing),Migration=ifelse(is.na(Migration),Missing,Migration))%>% arrange(desc(Year)) %>% mutate(NEXT=Population+Births-Deaths+Migration)%>% select(colnames(LIN_POP)) %>% arrange(desc(Year))
#Save outputs
saveRDS(KEM_POP,KEM_POP_LOC)
saveRDS(OTHER_POP,OTHER_POP_LOC)
write_csv(KEM_POP,KEM_POP_LOC_CSV)
write_csv(OTHER_POP,OTHER_POP_LOC_CSV)

View File

@ -1,30 +0,0 @@
library(forecast)
####Work on overall migration trends
#Could use code cleanup after trying things, but have but I have a working ARIMA to model Lincoln county migration
TS_DATA <- POP_DATA %>% mutate(In_Migration=ifelse(Migration>0,1,0)) %>% group_by(County) %>% arrange(County,Year) %>% mutate(Prev_Pop=lag(Population))
ST_YEAR <- min(pull(TS_DATA %>% filter(!is.na(Migration)),Year))
END_YEAR <- max(pull(TS_DATA %>% filter(!is.na(Migration)),Year))
#GRAPH_DATA <- TS_DATA %>% filter(!is.na(Migration))
#GRAPH_DATA_LN <- TS_DATA %>% filter(!is.na(Migration),County=="Lincoln")
#ggplot(GRAPH_DATA,aes(x=Year,y=Migration/Prev_Pop,group=County,color=County))+geom_point()+geom_line(data=GRAPH_DATA_LN)
TS_WIDE <- TS_DATA %>% dplyr::select(Year,County,Migration) %>% pivot_wider(values_from=Migration,names_from=County) %>% arrange(Year) %>% filter(Year>ST_YEAR+1,Year<=END_YEAR) %>%ts(start=c(ST_YEAR+1),frequency=1)
LN <- TS_DATA %>% dplyr::select(Year,County,Migration) %>% pivot_wider(values_from=Migration,names_from=County) %>% arrange(Year) %>% dplyr::select(Lincoln,Year) %>% filter(Year>=ST_YEAR,Year<=END_YEAR) %>% dplyr::select(-Year) %>%ts(start=c(ST_YEAR),frequency=1)
#Create an ARIMA of Migration so the number of people migrating can be simulated
#Time series tests
#library(tseries)
#adf.test(LN,k=1) #Stationary with one lag, otherwise not stationary
#kpss.test(LN) #Stationary,default of program and has some model fit improvements
MOD <- auto.arima(LN,stationary=TRUE)
#summary(MOD)
#Validity tests
#autoplot(MOD)
#acf(resid(MOD))
#pacf(resid(MOD))
# adf.test(resid(MOD))
#checkresiduals(MOD)
#Save the resulting model outputs, will need to be changed if looking at other counties
saveRDS(MOD,"Data/Regression_Results/LN_ARIMA_MODEL.Rds")
MIGRATION_ARIMA_SIMS <- (do.call(cbind,mclapply(1:NUM_SIMULATIONS,function(x){as.numeric(round(simulate(MOD,future=TRUE, nsim=NUM_YEARS_PROJECTED)))},mc.cores =detectCores()-1)))#testing a multiple run simulation could use parallel process
saveRDS(MIGRATION_ARIMA_SIMS,"Data/Simulated_Data_Sets/Migration_ARIMA.Rds")
write.csv(MIGRATION_ARIMA_SIMS,row.names=FALSE,"Data/Simulated_Data_Sets/Migration_ARIMA.csv")

View File

@ -0,0 +1,72 @@
library(forecast)
library(tidyverse)
#setwd("../")
####Work on overall migration trends
#Could use code cleanup after trying things, but have but I have a working ARIMA to model Lincoln county migration
POP_DATA <- readRDS("Data/Cleaned_Data/Population_Data/RDS/Full_Lincoln_County_Population_Data.Rds") %>% mutate(Migration=Migration/Population)
POP_DATA_TEST <- readRDS("Data/Cleaned_Data/Population_Data/RDS/Full_Lincoln_County_Population_Data.Rds") %>% mutate(Migration=Migration/Population)
POP_KEM_DATA <- readRDS("Data/Cleaned_Data/Population_Data/RDS/Kemmerer_Diamondville_Population_Data.Rds")
POP_OTHER_DATA <- readRDS("Data/Cleaned_Data/Population_Data/RDS/Other_Lincoln_Population_Data.Rds")
hist(POP_OTHER_DATA$Migration/POP_OTHER_DATA$Population)
hist(POP_KEM_DATA$Migration/POP_KEM_DATA$Population)
TS_DATA <- POP_DATA %>% mutate(In_Migration=ifelse(Migration>0,1,0)) %>% group_by(County) %>% arrange(County,Year) %>% mutate(Prev_Pop=lag(Population)) %>% ungroup
TS_DATA_TEST <- POP_DATA_TEST %>% mutate(In_Migration=ifelse(Migration>0,1,0)) %>% group_by(County) %>% arrange(County,Year) %>% mutate(Prev_Pop=lag(Population)) %>% ungroup
TS_KEM_DATA <- POP_KEM_DATA %>% mutate(In_Migration=ifelse(Migration>0,1,0)) %>% group_by(County) %>% arrange(County,Year) %>% mutate(Prev_Pop=lag(Population)) %>% ungroup
TS_OTHER_DATA <- POP_OTHER_DATA %>% mutate(In_Migration=ifelse(Migration>0,1,0)) %>% group_by(County) %>% arrange(County,Year) %>% mutate(Prev_Pop=lag(Population)) %>% ungroup
ST_YEAR <- min(pull(TS_DATA %>% filter(!is.na(Migration)),Year))
END_YEAR <- max(pull(TS_DATA %>% filter(!is.na(Migration)),Year))
ST_YEAR_KEM <- min(pull(TS_KEM_DATA %>% filter(!is.na(Migration)),Year))
END_YEAR_KEM <- max(pull(TS_KEM_DATA %>% filter(!is.na(Migration)),Year))
ST_YEAR_OTHER <- min(pull(TS_OTHER_DATA %>% filter(!is.na(Migration)),Year))
END_YEAR_OTHER <- max(pull(TS_OTHER_DATA %>% filter(!is.na(Migration)),Year))
#GRAPH_DATA <- TS_DATA %>% filter(!is.na(Migration))
#GRAPH_DATA_LN <- TS_DATA %>% filter(!is.na(Migration),County=="Lincoln")
#ggplot(GRAPH_DATA,aes(x=Year,y=Migration/Prev_Pop,group=County,color=County))+geom_point()+geom_line(data=GRAPH_DATA_LN)
TS_WIDE <- TS_DATA %>% dplyr::select(Year,County,Migration) %>% pivot_wider(values_from=Migration,names_from=County) %>% arrange(Year) %>% filter(Year>ST_YEAR+1,Year<=END_YEAR) %>%ts(start=c(ST_YEAR+1),frequency=1)
TS_KEM_WIDE <- TS_KEM_DATA %>% dplyr::select(Year,County,Migration) %>% pivot_wider(values_from=Migration,names_from=County) %>% arrange(Year) %>% filter(Year>ST_YEAR+1,Year<=END_YEAR) %>%ts(start=c(ST_YEAR+1),frequency=1)
TS_OTHER_WIDE <- TS_OTHER_DATA %>% dplyr::select(Year,County,Migration) %>% pivot_wider(values_from=Migration,names_from=County) %>% arrange(Year) %>% filter(Year>ST_YEAR+1,Year<=END_YEAR) %>%ts(start=c(ST_YEAR+1),frequency=1)
LN <- TS_DATA %>% dplyr::select(Year,County,Migration) %>% pivot_wider(values_from=Migration,names_from=County) %>% arrange(Year) %>% dplyr::select(Lincoln,Year) %>% filter(Year>=ST_YEAR,Year<=END_YEAR) %>% dplyr::select(-Year) %>%ts(start=c(ST_YEAR),frequency=1)
KEM <- TS_KEM_DATA %>% dplyr::select(Year,Region,Migration) %>% pivot_wider(values_from=Migration,names_from=Region) %>% arrange(Year) %>% dplyr::select('Kemmerer & Diamondville',Year) %>% filter(Year>=ST_YEAR_KEM,Year<=END_YEAR_KEM) %>% dplyr::select(-Year) %>%ts(start=c(ST_YEAR_KEM),frequency=1)
TS_OTHER_DATA
OTHER <- TS_OTHER_DATA %>% dplyr::select(Year,Region,Migration) %>% pivot_wider(values_from=Migration,names_from=Region) %>% arrange(Year) %>% dplyr::select('Lincoln Other'=Lincoln_Other,Year) %>% filter(Year>=ST_YEAR_OTHER,Year<=END_YEAR_OTHER) %>% dplyr::select(-Year) %>%ts(start=c(ST_YEAR_OTHER),frequency=1)
#Create an ARIMA of Migration so the number of people migrating can be simulated
#Time series tests
#library(tseries)
#adf.test(LN,k=1) #Stationary with one lag, otherwise not stationary
#kpss.test(LN) #Stationary,default of program and has some model fit improvements
MOD <- auto.arima(LN,stationary=TRUE)
MOD_KEM <- auto.arima(KEM)
MOD_OTHER <- auto.arima(OTHER)
plot(forecast(MOD ))
plot(forecast(MOD_KEM ))
plot(forecast(MOD_OTHER ))
plot(forecast(MOD,abs(KEM))
#summary(MOD)
#Validity tests
#autoplot(MOD)
#acf(resid(MOD))
#pacf(resid(MOD))
# adf.test(resid(MOD))
#checkresiduals(MOD)
#Save the resulting model outputs, will need to be changed if looking at other counties
#saveRDS(MOD,"Data/Regression_Results/LN_ARIMA_MODEL.Rds")
MIGRATION_ARIMA_SIMS <- (do.call(cbind,mclapply(1:NUM_SIMULATIONS,function(x){as.numeric(round(simulate(MOD,future=TRUE, nsim=NUM_YEARS_PROJECTED)))},mc.cores =detectCores()-1)))#testing a multiple run simulation could use parallel process
saveRDS(MIGRATION_ARIMA_SIMS,"Data/Simulated_Data_Sets/Migration_ARIMA.Rds")
write.csv(MIGRATION_ARIMA_SIMS,row.names=FALSE,"Data/Simulated_Data_Sets/Migration_ARIMA.csv")