Imparted values to migration, and deaths. ARIMA

This commit is contained in:
Alex 2025-11-14 18:09:18 -07:00
parent 809e75e58d
commit 99063bec79
7 changed files with 157 additions and 41 deletions

View File

@ -4,9 +4,10 @@ Rscript "./Scripts/1B_Process_Existing_NIH_Mortality_Data.r"
Rscript "./Scripts/1C_Download_and_Process_Demographic_Data.r" Rscript "./Scripts/1C_Download_and_Process_Demographic_Data.r"
Rscript "./Scripts/1D_Use_ACS_Census_Data_to_Estimate_Kemmerer_Demographics.r" Rscript "./Scripts/1D_Use_ACS_Census_Data_to_Estimate_Kemmerer_Demographics.r"
#Create data sets used in later simulations, produce some results for the report when related to this process. #Create data sets used in later simulations, produce some results for the report when related to this process.
Rscript "./Scripts/2A_Birth_Rate_Regression.r" Rscript "./Scripts/2A_Birth_Rate_Regression_and_Impart_Kemmerer_Births.r"
Rscript "./Scripts/2B_Migration_by_Age_Regression.r" Rscript "./Scripts/2B_Impart_Deaths_and_Migration_to_Subregions.r"
Rscript "./Scripts/2C_Overall_Migration_ARIMA.r" Rscript "./Scripts/2C_Migration_by_Age_Regression.r"
Rscript "./Scripts/2D_Overall_Migration_ARIMA.r"
#Produce final results for either the simulation, or information for the report, but not anything used in later stages of the simulation #Produce final results for either the simulation, or information for the report, but not anything used in later stages of the simulation
Rscript "./Scripts/3A_Population_Pyramid.r" Rscript "./Scripts/3A_Population_Pyramid.r"

View File

@ -22,7 +22,6 @@ PROJ_TRACTS <- PROJ_TRACTS %>% select(-ZCTA5) %>% filter(GEOID!=56023978200) %>%
PROJ_TRACTS <- PROJ_TRACTS %>% select(GEOID) %>% mutate('IN_KEM'=1) %>% mutate(GEOID=as.character(GEOID)) PROJ_TRACTS <- PROJ_TRACTS %>% select(GEOID) %>% mutate('IN_KEM'=1) %>% mutate(GEOID=as.character(GEOID))
###Load data manually created which links vairable names to sex-age census data ###Load data manually created which links vairable names to sex-age census data
CODES <- read_csv("./Data/Raw_Data/ACS_Demographics/API_CENSUS_CODES.csv",skip=1) %>% mutate(Med_Age=(Min_Age+Max_Age)/2) %>% rename(variable=Code) CODES <- read_csv("./Data/Raw_Data/ACS_Demographics/API_CENSUS_CODES.csv",skip=1) %>% mutate(Med_Age=(Min_Age+Max_Age)/2) %>% rename(variable=Code)
#Testing age Comparison between the two
###Extract census data for all tracts in Lincoln county, clean up the data, and indicate if the tract is in Kemmerer/Diamondvile or not. ###Extract census data for all tracts in Lincoln county, clean up the data, and indicate if the tract is in Kemmerer/Diamondvile or not.
DEMO_DATA_ALL <- do.call(rbind,lapply(2009:ACS_END_YEAR,MAKE_KEM_DEMO_DATA_YEAR)) DEMO_DATA_ALL <- do.call(rbind,lapply(2009:ACS_END_YEAR,MAKE_KEM_DEMO_DATA_YEAR))
ORIG_DEMO_DATA_ALL <- DEMO_DATA_ALL ORIG_DEMO_DATA_ALL <- DEMO_DATA_ALL
@ -36,13 +35,13 @@ KEM_DEMO_DATA <- DEMO_DATA_ALL %>% filter(IN_KEM==1) %>% rename(Region=IN_KEM)
#Ajust the populations to match the total population stastics from the other data sources, since the tracts may spill into other areas #Ajust the populations to match the total population stastics from the other data sources, since the tracts may spill into other areas
POST_ADJUST_DATA <- KEM_DEMO_DATA %>% group_by(Year) %>% summarize(Kem_Demo_Population=sum(Num_Female)+sum(Num_Male)) %>% left_join(OTHER_LIN_DEMO_DATA %>% group_by(Year) %>% summarize(Other_Demo_Population=sum(Num_Female)+sum(Num_Male))) %>% mutate(Total_Lincoln_Demo_Population=Kem_Demo_Population+Other_Demo_Population) POST_ADJUST_DATA <- KEM_DEMO_DATA %>% group_by(Year) %>% summarize(Kem_Demo_Population=sum(Num_Female)+sum(Num_Male)) %>% left_join(OTHER_LIN_DEMO_DATA %>% group_by(Year) %>% summarize(Other_Demo_Population=sum(Num_Female)+sum(Num_Male))) %>% mutate(Total_Lincoln_Demo_Population=Kem_Demo_Population+Other_Demo_Population)
DIRECT_POP <- readRDS("Data/Cleaned_Data/Population_Data/RDS/All_Wyoming_County_Populations.Rds") %>% filter(County=='Lincoln') %>% select(Year,Lin_Direct_Population=Population) %>% full_join(readRDS("Data/Cleaned_Data/Population_Data/RDS/All_Wyoming_City_Populations.Rds") %>% filter(City %in% c('Kemmerer','Diamondville')) %>% group_by(Year) %>% summarize(Kem_Direct_Population=sum(Population,na.rm=TRUE))) %>% mutate(Other_Direct_Population=Lin_Direct_Population-Kem_Direct_Population) DIRECT_POP <- readRDS("Data/Cleaned_Data/Population_Data/RDS/All_Wyoming_County_Populations.Rds") %>% filter(County=='Lincoln') %>% select(Year,Lin_Direct_Population=Population) %>% full_join(readRDS("Data/Cleaned_Data/Population_Data/RDS/All_Wyoming_City_Populations.Rds") %>% filter(City %in% c('Kemmerer','Diamondville')) %>% group_by(Year) %>% summarize(Kem_Direct_Population=sum(Population,na.rm=TRUE))) %>% mutate(Other_Direct_Population=Lin_Direct_Population-Kem_Direct_Population)
ADJUST_TABLE <- DIRECT_POP %>% inner_join(POST_ADJUST_DATA) %>% mutate(KEM_ADJ=Kem_Direct_Population/Kem_Demo_Population,OTHER_ADJ=Lin_Direct_Population/Other_Demo_Population,LIN_ADJ=Lin_Direct_Population/Total_Lincoln_Demo_Population) %>% select(Year,LIN_ADJ,KEM_ADJ,OTHER_ADJ) ADJUST_TABLE <- DIRECT_POP %>% inner_join(POST_ADJUST_DATA) %>% mutate(KEM_ADJ=Kem_Direct_Population/Kem_Demo_Population,OTHER_ADJ=Other_Direct_Population/Other_Demo_Population,LIN_ADJ=Lin_Direct_Population/Total_Lincoln_Demo_Population) %>% select(Year,LIN_ADJ,KEM_ADJ,OTHER_ADJ)
##Checking Total
KEM_DEMO_DATA <- KEM_DEMO_DATA %>% left_join(ADJUST_TABLE %>% select(Year,KEM_ADJ)) %>% mutate(Num_Female=round(KEM_ADJ*Num_Female),Num_Male=round(KEM_ADJ*Num_Male)) %>% select(-KEM_ADJ) KEM_DEMO_DATA <- KEM_DEMO_DATA %>% left_join(ADJUST_TABLE %>% select(Year,KEM_ADJ)) %>% mutate(Num_Female=round(KEM_ADJ*Num_Female),Num_Male=round(KEM_ADJ*Num_Male)) %>% select(-KEM_ADJ)
OTHER_LIN_DEMO_DATA <- OTHER_LIN_DEMO_DATA%>% left_join(ADJUST_TABLE %>% select(Year,OTHER_ADJ)) %>% mutate(Num_Female=round(OTHER_ADJ*Num_Female),Num_Male=round(OTHER_ADJ*Num_Male)) %>% select(-OTHER_ADJ) OTHER_LIN_DEMO_DATA <- OTHER_LIN_DEMO_DATA%>% left_join(ADJUST_TABLE %>% select(Year,OTHER_ADJ)) %>% mutate(Num_Female=round(OTHER_ADJ*Num_Female),Num_Male=round(OTHER_ADJ*Num_Male)) %>% select(-OTHER_ADJ)
#Find the most recent data year #Find the most recent data year
MAX_YEAR <- max(KEM_DEMO_DATA$Year) MAX_YEAR <- max(KEM_DEMO_DATA$Year)
KEM_DEMO_MAT <- KEM_DEMO_DATA %>% filter(Year==MAX_YEAR) %>% select(Num_Male,Num_Female) %>% as.matrix KEM_DEMO_MAT <- KEM_DEMO_DATA %>% filter(Year==MAX_YEAR) %>% select(Num_Male,Num_Female) %>% as.matrix

View File

@ -36,10 +36,8 @@ KEM_BIRTH_DATA <- KEM_BIRTH_DATA %>% left_join(ADD_BIRTH_GROUP_DATA(KEM_RAW_DEMO
####################Create same data set but for only parts of Lincoln not in the Kemmerer Diamondville area ####################Create same data set but for only parts of Lincoln not in the Kemmerer Diamondville area
OTHER_RAW_DEMOGRAPHIC_DATA <- readRDS(DEMOGRAPHIC_OTHER_LIN_LOC) OTHER_RAW_DEMOGRAPHIC_DATA <- readRDS(DEMOGRAPHIC_OTHER_LIN_LOC)
OTHER_DEMO_DATA <- OTHER_RAW_DEMOGRAPHIC_DATA %>% mutate(POP=Num_Male+Num_Female,Births=ifelse(Age==0,POP,0)) %>% group_by(Year,County,Region) %>% summarize(Births=sum(Births)) OTHER_BIRTH_DATA<- OTHER_RAW_DEMOGRAPHIC_DATA %>% mutate(Population=Num_Male+Num_Female,Births=ifelse(Age==0,Population,0)) %>% group_by(Year,County,Region) %>% summarize(Population=sum(Population),Births=sum(Births)) %>% ungroup %>% arrange(desc(Year))
OTHER_POP_DATA <- readRDS(POPULATION_CITY_LOC)%>% rename(Region=City) %>% filter(!(Region %in% c("Kemmerer","Diamondville")),County=='Lincoln') %>% group_by(Year) %>% mutate(Population=sum(Population,na.rm=TRUE)) %>% ungroup %>% mutate(Region='Lincoln_Other') %>% unique %>% ungroup OTHER_BIRTH_DATA <- OTHER_BIRTH_DATA %>% left_join(ADD_BIRTH_GROUP_DATA(OTHER_RAW_DEMOGRAPHIC_DATA))
OTHER_BIRTH_DATA <- OTHER_POP_DATA %>% left_join(OTHER_DEMO_DATA)
OTHER_BIRTH_DATA <- OTHER_BIRTH_DATA %>% left_join(ADD_BIRTH_GROUP_DATA(OTHER_RAW_DEMOGRAPHIC_DATA))
####################################################3 ####################################################3
###Because the total of the Kemmerer and Other Lincoln should sum to the births in the entire county, we assume that the ratio of children under the age of 1, in each area is the same percentage as the total births in the county. The total births in the county is then taken from the Wyoming eadiv estimates at the county level. This makes the two sum to the proper amount ###Because the total of the Kemmerer and Other Lincoln should sum to the births in the entire county, we assume that the ratio of children under the age of 1, in each area is the same percentage as the total births in the county. The total births in the county is then taken from the Wyoming eadiv estimates at the county level. This makes the two sum to the proper amount
@ -49,7 +47,10 @@ ADJUSTED_KEM_STUB_DATA <- COUNTY_BIRTH_DATA %>% filter(Region=='Lincoln') %>% s
KEM_BIRTH_DATA <- KEM_BIRTH_DATA %>% select(-Births) %>% left_join(ADJUSTED_KEM_STUB_DATA ) KEM_BIRTH_DATA <- KEM_BIRTH_DATA %>% select(-Births) %>% left_join(ADJUSTED_KEM_STUB_DATA )
OTHER_BIRTH_DATA <- OTHER_BIRTH_DATA %>% select(-Births) %>% left_join(ADJUSTED_KEM_STUB_DATA) OTHER_BIRTH_DATA <- OTHER_BIRTH_DATA %>% select(-Births) %>% left_join(ADJUSTED_KEM_STUB_DATA)
REG_DATA <- rbind(COUNTY_BIRTH_DATA,rbind(KEM_BIRTH_DATA,OTHER_BIRTH_DATA) %>% mutate(Deaths=NA,Migration=NA) %>% select(colnames(COUNTY_BIRTH_DATA))) REG_DATA <- rbind(COUNTY_BIRTH_DATA,rbind(KEM_BIRTH_DATA,OTHER_BIRTH_DATA) %>% mutate(Deaths=NA,Migration=NA) %>% select(colnames(COUNTY_BIRTH_DATA)))
REG_DATA <- REG_DATA %>% group_by(Region) %>% arrange(Year) %>% mutate(Lag_Births=lag(Births),Lag_Two_Births=lag(Births,2)) %>% ungroup %>% arrange(County,Region,Year) REG_DATA <- REG_DATA %>% group_by(Region) %>% arrange(Year) %>% mutate(Lag_Births=lag(Births),Lag_Two_Births=lag(Births,2)) %>% ungroup %>% arrange(County,Region,Year)
REG_REDUCED_DATA <- REG_DATA %>% filter(!is.na(Births),!is.na(Lag_Two_Births),!is.na(Min_Birth_Group),!is.na(Lag_Births),!is.na(Region)) %>% select(Year,Region,Min_Birth_Group,Births,Lag_Births,Lag_Two_Births) REG_REDUCED_DATA <- REG_DATA %>% filter(!is.na(Births),!is.na(Lag_Two_Births),!is.na(Min_Birth_Group),!is.na(Lag_Births),!is.na(Region)) %>% select(Year,Region,Min_Birth_Group,Births,Lag_Births,Lag_Two_Births)
@ -140,4 +141,27 @@ ST_REG_DATA <- REG_REDUCED_DATA %>% filter(Region=='Lincoln') %>% filter(Year==m
saveRDS(REG_REDUCED_DATA,paste0(SAVE_DATA_LOC,"/Birth_Regression_Input_Data.Rds")) saveRDS(REG_REDUCED_DATA,paste0(SAVE_DATA_LOC,"/Birth_Regression_Input_Data.Rds"))
write_csv(REG_REDUCED_DATA,paste0(SAVE_DATA_LOC,"/Birth_Regression_Input_Data.csv")) write_csv(REG_REDUCED_DATA,paste0(SAVE_DATA_LOC,"/Birth_Regression_Input_Data.csv"))
##########Update existing demogrpahic data with new births
KEM_DATA_NEW <- REG_DATA %>% filter(Region=='Kemmerer & Diamondville') %>% select(Year,County,Region,Population,Births,Deaths,Migration)
PRED_KEM_BIRTH_2024 <- REG_DATA %>% filter(Region=='Kemmerer & Diamondville',Year>=2023) %>% mutate(Min_Birth_Group=lag(Min_Birth_Group)) %>% filter(Year==2024)
KEM_DATA_NEW[KEM_DATA_NEW$Year==2024,"Births"] <- round(exp(predict(MOD_BIRTHS,PRED_KEM_BIRTH_2024)))
OTHER_DATA_NEW <- REG_DATA %>% filter(Region=='Lincoln_Other') %>% select(Year,County,Region,Population,Births,Deaths,Migration)
PRED_OTHER_BIRTH_2024 <- REG_DATA %>% filter(Region=='Lincoln_Other',Year>=2023) %>% mutate(Min_Birth_Group=lag(Min_Birth_Group)) %>% filter(Year==2024)
OTHER_DATA_NEW[OTHER_DATA_NEW$Year==2024,"Births"] <- round(exp(predict(MOD_BIRTHS,PRED_OTHER_BIRTH_2024)))
LIN_DATA_NEW <- REG_DATA %>% filter(Region=='Lincoln') %>% select(Year,County,Region,Population,Births,Deaths,Migration)
#
if(!exists("POPULATION_SAVE_RDS")){POPULATION_SAVE_RDS <- "./Data/Cleaned_Data/Population_Data/RDS/"}
dir.create(POPULATION_SAVE_RDS, recursive = TRUE, showWarnings = FALSE)
if(!exists("POPULATION_SAVE_CSV")){POPULATION_SAVE_CSV <- "./Data/Cleaned_Data/Population_Data/CSV/"}
dir.create(POPULATION_SAVE_CSV, recursive = TRUE, showWarnings = FALSE)
saveRDS(LIN_DATA_NEW,paste0(POPULATION_SAVE_RDS,"Full_Lincoln_County_Population_Data.Rds"))
write_csv(LIN_DATA_NEW,paste0(POPULATION_SAVE_CSV,"Full_Lincoln_County_Population_Data.csv"))
saveRDS(KEM_DATA_NEW,paste0(POPULATION_SAVE_RDS,"Kemmerer_Diamondville_Population_Data.Rds"))
write_csv(KEM_DATA_NEW,paste0(POPULATION_SAVE_CSV,"Kemmerer_Diamondville_Population_Data.csv"))
saveRDS(OTHER_DATA_NEW,paste0(POPULATION_SAVE_RDS,"Other_Lincoln_Population_Data.Rds"))
write_csv(OTHER_DATA_NEW,paste0(POPULATION_SAVE_CSV,"Other_Lincoln_Population_Data.csv"))

View File

@ -0,0 +1,50 @@
###May want to check rounding of demographic data as migration does not line up perfectly (off by about 9 on average)
library(tidyverse)
#setwd("../")
KEM_POP_LOC <- "Data/Cleaned_Data/Population_Data/RDS/Kemmerer_Diamondville_Population_Data.Rds"
OTHER_POP_LOC <- "Data/Cleaned_Data/Population_Data/RDS/Other_Lincoln_Population_Data.Rds"
KEM_POP_LOC_CSV <- "Data/Cleaned_Data/Population_Data/RDS/Kemmerer_Diamondville_Population_Data.csv"
OTHER_POP_LOC_CSV <- "Data/Cleaned_Data/Population_Data/RDS/Other_Lincoln_Population_Data.csv"
MORT <- readRDS("Data/Cleaned_Data/Mortality_Rate_Data/RDS/Lincoln_County_Mortality_Rates.Rds")
KEM_DEM <-readRDS("Data/Cleaned_Data/Demographic_Sex_Age_Data/RDS/Kemmerer_Diamondville_Demographics.Rds")
OTHER_DEM <- readRDS("Data/Cleaned_Data/Demographic_Sex_Age_Data/RDS/Other_Lincoln_Demographics.Rds")
LIN_DEM <- readRDS("Data/Cleaned_Data/Demographic_Sex_Age_Data/RDS/Full_Lincoln_County_Demographics.Rds")
MORTALITY <- MORT
#Estimate split of deaths in county (share of Kemmerer vs other Lincoln)
PRED_DEATHS <- function(DATA,MORTALITY){
MORTALITY <- MORTALITY%>% pivot_wider(values_from=Death_Rate,names_from=Sex) %>% group_by(County,Min_Age,Max_Age) %>% summarize(Rate_Male=mean(Male,na.rm=TRUE),Rate_Female=mean(Female,na.rm=TRUE)) %>% ungroup
RES <- DATA[DATA$Age >= MORTALITY$Min_Age[1] & MORTALITY$Max_Age[1]>= DATA$Age,] %>% group_by(County,Region,Year,Age) %>% summarize(Num_Female=sum(Num_Female),Num_Male=sum(Num_Male)) %>% ungroup %>% mutate(Min_Age=MORTALITY$Min_Age[1],Max_Age= MORTALITY$Max_Age[1])
for(i in 2:nrow(MORTALITY)){
RES <- rbind(RES,DATA[DATA$Age >= MORTALITY$Min_Age[i] & MORTALITY$Max_Age[i]>= DATA$Age,] %>% group_by(County,Region,Year,Age) %>% summarize(Num_Female=sum(Num_Female),Num_Male=sum(Num_Male)) %>% ungroup %>% mutate(Min_Age=MORTALITY$Min_Age[i],Max_Age= MORTALITY$Max_Age[i]))
}
RES <- RES%>% arrange(Year) %>% left_join(MORTALITY)%>% group_by(County,Region,Year) %>% summarize(Predicted_Deaths=sum(Rate_Male*Num_Male)+sum(Rate_Female*Num_Female) ) %>% ungroup %>% select(County,Region,Year,Predicted_Deaths)
return(RES)
}
##Predict all deaths an merge into a table to find the ratios of predicted deaths
LIN_PRED <- PRED_DEATHS(LIN_DEM,MORT) %>% select(Year,Lin_Pred_Deaths=Predicted_Deaths)
KEM_PRED <- PRED_DEATHS(KEM_DEM,MORT) %>% select(Year,Kem_Pred_Deaths=Predicted_Deaths)
OTHER_PRED <- PRED_DEATHS(OTHER_DEM,MORT) %>% select(Year,Other_Pred_Deaths=Predicted_Deaths)
PRED_DATA<- LIN_PRED %>% left_join(KEM_PRED) %>% left_join(OTHER_PRED)
Death_Adj <- PRED_DATA %>% filter(!is.na(Kem_Pred_Deaths)) %>% mutate(Kem_Death_Ratio=Kem_Pred_Deaths/(Kem_Pred_Deaths+Other_Pred_Deaths),Other_Death_Ratio=1-Kem_Death_Ratio) %>% select(Year,Kem_Death_Ratio,Other_Death_Ratio)
#Add deaths to Lincoln were missing (lacking data)
LIN_POP <- readRDS("Data/Cleaned_Data/Population_Data/RDS/Full_Lincoln_County_Population_Data.Rds")
LIN_POP <- LIN_POP %>% left_join(LIN_PRED) %>% mutate(Deaths=ifelse(is.na(Deaths) & !is.na(Lin_Pred_Deaths),Lin_Pred_Deaths,Deaths)) %>% select(-Lin_Pred_Deaths) %>% mutate(BD=Population+Births-Deaths) %>% mutate(Missing=Population-lag(BD),Missing=lead(Missing),Migration=ifelse(is.na(Migration),Missing,Migration)) %>% mutate(NEXT=Population+Births-Deaths+Migration) %>% arrange(desc(Year)) %>% select(colnames(LIN_POP))
###Estimate the number of deaths in Kemmerer based on the total deaths, and the predicted share of deaths
#Find migration based on the remaining missing population (after deaths,and births)
KEM_POP <- readRDS(KEM_POP_LOC)
KEM_DEATHS <- LIN_POP %>% select(Year,Deaths) %>% left_join(Death_Adj) %>% left_join(KEM_PRED) %>% mutate(Deaths=round(Deaths*Kem_Death_Ratio)) %>% select(Year,Deaths)
KEM_POP <- KEM_POP%>% select(-Deaths) %>% left_join(KEM_DEATHS) %>% mutate(BD=Population+Births-Deaths) %>% mutate(Missing=Population-lag(BD),Missing=lead(Missing),Migration=ifelse(is.na(Migration),Missing,Migration))%>% mutate(NEXT=Population+Births-Deaths+Migration)%>% arrange(desc(Year)) %>% select(colnames(LIN_POP))
###Estimate the number of deaths in the other parts of Lincoln (not Kemmerer) based on the total deaths, and the predicted share of deaths
#Find migration based on the remaining missing population (after deaths,and births)
OTHER_POP <- readRDS(OTHER_POP_LOC)
OTHER_DEATHS <- LIN_POP %>% select(Year,Deaths) %>% left_join(Death_Adj) %>% left_join(OTHER_PRED) %>% mutate(Deaths=round(Deaths*Other_Death_Ratio)) %>% select(Year,Deaths)
OTHER_POP <- OTHER_POP %>% select(-Deaths) %>% left_join(OTHER_DEATHS) %>% mutate(BD=Population+Births-Deaths) %>% mutate(Missing=Population-lag(BD),Missing=lead(Missing),Migration=ifelse(is.na(Migration),Missing,Migration))%>% arrange(desc(Year)) %>% mutate(NEXT=Population+Births-Deaths+Migration)%>% select(colnames(LIN_POP)) %>% arrange(desc(Year))
#Save outputs
saveRDS(KEM_POP,KEM_POP_LOC)
saveRDS(OTHER_POP,OTHER_POP_LOC)
write_csv(KEM_POP,KEM_POP_LOC_CSV)
write_csv(OTHER_POP,OTHER_POP_LOC_CSV)

View File

@ -1,30 +0,0 @@
library(forecast)
####Work on overall migration trends
#Could use code cleanup after trying things, but have but I have a working ARIMA to model Lincoln county migration
TS_DATA <- POP_DATA %>% mutate(In_Migration=ifelse(Migration>0,1,0)) %>% group_by(County) %>% arrange(County,Year) %>% mutate(Prev_Pop=lag(Population))
ST_YEAR <- min(pull(TS_DATA %>% filter(!is.na(Migration)),Year))
END_YEAR <- max(pull(TS_DATA %>% filter(!is.na(Migration)),Year))
#GRAPH_DATA <- TS_DATA %>% filter(!is.na(Migration))
#GRAPH_DATA_LN <- TS_DATA %>% filter(!is.na(Migration),County=="Lincoln")
#ggplot(GRAPH_DATA,aes(x=Year,y=Migration/Prev_Pop,group=County,color=County))+geom_point()+geom_line(data=GRAPH_DATA_LN)
TS_WIDE <- TS_DATA %>% dplyr::select(Year,County,Migration) %>% pivot_wider(values_from=Migration,names_from=County) %>% arrange(Year) %>% filter(Year>ST_YEAR+1,Year<=END_YEAR) %>%ts(start=c(ST_YEAR+1),frequency=1)
LN <- TS_DATA %>% dplyr::select(Year,County,Migration) %>% pivot_wider(values_from=Migration,names_from=County) %>% arrange(Year) %>% dplyr::select(Lincoln,Year) %>% filter(Year>=ST_YEAR,Year<=END_YEAR) %>% dplyr::select(-Year) %>%ts(start=c(ST_YEAR),frequency=1)
#Create an ARIMA of Migration so the number of people migrating can be simulated
#Time series tests
#library(tseries)
#adf.test(LN,k=1) #Stationary with one lag, otherwise not stationary
#kpss.test(LN) #Stationary,default of program and has some model fit improvements
MOD <- auto.arima(LN,stationary=TRUE)
#summary(MOD)
#Validity tests
#autoplot(MOD)
#acf(resid(MOD))
#pacf(resid(MOD))
# adf.test(resid(MOD))
#checkresiduals(MOD)
#Save the resulting model outputs, will need to be changed if looking at other counties
saveRDS(MOD,"Data/Regression_Results/LN_ARIMA_MODEL.Rds")
MIGRATION_ARIMA_SIMS <- (do.call(cbind,mclapply(1:NUM_SIMULATIONS,function(x){as.numeric(round(simulate(MOD,future=TRUE, nsim=NUM_YEARS_PROJECTED)))},mc.cores =detectCores()-1)))#testing a multiple run simulation could use parallel process
saveRDS(MIGRATION_ARIMA_SIMS,"Data/Simulated_Data_Sets/Migration_ARIMA.Rds")
write.csv(MIGRATION_ARIMA_SIMS,row.names=FALSE,"Data/Simulated_Data_Sets/Migration_ARIMA.csv")

View File

@ -0,0 +1,72 @@
library(forecast)
library(tidyverse)
#setwd("../")
####Work on overall migration trends
#Could use code cleanup after trying things, but have but I have a working ARIMA to model Lincoln county migration
POP_DATA <- readRDS("Data/Cleaned_Data/Population_Data/RDS/Full_Lincoln_County_Population_Data.Rds") %>% mutate(Migration=Migration/Population)
POP_DATA_TEST <- readRDS("Data/Cleaned_Data/Population_Data/RDS/Full_Lincoln_County_Population_Data.Rds") %>% mutate(Migration=Migration/Population)
POP_KEM_DATA <- readRDS("Data/Cleaned_Data/Population_Data/RDS/Kemmerer_Diamondville_Population_Data.Rds")
POP_OTHER_DATA <- readRDS("Data/Cleaned_Data/Population_Data/RDS/Other_Lincoln_Population_Data.Rds")
hist(POP_OTHER_DATA$Migration/POP_OTHER_DATA$Population)
hist(POP_KEM_DATA$Migration/POP_KEM_DATA$Population)
TS_DATA <- POP_DATA %>% mutate(In_Migration=ifelse(Migration>0,1,0)) %>% group_by(County) %>% arrange(County,Year) %>% mutate(Prev_Pop=lag(Population)) %>% ungroup
TS_DATA_TEST <- POP_DATA_TEST %>% mutate(In_Migration=ifelse(Migration>0,1,0)) %>% group_by(County) %>% arrange(County,Year) %>% mutate(Prev_Pop=lag(Population)) %>% ungroup
TS_KEM_DATA <- POP_KEM_DATA %>% mutate(In_Migration=ifelse(Migration>0,1,0)) %>% group_by(County) %>% arrange(County,Year) %>% mutate(Prev_Pop=lag(Population)) %>% ungroup
TS_OTHER_DATA <- POP_OTHER_DATA %>% mutate(In_Migration=ifelse(Migration>0,1,0)) %>% group_by(County) %>% arrange(County,Year) %>% mutate(Prev_Pop=lag(Population)) %>% ungroup
ST_YEAR <- min(pull(TS_DATA %>% filter(!is.na(Migration)),Year))
END_YEAR <- max(pull(TS_DATA %>% filter(!is.na(Migration)),Year))
ST_YEAR_KEM <- min(pull(TS_KEM_DATA %>% filter(!is.na(Migration)),Year))
END_YEAR_KEM <- max(pull(TS_KEM_DATA %>% filter(!is.na(Migration)),Year))
ST_YEAR_OTHER <- min(pull(TS_OTHER_DATA %>% filter(!is.na(Migration)),Year))
END_YEAR_OTHER <- max(pull(TS_OTHER_DATA %>% filter(!is.na(Migration)),Year))
#GRAPH_DATA <- TS_DATA %>% filter(!is.na(Migration))
#GRAPH_DATA_LN <- TS_DATA %>% filter(!is.na(Migration),County=="Lincoln")
#ggplot(GRAPH_DATA,aes(x=Year,y=Migration/Prev_Pop,group=County,color=County))+geom_point()+geom_line(data=GRAPH_DATA_LN)
TS_WIDE <- TS_DATA %>% dplyr::select(Year,County,Migration) %>% pivot_wider(values_from=Migration,names_from=County) %>% arrange(Year) %>% filter(Year>ST_YEAR+1,Year<=END_YEAR) %>%ts(start=c(ST_YEAR+1),frequency=1)
TS_KEM_WIDE <- TS_KEM_DATA %>% dplyr::select(Year,County,Migration) %>% pivot_wider(values_from=Migration,names_from=County) %>% arrange(Year) %>% filter(Year>ST_YEAR+1,Year<=END_YEAR) %>%ts(start=c(ST_YEAR+1),frequency=1)
TS_OTHER_WIDE <- TS_OTHER_DATA %>% dplyr::select(Year,County,Migration) %>% pivot_wider(values_from=Migration,names_from=County) %>% arrange(Year) %>% filter(Year>ST_YEAR+1,Year<=END_YEAR) %>%ts(start=c(ST_YEAR+1),frequency=1)
LN <- TS_DATA %>% dplyr::select(Year,County,Migration) %>% pivot_wider(values_from=Migration,names_from=County) %>% arrange(Year) %>% dplyr::select(Lincoln,Year) %>% filter(Year>=ST_YEAR,Year<=END_YEAR) %>% dplyr::select(-Year) %>%ts(start=c(ST_YEAR),frequency=1)
KEM <- TS_KEM_DATA %>% dplyr::select(Year,Region,Migration) %>% pivot_wider(values_from=Migration,names_from=Region) %>% arrange(Year) %>% dplyr::select('Kemmerer & Diamondville',Year) %>% filter(Year>=ST_YEAR_KEM,Year<=END_YEAR_KEM) %>% dplyr::select(-Year) %>%ts(start=c(ST_YEAR_KEM),frequency=1)
TS_OTHER_DATA
OTHER <- TS_OTHER_DATA %>% dplyr::select(Year,Region,Migration) %>% pivot_wider(values_from=Migration,names_from=Region) %>% arrange(Year) %>% dplyr::select('Lincoln Other'=Lincoln_Other,Year) %>% filter(Year>=ST_YEAR_OTHER,Year<=END_YEAR_OTHER) %>% dplyr::select(-Year) %>%ts(start=c(ST_YEAR_OTHER),frequency=1)
#Create an ARIMA of Migration so the number of people migrating can be simulated
#Time series tests
#library(tseries)
#adf.test(LN,k=1) #Stationary with one lag, otherwise not stationary
#kpss.test(LN) #Stationary,default of program and has some model fit improvements
MOD <- auto.arima(LN,stationary=TRUE)
MOD_KEM <- auto.arima(KEM)
MOD_OTHER <- auto.arima(OTHER)
plot(forecast(MOD ))
plot(forecast(MOD_KEM ))
plot(forecast(MOD_OTHER ))
plot(forecast(MOD,abs(KEM))
#summary(MOD)
#Validity tests
#autoplot(MOD)
#acf(resid(MOD))
#pacf(resid(MOD))
# adf.test(resid(MOD))
#checkresiduals(MOD)
#Save the resulting model outputs, will need to be changed if looking at other counties
#saveRDS(MOD,"Data/Regression_Results/LN_ARIMA_MODEL.Rds")
MIGRATION_ARIMA_SIMS <- (do.call(cbind,mclapply(1:NUM_SIMULATIONS,function(x){as.numeric(round(simulate(MOD,future=TRUE, nsim=NUM_YEARS_PROJECTED)))},mc.cores =detectCores()-1)))#testing a multiple run simulation could use parallel process
saveRDS(MIGRATION_ARIMA_SIMS,"Data/Simulated_Data_Sets/Migration_ARIMA.Rds")
write.csv(MIGRATION_ARIMA_SIMS,row.names=FALSE,"Data/Simulated_Data_Sets/Migration_ARIMA.csv")