Imparted values to migration, and deaths. ARIMA
This commit is contained in:
parent
809e75e58d
commit
99063bec79
@ -4,9 +4,10 @@ Rscript "./Scripts/1B_Process_Existing_NIH_Mortality_Data.r"
|
|||||||
Rscript "./Scripts/1C_Download_and_Process_Demographic_Data.r"
|
Rscript "./Scripts/1C_Download_and_Process_Demographic_Data.r"
|
||||||
Rscript "./Scripts/1D_Use_ACS_Census_Data_to_Estimate_Kemmerer_Demographics.r"
|
Rscript "./Scripts/1D_Use_ACS_Census_Data_to_Estimate_Kemmerer_Demographics.r"
|
||||||
#Create data sets used in later simulations, produce some results for the report when related to this process.
|
#Create data sets used in later simulations, produce some results for the report when related to this process.
|
||||||
Rscript "./Scripts/2A_Birth_Rate_Regression.r"
|
Rscript "./Scripts/2A_Birth_Rate_Regression_and_Impart_Kemmerer_Births.r"
|
||||||
Rscript "./Scripts/2B_Migration_by_Age_Regression.r"
|
Rscript "./Scripts/2B_Impart_Deaths_and_Migration_to_Subregions.r"
|
||||||
Rscript "./Scripts/2C_Overall_Migration_ARIMA.r"
|
Rscript "./Scripts/2C_Migration_by_Age_Regression.r"
|
||||||
|
Rscript "./Scripts/2D_Overall_Migration_ARIMA.r"
|
||||||
#Produce final results for either the simulation, or information for the report, but not anything used in later stages of the simulation
|
#Produce final results for either the simulation, or information for the report, but not anything used in later stages of the simulation
|
||||||
Rscript "./Scripts/3A_Population_Pyramid.r"
|
Rscript "./Scripts/3A_Population_Pyramid.r"
|
||||||
|
|
||||||
|
|||||||
@ -22,7 +22,6 @@ PROJ_TRACTS <- PROJ_TRACTS %>% select(-ZCTA5) %>% filter(GEOID!=56023978200) %>%
|
|||||||
PROJ_TRACTS <- PROJ_TRACTS %>% select(GEOID) %>% mutate('IN_KEM'=1) %>% mutate(GEOID=as.character(GEOID))
|
PROJ_TRACTS <- PROJ_TRACTS %>% select(GEOID) %>% mutate('IN_KEM'=1) %>% mutate(GEOID=as.character(GEOID))
|
||||||
###Load data manually created which links vairable names to sex-age census data
|
###Load data manually created which links vairable names to sex-age census data
|
||||||
CODES <- read_csv("./Data/Raw_Data/ACS_Demographics/API_CENSUS_CODES.csv",skip=1) %>% mutate(Med_Age=(Min_Age+Max_Age)/2) %>% rename(variable=Code)
|
CODES <- read_csv("./Data/Raw_Data/ACS_Demographics/API_CENSUS_CODES.csv",skip=1) %>% mutate(Med_Age=(Min_Age+Max_Age)/2) %>% rename(variable=Code)
|
||||||
#Testing age Comparison between the two
|
|
||||||
###Extract census data for all tracts in Lincoln county, clean up the data, and indicate if the tract is in Kemmerer/Diamondvile or not.
|
###Extract census data for all tracts in Lincoln county, clean up the data, and indicate if the tract is in Kemmerer/Diamondvile or not.
|
||||||
DEMO_DATA_ALL <- do.call(rbind,lapply(2009:ACS_END_YEAR,MAKE_KEM_DEMO_DATA_YEAR))
|
DEMO_DATA_ALL <- do.call(rbind,lapply(2009:ACS_END_YEAR,MAKE_KEM_DEMO_DATA_YEAR))
|
||||||
ORIG_DEMO_DATA_ALL <- DEMO_DATA_ALL
|
ORIG_DEMO_DATA_ALL <- DEMO_DATA_ALL
|
||||||
@ -36,13 +35,13 @@ KEM_DEMO_DATA <- DEMO_DATA_ALL %>% filter(IN_KEM==1) %>% rename(Region=IN_KEM)
|
|||||||
#Ajust the populations to match the total population stastics from the other data sources, since the tracts may spill into other areas
|
#Ajust the populations to match the total population stastics from the other data sources, since the tracts may spill into other areas
|
||||||
POST_ADJUST_DATA <- KEM_DEMO_DATA %>% group_by(Year) %>% summarize(Kem_Demo_Population=sum(Num_Female)+sum(Num_Male)) %>% left_join(OTHER_LIN_DEMO_DATA %>% group_by(Year) %>% summarize(Other_Demo_Population=sum(Num_Female)+sum(Num_Male))) %>% mutate(Total_Lincoln_Demo_Population=Kem_Demo_Population+Other_Demo_Population)
|
POST_ADJUST_DATA <- KEM_DEMO_DATA %>% group_by(Year) %>% summarize(Kem_Demo_Population=sum(Num_Female)+sum(Num_Male)) %>% left_join(OTHER_LIN_DEMO_DATA %>% group_by(Year) %>% summarize(Other_Demo_Population=sum(Num_Female)+sum(Num_Male))) %>% mutate(Total_Lincoln_Demo_Population=Kem_Demo_Population+Other_Demo_Population)
|
||||||
DIRECT_POP <- readRDS("Data/Cleaned_Data/Population_Data/RDS/All_Wyoming_County_Populations.Rds") %>% filter(County=='Lincoln') %>% select(Year,Lin_Direct_Population=Population) %>% full_join(readRDS("Data/Cleaned_Data/Population_Data/RDS/All_Wyoming_City_Populations.Rds") %>% filter(City %in% c('Kemmerer','Diamondville')) %>% group_by(Year) %>% summarize(Kem_Direct_Population=sum(Population,na.rm=TRUE))) %>% mutate(Other_Direct_Population=Lin_Direct_Population-Kem_Direct_Population)
|
DIRECT_POP <- readRDS("Data/Cleaned_Data/Population_Data/RDS/All_Wyoming_County_Populations.Rds") %>% filter(County=='Lincoln') %>% select(Year,Lin_Direct_Population=Population) %>% full_join(readRDS("Data/Cleaned_Data/Population_Data/RDS/All_Wyoming_City_Populations.Rds") %>% filter(City %in% c('Kemmerer','Diamondville')) %>% group_by(Year) %>% summarize(Kem_Direct_Population=sum(Population,na.rm=TRUE))) %>% mutate(Other_Direct_Population=Lin_Direct_Population-Kem_Direct_Population)
|
||||||
ADJUST_TABLE <- DIRECT_POP %>% inner_join(POST_ADJUST_DATA) %>% mutate(KEM_ADJ=Kem_Direct_Population/Kem_Demo_Population,OTHER_ADJ=Lin_Direct_Population/Other_Demo_Population,LIN_ADJ=Lin_Direct_Population/Total_Lincoln_Demo_Population) %>% select(Year,LIN_ADJ,KEM_ADJ,OTHER_ADJ)
|
ADJUST_TABLE <- DIRECT_POP %>% inner_join(POST_ADJUST_DATA) %>% mutate(KEM_ADJ=Kem_Direct_Population/Kem_Demo_Population,OTHER_ADJ=Other_Direct_Population/Other_Demo_Population,LIN_ADJ=Lin_Direct_Population/Total_Lincoln_Demo_Population) %>% select(Year,LIN_ADJ,KEM_ADJ,OTHER_ADJ)
|
||||||
|
##Checking Total
|
||||||
KEM_DEMO_DATA <- KEM_DEMO_DATA %>% left_join(ADJUST_TABLE %>% select(Year,KEM_ADJ)) %>% mutate(Num_Female=round(KEM_ADJ*Num_Female),Num_Male=round(KEM_ADJ*Num_Male)) %>% select(-KEM_ADJ)
|
KEM_DEMO_DATA <- KEM_DEMO_DATA %>% left_join(ADJUST_TABLE %>% select(Year,KEM_ADJ)) %>% mutate(Num_Female=round(KEM_ADJ*Num_Female),Num_Male=round(KEM_ADJ*Num_Male)) %>% select(-KEM_ADJ)
|
||||||
|
|
||||||
OTHER_LIN_DEMO_DATA <- OTHER_LIN_DEMO_DATA%>% left_join(ADJUST_TABLE %>% select(Year,OTHER_ADJ)) %>% mutate(Num_Female=round(OTHER_ADJ*Num_Female),Num_Male=round(OTHER_ADJ*Num_Male)) %>% select(-OTHER_ADJ)
|
OTHER_LIN_DEMO_DATA <- OTHER_LIN_DEMO_DATA%>% left_join(ADJUST_TABLE %>% select(Year,OTHER_ADJ)) %>% mutate(Num_Female=round(OTHER_ADJ*Num_Female),Num_Male=round(OTHER_ADJ*Num_Male)) %>% select(-OTHER_ADJ)
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
#Find the most recent data year
|
#Find the most recent data year
|
||||||
MAX_YEAR <- max(KEM_DEMO_DATA$Year)
|
MAX_YEAR <- max(KEM_DEMO_DATA$Year)
|
||||||
KEM_DEMO_MAT <- KEM_DEMO_DATA %>% filter(Year==MAX_YEAR) %>% select(Num_Male,Num_Female) %>% as.matrix
|
KEM_DEMO_MAT <- KEM_DEMO_DATA %>% filter(Year==MAX_YEAR) %>% select(Num_Male,Num_Female) %>% as.matrix
|
||||||
|
|||||||
@ -36,9 +36,7 @@ KEM_BIRTH_DATA <- KEM_BIRTH_DATA %>% left_join(ADD_BIRTH_GROUP_DATA(KEM_RAW_DEMO
|
|||||||
|
|
||||||
####################Create same data set but for only parts of Lincoln not in the Kemmerer Diamondville area
|
####################Create same data set but for only parts of Lincoln not in the Kemmerer Diamondville area
|
||||||
OTHER_RAW_DEMOGRAPHIC_DATA <- readRDS(DEMOGRAPHIC_OTHER_LIN_LOC)
|
OTHER_RAW_DEMOGRAPHIC_DATA <- readRDS(DEMOGRAPHIC_OTHER_LIN_LOC)
|
||||||
OTHER_DEMO_DATA <- OTHER_RAW_DEMOGRAPHIC_DATA %>% mutate(POP=Num_Male+Num_Female,Births=ifelse(Age==0,POP,0)) %>% group_by(Year,County,Region) %>% summarize(Births=sum(Births))
|
OTHER_BIRTH_DATA<- OTHER_RAW_DEMOGRAPHIC_DATA %>% mutate(Population=Num_Male+Num_Female,Births=ifelse(Age==0,Population,0)) %>% group_by(Year,County,Region) %>% summarize(Population=sum(Population),Births=sum(Births)) %>% ungroup %>% arrange(desc(Year))
|
||||||
OTHER_POP_DATA <- readRDS(POPULATION_CITY_LOC)%>% rename(Region=City) %>% filter(!(Region %in% c("Kemmerer","Diamondville")),County=='Lincoln') %>% group_by(Year) %>% mutate(Population=sum(Population,na.rm=TRUE)) %>% ungroup %>% mutate(Region='Lincoln_Other') %>% unique %>% ungroup
|
|
||||||
OTHER_BIRTH_DATA <- OTHER_POP_DATA %>% left_join(OTHER_DEMO_DATA)
|
|
||||||
OTHER_BIRTH_DATA <- OTHER_BIRTH_DATA %>% left_join(ADD_BIRTH_GROUP_DATA(OTHER_RAW_DEMOGRAPHIC_DATA))
|
OTHER_BIRTH_DATA <- OTHER_BIRTH_DATA %>% left_join(ADD_BIRTH_GROUP_DATA(OTHER_RAW_DEMOGRAPHIC_DATA))
|
||||||
|
|
||||||
####################################################3
|
####################################################3
|
||||||
@ -49,7 +47,10 @@ ADJUSTED_KEM_STUB_DATA <- COUNTY_BIRTH_DATA %>% filter(Region=='Lincoln') %>% s
|
|||||||
KEM_BIRTH_DATA <- KEM_BIRTH_DATA %>% select(-Births) %>% left_join(ADJUSTED_KEM_STUB_DATA )
|
KEM_BIRTH_DATA <- KEM_BIRTH_DATA %>% select(-Births) %>% left_join(ADJUSTED_KEM_STUB_DATA )
|
||||||
OTHER_BIRTH_DATA <- OTHER_BIRTH_DATA %>% select(-Births) %>% left_join(ADJUSTED_KEM_STUB_DATA)
|
OTHER_BIRTH_DATA <- OTHER_BIRTH_DATA %>% select(-Births) %>% left_join(ADJUSTED_KEM_STUB_DATA)
|
||||||
REG_DATA <- rbind(COUNTY_BIRTH_DATA,rbind(KEM_BIRTH_DATA,OTHER_BIRTH_DATA) %>% mutate(Deaths=NA,Migration=NA) %>% select(colnames(COUNTY_BIRTH_DATA)))
|
REG_DATA <- rbind(COUNTY_BIRTH_DATA,rbind(KEM_BIRTH_DATA,OTHER_BIRTH_DATA) %>% mutate(Deaths=NA,Migration=NA) %>% select(colnames(COUNTY_BIRTH_DATA)))
|
||||||
|
|
||||||
|
|
||||||
REG_DATA <- REG_DATA %>% group_by(Region) %>% arrange(Year) %>% mutate(Lag_Births=lag(Births),Lag_Two_Births=lag(Births,2)) %>% ungroup %>% arrange(County,Region,Year)
|
REG_DATA <- REG_DATA %>% group_by(Region) %>% arrange(Year) %>% mutate(Lag_Births=lag(Births),Lag_Two_Births=lag(Births,2)) %>% ungroup %>% arrange(County,Region,Year)
|
||||||
|
|
||||||
REG_REDUCED_DATA <- REG_DATA %>% filter(!is.na(Births),!is.na(Lag_Two_Births),!is.na(Min_Birth_Group),!is.na(Lag_Births),!is.na(Region)) %>% select(Year,Region,Min_Birth_Group,Births,Lag_Births,Lag_Two_Births)
|
REG_REDUCED_DATA <- REG_DATA %>% filter(!is.na(Births),!is.na(Lag_Two_Births),!is.na(Min_Birth_Group),!is.na(Lag_Births),!is.na(Region)) %>% select(Year,Region,Min_Birth_Group,Births,Lag_Births,Lag_Two_Births)
|
||||||
|
|
||||||
|
|
||||||
@ -140,4 +141,27 @@ ST_REG_DATA <- REG_REDUCED_DATA %>% filter(Region=='Lincoln') %>% filter(Year==m
|
|||||||
saveRDS(REG_REDUCED_DATA,paste0(SAVE_DATA_LOC,"/Birth_Regression_Input_Data.Rds"))
|
saveRDS(REG_REDUCED_DATA,paste0(SAVE_DATA_LOC,"/Birth_Regression_Input_Data.Rds"))
|
||||||
write_csv(REG_REDUCED_DATA,paste0(SAVE_DATA_LOC,"/Birth_Regression_Input_Data.csv"))
|
write_csv(REG_REDUCED_DATA,paste0(SAVE_DATA_LOC,"/Birth_Regression_Input_Data.csv"))
|
||||||
|
|
||||||
|
##########Update existing demogrpahic data with new births
|
||||||
|
KEM_DATA_NEW <- REG_DATA %>% filter(Region=='Kemmerer & Diamondville') %>% select(Year,County,Region,Population,Births,Deaths,Migration)
|
||||||
|
PRED_KEM_BIRTH_2024 <- REG_DATA %>% filter(Region=='Kemmerer & Diamondville',Year>=2023) %>% mutate(Min_Birth_Group=lag(Min_Birth_Group)) %>% filter(Year==2024)
|
||||||
|
KEM_DATA_NEW[KEM_DATA_NEW$Year==2024,"Births"] <- round(exp(predict(MOD_BIRTHS,PRED_KEM_BIRTH_2024)))
|
||||||
|
|
||||||
|
OTHER_DATA_NEW <- REG_DATA %>% filter(Region=='Lincoln_Other') %>% select(Year,County,Region,Population,Births,Deaths,Migration)
|
||||||
|
PRED_OTHER_BIRTH_2024 <- REG_DATA %>% filter(Region=='Lincoln_Other',Year>=2023) %>% mutate(Min_Birth_Group=lag(Min_Birth_Group)) %>% filter(Year==2024)
|
||||||
|
OTHER_DATA_NEW[OTHER_DATA_NEW$Year==2024,"Births"] <- round(exp(predict(MOD_BIRTHS,PRED_OTHER_BIRTH_2024)))
|
||||||
|
|
||||||
|
LIN_DATA_NEW <- REG_DATA %>% filter(Region=='Lincoln') %>% select(Year,County,Region,Population,Births,Deaths,Migration)
|
||||||
|
#
|
||||||
|
if(!exists("POPULATION_SAVE_RDS")){POPULATION_SAVE_RDS <- "./Data/Cleaned_Data/Population_Data/RDS/"}
|
||||||
|
dir.create(POPULATION_SAVE_RDS, recursive = TRUE, showWarnings = FALSE)
|
||||||
|
|
||||||
|
if(!exists("POPULATION_SAVE_CSV")){POPULATION_SAVE_CSV <- "./Data/Cleaned_Data/Population_Data/CSV/"}
|
||||||
|
dir.create(POPULATION_SAVE_CSV, recursive = TRUE, showWarnings = FALSE)
|
||||||
|
saveRDS(LIN_DATA_NEW,paste0(POPULATION_SAVE_RDS,"Full_Lincoln_County_Population_Data.Rds"))
|
||||||
|
write_csv(LIN_DATA_NEW,paste0(POPULATION_SAVE_CSV,"Full_Lincoln_County_Population_Data.csv"))
|
||||||
|
saveRDS(KEM_DATA_NEW,paste0(POPULATION_SAVE_RDS,"Kemmerer_Diamondville_Population_Data.Rds"))
|
||||||
|
write_csv(KEM_DATA_NEW,paste0(POPULATION_SAVE_CSV,"Kemmerer_Diamondville_Population_Data.csv"))
|
||||||
|
saveRDS(OTHER_DATA_NEW,paste0(POPULATION_SAVE_RDS,"Other_Lincoln_Population_Data.Rds"))
|
||||||
|
write_csv(OTHER_DATA_NEW,paste0(POPULATION_SAVE_CSV,"Other_Lincoln_Population_Data.csv"))
|
||||||
|
|
||||||
|
|
||||||
50
Scripts/2B_Impart_Deaths_and_Migration_to_Subregions.r
Normal file
50
Scripts/2B_Impart_Deaths_and_Migration_to_Subregions.r
Normal file
@ -0,0 +1,50 @@
|
|||||||
|
###May want to check rounding of demographic data as migration does not line up perfectly (off by about 9 on average)
|
||||||
|
library(tidyverse)
|
||||||
|
#setwd("../")
|
||||||
|
KEM_POP_LOC <- "Data/Cleaned_Data/Population_Data/RDS/Kemmerer_Diamondville_Population_Data.Rds"
|
||||||
|
OTHER_POP_LOC <- "Data/Cleaned_Data/Population_Data/RDS/Other_Lincoln_Population_Data.Rds"
|
||||||
|
KEM_POP_LOC_CSV <- "Data/Cleaned_Data/Population_Data/RDS/Kemmerer_Diamondville_Population_Data.csv"
|
||||||
|
OTHER_POP_LOC_CSV <- "Data/Cleaned_Data/Population_Data/RDS/Other_Lincoln_Population_Data.csv"
|
||||||
|
|
||||||
|
MORT <- readRDS("Data/Cleaned_Data/Mortality_Rate_Data/RDS/Lincoln_County_Mortality_Rates.Rds")
|
||||||
|
KEM_DEM <-readRDS("Data/Cleaned_Data/Demographic_Sex_Age_Data/RDS/Kemmerer_Diamondville_Demographics.Rds")
|
||||||
|
OTHER_DEM <- readRDS("Data/Cleaned_Data/Demographic_Sex_Age_Data/RDS/Other_Lincoln_Demographics.Rds")
|
||||||
|
LIN_DEM <- readRDS("Data/Cleaned_Data/Demographic_Sex_Age_Data/RDS/Full_Lincoln_County_Demographics.Rds")
|
||||||
|
MORTALITY <- MORT
|
||||||
|
#Estimate split of deaths in county (share of Kemmerer vs other Lincoln)
|
||||||
|
PRED_DEATHS <- function(DATA,MORTALITY){
|
||||||
|
MORTALITY <- MORTALITY%>% pivot_wider(values_from=Death_Rate,names_from=Sex) %>% group_by(County,Min_Age,Max_Age) %>% summarize(Rate_Male=mean(Male,na.rm=TRUE),Rate_Female=mean(Female,na.rm=TRUE)) %>% ungroup
|
||||||
|
RES <- DATA[DATA$Age >= MORTALITY$Min_Age[1] & MORTALITY$Max_Age[1]>= DATA$Age,] %>% group_by(County,Region,Year,Age) %>% summarize(Num_Female=sum(Num_Female),Num_Male=sum(Num_Male)) %>% ungroup %>% mutate(Min_Age=MORTALITY$Min_Age[1],Max_Age= MORTALITY$Max_Age[1])
|
||||||
|
for(i in 2:nrow(MORTALITY)){
|
||||||
|
RES <- rbind(RES,DATA[DATA$Age >= MORTALITY$Min_Age[i] & MORTALITY$Max_Age[i]>= DATA$Age,] %>% group_by(County,Region,Year,Age) %>% summarize(Num_Female=sum(Num_Female),Num_Male=sum(Num_Male)) %>% ungroup %>% mutate(Min_Age=MORTALITY$Min_Age[i],Max_Age= MORTALITY$Max_Age[i]))
|
||||||
|
}
|
||||||
|
RES <- RES%>% arrange(Year) %>% left_join(MORTALITY)%>% group_by(County,Region,Year) %>% summarize(Predicted_Deaths=sum(Rate_Male*Num_Male)+sum(Rate_Female*Num_Female) ) %>% ungroup %>% select(County,Region,Year,Predicted_Deaths)
|
||||||
|
return(RES)
|
||||||
|
}
|
||||||
|
##Predict all deaths an merge into a table to find the ratios of predicted deaths
|
||||||
|
LIN_PRED <- PRED_DEATHS(LIN_DEM,MORT) %>% select(Year,Lin_Pred_Deaths=Predicted_Deaths)
|
||||||
|
KEM_PRED <- PRED_DEATHS(KEM_DEM,MORT) %>% select(Year,Kem_Pred_Deaths=Predicted_Deaths)
|
||||||
|
OTHER_PRED <- PRED_DEATHS(OTHER_DEM,MORT) %>% select(Year,Other_Pred_Deaths=Predicted_Deaths)
|
||||||
|
PRED_DATA<- LIN_PRED %>% left_join(KEM_PRED) %>% left_join(OTHER_PRED)
|
||||||
|
Death_Adj <- PRED_DATA %>% filter(!is.na(Kem_Pred_Deaths)) %>% mutate(Kem_Death_Ratio=Kem_Pred_Deaths/(Kem_Pred_Deaths+Other_Pred_Deaths),Other_Death_Ratio=1-Kem_Death_Ratio) %>% select(Year,Kem_Death_Ratio,Other_Death_Ratio)
|
||||||
|
#Add deaths to Lincoln were missing (lacking data)
|
||||||
|
LIN_POP <- readRDS("Data/Cleaned_Data/Population_Data/RDS/Full_Lincoln_County_Population_Data.Rds")
|
||||||
|
LIN_POP <- LIN_POP %>% left_join(LIN_PRED) %>% mutate(Deaths=ifelse(is.na(Deaths) & !is.na(Lin_Pred_Deaths),Lin_Pred_Deaths,Deaths)) %>% select(-Lin_Pred_Deaths) %>% mutate(BD=Population+Births-Deaths) %>% mutate(Missing=Population-lag(BD),Missing=lead(Missing),Migration=ifelse(is.na(Migration),Missing,Migration)) %>% mutate(NEXT=Population+Births-Deaths+Migration) %>% arrange(desc(Year)) %>% select(colnames(LIN_POP))
|
||||||
|
###Estimate the number of deaths in Kemmerer based on the total deaths, and the predicted share of deaths
|
||||||
|
#Find migration based on the remaining missing population (after deaths,and births)
|
||||||
|
KEM_POP <- readRDS(KEM_POP_LOC)
|
||||||
|
KEM_DEATHS <- LIN_POP %>% select(Year,Deaths) %>% left_join(Death_Adj) %>% left_join(KEM_PRED) %>% mutate(Deaths=round(Deaths*Kem_Death_Ratio)) %>% select(Year,Deaths)
|
||||||
|
KEM_POP <- KEM_POP%>% select(-Deaths) %>% left_join(KEM_DEATHS) %>% mutate(BD=Population+Births-Deaths) %>% mutate(Missing=Population-lag(BD),Missing=lead(Missing),Migration=ifelse(is.na(Migration),Missing,Migration))%>% mutate(NEXT=Population+Births-Deaths+Migration)%>% arrange(desc(Year)) %>% select(colnames(LIN_POP))
|
||||||
|
|
||||||
|
###Estimate the number of deaths in the other parts of Lincoln (not Kemmerer) based on the total deaths, and the predicted share of deaths
|
||||||
|
#Find migration based on the remaining missing population (after deaths,and births)
|
||||||
|
OTHER_POP <- readRDS(OTHER_POP_LOC)
|
||||||
|
OTHER_DEATHS <- LIN_POP %>% select(Year,Deaths) %>% left_join(Death_Adj) %>% left_join(OTHER_PRED) %>% mutate(Deaths=round(Deaths*Other_Death_Ratio)) %>% select(Year,Deaths)
|
||||||
|
OTHER_POP <- OTHER_POP %>% select(-Deaths) %>% left_join(OTHER_DEATHS) %>% mutate(BD=Population+Births-Deaths) %>% mutate(Missing=Population-lag(BD),Missing=lead(Missing),Migration=ifelse(is.na(Migration),Missing,Migration))%>% arrange(desc(Year)) %>% mutate(NEXT=Population+Births-Deaths+Migration)%>% select(colnames(LIN_POP)) %>% arrange(desc(Year))
|
||||||
|
#Save outputs
|
||||||
|
saveRDS(KEM_POP,KEM_POP_LOC)
|
||||||
|
saveRDS(OTHER_POP,OTHER_POP_LOC)
|
||||||
|
write_csv(KEM_POP,KEM_POP_LOC_CSV)
|
||||||
|
write_csv(OTHER_POP,OTHER_POP_LOC_CSV)
|
||||||
|
|
||||||
|
|
||||||
@ -1,30 +0,0 @@
|
|||||||
library(forecast)
|
|
||||||
####Work on overall migration trends
|
|
||||||
#Could use code cleanup after trying things, but have but I have a working ARIMA to model Lincoln county migration
|
|
||||||
TS_DATA <- POP_DATA %>% mutate(In_Migration=ifelse(Migration>0,1,0)) %>% group_by(County) %>% arrange(County,Year) %>% mutate(Prev_Pop=lag(Population))
|
|
||||||
ST_YEAR <- min(pull(TS_DATA %>% filter(!is.na(Migration)),Year))
|
|
||||||
END_YEAR <- max(pull(TS_DATA %>% filter(!is.na(Migration)),Year))
|
|
||||||
#GRAPH_DATA <- TS_DATA %>% filter(!is.na(Migration))
|
|
||||||
#GRAPH_DATA_LN <- TS_DATA %>% filter(!is.na(Migration),County=="Lincoln")
|
|
||||||
#ggplot(GRAPH_DATA,aes(x=Year,y=Migration/Prev_Pop,group=County,color=County))+geom_point()+geom_line(data=GRAPH_DATA_LN)
|
|
||||||
TS_WIDE <- TS_DATA %>% dplyr::select(Year,County,Migration) %>% pivot_wider(values_from=Migration,names_from=County) %>% arrange(Year) %>% filter(Year>ST_YEAR+1,Year<=END_YEAR) %>%ts(start=c(ST_YEAR+1),frequency=1)
|
|
||||||
LN <- TS_DATA %>% dplyr::select(Year,County,Migration) %>% pivot_wider(values_from=Migration,names_from=County) %>% arrange(Year) %>% dplyr::select(Lincoln,Year) %>% filter(Year>=ST_YEAR,Year<=END_YEAR) %>% dplyr::select(-Year) %>%ts(start=c(ST_YEAR),frequency=1)
|
|
||||||
#Create an ARIMA of Migration so the number of people migrating can be simulated
|
|
||||||
#Time series tests
|
|
||||||
#library(tseries)
|
|
||||||
#adf.test(LN,k=1) #Stationary with one lag, otherwise not stationary
|
|
||||||
#kpss.test(LN) #Stationary,default of program and has some model fit improvements
|
|
||||||
MOD <- auto.arima(LN,stationary=TRUE)
|
|
||||||
#summary(MOD)
|
|
||||||
#Validity tests
|
|
||||||
#autoplot(MOD)
|
|
||||||
#acf(resid(MOD))
|
|
||||||
#pacf(resid(MOD))
|
|
||||||
# adf.test(resid(MOD))
|
|
||||||
#checkresiduals(MOD)
|
|
||||||
#Save the resulting model outputs, will need to be changed if looking at other counties
|
|
||||||
saveRDS(MOD,"Data/Regression_Results/LN_ARIMA_MODEL.Rds")
|
|
||||||
|
|
||||||
MIGRATION_ARIMA_SIMS <- (do.call(cbind,mclapply(1:NUM_SIMULATIONS,function(x){as.numeric(round(simulate(MOD,future=TRUE, nsim=NUM_YEARS_PROJECTED)))},mc.cores =detectCores()-1)))#testing a multiple run simulation could use parallel process
|
|
||||||
saveRDS(MIGRATION_ARIMA_SIMS,"Data/Simulated_Data_Sets/Migration_ARIMA.Rds")
|
|
||||||
write.csv(MIGRATION_ARIMA_SIMS,row.names=FALSE,"Data/Simulated_Data_Sets/Migration_ARIMA.csv")
|
|
||||||
72
Scripts/2D_Overall_Migration_ARIMA.r
Normal file
72
Scripts/2D_Overall_Migration_ARIMA.r
Normal file
@ -0,0 +1,72 @@
|
|||||||
|
library(forecast)
|
||||||
|
library(tidyverse)
|
||||||
|
#setwd("../")
|
||||||
|
####Work on overall migration trends
|
||||||
|
#Could use code cleanup after trying things, but have but I have a working ARIMA to model Lincoln county migration
|
||||||
|
POP_DATA <- readRDS("Data/Cleaned_Data/Population_Data/RDS/Full_Lincoln_County_Population_Data.Rds") %>% mutate(Migration=Migration/Population)
|
||||||
|
POP_DATA_TEST <- readRDS("Data/Cleaned_Data/Population_Data/RDS/Full_Lincoln_County_Population_Data.Rds") %>% mutate(Migration=Migration/Population)
|
||||||
|
|
||||||
|
POP_KEM_DATA <- readRDS("Data/Cleaned_Data/Population_Data/RDS/Kemmerer_Diamondville_Population_Data.Rds")
|
||||||
|
POP_OTHER_DATA <- readRDS("Data/Cleaned_Data/Population_Data/RDS/Other_Lincoln_Population_Data.Rds")
|
||||||
|
hist(POP_OTHER_DATA$Migration/POP_OTHER_DATA$Population)
|
||||||
|
hist(POP_KEM_DATA$Migration/POP_KEM_DATA$Population)
|
||||||
|
|
||||||
|
|
||||||
|
TS_DATA <- POP_DATA %>% mutate(In_Migration=ifelse(Migration>0,1,0)) %>% group_by(County) %>% arrange(County,Year) %>% mutate(Prev_Pop=lag(Population)) %>% ungroup
|
||||||
|
TS_DATA_TEST <- POP_DATA_TEST %>% mutate(In_Migration=ifelse(Migration>0,1,0)) %>% group_by(County) %>% arrange(County,Year) %>% mutate(Prev_Pop=lag(Population)) %>% ungroup
|
||||||
|
|
||||||
|
TS_KEM_DATA <- POP_KEM_DATA %>% mutate(In_Migration=ifelse(Migration>0,1,0)) %>% group_by(County) %>% arrange(County,Year) %>% mutate(Prev_Pop=lag(Population)) %>% ungroup
|
||||||
|
TS_OTHER_DATA <- POP_OTHER_DATA %>% mutate(In_Migration=ifelse(Migration>0,1,0)) %>% group_by(County) %>% arrange(County,Year) %>% mutate(Prev_Pop=lag(Population)) %>% ungroup
|
||||||
|
|
||||||
|
|
||||||
|
ST_YEAR <- min(pull(TS_DATA %>% filter(!is.na(Migration)),Year))
|
||||||
|
END_YEAR <- max(pull(TS_DATA %>% filter(!is.na(Migration)),Year))
|
||||||
|
|
||||||
|
ST_YEAR_KEM <- min(pull(TS_KEM_DATA %>% filter(!is.na(Migration)),Year))
|
||||||
|
END_YEAR_KEM <- max(pull(TS_KEM_DATA %>% filter(!is.na(Migration)),Year))
|
||||||
|
|
||||||
|
ST_YEAR_OTHER <- min(pull(TS_OTHER_DATA %>% filter(!is.na(Migration)),Year))
|
||||||
|
END_YEAR_OTHER <- max(pull(TS_OTHER_DATA %>% filter(!is.na(Migration)),Year))
|
||||||
|
|
||||||
|
|
||||||
|
#GRAPH_DATA <- TS_DATA %>% filter(!is.na(Migration))
|
||||||
|
#GRAPH_DATA_LN <- TS_DATA %>% filter(!is.na(Migration),County=="Lincoln")
|
||||||
|
#ggplot(GRAPH_DATA,aes(x=Year,y=Migration/Prev_Pop,group=County,color=County))+geom_point()+geom_line(data=GRAPH_DATA_LN)
|
||||||
|
TS_WIDE <- TS_DATA %>% dplyr::select(Year,County,Migration) %>% pivot_wider(values_from=Migration,names_from=County) %>% arrange(Year) %>% filter(Year>ST_YEAR+1,Year<=END_YEAR) %>%ts(start=c(ST_YEAR+1),frequency=1)
|
||||||
|
|
||||||
|
TS_KEM_WIDE <- TS_KEM_DATA %>% dplyr::select(Year,County,Migration) %>% pivot_wider(values_from=Migration,names_from=County) %>% arrange(Year) %>% filter(Year>ST_YEAR+1,Year<=END_YEAR) %>%ts(start=c(ST_YEAR+1),frequency=1)
|
||||||
|
TS_OTHER_WIDE <- TS_OTHER_DATA %>% dplyr::select(Year,County,Migration) %>% pivot_wider(values_from=Migration,names_from=County) %>% arrange(Year) %>% filter(Year>ST_YEAR+1,Year<=END_YEAR) %>%ts(start=c(ST_YEAR+1),frequency=1)
|
||||||
|
|
||||||
|
|
||||||
|
LN <- TS_DATA %>% dplyr::select(Year,County,Migration) %>% pivot_wider(values_from=Migration,names_from=County) %>% arrange(Year) %>% dplyr::select(Lincoln,Year) %>% filter(Year>=ST_YEAR,Year<=END_YEAR) %>% dplyr::select(-Year) %>%ts(start=c(ST_YEAR),frequency=1)
|
||||||
|
|
||||||
|
KEM <- TS_KEM_DATA %>% dplyr::select(Year,Region,Migration) %>% pivot_wider(values_from=Migration,names_from=Region) %>% arrange(Year) %>% dplyr::select('Kemmerer & Diamondville',Year) %>% filter(Year>=ST_YEAR_KEM,Year<=END_YEAR_KEM) %>% dplyr::select(-Year) %>%ts(start=c(ST_YEAR_KEM),frequency=1)
|
||||||
|
TS_OTHER_DATA
|
||||||
|
OTHER <- TS_OTHER_DATA %>% dplyr::select(Year,Region,Migration) %>% pivot_wider(values_from=Migration,names_from=Region) %>% arrange(Year) %>% dplyr::select('Lincoln Other'=Lincoln_Other,Year) %>% filter(Year>=ST_YEAR_OTHER,Year<=END_YEAR_OTHER) %>% dplyr::select(-Year) %>%ts(start=c(ST_YEAR_OTHER),frequency=1)
|
||||||
|
|
||||||
|
#Create an ARIMA of Migration so the number of people migrating can be simulated
|
||||||
|
#Time series tests
|
||||||
|
#library(tseries)
|
||||||
|
#adf.test(LN,k=1) #Stationary with one lag, otherwise not stationary
|
||||||
|
#kpss.test(LN) #Stationary,default of program and has some model fit improvements
|
||||||
|
MOD <- auto.arima(LN,stationary=TRUE)
|
||||||
|
MOD_KEM <- auto.arima(KEM)
|
||||||
|
MOD_OTHER <- auto.arima(OTHER)
|
||||||
|
plot(forecast(MOD ))
|
||||||
|
plot(forecast(MOD_KEM ))
|
||||||
|
plot(forecast(MOD_OTHER ))
|
||||||
|
plot(forecast(MOD,abs(KEM))
|
||||||
|
|
||||||
|
#summary(MOD)
|
||||||
|
#Validity tests
|
||||||
|
#autoplot(MOD)
|
||||||
|
#acf(resid(MOD))
|
||||||
|
#pacf(resid(MOD))
|
||||||
|
# adf.test(resid(MOD))
|
||||||
|
#checkresiduals(MOD)
|
||||||
|
#Save the resulting model outputs, will need to be changed if looking at other counties
|
||||||
|
#saveRDS(MOD,"Data/Regression_Results/LN_ARIMA_MODEL.Rds")
|
||||||
|
|
||||||
|
MIGRATION_ARIMA_SIMS <- (do.call(cbind,mclapply(1:NUM_SIMULATIONS,function(x){as.numeric(round(simulate(MOD,future=TRUE, nsim=NUM_YEARS_PROJECTED)))},mc.cores =detectCores()-1)))#testing a multiple run simulation could use parallel process
|
||||||
|
saveRDS(MIGRATION_ARIMA_SIMS,"Data/Simulated_Data_Sets/Migration_ARIMA.Rds")
|
||||||
|
write.csv(MIGRATION_ARIMA_SIMS,row.names=FALSE,"Data/Simulated_Data_Sets/Migration_ARIMA.csv")
|
||||||
Loading…
x
Reference in New Issue
Block a user