Imparted values to migration, and deaths. ARIMA
This commit is contained in:
parent
809e75e58d
commit
99063bec79
@ -4,9 +4,10 @@ Rscript "./Scripts/1B_Process_Existing_NIH_Mortality_Data.r"
|
|||||||
Rscript "./Scripts/1C_Download_and_Process_Demographic_Data.r"
|
Rscript "./Scripts/1C_Download_and_Process_Demographic_Data.r"
|
||||||
Rscript "./Scripts/1D_Use_ACS_Census_Data_to_Estimate_Kemmerer_Demographics.r"
|
Rscript "./Scripts/1D_Use_ACS_Census_Data_to_Estimate_Kemmerer_Demographics.r"
|
||||||
#Create data sets used in later simulations, produce some results for the report when related to this process.
|
#Create data sets used in later simulations, produce some results for the report when related to this process.
|
||||||
Rscript "./Scripts/2A_Birth_Rate_Regression.r"
|
Rscript "./Scripts/2A_Birth_Rate_Regression_and_Impart_Kemmerer_Births.r"
|
||||||
Rscript "./Scripts/2B_Migration_by_Age_Regression.r"
|
Rscript "./Scripts/2B_Impart_Deaths_and_Migration_to_Subregions.r"
|
||||||
Rscript "./Scripts/2C_Overall_Migration_ARIMA.r"
|
Rscript "./Scripts/2C_Migration_by_Age_Regression.r"
|
||||||
|
Rscript "./Scripts/2D_Overall_Migration_ARIMA.r"
|
||||||
#Produce final results for either the simulation, or information for the report, but not anything used in later stages of the simulation
|
#Produce final results for either the simulation, or information for the report, but not anything used in later stages of the simulation
|
||||||
Rscript "./Scripts/3A_Population_Pyramid.r"
|
Rscript "./Scripts/3A_Population_Pyramid.r"
|
||||||
|
|
||||||
|
|||||||
@ -22,7 +22,6 @@ PROJ_TRACTS <- PROJ_TRACTS %>% select(-ZCTA5) %>% filter(GEOID!=56023978200) %>%
|
|||||||
PROJ_TRACTS <- PROJ_TRACTS %>% select(GEOID) %>% mutate('IN_KEM'=1) %>% mutate(GEOID=as.character(GEOID))
|
PROJ_TRACTS <- PROJ_TRACTS %>% select(GEOID) %>% mutate('IN_KEM'=1) %>% mutate(GEOID=as.character(GEOID))
|
||||||
###Load data manually created which links vairable names to sex-age census data
|
###Load data manually created which links vairable names to sex-age census data
|
||||||
CODES <- read_csv("./Data/Raw_Data/ACS_Demographics/API_CENSUS_CODES.csv",skip=1) %>% mutate(Med_Age=(Min_Age+Max_Age)/2) %>% rename(variable=Code)
|
CODES <- read_csv("./Data/Raw_Data/ACS_Demographics/API_CENSUS_CODES.csv",skip=1) %>% mutate(Med_Age=(Min_Age+Max_Age)/2) %>% rename(variable=Code)
|
||||||
#Testing age Comparison between the two
|
|
||||||
###Extract census data for all tracts in Lincoln county, clean up the data, and indicate if the tract is in Kemmerer/Diamondvile or not.
|
###Extract census data for all tracts in Lincoln county, clean up the data, and indicate if the tract is in Kemmerer/Diamondvile or not.
|
||||||
DEMO_DATA_ALL <- do.call(rbind,lapply(2009:ACS_END_YEAR,MAKE_KEM_DEMO_DATA_YEAR))
|
DEMO_DATA_ALL <- do.call(rbind,lapply(2009:ACS_END_YEAR,MAKE_KEM_DEMO_DATA_YEAR))
|
||||||
ORIG_DEMO_DATA_ALL <- DEMO_DATA_ALL
|
ORIG_DEMO_DATA_ALL <- DEMO_DATA_ALL
|
||||||
@ -36,13 +35,13 @@ KEM_DEMO_DATA <- DEMO_DATA_ALL %>% filter(IN_KEM==1) %>% rename(Region=IN_KEM)
|
|||||||
#Ajust the populations to match the total population stastics from the other data sources, since the tracts may spill into other areas
|
#Ajust the populations to match the total population stastics from the other data sources, since the tracts may spill into other areas
|
||||||
POST_ADJUST_DATA <- KEM_DEMO_DATA %>% group_by(Year) %>% summarize(Kem_Demo_Population=sum(Num_Female)+sum(Num_Male)) %>% left_join(OTHER_LIN_DEMO_DATA %>% group_by(Year) %>% summarize(Other_Demo_Population=sum(Num_Female)+sum(Num_Male))) %>% mutate(Total_Lincoln_Demo_Population=Kem_Demo_Population+Other_Demo_Population)
|
POST_ADJUST_DATA <- KEM_DEMO_DATA %>% group_by(Year) %>% summarize(Kem_Demo_Population=sum(Num_Female)+sum(Num_Male)) %>% left_join(OTHER_LIN_DEMO_DATA %>% group_by(Year) %>% summarize(Other_Demo_Population=sum(Num_Female)+sum(Num_Male))) %>% mutate(Total_Lincoln_Demo_Population=Kem_Demo_Population+Other_Demo_Population)
|
||||||
DIRECT_POP <- readRDS("Data/Cleaned_Data/Population_Data/RDS/All_Wyoming_County_Populations.Rds") %>% filter(County=='Lincoln') %>% select(Year,Lin_Direct_Population=Population) %>% full_join(readRDS("Data/Cleaned_Data/Population_Data/RDS/All_Wyoming_City_Populations.Rds") %>% filter(City %in% c('Kemmerer','Diamondville')) %>% group_by(Year) %>% summarize(Kem_Direct_Population=sum(Population,na.rm=TRUE))) %>% mutate(Other_Direct_Population=Lin_Direct_Population-Kem_Direct_Population)
|
DIRECT_POP <- readRDS("Data/Cleaned_Data/Population_Data/RDS/All_Wyoming_County_Populations.Rds") %>% filter(County=='Lincoln') %>% select(Year,Lin_Direct_Population=Population) %>% full_join(readRDS("Data/Cleaned_Data/Population_Data/RDS/All_Wyoming_City_Populations.Rds") %>% filter(City %in% c('Kemmerer','Diamondville')) %>% group_by(Year) %>% summarize(Kem_Direct_Population=sum(Population,na.rm=TRUE))) %>% mutate(Other_Direct_Population=Lin_Direct_Population-Kem_Direct_Population)
|
||||||
ADJUST_TABLE <- DIRECT_POP %>% inner_join(POST_ADJUST_DATA) %>% mutate(KEM_ADJ=Kem_Direct_Population/Kem_Demo_Population,OTHER_ADJ=Lin_Direct_Population/Other_Demo_Population,LIN_ADJ=Lin_Direct_Population/Total_Lincoln_Demo_Population) %>% select(Year,LIN_ADJ,KEM_ADJ,OTHER_ADJ)
|
ADJUST_TABLE <- DIRECT_POP %>% inner_join(POST_ADJUST_DATA) %>% mutate(KEM_ADJ=Kem_Direct_Population/Kem_Demo_Population,OTHER_ADJ=Other_Direct_Population/Other_Demo_Population,LIN_ADJ=Lin_Direct_Population/Total_Lincoln_Demo_Population) %>% select(Year,LIN_ADJ,KEM_ADJ,OTHER_ADJ)
|
||||||
|
##Checking Total
|
||||||
KEM_DEMO_DATA <- KEM_DEMO_DATA %>% left_join(ADJUST_TABLE %>% select(Year,KEM_ADJ)) %>% mutate(Num_Female=round(KEM_ADJ*Num_Female),Num_Male=round(KEM_ADJ*Num_Male)) %>% select(-KEM_ADJ)
|
KEM_DEMO_DATA <- KEM_DEMO_DATA %>% left_join(ADJUST_TABLE %>% select(Year,KEM_ADJ)) %>% mutate(Num_Female=round(KEM_ADJ*Num_Female),Num_Male=round(KEM_ADJ*Num_Male)) %>% select(-KEM_ADJ)
|
||||||
|
|
||||||
OTHER_LIN_DEMO_DATA <- OTHER_LIN_DEMO_DATA%>% left_join(ADJUST_TABLE %>% select(Year,OTHER_ADJ)) %>% mutate(Num_Female=round(OTHER_ADJ*Num_Female),Num_Male=round(OTHER_ADJ*Num_Male)) %>% select(-OTHER_ADJ)
|
OTHER_LIN_DEMO_DATA <- OTHER_LIN_DEMO_DATA%>% left_join(ADJUST_TABLE %>% select(Year,OTHER_ADJ)) %>% mutate(Num_Female=round(OTHER_ADJ*Num_Female),Num_Male=round(OTHER_ADJ*Num_Male)) %>% select(-OTHER_ADJ)
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
#Find the most recent data year
|
#Find the most recent data year
|
||||||
MAX_YEAR <- max(KEM_DEMO_DATA$Year)
|
MAX_YEAR <- max(KEM_DEMO_DATA$Year)
|
||||||
KEM_DEMO_MAT <- KEM_DEMO_DATA %>% filter(Year==MAX_YEAR) %>% select(Num_Male,Num_Female) %>% as.matrix
|
KEM_DEMO_MAT <- KEM_DEMO_DATA %>% filter(Year==MAX_YEAR) %>% select(Num_Male,Num_Female) %>% as.matrix
|
||||||
|
|||||||
@ -36,10 +36,8 @@ KEM_BIRTH_DATA <- KEM_BIRTH_DATA %>% left_join(ADD_BIRTH_GROUP_DATA(KEM_RAW_DEMO
|
|||||||
|
|
||||||
####################Create same data set but for only parts of Lincoln not in the Kemmerer Diamondville area
|
####################Create same data set but for only parts of Lincoln not in the Kemmerer Diamondville area
|
||||||
OTHER_RAW_DEMOGRAPHIC_DATA <- readRDS(DEMOGRAPHIC_OTHER_LIN_LOC)
|
OTHER_RAW_DEMOGRAPHIC_DATA <- readRDS(DEMOGRAPHIC_OTHER_LIN_LOC)
|
||||||
OTHER_DEMO_DATA <- OTHER_RAW_DEMOGRAPHIC_DATA %>% mutate(POP=Num_Male+Num_Female,Births=ifelse(Age==0,POP,0)) %>% group_by(Year,County,Region) %>% summarize(Births=sum(Births))
|
OTHER_BIRTH_DATA<- OTHER_RAW_DEMOGRAPHIC_DATA %>% mutate(Population=Num_Male+Num_Female,Births=ifelse(Age==0,Population,0)) %>% group_by(Year,County,Region) %>% summarize(Population=sum(Population),Births=sum(Births)) %>% ungroup %>% arrange(desc(Year))
|
||||||
OTHER_POP_DATA <- readRDS(POPULATION_CITY_LOC)%>% rename(Region=City) %>% filter(!(Region %in% c("Kemmerer","Diamondville")),County=='Lincoln') %>% group_by(Year) %>% mutate(Population=sum(Population,na.rm=TRUE)) %>% ungroup %>% mutate(Region='Lincoln_Other') %>% unique %>% ungroup
|
OTHER_BIRTH_DATA <- OTHER_BIRTH_DATA %>% left_join(ADD_BIRTH_GROUP_DATA(OTHER_RAW_DEMOGRAPHIC_DATA))
|
||||||
OTHER_BIRTH_DATA <- OTHER_POP_DATA %>% left_join(OTHER_DEMO_DATA)
|
|
||||||
OTHER_BIRTH_DATA <- OTHER_BIRTH_DATA %>% left_join(ADD_BIRTH_GROUP_DATA(OTHER_RAW_DEMOGRAPHIC_DATA))
|
|
||||||
|
|
||||||
####################################################3
|
####################################################3
|
||||||
###Because the total of the Kemmerer and Other Lincoln should sum to the births in the entire county, we assume that the ratio of children under the age of 1, in each area is the same percentage as the total births in the county. The total births in the county is then taken from the Wyoming eadiv estimates at the county level. This makes the two sum to the proper amount
|
###Because the total of the Kemmerer and Other Lincoln should sum to the births in the entire county, we assume that the ratio of children under the age of 1, in each area is the same percentage as the total births in the county. The total births in the county is then taken from the Wyoming eadiv estimates at the county level. This makes the two sum to the proper amount
|
||||||
@ -49,7 +47,10 @@ ADJUSTED_KEM_STUB_DATA <- COUNTY_BIRTH_DATA %>% filter(Region=='Lincoln') %>% s
|
|||||||
KEM_BIRTH_DATA <- KEM_BIRTH_DATA %>% select(-Births) %>% left_join(ADJUSTED_KEM_STUB_DATA )
|
KEM_BIRTH_DATA <- KEM_BIRTH_DATA %>% select(-Births) %>% left_join(ADJUSTED_KEM_STUB_DATA )
|
||||||
OTHER_BIRTH_DATA <- OTHER_BIRTH_DATA %>% select(-Births) %>% left_join(ADJUSTED_KEM_STUB_DATA)
|
OTHER_BIRTH_DATA <- OTHER_BIRTH_DATA %>% select(-Births) %>% left_join(ADJUSTED_KEM_STUB_DATA)
|
||||||
REG_DATA <- rbind(COUNTY_BIRTH_DATA,rbind(KEM_BIRTH_DATA,OTHER_BIRTH_DATA) %>% mutate(Deaths=NA,Migration=NA) %>% select(colnames(COUNTY_BIRTH_DATA)))
|
REG_DATA <- rbind(COUNTY_BIRTH_DATA,rbind(KEM_BIRTH_DATA,OTHER_BIRTH_DATA) %>% mutate(Deaths=NA,Migration=NA) %>% select(colnames(COUNTY_BIRTH_DATA)))
|
||||||
|
|
||||||
|
|
||||||
REG_DATA <- REG_DATA %>% group_by(Region) %>% arrange(Year) %>% mutate(Lag_Births=lag(Births),Lag_Two_Births=lag(Births,2)) %>% ungroup %>% arrange(County,Region,Year)
|
REG_DATA <- REG_DATA %>% group_by(Region) %>% arrange(Year) %>% mutate(Lag_Births=lag(Births),Lag_Two_Births=lag(Births,2)) %>% ungroup %>% arrange(County,Region,Year)
|
||||||
|
|
||||||
REG_REDUCED_DATA <- REG_DATA %>% filter(!is.na(Births),!is.na(Lag_Two_Births),!is.na(Min_Birth_Group),!is.na(Lag_Births),!is.na(Region)) %>% select(Year,Region,Min_Birth_Group,Births,Lag_Births,Lag_Two_Births)
|
REG_REDUCED_DATA <- REG_DATA %>% filter(!is.na(Births),!is.na(Lag_Two_Births),!is.na(Min_Birth_Group),!is.na(Lag_Births),!is.na(Region)) %>% select(Year,Region,Min_Birth_Group,Births,Lag_Births,Lag_Two_Births)
|
||||||
|
|
||||||
|
|
||||||
@ -140,4 +141,27 @@ ST_REG_DATA <- REG_REDUCED_DATA %>% filter(Region=='Lincoln') %>% filter(Year==m
|
|||||||
saveRDS(REG_REDUCED_DATA,paste0(SAVE_DATA_LOC,"/Birth_Regression_Input_Data.Rds"))
|
saveRDS(REG_REDUCED_DATA,paste0(SAVE_DATA_LOC,"/Birth_Regression_Input_Data.Rds"))
|
||||||
write_csv(REG_REDUCED_DATA,paste0(SAVE_DATA_LOC,"/Birth_Regression_Input_Data.csv"))
|
write_csv(REG_REDUCED_DATA,paste0(SAVE_DATA_LOC,"/Birth_Regression_Input_Data.csv"))
|
||||||
|
|
||||||
|
##########Update existing demogrpahic data with new births
|
||||||
|
KEM_DATA_NEW <- REG_DATA %>% filter(Region=='Kemmerer & Diamondville') %>% select(Year,County,Region,Population,Births,Deaths,Migration)
|
||||||
|
PRED_KEM_BIRTH_2024 <- REG_DATA %>% filter(Region=='Kemmerer & Diamondville',Year>=2023) %>% mutate(Min_Birth_Group=lag(Min_Birth_Group)) %>% filter(Year==2024)
|
||||||
|
KEM_DATA_NEW[KEM_DATA_NEW$Year==2024,"Births"] <- round(exp(predict(MOD_BIRTHS,PRED_KEM_BIRTH_2024)))
|
||||||
|
|
||||||
|
OTHER_DATA_NEW <- REG_DATA %>% filter(Region=='Lincoln_Other') %>% select(Year,County,Region,Population,Births,Deaths,Migration)
|
||||||
|
PRED_OTHER_BIRTH_2024 <- REG_DATA %>% filter(Region=='Lincoln_Other',Year>=2023) %>% mutate(Min_Birth_Group=lag(Min_Birth_Group)) %>% filter(Year==2024)
|
||||||
|
OTHER_DATA_NEW[OTHER_DATA_NEW$Year==2024,"Births"] <- round(exp(predict(MOD_BIRTHS,PRED_OTHER_BIRTH_2024)))
|
||||||
|
|
||||||
|
LIN_DATA_NEW <- REG_DATA %>% filter(Region=='Lincoln') %>% select(Year,County,Region,Population,Births,Deaths,Migration)
|
||||||
|
#
|
||||||
|
if(!exists("POPULATION_SAVE_RDS")){POPULATION_SAVE_RDS <- "./Data/Cleaned_Data/Population_Data/RDS/"}
|
||||||
|
dir.create(POPULATION_SAVE_RDS, recursive = TRUE, showWarnings = FALSE)
|
||||||
|
|
||||||
|
if(!exists("POPULATION_SAVE_CSV")){POPULATION_SAVE_CSV <- "./Data/Cleaned_Data/Population_Data/CSV/"}
|
||||||
|
dir.create(POPULATION_SAVE_CSV, recursive = TRUE, showWarnings = FALSE)
|
||||||
|
saveRDS(LIN_DATA_NEW,paste0(POPULATION_SAVE_RDS,"Full_Lincoln_County_Population_Data.Rds"))
|
||||||
|
write_csv(LIN_DATA_NEW,paste0(POPULATION_SAVE_CSV,"Full_Lincoln_County_Population_Data.csv"))
|
||||||
|
saveRDS(KEM_DATA_NEW,paste0(POPULATION_SAVE_RDS,"Kemmerer_Diamondville_Population_Data.Rds"))
|
||||||
|
write_csv(KEM_DATA_NEW,paste0(POPULATION_SAVE_CSV,"Kemmerer_Diamondville_Population_Data.csv"))
|
||||||
|
saveRDS(OTHER_DATA_NEW,paste0(POPULATION_SAVE_RDS,"Other_Lincoln_Population_Data.Rds"))
|
||||||
|
write_csv(OTHER_DATA_NEW,paste0(POPULATION_SAVE_CSV,"Other_Lincoln_Population_Data.csv"))
|
||||||
|
|
||||||
|
|
||||||
50
Scripts/2B_Impart_Deaths_and_Migration_to_Subregions.r
Normal file
50
Scripts/2B_Impart_Deaths_and_Migration_to_Subregions.r
Normal file
@ -0,0 +1,50 @@
|
|||||||
|
###May want to check rounding of demographic data as migration does not line up perfectly (off by about 9 on average)
|
||||||
|
library(tidyverse)
|
||||||
|
#setwd("../")
|
||||||
|
KEM_POP_LOC <- "Data/Cleaned_Data/Population_Data/RDS/Kemmerer_Diamondville_Population_Data.Rds"
|
||||||
|
OTHER_POP_LOC <- "Data/Cleaned_Data/Population_Data/RDS/Other_Lincoln_Population_Data.Rds"
|
||||||
|
KEM_POP_LOC_CSV <- "Data/Cleaned_Data/Population_Data/RDS/Kemmerer_Diamondville_Population_Data.csv"
|
||||||
|
OTHER_POP_LOC_CSV <- "Data/Cleaned_Data/Population_Data/RDS/Other_Lincoln_Population_Data.csv"
|
||||||
|
|
||||||
|
MORT <- readRDS("Data/Cleaned_Data/Mortality_Rate_Data/RDS/Lincoln_County_Mortality_Rates.Rds")
|
||||||
|
KEM_DEM <-readRDS("Data/Cleaned_Data/Demographic_Sex_Age_Data/RDS/Kemmerer_Diamondville_Demographics.Rds")
|
||||||
|
OTHER_DEM <- readRDS("Data/Cleaned_Data/Demographic_Sex_Age_Data/RDS/Other_Lincoln_Demographics.Rds")
|
||||||
|
LIN_DEM <- readRDS("Data/Cleaned_Data/Demographic_Sex_Age_Data/RDS/Full_Lincoln_County_Demographics.Rds")
|
||||||
|
MORTALITY <- MORT
|
||||||
|
#Estimate split of deaths in county (share of Kemmerer vs other Lincoln)
|
||||||
|
PRED_DEATHS <- function(DATA,MORTALITY){
|
||||||
|
MORTALITY <- MORTALITY%>% pivot_wider(values_from=Death_Rate,names_from=Sex) %>% group_by(County,Min_Age,Max_Age) %>% summarize(Rate_Male=mean(Male,na.rm=TRUE),Rate_Female=mean(Female,na.rm=TRUE)) %>% ungroup
|
||||||
|
RES <- DATA[DATA$Age >= MORTALITY$Min_Age[1] & MORTALITY$Max_Age[1]>= DATA$Age,] %>% group_by(County,Region,Year,Age) %>% summarize(Num_Female=sum(Num_Female),Num_Male=sum(Num_Male)) %>% ungroup %>% mutate(Min_Age=MORTALITY$Min_Age[1],Max_Age= MORTALITY$Max_Age[1])
|
||||||
|
for(i in 2:nrow(MORTALITY)){
|
||||||
|
RES <- rbind(RES,DATA[DATA$Age >= MORTALITY$Min_Age[i] & MORTALITY$Max_Age[i]>= DATA$Age,] %>% group_by(County,Region,Year,Age) %>% summarize(Num_Female=sum(Num_Female),Num_Male=sum(Num_Male)) %>% ungroup %>% mutate(Min_Age=MORTALITY$Min_Age[i],Max_Age= MORTALITY$Max_Age[i]))
|
||||||
|
}
|
||||||
|
RES <- RES%>% arrange(Year) %>% left_join(MORTALITY)%>% group_by(County,Region,Year) %>% summarize(Predicted_Deaths=sum(Rate_Male*Num_Male)+sum(Rate_Female*Num_Female) ) %>% ungroup %>% select(County,Region,Year,Predicted_Deaths)
|
||||||
|
return(RES)
|
||||||
|
}
|
||||||
|
##Predict all deaths an merge into a table to find the ratios of predicted deaths
|
||||||
|
LIN_PRED <- PRED_DEATHS(LIN_DEM,MORT) %>% select(Year,Lin_Pred_Deaths=Predicted_Deaths)
|
||||||
|
KEM_PRED <- PRED_DEATHS(KEM_DEM,MORT) %>% select(Year,Kem_Pred_Deaths=Predicted_Deaths)
|
||||||
|
OTHER_PRED <- PRED_DEATHS(OTHER_DEM,MORT) %>% select(Year,Other_Pred_Deaths=Predicted_Deaths)
|
||||||
|
PRED_DATA<- LIN_PRED %>% left_join(KEM_PRED) %>% left_join(OTHER_PRED)
|
||||||
|
Death_Adj <- PRED_DATA %>% filter(!is.na(Kem_Pred_Deaths)) %>% mutate(Kem_Death_Ratio=Kem_Pred_Deaths/(Kem_Pred_Deaths+Other_Pred_Deaths),Other_Death_Ratio=1-Kem_Death_Ratio) %>% select(Year,Kem_Death_Ratio,Other_Death_Ratio)
|
||||||
|
#Add deaths to Lincoln were missing (lacking data)
|
||||||
|
LIN_POP <- readRDS("Data/Cleaned_Data/Population_Data/RDS/Full_Lincoln_County_Population_Data.Rds")
|
||||||
|
LIN_POP <- LIN_POP %>% left_join(LIN_PRED) %>% mutate(Deaths=ifelse(is.na(Deaths) & !is.na(Lin_Pred_Deaths),Lin_Pred_Deaths,Deaths)) %>% select(-Lin_Pred_Deaths) %>% mutate(BD=Population+Births-Deaths) %>% mutate(Missing=Population-lag(BD),Missing=lead(Missing),Migration=ifelse(is.na(Migration),Missing,Migration)) %>% mutate(NEXT=Population+Births-Deaths+Migration) %>% arrange(desc(Year)) %>% select(colnames(LIN_POP))
|
||||||
|
###Estimate the number of deaths in Kemmerer based on the total deaths, and the predicted share of deaths
|
||||||
|
#Find migration based on the remaining missing population (after deaths,and births)
|
||||||
|
KEM_POP <- readRDS(KEM_POP_LOC)
|
||||||
|
KEM_DEATHS <- LIN_POP %>% select(Year,Deaths) %>% left_join(Death_Adj) %>% left_join(KEM_PRED) %>% mutate(Deaths=round(Deaths*Kem_Death_Ratio)) %>% select(Year,Deaths)
|
||||||
|
KEM_POP <- KEM_POP%>% select(-Deaths) %>% left_join(KEM_DEATHS) %>% mutate(BD=Population+Births-Deaths) %>% mutate(Missing=Population-lag(BD),Missing=lead(Missing),Migration=ifelse(is.na(Migration),Missing,Migration))%>% mutate(NEXT=Population+Births-Deaths+Migration)%>% arrange(desc(Year)) %>% select(colnames(LIN_POP))
|
||||||
|
|
||||||
|
###Estimate the number of deaths in the other parts of Lincoln (not Kemmerer) based on the total deaths, and the predicted share of deaths
|
||||||
|
#Find migration based on the remaining missing population (after deaths,and births)
|
||||||
|
OTHER_POP <- readRDS(OTHER_POP_LOC)
|
||||||
|
OTHER_DEATHS <- LIN_POP %>% select(Year,Deaths) %>% left_join(Death_Adj) %>% left_join(OTHER_PRED) %>% mutate(Deaths=round(Deaths*Other_Death_Ratio)) %>% select(Year,Deaths)
|
||||||
|
OTHER_POP <- OTHER_POP %>% select(-Deaths) %>% left_join(OTHER_DEATHS) %>% mutate(BD=Population+Births-Deaths) %>% mutate(Missing=Population-lag(BD),Missing=lead(Missing),Migration=ifelse(is.na(Migration),Missing,Migration))%>% arrange(desc(Year)) %>% mutate(NEXT=Population+Births-Deaths+Migration)%>% select(colnames(LIN_POP)) %>% arrange(desc(Year))
|
||||||
|
#Save outputs
|
||||||
|
saveRDS(KEM_POP,KEM_POP_LOC)
|
||||||
|
saveRDS(OTHER_POP,OTHER_POP_LOC)
|
||||||
|
write_csv(KEM_POP,KEM_POP_LOC_CSV)
|
||||||
|
write_csv(OTHER_POP,OTHER_POP_LOC_CSV)
|
||||||
|
|
||||||
|
|
||||||
@ -1,30 +0,0 @@
|
|||||||
library(forecast)
|
|
||||||
####Work on overall migration trends
|
|
||||||
#Could use code cleanup after trying things, but have but I have a working ARIMA to model Lincoln county migration
|
|
||||||
TS_DATA <- POP_DATA %>% mutate(In_Migration=ifelse(Migration>0,1,0)) %>% group_by(County) %>% arrange(County,Year) %>% mutate(Prev_Pop=lag(Population))
|
|
||||||
ST_YEAR <- min(pull(TS_DATA %>% filter(!is.na(Migration)),Year))
|
|
||||||
END_YEAR <- max(pull(TS_DATA %>% filter(!is.na(Migration)),Year))
|
|
||||||
#GRAPH_DATA <- TS_DATA %>% filter(!is.na(Migration))
|
|
||||||
#GRAPH_DATA_LN <- TS_DATA %>% filter(!is.na(Migration),County=="Lincoln")
|
|
||||||
#ggplot(GRAPH_DATA,aes(x=Year,y=Migration/Prev_Pop,group=County,color=County))+geom_point()+geom_line(data=GRAPH_DATA_LN)
|
|
||||||
TS_WIDE <- TS_DATA %>% dplyr::select(Year,County,Migration) %>% pivot_wider(values_from=Migration,names_from=County) %>% arrange(Year) %>% filter(Year>ST_YEAR+1,Year<=END_YEAR) %>%ts(start=c(ST_YEAR+1),frequency=1)
|
|
||||||
LN <- TS_DATA %>% dplyr::select(Year,County,Migration) %>% pivot_wider(values_from=Migration,names_from=County) %>% arrange(Year) %>% dplyr::select(Lincoln,Year) %>% filter(Year>=ST_YEAR,Year<=END_YEAR) %>% dplyr::select(-Year) %>%ts(start=c(ST_YEAR),frequency=1)
|
|
||||||
#Create an ARIMA of Migration so the number of people migrating can be simulated
|
|
||||||
#Time series tests
|
|
||||||
#library(tseries)
|
|
||||||
#adf.test(LN,k=1) #Stationary with one lag, otherwise not stationary
|
|
||||||
#kpss.test(LN) #Stationary,default of program and has some model fit improvements
|
|
||||||
MOD <- auto.arima(LN,stationary=TRUE)
|
|
||||||
#summary(MOD)
|
|
||||||
#Validity tests
|
|
||||||
#autoplot(MOD)
|
|
||||||
#acf(resid(MOD))
|
|
||||||
#pacf(resid(MOD))
|
|
||||||
# adf.test(resid(MOD))
|
|
||||||
#checkresiduals(MOD)
|
|
||||||
#Save the resulting model outputs, will need to be changed if looking at other counties
|
|
||||||
saveRDS(MOD,"Data/Regression_Results/LN_ARIMA_MODEL.Rds")
|
|
||||||
|
|
||||||
MIGRATION_ARIMA_SIMS <- (do.call(cbind,mclapply(1:NUM_SIMULATIONS,function(x){as.numeric(round(simulate(MOD,future=TRUE, nsim=NUM_YEARS_PROJECTED)))},mc.cores =detectCores()-1)))#testing a multiple run simulation could use parallel process
|
|
||||||
saveRDS(MIGRATION_ARIMA_SIMS,"Data/Simulated_Data_Sets/Migration_ARIMA.Rds")
|
|
||||||
write.csv(MIGRATION_ARIMA_SIMS,row.names=FALSE,"Data/Simulated_Data_Sets/Migration_ARIMA.csv")
|
|
||||||
72
Scripts/2D_Overall_Migration_ARIMA.r
Normal file
72
Scripts/2D_Overall_Migration_ARIMA.r
Normal file
@ -0,0 +1,72 @@
|
|||||||
|
library(forecast)
|
||||||
|
library(tidyverse)
|
||||||
|
#setwd("../")
|
||||||
|
####Work on overall migration trends
|
||||||
|
#Could use code cleanup after trying things, but have but I have a working ARIMA to model Lincoln county migration
|
||||||
|
POP_DATA <- readRDS("Data/Cleaned_Data/Population_Data/RDS/Full_Lincoln_County_Population_Data.Rds") %>% mutate(Migration=Migration/Population)
|
||||||
|
POP_DATA_TEST <- readRDS("Data/Cleaned_Data/Population_Data/RDS/Full_Lincoln_County_Population_Data.Rds") %>% mutate(Migration=Migration/Population)
|
||||||
|
|
||||||
|
POP_KEM_DATA <- readRDS("Data/Cleaned_Data/Population_Data/RDS/Kemmerer_Diamondville_Population_Data.Rds")
|
||||||
|
POP_OTHER_DATA <- readRDS("Data/Cleaned_Data/Population_Data/RDS/Other_Lincoln_Population_Data.Rds")
|
||||||
|
hist(POP_OTHER_DATA$Migration/POP_OTHER_DATA$Population)
|
||||||
|
hist(POP_KEM_DATA$Migration/POP_KEM_DATA$Population)
|
||||||
|
|
||||||
|
|
||||||
|
TS_DATA <- POP_DATA %>% mutate(In_Migration=ifelse(Migration>0,1,0)) %>% group_by(County) %>% arrange(County,Year) %>% mutate(Prev_Pop=lag(Population)) %>% ungroup
|
||||||
|
TS_DATA_TEST <- POP_DATA_TEST %>% mutate(In_Migration=ifelse(Migration>0,1,0)) %>% group_by(County) %>% arrange(County,Year) %>% mutate(Prev_Pop=lag(Population)) %>% ungroup
|
||||||
|
|
||||||
|
TS_KEM_DATA <- POP_KEM_DATA %>% mutate(In_Migration=ifelse(Migration>0,1,0)) %>% group_by(County) %>% arrange(County,Year) %>% mutate(Prev_Pop=lag(Population)) %>% ungroup
|
||||||
|
TS_OTHER_DATA <- POP_OTHER_DATA %>% mutate(In_Migration=ifelse(Migration>0,1,0)) %>% group_by(County) %>% arrange(County,Year) %>% mutate(Prev_Pop=lag(Population)) %>% ungroup
|
||||||
|
|
||||||
|
|
||||||
|
ST_YEAR <- min(pull(TS_DATA %>% filter(!is.na(Migration)),Year))
|
||||||
|
END_YEAR <- max(pull(TS_DATA %>% filter(!is.na(Migration)),Year))
|
||||||
|
|
||||||
|
ST_YEAR_KEM <- min(pull(TS_KEM_DATA %>% filter(!is.na(Migration)),Year))
|
||||||
|
END_YEAR_KEM <- max(pull(TS_KEM_DATA %>% filter(!is.na(Migration)),Year))
|
||||||
|
|
||||||
|
ST_YEAR_OTHER <- min(pull(TS_OTHER_DATA %>% filter(!is.na(Migration)),Year))
|
||||||
|
END_YEAR_OTHER <- max(pull(TS_OTHER_DATA %>% filter(!is.na(Migration)),Year))
|
||||||
|
|
||||||
|
|
||||||
|
#GRAPH_DATA <- TS_DATA %>% filter(!is.na(Migration))
|
||||||
|
#GRAPH_DATA_LN <- TS_DATA %>% filter(!is.na(Migration),County=="Lincoln")
|
||||||
|
#ggplot(GRAPH_DATA,aes(x=Year,y=Migration/Prev_Pop,group=County,color=County))+geom_point()+geom_line(data=GRAPH_DATA_LN)
|
||||||
|
TS_WIDE <- TS_DATA %>% dplyr::select(Year,County,Migration) %>% pivot_wider(values_from=Migration,names_from=County) %>% arrange(Year) %>% filter(Year>ST_YEAR+1,Year<=END_YEAR) %>%ts(start=c(ST_YEAR+1),frequency=1)
|
||||||
|
|
||||||
|
TS_KEM_WIDE <- TS_KEM_DATA %>% dplyr::select(Year,County,Migration) %>% pivot_wider(values_from=Migration,names_from=County) %>% arrange(Year) %>% filter(Year>ST_YEAR+1,Year<=END_YEAR) %>%ts(start=c(ST_YEAR+1),frequency=1)
|
||||||
|
TS_OTHER_WIDE <- TS_OTHER_DATA %>% dplyr::select(Year,County,Migration) %>% pivot_wider(values_from=Migration,names_from=County) %>% arrange(Year) %>% filter(Year>ST_YEAR+1,Year<=END_YEAR) %>%ts(start=c(ST_YEAR+1),frequency=1)
|
||||||
|
|
||||||
|
|
||||||
|
LN <- TS_DATA %>% dplyr::select(Year,County,Migration) %>% pivot_wider(values_from=Migration,names_from=County) %>% arrange(Year) %>% dplyr::select(Lincoln,Year) %>% filter(Year>=ST_YEAR,Year<=END_YEAR) %>% dplyr::select(-Year) %>%ts(start=c(ST_YEAR),frequency=1)
|
||||||
|
|
||||||
|
KEM <- TS_KEM_DATA %>% dplyr::select(Year,Region,Migration) %>% pivot_wider(values_from=Migration,names_from=Region) %>% arrange(Year) %>% dplyr::select('Kemmerer & Diamondville',Year) %>% filter(Year>=ST_YEAR_KEM,Year<=END_YEAR_KEM) %>% dplyr::select(-Year) %>%ts(start=c(ST_YEAR_KEM),frequency=1)
|
||||||
|
TS_OTHER_DATA
|
||||||
|
OTHER <- TS_OTHER_DATA %>% dplyr::select(Year,Region,Migration) %>% pivot_wider(values_from=Migration,names_from=Region) %>% arrange(Year) %>% dplyr::select('Lincoln Other'=Lincoln_Other,Year) %>% filter(Year>=ST_YEAR_OTHER,Year<=END_YEAR_OTHER) %>% dplyr::select(-Year) %>%ts(start=c(ST_YEAR_OTHER),frequency=1)
|
||||||
|
|
||||||
|
#Create an ARIMA of Migration so the number of people migrating can be simulated
|
||||||
|
#Time series tests
|
||||||
|
#library(tseries)
|
||||||
|
#adf.test(LN,k=1) #Stationary with one lag, otherwise not stationary
|
||||||
|
#kpss.test(LN) #Stationary,default of program and has some model fit improvements
|
||||||
|
MOD <- auto.arima(LN,stationary=TRUE)
|
||||||
|
MOD_KEM <- auto.arima(KEM)
|
||||||
|
MOD_OTHER <- auto.arima(OTHER)
|
||||||
|
plot(forecast(MOD ))
|
||||||
|
plot(forecast(MOD_KEM ))
|
||||||
|
plot(forecast(MOD_OTHER ))
|
||||||
|
plot(forecast(MOD,abs(KEM))
|
||||||
|
|
||||||
|
#summary(MOD)
|
||||||
|
#Validity tests
|
||||||
|
#autoplot(MOD)
|
||||||
|
#acf(resid(MOD))
|
||||||
|
#pacf(resid(MOD))
|
||||||
|
# adf.test(resid(MOD))
|
||||||
|
#checkresiduals(MOD)
|
||||||
|
#Save the resulting model outputs, will need to be changed if looking at other counties
|
||||||
|
#saveRDS(MOD,"Data/Regression_Results/LN_ARIMA_MODEL.Rds")
|
||||||
|
|
||||||
|
MIGRATION_ARIMA_SIMS <- (do.call(cbind,mclapply(1:NUM_SIMULATIONS,function(x){as.numeric(round(simulate(MOD,future=TRUE, nsim=NUM_YEARS_PROJECTED)))},mc.cores =detectCores()-1)))#testing a multiple run simulation could use parallel process
|
||||||
|
saveRDS(MIGRATION_ARIMA_SIMS,"Data/Simulated_Data_Sets/Migration_ARIMA.Rds")
|
||||||
|
write.csv(MIGRATION_ARIMA_SIMS,row.names=FALSE,"Data/Simulated_Data_Sets/Migration_ARIMA.csv")
|
||||||
Loading…
x
Reference in New Issue
Block a user