53 lines
5.1 KiB
R
53 lines
5.1 KiB
R
###May want to check rounding of demographic data as migration does not line up perfectly (off by about 9 on average)
|
|
library(tidyverse)
|
|
#setwd("../")
|
|
KEM_POP_LOC <- "Data/Cleaned_Data/Population_Data/RDS/Kemmerer_Diamondville_Population_Data.Rds"
|
|
OTHER_POP_LOC <- "Data/Cleaned_Data/Population_Data/RDS/Other_Lincoln_Population_Data.Rds"
|
|
KEM_POP_LOC_CSV <- "Data/Cleaned_Data/Population_Data/CSV/Kemmerer_Diamondville_Population_Data.csv"
|
|
OTHER_POP_LOC_CSV <- "Data/Cleaned_Data/Population_Data/CSV/Other_Lincoln_Population_Data.csv"
|
|
|
|
MORT <- readRDS("Data/Cleaned_Data/Mortality_Rate_Data/RDS/Lincoln_County_Mortality_Rates.Rds")
|
|
KEM_DEM <-readRDS("Data/Cleaned_Data/Demographic_Sex_Age_Data/RDS/Kemmerer_Diamondville_Demographics.Rds")
|
|
OTHER_DEM <- readRDS("Data/Cleaned_Data/Demographic_Sex_Age_Data/RDS/Other_Lincoln_Demographics.Rds")
|
|
LIN_DEM <- readRDS("Data/Cleaned_Data/Demographic_Sex_Age_Data/RDS/Full_Lincoln_County_Demographics.Rds")
|
|
MORTALITY <- MORT
|
|
#Estimate split of deaths in county (share of Kemmerer vs other Lincoln)
|
|
PRED_DEATHS <- function(DATA,MORTALITY){
|
|
MORTALITY <- MORTALITY%>% pivot_wider(values_from=Death_Rate,names_from=Sex) %>% group_by(County,Min_Age,Max_Age) %>% summarize(Rate_Male=mean(Male,na.rm=TRUE),Rate_Female=mean(Female,na.rm=TRUE)) %>% ungroup
|
|
RES <- DATA[DATA$Age >= MORTALITY$Min_Age[1] & MORTALITY$Max_Age[1]>= DATA$Age,] %>% group_by(County,Region,Year,Age) %>% summarize(Num_Female=sum(Num_Female),Num_Male=sum(Num_Male)) %>% ungroup %>% mutate(Min_Age=MORTALITY$Min_Age[1],Max_Age= MORTALITY$Max_Age[1])
|
|
for(i in 2:nrow(MORTALITY)){ RES <- rbind(RES,DATA[DATA$Age >= MORTALITY$Min_Age[i] & MORTALITY$Max_Age[i]>= DATA$Age,] %>% group_by(County,Region,Year,Age) %>% summarize(Num_Female=sum(Num_Female),Num_Male=sum(Num_Male)) %>% ungroup %>% mutate(Min_Age=MORTALITY$Min_Age[i],Max_Age= MORTALITY$Max_Age[i]))
|
|
}
|
|
RES <- RES%>% arrange(Year) %>% left_join(MORTALITY)%>% group_by(County,Region,Year) %>% summarize(Predicted_Deaths=sum(Rate_Male*Num_Male)+sum(Rate_Female*Num_Female) ) %>% ungroup %>% select(County,Region,Year,Predicted_Deaths)
|
|
return(RES)
|
|
}
|
|
##Predict all deaths an merge into a table to find the ratios of predicted deaths
|
|
LIN_PRED <- PRED_DEATHS(LIN_DEM,MORT) %>% select(Year,Lin_Pred_Deaths=Predicted_Deaths)
|
|
KEM_PRED <- PRED_DEATHS(KEM_DEM,MORT) %>% select(Year,Kem_Pred_Deaths=Predicted_Deaths)
|
|
OTHER_PRED <- PRED_DEATHS(OTHER_DEM,MORT) %>% select(Year,Other_Pred_Deaths=Predicted_Deaths)
|
|
PRED_DATA<- LIN_PRED %>% left_join(KEM_PRED) %>% left_join(OTHER_PRED)
|
|
Death_Adj <- PRED_DATA %>% filter(!is.na(Kem_Pred_Deaths)) %>% mutate(Kem_Death_Ratio=Kem_Pred_Deaths/(Kem_Pred_Deaths+Other_Pred_Deaths),Other_Death_Ratio=1-Kem_Death_Ratio) %>% select(Year,Kem_Death_Ratio,Other_Death_Ratio)
|
|
#Add deaths to Lincoln were missing (lacking data)
|
|
LIN_POP <- readRDS("Data/Cleaned_Data/Population_Data/RDS/Full_Lincoln_County_Population_Data.Rds")
|
|
LIN_POP <- LIN_POP %>% left_join(LIN_PRED) %>% mutate(Deaths=ifelse(is.na(Deaths) & !is.na(Lin_Pred_Deaths),Lin_Pred_Deaths,Deaths)) %>% select(-Lin_Pred_Deaths) %>% mutate(BD=Population+Births-Deaths) %>% mutate(Missing=Population-lag(BD),Missing=lead(Missing),Migration=ifelse(is.na(Migration),Missing,Migration)) %>% mutate(NEXT=Population+Births-Deaths+Migration) %>% arrange(desc(Year)) %>% select(colnames(LIN_POP))
|
|
###Estimate the number of deaths in Kemmerer based on the total deaths, and the predicted share of deaths
|
|
#Find migration based on the remaining missing population (after deaths,and births)
|
|
KEM_POP <- readRDS(KEM_POP_LOC)
|
|
KEM_DEATHS <- LIN_POP %>% select(Year,Deaths) %>% left_join(Death_Adj) %>% left_join(KEM_PRED) %>% mutate(Deaths=round(Deaths*Kem_Death_Ratio)) %>% select(Year,Deaths)
|
|
KEM_POP <- KEM_POP%>% select(-Deaths) %>% left_join(KEM_DEATHS) %>% mutate(BD=Population+Births-Deaths) %>% mutate(Missing=Population-lag(BD),Missing=lead(Missing),Migration=ifelse(is.na(Migration),Missing,Migration))%>% mutate(NEXT=Population+Births-Deaths+Migration)%>% arrange(desc(Year)) %>% select(colnames(LIN_POP))
|
|
#Round up any values to match the fact that only whole people can exists
|
|
KEM_POP$Population <- round(KEM_POP$Population)
|
|
|
|
###Estimate the number of deaths in the other parts of Lincoln (not Kemmerer) based on the total deaths, and the predicted share of deaths
|
|
#Find migration based on the remaining missing population (after deaths,and births)
|
|
OTHER_POP <- readRDS(OTHER_POP_LOC)
|
|
OTHER_DEATHS <- LIN_POP %>% select(Year,Deaths) %>% left_join(Death_Adj) %>% left_join(OTHER_PRED) %>% mutate(Deaths=round(Deaths*Other_Death_Ratio)) %>% select(Year,Deaths)
|
|
OTHER_POP <- OTHER_POP %>% select(-Deaths) %>% left_join(OTHER_DEATHS) %>% mutate(BD=Population+Births-Deaths) %>% mutate(Missing=Population-lag(BD),Missing=lead(Missing),Migration=ifelse(is.na(Migration),Missing,Migration))%>% arrange(desc(Year)) %>% mutate(NEXT=Population+Births-Deaths+Migration)%>% select(colnames(LIN_POP)) %>% arrange(desc(Year))
|
|
OTHER_POP$Population <- round(OTHER_POP$Population)
|
|
#Save outputs
|
|
saveRDS(KEM_POP,KEM_POP_LOC)
|
|
saveRDS(OTHER_POP,OTHER_POP_LOC)
|
|
write_csv(KEM_POP,KEM_POP_LOC_CSV)
|
|
write_csv(OTHER_POP,OTHER_POP_LOC_CSV)
|
|
|
|
|