###May want to check rounding of demographic data as migration does not line up perfectly (off by about 9 on average)
library(tidyverse)
#setwd("../")
KEM_POP_LOC <- "Data/Cleaned_Data/Population_Data/RDS/Kemmerer_Diamondville_Population_Data.Rds"
OTHER_POP_LOC <- "Data/Cleaned_Data/Population_Data/RDS/Other_Lincoln_Population_Data.Rds"
KEM_POP_LOC_CSV <- "Data/Cleaned_Data/Population_Data/RDS/Kemmerer_Diamondville_Population_Data.csv"
OTHER_POP_LOC_CSV <- "Data/Cleaned_Data/Population_Data/RDS/Other_Lincoln_Population_Data.csv"

MORT  <- readRDS("Data/Cleaned_Data/Mortality_Rate_Data/RDS/Lincoln_County_Mortality_Rates.Rds")
KEM_DEM <-readRDS("Data/Cleaned_Data/Demographic_Sex_Age_Data/RDS/Kemmerer_Diamondville_Demographics.Rds")
OTHER_DEM <- readRDS("Data/Cleaned_Data/Demographic_Sex_Age_Data/RDS/Other_Lincoln_Demographics.Rds")
LIN_DEM <- readRDS("Data/Cleaned_Data/Demographic_Sex_Age_Data/RDS/Full_Lincoln_County_Demographics.Rds")
MORTALITY <- MORT
#Estimate split of deaths in county (share of Kemmerer vs other Lincoln)
PRED_DEATHS <- function(DATA,MORTALITY){
	MORTALITY <- MORTALITY%>% pivot_wider(values_from=Death_Rate,names_from=Sex) %>% group_by(County,Min_Age,Max_Age) %>% summarize(Rate_Male=mean(Male,na.rm=TRUE),Rate_Female=mean(Female,na.rm=TRUE)) %>% ungroup
	RES <- DATA[DATA$Age >= MORTALITY$Min_Age[1] & MORTALITY$Max_Age[1]>= DATA$Age,] %>%  group_by(County,Region,Year,Age) %>% summarize(Num_Female=sum(Num_Female),Num_Male=sum(Num_Male)) %>% ungroup %>% mutate(Min_Age=MORTALITY$Min_Age[1],Max_Age= MORTALITY$Max_Age[1])
	for(i in 2:nrow(MORTALITY)){
		RES <- rbind(RES,DATA[DATA$Age >= MORTALITY$Min_Age[i] & MORTALITY$Max_Age[i]>= DATA$Age,] %>%  group_by(County,Region,Year,Age) %>% summarize(Num_Female=sum(Num_Female),Num_Male=sum(Num_Male)) %>% ungroup %>% mutate(Min_Age=MORTALITY$Min_Age[i],Max_Age= MORTALITY$Max_Age[i]))
	}
RES <- RES%>% arrange(Year) %>% left_join(MORTALITY)%>% group_by(County,Region,Year) %>% summarize(Predicted_Deaths=sum(Rate_Male*Num_Male)+sum(Rate_Female*Num_Female) ) %>% ungroup %>% select(County,Region,Year,Predicted_Deaths)
	return(RES)
}
##Predict all deaths an merge into a table to find the ratios of predicted deaths
LIN_PRED <- PRED_DEATHS(LIN_DEM,MORT) %>% select(Year,Lin_Pred_Deaths=Predicted_Deaths)
KEM_PRED <- PRED_DEATHS(KEM_DEM,MORT) %>% select(Year,Kem_Pred_Deaths=Predicted_Deaths)
OTHER_PRED <- PRED_DEATHS(OTHER_DEM,MORT) %>% select(Year,Other_Pred_Deaths=Predicted_Deaths)
PRED_DATA<- LIN_PRED %>% left_join(KEM_PRED) %>% left_join(OTHER_PRED)
Death_Adj <- PRED_DATA %>% filter(!is.na(Kem_Pred_Deaths)) %>% mutate(Kem_Death_Ratio=Kem_Pred_Deaths/(Kem_Pred_Deaths+Other_Pred_Deaths),Other_Death_Ratio=1-Kem_Death_Ratio) %>% select(Year,Kem_Death_Ratio,Other_Death_Ratio)
#Add deaths to Lincoln were missing (lacking data)
LIN_POP <- readRDS("Data/Cleaned_Data/Population_Data/RDS/Full_Lincoln_County_Population_Data.Rds") 
LIN_POP <- LIN_POP  %>% left_join(LIN_PRED)  %>% mutate(Deaths=ifelse(is.na(Deaths)  & !is.na(Lin_Pred_Deaths),Lin_Pred_Deaths,Deaths))  %>% select(-Lin_Pred_Deaths)  %>% mutate(BD=Population+Births-Deaths) %>% mutate(Missing=Population-lag(BD),Missing=lead(Missing),Migration=ifelse(is.na(Migration),Missing,Migration)) %>% mutate(NEXT=Population+Births-Deaths+Migration) %>% arrange(desc(Year)) %>% select(colnames(LIN_POP))
###Estimate the number of deaths in Kemmerer based on the total deaths, and the predicted share of deaths
	#Find migration based on the remaining missing population (after deaths,and births)
KEM_POP <- readRDS(KEM_POP_LOC) 
KEM_DEATHS <- LIN_POP %>% select(Year,Deaths) %>% left_join(Death_Adj) %>% left_join(KEM_PRED) %>% mutate(Deaths=round(Deaths*Kem_Death_Ratio)) %>% select(Year,Deaths)
KEM_POP <- KEM_POP%>% select(-Deaths) %>% left_join(KEM_DEATHS) %>%  mutate(BD=Population+Births-Deaths) %>% mutate(Missing=Population-lag(BD),Missing=lead(Missing),Migration=ifelse(is.na(Migration),Missing,Migration))%>% mutate(NEXT=Population+Births-Deaths+Migration)%>% arrange(desc(Year)) %>% select(colnames(LIN_POP))

###Estimate the number of deaths in the other parts of Lincoln (not Kemmerer) based on the total deaths, and the predicted share of deaths
	#Find migration based on the remaining missing population (after deaths,and births)
OTHER_POP <- readRDS(OTHER_POP_LOC) 
OTHER_DEATHS <- LIN_POP %>% select(Year,Deaths) %>% left_join(Death_Adj) %>% left_join(OTHER_PRED) %>% mutate(Deaths=round(Deaths*Other_Death_Ratio)) %>% select(Year,Deaths)
OTHER_POP <- OTHER_POP %>% select(-Deaths) %>% left_join(OTHER_DEATHS) %>%  mutate(BD=Population+Births-Deaths) %>% mutate(Missing=Population-lag(BD),Missing=lead(Missing),Migration=ifelse(is.na(Migration),Missing,Migration))%>% arrange(desc(Year)) %>% mutate(NEXT=Population+Births-Deaths+Migration)%>% select(colnames(LIN_POP))  %>% arrange(desc(Year)) 
#Save outputs
saveRDS(KEM_POP,KEM_POP_LOC)
saveRDS(OTHER_POP,OTHER_POP_LOC)
write_csv(KEM_POP,KEM_POP_LOC_CSV)
write_csv(OTHER_POP,OTHER_POP_LOC_CSV)