#########################################Mortality Rate
library(tidyverse)
###Make sure the directory that holds the raw data sets exists
#A SAVE_LOC_RAW defined before this script runs is respected; otherwise the default below is used
	if(!exists("SAVE_LOC_RAW")){
		SAVE_LOC_RAW <- "./Data/Raw_Data/"
	}
	#recursive=TRUE also creates missing parent directories; warnings (e.g. directory already exists) are suppressed
	dir.create(path = SAVE_LOC_RAW, showWarnings = FALSE, recursive = TRUE)
GET_MORTALITY_DATA <- function(FILE,SEX,LOWER_AGE,UPPER_AGE){
	#Create clean mortality rate data for a single sex and age group.
	#Data gathered from https://hdpulse.nimhd.nih.gov/data-portal/mortality/table?cod=247&cod_options=cod_15&ratetype=aa&ratetype_options=ratetype_2&race=00&race_options=race_6&sex=2&sex_options=sex_3&age=177&age_options=age_11&ruralurban=0&ruralurban_options=ruralurban_3&yeargroup=5&yeargroup_options=year5yearmort_1&statefips=56&statefips_options=area_states&county=56000&county_options=counties_wyoming&comparison=counties_to_us&comparison_options=comparison_counties&radio_comparison=areas&radio_comparison_options=cods_or_areas
	#
	#Arguments:
	#	FILE: path of the CSV file to read (the first five lines are skipped).
	#	SEX, LOWER_AGE, UPPER_AGE: labels stored unchanged in the Sex, Min_Age and Max_Age columns.
	#Returns:
	#	A data frame with one row per retained area (counties, "Wyoming", "United States") and columns
	#	County, Sex, Min_Age, Max_Age, Death_Rate, Rate_SD, Imparted_Rate, Trend, Trend_SD, Imparted_Trend.
	#	Imparted_* is TRUE where the value was missing and was copied from a higher geographic level.
	#Stops with an error if the file has no "Wyoming" row, since that row is the baseline for every county.

	NAMES <- c("County","FIPS","Death_Rate","Lower_Rate","Upper_Rate","Deaths","Trend_Category","Trend","Lower_Trend","Upper_Trend")
	#Keep only county rows plus the state and national rows.
	#The SD is recovered from the interval width as (upper-lower)/(2*1.96); this presumes the bounds are 95% normal confidence limits -- TODO confirm against the data source.
	DF <- read_csv(FILE,skip=5,col_names=NAMES,col_types=list('c',"i",'d','d','d','d','c','d','d','d')) %>%
		filter(grepl("County|Wyoming",County)|County=="United States") %>%
		mutate(Rate_SD=(Upper_Rate-Lower_Rate)/(2*1.96),Trend_SD=(Upper_Trend-Lower_Trend)/(2*1.96)) %>%
		select(County,Death_Rate,Rate_SD,Trend,Trend_SD)
	DF$County <- gsub(" County","",DF$County,ignore.case=TRUE)
	#Rescale every numeric column (presumably reported per 100,000 -- confirm against the data source)
	DF[,-1] <- DF[,-1]/100000

	#Locate the state and national rows by name so the result does not depend on the row order of the file
	WY_ROW <- match("Wyoming",DF$County)
	US_ROW <- match("United States",DF$County)
	if(is.na(WY_ROW)){
		stop("No 'Wyoming' row found in ",FILE,call.=FALSE)
	}
	HAS_US <- !is.na(US_ROW)

	DF$Imparted_Trend <- FALSE
	DF$Imparted_Rate <- FALSE

	#A missing Wyoming trend falls back to the national trend
	if(is.na(DF$Trend[WY_ROW]) && HAS_US){
		DF$Trend[WY_ROW] <- DF$Trend[US_ROW]
		DF$Trend_SD[WY_ROW] <- DF$Trend_SD[US_ROW]
		DF$Imparted_Trend[WY_ROW] <- TRUE
	}
	#A missing Wyoming death rate falls back to the national rate.
	#The rate columns and the rate flag are written here (the trend columns and trend flag must not be touched).
	if(is.na(DF$Death_Rate[WY_ROW]) && HAS_US){
		DF$Death_Rate[WY_ROW] <- DF$Death_Rate[US_ROW]
		DF$Rate_SD[WY_ROW] <- DF$Rate_SD[US_ROW]
		DF$Imparted_Rate[WY_ROW] <- TRUE
	}

	#Every row other than the state/national rows is treated as a county
	IS_COUNTY <- !(seq_len(nrow(DF)) %in% c(WY_ROW,US_ROW))
	#Decide which counties need filling before any value is overwritten
	NEEDS_TREND <- IS_COUNTY & is.na(DF$Trend)
	NEEDS_RATE <- IS_COUNTY & is.na(DF$Death_Rate)

	#Impart any missing trends based on higher levels (the possibly already imparted Wyoming values)
	DF$Trend[NEEDS_TREND] <- DF$Trend[WY_ROW]
	DF$Trend_SD[NEEDS_TREND] <- DF$Trend_SD[WY_ROW]
	DF$Imparted_Trend[NEEDS_TREND] <- TRUE
	#Impart any missing death rates based on higher levels
	DF$Death_Rate[NEEDS_RATE] <- DF$Death_Rate[WY_ROW]
	DF$Rate_SD[NEEDS_RATE] <- DF$Rate_SD[WY_ROW]
	DF$Imparted_Rate[NEEDS_RATE] <- TRUE

	DF$Sex <- SEX
	DF$Min_Age <- LOWER_AGE
	DF$Max_Age <- UPPER_AGE
	DF <- DF %>% select(County,Sex,Min_Age,Max_Age,Death_Rate,Rate_SD,Imparted_Rate,everything())
	return(DF)
}
#Raw mortality tables are stored in one sub-directory per sex
SAVE_LOC_RAW_MORT <- paste0(SAVE_LOC_RAW,"Mortality_Rates/")
RAW_MORTALITY_LOC_FEMALE <- paste0(SAVE_LOC_RAW_MORT,"Female/")
RAW_MORTALITY_LOC_MALE <- paste0(SAVE_LOC_RAW_MORT,"Male/")
#Gather all mortality records: every age-group file is read for females first, then for males,
#and the pieces are stacked in that order. local() keeps the lookup vectors out of the global environment.
MORTALITY_DATA_ALL <- local({
	AGE_FILES <- c("A_Under1.csv","B_1_9.csv","C_10_19.csv","D_20_39.csv","E_40_64.csv","G_65_74.csv","H_75_84.csv","I_85+.csv")
	AGE_MIN <- c(0,1,10,20,40,65,75,85)
	AGE_MAX <- c(0,9,19,39,64,74,84,Inf)
	#Read all age groups found in one sex directory; returns an unnamed list of data frames
	READ_ONE_SEX <- function(DIR,SEX_LABEL){
		lapply(seq_along(AGE_FILES),function(k){
			GET_MORTALITY_DATA(paste0(DIR,AGE_FILES[k]),SEX_LABEL,AGE_MIN[k],AGE_MAX[k])
		})
	}
	do.call(rbind,c(
		READ_ONE_SEX(RAW_MORTALITY_LOC_FEMALE,"Female"),
		READ_ONE_SEX(RAW_MORTALITY_LOC_MALE,"Male")
	))
})
LIN_MORTALITY <- filter(MORTALITY_DATA_ALL,County=="Lincoln")
##Save the mortality data
#A SAVE_MORT_LOC defined before this script runs is respected
if(!exists("SAVE_MORT_LOC")){
	SAVE_MORT_LOC <- "./Data/Cleaned_Data/Mortality_Rate_Data"
}
CSV_SAVE <- paste0(SAVE_MORT_LOC,"/CSV")
RDS_SAVE <- paste0(SAVE_MORT_LOC,"/RDS")
#Make sure both output directories exist
dir.create(path = CSV_SAVE, showWarnings = FALSE, recursive = TRUE)
dir.create(path = RDS_SAVE, showWarnings = FALSE, recursive = TRUE)
#All areas, in both formats
saveRDS(MORTALITY_DATA_ALL,paste0(RDS_SAVE,"/All_Wyoming_Counties_Mortality_Rates.Rds"))
write_csv(MORTALITY_DATA_ALL,paste0(CSV_SAVE,"/All_Wyoming_Counties_Mortality_Rates.csv"))
#Lincoln County only, in both formats
saveRDS(LIN_MORTALITY,paste0(RDS_SAVE,"/Lincoln_County_Mortality_Rates.Rds"))
write_csv(LIN_MORTALITY,paste0(CSV_SAVE,"/Lincoln_County_Mortality_Rates.csv"))
#Write short readme files to make the data sources more clear.
#Each cat() call writes its pieces back to back (sep="") and overwrites any existing file (append=FALSE).
#Readme placed with the raw data
cat(
	"Data files gathered manually from:\n",
	"https://hdpulse.nimhd.nih.gov/data-portal/mortality/table?cod=247&cod_options=cod_15&ratetype=aa&ratetype_options=ratetype_2&race=00&race_options=race_6&sex=2&sex_options=sex_3&age=177&age_options=age_11&ruralurban=0&ruralurban_options=ruralurban_3&yeargroup=5&yeargroup_options=year5yearmort_1&statefips=56&statefips_options=area_states&county=56000&county_options=counties_wyoming&comparison=counties_to_us&comparison_options=comparison_counties&radio_comparison=areas&radio_comparison_options=cods_or_areas\n",
	"\nEach file is single age group, so age weighting does not apply despite the variable names\n",
	file=paste0(SAVE_LOC_RAW,"/README_MORTALITY_DATA.txt"),sep="",append=FALSE
)
#Readme placed with the processed data
cat(
	"This is a processed file of NIH death rates by age and county. Data files first gathered manually from:\n",
	"\nhttps://hdpulse.nimhd.nih.gov/data-portal/mortality/table?cod=247&cod_options=cod_15&ratetype=aa&ratetype_options=ratetype_2&race=00&race_options=race_6&sex=2&sex_options=sex_3&age=177&age_options=age_11&ruralurban=0&ruralurban_options=ruralurban_3&yeargroup=5&yeargroup_options=year5yearmort_1&statefips=56&statefips_options=area_states&county=56000&county_options=counties_wyoming&comparison=counties_to_us&comparison_options=comparison_counties&radio_comparison=areas&radio_comparison_options=cods_or_areas\n",
	"\nThese manually saved files are in the raw data directory. Each file is single age group, so age weighting does not apply despite the variable names\n",
	file=paste0(SAVE_MORT_LOC,"/README_MORTALITY_DATA.txt"),sep="",append=FALSE
)