Population_Study/Scripts/1B_Process_Existing_NIH_Mortality_Data.r

#########################################Mortality Rate
library(tidyverse)
###Create Location to Save raw data sets
	if(!exists("SAVE_LOC_RAW")){SAVE_LOC_RAW <-"./Data/Raw_Data/"}
	dir.create(SAVE_LOC_RAW, recursive = TRUE, showWarnings = FALSE)
GET_MORTALITY_DATA <- function(FILE,SEX,LOWER_AGE,UPPER_AGE){
	#Create clean mortality rate data
	#Data gathered from https://hdpulse.nimhd.nih.gov/data-portal/mortality/table?cod=247&cod_options=cod_15&ratetype=aa&ratetype_options=ratetype_2&race=00&race_options=race_6&sex=2&sex_options=sex_3&age=177&age_options=age_11&ruralurban=0&ruralurban_options=ruralurban_3&yeargroup=5&yeargroup_options=year5yearmort_1&statefips=56&statefips_options=area_states&county=56000&county_options=counties_wyoming&comparison=counties_to_us&comparison_options=comparison_counties&radio_comparison=areas&radio_comparison_options=cods_or_areas

	NAMES <- c("County","FIPS","Death_Rate","Lower_Rate","Upper_Rate","Deaths","Trend_Category","Trend","Lower_Trend","Upper_Trend")
	DF <- read_csv(FILE,skip=5,col_names=NAMES,col_types=list('c',"i",'d','d','d','d','c','d','d','d')) %>% filter(grepl("County|Wyoming",County)|County=="United States") %>% mutate(Rate_SD=(Upper_Rate-Lower_Rate)/(2*1.96),Trend_SD=(Upper_Trend-Lower_Trend)/(2*1.96)) %>% select(County,Death_Rate,Rate_SD,Trend,Trend_SD)
	DF$County <- gsub(" County","",DF$County,ignore.case=TRUE)
	DF[,-1] <- DF[,-1]/100000
	WYOMING_TREND <- pull(DF[DF$County=="Wyoming",],"Trend")
	US_TREND <- pull(DF[DF$County=="United States",],"Trend")

	WYOMING_RATE <- pull(DF[DF$County=="Wyoming",],"Death_Rate")
	US_RATE <- pull(DF[DF$County=="United States",],"Death_Rate")

	DF$Imparted_Trend <- FALSE
	if(is.na(WYOMING_TREND)){
		DF[1,4:5] <- DF[2,4:5]
		DF[1,6] <- TRUE
	}

	DF$Imparted_Rate <- FALSE
	if(is.na(WYOMING_RATE)){
		DF[1,4:5] <- DF[2,2:3]
		DF[1,6] <- TRUE
	}
	WYOMING_BASELINE_TREND <-cbind (DF[1,4:5] ,TRUE)
	WYOMING_BASELINE_RATE <-DF[1,2:3]

	for(i in 3:nrow(DF)){
		#Impart any missing trends based on higher levels
		if(is.na(pull(DF[i,],"Trend"))){ DF[i,4:6] <- WYOMING_BASELINE_TREND}
		#Impart any missing death rates based on higher levels
		if(is.na(pull(DF[i,],"Death_Rate"))){
			DF[i,2:3] <- WYOMING_BASELINE_RATE
			DF[i,"Imparted_Rate"] <- TRUE
		}
	}
	DF$Sex <- SEX
	DF$Min_Age <- LOWER_AGE
	DF$Max_Age  <- UPPER_AGE
	DF <- DF %>% select(County,Sex,Min_Age,Max_Age,Death_Rate,Rate_SD,Imparted_Rate,everything())
	return(DF)
}
SAVE_LOC_RAW_MORT <- 	paste0(SAVE_LOC_RAW,"Mortality_Rates/")
RAW_MORTALITY_LOC_FEMALE <-	paste0(SAVE_LOC_RAW_MORT,"Female/")
RAW_MORTALITY_LOC_MALE <-	paste0(SAVE_LOC_RAW_MORT,"Male/")
#Gather all mortality Records
MORTALITY_DATA_ALL <- rbind(
	GET_MORTALITY_DATA(paste0(RAW_MORTALITY_LOC_FEMALE,"A_Under1.csv"),"Female",0,0),
	GET_MORTALITY_DATA(paste0(RAW_MORTALITY_LOC_FEMALE,"B_1_9.csv"),"Female",1,9),
	GET_MORTALITY_DATA(paste0(RAW_MORTALITY_LOC_FEMALE,"C_10_19.csv"),"Female",10,19),
	GET_MORTALITY_DATA(paste0(RAW_MORTALITY_LOC_FEMALE,"D_20_39.csv"),"Female",20,39),
	GET_MORTALITY_DATA(paste0(RAW_MORTALITY_LOC_FEMALE,"E_40_64.csv"),"Female",40,64),
	GET_MORTALITY_DATA(paste0(RAW_MORTALITY_LOC_FEMALE,"G_65_74.csv"),"Female",65,74),
	GET_MORTALITY_DATA(paste0(RAW_MORTALITY_LOC_FEMALE,"H_75_84.csv"),"Female",75,84),
	GET_MORTALITY_DATA(paste0(RAW_MORTALITY_LOC_FEMALE,"I_85+.csv"),"Female",85,Inf),
	GET_MORTALITY_DATA(paste0(RAW_MORTALITY_LOC_MALE,"A_Under1.csv"),"Male",0,0),
	GET_MORTALITY_DATA(paste0(RAW_MORTALITY_LOC_MALE,"B_1_9.csv"),"Male",1,9),
	GET_MORTALITY_DATA(paste0(RAW_MORTALITY_LOC_MALE,"C_10_19.csv"),"Male",10,19),
	GET_MORTALITY_DATA(paste0(RAW_MORTALITY_LOC_MALE,"D_20_39.csv"),"Male",20,39),
	GET_MORTALITY_DATA(paste0(RAW_MORTALITY_LOC_MALE,"E_40_64.csv"),"Male",40,64),
	GET_MORTALITY_DATA(paste0(RAW_MORTALITY_LOC_MALE,"G_65_74.csv"),"Male",65,74),
	GET_MORTALITY_DATA(paste0(RAW_MORTALITY_LOC_MALE,"H_75_84.csv"),"Male",75,84),
	GET_MORTALITY_DATA(paste0(RAW_MORTALITY_LOC_MALE,"I_85+.csv"),"Male",85,Inf)
)
LIN_MORTALITY <- MORTALITY_DATA_ALL  %>% filter(County=="Lincoln")
##Save the mortality data
	if(!exists("SAVE_MORT_LOC")){SAVE_MORT_LOC <-"./Data/Cleaned_Data/Mortality_Rate_Data"}
	CSV_SAVE <- paste0(SAVE_MORT_LOC ,"/CSV")
	RDS_SAVE <- paste0(SAVE_MORT_LOC,"/RDS")
	#Save files for all county demographics
	dir.create(CSV_SAVE  , recursive = TRUE, showWarnings = FALSE)
	dir.create(RDS_SAVE , recursive = TRUE, showWarnings = FALSE)
	saveRDS(MORTALITY_DATA_ALL,paste0(RDS_SAVE,"/All_Wyoming_Counties_Mortality_Rates.Rds" ))
	write_csv(MORTALITY_DATA_ALL,paste0(CSV_SAVE,"/All_Wyoming_Counties_Mortality_Rates.csv" ))
	saveRDS(LIN_MORTALITY,paste0(RDS_SAVE,"/Lincoln_County_Mortality_Rates.Rds" ))
	write_csv(LIN_MORTALITY,paste0(CSV_SAVE,"/Lincoln_County_Mortality_Rates.csv" ))
#Create a short readme files to make the data sources more clear
	#Save a raw data readme
sink(file=paste0(SAVE_LOC_RAW_MORT ,"/README_MORTALITY_DATA.txt"),append=FALSE)
cat("Data files gathered manually from:

National Institute of Health HDPules: An Ecosystem of Health Disparities and Minority Health Resources at
https://hdpulse.nimhd.nih.gov/data-portal/mortality/table?cod=247&cod_options=cod_15&ratetype=aa&ratetype_options=ratetype_2&race=00&race_options=race_6&sex=2&sex_options=sex_3&age=177&age_options=age_11&ruralurban=0&ruralurban_options=ruralurban_3&yeargroup=5&yeargroup_options=year5yearmort_1&statefips=56&statefips_options=area_states&county=56000&county_options=counties_wyoming&comparison=counties_to_us&comparison_options=comparison_counties&radio_comparison=areas&radio_comparison_options=cods_or_areas

Each file is single age group, so age weighting does not apply despite the variable names. Each age group file is named sequentially with a prefix “A_” for the first age group and a prefix “I_” for the oldest. There are separate directories for each sex. The cleaning script uses this directory structure to extract the right files and merge them into one data set.

These files must be manually downloaded because there is a filter feature  on the web page that is used to select the county and age. While there may be a way to scrape the data with code the trade off on my time was not worth it. Future runs will need to check these records, and can download the files to match this directory structure, in order to process a update in death rates.

Valid data as of Nov 6 2025 Alex Gebben")
sink()