100 lines
6.4 KiB
R
100 lines
6.4 KiB
R
#########################################Mortality Rate
|
|
library(tidyverse)
|
|
###Create Location to Save raw data sets
|
|
if(!exists("SAVE_LOC_RAW")){SAVE_LOC_RAW <-"./Data/Raw_Data/"}
|
|
dir.create(SAVE_LOC_RAW, recursive = TRUE, showWarnings = FALSE)
|
|
GET_MORTALITY_DATA <- function(FILE,SEX,LOWER_AGE,UPPER_AGE){
|
|
#Create clean mortality rate data
|
|
#Data gathered from https://hdpulse.nimhd.nih.gov/data-portal/mortality/table?cod=247&cod_options=cod_15&ratetype=aa&ratetype_options=ratetype_2&race=00&race_options=race_6&sex=2&sex_options=sex_3&age=177&age_options=age_11&ruralurban=0&ruralurban_options=ruralurban_3&yeargroup=5&yeargroup_options=year5yearmort_1&statefips=56&statefips_options=area_states&county=56000&county_options=counties_wyoming&comparison=counties_to_us&comparison_options=comparison_counties&radio_comparison=areas&radio_comparison_options=cods_or_areas
|
|
|
|
NAMES <- c("County","FIPS","Death_Rate","Lower_Rate","Upper_Rate","Deaths","Trend_Category","Trend","Lower_Trend","Upper_Trend")
|
|
DF <- read_csv(FILE,skip=5,col_names=NAMES,col_types=list('c',"i",'d','d','d','d','c','d','d','d')) %>% filter(grepl("County|Wyoming",County)|County=="United States") %>% mutate(Rate_SD=(Upper_Rate-Lower_Rate)/(2*1.96),Trend_SD=(Upper_Trend-Lower_Trend)/(2*1.96)) %>% select(County,Death_Rate,Rate_SD,Trend,Trend_SD)
|
|
DF$County <- gsub(" County","",DF$County,ignore.case=TRUE)
|
|
DF[,-1] <- DF[,-1]/100000
|
|
WYOMING_TREND <- pull(DF[DF$County=="Wyoming",],"Trend")
|
|
US_TREND <- pull(DF[DF$County=="United States",],"Trend")
|
|
|
|
WYOMING_RATE <- pull(DF[DF$County=="Wyoming",],"Death_Rate")
|
|
US_RATE <- pull(DF[DF$County=="United States",],"Death_Rate")
|
|
|
|
DF$Imparted_Trend <- FALSE
|
|
if(is.na(WYOMING_TREND)){
|
|
DF[1,4:5] <- DF[2,4:5]
|
|
DF[1,6] <- TRUE
|
|
}
|
|
|
|
DF$Imparted_Rate <- FALSE
|
|
if(is.na(WYOMING_RATE)){
|
|
DF[1,4:5] <- DF[2,2:3]
|
|
DF[1,6] <- TRUE
|
|
}
|
|
WYOMING_BASELINE_TREND <-cbind (DF[1,4:5] ,TRUE)
|
|
WYOMING_BASELINE_RATE <-DF[1,2:3]
|
|
|
|
for(i in 3:nrow(DF)){
|
|
#Impart any missing trends based on higher levels
|
|
if(is.na(pull(DF[i,],"Trend"))){ DF[i,4:6] <- WYOMING_BASELINE_TREND}
|
|
#Impart any missing death rates based on higher levels
|
|
if(is.na(pull(DF[i,],"Death_Rate"))){
|
|
DF[i,2:3] <- WYOMING_BASELINE_RATE
|
|
DF[i,"Imparted_Rate"] <- TRUE
|
|
}
|
|
}
|
|
DF$Sex <- SEX
|
|
DF$Min_Age <- LOWER_AGE
|
|
DF$Max_Age <- UPPER_AGE
|
|
DF <- DF %>% select(County,Sex,Min_Age,Max_Age,Death_Rate,Rate_SD,Imparted_Rate,everything())
|
|
return(DF)
|
|
}
|
|
SAVE_LOC_RAW_MORT <- paste0(SAVE_LOC_RAW,"Mortality_Rates/")
|
|
RAW_MORTALITY_LOC_FEMALE <- paste0(SAVE_LOC_RAW_MORT,"Female/")
|
|
RAW_MORTALITY_LOC_MALE <- paste0(SAVE_LOC_RAW_MORT,"Male/")
|
|
#Gather all mortality Records
|
|
MORTALITY_DATA_ALL <- rbind(
|
|
GET_MORTALITY_DATA(paste0(RAW_MORTALITY_LOC_FEMALE,"A_Under1.csv"),"Female",0,0),
|
|
GET_MORTALITY_DATA(paste0(RAW_MORTALITY_LOC_FEMALE,"B_1_9.csv"),"Female",1,9),
|
|
GET_MORTALITY_DATA(paste0(RAW_MORTALITY_LOC_FEMALE,"C_10_19.csv"),"Female",10,19),
|
|
GET_MORTALITY_DATA(paste0(RAW_MORTALITY_LOC_FEMALE,"D_20_39.csv"),"Female",20,39),
|
|
GET_MORTALITY_DATA(paste0(RAW_MORTALITY_LOC_FEMALE,"E_40_64.csv"),"Female",40,64),
|
|
GET_MORTALITY_DATA(paste0(RAW_MORTALITY_LOC_FEMALE,"G_65_74.csv"),"Female",65,74),
|
|
GET_MORTALITY_DATA(paste0(RAW_MORTALITY_LOC_FEMALE,"H_75_84.csv"),"Female",75,84),
|
|
GET_MORTALITY_DATA(paste0(RAW_MORTALITY_LOC_FEMALE,"I_85+.csv"),"Female",85,Inf),
|
|
GET_MORTALITY_DATA(paste0(RAW_MORTALITY_LOC_MALE,"A_Under1.csv"),"Male",0,0),
|
|
GET_MORTALITY_DATA(paste0(RAW_MORTALITY_LOC_MALE,"B_1_9.csv"),"Male",1,9),
|
|
GET_MORTALITY_DATA(paste0(RAW_MORTALITY_LOC_MALE,"C_10_19.csv"),"Male",10,19),
|
|
GET_MORTALITY_DATA(paste0(RAW_MORTALITY_LOC_MALE,"D_20_39.csv"),"Male",20,39),
|
|
GET_MORTALITY_DATA(paste0(RAW_MORTALITY_LOC_MALE,"E_40_64.csv"),"Male",40,64),
|
|
GET_MORTALITY_DATA(paste0(RAW_MORTALITY_LOC_MALE,"G_65_74.csv"),"Male",65,74),
|
|
GET_MORTALITY_DATA(paste0(RAW_MORTALITY_LOC_MALE,"H_75_84.csv"),"Male",75,84),
|
|
GET_MORTALITY_DATA(paste0(RAW_MORTALITY_LOC_MALE,"I_85+.csv"),"Male",85,Inf)
|
|
)
|
|
LIN_MORTALITY <- MORTALITY_DATA_ALL %>% filter(County=="Lincoln")
|
|
##Save the mortality data
|
|
if(!exists("SAVE_MORT_LOC")){SAVE_MORT_LOC <-"./Data/Cleaned_Data/Mortality_Rate_Data"}
|
|
CSV_SAVE <- paste0(SAVE_MORT_LOC ,"/CSV")
|
|
RDS_SAVE <- paste0(SAVE_MORT_LOC,"/RDS")
|
|
#Save files for all county demographics
|
|
dir.create(CSV_SAVE , recursive = TRUE, showWarnings = FALSE)
|
|
dir.create(RDS_SAVE , recursive = TRUE, showWarnings = FALSE)
|
|
saveRDS(MORTALITY_DATA_ALL,paste0(RDS_SAVE,"/All_Wyoming_Counties_Mortality_Rates.Rds" ))
|
|
write_csv(MORTALITY_DATA_ALL,paste0(CSV_SAVE,"/All_Wyoming_Counties_Mortality_Rates.csv" ))
|
|
saveRDS(LIN_MORTALITY,paste0(RDS_SAVE,"/Lincoln_County_Mortality_Rates.Rds" ))
|
|
write_csv(LIN_MORTALITY,paste0(CSV_SAVE,"/Lincoln_County_Mortality_Rates.csv" ))
|
|
#Create a short readme files to make the data sources more clear
|
|
#Save a raw data readme
|
|
sink(file=paste0(SAVE_LOC_RAW,"/README_MORTALITY_DATA.txt"),append=FALSE)
|
|
cat("Data files gathered manually from:\n")
|
|
cat("https://hdpulse.nimhd.nih.gov/data-portal/mortality/table?cod=247&cod_options=cod_15&ratetype=aa&ratetype_options=ratetype_2&race=00&race_options=race_6&sex=2&sex_options=sex_3&age=177&age_options=age_11&ruralurban=0&ruralurban_options=ruralurban_3&yeargroup=5&yeargroup_options=year5yearmort_1&statefips=56&statefips_options=area_states&county=56000&county_options=counties_wyoming&comparison=counties_to_us&comparison_options=comparison_counties&radio_comparison=areas&radio_comparison_options=cods_or_areas\n")
|
|
cat("\nEach file is single age group, so age weighting does not apply despite the variable names\n")
|
|
sink()
|
|
|
|
|
|
|
|
#Save a processed raw data readme
|
|
sink(file=paste0(SAVE_MORT_LOC,"/README_MORTALITY_DATA.txt"),append=FALSE)
|
|
cat("This is a processed file of NIH death rates by age and county. Data files first gathered manually from:\n")
|
|
cat("\nhttps://hdpulse.nimhd.nih.gov/data-portal/mortality/table?cod=247&cod_options=cod_15&ratetype=aa&ratetype_options=ratetype_2&race=00&race_options=race_6&sex=2&sex_options=sex_3&age=177&age_options=age_11&ruralurban=0&ruralurban_options=ruralurban_3&yeargroup=5&yeargroup_options=year5yearmort_1&statefips=56&statefips_options=area_states&county=56000&county_options=counties_wyoming&comparison=counties_to_us&comparison_options=comparison_counties&radio_comparison=areas&radio_comparison_options=cods_or_areas\n")
|
|
cat("\nThese manually saved files are in the raw data directory. Each file is single age group, so age weighting does not apply despite the variable names\n")
|
|
sink()
|
|
|