Signficant Script Cleaning midsteam

This commit is contained in:
Alex Gebben Work 2025-11-05 16:20:20 -07:00
parent d66fa06536
commit 285026e421
31 changed files with 285 additions and 149 deletions

1
.gitignore vendored
View File

@ -2,6 +2,7 @@
#
*.png
*.csv
Data/Cleaned_Data/
Results/
#Don't save any major data files on the server, can be regenerated after pulling

View File

Can't render this file because it contains an unexpected character in line 5 and column 13.

View File

Can't render this file because it contains an unexpected character in line 5 and column 13.

View File

Can't render this file because it contains an unexpected character in line 5 and column 13.

View File

Can't render this file because it contains an unexpected character in line 5 and column 13.

View File

Can't render this file because it contains an unexpected character in line 5 and column 13.

View File

Can't render this file because it contains an unexpected character in line 5 and column 13.

View File

Can't render this file because it contains an unexpected character in line 5 and column 13.

View File

Can't render this file because it contains an unexpected character in line 5 and column 13.

View File

Can't render this file because it contains an unexpected character in line 5 and column 13.

View File

Can't render this file because it contains an unexpected character in line 5 and column 13.

View File

Can't render this file because it contains an unexpected character in line 5 and column 13.

View File

Can't render this file because it contains an unexpected character in line 5 and column 13.

View File

Can't render this file because it contains an unexpected character in line 5 and column 13.

View File

Can't render this file because it contains an unexpected character in line 5 and column 13.

View File

Can't render this file because it contains an unexpected character in line 5 and column 13.

View File

Can't render this file because it contains an unexpected character in line 5 and column 13.

View File

Can't render this file because it contains an unexpected character in line 5 and column 13.

View File

@ -0,0 +1,5 @@
Data files gathered manually from:
https://hdpulse.nimhd.nih.gov/data-portal/mortality/table?cod=247&cod_options=cod_15&ratetype=aa&ratetype_options=ratetype_2&race=00&race_options=race_6&sex=2&sex_options=sex_3&age=177&age_options=age_11&ruralurban=0&ruralurban_options=ruralurban_3&yeargroup=5&yeargroup_options=year5yearmort_1&statefips=56&statefips_options=area_states&county=56000&county_options=counties_wyoming&comparison=counties_to_us&comparison_options=comparison_counties&radio_comparison=areas&radio_comparison_options=cods_or_areas
Each file is single age group, so age weighting does not apply despite the variable names

View File

@ -0,0 +1 @@
[1] "Data files gathered manually from:\n\n\n\n\nhttps://hdpulse.nimhd.nih.gov/data-portal/mortality/table?cod=247&cod_options=cod_15&ratetype=aa&ratetype_options=ratetype_2&race=00&race_options=race_6&sex=2&sex_options=sex_3&age=177&age_options=age_11&ruralurban=0&ruralurban_options=ruralurban_3&yeargroup=5&yeargroup_options=year5yearmort_1&statefips=56&statefips_options=area_states&county=56000&county_options=counties_wyoming&comparison=counties_to_us&comparison_options=comparison_counties&radio_comparison=areas&radio_comparison_options=cods_or_areas\n\n\n\n\nEach file is single age group, so age weighting does not apply despite the variable names"

Binary file not shown.

Binary file not shown.

Binary file not shown.

View File

@ -4,6 +4,32 @@ ACS_FILE_LOC <- "Data/Cleaned_Data/ACS_Census_Demographic_Data.Rds"
ACS_YEAR <- 2023 #Year of the ACS data
if(!file.exists(ACS_FILE_LOC)){source("Scripts/Load_ACS_Census_Data.r")}else{AGE_DATA <- readRDS(ACS_FILE_LOC)}
LIN_DEMOGRAPHICS <- readRDS("Data/Cleaned_Data/Lincoln_Demographic_Data.Rds") %>% filter(Year==ACS_YEAR)
LIN_DEMOGRAPHICS
GET_INTIAL_DATA_SUMMARY <- function(DEMO_DATA,FILE_SAVE_NAME,ST_POP=NA,Area=NA,ST_YEAR=NA,SAVE_DIR='Data/Cleaned_Data/Intiate_Simulation/'){
ST_BIRTH <- round(sum((DEMO_DATA%>% filter(Age==0))[4:5]))
ST_BIRTH_TWO_PREV <- round(sum((DEMO_DATA%>% filter(Age==1))[4:5]))
if(is.na(ST_YEAR)){ST_YEAR=max(POP_DATA$Year)}
if(is.na(ST_POP)){
if(Area=='Kemmerer'){
ST_POP<- POP_DATA %>% filter(City %in% c('Kemmerer','Diamondville'),Year==ST_YEAR) %>% mutate(Population=as.numeric(Population)) %>% pull(Population) %>% sum()
} else if(Area=='Other Lincoln'){
ST_POP_KEM<- POP_DATA %>% filter(City %in% c('Kemmerer','Diamondville'),Year==ST_YEAR) %>% mutate(Population=as.numeric(Population)) %>% pull(Population) %>% sum()
ST_POP_LIN <- readRDS("Data/Cleaned_Data/Wyoming_County_Population.Rds") %>% filter(Year==ST_YEAR,County==Area) %>% pull(Population) #Grab the year of data for the county
ST_POP <- ST_POP_LIN-ST_POP_KEM
} else{
ST_POP <- readRDS("Data/Cleaned_Data/Wyoming_County_Population.Rds") %>% filter(Year==ST_YEAR,County==Area) %>% pull(Population) #Grab the year of data for the county
}
}
INTIATE_DATA <- DEMO_DATA %>% mutate(Male_Window=Age>=18 & Age<=30,Female_Window=Age>=18 & Age<=28) %>% group_by(County,Year) %>% summarize(Female_Birth_Group=sum(Num_Female*Female_Window),Male_Birth_Group=sum(Num_Male*Male_Window),Min_Birth_Group=ifelse(Female_Birth_Group<Male_Birth_Group,Female_Birth_Group,Male_Birth_Group),PREV_BIRTH=ST_BIRTH,PREV_TWO_BIRTH=ST_BIRTH_TWO_PREV,Population=ST_POP) %>% mutate(Births=NA,Deaths=NA,Migration=NA) %>% select(Year,County,Population,Births,Deaths,Migration,Min_Birth_Group,PREV_BIRTH,PREV_TWO_BIRTH) %>% ungroup
saveRDS(INTIATE_DATA ,paste0(SAVE_DIR,FILE_SAVE_NAME,".Rds"))
write_csv(INTIATE_DATA ,paste0(SAVE_DIR,FILE_SAVE_NAME,".csv"))
return(INTIATE_DATA)
GET_INTIAL_DATA_SUMMARY(LIN_DEMOGRAPHICS,"test")
for(i in 1:nrow(AGE_DATA)){
if(i==1 & exists("RES")){rm(RES)}
@ -31,6 +57,7 @@ KEM_DEMOGRAPHICS <- RES %>% filter(IN_KEM==1) %>% select(Age,Sex,Per_Pop) %>% mu
INTIATE_KEMMER <- KEM_DEMOGRAPHICS %>% mutate(Male_Window=Age>=18 & Age<=30,Female_Window=Age>=18 & Age<=28) %>% group_by(County,Year) %>% summarize(Female_Birth_Group=sum(Num_Female*Female_Window),Male_Birth_Group=sum(Num_Male*Male_Window),Min_Birth_Group=ifelse(Female_Birth_Group<Male_Birth_Group,Female_Birth_Group,Male_Birth_Group),PREV_BIRTH=ST_BIRTH_KEM,PREV_TWO_BIRTH=ST_BIRTH_TWO_PREV_KEM,Population=CURRENT_KEM_POP) %>% mutate(Births=NA,Deaths=NA,Migration=NA) %>% select(Year,County,Population,Births,Deaths,Migration,Min_Birth_Group,PREV_BIRTH,PREV_TWO_BIRTH) %>% ungroup
saveRDS(INTIATE_KEMMER ,"Data/Cleaned_Data/Intiate_Simulation/Kemmerer_Summary_Start_Data.Rds")
write_csv(INTIATE_KEMMER ,"Data/Cleaned_Data/Intiate_Simulation/Kemmerer_Summary_Start_Data.csv")
###Create data sets that contain all people not in Loncoln but not Kemmerer or Diamondville, for a seperate comparison run
LIN_DEMOGRAPHICS_RECENT <- readRDS("Data/Cleaned_Data/Lincoln_Demographic_Data.Rds") %>% filter(Year==max(Year)) #Grab the most recent year of data for all of the county
LIN_OTHER_DEMOGRAPHICS <- LIN_DEMOGRAPHICS_RECENT %>% left_join(KEM_DEMOGRAPHICS %>% select(-County,ADJ_MALE=Num_Male,ADJ_FEMALE=Num_Female)) %>% mutate(Num_Male=Num_Male-ADJ_MALE,Num_Female=Num_Female-ADJ_FEMALE) %>% select(-ADJ_MALE,-ADJ_FEMALE) %>% ungroup#Deduct the populaiton estimates of Kemmerer to create a "Other Lincoln" data set

View File

@ -1,12 +1,14 @@
#################IMPORTANT!!!! CLEAN UP DATA SCRIPT FOR USE
#############################Clean up scirpt folders, simulations of deaths should be seperated from this file. Death rate simulation is complicated and should be commented, and turned into a seperate script.
##############WORKING ON REGRESSION OF BIRTHS FOR PREDICTIONS OF GROWTH, THINK ABOUT GOING FROM COUNTY TO CITY, SEPERATE REGRESION FROM DATA COLLOECTION
#############################Clean up script folders, simulations of deaths should be separated from this file. Death rate simulation is complicated and should be commented, and turned into a separate script.
library(rvest)
library(tidyverse)
library(readxl)
#setwd("../")
###Create Location to Save raw data sets
if(!exists("SAVE_LOC_RAW")){SAVE_LOC_RAW <-"./Data/Raw_Data/"}
dir.create(SAVE_LOC_RAW, recursive = TRUE, showWarnings = FALSE)
########County, Death, Birth and Migration Data
#Data found on the page http://eadiv.state.wy.us/pop/
#Website States: Wyoming Economic Analysis Division based on U.S. Census Bureau's population estimation and vital stats above
PAGE <- read_html("http://eadiv.state.wy.us/pop/BirthDeathMig.htm")
NODE <- html_element(PAGE ,"table")
TBL <- html_table(NODE)
@ -30,7 +32,7 @@ TBL <- TBL %>% filter(!is.na(Type)) %>% select(County,Type,everything())
GROUP <- colnames(TBL)[-1:-2]
Data <- pivot_longer(TBL,all_of(GROUP),names_to="Year",values_to="Pop_Change")
Data$County <- ifelse(toupper(Data$County)=="TOTAL","Wyoming",Data$County)
WY_COUNTY_DATA_SET <- pivot_wider(Data,names_from=Type,values_from=Pop_Change) %>% rename("Migration"=`Net Migration`) %>% mutate(Year=as.integer(Year),Births=parse_number(Births),Deaths=parse_number(Deaths),Migration=parse_number(Migration)) %>% mutate(Year=Year-1) #Data apears to be one off from populaiton
WY_COUNTY_DATA_SET <- pivot_wider(Data,names_from=Type,values_from=Pop_Change) %>% rename("Migration"=`Net Migration`) %>% mutate(Year=as.integer(Year),Births=parse_number(Births),Deaths=parse_number(Deaths),Migration=parse_number(Migration)) %>% mutate(Year=Year-1) #Data appears to be one off from population
WY_COUNTY_DATA_SET[,"County"] <- gsub(" ","_",WY_COUNTY_DATA_SET %>% pull(County))
########################City and County Population Data 2020 to 2024
@ -116,12 +118,15 @@ CITY_POP <- rbind(CITY_TBL,CITY_POP)
CITY_POP$City <- gsub("LaGrange","La_Grange",CITY_POP$City)
COUNTY_POP <- rbind(COUNTY_TBL,COUNTY_POP)
####################County and City Population Data for 1990-2000
if(!file.exists("./Data/Pop_1990s.xls")){download.file('http://eadiv.state.wy.us/pop/c&sc90_00.xls',"./Data/Pop_1990s.xls")}
TEMP <- read_xls("Data/Pop_1990s.xls",skip=2)[-1:-4,]
colnames(TEMP)[1] <- "County"
tail(TEMP)
TEMP <- TEMP[1:which(TEMP[,1]=="Wind River Res."),]
#Location to save any raw population files. Most files are not saved since they are pulled from a html and not a excel file, but older files are only available as excel files
SAVE_LOC_RAW_POP <- paste0(SAVE_LOC_RAW,"/Population")
dir.create(SAVE_LOC_RAW_POP , recursive = TRUE, showWarnings = FALSE)
POP_FILE_1990 <- paste0(SAVE_LOC_RAW_POP,"/Pop_1990s.xls")
if(!file.exists(POP_FILE_1990)){download.file('http://eadiv.state.wy.us/pop/c&sc90_00.xls',POP_FILE_1990)}
TEMP <- read_xls(POP_FILE_1990,skip=2)[-1:-4,]
colnames(TEMP)[1] <- "County"
TEMP <- TEMP[1:which(TEMP[,1]=="Wind River Res."),]
TEMP <- pivot_longer(TEMP,all_of(colnames(TEMP)[-1]),names_to="Year",values_to="Population") %>% mutate(Year=as.integer(Year),Population=as.numeric(Population))
TEMP <- TEMP %>% filter(Year!=2000)
TEMP_COUNTY <- TEMP[grepl("Cnty",TEMP %>% pull(County),ignore.case=TRUE),]
@ -138,8 +143,10 @@ TEMP_CITY <- TEMP_CITY %>% filter(Year!=2000)
try(rm(TEMP_CITY,TEMP_COUNTY,TEMP))
####################County and City Population Data for 1980-1990
if(!file.exists("./Data/Pop_1980s.xls")){download.file('http://eadiv.state.wy.us/pop/C&SC8090.xls',"./Data/Pop_1980s.xls")}
TEMP <- read_xls("Data/Pop_1980s.xls",skip=2)[-1:-4,]
POP_FILE_1980 <- paste0(SAVE_LOC_RAW_POP ,"/Pop_1980s.xls")
if(!file.exists(POP_FILE_1980)){download.file('http://eadiv.state.wy.us/pop/C&SC8090.xls',POP_FILE_1980)}
TEMP <- read_xls(POP_FILE_1980,skip=2)[-1:-4,]
colnames(TEMP)[1] <- "County"
TEMP <- TEMP[2:which(TEMP[,1]=="Upton"),1:(min(which(is.na(TEMP[2,])))-1)]
TEMP <- pivot_longer(TEMP,all_of(colnames(TEMP)[-1]),names_to="Year",values_to="Population") %>% mutate(Year=as.integer(Year),Population=as.numeric(Population))
@ -162,8 +169,10 @@ COUNTY_POP <- rbind(TEMP_COUNTY,COUNTY_POP)
try(rm(TEMP_CITY,TEMP_COUNTY,TEMP))
####################County Population Data for 1970-1980
if(!file.exists("./Data/Pop_1970s.xls")){download.file('http://eadiv.state.wy.us/pop/Cnty7080.xls',"./Data/Pop_1970s.xls")}
TEMP <- read_xls("Data/Pop_1970s.xls",skip=2)[-1:-4,]
POP_FILE_1970 <- paste0(SAVE_LOC_RAW_POP ,"/Pop_1970s.xls")
if(!file.exists(POP_FILE_1970)){download.file('http://eadiv.state.wy.us/pop/Cnty7080.xls',POP_FILE_1970)}
TEMP <- read_xls(POP_FILE_1970,skip=2)[-1:-4,]
colnames(TEMP)[1] <- "County"
TEMP <- TEMP[1:which(TEMP[,1]=="Weston"),]
TEMP <- pivot_longer(TEMP,all_of(colnames(TEMP)[-1]),names_to="Year",values_to="Population") %>% mutate(Year=as.integer(Year),Population=as.numeric(Population))
@ -197,123 +206,23 @@ try(rm(TEMP))
colnames(TEMP ) <- c("Year","City","Population")
TEMP <- as_tibble(TEMP)
CITY_POP <- rbind(TEMP,CITY_POP) %>% arrange(City,Year)
#Remove empty values, ensure all numeric values are not saved as characters
CITY_POP <- CITY_POP %>% filter(!is.na(Population) ) %>% mutate(Population=parse_number(Population),Year=parse_number(Year))
#Add Other Data
COUNTY_POP <- COUNTY_POP %>% mutate(Year=as.integer(Year)) %>% unique
COUNTY_POP <- COUNTY_POP %>% mutate(Year=as.numeric(Year)) %>% unique
WY_COUNTY_DATA_SET <- COUNTY_POP %>% left_join(WY_COUNTY_DATA_SET ) %>% mutate(Population=as.numeric(Population)) %>% unique
###Save Population Results
write_csv(CITY_POP,file="./Data/Cleaned_Data/Wyoming_City_Population.csv")
write_csv(WY_COUNTY_DATA_SET ,file="./Data/Cleaned_Data/Wyoming_County_Population.csv")
saveRDS(CITY_POP,file="./Data/Cleaned_Data/Wyoming_City_Population.Rds")
saveRDS(WY_COUNTY_DATA_SET ,file="./Data/Cleaned_Data/Wyoming_County_Population.Rds")
if(!exists("SAVE_LOC_POP")){SAVE_LOC_POP <-"./Data/Cleaned_Data/Population_Data"}
CSV_SAVE <- paste0(SAVE_LOC_POP,"/CSV")
RDS_SAVE <- paste0(SAVE_LOC_POP,"/RDS")
dir.create(CSV_SAVE, recursive = TRUE, showWarnings = FALSE)
dir.create(RDS_SAVE, recursive = TRUE, showWarnings = FALSE)
saveRDS(CITY_POP,paste0(RDS_SAVE,"/All_Wyoming_City_Populations.Rds" ))
write_csv(CITY_POP,paste0(CSV_SAVE,"/All_Wyoming_City_Populations.csv" ))
saveRDS(WY_COUNTY_DATA_SET,paste0(RDS_SAVE,"/All_Wyoming_County_Populations.Rds" ))
write_csv(WY_COUNTY_DATA_SET,paste0(CSV_SAVE,"/All_Wyoming_County_Populations.csv" ))
###################Demographics
if(!file.exists("./Data/Demo_Single_Year_2020s.xls")){download.file('http://eadiv.state.wy.us/Pop/CO_SYASEX24.xlsx',"./Data/Demo_Single_Year_2020s.xls")}
TEMP <- read_xlsx("./Data/Demo_Single_Year_2020s.xls",skip=2)[,-1]
TEMP <- TEMP[1:(min(which(is.na(TEMP[,1])))-1),]
TEMP <- TEMP[!grepl("Base",TEMP$YEAR,ignore.case=TRUE),] #There are two population values provided. I believe one is the census baseline, and one is a estimate in July. Keep the later estimate, to line up with the same seasonal collection pattern of the rest of the data
TEMP$YEAR <- year(as.Date(substr((TEMP$YEAR),1,8),format="%m/%d/%Y"))
colnames(TEMP) <- c("County","Year","Age","Number","Num_Male","Num_Female")
TEMP$County <- gsub(" County","",TEMP$County,ignore.case=TRUE)
DEM_2020 <- TEMP %>% select(-Number)
###Demogrpahics all
DEM_DATA <- read_delim("Data/County_Demographics_Census/wy.1969_2023.singleages.through89.90plus.txt",delim=" ",col_names=c("ID","VALUES"),col_types=list('c','c'))
DEM_DATA$Year <- as.integer(substr(DEM_DATA$ID,1,4))
DEM_DATA$fips<- substr(DEM_DATA$ID,7,11)
COUNTY_LIST <- read_csv("https://github.com/kjhealy/fips-codes/raw/refs/heads/master/county_fips_master.csv",col_types=list('c','c')) %>% filter(state_abbr=="WY") %>% select(fips,County=county_name) %>% mutate(County=gsub(" ","_",gsub(" County","",County,ignore.case=TRUE)))
DEM_DATA <- DEM_DATA %>% left_join(COUNTY_LIST) %>% select(-fips)
#16=3
DEM_DATA$Sex <- ifelse(substr(DEM_DATA$VALUES,3,3)==1,"Male","Female")
DEM_DATA$Age <- parse_number(substr(DEM_DATA$VALUES,4,5))
DEM_DATA$Number <- parse_number(substr(DEM_DATA$VALUES,6,14))
DEM_DATA <- DEM_DATA %>% select(-ID,-VALUES)
DEM_DATA <- DEM_DATA %>% group_by(Year,County,Sex,Age) %>% summarize(Number=sum(Number)) %>% ungroup()#Aggregate to sex and age level
#The Wyoming census data seems newer than this data set from SEER cancer data source. Drop any of these records that overlap with the Wyoming data before merging. Arrange so the column order is the same between the two data sets, so they can be easily bound together.
DEM_DATA <- pivot_wider(DEM_DATA,names_from=Sex,values_from=Number) %>% rename(Num_Female=Female,Num_Male=Male) %>% select(colnames(DEM_2020)) %>% filter(Year<min(DEM_2020$Year)) %>% unique
DEM_DATA <- rbind(DEM_2020,DEM_DATA) %>% ungroup %>% arrange(Year,Age) %>% unique
###Save demographic data set
LIN_DEM <- DEM_DATA %>% filter(County=='Lincoln')
write_csv(LIN_DEM,file="./Data/Cleaned_Data/Lincoln_Demographic_Data.csv")
write_csv(DEM_DATA,file="./Data/Cleaned_Data/Wyoming_County_Demographic_Data.csv")
saveRDS(LIN_DEM,file="./Data/Cleaned_Data/Lincoln_Demographic_Data.Rds")
saveRDS(DEM_DATA,file="./Data/Cleaned_Data/Wyoming_County_Demographic_Data.Rds")
#########################################Mortality Rate
GET_MORTALITY_DATA <- function(FILE,SEX,LOWER_AGE,UPPER_AGE){
#Create clean mortality rate data
#Data gathered from https://hdpulse.nimhd.nih.gov/data-portal/mortality/table?cod=247&cod_options=cod_15&ratetype=aa&ratetype_options=ratetype_2&race=00&race_options=race_6&sex=2&sex_options=sex_3&age=177&age_options=age_11&ruralurban=0&ruralurban_options=ruralurban_3&yeargroup=5&yeargroup_options=year5yearmort_1&statefips=56&statefips_options=area_states&county=56000&county_options=counties_wyoming&comparison=counties_to_us&comparison_options=comparison_counties&radio_comparison=areas&radio_comparison_options=cods_or_areas
NAMES <- c("County","FIPS","Death_Rate","Lower_Rate","Upper_Rate","Deaths","Trend_Category","Trend","Lower_Trend","Upper_Trend")
DF <- read_csv(FILE,skip=5,col_names=NAMES,col_types=list('c',"i",'d','d','d','d','c','d','d','d')) %>% filter(grepl("County|Wyoming",County)|County=="United States") %>% mutate(Rate_SD=(Upper_Rate-Lower_Rate)/(2*1.96),Trend_SD=(Upper_Trend-Lower_Trend)/(2*1.96)) %>% select(County,Death_Rate,Rate_SD,Trend,Trend_SD)
DF$County <- gsub(" County","",DF$County,ignore.case=TRUE)
DF[,-1] <- DF[,-1]/100000
WYOMING_TREND <- pull(DF[DF$County=="Wyoming",],"Trend")
US_TREND <- pull(DF[DF$County=="United States",],"Trend")
WYOMING_RATE <- pull(DF[DF$County=="Wyoming",],"Death_Rate")
US_RATE <- pull(DF[DF$County=="United States",],"Death_Rate")
DF$Imparted_Trend <- FALSE
if(is.na(WYOMING_TREND)){
DF[1,4:5] <- DF[2,4:5]
DF[1,6] <- TRUE
}
DF$Imparted_Rate <- FALSE
if(is.na(WYOMING_RATE)){
DF[1,4:5] <- DF[2,2:3]
DF[1,6] <- TRUE
}
WYOMING_BASELINE_TREND <-cbind (DF[1,4:5] ,TRUE)
WYOMING_BASELINE_RATE <-DF[1,2:3]
for(i in 3:nrow(DF)){
#Impart any missing trends based on higher levels
if(is.na(pull(DF[i,],"Trend"))){ DF[i,4:6] <- WYOMING_BASELINE_TREND}
#Impart any missing death rates based on higher levels
if(is.na(pull(DF[i,],"Death_Rate"))){
DF[i,2:3] <- WYOMING_BASELINE_RATE
DF[i,"Imparted_Rate"] <- TRUE
}
}
DF$Sex <- SEX
DF$Min_Age <- LOWER_AGE
DF$Max_Age <- UPPER_AGE
DF <- DF %>% select(County,Sex,Min_Age,Max_Age,Death_Rate,Rate_SD,Imparted_Rate,everything())
return(DF)
}
MORTALITY_DATA_ALL <- rbind(
GET_MORTALITY_DATA("Data/Mortality_Rates/Female/A_Under1.csv","Female",0,0),
GET_MORTALITY_DATA("Data/Mortality_Rates/Female/B_1_9.csv","Female",1,9),
GET_MORTALITY_DATA("Data/Mortality_Rates/Female/C_10_19.csv","Female",10,19),
GET_MORTALITY_DATA("Data/Mortality_Rates/Female/D_20_39.csv","Female",20,39),
GET_MORTALITY_DATA("Data/Mortality_Rates/Female/E_40_64.csv","Female",40,64),
GET_MORTALITY_DATA("Data/Mortality_Rates/Female/G_65_74.csv","Female",65,74),
GET_MORTALITY_DATA("Data/Mortality_Rates/Female/H_75_84.csv","Female",75,84),
GET_MORTALITY_DATA("Data/Mortality_Rates/Female/I_85+.csv","Female",85,Inf),
GET_MORTALITY_DATA("Data/Mortality_Rates/Male/A_Under1.csv","Male",0,0),
GET_MORTALITY_DATA("Data/Mortality_Rates/Male/B_1_9.csv","Male",1,9),
GET_MORTALITY_DATA("Data/Mortality_Rates/Male/C_10_19.csv","Male",10,19),
GET_MORTALITY_DATA("Data/Mortality_Rates/Male/D_20_39.csv","Male",20,39),
GET_MORTALITY_DATA("Data/Mortality_Rates/Male/E_40_64.csv","Male",40,64),
GET_MORTALITY_DATA("Data/Mortality_Rates/Male/G_65_74.csv","Male",65,74),
GET_MORTALITY_DATA("Data/Mortality_Rates/Male/H_75_84.csv","Male",75,84),
GET_MORTALITY_DATA("Data/Mortality_Rates/Male/I_85+.csv","Male",85,Inf)
)
LIN_MORTALITY <- MORTALITY_DATA_ALL %>% filter(County=="Lincoln")
###Save Mortality Rate data set
write_csv(LIN_MORTALITY,file="./Data/Cleaned_Data/Lincoln_Mortality_Rate.csv")
write_csv(MORTALITY_DATA_ALL,file="./Data/Cleaned_Data/Not_Used/Wyoming_County_Mortality_Rate.csv")
saveRDS(LIN_MORTALITY,file="./Data/Cleaned_Data/Lincoln_Mortality_Rate.Rds")
saveRDS(MORTALITY_DATA_ALL,file="./Data/Cleaned_Data/Not_Used/Wyoming_County_Mortality_Rate.Rds")
#Clean all data from memory. Will be loaded in any paticular script as needed
rm(list = ls())
gc()

View File

@ -0,0 +1,44 @@
library(tidyverse)
library(readxl)
###################Demographics
if(!file.exists("./Data/Demo_Single_Year_2020s.xls")){download.file('http://eadiv.state.wy.us/Pop/CO_SYASEX24.xlsx',"./Data/Demo_Single_Year_2020s.xls")}
TEMP <- read_xlsx("./Data/Demo_Single_Year_2020s.xls",skip=2)[,-1]
TEMP <- TEMP[1:(min(which(is.na(TEMP[,1])))-1),]
TEMP <- TEMP[!grepl("Base",TEMP$YEAR,ignore.case=TRUE),] #There are two population values provided. I believe one is the census baseline, and one is a estimate in July. Keep the later estimate, to line up with the same seasonal collection pattern of the rest of the data
TEMP$YEAR <- year(as.Date(substr((TEMP$YEAR),1,8),format="%m/%d/%Y"))
colnames(TEMP) <- c("County","Year","Age","Number","Num_Male","Num_Female")
TEMP$County <- gsub(" County","",TEMP$County,ignore.case=TRUE)
DEM_2020 <- TEMP %>% select(-Number)
###Demographics all
DEM_DATA <- read_delim("Data/County_Demographics_Census/wy.1969_2023.singleages.through89.90plus.txt",delim=" ",col_names=c("ID","VALUES"),col_types=list('c','c'))
DEM_DATA$Year <- as.integer(substr(DEM_DATA$ID,1,4))
DEM_DATA$fips<- substr(DEM_DATA$ID,7,11)
COUNTY_LIST <- read_csv("https://github.com/kjhealy/fips-codes/raw/refs/heads/master/county_fips_master.csv",col_types=list('c','c')) %>% filter(state_abbr=="WY") %>% select(fips,County=county_name) %>% mutate(County=gsub(" ","_",gsub(" County","",County,ignore.case=TRUE)))
DEM_DATA <- DEM_DATA %>% left_join(COUNTY_LIST) %>% select(-fips)
#16=3
DEM_DATA$Sex <- ifelse(substr(DEM_DATA$VALUES,3,3)==1,"Male","Female")
DEM_DATA$Age <- parse_number(substr(DEM_DATA$VALUES,4,5))
DEM_DATA$Number <- parse_number(substr(DEM_DATA$VALUES,6,14))
DEM_DATA <- DEM_DATA %>% select(-ID,-VALUES)
DEM_DATA <- DEM_DATA %>% group_by(Year,County,Sex,Age) %>% summarize(Number=sum(Number)) %>% ungroup()#Aggregate to sex and age level
#The Wyoming census data seems newer than this data set from SEER cancer data source. Drop any of these records that overlap with the Wyoming data before merging. Arrange so the column order is the same between the two data sets, so they can be easily bound together.
DEM_DATA <- pivot_wider(DEM_DATA,names_from=Sex,values_from=Number) %>% rename(Num_Female=Female,Num_Male=Male) %>% select(colnames(DEM_2020)) %>% filter(Year<min(DEM_2020$Year)) %>% unique
DEM_DATA <- rbind(DEM_2020,DEM_DATA) %>% ungroup %>% arrange(Year,Age) %>% unique %>% mutate(Region=County) %>% select(County,Region,Year,Age,Num_Male,Num_Female) #Add a region which in this case is just the county. For Lincoln cities and other areas are analysed
###Save demographic data set
if(!exists("SAVE_DEMO_LOC")){SAVE_DEMO_LOC <-"./Data/Cleaned_Data/Demographic_Sex_Age_Data"}
CSV_SAVE <- paste0(SAVE_DEMO_LOC,"/CSV")
RDS_SAVE <- paste0(SAVE_DEMO_LOC,"/RDS")
#Save files for all county demographics
dir.create(CSV_SAVE , recursive = TRUE, showWarnings = FALSE)
dir.create(RDS_SAVE , recursive = TRUE, showWarnings = FALSE)
saveRDS(DEM_DATA,paste0(RDS_SAVE,"/All_Wyoming_Counties_Demographics.Rds" ))
write_csv(DEM_DATA,paste0(CSV_SAVE,"/All_Wyoming_Counties_Demographics.csv" ))
#Save a file that is just Lincoln County, this just speeds up code so you do not always need to filter for 'Lincoln'
LIN_DEM <- DEM_DATA %>% filter(County=='Lincoln')
saveRDS(LIN_DEM,paste0(RDS_SAVE,"/Full_Lincoln_County_Demographics.Rds" ))
write_csv(LIN_DEM,paste0(CSV_SAVE,"/Full_Lincoln_County_Demographics.csv" ))

View File

@ -0,0 +1,99 @@
#########################################Mortality Rate
library(tidyverse)
###Create Location to Save raw data sets
if(!exists("SAVE_LOC_RAW")){SAVE_LOC_RAW <-"./Data/Raw_Data/"}
dir.create(SAVE_LOC_RAW, recursive = TRUE, showWarnings = FALSE)
GET_MORTALITY_DATA <- function(FILE,SEX,LOWER_AGE,UPPER_AGE){
#Create clean mortality rate data
#Data gathered from https://hdpulse.nimhd.nih.gov/data-portal/mortality/table?cod=247&cod_options=cod_15&ratetype=aa&ratetype_options=ratetype_2&race=00&race_options=race_6&sex=2&sex_options=sex_3&age=177&age_options=age_11&ruralurban=0&ruralurban_options=ruralurban_3&yeargroup=5&yeargroup_options=year5yearmort_1&statefips=56&statefips_options=area_states&county=56000&county_options=counties_wyoming&comparison=counties_to_us&comparison_options=comparison_counties&radio_comparison=areas&radio_comparison_options=cods_or_areas
NAMES <- c("County","FIPS","Death_Rate","Lower_Rate","Upper_Rate","Deaths","Trend_Category","Trend","Lower_Trend","Upper_Trend")
DF <- read_csv(FILE,skip=5,col_names=NAMES,col_types=list('c',"i",'d','d','d','d','c','d','d','d')) %>% filter(grepl("County|Wyoming",County)|County=="United States") %>% mutate(Rate_SD=(Upper_Rate-Lower_Rate)/(2*1.96),Trend_SD=(Upper_Trend-Lower_Trend)/(2*1.96)) %>% select(County,Death_Rate,Rate_SD,Trend,Trend_SD)
DF$County <- gsub(" County","",DF$County,ignore.case=TRUE)
DF[,-1] <- DF[,-1]/100000
WYOMING_TREND <- pull(DF[DF$County=="Wyoming",],"Trend")
US_TREND <- pull(DF[DF$County=="United States",],"Trend")
WYOMING_RATE <- pull(DF[DF$County=="Wyoming",],"Death_Rate")
US_RATE <- pull(DF[DF$County=="United States",],"Death_Rate")
DF$Imparted_Trend <- FALSE
if(is.na(WYOMING_TREND)){
DF[1,4:5] <- DF[2,4:5]
DF[1,6] <- TRUE
}
DF$Imparted_Rate <- FALSE
if(is.na(WYOMING_RATE)){
DF[1,4:5] <- DF[2,2:3]
DF[1,6] <- TRUE
}
WYOMING_BASELINE_TREND <-cbind (DF[1,4:5] ,TRUE)
WYOMING_BASELINE_RATE <-DF[1,2:3]
for(i in 3:nrow(DF)){
#Impart any missing trends based on higher levels
if(is.na(pull(DF[i,],"Trend"))){ DF[i,4:6] <- WYOMING_BASELINE_TREND}
#Impart any missing death rates based on higher levels
if(is.na(pull(DF[i,],"Death_Rate"))){
DF[i,2:3] <- WYOMING_BASELINE_RATE
DF[i,"Imparted_Rate"] <- TRUE
}
}
DF$Sex <- SEX
DF$Min_Age <- LOWER_AGE
DF$Max_Age <- UPPER_AGE
DF <- DF %>% select(County,Sex,Min_Age,Max_Age,Death_Rate,Rate_SD,Imparted_Rate,everything())
return(DF)
}
SAVE_LOC_RAW_MORT <- paste0(SAVE_LOC_RAW,"Mortality_Rates/")
RAW_MORTALITY_LOC_FEMALE <- paste0(SAVE_LOC_RAW_MORT,"Female/")
RAW_MORTALITY_LOC_MALE <- paste0(SAVE_LOC_RAW_MORT,"Male/")
#Gather all mortality Records
MORTALITY_DATA_ALL <- rbind(
GET_MORTALITY_DATA(paste0(RAW_MORTALITY_LOC_FEMALE,"A_Under1.csv"),"Female",0,0),
GET_MORTALITY_DATA(paste0(RAW_MORTALITY_LOC_FEMALE,"B_1_9.csv"),"Female",1,9),
GET_MORTALITY_DATA(paste0(RAW_MORTALITY_LOC_FEMALE,"C_10_19.csv"),"Female",10,19),
GET_MORTALITY_DATA(paste0(RAW_MORTALITY_LOC_FEMALE,"D_20_39.csv"),"Female",20,39),
GET_MORTALITY_DATA(paste0(RAW_MORTALITY_LOC_FEMALE,"E_40_64.csv"),"Female",40,64),
GET_MORTALITY_DATA(paste0(RAW_MORTALITY_LOC_FEMALE,"G_65_74.csv"),"Female",65,74),
GET_MORTALITY_DATA(paste0(RAW_MORTALITY_LOC_FEMALE,"H_75_84.csv"),"Female",75,84),
GET_MORTALITY_DATA(paste0(RAW_MORTALITY_LOC_FEMALE,"I_85+.csv"),"Female",85,Inf),
GET_MORTALITY_DATA(paste0(RAW_MORTALITY_LOC_MALE,"A_Under1.csv"),"Male",0,0),
GET_MORTALITY_DATA(paste0(RAW_MORTALITY_LOC_MALE,"B_1_9.csv"),"Male",1,9),
GET_MORTALITY_DATA(paste0(RAW_MORTALITY_LOC_MALE,"C_10_19.csv"),"Male",10,19),
GET_MORTALITY_DATA(paste0(RAW_MORTALITY_LOC_MALE,"D_20_39.csv"),"Male",20,39),
GET_MORTALITY_DATA(paste0(RAW_MORTALITY_LOC_MALE,"E_40_64.csv"),"Male",40,64),
GET_MORTALITY_DATA(paste0(RAW_MORTALITY_LOC_MALE,"G_65_74.csv"),"Male",65,74),
GET_MORTALITY_DATA(paste0(RAW_MORTALITY_LOC_MALE,"H_75_84.csv"),"Male",75,84),
GET_MORTALITY_DATA(paste0(RAW_MORTALITY_LOC_MALE,"I_85+.csv"),"Male",85,Inf)
)
LIN_MORTALITY <- MORTALITY_DATA_ALL %>% filter(County=="Lincoln")
##Save the mortality data
if(!exists("SAVE_MORT_LOC")){SAVE_MORT_LOC <-"./Data/Cleaned_Data/Mortality_Rate_Data"}
CSV_SAVE <- paste0(SAVE_MORT_LOC ,"/CSV")
RDS_SAVE <- paste0(SAVE_MORT_LOC,"/RDS")
#Save files for all county demographics
dir.create(CSV_SAVE , recursive = TRUE, showWarnings = FALSE)
dir.create(RDS_SAVE , recursive = TRUE, showWarnings = FALSE)
saveRDS(MORTALITY_DATA_ALL,paste0(RDS_SAVE,"/All_Wyoming_Counties_Mortality_Rates.Rds" ))
write_csv(MORTALITY_DATA_ALL,paste0(CSV_SAVE,"/All_Wyoming_Counties_Mortality_Rates.csv" ))
saveRDS(LIN_MORTALITY,paste0(RDS_SAVE,"/Lincoln_County_Mortality_Rates.Rds" ))
write_csv(LIN_MORTALITY,paste0(CSV_SAVE,"/Lincoln_County_Mortality_Rates.csv" ))
#Create a short readme files to make the data sources more clear
#Save a raw data readme
sink(file=paste0(SAVE_LOC_RAW,"/README_MORTALITY_DATA.txt"),append=FALSE)
cat("Data files gathered manually from:\n")
cat("https://hdpulse.nimhd.nih.gov/data-portal/mortality/table?cod=247&cod_options=cod_15&ratetype=aa&ratetype_options=ratetype_2&race=00&race_options=race_6&sex=2&sex_options=sex_3&age=177&age_options=age_11&ruralurban=0&ruralurban_options=ruralurban_3&yeargroup=5&yeargroup_options=year5yearmort_1&statefips=56&statefips_options=area_states&county=56000&county_options=counties_wyoming&comparison=counties_to_us&comparison_options=comparison_counties&radio_comparison=areas&radio_comparison_options=cods_or_areas\n")
cat("\nEach file is single age group, so age weighting does not apply despite the variable names\n")
sink()
#Save a processed raw data readme
sink(file=paste0(SAVE_MORT_LOC,"/README_MORTALITY_DATA.txt"),append=FALSE)
cat("This is a processed file of NIH death rates by age and county. Data files first gathered manually from:\n")
cat("\nhttps://hdpulse.nimhd.nih.gov/data-portal/mortality/table?cod=247&cod_options=cod_15&ratetype=aa&ratetype_options=ratetype_2&race=00&race_options=race_6&sex=2&sex_options=sex_3&age=177&age_options=age_11&ruralurban=0&ruralurban_options=ruralurban_3&yeargroup=5&yeargroup_options=year5yearmort_1&statefips=56&statefips_options=area_states&county=56000&county_options=counties_wyoming&comparison=counties_to_us&comparison_options=comparison_counties&radio_comparison=areas&radio_comparison_options=cods_or_areas\n")
cat("\nThese manually saved files are in the raw data directory. Each file is single age group, so age weighting does not apply despite the variable names\n")
sink()

View File

@ -0,0 +1,44 @@
##Extract Lincoln county ACS data including separate indicators for if a census tract is in Kemmerer, or any other part of the county. Population by Age-Sex in each year for each tract.
GET_ACS_LIN_DATA <- function(ACS_YEAR,ACS_CODES=CODES,CENSUS_TRACTS=PROJ_TRACTS){
AGE_DATA <- get_acs(geography="tract",year=ACS_YEAR,variables=ACS_CODES$variable,state='WY',county='lincoln') %>% mutate(se=moe/1.64) %>% left_join(ACS_CODES %>% mutate(variable=gsub('E','',variable))) %>% select(-NAME,-moe) %>% left_join(CENSUS_TRACTS) %>% mutate(IN_KEM=ifelse(is.na(IN_KEM),0,1)) %>% rename(Population=estimate) %>% select(-variable,-GEOID) %>% select(Sex,Min_Age,Max_Age,Med_Age,IN_KEM,Population,se) %>% filter(!(Min_Age==0& Max_Age==Inf))
AGE_DATA <- AGE_DATA %>% group_by(Sex,Min_Age,Max_Age,Med_Age,IN_KEM)%>% summarize(Population=sum(Population)) %>% ungroup
#Add Descriptive age category to a clean graph
AGE_DATA$Ages <- paste(AGE_DATA$Min_Age,"to",AGE_DATA$Max_Age)
AGE_DATA[AGE_DATA$Max_Age==Inf,"Ages"] <- "85+"
AGE_DATA[AGE_DATA$Med_Age==18,"Ages"] <- "18"
AGE_DATA[AGE_DATA$Med_Age==20,"Ages"] <- "20"
AGE_DATA[AGE_DATA$Med_Age==21,"Ages"] <- "21"
AGE_DATA[AGE_DATA$Min_Age==0,"Ages"] <- "Under 5"
#Turn the ages into factors to keep the correct order in graphs
#Add the percent of total relative population in the region
ORD <- AGE_DATA %>% select(Min_Age,Ages) %>% unique %>% arrange(Min_Age) %>% pull(Ages) %>% unique
AGE_DATA$Ages <- factor(AGE_DATA$Ages,levels=ORD)
AGE_DATA$Year <- ACS_YEAR
AGE_DATA <-AGE_DATA %>% select(Year,Sex,Ages,IN_KEM,everything())
return(AGE_DATA)
}
####################Loop to create data for each year. Projecting distribution of ages into Kemmerer, and returning a demographic distribution for Kemmerer/Diamondville in each year of the ACS (currently 2009 to 2023)
MAKE_KEM_DEMO_DATA_YEAR <- function(ACS_YEAR){
LIN_DEMOGRAPHICS <- readRDS("Data/Cleaned_Data/Lincoln_Demographic_Data.Rds") %>% filter(Year==ACS_YEAR)
AGE_DATA <- GET_ACS_LIN_DATA(ACS_YEAR)
for(i in 1:nrow(AGE_DATA)){
if(i==1 & exists("RES")){rm(RES)}
C_DEMO <- AGE_DATA[i,]
C_SEX <- C_DEMO$Sex
if(C_DEMO$Min_Age==C_DEMO$Max_Age){C_AGE_YEARLY_DATA <- LIN_DEMOGRAPHICS %>% filter(Age==C_DEMO$Min_Age)}else{
C_AGE_YEARLY_DATA <- LIN_DEMOGRAPHICS %>% filter(Age>=C_DEMO$Min_Age,Age<=C_DEMO$Max_Age)
}
#Extract only the current sex being applied
if(C_SEX=='Female'){C_AGE_YEARLY_DATA <- C_AGE_YEARLY_DATA %>% select(Age,Num_Female) %>% rename(POP=Num_Female)}else{C_AGE_YEARLY_DATA <- C_AGE_YEARLY_DATA %>% select(Age,Num_Male) %>% rename(POP=Num_Male)}
C_AGE_YEARLY_DATA$Sex <- C_SEX
C_AGE_YEARLY_DATA$PER <- C_AGE_YEARLY_DATA$POP/sum(C_AGE_YEARLY_DATA$POP)
C_RES <- C_AGE_YEARLY_DATA%>% left_join(C_DEMO) %>% mutate(Population=PER*Population) %>% select(Age,Sex,IN_KEM,Population)
if(i==1){RES <- C_RES}else{RES <- rbind(RES,C_RES)}
}
RES$Year <- ACS_YEAR
RES$County <- "Lincoln"
RES <- RES %>% select(County,IN_KEM,Year,Age,Sex,Population)
return(RES)
}

View File

@ -1,9 +1,11 @@
#library(tidyverse);setwd("../")
library(tidycensus)
library(zipcodeR)
source("Scripts/Get_Sim_Intial_Demographic_Data.r")
if(!exists("SAVE_DEMO_LOC")){SAVE_DEMO_LOC <-"./Data/Cleaned_Data/Demographic_Sex_Age_Data"}
ACS_END_YEAR <- 2023 #most recent in package as of Nov 4 2025
#Pull the relevant median age variables the value moe (margine of error) can be converted to standard error, following the link below
#https://www.census.gov/content/dam/Census/library/publications/2018/acs/acs_general_handbook_2018_ch08.pdf
ACS_YEAR <- 2023 #most recent as of Nov 4 2025
source("Scripts/Downshift_Population_Functions.r")
#Packages to instal on computer if zipcodeR won't install
#Sudo apt install libssl-dev libudunits2-dev libabsl-dev libcurl4-openssl-dev libgdal-dev cmake libfontconfig1-dev libharfbuzz-dev libfribidi-dev
#install.packages("zipcodeR")
@ -13,28 +15,32 @@ source("Scripts/Downshift_Population_Functions.r")
#census_api_key(KEY, install = TRUE)
PROJ_TRACTS <- get_tracts(search_city('Kemmerer','WY')$zipcode) %>% full_join(get_tracts(search_city('Diamondville','WY')$zipcode))
PROJ_TRACTS <- PROJ_TRACTS %>% select(GEOID) %>% mutate('IN_KEM'=1) %>% mutate(GEOID=as.character(GEOID))
MED_AGE_VAR <- cbind(c('B01002_001E','B01002_002E','B01002_003E'),c('Median_Age','Median_Age_Male','Median_Age_Female')) %>% as_tibble %>% rename(variable=V1,Data_Type=V2)
###Load data manually created which links vairable names to sex-age census data
CODES <- read_csv("Data/API_CENSUS_CODES.csv",skip=1) %>% mutate(Med_Age=(Min_Age+Max_Age)/2) %>% rename(variable=Code)
#Testing age Comparison between the two
###Extract census data for all tracts in Lincoln county, clean up the data, and indicate if the tract is in Kemmerer/Diamondvile or not.
AGE_DATA <- get_acs(geography="tract",year=ACS_YEAR,variables=CODES$variable,state='WY',county='lincoln') %>% mutate(se=moe/1.64) %>% left_join(CODES %>% mutate(variable=gsub('E','',variable))) %>% select(-NAME,-moe) %>% left_join(PROJ_TRACTS) %>% mutate(IN_KEM=ifelse(is.na(IN_KEM),0,1)) %>% rename(Population=estimate) %>% select(-variable,-GEOID) %>% select(Sex,Min_Age,Max_Age,Med_Age,IN_KEM,Population,se) %>% filter(!(Min_Age==0& Max_Age==Inf))
AGE_DATA <- AGE_DATA %>% group_by(Sex,Min_Age,Max_Age,Med_Age,IN_KEM)%>% summarize(Population=sum(Population)) %>% ungroup
#Add Descriptive age category to a clean graph
AGE_DATA$Ages <- paste(AGE_DATA$Min_Age,"to",AGE_DATA$Max_Age)
AGE_DATA[AGE_DATA$Max_Age==Inf,"Ages"] <- "85+"
AGE_DATA[AGE_DATA$Med_Age==18,"Ages"] <- "18"
AGE_DATA[AGE_DATA$Med_Age==20,"Ages"] <- "20"
AGE_DATA[AGE_DATA$Med_Age==21,"Ages"] <- "21"
AGE_DATA[AGE_DATA$Min_Age==0,"Ages"] <- "Under 5"
#Turn the ages into factors to keep the correct order in graphs
#Add the percent of total relative population in the region
ORD <- AGE_DATA %>% select(Min_Age,Ages) %>% unique %>% arrange(Min_Age) %>% pull(Ages) %>% unique
AGE_DATA$Ages <- factor(AGE_DATA$Ages,levels=ORD)
AGE_DATA$Year <- ACS_YEAR
AGE_DATA <-AGE_DATA %>% select(Year,Sex,Ages,IN_KEM,everything())
if(!exists('ACS_FILE_LOC')){ACS_FILE_LOC <- "Data/Cleaned_Data/ACS_Census_Demographic_Data.Rds"}
saveRDS(AGE_DATA,ACS_FILE_LOC)
DEMO_DATA_ALL <- do.call(rbind,lapply(2009:ACS_END_YEAR,MAKE_KEM_DEMO_DATA_YEAR))
ORIG_DEMO_DATA_ALL <- DEMO_DATA_ALL
#Merge Age of 85+ into one group, because NIH death spastics end in this age category so it is extra overhead to keep each year
YOUNG <- DEMO_DATA_ALL %>% filter(Age<85)
OLD <- DEMO_DATA_ALL %>% filter(Age>=85) %>% group_by(County,IN_KEM,Year,Sex) %>% summarize(Age=85,Population=sum(Population)) %>% ungroup
DEMO_DATA_ALL <- rbind(YOUNG,OLD) %>% arrange(County,Year,IN_KEM,Sex)
DEMO_DATA_ALL <- DEMO_DATA_ALL %>% mutate(Population=round(Population)) %>% pivot_wider(values_from=Population,names_from=Sex,names_prefix="Num_")
OTHER_LIN_DEMO_DATA <- DEMO_DATA_ALL %>% filter(IN_KEM==0) %>% rename(Region=IN_KEM) %>% mutate(Region='Lincoln_Other')
KEM_DEMO_DATA <- DEMO_DATA_ALL %>% filter(IN_KEM==1) %>% rename(Region=IN_KEM) %>% mutate(Region='Kemmerer')
####Save results
CSV_SAVE <- paste0(SAVE_DEMO_LOC,"/CSV")
RDS_SAVE <- paste0(SAVE_DEMO_LOC,"/RDS")
dir.create(CSV_SAVE , recursive = TRUE, showWarnings = FALSE)
dir.create(RDS_SAVE , recursive = TRUE, showWarnings = FALSE)
#Kemmerer Save
saveRDS(KEM_DEMO_DATA,paste0(RDS_SAVE,"/Kemmerer_Diamondville_Demographics.Rds" ))
write_csv(KEM_DEMO_DATA,paste0(CSV_SAVE,"/Kemmerer_Diamondville_Demographics.csv" ))
#Other Lincoln area (not Kemmerer) save
saveRDS(OTHER_LIN_DEMO_DATA,paste0(RDS_SAVE,"/Other_Lincoln_Demographics.Rds" ))
write_csv(OTHER_LIN_DEMO_DATA,paste0(CSV_SAVE,"/Other_Lincoln_Demographics.csv" ))