Population_Study/Scripts/Load_ACS_Census_Data.r
2025-11-05 16:20:20 -07:00

47 lines
2.9 KiB
R

#library(tidyverse);setwd("../")
library(tidycensus)
library(zipcodeR)
source("Scripts/Get_Sim_Intial_Demographic_Data.r")
if(!exists("SAVE_DEMO_LOC")){SAVE_DEMO_LOC <-"./Data/Cleaned_Data/Demographic_Sex_Age_Data"}
ACS_END_YEAR <- 2023 #most recent in package as of Nov 4 2025
#Pull the relevant median age variables the value moe (margine of error) can be converted to standard error, following the link below
#https://www.census.gov/content/dam/Census/library/publications/2018/acs/acs_general_handbook_2018_ch08.pdf
#Packages to instal on computer if zipcodeR won't install
#Sudo apt install libssl-dev libudunits2-dev libabsl-dev libcurl4-openssl-dev libgdal-dev cmake libfontconfig1-dev libharfbuzz-dev libfribidi-dev
#install.packages("zipcodeR")
#Add API key if missing
#KEY <- '30e13ab22563318ff59286e433099f4174d4edd4'
#census_api_key(KEY, install = TRUE)
PROJ_TRACTS <- get_tracts(search_city('Kemmerer','WY')$zipcode) %>% full_join(get_tracts(search_city('Diamondville','WY')$zipcode))
PROJ_TRACTS <- PROJ_TRACTS %>% select(GEOID) %>% mutate('IN_KEM'=1) %>% mutate(GEOID=as.character(GEOID))
###Load data manually created which links vairable names to sex-age census data
CODES <- read_csv("Data/API_CENSUS_CODES.csv",skip=1) %>% mutate(Med_Age=(Min_Age+Max_Age)/2) %>% rename(variable=Code)
#Testing age Comparison between the two
###Extract census data for all tracts in Lincoln county, clean up the data, and indicate if the tract is in Kemmerer/Diamondvile or not.
DEMO_DATA_ALL <- do.call(rbind,lapply(2009:ACS_END_YEAR,MAKE_KEM_DEMO_DATA_YEAR))
ORIG_DEMO_DATA_ALL <- DEMO_DATA_ALL
#Merge Age of 85+ into one group, because NIH death spastics end in this age category so it is extra overhead to keep each year
YOUNG <- DEMO_DATA_ALL %>% filter(Age<85)
OLD <- DEMO_DATA_ALL %>% filter(Age>=85) %>% group_by(County,IN_KEM,Year,Sex) %>% summarize(Age=85,Population=sum(Population)) %>% ungroup
DEMO_DATA_ALL <- rbind(YOUNG,OLD) %>% arrange(County,Year,IN_KEM,Sex)
DEMO_DATA_ALL <- DEMO_DATA_ALL %>% mutate(Population=round(Population)) %>% pivot_wider(values_from=Population,names_from=Sex,names_prefix="Num_")
OTHER_LIN_DEMO_DATA <- DEMO_DATA_ALL %>% filter(IN_KEM==0) %>% rename(Region=IN_KEM) %>% mutate(Region='Lincoln_Other')
KEM_DEMO_DATA <- DEMO_DATA_ALL %>% filter(IN_KEM==1) %>% rename(Region=IN_KEM) %>% mutate(Region='Kemmerer')
####Save results
CSV_SAVE <- paste0(SAVE_DEMO_LOC,"/CSV")
RDS_SAVE <- paste0(SAVE_DEMO_LOC,"/RDS")
dir.create(CSV_SAVE , recursive = TRUE, showWarnings = FALSE)
dir.create(RDS_SAVE , recursive = TRUE, showWarnings = FALSE)
#Kemmerer Save
saveRDS(KEM_DEMO_DATA,paste0(RDS_SAVE,"/Kemmerer_Diamondville_Demographics.Rds" ))
write_csv(KEM_DEMO_DATA,paste0(CSV_SAVE,"/Kemmerer_Diamondville_Demographics.csv" ))
#Other Lincoln area (not Kemmerer) save
saveRDS(OTHER_LIN_DEMO_DATA,paste0(RDS_SAVE,"/Other_Lincoln_Demographics.Rds" ))
write_csv(OTHER_LIN_DEMO_DATA,paste0(CSV_SAVE,"/Other_Lincoln_Demographics.csv" ))