Population_Study/Scripts/Get_Sim_Intial_Demographic_Data.r
2025-11-05 16:20:20 -07:00

45 lines
2.9 KiB
R

##Extract Lincoln county ACS data including separate indicators for if a census tract is in Kemmerer, or any other part of the county. Population by Age-Sex in each year for each tract.
GET_ACS_LIN_DATA <- function(ACS_YEAR,ACS_CODES=CODES,CENSUS_TRACTS=PROJ_TRACTS){
AGE_DATA <- get_acs(geography="tract",year=ACS_YEAR,variables=ACS_CODES$variable,state='WY',county='lincoln') %>% mutate(se=moe/1.64) %>% left_join(ACS_CODES %>% mutate(variable=gsub('E','',variable))) %>% select(-NAME,-moe) %>% left_join(CENSUS_TRACTS) %>% mutate(IN_KEM=ifelse(is.na(IN_KEM),0,1)) %>% rename(Population=estimate) %>% select(-variable,-GEOID) %>% select(Sex,Min_Age,Max_Age,Med_Age,IN_KEM,Population,se) %>% filter(!(Min_Age==0& Max_Age==Inf))
AGE_DATA <- AGE_DATA %>% group_by(Sex,Min_Age,Max_Age,Med_Age,IN_KEM)%>% summarize(Population=sum(Population)) %>% ungroup
#Add Descriptive age category to a clean graph
AGE_DATA$Ages <- paste(AGE_DATA$Min_Age,"to",AGE_DATA$Max_Age)
AGE_DATA[AGE_DATA$Max_Age==Inf,"Ages"] <- "85+"
AGE_DATA[AGE_DATA$Med_Age==18,"Ages"] <- "18"
AGE_DATA[AGE_DATA$Med_Age==20,"Ages"] <- "20"
AGE_DATA[AGE_DATA$Med_Age==21,"Ages"] <- "21"
AGE_DATA[AGE_DATA$Min_Age==0,"Ages"] <- "Under 5"
#Turn the ages into factors to keep the correct order in graphs
#Add the percent of total relative population in the region
ORD <- AGE_DATA %>% select(Min_Age,Ages) %>% unique %>% arrange(Min_Age) %>% pull(Ages) %>% unique
AGE_DATA$Ages <- factor(AGE_DATA$Ages,levels=ORD)
AGE_DATA$Year <- ACS_YEAR
AGE_DATA <-AGE_DATA %>% select(Year,Sex,Ages,IN_KEM,everything())
return(AGE_DATA)
}
####################Loop to create data for each year. Projecting distribution of ages into Kemmerer, and returning a demographic distribution for Kemmerer/Diamondville in each year of the ACS (currently 2009 to 2023)
MAKE_KEM_DEMO_DATA_YEAR <- function(ACS_YEAR){
LIN_DEMOGRAPHICS <- readRDS("Data/Cleaned_Data/Lincoln_Demographic_Data.Rds") %>% filter(Year==ACS_YEAR)
AGE_DATA <- GET_ACS_LIN_DATA(ACS_YEAR)
for(i in 1:nrow(AGE_DATA)){
if(i==1 & exists("RES")){rm(RES)}
C_DEMO <- AGE_DATA[i,]
C_SEX <- C_DEMO$Sex
if(C_DEMO$Min_Age==C_DEMO$Max_Age){C_AGE_YEARLY_DATA <- LIN_DEMOGRAPHICS %>% filter(Age==C_DEMO$Min_Age)}else{
C_AGE_YEARLY_DATA <- LIN_DEMOGRAPHICS %>% filter(Age>=C_DEMO$Min_Age,Age<=C_DEMO$Max_Age)
}
#Extract only the current sex being applied
if(C_SEX=='Female'){C_AGE_YEARLY_DATA <- C_AGE_YEARLY_DATA %>% select(Age,Num_Female) %>% rename(POP=Num_Female)}else{C_AGE_YEARLY_DATA <- C_AGE_YEARLY_DATA %>% select(Age,Num_Male) %>% rename(POP=Num_Male)}
C_AGE_YEARLY_DATA$Sex <- C_SEX
C_AGE_YEARLY_DATA$PER <- C_AGE_YEARLY_DATA$POP/sum(C_AGE_YEARLY_DATA$POP)
C_RES <- C_AGE_YEARLY_DATA%>% left_join(C_DEMO) %>% mutate(Population=PER*Population) %>% select(Age,Sex,IN_KEM,Population)
if(i==1){RES <- C_RES}else{RES <- rbind(RES,C_RES)}
}
RES$Year <- ACS_YEAR
RES$County <- "Lincoln"
RES <- RES %>% select(County,IN_KEM,Year,Age,Sex,Population)
return(RES)
}