diff --git a/Scripts/1D_Use_ACS_Census_Data_to_Estimate_Kemmerer_Demographics.r b/Scripts/1D_Use_ACS_Census_Data_to_Estimate_Kemmerer_Demographics.r index 093f51e..a974a04 100644 --- a/Scripts/1D_Use_ACS_Census_Data_to_Estimate_Kemmerer_Demographics.r +++ b/Scripts/1D_Use_ACS_Census_Data_to_Estimate_Kemmerer_Demographics.r @@ -14,19 +14,14 @@ ACS_END_YEAR <- 2023 #most recent in package as of Nov 4 2025 #Add API key if missing #KEY <- '30e13ab22563318ff59286e433099f4174d4edd4' #census_api_key(KEY, install = TRUE) -PROJ_TRACTS <- get_tracts(search_city('Kemmerer','WY')$zipcode)# %>% full_join(get_tracts(search_city('Diamondville','WY')$zipcode)) +PROJ_TRACTS <- get_tracts(search_city('Kemmerer','WY')$zipcode) %>% full_join(get_tracts(search_city('Diamondville','WY')$zipcode)) + #Manual search of GEOID finds that 56023978200 is not Kemmerer, search_county('Lincoln','Wy')$zipcode + #https://www.geocod.io/geoids/wyoming/lincoln-county-56023/978400/ is Kemmerer + #https://www.geocod.io/geoids/wyoming/lincoln-county-56023/978200/ is mostly other areas +PROJ_TRACTS <- PROJ_TRACTS %>% select(-ZCTA5) %>% filter(GEOID!=56023978200) %>% unique PROJ_TRACTS <- PROJ_TRACTS %>% select(GEOID) %>% mutate('IN_KEM'=1) %>% mutate(GEOID=as.character(GEOID)) ###Load data manually created which links vairable names to sex-age census data CODES <- read_csv("./Data/Raw_Data/ACS_Demographics/API_CENSUS_CODES.csv",skip=1) %>% mutate(Med_Age=(Min_Age+Max_Age)/2) %>% rename(variable=Code) -CODES %>% filter(Min_Age==0,Max_Age==Inf) -TEST_CODES <- c('B01001_002E','B01001_026E') #All men and all woment -TEMP <- -TEMP_COUNTY <- get_acs(geography="tract",variables=TEST_CODES,state='WY',county='lincoln') -TEMP_COUNTY$estimate %>% sum -search_city('Kemmerer','WY') -TEMP -(TEMP %>% inner_join(PROJ_TRACTS) %>% pull(estimate) %>% sum)/3104 -TEST <- MAKE_KEM_DEMO_DATA_YEAR(2020) #Testing age Comparison between the two ###Extract census data for all tracts in Lincoln county, clean up the data, and indicate if the tract is in Kemmerer/Diamondvile or not. DEMO_DATA_ALL <- do.call(rbind,lapply(2009:ACS_END_YEAR,MAKE_KEM_DEMO_DATA_YEAR)) @@ -38,6 +33,16 @@ DEMO_DATA_ALL <- rbind(YOUNG,OLD) %>% arrange(County,Year,IN_KEM,Sex) DEMO_DATA_ALL <- DEMO_DATA_ALL %>% mutate(Population=round(Population)) %>% pivot_wider(values_from=Population,names_from=Sex,names_prefix="Num_") OTHER_LIN_DEMO_DATA <- DEMO_DATA_ALL %>% filter(IN_KEM==0) %>% rename(Region=IN_KEM) %>% mutate(Region='Lincoln_Other') KEM_DEMO_DATA <- DEMO_DATA_ALL %>% filter(IN_KEM==1) %>% rename(Region=IN_KEM) %>% mutate(Region='Kemmerer & Diamondville') +#Ajust the populations to match the total population stastics from the other data sources, since the tracts may spill into other areas +POST_ADJUST_DATA <- KEM_DEMO_DATA %>% group_by(Year) %>% summarize(Kem_Demo_Population=sum(Num_Female)+sum(Num_Male)) %>% left_join(OTHER_LIN_DEMO_DATA %>% group_by(Year) %>% summarize(Other_Demo_Population=sum(Num_Female)+sum(Num_Male))) %>% mutate(Total_Lincoln_Demo_Population=Kem_Demo_Population+Other_Demo_Population) +DIRECT_POP <- readRDS("Data/Cleaned_Data/Population_Data/RDS/All_Wyoming_County_Populations.Rds") %>% filter(County=='Lincoln') %>% select(Year,Lin_Direct_Population=Population) %>% full_join(readRDS("Data/Cleaned_Data/Population_Data/RDS/All_Wyoming_City_Populations.Rds") %>% filter(City %in% c('Kemmerer','Diamondville')) %>% group_by(Year) %>% summarize(Kem_Direct_Population=sum(Population,na.rm=TRUE))) %>% mutate(Other_Direct_Population=Lin_Direct_Population-Kem_Direct_Population) +ADJUST_TABLE <- DIRECT_POP %>% inner_join(POST_ADJUST_DATA) %>% mutate(KEM_ADJ=Kem_Direct_Population/Kem_Demo_Population,OTHER_ADJ=Lin_Direct_Population/Other_Demo_Population,LIN_ADJ=Lin_Direct_Population/Total_Lincoln_Demo_Population) %>% select(Year,LIN_ADJ,KEM_ADJ,OTHER_ADJ) +KEM_DEMO_DATA <- KEM_DEMO_DATA %>% left_join(ADJUST_TABLE %>% select(Year,KEM_ADJ)) %>% mutate(Num_Female=round(KEM_ADJ*Num_Female),Num_Male=round(KEM_ADJ*Num_Male)) %>% select(-KEM_ADJ) +OTHER_LIN_DEMO_DATA <- OTHER_LIN_DEMO_DATA%>% left_join(ADJUST_TABLE %>% select(Year,OTHER_ADJ)) %>% mutate(Num_Female=round(OTHER_ADJ*Num_Female),Num_Male=round(OTHER_ADJ*Num_Male)) %>% select(-OTHER_ADJ) + + + + #Find the most recent data year MAX_YEAR <- max(KEM_DEMO_DATA$Year) KEM_DEMO_MAT <- KEM_DEMO_DATA %>% filter(Year==MAX_YEAR) %>% select(Num_Male,Num_Female) %>% as.matrix @@ -51,6 +56,10 @@ OTHER_LIN_DEM_OLD_MAT <- OTHER_LIN_DEMO_DATA%>% filter(Year==MED_YEAR) %>% selec rownames(OTHER_LIN_DEM_OLD_MAT ) <- 0:85 + + + + ####Save results CSV_SAVE <- paste0(SAVE_DEMO_LOC,"/CSV") RDS_SAVE <- paste0(SAVE_DEMO_LOC,"/RDS") diff --git a/Scripts/3A_Population_Pyramid.r b/Scripts/3A_Population_Pyramid.r index 623827a..9645ec4 100644 --- a/Scripts/3A_Population_Pyramid.r +++ b/Scripts/3A_Population_Pyramid.r @@ -52,6 +52,7 @@ png(paste0(SAVE_PY_LOC,"Kemmerer_2009_to_2023_Age_Population_Pyramid.png"), res dev.off() + POP_DATA <- PY_KEM_SHIFT %>% pivot_wider(values_from=`Percent of Population`,names_from=Year,names_prefix="Year_") %>% group_by(Age) %>% summarize(Year_2023=abs(sum(Year_2023,na.rm=TRUE)),Year_2009=abs(sum(Year_2009,na.rm=TRUE)),Shift=Year_2023-Year_2009) #Make clean labels of Lollipop graph POP_RANGE <- c(pretty(range(POP_DATA$Shift),n=8)) @@ -66,47 +67,25 @@ dev.off() png(paste0(SAVE_PY_LOC,"Other_Lincoln_County_2009_to_2023_Age_Changes_Lollipop.png"), res = 600, width = 8.27, height = 11, units = "in") ggplot(POP_OTHER_DATA,aes(y=Age,x=Shift))+ geom_segment( aes(x=0, xend=Shift, y=Age, yend=Age),size=1.5, color="darkgrey")+geom_point( color="firebrick3", size=3.5) + theme_bw()+ theme(axis.text = element_text(size = 10),legend.position = "top",legend.text=element_text(size=14),legend.title = element_blank(),axis.title=element_text(size=18),strip.text = element_text(size = 14))+scale_y_discrete(breaks=c(0,seq(5,80,by=5),"+85"))+ scale_fill_manual(values = c("indianred2", "cornflowerblue", "magenta", "yellow"))+xlab("Share of Population Change (2009 to 2023)")+ xlim(-0.02, 0.02)+scale_x_continuous(breaks = POP_RANGE,labels = POP_LABEL,limits=c(-0.020,0.020)) dev.off() - +####Make a facet plot of trends of major population groups GRAPH_DATA <- rbind(readRDS("Data/Cleaned_Data/Demographic_Sex_Age_Data/RDS/Kemmerer_Diamondville_Demographics.Rds"),readRDS("Data/Cleaned_Data/Demographic_Sex_Age_Data/RDS/Other_Lincoln_Demographics.Rds")) AVG_AGE <- GRAPH_DATA %>% mutate(Population=Num_Female+Num_Male) %>% group_by(Region,Year) %>% summarize(Average_Age=sum(Age*Population)/sum(Population)) %>% ungroup -NUM_CHILDREN <- GRAPH_DATA %>% mutate(Population=Num_Female+Num_Male) %>% filter(Age<=18) %>% group_by(Region,Year) %>% summarize(Children=sum(Population)) %>% ungroup +NUM_CHILDREN <- GRAPH_DATA %>% mutate(Population=Num_Female+Num_Male) %>% filter(Age<=18) %>% group_by(Region,Year) %>% summarize('0-18'=sum(Population)) %>% ungroup NUM_ADULT <- GRAPH_DATA %>% mutate(Population=Num_Female+Num_Male) %>% filter(Age>=18,Age<31) %>% group_by(Region,Year) %>% summarize('18-30'=sum(Population)) %>% ungroup NUM_WORKING_ADULT <- GRAPH_DATA %>% mutate(Population=Num_Female+Num_Male) %>% filter(Age>=31,Age<55) %>% group_by(Region,Year) %>% summarize('31-54'=sum(Population)) %>% ungroup NUM_RETIRED <- GRAPH_DATA %>% mutate(Population=Num_Female+Num_Male) %>% filter(Age>=55) %>% group_by(Region,Year) %>% summarize('55+'=sum(Population)) %>% ungroup - - - MEDIAN_AGE <- GRAPH_DATA %>% mutate(Population=Num_Female+Num_Male) %>% group_by(Region,Year) %>% mutate(ROLLSUM=cumsum(Population),MID_POINT=ROLLSUM>=sum(Population)/2) %>% filter(MID_POINT) %>% filter(Age==min(Age)) %>% select(County,Region,Med_Age=Age) %>% ungroup + GRAPH_DATA <- AVG_AGE %>% left_join(MEDIAN_AGE) %>% left_join(NUM_CHILDREN ) %>% left_join(NUM_ADULT ) %>% left_join(NUM_WORKING_ADULT)%>% left_join(NUM_RETIRED) -GRAPH_DATA <- GRAPH_DATA %>% pivot_longer(cols=c('Children','18-30','31-54','55+'),names_to='Age Category',values_to='Population') -readRDS("Data/Cleaned_Data/Population_Data/RDS/All_Wyoming_City_Populations.Rds") %>% pull(City) %>% unique -readRDS("Data/Cleaned_Data/Population_Data/RDS/All_Wyoming_County_Populations.Rds") - -readRDS("Data/Cleaned_Data/Demographic_Sex_Age_Data/RDS/Kemmerer_Diamondville_Demographics.Rds") %>% group_by(Year) %>% summarize(Demo_Population=sum(Num_Male)+sum(Num_Female)) %>% left_join(readRDS("Data/Cleaned_Data/Population_Data/RDS/All_Wyoming_City_Populations.Rds") %>% filter(City=='Kemmerer'| City=='Diamondville') %>% group_by(Year) %>% summarize(City_Population=sum(Population))) %>% mutate(Demo_Population/City_Population) - -readRDS("Data/Cleaned_Data/Demographic_Sex_Age_Data/RDS/Full_Lincoln_County_Demographics.Rds") %>% group_by(Year)%>% summarize(Demo_Population=sum(Num_Male)+sum(Num_Female)) %>% left_join(readRDS("Data/Cleaned_Data/Population_Data/RDS/All_Wyoming_County_Populations.Rds") %>% filter(County=='Lincoln') %>% group_by(Year) %>% summarize(County_Population=sum(Population))) %>% tail -%>% mutate(Demo_Population/City_Population - -getwd() -%>% group_by(Year) %>% summarize(Demo_Population=sum(Num_Male)+sum(Num_Female)) - -%>% filter(County='Lincoln') %>% group_by(Year) %>% summarize(Population=sum(Population)) -GRAPH_DATA -GRAPH_DATA %>% filter(Region=='Kemmerer & Diamondville') %>% group_by(Year) %>% summarize(POP_DEMO=sum(Population)) %>% left_join(TEST) - -rbind(readRDS("Data/Cleaned_Data/Demographic_Sex_Age_Data/RDS/Kemmerer_Diamondville_Demographics.Rds")) %>% mutate(Population=Num_Female+Num_Male) %>% group_by(Year) %>% summarize(Pop_Demo=sum(Population)) %>% left_join(TEST) - -GRAPH_DATA - -ggplot(GRAPH_DATA,aes(x=Year,y=Population,color=Region)) +facet_grid( ~`Age Category`)+geom_line() -ggplot(GRAPH_DATA,aes(x=Year,color=Region)) + geom_line(aes(y=Average_Age),linetype=1,size=1.5) -ggplot(GRAPH_DATA,aes(x=Year,color=Region)) + geom_line(aes(y=Children),linetype=1,size=1.5)+ geom_line(aes(y=`18-30`),linetype=2,size=1.5)+ geom_line(aes(y=`31-54`),linetype=3,size=1.5)+ geom_line(aes(y=`55+`),linetype=4,size=1.5) +GRAPH_DATA <- GRAPH_DATA %>% pivot_longer(cols=c('0-18','18-30','31-54','55+'),names_to='Age Category',values_to='Population') +GRAPH_DATA <- GRAPH_DATA %>% mutate(Region=ifelse(Region=='Lincoln_Other','Rest of Lincoln County',Region)) +png(paste0(SAVE_PY_LOC,"Kemmerer_Lincoln_Population_Trends.png"), res = 600, height = 6, width=7, units = "in") + ggplot(GRAPH_DATA,aes(x=Year,y=Population,color=Region)) +facet_wrap( ~`Age Category`)+geom_line(size=1)+theme(axis.text = element_text(size = 10),legend.position = "top",legend.text=element_text(size=14),legend.title = element_blank(),axis.title=element_text(size=18),strip.text = element_text(size = 14)) +dev.off() - - - - -PY_DATA <- GRAPH_DATA %>% pivot_longer(cols=c("Num_Female","Num_Male"),names_to="Sex",values_to="Population") %>% mutate(Sex=ifelse(Sex=="Num_Female","Female","Male")) +png(paste0(SAVE_PY_LOC,"Kemmerer_Population_Trends.png"), res = 600, height = 7, width=5, units = "in") + ggplot(GRAPH_DATA %>% filter(Region=='Kemmerer & Diamondville'),aes(x=Year,y=Population)) +facet_wrap( ~`Age Category`,ncol=1)+geom_line(size=1,color='red')+theme(axis.text = element_text(size = 10),legend.position = "top",legend.text=element_text(size=14),legend.title = element_blank(),axis.title=element_text(size=18),strip.text = element_text(size = 14)) +dev.off()