Population_Study/Scripts/3A_Population_Pyramid.r
2025-12-17 16:07:54 -07:00

92 lines
10 KiB
R

llibrary(tidyverse)
library(scales) #For a pretty population Pyramid
PY_DATA <- rbind(readRDS("Data/Cleaned_Data/Demographic_Sex_Age_Data/RDS/Kemmerer_Diamondville_Demographics.Rds"),readRDS("Data/Cleaned_Data/Demographic_Sex_Age_Data/RDS/Other_Lincoln_Demographics.Rds"))
PY_DATA <- PY_DATA %>% pivot_longer(cols=c("Num_Female","Num_Male"),names_to="Sex",values_to="Population") %>% mutate(Sex=ifelse(Sex=="Num_Female","Female","Male"))
PY_DATA <- PY_DATA %>% group_by(Year,Region) %>% mutate('Percent of Population'=Population/sum(Population)) %>% ungroup
PY_DATA <- PY_DATA %>% mutate(Age_Numeric=Age,Age=as.character(Age))
PY_DATA[PY_DATA$Age=='85',"Age"] <- "+85"
ORD <- PY_DATA[,c("Age_Numeric","Age")] %>% unique %>% arrange(Age_Numeric) %>% pull(Age)
PY_DATA$Age <- factor(PY_DATA$Age,levels=ORD)
PY_DATA <- PY_DATA %>% mutate(`Percent of Population`=ifelse(Region=='Kemmerer & Diamondville',`Percent of Population`,-`Percent of Population`))
PY_DATA$Region <- ifelse(PY_DATA$Region=='Lincoln_Other','Rest of Lincoln County',PY_DATA$Region)
PY_DATA_NO_SEX <- PY_DATA %>% group_by(County,Region,Year,Age,Age_Numeric) %>% summarize(Population=sum(Population)) %>% ungroup %>% group_by(Region,Year) %>% mutate('Percent of Population'=Population/sum(Population)) %>% ungroup
PY_DATA_NO_SEX <- PY_DATA_NO_SEX %>% mutate(`Percent of Population`=ifelse(Region=='Kemmerer & Diamondville',`Percent of Population`,-`Percent of Population`))
PY_2023 <- PY_DATA %>% filter(Year==2023)
PY_2023_KEM <- PY_DATA %>% filter(Year==2023,Region=='Kemmerer & Diamondville') %>% mutate(`Percent of Population`=ifelse(Sex=='Male',`Percent of Population`,-`Percent of Population`))
PY_NO_SEX_2023 <- PY_DATA_NO_SEX %>% filter(Year==2023)
PY_NO_SEX_2009 <- PY_DATA_NO_SEX %>% filter(Year==2009)
PY_KEM_SHIFT <- rbind(PY_NO_SEX_2023,PY_NO_SEX_2009) %>% filter(Region=='Kemmerer & Diamondville') %>% mutate(Year=factor(Year,levels=c(2009,2023)),`Percent of Population`=ifelse(Year==2023,`Percent of Population`,-`Percent of Population`))
PY_KEM_SHIFT <- rbind(PY_NO_SEX_2023,PY_NO_SEX_2009) %>% filter(Region=='Kemmerer & Diamondville') %>% mutate(Year=factor(Year,levels=c(2009,2023)),`Percent of Population`=ifelse(Year==2023,`Percent of Population`,-`Percent of Population`))
PY_OTHER_SHIFT <- rbind(PY_NO_SEX_2023,PY_NO_SEX_2009) %>% filter(Region!='Kemmerer & Diamondville') %>% mutate(Year=factor(Year,levels=c(2009,2023)),`Percent of Population`=ifelse(Year==2023,`Percent of Population`,-`Percent of Population`))
RANGE <- c(pretty(range(PY_2023$`Percent of Population`),n=6),0.02)
LAB <- percent(abs(RANGE),accuracy=0.1 )
RANGE_NO_SEX <- c(pretty(range(PY_NO_SEX_2023$`Percent of Population`),n=8))
LAB_NO_SEX <- percent(abs(RANGE_NO_SEX),accuracy=0.1 )
if(!exists("SAVE_PY_LOC")){SAVE_PY_LOC <- "./Results/Population_Pyramids/"}
dir.create(SAVE_PY_LOC , recursive = TRUE, showWarnings = FALSE)
png(paste0(SAVE_PY_LOC,"Kemmerer_Lincoln_Age_by_Sex_2023_Population_Pyramid.png"), res = 600, width = 8.27, height = 11, units = "in")
ggplot(PY_2023,aes(y=Age,x=`Percent of Population`,fill=Region))+geom_col()+scale_fill_manual(values=c('cadetblue','darkred')) +scale_x_continuous(breaks = RANGE,labels = LAB,limits=c(-0.02,0.02))+facet_grid(~Sex)+ylab("Age")+ theme_bw()+ theme(axis.text = element_text(size = 10),legend.position = "top",legend.text=element_text(size=14),legend.title = element_blank(),axis.title=element_text(size=18),strip.text = element_text(size = 14))+scale_y_discrete(breaks=c(0,seq(5,80,by=5),"+85"))
dev.off()
png(paste0(SAVE_PY_LOC,"Kemmerer_Age_by_Sex_2023_Population_Pyramid.png"), res = 600, width = 8.27, height = 11, units = "in")
ggplot(PY_2023_KEM ,aes(y=Age,x=`Percent of Population`,fill=Sex))+geom_col() +scale_x_continuous(breaks = RANGE,labels = LAB,limits=c(-0.015,0.015))+ theme_bw() + theme(axis.text = element_text(size = 10),legend.position = "top",legend.text=element_text(size=14),legend.title = element_blank(),axis.title=element_text(size=18),strip.text = element_text(size = 14))+ scale_fill_discrete(guide = guide_legend(reverse = TRUE))+ scale_fill_manual(values = c( "mediumpurple2","aquamarine3"))+scale_y_discrete(breaks=c(0,seq(5,80,by=5),"+85"))
dev.off()
png(paste0(SAVE_PY_LOC,"Kemmerer_Lincoln_Age_2023_Population_Pyramid.png"), res = 600, width = 8.27, height = 11, units = "in")
ggplot(PY_NO_SEX_2023,aes(y=Age,x=`Percent of Population`,fill=Region))+geom_col() +scale_x_continuous(breaks = RANGE_NO_SEX,labels = LAB_NO_SEX)+ theme_bw()+ theme(axis.text = element_text(size = 10),legend.position = "top",legend.text=element_text(size=14),legend.title = element_blank(),axis.title=element_text(size=18),strip.text = element_text(size = 14))+ guides(fill= guide_legend(reverse = TRUE))+scale_x_continuous(breaks = RANGE_NO_SEX,labels = LAB_NO_SEX,limits=c(-0.025,0.025))+scale_y_discrete(breaks=c(0,seq(5,80,by=5),"+85"))+scale_fill_manual(values=c("cadetblue","darkred"))
dev.off()
png(paste0(SAVE_PY_LOC,"Kemmerer_2009_to_2023_Age_Population_Pyramid.png"), res = 600, width = 8.27, height = 11, units = "in")
ggplot(PY_KEM_SHIFT,aes(y=Age,x=`Percent of Population`,fill=Year))+geom_col() + theme_bw()+ theme(axis.text = element_text(size = 10),legend.position = "top",legend.text=element_text(size=14),legend.title = element_blank(),axis.title=element_text(size=18),strip.text = element_text(size = 14))+scale_y_discrete(breaks=c(0,seq(5,80,by=5),"+85"))+ scale_fill_manual(values = c("indianred2", "cornflowerblue", "magenta", "yellow"))+scale_x_continuous(breaks = RANGE_NO_SEX,labels = LAB_NO_SEX,limits=c(-0.025,0.025))
dev.off()
POP_DATA <- PY_KEM_SHIFT %>% pivot_wider(values_from=`Percent of Population`,names_from=Year,names_prefix="Year_") %>% group_by(Age) %>% summarize(Year_2023=abs(sum(Year_2023,na.rm=TRUE)),Year_2009=abs(sum(Year_2009,na.rm=TRUE)),Shift=Year_2023-Year_2009)
#Make clean labels of Lollipop graph
POP_RANGE <- c(pretty(range(POP_DATA$Shift),n=8))
POP_LABEL <- percent(abs(POP_RANGE),accuracy=0.1 )
POP_OTHER_DATA <- PY_OTHER_SHIFT %>% pivot_wider(values_from=`Percent of Population`,names_from=Year,names_prefix="Year_") %>% group_by(Age) %>% summarize(Year_2023=abs(sum(Year_2023,na.rm=TRUE)),Year_2009=abs(sum(Year_2009,na.rm=TRUE)),Shift=Year_2023-Year_2009)
png(paste0(SAVE_PY_LOC,"Kemmerer_2009_to_2023_Age_Changes_Lollipop.png"), res = 600, width = 8.27, height = 11, units = "in")
ggplot(POP_DATA,aes(y=Age,x=Shift))+ geom_segment( aes(x=0, xend=Shift, y=Age, yend=Age),size=1.5, color="darkgrey")+geom_point( color="darkorange2", size=3.5) + theme_bw()+ theme(axis.text = element_text(size = 10),legend.position = "top",legend.text=element_text(size=14),legend.title = element_blank(),axis.title=element_text(size=18),strip.text = element_text(size = 14))+scale_y_discrete(breaks=c(0,seq(5,80,by=5),"+85"))+ scale_fill_manual(values = c("indianred2", "cornflowerblue", "magenta", "yellow"))+xlab("Share of Population Change (2009 to 2023)")+ scale_x_continuous(breaks = POP_RANGE,labels = POP_LABEL,limits=c(-0.020,0.020))
dev.off()
png(paste0(SAVE_PY_LOC,"Other_Lincoln_County_2009_to_2023_Age_Changes_Lollipop.png"), res = 600, width = 8.27, height = 11, units = "in")
ggplot(POP_OTHER_DATA,aes(y=Age,x=Shift))+ geom_segment( aes(x=0, xend=Shift, y=Age, yend=Age),size=1.5, color="darkgrey")+geom_point( color="firebrick3", size=3.5) + theme_bw()+ theme(axis.text = element_text(size = 10),legend.position = "top",legend.text=element_text(size=14),legend.title = element_blank(),axis.title=element_text(size=18),strip.text = element_text(size = 14))+scale_y_discrete(breaks=c(0,seq(5,80,by=5),"+85"))+ scale_fill_manual(values = c("indianred2", "cornflowerblue", "magenta", "yellow"))+xlab("Share of Population Change (2009 to 2023)")+ xlim(-0.02, 0.02)+scale_x_continuous(breaks = POP_RANGE,labels = POP_LABEL,limits=c(-0.020,0.020))
dev.off()
####Make a facet plot of trends of major population groups
GRAPH_DATA <- rbind(readRDS("Data/Cleaned_Data/Demographic_Sex_Age_Data/RDS/Kemmerer_Diamondville_Demographics.Rds"),readRDS("Data/Cleaned_Data/Demographic_Sex_Age_Data/RDS/Other_Lincoln_Demographics.Rds"))
AVG_AGE <- GRAPH_DATA %>% mutate(Population=Num_Female+Num_Male) %>% group_by(Region,Year) %>% summarize(Average_Age=sum(Age*Population)/sum(Population)) %>% ungroup
NUM_CHILDREN <- GRAPH_DATA %>% mutate(Population=Num_Female+Num_Male) %>% filter(Age<=18) %>% group_by(Region,Year) %>% summarize('0-18'=sum(Population)) %>% ungroup
NUM_ADULT <- GRAPH_DATA %>% mutate(Population=Num_Female+Num_Male) %>% filter(Age>=18,Age<31) %>% group_by(Region,Year) %>% summarize('18-30'=sum(Population)) %>% ungroup
NUM_WORKING_ADULT <- GRAPH_DATA %>% mutate(Population=Num_Female+Num_Male) %>% filter(Age>=31,Age<55) %>% group_by(Region,Year) %>% summarize('31-54'=sum(Population)) %>% ungroup
NUM_RETIRED <- GRAPH_DATA %>% mutate(Population=Num_Female+Num_Male) %>% filter(Age>=55) %>% group_by(Region,Year) %>% summarize('55+'=sum(Population)) %>% ungroup
MEDIAN_AGE <- GRAPH_DATA %>% mutate(Population=Num_Female+Num_Male) %>% group_by(Region,Year) %>% mutate(ROLLSUM=cumsum(Population),MID_POINT=ROLLSUM>=sum(Population)/2) %>% filter(MID_POINT) %>% filter(Age==min(Age)) %>% select(County,Region,Med_Age=Age) %>% ungroup
GRAPH_DATA <- AVG_AGE %>% left_join(MEDIAN_AGE) %>% left_join(NUM_CHILDREN ) %>% left_join(NUM_ADULT ) %>% left_join(NUM_WORKING_ADULT)%>% left_join(NUM_RETIRED)
GRAPH_DATA <- GRAPH_DATA %>% pivot_longer(cols=c('0-18','18-30','31-54','55+'),names_to='Age Category',values_to='Population')
GRAPH_DATA <- GRAPH_DATA %>% mutate(Region=ifelse(Region=='Lincoln_Other','Rest of Lincoln County',Region))
png(paste0(SAVE_PY_LOC,"Kemmerer_Lincoln_Population_Trends.png"), res = 600, height = 6, width=7, units = "in")
ggplot(GRAPH_DATA,aes(x=Year,y=Population,color=Region)) +facet_wrap( ~`Age Category`)+geom_line(size=1)+theme(axis.text = element_text(size = 10),legend.position = "top",legend.text=element_text(size=14),legend.title = element_blank(),axis.title=element_text(size=18),strip.text = element_text(size = 14))
dev.off()
png(paste0(SAVE_PY_LOC,"Kemmerer_Population_Trends.png"), res = 600, height = 7, width=5, units = "in")
ggplot(GRAPH_DATA %>% filter(Region=='Kemmerer & Diamondville'),aes(x=Year,y=Population)) +facet_wrap( ~`Age Category`,ncol=1)+geom_line(size=1,color='red')+theme(axis.text = element_text(size = 10),legend.position = "top",legend.text=element_text(size=14),legend.title = element_blank(),axis.title=element_text(size=18),strip.text = element_text(size = 14))
dev.off()