Updated Population Pyramid scripts to auto build
This commit is contained in:
parent
63a8a56c92
commit
8c4dd2bb37
112
Scripts/3A_Population_Pyramid.r
Normal file
112
Scripts/3A_Population_Pyramid.r
Normal file
@ -0,0 +1,112 @@
|
|||||||
|
library(tidyverse)
|
||||||
|
library(scales) #For a pretty population Pyramid
|
||||||
|
PY_DATA <- rbind(readRDS("Data/Cleaned_Data/Demographic_Sex_Age_Data/RDS/Kemmerer_Diamondville_Demographics.Rds"),readRDS("Data/Cleaned_Data/Demographic_Sex_Age_Data/RDS/Other_Lincoln_Demographics.Rds"))
|
||||||
|
PY_DATA <- PY_DATA %>% pivot_longer(cols=c("Num_Female","Num_Male"),names_to="Sex",values_to="Population") %>% mutate(Sex=ifelse(Sex=="Num_Female","Female","Male"))
|
||||||
|
|
||||||
|
PY_DATA <- PY_DATA %>% group_by(Year,Region) %>% mutate('Percent of Population'=Population/sum(Population)) %>% ungroup
|
||||||
|
PY_DATA <- PY_DATA %>% mutate(Age_Numeric=Age,Age=as.character(Age))
|
||||||
|
PY_DATA[PY_DATA$Age=='85',"Age"] <- "+85"
|
||||||
|
ORD <- PY_DATA[,c("Age_Numeric","Age")] %>% unique %>% arrange(Age_Numeric) %>% pull(Age)
|
||||||
|
PY_DATA$Age <- factor(PY_DATA$Age,levels=ORD)
|
||||||
|
PY_DATA <- PY_DATA %>% mutate(`Percent of Population`=ifelse(Region=='Kemmerer & Diamondville',`Percent of Population`,-`Percent of Population`))
|
||||||
|
PY_DATA$Region <- ifelse(PY_DATA$Region=='Lincoln_Other','Rest of Lincoln County',PY_DATA$Region)
|
||||||
|
|
||||||
|
PY_DATA_NO_SEX <- PY_DATA %>% group_by(County,Region,Year,Age,Age_Numeric) %>% summarize(Population=sum(Population)) %>% ungroup %>% group_by(Region,Year) %>% mutate('Percent of Population'=Population/sum(Population)) %>% ungroup
|
||||||
|
PY_DATA_NO_SEX <- PY_DATA_NO_SEX %>% mutate(`Percent of Population`=ifelse(Region=='Kemmerer & Diamondville',`Percent of Population`,-`Percent of Population`))
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
PY_2023 <- PY_DATA %>% filter(Year==2023)
|
||||||
|
|
||||||
|
PY_2023_KEM <- PY_DATA %>% filter(Year==2023,Region=='Kemmerer & Diamondville') %>% mutate(`Percent of Population`=ifelse(Sex=='Male',`Percent of Population`,-`Percent of Population`))
|
||||||
|
PY_NO_SEX_2023 <- PY_DATA_NO_SEX %>% filter(Year==2023)
|
||||||
|
PY_NO_SEX_2009 <- PY_DATA_NO_SEX %>% filter(Year==2009)
|
||||||
|
PY_KEM_SHIFT <- rbind(PY_NO_SEX_2023,PY_NO_SEX_2009) %>% filter(Region=='Kemmerer & Diamondville') %>% mutate(Year=factor(Year,levels=c(2009,2023)),`Percent of Population`=ifelse(Year==2023,`Percent of Population`,-`Percent of Population`))
|
||||||
|
PY_KEM_SHIFT <- rbind(PY_NO_SEX_2023,PY_NO_SEX_2009) %>% filter(Region=='Kemmerer & Diamondville') %>% mutate(Year=factor(Year,levels=c(2009,2023)),`Percent of Population`=ifelse(Year==2023,`Percent of Population`,-`Percent of Population`))
|
||||||
|
PY_OTHER_SHIFT <- rbind(PY_NO_SEX_2023,PY_NO_SEX_2009) %>% filter(Region!='Kemmerer & Diamondville') %>% mutate(Year=factor(Year,levels=c(2009,2023)),`Percent of Population`=ifelse(Year==2023,`Percent of Population`,-`Percent of Population`))
|
||||||
|
|
||||||
|
|
||||||
|
RANGE <- c(pretty(range(PY_2023$`Percent of Population`),n=6),0.02)
|
||||||
|
LAB <- percent(abs(RANGE),accuracy=0.1 )
|
||||||
|
|
||||||
|
RANGE_NO_SEX <- c(pretty(range(PY_NO_SEX_2023$`Percent of Population`),n=8))
|
||||||
|
LAB_NO_SEX <- percent(abs(RANGE_NO_SEX),accuracy=0.1 )
|
||||||
|
|
||||||
|
if(!exists("SAVE_PY_LOC")){SAVE_PY_LOC <- "./Results/Population_Pyramids/"}
|
||||||
|
dir.create(SAVE_PY_LOC , recursive = TRUE, showWarnings = FALSE)
|
||||||
|
|
||||||
|
|
||||||
|
png(paste0(SAVE_PY_LOC,"Kemmerer_Lincoln_Age_by_Sex_2023_Population_Pyramid.png"), res = 600, width = 8.27, height = 11, units = "in")
|
||||||
|
ggplot(PY_2023,aes(y=Age,x=`Percent of Population`,fill=Region))+geom_col() +scale_x_continuous(breaks = RANGE,labels = LAB,limits=c(-0.02,0.02))+facet_grid(~Sex)+ylab("Age")+ theme_bw()+ theme(axis.text = element_text(size = 10),legend.position = "top",legend.text=element_text(size=14),legend.title = element_blank(),axis.title=element_text(size=18),strip.text = element_text(size = 14))+ scale_fill_discrete(guide = guide_legend(reverse = TRUE))+scale_y_discrete(breaks=c(0,seq(5,80,by=5),"+85"))
|
||||||
|
dev.off()
|
||||||
|
png(paste0(SAVE_PY_LOC,"Kemmerer_Age_by_Sex_2023_Population_Pyramid.png"), res = 600, width = 8.27, height = 11, units = "in")
|
||||||
|
ggplot(PY_2023_KEM ,aes(y=Age,x=`Percent of Population`,fill=Sex))+geom_col() +scale_x_continuous(breaks = RANGE,labels = LAB,limits=c(-0.015,0.015))+ theme_bw() + theme(axis.text = element_text(size = 10),legend.position = "top",legend.text=element_text(size=14),legend.title = element_blank(),axis.title=element_text(size=18),strip.text = element_text(size = 14))+ scale_fill_discrete(guide = guide_legend(reverse = TRUE))+ scale_fill_manual(values = c( "mediumpurple2","aquamarine3"))+scale_y_discrete(breaks=c(0,seq(5,80,by=5),"+85"))
|
||||||
|
dev.off()
|
||||||
|
|
||||||
|
png(paste0(SAVE_PY_LOC,"Kemmerer_Lincoln_Age_2023_Population_Pyramid.png"), res = 600, width = 8.27, height = 11, units = "in")
|
||||||
|
ggplot(PY_NO_SEX_2023,aes(y=Age,x=`Percent of Population`,fill=Region))+geom_col() +scale_x_continuous(breaks = RANGE_NO_SEX,labels = LAB_NO_SEX)+ theme_bw()+ theme(axis.text = element_text(size = 10),legend.position = "top",legend.text=element_text(size=14),legend.title = element_blank(),axis.title=element_text(size=18),strip.text = element_text(size = 14))+ guides(fill= guide_legend(reverse = TRUE))+scale_x_continuous(breaks = RANGE_NO_SEX,labels = LAB_NO_SEX,limits=c(-0.025,0.025))+scale_y_discrete(breaks=c(0,seq(5,80,by=5),"+85"))
|
||||||
|
dev.off()
|
||||||
|
|
||||||
|
png(paste0(SAVE_PY_LOC,"Kemmerer_2009_to_2023_Age_Population_Pyramid.png"), res = 600, width = 8.27, height = 11, units = "in")
|
||||||
|
ggplot(PY_KEM_SHIFT,aes(y=Age,x=`Percent of Population`,fill=Year))+geom_col() + theme_bw()+ theme(axis.text = element_text(size = 10),legend.position = "top",legend.text=element_text(size=14),legend.title = element_blank(),axis.title=element_text(size=18),strip.text = element_text(size = 14))+scale_y_discrete(breaks=c(0,seq(5,80,by=5),"+85"))+ scale_fill_manual(values = c("indianred2", "cornflowerblue", "magenta", "yellow"))+scale_x_continuous(breaks = RANGE_NO_SEX,labels = LAB_NO_SEX,limits=c(-0.025,0.025))
|
||||||
|
dev.off()
|
||||||
|
|
||||||
|
|
||||||
|
POP_DATA <- PY_KEM_SHIFT %>% pivot_wider(values_from=`Percent of Population`,names_from=Year,names_prefix="Year_") %>% group_by(Age) %>% summarize(Year_2023=abs(sum(Year_2023,na.rm=TRUE)),Year_2009=abs(sum(Year_2009,na.rm=TRUE)),Shift=Year_2023-Year_2009)
|
||||||
|
#Make clean labels of Lollipop graph
|
||||||
|
POP_RANGE <- c(pretty(range(POP_DATA$Shift),n=8))
|
||||||
|
POP_LABEL <- percent(abs(POP_RANGE),accuracy=0.1 )
|
||||||
|
|
||||||
|
POP_OTHER_DATA <- PY_OTHER_SHIFT %>% pivot_wider(values_from=`Percent of Population`,names_from=Year,names_prefix="Year_") %>% group_by(Age) %>% summarize(Year_2023=abs(sum(Year_2023,na.rm=TRUE)),Year_2009=abs(sum(Year_2009,na.rm=TRUE)),Shift=Year_2023-Year_2009)
|
||||||
|
|
||||||
|
png(paste0(SAVE_PY_LOC,"Kemmerer_2009_to_2023_Age_Changes_Lollipop.png"), res = 600, width = 8.27, height = 11, units = "in")
|
||||||
|
ggplot(POP_DATA,aes(y=Age,x=Shift))+ geom_segment( aes(x=0, xend=Shift, y=Age, yend=Age),size=1.5, color="darkgrey")+geom_point( color="darkorange2", size=3.5) + theme_bw()+ theme(axis.text = element_text(size = 10),legend.position = "top",legend.text=element_text(size=14),legend.title = element_blank(),axis.title=element_text(size=18),strip.text = element_text(size = 14))+scale_y_discrete(breaks=c(0,seq(5,80,by=5),"+85"))+ scale_fill_manual(values = c("indianred2", "cornflowerblue", "magenta", "yellow"))+xlab("Share of Population Change (2009 to 2023)")+ scale_x_continuous(breaks = POP_RANGE,labels = POP_LABEL,limits=c(-0.020,0.020))
|
||||||
|
dev.off()
|
||||||
|
|
||||||
|
png(paste0(SAVE_PY_LOC,"Other_Lincoln_County_2009_to_2023_Age_Changes_Lollipop.png"), res = 600, width = 8.27, height = 11, units = "in")
|
||||||
|
ggplot(POP_OTHER_DATA,aes(y=Age,x=Shift))+ geom_segment( aes(x=0, xend=Shift, y=Age, yend=Age),size=1.5, color="darkgrey")+geom_point( color="firebrick3", size=3.5) + theme_bw()+ theme(axis.text = element_text(size = 10),legend.position = "top",legend.text=element_text(size=14),legend.title = element_blank(),axis.title=element_text(size=18),strip.text = element_text(size = 14))+scale_y_discrete(breaks=c(0,seq(5,80,by=5),"+85"))+ scale_fill_manual(values = c("indianred2", "cornflowerblue", "magenta", "yellow"))+xlab("Share of Population Change (2009 to 2023)")+ xlim(-0.02, 0.02)+scale_x_continuous(breaks = POP_RANGE,labels = POP_LABEL,limits=c(-0.020,0.020))
|
||||||
|
dev.off()
|
||||||
|
|
||||||
|
GRAPH_DATA <- rbind(readRDS("Data/Cleaned_Data/Demographic_Sex_Age_Data/RDS/Kemmerer_Diamondville_Demographics.Rds"),readRDS("Data/Cleaned_Data/Demographic_Sex_Age_Data/RDS/Other_Lincoln_Demographics.Rds"))
|
||||||
|
AVG_AGE <- GRAPH_DATA %>% mutate(Population=Num_Female+Num_Male) %>% group_by(Region,Year) %>% summarize(Average_Age=sum(Age*Population)/sum(Population)) %>% ungroup
|
||||||
|
NUM_CHILDREN <- GRAPH_DATA %>% mutate(Population=Num_Female+Num_Male) %>% filter(Age<=18) %>% group_by(Region,Year) %>% summarize(Children=sum(Population)) %>% ungroup
|
||||||
|
NUM_ADULT <- GRAPH_DATA %>% mutate(Population=Num_Female+Num_Male) %>% filter(Age>=18,Age<31) %>% group_by(Region,Year) %>% summarize('18-30'=sum(Population)) %>% ungroup
|
||||||
|
NUM_WORKING_ADULT <- GRAPH_DATA %>% mutate(Population=Num_Female+Num_Male) %>% filter(Age>=31,Age<55) %>% group_by(Region,Year) %>% summarize('31-54'=sum(Population)) %>% ungroup
|
||||||
|
NUM_RETIRED <- GRAPH_DATA %>% mutate(Population=Num_Female+Num_Male) %>% filter(Age>=55) %>% group_by(Region,Year) %>% summarize('55+'=sum(Population)) %>% ungroup
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
MEDIAN_AGE <- GRAPH_DATA %>% mutate(Population=Num_Female+Num_Male) %>% group_by(Region,Year) %>% mutate(ROLLSUM=cumsum(Population),MID_POINT=ROLLSUM>=sum(Population)/2) %>% filter(MID_POINT) %>% filter(Age==min(Age)) %>% select(County,Region,Med_Age=Age) %>% ungroup
|
||||||
|
GRAPH_DATA <- AVG_AGE %>% left_join(MEDIAN_AGE) %>% left_join(NUM_CHILDREN ) %>% left_join(NUM_ADULT ) %>% left_join(NUM_WORKING_ADULT)%>% left_join(NUM_RETIRED)
|
||||||
|
GRAPH_DATA <- GRAPH_DATA %>% pivot_longer(cols=c('Children','18-30','31-54','55+'),names_to='Age Category',values_to='Population')
|
||||||
|
readRDS("Data/Cleaned_Data/Population_Data/RDS/All_Wyoming_City_Populations.Rds") %>% pull(City) %>% unique
|
||||||
|
readRDS("Data/Cleaned_Data/Population_Data/RDS/All_Wyoming_County_Populations.Rds")
|
||||||
|
|
||||||
|
readRDS("Data/Cleaned_Data/Demographic_Sex_Age_Data/RDS/Kemmerer_Diamondville_Demographics.Rds") %>% group_by(Year) %>% summarize(Demo_Population=sum(Num_Male)+sum(Num_Female)) %>% left_join(readRDS("Data/Cleaned_Data/Population_Data/RDS/All_Wyoming_City_Populations.Rds") %>% filter(City=='Kemmerer'| City=='Diamondville') %>% group_by(Year) %>% summarize(City_Population=sum(Population))) %>% mutate(Demo_Population/City_Population)
|
||||||
|
|
||||||
|
readRDS("Data/Cleaned_Data/Demographic_Sex_Age_Data/RDS/Full_Lincoln_County_Demographics.Rds") %>% group_by(Year)%>% summarize(Demo_Population=sum(Num_Male)+sum(Num_Female)) %>% left_join(readRDS("Data/Cleaned_Data/Population_Data/RDS/All_Wyoming_County_Populations.Rds") %>% filter(County=='Lincoln') %>% group_by(Year) %>% summarize(County_Population=sum(Population))) %>% tail
|
||||||
|
%>% mutate(Demo_Population/City_Population
|
||||||
|
|
||||||
|
getwd()
|
||||||
|
%>% group_by(Year) %>% summarize(Demo_Population=sum(Num_Male)+sum(Num_Female))
|
||||||
|
|
||||||
|
%>% filter(County='Lincoln') %>% group_by(Year) %>% summarize(Population=sum(Population))
|
||||||
|
GRAPH_DATA
|
||||||
|
GRAPH_DATA %>% filter(Region=='Kemmerer & Diamondville') %>% group_by(Year) %>% summarize(POP_DEMO=sum(Population)) %>% left_join(TEST)
|
||||||
|
|
||||||
|
rbind(readRDS("Data/Cleaned_Data/Demographic_Sex_Age_Data/RDS/Kemmerer_Diamondville_Demographics.Rds")) %>% mutate(Population=Num_Female+Num_Male) %>% group_by(Year) %>% summarize(Pop_Demo=sum(Population)) %>% left_join(TEST)
|
||||||
|
|
||||||
|
GRAPH_DATA
|
||||||
|
|
||||||
|
ggplot(GRAPH_DATA,aes(x=Year,y=Population,color=Region)) +facet_grid( ~`Age Category`)+geom_line()
|
||||||
|
ggplot(GRAPH_DATA,aes(x=Year,color=Region)) + geom_line(aes(y=Average_Age),linetype=1,size=1.5)
|
||||||
|
ggplot(GRAPH_DATA,aes(x=Year,color=Region)) + geom_line(aes(y=Children),linetype=1,size=1.5)+ geom_line(aes(y=`18-30`),linetype=2,size=1.5)+ geom_line(aes(y=`31-54`),linetype=3,size=1.5)+ geom_line(aes(y=`55+`),linetype=4,size=1.5)
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
PY_DATA <- GRAPH_DATA %>% pivot_longer(cols=c("Num_Female","Num_Male"),names_to="Sex",values_to="Population") %>% mutate(Sex=ifelse(Sex=="Num_Female","Female","Male"))
|
||||||
|
|
||||||
@ -1,128 +0,0 @@
|
|||||||
library(tidyverse)
|
|
||||||
library(scales) #For a pretty population Pyramid
|
|
||||||
ACS_FILE_LOC <- "Data/Cleaned_Data/ACS_Census_Demographic_Data.Rds"
|
|
||||||
ACS_YEAR <- 2023 #Year of the ACS data
|
|
||||||
if(!file.exists(ACS_FILE_LOC)){source("Scripts/Load_ACS_Census_Data.r")}else{AGE_DATA <- readRDS(ACS_FILE_LOC)}
|
|
||||||
LIN_DEMOGRAPHICS <- readRDS("Data/Cleaned_Data/Lincoln_Demographic_Data.Rds") %>% filter(Year==ACS_YEAR)
|
|
||||||
LIN_DEMOGRAPHICS
|
|
||||||
GET_INTIAL_DATA_SUMMARY <- function(DEMO_DATA,FILE_SAVE_NAME,ST_POP=NA,Area=NA,ST_YEAR=NA,SAVE_DIR='Data/Cleaned_Data/Intiate_Simulation/'){
|
|
||||||
ST_BIRTH <- round(sum((DEMO_DATA%>% filter(Age==0))[4:5]))
|
|
||||||
ST_BIRTH_TWO_PREV <- round(sum((DEMO_DATA%>% filter(Age==1))[4:5]))
|
|
||||||
if(is.na(ST_YEAR)){ST_YEAR=max(POP_DATA$Year)}
|
|
||||||
|
|
||||||
if(is.na(ST_POP)){
|
|
||||||
if(Area=='Kemmerer'){
|
|
||||||
ST_POP<- POP_DATA %>% filter(City %in% c('Kemmerer','Diamondville'),Year==ST_YEAR) %>% mutate(Population=as.numeric(Population)) %>% pull(Population) %>% sum()
|
|
||||||
} else if(Area=='Other Lincoln'){
|
|
||||||
ST_POP_KEM<- POP_DATA %>% filter(City %in% c('Kemmerer','Diamondville'),Year==ST_YEAR) %>% mutate(Population=as.numeric(Population)) %>% pull(Population) %>% sum()
|
|
||||||
|
|
||||||
ST_POP_LIN <- readRDS("Data/Cleaned_Data/Wyoming_County_Population.Rds") %>% filter(Year==ST_YEAR,County==Area) %>% pull(Population) #Grab the year of data for the county
|
|
||||||
ST_POP <- ST_POP_LIN-ST_POP_KEM
|
|
||||||
|
|
||||||
} else{
|
|
||||||
ST_POP <- readRDS("Data/Cleaned_Data/Wyoming_County_Population.Rds") %>% filter(Year==ST_YEAR,County==Area) %>% pull(Population) #Grab the year of data for the county
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
INTIATE_DATA <- DEMO_DATA %>% mutate(Male_Window=Age>=18 & Age<=30,Female_Window=Age>=18 & Age<=28) %>% group_by(County,Year) %>% summarize(Female_Birth_Group=sum(Num_Female*Female_Window),Male_Birth_Group=sum(Num_Male*Male_Window),Min_Birth_Group=ifelse(Female_Birth_Group<Male_Birth_Group,Female_Birth_Group,Male_Birth_Group),PREV_BIRTH=ST_BIRTH,PREV_TWO_BIRTH=ST_BIRTH_TWO_PREV,Population=ST_POP) %>% mutate(Births=NA,Deaths=NA,Migration=NA) %>% select(Year,County,Population,Births,Deaths,Migration,Min_Birth_Group,PREV_BIRTH,PREV_TWO_BIRTH) %>% ungroup
|
|
||||||
saveRDS(INTIATE_DATA ,paste0(SAVE_DIR,FILE_SAVE_NAME,".Rds"))
|
|
||||||
write_csv(INTIATE_DATA ,paste0(SAVE_DIR,FILE_SAVE_NAME,".csv"))
|
|
||||||
return(INTIATE_DATA)
|
|
||||||
GET_INTIAL_DATA_SUMMARY(LIN_DEMOGRAPHICS,"test")
|
|
||||||
|
|
||||||
|
|
||||||
for(i in 1:nrow(AGE_DATA)){
|
|
||||||
if(i==1 & exists("RES")){rm(RES)}
|
|
||||||
C_DEMO <- AGE_DATA[i,]
|
|
||||||
C_SEX <- C_DEMO$Sex
|
|
||||||
if(C_DEMO$Min_Age==C_DEMO$Max_Age){C_AGE_YEARLY_DATA <- LIN_DEMOGRAPHICS %>% filter(Age==C_DEMO$Min_Age)}else{
|
|
||||||
C_AGE_YEARLY_DATA <- LIN_DEMOGRAPHICS %>% filter(Age>=C_DEMO$Min_Age,Age<=C_DEMO$Max_Age)
|
|
||||||
}
|
|
||||||
#Extract only the current sex being applied
|
|
||||||
if(C_SEX=='Female'){C_AGE_YEARLY_DATA <- C_AGE_YEARLY_DATA %>% select(Age,Num_Female) %>% rename(POP=Num_Female)}else{C_AGE_YEARLY_DATA <- C_AGE_YEARLY_DATA %>% select(Age,Num_Male) %>% rename(POP=Num_Male)}
|
|
||||||
C_AGE_YEARLY_DATA$Sex <- C_SEX
|
|
||||||
C_AGE_YEARLY_DATA$PER <- C_AGE_YEARLY_DATA$POP/sum(C_AGE_YEARLY_DATA$POP)
|
|
||||||
C_RES <- C_AGE_YEARLY_DATA%>% left_join(C_DEMO) %>% mutate(Population=PER*Population) %>% select(Age,Sex,IN_KEM,Population)
|
|
||||||
if(i==1){RES <- C_RES}else{RES <- rbind(RES,C_RES)}
|
|
||||||
}
|
|
||||||
RES <- RES %>% group_by(IN_KEM) %>% mutate(Per_Pop=Population/sum(Population)) %>% mutate(Region=ifelse(IN_KEM,'Kemmerere & Diamondville','Rest of the County')) %>% select(-Population) %>% ungroup
|
|
||||||
CURRENT_KEM_POP <- readRDS("Data/Cleaned_Data/Wyoming_City_Population.Rds") %>% filter(City %in% c('Kemmerer','Diamondville'),Year==max(Year)) %>% mutate(Population=as.numeric(Population)) %>% pull(Population) %>% sum()
|
|
||||||
KEM_DEMOGRAPHICS <- RES %>% filter(IN_KEM==1) %>% select(Age,Sex,Per_Pop) %>% mutate(Year=ACS_YEAR+1,County='Kemmerer',Population=round(Per_Pop*CURRENT_KEM_POP) ) %>% select(-Per_Pop) %>% pivot_wider(values_from=Population,names_from=c(Sex),names_prefix="Num_") %>% select(County,Year,Age,Num_Male,Num_Female)
|
|
||||||
saveRDS(KEM_DEMOGRAPHICS,"Data/Cleaned_Data/Intiate_Simulation/Kemmerer_Demographic_Data.Rds")
|
|
||||||
write_csv(KEM_DEMOGRAPHICS,"Data/Cleaned_Data/Intiate_Simulation/Kemmerer_Demographic_Data.csv")
|
|
||||||
|
|
||||||
##Create a simplfied summary of the Kemmerer/Diamondville population estimates, to start the Monte Carlo
|
|
||||||
ST_BIRTH_KEM <- round(sum((KEM_DEMOGRAPHICS%>% filter(Age==0))[4:5]))
|
|
||||||
ST_BIRTH_TWO_PREV_KEM <- round(sum((KEM_DEMOGRAPHICS%>% filter(Age==1))[4:5]))
|
|
||||||
INTIATE_KEMMER <- KEM_DEMOGRAPHICS %>% mutate(Male_Window=Age>=18 & Age<=30,Female_Window=Age>=18 & Age<=28) %>% group_by(County,Year) %>% summarize(Female_Birth_Group=sum(Num_Female*Female_Window),Male_Birth_Group=sum(Num_Male*Male_Window),Min_Birth_Group=ifelse(Female_Birth_Group<Male_Birth_Group,Female_Birth_Group,Male_Birth_Group),PREV_BIRTH=ST_BIRTH_KEM,PREV_TWO_BIRTH=ST_BIRTH_TWO_PREV_KEM,Population=CURRENT_KEM_POP) %>% mutate(Births=NA,Deaths=NA,Migration=NA) %>% select(Year,County,Population,Births,Deaths,Migration,Min_Birth_Group,PREV_BIRTH,PREV_TWO_BIRTH) %>% ungroup
|
|
||||||
saveRDS(INTIATE_KEMMER ,"Data/Cleaned_Data/Intiate_Simulation/Kemmerer_Summary_Start_Data.Rds")
|
|
||||||
write_csv(INTIATE_KEMMER ,"Data/Cleaned_Data/Intiate_Simulation/Kemmerer_Summary_Start_Data.csv")
|
|
||||||
|
|
||||||
###Create data sets that contain all people not in Loncoln but not Kemmerer or Diamondville, for a seperate comparison run
|
|
||||||
LIN_DEMOGRAPHICS_RECENT <- readRDS("Data/Cleaned_Data/Lincoln_Demographic_Data.Rds") %>% filter(Year==max(Year)) #Grab the most recent year of data for all of the county
|
|
||||||
LIN_OTHER_DEMOGRAPHICS <- LIN_DEMOGRAPHICS_RECENT %>% left_join(KEM_DEMOGRAPHICS %>% select(-County,ADJ_MALE=Num_Male,ADJ_FEMALE=Num_Female)) %>% mutate(Num_Male=Num_Male-ADJ_MALE,Num_Female=Num_Female-ADJ_FEMALE) %>% select(-ADJ_MALE,-ADJ_FEMALE) %>% ungroup#Deduct the populaiton estimates of Kemmerer to create a "Other Lincoln" data set
|
|
||||||
#Data sets needed to start a simulation for all but Kemmerer/Diamondville but in the county
|
|
||||||
ST_BIRTH_OTHER <- round(sum((LIN_OTHER_DEMOGRAPHICS%>% filter(Age==0))[4:5]))
|
|
||||||
ST_BIRTH_TWO_PREV_OTHER <- round(sum((LIN_OTHER_DEMOGRAPHICS%>% filter(Age==1))[4:5]))
|
|
||||||
INTIATE_OTHER <- LIN_OTHER_DEMOGRAPHICS %>% mutate(Male_Window=Age>=18 & Age<=30,Female_Window=Age>=18 & Age<=28) %>% group_by(County,Year) %>% summarize(Female_Birth_Group=sum(Num_Female*Female_Window),Male_Birth_Group=sum(Num_Male*Male_Window),Min_Birth_Group=ifelse(Female_Birth_Group<Male_Birth_Group,Female_Birth_Group,Male_Birth_Group),PREV_BIRTH=ST_BIRTH_OTHER,PREV_TWO_BIRTH=ST_BIRTH_TWO_PREV_OTHER,Population=CURRENT_KEM_POP) %>% mutate(Births=NA,Deaths=NA,Migration=NA) %>% select(Year,County,Population,Births,Deaths,Migration,Min_Birth_Group,PREV_BIRTH,PREV_TWO_BIRTH) %>% ungroup
|
|
||||||
#Save detailed demographics for start of analysis
|
|
||||||
saveRDS(LIN_OTHER_DEMOGRAPHICS,"Data/Cleaned_Data/Intiate_Simulation/Lincoln_Rest_of_County_Demographic_Data.Rds")
|
|
||||||
write_csv(LIN_OTHER_DEMOGRAPHICS,"Data/Cleaned_Data/Intiate_Simulation/Lincoln_Rest_of_County_Demographic_Data.csv")
|
|
||||||
|
|
||||||
#Save summary information for start of analysis
|
|
||||||
|
|
||||||
saveRDS(INTIATE_OTHER ,"Data/Cleaned_Data/Intiate_Simulation/Lincoln_Rest_of_County_Summary_Start_Data.Rds")
|
|
||||||
write_csv(INTIATE_OTHER ,"Data/Cleaned_Data/Intiate_Simulation/Lincoln_Rest_of_County_Summary_Start_Data.csv")
|
|
||||||
|
|
||||||
|
|
||||||
#Data sets needed to start a simulation for all the county (includes Kemmerer)
|
|
||||||
ST_BIRTH_ALL <- round(sum((LIN_DEMOGRAPHICS_RECENT %>% filter(Age==0))[4:5]))
|
|
||||||
ST_BIRTH_TWO_PREV_ALL <- round(sum((LIN_DEMOGRAPHICS_RECENT %>% filter(Age==1))[4:5]))
|
|
||||||
INTIATE_ALL <- LIN_DEMOGRAPHICS_RECENT %>% mutate(Male_Window=Age>=18 & Age<=30,Female_Window=Age>=18 & Age<=28) %>% group_by(County,Year) %>% summarize(Female_Birth_Group=sum(Num_Female*Female_Window),Male_Birth_Group=sum(Num_Male*Male_Window),Min_Birth_Group=ifelse(Female_Birth_Group<Male_Birth_Group,Female_Birth_Group,Male_Birth_Group),PREV_BIRTH=ST_BIRTH_ALL,PREV_TWO_BIRTH=ST_BIRTH_TWO_PREV_ALL,Population=CURRENT_KEM_POP) %>% mutate(Births=NA,Deaths=NA,Migration=NA) %>% select(Year,County,Population,Births,Deaths,Migration,Min_Birth_Group,PREV_BIRTH,PREV_TWO_BIRTH) %>% ungroup
|
|
||||||
#Save detailed demographics for start of analysis
|
|
||||||
saveRDS(LIN_DEMOGRAPHICS_RECENT,"Data/Cleaned_Data/Intiate_Simulation/Lincoln_All_County_Demographic_Data.Rds")
|
|
||||||
write_csv(LIN_DEMOGRAPHICS_RECENT,"Data/Cleaned_Data/Intiate_Simulation/Lincoln_All_County_Demographic_Data.csv")
|
|
||||||
|
|
||||||
#Save summary information for start of analysis
|
|
||||||
|
|
||||||
saveRDS(INTIATE_ALL ,"Data/Cleaned_Data/Intiate_Simulation/Lincoln_All_County_Summary_Start_Data.Rds")
|
|
||||||
write_csv(INTIATE_ALL ,"Data/Cleaned_Data/Intiate_Simulation/Lincoln_All_County_Summary_Start_Data.csv")
|
|
||||||
|
|
||||||
|
|
||||||
#sum(LIN_OTHER_DEMOGRAPHICS[,4:5] )+sum(KEM_DEMOGRAPHICS[,4:5])
|
|
||||||
|
|
||||||
|
|
||||||
###Make a population Pyramid Graph
|
|
||||||
PY_KEM_DATA <- KEM_DEMOGRAPHICS %>% mutate(County='Kemmerer') %>% pivot_longer(cols=c(Num_Male,Num_Female),names_to="Sex",values_to="Population") %>% mutate(Sex=ifelse(Sex=='Num_Male','Male','Female'))
|
|
||||||
PY_LN_DATA <- readRDS("Data/Cleaned_Data/Wyoming_County_Demographic_Data.Rds") %>% filter(County=='Lincoln',Year==max(Year))%>% pivot_longer(cols=c(Num_Male,Num_Female),names_to="Sex",values_to="Population") %>% mutate(Sex=ifelse(Sex=='Num_Male','Male','Female'))
|
|
||||||
|
|
||||||
#Population=ifelse(Sex=='Male',-Population,Population)
|
|
||||||
#Remove the Kemmerer Population estimate from Lincoln county
|
|
||||||
PY_LN_DATA <- PY_LN_DATA %>% left_join(PY_KEM_DATA %>% rename(POP_SHIFT=Population) %>% select(-County)) %>% mutate(Population=Population-POP_SHIFT) %>% select(-POP_SHIFT)
|
|
||||||
|
|
||||||
LN_ONLY_POP <- sum(PY_LN_DATA$Population)
|
|
||||||
PY_LN_DATA$Percent <- PY_LN_DATA$Population/LN_ONLY_POP
|
|
||||||
#Find percent Kemmerer (nod adjustment needed)
|
|
||||||
KEM_ONLY_POP <- sum(PY_KEM_DATA$Population)
|
|
||||||
PY_KEM_DATA$Percent <- PY_KEM_DATA$Population/KEM_ONLY_POP
|
|
||||||
|
|
||||||
PY_DATA <- rbind(PY_LN_DATA,PY_KEM_DATA) %>% mutate(Population=ifelse(County=='Kemmerer',Population,-Population),'Percent of Population'=ifelse(County=='Kemmerer',Percent,-Percent),Age_Numeric=Age,Age=as.character(Age))
|
|
||||||
PY_DATA[PY_DATA$Age=='85',"Age"] <- ">84"
|
|
||||||
ORD <- PY_DATA[,c("Age_Numeric","Age")] %>% unique %>% arrange(Age_Numeric) %>% pull(Age)
|
|
||||||
PY_DATA$Age <- factor(PY_DATA$Age,levels=ORD)
|
|
||||||
|
|
||||||
RANGE <- c(pretty(range(PY_DATA$`Percent of Population`),n=4))
|
|
||||||
LAB <- percent(abs(RANGE),accuracy=0.1 )
|
|
||||||
PY_DATA
|
|
||||||
PY_DATA <- PY_DATA %>% mutate(Region=ifelse(County=='Lincoln',"Rest of Lincoln County","Kemmerer & Diamondville"))
|
|
||||||
PY_DATA$Sex <- ifelse(PY_DATA$Sex=='Male','Men','Women')
|
|
||||||
ORDER <- PY_DATA$Sex %>% unique
|
|
||||||
PY_DATA$Sex <- factor(PY_DATA$Sex,levels=ORDER)
|
|
||||||
POP_PYRAMID <- ggplot(PY_DATA,aes(y=factor(Age),x=`Percent of Population`,fill=Region))+geom_col() +scale_x_continuous(breaks = RANGE,labels = LAB)+facet_grid(~Sex)+ylab("Age")+ theme_bw()+ theme(axis.text = element_text(size = 10),legend.position = "top",legend.text=element_text(size=14),legend.title = element_blank(),axis.title=element_text(size=18),strip.text = element_text(size = 14))
|
|
||||||
|
|
||||||
|
|
||||||
dir.create(FIG_DIR ,showWarnings=FALSE)
|
|
||||||
png(paste0(FIG_DIR,"/Population_Pyramid.png"), res = 600, width = 8.27, height = 11, units = "in")
|
|
||||||
POP_PYRAMID
|
|
||||||
dev.off()
|
|
||||||
|
|
||||||
Loading…
x
Reference in New Issue
Block a user