Fixed over estiamte of Kemmerer pop issue
This commit is contained in:
parent
9e2764a33a
commit
743d61e08a
@ -14,19 +14,14 @@ ACS_END_YEAR <- 2023 #most recent in package as of Nov 4 2025
|
||||
#Add API key if missing
|
||||
#KEY <- '30e13ab22563318ff59286e433099f4174d4edd4'
|
||||
#census_api_key(KEY, install = TRUE)
|
||||
PROJ_TRACTS <- get_tracts(search_city('Kemmerer','WY')$zipcode)# %>% full_join(get_tracts(search_city('Diamondville','WY')$zipcode))
|
||||
PROJ_TRACTS <- get_tracts(search_city('Kemmerer','WY')$zipcode) %>% full_join(get_tracts(search_city('Diamondville','WY')$zipcode))
|
||||
#Manual search of GEOID finds that 56023978200 is not Kemmerer, search_county('Lincoln','Wy')$zipcode
|
||||
#https://www.geocod.io/geoids/wyoming/lincoln-county-56023/978400/ is Kemmerer
|
||||
#https://www.geocod.io/geoids/wyoming/lincoln-county-56023/978200/ is mostly other areas
|
||||
PROJ_TRACTS <- PROJ_TRACTS %>% select(-ZCTA5) %>% filter(GEOID!=56023978200) %>% unique
|
||||
PROJ_TRACTS <- PROJ_TRACTS %>% select(GEOID) %>% mutate('IN_KEM'=1) %>% mutate(GEOID=as.character(GEOID))
|
||||
###Load data manually created which links vairable names to sex-age census data
|
||||
CODES <- read_csv("./Data/Raw_Data/ACS_Demographics/API_CENSUS_CODES.csv",skip=1) %>% mutate(Med_Age=(Min_Age+Max_Age)/2) %>% rename(variable=Code)
|
||||
CODES %>% filter(Min_Age==0,Max_Age==Inf)
|
||||
TEST_CODES <- c('B01001_002E','B01001_026E') #All men and all woment
|
||||
TEMP <-
|
||||
TEMP_COUNTY <- get_acs(geography="tract",variables=TEST_CODES,state='WY',county='lincoln')
|
||||
TEMP_COUNTY$estimate %>% sum
|
||||
search_city('Kemmerer','WY')
|
||||
TEMP
|
||||
(TEMP %>% inner_join(PROJ_TRACTS) %>% pull(estimate) %>% sum)/3104
|
||||
TEST <- MAKE_KEM_DEMO_DATA_YEAR(2020)
|
||||
#Testing age Comparison between the two
|
||||
###Extract census data for all tracts in Lincoln county, clean up the data, and indicate if the tract is in Kemmerer/Diamondvile or not.
|
||||
DEMO_DATA_ALL <- do.call(rbind,lapply(2009:ACS_END_YEAR,MAKE_KEM_DEMO_DATA_YEAR))
|
||||
@ -38,6 +33,16 @@ DEMO_DATA_ALL <- rbind(YOUNG,OLD) %>% arrange(County,Year,IN_KEM,Sex)
|
||||
DEMO_DATA_ALL <- DEMO_DATA_ALL %>% mutate(Population=round(Population)) %>% pivot_wider(values_from=Population,names_from=Sex,names_prefix="Num_")
|
||||
OTHER_LIN_DEMO_DATA <- DEMO_DATA_ALL %>% filter(IN_KEM==0) %>% rename(Region=IN_KEM) %>% mutate(Region='Lincoln_Other')
|
||||
KEM_DEMO_DATA <- DEMO_DATA_ALL %>% filter(IN_KEM==1) %>% rename(Region=IN_KEM) %>% mutate(Region='Kemmerer & Diamondville')
|
||||
#Ajust the populations to match the total population stastics from the other data sources, since the tracts may spill into other areas
|
||||
POST_ADJUST_DATA <- KEM_DEMO_DATA %>% group_by(Year) %>% summarize(Kem_Demo_Population=sum(Num_Female)+sum(Num_Male)) %>% left_join(OTHER_LIN_DEMO_DATA %>% group_by(Year) %>% summarize(Other_Demo_Population=sum(Num_Female)+sum(Num_Male))) %>% mutate(Total_Lincoln_Demo_Population=Kem_Demo_Population+Other_Demo_Population)
|
||||
DIRECT_POP <- readRDS("Data/Cleaned_Data/Population_Data/RDS/All_Wyoming_County_Populations.Rds") %>% filter(County=='Lincoln') %>% select(Year,Lin_Direct_Population=Population) %>% full_join(readRDS("Data/Cleaned_Data/Population_Data/RDS/All_Wyoming_City_Populations.Rds") %>% filter(City %in% c('Kemmerer','Diamondville')) %>% group_by(Year) %>% summarize(Kem_Direct_Population=sum(Population,na.rm=TRUE))) %>% mutate(Other_Direct_Population=Lin_Direct_Population-Kem_Direct_Population)
|
||||
ADJUST_TABLE <- DIRECT_POP %>% inner_join(POST_ADJUST_DATA) %>% mutate(KEM_ADJ=Kem_Direct_Population/Kem_Demo_Population,OTHER_ADJ=Lin_Direct_Population/Other_Demo_Population,LIN_ADJ=Lin_Direct_Population/Total_Lincoln_Demo_Population) %>% select(Year,LIN_ADJ,KEM_ADJ,OTHER_ADJ)
|
||||
KEM_DEMO_DATA <- KEM_DEMO_DATA %>% left_join(ADJUST_TABLE %>% select(Year,KEM_ADJ)) %>% mutate(Num_Female=round(KEM_ADJ*Num_Female),Num_Male=round(KEM_ADJ*Num_Male)) %>% select(-KEM_ADJ)
|
||||
OTHER_LIN_DEMO_DATA <- OTHER_LIN_DEMO_DATA%>% left_join(ADJUST_TABLE %>% select(Year,OTHER_ADJ)) %>% mutate(Num_Female=round(OTHER_ADJ*Num_Female),Num_Male=round(OTHER_ADJ*Num_Male)) %>% select(-OTHER_ADJ)
|
||||
|
||||
|
||||
|
||||
|
||||
#Find the most recent data year
|
||||
MAX_YEAR <- max(KEM_DEMO_DATA$Year)
|
||||
KEM_DEMO_MAT <- KEM_DEMO_DATA %>% filter(Year==MAX_YEAR) %>% select(Num_Male,Num_Female) %>% as.matrix
|
||||
@ -51,6 +56,10 @@ OTHER_LIN_DEM_OLD_MAT <- OTHER_LIN_DEMO_DATA%>% filter(Year==MED_YEAR) %>% selec
|
||||
rownames(OTHER_LIN_DEM_OLD_MAT ) <- 0:85
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
####Save results
|
||||
CSV_SAVE <- paste0(SAVE_DEMO_LOC,"/CSV")
|
||||
RDS_SAVE <- paste0(SAVE_DEMO_LOC,"/RDS")
|
||||
|
||||
@ -52,6 +52,7 @@ png(paste0(SAVE_PY_LOC,"Kemmerer_2009_to_2023_Age_Population_Pyramid.png"), res
|
||||
dev.off()
|
||||
|
||||
|
||||
|
||||
POP_DATA <- PY_KEM_SHIFT %>% pivot_wider(values_from=`Percent of Population`,names_from=Year,names_prefix="Year_") %>% group_by(Age) %>% summarize(Year_2023=abs(sum(Year_2023,na.rm=TRUE)),Year_2009=abs(sum(Year_2009,na.rm=TRUE)),Shift=Year_2023-Year_2009)
|
||||
#Make clean labels of Lollipop graph
|
||||
POP_RANGE <- c(pretty(range(POP_DATA$Shift),n=8))
|
||||
@ -66,47 +67,25 @@ dev.off()
|
||||
png(paste0(SAVE_PY_LOC,"Other_Lincoln_County_2009_to_2023_Age_Changes_Lollipop.png"), res = 600, width = 8.27, height = 11, units = "in")
|
||||
ggplot(POP_OTHER_DATA,aes(y=Age,x=Shift))+ geom_segment( aes(x=0, xend=Shift, y=Age, yend=Age),size=1.5, color="darkgrey")+geom_point( color="firebrick3", size=3.5) + theme_bw()+ theme(axis.text = element_text(size = 10),legend.position = "top",legend.text=element_text(size=14),legend.title = element_blank(),axis.title=element_text(size=18),strip.text = element_text(size = 14))+scale_y_discrete(breaks=c(0,seq(5,80,by=5),"+85"))+ scale_fill_manual(values = c("indianred2", "cornflowerblue", "magenta", "yellow"))+xlab("Share of Population Change (2009 to 2023)")+ xlim(-0.02, 0.02)+scale_x_continuous(breaks = POP_RANGE,labels = POP_LABEL,limits=c(-0.020,0.020))
|
||||
dev.off()
|
||||
|
||||
####Make a facet plot of trends of major population groups
|
||||
GRAPH_DATA <- rbind(readRDS("Data/Cleaned_Data/Demographic_Sex_Age_Data/RDS/Kemmerer_Diamondville_Demographics.Rds"),readRDS("Data/Cleaned_Data/Demographic_Sex_Age_Data/RDS/Other_Lincoln_Demographics.Rds"))
|
||||
AVG_AGE <- GRAPH_DATA %>% mutate(Population=Num_Female+Num_Male) %>% group_by(Region,Year) %>% summarize(Average_Age=sum(Age*Population)/sum(Population)) %>% ungroup
|
||||
NUM_CHILDREN <- GRAPH_DATA %>% mutate(Population=Num_Female+Num_Male) %>% filter(Age<=18) %>% group_by(Region,Year) %>% summarize(Children=sum(Population)) %>% ungroup
|
||||
NUM_CHILDREN <- GRAPH_DATA %>% mutate(Population=Num_Female+Num_Male) %>% filter(Age<=18) %>% group_by(Region,Year) %>% summarize('0-18'=sum(Population)) %>% ungroup
|
||||
NUM_ADULT <- GRAPH_DATA %>% mutate(Population=Num_Female+Num_Male) %>% filter(Age>=18,Age<31) %>% group_by(Region,Year) %>% summarize('18-30'=sum(Population)) %>% ungroup
|
||||
NUM_WORKING_ADULT <- GRAPH_DATA %>% mutate(Population=Num_Female+Num_Male) %>% filter(Age>=31,Age<55) %>% group_by(Region,Year) %>% summarize('31-54'=sum(Population)) %>% ungroup
|
||||
NUM_RETIRED <- GRAPH_DATA %>% mutate(Population=Num_Female+Num_Male) %>% filter(Age>=55) %>% group_by(Region,Year) %>% summarize('55+'=sum(Population)) %>% ungroup
|
||||
|
||||
|
||||
|
||||
MEDIAN_AGE <- GRAPH_DATA %>% mutate(Population=Num_Female+Num_Male) %>% group_by(Region,Year) %>% mutate(ROLLSUM=cumsum(Population),MID_POINT=ROLLSUM>=sum(Population)/2) %>% filter(MID_POINT) %>% filter(Age==min(Age)) %>% select(County,Region,Med_Age=Age) %>% ungroup
|
||||
|
||||
GRAPH_DATA <- AVG_AGE %>% left_join(MEDIAN_AGE) %>% left_join(NUM_CHILDREN ) %>% left_join(NUM_ADULT ) %>% left_join(NUM_WORKING_ADULT)%>% left_join(NUM_RETIRED)
|
||||
GRAPH_DATA <- GRAPH_DATA %>% pivot_longer(cols=c('Children','18-30','31-54','55+'),names_to='Age Category',values_to='Population')
|
||||
readRDS("Data/Cleaned_Data/Population_Data/RDS/All_Wyoming_City_Populations.Rds") %>% pull(City) %>% unique
|
||||
readRDS("Data/Cleaned_Data/Population_Data/RDS/All_Wyoming_County_Populations.Rds")
|
||||
|
||||
readRDS("Data/Cleaned_Data/Demographic_Sex_Age_Data/RDS/Kemmerer_Diamondville_Demographics.Rds") %>% group_by(Year) %>% summarize(Demo_Population=sum(Num_Male)+sum(Num_Female)) %>% left_join(readRDS("Data/Cleaned_Data/Population_Data/RDS/All_Wyoming_City_Populations.Rds") %>% filter(City=='Kemmerer'| City=='Diamondville') %>% group_by(Year) %>% summarize(City_Population=sum(Population))) %>% mutate(Demo_Population/City_Population)
|
||||
|
||||
readRDS("Data/Cleaned_Data/Demographic_Sex_Age_Data/RDS/Full_Lincoln_County_Demographics.Rds") %>% group_by(Year)%>% summarize(Demo_Population=sum(Num_Male)+sum(Num_Female)) %>% left_join(readRDS("Data/Cleaned_Data/Population_Data/RDS/All_Wyoming_County_Populations.Rds") %>% filter(County=='Lincoln') %>% group_by(Year) %>% summarize(County_Population=sum(Population))) %>% tail
|
||||
%>% mutate(Demo_Population/City_Population
|
||||
|
||||
getwd()
|
||||
%>% group_by(Year) %>% summarize(Demo_Population=sum(Num_Male)+sum(Num_Female))
|
||||
|
||||
%>% filter(County='Lincoln') %>% group_by(Year) %>% summarize(Population=sum(Population))
|
||||
GRAPH_DATA
|
||||
GRAPH_DATA %>% filter(Region=='Kemmerer & Diamondville') %>% group_by(Year) %>% summarize(POP_DEMO=sum(Population)) %>% left_join(TEST)
|
||||
|
||||
rbind(readRDS("Data/Cleaned_Data/Demographic_Sex_Age_Data/RDS/Kemmerer_Diamondville_Demographics.Rds")) %>% mutate(Population=Num_Female+Num_Male) %>% group_by(Year) %>% summarize(Pop_Demo=sum(Population)) %>% left_join(TEST)
|
||||
|
||||
GRAPH_DATA
|
||||
|
||||
ggplot(GRAPH_DATA,aes(x=Year,y=Population,color=Region)) +facet_grid( ~`Age Category`)+geom_line()
|
||||
ggplot(GRAPH_DATA,aes(x=Year,color=Region)) + geom_line(aes(y=Average_Age),linetype=1,size=1.5)
|
||||
ggplot(GRAPH_DATA,aes(x=Year,color=Region)) + geom_line(aes(y=Children),linetype=1,size=1.5)+ geom_line(aes(y=`18-30`),linetype=2,size=1.5)+ geom_line(aes(y=`31-54`),linetype=3,size=1.5)+ geom_line(aes(y=`55+`),linetype=4,size=1.5)
|
||||
GRAPH_DATA <- GRAPH_DATA %>% pivot_longer(cols=c('0-18','18-30','31-54','55+'),names_to='Age Category',values_to='Population')
|
||||
GRAPH_DATA <- GRAPH_DATA %>% mutate(Region=ifelse(Region=='Lincoln_Other','Rest of Lincoln County',Region))
|
||||
|
||||
|
||||
png(paste0(SAVE_PY_LOC,"Kemmerer_Lincoln_Population_Trends.png"), res = 600, height = 6, width=7, units = "in")
|
||||
ggplot(GRAPH_DATA,aes(x=Year,y=Population,color=Region)) +facet_wrap( ~`Age Category`)+geom_line(size=1)+theme(axis.text = element_text(size = 10),legend.position = "top",legend.text=element_text(size=14),legend.title = element_blank(),axis.title=element_text(size=18),strip.text = element_text(size = 14))
|
||||
dev.off()
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
PY_DATA <- GRAPH_DATA %>% pivot_longer(cols=c("Num_Female","Num_Male"),names_to="Sex",values_to="Population") %>% mutate(Sex=ifelse(Sex=="Num_Female","Female","Male"))
|
||||
png(paste0(SAVE_PY_LOC,"Kemmerer_Population_Trends.png"), res = 600, height = 7, width=5, units = "in")
|
||||
ggplot(GRAPH_DATA %>% filter(Region=='Kemmerer & Diamondville'),aes(x=Year,y=Population)) +facet_wrap( ~`Age Category`,ncol=1)+geom_line(size=1,color='red')+theme(axis.text = element_text(size = 10),legend.position = "top",legend.text=element_text(size=14),legend.title = element_blank(),axis.title=element_text(size=18),strip.text = element_text(size = 14))
|
||||
dev.off()
|
||||
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user