120 lines
6.7 KiB
R
120 lines
6.7 KiB
R
library(tidyverse)
|
|
library(tidycensus)
|
|
library(zipcodeR)
|
|
#Packages to instal on computer if zipcodeR won't install
|
|
#Sudo apt install libssl-dev libudunits2-dev libabsl-dev libcurl4-openssl-dev libgdal-dev cmake libfontconfig1-dev libharfbuzz-dev libfribidi-dev
|
|
#install.packages("zipcodeR")
|
|
|
|
#Add API key if missing
|
|
#KEY <- '30e13ab22563318ff59286e433099f4174d4edd4'
|
|
#census_api_key(KEY, install = TRUE)
|
|
PROJ_TRACTS <- get_tracts(search_city('Kemmerer','WY')$zipcode) %>% full_join(get_tracts(search_city('Diamondville','WY')$zipcode))
|
|
PROJ_TRACTS <- PROJ_TRACTS %>% select(GEOID) %>% mutate('IN_KEM'=1) %>% mutate(GEOID=as.character(GEOID))
|
|
|
|
MED_AGE_VAR <- cbind(c('B01002_001E','B01002_002E','B01002_003E'),c('Median_Age','Median_Age_Male','Median_Age_Female')) %>% as_tibble %>% rename(variable=V1,Data_Type=V2)
|
|
#Pull the relevant median age variables the value moe (margine of error) can be converted to standard error, following the link below
|
|
#https://www.census.gov/content/dam/Census/library/publications/2018/acs/acs_general_handbook_2018_ch08.pdf
|
|
|
|
MED_AGE_VALUES <- get_acs(geography="tract",variables=MED_AGE_VAR$variable,state='WY',county='lincoln') %>% mutate(se=moe/1.64) %>% left_join(MED_AGE_VAR %>% mutate(variable=gsub('E','',variable))) %>% select(-NAME,-moe) %>% left_join(PROJ_TRACTS) %>% mutate(IN_KEM=ifelse(is.na(IN_KEM),0,1))
|
|
AGE_DIFF <- MED_AGE_VALUES %>% group_by(IN_KEM,Data_Type) %>% summarize(Age=mean(estimate)) %>% pivot_wider(names_from=IN_KEM,values_from=Age,names_prefix="In_Kemmerer_")
|
|
AGE_DIFF
|
|
|
|
###Load data manually created which links vairable names to sex-age census data
|
|
CODES <- read_csv("Data/API_CENSUS_CODES.csv",skip=1) %>% mutate(Med_Age=(Min_Age+Max_Age)/2) %>% rename(variable=Code)
|
|
|
|
|
|
|
|
#Testing age Comparison between the two
|
|
###Extract census data for all tracts in Lincoln county, clean up the data, and indicate if the tract is in Kemmerer/Diamondvile or not.
|
|
AGE_DATA <- get_acs(geography="tract",variables=CODES$variable,state='WY',county='lincoln') %>% mutate(se=moe/1.64) %>% left_join(CODES %>% mutate(variable=gsub('E','',variable))) %>% select(-NAME,-moe) %>% left_join(PROJ_TRACTS) %>% mutate(IN_KEM=ifelse(is.na(IN_KEM),0,1)) %>% rename(Population=estimate) %>% select(-variable,-GEOID) %>% select(Sex,Min_Age,Max_Age,Med_Age,IN_KEM,Population,se) %>% filter(!(Min_Age==0& Max_Age==Inf))
|
|
|
|
AGE_DATA <- AGE_DATA %>% group_by(Sex,Min_Age,Max_Age,Med_Age,IN_KEM)%>% summarize(Population=sum(Population)) %>% ungroup
|
|
#Add Descriptive age category to a clean graph
|
|
AGE_DATA$Ages <- paste(AGE_DATA$Min_Age,"to",AGE_DATA$Max_Age)
|
|
AGE_DATA[AGE_DATA$Max_Age==Inf,"Ages"] <- "85+"
|
|
AGE_DATA[AGE_DATA$Med_Age==18,"Ages"] <- "18"
|
|
AGE_DATA[AGE_DATA$Med_Age==20,"Ages"] <- "20"
|
|
AGE_DATA[AGE_DATA$Med_Age==21,"Ages"] <- "21"
|
|
AGE_DATA[AGE_DATA$Min_Age==0,"Ages"] <- "Under 5"
|
|
#Turn the ages into factors to keep the correct order in graphs
|
|
ORD <- AGE_DATA %>% select(Min_Age,Ages) %>% unique %>% arrange(Min_Age) %>% pull(Ages) %>% unique
|
|
AGE_DATA$Ages <- factor(AGE_DATA$Ages,levels=ORD)
|
|
#Add the percent of total relative population in theregion
|
|
AGE_DATA <- AGE_DATA %>% mutate(Per_Pop=Population/sum(Population)) %>% group_by(IN_KEM) %>% mutate(Per_Pop_Region=Population/sum(Population)) %>% ungroup
|
|
|
|
#Add a region name for clearer graphs
|
|
AGE_DATA <- AGE_DATA %>% mutate(Region=ifelse(IN_KEM==1,'Kemmerer','Lincoln'))
|
|
#AGE_DATA <- AGE_DATA %>% group_by(IN_KEM) %>% mutate(MORE_KEMMER=ifelse(sum(IN_KEM*Per_Pop_Region)>sum((1-IN_KEM)*Per_Pop_Region),1,0)) %>% ungroup
|
|
|
|
PLOT <- ggplot(AGE_DATA, aes(x =Ages, y = Per_Pop_Region)) + geom_line() + geom_point(aes(color = Region ), size = 5) + scale_color_brewer(palette = "Set1", direction = 1) + theme(legend.position = "bottom")+facet_wrap(~Sex,ncol=1)
|
|
#PLOT
|
|
Age <- 13
|
|
SEX <- 'Male'
|
|
DATA <- AGE_DATA
|
|
GET_VALUE <- function(Age,SEX,DATA=AGE_DATA){
|
|
DATA <- DATA %>% filter(Region=='Lincoln',Sex==Sex)
|
|
if(any(Age==DATA$Med_Age)){
|
|
return(DATA[which(Age==DATA$Med_Age),] %>% pull(Per_Pop))
|
|
}
|
|
TEMP <- DATA %>% arrange(Min_Age) %>% select(Med_Age,Min_Age,Max_Age,Per_Pop) %>% filter(Per_Pop!=0)
|
|
LOWER <- TEMP[max(which(TEMP$Med_Age<Age)),]
|
|
UPPER <- TEMP[min(which(TEMP$Med_Age>Age)),]
|
|
C <- LOWER$Med_Age
|
|
ST <- LOWER$Per_Pop
|
|
DELTA <- UPPER$Per_Pop-LOWER$Per_Pop
|
|
GAP <- UPPER$Med_Age-LOWER$Med_Age
|
|
return(ST+(Age-C)*DELTA/GAP)
|
|
}
|
|
GET_VALUE(27,"Male",AGE_DATA)
|
|
|
|
|
|
AGE_DATA %>% filter(Med_Age<=x,Med_Age>=x,Sex==SEX) %>% arrange(Min_Age)
|
|
|
|
TEMP
|
|
TEMP[,"Per_Pop"]
|
|
|
|
|
|
AGE_FORWARD
|
|
AGE_FORWARD <- AGE_DATA %>% filter(!(Min_Age==0 & Max_Age==Inf))%>% mutate(POP_TOTAL=POP_OUT+POP_KEM,KEM_RATIO=POP_KEM/POP_TOTAL) %>% select(Sex,Min_Age,Max_Age,KEM_RATIO)
|
|
STORE <- AGE_FORWARD %>% filter(Min_Age==0,Max_Age==4)
|
|
AGE_FORWARD <- STORE %>% full_join(AGE_FORWARD %>% mutate(Min_Age=Min_Age+4,Max_Age=Max_Age+4))
|
|
MALE_FORWARD <- AGE_FORWARD %>% filter(Sex=='Male') %>% arrange(Min_Age) %>% filter(KEM_RATIO!=0) %>% mutate(Med_Age=(Min_Age+Max_Age)/2)
|
|
NUM_IN_GROUP <- MALE_FORWARD$Max_Age- MALE_FORWARD$Min_Age+1
|
|
NUM_IN_GROUP[23] <- 1
|
|
MALE_FORWARD$KEM_RATIO
|
|
ggplot(MALE_FORWARD,aes(x=Med_Age,y=KEM_RATIO)) +geom_point()+geom_smooth(span=0.25)
|
|
loess(KEM_RATIO ~ Med_Age,data=MALE_FORWARD,span=0.3 )
|
|
?loess
|
|
FEMALE_FORWARD <- AGE_FORWARD %>% filter(Sex=='Female') %>% arrange(Min_Age)
|
|
MALE_FORWARD
|
|
|
|
plot(AGE_FORWARD$KEM_RATIO)
|
|
gg <- ggplot(GRAPH_DATA, aes(x=PER_OUT, xend=PER_KEM, y=MED_AGE, group=Sex)) +
|
|
geom_dumbbell(color="#a3c4dc",
|
|
size=0.75,
|
|
point.colour.l="#0e668b") +
|
|
scale_x_continuous(label=percent) +
|
|
labs(x=NULL,
|
|
y=NULL,
|
|
title="Dumbbell Chart",
|
|
subtitle="Pct Change: 2013 vs 2014",
|
|
caption="Source: https://github.com/hrbrmstr/ggalt") +
|
|
theme(plot.title = element_text(hjust=0.5, face="bold"),
|
|
plot.background=element_rect(fill="#f7f7f7"),
|
|
panel.background=element_rect(fill="#f7f7f7"),
|
|
panel.grid.minor=element_blank(),
|
|
panel.grid.major.y=element_blank(),
|
|
panel.grid.major.x=element_line(),
|
|
axis.ticks=element_blank(),
|
|
legend.position="top",
|
|
panel.border=element_blank())
|
|
ggplot(GRAPH_DATA %>% filter(Sex=='Male')) +geom_point(aes(x=Med_Age,y=PER_KEM),color='blue') +geom_point(aes(x=Med_Age,y=PER_OUT)) +geom_smooth(aes(x=Med_Age,y=PER_OUT),color='black',span=SPAN)+geom_smooth(aes(x=Med_Age,y=PER_KEM),span=SPAN)
|
|
ggplot(GRAPH_DATA %>% filter(Sex=='Female')) +geom_point(aes(x=Med_Age,y=PER_KEM),color='blue') +geom_point(aes(x=Med_Age,y=PER_OUT)) +geom_smooth(aes(x=Med_Age,y=PER_OUT),color='black',span=SPAN)+geom_smooth(aes(x=Med_Age,y=PER_KEM),span=SPAN)
|
|
|
|
|
|
|
|
|
|
|
|
AGE_DATA %>% pivot_wider(names_from=c(Sex,Min_Age,Max_Age,Med_Age,IN_KEM),values_from=Population)
|
|
|