diff --git a/README.md b/README.md deleted file mode 100644 index bfe3628..0000000 --- a/README.md +++ /dev/null @@ -1,3 +0,0 @@ -# Population_Study - -Code to predict the population of Lincoln County \ No newline at end of file diff --git a/Scripts/Functions.r b/Scripts/Functions.r deleted file mode 100644 index 77e56ff..0000000 --- a/Scripts/Functions.r +++ /dev/null @@ -1,26 +0,0 @@ -#A function to extract date from FRED. Assumes the date is either annual or monthly. -FRED_GET <- function(FRED_SERIES_ID,NAME=NA,ST_DATE='1890-01-91',END_DATE=Sys.Date(),ANNUAL_DATA=TRUE){ - NAME <- ifelse(is.na(NAME),FRED_SERIES_ID,NAME) - DATA_TIME_FRAME <- ifelse(ANNUAL_DATA,"Annual","Monthly") - URL <- paste0('https://fred.stlouisfed.org/graph/fredgraph.csv?bgcolor=%23ebf3fb&chart_type=line&drp=0&fo=open%20sans&graph_bgcolor=%23ffffff&height=450&mode=fred&recession_bars=off&txtcolor=%23444444&ts=12&tts=12&width=827&nt=0&thu=0&trc=0&show_legend=no&show_axis_titles=no&show_tooltip=no&id=',FRED_SERIES_ID,'&scale=left&cosd=',ST_DATE,'&coed=',END_DATE,'&line_color=%230073e6&link_values=false&line_style=solid&mark_type=none&mw=3&lw=3&ost=-99999&oet=99999&mma=0&fml=a&fq=',DATA_TIME_FRAME,'&fam=avg&fgst=lin&fgsnd=2020-02-01&line_index=1&transformation=lin&vintage_date=',END_DATE,'&revision_date=',END_DATE,'&nd=',END_DATE) - DATA <- read_csv(URL) - DATA <- DATA[which(!is.na(DATA[,2] )),] - colnames(DATA)[2] <- NAME - if(ANNUAL_DATA){ - colnames(DATA)[1] <- "YEAR" - DATA$YEAR <- year(DATA$YEAR) - }else{ - colnames(DATA)[1] <- "DATE" - } - return(DATA) -} - -CPI_ADJUST <- function(DATA){ - CPI <- FRED_GET("CPIAUCSL") %>% filter(!is.na(CPIAUCSL)) - CPI$CPI_ADJ <- as.numeric(CPI[which.max(CPI$YEAR),2])/CPI$CPIAUCSL - CPI <- CPI %>% select(-"CPIAUCSL") - DATA <- DATA %>% left_join(CPI) - DATA[,2] <- DATA[,2]*DATA[,"CPI_ADJ"] - DATA <- DATA%>% select(-"CPI_ADJ") - return(DATA) -} diff --git a/Scripts/Load_Wyoming_Web_Data.r b/Scripts/Load_Wyoming_Web_Data.r deleted file mode 100644 index 26ab369..0000000 --- a/Scripts/Load_Wyoming_Web_Data.r +++ /dev/null @@ -1,28 +0,0 @@ -library(rvest) -#Data found on the page http://eadiv.state.wy.us/pop/ -PAGE <- read_html("http://eadiv.state.wy.us/pop/BirthDeathMig.htm") -NODE <- html_element(PAGE ,"table") -TBL <- html_table(NODE) - -ST <- which(toupper(TBL$X1)=="ALBANY") -END <- which(toupper(TBL$X1)=="TOTAL") -TYPES <- TBL[ST-2,1] - -ST_YEAR <- 1971 -ALL_DATA <- list() -TBL <- TBL[,c(1,which(!is.na(as.numeric(TBL[ST[1],]))))] -TBL <- TBL[,-ncol(TBL)] -colnames(TBL) <- c("County",(ST_YEAR:(ST_YEAR+ncol(TBL)-1))) -TBL$Type <- NA -for(i in 1:length(ST)){ - TBL[ST[i]:END[i],"Type"]<- as.character(TYPES[i,1]) -} -TBL[ST[2]:END[2],"Type"] <- as.character(TYPES[2,1]) -TBL$Type -TBL <- TBL %>% filter(!is.na(Type)) %>% select(County,Type,everything()) -GROUP <- colnames(TBL)[-1:-2] -Data <- pivot_longer(TBL,all_of(GROUP),names_to="Year",values_to="Pop_Change") -Data$County <- ifelse(toupper(Data$County)=="TOTAL","Wyoming",Data$County) -WY_COUNTY_DATA_SET <- pivot_wider(Data,names_from=Type,values_from=Pop_Change) -colnames(Data)[5] <-"Migration" - diff --git a/Zip_Code.r b/Zip_Code.r index 67f2621..b9de11b 100644 --- a/Zip_Code.r +++ b/Zip_Code.r @@ -1,42 +1,119 @@ library(tidyverse) library(tidycensus) library(zipcodeR) -install.packages("zipcodeR") -install.packages("units") +#Packages to instal on computer if zipcodeR won't install +#Sudo apt install libssl-dev libudunits2-dev libabsl-dev libcurl4-openssl-dev libgdal-dev cmake libfontconfig1-dev libharfbuzz-dev libfribidi-dev +#install.packages("zipcodeR") + +#Add API key if missing + #KEY <- '30e13ab22563318ff59286e433099f4174d4edd4' + #census_api_key(KEY, install = TRUE) +PROJ_TRACTS <- get_tracts(search_city('Kemmerer','WY')$zipcode) %>% full_join(get_tracts(search_city('Diamondville','WY')$zipcode)) +PROJ_TRACTS <- PROJ_TRACTS %>% select(GEOID) %>% mutate('IN_KEM'=1) %>% mutate(GEOID=as.character(GEOID)) + +MED_AGE_VAR <- cbind(c('B01002_001E','B01002_002E','B01002_003E'),c('Median_Age','Median_Age_Male','Median_Age_Female')) %>% as_tibble %>% rename(variable=V1,Data_Type=V2) +#Pull the relevant median age variables the value moe (margine of error) can be converted to standard error, following the link below +#https://www.census.gov/content/dam/Census/library/publications/2018/acs/acs_general_handbook_2018_ch08.pdf + +MED_AGE_VALUES <- get_acs(geography="tract",variables=MED_AGE_VAR$variable,state='WY',county='lincoln') %>% mutate(se=moe/1.64) %>% left_join(MED_AGE_VAR %>% mutate(variable=gsub('E','',variable))) %>% select(-NAME,-moe) %>% left_join(PROJ_TRACTS) %>% mutate(IN_KEM=ifelse(is.na(IN_KEM),0,1)) +AGE_DIFF <- MED_AGE_VALUES %>% group_by(IN_KEM,Data_Type) %>% summarize(Age=mean(estimate)) %>% pivot_wider(names_from=IN_KEM,values_from=Age,names_prefix="In_Kemmerer_") +AGE_DIFF + +###Load data manually created which links vairable names to sex-age census data +CODES <- read_csv("Data/API_CENSUS_CODES.csv",skip=1) %>% mutate(Med_Age=(Min_Age+Max_Age)/2) %>% rename(variable=Code) -#install.packages("terra") -census_api_key(KEY, install = TRUE) -KEY <- '30e13ab22563318ff59286e433099f4174d4edd4' -#Ex -TRACTS <-get_tracts(search_city('Kemmerer','WY')$zipcode) %>% full_join(get_tracts(search_city('Diamondville','WY')$zipcode)) -get_decennial(geography="tract",variables=vars,state='WY',county='lincoln') - -c('B01002_001E','B01002_002E','B01002_003E') -c('Median_Age','Median_Age_Male','Median_Age_Female') - - - - - -get_acs(geography="tract",variables=vars,state='WY',county='lincoln',GEOID='56023978400') -######MALE - -#Variable names from: https://api.census.gov/data/2019/acs/acs1/variables.html -sapply(1:25,function(x){paste0("B01001_00",x,"E")}) -c(seq(0,15,by=5),18,20,21,22,seq(25,85,by=5)) -c(seq(4,17,by=5),17,19,20,21,24,seq(29,84,by=5)) -CODES <- read_csv("Data/API_CENSUS_CODES.csv") %>% mutate(Med_Age=(Min_Age+Max_Age)/2) -VARS <- CODES$Code #Testing age Comparison between the two -TEMP <- get_acs(geography="tract",variables=VARS,state='WY',county='lincoln') -KEM <- TEMP %>% filter(GEOID=='56023978400') -KEM -19/sum(KEM$estimate) +###Extract census data for all tracts in Lincoln county, clean up the data, and indicate if the tract is in Kemmerer/Diamondvile or not. +AGE_DATA <- get_acs(geography="tract",variables=CODES$variable,state='WY',county='lincoln') %>% mutate(se=moe/1.64) %>% left_join(CODES %>% mutate(variable=gsub('E','',variable))) %>% select(-NAME,-moe) %>% left_join(PROJ_TRACTS) %>% mutate(IN_KEM=ifelse(is.na(IN_KEM),0,1)) %>% rename(Population=estimate) %>% select(-variable,-GEOID) %>% select(Sex,Min_Age,Max_Age,Med_Age,IN_KEM,Population,se) %>% filter(!(Min_Age==0& Max_Age==Inf)) -LN <- TEMP %>% filter(GEOID!='56023978400') -LN -19/sum(KEM$estimate) +AGE_DATA <- AGE_DATA %>% group_by(Sex,Min_Age,Max_Age,Med_Age,IN_KEM)%>% summarize(Population=sum(Population)) %>% ungroup +#Add Descriptive age category to a clean graph +AGE_DATA$Ages <- paste(AGE_DATA$Min_Age,"to",AGE_DATA$Max_Age) +AGE_DATA[AGE_DATA$Max_Age==Inf,"Ages"] <- "85+" +AGE_DATA[AGE_DATA$Med_Age==18,"Ages"] <- "18" +AGE_DATA[AGE_DATA$Med_Age==20,"Ages"] <- "20" +AGE_DATA[AGE_DATA$Med_Age==21,"Ages"] <- "21" +AGE_DATA[AGE_DATA$Min_Age==0,"Ages"] <- "Under 5" +#Turn the ages into factors to keep the correct order in graphs +ORD <- AGE_DATA %>% select(Min_Age,Ages) %>% unique %>% arrange(Min_Age) %>% pull(Ages) %>% unique +AGE_DATA$Ages <- factor(AGE_DATA$Ages,levels=ORD) +#Add the percent of total relative population in theregion +AGE_DATA <- AGE_DATA %>% mutate(Per_Pop=Population/sum(Population)) %>% group_by(IN_KEM) %>% mutate(Per_Pop_Region=Population/sum(Population)) %>% ungroup + +#Add a region name for clearer graphs +AGE_DATA <- AGE_DATA %>% mutate(Region=ifelse(IN_KEM==1,'Kemmerer','Lincoln')) +#AGE_DATA <- AGE_DATA %>% group_by(IN_KEM) %>% mutate(MORE_KEMMER=ifelse(sum(IN_KEM*Per_Pop_Region)>sum((1-IN_KEM)*Per_Pop_Region),1,0)) %>% ungroup + +PLOT <- ggplot(AGE_DATA, aes(x =Ages, y = Per_Pop_Region)) + geom_line() + geom_point(aes(color = Region ), size = 5) + scale_color_brewer(palette = "Set1", direction = 1) + theme(legend.position = "bottom")+facet_wrap(~Sex,ncol=1) +#PLOT +Age <- 13 +SEX <- 'Male' +DATA <- AGE_DATA +GET_VALUE <- function(Age,SEX,DATA=AGE_DATA){ + DATA <- DATA %>% filter(Region=='Lincoln',Sex==Sex) + if(any(Age==DATA$Med_Age)){ + return(DATA[which(Age==DATA$Med_Age),] %>% pull(Per_Pop)) +} + TEMP <- DATA %>% arrange(Min_Age) %>% select(Med_Age,Min_Age,Max_Age,Per_Pop) %>% filter(Per_Pop!=0) + LOWER <- TEMP[max(which(TEMP$Med_AgeAge)),] + C <- LOWER$Med_Age + ST <- LOWER$Per_Pop + DELTA <- UPPER$Per_Pop-LOWER$Per_Pop + GAP <- UPPER$Med_Age-LOWER$Med_Age +return(ST+(Age-C)*DELTA/GAP) +} +GET_VALUE(27,"Male",AGE_DATA) + AGE_DATA %>% filter(Med_Age<=x,Med_Age>=x,Sex==SEX) %>% arrange(Min_Age) + +TEMP +TEMP[,"Per_Pop"] + + +AGE_FORWARD +AGE_FORWARD <- AGE_DATA %>% filter(!(Min_Age==0 & Max_Age==Inf))%>% mutate(POP_TOTAL=POP_OUT+POP_KEM,KEM_RATIO=POP_KEM/POP_TOTAL) %>% select(Sex,Min_Age,Max_Age,KEM_RATIO) +STORE <- AGE_FORWARD %>% filter(Min_Age==0,Max_Age==4) +AGE_FORWARD <- STORE %>% full_join(AGE_FORWARD %>% mutate(Min_Age=Min_Age+4,Max_Age=Max_Age+4)) +MALE_FORWARD <- AGE_FORWARD %>% filter(Sex=='Male') %>% arrange(Min_Age) %>% filter(KEM_RATIO!=0) %>% mutate(Med_Age=(Min_Age+Max_Age)/2) +NUM_IN_GROUP <- MALE_FORWARD$Max_Age- MALE_FORWARD$Min_Age+1 +NUM_IN_GROUP[23] <- 1 +MALE_FORWARD$KEM_RATIO +ggplot(MALE_FORWARD,aes(x=Med_Age,y=KEM_RATIO)) +geom_point()+geom_smooth(span=0.25) +loess(KEM_RATIO ~ Med_Age,data=MALE_FORWARD,span=0.3 ) +?loess +FEMALE_FORWARD <- AGE_FORWARD %>% filter(Sex=='Female') %>% arrange(Min_Age) +MALE_FORWARD + +plot(AGE_FORWARD$KEM_RATIO) +gg <- ggplot(GRAPH_DATA, aes(x=PER_OUT, xend=PER_KEM, y=MED_AGE, group=Sex)) + + geom_dumbbell(color="#a3c4dc", + size=0.75, + point.colour.l="#0e668b") + + scale_x_continuous(label=percent) + + labs(x=NULL, + y=NULL, + title="Dumbbell Chart", + subtitle="Pct Change: 2013 vs 2014", + caption="Source: https://github.com/hrbrmstr/ggalt") + + theme(plot.title = element_text(hjust=0.5, face="bold"), + plot.background=element_rect(fill="#f7f7f7"), + panel.background=element_rect(fill="#f7f7f7"), + panel.grid.minor=element_blank(), + panel.grid.major.y=element_blank(), + panel.grid.major.x=element_line(), + axis.ticks=element_blank(), + legend.position="top", + panel.border=element_blank()) +ggplot(GRAPH_DATA %>% filter(Sex=='Male')) +geom_point(aes(x=Med_Age,y=PER_KEM),color='blue') +geom_point(aes(x=Med_Age,y=PER_OUT)) +geom_smooth(aes(x=Med_Age,y=PER_OUT),color='black',span=SPAN)+geom_smooth(aes(x=Med_Age,y=PER_KEM),span=SPAN) +ggplot(GRAPH_DATA %>% filter(Sex=='Female')) +geom_point(aes(x=Med_Age,y=PER_KEM),color='blue') +geom_point(aes(x=Med_Age,y=PER_OUT)) +geom_smooth(aes(x=Med_Age,y=PER_OUT),color='black',span=SPAN)+geom_smooth(aes(x=Med_Age,y=PER_KEM),span=SPAN) + + + + + +AGE_DATA %>% pivot_wider(names_from=c(Sex,Min_Age,Max_Age,Med_Age,IN_KEM),values_from=Population) +