library(rvest) library(tidyverse) library(readxl) #setwd("../") ########County, Death, Birth and Migration Data #Data found on the page http://eadiv.state.wy.us/pop/ PAGE <- read_html("http://eadiv.state.wy.us/pop/BirthDeathMig.htm") NODE <- html_element(PAGE ,"table") TBL <- html_table(NODE) ST <- which(toupper(TBL$X1)=="ALBANY") END <- which(toupper(TBL$X1)=="TOTAL") TYPES <- TBL[ST-2,1] ST_YEAR <- 1971 ALL_DATA <- list() TBL <- TBL[,c(1,which(!is.na(as.numeric(TBL[ST[1],]))))] TBL <- TBL[,-ncol(TBL)] colnames(TBL) <- c("County",(ST_YEAR:(ST_YEAR+ncol(TBL)-1))) TBL$Type <- NA for(i in 1:length(ST)){ TBL[ST[i]:END[i],"Type"]<- as.character(TYPES[i,1]) } TBL[ST[2]:END[2],"Type"] <- as.character(TYPES[2,1]) TBL$Type TBL <- TBL %>% filter(!is.na(Type)) %>% select(County,Type,everything()) GROUP <- colnames(TBL)[-1:-2] Data <- pivot_longer(TBL,all_of(GROUP),names_to="Year",values_to="Pop_Change") Data$County <- ifelse(toupper(Data$County)=="TOTAL","Wyoming",Data$County) WY_COUNTY_DATA_SET <- pivot_wider(Data,names_from=Type,values_from=Pop_Change) %>% rename("Migration"=`Net Migration`) %>% mutate(Year=as.integer(Year),Births=parse_number(Births),Deaths=parse_number(Deaths),Migration=parse_number(Migration)) ########################City and County Population Data 2020 to 2024 PAGE <- read_html('http://eadiv.state.wy.us/pop/Place-24EST.htm') NODE <- html_element(PAGE ,"table") TBL <- html_table(NODE) ST <- which(toupper(TBL$X1)==toupper("Albany County")) END <- which(toupper(TBL$X1)==toupper("Balance of Weston County")) #More years than are pulled are listed to make more generic COLUMNS <- c(1,which(TBL[ST-2,] %in% 1970:2025)) NAMES <- TBL[4,COLUMNS][-1] TBL <- TBL[ST:END,COLUMNS ] colnames(TBL) <- c("County",NAMES) TBL <- pivot_longer(TBL,all_of(colnames(TBL)[-1]),names_to="Year",values_to="Population") %>% mutate(Year=as.integer(Year),Population=parse_number(Population)) TBL$County <- gsub(" "," ",gsub("\n","",gsub("\r","",TBL %>% pull(County)))) COUNTY_POP<- TBL[grep("COUNTY",TBL %>% pull(County),ignore.case=TRUE),] COUNTY_POP<- COUNTY_POP[grep("Balance",COUNTY_POP%>% pull(County),invert=TRUE,ignore.case=TRUE),] COUNTY_POP$County <- gsub(" ","_",gsub(" County","",COUNTY_POP$County)) CITY_POP <- TBL[sort(c(grep("County",TBL %>% pull(County),invert=TRUE,ignore.case=TRUE),grep("Balance",TBL %>% pull(County),ignore.case=TRUE))),] CITY_POP$County <- gsub(" ","_",gsub("Balance of","Unincorporated",gsub(" County","",gsub(" city","",gsub(" town","",CITY_POP$County,ignore.case=TRUE),ignore.case=TRUE),ignore.case=TRUE),ignore.case=TRUE)) CITY_POP <- CITY_POP %>% rename("City"=County) ########################City Population Data 2010 to 2020 PAGE <- read_html('http://eadiv.state.wy.us/pop/sub-est11-19.htm') NODE <- html_element(PAGE ,"table") TBL <- html_table(NODE) ST <- which(toupper(TBL$X1)==toupper("Afton town, Wyoming")) END <- which(toupper(TBL$X1)==toupper("Yoder town, Wyoming")) #More years than are pulled are listed to make more generic COLUMNS <- c(1,which(TBL[ST-1,] %in% 1970:2025)) NAMES <- TBL[3,COLUMNS][-1] TBL <- TBL[ST:END,COLUMNS ] colnames(TBL) <- c("City",NAMES) TBL <- pivot_longer(TBL,all_of(colnames(TBL)[-1]),names_to="Year",values_to="Population") %>% mutate(Year=as.integer(Year),Population=parse_number(Population)) TBL$City <- gsub(" ","_",gsub(" $","",gsub("\r|\n| Wyoming|,| town| city","",TBL$City,ignore.case=TRUE))) CITY_POP <- rbind(TBL,CITY_POP) ########################County Population Data 2010 to 2020 PAGE <- read_html('http://eadiv.state.wy.us/pop/ctyest11-19.htm') NODE <- html_element(PAGE ,"table") TBL <- html_table(NODE) ST <- grep("Albany",TBL$X1) END <- grep("Weston",TBL$X1) #More years than are pulled are listed to make more generic COLUMNS <- c(1,which(TBL[ST-2,] %in% 1970:2025)) NAMES <- TBL[3,COLUMNS][-1] TBL <- TBL[ST:END,COLUMNS ] colnames(TBL) <- c("County",NAMES) TBL <- pivot_longer(TBL,all_of(colnames(TBL)[-1]),names_to="Year",values_to="Population") %>% mutate(Year=as.integer(Year),Population=parse_number(Population)) TBL$County <- gsub(" ","_",gsub(" "," ",gsub(" $","",gsub("\r|\n| Wyoming|,| town| city| County|\\.","",TBL$County,ignore.case=TRUE)))) COUNTY_POP <- rbind(TBL,COUNTY_POP) ########################County and City Population Data 2000 to 2010 PAGE <- read_html('http://eadiv.state.wy.us/pop/sub-est01-09.htm') NODE <- html_element(PAGE ,"table") TBL <- html_table(NODE) ST <- which(toupper(TBL$X1)==toupper("Albany County")) END <- which(toupper(TBL$X1)==toupper("Balance of Weston County")) #More years than are pulled are listed to make more generic COLUMNS <- c(1,which(TBL[ST-4,] %in% 1970:2025)) NAMES <- TBL[4,COLUMNS][-1] TBL <- TBL[ST:END,COLUMNS ] colnames(TBL) <- c("County",NAMES) TBL <- pivot_longer(TBL,all_of(colnames(TBL)[-1]),names_to="Year",values_to="Population") %>% mutate(Year=as.integer(Year),Population=parse_number(Population)) TBL$County <- gsub(" "," ",gsub("\n","",gsub("\r","",TBL %>% pull(County)))) COUNTY_TBL <- TBL[grep("COUNTY",TBL %>% pull(County),ignore.case=TRUE),] COUNTY_TBL <-COUNTY_TBL[grep("Balance",COUNTY_TBL%>% pull(County),invert=TRUE,ignore.case=TRUE),] COUNTY_TBL$County <-gsub("_(pt.)","", gsub(" ","_",gsub(" County","",COUNTY_TBL$County))) CITY_TBL <- TBL[sort(c(grep("County",TBL %>% pull(County),invert=TRUE,ignore.case=TRUE),grep("Balance",TBL %>% pull(County),ignore.case=TRUE))),] CITY_TBL$County <- gsub(" ","_",gsub("Balance of","Unincorporated",gsub(" County","",gsub(" city","",gsub(" town","",CITY_TBL$County,ignore.case=TRUE),ignore.case=TRUE),ignore.case=TRUE),ignore.case=TRUE)) CITY_TBL <- CITY_TBL %>% rename("City"=County) CITY_POP <- rbind(CITY_TBL,CITY_POP) #Cleanup names CITY_POP$City <- gsub("LaGrange","La_Grange",CITY_POP$City) COUNTY_POP <- rbind(COUNTY_TBL,COUNTY_POP) ####################County and City Population Data for 1990-2000 if(!file.exists("./Data/Pop_1990s.xls")){download.file('http://eadiv.state.wy.us/pop/c&sc90_00.xls',"./Data/Pop_1990s.xls")} TEMP <- read_xls("Data/Pop_1990s.xls",skip=2)[-1:-4,] colnames(TEMP)[1] <- "County" tail(TEMP) TEMP <- TEMP[1:which(TEMP[,1]=="Wind River Res."),] TEMP <- pivot_longer(TEMP,all_of(colnames(TEMP)[-1]),names_to="Year",values_to="Population") %>% mutate(Year=as.integer(Year),Population=as.numeric(Population)) TEMP_COUNTY <- TEMP[grepl("Cnty",TEMP %>% pull(County),ignore.case=TRUE),] TEMP_COUNTY$County <- gsub(" ","_",gsub(" "," ",gsub(" Cnty","",TEMP_COUNTY$County,ignore.case=TRUE))) TEMP_CITY <- TEMP[grep("Cnty",TEMP %>% pull(County),ignore.case=TRUE,invert=TRUE),] TEMP_CITY$County <- gsub("E_Therm","East_Therm",gsub(" ","_",gsub(" ","",TEMP_CITY %>% pull(County)))) TEMP_CITY <- TEMP_CITY %>% rename(City=County) TEMP_CITY %>% pull(City) %>% unique %>% sort CITY_POP <- rbind(TEMP_CITY,CITY_POP) CITY_POP %>% pull(City) %>% unique %>% sort COUNTY_POP <- rbind(TEMP_COUNTY,COUNTY_POP) #ggplot(aes(x=Year,y=Population,group=County,color=County),data=COUNTY_POP)+geom_line() try(rm(TEMP_CITY,TEMP_COUNTY,TEMP)) ####################County and City Population Data for 1980-1990 if(!file.exists("./Data/Pop_1980s.xls")){download.file('http://eadiv.state.wy.us/pop/C&SC8090.xls',"./Data/Pop_1980s.xls")} TEMP <- read_xls("Data/Pop_1980s.xls",skip=2)[-1:-4,] colnames(TEMP)[1] <- "County" TEMP <- TEMP[2:which(TEMP[,1]=="Upton"),1:(min(which(is.na(TEMP[2,])))-1)] TEMP <- pivot_longer(TEMP,all_of(colnames(TEMP)[-1]),names_to="Year",values_to="Population") %>% mutate(Year=as.integer(Year),Population=as.numeric(Population)) TEMP_COUNTY <- TEMP[grepl("Cty",TEMP %>% pull(County),ignore.case=TRUE),] TEMP_COUNTY$County <- gsub(" ","_",gsub(" "," ",gsub(" Cty","",TEMP_COUNTY$County,ignore.case=TRUE))) TEMP_CITY <- TEMP[grep("Cty",TEMP %>% pull(County),ignore.case=TRUE,invert=TRUE),] TEMP_CITY$County <-gsub("Frannie_","Frannie", gsub("Mtn._View","Mountain_View",gsub("E._Therm","East_Therm",gsub(" ","_",gsub(" ","",TEMP_CITY %>% pull(County)))))) TEMP_CITY <- TEMP_CITY %>% rename(City=County) CITY_POP <- rbind(TEMP_CITY,CITY_POP) COUNTY_POP <- rbind(TEMP_COUNTY,COUNTY_POP) #ggplot(aes(x=Year,y=Population,group=County,color=County),data=COUNTY_POP)+geom_line() try(rm(TEMP_CITY,TEMP_COUNTY,TEMP)) ####################County Population Data for 1980-1990 if(!file.exists("./Data/Pop_1970s.xls")){download.file('http://eadiv.state.wy.us/pop/Cnty7080.xls',"./Data/Pop_1970s.xls")} TEMP <- read_xls("Data/Pop_1970s.xls",skip=2)[-1:-4,] colnames(TEMP)[1] <- "County" TEMP <- TEMP[1:which(TEMP[,1]=="Weston"),] TEMP <- pivot_longer(TEMP,all_of(colnames(TEMP)[-1]),names_to="Year",values_to="Population") %>% mutate(Year=as.integer(Year),Population=as.numeric(Population)) TEMP$County <- gsub(" ","_",TEMP$County) COUNTY_POP <- rbind(TEMP,COUNTY_POP) #ggplot(aes(x=Year,y=Population,group=County,color=County),data=COUNTY_POP)+geom_line() try(rm(TEMP)) ###########Old data addtion:Period Ends in 1970 #See in part http://eadiv.state.wy.us/demog_data/cntycity_hist.htm LN_OLD <- c(12487,10894,10286,9023,9018,8640) #Missing in 1910 Year <- seq(1920,1970,by=10) TEMP <- cbind(Year,rep("Lincoln",6),LN_OLD) colnames(TEMP ) <- c("Year","County","Population") TEMP <- as_tibble(TEMP) COUNTY_POP <- rbind(TEMP,COUNTY_POP) %>% arrange(County,Year) KEM_OLD <- c(843,1517,1884,2026,1667,2028,2292) #1910 forward until 1970 Year <- seq(1910,1970,by=10) TEMP <- cbind(Year,rep("kemmerer",7),KEM_OLD) colnames(TEMP ) <- c("Year","City","Population") TEMP <- as_tibble(TEMP) CITY_POP <- rbind(TEMP,CITY_POP) DIAMOND_OLD <- c(696,726,812,586,415,398,485) TEMP <- cbind(Year,rep("Diamondvile",7),DIAMOND_OLD) colnames(TEMP ) <- c("Year","City","Population") TEMP <- as_tibble(TEMP) CITY_POP <- rbind(TEMP,CITY_POP) %>% arrange(City,Year) #Add Other Data COUNTY_POP <- COUNTY_POP %>% mutate(Year=as.integer(Year)) WY_COUNTY_DATA_SET <- COUNTY_POP %>% left_join(WY_COUNTY_DATA_SET ) ###Save Population Results write_csv(CITY_POP,file="./Data/Cleaned_Data/Wyoming_City_Population.csv") write_csv(WY_COUNTY_DATA_SET ,file="./Data/Cleaned_Data/Wyoming_County_Population.csv") saveRDS(CITY_POP,file="./Data/Cleaned_Data/Wyoming_City_Population.Rds") saveRDS(WY_COUNTY_DATA_SET ,file="./Data/Cleaned_Data/Wyoming_County_Population.Rds") ###################Demographics if(!file.exists("./Data/Demo_Single_Year_2020s.xls")){download.file('http://eadiv.state.wy.us/Pop/CO_SYASEX24.xlsx',"./Data/Demo_Single_Year_2020s.xls")} TEMP <- read_xlsx("./Data/Demo_Single_Year_2020s.xls",skip=2)[,-1] TEMP <- TEMP[1:(min(which(is.na(TEMP[,1])))-1),] TEMP$YEAR <- year(as.Date(substr((TEMP$YEAR),1,8),format="%m/%d/%Y")) DEM_2020_SINGLE <- TEMP ###Standard Population to adjust FROM #https://hdpulse.nimhd.nih.gov/data-portal/mortality/table?cod=247&cod_options=cod_15&ratetype=aa&ratetype_options=ratetype_2&race=00&race_options=race_6&sex=2&sex_options=sex_3&age=001&age_options=age_11&ruralurban=0&ruralurban_options=ruralurban_3&yeargroup=5&yeargroup_options=year5yearmort_1&statefips=56&statefips_options=area_states&county=56000&county_options=counties_wyoming&comparison=counties_to_us&comparison_options=comparison_counties&radio_comparison=areas&radio_comparison_options=cods_or_areas PAGE <- read_html("https://seer.cancer.gov/stdpopulations/stdpop.20ages.html") NODE <- html_element(PAGE ,"table") TBL <- html_table(NODE) TBL <- TBL[,1:2] colnames(TBL) <- c("Age_Group","Population") DEM_2020_SINGLE ####Demographics 2010-2020 PAGE <- read_html('http://eadiv.state.wy.us/Pop/CO-AS10-20.htm') NODE <- html_element(PAGE ,"table") TBL <- html_table(NODE) NAMES <- gsub(" ","_",TBL[5,] %>% t) colnames(TBL ) <- NAMES TBL <- TBL[-1:-5,c(-4)] TBL$County <- gsub(" ","_",gsub(" "," ",gsub(" County","",TBL$County))) TEMP <- pivot_longer(TBL,all_of(colnames(TBL)[-1:-3]),names_to="Year",values_to="Population") %>% mutate(Year=as.integer(Year),Population=parse_number(Population)) TEMP <- TEMP %>% unique TEMP <- TEMP %>% filter(!Age_Group=="") TEMP$Age_Group <- gsub(" ","_",TEMP$Age_Group) DEM_2010 <- pivot_wider(TEMP,names_from=Age_Group,values_from=Population) DEM_2010 ####Demographics 2000-2010 PAGE <- read_html('http://eadiv.state.wy.us/Pop/CO-AS01-09.htm') NODE <- html_element(PAGE ,"table") TBL <- html_table(NODE) TBL <- TBL[-1:-4,c(-4,-15,-16)] colnames(TBL) <- c("County","Sex","Age_Group",2000:2009) TBL$County <- gsub(" ","_",gsub(" "," ",gsub(" County","",TBL$County))) TEMP <- pivot_longer(TBL,all_of(colnames(TBL)[-1:-3]),names_to="Year",values_to="Population") %>% mutate(Year=as.integer(Year),Population=parse_number(Population)) TEMP <- TEMP %>% unique TEMP <- TEMP %>% filter(!Age_Group=="") TEMP$Age_Group <- gsub(" ","_",TEMP$Age_Group) TEMP <- pivot_wider(TEMP,names_from=Age_Group,values_from=Population) ####Demographics 2000-2010