#############################Clean up script folders, simulations of deaths should be separated from this file. Death rate simulation is complicated and should be commented, and turned into a separate script. library(rvest) library(tidyverse) library(readxl) library(curl) #To archive the html files #setwd("../") ###Create Location to Save raw data sets if(!exists("SAVE_LOC_RAW")){SAVE_LOC_RAW <-"./Data/Raw_Data/"} dir.create(SAVE_LOC_RAW, recursive = TRUE, showWarnings = FALSE) SAVE_LOC_RAW_POP <- paste0(SAVE_LOC_RAW,"Population/") dir.create(SAVE_LOC_RAW_POP , recursive = TRUE, showWarnings = FALSE) RAW_HTML_LOC <- paste0(SAVE_LOC_RAW_POP,"HTML_Population_Files/") dir.create(RAW_HTML_LOC, recursive = TRUE, showWarnings = FALSE) RAW_EXCEL_LOC <- paste0(SAVE_LOC_RAW_POP,"Excel_Population_Files/") dir.create(RAW_EXCEL_LOC, recursive = TRUE, showWarnings = FALSE) ##Start a log file about the data sink(file=paste0(SAVE_LOC_RAW_POP,"README_POPULATION_DATA.txt"),append=FALSE) cat("Most data is supplied by the Wyoming Department of Administration & Information Economic Analysis Division (WIEAD). Their data is either directly pulled from other sources like the census or is interpolated using this data (such as deaths, and migration).\n This folder saves the raw HTML of a few population data source, which were stored as a web table rather than a CSV or excel file so that in the future if the web pages are removed or changed the code will still run, and can be updated with new information.\nSome files are supplied as HTML tables from a webpage, while others are excel files. ") sink() ########County, Death, Birth and Migration Data #Data found on the page http://eadiv.state.wy.us/pop/ #Website States: Wyoming Economic Analysis Division based on U.S. Census Bureau's population estimation and vital stats above BIRTH_DEATH_MIGRATION_HTML_LOC <- paste0(RAW_HTML_LOC,"County_Migration_Deaths_Births.html") try(curl_download("http://eadiv.state.wy.us/pop/BirthDeathMig.htm",destfile=BIRTH_DEATH_MIGRATION_HTML_LOC )) #Download the file rather than directly use the website, so a backup is available if it ever goes offline. #Add to the notes sink(file=paste0(SAVE_LOC_RAW_POP,"/README_POPULATION_DATA.txt"),append=TRUE) cat("\n\n 1) Wyoming births, deaths, and net migration by county of residence: 1971 - 2023: County_Migration_Deaths_Births.htm comes from http://eadiv.state.wy.us/pop/BirthDeathMig.htm Data Type: HTML table Data Source: WIEAD Original Source: Wyoming Department of Health and U.S. Census Bureau") sink() PAGE <- read_html(BIRTH_DEATH_MIGRATION_HTML_LOC ) NODE <- html_element(PAGE ,"table") TBL <- html_table(NODE) ST <- which(toupper(TBL$X1)=="ALBANY") END <- which(toupper(TBL$X1)=="TOTAL") TYPES <- TBL[ST-2,1] ST_YEAR <- 1971 ALL_DATA <- list() TBL <- TBL[,c(1,which(!is.na(as.numeric(TBL[ST[1],]))))] TBL <- TBL[,-ncol(TBL)] colnames(TBL) <- c("County",(ST_YEAR:(ST_YEAR+ncol(TBL)-1))) TBL$Type <- NA for(i in 1:length(ST)){ TBL[ST[i]:END[i],"Type"]<- as.character(TYPES[i,1]) } TBL[ST[2]:END[2],"Type"] <- as.character(TYPES[2,1]) TBL$Type TBL <- TBL %>% filter(!is.na(Type)) %>% select(County,Type,everything()) GROUP <- colnames(TBL)[-1:-2] Data <- pivot_longer(TBL,all_of(GROUP),names_to="Year",values_to="Pop_Change") Data$County <- ifelse(toupper(Data$County)=="TOTAL","Wyoming",Data$County) WY_COUNTY_DATA_SET <- pivot_wider(Data,names_from=Type,values_from=Pop_Change) %>% rename("Migration"=`Net Migration`) %>% mutate(Year=as.integer(Year),Births=parse_number(Births),Deaths=parse_number(Deaths),Migration=parse_number(Migration)) %>% mutate(Year=Year-1) #Data appears to be one off from population WY_COUNTY_DATA_SET[,"County"] <- gsub(" ","_",WY_COUNTY_DATA_SET %>% pull(County)) ########################City and County Population Data 2020 to 2024 CITY_POPULATION_A <- paste0(RAW_HTML_LOC,"Wyoming_City_and_County_Population_2020_2024.html") try(curl_download("http://eadiv.state.wy.us/pop/Place-24EST.htm",destfile=CITY_POPULATION_A)) #Download the file rather than directly use the website, so a backup is available if it ever goes offline. #Add to the notes sink(file=paste0(SAVE_LOC_RAW_POP,"README_POPULATION_DATA.txt"),append=TRUE) cat("\n 2) Wyoming Incorporated Place Population Estimates: April 1, 2020 to July 1, 2024 Wyoming_City_and_County_Population_2020_2024.html comes from http://eadiv.state.wy.us/pop/Place-24EST.htm Data Type: HTML table Data Source: WIEAD Original Source: U.S. Census Bureau, Population Division Wyoming Department of Health and U.S. Census Bureau") sink() PAGE <- read_html(CITY_POPULATION_A) NODE <- html_element(PAGE ,"table") TBL <- html_table(NODE) ST <- which(toupper(TBL$X1)==toupper("Albany County")) END <- which(toupper(TBL$X1)==toupper("Balance of Weston County")) #More years than are pulled are listed to make more generic COLUMNS <- c(1,which(TBL[ST-2,] %in% 1970:2025)) NAMES <- TBL[4,COLUMNS][-1] TBL <- TBL[ST:END,COLUMNS ] colnames(TBL) <- c("County",NAMES) TBL <- pivot_longer(TBL,all_of(colnames(TBL)[-1]),names_to="Year",values_to="Population") %>% mutate(Year=as.integer(Year),Population=parse_number(Population)) TBL$County <- gsub(" "," ",gsub("\n","",gsub("\r","",TBL %>% pull(County)))) COUNTY_POP<- TBL[grep("COUNTY",TBL %>% pull(County),ignore.case=TRUE),] COUNTY_POP<- COUNTY_POP[grep("Balance",COUNTY_POP%>% pull(County),invert=TRUE,ignore.case=TRUE),] COUNTY_POP$County <- gsub(" ","_",gsub(" County","",COUNTY_POP$County)) CITY_POP <- TBL[sort(c(grep("County",TBL %>% pull(County),invert=TRUE,ignore.case=TRUE),grep("Balance",TBL %>% pull(County),ignore.case=TRUE))),] CITY_POP$County <- gsub(" ","_",gsub("Balance of","Unincorporated",gsub(" County","",gsub(" city","",gsub(" town","",CITY_POP$County,ignore.case=TRUE),ignore.case=TRUE),ignore.case=TRUE),ignore.case=TRUE)) CITY_POP <- CITY_POP %>% rename("City"=County) ########################City Population Data 2010 to 2020 CITY_POPULATION_B <- paste0(RAW_HTML_LOC,"Wyoming_City_and_County_Population_2010_2020.html") try(curl_download('http://eadiv.state.wy.us/pop/sub-est11-19.htm',destfile=CITY_POPULATION_B)) #Download the file rather than directly use the website, so a backup is available if it ever goes offline. #Add to the notes sink(file=paste0(SAVE_LOC_RAW_POP,"README_POPULATION_DATA.txt"),append=TRUE) cat("\n 3) Intercensal Estimates of the Resident Population for Incorporated Places in Wyoming: April 1, 2010 to April 1, 2020 Data Type: HTML table Wyoming_City_and_County_Population_2010_2020.html comes from http://eadiv.state.wy.us/pop/sub-est11-19.htm Data Source: WIEAD Original Source: Source: U.S. Census Bureau, Population Division" ) sink() PAGE <- read_html(CITY_POPULATION_B ) NODE <- html_element(PAGE ,"table") TBL <- html_table(NODE) ST <- which(toupper(TBL$X1)==toupper("Afton town, Wyoming")) END <- which(toupper(TBL$X1)==toupper("Yoder town, Wyoming")) #More years than are pulled are listed to make more generic COLUMNS <- c(1,which(TBL[ST-1,] %in% 1970:2025)) NAMES <- TBL[3,COLUMNS][-1] TBL <- TBL[ST:END,COLUMNS ] colnames(TBL) <- c("City",NAMES) TBL <- pivot_longer(TBL,all_of(colnames(TBL)[-1]),names_to="Year",values_to="Population") %>% mutate(Year=as.integer(Year),Population=parse_number(Population)) TBL$City <- gsub(" ","_",gsub(" $","",gsub("\r|\n| Wyoming|,| town| city","",TBL$City,ignore.case=TRUE))) TBL <- TBL %>% filter(Year!=2020) CITY_POP <- rbind(TBL,CITY_POP) ########################County Population Data 2010 to 2020 COUNTY_POPULATION_B <- paste0(RAW_HTML_LOC,"Wyoming_County_Population_2010_2020.html") try(curl_download('http://eadiv.state.wy.us/pop/ctyest11-19.htm',destfile=COUNTY_POPULATION_B)) #Download the file rather than directly use the website, so a backup is available if it ever goes offline. #Add to the notes sink(file=paste0(SAVE_LOC_RAW_POP,"README_POPULATION_DATA.txt"),append=TRUE) cat("\n 4) Intercensal Estimates of the Resident Population for Counties in Wyoming: April 1, 2010 to April 1, 2020 Wyoming_County_Population_2010_2020.html comes from http://eadiv.state.wy.us/pop/ctyest11-19.htm Data Type: HTML table Data Source: WIEAD Original Source: U.S. Census Bureau, Population Division" ) sink() PAGE <- read_html(COUNTY_POPULATION_B ) NODE <- html_element(PAGE ,"table") TBL <- html_table(NODE) ST <- grep("Albany",TBL$X1) END <- grep("Weston",TBL$X1) #More years than are pulled are listed to make more generic COLUMNS <- c(1,which(TBL[ST-2,] %in% 1970:2025)) NAMES <- TBL[3,COLUMNS][-1] TBL <- TBL[ST:END,COLUMNS ] colnames(TBL) <- c("County",NAMES) TBL <- pivot_longer(TBL,all_of(colnames(TBL)[-1]),names_to="Year",values_to="Population") %>% mutate(Year=as.integer(Year),Population=parse_number(Population)) TBL$County <- gsub(" ","_",gsub(" "," ",gsub(" $","",gsub("\r|\n| Wyoming|,| town| city| County|\\.","",TBL$County,ignore.case=TRUE)))) TBL <- TBL %>% filter(Year!=2020) COUNTY_POP <- rbind(TBL,COUNTY_POP) ########################County and City Population Data 2000 to 2010 CITY_POPULATION_C <- paste0(RAW_HTML_LOC,"Wyoming_City_and_County_Population_2000_2009.html") try(curl_download('http://eadiv.state.wy.us/pop/sub-est01-09.htm',destfile=CITY_POPULATION_C)) #Download the file rather than directly use the website, so a backup is available if it ever goes offline. #Add to the notes sink(file=paste0(SAVE_LOC_RAW_POP,"README_POPULATION_DATA.txt"),append=TRUE) cat("\n 5) Intercensal Estimates of the Resident Population for Cities and Towns of Wyoming: April 1, 2000 to July 1, 2010 Wyoming_City_and_County_Population_2000_2009.html comes from 'http://eadiv.state.wy.us/pop/sub-est01-09.htm' Data Type: HTML table Data Source: WIEAD Original Source: U.S. Census Bureau, Population Division" ) sink() PAGE <- read_html(CITY_POPULATION_C) NODE <- html_element(PAGE ,"table") TBL <- html_table(NODE) ST <- which(toupper(TBL$X1)==toupper("Albany County")) END <- which(toupper(TBL$X1)==toupper("Balance of Weston County")) #More years than are pulled are listed to make more generic COLUMNS <- c(1,which(TBL[ST-4,] %in% 1970:2025)) NAMES <- TBL[4,COLUMNS][-1] TBL <- TBL[ST:END,COLUMNS ] colnames(TBL) <- c("County",NAMES) TBL <- pivot_longer(TBL,all_of(colnames(TBL)[-1]),names_to="Year",values_to="Population") %>% mutate(Year=as.integer(Year),Population=parse_number(Population)) TBL <- TBL %>% filter(Year!=2010) TBL$County <- gsub(" "," ",gsub("\n","",gsub("\r","",TBL %>% pull(County)))) COUNTY_TBL <- TBL[grep("COUNTY",TBL %>% pull(County),ignore.case=TRUE),] COUNTY_TBL <-COUNTY_TBL[grep("Balance",COUNTY_TBL%>% pull(County),invert=TRUE,ignore.case=TRUE),] COUNTY_TBL$County <-gsub("_(pt.)","", gsub(" ","_",gsub(" County","",COUNTY_TBL$County))) CITY_TBL <- TBL[sort(c(grep("County",TBL %>% pull(County),invert=TRUE,ignore.case=TRUE),grep("Balance",TBL %>% pull(County),ignore.case=TRUE))),] CITY_TBL$County <- gsub(" ","_",gsub("Balance of","Unincorporated",gsub(" County","",gsub(" city","",gsub(" town","",CITY_TBL$County,ignore.case=TRUE),ignore.case=TRUE),ignore.case=TRUE),ignore.case=TRUE)) CITY_TBL <- CITY_TBL %>% rename("City"=County) CITY_POP <- rbind(CITY_TBL,CITY_POP) #Cleanup names CITY_POP$City <- gsub("LaGrange","La_Grange",CITY_POP$City) COUNTY_POP <- rbind(COUNTY_TBL,COUNTY_POP) ####################County and City Population Data for 1990-2000 #Location to save any raw population files. Most files are not saved since they are pulled from a html and not a excel file, but older files are only available as excel files POP_FILE_1990 <- paste0(RAW_EXCEL_LOC,"Wyoming_County_Population_1990_2000.xls") try(if(!file.exists(POP_FILE_1990)){download.file('http://eadiv.state.wy.us/pop/c&sc90_00.xls',POP_FILE_1990)}) #Add to the notes sink(file=paste0(SAVE_LOC_RAW_POP,"README_POPULATION_DATA.txt"),append=TRUE) cat("\n 6) Population for Wyoming, Counties, Cities, and Towns: 1990 to 2000 Wyoming_County_Population_1980_1990.xls comes from 'http://eadiv.state.wy.us/pop/c&sc90_00.xls' Data Type: Excel file Data Source: WIEAD Original Source: U.S. Census Bureau, Population Division" ) sink() TEMP <- read_xls(POP_FILE_1990,skip=2)[-1:-4,] colnames(TEMP)[1] <- "County" TEMP <- TEMP[1:which(TEMP[,1]=="Wind River Res."),] TEMP <- pivot_longer(TEMP,all_of(colnames(TEMP)[-1]),names_to="Year",values_to="Population") %>% mutate(Year=as.integer(Year),Population=as.numeric(Population)) TEMP <- TEMP %>% filter(Year!=2000) TEMP_COUNTY <- TEMP[grepl("Cnty",TEMP %>% pull(County),ignore.case=TRUE),] TEMP_COUNTY$County <- gsub(" ","_",gsub(" "," ",gsub(" Cnty","",TEMP_COUNTY$County,ignore.case=TRUE))) TEMP_CITY <- TEMP[grep("Cnty",TEMP %>% pull(County),ignore.case=TRUE,invert=TRUE),] TEMP_CITY$County <- gsub("E_Therm","East_Therm",gsub(" ","_",gsub(" ","",TEMP_CITY %>% pull(County)))) TEMP_CITY <- TEMP_CITY %>% rename(City=County) TEMP_CITY %>% pull(City) %>% unique %>% sort CITY_POP <- rbind(TEMP_CITY,CITY_POP) CITY_POP %>% pull(City) %>% unique %>% sort COUNTY_POP <- rbind(TEMP_COUNTY,COUNTY_POP) TEMP_CITY <- TEMP_CITY %>% filter(Year!=2000) try(rm(TEMP_CITY,TEMP_COUNTY,TEMP)) ####################County and City Population Data for 1980-1990 POP_FILE_1980 <- paste0(RAW_EXCEL_LOC,"/Wyoming_County_Population_1980_1990.xls") try(if(!file.exists(POP_FILE_1980)){download.file('http://eadiv.state.wy.us/pop/C&SC8090.xls',POP_FILE_1980)}) #Add to the notes sink(file=paste0(SAVE_LOC_RAW_POP,"README_POPULATION_DATA.txt"),append=TRUE) cat("\n 7) Population for Wyoming, Counties and Municipalities: 1980 to 1990 Wyoming_County_Population_1980_1990.xls comes from 'http://eadiv.state.wy.us/pop/C&SC8090.xls' Data Type: Excel file Data Source: WIEAD Original Source: U.S. Census Bureau, Population Division" ) sink() TEMP <- read_xls(POP_FILE_1980,skip=2)[-1:-4,] colnames(TEMP)[1] <- "County" TEMP <- TEMP[2:which(TEMP[,1]=="Upton"),1:(min(which(is.na(TEMP[2,])))-1)] TEMP <- pivot_longer(TEMP,all_of(colnames(TEMP)[-1]),names_to="Year",values_to="Population") %>% mutate(Year=as.integer(Year),Population=as.numeric(Population)) TEMP_COUNTY <- TEMP[grepl("Cty",TEMP %>% pull(County),ignore.case=TRUE),] TEMP_COUNTY$County <- gsub(" ","_",gsub(" "," ",gsub(" Cty","",TEMP_COUNTY$County,ignore.case=TRUE))) TEMP_CITY <- TEMP[grep("Cty",TEMP %>% pull(County),ignore.case=TRUE,invert=TRUE),] TEMP_CITY$County <-gsub("Frannie_","Frannie", gsub("Mtn._View","Mountain_View",gsub("E._Therm","East_Therm",gsub(" ","_",gsub(" ","",TEMP_CITY %>% pull(County)))))) TEMP_CITY <- TEMP_CITY %>% rename(City=County) TEMP_CITY <- TEMP_CITY %>% filter(Year!=1990) TEMP_COUNTY <- TEMP_COUNTY %>% filter(Year!=1990) CITY_POP <- rbind(TEMP_CITY,CITY_POP) COUNTY_POP <- rbind(TEMP_COUNTY,COUNTY_POP) #ggplot(aes(x=Year,y=Population,group=County,color=County),data=COUNTY_POP)+geom_line() try(rm(TEMP_CITY,TEMP_COUNTY,TEMP)) ####################County Population Data for 1970-1980 POP_FILE_1970 <- paste0(RAW_EXCEL_LOC,"/Wyoming_County_Population_1970_1980.xls") try(if(!file.exists(POP_FILE_1970)){download.file('http://eadiv.state.wy.us/pop/Cnty7080.xls',POP_FILE_1970)}) #Add to the notes sink(file=paste0(SAVE_LOC_RAW_POP,"README_POPULATION_DATA.txt"),append=TRUE) cat("\n 8) Wyoming and County Population: 1970 to 1980 Wyoming_County_Population_1970_1980.xls comes from 'http://eadiv.state.wy.us/pop/Cnty7080.xls' Data Type: Excel file Data Source: WIEAD Original Source: U.S. Census Bureau, Population Division" ) sink() if(!file.exists(POP_FILE_1970)){download.file('http://eadiv.state.wy.us/pop/Cnty7080.xls',POP_FILE_1970)} TEMP <- read_xls(POP_FILE_1970,skip=2)[-1:-4,] colnames(TEMP)[1] <- "County" TEMP <- TEMP[1:which(TEMP[,1]=="Weston"),] TEMP <- pivot_longer(TEMP,all_of(colnames(TEMP)[-1]),names_to="Year",values_to="Population") %>% mutate(Year=as.integer(Year),Population=as.numeric(Population)) TEMP$County <- gsub(" ","_",TEMP$County) TEMP <- TEMP %>% filter(Year!=1980) COUNTY_POP <- rbind(TEMP,COUNTY_POP) #ggplot(aes(x=Year,y=Population,group=County,color=County),data=COUNTY_POP)+geom_line() try(rm(TEMP)) ###########Old data addition:Period Ends in 1970 POP_FILE_OLD <- paste0(RAW_HTML_LOC,"Wyoming_City_and_County_Population_Prior_to_1970.htm") try(curl_download('http://eadiv.state.wy.us/demog_data/cntycity_hist.htm',destfile=POP_FILE_OLD)) #Add to the notes sink(file=paste0(SAVE_LOC_RAW_POP,"README_POPULATION_DATA.txt"),append=TRUE) cat("\n 9) Historical decennial census population for Wyoming counties, cities, and towns Wyoming_City_and_County_Population_Prior_to_1970.htm comes from 'http://eadiv.state.wy.us/demog_data/cntycity_hist.htm' Data Type: HTML Tables Data Source: WIEAD Original Source: U.S. Census Bureau, Population Division Note: Two tables are included complicating extraction. The values are manually entered in R rather than scarped like the other data sets" ) sink() #See in part http://eadiv.state.wy.us/demog_data/cntycity_hist.htm LN_OLD <- c(12487,10894,10286,9023,9018,8640) #Missing in 1910 Year <- seq(1920,1970,by=10) TEMP <- cbind(Year,rep("Lincoln",6),LN_OLD) colnames(TEMP ) <- c("Year","County","Population") TEMP <- as_tibble(TEMP) COUNTY_POP <- rbind(TEMP,COUNTY_POP) %>% arrange(County,Year) KEM_OLD <- c(843,1517,1884,2026,1667,2028,2292) #1910 forward until 1970 Year <- seq(1910,1970,by=10) TEMP <- cbind(Year,rep("kemmerer",7),KEM_OLD) colnames(TEMP ) <- c("Year","City","Population") TEMP <- as_tibble(TEMP) CITY_POP <- rbind(TEMP,CITY_POP) DIAMOND_OLD <- c(696,726,812,586,415,398,485) TEMP <- cbind(Year,rep("Diamondvile",7),DIAMOND_OLD) colnames(TEMP ) <- c("Year","City","Population") TEMP <- as_tibble(TEMP) CITY_POP <- rbind(TEMP,CITY_POP) %>% arrange(City,Year) #Remove empty values, ensure all numeric values are not saved as characters CITY_POP <- CITY_POP %>% filter(!is.na(Population) ) %>% mutate(Population=parse_number(Population),Year=parse_number(Year)) #Add Other Data COUNTY_POP <- COUNTY_POP %>% mutate(Year=as.numeric(Year)) %>% unique WY_COUNTY_DATA_SET <- COUNTY_POP %>% left_join(WY_COUNTY_DATA_SET ) %>% mutate(Population=as.numeric(Population)) %>% unique ###Save Population Results if(!exists("SAVE_LOC_POP")){SAVE_LOC_POP <-"./Data/Cleaned_Data/Population_Data"} CSV_SAVE <- paste0(SAVE_LOC_POP,"/CSV") RDS_SAVE <- paste0(SAVE_LOC_POP,"/RDS") dir.create(CSV_SAVE, recursive = TRUE, showWarnings = FALSE) dir.create(RDS_SAVE, recursive = TRUE, showWarnings = FALSE) saveRDS(CITY_POP,paste0(RDS_SAVE,"/All_Wyoming_City_Populations.Rds" )) write_csv(CITY_POP,paste0(CSV_SAVE,"/All_Wyoming_City_Populations.csv" )) saveRDS(WY_COUNTY_DATA_SET,paste0(RDS_SAVE,"/All_Wyoming_County_Populations.Rds" )) write_csv(WY_COUNTY_DATA_SET,paste0(CSV_SAVE,"/All_Wyoming_County_Populations.csv" )) run_datetime <- format(Sys.time(), "%Y-%m-%d %H:%M:%S") sink(file=paste0(SAVE_LOC_RAW_POP,"README_POPULATION_DATA.txt"),append=TRUE) cat(paste0("\n--- Run Date: ", run_datetime, " ---\n")) sink()