Major updates to auto-data gathering plus notes
This commit is contained in:
parent
285026e421
commit
89da07fdae
@ -1,5 +1,10 @@
|
|||||||
Data files gathered manually from:
|
Data files gathered manually from:
|
||||||
|
|
||||||
|
National Institutes of Health HDPulse: An Ecosystem of Health Disparities and Minority Health Resources at
|
||||||
https://hdpulse.nimhd.nih.gov/data-portal/mortality/table?cod=247&cod_options=cod_15&ratetype=aa&ratetype_options=ratetype_2&race=00&race_options=race_6&sex=2&sex_options=sex_3&age=177&age_options=age_11&ruralurban=0&ruralurban_options=ruralurban_3&yeargroup=5&yeargroup_options=year5yearmort_1&statefips=56&statefips_options=area_states&county=56000&county_options=counties_wyoming&comparison=counties_to_us&comparison_options=comparison_counties&radio_comparison=areas&radio_comparison_options=cods_or_areas
|
https://hdpulse.nimhd.nih.gov/data-portal/mortality/table?cod=247&cod_options=cod_15&ratetype=aa&ratetype_options=ratetype_2&race=00&race_options=race_6&sex=2&sex_options=sex_3&age=177&age_options=age_11&ruralurban=0&ruralurban_options=ruralurban_3&yeargroup=5&yeargroup_options=year5yearmort_1&statefips=56&statefips_options=area_states&county=56000&county_options=counties_wyoming&comparison=counties_to_us&comparison_options=comparison_counties&radio_comparison=areas&radio_comparison_options=cods_or_areas
|
||||||
|
|
||||||
Each file is single age group, so age weighting does not apply despite the variable names
|
Each file is single age group, so age weighting does not apply despite the variable names. Each age group file is named sequentially with a prefix “A_” for the first age group and a prefix “I_” for the oldest. There are separate directories for each sex. The cleaning script uses this directory structure to extract the right files and merge them into one data set.
|
||||||
|
|
||||||
|
These files must be downloaded manually because the web page uses a filter feature to select the county and age. While there may be a way to scrape the data with code, the trade-off in my time was not worth it. Future runs will need to check these records and can download the files to match this directory structure in order to process an update in death rates.
|
||||||
|
|
||||||
|
Valid data as of Nov 6 2025 Alex Gebben
|
||||||
|
|||||||
@ -1 +0,0 @@
|
|||||||
[1] "Data files gathered manually from:\n\n\n\n\nhttps://hdpulse.nimhd.nih.gov/data-portal/mortality/table?cod=247&cod_options=cod_15&ratetype=aa&ratetype_options=ratetype_2&race=00&race_options=race_6&sex=2&sex_options=sex_3&age=177&age_options=age_11&ruralurban=0&ruralurban_options=ruralurban_3&yeargroup=5&yeargroup_options=year5yearmort_1&statefips=56&statefips_options=area_states&county=56000&county_options=counties_wyoming&comparison=counties_to_us&comparison_options=comparison_counties&radio_comparison=areas&radio_comparison_options=cods_or_areas\n\n\n\n\nEach file is single age group, so age weighting does not apply despite the variable names"
|
|
||||||
Binary file not shown.
Binary file not shown.
Binary file not shown.
@ -2,14 +2,42 @@
|
|||||||
library(rvest)
|
library(rvest)
|
||||||
library(tidyverse)
|
library(tidyverse)
|
||||||
library(readxl)
|
library(readxl)
|
||||||
|
library(curl) #To archive the html files
|
||||||
#setwd("../")
|
#setwd("../")
|
||||||
###Create Location to Save raw data sets
|
###Create Location to Save raw data sets
|
||||||
if(!exists("SAVE_LOC_RAW")){SAVE_LOC_RAW <-"./Data/Raw_Data/"}
|
if(!exists("SAVE_LOC_RAW")){SAVE_LOC_RAW <-"./Data/Raw_Data/"}
|
||||||
dir.create(SAVE_LOC_RAW, recursive = TRUE, showWarnings = FALSE)
|
dir.create(SAVE_LOC_RAW, recursive = TRUE, showWarnings = FALSE)
|
||||||
|
SAVE_LOC_RAW_POP <- paste0(SAVE_LOC_RAW,"Population/")
|
||||||
|
dir.create(SAVE_LOC_RAW_POP , recursive = TRUE, showWarnings = FALSE)
|
||||||
|
|
||||||
|
RAW_HTML_LOC <- paste0(SAVE_LOC_RAW_POP,"HTML_Population_Files/")
|
||||||
|
dir.create(RAW_HTML_LOC, recursive = TRUE, showWarnings = FALSE)
|
||||||
|
|
||||||
|
RAW_EXCEL_LOC <- paste0(SAVE_LOC_RAW_POP,"Excel_Population_Files/")
|
||||||
|
dir.create(RAW_EXCEL_LOC, recursive = TRUE, showWarnings = FALSE)
|
||||||
|
|
||||||
|
##Start a log file about the data
|
||||||
|
sink(file=paste0(SAVE_LOC_RAW_POP,"README_POPULATION_DATA.txt"),append=FALSE)
|
||||||
|
cat("Most data is supplied by the Wyoming Department of Administration & Information Economic Analysis Division (WIEAD). Their data is either directly pulled from other sources like the census or is interpolated using this data (such as deaths, and migration).\n This folder saves the raw HTML of a few population data source, which were stored as a web table rather than a CSV or excel file so that in the future if the web pages are removed or changed the code will still run, and can be updated with new information.\nSome files are supplied as HTML tables from a webpage, while others are excel files. ")
|
||||||
|
sink()
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
########County, Death, Birth and Migration Data
|
########County, Death, Birth and Migration Data
|
||||||
#Data found on the page http://eadiv.state.wy.us/pop/
|
#Data found on the page http://eadiv.state.wy.us/pop/
|
||||||
#Website States: Wyoming Economic Analysis Division based on U.S. Census Bureau's population estimation and vital stats above
|
#Website States: Wyoming Economic Analysis Division based on U.S. Census Bureau's population estimation and vital stats above
|
||||||
PAGE <- read_html("http://eadiv.state.wy.us/pop/BirthDeathMig.htm")
|
BIRTH_DEATH_MIGRATION_HTML_LOC <- paste0(RAW_HTML_LOC,"County_Migration_Deaths_Births.html")
|
||||||
|
try(curl_download("http://eadiv.state.wy.us/pop/BirthDeathMig.htm",destfile=BIRTH_DEATH_MIGRATION_HTML_LOC )) #Download the file rather than directly use the website, so a backup is available if it ever goes offline.
|
||||||
|
#Add to the notes
|
||||||
|
sink(file=paste0(SAVE_LOC_RAW_POP,"/README_POPULATION_DATA.txt"),append=TRUE)
|
||||||
|
cat("\n\n 1) Wyoming births, deaths, and net migration by county of residence: 1971 - 2023:
|
||||||
|
County_Migration_Deaths_Births.htm comes from http://eadiv.state.wy.us/pop/BirthDeathMig.htm
|
||||||
|
Data Type: HTML table
|
||||||
|
Data Source: WIEAD
|
||||||
|
Original Source: Wyoming Department of Health and U.S. Census Bureau")
|
||||||
|
sink()
|
||||||
|
|
||||||
|
PAGE <- read_html(BIRTH_DEATH_MIGRATION_HTML_LOC )
|
||||||
NODE <- html_element(PAGE ,"table")
|
NODE <- html_element(PAGE ,"table")
|
||||||
TBL <- html_table(NODE)
|
TBL <- html_table(NODE)
|
||||||
|
|
||||||
@ -36,7 +64,17 @@ WY_COUNTY_DATA_SET <- pivot_wider(Data,names_from=Type,values_from=Pop_Change) %
|
|||||||
WY_COUNTY_DATA_SET[,"County"] <- gsub(" ","_",WY_COUNTY_DATA_SET %>% pull(County))
|
WY_COUNTY_DATA_SET[,"County"] <- gsub(" ","_",WY_COUNTY_DATA_SET %>% pull(County))
|
||||||
|
|
||||||
########################City and County Population Data 2020 to 2024
|
########################City and County Population Data 2020 to 2024
|
||||||
PAGE <- read_html('http://eadiv.state.wy.us/pop/Place-24EST.htm')
|
CITY_POPULATION_A <- paste0(RAW_HTML_LOC,"Wyoming_City_and_County_Population_2020_2024.html")
|
||||||
|
try(curl_download("http://eadiv.state.wy.us/pop/Place-24EST.htm",destfile=CITY_POPULATION_A)) #Download the file rather than directly use the website, so a backup is available if it ever goes offline.
|
||||||
|
#Add to the notes
|
||||||
|
sink(file=paste0(SAVE_LOC_RAW_POP,"README_POPULATION_DATA.txt"),append=TRUE)
|
||||||
|
cat("\n 2) Wyoming Incorporated Place Population Estimates: April 1, 2020 to July 1, 2024
|
||||||
|
Wyoming_City_and_County_Population_2020_2024.html comes from http://eadiv.state.wy.us/pop/Place-24EST.htm
|
||||||
|
Data Type: HTML table
|
||||||
|
Data Source: WIEAD
|
||||||
|
Original Source: U.S. Census Bureau, Population Division Wyoming Department of Health and U.S. Census Bureau")
|
||||||
|
sink()
|
||||||
|
PAGE <- read_html(CITY_POPULATION_A)
|
||||||
NODE <- html_element(PAGE ,"table")
|
NODE <- html_element(PAGE ,"table")
|
||||||
TBL <- html_table(NODE)
|
TBL <- html_table(NODE)
|
||||||
|
|
||||||
@ -59,7 +97,18 @@ CITY_POP <- TBL[sort(c(grep("County",TBL %>% pull(County),invert=TRUE,ignore.cas
|
|||||||
CITY_POP$County <- gsub(" ","_",gsub("Balance of","Unincorporated",gsub(" County","",gsub(" city","",gsub(" town","",CITY_POP$County,ignore.case=TRUE),ignore.case=TRUE),ignore.case=TRUE),ignore.case=TRUE))
|
CITY_POP$County <- gsub(" ","_",gsub("Balance of","Unincorporated",gsub(" County","",gsub(" city","",gsub(" town","",CITY_POP$County,ignore.case=TRUE),ignore.case=TRUE),ignore.case=TRUE),ignore.case=TRUE))
|
||||||
CITY_POP <- CITY_POP %>% rename("City"=County)
|
CITY_POP <- CITY_POP %>% rename("City"=County)
|
||||||
########################City Population Data 2010 to 2020
|
########################City Population Data 2010 to 2020
|
||||||
PAGE <- read_html('http://eadiv.state.wy.us/pop/sub-est11-19.htm')
|
CITY_POPULATION_B <- paste0(RAW_HTML_LOC,"Wyoming_City_and_County_Population_2010_2020.html")
|
||||||
|
try(curl_download('http://eadiv.state.wy.us/pop/sub-est11-19.htm',destfile=CITY_POPULATION_B)) #Download the file rather than directly use the website, so a backup is available if it ever goes offline.
|
||||||
|
#Add to the notes
|
||||||
|
sink(file=paste0(SAVE_LOC_RAW_POP,"README_POPULATION_DATA.txt"),append=TRUE)
|
||||||
|
cat("\n 3) Intercensal Estimates of the Resident Population for Incorporated Places in Wyoming: April 1, 2010 to April 1, 2020
|
||||||
|
Data Type: HTML table
|
||||||
|
Wyoming_City_and_County_Population_2010_2020.html comes from http://eadiv.state.wy.us/pop/sub-est11-19.htm
|
||||||
|
Data Source: WIEAD
|
||||||
|
Original Source: Source: U.S. Census Bureau, Population Division" )
|
||||||
|
sink()
|
||||||
|
|
||||||
|
PAGE <- read_html(CITY_POPULATION_B )
|
||||||
NODE <- html_element(PAGE ,"table")
|
NODE <- html_element(PAGE ,"table")
|
||||||
TBL <- html_table(NODE)
|
TBL <- html_table(NODE)
|
||||||
ST <- which(toupper(TBL$X1)==toupper("Afton town, Wyoming"))
|
ST <- which(toupper(TBL$X1)==toupper("Afton town, Wyoming"))
|
||||||
@ -74,7 +123,18 @@ TBL$City <- gsub(" ","_",gsub(" $","",gsub("\r|\n| Wyoming|,| town| city","",TBL
|
|||||||
TBL <- TBL %>% filter(Year!=2020)
|
TBL <- TBL %>% filter(Year!=2020)
|
||||||
CITY_POP <- rbind(TBL,CITY_POP)
|
CITY_POP <- rbind(TBL,CITY_POP)
|
||||||
########################County Population Data 2010 to 2020
|
########################County Population Data 2010 to 2020
|
||||||
PAGE <- read_html('http://eadiv.state.wy.us/pop/ctyest11-19.htm')
|
COUNTY_POPULATION_B <- paste0(RAW_HTML_LOC,"Wyoming_County_Population_2010_2020.html")
|
||||||
|
try(curl_download('http://eadiv.state.wy.us/pop/ctyest11-19.htm',destfile=COUNTY_POPULATION_B)) #Download the file rather than directly use the website, so a backup is available if it ever goes offline.
|
||||||
|
#Add to the notes
|
||||||
|
sink(file=paste0(SAVE_LOC_RAW_POP,"README_POPULATION_DATA.txt"),append=TRUE)
|
||||||
|
cat("\n 4) Intercensal Estimates of the Resident Population for Counties in Wyoming: April 1, 2010 to April 1, 2020
|
||||||
|
Wyoming_County_Population_2010_2020.html comes from http://eadiv.state.wy.us/pop/ctyest11-19.htm
|
||||||
|
Data Type: HTML table
|
||||||
|
Data Source: WIEAD
|
||||||
|
Original Source: U.S. Census Bureau, Population Division" )
|
||||||
|
sink()
|
||||||
|
|
||||||
|
PAGE <- read_html(COUNTY_POPULATION_B )
|
||||||
NODE <- html_element(PAGE ,"table")
|
NODE <- html_element(PAGE ,"table")
|
||||||
TBL <- html_table(NODE)
|
TBL <- html_table(NODE)
|
||||||
ST <- grep("Albany",TBL$X1)
|
ST <- grep("Albany",TBL$X1)
|
||||||
@ -90,7 +150,18 @@ TBL <- TBL %>% filter(Year!=2020)
|
|||||||
COUNTY_POP <- rbind(TBL,COUNTY_POP)
|
COUNTY_POP <- rbind(TBL,COUNTY_POP)
|
||||||
|
|
||||||
########################County and City Population Data 2000 to 2010
|
########################County and City Population Data 2000 to 2010
|
||||||
PAGE <- read_html('http://eadiv.state.wy.us/pop/sub-est01-09.htm')
|
CITY_POPULATION_C <- paste0(RAW_HTML_LOC,"Wyoming_City_and_County_Population_2000_2009.html")
|
||||||
|
try(curl_download('http://eadiv.state.wy.us/pop/sub-est01-09.htm',destfile=CITY_POPULATION_C)) #Download the file rather than directly use the website, so a backup is available if it ever goes offline.
|
||||||
|
#Add to the notes
|
||||||
|
sink(file=paste0(SAVE_LOC_RAW_POP,"README_POPULATION_DATA.txt"),append=TRUE)
|
||||||
|
cat("\n 5) Intercensal Estimates of the Resident Population for Cities and Towns of Wyoming: April 1, 2000 to July 1, 2010
|
||||||
|
Wyoming_City_and_County_Population_2000_2009.html comes from 'http://eadiv.state.wy.us/pop/sub-est01-09.htm'
|
||||||
|
Data Type: HTML table
|
||||||
|
Data Source: WIEAD
|
||||||
|
Original Source: U.S. Census Bureau, Population Division" )
|
||||||
|
sink()
|
||||||
|
|
||||||
|
PAGE <- read_html(CITY_POPULATION_C)
|
||||||
NODE <- html_element(PAGE ,"table")
|
NODE <- html_element(PAGE ,"table")
|
||||||
TBL <- html_table(NODE)
|
TBL <- html_table(NODE)
|
||||||
|
|
||||||
@ -119,11 +190,18 @@ CITY_POP$City <- gsub("LaGrange","La_Grange",CITY_POP$City)
|
|||||||
COUNTY_POP <- rbind(COUNTY_TBL,COUNTY_POP)
|
COUNTY_POP <- rbind(COUNTY_TBL,COUNTY_POP)
|
||||||
####################County and City Population Data for 1990-2000
|
####################County and City Population Data for 1990-2000
|
||||||
#Location to save any raw population files. Most files are not saved since they are pulled from a html and not a excel file, but older files are only available as excel files
|
#Location to save any raw population files. Most files are not saved since they are pulled from a html and not a excel file, but older files are only available as excel files
|
||||||
SAVE_LOC_RAW_POP <- paste0(SAVE_LOC_RAW,"/Population")
|
POP_FILE_1990 <- paste0(RAW_EXCEL_LOC,"Wyoming_County_Population_1990_2000.xls")
|
||||||
dir.create(SAVE_LOC_RAW_POP , recursive = TRUE, showWarnings = FALSE)
|
try(if(!file.exists(POP_FILE_1990)){download.file('http://eadiv.state.wy.us/pop/c&sc90_00.xls',POP_FILE_1990)})
|
||||||
|
|
||||||
|
#Add to the notes
|
||||||
|
sink(file=paste0(SAVE_LOC_RAW_POP,"README_POPULATION_DATA.txt"),append=TRUE)
|
||||||
|
cat("\n 6) Population for Wyoming, Counties, Cities, and Towns: 1990 to 2000
|
||||||
|
Wyoming_County_Population_1990_2000.xls comes from 'http://eadiv.state.wy.us/pop/c&sc90_00.xls'
|
||||||
|
Data Type: Excel file
|
||||||
|
Data Source: WIEAD
|
||||||
|
Original Source: U.S. Census Bureau, Population Division" )
|
||||||
|
sink()
|
||||||
|
|
||||||
POP_FILE_1990 <- paste0(SAVE_LOC_RAW_POP,"/Pop_1990s.xls")
|
|
||||||
if(!file.exists(POP_FILE_1990)){download.file('http://eadiv.state.wy.us/pop/c&sc90_00.xls',POP_FILE_1990)}
|
|
||||||
TEMP <- read_xls(POP_FILE_1990,skip=2)[-1:-4,]
|
TEMP <- read_xls(POP_FILE_1990,skip=2)[-1:-4,]
|
||||||
colnames(TEMP)[1] <- "County"
|
colnames(TEMP)[1] <- "County"
|
||||||
TEMP <- TEMP[1:which(TEMP[,1]=="Wind River Res."),]
|
TEMP <- TEMP[1:which(TEMP[,1]=="Wind River Res."),]
|
||||||
@ -143,8 +221,18 @@ TEMP_CITY <- TEMP_CITY %>% filter(Year!=2000)
|
|||||||
|
|
||||||
try(rm(TEMP_CITY,TEMP_COUNTY,TEMP))
|
try(rm(TEMP_CITY,TEMP_COUNTY,TEMP))
|
||||||
####################County and City Population Data for 1980-1990
|
####################County and City Population Data for 1980-1990
|
||||||
POP_FILE_1980 <- paste0(SAVE_LOC_RAW_POP ,"/Pop_1980s.xls")
|
POP_FILE_1980 <- paste0(RAW_EXCEL_LOC,"/Wyoming_County_Population_1980_1990.xls")
|
||||||
if(!file.exists(POP_FILE_1980)){download.file('http://eadiv.state.wy.us/pop/C&SC8090.xls',POP_FILE_1980)}
|
try(if(!file.exists(POP_FILE_1980)){download.file('http://eadiv.state.wy.us/pop/C&SC8090.xls',POP_FILE_1980)})
|
||||||
|
|
||||||
|
#Add to the notes
|
||||||
|
sink(file=paste0(SAVE_LOC_RAW_POP,"README_POPULATION_DATA.txt"),append=TRUE)
|
||||||
|
cat("\n 7) Population for Wyoming, Counties and Municipalities: 1980 to 1990
|
||||||
|
Wyoming_County_Population_1980_1990.xls comes from 'http://eadiv.state.wy.us/pop/C&SC8090.xls'
|
||||||
|
Data Type: Excel file
|
||||||
|
Data Source: WIEAD
|
||||||
|
Original Source: U.S. Census Bureau, Population Division" )
|
||||||
|
sink()
|
||||||
|
|
||||||
|
|
||||||
TEMP <- read_xls(POP_FILE_1980,skip=2)[-1:-4,]
|
TEMP <- read_xls(POP_FILE_1980,skip=2)[-1:-4,]
|
||||||
colnames(TEMP)[1] <- "County"
|
colnames(TEMP)[1] <- "County"
|
||||||
@ -169,7 +257,18 @@ COUNTY_POP <- rbind(TEMP_COUNTY,COUNTY_POP)
|
|||||||
try(rm(TEMP_CITY,TEMP_COUNTY,TEMP))
|
try(rm(TEMP_CITY,TEMP_COUNTY,TEMP))
|
||||||
|
|
||||||
####################County Population Data for 1970-1980
|
####################County Population Data for 1970-1980
|
||||||
POP_FILE_1970 <- paste0(SAVE_LOC_RAW_POP ,"/Pop_1970s.xls")
|
POP_FILE_1970 <- paste0(RAW_EXCEL_LOC,"/Wyoming_County_Population_1970_1980.xls")
|
||||||
|
try(if(!file.exists(POP_FILE_1970)){download.file('http://eadiv.state.wy.us/pop/Cnty7080.xls',POP_FILE_1970)})
|
||||||
|
|
||||||
|
#Add to the notes
|
||||||
|
sink(file=paste0(SAVE_LOC_RAW_POP,"README_POPULATION_DATA.txt"),append=TRUE)
|
||||||
|
cat("\n 8) Wyoming and County Population: 1970 to 1980
|
||||||
|
Wyoming_County_Population_1970_1980.xls comes from 'http://eadiv.state.wy.us/pop/Cnty7080.xls'
|
||||||
|
Data Type: Excel file
|
||||||
|
Data Source: WIEAD
|
||||||
|
Original Source: U.S. Census Bureau, Population Division" )
|
||||||
|
sink()
|
||||||
|
|
||||||
if(!file.exists(POP_FILE_1970)){download.file('http://eadiv.state.wy.us/pop/Cnty7080.xls',POP_FILE_1970)}
|
if(!file.exists(POP_FILE_1970)){download.file('http://eadiv.state.wy.us/pop/Cnty7080.xls',POP_FILE_1970)}
|
||||||
|
|
||||||
TEMP <- read_xls(POP_FILE_1970,skip=2)[-1:-4,]
|
TEMP <- read_xls(POP_FILE_1970,skip=2)[-1:-4,]
|
||||||
@ -183,7 +282,20 @@ TEMP <- TEMP %>% filter(Year!=1980)
|
|||||||
COUNTY_POP <- rbind(TEMP,COUNTY_POP)
|
COUNTY_POP <- rbind(TEMP,COUNTY_POP)
|
||||||
#ggplot(aes(x=Year,y=Population,group=County,color=County),data=COUNTY_POP)+geom_line()
|
#ggplot(aes(x=Year,y=Population,group=County,color=County),data=COUNTY_POP)+geom_line()
|
||||||
try(rm(TEMP))
|
try(rm(TEMP))
|
||||||
###########Old data addtion:Period Ends in 1970
|
###########Old data addition:Period Ends in 1970
|
||||||
|
POP_FILE_OLD <- paste0(RAW_HTML_LOC,"Wyoming_City_and_County_Population_Prior_to_1970.htm")
|
||||||
|
try(curl_download('http://eadiv.state.wy.us/demog_data/cntycity_hist.htm',destfile=POP_FILE_OLD))
|
||||||
|
|
||||||
|
#Add to the notes
|
||||||
|
sink(file=paste0(SAVE_LOC_RAW_POP,"README_POPULATION_DATA.txt"),append=TRUE)
|
||||||
|
cat("\n 9) Historical decennial census population for Wyoming counties, cities, and towns
|
||||||
|
Wyoming_City_and_County_Population_Prior_to_1970.htm comes from 'http://eadiv.state.wy.us/demog_data/cntycity_hist.htm'
|
||||||
|
Data Type: HTML Tables
|
||||||
|
Data Source: WIEAD
|
||||||
|
Original Source: U.S. Census Bureau, Population Division
|
||||||
|
Note: Two tables are included complicating extraction. The values are manually entered in R rather than scarped like the other data sets" )
|
||||||
|
sink()
|
||||||
|
|
||||||
#See in part http://eadiv.state.wy.us/demog_data/cntycity_hist.htm
|
#See in part http://eadiv.state.wy.us/demog_data/cntycity_hist.htm
|
||||||
LN_OLD <- c(12487,10894,10286,9023,9018,8640) #Missing in 1910
|
LN_OLD <- c(12487,10894,10286,9023,9018,8640) #Missing in 1910
|
||||||
Year <- seq(1920,1970,by=10)
|
Year <- seq(1920,1970,by=10)
|
||||||
@ -224,5 +336,8 @@ write_csv(CITY_POP,paste0(CSV_SAVE,"/All_Wyoming_City_Populations.csv" ))
|
|||||||
|
|
||||||
saveRDS(WY_COUNTY_DATA_SET,paste0(RDS_SAVE,"/All_Wyoming_County_Populations.Rds" ))
|
saveRDS(WY_COUNTY_DATA_SET,paste0(RDS_SAVE,"/All_Wyoming_County_Populations.Rds" ))
|
||||||
write_csv(WY_COUNTY_DATA_SET,paste0(CSV_SAVE,"/All_Wyoming_County_Populations.csv" ))
|
write_csv(WY_COUNTY_DATA_SET,paste0(CSV_SAVE,"/All_Wyoming_County_Populations.csv" ))
|
||||||
|
run_datetime <- format(Sys.time(), "%Y-%m-%d %H:%M:%S")
|
||||||
|
sink(file=paste0(SAVE_LOC_RAW_POP,"README_POPULATION_DATA.txt"),append=TRUE)
|
||||||
|
cat(paste0("\n--- Run Date: ", run_datetime, " ---\n"))
|
||||||
|
sink()
|
||||||
|
|
||||||
|
|||||||
@ -1,9 +1,33 @@
|
|||||||
library(tidyverse)
|
library(tidyverse)
|
||||||
library(readxl)
|
library(readxl)
|
||||||
|
#setwd("../")
|
||||||
###################Demographics
|
###################Demographics
|
||||||
if(!file.exists("./Data/Demo_Single_Year_2020s.xls")){download.file('http://eadiv.state.wy.us/Pop/CO_SYASEX24.xlsx',"./Data/Demo_Single_Year_2020s.xls")}
|
#Set up saving locations
|
||||||
TEMP <- read_xlsx("./Data/Demo_Single_Year_2020s.xls",skip=2)[,-1]
|
if(!exists("SAVE_LOC_RAW")){SAVE_LOC_RAW <-"./Data/Raw_Data/"}
|
||||||
|
RAW_DEMO_LOC <- paste0(SAVE_LOC_RAW,"Demographics/")
|
||||||
|
dir.create(RAW_DEMO_LOC, recursive = TRUE, showWarnings = FALSE)
|
||||||
|
#Demographic Reference data
|
||||||
|
if(!exists("SAVE_LOC_REF")){SAVE_LOC_REF <-paste0(RAW_DEMO_LOC,"Reference_Material_for_Demographics/")}
|
||||||
|
dir.create(SAVE_LOC_REF, recursive = TRUE, showWarnings = FALSE)
|
||||||
|
|
||||||
|
|
||||||
|
#Start a README file for the raw downloaded demographic data
|
||||||
|
sink(file=paste0(RAW_DEMO_LOC,"README_DEMOGRAPHIC_DATA.txt"),append=FALSE)
|
||||||
|
cat("Demographic data used to find age and sex distribution of county populations\n")
|
||||||
|
sink()
|
||||||
|
#####Gather data
|
||||||
|
C_FILE_PATH <- paste0(RAW_DEMO_LOC,"Wyoming_County_Sex_by_Year_of_Age_Demographic_Data_2020_2024.xls")
|
||||||
|
try(if(!file.exists(C_FILE_PATH)){download.file("http://eadiv.state.wy.us/Pop/CO_SYASEX24.xlsx",C_FILE_PATH)})
|
||||||
|
#Append to the README for clarity of data sources
|
||||||
|
sink(file=paste0(RAW_DEMO_LOC,"/README_DEMOGRAPHIC_DATA.txt"),append=TRUE)
|
||||||
|
cat("\n\n 1) Annual County Resident Population Estimates by Single Year of Age and Sex: April 1, 2020 to July 1, 2024
|
||||||
|
Wyoming_County_Sex_by_Year_of_Age_Demographic_Data_2020_2024.xls comes from http://eadiv.state.wy.us/Pop/CO_SYASEX24.xlsx
|
||||||
|
Data Type: Excel table
|
||||||
|
Data Source: Wyoming Department of Information and Economic Development (WIEAD)
|
||||||
|
Original Source: Census Bureau, Population Division, June 2025")
|
||||||
|
sink()
|
||||||
|
|
||||||
|
TEMP <- read_xlsx(C_FILE_PATH,skip=2)[,-1]
|
||||||
TEMP <- TEMP[1:(min(which(is.na(TEMP[,1])))-1),]
|
TEMP <- TEMP[1:(min(which(is.na(TEMP[,1])))-1),]
|
||||||
TEMP <- TEMP[!grepl("Base",TEMP$YEAR,ignore.case=TRUE),] #There are two population values provided. I believe one is the census baseline, and one is a estimate in July. Keep the later estimate, to line up with the same seasonal collection pattern of the rest of the data
|
TEMP <- TEMP[!grepl("Base",TEMP$YEAR,ignore.case=TRUE),] #There are two population values provided. I believe one is the census baseline, and one is a estimate in July. Keep the later estimate, to line up with the same seasonal collection pattern of the rest of the data
|
||||||
TEMP$YEAR <- year(as.Date(substr((TEMP$YEAR),1,8),format="%m/%d/%Y"))
|
TEMP$YEAR <- year(as.Date(substr((TEMP$YEAR),1,8),format="%m/%d/%Y"))
|
||||||
@ -11,10 +35,27 @@ colnames(TEMP) <- c("County","Year","Age","Number","Num_Male","Num_Female")
|
|||||||
TEMP$County <- gsub(" County","",TEMP$County,ignore.case=TRUE)
|
TEMP$County <- gsub(" County","",TEMP$County,ignore.case=TRUE)
|
||||||
DEM_2020 <- TEMP %>% select(-Number)
|
DEM_2020 <- TEMP %>% select(-Number)
|
||||||
###Demographics all
|
###Demographics all
|
||||||
DEM_DATA <- read_delim("Data/County_Demographics_Census/wy.1969_2023.singleages.through89.90plus.txt",delim=" ",col_names=c("ID","VALUES"),col_types=list('c','c'))
|
try(DEM_DATA <- read_delim('https://seer.cancer.gov/popdata/yr1969_2023.singleages.through89.90plus/wy.1969_2023.singleages.through89.90plus.txt.gz',delim=" ",col_names=c("ID","VALUES"),col_types=list('c','c')))
|
||||||
DEM_DATA$Year <- as.integer(substr(DEM_DATA$ID,1,4))
|
SEER_DATA_LOC <- paste0(RAW_DEMO_LOC,"Wyoming_County_Sex_by_Year_of_Age_Demographic_Data_1969_2023.csv")
|
||||||
|
# If the SEER download failed, fall back to the archived copy; otherwise archive what was just downloaded. Both columns are read as character, matching the col_types used for the download, so the fixed-position substr() parsing that follows behaves the same on either path.
if(!exists("DEM_DATA")){DEM_DATA <- read_csv(SEER_DATA_LOC,col_types=list('c','c'))}else{ write_csv(DEM_DATA,SEER_DATA_LOC)}
|
||||||
|
|
||||||
|
#Append to the README for clarity of data sources
|
||||||
|
sink(file=paste0(RAW_DEMO_LOC,"/README_DEMOGRAPHIC_DATA.txt"),append=TRUE)
|
||||||
|
cat("\n\n 2) Wyoming County-Level Population Files - Single-year Age Groups 1969 to 2023
|
||||||
|
Wyoming_County_Sex_by_Year_of_Age_Demographic_Data_1969_2023.csv comes from https://seer.cancer.gov/popdata/yr1969_2023.singleages.through89.90plus/wy.1969_2023.singleages.through89.90plus.txt.gz
|
||||||
|
Data Type: gunzip (gz) file with coded data
|
||||||
|
Data Source: The National Cancer Institute surveillance, Epidemiology, and End Results Program
|
||||||
|
Original Source: Census Bureau (data processed for yearly estimates)
|
||||||
|
Note: See https://seer.cancer.gov/popdata/download.html for more data information and other State data. Raw data is parsed using the described data format (first number year, then fips code etc.).
|
||||||
|
The required fips codes are provided in the reference folder from https://github.com/kjhealy/fips-codes/raw/refs/heads/master/county_fips_master.csv.")
|
||||||
|
sink()
|
||||||
|
|
||||||
|
DEM_DATA$Year <- as.numeric(substr(DEM_DATA$ID,1,4))
|
||||||
DEM_DATA$fips<- substr(DEM_DATA$ID,7,11)
|
DEM_DATA$fips<- substr(DEM_DATA$ID,7,11)
|
||||||
COUNTY_LIST <- read_csv("https://github.com/kjhealy/fips-codes/raw/refs/heads/master/county_fips_master.csv",col_types=list('c','c')) %>% filter(state_abbr=="WY") %>% select(fips,County=county_name) %>% mutate(County=gsub(" ","_",gsub(" County","",County,ignore.case=TRUE)))
|
# Wrapped in try() so a failed download does not stop the script; the exists("COUNTY_LIST") check that follows can then load the archived FIPS table instead.
try(COUNTY_LIST <- read_csv("https://github.com/kjhealy/fips-codes/raw/refs/heads/master/county_fips_master.csv",col_types=list('c','c')) %>% filter(state_abbr=="WY") %>% select(fips,County=county_name) %>% mutate(County=gsub(" ","_",gsub(" County","",County,ignore.case=TRUE))))
|
||||||
|
FIPS_LOC <- paste0(SAVE_LOC_REF,"fips_codes.csv")
|
||||||
|
# Use the archived FIPS table when COUNTY_LIST was not created by the download; otherwise refresh the archive. Columns are read as character so fips matches the character fips extracted from DEM_DATA$ID with substr() before the join.
if(!exists("COUNTY_LIST")){ COUNTY_LIST <- read_csv(FIPS_LOC,col_types=list('c','c'))}else{ write_csv(COUNTY_LIST,FIPS_LOC)}
|
||||||
|
|
||||||
DEM_DATA <- DEM_DATA %>% left_join(COUNTY_LIST) %>% select(-fips)
|
DEM_DATA <- DEM_DATA %>% left_join(COUNTY_LIST) %>% select(-fips)
|
||||||
#16=3
|
#16=3
|
||||||
DEM_DATA$Sex <- ifelse(substr(DEM_DATA$VALUES,3,3)==1,"Male","Female")
|
DEM_DATA$Sex <- ifelse(substr(DEM_DATA$VALUES,3,3)==1,"Male","Female")
|
||||||
@ -41,4 +82,8 @@ DEM_DATA <- rbind(DEM_2020,DEM_DATA) %>% ungroup %>% arrange(Year,Age) %>% uniq
|
|||||||
LIN_DEM <- DEM_DATA %>% filter(County=='Lincoln')
|
LIN_DEM <- DEM_DATA %>% filter(County=='Lincoln')
|
||||||
saveRDS(LIN_DEM,paste0(RDS_SAVE,"/Full_Lincoln_County_Demographics.Rds" ))
|
saveRDS(LIN_DEM,paste0(RDS_SAVE,"/Full_Lincoln_County_Demographics.Rds" ))
|
||||||
write_csv(LIN_DEM,paste0(CSV_SAVE,"/Full_Lincoln_County_Demographics.csv" ))
|
write_csv(LIN_DEM,paste0(CSV_SAVE,"/Full_Lincoln_County_Demographics.csv" ))
|
||||||
|
run_datetime <- format(Sys.time(), "%Y-%m-%d %H:%M:%S")
|
||||||
|
# Append the run date to this script's own README. SAVE_LOC_RAW_POP is created by the population script, not here, so use RAW_DEMO_LOC and the demographic README started above.
sink(file=paste0(RAW_DEMO_LOC,"README_DEMOGRAPHIC_DATA.txt"),append=TRUE)
|
||||||
|
cat(paste0("\n--- Run Date: ", run_datetime, " ---\n"))
|
||||||
|
sink()
|
||||||
|
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user