Major updates to auto-data gathering plus notes

This commit is contained in:
Alex Gebben Work 2025-11-06 17:21:41 -07:00
parent 285026e421
commit 89da07fdae
7 changed files with 185 additions and 21 deletions

View File

@ -1,5 +1,10 @@
Data files gathered manually from: Data files gathered manually from:
National Institutes of Health HDPulse: An Ecosystem of Health Disparities and Minority Health Resources at
https://hdpulse.nimhd.nih.gov/data-portal/mortality/table?cod=247&cod_options=cod_15&ratetype=aa&ratetype_options=ratetype_2&race=00&race_options=race_6&sex=2&sex_options=sex_3&age=177&age_options=age_11&ruralurban=0&ruralurban_options=ruralurban_3&yeargroup=5&yeargroup_options=year5yearmort_1&statefips=56&statefips_options=area_states&county=56000&county_options=counties_wyoming&comparison=counties_to_us&comparison_options=comparison_counties&radio_comparison=areas&radio_comparison_options=cods_or_areas https://hdpulse.nimhd.nih.gov/data-portal/mortality/table?cod=247&cod_options=cod_15&ratetype=aa&ratetype_options=ratetype_2&race=00&race_options=race_6&sex=2&sex_options=sex_3&age=177&age_options=age_11&ruralurban=0&ruralurban_options=ruralurban_3&yeargroup=5&yeargroup_options=year5yearmort_1&statefips=56&statefips_options=area_states&county=56000&county_options=counties_wyoming&comparison=counties_to_us&comparison_options=comparison_counties&radio_comparison=areas&radio_comparison_options=cods_or_areas
Each file is single age group, so age weighting does not apply despite the variable names Each file is single age group, so age weighting does not apply despite the variable names. Each age group file is named sequentially with a prefix “A_” for the first age group and a prefix “I_” for the oldest. There are separate directories for each sex. The cleaning script uses this directory structure to extract the right files and merge them into one data set.
These files must be downloaded manually because the web page uses a filter feature to select the county and age group. While there may be a way to scrape the data with code, the trade-off in my time was not worth it. Future runs will need to check these records and can download the files to match this directory structure in order to process an update in death rates.
Valid data as of Nov 6 2025 Alex Gebben

View File

@ -1 +0,0 @@
[1] "Data files gathered manually from:\n\n\n\n\nhttps://hdpulse.nimhd.nih.gov/data-portal/mortality/table?cod=247&cod_options=cod_15&ratetype=aa&ratetype_options=ratetype_2&race=00&race_options=race_6&sex=2&sex_options=sex_3&age=177&age_options=age_11&ruralurban=0&ruralurban_options=ruralurban_3&yeargroup=5&yeargroup_options=year5yearmort_1&statefips=56&statefips_options=area_states&county=56000&county_options=counties_wyoming&comparison=counties_to_us&comparison_options=comparison_counties&radio_comparison=areas&radio_comparison_options=cods_or_areas\n\n\n\n\nEach file is single age group, so age weighting does not apply despite the variable names"

View File

@ -2,14 +2,42 @@
library(rvest) library(rvest)
library(tidyverse) library(tidyverse)
library(readxl) library(readxl)
library(curl) #To archive the html files
#setwd("../") #setwd("../")
###Create Location to Save raw data sets ###Create Location to Save raw data sets
if(!exists("SAVE_LOC_RAW")){SAVE_LOC_RAW <-"./Data/Raw_Data/"} if(!exists("SAVE_LOC_RAW")){SAVE_LOC_RAW <-"./Data/Raw_Data/"}
dir.create(SAVE_LOC_RAW, recursive = TRUE, showWarnings = FALSE) dir.create(SAVE_LOC_RAW, recursive = TRUE, showWarnings = FALSE)
SAVE_LOC_RAW_POP <- paste0(SAVE_LOC_RAW,"Population/")
dir.create(SAVE_LOC_RAW_POP , recursive = TRUE, showWarnings = FALSE)
RAW_HTML_LOC <- paste0(SAVE_LOC_RAW_POP,"HTML_Population_Files/")
dir.create(RAW_HTML_LOC, recursive = TRUE, showWarnings = FALSE)
RAW_EXCEL_LOC <- paste0(SAVE_LOC_RAW_POP,"Excel_Population_Files/")
dir.create(RAW_EXCEL_LOC, recursive = TRUE, showWarnings = FALSE)
##Start a log file about the data
sink(file=paste0(SAVE_LOC_RAW_POP,"README_POPULATION_DATA.txt"),append=FALSE)
cat("Most data is supplied by the Wyoming Department of Administration & Information Economic Analysis Division (WIEAD). Their data is either directly pulled from other sources like the census or is interpolated using this data (such as deaths, and migration).\n This folder saves the raw HTML of a few population data source, which were stored as a web table rather than a CSV or excel file so that in the future if the web pages are removed or changed the code will still run, and can be updated with new information.\nSome files are supplied as HTML tables from a webpage, while others are excel files. ")
sink()
########County, Death, Birth and Migration Data ########County, Death, Birth and Migration Data
#Data found on the page http://eadiv.state.wy.us/pop/ #Data found on the page http://eadiv.state.wy.us/pop/
#Website States: Wyoming Economic Analysis Division based on U.S. Census Bureau's population estimation and vital stats above #Website States: Wyoming Economic Analysis Division based on U.S. Census Bureau's population estimation and vital stats above
PAGE <- read_html("http://eadiv.state.wy.us/pop/BirthDeathMig.htm") BIRTH_DEATH_MIGRATION_HTML_LOC <- paste0(RAW_HTML_LOC,"County_Migration_Deaths_Births.html")
try(curl_download("http://eadiv.state.wy.us/pop/BirthDeathMig.htm",destfile=BIRTH_DEATH_MIGRATION_HTML_LOC )) #Download the file rather than directly use the website, so a backup is available if it ever goes offline.
#Add to the notes
#Append entry 1 (county births, deaths and net migration) to the population README.
#The file name listed here matches the archived copy written to BIRTH_DEATH_MIGRATION_HTML_LOC, which ends in ".html".
#cat() writes straight to the file, so no output sink is left open if the write fails.
cat("\n\n 1) Wyoming births, deaths, and net migration by county of residence: 1971 - 2023:
County_Migration_Deaths_Births.html comes from http://eadiv.state.wy.us/pop/BirthDeathMig.htm
Data Type: HTML table
Data Source: WIEAD
Original Source: Wyoming Department of Health and U.S. Census Bureau",
file=paste0(SAVE_LOC_RAW_POP,"README_POPULATION_DATA.txt"),append=TRUE) #SAVE_LOC_RAW_POP already ends with "/"
PAGE <- read_html(BIRTH_DEATH_MIGRATION_HTML_LOC )
NODE <- html_element(PAGE ,"table") NODE <- html_element(PAGE ,"table")
TBL <- html_table(NODE) TBL <- html_table(NODE)
@ -36,7 +64,17 @@ WY_COUNTY_DATA_SET <- pivot_wider(Data,names_from=Type,values_from=Pop_Change) %
WY_COUNTY_DATA_SET[,"County"] <- gsub(" ","_",WY_COUNTY_DATA_SET %>% pull(County)) WY_COUNTY_DATA_SET[,"County"] <- gsub(" ","_",WY_COUNTY_DATA_SET %>% pull(County))
########################City and County Population Data 2020 to 2024 ########################City and County Population Data 2020 to 2024
PAGE <- read_html('http://eadiv.state.wy.us/pop/Place-24EST.htm') CITY_POPULATION_A <- paste0(RAW_HTML_LOC,"Wyoming_City_and_County_Population_2020_2024.html")
try(curl_download("http://eadiv.state.wy.us/pop/Place-24EST.htm",destfile=CITY_POPULATION_A)) #Download the file rather than directly use the website, so a backup is available if it ever goes offline.
#Add to the notes
sink(file=paste0(SAVE_LOC_RAW_POP,"README_POPULATION_DATA.txt"),append=TRUE)
cat("\n 2) Wyoming Incorporated Place Population Estimates: April 1, 2020 to July 1, 2024
Wyoming_City_and_County_Population_2020_2024.html comes from http://eadiv.state.wy.us/pop/Place-24EST.htm
Data Type: HTML table
Data Source: WIEAD
Original Source: U.S. Census Bureau, Population Division Wyoming Department of Health and U.S. Census Bureau")
sink()
PAGE <- read_html(CITY_POPULATION_A)
NODE <- html_element(PAGE ,"table") NODE <- html_element(PAGE ,"table")
TBL <- html_table(NODE) TBL <- html_table(NODE)
@ -59,7 +97,18 @@ CITY_POP <- TBL[sort(c(grep("County",TBL %>% pull(County),invert=TRUE,ignore.cas
CITY_POP$County <- gsub(" ","_",gsub("Balance of","Unincorporated",gsub(" County","",gsub(" city","",gsub(" town","",CITY_POP$County,ignore.case=TRUE),ignore.case=TRUE),ignore.case=TRUE),ignore.case=TRUE)) CITY_POP$County <- gsub(" ","_",gsub("Balance of","Unincorporated",gsub(" County","",gsub(" city","",gsub(" town","",CITY_POP$County,ignore.case=TRUE),ignore.case=TRUE),ignore.case=TRUE),ignore.case=TRUE))
CITY_POP <- CITY_POP %>% rename("City"=County) CITY_POP <- CITY_POP %>% rename("City"=County)
########################City Population Data 2010 to 2020 ########################City Population Data 2010 to 2020
PAGE <- read_html('http://eadiv.state.wy.us/pop/sub-est11-19.htm') CITY_POPULATION_B <- paste0(RAW_HTML_LOC,"Wyoming_City_and_County_Population_2010_2020.html")
try(curl_download('http://eadiv.state.wy.us/pop/sub-est11-19.htm',destfile=CITY_POPULATION_B)) #Download the file rather than directly use the website, so a backup is available if it ever goes offline.
#Add to the notes
#Append entry 3 (incorporated place population, 2010 to 2020) to the population README.
#cat() writes straight to the file, so no output sink is left open if the write fails.
cat("\n 3) Intercensal Estimates of the Resident Population for Incorporated Places in Wyoming: April 1, 2010 to April 1, 2020
Data Type: HTML table
Wyoming_City_and_County_Population_2010_2020.html comes from http://eadiv.state.wy.us/pop/sub-est11-19.htm
Data Source: WIEAD
Original Source: U.S. Census Bureau, Population Division",
file=paste0(SAVE_LOC_RAW_POP,"README_POPULATION_DATA.txt"),append=TRUE)
PAGE <- read_html(CITY_POPULATION_B )
NODE <- html_element(PAGE ,"table") NODE <- html_element(PAGE ,"table")
TBL <- html_table(NODE) TBL <- html_table(NODE)
ST <- which(toupper(TBL$X1)==toupper("Afton town, Wyoming")) ST <- which(toupper(TBL$X1)==toupper("Afton town, Wyoming"))
@ -74,7 +123,18 @@ TBL$City <- gsub(" ","_",gsub(" $","",gsub("\r|\n| Wyoming|,| town| city","",TBL
TBL <- TBL %>% filter(Year!=2020) TBL <- TBL %>% filter(Year!=2020)
CITY_POP <- rbind(TBL,CITY_POP) CITY_POP <- rbind(TBL,CITY_POP)
########################County Population Data 2010 to 2020 ########################County Population Data 2010 to 2020
PAGE <- read_html('http://eadiv.state.wy.us/pop/ctyest11-19.htm') COUNTY_POPULATION_B <- paste0(RAW_HTML_LOC,"Wyoming_County_Population_2010_2020.html")
try(curl_download('http://eadiv.state.wy.us/pop/ctyest11-19.htm',destfile=COUNTY_POPULATION_B)) #Download the file rather than directly use the website, so a backup is available if it ever goes offline.
#Add to the notes
sink(file=paste0(SAVE_LOC_RAW_POP,"README_POPULATION_DATA.txt"),append=TRUE)
cat("\n 4) Intercensal Estimates of the Resident Population for Counties in Wyoming: April 1, 2010 to April 1, 2020
Wyoming_County_Population_2010_2020.html comes from http://eadiv.state.wy.us/pop/ctyest11-19.htm
Data Type: HTML table
Data Source: WIEAD
Original Source: U.S. Census Bureau, Population Division" )
sink()
PAGE <- read_html(COUNTY_POPULATION_B )
NODE <- html_element(PAGE ,"table") NODE <- html_element(PAGE ,"table")
TBL <- html_table(NODE) TBL <- html_table(NODE)
ST <- grep("Albany",TBL$X1) ST <- grep("Albany",TBL$X1)
@ -90,7 +150,18 @@ TBL <- TBL %>% filter(Year!=2020)
COUNTY_POP <- rbind(TBL,COUNTY_POP) COUNTY_POP <- rbind(TBL,COUNTY_POP)
########################County and City Population Data 2000 to 2010 ########################County and City Population Data 2000 to 2010
PAGE <- read_html('http://eadiv.state.wy.us/pop/sub-est01-09.htm') CITY_POPULATION_C <- paste0(RAW_HTML_LOC,"Wyoming_City_and_County_Population_2000_2009.html")
try(curl_download('http://eadiv.state.wy.us/pop/sub-est01-09.htm',destfile=CITY_POPULATION_C)) #Download the file rather than directly use the website, so a backup is available if it ever goes offline.
#Add to the notes
sink(file=paste0(SAVE_LOC_RAW_POP,"README_POPULATION_DATA.txt"),append=TRUE)
cat("\n 5) Intercensal Estimates of the Resident Population for Cities and Towns of Wyoming: April 1, 2000 to July 1, 2010
Wyoming_City_and_County_Population_2000_2009.html comes from 'http://eadiv.state.wy.us/pop/sub-est01-09.htm'
Data Type: HTML table
Data Source: WIEAD
Original Source: U.S. Census Bureau, Population Division" )
sink()
PAGE <- read_html(CITY_POPULATION_C)
NODE <- html_element(PAGE ,"table") NODE <- html_element(PAGE ,"table")
TBL <- html_table(NODE) TBL <- html_table(NODE)
@ -119,11 +190,18 @@ CITY_POP$City <- gsub("LaGrange","La_Grange",CITY_POP$City)
COUNTY_POP <- rbind(COUNTY_TBL,COUNTY_POP) COUNTY_POP <- rbind(COUNTY_TBL,COUNTY_POP)
####################County and City Population Data for 1990-2000 ####################County and City Population Data for 1990-2000
#Location to save any raw population files. Most files are not saved since they are pulled from a html and not a excel file, but older files are only available as excel files #Location to save any raw population files. Most files are not saved since they are pulled from a html and not a excel file, but older files are only available as excel files
SAVE_LOC_RAW_POP <- paste0(SAVE_LOC_RAW,"/Population") POP_FILE_1990 <- paste0(RAW_EXCEL_LOC,"Wyoming_County_Population_1990_2000.xls")
dir.create(SAVE_LOC_RAW_POP , recursive = TRUE, showWarnings = FALSE) try(if(!file.exists(POP_FILE_1990)){download.file('http://eadiv.state.wy.us/pop/c&sc90_00.xls',POP_FILE_1990)})
#Add to the notes
#Append entry 6 (county and city population, 1990 to 2000) to the population README.
#The file name listed here matches the workbook saved at POP_FILE_1990 (Wyoming_County_Population_1990_2000.xls).
#cat() writes straight to the file, so no output sink is left open if the write fails.
cat("\n 6) Population for Wyoming, Counties, Cities, and Towns: 1990 to 2000
Wyoming_County_Population_1990_2000.xls comes from 'http://eadiv.state.wy.us/pop/c&sc90_00.xls'
Data Type: Excel file
Data Source: WIEAD
Original Source: U.S. Census Bureau, Population Division",
file=paste0(SAVE_LOC_RAW_POP,"README_POPULATION_DATA.txt"),append=TRUE)
POP_FILE_1990 <- paste0(SAVE_LOC_RAW_POP,"/Pop_1990s.xls")
if(!file.exists(POP_FILE_1990)){download.file('http://eadiv.state.wy.us/pop/c&sc90_00.xls',POP_FILE_1990)}
TEMP <- read_xls(POP_FILE_1990,skip=2)[-1:-4,] TEMP <- read_xls(POP_FILE_1990,skip=2)[-1:-4,]
colnames(TEMP)[1] <- "County" colnames(TEMP)[1] <- "County"
TEMP <- TEMP[1:which(TEMP[,1]=="Wind River Res."),] TEMP <- TEMP[1:which(TEMP[,1]=="Wind River Res."),]
@ -143,8 +221,18 @@ TEMP_CITY <- TEMP_CITY %>% filter(Year!=2000)
try(rm(TEMP_CITY,TEMP_COUNTY,TEMP)) try(rm(TEMP_CITY,TEMP_COUNTY,TEMP))
####################County and City Population Data for 1980-1990 ####################County and City Population Data for 1980-1990
POP_FILE_1980 <- paste0(SAVE_LOC_RAW_POP ,"/Pop_1980s.xls") POP_FILE_1980 <- paste0(RAW_EXCEL_LOC,"/Wyoming_County_Population_1980_1990.xls")
if(!file.exists(POP_FILE_1980)){download.file('http://eadiv.state.wy.us/pop/C&SC8090.xls',POP_FILE_1980)} try(if(!file.exists(POP_FILE_1980)){download.file('http://eadiv.state.wy.us/pop/C&SC8090.xls',POP_FILE_1980)})
#Add to the notes
sink(file=paste0(SAVE_LOC_RAW_POP,"README_POPULATION_DATA.txt"),append=TRUE)
cat("\n 7) Population for Wyoming, Counties and Municipalities: 1980 to 1990
Wyoming_County_Population_1980_1990.xls comes from 'http://eadiv.state.wy.us/pop/C&SC8090.xls'
Data Type: Excel file
Data Source: WIEAD
Original Source: U.S. Census Bureau, Population Division" )
sink()
TEMP <- read_xls(POP_FILE_1980,skip=2)[-1:-4,] TEMP <- read_xls(POP_FILE_1980,skip=2)[-1:-4,]
colnames(TEMP)[1] <- "County" colnames(TEMP)[1] <- "County"
@ -169,7 +257,18 @@ COUNTY_POP <- rbind(TEMP_COUNTY,COUNTY_POP)
try(rm(TEMP_CITY,TEMP_COUNTY,TEMP)) try(rm(TEMP_CITY,TEMP_COUNTY,TEMP))
####################County Population Data for 1970-1980 ####################County Population Data for 1970-1980
POP_FILE_1970 <- paste0(SAVE_LOC_RAW_POP ,"/Pop_1970s.xls") POP_FILE_1970 <- paste0(RAW_EXCEL_LOC,"/Wyoming_County_Population_1970_1980.xls")
try(if(!file.exists(POP_FILE_1970)){download.file('http://eadiv.state.wy.us/pop/Cnty7080.xls',POP_FILE_1970)})
#Add to the notes
sink(file=paste0(SAVE_LOC_RAW_POP,"README_POPULATION_DATA.txt"),append=TRUE)
cat("\n 8) Wyoming and County Population: 1970 to 1980
Wyoming_County_Population_1970_1980.xls comes from 'http://eadiv.state.wy.us/pop/Cnty7080.xls'
Data Type: Excel file
Data Source: WIEAD
Original Source: U.S. Census Bureau, Population Division" )
sink()
if(!file.exists(POP_FILE_1970)){download.file('http://eadiv.state.wy.us/pop/Cnty7080.xls',POP_FILE_1970)} if(!file.exists(POP_FILE_1970)){download.file('http://eadiv.state.wy.us/pop/Cnty7080.xls',POP_FILE_1970)}
TEMP <- read_xls(POP_FILE_1970,skip=2)[-1:-4,] TEMP <- read_xls(POP_FILE_1970,skip=2)[-1:-4,]
@ -183,7 +282,20 @@ TEMP <- TEMP %>% filter(Year!=1980)
COUNTY_POP <- rbind(TEMP,COUNTY_POP) COUNTY_POP <- rbind(TEMP,COUNTY_POP)
#ggplot(aes(x=Year,y=Population,group=County,color=County),data=COUNTY_POP)+geom_line() #ggplot(aes(x=Year,y=Population,group=County,color=County),data=COUNTY_POP)+geom_line()
try(rm(TEMP)) try(rm(TEMP))
###########Old data addtion:Period Ends in 1970 ###########Old data addition:Period Ends in 1970
POP_FILE_OLD <- paste0(RAW_HTML_LOC,"Wyoming_City_and_County_Population_Prior_to_1970.htm")
try(curl_download('http://eadiv.state.wy.us/demog_data/cntycity_hist.htm',destfile=POP_FILE_OLD))
#Add to the notes
sink(file=paste0(SAVE_LOC_RAW_POP,"README_POPULATION_DATA.txt"),append=TRUE)
cat("\n 9) Historical decennial census population for Wyoming counties, cities, and towns
Wyoming_City_and_County_Population_Prior_to_1970.htm comes from 'http://eadiv.state.wy.us/demog_data/cntycity_hist.htm'
Data Type: HTML Tables
Data Source: WIEAD
Original Source: U.S. Census Bureau, Population Division
Note: Two tables are included complicating extraction. The values are manually entered in R rather than scarped like the other data sets" )
sink()
#See in part http://eadiv.state.wy.us/demog_data/cntycity_hist.htm #See in part http://eadiv.state.wy.us/demog_data/cntycity_hist.htm
LN_OLD <- c(12487,10894,10286,9023,9018,8640) #Missing in 1910 LN_OLD <- c(12487,10894,10286,9023,9018,8640) #Missing in 1910
Year <- seq(1920,1970,by=10) Year <- seq(1920,1970,by=10)
@ -224,5 +336,8 @@ write_csv(CITY_POP,paste0(CSV_SAVE,"/All_Wyoming_City_Populations.csv" ))
saveRDS(WY_COUNTY_DATA_SET,paste0(RDS_SAVE,"/All_Wyoming_County_Populations.Rds" )) saveRDS(WY_COUNTY_DATA_SET,paste0(RDS_SAVE,"/All_Wyoming_County_Populations.Rds" ))
write_csv(WY_COUNTY_DATA_SET,paste0(CSV_SAVE,"/All_Wyoming_County_Populations.csv" )) write_csv(WY_COUNTY_DATA_SET,paste0(CSV_SAVE,"/All_Wyoming_County_Populations.csv" ))
run_datetime <- format(Sys.time(), "%Y-%m-%d %H:%M:%S")
sink(file=paste0(SAVE_LOC_RAW_POP,"README_POPULATION_DATA.txt"),append=TRUE)
cat(paste0("\n--- Run Date: ", run_datetime, " ---\n"))
sink()

View File

@ -1,9 +1,33 @@
library(tidyverse) library(tidyverse)
library(readxl) library(readxl)
#setwd("../")
###################Demographics ###################Demographics
if(!file.exists("./Data/Demo_Single_Year_2020s.xls")){download.file('http://eadiv.state.wy.us/Pop/CO_SYASEX24.xlsx',"./Data/Demo_Single_Year_2020s.xls")} #Set up saving locations
TEMP <- read_xlsx("./Data/Demo_Single_Year_2020s.xls",skip=2)[,-1] if(!exists("SAVE_LOC_RAW")){SAVE_LOC_RAW <-"./Data/Raw_Data/"}
RAW_DEMO_LOC <- paste0(SAVE_LOC_RAW,"Demographics/")
dir.create(RAW_DEMO_LOC, recursive = TRUE, showWarnings = FALSE)
#Demographic Reference data
if(!exists("SAVE_LOC_REF")){SAVE_LOC_REF <-paste0(RAW_DEMO_LOC,"Reference_Material_for_Demographics/")}
dir.create(SAVE_LOC_REF, recursive = TRUE, showWarnings = FALSE)
#Start a README file for the raw downloaded demographic data
sink(file=paste0(RAW_DEMO_LOC,"README_DEMOGRAPHIC_DATA.txt"),append=FALSE)
cat("Demographic data used to find age and sex distribution of county populations\n")
sink()
#####Gather data
C_FILE_PATH <- paste0(RAW_DEMO_LOC,"Wyoming_County_Sex_by_Year_of_Age_Demographic_Data_2020_2024.xls")
try(if(!file.exists(C_FILE_PATH)){download.file("http://eadiv.state.wy.us/Pop/CO_SYASEX24.xlsx",C_FILE_PATH)})
#Append to the README for clarity of data sources
#Entry 1: single-year-of-age by sex county estimates saved at C_FILE_PATH.
#The agency name matches the wording used for WIEAD in the population README.
#cat() writes straight to the file, so no output sink is left open if the write fails.
cat("\n\n 1) Annual County Resident Population Estimates by Single Year of Age and Sex: April 1, 2020 to July 1, 2024
Wyoming_County_Sex_by_Year_of_Age_Demographic_Data_2020_2024.xls comes from http://eadiv.state.wy.us/Pop/CO_SYASEX24.xlsx
Data Type: Excel table
Data Source: Wyoming Department of Administration & Information Economic Analysis Division (WIEAD)
Original Source: Census Bureau, Population Division, June 2025",
file=paste0(RAW_DEMO_LOC,"README_DEMOGRAPHIC_DATA.txt"),append=TRUE) #RAW_DEMO_LOC already ends with "/"
TEMP <- read_xlsx(C_FILE_PATH,skip=2)[,-1]
TEMP <- TEMP[1:(min(which(is.na(TEMP[,1])))-1),] TEMP <- TEMP[1:(min(which(is.na(TEMP[,1])))-1),]
TEMP <- TEMP[!grepl("Base",TEMP$YEAR,ignore.case=TRUE),] #There are two population values provided. I believe one is the census baseline, and one is a estimate in July. Keep the later estimate, to line up with the same seasonal collection pattern of the rest of the data TEMP <- TEMP[!grepl("Base",TEMP$YEAR,ignore.case=TRUE),] #There are two population values provided. I believe one is the census baseline, and one is a estimate in July. Keep the later estimate, to line up with the same seasonal collection pattern of the rest of the data
TEMP$YEAR <- year(as.Date(substr((TEMP$YEAR),1,8),format="%m/%d/%Y")) TEMP$YEAR <- year(as.Date(substr((TEMP$YEAR),1,8),format="%m/%d/%Y"))
@ -11,10 +35,27 @@ colnames(TEMP) <- c("County","Year","Age","Number","Num_Male","Num_Female")
TEMP$County <- gsub(" County","",TEMP$County,ignore.case=TRUE) TEMP$County <- gsub(" County","",TEMP$County,ignore.case=TRUE)
DEM_2020 <- TEMP %>% select(-Number) DEM_2020 <- TEMP %>% select(-Number)
###Demographics all ###Demographics all
DEM_DATA <- read_delim("Data/County_Demographics_Census/wy.1969_2023.singleages.through89.90plus.txt",delim=" ",col_names=c("ID","VALUES"),col_types=list('c','c')) try(DEM_DATA <- read_delim('https://seer.cancer.gov/popdata/yr1969_2023.singleages.through89.90plus/wy.1969_2023.singleages.through89.90plus.txt.gz',delim=" ",col_names=c("ID","VALUES"),col_types=list('c','c')))
DEM_DATA$Year <- as.integer(substr(DEM_DATA$ID,1,4)) SEER_DATA_LOC <- paste0(RAW_DEMO_LOC,"Wyoming_County_Sex_by_Year_of_Age_Demographic_Data_1969_2023.csv")
#If the SEER download above failed (DEM_DATA was never created) fall back to the archived copy; otherwise refresh the archive.
#Read every column as character, as the download does, so the position-coded ID and VALUES strings are not converted to numbers before substr() is applied.
if(!exists("DEM_DATA")){
  DEM_DATA <- read_csv(SEER_DATA_LOC,col_types=cols(.default=col_character()))
}else{
  write_csv(DEM_DATA,SEER_DATA_LOC)
}
#Append to the README for clarity of data sources
sink(file=paste0(RAW_DEMO_LOC,"/README_DEMOGRAPHIC_DATA.txt"),append=TRUE)
cat("\n\n 2) Wyoming County-Level Population Files - Single-year Age Groups 1969 to 2023
Wyoming_County_Sex_by_Year_of_Age_Demographic_Data_1969_2023.csv comes from https://seer.cancer.gov/popdata/yr1969_2023.singleages.through89.90plus/wy.1969_2023.singleages.through89.90plus.txt.gz
Data Type: gunzip (gz) file with coded data
Data Source: The National Cancer Institute surveillance, Epidemiology, and End Results Program
Original Source: Census Bureau (data processed for yearly estimates)
Note: See https://seer.cancer.gov/popdata/download.html for more data information and other State data. Raw data is parsed using the described data format (first number year, then fips code etc.).
The required fips codes are provided in the reference folder from https://github.com/kjhealy/fips-codes/raw/refs/heads/master/county_fips_master.csv.")
sink()
DEM_DATA$Year <- as.numeric(substr(DEM_DATA$ID,1,4))
DEM_DATA$fips<- substr(DEM_DATA$ID,7,11) DEM_DATA$fips<- substr(DEM_DATA$ID,7,11)
COUNTY_LIST <- read_csv("https://github.com/kjhealy/fips-codes/raw/refs/heads/master/county_fips_master.csv",col_types=list('c','c')) %>% filter(state_abbr=="WY") %>% select(fips,County=county_name) %>% mutate(County=gsub(" ","_",gsub(" County","",County,ignore.case=TRUE))) COUNTY_LIST <- read_csv("https://github.com/kjhealy/fips-codes/raw/refs/heads/master/county_fips_master.csv",col_types=list('c','c')) %>% filter(state_abbr=="WY") %>% select(fips,County=county_name) %>% mutate(County=gsub(" ","_",gsub(" County","",County,ignore.case=TRUE)))
#Archive the Wyoming FIPS lookup in the reference folder, or reload the archived copy when COUNTY_LIST is not available.
#The archived copy is read as character so fips keeps the same type as the substr() based fips column it is joined to.
#NOTE(review): the read_csv() download of COUNTY_LIST is not wrapped in try(), so a failed download stops the script before this fallback can run - confirm whether that is intended.
FIPS_LOC <- paste0(SAVE_LOC_REF,"fips_codes.csv")
if(!exists("COUNTY_LIST")){
  COUNTY_LIST <- read_csv(FIPS_LOC,col_types=cols(.default=col_character()))
}else{
  write_csv(COUNTY_LIST,FIPS_LOC)
}
DEM_DATA <- DEM_DATA %>% left_join(COUNTY_LIST) %>% select(-fips) DEM_DATA <- DEM_DATA %>% left_join(COUNTY_LIST) %>% select(-fips)
#16=3 #16=3
DEM_DATA$Sex <- ifelse(substr(DEM_DATA$VALUES,3,3)==1,"Male","Female") DEM_DATA$Sex <- ifelse(substr(DEM_DATA$VALUES,3,3)==1,"Male","Female")
@ -41,4 +82,8 @@ DEM_DATA <- rbind(DEM_2020,DEM_DATA) %>% ungroup %>% arrange(Year,Age) %>% uniq
LIN_DEM <- DEM_DATA %>% filter(County=='Lincoln') LIN_DEM <- DEM_DATA %>% filter(County=='Lincoln')
saveRDS(LIN_DEM,paste0(RDS_SAVE,"/Full_Lincoln_County_Demographics.Rds" )) saveRDS(LIN_DEM,paste0(RDS_SAVE,"/Full_Lincoln_County_Demographics.Rds" ))
write_csv(LIN_DEM,paste0(CSV_SAVE,"/Full_Lincoln_County_Demographics.csv" )) write_csv(LIN_DEM,paste0(CSV_SAVE,"/Full_Lincoln_County_Demographics.csv" ))
#Stamp the demographic README with the time of this run.
#This script defines RAW_DEMO_LOC and README_DEMOGRAPHIC_DATA.txt; SAVE_LOC_RAW_POP belongs to the population script and is not created here.
run_datetime <- format(Sys.time(), "%Y-%m-%d %H:%M:%S")
cat(paste0("\n--- Run Date: ", run_datetime, " ---\n"),
    file=paste0(RAW_DEMO_LOC,"README_DEMOGRAPHIC_DATA.txt"),append=TRUE)