library(tidyverse) library(readxl) #setwd("../") ###################Demographics #Set up saving locations if(!exists("SAVE_LOC_RAW")){SAVE_LOC_RAW <-"./Data/Raw_Data/"} RAW_DEMO_LOC <- paste0(SAVE_LOC_RAW,"Demographics/") dir.create(RAW_DEMO_LOC, recursive = TRUE, showWarnings = FALSE) #Demographic Reference data if(!exists("SAVE_LOC_REF")){SAVE_LOC_REF <-paste0(RAW_DEMO_LOC,"Reference_Material_for_Demographics/")} dir.create(SAVE_LOC_REF, recursive = TRUE, showWarnings = FALSE) #Start a README file for the raw downloaded demographic data sink(file=paste0(RAW_DEMO_LOC,"README_DEMOGRAPHIC_DATA.txt"),append=FALSE) cat("Demographic data used to find age and sex distribution of county populations\n") sink() #####Gather data C_FILE_PATH <- paste0(RAW_DEMO_LOC,"Wyoming_County_Sex_by_Year_of_Age_Demographic_Data_2020_2024.xls") try(if(!file.exists(C_FILE_PATH)){download.file("http://eadiv.state.wy.us/Pop/CO_SYASEX24.xlsx",C_FILE_PATH)}) #Append to the README for clarity of data sources sink(file=paste0(RAW_DEMO_LOC,"/README_DEMOGRAPHIC_DATA.txt"),append=TRUE) cat("\n\n 1) Annual County Resident Population Estimates by Single Year of Age and Sex: April 1, 2020 to July 1, 2024 Wyoming_County_Sex_by_Year_of_Age_Demographic_Data_2020_2024.xls comes from http://eadiv.state.wy.us/Pop/CO_SYASEX24.xlsx Data Type: Excel table Data Source: Wyoming Department of Information and Economic Development (WIEAD) Original Source: Census Bureau, Population Division, June 2025") sink() TEMP <- read_xlsx(C_FILE_PATH,skip=2)[,-1] TEMP <- TEMP[1:(min(which(is.na(TEMP[,1])))-1),] TEMP <- TEMP[!grepl("Base",TEMP$YEAR,ignore.case=TRUE),] #There are two population values provided. I believe one is the census baseline, and one is a estimate in July. Keep the later estimate, to line up with the same seasonal collection pattern of the rest of the data TEMP$YEAR <- year(as.Date(substr((TEMP$YEAR),1,8),format="%m/%d/%Y")) colnames(TEMP) <- c("County","Year","Age","Number","Num_Male","Num_Female") TEMP$County <- gsub(" County","",TEMP$County,ignore.case=TRUE) DEM_2020 <- TEMP %>% select(-Number) ###Demographics all try(DEM_DATA <- read_delim('https://seer.cancer.gov/popdata/yr1969_2023.singleages.through89.90plus/wy.1969_2023.singleages.through89.90plus.txt.gz',delim=" ",col_names=c("ID","VALUES"),col_types=list('c','c'))) SEER_DATA_LOC <- paste0(RAW_DEMO_LOC,"Wyoming_County_Sex_by_Year_of_Age_Demographic_Data_1969_2023.csv") if(!exists("DEM_DATA")){DEM_DATA <- read_csv(SEER_DATA_LOC )}else{ write_csv(DEM_DATA,SEER_DATA_LOC)} #Append to the README for clarity of data sources sink(file=paste0(RAW_DEMO_LOC,"/README_DEMOGRAPHIC_DATA.txt"),append=TRUE) cat("\n\n 2) Wyoming County-Level Population Files - Single-year Age Groups 1969 to 2023 Wyoming_County_Sex_by_Year_of_Age_Demographic_Data_1969_2023.csv comes from https://seer.cancer.gov/popdata/yr1969_2023.singleages.through89.90plus/wy.1969_2023.singleages.through89.90plus.txt.gz Data Type: gunzip (gz) file with coded data Data Source: The National Cancer Institute surveillance, Epidemiology, and End Results Program Original Source: Census Bureau (data processed for yearly estimates) Note: See https://seer.cancer.gov/popdata/download.html for more data information and other State data. Raw data is parsed using the described data format (first number year, then fips code etc.). The required fips codes are provided in the reference folder from https://github.com/kjhealy/fips-codes/raw/refs/heads/master/county_fips_master.csv.") sink() DEM_DATA$Year <- as.numeric(substr(DEM_DATA$ID,1,4)) DEM_DATA$fips<- substr(DEM_DATA$ID,7,11) COUNTY_LIST <- read_csv("https://github.com/kjhealy/fips-codes/raw/refs/heads/master/county_fips_master.csv",col_types=list('c','c')) %>% filter(state_abbr=="WY") %>% select(fips,County=county_name) %>% mutate(County=gsub(" ","_",gsub(" County","",County,ignore.case=TRUE))) FIPS_LOC <- paste0(SAVE_LOC_REF,"fips_codes.csv") if(!exists("COUNTY_LIST")){ COUNTY_LIST <- read_csv(FIPS_LOC)}else{ write_csv(COUNTY_LIST,FIPS_LOC)} DEM_DATA <- DEM_DATA %>% left_join(COUNTY_LIST) %>% select(-fips) #16=3 DEM_DATA$Sex <- ifelse(substr(DEM_DATA$VALUES,3,3)==1,"Male","Female") DEM_DATA$Age <- parse_number(substr(DEM_DATA$VALUES,4,5)) DEM_DATA$Number <- parse_number(substr(DEM_DATA$VALUES,6,14)) DEM_DATA <- DEM_DATA %>% select(-ID,-VALUES) DEM_DATA <- DEM_DATA %>% group_by(Year,County,Sex,Age) %>% summarize(Number=sum(Number)) %>% ungroup()#Aggregate to sex and age level #The Wyoming census data seems newer than this data set from SEER cancer data source. Drop any of these records that overlap with the Wyoming data before merging. Arrange so the column order is the same between the two data sets, so they can be easily bound together. DEM_DATA <- pivot_wider(DEM_DATA,names_from=Sex,values_from=Number) %>% rename(Num_Female=Female,Num_Male=Male) %>% select(colnames(DEM_2020)) %>% filter(Year% unique DEM_DATA <- rbind(DEM_2020,DEM_DATA) %>% ungroup %>% arrange(Year,Age) %>% unique %>% mutate(Region=County) %>% select(County,Region,Year,Age,Num_Male,Num_Female) #Add a region which in this case is just the county. For Lincoln cities and other areas are analysed ###Save demographic data set if(!exists("SAVE_DEMO_LOC")){SAVE_DEMO_LOC <-"./Data/Cleaned_Data/Demographic_Sex_Age_Data"} CSV_SAVE <- paste0(SAVE_DEMO_LOC,"/CSV") RDS_SAVE <- paste0(SAVE_DEMO_LOC,"/RDS") #Save files for all county demographics dir.create(CSV_SAVE , recursive = TRUE, showWarnings = FALSE) dir.create(RDS_SAVE , recursive = TRUE, showWarnings = FALSE) saveRDS(DEM_DATA,paste0(RDS_SAVE,"/All_Wyoming_Counties_Demographics.Rds" )) write_csv(DEM_DATA,paste0(CSV_SAVE,"/All_Wyoming_Counties_Demographics.csv" )) #Save a file that is just Lincoln County, this just speeds up code so you do not always need to filter for 'Lincoln' LIN_DEM <- DEM_DATA %>% filter(County=='Lincoln') saveRDS(LIN_DEM,paste0(RDS_SAVE,"/Full_Lincoln_County_Demographics.Rds" )) write_csv(LIN_DEM,paste0(CSV_SAVE,"/Full_Lincoln_County_Demographics.csv" )) run_datetime <- format(Sys.time(), "%Y-%m-%d %H:%M:%S") sink(file=paste0(SAVE_LOC_RAW_POP,"README_POPULATION_DATA.txt"),append=TRUE) cat(paste0("\n--- Run Date: ", run_datetime, " ---\n")) sink()