library(tidyverse) library(readxl) ###################Demographics if(!file.exists("./Data/Demo_Single_Year_2020s.xls")){download.file('http://eadiv.state.wy.us/Pop/CO_SYASEX24.xlsx',"./Data/Demo_Single_Year_2020s.xls")} TEMP <- read_xlsx("./Data/Demo_Single_Year_2020s.xls",skip=2)[,-1] TEMP <- TEMP[1:(min(which(is.na(TEMP[,1])))-1),] TEMP <- TEMP[!grepl("Base",TEMP$YEAR,ignore.case=TRUE),] #There are two population values provided. I believe one is the census baseline, and one is a estimate in July. Keep the later estimate, to line up with the same seasonal collection pattern of the rest of the data TEMP$YEAR <- year(as.Date(substr((TEMP$YEAR),1,8),format="%m/%d/%Y")) colnames(TEMP) <- c("County","Year","Age","Number","Num_Male","Num_Female") TEMP$County <- gsub(" County","",TEMP$County,ignore.case=TRUE) DEM_2020 <- TEMP %>% select(-Number) ###Demographics all DEM_DATA <- read_delim("Data/County_Demographics_Census/wy.1969_2023.singleages.through89.90plus.txt",delim=" ",col_names=c("ID","VALUES"),col_types=list('c','c')) DEM_DATA$Year <- as.integer(substr(DEM_DATA$ID,1,4)) DEM_DATA$fips<- substr(DEM_DATA$ID,7,11) COUNTY_LIST <- read_csv("https://github.com/kjhealy/fips-codes/raw/refs/heads/master/county_fips_master.csv",col_types=list('c','c')) %>% filter(state_abbr=="WY") %>% select(fips,County=county_name) %>% mutate(County=gsub(" ","_",gsub(" County","",County,ignore.case=TRUE))) DEM_DATA <- DEM_DATA %>% left_join(COUNTY_LIST) %>% select(-fips) #16=3 DEM_DATA$Sex <- ifelse(substr(DEM_DATA$VALUES,3,3)==1,"Male","Female") DEM_DATA$Age <- parse_number(substr(DEM_DATA$VALUES,4,5)) DEM_DATA$Number <- parse_number(substr(DEM_DATA$VALUES,6,14)) DEM_DATA <- DEM_DATA %>% select(-ID,-VALUES) DEM_DATA <- DEM_DATA %>% group_by(Year,County,Sex,Age) %>% summarize(Number=sum(Number)) %>% ungroup()#Aggregate to sex and age level #The Wyoming census data seems newer than this data set from SEER cancer data source. Drop any of these records that overlap with the Wyoming data before merging. Arrange so the column order is the same between the two data sets, so they can be easily bound together. DEM_DATA <- pivot_wider(DEM_DATA,names_from=Sex,values_from=Number) %>% rename(Num_Female=Female,Num_Male=Male) %>% select(colnames(DEM_2020)) %>% filter(Year% unique DEM_DATA <- rbind(DEM_2020,DEM_DATA) %>% ungroup %>% arrange(Year,Age) %>% unique %>% mutate(Region=County) %>% select(County,Region,Year,Age,Num_Male,Num_Female) #Add a region which in this case is just the county. For Lincoln cities and other areas are analysed ###Save demographic data set if(!exists("SAVE_DEMO_LOC")){SAVE_DEMO_LOC <-"./Data/Cleaned_Data/Demographic_Sex_Age_Data"} CSV_SAVE <- paste0(SAVE_DEMO_LOC,"/CSV") RDS_SAVE <- paste0(SAVE_DEMO_LOC,"/RDS") #Save files for all county demographics dir.create(CSV_SAVE , recursive = TRUE, showWarnings = FALSE) dir.create(RDS_SAVE , recursive = TRUE, showWarnings = FALSE) saveRDS(DEM_DATA,paste0(RDS_SAVE,"/All_Wyoming_Counties_Demographics.Rds" )) write_csv(DEM_DATA,paste0(CSV_SAVE,"/All_Wyoming_Counties_Demographics.csv" )) #Save a file that is just Lincoln County, this just speeds up code so you do not always need to filter for 'Lincoln' LIN_DEM <- DEM_DATA %>% filter(County=='Lincoln') saveRDS(LIN_DEM,paste0(RDS_SAVE,"/Full_Lincoln_County_Demographics.Rds" )) write_csv(LIN_DEM,paste0(CSV_SAVE,"/Full_Lincoln_County_Demographics.csv" ))