Worked on Data proc script

This commit is contained in:
Alex 2025-10-07 17:14:51 -06:00
parent 5894881103
commit ee74049143
7 changed files with 6596 additions and 2 deletions

Binary file not shown.

File diff suppressed because it is too large Load Diff

Binary file not shown.

File diff suppressed because it is too large Load Diff

Binary file not shown.

BIN
Data/Pop_1970s.xls Normal file

Binary file not shown.

View File

@ -109,7 +109,7 @@ CITY_POP <- rbind(CITY_TBL,CITY_POP)
#Cleanup names
CITY_POP$City <- gsub("LaGrange","La_Grange",CITY_POP$City)
COUNTY_POP <- rbind(COUNTY_TBL,COUNTY_POP)
####################County and City Population DAta for 1990-2000
####################County and City Population Data for 1990-2000
if(!file.exists("./Data/Pop_1990s.xls")){download.file('http://eadiv.state.wy.us/pop/c&sc90_00.xls',"./Data/Pop_1990s.xls")}
TEMP <- read_xls("Data/Pop_1990s.xls",skip=2)[-1:-4,]
colnames(TEMP)[1] <- "County"
@ -129,7 +129,7 @@ CITY_POP %>% pull(City) %>% unique %>% sort
COUNTY_POP <- rbind(TEMP_COUNTY,COUNTY_POP)
#ggplot(aes(x=Year,y=Population,group=County,color=County),data=COUNTY_POP)+geom_line()
try(rm(TEMP_CITY,TEMP_COUNTY,TEMP))
####################County and City Population DAta for 1980-1990
####################County and City Population Data for 1980-1990
if(!file.exists("./Data/Pop_1980s.xls")){download.file('http://eadiv.state.wy.us/pop/C&SC8090.xls',"./Data/Pop_1980s.xls")}
TEMP <- read_xls("Data/Pop_1980s.xls",skip=2)[-1:-4,]
colnames(TEMP)[1] <- "County"
@ -148,7 +148,99 @@ COUNTY_POP <- rbind(TEMP_COUNTY,COUNTY_POP)
#ggplot(aes(x=Year,y=Population,group=County,color=County),data=COUNTY_POP)+geom_line()
try(rm(TEMP_CITY,TEMP_COUNTY,TEMP))
####################County Population Data for 1980-1990
if(!file.exists("./Data/Pop_1970s.xls")){download.file('http://eadiv.state.wy.us/pop/Cnty7080.xls',"./Data/Pop_1970s.xls")}
TEMP <- read_xls("Data/Pop_1970s.xls",skip=2)[-1:-4,]
colnames(TEMP)[1] <- "County"
TEMP <- TEMP[1:which(TEMP[,1]=="Weston"),]
TEMP <- pivot_longer(TEMP,all_of(colnames(TEMP)[-1]),names_to="Year",values_to="Population") %>% mutate(Year=as.integer(Year),Population=as.numeric(Population))
TEMP$County <- gsub(" ","_",TEMP$County)
COUNTY_POP <- rbind(TEMP,COUNTY_POP)
#ggplot(aes(x=Year,y=Population,group=County,color=County),data=COUNTY_POP)+geom_line()
try(rm(TEMP))
###########Old data addtion:Period Ends in 1970
#See in part http://eadiv.state.wy.us/demog_data/cntycity_hist.htm
LN_OLD <- c(12487,10894,10286,9023,9018,8640) #Missing in 1910
Year <- seq(1920,1970,by=10)
TEMP <- cbind(Year,rep("Lincoln",6),LN_OLD)
colnames(TEMP ) <- c("Year","County","Population")
TEMP <- as_tibble(TEMP)
COUNTY_POP <- rbind(TEMP,COUNTY_POP) %>% arrange(County,Year)
KEM_OLD <- c(843,1517,1884,2026,1667,2028,2292) #1910 forward until 1970
Year <- seq(1910,1970,by=10)
TEMP <- cbind(Year,rep("kemmerer",7),KEM_OLD)
colnames(TEMP ) <- c("Year","City","Population")
TEMP <- as_tibble(TEMP)
CITY_POP <- rbind(TEMP,CITY_POP)
DIAMOND_OLD <- c(696,726,812,586,415,398,485)
TEMP <- cbind(Year,rep("Diamondvile",7),DIAMOND_OLD)
colnames(TEMP ) <- c("Year","City","Population")
TEMP <- as_tibble(TEMP)
CITY_POP <- rbind(TEMP,CITY_POP) %>% arrange(City,Year)
#Add Other Data
COUNTY_POP <- COUNTY_POP %>% mutate(Year=as.integer(Year))
WY_COUNTY_DATA_SET <- COUNTY_POP %>% left_join(WY_COUNTY_DATA_SET )
###Save Population Results
write_csv(CITY_POP,file="./Data/Cleaned_Data/Wyoming_City_Population.csv")
write_csv(WY_COUNTY_DATA_SET ,file="./Data/Cleaned_Data/Wyoming_County_Population.csv")
saveRDS(CITY_POP,file="./Data/Cleaned_Data/Wyoming_City_Population.Rds")
saveRDS(WY_COUNTY_DATA_SET ,file="./Data/Cleaned_Data/Wyoming_County_Population.Rds")
###################Demographics
if(!file.exists("./Data/Demo_Single_Year_2020s.xls")){download.file('http://eadiv.state.wy.us/Pop/CO_SYASEX24.xlsx',"./Data/Demo_Single_Year_2020s.xls")}
TEMP <- read_xlsx("./Data/Demo_Single_Year_2020s.xls",skip=2)[,-1]
TEMP <- TEMP[1:(min(which(is.na(TEMP[,1])))-1),]
TEMP$YEAR <- year(as.Date(substr((TEMP$YEAR),1,8),format="%m/%d/%Y"))
DEM_2020_SINGLE <- TEMP
###Standard Population to adjust FROM
#https://hdpulse.nimhd.nih.gov/data-portal/mortality/table?cod=247&cod_options=cod_15&ratetype=aa&ratetype_options=ratetype_2&race=00&race_options=race_6&sex=2&sex_options=sex_3&age=001&age_options=age_11&ruralurban=0&ruralurban_options=ruralurban_3&yeargroup=5&yeargroup_options=year5yearmort_1&statefips=56&statefips_options=area_states&county=56000&county_options=counties_wyoming&comparison=counties_to_us&comparison_options=comparison_counties&radio_comparison=areas&radio_comparison_options=cods_or_areas
PAGE <- read_html("https://seer.cancer.gov/stdpopulations/stdpop.20ages.html")
NODE <- html_element(PAGE ,"table")
TBL <- html_table(NODE)
TBL <- TBL[,1:2]
colnames(TBL) <- c("Age_Group","Population")
DEM_2020_SINGLE
####Demographics 2010-2020
PAGE <- read_html('http://eadiv.state.wy.us/Pop/CO-AS10-20.htm')
NODE <- html_element(PAGE ,"table")
TBL <- html_table(NODE)
NAMES <- gsub(" ","_",TBL[5,] %>% t)
colnames(TBL ) <- NAMES
TBL <- TBL[-1:-5,c(-4)]
TBL$County <- gsub(" ","_",gsub(" "," ",gsub(" County","",TBL$County)))
TEMP <- pivot_longer(TBL,all_of(colnames(TBL)[-1:-3]),names_to="Year",values_to="Population") %>% mutate(Year=as.integer(Year),Population=parse_number(Population))
TEMP <- TEMP %>% unique
TEMP <- TEMP %>% filter(!Age_Group=="")
TEMP$Age_Group <- gsub(" ","_",TEMP$Age_Group)
DEM_2010 <- pivot_wider(TEMP,names_from=Age_Group,values_from=Population)
DEM_2010
####Demographics 2000-2010
PAGE <- read_html('http://eadiv.state.wy.us/Pop/CO-AS01-09.htm')
NODE <- html_element(PAGE ,"table")
TBL <- html_table(NODE)
TBL <- TBL[-1:-4,c(-4,-15,-16)]
colnames(TBL) <- c("County","Sex","Age_Group",2000:2009)
TBL$County <- gsub(" ","_",gsub(" "," ",gsub(" County","",TBL$County)))
TEMP <- pivot_longer(TBL,all_of(colnames(TBL)[-1:-3]),names_to="Year",values_to="Population") %>% mutate(Year=as.integer(Year),Population=parse_number(Population))
TEMP <- TEMP %>% unique
TEMP <- TEMP %>% filter(!Age_Group=="")
TEMP$Age_Group <- gsub(" ","_",TEMP$Age_Group)
TEMP <- pivot_wider(TEMP,names_from=Age_Group,values_from=Population)
####Demographics 2000-2010