330 lines
18 KiB
R
330 lines
18 KiB
R
#################IMPORTANT!!!! CLEAN UP DATA SCRIPT FOR USE
|
|
#############################Clean up scirpt folders, simulations of deaths should be seperated from this file. Death rate simulation is complicated and should be commented, and turned into a seperate script.
|
|
##############WORKING ON REGRESSION OF BIRTHS FOR PREDICTIONS OF GROWTH, THINK ABOUT GOING FROM COUNTY TO CITY, SEPERATE REGRESION FROM DATA COLLOECTION
|
|
library(rvest)
|
|
library(tidyverse)
|
|
library(readxl)
|
|
library(parallel)
|
|
cl <- detectCores()-1
|
|
#setwd("../")
|
|
########County, Death, Birth and Migration Data
|
|
#Data found on the page http://eadiv.state.wy.us/pop/
|
|
PAGE <- read_html("http://eadiv.state.wy.us/pop/BirthDeathMig.htm")
|
|
NODE <- html_element(PAGE ,"table")
|
|
TBL <- html_table(NODE)
|
|
|
|
ST <- which(toupper(TBL$X1)=="ALBANY")
|
|
END <- which(toupper(TBL$X1)=="TOTAL")
|
|
TYPES <- TBL[ST-2,1]
|
|
|
|
ST_YEAR <- 1971
|
|
ALL_DATA <- list()
|
|
TBL <- TBL[,c(1,which(!is.na(as.numeric(TBL[ST[1],]))))]
|
|
TBL <- TBL[,-ncol(TBL)]
|
|
colnames(TBL) <- c("County",(ST_YEAR:(ST_YEAR+ncol(TBL)-1)))
|
|
TBL$Type <- NA
|
|
for(i in 1:length(ST)){
|
|
TBL[ST[i]:END[i],"Type"]<- as.character(TYPES[i,1])
|
|
}
|
|
TBL[ST[2]:END[2],"Type"] <- as.character(TYPES[2,1])
|
|
TBL$Type
|
|
TBL <- TBL %>% filter(!is.na(Type)) %>% select(County,Type,everything())
|
|
GROUP <- colnames(TBL)[-1:-2]
|
|
Data <- pivot_longer(TBL,all_of(GROUP),names_to="Year",values_to="Pop_Change")
|
|
Data$County <- ifelse(toupper(Data$County)=="TOTAL","Wyoming",Data$County)
|
|
WY_COUNTY_DATA_SET <- pivot_wider(Data,names_from=Type,values_from=Pop_Change) %>% rename("Migration"=`Net Migration`) %>% mutate(Year=as.integer(Year),Births=parse_number(Births),Deaths=parse_number(Deaths),Migration=parse_number(Migration))
|
|
|
|
########################City and County Population Data 2020 to 2024
|
|
PAGE <- read_html('http://eadiv.state.wy.us/pop/Place-24EST.htm')
|
|
NODE <- html_element(PAGE ,"table")
|
|
TBL <- html_table(NODE)
|
|
|
|
ST <- which(toupper(TBL$X1)==toupper("Albany County"))
|
|
END <- which(toupper(TBL$X1)==toupper("Balance of Weston County"))
|
|
|
|
#More years than are pulled are listed to make more generic
|
|
COLUMNS <- c(1,which(TBL[ST-2,] %in% 1970:2025))
|
|
NAMES <- TBL[4,COLUMNS][-1]
|
|
TBL <- TBL[ST:END,COLUMNS ]
|
|
colnames(TBL) <- c("County",NAMES)
|
|
|
|
TBL <- pivot_longer(TBL,all_of(colnames(TBL)[-1]),names_to="Year",values_to="Population") %>% mutate(Year=as.integer(Year),Population=parse_number(Population))
|
|
TBL$County <- gsub(" "," ",gsub("\n","",gsub("\r","",TBL %>% pull(County))))
|
|
|
|
COUNTY_POP<- TBL[grep("COUNTY",TBL %>% pull(County),ignore.case=TRUE),]
|
|
COUNTY_POP<- COUNTY_POP[grep("Balance",COUNTY_POP%>% pull(County),invert=TRUE,ignore.case=TRUE),]
|
|
COUNTY_POP$County <- gsub(" ","_",gsub(" County","",COUNTY_POP$County))
|
|
CITY_POP <- TBL[sort(c(grep("County",TBL %>% pull(County),invert=TRUE,ignore.case=TRUE),grep("Balance",TBL %>% pull(County),ignore.case=TRUE))),]
|
|
CITY_POP$County <- gsub(" ","_",gsub("Balance of","Unincorporated",gsub(" County","",gsub(" city","",gsub(" town","",CITY_POP$County,ignore.case=TRUE),ignore.case=TRUE),ignore.case=TRUE),ignore.case=TRUE))
|
|
CITY_POP <- CITY_POP %>% rename("City"=County)
|
|
########################City Population Data 2010 to 2020
|
|
PAGE <- read_html('http://eadiv.state.wy.us/pop/sub-est11-19.htm')
|
|
NODE <- html_element(PAGE ,"table")
|
|
TBL <- html_table(NODE)
|
|
ST <- which(toupper(TBL$X1)==toupper("Afton town, Wyoming"))
|
|
END <- which(toupper(TBL$X1)==toupper("Yoder town, Wyoming"))
|
|
#More years than are pulled are listed to make more generic
|
|
COLUMNS <- c(1,which(TBL[ST-1,] %in% 1970:2025))
|
|
NAMES <- TBL[3,COLUMNS][-1]
|
|
TBL <- TBL[ST:END,COLUMNS ]
|
|
colnames(TBL) <- c("City",NAMES)
|
|
TBL <- pivot_longer(TBL,all_of(colnames(TBL)[-1]),names_to="Year",values_to="Population") %>% mutate(Year=as.integer(Year),Population=parse_number(Population))
|
|
TBL$City <- gsub(" ","_",gsub(" $","",gsub("\r|\n| Wyoming|,| town| city","",TBL$City,ignore.case=TRUE)))
|
|
CITY_POP <- rbind(TBL,CITY_POP)
|
|
########################County Population Data 2010 to 2020
|
|
PAGE <- read_html('http://eadiv.state.wy.us/pop/ctyest11-19.htm')
|
|
NODE <- html_element(PAGE ,"table")
|
|
TBL <- html_table(NODE)
|
|
ST <- grep("Albany",TBL$X1)
|
|
END <- grep("Weston",TBL$X1)
|
|
#More years than are pulled are listed to make more generic
|
|
COLUMNS <- c(1,which(TBL[ST-2,] %in% 1970:2025))
|
|
NAMES <- TBL[3,COLUMNS][-1]
|
|
TBL <- TBL[ST:END,COLUMNS ]
|
|
colnames(TBL) <- c("County",NAMES)
|
|
TBL <- pivot_longer(TBL,all_of(colnames(TBL)[-1]),names_to="Year",values_to="Population") %>% mutate(Year=as.integer(Year),Population=parse_number(Population))
|
|
TBL$County <- gsub(" ","_",gsub(" "," ",gsub(" $","",gsub("\r|\n| Wyoming|,| town| city| County|\\.","",TBL$County,ignore.case=TRUE))))
|
|
|
|
COUNTY_POP <- rbind(TBL,COUNTY_POP)
|
|
|
|
########################County and City Population Data 2000 to 2010
|
|
PAGE <- read_html('http://eadiv.state.wy.us/pop/sub-est01-09.htm')
|
|
NODE <- html_element(PAGE ,"table")
|
|
TBL <- html_table(NODE)
|
|
|
|
ST <- which(toupper(TBL$X1)==toupper("Albany County"))
|
|
END <- which(toupper(TBL$X1)==toupper("Balance of Weston County"))
|
|
|
|
#More years than are pulled are listed to make more generic
|
|
COLUMNS <- c(1,which(TBL[ST-4,] %in% 1970:2025))
|
|
NAMES <- TBL[4,COLUMNS][-1]
|
|
TBL <- TBL[ST:END,COLUMNS ]
|
|
colnames(TBL) <- c("County",NAMES)
|
|
TBL <- pivot_longer(TBL,all_of(colnames(TBL)[-1]),names_to="Year",values_to="Population") %>% mutate(Year=as.integer(Year),Population=parse_number(Population))
|
|
TBL$County <- gsub(" "," ",gsub("\n","",gsub("\r","",TBL %>% pull(County))))
|
|
|
|
COUNTY_TBL <- TBL[grep("COUNTY",TBL %>% pull(County),ignore.case=TRUE),]
|
|
COUNTY_TBL <-COUNTY_TBL[grep("Balance",COUNTY_TBL%>% pull(County),invert=TRUE,ignore.case=TRUE),]
|
|
COUNTY_TBL$County <-gsub("_(pt.)","", gsub(" ","_",gsub(" County","",COUNTY_TBL$County)))
|
|
|
|
CITY_TBL <- TBL[sort(c(grep("County",TBL %>% pull(County),invert=TRUE,ignore.case=TRUE),grep("Balance",TBL %>% pull(County),ignore.case=TRUE))),]
|
|
CITY_TBL$County <- gsub(" ","_",gsub("Balance of","Unincorporated",gsub(" County","",gsub(" city","",gsub(" town","",CITY_TBL$County,ignore.case=TRUE),ignore.case=TRUE),ignore.case=TRUE),ignore.case=TRUE))
|
|
CITY_TBL <- CITY_TBL %>% rename("City"=County)
|
|
CITY_POP <- rbind(CITY_TBL,CITY_POP)
|
|
#Cleanup names
|
|
CITY_POP$City <- gsub("LaGrange","La_Grange",CITY_POP$City)
|
|
COUNTY_POP <- rbind(COUNTY_TBL,COUNTY_POP)
|
|
####################County and City Population Data for 1990-2000
|
|
if(!file.exists("./Data/Pop_1990s.xls")){download.file('http://eadiv.state.wy.us/pop/c&sc90_00.xls',"./Data/Pop_1990s.xls")}
|
|
TEMP <- read_xls("Data/Pop_1990s.xls",skip=2)[-1:-4,]
|
|
colnames(TEMP)[1] <- "County"
|
|
tail(TEMP)
|
|
TEMP <- TEMP[1:which(TEMP[,1]=="Wind River Res."),]
|
|
|
|
TEMP <- pivot_longer(TEMP,all_of(colnames(TEMP)[-1]),names_to="Year",values_to="Population") %>% mutate(Year=as.integer(Year),Population=as.numeric(Population))
|
|
TEMP_COUNTY <- TEMP[grepl("Cnty",TEMP %>% pull(County),ignore.case=TRUE),]
|
|
TEMP_COUNTY$County <- gsub(" ","_",gsub(" "," ",gsub(" Cnty","",TEMP_COUNTY$County,ignore.case=TRUE)))
|
|
|
|
TEMP_CITY <- TEMP[grep("Cnty",TEMP %>% pull(County),ignore.case=TRUE,invert=TRUE),]
|
|
TEMP_CITY$County <- gsub("E_Therm","East_Therm",gsub(" ","_",gsub(" ","",TEMP_CITY %>% pull(County))))
|
|
TEMP_CITY <- TEMP_CITY %>% rename(City=County)
|
|
TEMP_CITY %>% pull(City) %>% unique %>% sort
|
|
CITY_POP <- rbind(TEMP_CITY,CITY_POP)
|
|
CITY_POP %>% pull(City) %>% unique %>% sort
|
|
COUNTY_POP <- rbind(TEMP_COUNTY,COUNTY_POP)
|
|
#ggplot(aes(x=Year,y=Population,group=County,color=County),data=COUNTY_POP)+geom_line()
|
|
try(rm(TEMP_CITY,TEMP_COUNTY,TEMP))
|
|
####################County and City Population Data for 1980-1990
|
|
if(!file.exists("./Data/Pop_1980s.xls")){download.file('http://eadiv.state.wy.us/pop/C&SC8090.xls',"./Data/Pop_1980s.xls")}
|
|
TEMP <- read_xls("Data/Pop_1980s.xls",skip=2)[-1:-4,]
|
|
colnames(TEMP)[1] <- "County"
|
|
TEMP <- TEMP[2:which(TEMP[,1]=="Upton"),1:(min(which(is.na(TEMP[2,])))-1)]
|
|
TEMP <- pivot_longer(TEMP,all_of(colnames(TEMP)[-1]),names_to="Year",values_to="Population") %>% mutate(Year=as.integer(Year),Population=as.numeric(Population))
|
|
|
|
TEMP_COUNTY <- TEMP[grepl("Cty",TEMP %>% pull(County),ignore.case=TRUE),]
|
|
TEMP_COUNTY$County <- gsub(" ","_",gsub(" "," ",gsub(" Cty","",TEMP_COUNTY$County,ignore.case=TRUE)))
|
|
|
|
TEMP_CITY <- TEMP[grep("Cty",TEMP %>% pull(County),ignore.case=TRUE,invert=TRUE),]
|
|
TEMP_CITY$County <-gsub("Frannie_","Frannie", gsub("Mtn._View","Mountain_View",gsub("E._Therm","East_Therm",gsub(" ","_",gsub(" ","",TEMP_CITY %>% pull(County))))))
|
|
TEMP_CITY <- TEMP_CITY %>% rename(City=County)
|
|
|
|
CITY_POP <- rbind(TEMP_CITY,CITY_POP)
|
|
COUNTY_POP <- rbind(TEMP_COUNTY,COUNTY_POP)
|
|
#ggplot(aes(x=Year,y=Population,group=County,color=County),data=COUNTY_POP)+geom_line()
|
|
try(rm(TEMP_CITY,TEMP_COUNTY,TEMP))
|
|
|
|
####################County Population Data for 1980-1990
|
|
if(!file.exists("./Data/Pop_1970s.xls")){download.file('http://eadiv.state.wy.us/pop/Cnty7080.xls',"./Data/Pop_1970s.xls")}
|
|
TEMP <- read_xls("Data/Pop_1970s.xls",skip=2)[-1:-4,]
|
|
colnames(TEMP)[1] <- "County"
|
|
TEMP <- TEMP[1:which(TEMP[,1]=="Weston"),]
|
|
TEMP <- pivot_longer(TEMP,all_of(colnames(TEMP)[-1]),names_to="Year",values_to="Population") %>% mutate(Year=as.integer(Year),Population=as.numeric(Population))
|
|
|
|
TEMP$County <- gsub(" ","_",TEMP$County)
|
|
|
|
COUNTY_POP <- rbind(TEMP,COUNTY_POP)
|
|
#ggplot(aes(x=Year,y=Population,group=County,color=County),data=COUNTY_POP)+geom_line()
|
|
try(rm(TEMP))
|
|
###########Old data addtion:Period Ends in 1970
|
|
#See in part http://eadiv.state.wy.us/demog_data/cntycity_hist.htm
|
|
LN_OLD <- c(12487,10894,10286,9023,9018,8640) #Missing in 1910
|
|
Year <- seq(1920,1970,by=10)
|
|
TEMP <- cbind(Year,rep("Lincoln",6),LN_OLD)
|
|
colnames(TEMP ) <- c("Year","County","Population")
|
|
TEMP <- as_tibble(TEMP)
|
|
COUNTY_POP <- rbind(TEMP,COUNTY_POP) %>% arrange(County,Year)
|
|
|
|
|
|
KEM_OLD <- c(843,1517,1884,2026,1667,2028,2292) #1910 forward until 1970
|
|
Year <- seq(1910,1970,by=10)
|
|
TEMP <- cbind(Year,rep("kemmerer",7),KEM_OLD)
|
|
colnames(TEMP ) <- c("Year","City","Population")
|
|
TEMP <- as_tibble(TEMP)
|
|
CITY_POP <- rbind(TEMP,CITY_POP)
|
|
|
|
|
|
DIAMOND_OLD <- c(696,726,812,586,415,398,485)
|
|
TEMP <- cbind(Year,rep("Diamondvile",7),DIAMOND_OLD)
|
|
colnames(TEMP ) <- c("Year","City","Population")
|
|
TEMP <- as_tibble(TEMP)
|
|
CITY_POP <- rbind(TEMP,CITY_POP) %>% arrange(City,Year)
|
|
#Add Other Data
|
|
COUNTY_POP <- COUNTY_POP %>% mutate(Year=as.integer(Year))
|
|
WY_COUNTY_DATA_SET <- COUNTY_POP %>% left_join(WY_COUNTY_DATA_SET ) %>% mutate(Population=as.numeric(Population))
|
|
###Save Population Results
|
|
write_csv(CITY_POP,file="./Data/Cleaned_Data/Wyoming_City_Population.csv")
|
|
write_csv(WY_COUNTY_DATA_SET ,file="./Data/Cleaned_Data/Wyoming_County_Population.csv")
|
|
saveRDS(CITY_POP,file="./Data/Cleaned_Data/Wyoming_City_Population.Rds")
|
|
saveRDS(WY_COUNTY_DATA_SET ,file="./Data/Cleaned_Data/Wyoming_County_Population.Rds")
|
|
|
|
|
|
###################Demographics
|
|
if(!file.exists("./Data/Demo_Single_Year_2020s.xls")){download.file('http://eadiv.state.wy.us/Pop/CO_SYASEX24.xlsx',"./Data/Demo_Single_Year_2020s.xls")}
|
|
TEMP <- read_xlsx("./Data/Demo_Single_Year_2020s.xls",skip=2)[,-1]
|
|
TEMP <- TEMP[1:(min(which(is.na(TEMP[,1])))-1),]
|
|
TEMP$YEAR <- year(as.Date(substr((TEMP$YEAR),1,8),format="%m/%d/%Y"))
|
|
colnames(TEMP) <- c("County","Year","Age","Number","Num_Male","Num_Female")
|
|
TEMP$County <- gsub(" County","",TEMP$County,ignore.case=TRUE)
|
|
DEM_2020 <- TEMP %>% select(-Number)
|
|
###Demogrpahics all
|
|
DEM_DATA <- read_delim("Data/County_Demographics_Census/wy.1969_2023.singleages.through89.90plus.txt",delim=" ",col_names=c("ID","VALUES"),col_types=list('c','c'))
|
|
DEM_DATA$Year <- as.integer(substr(DEM_DATA$ID,1,4))
|
|
DEM_DATA$fips<- substr(DEM_DATA$ID,7,11)
|
|
COUNTY_LIST <- read_csv("https://github.com/kjhealy/fips-codes/raw/refs/heads/master/county_fips_master.csv",col_types=list('c','c')) %>% filter(state_abbr=="WY") %>% select(fips,County=county_name) %>% mutate(County=gsub(" ","_",gsub(" County","",County,ignore.case=TRUE)))
|
|
COUNTY_LIST
|
|
DEM_DATA <- DEM_DATA %>% left_join(COUNTY_LIST) %>% select(-fips)
|
|
#16=3
|
|
DEM_DATA$Sex <- ifelse(substr(DEM_DATA$VALUES,3,3)==1,"Male","Female")
|
|
DEM_DATA$Age <- parse_number(substr(DEM_DATA$VALUES,4,5))
|
|
DEM_DATA$Number <- parse_number(substr(DEM_DATA$VALUES,6,14))
|
|
DEM_DATA <- DEM_DATA %>% select(-ID,-VALUES)
|
|
|
|
DEM_DATA <- DEM_DATA %>% group_by(Year,County,Sex,Age) %>% summarize(Number=sum(Number)) %>% ungroup()#Aggregate to sex and age level
|
|
#The Wyoming census data seems newer than this data set from SEER cancer data source. Drop any of these records that overlap with the Wyoming data before merging. Arrange so the colomn order is the same between the two data sets, so they can be easily bound together.
|
|
DEM_DATA <- pivot_wider(DEM_DATA,names_from=Sex,values_from=Number) %>% rename(Num_Female=Female,Num_Male=Male) %>% select(colnames(DEM_2020)) %>% filter(Year<min(DEM_2020$Year)) %>% unique
|
|
|
|
#########################################Mortality Rate
|
|
GET_MORTALITY_DATA <- function(FILE,SEX,LOWER_AGE,UPPER_AGE){
|
|
#Create clean moratlity rate data
|
|
#Data gathered from https://hdpulse.nimhd.nih.gov/data-portal/mortality/table?cod=247&cod_options=cod_15&ratetype=aa&ratetype_options=ratetype_2&race=00&race_options=race_6&sex=2&sex_options=sex_3&age=177&age_options=age_11&ruralurban=0&ruralurban_options=ruralurban_3&yeargroup=5&yeargroup_options=year5yearmort_1&statefips=56&statefips_options=area_states&county=56000&county_options=counties_wyoming&comparison=counties_to_us&comparison_options=comparison_counties&radio_comparison=areas&radio_comparison_options=cods_or_areas
|
|
|
|
NAMES <- c("County","FIPS","Death_Rate","Lower_Rate","Upper_Rate","Deaths","Trend_Category","Trend","Lower_Trend","Upper_Trend")
|
|
DF <- read_csv(FILE,skip=5,col_names=NAMES,col_types=list('c',"i",'d','d','d','d','c','d','d','d')) %>% filter(grepl("County|Wyoming",County)|County=="United States") %>% mutate(Rate_SD=(Upper_Rate-Lower_Rate)/(2*1.96),Trend_SD=(Upper_Trend-Lower_Trend)/(2*1.96)) %>% select(County,Death_Rate,Rate_SD,Trend,Trend_SD)
|
|
DF$County <- gsub(" County","",DF$County,ignore.case=TRUE)
|
|
DF[,-1] <- DF[,-1]/100000
|
|
WYOMING_TREND <- pull(DF[DF$County=="Wyoming",],"Trend")
|
|
US_TREND <- pull(DF[DF$County=="United States",],"Trend")
|
|
|
|
WYOMING_RATE <- pull(DF[DF$County=="Wyoming",],"Death_Rate")
|
|
US_RATE <- pull(DF[DF$County=="United States",],"Death_Rate")
|
|
|
|
DF$Imparted_Trend <- FALSE
|
|
if(is.na(WYOMING_TREND)){
|
|
DF[1,4:5] <- DF[2,4:5]
|
|
DF[1,6] <- TRUE
|
|
}
|
|
|
|
DF$Imparted_Rate <- FALSE
|
|
if(is.na(WYOMING_RATE)){
|
|
DF[1,4:5] <- DF[2,2:3]
|
|
DF[1,6] <- TRUE
|
|
}
|
|
WYOMING_BASELINE_TREND <-cbind (DF[1,4:5] ,TRUE)
|
|
WYOMING_BASELINE_RATE <-DF[1,2:3]
|
|
|
|
for(i in 3:nrow(DF)){
|
|
#Impart any missing trends based on higher levels
|
|
if(is.na(pull(DF[i,],"Trend"))){ DF[i,4:6] <- WYOMING_BASELINE_TREND}
|
|
#Impart any missing death rates based on higher levels
|
|
if(is.na(pull(DF[i,],"Death_Rate"))){
|
|
DF[i,2:3] <- WYOMING_BASELINE_RATE
|
|
DF[i,"Imparted_Rate"] <- TRUE
|
|
}
|
|
}
|
|
DF$Sex <- SEX
|
|
DF$Min_Age <- LOWER_AGE
|
|
DF$Max_Age <- UPPER_AGE
|
|
DF <- DF %>% select(County,Sex,Min_Age,Max_Age,Death_Rate,Rate_SD,Imparted_Rate,everything())
|
|
return(DF)
|
|
}
|
|
MORTALITY_DATA_ALL %>% filter(Min_Age>0)
|
|
MORTALITY_DATA_ALL <- rbind(
|
|
GET_MORTALITY_DATA("Data/Mortality_Rates/Female/A_Under1.csv","Female",0,0),
|
|
GET_MORTALITY_DATA("Data/Mortality_Rates/Female/B_1_9.csv","Female",1,9),
|
|
GET_MORTALITY_DATA("Data/Mortality_Rates/Female/C_10_19.csv","Female",10,19),
|
|
GET_MORTALITY_DATA("Data/Mortality_Rates/Female/D_20_39.csv","Female",20,39),
|
|
GET_MORTALITY_DATA("Data/Mortality_Rates/Female/E_40_64.csv","Female",40,64),
|
|
GET_MORTALITY_DATA("Data/Mortality_Rates/Female/G_65_74.csv","Female",65,74),
|
|
GET_MORTALITY_DATA("Data/Mortality_Rates/Female/H_75_84.csv","Female",75,84),
|
|
GET_MORTALITY_DATA("Data/Mortality_Rates/Female/I_85+.csv","Female",85,Inf),
|
|
GET_MORTALITY_DATA("Data/Mortality_Rates/Male/A_Under1.csv","Male",0,0),
|
|
GET_MORTALITY_DATA("Data/Mortality_Rates/Male/B_1_9.csv","Male",1,9),
|
|
GET_MORTALITY_DATA("Data/Mortality_Rates/Male/C_10_19.csv","Male",10,19),
|
|
GET_MORTALITY_DATA("Data/Mortality_Rates/Male/D_20_39.csv","Male",20,39),
|
|
GET_MORTALITY_DATA("Data/Mortality_Rates/Male/E_40_64.csv","Male",40,64),
|
|
GET_MORTALITY_DATA("Data/Mortality_Rates/Male/G_65_74.csv","Male",65,74),
|
|
GET_MORTALITY_DATA("Data/Mortality_Rates/Male/H_75_84.csv","Male",75,84),
|
|
GET_MORTALITY_DATA("Data/Mortality_Rates/Male/I_85+.csv","Male",85,Inf)
|
|
|
|
)
|
|
LIN_MORTALITY <- MORTALITY_DATA_ALL %>% filter(County=="Lincoln")
|
|
LIN_DEM <- DEM_2020 %>% filter(County=='Lincoln')
|
|
CURRENT <- LIN_DEM[1,]
|
|
TEMP <- LIN_MORTALITY %>% filter(Sex=="Male",Min_Age<=0,Max_Age>=0) %>% select(Death_Rate,Rate_SD,Trend,Trend_SD)
|
|
|
|
TREND_SIM <- function(DEMO_GROUP,NUM_YEARS_FORWARD=50,RAND_SEED=NA){
|
|
if(!is.na(RAND_SEED)){set.seed(RAND_SEED)}
|
|
RES <- c()
|
|
C_YEAR_VALUE <- 0
|
|
for(i in 1:NUM_YEARS_FORWARD){
|
|
RES[length(RES)+1] <- C_YEAR_VALUE
|
|
C_YEAR_VALUE <- rnorm(1,mean=DEMO_GROUP$Trend,sd=DEMO_GROUP$Trend_SD)+C_YEAR_VALUE
|
|
}
|
|
return(RES)
|
|
}
|
|
DEATH_RATE_DEMO_GROUP_SIM <- function(DEMO_GROUP,NUM_YEARS_FORWARD=50,RAND_SEED=NA){
|
|
if(!is.na(RAND_SEED)){set.seed(RAND_SEED)}
|
|
return(rnorm(NUM_YEARS_FORWARD,mean=DEMO_GROUP$Death_Rate,sd=DEMO_GROUP$Rate_SD))+TREND_SIM(DEMO_GROUP,NUM_YEARS_FORWARD,RAND_SEED)
|
|
}
|
|
|
|
CREATE_DEATH_RATE_TREND_SIMULATION <- function(NUM_SIMS,NUM_YEARS_TO_SIMULATE,POP_DATA=LIN_MORTALITY,SAVE_LOC="./Data/Simulated_Data_Sets/MORTALITY_MONTE_CARLO.Rds",RERUN=FALSE){
|
|
if(RERUN | !file.exists(SAVE_LOC)){
|
|
SINGLE_GROUP_SIM_DEATH_RATE <- function(x){sapply(1:NUM_SIMS,DEATH_RATE_DEMO_GROUP_SIM,DEMO_GROUP=POP_DATA[x,],NUM_YEARS_FORWARD=NUM_YEARS_TO_SIMULATE)}
|
|
TEMP <- mclapply(1:nrow(POP_DATA),SINGLE_GROUP_SIM_DEATH_RATE, mc.cores = detectCores()-1 )
|
|
saveRDS(TEMP,SAVE_LOC,compress=FALSE)
|
|
} else{print("Death rate of population groups, by year simulation already saved on file. Skipping simulation.")}
|
|
}
|
|
|
|
CREATE_DEATH_RATE_TREND_SIMULATION(1000000,50,RERUN=TRUE)
|
|
##[[12]] Index number of sex,age group, 1:5 range of years to extract (present day to five years out), 220 simulation number 220
|
|
readRDS("./Data/Simulated_Data_Sets/MORTALITY_MONTE_CARLO.Rds")[[12]][1:5,220]
|
|
TEMP <- readRDS("./Data/Simulated_Data_Sets/MORTALITY_MONTE_CARLO.Rds")
|
|
sapply(1:16,function(x){TEMP[[x]][1,330]})
|
|
rm(TEMP)
|
|
gc()
|
|
#######################3###Model Population Trends
|
|
library(fixest)
|
|
WY_COUNTY_DATA_SET
|
|
REG_DATA <- WY_COUNTY_DATA_SET %>% mutate(LN=ifelse(County=="Lincoln",1,0),Pop=Population-Births ) %>% left_join(REG_DATA <- DEM_DATA %>% filter(Age<=35,Age>18) %>% group_by(County,Year) %>% summarize(Child_Bearing=sum(Num_Female)) %>% ungroup)
|
|
etable(feols(log(Births)~log(Pop)+log(Child_Bearing)*LN+County|Year,cluster~County,data=REG_DATA))
|