Working on arima sim with age splits
This commit is contained in:
parent
3bf85ce48b
commit
d1d4682b72
@ -10,4 +10,4 @@ Data is manually gathered from CDC WONDER data queries.
|
|||||||
4) The world pandemic uncertainty index as collected from FRED which is used to account for pandemics in the regression, making the age time series stationary.
|
4) The world pandemic uncertainty index as collected from FRED which is used to account for pandemics in the regression, making the age time series stationary.
|
||||||
|
|
||||||
These are used to project mortality trends over time. In the case of the age adjusted data, this has local trends that can be compared to the national average. The single age-sex data is only at a national level but can be imparted to local levels as a general trend in the distribution of deaths
|
These are used to project mortality trends over time. In the case of the age adjusted data, this has local trends that can be compared to the national average. The single age-sex data is only at a national level but can be imparted to local levels as a general trend in the distribution of deaths
|
||||||
--- Run Date: 2025-11-23 15:59:28 ---
|
--- Run Date: 2025-11-25 12:04:27 ---
|
||||||
|
|||||||
@ -3,183 +3,76 @@ library(fixest)
|
|||||||
library(forecast)
|
library(forecast)
|
||||||
|
|
||||||
########################################################ARIMA
|
########################################################ARIMA
|
||||||
|
DATA_WOMEN <- readRDS("Data/Cleaned_Data/Mortality_Data/RDS/Mortality_Rate_and_Pandemic_Data_for_Regression.Rds") %>% filter(Sex=='Female')
|
||||||
|
DATA_MEN <- readRDS("Data/Cleaned_Data/Mortality_Data/RDS/Mortality_Rate_and_Pandemic_Data_for_Regression.Rds") %>% filter(Sex=='Male')
|
||||||
|
|
||||||
MOD_WOMEN <- feols(Mort_Rate_US~L_Mort_Rate_US+Year+WUPI,REG_DATA %>% filter(Sex=='Female'))
|
#Create time series data
|
||||||
acf(resid(MOD_WOMEN))
|
|
||||||
MOD_WOMEN
|
|
||||||
MOD_MEN <- feols(Mort_Rate_US~L_Mort_Rate_US+Year+WUPI,REG_DATA %>% filter(Sex=='Male'))
|
|
||||||
###Lincoln
|
|
||||||
MOD_WOMEN <- feols(Mort_Rate_Lincoln~Mort_Rate_US,REG_DATA %>% filter(Sex=='Female'))
|
|
||||||
acf(resid(MOD_WOMEN))
|
|
||||||
MOD_WOMEN
|
|
||||||
MOD_MEN <- feols(Mort_Rate_US~L_Mort_Rate_US+Year+WUPI,REG_DATA %>% filter(Sex=='Male'))
|
|
||||||
|
|
||||||
acf(resid(MOD_MEN))
|
|
||||||
plot(resid(MOD_MEN))
|
|
||||||
plot(predict(MOD_MEN))
|
|
||||||
DATA_MEN <- REG_DATA %>% filter(Sex=='Male')
|
|
||||||
DATA_WOMEN <- REG_DATA %>% filter(Sex=='Female')
|
|
||||||
|
|
||||||
ST_YEAR <- DATA_MEN %>% pull(Year) %>% min
|
ST_YEAR <- DATA_MEN %>% pull(Year) %>% min
|
||||||
|
|
||||||
TS_MEN_US <- DATA_MEN %>% select(Mort_Rate_US) %>% ts(start=ST_YEAR,frequency=1)
|
TS_MEN_US <- DATA_MEN %>% select(Mort_Rate_US) %>% ts(start=ST_YEAR,frequency=1)
|
||||||
TS_MEN_LIN <- DATA_MEN %>% select(Mort_Rate_Lincoln) %>% ts(start=ST_YEAR,frequency=1)
|
TS_MEN_LIN <- DATA_MEN %>% select(Mort_Rate_Lincoln) %>% ts(start=ST_YEAR,frequency=1)
|
||||||
TS_PANDEMIC <- DATA_MEN %>% select(WUPI,L_WUPI) %>% ts(start=ST_YEAR,frequency=1)
|
|
||||||
|
|
||||||
TS_WOMEN_US <- DATA_WOMEN %>% select(Mort_Rate_US) %>% ts(start=ST_YEAR,frequency=1)
|
TS_WOMEN_US <- DATA_WOMEN %>% select(Mort_Rate_US) %>% ts(start=ST_YEAR,frequency=1)
|
||||||
TS_WOMEN_LIN <- DATA_WOMEN %>% select(Mort_Rate_Lincoln) %>% ts(start=ST_YEAR,frequency=1)
|
TS_WOMEN_LIN <- DATA_WOMEN %>% select(Mort_Rate_Lincoln) %>% ts(start=ST_YEAR,frequency=1)
|
||||||
|
|
||||||
TS_MEN_US_INV <- TS_MEN_US
|
TS_PANDEMIC <- DATA_MEN %>% select(WUPI,L_WUPI) %>% ts(start=ST_YEAR,frequency=1)
|
||||||
|
|
||||||
TS_WOMEN_US_INV <- TS_WOMEN_US
|
TS_WOMEN_US_INV <- TS_WOMEN_US
|
||||||
FORECAST_XREG <- TS_PANDEMIC
|
FORECAST_XREG <- TS_PANDEMIC
|
||||||
FORECAST_XREG[,] <- 0
|
FORECAST_XREG[,] <- 0
|
||||||
MOD_US_MEN <- auto.arima(TS_MEN_US,lambda=0,biasadj=TRUE,xreg=TS_PANDEMIC)
|
MOD_US_MEN <- auto.arima(TS_MEN_US,lambda=0,biasadj=TRUE,xreg=TS_PANDEMIC)
|
||||||
checkresiduals(MOD_US_MEN)
|
#checkresiduals(MOD_US_MEN)
|
||||||
|
|
||||||
plot(forecast(MOD_US_MEN,xreg=FORECAST_XREG))
|
#plot(forecast(MOD_US_MEN,xreg=FORECAST_XREG))
|
||||||
MOD_US_WOMEN <- auto.arima(TS_WOMEN_US,lambda=0,biasadj=TRUE,xreg=TS_PANDEMIC)
|
MOD_US_WOMEN <- auto.arima(TS_WOMEN_US,lambda=0,biasadj=TRUE,xreg=TS_PANDEMIC)
|
||||||
checkresiduals(MOD_US_WOMEN)
|
#checkresiduals(MOD_US_WOMEN)
|
||||||
plot(forecast(MOD_US_WOMEN,xreg=FORECAST_XREG))
|
#plot(forecast(MOD_US_WOMEN,xreg=FORECAST_XREG))
|
||||||
|
|
||||||
MOD_LIN <- auto.arima(TS_MEN_LIN,biasadj=TRUE,xreg=TS_MEN_US)
|
MOD_LIN_MEN <- auto.arima(TS_MEN_LIN,biasadj=TRUE,xreg=TS_MEN_US)
|
||||||
simulate(MOD_US_MEN,xreg=FORECAST_XREG)
|
MOD_LIN_WOMEN <- auto.arima(TS_WOMEN_LIN,biasadj=TRUE,xreg=TS_WOMEN_US)
|
||||||
plot(simulate(MOD_LIN,xreg=simulate(MOD_US_MEN,xreg=FORECAST_XREG)))
|
############################################Start Simualtion work
|
||||||
plot(forecast(MOD_LIN,xreg=simulate(MOD_US_MEN,xreg=FORECAST_XREG)))
|
SINGLE_MODS <- readRDS("Data/Intermediate_Inputs/Mortality_Regression_Data/Single_Sex_Age_Time_Series_Regression.Rds")
|
||||||
################################Other work
|
MIN_VALUES <- readRDS("Data/Intermediate_Inputs/Mortality_Regression_Data/Single_Sex_Age_Min_Values_for_Bounding_Predictions.Rds")
|
||||||
SINGLE_DATA <- read_csv("Data/Raw_Data/Mortality_Rates_New/US_Single_Age_1999-2020.csv") %>% select(Year,Sex,Age=`Single-Year Ages Code`,Mortality_Rate=`Crude Rate`) %>% mutate(Mortality_Rate=parse_number(Mortality_Rate)) %>% filter(!is.na(Mortality_Rate))
|
MAX_VALUES <- readRDS("Data/Intermediate_Inputs/Mortality_Regression_Data/Single_Sex_Age_Max_Values_for_Bounding_Predictions.Rds")
|
||||||
|
BASELINE_AGE_ADJUST_MEN <- readRDS("Data/Cleaned_Data/Mortality_Data/RDS/Single_Sex_Age_Population_in_2000.Rds") %>% filter(Sex=='Male') %>% pull(Percent_of_Population)
|
||||||
OLDER <- read_csv("Data/Raw_Data/Mortality_Rates_New/US_10_Year_Age_Groups_1999-2020.csv")%>% rename(Age=`Ten-Year Age Groups Code`,Mortality_Rate=`Crude Rate`) %>% filter(Age=='85+')%>% mutate(Age=85,Year=as.numeric(Year),Mortality_Rate=parse_number(Mortality_Rate)) %>% select(Year,Sex,Age,Mortality_Rate)%>% select(colnames(SINGLE_DATA))
|
BASELINE_AGE_ADJUST_WOMEN <- readRDS("Data/Cleaned_Data/Mortality_Data/RDS/Single_Sex_Age_Population_in_2000.Rds") %>% filter(Sex=='Female') %>% pull(Percent_of_Population)
|
||||||
SINGLE_DATA <- rbind(SINGLE_DATA,OLDER) %>% left_join(REG_DATA %>% select(Year,Sex,US_Rate=Mort_Rate_US))
|
#Adjust to just women popualtion (Not all population percent
|
||||||
SINGLE_DATA_PLAIN <- SINGLE_DATA
|
BASELINE_AGE_ADJUST_WOMEN <- BASELINE_AGE_ADJUST_WOMEN/ sum(BASELINE_AGE_ADJUST_WOMEN )
|
||||||
|
BASELINE_AGE_ADJUST_MEN <- BASELINE_AGE_ADJUST_MEN/ sum(BASELINE_AGE_ADJUST_MEN )
|
||||||
SINGLE_DATA <- SINGLE_DATA %>% group_by(Age,Sex) %>% mutate(INDEX=Mortality_Rate/sum(ifelse(Year==min(Year),Mortality_Rate,0)),US_INDEX=US_Rate/sum(ifelse(Year==min(Year),US_Rate,0))) %>% ungroup
|
|
||||||
ggplot(SINGLE_DATA,aes(x=Year,y=INDEX,group=Age,color=Age))+geom_point()+geom_smooth(aes(y=US_INDEX),se=FALSE,color="black",linetype=2,linewidth=2) + facet_grid(rows= vars(Sex))
|
|
||||||
|
|
||||||
ggplot(SINGLE_DATA,aes(x=Year,y=INDEX,group=Age,color=Age))+geom_point(size=0.5)+geom_line(aes(y=US_INDEX),color="black",linetype=2,linewidth=1) + facet_grid(rows= vars(Sex))
|
|
||||||
|
|
||||||
ggplot(SINGLE_DATA,aes(x=Year,y=Mortality_Rate,group=Age,color=Age))+geom_point() + scale_y_log10() + facet_grid(. ~ Sex)
|
|
||||||
|
|
||||||
ggplot(SINGLE_DATA,aes(x=Year,y=INDEX,group=Age,color=Age))+geom_smooth(se=FALSE,method="lm")+ scale_y_log10() + facet_grid(. ~ Sex)
|
|
||||||
COR_DAT <- SINGLE_DATA %>% arrange(Sex,Age) %>% select(-US_Rate,-US_INDEX,-INDEX) %>% pivot_wider(values_from=c(Mortality_Rate),names_from=c(Sex,Age)) %>% arrange(Year) %>% select(-Year) %>% as.matrix
|
|
||||||
rownames(COR_DAT) <- 1999:2020
|
|
||||||
|
|
||||||
TEST <- matrix(as.numeric(COR_DAT[1,]),nrow(COR_DAT),nrow=nrow(COR_DAT),ncol=ncol(COR_DAT))
|
|
||||||
COR_DAT <- COR_DAT/TEST
|
|
||||||
#COR_DAT <- t(COR_DAT)
|
|
||||||
#US_COR_DATA <- SINGLE_DATA %>% select(Year,Sex,US_Rate) %>% unique %>% pivot_wider(values_from=US_Rate,names_from=Sex) %>% arrange(Year)
|
|
||||||
#COR_DAT <- cbind(US_COR_DATA,COR_DAT) %>% as.matrix
|
|
||||||
library(factoextra)
|
|
||||||
COR_DAT
|
|
||||||
corrplot(cor(COR_DAT[,1:86]),type="lower")
|
|
||||||
corrplot(cor(COR_DAT[,87:172]),type="lower")
|
|
||||||
|
|
||||||
COR_DAT <- t(COR_DAT)
|
|
||||||
COR_DAT <- COR_DAT[,-1]
|
|
||||||
|
|
||||||
fviz_nbclust(COR_DAT,kmeans,"gap_stat")
|
|
||||||
fviz_nbclust(COR_DAT,kmeans,"wss",nboot=1000)
|
|
||||||
fviz_nbclust(COR_DAT,kmeans,nboot=1000)
|
|
||||||
km.res <- kmeans(COR_DAT, 3, nstart = 1)
|
|
||||||
fviz_cluster(km.res,COR_DAT)
|
|
||||||
print(km.res)
|
|
||||||
COR_DAT
|
|
||||||
SINGLE_DATA %>% filter(!is.numeric(Mortality_Rate))
|
|
||||||
SINGLE_DATA <- SINGLE_DATA %>% mutate(Age=as.numeric(Age),US_Rate=as.numeric(US_Rate),Year=as.numeric(Year),Mortality_Rate=as.numeric(Mortality_Rate))
|
|
||||||
SINGLE_DATA %>% filter(is.numeric(Age),is.numeric(Mortality_Rate),is.numeric(US_Rate))
|
|
||||||
DATA <- SINGLE_DATA %>% left_join(PANDIMIC_INDEX ) %>% ungroup %>% group_by(Sex,Year) %>% mutate(Rank=rank(Mortality_Rate)) %>% ungroup %>% group_by(Year,Sex) %>% mutate(PER_MORT_MAX=Mortality_Rate/max(Mortality_Rate))
|
|
||||||
feols(Mortality_Rate~factor(Rank)+Sex+Year,DATA)
|
|
||||||
MOD <- feols(log(Mortality_Rate)~log(US_Rate)*Sex+factor(Rank)+Sex|Year,DATA)
|
|
||||||
MOD
|
|
||||||
plot(resid(MOD))
|
|
||||||
DATA$RESID <- resid(MOD)
|
|
||||||
acf((DATA %>% filter(Age==85,Sex=='Female') %>% pull(RESID)))
|
|
||||||
DATA
|
|
||||||
ggplot(SINGLE_DATA,aes(x=Year,y=Mortality_Rate,group=Age,color=Age))+geom_point() + scale_y_log10() + facet_grid(. ~ Sex)
|
|
||||||
|
|
||||||
TEST <- SINGLE_DATA %>% group_by(Year,Sex) %>% mutate(TEST=rank(Mortality_Rate)) %>%ungroup
|
|
||||||
ggplot(TEST,aes(x=Year,y=TEST,group=Age,color=Age)) +geom_point()+ geom_smooth()
|
|
||||||
TEMP <- SINGLE_DATA %>% group_by(Sex,Year) %>% summarize(GAP=max(Mortality_Rate)-min(Mortality_Rate))
|
|
||||||
ggplot(TEMP,aes(x=Year,y=GAP,group=Sex,color=Sex))+geom_line()
|
|
||||||
|
|
||||||
MOD <- feols(log(Mortality_Rate)~factor(Age)*(log(US_Rate)+WUPI+L_WUPI)+Year,data=as_tibble(SINGLE_DATA))
|
|
||||||
SINGLE_DATA
|
|
||||||
plot(resid(MOD))
|
|
||||||
TEMP <- SINGLE_DATA
|
|
||||||
TEMP$RESID <- resid(MOD)
|
|
||||||
acf(TEMP %>% filter(Age==80) %>% pull(RESID))
|
|
||||||
etable(MOD,group=list("Single Age"="factor"))
|
|
||||||
SINGLE_DATA[87,]
|
|
||||||
resid(MOD)
|
|
||||||
predict
|
|
||||||
|
|
||||||
|
|
||||||
REG_SINGLE_DATA <- SINGLE_DATA_PLAIN %>% mutate( %>% pivot_wider(values_from="Mortality_Rate",names_from=c("Age"))
|
ST_YEAR <- 2025
|
||||||
MALE <- REG_SINGLE_DATA %>% filter(Sex=='Male')
|
END_YEAR <- 2025+40
|
||||||
FEMALE <- REG_SINGLE_DATA %>% filter(Sex=='Female')
|
GAP <- END_YEAR-ST_YEAR
|
||||||
library(corrplot)
|
NUM_SIMS <- END_YEAR-ST_YEAR+1
|
||||||
corrplot(cor(MALE %>% select(-Sex)))
|
|
||||||
US_CAUSES <- read_csv("Data/Raw_Data/Mortality_Rates_New/US_Cause_of_Death_1999-2020.csv") %>% select(Year,ICD=`ICD Sub-Chapter Code`,Death_Rate=`Crude Rate`) %>% filter(!is.na(Death_Rate)) %>% mutate(Death_Rate=ifelse(Death_Rate=='Suppressed' |Death_Rate=='Unreliable',NA,Death_Rate)) %>% rbind(read_csv("Data/Raw_Data/Mortality_Rates_New/US_Cause_of_Death_2018-2023.csv") %>% select(Year,ICD=`ICD Sub-Chapter Code`,Death_Rate=`Crude Rate`) %>% filter(!is.na(Death_Rate)) %>% mutate(Death_Rate=ifelse(Death_Rate=='Suppressed' |Death_Rate=='Unreliable',NA,Death_Rate))) %>% mutate(Death_Rate=parse_number(Death_Rate)) %>% arrange(Year,ICD) %>% group_by(ICD) %>% filter(max(is.na(Death_Rate))==0,min(Death_Rate)!=max(Death_Rate)) %>% ungroup %>% unique
|
|
||||||
BIND <- read_csv("Data/Raw_Data/Mortality_Rates_New/US_Cause_of_Death_1999-2020.csv") %>% select(ICD=`ICD Sub-Chapter Code`,NAME=`ICD Sub-Chapter`) %>% unique
|
|
||||||
US_CAUSES <- US_CAUSES %>% left_join(BIND) %>% select(-ICD) %>% rename(ICD=NAME)
|
|
||||||
US_CAUSES %>% group_by(ICD) %>% summarize(Rate=mean(Death_Rate)) %>% summarize(ICD,Rate, Rank=rank(desc(Rate))) %>% arrange(Rank)
|
|
||||||
ggplot(US_CAUSES,aes(x=Year,y=scale(Death_Rate),group=ICD,color=ICD,fill=ICD)) +geom_point() +geom_smooth()+theme(legend.position="bottom")
|
|
||||||
US_CAUSES
|
|
||||||
parse_number(REG_SINGLE_DATA[,3:89])
|
|
||||||
US_CAUSES <- US_CAUSES %>% pivot_wider(values_from=Death_Rate,names_from=ICD)
|
|
||||||
CAUS_MAT <- US_CAUSES %>% select(-Year)
|
|
||||||
sd(t(CAUS_MAT)[,118])
|
|
||||||
|
|
||||||
CAUS_MAT <- scale(CAUS_MAT)
|
XREG <- cbind(rep(0,NUM_SIMS),rep(0,NUM_SIMS))
|
||||||
|
#colnames(XREG) <- c("WUPI","L_WUPI")
|
||||||
|
XREG <- ts(XREG,start=ST_YEAR,frequency=1)
|
||||||
|
|
||||||
fviz_nbclust(CAUS_MAT,kmeans,"gap_stat") #6
|
SIM_LIN_WOMEN <-simulate(MOD_LIN_WOMEN,xreg=simulate(MOD_US_WOMEN,xreg=XREG))
|
||||||
fviz_nbclust(CAUS_MAT,kmeans,"wss") #5
|
SIM_LIN_MEN <- simulate(MOD_LIN_MEN,xreg=simulate(MOD_US_MEN,xreg=XREG))
|
||||||
fviz_nbclust(CAUS_MAT,kmeans) #2
|
C_VAL <- rbind(cbind(ST_YEAR:END_YEAR,rep("Female",NUM_SIMS),as.vector(SIM_LIN_MEN)), cbind(ST_YEAR:END_YEAR,rep("Male",NUM_SIMS),as.vector(SIM_LIN_WOMEN))) %>% as_tibble
|
||||||
|
colnames(C_VAL) <- c("Year","Sex","US_Adj_Death_Rate")
|
||||||
km.res <- kmeans(CAUS_MAT, 6, nstart = 1)
|
C_VAL$Year <- as.numeric(pull(C_VAL,Year))
|
||||||
fviz_cluster(km.res,CAUS_MAT)
|
C_VAL$US_Adj_Death_Rate <- as.numeric(pull(C_VAL,US_Adj_Death_Rate))
|
||||||
summary(km.res
|
C_VAL
|
||||||
print(km.res)
|
###Pedict
|
||||||
km.res$cluster
|
RES <- do.call(rbind,lapply(1:86,function(x){return(predict(SINGLE_MODS[[x]],C_VAL))}))#For each data frame containing each year and sex combination of the forecast, predict the data for each age 0-85. Bind these by row to create a result with ages by row, and year by column
|
||||||
|
ncol(RES)
|
||||||
corrplot(cor(US_CAUSES))
|
#RES1 <- RES
|
||||||
MALE <- US_CAUSES %>% left_join(MALE) %>% select(Year,Sex,US_Rate,everything())
|
#Rows Year, Column Age
|
||||||
MALE %>% tail
|
MIN_MAT <- matrix(rep(MIN_VALUES,ncol(RES)),ncol=ncol(RES))
|
||||||
COR_MALE <- MALE %>% select(-Sex) %>% as.matrix
|
RES <- ifelse(RES<MIN_VALUES,MIN_VALUES,RES)
|
||||||
corrplot(cor(COR_MALE,use="pairwise.complete"),type="lower",diag=FALSE,)
|
RES <- ifelse(RES>MAX_VALUES,MIN_VALUES,RES)
|
||||||
?corrplot
|
FEMALE_RES <- t(RES[,1:NUM_SIMS])
|
||||||
|
MALE_RES <- t(RES[,(NUM_SIMS+1):(2*NUM_SIMS)])
|
||||||
COR_MALE
|
PRED_ADJ_RATE_WOMEN <- rowSums(FEMALE_RES*BASELINE_AGE_ADJUST_WOMEN)
|
||||||
corrplot(cor(cbind(MALE[,1],MALE[,4:ncol(MALE)]/t(MALE[,3]))))
|
PRED_ADJ_RATE_MEN <- rowSums(MALE_RES*BASELINE_AGE_ADJUST_MEN)
|
||||||
corrplot(cor(log(FEMALE %>% select(-Sex))))
|
MALE_RES <- MALE_RES*C_VAL[1:(nrow(C_VAL)/2),]$US_Adj_Death_Rate/PRED_ADJ_RATE_MEN
|
||||||
TEMP <- MALE %>% select(RATE=`36`,US_Rate,Year) %>% as.data.frame
|
FEMALE_RES <- MALE_RES*C_VAL[(nrow(C_VAL)/2+1):(nrow(C_VAL)),]$US_Adj_Death_Rate/PRED_ADJ_RATE_WOMEN
|
||||||
TEST <- feols((RATE)~US_Rate+Year,TEMP)
|
#Testing looks good so far
|
||||||
TEST <- feols(RATE/US_Rate~Year,TEMP)
|
FEMALE_RES[,20:30]
|
||||||
acf(as.numeric((TEMP[,'RATE']-predict(TEST)) %>% unlist))
|
MALE_RES[,20:30]
|
||||||
acf
|
|
||||||
TEST <- lm(log(`22`)~log(US_Rate)+Year,data=MALE)
|
|
||||||
predict(TEST
|
|
||||||
resid(TEST)
|
|
||||||
lm(`20`~Year+
|
|
||||||
plot(as.vector(t(FEMALE %>% dplyr::select(`20`))))
|
|
||||||
%>% pull(`22`)
|
|
||||||
?corrplot
|
|
||||||
plot(cor(FEMALE %>% select(-Sex))[2,])
|
|
||||||
plot(cor(FEMALE %>% select(-Sex))[1,])
|
|
||||||
MAT <- (cbind(abs(cor(FEMALE %>% select(-Sex))[2,]),abs(cor(FEMALE %>% select(-Sex))[1,])))
|
|
||||||
plot(apply(MAT,1,min))
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
MALE
|
|
||||||
|
|
||||||
library(fixest)
|
|
||||||
MOD
|
|
||||||
MOD <- feols(Age_85~US_Rate,data=MALE)
|
|
||||||
acf(MALE[,"Age_85"]-predict(MOD))
|
|
||||||
residuals(MOD)
|
|
||||||
MOD0
|
|
||||||
resid(MOD0)
|
|
||||||
|
|
||||||
|
|||||||
@ -1,9 +1,11 @@
|
|||||||
#Clean and collect data sets used in later code.
|
#Clean and collect data sets used in later code.
|
||||||
Rscript "./Scripts/1A_Download_and_Process_Population_Data.r"
|
Rscript "./Scripts/1A_Download_and_Process_Population_Data.r"
|
||||||
Rscript "./Scripts/1B_Process_Existing_NIH_Mortality_Data.r"
|
Rscript "./Scripts/1B_Process_Existing_NIH_Mortality_Data.r" #Somewhat outdated, could use only the data in 1E and 1F but speeds up completion time to not change
|
||||||
Rscript "./Scripts/1C_Download_and_Process_Demographic_Data.r"
|
Rscript "./Scripts/1C_Download_and_Process_Demographic_Data.r"
|
||||||
Rscript "./Scripts/1D_Use_ACS_Census_Data_to_Estimate_Kemmerer_Demographics.r"
|
Rscript "./Scripts/1D_Use_ACS_Census_Data_to_Estimate_Kemmerer_Demographics.r"
|
||||||
Rscript "./Scripts/1E_Process_WONDER_Mortality_Data.r"
|
Rscript "./Scripts/1E_Process_WONDER_Mortality_Data.r"
|
||||||
|
Rscript "./Scripts/1F_Process_WONDER_Single_Age_Sex_Mortality_Data.r"
|
||||||
|
|
||||||
#Create data sets used in later simulations, produce some results for the report when related to this process.
|
#Create data sets used in later simulations, produce some results for the report when related to this process.
|
||||||
Rscript "./Scripts/2A_Birth_Rate_Regression_and_Impart_Kemmerer_Births.r"
|
Rscript "./Scripts/2A_Birth_Rate_Regression_and_Impart_Kemmerer_Births.r"
|
||||||
Rscript "./Scripts/2B_Impart_Deaths_and_Migration_to_Subregions.r"
|
Rscript "./Scripts/2B_Impart_Deaths_and_Migration_to_Subregions.r"
|
||||||
|
|||||||
@ -2,24 +2,25 @@ library(tidyverse)
|
|||||||
library(fixest)
|
library(fixest)
|
||||||
####SPLIT OUT THE DATA MANAGEMENT PULL IN ARIMA
|
####SPLIT OUT THE DATA MANAGEMENT PULL IN ARIMA
|
||||||
################################Create the data need to model the age-sex specific death rates
|
################################Create the data need to model the age-sex specific death rates
|
||||||
DF1999 <- read_csv("Data/Raw_Data/Mortality_Rates_New/US_Single_Age_1999-2020.csv") %>% select(Year,Sex,Age=`Single-Year Ages Code`,Mortality_Rate=`Crude Rate`) %>% mutate(Mortality_Rate=parse_number(Mortality_Rate)) %>% filter(!is.na(Mortality_Rate)) %>% mutate(Mortality_Rate=as.numeric(Mortality_Rate))
|
RAW_DATA_LOC <- "Data/Cleaned_Data/Mortality_Data/RDS/"
|
||||||
DF2018 <- read_csv("Data/Raw_Data/Mortality_Rates_New/US_Single_Age_2018-2023.csv") %>% select(Year,Sex,Age=`Single-Year Ages Code`,Mortality_Rate=`Crude Rate`) %>% filter(!is.na(Mortality_Rate))%>% mutate(Mortality_Rate=parse_number(Mortality_Rate)) %>% filter(!is.na(Mortality_Rate)) %>% mutate(Mortality_Rate=as.numeric(Mortality_Rate))
|
REG_DATA <- readRDS(paste0(RAW_DATA_LOC,"Single_Sex_Age_US_Mortality_Rate_Data_Wide.Rds"))
|
||||||
|
if(!exists("SAVE_DATA_LOC")){SAVE_DATA_LOC<- "Data/Intermediate_Inputs/Mortality_Regression_Data/"}
|
||||||
|
dir.create(SAVE_DATA_LOC, recursive = TRUE, showWarnings = FALSE)
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
OLDER1 <- read_csv("Data/Raw_Data/Mortality_Rates_New/US_10_Year_Age_Groups_1999-2020.csv")%>% rename(Age=`Ten-Year Age Groups Code`,Mortality_Rate=`Crude Rate`) %>% filter(Age=='85+')%>% mutate(Age=85,Year=as.numeric(Year),Mortality_Rate=parse_number(Mortality_Rate)) %>% select(Year,Sex,Age,Mortality_Rate) %>% mutate(Mortality_Rate=as.numeric(Mortality_Rate),Age=as.numeric(Age))
|
|
||||||
OLDER2 <- read_csv("Data/Raw_Data/Mortality_Rates_New/US_10_Year_Age_Groups_2018-2023.csv")%>% rename(Age=`Ten-Year Age Groups Code`,Mortality_Rate=`Crude Rate`) %>% filter(Age=='85+')%>% mutate(Age=85,Year=as.numeric(Year),Mortality_Rate=parse_number(Mortality_Rate)) %>% select(Year,Sex,Age,Mortality_Rate)%>% mutate(Mortality_Rate=as.numeric(Mortality_Rate),Age=as.numeric(Age))
|
|
||||||
DF <- rbind(DF1999,DF2018,OLDER1,OLDER2) %>% unique %>% group_by(Year,Sex,Age) %>% arrange(Year,Sex,Age) %>% mutate(Age=as.numeric(Age)) %>% ungroup
|
|
||||||
#hist(US_CAUSES$Death_Rate,breaks=150)
|
|
||||||
#Overall US death rates
|
|
||||||
US_AGE_ADJ <- rbind(read_csv("Data/Raw_Data/Mortality_Rates_New/US_Age_Adjusted_1979-1998.csv") %>% select(Year,Sex,US_Adj_Death_Rate=`Crude Rate`),read_csv("Data/Raw_Data/Mortality_Rates_New/US_Age_Adjusted_1999-2020.csv") %>% select(Year,Sex,US_Adj_Death_Rate=`Crude Rate`),read_csv("Data/Raw_Data/Mortality_Rates_New/US_Age_Adjusted_2018-2023.csv") %>% select(Year,Sex,US_Adj_Death_Rate=`Crude Rate`)) %>% unique
|
|
||||||
REG_DATA <- DF %>% left_join(US_AGE_ADJ) %>% pivot_wider(values_from=Mortality_Rate,names_from=Age,names_prefix="Age_")
|
|
||||||
#####################Model all ages and sex
|
#####################Model all ages and sex
|
||||||
MOD <- feols(Age_.[0:85]~US_Adj_Death_Rate+Sex*Year,REG_DATA)
|
MOD <- feols(Age_.[0:85]~US_Adj_Death_Rate+Sex*Year,REG_DATA)
|
||||||
|
|
||||||
###Simulate each age-sex death rate over time with the models
|
###Simulate each age-sex death rate over time with the models
|
||||||
#########When project far into the future some death rate values become negative. Make bounds to limit the forecast to a reasonable range. In this case I select half of the historic minimum, or double the historic maximum as upper an lower bounds in the study period.
|
#########When project far into the future some death rate values become negative. Make bounds to limit the forecast to a reasonable range. In this case I select half of the historic minimum, or double the historic maximum as upper an lower bounds in the study period.
|
||||||
BOUNDS <- DF %>% group_by(Age) %>% summarize(MAX_RATE=2*max(Mortality_Rate),MIN_RATE=min(Mortality_Rate)/2)
|
BOUNDS <- readRDS("Data/Cleaned_Data/Mortality_Data/RDS/Single_Sex_Age_US_Mortality_Rate_Data_Long.Rds") %>% group_by(Age) %>% summarize(MAX_RATE=2*max(Mortality_Rate),MIN_RATE=min(Mortality_Rate)/2)
|
||||||
MAX_BOUND <- BOUNDS %>% pull(MAX_RATE)
|
MAX_BOUND <- BOUNDS %>% pull(MAX_RATE)
|
||||||
MIN_BOUND <- BOUNDS %>% pull(MIN_RATE)
|
MIN_BOUND <- BOUNDS %>% pull(MIN_RATE)
|
||||||
|
saveRDS(MOD,paste0(SAVE_DATA_LOC,"Single_Sex_Age_Time_Series_Regression.Rds"))
|
||||||
|
saveRDS(MAX_BOUND,paste0(SAVE_DATA_LOC,"Single_Sex_Age_Max_Values_for_Bounding_Predictions.Rds"))
|
||||||
|
saveRDS(MIN_BOUND,paste0(SAVE_DATA_LOC,"Single_Sex_Age_Min_Values_for_Bounding_Predictions.Rds"))
|
||||||
|
|
||||||
|
|
||||||
#Create a proxy data set to simulate with
|
#Create a proxy data set to simulate with
|
||||||
C_VAL <- REG_DATA %>% mutate(Year=Year+(2025-1999)) %>% select(Year,Sex,US_Adj_Death_Rate)
|
C_VAL <- REG_DATA %>% mutate(Year=Year+(2025-1999)) %>% select(Year,Sex,US_Adj_Death_Rate)
|
||||||
#################NOTE YOU NEED TO ADJUST THE SINGLE AGE DEATH RATE DOWN TO MATCH LINCOLN IN SOME WAY
|
#################NOTE YOU NEED TO ADJUST THE SINGLE AGE DEATH RATE DOWN TO MATCH LINCOLN IN SOME WAY
|
||||||
|
|||||||
37
Scripts/1F_Process_WONDER_Single_Age_Sex_Mortality_Data.r
Normal file
37
Scripts/1F_Process_WONDER_Single_Age_Sex_Mortality_Data.r
Normal file
@ -0,0 +1,37 @@
|
|||||||
|
library(tidyverse)
|
||||||
|
#setwd("../")
|
||||||
|
#Define and set all working directories for reading or saving files
|
||||||
|
if(!exists("DATA_LOC_RAW")){DATA_LOC_RAW <- "./Data/Raw_Data/Mortality_Rates_Over_Time/"}
|
||||||
|
if(!exists("DATA_SAVE_LOC_RDS")){DATA_SAVE_LOC_RDS <- "./Data/Cleaned_Data/Mortality_Data/RDS/"}
|
||||||
|
if(!exists("DATA_SAVE_LOC_CSV")){DATA_SAVE_LOC_CSV <- "./Data/Cleaned_Data/Mortality_Data/CSV/"}
|
||||||
|
dir.create(DATA_SAVE_LOC_RDS, recursive = TRUE, showWarnings = FALSE)
|
||||||
|
dir.create(DATA_SAVE_LOC_CSV, recursive = TRUE, showWarnings = FALSE)
|
||||||
|
#get the 2000 age distribution for use to reverse engineer age adjusted values to later use when lining the age adjusted regression with single year-sex mortality estiamtes.
|
||||||
|
AGE_ADJUST_REF_DATA <- read_csv(paste0(DATA_LOC_RAW,"US_Single_Age_1999-2020.csv")) %>% select(Year,Sex,Age=`Single-Year Ages Code`,Population) %>% mutate(Population=parse_number(Population)) %>% filter(!is.na(Population)) %>% filter(Year==2000,!is.na(Population))
|
||||||
|
OLD_ADJUST_REF_DATA <- read_csv(paste0(DATA_LOC_RAW,"US_10_Year_Age_Groups_1999-2020.csv"))%>% rename(Age=`Ten-Year Age Groups Code`) %>% filter(Age=='85+',Year==2000)%>% mutate(Age=85,Year=as.numeric(Year),Population=parse_number(Population)) %>% select(Year,Sex,Age,Population) %>% mutate(Population=as.numeric(Population),Age=as.numeric(Age))
|
||||||
|
AGE_ADJUST_REF_DATA <- full_join(AGE_ADJUST_REF_DATA,OLD_ADJUST_REF_DATA) %>% arrange(Sex,Age) %>% ungroup %>% mutate(Population=as.numeric(Population))
|
||||||
|
AGE_ADJUST_REF_DATA$Percent_of_Population <- AGE_ADJUST_REF_DATA$Population/sum(AGE_ADJUST_REF_DATA$Population)
|
||||||
|
saveRDS(AGE_ADJUST_REF_DATA,paste0(DATA_SAVE_LOC_RDS,"Single_Sex_Age_Population_in_2000.Rds" ))
|
||||||
|
write_csv(AGE_ADJUST_REF_DATA,paste0(DATA_SAVE_LOC_RDS,"Single_Sex_Age_Population_in_2000.Rds.csv" ))
|
||||||
|
####
|
||||||
|
DF1999 <- read_csv(paste0(DATA_LOC_RAW,"US_Single_Age_1999-2020.csv")) %>% select(Year,Sex,Age=`Single-Year Ages Code`,Mortality_Rate=`Crude Rate`) %>% mutate(Mortality_Rate=parse_number(Mortality_Rate)) %>% filter(!is.na(Mortality_Rate)) %>% mutate(Mortality_Rate=as.numeric(Mortality_Rate))
|
||||||
|
sum(AGE_ADJUST_REF_DATA$Population,na.rm=TRUE)/10^6
|
||||||
|
DF2018 <- read_csv(paste0(DATA_LOC_RAW,"US_Single_Age_2018-2023.csv")) %>% select(Year,Sex,Age=`Single-Year Ages Code`,Mortality_Rate=`Crude Rate`) %>% filter(!is.na(Mortality_Rate))%>% mutate(Mortality_Rate=parse_number(Mortality_Rate)) %>% filter(!is.na(Mortality_Rate)) %>% mutate(Mortality_Rate=as.numeric(Mortality_Rate))
|
||||||
|
|
||||||
|
OLDER1 <- read_csv(paste0(DATA_LOC_RAW,"US_10_Year_Age_Groups_1999-2020.csv"))%>% rename(Age=`Ten-Year Age Groups Code`,Mortality_Rate=`Crude Rate`) %>% filter(Age=='85+')%>% mutate(Age=85,Year=as.numeric(Year),Mortality_Rate=parse_number(Mortality_Rate)) %>% select(Year,Sex,Age,Mortality_Rate) %>% mutate(Mortality_Rate=as.numeric(Mortality_Rate),Age=as.numeric(Age))
|
||||||
|
OLDER2 <- read_csv(paste0(DATA_LOC_RAW,"US_10_Year_Age_Groups_2018-2023.csv"))%>% rename(Age=`Ten-Year Age Groups Code`,Mortality_Rate=`Crude Rate`) %>% filter(Age=='85+')%>% mutate(Age=85,Year=as.numeric(Year),Mortality_Rate=parse_number(Mortality_Rate)) %>% select(Year,Sex,Age,Mortality_Rate)%>% mutate(Mortality_Rate=as.numeric(Mortality_Rate),Age=as.numeric(Age))
|
||||||
|
DF <- rbind(DF1999,DF2018,OLDER1,OLDER2) %>% unique %>% group_by(Year,Sex,Age) %>% arrange(Year,Sex,Age) %>% mutate(Age=as.numeric(Age)) %>% ungroup
|
||||||
|
#hist(US_CAUSES$Death_Rate,breaks=150)
|
||||||
|
#Overall US death rates
|
||||||
|
DF <- rbind(DF1999,DF2018,OLDER1,OLDER2) %>% unique %>% group_by(Year,Sex,Age) %>% arrange(Year,Sex,Age) %>% mutate(Age=as.numeric(Age)) %>% ungroup
|
||||||
|
|
||||||
|
US_AGE_ADJ <- rbind(read_csv(paste0(DATA_LOC_RAW,"US_Age_Adjusted_1979-1998.csv")) %>% select(Year,Sex,US_Adj_Death_Rate=`Crude Rate`),read_csv(paste0(DATA_LOC_RAW,"US_Age_Adjusted_1999-2020.csv")) %>% select(Year,Sex,US_Adj_Death_Rate=`Crude Rate`),read_csv(paste0(DATA_LOC_RAW,"US_Age_Adjusted_2018-2023.csv")) %>% select(Year,Sex,US_Adj_Death_Rate=`Crude Rate`)) %>% unique
|
||||||
|
US_AGE_ADJ <- rbind(read_csv(paste0(DATA_LOC_RAW,"US_Age_Adjusted_1979-1998.csv")) %>% select(Year,Sex,US_Adj_Death_Rate=`Crude Rate`),read_csv(paste0(DATA_LOC_RAW,"US_Age_Adjusted_1999-2020.csv")) %>% select(Year,Sex,US_Adj_Death_Rate=`Crude Rate`),read_csv(paste0(DATA_LOC_RAW,"US_Age_Adjusted_2018-2023.csv")) %>% select(Year,Sex,US_Adj_Death_Rate=`Crude Rate`)) %>% unique
|
||||||
|
REG_DATA <- DF %>% left_join(US_AGE_ADJ) %>% pivot_wider(values_from=Mortality_Rate,names_from=Age,names_prefix="Age_")
|
||||||
|
saveRDS(DF,paste0(DATA_SAVE_LOC_RDS,"Single_Sex_Age_US_Mortality_Rate_Data_Long.Rds" ))
|
||||||
|
write_csv(DF,paste0(DATA_SAVE_LOC_RDS,"Single_Sex_Age_US_Mortality_Rate_Data_Long.csv" ))
|
||||||
|
|
||||||
|
|
||||||
|
saveRDS(REG_DATA,paste0(DATA_SAVE_LOC_RDS,"Single_Sex_Age_US_Mortality_Rate_Data_Wide.Rds" ))
|
||||||
|
write_csv(REG_DATA,paste0(DATA_SAVE_LOC_RDS,"Single_Sex_Age_US_Mortality_Rate_Data_Wide.csv" ))
|
||||||
|
|
||||||
Loading…
x
Reference in New Issue
Block a user