213 lines
12 KiB
R
213 lines
12 KiB
R
library(tidyverse)
|
|
LIN_1979 <- read_csv("Data/Raw_Data/Mortality_Rates_New/Lincoln_Age_Adjusted_1979-1998.csv") %>% select(Year,Sex,Mort_Rate=`Age Adjusted Rate`)%>% mutate(Region='Lincoln')
|
|
WY_1979 <- read_csv("Data/Raw_Data/Mortality_Rates_New/Wyoming_Age_Adjusted_1979-1998.csv") %>% select(Year,Sex,Mort_Rate=`Age Adjusted Rate`)%>% mutate(Region='Wyoming')
|
|
US_1979 <- read_csv("Data/Raw_Data/Mortality_Rates_New/US_Age_Adjusted_1979-1998.csv")%>% select(Year,Sex,Mort_Rate=`Age Adjusted Rate`) %>% filter(!is.na(Sex),!is.na(Year)) %>% mutate(Region="US")%>% filter(Year<2018)
|
|
|
|
|
|
|
|
|
|
LIN_1999 <- read_csv("Data/Raw_Data/Mortality_Rates_New/Lincoln_Age_Adjusted_1999-2020.csv") %>% select(Year,Sex,Mort_Rate=`Age Adjusted Rate`)%>% mutate(Region='Lincoln')
|
|
WY_1999 <- read_csv("Data/Raw_Data/Mortality_Rates_New/Wyoming_Age_Adjusted_1999-2020.csv") %>% select(Year,Sex,Mort_Rate=`Age Adjusted Rate`)%>% mutate(Region='Wyoming') %>% filter(Year<2018)
|
|
US_1999 <- read_csv("Data/Raw_Data/Mortality_Rates_New/US_Age_Adjusted_1999-2020.csv")%>% select(Year,Sex,Mort_Rate=`Age Adjusted Rate`) %>% filter(!is.na(Sex),!is.na(Year)) %>% mutate(Region="US")%>% filter(Year<2018)
|
|
|
|
|
|
WY_2018 <- read_csv("Data/Raw_Data/Mortality_Rates_New/Wyoming_Age_Adjusted_2018-2023.csv") %>% select(Year,Sex,Mort_Rate=`Age Adjusted Rate`) %>% mutate(Region='Wyoming')
|
|
US_2018 <- read_csv("Data/Raw_Data/Mortality_Rates_New/US_Age_Adjusted_2018-2023.csv")%>% select(Year,Sex,Mort_Rate=`Age Adjusted Rate`) %>% mutate(Region="US")
|
|
##No adjustment for later data allowed
|
|
LIN_2018<- read_csv("Data/Raw_Data/Mortality_Rates_New/Lincoln_Not_Age_Adjusted_2018-2023.csv") %>% select(Year,Sex,Mort_Rate=`Crude Rate`)%>% mutate(Region='Lincoln')
|
|
ADJUST_TERM <- LIN_2018 %>% rename(UNADJUSTED=Mort_Rate) %>% inner_join(LIN_1999) %>% filter(!is.na(Year)) %>% mutate(Ratio=Mort_Rate/UNADJUSTED) %>% group_by(Sex) %>% summarize(Ratio=mean(Ratio))
|
|
LIN_2018 <- LIN_2018 %>% filter(Year>2020) %>% left_join(ADJUST_TERM) %>% mutate(Mort_Rate=Mort_Rate*Ratio) %>% select(-Ratio)
|
|
|
|
DF <- rbind(LIN_1999,LIN_2018,WY_1999,US_1999,US_2018,WY_2018,WY_1979,US_1979,LIN_1979) %>% filter(!is.na(Year),!is.na(Sex))
|
|
ggplot(DF,aes(x=Year,y=Mort_Rate,group=Region,color=Region,fill=Region))+geom_point()+geom_smooth(method="lm")+ facet_grid(. ~ Sex)
|
|
ggplot(DF,aes(x=Year,y=Mort_Rate,group=Region,color=Region,fill=Region))+geom_point()+geom_smooth(span=0.4)+ facet_grid(. ~ Sex)
|
|
ggplot(DF,aes(x=Year,y=Mort_Rate,group=Region,color=Region,fill=Region))+geom_point()+geom_line()+ facet_grid(. ~ Sex)
|
|
########################################################ARIMA
|
|
PANDIMIC_INDEX <- read_csv("https://fred.stlouisfed.org/graph/fredgraph.csv?bgcolor=%23ebf3fb&chart_type=line&drp=0&fo=open%20sans&graph_bgcolor=%23ffffff&height=450&mode=fred&recession_bars=off&txtcolor=%23444444&ts=12&tts=12&width=1320&nt=0&thu=0&trc=0&show_legend=yes&show_axis_titles=yes&show_tooltip=yes&id=WUPI&scale=left&cosd=1996-01-01&coed=2025-07-01&line_color=%230073e6&link_values=false&line_style=solid&mark_type=none&mw=3&lw=3&ost=-99999&oet=99999&mma=0&fml=a&fq=Annual&fam=sum&fgst=lin&fgsnd=2020-02-01&line_index=1&transformation=lin&vintage_date=2025-11-20&revision_date=2025-11-20&nd=1996-01-01") %>% mutate(Year=year(observation_date)) %>% select(Year,WUPI)%>% mutate(L_WUPI=lag(WUPI),L_TWO_WUPI=lag(WUPI,2))
|
|
DF <- DF %>% ungroup%>% group_by(Sex,Region) %>% arrange(Sex,Region,Year) %>% mutate(L_Mort_Rate=lag(Mort_Rate)) %>% ungroup
|
|
REG_DATA <- DF %>% pivot_wider(values_from=c(Mort_Rate,L_Mort_Rate),names_from=Region)
|
|
REG_DATA <- REG_DATA %>% left_join(PANDIMIC_INDEX) %>% mutate(WUPI=ifelse(is.na(WUPI),0,WUPI),L_WUPI=ifelse(is.na(L_WUPI),0,L_WUPI))
|
|
|
|
library(fixest)
|
|
MOD_WOMEN <- feols(Mort_Rate_US~L_Mort_Rate_US+Year+WUPI,REG_DATA %>% filter(Sex=='Female'))
|
|
acf(resid(MOD_WOMEN))
|
|
MOD_WOMEN
|
|
MOD_MEN <- feols(Mort_Rate_US~L_Mort_Rate_US+Year+WUPI,REG_DATA %>% filter(Sex=='Male'))
|
|
###Lincoln
|
|
REG_DATA
|
|
MOD_WOMEN <- feols(Mort_Rate_Lincoln~Mort_Rate_US,REG_DATA %>% filter(Sex=='Female'))
|
|
acf(resid(MOD_WOMEN))
|
|
MOD_WOMEN
|
|
MOD_MEN <- feols(Mort_Rate_US~L_Mort_Rate_US+Year+WUPI,REG_DATA %>% filter(Sex=='Male'))
|
|
|
|
acf(resid(MOD_MEN))
|
|
plot(resid(MOD_MEN))
|
|
plot(predict(MOD_MEN))
|
|
library(forecast)
|
|
DATA_MEN <- REG_DATA %>% filter(Sex=='Male')
|
|
DATA_WOMEN <- REG_DATA %>% filter(Sex=='Female')
|
|
|
|
ST_YEAR <- DATA_MEN %>% pull(Year) %>% min
|
|
|
|
TS_MEN_US <- DATA_MEN %>% select(Mort_Rate_US) %>% ts(start=ST_YEAR,frequency=1)
|
|
TS_MEN_LIN <- DATA_MEN %>% select(Mort_Rate_Lincoln) %>% ts(start=ST_YEAR,frequency=1)
|
|
TS_PANDEMIC <- DATA_MEN %>% select(WUPI,L_WUPI) %>% ts(start=ST_YEAR,frequency=1)
|
|
|
|
TS_WOMEN_US <- DATA_WOMEN %>% select(Mort_Rate_US) %>% ts(start=ST_YEAR,frequency=1)
|
|
TS_WOMEN_LIN <- DATA_WOMEN %>% select(Mort_Rate_Lincoln) %>% ts(start=ST_YEAR,frequency=1)
|
|
|
|
TS_MEN_US_INV <- TS_MEN_US
|
|
|
|
TS_WOMEN_US_INV <- TS_WOMEN_US
|
|
FORECAST_XREG <- TS_PANDEMIC
|
|
FORECAST_XREG[,] <- 0
|
|
MOD_US_MEN <- auto.arima(TS_MEN_US,lambda=0,biasadj=TRUE,xreg=TS_PANDEMIC)
|
|
checkresiduals(MOD_US_MEN)
|
|
|
|
plot(forecast(MOD_US_MEN,xreg=FORECAST_XREG))
|
|
MOD_US_WOMEN <- auto.arima(TS_WOMEN_US,lambda=0,biasadj=TRUE,xreg=TS_PANDEMIC)
|
|
checkresiduals(MOD_US_WOMEN)
|
|
plot(forecast(MOD_US_WOMEN,xreg=FORECAST_XREG))
|
|
|
|
MOD_LIN <- auto.arima(TS_MEN_LIN,biasadj=TRUE,xreg=TS_MEN_US)
|
|
simulate(MOD_US_MEN,xreg=FORECAST_XREG)
|
|
plot(simulate(MOD_LIN,xreg=simulate(MOD_US_MEN,xreg=FORECAST_XREG)))
|
|
plot(forecast(MOD_LIN,xreg=simulate(MOD_US_MEN,xreg=FORECAST_XREG)))
|
|
################################Other work
|
|
SINGLE_DATA <- read_csv("Data/Raw_Data/Mortality_Rates_New/US_Single_Age_1999-2020.csv") %>% select(Year,Sex,Age=`Single-Year Ages Code`,Mortality_Rate=`Crude Rate`) %>% mutate(Mortality_Rate=parse_number(Mortality_Rate)) %>% filter(!is.na(Mortality_Rate))
|
|
|
|
OLDER <- read_csv("Data/Raw_Data/Mortality_Rates_New/US_10_Year_Age_Groups_1999-2020.csv")%>% rename(Age=`Ten-Year Age Groups Code`,Mortality_Rate=`Crude Rate`) %>% filter(Age=='85+')%>% mutate(Age=85,Year=as.numeric(Year),Mortality_Rate=parse_number(Mortality_Rate)) %>% select(Year,Sex,Age,Mortality_Rate)%>% select(colnames(SINGLE_DATA))
|
|
SINGLE_DATA <- rbind(SINGLE_DATA,OLDER) %>% left_join(REG_DATA %>% select(Year,Sex,US_Rate=Mort_Rate_US))
|
|
SINGLE_DATA_PLAIN <- SINGLE_DATA
|
|
|
|
SINGLE_DATA <- SINGLE_DATA %>% group_by(Age,Sex) %>% mutate(INDEX=Mortality_Rate/sum(ifelse(Year==min(Year),Mortality_Rate,0)),US_INDEX=US_Rate/sum(ifelse(Year==min(Year),US_Rate,0))) %>% ungroup
|
|
ggplot(SINGLE_DATA,aes(x=Year,y=INDEX,group=Age,color=Age))+geom_point()+geom_smooth(aes(y=US_INDEX),se=FALSE,color="black",linetype=2,linewidth=2) + facet_grid(rows= vars(Sex))
|
|
|
|
ggplot(SINGLE_DATA,aes(x=Year,y=INDEX,group=Age,color=Age))+geom_point(size=0.5)+geom_line(aes(y=US_INDEX),color="black",linetype=2,linewidth=1) + facet_grid(rows= vars(Sex))
|
|
|
|
ggplot(SINGLE_DATA,aes(x=Year,y=Mortality_Rate,group=Age,color=Age))+geom_point() + scale_y_log10() + facet_grid(. ~ Sex)
|
|
|
|
ggplot(SINGLE_DATA,aes(x=Year,y=INDEX,group=Age,color=Age))+geom_smooth(se=FALSE,method="lm")+ scale_y_log10() + facet_grid(. ~ Sex)
|
|
COR_DAT <- SINGLE_DATA %>% arrange(Sex,Age) %>% select(-US_Rate,-US_INDEX,-INDEX) %>% pivot_wider(values_from=c(Mortality_Rate),names_from=c(Sex,Age)) %>% arrange(Year) %>% select(-Year) %>% as.matrix
|
|
rownames(COR_DAT) <- 1999:2020
|
|
|
|
TEST <- matrix(as.numeric(COR_DAT[1,]),nrow(COR_DAT),nrow=nrow(COR_DAT),ncol=ncol(COR_DAT))
|
|
COR_DAT <- COR_DAT/TEST
|
|
#COR_DAT <- t(COR_DAT)
|
|
#US_COR_DATA <- SINGLE_DATA %>% select(Year,Sex,US_Rate) %>% unique %>% pivot_wider(values_from=US_Rate,names_from=Sex) %>% arrange(Year)
|
|
#COR_DAT <- cbind(US_COR_DATA,COR_DAT) %>% as.matrix
|
|
library(factoextra)
|
|
COR_DAT
|
|
corrplot(cor(COR_DAT[,1:86]),type="lower")
|
|
corrplot(cor(COR_DAT[,87:172]),type="lower")
|
|
|
|
COR_DAT <- t(COR_DAT)
|
|
COR_DAT <- COR_DAT[,-1]
|
|
|
|
fviz_nbclust(COR_DAT,kmeans,"gap_stat")
|
|
fviz_nbclust(COR_DAT,kmeans,"wss",nboot=1000)
|
|
fviz_nbclust(COR_DAT,kmeans,nboot=1000)
|
|
km.res <- kmeans(COR_DAT, 3, nstart = 1)
|
|
fviz_cluster(km.res,COR_DAT)
|
|
print(km.res)
|
|
COR_DAT
|
|
SINGLE_DATA %>% filter(!is.numeric(Mortality_Rate))
|
|
SINGLE_DATA <- SINGLE_DATA %>% mutate(Age=as.numeric(Age),US_Rate=as.numeric(US_Rate),Year=as.numeric(Year),Mortality_Rate=as.numeric(Mortality_Rate))
|
|
SINGLE_DATA %>% filter(is.numeric(Age),is.numeric(Mortality_Rate),is.numeric(US_Rate))
|
|
DATA <- SINGLE_DATA %>% left_join(PANDIMIC_INDEX ) %>% ungroup %>% group_by(Sex,Year) %>% mutate(Rank=rank(Mortality_Rate)) %>% ungroup %>% group_by(Year,Sex) %>% mutate(PER_MORT_MAX=Mortality_Rate/max(Mortality_Rate))
|
|
feols(Mortality_Rate~factor(Rank)+Sex+Year,DATA)
|
|
MOD <- feols(log(Mortality_Rate)~log(US_Rate)*Sex+factor(Rank)+Sex|Year,DATA)
|
|
MOD
|
|
plot(resid(MOD))
|
|
DATA$RESID <- resid(MOD)
|
|
acf((DATA %>% filter(Age==85,Sex=='Female') %>% pull(RESID)))
|
|
DATA
|
|
ggplot(SINGLE_DATA,aes(x=Year,y=Mortality_Rate,group=Age,color=Age))+geom_point() + scale_y_log10() + facet_grid(. ~ Sex)
|
|
|
|
TEST <- SINGLE_DATA %>% group_by(Year,Sex) %>% mutate(TEST=rank(Mortality_Rate)) %>%ungroup
|
|
ggplot(TEST,aes(x=Year,y=TEST,group=Age,color=Age)) +geom_point()+ geom_smooth()
|
|
TEMP <- SINGLE_DATA %>% group_by(Sex,Year) %>% summarize(GAP=max(Mortality_Rate)-min(Mortality_Rate))
|
|
ggplot(TEMP,aes(x=Year,y=GAP,group=Sex,color=Sex))+geom_line()
|
|
|
|
MOD <- feols(log(Mortality_Rate)~factor(Age)*(log(US_Rate)+WUPI+L_WUPI)+Year,data=as_tibble(SINGLE_DATA))
|
|
SINGLE_DATA
|
|
plot(resid(MOD))
|
|
TEMP <- SINGLE_DATA
|
|
TEMP$RESID <- resid(MOD)
|
|
acf(TEMP %>% filter(Age==80) %>% pull(RESID))
|
|
etable(MOD,group=list("Single Age"="factor"))
|
|
SINGLE_DATA[87,]
|
|
resid(MOD)
|
|
predict
|
|
|
|
|
|
REG_SINGLE_DATA <- SINGLE_DATA_PLAIN %>% mutate( %>% pivot_wider(values_from="Mortality_Rate",names_from=c("Age"))
|
|
MALE <- REG_SINGLE_DATA %>% filter(Sex=='Male')
|
|
FEMALE <- REG_SINGLE_DATA %>% filter(Sex=='Female')
|
|
library(corrplot)
|
|
corrplot(cor(MALE %>% select(-Sex)))
|
|
US_CAUSES <- read_csv("Data/Raw_Data/Mortality_Rates_New/US_Cause_of_Death_1999-2020.csv") %>% select(Year,ICD=`ICD Sub-Chapter Code`,Death_Rate=`Crude Rate`) %>% filter(!is.na(Death_Rate)) %>% mutate(Death_Rate=ifelse(Death_Rate=='Suppressed' |Death_Rate=='Unreliable',NA,Death_Rate)) %>% rbind(read_csv("Data/Raw_Data/Mortality_Rates_New/US_Cause_of_Death_2018-2023.csv") %>% select(Year,ICD=`ICD Sub-Chapter Code`,Death_Rate=`Crude Rate`) %>% filter(!is.na(Death_Rate)) %>% mutate(Death_Rate=ifelse(Death_Rate=='Suppressed' |Death_Rate=='Unreliable',NA,Death_Rate))) %>% mutate(Death_Rate=parse_number(Death_Rate)) %>% arrange(Year,ICD) %>% group_by(ICD) %>% filter(max(is.na(Death_Rate))==0,min(Death_Rate)!=max(Death_Rate)) %>% ungroup %>% unique
|
|
BIND <- read_csv("Data/Raw_Data/Mortality_Rates_New/US_Cause_of_Death_1999-2020.csv") %>% select(ICD=`ICD Sub-Chapter Code`,NAME=`ICD Sub-Chapter`) %>% unique
|
|
US_CAUSES <- US_CAUSES %>% left_join(BIND) %>% select(-ICD) %>% rename(ICD=NAME)
|
|
US_CAUSES %>% group_by(ICD) %>% summarize(Rate=mean(Death_Rate)) %>% summarize(ICD,Rate, Rank=rank(desc(Rate))) %>% arrange(Rank)
|
|
ggplot(US_CAUSES,aes(x=Year,y=scale(Death_Rate),group=ICD,color=ICD,fill=ICD)) +geom_point() +geom_smooth()+theme(legend.position="bottom")
|
|
US_CAUSES
|
|
parse_number(REG_SINGLE_DATA[,3:89])
|
|
US_CAUSES <- US_CAUSES %>% pivot_wider(values_from=Death_Rate,names_from=ICD)
|
|
CAUS_MAT <- US_CAUSES %>% select(-Year)
|
|
sd(t(CAUS_MAT)[,118])
|
|
|
|
CAUS_MAT <- scale(CAUS_MAT)
|
|
|
|
fviz_nbclust(CAUS_MAT,kmeans,"gap_stat") #6
|
|
fviz_nbclust(CAUS_MAT,kmeans,"wss") #5
|
|
fviz_nbclust(CAUS_MAT,kmeans) #2
|
|
|
|
km.res <- kmeans(CAUS_MAT, 6, nstart = 1)
|
|
fviz_cluster(km.res,CAUS_MAT)
|
|
summary(km.res
|
|
print(km.res)
|
|
km.res$cluster
|
|
|
|
corrplot(cor(US_CAUSES))
|
|
MALE <- US_CAUSES %>% left_join(MALE) %>% select(Year,Sex,US_Rate,everything())
|
|
MALE %>% tail
|
|
COR_MALE <- MALE %>% select(-Sex) %>% as.matrix
|
|
corrplot(cor(COR_MALE,use="pairwise.complete"),type="lower",diag=FALSE,)
|
|
?corrplot
|
|
|
|
COR_MALE
|
|
corrplot(cor(cbind(MALE[,1],MALE[,4:ncol(MALE)]/t(MALE[,3]))))
|
|
corrplot(cor(log(FEMALE %>% select(-Sex))))
|
|
TEMP <- MALE %>% select(RATE=`36`,US_Rate,Year) %>% as.data.frame
|
|
TEST <- feols((RATE)~US_Rate+Year,TEMP)
|
|
TEST <- feols(RATE/US_Rate~Year,TEMP)
|
|
acf(as.numeric((TEMP[,'RATE']-predict(TEST)) %>% unlist))
|
|
acf
|
|
TEST <- lm(log(`22`)~log(US_Rate)+Year,data=MALE)
|
|
predict(TEST
|
|
resid(TEST)
|
|
lm(`20`~Year+
|
|
plot(as.vector(t(FEMALE %>% dplyr::select(`20`))))
|
|
%>% pull(`22`)
|
|
?corrplot
|
|
plot(cor(FEMALE %>% select(-Sex))[2,])
|
|
plot(cor(FEMALE %>% select(-Sex))[1,])
|
|
MAT <- (cbind(abs(cor(FEMALE %>% select(-Sex))[2,]),abs(cor(FEMALE %>% select(-Sex))[1,])))
|
|
plot(apply(MAT,1,min))
|
|
|
|
|
|
|
|
|
|
|
|
MALE
|
|
|
|
library(fixest)
|
|
MOD
|
|
MOD <- feols(Age_85~US_Rate,data=MALE)
|
|
acf(MALE[,"Age_85"]-predict(MOD))
|
|
residuals(MOD)
|
|
MOD0
|
|
resid(MOD0)
|
|
|