41 lines
4.9 KiB
R
41 lines
4.9 KiB
R
## This R script contains a set of functions used to calculate the number of deaths in each age-sex group, when given a set of initial demographics. The mortality rate trend data feeds this primary model by identifying a simulated future mortality rate to apply to a demographic group when the year is in the future.
|
|
|
|
###Uncomment to test the function line by line. Keep commented for a typical run.
|
|
#C_YEAR=1;SIM_NUM=89;LIN_CURRENT_DEM=readRDS("Data/Cleaned_Data/Lincoln_Demographic_Data.Rds") %>% group_by(County) %>% filter(Year==max(Year)) %>% ungroup %>% select(-County);RELOAD_MORTALITY_RATE=TRUE;MORTALITY_RATE_SIM=NA;MORTALITY_RATE_SIM_LOC="./Data/Simulated_Data_Sets/MORTALITY_MONTE_CARLO.Rds";DEM_GROUP_INDEX=readRDS("./Data/Cleaned_Data/Lincoln_Mortality_Rate.Rds") %>% select(Sex,Min_Age,Max_Age)
|
|
MORTALITY_SIM <- function(C_YEAR,SIM_NUM,LIN_CURRENT_DEM,RELOAD_MORTALITY_RATE=TRUE,MORTALITY_RATE_SIM=NA,MORTALITY_RATE_SIM_LOC="./Data/Simulated_Data_Sets/MORTALITY_MONTE_CARLO.Rds",DEM_GROUP_INDEX=readRDS("./Data/Cleaned_Data/Lincoln_Mortality_Rate.Rds") %>% select(Sex,Min_Age,Max_Age)){
|
|
#C_YEAR: Current year (1,2,3 etc.) since start of the simulation.
|
|
#SIM_NUM: Number of Monte Carlo simulation to run
|
|
#LIN_CURRENT: Demographic data from the previous simulation (or census) to simulate number of deaths
|
|
#RELOAD_MORTALITY_RATE: Should Mortality Rates be passed to the function as a data frame, or extracted from file?
|
|
#MORTALITY_RATE_SIM: If RELOAD_MORTALITY_RATE is FALSE than the user must supply a simulation object. Increases speed but also uses more RAM. So this allows for the RAM speed trade-off choice to be made by the user
|
|
#MORTALITY_RATE_SIM_LOC: If RELOAD_MORTALITY_RATE is TRUE, where are the Monte Carlo results of age-sex groups mortality rates over time saved? This allows the data to be loaded from disc.
|
|
#DEM_GROUP_INDEX: A data set of sex age groups with the sex, and min, and max age listed for each group. The rows of the data must match what was used in the mortality rate over time simulation.
|
|
if(RELOAD_MORTALITY_RATE){MORTALITY_RATE_SIM <- readRDS(MORTALITY_RATE_SIM_LOC)}#If the user did not supply a data.frame of time group trends in mortality rates, load one from disc
|
|
|
|
|
|
#Extract a index of Male and Female mortality rates for each row of the supplied starting demographic distribution data, this index matches the exact order of the saved mortality rate simulation results by age and sex group, allowing the relevant future morality rate simulation results to be recalled using this index number.
|
|
INDEX <- t((sapply(1:nrow(LIN_CURRENT_DEM),function(x){TEMP <- (LIN_CURRENT_DEM %>% pull(Age))[x]; rev(which(TEMP>=DEM_GROUP_INDEX %>% pull(Min_Age)& TEMP<=DEM_GROUP_INDEX %>% pull(Max_Age)))}))) #Find the mortality rate of a specific age (1,85 etc.) from the simulated future rates. The rates are assessed by age groups, so identify which age group each individual age is in in order to link that to mortality rates.
|
|
|
|
#Extract the Male and Female, by age results using the INDEX values. To add a current mortality rate for each group, based on the long run rate simulation
|
|
LIN_CURRENT_DEM$Male_Mortality <- sapply(INDEX[,1],function(x){MORTALITY_RATE_SIM[[x]][C_YEAR,SIM_NUM]})
|
|
LIN_CURRENT_DEM$Female_Mortality <- sapply(INDEX[,2],function(x){MORTALITY_RATE_SIM[[x]][C_YEAR,SIM_NUM]})
|
|
|
|
#Create a simulation of the number of actual deaths. Randomly pull numbers for each person in the group. A person dies only if this number is less than the mortality rate to match the distribution of deaths, but allowing for random changes when summed over the population.
|
|
LIN_CURRENT_DEM$Male_Deaths <- sapply(1:nrow(LIN_CURRENT_DEM),function(x){sum(runif(LIN_CURRENT_DEM$Num_Male[x])<LIN_CURRENT_DEM$Male_Mortality[x])})
|
|
LIN_CURRENT_DEM$Female_Deaths <- sapply(1:nrow(LIN_CURRENT_DEM),function(x){sum(runif(LIN_CURRENT_DEM$Num_Female[x])<LIN_CURRENT_DEM$Female_Mortality[x])})
|
|
return(LIN_CURRENT_DEM)
|
|
|
|
}
|
|
|
|
###NOT CURRENTLY USED AS OF OCTOBER 22, THE IDEA IS TO MOVE ALL YEARS FORWARD BY ONE FOR THE NEXT SIMULATION
|
|
INCREMENT_DEMOGRAPHICS <- function(LIN_CURRENT_DEM){
|
|
#Create a new data set that will return the demographics as updated with the simulated deaths for use in the next years simulation.
|
|
LIN_NEXT_DEM_DEATHS <- LIN_CURRENT_DEM %>% select(Year,Age,Num_Male,Num_Female,Male_Deaths,Female_Deaths)
|
|
LIN_NEXT_DEM_DEATHS$Year <- LIN_NEXT_DEM_DEATHS$Year +1 #This is the starting point for the next years simulation
|
|
LIN_NEXT_DEM_DEATHS$Age[1:85] <- LIN_NEXT_DEM_DEATHS$Age[1:85]+1 #Everyone is one year older. The 85 plus group is identical in the next year based on the layout of the NIH age group mortality data
|
|
LIN_NEXT_DEM_DEATHS[85,3:6] <- LIN_NEXT_DEM_DEATHS[86,3:6]+LIN_NEXT_DEM_DEATHS[85,3:6] #All 84 year olds are moved into the 85+ group, and all current 85+ individuals remain in this group. (total of 86 groups since zero is age in the list)
|
|
return(LIN_NEXT_DEM_DEATHS[-86,1:4]) #Return the results and drop the last row which was combined in with the 85th row in the previous step.
|
|
}
|
|
|
|
|