Population_Study/Scripts/Load_Custom_Functions/Death_Simulation_Functions.r
2025-11-13 17:20:10 -07:00

32 lines
3.9 KiB
R

## This R script contains a set of functions used to calculate the number of deaths in each age-sex group, when given a set of initial demographics. The mortality rate trend data feeds this primary model by identifying a simulated future mortality rate to apply to a demographic group when the year is in the future.
###Uncomment to test the function line by line. Keep commented for a typical run.
#C_YEAR=1;SIM_NUM=89;LIN_CURRENT_DEM=readRDS("Data/Cleaned_Data/Lincoln_Demographic_Data.Rds") %>% group_by(County) %>% filter(Year==max(Year)) %>% ungroup %>% select(-County);RELOAD_MORTALITY_RATE=TRUE;MORTALITY_RATE_SIM=NA;MORTALITY_RATE_SIM_LOC="./Data/Simulated_Data_Sets/MORTALITY_MONTE_CARLO.Rds";DEM_GROUP_INDEX=readRDS("./Data/Cleaned_Data/Lincoln_Mortality_Rate.Rds") %>% select(Sex,Min_Age,Max_Age)
MORTALITY_SIM <- function(C_YEAR,SIM_NUM,LIN_CURRENT_DEM,RELOAD_MORTALITY_RATE=TRUE,MORTALITY_RATE_SIM=NA,MORTALITY_RATE_SIM_LOC="./Data/Simulated_Data_Sets/MORTALITY_MONTE_CARLO.Rds",DEM_GROUP_INDEX=readRDS("./Data/Cleaned_Data/Lincoln_Mortality_Rate.Rds") %>% select(Sex,Min_Age,Max_Age)){
#C_YEAR: Current year (1,2,3 etc.) since start of the simulation.
#SIM_NUM: Number of Monte Carlo simulation to run
#LIN_CURRENT: Demographic data from the previous simulation (or census) to simulate number of deaths
#RELOAD_MORTALITY_RATE: Should Mortality Rates be passed to the function as a data frame, or extracted from file?
#MORTALITY_RATE_SIM: If RELOAD_MORTALITY_RATE is FALSE than the user must supply a simulation object. Increases speed but also uses more RAM. So this allows for the RAM speed trade-off choice to be made by the user
#MORTALITY_RATE_SIM_LOC: If RELOAD_MORTALITY_RATE is TRUE, where are the Monte Carlo results of age-sex groups mortality rates over time saved? This allows the data to be loaded from disc.
#DEM_GROUP_INDEX: A data set of sex age groups with the sex, and min, and max age listed for each group. The rows of the data must match what was used in the mortality rate over time simulation.
if(RELOAD_MORTALITY_RATE){MORTALITY_RATE_SIM <- readRDS(MORTALITY_RATE_SIM_LOC)}#If the user did not supply a data.frame of time group trends in mortality rates, load one from disc
#Extract a index of Male and Female mortality rates for each row of the supplied starting demographic distribution data, this index matches the exact order of the saved mortality rate simulation results by age and sex group, allowing the relevant future morality rate simulation results to be recalled using this index number.
INDEX <- t((sapply(1:nrow(LIN_CURRENT_DEM),function(x){TEMP <- (LIN_CURRENT_DEM %>% pull(Age))[x]; rev(which(TEMP>=DEM_GROUP_INDEX %>% pull(Min_Age)& TEMP<=DEM_GROUP_INDEX %>% pull(Max_Age)))}))) #Find the mortality rate of a specific age (1,85 etc.) from the simulated future rates. The rates are assessed by age groups, so identify which age group each individual age is in in order to link that to mortality rates.
#Extract the Male and Female, by age results using the INDEX values. To add a current mortality rate for each group, based on the long run rate simulation
LIN_CURRENT_DEM$Male_Mortality <- sapply(INDEX[,1],function(x){MORTALITY_RATE_SIM[[x]][C_YEAR,SIM_NUM]})
LIN_CURRENT_DEM$Female_Mortality <- sapply(INDEX[,2],function(x){MORTALITY_RATE_SIM[[x]][C_YEAR,SIM_NUM]})
#Create a simulation of the number of actual deaths. Randomly pull numbers for each person in the group. A person dies only if this number is less than the mortality rate to match the distribution of deaths, but allowing for random changes when summed over the population.
LIN_CURRENT_DEM$Male_Deaths <- sapply(1:nrow(LIN_CURRENT_DEM),function(x){sum(runif(LIN_CURRENT_DEM$Num_Male[x])<LIN_CURRENT_DEM$Male_Mortality[x])})
LIN_CURRENT_DEM$Female_Deaths <- sapply(1:nrow(LIN_CURRENT_DEM),function(x){sum(runif(LIN_CURRENT_DEM$Num_Female[x])<LIN_CURRENT_DEM$Female_Mortality[x])})
return(LIN_CURRENT_DEM)
}