First combination of migration, births and deaths

This commit is contained in:
Alex Gebben Work 2025-10-29 16:57:18 -06:00
parent 60dff9e4da
commit c66d260d41
5 changed files with 133 additions and 126 deletions

View File

@ -1,13 +1,17 @@
#####Packages
library(tidyverse) #Cleaning data
library(fixest) #Estimating a model of birth rates, to provide variance in the birth rate Monte Carlo using a fixed effect model.
library(parallel)
#Load custom functions needed for the simulation
source("Scripts/Birth_Simulation_Functions.r")
source("Scripts/Monte_Carlo_Functions.r")
source("Scripts/Migration_Simulation_Functions.r")
#####User Configuration Values
NUM_SIMULATIONS <- 10^4 #Number of Monte Carlo Simulations to run
NUM_SIMULATIONS <- 10^6 #Number of Monte Carlo Simulations to run
RERUN_MORTALITY_SIMULATION <- TRUE #Rerun the Monte Carlo simulation of future mortality rates (not actual deaths) even if a Rds file of a mortality rates exists. This can be used to speed up runs when FALSE
RERUN_MIGRATION_SIMULATION <- TRUE #Rerun the ARIMA simulations that predict total migration in any year even if a Rds file of a mortality rates exists. This can be used to speed up runs when FALSE
NUM_YEARS_PROJECTED <- 50 #How many years into the future should each Monte Carlo run project to. For example 25 years if starting from 2025 and ending in 2050.
BIRTH_RATE_REG_RESULTS <- "Data/Regression_Results/Birth_Rate_Model.Rds" #Location of the regression used to model variance in birth rates in each Monte Carlo simulation.
START_DEMOGRAPHIC_DATA <- "Data/Cleaned_Data/Start_Year_Demographic_Data_With_Fertility_Groups.Rds" #Location of the data for the first year needing a forecasted birth rate, which aggregates the yearly splits of populations, into a single, year-county data set with variables for key birth prediction (total number of men and women in ages with high fertility rates), and then combines with the data set including births, deaths, migration and population.
@ -17,20 +21,24 @@ source("Scripts/Monte_Carlo_Functions.r")
#A script contains the code needed to create a feols (fixest) regression of the birth rate given age distribution. Load this saved result or else create it to use in each simulation for gathering variance of births in any given age distribution path of the Monte Carlo.
if(file.exists(BIRTH_RATE_REG_RESULTS)){MOD_BIRTHS <- readRDS(BIRTH_RATE_REG_RESULTS);FIRST_PREDICT_YEAR_POPULATION_DATA <- readRDS(START_DEMOGRAPHIC_DATA)} else{source("Birth_Rate_Regression.r")}
if(file.exists(AGE_OF_MIGRANT_PROBABILITY)){MIG_AGE_DIST <- read.csv(AGE_OF_MIGRANT_PROBABILITY)} else{source("Migration_Regression.r")}
#Rerun the migration simulation if requested
if(RERUN_MIGRATION_SIMULATION ){source("Migration_Regression.r")}
#######################################################Main Monte Carlo
START_DEM_DATA <- readRDS("Data/Cleaned_Data/Lincoln_Demographic_Data.Rds") %>% group_by(County) %>% filter(Year==2023) %>% ungroup %>% select(-County)
MORTALITY_SIMULATION <- readRDS("./Data/Simulated_Data_Sets/MORTALITY_MONTE_CARLO.Rds") #Load the Mortality simulation to speed up simulation
MIGRATION_ARIMA_SIMULATION <- readRDS("./Data/Simulated_Data_Sets/Migration_ARIMA.Rds") #Load the Migration simulation to speed up simulation
#First run
C_RES <-RUN_SINGLE_SIM(MOD_BIRTHS,FIRST_PREDICT_YEAR_POPULATION_DATA,START_DEM_DATA,MORTALITY_SIMULATION,SIM_NUMBER=1,START_OF_SIM=2023)
C_RES <-RUN_SINGLE_SIM(MOD_BIRTHS,FIRST_PREDICT_YEAR_POPULATION_DATA,START_DEM_DATA,MORTALITY_SIMULATION,SIM_NUMBER=1,START_OF_SIM=2023,MIGRATION_ARIMA_SIMULATION,MIG_AGE_DIST )
# Year County Population Births Deaths Migration Min_Birth_Group PREV_BIRTH PREV_TWO_BIRTH Male_Birth_Group
#Second run, work on making into loop and saving the output to file (check if that will slow the results). Maybe use a predifined matrix, so that the results can be stored quirckly
SINGLE_PATH_SIM <- function(j){
C_RES <- RUN_SINGLE_SIM(MOD_BIRTHS,FIRST_PREDICT_YEAR_POPULATION_DATA,START_DEM_DATA,MORTALITY_SIMULATION,SIM_NUMBER=j,START_OF_SIM=2023)
C_RES <- RUN_SINGLE_SIM(MOD_BIRTHS,FIRST_PREDICT_YEAR_POPULATION_DATA,START_DEM_DATA,MORTALITY_SIMULATION,SIM_NUMBER=j,START_OF_SIM=2023,MIGRATION_ARIMA_SIMULATION,MIG_AGE_DIST)
RES <- C_RES[[1]]
for(i in 1:NUM_YEARS_PROJECTED){
C_RES <- RUN_SINGLE_SIM(MOD_BIRTHS,C_RES[[3]],C_RES[[2]],MORTALITY_SIMULATION,SIM_NUMBER=j,START_OF_SIM=2023)
C_RES <- RUN_SINGLE_SIM(MOD_BIRTHS,C_RES[[3]],C_RES[[2]],MORTALITY_SIMULATION,SIM_NUMBER=j,START_OF_SIM=2023,MIGRATION_ARIMA_SIMULATION,MIG_AGE_DIST)
RES <- rbind(RES,C_RES[[1]])
}
return(RES)
@ -38,8 +46,7 @@ SINGLE_PATH_SIM <- function(j){
#Run the full simulation across all simulations simulating changes in demographic, and mortality data.
system.time({
FULL_RESULTS <- mclapply(1:NUM_SIMULATIONS,SINGLE_PATH_SIM,mc.cores = detectCores()-1)
})
plot(FULL_RESULTS[[2000]]$Population)
TEMP <- sapply(1:length(FULL_RESULTS),function(x){(FULL_RESULTS[[x]] %>% pull(Population))[25] })
hist(TEMP)
})
plot(FULL_RESULTS[[1]][3])

View File

@ -83,9 +83,4 @@
5.13038157978179e-05
4.82137214190365e-06
4.82137214190365e-06
4.82137214190365e-06
4.82137214190365e-06
4.82137214190365e-06
4.82137214190365e-06
4.82137214190365e-06
3.31534934593599e-05
9.54339236147969e-06

1 x
83 5.13038157978179e-05
84 4.82137214190365e-06
85 4.82137214190365e-06
86 4.82137214190365e-06 9.54339236147969e-06
4.82137214190365e-06
4.82137214190365e-06
4.82137214190365e-06
4.82137214190365e-06
3.31534934593599e-05

View File

@ -1,10 +1,9 @@
##########################Model Migration Trends for use in the Monte Carlo. This is important because a 18 year old is more likely to move than 75 year old.
library(tidyverse)
library(fixest)
library(corrplot)
#library(tidyverse)
#library(fixest)
library(forecast)
######Checking correlations with migration rates
DEMOGRAPHIC_DATA <- readRDS("Data/Cleaned_Data/Wyoming_County_Demographic_Data.Rds")
#Extract the population trend data to connect with demographics (Population,births,deaths)
POP_DATA <- readRDS("Data/Cleaned_Data/Wyoming_County_Population.Rds")
#Identify births, deaths an migration from existing data.
@ -12,14 +11,12 @@ library(corrplot)
DEMO2 <- DEMOGRAPHIC_DATA %>% mutate(Year=Year+1,Age=Age+1) %>% rename(PREV_MALE=Num_Male,PREV_FEMALE=Num_Female)
#Combine into a usable data set. Calculate the change in the age-sex population from year to year. This change will include the effect of migration, where absolute values are a mix of factors unrelated to migration. This first-difference approach is preferred to absolute values.
DEMO_DATA <- inner_join(DEMO1,DEMO2) %>% mutate(Male=Num_Male-PREV_MALE,Female=Num_Female-PREV_FEMALE,Pop_Change=Male+Female) %>% select(County,Year,Age,Male,Female,Pop_Change) %>% arrange(County,Year,Age)
DEMO_DATA
#############################Observed that men and women behave similarly allowing for the groups to be combined
#COR_MAT_DATA_FULL <- pivot_wider(DEMO_DATA,values_from=c(Male,Female),names_from=Age)
#COR_MAT_DATA_FULL <- POP_DATA %>% left_join(COR_MAT_DATA_FULL )
COR_MAT_DATA_FULL <- pivot_wider(DEMO_DATA,values_from=c(Male,Female),names_from=Age)
COR_MAT_DATA_FULL <- POP_DATA %>% left_join(COR_MAT_DATA_FULL )
#TEST_DATA <- COR_MAT_DATA_FULL %>% filter(!is.na(Migration),!is.na(Deaths),!is.na(Population),!is.na(Male_34),!is.na(Year),!is.na(County))
#RES <- cbind(c(rep("Male",90),rep("Female",90)),c(rep(1:90,2)),rep(NA,180)) %>% as_tibble
#colnames(RES ) <- c("Sex","Age","MIGRATION_COEF")
#RES$MIGRATION_COEF <- as.numeric(RES$MIGRATION_COEF)
@ -37,9 +34,8 @@ DEMO_DATA
#Create a wide data set with ages in each column so that each regression of age can be predicted one by one.
#Use the previous years population data as the starting point, so that the regression does not use data already including migration.
DEMO_DATA <- DEMO_DATA %>% select(-Male,-Female)
COR_MAT_DATA_FULL <- pivot_wider(DEMO_DATA,values_from=Pop_Change,names_from=Age,names_prefix="Age_")
TEST_DATA <- POP_DATA %>% mutate(Population=Population-Migration-Births+Deaths) %>% left_join( COR_MAT_DATA_FULL) %>% filter(!is.na(Migration),!is.na(Deaths),!is.na(Population),!is.na(Year),!is.na(County))
COR_MAT_DATA_FULL <- pivot_wider(DEMO_DATA,values_from=Pop_Change,names_from=Age,names_prefix="Age_")
AGE_WIDE_DATA <- POP_DATA %>% mutate(Population=Population-Migration-Births+Deaths) %>% left_join( COR_MAT_DATA_FULL) %>% filter(!is.na(Migration),!is.na(Deaths),!is.na(Population),!is.na(Year),!is.na(County))
#Create a table to store the resulting coefficients in.
RES <- cbind(1:90,c(rep("Child",17),"18",rep("Adult",90-18)),rep(NA,90),rep(NA,90)) %>% as_tibble
@ -53,13 +49,12 @@ RES <- cbind(1:90,c(rep("Child",17),"18",rep("Adult",90-18)),rep(NA,90),rep(NA,9
#Predicating the effect of migration on population in any one age group, so that trends over age can be observed. Less when old, more when 18-19.
#Loop over all age groups, predict number of people in the age group, from previous population, deaths, and Migrations. Extract the Migration Coefficient for use in a trend analysis.
for(x in 1:90){
TEST_DATA$Y_VAL <- as.numeric(t(TEST_DATA[,6+x]))#Extract the change
TEST <- feols(Y_VAL~Deaths+Migration+Population|Year+County,data=TEST_DATA)
RES[x,3] <- as.numeric(coef(TEST)["Migration"])
RES[x,4] <- as.numeric(coef(TEST)["Deaths"])
# RES[x,3] <- mean(predict(TEST,TEST_DATA %>% mutate(Migration=Migration+1))-predict(TEST),na.rm=TRUE)
# RES[x,4] <- mean(predict(TEST,TEST_DATA %>% mutate(Deaths=Deaths+1))-predict(TEST),na.rm=TRUE)
AGE_WIDE_DATA$Y_VAL <- as.numeric(t(AGE_WIDE_DATA[,6+x]))#Extract the change
C_REG<- feols(Y_VAL~Deaths+Migration+Population|Year+County,data=AGE_WIDE_DATA)
RES[x,3] <- as.numeric(coef(C_REG)["Migration"])
RES[x,4] <- as.numeric(coef(C_REG)["Deaths"])
}
rm(C_REG)
#Create data to create graphs and analyze. Remove some observed outliers
GRAPH_DATA <- RES %>% filter(abs(MIGRATION_COEF)<Inf,Age<100) %>% filter(Age!=25,Age!=35,Age!=85)
##Graph when using log scales and grouping by child/adult. Looks pretty linear
@ -90,109 +85,41 @@ GRAPH_DATA <- RES %>% filter(abs(MIGRATION_COEF)<Inf,Age<100) %>% filter(Age!=2
#Convert the absolute coefficient values to a percentage chance that any one immigrant will be in the given age. This wont line up perfectly with the coefficients if using them to predict immigration, because the age-sex data set uses different totals than the population/migration data. However, the distribution should be the same, so we divide each estimate by the total. The results is the percent probability that a single independent immigrant will be of the given age.
#If this is run from the main script MIG_AGE_DIST will be the key variable and should not be changed.
MIG_AGE_DIST <- PRED_DATA$MIGRATION_COEF/sum(PRED_DATA$MIGRATION_COEF)
#Condense down to 85+ group because death rates only use 85+
MIG_AGE_DIST[85] <- mean(MIG_AGE_DIST[85:90])
MIG_AGE_DIST <- MIG_AGE_DIST[1:85]
write.csv(MIG_AGE_DIST ,"Data/Other_Intermediate_Outputs/Migreation_Age_Probablity_One_to_Ninety.csv",row.names=FALSE)
####Work on overall migration trends
#Could use code cleanup after trying things, but have but I have a working ARIMA to model lincoln county migration
TS_DATA <- POP_DATA
TS_DATA <- TS_DATA %>% mutate(In_Migration=ifelse(Migration>0,1,0)) %>% group_by(County) %>% arrange(County,Year) %>% mutate(Prev_Pop=lag(Population),ABS_PER_MIG=abs(Migration)/Prev_Pop,L_MIG=lag(Migration))
ST_YEAR <- min(pull(TS_DATA,Year))
END_YEAR <- max(pull(TS_DATA,Year))
TS_DATA %>% filter(County=='Lincoln') %>% pull(Migration) %>% plot()
TS_DATA %>% pull(Migration) %>% plot()
GRAPH_DATA <- TS_DATA %>% filter(!is.na(Migration))
GRAPH_DATA_LN <- TS_DATA %>% filter(!is.na(Migration),County=="Lincoln")
ggplot(GRAPH_DATA,aes(x=Year,y=Migration/Prev_Pop,group=County,color=County))+geom_point()+geom_line(data=GRAPH_DATA_LN)
TS_WIDE <- TS_DATA %>% mutate(Migration=Migration) %>% dplyr::select(Year,County,Migration) %>% pivot_wider(values_from=Migration,names_from=County) %>% arrange(Year) %>% filter(Year>1990,Year<=2021) %>%ts(start=c(1991),frequency=1)
TS_DATA %>% filter(County=='Lincoln')
ST_YEAR <- 1970
LN <- TS_DATA %>% mutate(Migration=Migration) %>% dplyr::select(Year,County,Migration) %>% pivot_wider(values_from=Migration,names_from=County) %>% arrange(Year) %>% dplyr::select(Lincoln,Year) %>% filter(Year>=ST_YEAR,Year<=2021) %>% dplyr::select(-Year) %>%ts(start=c(ST_YEAR),frequency=1)
TS_DATA <- POP_DATA %>% mutate(In_Migration=ifelse(Migration>0,1,0)) %>% group_by(County) %>% arrange(County,Year) %>% mutate(Prev_Pop=lag(Population))
ST_YEAR <- min(pull(TS_DATA %>% filter(!is.na(Migration)),Year))
END_YEAR <- max(pull(TS_DATA %>% filter(!is.na(Migration)),Year))
#GRAPH_DATA <- TS_DATA %>% filter(!is.na(Migration))
#GRAPH_DATA_LN <- TS_DATA %>% filter(!is.na(Migration),County=="Lincoln")
#ggplot(GRAPH_DATA,aes(x=Year,y=Migration/Prev_Pop,group=County,color=County))+geom_point()+geom_line(data=GRAPH_DATA_LN)
TS_WIDE <- TS_DATA %>% dplyr::select(Year,County,Migration) %>% pivot_wider(values_from=Migration,names_from=County) %>% arrange(Year) %>% filter(Year>ST_YEAR+1,Year<=END_YEAR) %>%ts(start=c(ST_YEAR+1),frequency=1)
LN <- TS_DATA %>% dplyr::select(Year,County,Migration) %>% pivot_wider(values_from=Migration,names_from=County) %>% arrange(Year) %>% dplyr::select(Lincoln,Year) %>% filter(Year>=ST_YEAR,Year<=END_YEAR) %>% dplyr::select(-Year) %>%ts(start=c(ST_YEAR),frequency=1)
#Create an ARIMA of Migration so the number of people migrating can be simulated
library("forecast")
#Time series tests
adf.test(LN,k=1) #Stationary with one lag, otherwise not stationary
kpss.test(LN) #Stationary,default of program and has some model fit improvements
MOD <- auto.arima(LN,test="kpss",stationary=TRUE)
summary(MOD)
#library(tseries)
#adf.test(LN,k=1) #Stationary with one lag, otherwise not stationary
#kpss.test(LN) #Stationary,default of program and has some model fit improvements
MOD <- auto.arima(LN,stationary=TRUE)
#summary(MOD)
#Validity tests
autoplot(MOD)
acf(resid(MOD))
pacf(resid(MOD))
#autoplot(MOD)
#acf(resid(MOD))
#pacf(resid(MOD))
# adf.test(resid(MOD))
checkresiduals(MOD)
#checkresiduals(MOD)
#Save the resulting model outputs, will need to be changed if looking at other counties
saveRDS(MOD,"Data/Regression_Results/LN_ARIMA_MODEL.Rds")
lapply(1:10^6,function(x){round(simulate(MOD,future=TRUE, nsim=50))})#testing a multiple run simulation could use parallel process
MIGRATION_ARIMA_SIMS <- (do.call(cbind,mclapply(1:NUM_SIMULATIONS,function(x){as.numeric(round(simulate(MOD,future=TRUE, nsim=NUM_YEARS_PROJECTED)))},mc.cores =detectCores()-1)))#testing a multiple run simulation could use parallel process
saveRDS(MIGRATION_ARIMA_SIMS,"Data/Simulated_Data_Sets/Migration_ARIMA.Rds")
write.csv(MIGRATION_ARIMA_SIMS,row.names=FALSE,"Data/Simulated_Data_Sets/Migration_ARIMA.csv")
#################################Simulate data
MIG_AGE_DIST
NUM_SIMS<-34
PROB <- MIG_AGE_DIST
MIG_SIM <- round(simulate(MOD,future=TRUE, nsim=50)) #Round up for whole numbers
NUM_SIMS <- abs(MIG_SIM[[1]])
INCREASE <- MIG_SIM[[1]]/abs(MIG_SIM[[1]]) #Check if positive or negative migration, as these are handled differently
if(INCREASE==1){MF_SAMPLE <- sample(x=c("Male","Female"),NUM_SIMS,replace=TRUE)}
sample(x=1:90,size=NUM_SIMS,prob=PROB,replace=TRUE)
C_DEMO_DATA <- DEMOGRAPHIC_DATA %>% filter(County=='Lincoln',Year==max(Year))
NUM_MALE <- pull(C_DEMO_DATA ,"Num_Male")
NUM_FEMALE <- pull(C_DEMO_DATA,"Num_Female")
####WORKING ON THE CASE WHEN WE ARE REMOVING INDIVIDUALS. IF THERE ARE NONE THEY SHOULD NOT MOVE. ON THE OTHER HAND IF MOVING IN THE AVERAGE VALUES WORK
PROB <- MIG_AGE_DIST
LN_DEMO_DATA <- DEMOGRAPHIC_DATA %>% filter(County=='Lincoln',Year==max(Year))
LN_DEMO_DATA$Age
MAKE_SET <- function(PROB_AGE_DIST,DEMOGRAPHIC_DATA){
SINGLE_AGE_RET <- function(x,PROB,DEMO_DATA){
C_PROB<- PROB[x]
C_LOOP <- DEMO_DATA[x+1,]
NUM_MALE <- pull(C_LOOP,Num_Male)
NUM_FEMALE <- pull(C_LOOP,Num_Female)
C_AGE <- C_LOOP$Age
MEN <- cbind(rep(C_AGE,NUM_MALE),rep("Male",NUM_MALE),rep(C_PROB,NUM_MALE))
WOMEN <- cbind(rep(C_AGE,NUM_FEMALE),rep("Female",NUM_FEMALE),rep(C_PROB,NUM_FEMALE))
RES <- rbind(MEN,WOMEN)
# colnames(RES) <- c("Age","Sex","Probability")
return(RES)
}
FINAL_OUT <-do.call(rbind,sapply(1:84,function(x){SINGLE_AGE_RET(x,PROB_AGE_DIST,DEMOGRAPHIC_DATA) })) %>% as_tibble
colnames(FINAL_OUT ) <- c("Age","Sex","Probability")
FINAL_OUT$Age <- as.numeric(FINAL_OUT$Age)
return(FINAL_OUT)
}
#A function to find the number of migrants leaving the county (net out), accounting for the fact that fewer or more people in any one age-sex bracket will decrease the odds of being the person to leave even if they are 18-19 and likely to leave.
OUT_MIGRATION_SIMULATION <- function(NUM_MIGRATED,MIG_AGE_DIST,DEMO_DATA){
CURRENT_POP <- MAKE_SET(MIG_AGE_DIST,DEMO_DATA)
NUM_POP <- nrow(CURRENT_POP)
#Rank all individuals to easily combine with the ordinal data set
ORD <- sample(1:NUM_POP,prob=pull(CURRENT_POP,Probablity),size=NUM_POP,replace=FALSE)
#Set the migration out status of all individuals to zero (staying in the county)
CURRENT_POP$Migrated <- 0
#The people drawn first are assumed to migrate up to the point where all migration is filled.
CURRENT_POP[ORD[1:NUM_MIGRATED],"Migrated"] <- -1
MIG_STATUS <- CURRENT_POP %>% group_by(Age,Sex) %>% summarize(Migrated=sum(Migrated)) %>% arrange(desc(Sex),Age) %>% ungroup
return(MIG_STATUS)
}
OUT_MIGRATION_SIMULATION(100,MIG_AGE_DIST,LN_DEMO_DATA)
#Function to find the number of migrants to a county, by age-sex when coming from outside the county.
IN_MIGRATION_SIMULATION <- function(NUM_MIGRATED,MIG_AGE_DIST,DEMO_DATA){
EMPTY_SET <- cbind(rep(1:90,2), c(rep("Male",90),rep("Female",90)),rep(0,180))
MIGRATED_AGE <- sample(1:90,prob=MIG_AGE_DIST,size=NUM_MIGRATED,replace=TRUE)
MIGRATED_SEX <- sample(c("Male","Female"),size=NUM_MIGRATED,replace=TRUE)
MIGRATED_GROUP <- cbind(MIGRATED_AGE,MIGRATED_SEX,rep(1,NUM_MIGRATED))
MIGRATED_DATA <- rbind(EMPTY_SET,MIGRATED_GROUP) %>% as_tibble
colnames(MIGRATED_DATA) <- c("Age","Sex","Migration")
MIGRATED_DATA$Age <- as.numeric(MIGRATED_DATA$Age)
MIGRATED_DATA$Migration <- as.numeric(MIGRATED_DATA$Migration)
MIGRATED_DATA <- MIGRATED_DATA %>% group_by(Age,Sex) %>% summarize(Migration=sum(Migration)) %>% ungroup %>% arrange(desc(Sex),Age)
return(MIGRATED_DATA)
}
IN_MIGRATION_SIMULATION(100,MIG_AGE_DIST,LN_DEMO_DATA)

View File

@ -0,0 +1,71 @@
#################################Simulate data
#A function used only inside the OUT_MIGRATION_SIMULATION function (see below). This creates a data set which has one entry for every individual in the county, with a assigned probability of moving out of the county. This allows the migration simulation to extract individuals based on true population distribution. When migration is net positive this does not matter, as anyone age-sex group can move in.
#DEMOGRAPHIC_DATA <- DEMO_DATA
#DEMOGRAPHIC_DATA <- START_DEM_DATA
MAKE_SET <- function(PROB_AGE_DIST,DEMOGRAPHIC_DATA){
# if(nrow(DEMOGRAPHIC_DATA)==86){DEMOGRAPHIC_DATA<- DEMOGRAPHIC_DATA[-1,]} #Drop age zero if it was included
SINGLE_AGE_RET <- function(x,PROB,DEMO_DATA){
C_PROB<- PROB[x]
C_LOOP <- DEMO_DATA[x+1,]
NUM_MALE <- pull(C_LOOP,Num_Male)
NUM_FEMALE <- pull(C_LOOP,Num_Female)
C_AGE <- C_LOOP$Age
MEN <- cbind(rep(C_AGE,NUM_MALE),rep("Male",NUM_MALE),rep(C_PROB,NUM_MALE))
WOMEN <- cbind(rep(C_AGE,NUM_FEMALE),rep("Female",NUM_FEMALE),rep(C_PROB,NUM_FEMALE))
RES <- rbind(MEN,WOMEN)
# colnames(RES) <- c("Age","Sex","Probability")
return(RES)
}
FINAL_OUT <-do.call(rbind,sapply(1:85,function(x){SINGLE_AGE_RET(x,PROB_AGE_DIST,DEMOGRAPHIC_DATA) })) %>% as_tibble
colnames(FINAL_OUT ) <- c("Age","Sex","Probability")
FINAL_OUT$Age <- as.numeric(FINAL_OUT$Age)
return(FINAL_OUT)
}
#A function to find the number of migrants leaving the county (net out), accounting for the fact that fewer or more people in any one age-sex bracket will decrease the odds of being the person to leave even if they are 18-19 and likely to leave.
OUT_MIGRATION_SIMULATION <- function(NUM_MIGRATED,MIG_AGE_DIST,DEMO_DATA){
CURRENT_POP <- MAKE_SET(MIG_AGE_DIST,DEMO_DATA)
NUM_POP <- nrow(CURRENT_POP)
#Rank all individuals to easily combine with the ordinal data set
ORD <- sample(1:NUM_POP,prob=pull(CURRENT_POP,Probability),size=NUM_POP,replace=FALSE)
#Set the migration out status of all individuals to zero (staying in the county)
CURRENT_POP$Migrated <- 0
#The people drawn first are assumed to migrate up to the point where all migration is filled.
CURRENT_POP[ORD[1:NUM_MIGRATED],"Migrated"] <- -1
MIG_STATUS <- CURRENT_POP %>% group_by(Age,Sex) %>% summarize(Migrated=sum(Migrated)) %>% arrange(desc(Sex),Age) %>% ungroup
return(MIG_STATUS)
}
#Function to find the number of migrants to a county, by age-sex when coming from outside the county.
#NUM_MIGRATED <- TOTAL_MIGRATION;
IN_MIGRATION_SIMULATION <- function(NUM_MIGRATED,MIG_AGE_DIST){
NUM_MIGRATED <- abs(NUM_MIGRATED)
NUM_AGES <- 85
EMPTY_SET <- cbind(rep(1:NUM_AGES,2), c(rep("Male",NUM_AGES),rep("Female",NUM_AGES)),rep(0,2*NUM_AGES))
colnames(EMPTY_SET) <- c("Age","Sex","Migrated")
if(NUM_MIGRATED==0){return(EMPTY_SET)}
MIGRATED_AGE <- sample(1:NUM_AGES,prob=MIG_AGE_DIST,size=NUM_MIGRATED,replace=TRUE)
MIGRATED_SEX <- sample(c("Male","Female"),size=NUM_MIGRATED,replace=TRUE)
MIGRATED_GROUP <- cbind(MIGRATED_AGE,MIGRATED_SEX,rep(1,NUM_MIGRATED))
MIGRATED_DATA <- rbind(EMPTY_SET,MIGRATED_GROUP) %>% as_tibble
colnames(MIGRATED_DATA) <- c("Age","Sex","Migrated")
MIGRATED_DATA$Age <- as.numeric(MIGRATED_DATA$Age)
MIGRATED_DATA$Migrated <- as.numeric(MIGRATED_DATA$Migrated)
MIGRATED_DATA <- MIGRATED_DATA %>% group_by(Age,Sex) %>% summarize(Migrated=sum(Migrated)) %>% ungroup %>% arrange(desc(Sex),Age)
return(MIGRATED_DATA)
}
# MIG_AGE_DIST <- Migration_Age_Distribution;DEMO_DATA <- START_DETAILED_DATA;NET_MIGRATION <- TOTAL_MIGRATION
MIGRATION_SIMULATION <- function(MIG_AGE_DIST,DEMO_DATA,NET_MIGRATION){
#MIG_AGE_DIST: A distribution of probabilities of any one age of person to migrate to or out of a county used to grab a reasonable rate of in/out migration by age from a total migration number
#DEMO_DATA: A data set which contains the number of people in each sex-age category (ex Male 30, or Female 82)
#TOTAL_MIGRATION_ARIMA_SIM: A single simulated path of a ARIMA simulation of net migration in the county.
#YEARS_AHEAD_OF_SIMULATION: The number of years forward from the start of the ARIMA to extract as total migration.
if(NET_MIGRATION<0){
RES <- OUT_MIGRATION_SIMULATION(abs(NET_MIGRATION),MIG_AGE_DIST,DEMO_DATA)
} else{
RES <- IN_MIGRATION_SIMULATION(NET_MIGRATION,MIG_AGE_DIST)}
return(RES %>% pivot_wider(values_from=Migrated,names_from=Sex,names_prefix="Num_"))
}

View File

@ -1,13 +1,15 @@
#Uncomment to check the function line by line
#REG_BIRTH_MODEL=MOD_BIRTHS;START_BASIC_DATA=FIRST_PREDICT_YEAR_POPULATION_DATA;START_DETAILED_DATA=START_DEM_DATA;Mortality_Rate_Sim=MORTALITY_SIMULATION;SIM_NUMBER=1;START_OF_SIM_YEAR=2023
# REG_BIRTH_MODEL=MOD_BIRTHS;START_BASIC_DATA=FIRST_PREDICT_YEAR_POPULATION_DATA;START_DETAILED_DATA=START_DEM_DATA;Mortality_Rate_Sim=MORTALITY_SIMULATION;SIM_NUMBER=1;START_OF_SIM_YEAR=2023;Migration_ARIMA_Sim=MIGRATION_ARIMA_SIMULATION;Migration_Age_Distribution=MIG_AGE_DIST
RUN_SINGLE_SIM <- function(REG_BIRTH_MODEL,START_BASIC_DATA,START_DETAILED_DATA,Mortality_Rate_Sim,SIM_NUMBER,START_OF_SIM_YEAR=2023){
RUN_SINGLE_SIM <- function(REG_BIRTH_MODEL,START_BASIC_DATA,START_DETAILED_DATA,Mortality_Rate_Sim,SIM_NUMBER,START_OF_SIM_YEAR=2023,Migration_ARIMA_Sim,Migration_Age_Distribution ){
#REG_BIRTH_MODEL: Feols regression object of population model.
#START_BASIC_DATA: A single row of data, with information for the birth regression (Male_Birth_Group,PREV_BIRTH etc.)
#START_DETAILED_DATA: A data set, with the number of men and women at each individual age (zero to 85+)
#Mortality_Rate_Sim: A list object with a set of project future mortality rates by age. See ./Scripts/Mortality_Rate_Over_Time_Simulation_Function.r. By passing this in the simulation speed is increased significantly.
#SIM_NUMBER: The current Monte Carlo simulation being applied. This extracts the correct index of Mortality_Rate_Sim Object for the present simulation.
#START_OF_SIM_YEAR: This is the first year of data which requires a simulation. This allows for the time trend to be properly estimated as this depends on the number of years since the forecast began
#The simulates paths of of net migration using an ARIMA model, from the start year until the end year.
#Migration_Age_Distribution: A vector which has entries from age 1 to 90 of the assessed probability of migrating in a single year, for each net migrant in the county.
NEXT_BASIC_DATA <- START_BASIC_DATA #Create a data set for the data to feed into the next run.
C_BIRTHS <- BIRTH_SIM(REG_BIRTH_MODEL,START_BASIC_DATA)
NEXT_BASIC_DATA[,"PREV_TWO_BIRTH"] <- START_BASIC_DATA[,"PREV_BIRTH"]
@ -24,7 +26,12 @@ RUN_SINGLE_SIM <- function(REG_BIRTH_MODEL,START_BASIC_DATA,START_DETAILED_DATA,
#Run a preliminary Monte Carlo which estimates the future mortality rate, for each simulation and year of of Monte Carlo Simulation
YEARS_AHEAD <- max(START_BASIC_DATA[,'Year']-START_OF_SIM_YEAR,1) #Define the number of years forward from the simulation start based on the current year of analysis, and the user provided first year.
MORTALITY_C_RES <- MORTALITY_SIM(YEARS_AHEAD,SIM_NUMBER,START_DETAILED_DATA,FALSE,Mortality_Rate_Sim )
TOTAL_MIGRATION <- Migration_ARIMA_Sim[YEARS_AHEAD,SIM_NUMBER]
START_BASIC_DATA[,"Migration"] <- TOTAL_MIGRATION
MIG <- MIGRATION_SIMULATION(Migration_Age_Distribution,START_DETAILED_DATA,TOTAL_MIGRATION)
MORTALITY_INPUT_DETAILED_DATA <- START_DETAILED_DATA
MORTALITY_INPUT_DETAILED_DATA[-1,c("Num_Male","Num_Female")] <- MORTALITY_INPUT_DETAILED_DATA[-1,c("Num_Male","Num_Female")]+MIG[,c("Num_Male","Num_Female")]
MORTALITY_C_RES <- MORTALITY_SIM(YEARS_AHEAD,SIM_NUMBER,MORTALITY_INPUT_DETAILED_DATA,FALSE,Mortality_Rate_Sim )
#Update number of deaths in the current run (which should be blank when supplied to the function)
START_BASIC_DATA[,"Deaths"] <- sum(MORTALITY_C_RES[,c("Male_Deaths","Female_Deaths")] )
#