Population_Study/Scripts/Load_Custom_Functions/Migration_Simulation_Functions.r
2025-11-18 17:19:41 -07:00

105 lines
9.5 KiB
R

#Script to increment the migration, and death data to start simulation in 2024, projecting 2025
library(tidyverse)
#setwd("../../")
#When migrants are leaving the area of study the current distention of ages and sex needs to be accounted for because people cannot leave who do not already live in the area. As a result two functions are made one looks at cases when the net migration is out, and the other looks at net migration in. In the later the identified distention of migrants by age is used with no direct tie to the current demographics
#Function for net migration out of the area. Returns a new age-sex demographics table matching the input demographic data format
MIGRATED_OUT <- function(DEMOGRAPHIC_DATASET,NUM_MIGRATED_OUT,ODDS_MIGRATED){
POP_TOTAL <- sum(DEMOGRAPHIC_DATASET)
if(POP_TOTAL>NUM_MIGRATED_OUT){
#Populate a matrix used to draw from with sample. One entry is included for each person in the demographic set.
RES_MAT <- matrix(NA,nrow=POP_TOTAL,ncol=2)
#
RES_MAT[,2] <- c(rep(1,sum(DEMOGRAPHIC_DATASET[,1])),rep(2,sum(DEMOGRAPHIC_DATASET[,2])))
DEM_LIST <- as.vector(DEMOGRAPHIC_DATASET)
#To speed up the process keep everything as a matrix or vector. In this case the odds of migrating by age are repeated for men and women, because sex was not found to be a major factor in migration odds.
ODDS_SET <- rep(ODDS_MIGRATED,2)
ODDS_RES <- matrix(NA,ncol=1,nrow=POP_TOTAL) #To store a list of odds for each person, to feed to sample
#There ages 0 to 85 for men and women is 172 rows
for(i in 1:172){
#Populate from the first entry that is NA
ST <- min(which(is.na(RES_MAT)))
NUM_REP <- DEM_LIST[i]
#Skip if there are zero people in the group
if(NUM_REP>0){
#Populate a entry that stores the index (Age 1-86 becomes 0-85, and Sex either 1 or 2). Using the index is done to speed up computation
RES_MAT[ST:(NUM_REP+ST-1)] <- rep(i,NUM_REP )
ODDS_RES[ST:(NUM_REP+ST-1)] <- rep(ODDS_SET[i],NUM_REP)
}
}
#Because the index of the matrix is stored a entry of 87 in the first column is girl age zero. User some logic to convert this to actual age, men are unchanged women need to be shifted down
RES_MAT[,1] <- RES_MAT[,1]-86*(RES_MAT[,2]-1)
#Using the matrix of each individual sample who WILL NOT MIGRATE OUT. To do this the odds are reversed, and the number sampled is the total population minus the net leaving migration. The remaining individuals out is created because this makes it easier to return an output of current demographics after migration.
NOT_MIGRATED <- RES_MAT[sample(1:nrow(RES_MAT),POP_TOTAL-NUM_MIGRATED_OUT,prob=1-ODDS_RES,replace=FALSE),]
#Corner case error found that if there is exactly one row, a vector is created and cannot be named. This case is manually corrected
if(POP_TOTAL-NUM_MIGRATED_OUT==1){
#Use the single row output to maniple a one row matrix
OUT <- as.matrix(t(NOT_MIGRATED),nrow=1,ncol=2)
rownames(OUT) <- OUT[1]-1 #The row index is one higher than the age for example age zero is index one. Naming based on age
MALE_FEMALE <- OUT[2] #There is only either a single man or women. Identify if they are male or female to properly create the row adding zero for the other value
OUT[MALE_FEMALE] <- 1 #The value of the sex indicating column should be one for the single person selected
OUT[-MALE_FEMALE] <- 0 #The other column should be set to zero as there is only one person.
OUT[1] <- 1 #This is currently the age of the person (+1 as an index) but should be the number of people to stay which in this case must be one.
}else{
#In most cases (number of people left >1) a matrix should be created which pulls a single entry of age and sex for each person who stays in the region after net out migration occurs.
OUT <- table(NOT_MIGRATED[,1],NOT_MIGRATED[,2]) #This creates a table with the number of unique entries for each age-sex combination to report.
#A corner case error was identified if one of the sex's has no population. For example only Men of age 20 exist. In that case only one column for sex is generated from the "table" command. The missing column is added back with values of zero to match the output of all other possible combinations. This allows the simulation to run without requiring logic to adjust the output demotions.
if(ncol(OUT)==1){
NEW_COL_INDEX <- as.character(which(!(c(1,2) %in% colnames(OUT) ))) #Find whether 1 or 2 (men or women) need to be added back as a column
OUT <- cbind(OUT,rep(0,nrow(OUT))) #Add values of zero in the second column. We do not yet know if this is men or women, but one of the two must be zero to reach this part of the code
colnames(OUT) <- c(colnames(OUT)[1],NEW_COL_INDEX) #Rename the columns such that they match the correct male or female record. In the case where women are left the columns are out of order, but the column names are now correct.
OUT <- OUT[,c("1","2")] #Make sure the columns are selected in a order consistent with all other outputs. This is redundant if correct, but flips the order to be correct when only women are left.
}
#
rownames(OUT) <- as.numeric(rownames(OUT))-1 #The row names are currently an index ranging from 1:86 but should be ages 0 to 85. So subtract one.
}
colnames(OUT) <- c("Num_Male","Num_Female" )
#Identify if one of the age brackets has no values. If that is the case it will not show up in the output matrix, but should be listed in the demographics as zero men and zero women to make the row numbers of each output identical making downstream result management much easier.
if(nrow(OUT)<86){
ZERO_ROWS <- 0:85
ZERO_ROWS <- ZERO_ROWS[!(0:85 %in% rownames(OUT))] #Find which ages are missing from the output
OUT <- rbind(OUT,matrix(0,nrow=length(ZERO_ROWS),ncol=2,dimnames=list(ZERO_ROWS,colnames(OUT)))) #Create a new matrix with zeros for the missing ages
OUT <- OUT[as.character(sort(as.numeric(rownames(OUT)))),] #Combine the zero matrix with the matrix to the main simulated result, and then sort based on the row name to make sure the ages are lined up correctly 0 to 85 so that each output is identical and can be added or subtracted properly.
}
} else{
OUT <- matrix(0,nrow=86,ncol=2,dimnames=list(0:85,colnames(DEMOGRAPHIC_DATASET)))} #In the corner case that more people leave the area than present live there set all values to zero and return this in the same format as any other output.
return(OUT)
}
#Function to account for net migration into the county. Returns a new age-sex demographics table matching the input demographic data format
MIGRATED_IN <- function(DEMOGRAPHIC_DATASET,NUM_MIGRATED_IN,ODDS_MIGRATED){
#Based on historic data sample possible migration additions by age with higher probabilities being assigned to ages such as 19 and 20, than 80 or 85. Also randomly draw if the person moving in is a man or women with a 50/50 split.
#Using table to find the total number of each age sex combination from this simulation.
OUT <- table(sample(0:85,NUM_MIGRATED_IN,prob=ODDS_MIGRATED,replace=TRUE),sample(1:2,NUM_MIGRATED_IN,replace=TRUE))
#OUT_ORIG <- OUT
#OUT <- OUT_ORIG
#A corner case error was identified if one of the sex's has no population. For example only Men of age 20 exist. In that case only one column for sex is generated from the "table" command. The missing column is added back with values of zero to match the output of all other possible combinations. This allows the simulation to run without requiring logic to adjust the output demotions.
if(ncol(OUT)==1){
NEW_COL_INDEX <- as.character(which(!(c(1,2) %in% colnames(OUT) ))) #Find whether 1 or 2 (men or women) need to be added back as a column
OUT <- cbind(OUT,rep(0,nrow(OUT))) #Add values of zero in the second column. We do not yet know if this is men or women, but one of the two must be zero to reach this part of the code
colnames(OUT) <- c(colnames(OUT)[1],NEW_COL_INDEX) #Rename the columns such that they match the correct male or female record. In the case where women are left the columns are out of order, but the column names are now correct.
ROW_NAME <- rownames(OUT)
if(nrow(OUT)==1){OUT <- t(OUT[1,c("1","2")])} #Make sure the columns are selected in a order consistent with all other outputs. This is redundant if correct, but flips the order to be correct when only women are left.
rownames(OUT) <- ROW_NAME
}
#Pull the correct rows of the demographics table using the row names. The names are a character set going from 0-85 ages, while the index is 1:86, so make sure to use the charter names. Add these new immigrants to the existing demographics of age-sex combinations.
DEMOGRAPHIC_DATASET[as.character(rownames(OUT)),] <- DEMOGRAPHIC_DATASET[as.character(rownames(OUT)),]+OUT
return(DEMOGRAPHIC_DATASET)
}
#Combine both functions for easier coding later on, allowing one function to be called in all scenarios
DEMOGRAPHICS_AFTER_MIGRATION <- function(DEMOGRAPHIC_DATASET,NUM_MIGRATED,ODDS_MIGRATED){
if(NUM_MIGRATED==0){return(DEMOGRAPHIC_DATASET)} #If there are no changes return the input. This allows for easier code that does not need to check if zero migration occurs on the back end.
if(NUM_MIGRATED>0){return(MIGRATED_IN(DEMOGRAPHIC_DATASET,NUM_MIGRATED,ODDS_MIGRATED))}else{return(MIGRATED_OUT(DEMOGRAPHIC_DATASET,abs(NUM_MIGRATED),ODDS_MIGRATED))}#Decide the correct function to call depending on if net migration is positive or negative
}
#Test results
#ODDS_LEAVE <- readRDS("Data/Intermediate_Inputs/Migration_Trends/Migration_Age_Probability_Zero_to_85.Rds")
#KEM_DEMOGRAPHIC <- readRDS("Data/Intermediate_Inputs/Starting_Demographic_Data_Sets_of_Monte_Carlo/2023_Starting_Kemmerer_Diamondville_Demographics_Matrix.Rds")
#DEMOGRAPHIC_DATASET <- KEM_DEMOGRAPHIC
#MIGRATED_IN(KEM_DEMOGRAPHIC,3,ODDS_LEAVE)-KEM_DEMOGRAPHIC