Complete Migration function. Simulate 2024 data

This commit is contained in:
Alex Gebben Work 2025-11-18 17:19:41 -07:00
parent ef3e529063
commit d84d6069bd
6 changed files with 143 additions and 147 deletions

View File

@ -8,8 +8,6 @@ Rscript "./Scripts/2A_Birth_Rate_Regression_and_Impart_Kemmerer_Births.r"
Rscript "./Scripts/2B_Impart_Deaths_and_Migration_to_Subregions.r" Rscript "./Scripts/2B_Impart_Deaths_and_Migration_to_Subregions.r"
Rscript "./Scripts/2C_Migration_by_Age_Regression.r" Rscript "./Scripts/2C_Migration_by_Age_Regression.r"
Rscript "./Scripts/2D_Overall_Migration_ARIMA.r" Rscript "./Scripts/2D_Overall_Migration_ARIMA.r"
Rscript "./Scripts/2E_Move_Current_Demographic_Data_to_Current_Year.r"
#Produce final results for either the simulation, or information for the report, but not anything used in later stages of the simulation #Produce final results for either the simulation, or information for the report, but not anything used in later stages of the simulation
Rscript "./Scripts/3A_Population_Pyramid.r" Rscript "./Scripts/3A_Population_Pyramid.r"

View File

@ -141,7 +141,7 @@ ST_REG_DATA <- REG_REDUCED_DATA %>% filter(Region=='Lincoln') %>% filter(Year==m
saveRDS(REG_REDUCED_DATA,paste0(SAVE_DATA_LOC,"/Birth_Regression_Input_Data.Rds")) saveRDS(REG_REDUCED_DATA,paste0(SAVE_DATA_LOC,"/Birth_Regression_Input_Data.Rds"))
write_csv(REG_REDUCED_DATA,paste0(SAVE_DATA_LOC,"/Birth_Regression_Input_Data.csv")) write_csv(REG_REDUCED_DATA,paste0(SAVE_DATA_LOC,"/Birth_Regression_Input_Data.csv"))
##########Update existing demogrpahic data with new births ##########Update existing demographic data with new births
KEM_DATA_NEW <- REG_DATA %>% filter(Region=='Kemmerer & Diamondville') %>% select(Year,County,Region,Population,Births,Deaths,Migration) KEM_DATA_NEW <- REG_DATA %>% filter(Region=='Kemmerer & Diamondville') %>% select(Year,County,Region,Population,Births,Deaths,Migration)
PRED_KEM_BIRTH_2024 <- REG_DATA %>% filter(Region=='Kemmerer & Diamondville',Year>=2023) %>% mutate(Min_Birth_Group=lag(Min_Birth_Group)) %>% filter(Year==2024) PRED_KEM_BIRTH_2024 <- REG_DATA %>% filter(Region=='Kemmerer & Diamondville',Year>=2023) %>% mutate(Min_Birth_Group=lag(Min_Birth_Group)) %>% filter(Year==2024)
KEM_DATA_NEW[KEM_DATA_NEW$Year==2024,"Births"] <- round(exp(predict(MOD_BIRTHS,PRED_KEM_BIRTH_2024))) KEM_DATA_NEW[KEM_DATA_NEW$Year==2024,"Births"] <- round(exp(predict(MOD_BIRTHS,PRED_KEM_BIRTH_2024)))

View File

@ -35,7 +35,7 @@ LIN_POP <- LIN_POP %>% left_join(LIN_PRED) %>% mutate(Deaths=ifelse(is.na(Deat
KEM_POP <- readRDS(KEM_POP_LOC) KEM_POP <- readRDS(KEM_POP_LOC)
KEM_DEATHS <- LIN_POP %>% select(Year,Deaths) %>% left_join(Death_Adj) %>% left_join(KEM_PRED) %>% mutate(Deaths=round(Deaths*Kem_Death_Ratio)) %>% select(Year,Deaths) KEM_DEATHS <- LIN_POP %>% select(Year,Deaths) %>% left_join(Death_Adj) %>% left_join(KEM_PRED) %>% mutate(Deaths=round(Deaths*Kem_Death_Ratio)) %>% select(Year,Deaths)
KEM_POP <- KEM_POP%>% select(-Deaths) %>% left_join(KEM_DEATHS) %>% mutate(BD=Population+Births-Deaths) %>% mutate(Missing=Population-lag(BD),Missing=lead(Missing),Migration=ifelse(is.na(Migration),Missing,Migration))%>% mutate(NEXT=Population+Births-Deaths+Migration)%>% arrange(desc(Year)) %>% select(colnames(LIN_POP)) KEM_POP <- KEM_POP%>% select(-Deaths) %>% left_join(KEM_DEATHS) %>% mutate(BD=Population+Births-Deaths) %>% mutate(Missing=Population-lag(BD),Missing=lead(Missing),Migration=ifelse(is.na(Migration),Missing,Migration))%>% mutate(NEXT=Population+Births-Deaths+Migration)%>% arrange(desc(Year)) %>% select(colnames(LIN_POP))
#Rond up any values to match the fact that only whole people can exists #Round up any values to match the fact that only whole people can exists
KEM_POP$Population <- round(KEM_POP$Population) KEM_POP$Population <- round(KEM_POP$Population)
###Estimate the number of deaths in the other parts of Lincoln (not Kemmerer) based on the total deaths, and the predicted share of deaths ###Estimate the number of deaths in the other parts of Lincoln (not Kemmerer) based on the total deaths, and the predicted share of deaths

View File

@ -106,5 +106,5 @@ MIG_AGE_DIST <- MIG_AGE_DIST/sum(MIG_AGE_DIST)
if(!exists("RES_SAVE_LOC")){RES_SAVE_LOC <- "./Data/Intermediate_Inputs/Migration_Trends/"} if(!exists("RES_SAVE_LOC")){RES_SAVE_LOC <- "./Data/Intermediate_Inputs/Migration_Trends/"}
dir.create(RES_SAVE_LOC , recursive = TRUE, showWarnings = FALSE) dir.create(RES_SAVE_LOC , recursive = TRUE, showWarnings = FALSE)
write.csv(MIG_AGE_DIST ,paste0(RES_SAVE_LOC,"Migration_Age_Probablity_Zero_to_85.csv"),row.names=FALSE) write.csv(MIG_AGE_DIST ,paste0(RES_SAVE_LOC,"Migration_Age_Probability_Zero_to_85.csv"),row.names=FALSE)
saveRDS(MIG_AGE_DIST ,paste0(RES_SAVE_LOC,"Migration_Age_Probablity_Zero_to_85.Rds")) saveRDS(MIG_AGE_DIST ,paste0(RES_SAVE_LOC,"Migration_Age_Probability_Zero_to_85.Rds"))

View File

@ -1,79 +1,49 @@
library(tidyverse) library(tidyverse)
library(parallel)
#setwd("../") #setwd("../")
#Script to increment the migration, and death data to start simulation in 2024, projecting 2025 #Script to increment the demographic data by age and sex up from 2023 to 2024 using the birth,migration, and death estiamtes in 2023. This is done to start the projection of the Monte Carlo in 2025. Since the actual population is known in 2024 (but not the deaths, births and migration. It is worth estimating these to keep more precise data matching migration to match the population
ODDS_LEAVE <- readRDS("Data/Intermediate_Inputs/Migration_Trends/Migration_Age_Probablity_Zero_to_85.Rds") ODDS_LEAVE <- readRDS("Data/Intermediate_Inputs/Migration_Trends/Migration_Age_Probability_Zero_to_85.Rds")
KEM_DEMOGRAPHIC <- readRDS("Data/Intermediate_Inputs/Starting_Demographic_Data_Sets_of_Monte_Carlo/2023_Starting_Kemmerer_Diamondville_Demographics_Matrix.Rds") KEM_DEMOGRAPHIC <- readRDS("Data/Intermediate_Inputs/Starting_Demographic_Data_Sets_of_Monte_Carlo/2023_Starting_Kemmerer_Diamondville_Demographics_Matrix.Rds")
NUM_MIGRATED_OUT<- readRDS("Data/Cleaned_Data/Population_Data/RDS/Kemmerer_Diamondville_Population_Data.Rds") %>% filter(Year==2023) %>% pull(Migration) %>% abs POPULATION <- readRDS("Data/Cleaned_Data/Population_Data/RDS/Kemmerer_Diamondville_Population_Data.Rds") %>% filter(Year>=2023)
PRED_BIRTH <- POPULATION %>% filter(Year==2024) %>% pull(Births)
MORT <- readRDS("Data/Cleaned_Data/Mortality_Rate_Data/RDS/Lincoln_County_Mortality_Rates.Rds")
source("Scripts/Load_Custom_Functions/Migration_Simulation_Functions.r")
KEM_DEMOGRAPHIC["84",] <- colSums(KEM_DEMOGRAPHIC[c("84","85"),])
KEM_DEMOGRAPHIC <- KEM_DEMOGRAPHIC[1:85,]
#Manually add estimated new births 31 total, small assumption made that 15 are boys and 16 are girls.
KEM_DEMOGRAPHIC <- rbind(c(15,15),KEM_DEMOGRAPHIC )
rownames(KEM_DEMOGRAPHIC) <- 0:85
INDEX <- as.numeric(rownames(KEM_DEMOGRAPHIC))
MIGRATED_OUT <- function(KEM_CURRENT,NUM_MIGRATED_OUT,ODDS_MIGRATED){ for(i in INDEX){
POP_TOTAL <- sum(KEM_CURRENT) C_VAL <- MORT %>% filter(Min_Age<=i,Max_Age>=i) %>% select(Sex,Death_Rate) %>% pivot_wider(values_from=Death_Rate,names_from=Sex,names_prefix="Death_Rate_")
if(POP_TOTAL>NUM_MIGRATED_OUT){ if(i==0){RES <-C_VAL}else{RES <- rbind(RES,C_VAL)}
RES_MAT <- matrix(NA,nrow=POP_TOTAL,ncol=2)
RES_MAT[,2] <- c(rep(1,sum(KEM_CURRENT[,1])),rep(2,sum(KEM_CURRENT[,2])))
DEM_LIST <- as.vector(KEM_CURRENT)
ODDS_SET <- rep(ODDS_MIGRATED,2)
ODDS_RES <- matrix(NA,ncol=1,nrow=POP_TOTAL)
for(i in 1:172){
ST <- min(which(is.na(RES_MAT)))
NUM_REP <- DEM_LIST[i]
if(NUM_REP>0){
RES_MAT[ST:(NUM_REP+ST-1)] <- rep(i,NUM_REP )
ODDS_RES[ST:(NUM_REP+ST-1)] <- rep(ODDS_SET[i],NUM_REP)
}
}
RES_MAT[,1] <- RES_MAT[,1]-86*(RES_MAT[,2]-1)
NOT_MIGRATED <- RES_MAT[sample(1:nrow(RES_MAT),POP_TOTAL-NUM_MIGRATED_OUT,prob=1-ODDS_RES,replace=FALSE),]
if(POP_TOTAL-NUM_MIGRATED_OUT==1){
OUT <- as.matrix(t(NOT_MIGRATED),nrow=1,ncol=2)
rownames(OUT) <- OUT[1]-1
MALE_FEMALE <- OUT[2]
OUT[MALE_FEMALE] <- 1
OUT[-MALE_FEMALE] <- 0
OUT[1] <- 1
}else{
OUT <- table(NOT_MIGRATED[,1],NOT_MIGRATED[,2])
if(ncol(OUT)==1){
OUT_ORIG <- OUT
NEW_COL_INDEX <- as.character(which(!(c(1,2) %in% colnames(OUT) )))
OUT <- cbind(OUT,rep(0,nrow(OUT)))
colnames(OUT) <- c(colnames(OUT)[1],NEW_COL_INDEX)
OUT <- OUT[,c("1","2")]
}
rownames(OUT) <- as.numeric(rownames(OUT))-1
}
colnames(OUT) <- c("Num_Male","Num_Female" )
if(nrow(OUT)<86){
ZERO_ROWS <- 0:85
ZERO_ROWS <- ZERO_ROWS[!(0:85 %in% rownames(OUT))]
OUT <- rbind(OUT,matrix(0,nrow=length(ZERO_ROWS),ncol=2,dimnames=list(ZERO_ROWS,colnames(OUT))))
OUT <- OUT[as.character(sort(as.numeric(rownames(OUT)))),]
}
} else{
OUT <- matrix(0,nrow=86,ncol=2,dimnames=list(0:85,colnames(KEM_CURRENT)))}
return(OUT)
} }
ODDS_LEAVE <- readRDS("Data/Intermediate_Inputs/Migration_Trends/Migration_Age_Probablity_Zero_to_85.Rds") RES <- as.matrix(RES,ncol=2)
KEM_DEMOGRAPHIC <- readRDS("Data/Intermediate_Inputs/Starting_Demographic_Data_Sets_of_Monte_Carlo/2023_Starting_Kemmerer_Diamondville_Demographics_Matrix.Rds") NUM_DEATHS <- round(sum(KEM_DEMOGRAPHIC*RES))
NUM_MIGRATED_OUT<- readRDS("Data/Cleaned_Data/Population_Data/RDS/Kemmerer_Diamondville_Population_Data.Rds") %>% filter(Year==2023) %>% pull(Migration) %>% abs sum(round(KEM_DEMOGRAPHIC*RES+0.204))
DEATH_MAT <- round(KEM_DEMOGRAPHIC*RES+0.205) #Manually Manipulated factor to make deaths round up to the desired number (27), however, only 30 or 25 is possible, so it needs one more adjustment
DEATH_MAT[67:69,"Num_Female"] <- 0
sum(DEATH_MAT)
KEM_DEMOGRAPHIC <- KEM_DEMOGRAPHIC - DEATH_MAT
############Apply a migration out of 33 people in Kemmerer
source("Scripts/Load_Custom_Functions/Migration_Simulation_Functions.r")
#Run many simulations to pick a plausible distribution of people leaving
NUM_RUNS <- 10^7
#In Parallel run the migration simulation many times, to find a distribution of migration given 2023 data.
#Find the average number of migrants leaving by age-sex using the Reduce function to collapse the list, and then dividing by number of runs selected.
RES <- Reduce('+',mclapply(1:NUM_RUNS,function(x){return(KEM_DEMOGRAPHIC-DEMOGRAPHICS_AFTER_MIGRATION(KEM_DEMOGRAPHIC,-33,ODDS_LEAVE)) },mc.cores = detectCores()-1))/NUM_RUNS
#Add a factor such that the sum of all migration adds up to the desired 33. The average found should sum to 33, but when rounding is zero since each sub category sex-age is less than 50% chance of a migration
FACTOR <- 0.225
33-sum(round(RES+FACTOR))
KEM_DEMOGRAPHIC_NEW <- KEM_DEMOGRAPHIC-round(RES+FACTOR)
saveRDS(KEM_DEMOGRAPHIC_NEW,"Data/Intermediate_Inputs/Starting_Demographic_Data_Sets_of_Monte_Carlo/2024_Starting_Kemmerer_Diamondville_Demographics_Matrix.Rds")
TEST <- KEM_DEMOGRAPHIC_NEW -KEM_DEMOGRAPHIC
##Perhaps I can be more clever. I could average the direct simulation estimate as done above, with the Kemmerer values found when estimating the other region, and then subtracting from the Lincoln Total.
OTHER_DEMOGRAPHIC_NEW <- readRDS("Data/Intermediate_Inputs/Starting_Demographic_Data_Sets_of_Monte_Carlo/2024_Starting_Lincoln_County_Demographics_Matrix.Rds")-KEM_DEMOGRAPHIC_NEW
saveRDS(OTHER_DEMOGRAPHIC_NEW ,"Data/Intermediate_Inputs/Starting_Demographic_Data_Sets_of_Monte_Carlo/2024_Starting_Kemmerer_Diamondville_Demographics_Matrix.Rds")
sum(KEM_DEMOGRAPHIC_NEW)
POPULATION
start_time <- Sys.time()
for(x in 1:round(10^6/23)){MIGRATED_OUT(KEM_DEMOGRAPHIC,NUM_MIGRATED_OUT,ODDS_LEAVE)}
end_time <- Sys.time()
end_time - start_time
head(KEM_CURRENT)
LENGTH <- length(KEM_CURRENT)
for(x in 1:10^6){
KEM_NEW <- KEM_CURRENT
for(i in 1:num_migrated){
MIGRATED <- sample(1:LENGTH,1,prob=1-(1-ODDS_MIGRATE)^KEM_CURRENT)
KEM_NEW[MIGRATED]
KEM_NEW[MIGRATED] <- KEM_NEW[MIGRATED]-1
}
# if(x==1){RES <- KEM_CURRENT-KEM_NEW} else{RES<- cbind(RES,KEM_NEW-KEM_CURRENT)}
}
plot((rowMeans(RES)))

View File

@ -1,76 +1,104 @@
#################################Simulate data #Script to increment the migration, and death data to start simulation in 2024, projecting 2025
#A function used only inside the OUT_MIGRATION_SIMULATION function (see below). This creates a data set which has one entry for every individual in the county, with a assigned probability of moving out of the county. This allows the migration simulation to extract individuals based on true population distribution. When migration is net positive this does not matter, as anyone age-sex group can move in. library(tidyverse)
#PROB_AGE_DIST <- Migration_Age_Distribution #setwd("../../")
#DEMOGRAPHIC_DATA <- START_DETAILED_DATA
MAKE_SET <- function(PROB_AGE_DIST,DEMOGRAPHIC_DATA){
# if(nrow(DEMOGRAPHIC_DATA)==86){DEMOGRAPHIC_DATA<- DEMOGRAPHIC_DATA[-1,]} #Drop age zero if it was included #When migrants are leaving the area of study the current distention of ages and sex needs to be accounted for because people cannot leave who do not already live in the area. As a result two functions are made one looks at cases when the net migration is out, and the other looks at net migration in. In the later the identified distention of migrants by age is used with no direct tie to the current demographics
SINGLE_AGE_RET <- function(x,PROB,DEMO_DATA){
C_PROB<- PROB[x]
C_LOOP <- DEMO_DATA[x+1,]
NUM_MALE <- pull(C_LOOP,Num_Male)
NUM_FEMALE <- pull(C_LOOP,Num_Female)
C_AGE <- C_LOOP$Age
MEN <- cbind(rep(C_AGE,NUM_MALE),rep("Male",NUM_MALE),rep(C_PROB,NUM_MALE))
WOMEN <- cbind(rep(C_AGE,NUM_FEMALE),rep("Female",NUM_FEMALE),rep(C_PROB,NUM_FEMALE))
RES <- rbind(MEN,WOMEN)
# colnames(RES) <- c("Age","Sex","Probability")
return(RES)
#Function for net migration out of the area. Returns a new age-sex demographics table matching the input demographic data format
MIGRATED_OUT <- function(DEMOGRAPHIC_DATASET,NUM_MIGRATED_OUT,ODDS_MIGRATED){
POP_TOTAL <- sum(DEMOGRAPHIC_DATASET)
if(POP_TOTAL>NUM_MIGRATED_OUT){
#Populate a matrix used to draw from with sample. One entry is included for each person in the demographic set.
RES_MAT <- matrix(NA,nrow=POP_TOTAL,ncol=2)
#
RES_MAT[,2] <- c(rep(1,sum(DEMOGRAPHIC_DATASET[,1])),rep(2,sum(DEMOGRAPHIC_DATASET[,2])))
DEM_LIST <- as.vector(DEMOGRAPHIC_DATASET)
#To speed up the process keep everything as a matrix or vector. In this case the odds of migrating by age are repeated for men and women, because sex was not found to be a major factor in migration odds.
ODDS_SET <- rep(ODDS_MIGRATED,2)
ODDS_RES <- matrix(NA,ncol=1,nrow=POP_TOTAL) #To store a list of odds for each person, to feed to sample
#There ages 0 to 85 for men and women is 172 rows
for(i in 1:172){
#Populate from the first entry that is NA
ST <- min(which(is.na(RES_MAT)))
NUM_REP <- DEM_LIST[i]
#Skip if there are zero people in the group
if(NUM_REP>0){
#Populate a entry that stores the index (Age 1-86 becomes 0-85, and Sex either 1 or 2). Using the index is done to speed up computation
RES_MAT[ST:(NUM_REP+ST-1)] <- rep(i,NUM_REP )
ODDS_RES[ST:(NUM_REP+ST-1)] <- rep(ODDS_SET[i],NUM_REP)
}
}
#Because the index of the matrix is stored a entry of 87 in the first column is girl age zero. User some logic to convert this to actual age, men are unchanged women need to be shifted down
RES_MAT[,1] <- RES_MAT[,1]-86*(RES_MAT[,2]-1)
#Using the matrix of each individual sample who WILL NOT MIGRATE OUT. To do this the odds are reversed, and the number sampled is the total population minus the net leaving migration. The remaining individuals out is created because this makes it easier to return an output of current demographics after migration.
NOT_MIGRATED <- RES_MAT[sample(1:nrow(RES_MAT),POP_TOTAL-NUM_MIGRATED_OUT,prob=1-ODDS_RES,replace=FALSE),]
#Corner case error found that if there is exactly one row, a vector is created and cannot be named. This case is manually corrected
if(POP_TOTAL-NUM_MIGRATED_OUT==1){
#Use the single row output to maniple a one row matrix
OUT <- as.matrix(t(NOT_MIGRATED),nrow=1,ncol=2)
rownames(OUT) <- OUT[1]-1 #The row index is one higher than the age for example age zero is index one. Naming based on age
MALE_FEMALE <- OUT[2] #There is only either a single man or women. Identify if they are male or female to properly create the row adding zero for the other value
OUT[MALE_FEMALE] <- 1 #The value of the sex indicating column should be one for the single person selected
OUT[-MALE_FEMALE] <- 0 #The other column should be set to zero as there is only one person.
OUT[1] <- 1 #This is currently the age of the person (+1 as an index) but should be the number of people to stay which in this case must be one.
}else{
#In most cases (number of people left >1) a matrix should be created which pulls a single entry of age and sex for each person who stays in the region after net out migration occurs.
OUT <- table(NOT_MIGRATED[,1],NOT_MIGRATED[,2]) #This creates a table with the number of unique entries for each age-sex combination to report.
#A corner case error was identified if one of the sex's has no population. For example only Men of age 20 exist. In that case only one column for sex is generated from the "table" command. The missing column is added back with values of zero to match the output of all other possible combinations. This allows the simulation to run without requiring logic to adjust the output demotions.
if(ncol(OUT)==1){
NEW_COL_INDEX <- as.character(which(!(c(1,2) %in% colnames(OUT) ))) #Find whether 1 or 2 (men or women) need to be added back as a column
OUT <- cbind(OUT,rep(0,nrow(OUT))) #Add values of zero in the second column. We do not yet know if this is men or women, but one of the two must be zero to reach this part of the code
colnames(OUT) <- c(colnames(OUT)[1],NEW_COL_INDEX) #Rename the columns such that they match the correct male or female record. In the case where women are left the columns are out of order, but the column names are now correct.
OUT <- OUT[,c("1","2")] #Make sure the columns are selected in a order consistent with all other outputs. This is redundant if correct, but flips the order to be correct when only women are left.
}
#
rownames(OUT) <- as.numeric(rownames(OUT))-1 #The row names are currently an index ranging from 1:86 but should be ages 0 to 85. So subtract one.
} }
FINAL_OUT <-do.call(rbind,sapply(1:85,function(x){SINGLE_AGE_RET(x,PROB_AGE_DIST,DEMOGRAPHIC_DATA) })) %>% as_tibble
colnames(FINAL_OUT ) <- c("Age","Sex","Probability")
FINAL_OUT$Age <- as.numeric(FINAL_OUT$Age)
return(FINAL_OUT)
}
#A function to find the number of migrants leaving the county (net out), accounting for the fact that fewer or more people in any one age-sex bracket will decrease the odds of being the person to leave even if they are 18-19 and likely to leave.
#DEMO_DATA <- START_DETAILED_DATA
#NUM_MIGRATED <- 29
#MIG_AGE_DIST <- Migration_Age_Distribution
OUT_MIGRATION_SIMULATION <- function(NUM_MIGRATED,MIG_AGE_DIST,DEMO_DATA){
MIG_AGE_DIST
CURRENT_POP <- MAKE_SET(MIG_AGE_DIST,DEMO_DATA)
NUM_POP <- nrow(CURRENT_POP)
#Rank all individuals to easily combine with the ordinal data set
ORD <- sample(1:NUM_POP,prob=pull(CURRENT_POP,Probability),size=NUM_POP,replace=FALSE)
#Set the migration out status of all individuals to zero (staying in the county)
CURRENT_POP$Migrated <- 0
#The people drawn first are assumed to migrate up to the point where all migration is filled.
CURRENT_POP[ORD[1:NUM_MIGRATED],"Migrated"] <- -1
MIG_STATUS <- CURRENT_POP %>% group_by(Age,Sex) %>% summarize(Migrated=sum(Migrated), .groups = 'drop')%>% arrange(desc(Sex),Age)
return(MIG_STATUS)
}
#Function to find the number of migrants to a county, by age-sex when coming from outside the county. colnames(OUT) <- c("Num_Male","Num_Female" )
#NUM_MIGRATED <- TOTAL_MIGRATION; #Identify if one of the age brackets has no values. If that is the case it will not show up in the output matrix, but should be listed in the demographics as zero men and zero women to make the row numbers of each output identical making downstream result management much easier.
#NUM_MIGRATED <- 0 if(nrow(OUT)<86){
IN_MIGRATION_SIMULATION <- function(NUM_MIGRATED,MIG_AGE_DIST){ ZERO_ROWS <- 0:85
NUM_MIGRATED <- abs(NUM_MIGRATED) ZERO_ROWS <- ZERO_ROWS[!(0:85 %in% rownames(OUT))] #Find which ages are missing from the output
NUM_AGES <- 85 OUT <- rbind(OUT,matrix(0,nrow=length(ZERO_ROWS),ncol=2,dimnames=list(ZERO_ROWS,colnames(OUT)))) #Create a new matrix with zeros for the missing ages
EMPTY_SET <- cbind(rep(1:NUM_AGES,2), c(rep("Male",NUM_AGES),rep("Female",NUM_AGES)),rep(0,2*NUM_AGES)) OUT <- OUT[as.character(sort(as.numeric(rownames(OUT)))),] #Combine the zero matrix with the matrix to the main simulated result, and then sort based on the row name to make sure the ages are lined up correctly 0 to 85 so that each output is identical and can be added or subtracted properly.
colnames(EMPTY_SET) <- c("Age","Sex","Migrated") }
if(NUM_MIGRATED==0){return(EMPTY_SET)} } else{
MIGRATED_AGE <- sample(1:NUM_AGES,prob=MIG_AGE_DIST,size=NUM_MIGRATED,replace=TRUE) OUT <- matrix(0,nrow=86,ncol=2,dimnames=list(0:85,colnames(DEMOGRAPHIC_DATASET)))} #In the corner case that more people leave the area than present live there set all values to zero and return this in the same format as any other output.
MIGRATED_SEX <- sample(c("Male","Female"),size=NUM_MIGRATED,replace=TRUE) return(OUT)
MIGRATED_GROUP <- cbind(MIGRATED_AGE,MIGRATED_SEX,rep(1,NUM_MIGRATED))
MIGRATED_DATA <- rbind(EMPTY_SET,MIGRATED_GROUP) %>% as_tibble
colnames(MIGRATED_DATA) <- c("Age","Sex","Migrated")
MIGRATED_DATA$Age <- as.numeric(MIGRATED_DATA$Age)
MIGRATED_DATA$Migrated <- as.numeric(MIGRATED_DATA$Migrated)
MIGRATED_DATA <- MIGRATED_DATA %>% group_by(Age,Sex) %>% summarize(Migrated=sum(Migrated),.groups = 'drop') %>% arrange(desc(Sex),Age)
return(MIGRATED_DATA)
}
# MIG_AGE_DIST <- Migration_Age_Distribution;DEMO_DATA <- START_DETAILED_DATA;NET_MIGRATION <- TOTAL_MIGRATION
MIGRATION_SIMULATION <- function(MIG_AGE_DIST,DEMO_DATA,NET_MIGRATION){
#MIG_AGE_DIST: A distribution of probabilities of any one age of person to migrate to or out of a county used to grab a reasonable rate of in/out migration by age from a total migration number
#DEMO_DATA: A data set which contains the number of people in each sex-age category (ex Male 30, or Female 82)
#TOTAL_MIGRATION_ARIMA_SIM: A single simulated path of a ARIMA simulation of net migration in the county.
#YEARS_AHEAD_OF_SIMULATION: The number of years forward from the start of the ARIMA to extract as total migration.
if(NET_MIGRATION<0){
RES <- OUT_MIGRATION_SIMULATION(abs(NET_MIGRATION),MIG_AGE_DIST,DEMO_DATA)
} else{
RES <- IN_MIGRATION_SIMULATION(NET_MIGRATION,MIG_AGE_DIST)}
return(RES %>% pivot_wider(values_from=Migrated,names_from=Sex,names_prefix="Num_"))
} }
#Function to account for net migration into the county. Returns a new age-sex demographics table matching the input demographic data format
MIGRATED_IN <- function(DEMOGRAPHIC_DATASET,NUM_MIGRATED_IN,ODDS_MIGRATED){
#Based on historic data sample possible migration additions by age with higher probabilities being assigned to ages such as 19 and 20, than 80 or 85. Also randomly draw if the person moving in is a man or women with a 50/50 split.
#Using table to find the total number of each age sex combination from this simulation.
OUT <- table(sample(0:85,NUM_MIGRATED_IN,prob=ODDS_MIGRATED,replace=TRUE),sample(1:2,NUM_MIGRATED_IN,replace=TRUE))
#OUT_ORIG <- OUT
#OUT <- OUT_ORIG
#A corner case error was identified if one of the sex's has no population. For example only Men of age 20 exist. In that case only one column for sex is generated from the "table" command. The missing column is added back with values of zero to match the output of all other possible combinations. This allows the simulation to run without requiring logic to adjust the output demotions.
if(ncol(OUT)==1){
NEW_COL_INDEX <- as.character(which(!(c(1,2) %in% colnames(OUT) ))) #Find whether 1 or 2 (men or women) need to be added back as a column
OUT <- cbind(OUT,rep(0,nrow(OUT))) #Add values of zero in the second column. We do not yet know if this is men or women, but one of the two must be zero to reach this part of the code
colnames(OUT) <- c(colnames(OUT)[1],NEW_COL_INDEX) #Rename the columns such that they match the correct male or female record. In the case where women are left the columns are out of order, but the column names are now correct.
ROW_NAME <- rownames(OUT)
if(nrow(OUT)==1){OUT <- t(OUT[1,c("1","2")])} #Make sure the columns are selected in a order consistent with all other outputs. This is redundant if correct, but flips the order to be correct when only women are left.
rownames(OUT) <- ROW_NAME
}
#Pull the correct rows of the demographics table using the row names. The names are a character set going from 0-85 ages, while the index is 1:86, so make sure to use the charter names. Add these new immigrants to the existing demographics of age-sex combinations.
DEMOGRAPHIC_DATASET[as.character(rownames(OUT)),] <- DEMOGRAPHIC_DATASET[as.character(rownames(OUT)),]+OUT
return(DEMOGRAPHIC_DATASET)
}
#Combine both functions for easier coding later on, allowing one function to be called in all scenarios
DEMOGRAPHICS_AFTER_MIGRATION <- function(DEMOGRAPHIC_DATASET,NUM_MIGRATED,ODDS_MIGRATED){
if(NUM_MIGRATED==0){return(DEMOGRAPHIC_DATASET)} #If there are no changes return the input. This allows for easier code that does not need to check if zero migration occurs on the back end.
if(NUM_MIGRATED>0){return(MIGRATED_IN(DEMOGRAPHIC_DATASET,NUM_MIGRATED,ODDS_MIGRATED))}else{return(MIGRATED_OUT(DEMOGRAPHIC_DATASET,abs(NUM_MIGRATED),ODDS_MIGRATED))}#Decide the correct function to call depending on if net migration is positive or negative
}
#Test results
#ODDS_LEAVE <- readRDS("Data/Intermediate_Inputs/Migration_Trends/Migration_Age_Probability_Zero_to_85.Rds")
#KEM_DEMOGRAPHIC <- readRDS("Data/Intermediate_Inputs/Starting_Demographic_Data_Sets_of_Monte_Carlo/2023_Starting_Kemmerer_Diamondville_Demographics_Matrix.Rds")
#DEMOGRAPHIC_DATASET <- KEM_DEMOGRAPHIC
#MIGRATED_IN(KEM_DEMOGRAPHIC,3,ODDS_LEAVE)-KEM_DEMOGRAPHIC