From d84d6069bd77fed1a671c13749e43d0924a2850d Mon Sep 17 00:00:00 2001 From: Alex Gebben Work Date: Tue, 18 Nov 2025 17:19:41 -0700 Subject: [PATCH] Complete Migration function. Simulate 2024 data --- Prelim_Process.sh | 4 +- ...te_Regression_and_Impart_Kemmerer_Births.r | 2 +- ...mpart_Deaths_and_Migration_to_Subregions.r | 2 +- Scripts/2C_Migration_by_Age_Regression.r | 4 +- ...Current_Demographic_Data_to_Current_Year.r | 112 +++++------- .../Migration_Simulation_Functions.r | 166 ++++++++++-------- 6 files changed, 143 insertions(+), 147 deletions(-) diff --git a/Prelim_Process.sh b/Prelim_Process.sh index cdf4291..d94f1c5 100644 --- a/Prelim_Process.sh +++ b/Prelim_Process.sh @@ -8,8 +8,6 @@ Rscript "./Scripts/2A_Birth_Rate_Regression_and_Impart_Kemmerer_Births.r" Rscript "./Scripts/2B_Impart_Deaths_and_Migration_to_Subregions.r" Rscript "./Scripts/2C_Migration_by_Age_Regression.r" Rscript "./Scripts/2D_Overall_Migration_ARIMA.r" +Rscript "./Scripts/2E_Move_Current_Demographic_Data_to_Current_Year.r" #Produce final results for either the simulation, or information for the report, but not anything used in later stages of the simulation Rscript "./Scripts/3A_Population_Pyramid.r" - - - diff --git a/Scripts/2A_Birth_Rate_Regression_and_Impart_Kemmerer_Births.r b/Scripts/2A_Birth_Rate_Regression_and_Impart_Kemmerer_Births.r index 1466b3d..1af5066 100644 --- a/Scripts/2A_Birth_Rate_Regression_and_Impart_Kemmerer_Births.r +++ b/Scripts/2A_Birth_Rate_Regression_and_Impart_Kemmerer_Births.r @@ -141,7 +141,7 @@ ST_REG_DATA <- REG_REDUCED_DATA %>% filter(Region=='Lincoln') %>% filter(Year==m saveRDS(REG_REDUCED_DATA,paste0(SAVE_DATA_LOC,"/Birth_Regression_Input_Data.Rds")) write_csv(REG_REDUCED_DATA,paste0(SAVE_DATA_LOC,"/Birth_Regression_Input_Data.csv")) -##########Update existing demogrpahic data with new births +##########Update existing demographic data with new births KEM_DATA_NEW <- REG_DATA %>% filter(Region=='Kemmerer & Diamondville') %>% select(Year,County,Region,Population,Births,Deaths,Migration) PRED_KEM_BIRTH_2024 <- REG_DATA %>% filter(Region=='Kemmerer & Diamondville',Year>=2023) %>% mutate(Min_Birth_Group=lag(Min_Birth_Group)) %>% filter(Year==2024) KEM_DATA_NEW[KEM_DATA_NEW$Year==2024,"Births"] <- round(exp(predict(MOD_BIRTHS,PRED_KEM_BIRTH_2024))) diff --git a/Scripts/2B_Impart_Deaths_and_Migration_to_Subregions.r b/Scripts/2B_Impart_Deaths_and_Migration_to_Subregions.r index 6476f0e..2084d63 100644 --- a/Scripts/2B_Impart_Deaths_and_Migration_to_Subregions.r +++ b/Scripts/2B_Impart_Deaths_and_Migration_to_Subregions.r @@ -35,7 +35,7 @@ LIN_POP <- LIN_POP %>% left_join(LIN_PRED) %>% mutate(Deaths=ifelse(is.na(Deat KEM_POP <- readRDS(KEM_POP_LOC) KEM_DEATHS <- LIN_POP %>% select(Year,Deaths) %>% left_join(Death_Adj) %>% left_join(KEM_PRED) %>% mutate(Deaths=round(Deaths*Kem_Death_Ratio)) %>% select(Year,Deaths) KEM_POP <- KEM_POP%>% select(-Deaths) %>% left_join(KEM_DEATHS) %>% mutate(BD=Population+Births-Deaths) %>% mutate(Missing=Population-lag(BD),Missing=lead(Missing),Migration=ifelse(is.na(Migration),Missing,Migration))%>% mutate(NEXT=Population+Births-Deaths+Migration)%>% arrange(desc(Year)) %>% select(colnames(LIN_POP)) -#Rond up any values to match the fact that only whole people can exists +#Round up any values to match the fact that only whole people can exists KEM_POP$Population <- round(KEM_POP$Population) ###Estimate the number of deaths in the other parts of Lincoln (not Kemmerer) based on the total deaths, and the predicted share of deaths diff --git a/Scripts/2C_Migration_by_Age_Regression.r b/Scripts/2C_Migration_by_Age_Regression.r index 0376ae2..9da97da 100644 --- a/Scripts/2C_Migration_by_Age_Regression.r +++ b/Scripts/2C_Migration_by_Age_Regression.r @@ -106,5 +106,5 @@ MIG_AGE_DIST <- MIG_AGE_DIST/sum(MIG_AGE_DIST) if(!exists("RES_SAVE_LOC")){RES_SAVE_LOC <- "./Data/Intermediate_Inputs/Migration_Trends/"} dir.create(RES_SAVE_LOC , recursive = TRUE, showWarnings = FALSE) -write.csv(MIG_AGE_DIST ,paste0(RES_SAVE_LOC,"Migration_Age_Probablity_Zero_to_85.csv"),row.names=FALSE) -saveRDS(MIG_AGE_DIST ,paste0(RES_SAVE_LOC,"Migration_Age_Probablity_Zero_to_85.Rds")) +write.csv(MIG_AGE_DIST ,paste0(RES_SAVE_LOC,"Migration_Age_Probability_Zero_to_85.csv"),row.names=FALSE) +saveRDS(MIG_AGE_DIST ,paste0(RES_SAVE_LOC,"Migration_Age_Probability_Zero_to_85.Rds")) diff --git a/Scripts/2E_Move_Current_Demographic_Data_to_Current_Year.r b/Scripts/2E_Move_Current_Demographic_Data_to_Current_Year.r index c645f2e..f6dea86 100644 --- a/Scripts/2E_Move_Current_Demographic_Data_to_Current_Year.r +++ b/Scripts/2E_Move_Current_Demographic_Data_to_Current_Year.r @@ -1,79 +1,49 @@ library(tidyverse) +library(parallel) #setwd("../") -#Script to increment the migration, and death data to start simulation in 2024, projecting 2025 -ODDS_LEAVE <- readRDS("Data/Intermediate_Inputs/Migration_Trends/Migration_Age_Probablity_Zero_to_85.Rds") +#Script to increment the demographic data by age and sex up from 2023 to 2024 using the birth,migration, and death estiamtes in 2023. This is done to start the projection of the Monte Carlo in 2025. Since the actual population is known in 2024 (but not the deaths, births and migration. It is worth estimating these to keep more precise data matching migration to match the population +ODDS_LEAVE <- readRDS("Data/Intermediate_Inputs/Migration_Trends/Migration_Age_Probability_Zero_to_85.Rds") KEM_DEMOGRAPHIC <- readRDS("Data/Intermediate_Inputs/Starting_Demographic_Data_Sets_of_Monte_Carlo/2023_Starting_Kemmerer_Diamondville_Demographics_Matrix.Rds") -NUM_MIGRATED_OUT<- readRDS("Data/Cleaned_Data/Population_Data/RDS/Kemmerer_Diamondville_Population_Data.Rds") %>% filter(Year==2023) %>% pull(Migration) %>% abs +POPULATION <- readRDS("Data/Cleaned_Data/Population_Data/RDS/Kemmerer_Diamondville_Population_Data.Rds") %>% filter(Year>=2023) +PRED_BIRTH <- POPULATION %>% filter(Year==2024) %>% pull(Births) +MORT <- readRDS("Data/Cleaned_Data/Mortality_Rate_Data/RDS/Lincoln_County_Mortality_Rates.Rds") +source("Scripts/Load_Custom_Functions/Migration_Simulation_Functions.r") +KEM_DEMOGRAPHIC["84",] <- colSums(KEM_DEMOGRAPHIC[c("84","85"),]) +KEM_DEMOGRAPHIC <- KEM_DEMOGRAPHIC[1:85,] +#Manually add estimated new births 31 total, small assumption made that 15 are boys and 16 are girls. +KEM_DEMOGRAPHIC <- rbind(c(15,15),KEM_DEMOGRAPHIC ) +rownames(KEM_DEMOGRAPHIC) <- 0:85 +INDEX <- as.numeric(rownames(KEM_DEMOGRAPHIC)) -MIGRATED_OUT <- function(KEM_CURRENT,NUM_MIGRATED_OUT,ODDS_MIGRATED){ - POP_TOTAL <- sum(KEM_CURRENT) - if(POP_TOTAL>NUM_MIGRATED_OUT){ - RES_MAT <- matrix(NA,nrow=POP_TOTAL,ncol=2) - RES_MAT[,2] <- c(rep(1,sum(KEM_CURRENT[,1])),rep(2,sum(KEM_CURRENT[,2]))) - DEM_LIST <- as.vector(KEM_CURRENT) - ODDS_SET <- rep(ODDS_MIGRATED,2) - ODDS_RES <- matrix(NA,ncol=1,nrow=POP_TOTAL) - for(i in 1:172){ - ST <- min(which(is.na(RES_MAT))) - NUM_REP <- DEM_LIST[i] - if(NUM_REP>0){ - RES_MAT[ST:(NUM_REP+ST-1)] <- rep(i,NUM_REP ) - ODDS_RES[ST:(NUM_REP+ST-1)] <- rep(ODDS_SET[i],NUM_REP) - } - } - RES_MAT[,1] <- RES_MAT[,1]-86*(RES_MAT[,2]-1) - NOT_MIGRATED <- RES_MAT[sample(1:nrow(RES_MAT),POP_TOTAL-NUM_MIGRATED_OUT,prob=1-ODDS_RES,replace=FALSE),] - if(POP_TOTAL-NUM_MIGRATED_OUT==1){ - OUT <- as.matrix(t(NOT_MIGRATED),nrow=1,ncol=2) - rownames(OUT) <- OUT[1]-1 - MALE_FEMALE <- OUT[2] - OUT[MALE_FEMALE] <- 1 - OUT[-MALE_FEMALE] <- 0 - OUT[1] <- 1 - }else{ - OUT <- table(NOT_MIGRATED[,1],NOT_MIGRATED[,2]) - if(ncol(OUT)==1){ - OUT_ORIG <- OUT - NEW_COL_INDEX <- as.character(which(!(c(1,2) %in% colnames(OUT) ))) - OUT <- cbind(OUT,rep(0,nrow(OUT))) - colnames(OUT) <- c(colnames(OUT)[1],NEW_COL_INDEX) - OUT <- OUT[,c("1","2")] - } - rownames(OUT) <- as.numeric(rownames(OUT))-1 - } - - colnames(OUT) <- c("Num_Male","Num_Female" ) - if(nrow(OUT)<86){ - ZERO_ROWS <- 0:85 - ZERO_ROWS <- ZERO_ROWS[!(0:85 %in% rownames(OUT))] - OUT <- rbind(OUT,matrix(0,nrow=length(ZERO_ROWS),ncol=2,dimnames=list(ZERO_ROWS,colnames(OUT)))) - OUT <- OUT[as.character(sort(as.numeric(rownames(OUT)))),] - } - } else{ - OUT <- matrix(0,nrow=86,ncol=2,dimnames=list(0:85,colnames(KEM_CURRENT)))} - return(OUT) +for(i in INDEX){ +C_VAL <- MORT %>% filter(Min_Age<=i,Max_Age>=i) %>% select(Sex,Death_Rate) %>% pivot_wider(values_from=Death_Rate,names_from=Sex,names_prefix="Death_Rate_") +if(i==0){RES <-C_VAL}else{RES <- rbind(RES,C_VAL)} } -ODDS_LEAVE <- readRDS("Data/Intermediate_Inputs/Migration_Trends/Migration_Age_Probablity_Zero_to_85.Rds") -KEM_DEMOGRAPHIC <- readRDS("Data/Intermediate_Inputs/Starting_Demographic_Data_Sets_of_Monte_Carlo/2023_Starting_Kemmerer_Diamondville_Demographics_Matrix.Rds") -NUM_MIGRATED_OUT<- readRDS("Data/Cleaned_Data/Population_Data/RDS/Kemmerer_Diamondville_Population_Data.Rds") %>% filter(Year==2023) %>% pull(Migration) %>% abs +RES <- as.matrix(RES,ncol=2) +NUM_DEATHS <- round(sum(KEM_DEMOGRAPHIC*RES)) +sum(round(KEM_DEMOGRAPHIC*RES+0.204)) +DEATH_MAT <- round(KEM_DEMOGRAPHIC*RES+0.205) #Manually Manipulated factor to make deaths round up to the desired number (27), however, only 30 or 25 is possible, so it needs one more adjustment +DEATH_MAT[67:69,"Num_Female"] <- 0 +sum(DEATH_MAT) +KEM_DEMOGRAPHIC <- KEM_DEMOGRAPHIC - DEATH_MAT +############Apply a migration out of 33 people in Kemmerer +source("Scripts/Load_Custom_Functions/Migration_Simulation_Functions.r") +#Run many simulations to pick a plausible distribution of people leaving +NUM_RUNS <- 10^7 +#In Parallel run the migration simulation many times, to find a distribution of migration given 2023 data. + #Find the average number of migrants leaving by age-sex using the Reduce function to collapse the list, and then dividing by number of runs selected. +RES <- Reduce('+',mclapply(1:NUM_RUNS,function(x){return(KEM_DEMOGRAPHIC-DEMOGRAPHICS_AFTER_MIGRATION(KEM_DEMOGRAPHIC,-33,ODDS_LEAVE)) },mc.cores = detectCores()-1))/NUM_RUNS +#Add a factor such that the sum of all migration adds up to the desired 33. The average found should sum to 33, but when rounding is zero since each sub category sex-age is less than 50% chance of a migration +FACTOR <- 0.225 +33-sum(round(RES+FACTOR)) +KEM_DEMOGRAPHIC_NEW <- KEM_DEMOGRAPHIC-round(RES+FACTOR) +saveRDS(KEM_DEMOGRAPHIC_NEW,"Data/Intermediate_Inputs/Starting_Demographic_Data_Sets_of_Monte_Carlo/2024_Starting_Kemmerer_Diamondville_Demographics_Matrix.Rds") +TEST <- KEM_DEMOGRAPHIC_NEW -KEM_DEMOGRAPHIC +##Perhaps I can be more clever. I could average the direct simulation estimate as done above, with the Kemmerer values found when estimating the other region, and then subtracting from the Lincoln Total. +OTHER_DEMOGRAPHIC_NEW <- readRDS("Data/Intermediate_Inputs/Starting_Demographic_Data_Sets_of_Monte_Carlo/2024_Starting_Lincoln_County_Demographics_Matrix.Rds")-KEM_DEMOGRAPHIC_NEW +saveRDS(OTHER_DEMOGRAPHIC_NEW ,"Data/Intermediate_Inputs/Starting_Demographic_Data_Sets_of_Monte_Carlo/2024_Starting_Kemmerer_Diamondville_Demographics_Matrix.Rds") +sum(KEM_DEMOGRAPHIC_NEW) +POPULATION -start_time <- Sys.time() -for(x in 1:round(10^6/23)){MIGRATED_OUT(KEM_DEMOGRAPHIC,NUM_MIGRATED_OUT,ODDS_LEAVE)} -end_time <- Sys.time() -end_time - start_time - -head(KEM_CURRENT) - -LENGTH <- length(KEM_CURRENT) -for(x in 1:10^6){ -KEM_NEW <- KEM_CURRENT -for(i in 1:num_migrated){ - MIGRATED <- sample(1:LENGTH,1,prob=1-(1-ODDS_MIGRATE)^KEM_CURRENT) - KEM_NEW[MIGRATED] - KEM_NEW[MIGRATED] <- KEM_NEW[MIGRATED]-1 -} -# if(x==1){RES <- KEM_CURRENT-KEM_NEW} else{RES<- cbind(RES,KEM_NEW-KEM_CURRENT)} -} -plot((rowMeans(RES))) diff --git a/Scripts/Load_Custom_Functions/Migration_Simulation_Functions.r b/Scripts/Load_Custom_Functions/Migration_Simulation_Functions.r index f4c888c..1a3e49d 100644 --- a/Scripts/Load_Custom_Functions/Migration_Simulation_Functions.r +++ b/Scripts/Load_Custom_Functions/Migration_Simulation_Functions.r @@ -1,76 +1,104 @@ -#################################Simulate data -#A function used only inside the OUT_MIGRATION_SIMULATION function (see below). This creates a data set which has one entry for every individual in the county, with a assigned probability of moving out of the county. This allows the migration simulation to extract individuals based on true population distribution. When migration is net positive this does not matter, as anyone age-sex group can move in. -#PROB_AGE_DIST <- Migration_Age_Distribution -#DEMOGRAPHIC_DATA <- START_DETAILED_DATA -MAKE_SET <- function(PROB_AGE_DIST,DEMOGRAPHIC_DATA){ +#Script to increment the migration, and death data to start simulation in 2024, projecting 2025 +library(tidyverse) +#setwd("../../") -# if(nrow(DEMOGRAPHIC_DATA)==86){DEMOGRAPHIC_DATA<- DEMOGRAPHIC_DATA[-1,]} #Drop age zero if it was included - SINGLE_AGE_RET <- function(x,PROB,DEMO_DATA){ - C_PROB<- PROB[x] - C_LOOP <- DEMO_DATA[x+1,] - NUM_MALE <- pull(C_LOOP,Num_Male) - NUM_FEMALE <- pull(C_LOOP,Num_Female) - C_AGE <- C_LOOP$Age - MEN <- cbind(rep(C_AGE,NUM_MALE),rep("Male",NUM_MALE),rep(C_PROB,NUM_MALE)) - WOMEN <- cbind(rep(C_AGE,NUM_FEMALE),rep("Female",NUM_FEMALE),rep(C_PROB,NUM_FEMALE)) - RES <- rbind(MEN,WOMEN) -# colnames(RES) <- c("Age","Sex","Probability") - return(RES) +#When migrants are leaving the area of study the current distention of ages and sex needs to be accounted for because people cannot leave who do not already live in the area. As a result two functions are made one looks at cases when the net migration is out, and the other looks at net migration in. In the later the identified distention of migrants by age is used with no direct tie to the current demographics +#Function for net migration out of the area. Returns a new age-sex demographics table matching the input demographic data format +MIGRATED_OUT <- function(DEMOGRAPHIC_DATASET,NUM_MIGRATED_OUT,ODDS_MIGRATED){ + POP_TOTAL <- sum(DEMOGRAPHIC_DATASET) + if(POP_TOTAL>NUM_MIGRATED_OUT){ + #Populate a matrix used to draw from with sample. One entry is included for each person in the demographic set. + RES_MAT <- matrix(NA,nrow=POP_TOTAL,ncol=2) + # + RES_MAT[,2] <- c(rep(1,sum(DEMOGRAPHIC_DATASET[,1])),rep(2,sum(DEMOGRAPHIC_DATASET[,2]))) + DEM_LIST <- as.vector(DEMOGRAPHIC_DATASET) + #To speed up the process keep everything as a matrix or vector. In this case the odds of migrating by age are repeated for men and women, because sex was not found to be a major factor in migration odds. + ODDS_SET <- rep(ODDS_MIGRATED,2) + ODDS_RES <- matrix(NA,ncol=1,nrow=POP_TOTAL) #To store a list of odds for each person, to feed to sample + #There ages 0 to 85 for men and women is 172 rows + for(i in 1:172){ + #Populate from the first entry that is NA + ST <- min(which(is.na(RES_MAT))) + NUM_REP <- DEM_LIST[i] + #Skip if there are zero people in the group + if(NUM_REP>0){ + #Populate a entry that stores the index (Age 1-86 becomes 0-85, and Sex either 1 or 2). Using the index is done to speed up computation + RES_MAT[ST:(NUM_REP+ST-1)] <- rep(i,NUM_REP ) + ODDS_RES[ST:(NUM_REP+ST-1)] <- rep(ODDS_SET[i],NUM_REP) + } + } + #Because the index of the matrix is stored a entry of 87 in the first column is girl age zero. User some logic to convert this to actual age, men are unchanged women need to be shifted down + RES_MAT[,1] <- RES_MAT[,1]-86*(RES_MAT[,2]-1) + #Using the matrix of each individual sample who WILL NOT MIGRATE OUT. To do this the odds are reversed, and the number sampled is the total population minus the net leaving migration. The remaining individuals out is created because this makes it easier to return an output of current demographics after migration. + NOT_MIGRATED <- RES_MAT[sample(1:nrow(RES_MAT),POP_TOTAL-NUM_MIGRATED_OUT,prob=1-ODDS_RES,replace=FALSE),] + #Corner case error found that if there is exactly one row, a vector is created and cannot be named. This case is manually corrected + if(POP_TOTAL-NUM_MIGRATED_OUT==1){ + #Use the single row output to maniple a one row matrix + OUT <- as.matrix(t(NOT_MIGRATED),nrow=1,ncol=2) + rownames(OUT) <- OUT[1]-1 #The row index is one higher than the age for example age zero is index one. Naming based on age + MALE_FEMALE <- OUT[2] #There is only either a single man or women. Identify if they are male or female to properly create the row adding zero for the other value + OUT[MALE_FEMALE] <- 1 #The value of the sex indicating column should be one for the single person selected + OUT[-MALE_FEMALE] <- 0 #The other column should be set to zero as there is only one person. + OUT[1] <- 1 #This is currently the age of the person (+1 as an index) but should be the number of people to stay which in this case must be one. + }else{ + #In most cases (number of people left >1) a matrix should be created which pulls a single entry of age and sex for each person who stays in the region after net out migration occurs. + OUT <- table(NOT_MIGRATED[,1],NOT_MIGRATED[,2]) #This creates a table with the number of unique entries for each age-sex combination to report. + #A corner case error was identified if one of the sex's has no population. For example only Men of age 20 exist. In that case only one column for sex is generated from the "table" command. The missing column is added back with values of zero to match the output of all other possible combinations. This allows the simulation to run without requiring logic to adjust the output demotions. + if(ncol(OUT)==1){ + NEW_COL_INDEX <- as.character(which(!(c(1,2) %in% colnames(OUT) ))) #Find whether 1 or 2 (men or women) need to be added back as a column + OUT <- cbind(OUT,rep(0,nrow(OUT))) #Add values of zero in the second column. We do not yet know if this is men or women, but one of the two must be zero to reach this part of the code + colnames(OUT) <- c(colnames(OUT)[1],NEW_COL_INDEX) #Rename the columns such that they match the correct male or female record. In the case where women are left the columns are out of order, but the column names are now correct. + OUT <- OUT[,c("1","2")] #Make sure the columns are selected in a order consistent with all other outputs. This is redundant if correct, but flips the order to be correct when only women are left. + } + # + rownames(OUT) <- as.numeric(rownames(OUT))-1 #The row names are currently an index ranging from 1:86 but should be ages 0 to 85. So subtract one. } - FINAL_OUT <-do.call(rbind,sapply(1:85,function(x){SINGLE_AGE_RET(x,PROB_AGE_DIST,DEMOGRAPHIC_DATA) })) %>% as_tibble - colnames(FINAL_OUT ) <- c("Age","Sex","Probability") - FINAL_OUT$Age <- as.numeric(FINAL_OUT$Age) - return(FINAL_OUT) -} -#A function to find the number of migrants leaving the county (net out), accounting for the fact that fewer or more people in any one age-sex bracket will decrease the odds of being the person to leave even if they are 18-19 and likely to leave. -#DEMO_DATA <- START_DETAILED_DATA -#NUM_MIGRATED <- 29 -#MIG_AGE_DIST <- Migration_Age_Distribution -OUT_MIGRATION_SIMULATION <- function(NUM_MIGRATED,MIG_AGE_DIST,DEMO_DATA){ - MIG_AGE_DIST - CURRENT_POP <- MAKE_SET(MIG_AGE_DIST,DEMO_DATA) - NUM_POP <- nrow(CURRENT_POP) - #Rank all individuals to easily combine with the ordinal data set - ORD <- sample(1:NUM_POP,prob=pull(CURRENT_POP,Probability),size=NUM_POP,replace=FALSE) - #Set the migration out status of all individuals to zero (staying in the county) - CURRENT_POP$Migrated <- 0 - #The people drawn first are assumed to migrate up to the point where all migration is filled. - CURRENT_POP[ORD[1:NUM_MIGRATED],"Migrated"] <- -1 - MIG_STATUS <- CURRENT_POP %>% group_by(Age,Sex) %>% summarize(Migrated=sum(Migrated), .groups = 'drop')%>% arrange(desc(Sex),Age) - return(MIG_STATUS) -} -#Function to find the number of migrants to a county, by age-sex when coming from outside the county. -#NUM_MIGRATED <- TOTAL_MIGRATION; -#NUM_MIGRATED <- 0 -IN_MIGRATION_SIMULATION <- function(NUM_MIGRATED,MIG_AGE_DIST){ - NUM_MIGRATED <- abs(NUM_MIGRATED) - NUM_AGES <- 85 - EMPTY_SET <- cbind(rep(1:NUM_AGES,2), c(rep("Male",NUM_AGES),rep("Female",NUM_AGES)),rep(0,2*NUM_AGES)) - colnames(EMPTY_SET) <- c("Age","Sex","Migrated") - if(NUM_MIGRATED==0){return(EMPTY_SET)} - MIGRATED_AGE <- sample(1:NUM_AGES,prob=MIG_AGE_DIST,size=NUM_MIGRATED,replace=TRUE) - MIGRATED_SEX <- sample(c("Male","Female"),size=NUM_MIGRATED,replace=TRUE) - MIGRATED_GROUP <- cbind(MIGRATED_AGE,MIGRATED_SEX,rep(1,NUM_MIGRATED)) - MIGRATED_DATA <- rbind(EMPTY_SET,MIGRATED_GROUP) %>% as_tibble - colnames(MIGRATED_DATA) <- c("Age","Sex","Migrated") - MIGRATED_DATA$Age <- as.numeric(MIGRATED_DATA$Age) - MIGRATED_DATA$Migrated <- as.numeric(MIGRATED_DATA$Migrated) - MIGRATED_DATA <- MIGRATED_DATA %>% group_by(Age,Sex) %>% summarize(Migrated=sum(Migrated),.groups = 'drop') %>% arrange(desc(Sex),Age) - return(MIGRATED_DATA) -} -# MIG_AGE_DIST <- Migration_Age_Distribution;DEMO_DATA <- START_DETAILED_DATA;NET_MIGRATION <- TOTAL_MIGRATION -MIGRATION_SIMULATION <- function(MIG_AGE_DIST,DEMO_DATA,NET_MIGRATION){ -#MIG_AGE_DIST: A distribution of probabilities of any one age of person to migrate to or out of a county used to grab a reasonable rate of in/out migration by age from a total migration number -#DEMO_DATA: A data set which contains the number of people in each sex-age category (ex Male 30, or Female 82) -#TOTAL_MIGRATION_ARIMA_SIM: A single simulated path of a ARIMA simulation of net migration in the county. -#YEARS_AHEAD_OF_SIMULATION: The number of years forward from the start of the ARIMA to extract as total migration. -if(NET_MIGRATION<0){ - RES <- OUT_MIGRATION_SIMULATION(abs(NET_MIGRATION),MIG_AGE_DIST,DEMO_DATA) -} else{ - RES <- IN_MIGRATION_SIMULATION(NET_MIGRATION,MIG_AGE_DIST)} - return(RES %>% pivot_wider(values_from=Migrated,names_from=Sex,names_prefix="Num_")) + colnames(OUT) <- c("Num_Male","Num_Female" ) + #Identify if one of the age brackets has no values. If that is the case it will not show up in the output matrix, but should be listed in the demographics as zero men and zero women to make the row numbers of each output identical making downstream result management much easier. + if(nrow(OUT)<86){ + ZERO_ROWS <- 0:85 + ZERO_ROWS <- ZERO_ROWS[!(0:85 %in% rownames(OUT))] #Find which ages are missing from the output + OUT <- rbind(OUT,matrix(0,nrow=length(ZERO_ROWS),ncol=2,dimnames=list(ZERO_ROWS,colnames(OUT)))) #Create a new matrix with zeros for the missing ages + OUT <- OUT[as.character(sort(as.numeric(rownames(OUT)))),] #Combine the zero matrix with the matrix to the main simulated result, and then sort based on the row name to make sure the ages are lined up correctly 0 to 85 so that each output is identical and can be added or subtracted properly. + } + } else{ + OUT <- matrix(0,nrow=86,ncol=2,dimnames=list(0:85,colnames(DEMOGRAPHIC_DATASET)))} #In the corner case that more people leave the area than present live there set all values to zero and return this in the same format as any other output. + return(OUT) } +#Function to account for net migration into the county. Returns a new age-sex demographics table matching the input demographic data format +MIGRATED_IN <- function(DEMOGRAPHIC_DATASET,NUM_MIGRATED_IN,ODDS_MIGRATED){ + #Based on historic data sample possible migration additions by age with higher probabilities being assigned to ages such as 19 and 20, than 80 or 85. Also randomly draw if the person moving in is a man or women with a 50/50 split. + #Using table to find the total number of each age sex combination from this simulation. +OUT <- table(sample(0:85,NUM_MIGRATED_IN,prob=ODDS_MIGRATED,replace=TRUE),sample(1:2,NUM_MIGRATED_IN,replace=TRUE)) +#OUT_ORIG <- OUT +#OUT <- OUT_ORIG + #A corner case error was identified if one of the sex's has no population. For example only Men of age 20 exist. In that case only one column for sex is generated from the "table" command. The missing column is added back with values of zero to match the output of all other possible combinations. This allows the simulation to run without requiring logic to adjust the output demotions. + if(ncol(OUT)==1){ + NEW_COL_INDEX <- as.character(which(!(c(1,2) %in% colnames(OUT) ))) #Find whether 1 or 2 (men or women) need to be added back as a column + OUT <- cbind(OUT,rep(0,nrow(OUT))) #Add values of zero in the second column. We do not yet know if this is men or women, but one of the two must be zero to reach this part of the code + colnames(OUT) <- c(colnames(OUT)[1],NEW_COL_INDEX) #Rename the columns such that they match the correct male or female record. In the case where women are left the columns are out of order, but the column names are now correct. + ROW_NAME <- rownames(OUT) + if(nrow(OUT)==1){OUT <- t(OUT[1,c("1","2")])} #Make sure the columns are selected in a order consistent with all other outputs. This is redundant if correct, but flips the order to be correct when only women are left. + rownames(OUT) <- ROW_NAME + } +#Pull the correct rows of the demographics table using the row names. The names are a character set going from 0-85 ages, while the index is 1:86, so make sure to use the charter names. Add these new immigrants to the existing demographics of age-sex combinations. +DEMOGRAPHIC_DATASET[as.character(rownames(OUT)),] <- DEMOGRAPHIC_DATASET[as.character(rownames(OUT)),]+OUT +return(DEMOGRAPHIC_DATASET) +} +#Combine both functions for easier coding later on, allowing one function to be called in all scenarios +DEMOGRAPHICS_AFTER_MIGRATION <- function(DEMOGRAPHIC_DATASET,NUM_MIGRATED,ODDS_MIGRATED){ + if(NUM_MIGRATED==0){return(DEMOGRAPHIC_DATASET)} #If there are no changes return the input. This allows for easier code that does not need to check if zero migration occurs on the back end. + if(NUM_MIGRATED>0){return(MIGRATED_IN(DEMOGRAPHIC_DATASET,NUM_MIGRATED,ODDS_MIGRATED))}else{return(MIGRATED_OUT(DEMOGRAPHIC_DATASET,abs(NUM_MIGRATED),ODDS_MIGRATED))}#Decide the correct function to call depending on if net migration is positive or negative +} + +#Test results +#ODDS_LEAVE <- readRDS("Data/Intermediate_Inputs/Migration_Trends/Migration_Age_Probability_Zero_to_85.Rds") +#KEM_DEMOGRAPHIC <- readRDS("Data/Intermediate_Inputs/Starting_Demographic_Data_Sets_of_Monte_Carlo/2023_Starting_Kemmerer_Diamondville_Demographics_Matrix.Rds") +#DEMOGRAPHIC_DATASET <- KEM_DEMOGRAPHIC + +#MIGRATED_IN(KEM_DEMOGRAPHIC,3,ODDS_LEAVE)-KEM_DEMOGRAPHIC +