Combined birth, and deaths. Still need migration

2025-10-22 17:36:31 -06:00 · 2025-10-22 17:36:31 -06:00 · 089751d3cd
commit 089751d3cd
parent 3fd47b23ea
11 changed files with 146 additions and 54 deletions
--- a/.gitignore
+++ b/.gitignore
@ -1,4 +1,6 @@
 # ---> R
+#Don't save any major data files on the server, can be regenerated after pulling
+*.Rds
 # ##Very large simulation should not be saved
 Data/Simulated_Data_Sets/ 
 # History files
--- a/1_Run_Full_Simulation.r
+++ b/1_Run_Full_Simulation.r
@ -0,0 +1,38 @@
+#####Packages
+library(tidyverse) #Cleaning data
+library(fixest) #Estimating a model of birth rates, to provide variance in the birth rate Monte Carlo using a fixed effect model.
+#Load custom functions needed for the simulation
+source("Scripts/Birth_Simulation_Functions.r")
+source("Scripts/Monte_Carlo_Functions.r")
+
+#####User Configuration Values
+	NUM_SIMULATIONS <- 10^6 #Number of Monte Carlo Simulations to run
+	RERUN_MORTALITY_SIMULATION <- TRUE #Rerun the Monte Carlo simulation of future mortality rates (not actual deaths) even if a Rds file of a mortality rates exists. This can be used to speed up runs when FALSE
+	NUM_YEARS_PROJECTED <-  50 #How many years into the future should each Monte Carlo run project to. For example 25 years if starting from 2025 and ending in 2050.
+	BIRTH_RATE_REG_RESULTS <- "Data/Regression_Results/Birth_Rate_Model.Rds" #Location of the regression used to model variance in birth rates in each Monte Carlo simulation.
+	START_DEMOGRAPHIC_DATA <- "Data/Cleaned_Data/Start_Year_Demographic_Data_With_Fertility_Groups.Rds" #Location of the data for the first year needing a forecasted birth rate, which aggregates the yearly splits of populations, into a single, year-county data set with variables for key birth prediction (total number of men and women in ages with high fertility rates), and then combines with the data set including births, deaths, migration and population.
+####Run any scirpts required before main Monte Carlo 
+	source("Survival_Simulation.r") #Populate a table with a simulation of future mortality rates, for quick recall during the simulation.
+	#A script contains the code needed to create a feols (fixest) regression of the birth rate given age distribution. Load this saved result or else create it to use in each simulation for gathering variance of births in any given age distribution path of the Monte Carlo.
+	if(file.exists(BIRTH_RATE_REG_RESULTS)){MOD_BIRTHS <- readRDS(BIRTH_RATE_REG_RESULTS);FIRST_PREDICT_YEAR_POPULATION_DATA <- readRDS(START_DEMOGRAPHIC_DATA)} else{source("Birth_Rate_Regression.r")}
+FIRST_PREDICT_YEAR_POPULATION_DATA 
+#######################################################Main Monte Carlo
+
+START_DEM_DATA <- readRDS("Data/Cleaned_Data/Lincoln_Demographic_Data.Rds") %>% group_by(County) %>% filter(Year==2023) %>% ungroup %>% select(-County)
+MORTALITY_SIMULATION <- readRDS("./Data/Simulated_Data_Sets/MORTALITY_MONTE_CARLO.Rds") #Load the Mortality simulation to speed up simulation
+
+#First run
+C_RES <- RUN_SINGLE_SIM(MOD_BIRTHS,FIRST_PREDICT_YEAR_POPULATION_DATA,START_DEM_DATA,MORTALITY_SIMULATION,SIM_NUMBER=1,START_OF_SIM=2023)
+C_RES[[3]]
+#Second run, work on making into loop and saving the output to file (check if that will slow the results). Maybe use a predifined matrix, so that the results can be stored quirckly
+RUN_SINGLE_SIM(MOD_BIRTHS,C_RES[[3]],C_RES[[2]],MORTALITY_SIMULATION,SIM_NUMBER=1,START_OF_SIM=2023)
+#Test
+sapply(1:1000,function(x){RUN_SINGLE_SIM(MOD_BIRTHS,C_RES[[3]],C_RES[[2]],MORTALITY_SIMULATION,SIM_NUMBER=1,START_OF_SIM=2023)})
+
+
+
+
+#Run the full simulation in the current year (1), across all simulations x, passing in the demographic, and mortality data.
+#TEST <- mclapply(1:10^3,function(x){MORTALITY_SIM(1,x,LIN_CURRENT_DEM,FALSE,Mortality_Rate_Sim )},mc.cores = detectCores()-1)
+#TEST[[1]]
+
--- a/Birth_Rate_Regression.r
+++ b/Birth_Rate_Regression.r
@ -0,0 +1,29 @@
+##########################Model Population Trends
+##Run Regression 
+#Pull in Demographic data and create categories for key groups in the regression, male/female population with high fertility, children under one and two (but not zero). This data is broken down by each age group so aggregate to the county, year level for the final regression.
+#Fertility age bounds were informed by the regression found in the file ./Scripts/Other_Analysis/Select_Range_of_Male_Female_Fertility.r Which qualitatively supports that the number of people in these age ranges (18-28 Women, 18-30 Men) have the most significance in predicting birth rates. These two are combined into one variable which represent the minimum number of people in the key fertility window between the sexes, this is the binding fertility constraint and has more explanatory power than including either the number of men or women in the fertility window alone, providing a good trade off for including more variables or reducing variance.
+  DEMOGRAPHIC_DATA <- readRDS("Data/Cleaned_Data/Wyoming_County_Demographic_Data.Rds") %>% mutate(Male_Window=Age>=18 & Age<=30,Female_Window=Age>=18 & Age<=28) %>% group_by(County,Year) %>% summarize(Female_Birth_Group=sum(Num_Female*Female_Window),Male_Birth_Group=sum(Num_Male*Male_Window),Min_Birth_Group=ifelse(Female_Birth_Group<Male_Birth_Group,Female_Birth_Group,Male_Birth_Group))
+  #Extract the population trend data to connect with demographics (Population,births,deaths)
+  POP_DATA <- readRDS("Data/Cleaned_Data/Wyoming_County_Population.Rds") %>% mutate(LN=ifelse(County=="Lincoln",1,0))
+  #Merger the two data sets and drop any records that cannot be used in the regression (this makes the "predict" function output the right number of records)
+  REG_DATA <- POP_DATA %>% full_join(DEMOGRAPHIC_DATA) 
+  REG_DATA <- REG_DATA %>% group_by(County) %>% mutate(PREV_BIRTH=lag(Births),PREV_TWO_BIRTH=lag(Births,2)) %>% ungroup %>% filter(!is.na(PREV_TWO_BIRTH),!is.na(Min_Birth_Group))
+  REG_DATA$County <- factor(REG_DATA$County)
+  FIRST_PREDICT_YEAR_POPULATION_DATA <-  REG_DATA %>% filter(Year==2023,County=='Lincoln') %>% select(-LN,-Female_Birth_Group,-Male_Birth_Group) #Store the data set of only the first year needing a birth forecast, to start the birth Monte Carlo simulations.
+  REG_DATA <- REG_DATA %>% filter(!is.na(Births)) #Remove any values with missing births for a simpler regression which includes only complete data
+
+
+
+###Predict the number of Births
+  MOD_BIRTHS <- feols(log(Births)~log(PREV_BIRTH)+log(PREV_TWO_BIRTH)+log(Min_Birth_Group)+Year*County,cluster=~Year+County, data=REG_DATA )
+ #Optional: Review the ACF and PACF for validity. Model made on October 22nd appears to have uncorrelated lags of residuals.
+  #RES_DATA <- REG_DATA #Data to create visuals with, without changing the main file. Can be used for ggplot, or residual tests
+  #RES_DATA$RESID <- resid(MOD_BIRTHS)
+  #acf(RES_DATA %>% pull(RESID))
+  #pacf(RES_DATA %>% pull(RESID))
+
+  saveRDS(MOD_BIRTHS,BIRTH_RATE_REG_RESULTS)
+  saveRDS(FIRST_PREDICT_YEAR_POPULATION_DATA,START_DEMOGRAPHIC_DATA) #Save the cleaned data set for later use when starting the simulation.
+  #Cleanup data no longer needed, and save some RAM
+	  rm(POP_DATA,DEMOGRAPHIC_DATA,REG_DATA)
+	  gc()
--- a/Birth_Rate_Simulation.r
+++ b/Birth_Rate_Simulation.r
@ -1,42 +0,0 @@
-##########################Model Population Trends
-library(fixest)
-library(tidyverse)
-readRDS("Data/Cleaned_Data/Wyoming_County_Population.Rds")
-REG_DATA <- readRDS("Data/Cleaned_Data/Wyoming_County_Population.Rds") %>% left_join(readRDS("Data/Cleaned_Data/Wyoming_County_Demographic_Data.Rds") %>%  group_by(County,Year) %>% summarize(POP2=sum(Num_Male+Num_Female))) %>%left_join( readRDS("Data/Cleaned_Data/Wyoming_County_Demographic_Data.Rds") %>% mutate(BIRTH_POP=ifelse(Age==0,1,0)) %>% group_by(County,Year,BIRTH_POP) %>% summarize(BIRTH_POP=sum(Num_Female))) %>% mutate(LN=ifelse(County=="Lincoln",1,0),Pop=Population-Births)
-TEMP <- readRDS("Data/Cleaned_Data/Wyoming_County_Demographic_Data.Rds")  %>% mutate(Num_Male=log(Num_Male),Num_Female=log(Num_Female))
-#TEMP <-	     pivot_longer(TEMP,c("Num_Male","Num_Female"),names_to="Sex",values_to="Number") %>% mutate(Sex=ifelse(Sex=="Num_Male","Male","Female"))
-TEMP <- pivot_wider(TEMP,names_from=Age,values_from=c(Num_Male,Num_Female),names_prefix="Age_") %>% unique
-#Establish good bounds for the birth groups
-REG_DATA_PRLIM <- readRDS("Data/Cleaned_Data/Wyoming_County_Population.Rds") %>% mutate(Population=Population-Births+Deaths+Migration) %>% select(-Deaths,-Migration) %>% left_join(TEMP) %>% select(-Population,-Num_Male_Age_0,-Num_Female_Age_0)
-  summary(lm(Births~. ,data=REG_DATA_PRLIM))
-##Run Regression 
-  #Pull in Demographic data, and create categories for key groups, male/female population with high fertility, children under one and two (but not zero)
-  DEMOGRAPHIC_DATA <- readRDS("Data/Cleaned_Data/Wyoming_County_Demographic_Data.Rds") %>% mutate(Male_Window=Age>=18 & Age<=30,Female_Window=Age>=18 & Age<=28,Under_Two=Age<=2 & Age!=0,Under_One=Age<=1 & Age!=0) %>% group_by(County,Year) %>% summarize(Female_Birth_Group=sum(Num_Female*Female_Window),Male_Birth_Group=sum(Num_Male*Male_Window),Under_Two=sum(Under_Two*(Num_Male+Num_Female)),Under_One=sum(Under_One*(Num_Male+Num_Female)),Min_Birth_Group=ifelse(Female_Birth_Group<Male_Birth_Group,Female_Birth_Group,Male_Birth_Group))
-  POP_DATA <- readRDS("Data/Cleaned_Data/Wyoming_County_Population.Rds") %>% mutate(LN=ifelse(County=="Lincoln",1,0))
-  REG_DATA <- POP_DATA %>% full_join(DEMOGRAPHIC_DATA) %>% filter(!is.na(Births))
-  REG_DATA <- REG_DATA %>% group_by(County) %>% mutate(PREV_BIRTH=lag(Births),PREV_TWO_BIRTH=lag(Births,2)) %>% ungroup %>% filter(!is.na(PREV_TWO_BIRTH),!is.na(Min_Birth_Group))
-REG_DATA
-  MOD_BIRTHS<- feols(log(Births)~log(PREV_BIRTH)+log(PREV_TWO_BIRTH)+log(Min_Birth_Group)+Year*County,cluster=~Year, data=REG_DATA )
-  #acf(RES_DATA %>% pull(RESID))
-  #pacf(RES_DATA %>% pull(RESID))
-
-TEST <- REG_DATA %>% filter(LN==1) %>% filter(Year==2022) %>% mutate(Year=2022)
-C_PREDICT <- predict(MOD_BIRTHS,TEST,interval = "prediction",level=0.95)
-PRED_MEAN <- C_PREDICT$fit
-SE_PRED <- (C_PREDICT$ci_high-C_PREDICT$ci_low)/3.92
-
-YEAR <- 2025
-NUM_SIMS <- 10000
-BIRTHS <- round(exp(rnorm(NUM_SIMS,mean=PRED_MEAN,sd=SE_PRED)))
-MALE <- sapply(1:NUM_SIMS,function(x){ rbinom(1,BIRTHS[x],prob=0.5)})
-RES <- cbind(rep(YEAR,NUM_SIMS),rep(0,NUM_SIMS),MALE,BIRTHS-MALE) %>% as_tibble
-colnames(RES) <- c("Year","Age","Num_Male","Num_Female")
-RES
-##NOTE TEST[[1]] Comming from death simulation script as a test in order to feedback into births.
-CVAL <- TEST[[1]]
-CVAL 
-Male_Birth_Group <- sum(CVAL[CVAL$Age>=18 & CVAL$Age<=30,] %>% pull(Num_Male))
-Female_Birth_Group <- sum(CVAL[CVAL$Age>=18 & CVAL$Age<=28,] %>% pull(Num_Female))
-Min_Birth_Group <- min(Male_Birth_Group,Female_Birth_Group )
-YEAR <- YEAR+1
-
--- a/Data/Regression_Results/README_REG.md
+++ b/Data/Regression_Results/README_REG.md
@ -0,0 +1 @@
+This is where any intermediate regression results should be saved as Rds files for loading into the Monte Carlo model
--- a/Scripts/Birth_Simulation_Functions.r
+++ b/Scripts/Birth_Simulation_Functions.r
@ -0,0 +1,16 @@
+	#Births,PREV_BIRTH,PREV_TWO_BIRTH,Min_Birth_Group,Year,County
+	#Uncomment to test the function step by step
+#REG_MODEL <- MOD_BIRTHS;REG_DATA <- FIRST_PREDICT_YEAR_POPULATION_DATA;NUM_SIMS=1
+BIRTH_SIM <- function(REG_MODEL,REG_DATA,NUM_SIMS=1){
+	predict(REG_MODEL,newdata=REG_DATA)
+	C_PREDICT <- predict(REG_MODEL,REG_DATA,interval = "prediction",level=0.95)
+	PRED_MEAN <- C_PREDICT$fit
+	SE_PRED <- (C_PREDICT$ci_high-C_PREDICT$ci_low)/3.92
+	YEAR <- REG_DATA %>% pull(Year) %>% unique 
+	BIRTHS <- round(exp(rnorm(NUM_SIMS,mean=PRED_MEAN,sd=SE_PRED)))
+	MALE <- sapply(1:NUM_SIMS,function(x){ rbinom(1,BIRTHS[x],prob=0.5)})
+	RES <- cbind(rep(YEAR,NUM_SIMS),rep(0,NUM_SIMS),MALE,BIRTHS-MALE) %>% as_tibble
+	colnames(RES) <- c("Year","Age","Num_Male","Num_Female")
+	return(RES)
+}
+
--- a/Scripts/Death_Simulation_Functions.r
+++ b/Scripts/Death_Simulation_Functions.r
@ -1,4 +1,7 @@
 ## This R script contains a set of functions used to calculate the number of deaths in each age-sex group, when given a set of initial demographics. The mortality rate trend  data feeds this primary model by identifying a simulated future mortality rate to apply to a demographic group when the year is in the future.
+
+###Uncomment to test the function line by line. Keep commented for a typical run.
+	#C_YEAR=1;SIM_NUM=89;LIN_CURRENT_DEM=readRDS("Data/Cleaned_Data/Lincoln_Demographic_Data.Rds") %>% group_by(County) %>% filter(Year==max(Year)) %>% ungroup %>% select(-County);RELOAD_MORTALITY_RATE=TRUE;MORTALITY_RATE_SIM=NA;MORTALITY_RATE_SIM_LOC="./Data/Simulated_Data_Sets/MORTALITY_MONTE_CARLO.Rds";DEM_GROUP_INDEX=readRDS("./Data/Cleaned_Data/Lincoln_Mortality_Rate.Rds") %>% select(Sex,Min_Age,Max_Age)
 MORTALITY_SIM <- function(C_YEAR,SIM_NUM,LIN_CURRENT_DEM,RELOAD_MORTALITY_RATE=TRUE,MORTALITY_RATE_SIM=NA,MORTALITY_RATE_SIM_LOC="./Data/Simulated_Data_Sets/MORTALITY_MONTE_CARLO.Rds",DEM_GROUP_INDEX=readRDS("./Data/Cleaned_Data/Lincoln_Mortality_Rate.Rds") %>% select(Sex,Min_Age,Max_Age)){
 	#C_YEAR: Current year (1,2,3 etc.) since start of the simulation.
 	#SIM_NUM: Number of Monte Carlo simulation to run
@ -13,7 +16,6 @@ MORTALITY_SIM <- function(C_YEAR,SIM_NUM,LIN_CURRENT_DEM,RELOAD_MORTALITY_RATE=T
 		#Extract a index of Male and Female mortality rates for each row of the supplied starting demographic distribution data, this index matches the exact order of the saved mortality rate simulation results by age and sex group, allowing the relevant future morality rate simulation results to be recalled using this index number. 
 		INDEX <- t((sapply(1:nrow(LIN_CURRENT_DEM),function(x){TEMP <- (LIN_CURRENT_DEM %>% pull(Age))[x]; rev(which(TEMP>=DEM_GROUP_INDEX %>% pull(Min_Age)& TEMP<=DEM_GROUP_INDEX %>% pull(Max_Age)))}))) #Find the mortality rate of a specific age (1,85 etc.) from the simulated future rates. The rates are assessed by age groups, so identify which age group each individual age is in in order to link that to mortality rates.

-
 		#Extract the Male and Female, by age results using the INDEX values. To add a current mortality rate for each group, based on the long run rate simulation
 	LIN_CURRENT_DEM$Male_Mortality <- sapply(INDEX[,1],function(x){MORTALITY_RATE_SIM[[x]][C_YEAR,SIM_NUM]})
 	LIN_CURRENT_DEM$Female_Mortality <- sapply(INDEX[,2],function(x){MORTALITY_RATE_SIM[[x]][C_YEAR,SIM_NUM]})
@ -21,7 +23,12 @@ MORTALITY_SIM <- function(C_YEAR,SIM_NUM,LIN_CURRENT_DEM,RELOAD_MORTALITY_RATE=T
 	#Create a simulation of the number of actual deaths. Randomly pull numbers for each person in the group. A person dies only if this number is less than the mortality rate to match the distribution of deaths, but allowing for random changes when summed over the population.
 	LIN_CURRENT_DEM$Male_Deaths <- sapply(1:nrow(LIN_CURRENT_DEM),function(x){sum(runif(LIN_CURRENT_DEM$Num_Male[x])<LIN_CURRENT_DEM$Male_Mortality[x])})
 	LIN_CURRENT_DEM$Female_Deaths <- sapply(1:nrow(LIN_CURRENT_DEM),function(x){sum(runif(LIN_CURRENT_DEM$Num_Female[x])<LIN_CURRENT_DEM$Female_Mortality[x])})
+	return(LIN_CURRENT_DEM)

+	}
+
+###NOT CURRENTLY USED AS OF OCTOBER 22, THE IDEA IS TO MOVE ALL YEARS FORWARD BY ONE FOR THE NEXT SIMULATION
+INCREMENT_DEMOGRAPHICS <- function(LIN_CURRENT_DEM){
 #Create a new data set that will return the demographics as updated with the simulated deaths for use in the next years simulation. 
 	LIN_NEXT_DEM_DEATHS <- LIN_CURRENT_DEM %>% select(Year,Age,Num_Male,Num_Female,Male_Deaths,Female_Deaths) 
 	LIN_NEXT_DEM_DEATHS$Year <- LIN_NEXT_DEM_DEATHS$Year +1 #This is the starting point for the next years simulation
--- a/Scripts/Monte_Carlo_Functions.r
+++ b/Scripts/Monte_Carlo_Functions.r
@ -0,0 +1,41 @@
+#Uncomment to check the function line by line
+	#REG_BIRTH_MODEL=MOD_BIRTHS;START_BASIC_DATA=FIRST_PREDICT_YEAR_POPULATION_DATA;START_DETAILED_DATA=START_DEM_DATA;Mortality_Rate_Sim=MORTALITY_SIM;SIM_NUMBER=1;START_OF_SIM=2023
+
+RUN_SINGLE_SIM <- function(REG_BIRTH_MODEL,START_BASIC_DATA,START_DETAILED_DATA,Mortality_Rate_Sim,SIM_NUMBER,START_OF_SIM_YEAR=2023){
+	#REG_BIRTH_MODEL: Feols regression object of population model.
+	#START_BASIC_DATA: A single row of data, with information for the birth regression (Male_Birth_Group,PREV_BIRTH etc.)
+	#START_DETAILED_DATA: A data set, with the number of men and women at each individual age (zero to 85+)
+	#Mortality_Rate_Sim: A list object with a set of project future mortality rates by age. See ./Scripts/Mortality_Rate_Over_Time_Simulation_Function.r. By passing this in the simulation speed is increased significantly.
+	#SIM_NUMBER: The current Monte Carlo simulation being applied. This extracts the correct index of Mortality_Rate_Sim Object for the present simulation.
+	#START_OF_SIM_YEAR: This is the first year of data which requires a simulation. This allows for the time trend to be properly estimated as this depends on the number of years since the forecast began
+		NEXT_BASIC_DATA <- START_BASIC_DATA #Create a data set for the data to feed into the next run.
+		C_BIRTHS <- BIRTH_SIM(REG_BIRTH_MODEL,START_BASIC_DATA)
+		NEXT_BASIC_DATA[,"PREV_TWO_BIRTH"] <- START_BASIC_DATA[,"PREV_BIRTH"]
+		NEXT_BASIC_DATA[,"PREV_BIRTH"] <- sum(C_BIRTHS[,3:4])
+		NEXT_BASIC_DATA[,"Year"] <- NEXT_BASIC_DATA[,"Year"]+1
+		#Update the initial data to include the projected births
+		START_BASIC_DATA[,"Births"] <- sum(C_BIRTHS[,3:4])
+
+	#Increment the ages of the provided demographic data so that all ages increase by one. Add in the new births as Age zero. This is done so the distribution for deaths makes sense, having new births in the zero age and implying all other ages increase by one year.
+	START_DETAILED_DATA[,"Age"] <- START_DETAILED_DATA[,"Age"]+1
+	START_DETAILED_DATA[START_DETAILED_DATA$Age==85,c("Num_Male","Num_Female")] <- START_DETAILED_DATA[START_DETAILED_DATA$Age==86,c("Num_Male","Num_Female")]+START_DETAILED_DATA[START_DETAILED_DATA$Age==85,c("Num_Male","Num_Female")] #Sum the 85 and 86 ages into one row for age 85
+	START_DETAILED_DATA <- START_DETAILED_DATA[START_DETAILED_DATA$Age!=86,] #Anyone older than 85 is lumped into one group remove the 86 group
+	START_DETAILED_DATA <- rbind(C_BIRTHS,START_DETAILED_DATA) #Add the new simulated births
+	#Run a preliminary Monte Carlo which estimates the future mortality rate, for each simulation and year of of Monte Carlo Simulation
+
+		YEARS_AHEAD <- max(START_BASIC_DATA[,'Year']-START_OF_SIM_YEAR,1) #Define the number of years forward from the simulation start based on the current year of analysis, and the user provided first year.
+	MORTALITY_C_RES <- MORTALITY_SIM(YEARS_AHEAD,SIM_NUMBER,START_DETAILED_DATA,FALSE,Mortality_Rate_Sim )
+	#Update number of deaths in the current run (which should be blank when supplied to the function)
+        START_BASIC_DATA[,"Deaths"] <- sum(MORTALITY_C_RES[,c("Male_Deaths","Female_Deaths")] )
+	#
+	NEXT_DETAILED_DATA  <- MORTALITY_C_RES[1:4]
+	NEXT_DETAILED_DATA[,"Num_Male"] <- MORTALITY_C_RES[,"Num_Male"]-MORTALITY_C_RES[,"Male_Deaths"]
+	NEXT_DETAILED_DATA[,"Num_Female"] <- MORTALITY_C_RES[,"Num_Female"]-MORTALITY_C_RES[,"Female_Deaths"]
+	NEXT_DETAILED_DATA[,"Year"] <- NEXT_DETAILED_DATA[,"Year"] +1
+	####
+	NEXT_BASIC_DATA[,"Population"] <- sum(NEXT_DETAILED_DATA[,3:4])
+	NEXT_BASIC_DATA[,"Male_Birth_Group"] <-  sum(NEXT_DETAILED_DATA[NEXT_DETAILED_DATA$Age>=18 & NEXT_DETAILED_DATA$Age<=30,"Num_Male"])
+	NEXT_BASIC_DATA[,"Female_Birth_Group"] <-  sum(NEXT_DETAILED_DATA[NEXT_DETAILED_DATA$Age>=18 & NEXT_DETAILED_DATA$Age<=28,"Num_Female"])
+	NEXT_BASIC_DATA[,"Min_Birth_Group"] <- min(NEXT_BASIC_DATA[,c("Female_Birth_Group","Male_Birth_Group")])
+	return(list(START_BASIC_DATA,NEXT_DETAILED_DATA,NEXT_BASIC_DATA))
+}
--- a/Scripts/Mortality_Rate_Over_Time_Simulation_Function.r
+++ b/Scripts/Mortality_Rate_Over_Time_Simulation_Function.r
@ -44,7 +44,7 @@ MORTALITY_RATE_SIMULATION<- function(NUM_SIMS,NUM_YEARS_TO_SIMULATE,POP_DATA=LIN
 	if(RERUN | !file.exists(SAVE_LOC)){
 		#A function which runs a single population cohort simulation.  A sapply is used to create a matrix of results, with column numbers representing each individual simulation, and row numbers representing each year. Using a matrix vastly speeds up memory recall compared to a tibble or data frame.
 	SINGLE_GROUP_SIM_DEATH_RATE <- function(x){sapply(1:NUM_SIMS,DEATH_RATE_DEMO_GROUP_SIM,DEMO_GROUP=POP_DATA[x,],NUM_YEARS_FORWARD=NUM_YEARS_TO_SIMULATE)}
-	#Run each population group simulation in Parello, save the matrix of population cohorts (sex,age) in a list. The index of the list can be used to recall particular demographic group simulations as the index corresponds to the row number of the demographic data file. From there the simulated matrix can be recalled to find particular summation numbers by column number, and years by row number.
+	#Run each population group simulation in parallel, save the matrix of population cohorts (sex,age) in a list. The index of the list can be used to recall particular demographic group simulations as the index corresponds to the row number of the demographic data file. From there the simulated matrix can be recalled to find particular summation numbers by column number, and years by row number.
 	TEMP <- mclapply(1:nrow(POP_DATA),SINGLE_GROUP_SIM_DEATH_RATE, mc.cores = detectCores()-1 )
 	saveRDS(TEMP,SAVE_LOC,compress=FALSE)
 	} else{print("Death rate of population groups, by year simulation already saved on file. Skipping simulation.")}
--- a/Scripts/Other_Analysis/Select_Range_of_Male_Female_Fertility.r
+++ b/Scripts/Other_Analysis/Select_Range_of_Male_Female_Fertility.r
@ -0,0 +1,8 @@
+#This is R code which was used to select the age bounds applied in the regression predicting the number of births in a given year. Based on visual observations of these results, the number of men and women over 18 have a significant impact on the number of births in a given year. However, Women loose significance around 28 while men lose significance around age 30. This information is applied to asses a new value which is the total number of men between the age of 18 and 30, and the number of women between 18 and 28, as a variable in the birth rate regression. This is later converted to be a variable which is the minimum number between the two groups, which provides more information than either the number of men or number of women alone.
+##This code is not used in the simulation, but was used to create the birth rate model, which in turn is used in the simulation
+library(tidyverse)
+TEMP <- readRDS("../../Data/Cleaned_Data/Wyoming_County_Demographic_Data.Rds")  %>% mutate(Num_Male=log(Num_Male),Num_Female=log(Num_Female))
+TEMP <- pivot_wider(TEMP,names_from=Age,values_from=c(Num_Male,Num_Female),names_prefix="Age_") %>% unique
+#Establish good bounds for the birth groups
+REG_DATA_PRLIM <- readRDS("../../Data/Cleaned_Data/Wyoming_County_Population.Rds") %>% mutate(Population=Population-Births+Deaths+Migration) %>% select(-Deaths,-Migration) %>% left_join(TEMP) %>% select(-Population,-Num_Male_Age_0,-Num_Female_Age_0)
+  summary(lm(Births~. ,data=REG_DATA_PRLIM))
--- a/Survival_Simulation.r
+++ b/Survival_Simulation.r
@ -1,15 +1,7 @@
 ###R code to run a Monte Carlo of possible future deaths within population groups (age-sex). 

-#####Packages
-library(tidyverse)
 source("Scripts/Mortality_Rate_Over_Time_Simulation_Function.r") #Load the mortality rate simulation functions. Note that this will clean the mortality rate data if already missing, by sourcing the data clean script
 source("Scripts/Death_Simulation_Functions.r") #Load the death across population groups simulation functions. 
-MORTALITY_RATE_SIMULATION(1000000,50,RERUN=TRUE) #Run a simulation of future mortality rates, this informs the Monte Carlo of actual deaths, given a future year, and set of demographics.
+MORTALITY_RATE_SIMULATION(NUM_SIMULATIONS,NUM_YEARS_PROJECTED,RERUN=RERUN_MORTALITY_SIMULATION) #Run a simulation of future mortality rates, this informs the Monte Carlo of actual deaths, given a future year, and set of demographics.

-Mortality_Rate_Sim <- readRDS("./Data/Simulated_Data_Sets/MORTALITY_MONTE_CARLO.Rds") #Load the Mortality simulation to speed up simulation
-LIN_CURRENT_DEM <- readRDS("Data/Cleaned_Data/Lincoln_Demographic_Data.Rds") %>% group_by(County) %>% filter(Year==max(Year)) %>% ungroup %>% select(-County)
-LIN_CURRENT_DEM 
-#Run the full simulation in the current year (1), across all simulations x, passing in the demographic, and mortality data.
-TEST <- mclapply(1:10^3,function(x){MORTALITY_SIM(1,x,LIN_CURRENT_DEM,FALSE,Mortality_Rate_Sim )},mc.cores = detectCores()-1)
-TEST[[1]]
				`@ -0,0 +1 @@`
				`This is where any intermediate regression results should be saved as Rds files for loading into the Monte Carlo model`