From 23cccf13b20eef423ab1064e203076d084ee42db Mon Sep 17 00:00:00 2001
From: Alex Gebben Work <agebben@uwyo.edu>
Date: Mon, 27 Oct 2025 17:45:02 -0600
Subject: [PATCH] Working on adding arima of migration

---
 1_Run_Full_Simulation.r                       |  4 +-
 ...igreation_Age_Probablity_One_to_Ninety.csv | 91 +++++++++++++++++++
 Migration_Regression.r                        | 58 +++++++++++-
 3 files changed, 150 insertions(+), 3 deletions(-)
 create mode 100644 Data/Other_Intermediate_Outputs/Migreation_Age_Probablity_One_to_Ninety.csv

diff --git a/1_Run_Full_Simulation.r b/1_Run_Full_Simulation.r
index 0b96c0c..180d2d9 100644
--- a/1_Run_Full_Simulation.r
+++ b/1_Run_Full_Simulation.r
@@ -11,13 +11,13 @@ source("Scripts/Monte_Carlo_Functions.r")
 	NUM_YEARS_PROJECTED <-  50 #How many years into the future should each Monte Carlo run project to. For example 25 years if starting from 2025 and ending in 2050.
 	BIRTH_RATE_REG_RESULTS <- "Data/Regression_Results/Birth_Rate_Model.Rds" #Location of the regression used to model variance in birth rates in each Monte Carlo simulation.
 	START_DEMOGRAPHIC_DATA <- "Data/Cleaned_Data/Start_Year_Demographic_Data_With_Fertility_Groups.Rds" #Location of the data for the first year needing a forecasted birth rate, which aggregates the yearly splits of populations, into a single, year-county data set with variables for key birth prediction (total number of men and women in ages with high fertility rates), and then combines with the data set including births, deaths, migration and population.
+	AGE_OF_MIGRANT_PROBABILITY <- "Data/Other_Intermediate_Outputs/Migreation_Age_Probablity_One_to_Ninety.csv" #Location of the data which is the result of regression analysis of the age of migrants. That is to say 18 year olds may migrate more than 70 year olds, and this is the distribution by age. Sex was not found to be a major factor
 ####Run any scirpts required before main Monte Carlo 
 	source("Survival_Simulation.r") #Populate a table with a simulation of future mortality rates, for quick recall during the simulation.
 	#A script contains the code needed to create a feols (fixest) regression of the birth rate given age distribution. Load this saved result or else create it to use in each simulation for gathering variance of births in any given age distribution path of the Monte Carlo.
 	if(file.exists(BIRTH_RATE_REG_RESULTS)){MOD_BIRTHS <- readRDS(BIRTH_RATE_REG_RESULTS);FIRST_PREDICT_YEAR_POPULATION_DATA <- readRDS(START_DEMOGRAPHIC_DATA)} else{source("Birth_Rate_Regression.r")}
-FIRST_PREDICT_YEAR_POPULATION_DATA 
+	if(file.exists(AGE_OF_MIGRANT_PROBABILITY)){MIG_AGE_DIST <- read.csv(AGE_OF_MIGRANT_PROBABILITY)} else{source("Migration_Regression.r")}
 #######################################################Main Monte Carlo
-
 START_DEM_DATA <- readRDS("Data/Cleaned_Data/Lincoln_Demographic_Data.Rds") %>% group_by(County) %>% filter(Year==2023) %>% ungroup %>% select(-County)
 MORTALITY_SIMULATION <- readRDS("./Data/Simulated_Data_Sets/MORTALITY_MONTE_CARLO.Rds") #Load the Mortality simulation to speed up simulation
 
diff --git a/Data/Other_Intermediate_Outputs/Migreation_Age_Probablity_One_to_Ninety.csv b/Data/Other_Intermediate_Outputs/Migreation_Age_Probablity_One_to_Ninety.csv
new file mode 100644
index 0000000..5d708da
--- /dev/null
+++ b/Data/Other_Intermediate_Outputs/Migreation_Age_Probablity_One_to_Ninety.csv
@@ -0,0 +1,91 @@
+"x"
+0.0239992994215661
+0.0228458992048084
+0.0217479311086577
+0.0207027310795178
+0.0197077630975303
+0.0187606130233046
+0.0178589827403748
+0.0170006845791664
+0.0161836360089478
+0.0154058545848836
+0.0146654531379325
+0.0139606351959162
+0.013289690624652
+0.0126509914785702
+0.0120429880507506
+0.0114642051127927
+0.0109132383353972
+0.0220454815210065
+0.0359275548330788
+0.0344697460779352
+0.0330442204553006
+0.0316520188547773
+0.0302941821659677
+0.0289717512784745
+0.0276857670818999
+0.0264372704658464
+0.0252273023199165
+0.0240569035337126
+0.022926492724261
+0.021834843485538
+0.0207805291725703
+0.0197621231403848
+0.0187781987440083
+0.0178273293384677
+0.0169080882787899
+0.0160190489200017
+0.01515878461713
+0.0143258687252016
+0.0135327575487902
+0.0127893872758465
+0.0120905510864634
+0.0114310421607335
+0.0108056536787497
+0.0102091788206047
+0.00963641076639125
+0.00908214269620218
+0.00854116779013021
+0.00805934728928651
+0.00767027209367645
+0.00735032987173412
+0.00707590829189361
+0.00682339502258902
+0.00656917773225442
+0.00628964408932393
+0.00596118176223162
+0.00560172490613661
+0.00524620704023708
+0.00489651487902571
+0.00455453513699517
+0.00422215452863814
+0.00390125976844731
+0.00359373757091534
+0.00330147465053493
+0.00302635772179875
+0.00276401290239562
+0.00250959862847155
+0.00226430009220494
+0.0020293024857742
+0.00180579100135776
+0.00159495083113402
+0.00139796716728139
+0.00121602520197831
+0.00104721774453399
+0.000889086267306052
+0.000741988957045488
+0.000606284000503278
+0.000482329584430407
+0.000370483895577858
+0.000271105120696616
+0.000184551446537666
+0.000111181059851991
+5.13038157978179e-05
+4.82137214190365e-06
+4.82137214190365e-06
+4.82137214190365e-06
+4.82137214190365e-06
+4.82137214190365e-06
+4.82137214190365e-06
+4.82137214190365e-06
+3.31534934593599e-05
diff --git a/Migration_Regression.r b/Migration_Regression.r
index 3669d28..cdbf477 100644
--- a/Migration_Regression.r
+++ b/Migration_Regression.r
@@ -88,7 +88,63 @@ GRAPH_DATA  <- RES %>% filter(abs(MIGRATION_COEF)<Inf,Age<100) %>% filter(Age!=2
 #Create a plot of the predicted coefficients versus the observe values.This is good visual to explain the methods, and the need for migration age groups.
 #ggplot(GRAPH_DATA,aes(x=Age,y=MIGRATION_COEF,group=Group,color=Group)) +geom_point()+geom_line(data=PRED_DATA,size=1.5,linetype="longdash")
 	#Convert the absolute coefficient values to a percentage chance that any one immigrant will be in the given age. This wont line up perfectly with the coefficients if using them to predict immigration, because the age-sex data set uses different totals than the population/migration data. However, the distribution should be the same, so we divide each estimate by the total. The results is the percent probability that a single independent immigrant will be of the given age.
-PROB <- PRED_DATA$MIGRATION_COEF/sum(PRED_DATA$MIGRATION_COEF)
+	#If this is run from the main script MIG_AGE_DIST will be the key variable and should not be changed.
+MIG_AGE_DIST <- PRED_DATA$MIGRATION_COEF/sum(PRED_DATA$MIGRATION_COEF)
+write.csv(MIG_AGE_DIST ,"Data/Other_Intermediate_Outputs/Migreation_Age_Probablity_One_to_Ninety.csv",row.names=FALSE)
+
+####Work on overall migration trends
+TS_DATA <- POP_DATA
+TS_DATA <- TS_DATA %>% mutate(In_Migration=ifelse(Migration>0,1,0)) %>% group_by(County) %>% arrange(County,Year) %>% mutate(Prev_Pop=lag(Population),ABS_PER_MIG=abs(Migration)/Prev_Pop,L_MIG=lag(Migration))
+TS_DATA
+feols(Migration~L_MIG+Prev_Pop|Year+County,data=TS_DATA)
+(feols(ABS_PER_MIG~In_Migration+log(Prev_Pop)+Year*County,TS_DATA))
+LN_TS_DATA <- TS_DATA %>% filter(County=='Lincoln')
+
+MOD <- feols(Migration~L_MIG+Prev_Pop+Year,data=LN_TS_DATA)
+
+MOD <- feols(Migration~L_MIG+Prev_Pop+Year*County,data=TS_DATA)
+
+acf(resid(MOD))
+pacf(resid(MOD))
+library("forecast")
+ST_YEAR <- min(pull(TS_DATA,Year))
+END_YEAR <- max(pull(TS_DATA,Year))
+TS_DATA %>% filter(County=='Lincoln') %>% pull(Migration) %>% plot()
+TS_DATA %>% pull(Migration) %>% plot()
+GRAPH_DATA <- TS_DATA %>% filter(!is.na(Migration))
+GRAPH_DATA_LN <- TS_DATA %>% filter(!is.na(Migration),County=="Lincoln")
+
+ggplot(GRAPH_DATA,aes(x=Year,y=Migration/Prev_Pop,group=County,color=County))+geom_point()+geom_line(data=GRAPH_DATA_LN)
+TS_DATA
+TS_WIDE <- TS_DATA %>% mutate(Migration=Migration/Prev_Pop) %>% select(Year,County,Migration) %>% pivot_wider(values_from=Migration,names_from=County) %>% arrange(Year) %>% filter(Year>1990,Year<=2021) %>%ts(start=c(1991),frequency=1)
+
+LN <- TS_DATA %>% mutate(Migration=Migration) %>% select(Year,County,Migration) %>% pivot_wider(values_from=Migration,names_from=County) %>% arrange(Year) %>% select(Lincoln,Year) %>% filter(Year>1990,Year<=2021) %>% select(-Year) %>%ts(start=c(1991),frequency=1)
+
+TS_WIDE[,'Lincoln'] 
+
+LN <- TS_WIDE[,"Lincoln"]
+plot(LN )
+library(tseries)
+adf.test(LN)
+TEST <- auto.arima(LN )
+plot(TEST)
+forecast(TEST)
+acf(resid(TEST))
+arima.sim(TEST,n=1)
+arima.sim(arima(LN,order=c(2,4,3)),n=1)
+
+summary(TEST)
+plot(forecast(TEST))
+resid(TEST)
+acf(resid(TEST))
+ auto.arima(LN )
+
+plot(forecast(TEST))
+TS_WIDE  %>% filter(Year>1970) %>% pull(Year)
+TS_WIDE  
+
+
+summary(feols(ABS_PER_MIG~Year,LN_TS_DATA))
 
 #Start of a simulation method with the inputs.
 	#plot(100*PROB)