Working on adding arima of migration

This commit is contained in:
Alex Gebben Work 2025-10-27 17:45:02 -06:00
parent 7f5c54faa1
commit 23cccf13b2
3 changed files with 150 additions and 3 deletions

View File

@ -11,13 +11,13 @@ source("Scripts/Monte_Carlo_Functions.r")
NUM_YEARS_PROJECTED <- 50 #How many years into the future should each Monte Carlo run project to. For example 25 years if starting from 2025 and ending in 2050.
BIRTH_RATE_REG_RESULTS <- "Data/Regression_Results/Birth_Rate_Model.Rds" #Location of the regression used to model variance in birth rates in each Monte Carlo simulation.
START_DEMOGRAPHIC_DATA <- "Data/Cleaned_Data/Start_Year_Demographic_Data_With_Fertility_Groups.Rds" #Location of the data for the first year needing a forecasted birth rate, which aggregates the yearly splits of populations, into a single, year-county data set with variables for key birth prediction (total number of men and women in ages with high fertility rates), and then combines with the data set including births, deaths, migration and population.
AGE_OF_MIGRANT_PROBABILITY <- "Data/Other_Intermediate_Outputs/Migreation_Age_Probablity_One_to_Ninety.csv" #Location of the data which is the result of regression analysis of the age of migrants. That is to say 18 year olds may migrate more than 70 year olds, and this is the distribution by age. Sex was not found to be a major factor
####Run any scirpts required before main Monte Carlo
source("Survival_Simulation.r") #Populate a table with a simulation of future mortality rates, for quick recall during the simulation.
#A script contains the code needed to create a feols (fixest) regression of the birth rate given age distribution. Load this saved result or else create it to use in each simulation for gathering variance of births in any given age distribution path of the Monte Carlo.
if(file.exists(BIRTH_RATE_REG_RESULTS)){MOD_BIRTHS <- readRDS(BIRTH_RATE_REG_RESULTS);FIRST_PREDICT_YEAR_POPULATION_DATA <- readRDS(START_DEMOGRAPHIC_DATA)} else{source("Birth_Rate_Regression.r")}
FIRST_PREDICT_YEAR_POPULATION_DATA
if(file.exists(AGE_OF_MIGRANT_PROBABILITY)){MIG_AGE_DIST <- read.csv(AGE_OF_MIGRANT_PROBABILITY)} else{source("Migration_Regression.r")}
#######################################################Main Monte Carlo
START_DEM_DATA <- readRDS("Data/Cleaned_Data/Lincoln_Demographic_Data.Rds") %>% group_by(County) %>% filter(Year==2023) %>% ungroup %>% select(-County)
MORTALITY_SIMULATION <- readRDS("./Data/Simulated_Data_Sets/MORTALITY_MONTE_CARLO.Rds") #Load the Mortality simulation to speed up simulation

View File

@ -0,0 +1,91 @@
"x"
0.0239992994215661
0.0228458992048084
0.0217479311086577
0.0207027310795178
0.0197077630975303
0.0187606130233046
0.0178589827403748
0.0170006845791664
0.0161836360089478
0.0154058545848836
0.0146654531379325
0.0139606351959162
0.013289690624652
0.0126509914785702
0.0120429880507506
0.0114642051127927
0.0109132383353972
0.0220454815210065
0.0359275548330788
0.0344697460779352
0.0330442204553006
0.0316520188547773
0.0302941821659677
0.0289717512784745
0.0276857670818999
0.0264372704658464
0.0252273023199165
0.0240569035337126
0.022926492724261
0.021834843485538
0.0207805291725703
0.0197621231403848
0.0187781987440083
0.0178273293384677
0.0169080882787899
0.0160190489200017
0.01515878461713
0.0143258687252016
0.0135327575487902
0.0127893872758465
0.0120905510864634
0.0114310421607335
0.0108056536787497
0.0102091788206047
0.00963641076639125
0.00908214269620218
0.00854116779013021
0.00805934728928651
0.00767027209367645
0.00735032987173412
0.00707590829189361
0.00682339502258902
0.00656917773225442
0.00628964408932393
0.00596118176223162
0.00560172490613661
0.00524620704023708
0.00489651487902571
0.00455453513699517
0.00422215452863814
0.00390125976844731
0.00359373757091534
0.00330147465053493
0.00302635772179875
0.00276401290239562
0.00250959862847155
0.00226430009220494
0.0020293024857742
0.00180579100135776
0.00159495083113402
0.00139796716728139
0.00121602520197831
0.00104721774453399
0.000889086267306052
0.000741988957045488
0.000606284000503278
0.000482329584430407
0.000370483895577858
0.000271105120696616
0.000184551446537666
0.000111181059851991
5.13038157978179e-05
4.82137214190365e-06
4.82137214190365e-06
4.82137214190365e-06
4.82137214190365e-06
4.82137214190365e-06
4.82137214190365e-06
4.82137214190365e-06
3.31534934593599e-05
1 x
2 0.0239992994215661
3 0.0228458992048084
4 0.0217479311086577
5 0.0207027310795178
6 0.0197077630975303
7 0.0187606130233046
8 0.0178589827403748
9 0.0170006845791664
10 0.0161836360089478
11 0.0154058545848836
12 0.0146654531379325
13 0.0139606351959162
14 0.013289690624652
15 0.0126509914785702
16 0.0120429880507506
17 0.0114642051127927
18 0.0109132383353972
19 0.0220454815210065
20 0.0359275548330788
21 0.0344697460779352
22 0.0330442204553006
23 0.0316520188547773
24 0.0302941821659677
25 0.0289717512784745
26 0.0276857670818999
27 0.0264372704658464
28 0.0252273023199165
29 0.0240569035337126
30 0.022926492724261
31 0.021834843485538
32 0.0207805291725703
33 0.0197621231403848
34 0.0187781987440083
35 0.0178273293384677
36 0.0169080882787899
37 0.0160190489200017
38 0.01515878461713
39 0.0143258687252016
40 0.0135327575487902
41 0.0127893872758465
42 0.0120905510864634
43 0.0114310421607335
44 0.0108056536787497
45 0.0102091788206047
46 0.00963641076639125
47 0.00908214269620218
48 0.00854116779013021
49 0.00805934728928651
50 0.00767027209367645
51 0.00735032987173412
52 0.00707590829189361
53 0.00682339502258902
54 0.00656917773225442
55 0.00628964408932393
56 0.00596118176223162
57 0.00560172490613661
58 0.00524620704023708
59 0.00489651487902571
60 0.00455453513699517
61 0.00422215452863814
62 0.00390125976844731
63 0.00359373757091534
64 0.00330147465053493
65 0.00302635772179875
66 0.00276401290239562
67 0.00250959862847155
68 0.00226430009220494
69 0.0020293024857742
70 0.00180579100135776
71 0.00159495083113402
72 0.00139796716728139
73 0.00121602520197831
74 0.00104721774453399
75 0.000889086267306052
76 0.000741988957045488
77 0.000606284000503278
78 0.000482329584430407
79 0.000370483895577858
80 0.000271105120696616
81 0.000184551446537666
82 0.000111181059851991
83 5.13038157978179e-05
84 4.82137214190365e-06
85 4.82137214190365e-06
86 4.82137214190365e-06
87 4.82137214190365e-06
88 4.82137214190365e-06
89 4.82137214190365e-06
90 4.82137214190365e-06
91 3.31534934593599e-05

View File

@ -88,7 +88,63 @@ GRAPH_DATA <- RES %>% filter(abs(MIGRATION_COEF)<Inf,Age<100) %>% filter(Age!=2
#Create a plot of the predicted coefficients versus the observe values.This is good visual to explain the methods, and the need for migration age groups.
#ggplot(GRAPH_DATA,aes(x=Age,y=MIGRATION_COEF,group=Group,color=Group)) +geom_point()+geom_line(data=PRED_DATA,size=1.5,linetype="longdash")
#Convert the absolute coefficient values to a percentage chance that any one immigrant will be in the given age. This wont line up perfectly with the coefficients if using them to predict immigration, because the age-sex data set uses different totals than the population/migration data. However, the distribution should be the same, so we divide each estimate by the total. The results is the percent probability that a single independent immigrant will be of the given age.
PROB <- PRED_DATA$MIGRATION_COEF/sum(PRED_DATA$MIGRATION_COEF)
#If this is run from the main script MIG_AGE_DIST will be the key variable and should not be changed.
MIG_AGE_DIST <- PRED_DATA$MIGRATION_COEF/sum(PRED_DATA$MIGRATION_COEF)
write.csv(MIG_AGE_DIST ,"Data/Other_Intermediate_Outputs/Migreation_Age_Probablity_One_to_Ninety.csv",row.names=FALSE)
####Work on overall migration trends
TS_DATA <- POP_DATA
TS_DATA <- TS_DATA %>% mutate(In_Migration=ifelse(Migration>0,1,0)) %>% group_by(County) %>% arrange(County,Year) %>% mutate(Prev_Pop=lag(Population),ABS_PER_MIG=abs(Migration)/Prev_Pop,L_MIG=lag(Migration))
TS_DATA
feols(Migration~L_MIG+Prev_Pop|Year+County,data=TS_DATA)
(feols(ABS_PER_MIG~In_Migration+log(Prev_Pop)+Year*County,TS_DATA))
LN_TS_DATA <- TS_DATA %>% filter(County=='Lincoln')
MOD <- feols(Migration~L_MIG+Prev_Pop+Year,data=LN_TS_DATA)
MOD <- feols(Migration~L_MIG+Prev_Pop+Year*County,data=TS_DATA)
acf(resid(MOD))
pacf(resid(MOD))
library("forecast")
ST_YEAR <- min(pull(TS_DATA,Year))
END_YEAR <- max(pull(TS_DATA,Year))
TS_DATA %>% filter(County=='Lincoln') %>% pull(Migration) %>% plot()
TS_DATA %>% pull(Migration) %>% plot()
GRAPH_DATA <- TS_DATA %>% filter(!is.na(Migration))
GRAPH_DATA_LN <- TS_DATA %>% filter(!is.na(Migration),County=="Lincoln")
ggplot(GRAPH_DATA,aes(x=Year,y=Migration/Prev_Pop,group=County,color=County))+geom_point()+geom_line(data=GRAPH_DATA_LN)
TS_DATA
TS_WIDE <- TS_DATA %>% mutate(Migration=Migration/Prev_Pop) %>% select(Year,County,Migration) %>% pivot_wider(values_from=Migration,names_from=County) %>% arrange(Year) %>% filter(Year>1990,Year<=2021) %>%ts(start=c(1991),frequency=1)
LN <- TS_DATA %>% mutate(Migration=Migration) %>% select(Year,County,Migration) %>% pivot_wider(values_from=Migration,names_from=County) %>% arrange(Year) %>% select(Lincoln,Year) %>% filter(Year>1990,Year<=2021) %>% select(-Year) %>%ts(start=c(1991),frequency=1)
TS_WIDE[,'Lincoln']
LN <- TS_WIDE[,"Lincoln"]
plot(LN )
library(tseries)
adf.test(LN)
TEST <- auto.arima(LN )
plot(TEST)
forecast(TEST)
acf(resid(TEST))
arima.sim(TEST,n=1)
arima.sim(arima(LN,order=c(2,4,3)),n=1)
summary(TEST)
plot(forecast(TEST))
resid(TEST)
acf(resid(TEST))
auto.arima(LN )
plot(forecast(TEST))
TS_WIDE %>% filter(Year>1970) %>% pull(Year)
TS_WIDE
summary(feols(ABS_PER_MIG~Year,LN_TS_DATA))
#Start of a simulation method with the inputs.
#plot(100*PROB)