Added a bash script to run code. One bug fix

2025-11-13 13:03:44 -07:00 · 2025-11-13 13:03:44 -07:00 · 104c1cd0cd
commit 104c1cd0cd
parent 00ccebc050
3 changed files with 16 additions and 128 deletions
--- a/Prelim_Process.sh
+++ b/Prelim_Process.sh
@ -0,0 +1,14 @@
+#Clean and collect data sets used in later code.
+Rscript "./Scripts/1A_Download_and_Process_Population_Data.r"
+Rscript "./Scripts/1B_Process_Existing_NIH_Mortality_Data.r"
+Rscript "./Scripts/1C_Download_and_Process_Demographic_Data.r"
+Rscript "./Scripts/1D_Use_ACS_Census_Data_to_Estimate_Kemmerer_Demographics.r"
+#Create data sets used in later simulations, produce some results for the report when related to this process.
+Rscript "./Scripts/2A_Birth_Rate_Regression.r"
+Rscript "./Scripts/2B_Migration_by_Age_Regression.r"
+Rscript "./Scripts/2C_Overall_Migration_ARIMA.r"
+#Produce final results for either the simulation, or information for the report, but not anything used in later stages of the simulation
+Rscript "./Scripts/3A_Population_Pyramid.r"
+
+
+
--- a/Scripts/2B_Migration_Regression.r
+++ b/Scripts/2B_Migration_Regression.r
@ -1,126 +0,0 @@
-##########################Model Migration Trends for use in the Monte Carlo. This is important because a 18 year old is more likely to move than 75 year old.
-#library(tidyverse)
-#library(fixest)
-library(forecast)
-######Checking correlations with migration rates
-  DEMOGRAPHIC_DATA <- readRDS("Data/Cleaned_Data/Wyoming_County_Demographic_Data.Rds")
-  #Extract the population trend data to connect with demographics (Population,births,deaths)
-  POP_DATA <- readRDS("Data/Cleaned_Data/Wyoming_County_Population.Rds")
-#Identify births, deaths an migration from existing data.
-	DEMO1 <-  DEMOGRAPHIC_DATA
-	DEMO2 <- DEMOGRAPHIC_DATA %>% mutate(Year=Year+1,Age=Age+1) %>% rename(PREV_MALE=Num_Male,PREV_FEMALE=Num_Female)
-	#Combine into a usable data set. Calculate the change in the age-sex population from year to year. This change will include the effect of migration, where absolute values are a mix of factors unrelated to migration. This first-difference approach is preferred to absolute values.
-DEMO_DATA <- inner_join(DEMO1,DEMO2) %>% mutate(Male=Num_Male-PREV_MALE,Female=Num_Female-PREV_FEMALE,Pop_Change=Male+Female) %>% select(County,Year,Age,Male,Female,Pop_Change) %>% arrange(County,Year,Age)
-
-
-#############################Observed that men and women behave similarly allowing for the groups to be combined
-COR_MAT_DATA_FULL <- pivot_wider(DEMO_DATA,values_from=c(Male,Female),names_from=Age) 
-COR_MAT_DATA_FULL <- POP_DATA %>% left_join(COR_MAT_DATA_FULL )
-
-#RES <- cbind(c(rep("Male",90),rep("Female",90)),c(rep(1:90,2)),rep(NA,180)) %>% as_tibble
-#colnames(RES ) <- c("Sex","Age","MIGRATION_COEF")
-#RES$MIGRATION_COEF <- as.numeric(RES$MIGRATION_COEF)
- 
-#colnames(TEST_DATA)
-#for(x in 1:180){
-#TEST_DATA$Y_VAL <- as.numeric(t(TEST_DATA[,6+x]))
-#	TEST <- feols(Y_VAL~Deaths+Migration+Population|Year+County,data=TEST_DATA)
-#	RES[x,3] <- median(predict(TEST,TEST_DATA %>% mutate(Migration=Migration+1))-predict(TEST),na.rm=TRUE)
-#}
-#RES <- RES %>% filter(MIGRATION_COEF>-1,Age<85)
-#RES$MIGRATION_COEF
-#ggplot(RES,aes(x=Age,y=MIGRATION_COEF,group=Sex,color=Sex)) +geom_point()+geom_smooth()
-#########################################
-#Create a wide data set with ages in each column so that each regression of age can be predicted one by one.
-	#Use the previous years population data as the starting point, so that the regression does not use data already including migration.
-DEMO_DATA <- DEMO_DATA %>% select(-Male,-Female)
-COR_MAT_DATA_FULL <- pivot_wider(DEMO_DATA,values_from=Pop_Change,names_from=Age,names_prefix="Age_")
-AGE_WIDE_DATA <- POP_DATA %>% mutate(Population=Population-Migration-Births+Deaths) %>% left_join( COR_MAT_DATA_FULL) %>% filter(!is.na(Migration),!is.na(Deaths),!is.na(Population),!is.na(Year),!is.na(County))
-
-#Create a table to store the resulting coefficients in.
-RES <- cbind(1:90,c(rep("Child",17),"18",rep("Adult",90-18)),rep(NA,90),rep(NA,90)) %>% as_tibble
-#RES <- cbind(1:90,c(rep("Child",18),rep("Adult",90-18)),rep(NA,90),rep(NA,90)) %>% as_tibble
-	#Clean the output table
-	colnames(RES ) <- c("Age","Group","MIGRATION_COEF","DEATH_COEF")
-	RES$Age<- as.numeric(RES$Age)
-	RES$MIGRATION_COEF <- as.numeric(RES$MIGRATION_COEF)
-	RES$DEATH_COEF <- as.numeric(RES$DEATH_COEF)
-	RES$Group <- as.factor(RES$Group)
-#Predicating the effect of migration on population in any one age group, so that trends over age can be observed. Less when old, more when 18-19.
-	#Loop over all age groups, predict number of people in the age group, from previous population, deaths, and Migrations. Extract the Migration Coefficient for use in a trend analysis.
-for(x in 1:90){
-	AGE_WIDE_DATA$Y_VAL <- as.numeric(t(AGE_WIDE_DATA[,6+x]))#Extract the change
-		C_REG<- feols(Y_VAL~Deaths+Migration+Population|Year+County,data=AGE_WIDE_DATA)
-	RES[x,3] <- as.numeric(coef(C_REG)["Migration"])
-	RES[x,4] <- as.numeric(coef(C_REG)["Deaths"])
-}
-rm(C_REG)
-#Create data to create graphs and analyze. Remove some observed outliers
-GRAPH_DATA  <- RES %>% filter(abs(MIGRATION_COEF)<Inf,Age<100) %>% filter(Age!=25,Age!=35,Age!=85)
-##Graph when using log scales and grouping by child/adult. Looks pretty linear
-	#ggplot(GRAPH_DATA,aes(x=Age,y=MIGRATION_COEF,group=Group,color=Group)) +geom_point()+geom_smooth(method="lm")+ scale_y_continuous(trans = scales::log_trans())
-#Graph when not using log scale but including a geom_smooth to show the actual trend.
-#GRAPH <-ggplot(GRAPH_DATA,aes(x=Age,y=MIGRATION_COEF,group=Group,color=Group)) +geom_point()+geom_smooth(span=0.9)+ylab("Migration Coefficient (Pop. Change Per Added Immigrant)")
-#ggsave("Migration_Age_Distribution.png",GRAPH)
-
-####Create results which find a functional form for the probability that a migrant is in a certain age bracket, so that the probability of any age can be drawn from in the Monte Carlo for net migration numbers. Note that a function is used, because point estimates will have large variably, but the overall trend looks VERY clean.
-	CHILD_MOD <- lm(log(MIGRATION_COEF)~Age,data=GRAPH_DATA %>% filter(Group=='Child')) #The childhood range (1-18), has a great exponential fit with age, but has a different trend than adults. Because there are fewer data points we prefer a exponential fit, compared to a smoothed fit as the variance changes the end points, yet in both cases a exponential fit looks good.
-	CHILD_PRED <- exp(predict(CHILD_MOD))
-	#Extract only the 18 coefficient values. This falls cleanly in between the two groups which seems reasonable, however, it would not inexcusably make sense to assign exactly 18 to either child or adult trend.
-	PRED_18 <- as.numeric(GRAPH_DATA[GRAPH_DATA$Age==18,"MIGRATION_COEF"])
-	#Uses a local polynomial regression fitting model for adults. While this is nearly a exponential curve, some additional uncertainty can be captured since there is enough data to create a smooth line. A span of 0.9 is used rather than a lower number to avoid over-fitting to the high variance in year by year coefficients.
-	ADULT_MOD <- loess(MIGRATION_COEF~Age,data=GRAPH_DATA %>% filter(Group=='Adult'),span=0.9)
-	ADULT_PRED <- predict(ADULT_MOD,19:90)
-	#Create a data frame for the probabilities in each age. Include age, and child/adult indicators for convenience.
-	MIGRATION_COEF <- c(CHILD_PRED,PRED_18,ADULT_PRED) #Combine the predicting coefficients for each year 1 to 90.
-	Age <- 1:90 #Ages in the model are 1 to 90. No zero as these are included in births of the current year.
-	PRED_DATA <- cbind(MIGRATION_COEF,Age) %>% as_tibble
-	PRED_DATA$Group <- 	c(rep("Child",17),"18",rep("Adult",90-18))
-	#clean the data
-	PRED_DATA$Age <- as.numeric(PRED_DATA$Age)
-	MIN_VAL <- min(abs(as.numeric(MIGRATION_COEF))) #Some of the tail end estimates are very slightly less than zero. This is not possible, so instead put negative values, as the smallest magnitude observed in the other predictions.
-	PRED_DATA$MIGRATION_COEF<- ifelse(MIGRATION_COEF<MIN_VAL,MIN_VAL,MIGRATION_COEF)
-
-#Create a plot of the predicted coefficients versus the observe values.This is good visual to explain the methods, and the need for migration age groups.
-#ggplot(GRAPH_DATA,aes(x=Age,y=MIGRATION_COEF,group=Group,color=Group)) +geom_point()+geom_line(data=PRED_DATA,size=1.5,linetype="longdash")
-	#Convert the absolute coefficient values to a percentage chance that any one immigrant will be in the given age. This wont line up perfectly with the coefficients if using them to predict immigration, because the age-sex data set uses different totals than the population/migration data. However, the distribution should be the same, so we divide each estimate by the total. The results is the percent probability that a single independent immigrant will be of the given age.
-	#If this is run from the main script MIG_AGE_DIST will be the key variable and should not be changed.
-MIG_AGE_DIST <- PRED_DATA$MIGRATION_COEF/sum(PRED_DATA$MIGRATION_COEF)
-#Condense down to 85+ group because death rates only use 85+
-MIG_AGE_DIST[85] <- mean(MIG_AGE_DIST[85:90])
-MIG_AGE_DIST <- MIG_AGE_DIST[1:85] 
-write.csv(MIG_AGE_DIST ,"Data/Other_Intermediate_Outputs/Migreation_Age_Probablity_One_to_Ninety.csv",row.names=FALSE)
-
-####Work on overall migration trends
-	#Could use code cleanup after trying things, but have but I have a working ARIMA to model lincoln county migration
-TS_DATA <- POP_DATA %>% mutate(In_Migration=ifelse(Migration>0,1,0)) %>% group_by(County) %>% arrange(County,Year) %>% mutate(Prev_Pop=lag(Population))
-ST_YEAR <- min(pull(TS_DATA %>% filter(!is.na(Migration)),Year))
-END_YEAR <- max(pull(TS_DATA %>% filter(!is.na(Migration)),Year))
-	#GRAPH_DATA <- TS_DATA %>% filter(!is.na(Migration))
-	#GRAPH_DATA_LN <- TS_DATA %>% filter(!is.na(Migration),County=="Lincoln")
-	#ggplot(GRAPH_DATA,aes(x=Year,y=Migration/Prev_Pop,group=County,color=County))+geom_point()+geom_line(data=GRAPH_DATA_LN)
-TS_WIDE <- TS_DATA %>% dplyr::select(Year,County,Migration) %>% pivot_wider(values_from=Migration,names_from=County) %>% arrange(Year) %>% filter(Year>ST_YEAR+1,Year<=END_YEAR) %>%ts(start=c(ST_YEAR+1),frequency=1)
-LN <- TS_DATA %>%  dplyr::select(Year,County,Migration) %>% pivot_wider(values_from=Migration,names_from=County) %>% arrange(Year) %>% dplyr::select(Lincoln,Year) %>% filter(Year>=ST_YEAR,Year<=END_YEAR) %>% dplyr::select(-Year) %>%ts(start=c(ST_YEAR),frequency=1)
-#Create an ARIMA of Migration so the number of people migrating can be simulated
-	#Time series tests
-	#library(tseries)
-	#adf.test(LN,k=1) #Stationary with one lag, otherwise not stationary
-	#kpss.test(LN) #Stationary,default of program and has some model fit improvements
-MOD <- auto.arima(LN,stationary=TRUE)
-	#summary(MOD)
-	#Validity tests
-	#autoplot(MOD)
-	#acf(resid(MOD))
-	#pacf(resid(MOD))
-#	adf.test(resid(MOD))
-	#checkresiduals(MOD)
-#Save the resulting model outputs, will need to be changed if looking at other counties
-saveRDS(MOD,"Data/Regression_Results/LN_ARIMA_MODEL.Rds")
-
-MIGRATION_ARIMA_SIMS <- (do.call(cbind,mclapply(1:NUM_SIMULATIONS,function(x){as.numeric(round(simulate(MOD,future=TRUE, nsim=NUM_YEARS_PROJECTED)))},mc.cores =detectCores()-1)))#testing a multiple run simulation could use parallel process
-saveRDS(MIGRATION_ARIMA_SIMS,"Data/Simulated_Data_Sets/Migration_ARIMA.Rds")
-write.csv(MIGRATION_ARIMA_SIMS,row.names=FALSE,"Data/Simulated_Data_Sets/Migration_ARIMA.csv")
-
-
-
-
--- a/Scripts/2B_Migration_by_Age_Regression.r
+++ b/Scripts/2B_Migration_by_Age_Regression.r
@ -61,8 +61,8 @@ DICT <-c("AGE"='\\textbf{Yearly Change in Number of People by Age in Wyoming Cou
 REG_TABLE_LOC <- paste0(FIG_SAVE_LOC,"Migration_by_Age_Regression.png")
 REG_TEX_LOC <- paste0(FIG_SAVE_LOC,"Migration_by_Age_Regression.tex")

-etable(REG_1,REG_5,REG_10,REG_15,REG_18,REG_25,REG_45,REG_65,REG_85,headers=HEAD,style.tex=style.tex(yesNo="$\\checkmark$"),dict=DICT,replace=TRUE,file=REG_TEX_LOC ,export=REG_TABLE_LOC)
-	try(etable(MOD_VIEW_BIRTHS,MOD_VIEW_BIRTHS_2016,MOD_VIEW_BIRTHS_1985,headers=HEAD,group=list("County Trends"="Year "),style.tex=style.tex(yesNo="$\\checkmark$"),dict=DICT,notes=NOTES,file=REG_TABLE_TEX_LOC ,export=REG_TABLE_LOC,replace=TRUE))
+try(etable(REG_1,REG_5,REG_10,REG_15,REG_18,REG_25,REG_45,REG_65,REG_85,headers=HEAD,style.tex=style.tex(yesNo="$\\checkmark$"),dict=DICT,replace=TRUE,file=REG_TEX_LOC ,export=REG_TABLE_LOC))
+try(etable(REG_1,REG_5,REG_10,REG_15,REG_18,REG_25,REG_45,REG_65,REG_85,headers=HEAD,style.tex=style.tex(yesNo="$\\checkmark$"),dict=DICT,replace=TRUE,file=REG_TEX_LOC ,export=REG_TABLE_LOC))
 #Create data to create graphs and analyze. Remove some observed outlier
 GRAPH_DATA  <- RES %>% filter(abs(MIGRATION_COEF)<Inf,Age<100) %>% filter(Age!=25,Age!=35,Age!=85)