# Model Migration Trends ----
#
# Exploratory script: loads the cleaned Wyoming county population and
# demographic tables, sanity-checks how births/deaths/migration reconcile
# with population counts, and fits a few migration regressions.
library(tidyverse)
library(fixest)
library(corrplot)

## Run Regression ----
DEMOGRAPHIC_DATA <-
  readRDS("Data/Cleaned_Data/Wyoming_County_Demographic_Data.Rds")
# Extract the population trend data to connect with demographics
# (Population, births, deaths)
POP_DATA <- readRDS("Data/Cleaned_Data/Wyoming_County_Population.Rds")

# Identify births, deaths and migration from existing data.
# Columns 4:5 of DEMOGRAPHIC_DATA are used as the male/female counts
# (presumably Num_Male and Num_Female -- confirm against the cleaned file).
C_YEAR <- 1983
C_COUNTY <- "Albany"
POP_DATA %>% filter(Year == C_YEAR, County == C_COUNTY)
# NOTE(review): the +34 is an unexplained manual adjustment kept from the
# original exploration.
sum((DEMOGRAPHIC_DATA %>%
  filter(Year == C_YEAR, County == C_COUNTY))[, 4:5]) + 34
sum((DEMOGRAPHIC_DATA %>%
  filter(Year == C_YEAR - 1, County == C_COUNTY, Age == 0))[, 4:5])
sum((DEMOGRAPHIC_DATA %>%
  filter(Year == C_YEAR, County == C_COUNTY, Age == 1))[, 4:5])
sum((DEMOGRAPHIC_DATA %>%
  filter(Year == C_YEAR, County == C_COUNTY, Age == 0))[, 4:5])

# OTHER TESTING ----
# Natural join on the columns the two tables share; rows without birth
# data are dropped.
DATA <- POP_DATA %>%
  left_join(DEMOGRAPHIC_DATA) %>%
  filter(!is.na(Births))

# Bucket single-year ages into coarse groups. Rows with a missing Age keep
# an NA Age_Group, exactly as the original chain of ifelse() calls did.
DATA <- DATA %>%
  mutate(
    Age_Group = case_when(
      Age <= 5 ~ "Infant",
      Age < 18 ~ "Child",
      Age < 25 ~ "Young_Adult",
      Age < 35 ~ "Young_Working_Adult",
      Age < 60 ~ "Mid_Adult",
      Age >= 60 ~ "Retired_Adult"
    )
  )
DATA %>% filter(Age_Group == "Retired_Adult")

# Collapse to one row per county-year-age group.
# The original passed `na.omit = TRUE`, which sum() treats as one more value
# to add (TRUE == 1): every total was inflated by 1 and NAs were not
# removed. `na.rm = TRUE` is the intended argument.
DATA <- DATA %>%
  ungroup() %>%
  group_by(Year, County, Population, Births, Deaths, Migration, Age_Group) %>%
  summarize(
    Num_Male = sum(Num_Male, na.rm = TRUE),
    Num_Female = sum(Num_Female, na.rm = TRUE)
  ) %>%
  ungroup()

TEMP <- DATA %>%
  select(-County) %>%
  pivot_wider(values_from = c(Num_Male, Num_Female), names_from = Age_Group)
corrplot(cor(TEMP, use = "pairwise.complete.obs"))

REG_TEMP <- DATA %>%
  pivot_wider(
    values_from = c(Num_Male, Num_Female),
    names_from = Age_Group
  ) %>%
  mutate(Population = Population - Births + Deaths)
REG_TEMP %>%
  arrange(County, Year) %>%
  filter(County != "Albany", Year > 2015)

# Looks like Births, deaths and migration should be shifted back (or
# population forward)
POP_DATA %>%
  group_by(County) %>%
  arrange(Year) %>%
  mutate(PREV = Population - Births + Deaths - Migration) %>%
  arrange(County, Year) %>%
  filter(Year > 2018)

# Hand arithmetic used while reconciling individual county-years.
(26500) - 501 + 166 + 266
35836 + 541 - 184 + 1137 - 36209
(11831 - 13324) - 259 + 83
DIFF <- 26519 - 26165
DIFF - 501 + 166
(27380 - 26633) - 413 + 146

C_YEAR <- 1980
REG_TEMP %>% filter(Year == C_YEAR - 1)
TEMP <- DEMOGRAPHIC_DATA %>% filter(County == "Albany", Year == C_YEAR)
sum(TEMP[1, 4:5])
# Year-over-year change in the male/female counts, matched by row position.
# Parentheses only make explicit the grouping R already applies (%>% binds
# tighter than binary minus).
TEMP[, 4:5] <-
  (DEMOGRAPHIC_DATA %>%
    filter(County == "Albany", Year == C_YEAR) %>%
    select(Num_Male, Num_Female)) -
  (DEMOGRAPHIC_DATA %>%
    filter(County == "Albany", Year == C_YEAR - 1) %>%
    select(Num_Male, Num_Female))
TEMP
REG_TEMP

# Direction flag must be taken before Migration is replaced by log(|.|).
REG_TEMP$UPWARD <- ifelse(REG_TEMP$Migration > 0, 1, 0)
# Log the age-group head counts by name. The original used positions 5:16,
# which (given the column order produced above) covered Deaths, Migration
# and only ten of the twelve Num_* columns: Migration became NaN wherever it
# was negative before log(abs()) was applied, and two Num_Female_* regressors
# stayed in levels.
# NOTE(review): Deaths is therefore no longer logged -- confirm that was not
# intended for the lm() fit below.
REG_TEMP <- REG_TEMP %>%
  mutate(across(starts_with("Num_"), log))
REG_TEMP$Migration <- log(abs(REG_TEMP$Migration))

# County fixed-effects model; Mid_Adult columns are left out of the formula
# as in the original. (The original repeated `Population` in the formula and
# ran this identical call twice.)
MIG_MODEL <- feols(
  Migration ~ UPWARD * (
    Num_Male_Infant + Num_Male_Child + Num_Male_Young_Adult +
      Num_Male_Young_Working_Adult + Num_Male_Retired_Adult +
      Num_Female_Infant + Num_Female_Child + Num_Female_Young_Adult +
      Num_Female_Young_Working_Adult + Num_Female_Retired_Adult
  ) + Population + Year | County,
  data = REG_TEMP
)
summary(MIG_MODEL)
summary(lm(Migration ~ ., data = REG_TEMP))

# NOTE(review): the fragment below has lost the start of its statement and
# cannot be parsed; it is kept commented out until the missing head is
# recovered.
# ,Young_Adult=Age>=18,"Child",Age_Group)) %>%
#   mutate(Child=Age<18,Young_Adult=Age>=18 & Age<35,
#          Mid_Adult=Age>=35 & Age<=60,Retired_Adult=Age>60) %>%
#   group_by(Year,County,Population,Births,Deaths,Migration,
#            Child,Young_Adult,Mid_Adult,Retired_Adult) %>%
#   summarize(Num_Male=sum(Num_Male),Num_Female =sum(Num_Female))

TEST <- POP_DATA %>%
  left_join(DEMOGRAPHIC_DATA) %>%
  filter(!is.na(Births)) %>%
  pivot_wider(names_from = Age, values_from = c(Num_Male, Num_Female))
TEST
head(colnames(TEST))
# cor() needs numeric input; drop County as is done for TEMP above (the
# original had a no-op `TEST <- TEST` here and cor() failed on the
# character column).
TEST <- TEST %>% select(-County)
corrplot(cor(TEST, use = "pairwise.complete.obs"))

# Merge the two data sets and drop any records that cannot be used in the
# regression (this makes the "predict" function output the right number of
# records).
# The lags are computed on the population table, ordered by year within
# county, BEFORE joining the demographic rows. The original lagged after the
# join, where each county-year is repeated once per age, so lag() mostly
# returned the same year's value rather than the previous year's.
# Assumes POP_DATA has one row per County-Year -- TODO confirm.
REG_DATA <- POP_DATA %>%
  filter(!is.na(Births)) %>%
  group_by(County) %>%
  arrange(Year, .by_group = TRUE) %>%
  mutate(
    PREV_MIG = lag(Migration),
    PREV_TWO_MIG = lag(Migration, 2),
    PREV_POP = lag(Population),
    PREV_BIRTHS = lag(Births)
  ) %>%
  ungroup() %>%
  left_join(DEMOGRAPHIC_DATA)
REG_DATA$County <- factor(REG_DATA$County)
# NOTE(review): REG_DATA still carries one row per joined demographic
# record, so each county-year enters this fit multiple times; the reported
# standard errors should be read with that in mind.
feols(
  Migration ~ PREV_MIG + PREV_TWO_MIG + PREV_BIRTHS + PREV_POP |
    Year + County,
  data = REG_DATA
)
REG_DATA %>% filter(!is.na(Births))