Corrected data shift, used Correlation of age and migration
This commit is contained in:
parent
034f69924b
commit
7de4132973
@ -15,15 +15,19 @@
|
|||||||
|
|
||||||
|
|
||||||
###Predict the number of Births
|
###Predict the number of Births
|
||||||
MOD_BIRTHS <- feols(log(Births)~log(PREV_BIRTH)+log(PREV_TWO_BIRTH)+log(Min_Birth_Group)+Year*County,cluster=~Year+County, data=REG_DATA )
|
MOD_BIRTHS <- feols(log(Births)~log(PREV_BIRTH)+log(PREV_TWO_BIRTH)+log(Min_Birth_Group)+Year*County,cluster=~Year+County, data=REG_DATA ) #Lower AIC
|
||||||
#Optional: Review the ACF and PACF for validity. Model made on October 22nd appears to have uncorrelated lags of residuals.
|
#AIC(MOD_BIRTHS)
|
||||||
#RES_DATA <- REG_DATA #Data to create visuals with, without changing the main file. Can be used for ggplot, or residual tests
|
#MOD_BIRTHS <- feols(log(Births)~log(PREV_BIRTH)+log(Min_Birth_Group)+Year*County,cluster=~Year+County, data=REG_DATA )
|
||||||
#RES_DATA$RESID <- resid(MOD_BIRTHS)
|
#AIC(MOD_BIRTHS)
|
||||||
#acf(RES_DATA %>% pull(RESID))
|
#Optional: Review the ACF and PACF for validity. Model made on October 24nd appears to have uncorrelated lags of residuals accept year three.
|
||||||
#pacf(RES_DATA %>% pull(RESID))
|
RES_DATA <- REG_DATA #Data to create visuals with, without changing the main file. Can be used for ggplot, or residual tests
|
||||||
|
RES_DATA$RESID <- resid(MOD_BIRTHS)
|
||||||
|
acf(RES_DATA %>% pull(RESID))
|
||||||
|
pacf(RES_DATA %>% pull(RESID))
|
||||||
|
|
||||||
saveRDS(MOD_BIRTHS,BIRTH_RATE_REG_RESULTS)
|
saveRDS(MOD_BIRTHS,BIRTH_RATE_REG_RESULTS)
|
||||||
saveRDS(FIRST_PREDICT_YEAR_POPULATION_DATA,START_DEMOGRAPHIC_DATA) #Save the cleaned data set for later use when starting the simulation.
|
saveRDS(FIRST_PREDICT_YEAR_POPULATION_DATA,START_DEMOGRAPHIC_DATA) #Save the cleaned data set for later use when starting the simulation.
|
||||||
#Cleanup data no longer needed, and save some RAM
|
#Cleanup data no longer needed, and save some RAM
|
||||||
rm(POP_DATA,DEMOGRAPHIC_DATA,REG_DATA)
|
rm(POP_DATA,DEMOGRAPHIC_DATA,REG_DATA)
|
||||||
gc()
|
gc()
|
||||||
|
|
||||||
|
|||||||
Binary file not shown.
File diff suppressed because it is too large
Load Diff
@ -1,87 +1,38 @@
|
|||||||
|
#### NEXT STEPS!!!! USE CORRELATION TO DRAW FROM EACH MIGRANT IN A GIVEN YEAR
|
||||||
##########################Model Migration Trends
|
##########################Model Migration Trends
|
||||||
library(tidyverse)
|
library(tidyverse)
|
||||||
library(fixest)
|
library(fixest)
|
||||||
library(corrplot)
|
library(corrplot)
|
||||||
##Run Regression
|
######Checking correlations with migration rates
|
||||||
DEMOGRAPHIC_DATA <- readRDS("Data/Cleaned_Data/Wyoming_County_Demographic_Data.Rds")
|
DEMOGRAPHIC_DATA <- readRDS("Data/Cleaned_Data/Wyoming_County_Demographic_Data.Rds")
|
||||||
|
|
||||||
#Extract the population trend data to connect with demographics (Population,births,deaths)
|
#Extract the population trend data to connect with demographics (Population,births,deaths)
|
||||||
POP_DATA <- readRDS("Data/Cleaned_Data/Wyoming_County_Population.Rds")
|
POP_DATA <- readRDS("Data/Cleaned_Data/Wyoming_County_Population.Rds")
|
||||||
#Identify births, deaths an migration from existing data.
|
#Identify births, deaths an migration from existing data.
|
||||||
C_YEAR <- 1983
|
DEMO1 <- DEMOGRAPHIC_DATA
|
||||||
C_COUNTY <- 'Albany'
|
DEMO2 <- DEMOGRAPHIC_DATA %>% mutate(Year=Year+1,Age=Age+1) %>% rename(PREV_MALE=Num_Male,PREV_FEMALE=Num_Female)
|
||||||
POP_DATA %>% filter(Year==C_YEAR,County==C_COUNTY)
|
DEMO_DATA <- inner_join(DEMO1,DEMO2) %>% mutate(Male=Num_Male-PREV_MALE,Female=Num_Female-PREV_FEMALE) %>% select(County,Year,Age,Male,Female) %>% arrange(County,Year,Age)
|
||||||
sum((DEMOGRAPHIC_DATA %>% filter(Year==C_YEAR,County==C_COUNTY))[,4:5])+34
|
COR_MAT_DATA_FULL <- pivot_wider(DEMO_DATA,values_from=c(Male,Female),names_from=Age)
|
||||||
|
COR_MAT_DATA_FULL <- POP_DATA %>% left_join(COR_MAT_DATA_FULL )
|
||||||
|
COR_DATA <- COR_MAT_DATA_FULL %>% filter(Year>2010) %>% select(-County,-Year,-Births,-Deaths,-Population)
|
||||||
|
COR <- cor(COR_DATA,use="pairwise.complete.obs")
|
||||||
|
COR_RES <- COR["Migration",2:(ncol(COR))]
|
||||||
|
COR_RES <- cbind(rep(1:90,2),c(rep("Male",ncol(COR)/2),rep("Female",ncol(COR)/2)),as.numeric(COR_RES)) %>% as_tibble
|
||||||
|
colnames(COR_RES) <- c("Age","Sex","Cor")
|
||||||
|
COR_RES <- COR_RES %>% mutate(Age=as.integer(Age),Cor=as.numeric(Cor))
|
||||||
|
ggplot(COR_RES,aes(x=Age,y=Cor,group=Sex,color=Sex))+geom_smooth(span=0.25)+geom_point()
|
||||||
|
########################Combine Male and Female Since they look similar
|
||||||
|
DEMO_DATA <- inner_join(DEMO1,DEMO2) %>% mutate(Male=Num_Male-PREV_MALE,Female=Num_Female-PREV_FEMALE,Change=Male+Female) %>% select(County,Year,Age,Change) %>% arrange(County,Year,Age)
|
||||||
|
COR_MAT_DATA_FULL <- pivot_wider(DEMO_DATA,values_from=c(Change),names_from=Age)
|
||||||
|
COR_MAT_DATA_FULL <- POP_DATA %>% left_join(COR_MAT_DATA_FULL )
|
||||||
|
COR_DATA <- COR_MAT_DATA_FULL %>% filter(Year>2010) %>% select(-County,-Year,-Births,-Deaths,-Population)
|
||||||
|
COR <- cor(COR_DATA,use="pairwise.complete.obs")
|
||||||
|
COR_RES <- COR["Migration",2:(ncol(COR))]
|
||||||
|
COR_RES <- cbind(1:90,as.numeric(COR_RES)) %>% as_tibble
|
||||||
|
colnames(COR_RES) <- c("Age","Cor")
|
||||||
|
ggplot(COR_RES,aes(x=Age,y=Cor))+geom_smooth(span=0.3)+geom_point()
|
||||||
|
data.frame(COR_RES) %>% as_tibble
|
||||||
|
MIGRATION_AGE_COR <- predict(loess(Cor~Age,span=0.3,data=as.data.frame(COR_RES)))
|
||||||
|
plot(MIGRATION_AGE_COR)
|
||||||
|
|
||||||
sum((DEMOGRAPHIC_DATA %>% filter(Year==C_YEAR-1,County==C_COUNTY,Age==0))[,4:5])
|
#### NEXT STEPS!!!! USE CORRELATION TO DRAW FROM EACH MIGRANT IN A GIVEN YEAR
|
||||||
sum((DEMOGRAPHIC_DATA %>% filter(Year==C_YEAR,County==C_COUNTY,Age==1))[,4:5])
|
|
||||||
|
|
||||||
sum((DEMOGRAPHIC_DATA %>% filter(Year==C_YEAR,County==C_COUNTY,Age==0))[,4:5])
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
#############################OTHER TESTING
|
|
||||||
DATA <- POP_DATA %>% left_join(DEMOGRAPHIC_DATA) %>% filter(!is.na(Births))
|
|
||||||
DATA$Age_Group <- NA
|
|
||||||
DATA <- DATA %>% mutate(Age_Group=ifelse(Age<=5,"Infant",Age_Group))
|
|
||||||
DATA <- DATA %>% mutate(Age_Group=ifelse(Age>5 & Age<18,"Child",Age_Group))
|
|
||||||
DATA <- DATA %>% mutate(Age_Group=ifelse(Age>=18 & Age<25,"Young_Adult",Age_Group))
|
|
||||||
DATA <- DATA %>% mutate(Age_Group=ifelse(Age>=25 & Age<35,"Young_Working_Adult",Age_Group))
|
|
||||||
DATA <- DATA %>% mutate(Age_Group=ifelse(Age>=35 & Age<60,"Mid_Adult",Age_Group))
|
|
||||||
DATA <- DATA %>% mutate(Age_Group=ifelse(Age>=60,"Retired_Adult",Age_Group))
|
|
||||||
DATA %>% filter(Age_Group=="Retired_Adult")
|
|
||||||
DATA <- DATA %>% ungroup %>% group_by(Year,County,Population,Births,Deaths,Migration,Age_Group) %>% summarize(Num_Male=sum(Num_Male,na.omit=TRUE),Num_Female=sum(Num_Female,na.omit=TRUE)) %>% ungroup
|
|
||||||
TEMP <- DATA %>% select(-County) %>% pivot_wider(values_from=c(Num_Male,Num_Female),names_from=Age_Group)
|
|
||||||
|
|
||||||
corrplot(cor(TEMP,use="pairwise.complete.obs"))
|
|
||||||
|
|
||||||
REG_TEMP <- DATA %>% pivot_wider(values_from=c(Num_Male,Num_Female),names_from=Age_Group) %>% mutate(Population=Population-Births+Deaths)
|
|
||||||
REG_TEMP %>% arrange(County,Year) %>% filter(County!='Albany',Year>2015)
|
|
||||||
#############Looks like Births deaths and migration should be shifted back (or population forward)
|
|
||||||
POP_DATA %>% group_by(County) %>% arrange(Year) %>% mutate(PREV=Population-Births+Deaths-Migration) %>% arrange(County,Year) %>% filter(Year>2018)
|
|
||||||
(26500)-501+166+266
|
|
||||||
35836+541-184+1137-36209
|
|
||||||
(11831-13324)-259+83
|
|
||||||
DIFF <- 26519-26165
|
|
||||||
DIFF-501+166
|
|
||||||
(27380-26633)-413+146
|
|
||||||
C_YEAR <-1980
|
|
||||||
REG_TEMP %>% filter(Year==C_YEAR-1)
|
|
||||||
TEMP <- DEMOGRAPHIC_DATA %>% filter(County=='Albany', Year==C_YEAR)
|
|
||||||
sum(TEMP[1,4:5] )
|
|
||||||
TEMP[,4:5] <-DEMOGRAPHIC_DATA %>% filter(County=='Albany', Year==C_YEAR) %>% select(Num_Male,Num_Female)-DEMOGRAPHIC_DATA %>% filter(County=='Albany', Year==C_YEAR-1) %>% select(Num_Male,Num_Female)
|
|
||||||
TEMP
|
|
||||||
|
|
||||||
|
|
||||||
REG_TEMP
|
|
||||||
REG_TEMP$UPWARD <- ifelse(REG_TEMP$Migration>0,1,0)
|
|
||||||
REG_TEMP[,5:16] <- log(((REG_TEMP[,5:16])))
|
|
||||||
REG_TEMP$Migration <- log(abs(REG_TEMP$Migration))
|
|
||||||
|
|
||||||
summary(feols(Migration~UPWARD*(Num_Male_Infant+Num_Male_Child+Num_Male_Young_Adult+Num_Male_Young_Working_Adult+Num_Male_Retired_Adult+Num_Female_Infant+Num_Female_Child+Num_Female_Young_Adult+Num_Female_Young_Working_Adult+Num_Female_Retired_Adult)+Population+Population+Year|County,data=REG_TEMP))
|
|
||||||
summary(feols(Migration~UPWARD*(Num_Male_Infant+Num_Male_Child+Num_Male_Young_Adult+Num_Male_Young_Working_Adult+Num_Male_Retired_Adult+Num_Female_Infant+Num_Female_Child+Num_Female_Young_Adult+Num_Female_Young_Working_Adult+Num_Female_Retired_Adult)+Population+Population+Year|County,data=REG_TEMP))
|
|
||||||
|
|
||||||
summary(lm(Migration~.,data=REG_TEMP))
|
|
||||||
|
|
||||||
|
|
||||||
,Young_Adult=Age>=18,"Child",Age_Group))
|
|
||||||
%>% mutate(Child=Age<18,Young_Adult=Age>=18 & Age<35,Mid_Adult=Age>=35 & Age<=60,Retired_Adult=Age>60) %>% group_by(Year,County,Population,Births,Deaths,Migration,Child,Young_Adult,Mid_Adult,Retired_Adult) %>% summarize(Num_Male=sum(Num_Male),Num_Female =sum(Num_Female))
|
|
||||||
TEST <- POP_DATA %>% left_join(DEMOGRAPHIC_DATA) %>% filter(!is.na(Births)) %>% pivot_wider(names_from=Age,values_from=c(Num_Male,Num_Female))
|
|
||||||
TEST
|
|
||||||
head(colnames(TEST))
|
|
||||||
TEST <- TEST
|
|
||||||
corrplot(cor(TEST,use="pairwise.complete.obs"))
|
|
||||||
#Merger the two data sets and drop any records that cannot be used in the regression (this makes the "predict" function output the right number of records)
|
|
||||||
REG_DATA <- POP_DATA %>% left_join(DEMOGRAPHIC_DATA) %>% filter(!is.na(Births))
|
|
||||||
|
|
||||||
REG_DATA <- REG_DATA %>% group_by(County) %>% mutate(PREV_MIG=lag(Migration),PREV_TWO_MIG=lag(Migration,2),PREV_POP=lag(Population),PREV_BIRTHS=lag(Births)) %>% ungroup
|
|
||||||
REG_DATA$County <- factor(REG_DATA$County)
|
|
||||||
feols((Migration)~(PREV_MIG)+(PREV_TWO_MIG)+PREV_BIRTHS+PREV_POP|Year+County,data=REG_DATA)
|
|
||||||
REG_DATA %>% filter(!is.na(Births))
|
|
||||||
|
|||||||
@ -30,7 +30,8 @@ TBL <- TBL %>% filter(!is.na(Type)) %>% select(County,Type,everything())
|
|||||||
GROUP <- colnames(TBL)[-1:-2]
|
GROUP <- colnames(TBL)[-1:-2]
|
||||||
Data <- pivot_longer(TBL,all_of(GROUP),names_to="Year",values_to="Pop_Change")
|
Data <- pivot_longer(TBL,all_of(GROUP),names_to="Year",values_to="Pop_Change")
|
||||||
Data$County <- ifelse(toupper(Data$County)=="TOTAL","Wyoming",Data$County)
|
Data$County <- ifelse(toupper(Data$County)=="TOTAL","Wyoming",Data$County)
|
||||||
WY_COUNTY_DATA_SET <- pivot_wider(Data,names_from=Type,values_from=Pop_Change) %>% rename("Migration"=`Net Migration`) %>% mutate(Year=as.integer(Year),Births=parse_number(Births),Deaths=parse_number(Deaths),Migration=parse_number(Migration))
|
WY_COUNTY_DATA_SET <- pivot_wider(Data,names_from=Type,values_from=Pop_Change) %>% rename("Migration"=`Net Migration`) %>% mutate(Year=as.integer(Year),Births=parse_number(Births),Deaths=parse_number(Deaths),Migration=parse_number(Migration)) %>% mutate(Year=Year-1) #Data apears to be one off from populaiton
|
||||||
|
WY_COUNTY_DATA_SET[,"County"] <- gsub(" ","_",WY_COUNTY_DATA_SET %>% pull(County))
|
||||||
|
|
||||||
########################City and County Population Data 2020 to 2024
|
########################City and County Population Data 2020 to 2024
|
||||||
PAGE <- read_html('http://eadiv.state.wy.us/pop/Place-24EST.htm')
|
PAGE <- read_html('http://eadiv.state.wy.us/pop/Place-24EST.htm')
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user