Made data cleaning easier. Added a VAR
This commit is contained in:
parent
60127fb003
commit
6162487b95
134
Popultaion.R
134
Popultaion.R
@ -1,82 +1,86 @@
|
||||
library(rvest)
|
||||
library(tidyverse)
|
||||
library(fixest)
|
||||
#Data found on the page http://eadiv.state.wy.us/pop/
|
||||
PAGE <- read_html("http://eadiv.state.wy.us/pop/BirthDeathMig.htm")
|
||||
NODE <- html_element(PAGE ,"table")
|
||||
TBL <- html_table(NODE)
|
||||
source("Scripts/Functions.r")
|
||||
#source("Scripts/Load_Wyoming_Web_Data.r")
|
||||
DATA_TO_GATHER <- list()
|
||||
DATA_TO_GATHER[[length(DATA_TO_GATHER)+1]] <- c("WYPOP","WY_POP",TRUE)
|
||||
|
||||
ST <- which(toupper(TBL$X1)=="ALBANY")
|
||||
END <- which(toupper(TBL$X1)=="TOTAL")
|
||||
TYPES <- TBL[ST-2,1]
|
||||
DATA_TO_GATHER[[length(DATA_TO_GATHER)+1]] <- c("WYNQGSP","WY_GDP",TRUE)
|
||||
DATA_TO_GATHER[[length(DATA_TO_GATHER)+1]] <- c("MEHOINUSWYA646N","WY_MED_INCOME",TRUE)
|
||||
DATA_TO_GATHER[[length(DATA_TO_GATHER)+1]] <- c("BUSAPPWNSAWY","WY_BUISNESS_APPLICATIONS",FALSE)
|
||||
DATA_TO_GATHER[[length(DATA_TO_GATHER)+1]] <- c('ACTLISCOUWY','WY_HOUSES_FOR_SALE',FALSE)
|
||||
DATA_TO_GATHER[[length(DATA_TO_GATHER)+1]] <- c('WYRVAC','WY_RENTAL_VACANCY_RATE',FALSE)
|
||||
DATA_TO_GATHER[[length(DATA_TO_GATHER)+1]] <- c('WYBPPRIVSA','WY_PRIVATE_HOUSING',FALSE)
|
||||
#New Private Housing Units Authorized by Building Permits for Wyoming
|
||||
DATA_TO_GATHER[[length(DATA_TO_GATHER)+1]] <- c('B03002006E056023','LN_FIVE_YEAR_POP',FALSE)
|
||||
DATA_TO_GATHER[[length(DATA_TO_GATHER)+1]] <- c('GDPALL56023','LN_GDP',TRUE)
|
||||
DATA_TO_GATHER[[length(DATA_TO_GATHER)+1]] <- c('WYLINC3POP','LN_POP',FALSE)
|
||||
DATA_TO_GATHER[[length(DATA_TO_GATHER)+1]] <- c('LAUCN560230000000005','LN_EMPLOYMENT',FALSE)
|
||||
DATA_TO_GATHER[[length(DATA_TO_GATHER)+1]] <- c('BPPRIV056023','LN_PRIVE_HOUSING',FALSE)
|
||||
|
||||
ST_YEAR <- 1971
|
||||
ALL_DATA <- list()
|
||||
TBL <- TBL[,c(1,which(!is.na(as.numeric(TBL[ST[1],]))))]
|
||||
TBL <- TBL[,-ncol(TBL)]
|
||||
colnames(TBL) <- c("County",(ST_YEAR:(ST_YEAR+ncol(TBL)-1)))
|
||||
TBL$Type <- NA
|
||||
for(i in 1:length(ST)){
|
||||
TBL[ST[i]:END[i],"Type"]<- as.character(TYPES[i,1])
|
||||
DATA_TO_GATHER[[length(DATA_TO_GATHER)+1]] <- c('ENU5602320510','LN_NUM_ESTABLISHMENTS',FALSE)
|
||||
|
||||
DATA_TO_GATHER[[length(DATA_TO_GATHER)+1]] <- c('GDPALL56041','UINTA_GDP',TRUE)
|
||||
DATA_TO_GATHER[[length(DATA_TO_GATHER)+1]] <- c('WYUINT1POP','UINTA_POP',FALSE)
|
||||
DATA_TO_GATHER[[length(DATA_TO_GATHER)+1]] <- c('WYSUBL5POP','SUBLETTE_POP',FALSE)
|
||||
DATA_TO_GATHER[[length(DATA_TO_GATHER)+1]] <- c('WYSWEE7POP','SWEETWATER_POP',FALSE)
|
||||
DATA_TO_GATHER[[length(DATA_TO_GATHER)+1]] <- c('WYTETO9POP','TETON_POP',FALSE)
|
||||
##Idaho Counties
|
||||
DATA_TO_GATHER[[length(DATA_TO_GATHER)+1]] <- c('IDBEAR7POP','BEAR_LAKE_POP',FALSE)
|
||||
DATA_TO_GATHER[[length(DATA_TO_GATHER)+1]] <- c('IDCARI9POP','CARIBOU_POP',FALSE)
|
||||
|
||||
DATA_TO_GATHER[[length(DATA_TO_GATHER)+1]] <- c('IDBONN0POP','BONNEVILLE_POP',FALSE)
|
||||
###US Population
|
||||
DATA_TO_GATHER[[length(DATA_TO_GATHER)+1]] <- c('POPTOTUSA647NWDB','US_POP',FALSE)
|
||||
|
||||
|
||||
|
||||
|
||||
for(x in 1:length(DATA_TO_GATHER)){
|
||||
CURRENT <- DATA_TO_GATHER[[x]]
|
||||
|
||||
if(CURRENT[3]){C_DATA <- CPI_ADJUST(FRED_GET(CURRENT[1],CURRENT[2]))}else{C_DATA <- FRED_GET(CURRENT[1],CURRENT[2])}
|
||||
if(x==1){RES <- C_DATA}else{RES <- RES %>% full_join(C_DATA)}
|
||||
rm(CURRENT,C_DATA)
|
||||
}
|
||||
TBL[ST[2]:END[2],"Type"] <- as.character(TYPES[2,1])
|
||||
TBL$Type
|
||||
TBL <- TBL %>% filter(!is.na(Type)) %>% select(County,Type,everything())
|
||||
GROUP <- colnames(TBL)[-1:-2]
|
||||
Data <- pivot_longer(TBL,all_of(GROUP),names_to="Year",values_to="Pop_Change")
|
||||
Data$County <- ifelse(toupper(Data$County)=="TOTAL","Wyoming",Data$County)
|
||||
Data <- pivot_wider(Data,names_from=Type,values_from=Pop_Change)
|
||||
colnames(Data)[5] <-"Migration"
|
||||
HOUSE_INCOME <- read_csv("https://fred.stlouisfed.org/graph/fredgraph.csv?bgcolor=%23ebf3fb&chart_type=line&drp=0&fo=open%20sans&graph_bgcolor=%23ffffff&height=450&mode=fred&recession_bars=on&txtcolor=%23444444&ts=12&tts=12&width=1320&nt=0&thu=0&trc=0&show_legend=yes&show_axis_titles=yes&show_tooltip=yes&id=MEHOINUSWYA672N&scale=left&cosd=1984-01-01&coed=2023-01-01&line_color=%230073e6&link_values=false&line_style=solid&mark_type=none&mw=3&lw=3&ost=-99999&oet=99999&mma=0&fml=a&fq=Annual&fam=avg&fgst=lin&fgsnd=2020-02-01&line_index=1&transformation=lin&vintage_date=2025-08-29&revision_date=2025-08-29&nd=1984-01-01")
|
||||
HOUSE_INCOME <-
|
||||
colnames(HOUSE_INCOME) <- c("Year","WY_INCOME")
|
||||
HOUSE_INCOME <- HOUSE_INCOME %>% mutate(Year=year(Year))
|
||||
DATA <- RES %>% mutate(US_POP=US_POP-WY_POP,WY_POP=WY_POP-LN_POP-UINTA_POP-SUBLETTE_POP-SWEETWATER_POP-TETON_POP)
|
||||
colnames(DATA)
|
||||
feols(log(LN_POP) ~log(US_POP)+log(UINTA_POP)+log(WY_POP)+log(SUBLETTE_POP)+log(SWEETWATER_POP)+log(TETON_POP)+log(BEAR_LAKE_POP)+log(CARIBOU_POP)+log(BONNEVILLE_POP)+YEAR,data=DATA)
|
||||
TS_DATA_ORIG <- DATA %>% select(YEAR,LN_POP,US_POP,WY_POP,UINTA_POP,SUBLETTE_POP,SWEETWATER_POP,TETON_POP,BEAR_LAKE_POP,CARIBOU_POP,BONNEVILLE_POP) %>% filter(!is.na(LN_POP)) %>% arrange(YEAR) %>% select(-YEAR)
|
||||
|
||||
#Employment in construction of Wyoming
|
||||
CON_EMP <- read_csv('https://fred.stlouisfed.org/graph/fredgraph.csv?bgcolor=%23ebf3fb&chart_type=line&drp=0&fo=open%20sans&graph_bgcolor=%23ffffff&height=450&mode=fred&recession_bars=on&txtcolor=%23444444&ts=12&tts=12&width=1320&nt=0&thu=0&trc=0&show_legend=yes&show_axis_titles=yes&show_tooltip=yes&id=WYCONS&scale=left&cosd=1990-01-01&coed=2025-07-01&line_color=%230073e6&link_values=false&line_style=solid&mark_type=none&mw=3&lw=3&ost=-99999&oet=99999&mma=0&fml=a&fq=Monthly&fam=avg&fgst=lin&fgsnd=2020-02-01&line_index=1&transformation=lin&vintage_date=2025-08-29&revision_date=2025-08-29&nd=1990-01-01')
|
||||
colnames(CON_EMP) <- c("Year","WY_CON_EMP")
|
||||
CON_EMP <- CON_EMP%>% mutate(Year=year(Year))
|
||||
LINC_UNEMP <- 'https://fred.stlouisfed.org/graph/fredgraph.csv?bgcolor=%23ebf3fb&chart_type=line&drp=0&fo=open%20sans&graph_bgcolor=%23ffffff&height=450&mode=fred&recession_bars=on&txtcolor=%23444444&ts=12&tts=12&width=1320&nt=0&thu=0&trc=0&show_legend=yes&show_axis_titles=yes&show_tooltip=yes&id=WYLINC3URN&scale=left&cosd=1990-01-01&coed=2025-07-01&line_color=%230073e6&link_values=false&line_style=solid&mark_type=none&mw=3&lw=3&ost=-99999&oet=99999&mma=0&fml=a&fq=Monthly&fam=avg&fgst=lin&fgsnd=2020-02-01&line_index=1&transformation=lin&vintage_date=2025-08-29&revision_date=2025-08-29&nd=1990-01-01'
|
||||
LINC_GDP <- 'https://fred.stlouisfed.org/graph/fredgraph.csv?bgcolor=%23ebf3fb&chart_type=line&drp=0&fo=open%20sans&graph_bgcolor=%23ffffff&height=450&mode=fred&recession_bars=on&txtcolor=%23444444&ts=12&tts=12&width=1320&nt=0&thu=0&trc=0&show_legend=yes&show_axis_titles=yes&show_tooltip=yes&id=REALGDPALL56023&scale=left&cosd=2001-01-01&coed=2023-01-01&line_color=%230073e6&link_values=false&line_style=solid&mark_type=none&mw=3&lw=3&ost=-99999&oet=99999&mma=0&fml=a&fq=Annual&fam=avg&fgst=lin&fgsnd=2020-02-01&line_index=1&transformation=lin&vintage_date=2025-08-29&revision_date=2025-08-29&nd=2001-01-01'
|
||||
LINC_GDP<- read_csv(LINC_GDP)
|
||||
colnames(LINC_GDP) <- c("Year","LINC_GDP")
|
||||
LINC_GDP <- LINC_GDP %>% mutate(Year=year(Year))
|
||||
|
||||
|
||||
LINC_PRIV_IND <- 'https://fred.stlouisfed.org/graph/fredgraph.csv?bgcolor=%23ebf3fb&chart_type=line&drp=0&fo=open%20sans&graph_bgcolor=%23ffffff&height=450&mode=fred&recession_bars=on&txtcolor=%23444444&ts=12&tts=12&width=1320&nt=0&thu=0&trc=0&show_legend=yes&show_axis_titles=yes&show_tooltip=yes&id=ENU5602320510&scale=left&cosd=1990-01-01&coed=2024-10-01&line_color=%230073e6&link_values=false&line_style=solid&mark_type=none&mw=3&lw=3&ost=-99999&oet=99999&mma=0&fml=a&fq=Annual&fam=avg&fgst=lin&fgsnd=2020-02-01&line_index=1&transformation=lin&vintage_date=2025-08-29&revision_date=2025-08-29&nd=1990-01-01'
|
||||
LINC_PRIV_IND <- read_csv(LINC_PRIV_IND)
|
||||
LINC_PRIV_IND
|
||||
colnames(LINC_PRIV_IND) <- c("Year","LINC_PRIV_IND")
|
||||
LINC_PRIV_IND <- LINC_PRIV_IND %>% mutate(Year=year(Year))
|
||||
|
||||
LINC_LABOR_FORCE <- 'https://fred.stlouisfed.org/graph/fredgraph.csv?bgcolor=%23ebf3fb&chart_type=line&drp=0&fo=open%20sans&graph_bgcolor=%23ffffff&height=450&mode=fred&recession_bars=on&txtcolor=%23444444&ts=12&tts=12&width=1320&nt=0&thu=0&trc=0&show_legend=yes&show_axis_titles=yes&show_tooltip=yes&id=WYLINC3LFN&scale=left&cosd=1990-01-01&coed=2025-07-01&line_color=%230073e6&link_values=false&line_style=solid&mark_type=none&mw=3&lw=3&ost=-99999&oet=99999&mma=0&fml=a&fq=Annual&fam=avg&fgst=lin&fgsnd=2020-02-01&line_index=1&transformation=lin&vintage_date=2025-08-29&revision_date=2025-08-29&nd=1990-01-01'
|
||||
LINC_LABOR_FORCE<- read_csv(LINC_LABOR_FORCE)
|
||||
colnames(LINC_LABOR_FORCE) <- c("Year","LINC_LABOR_FORCE")
|
||||
LINC_LABOR_FORCE<- LINC_LABOR_FORCE%>% mutate(Year=year(Year))
|
||||
TS_DATA <- diff(log(ts(TS_DATA_ORIG,start=c(1970),end=c(2024),frequency=1)))
|
||||
library("forecast")
|
||||
library("vars")
|
||||
VARselect(TS_DATA,lag.max=4,type="const")
|
||||
VAR1 <- VAR(TS_DATA,p=3,type="const",season=NULL,exog=NULL)
|
||||
plot(irf(VAR1,response="LN_POP"))
|
||||
plot(forecast(VAR1,))
|
||||
RES <- (predict(VAR1, n.ahead = 20, ci = 0.95))
|
||||
names(RES )
|
||||
names(RES$fcst)
|
||||
RES$fcst$LN_POP %>% as_tibble
|
||||
CURRENT_POP <- max(DATA$LN_POP,na.rm=TRUE)
|
||||
0.0157*CURRENT_POP*1000
|
||||
0.083*CURRENT_POP*1000
|
||||
-0.0489*CURRENT_POP*1000
|
||||
|
||||
|
||||
|
||||
LINC_GOV_GDP <- 'https://fred.stlouisfed.org/graph/fredgraph.csv?bgcolor=%23ebf3fb&chart_type=line&drp=0&fo=open%20sans&graph_bgcolor=%23ffffff&height=450&mode=fred&recession_bars=on&txtcolor=%23444444&ts=12&tts=12&width=1320&nt=0&thu=0&trc=0&show_legend=yes&show_axis_titles=yes&show_tooltip=yes&id=GDPGOVT56023&scale=left&cosd=2001-01-01&coed=2023-01-01&line_color=%230073e6&link_values=false&line_style=solid&mark_type=none&mw=3&lw=3&ost=-99999&oet=99999&mma=0&fml=a&fq=Annual&fam=avg&fgst=lin&fgsnd=2020-02-01&line_index=1&transformation=lin&vintage_date=2025-08-29&revision_date=2025-08-29&nd=2001-01-01'
|
||||
LINC_GOV_GDP <- read_csv(LINC_GOV_GDP)
|
||||
colnames(LINC_GOV_GDP) <- c("Year","LINC_GOV_GDP")
|
||||
LINC_GOV_GDP<-LINC_GOV_GDP%>% mutate(Year=year(Year))
|
||||
|
||||
# View the forecasted values and confidence intervals
|
||||
print(forecast_results)
|
||||
|
||||
# You can also plot the forecasts
|
||||
plot(forecast_results)
|
||||
|
||||
|
||||
LINC_POP <- 'https://fred.stlouisfed.org/graph/fredgraph.csv?bgcolor=%23ebf3fb&chart_type=line&drp=0&fo=open%20sans&graph_bgcolor=%23ffffff&height=450&mode=fred&recession_bars=on&txtcolor=%23444444&ts=12&tts=12&width=1320&nt=0&thu=0&trc=0&show_legend=yes&show_axis_titles=yes&show_tooltip=yes&id=WYLINC3POP&scale=left&cosd=1970-01-01&coed=2024-01-01&line_color=%230073e6&link_values=false&line_style=solid&mark_type=none&mw=3&lw=3&ost=-99999&oet=99999&mma=0&fml=a&fq=Annual&fam=avg&fgst=lin&fgsnd=2020-02-01&line_index=1&transformation=lin&vintage_date=2025-08-29&revision_date=2025-08-29&nd=1970-01-01'
|
||||
LINC_POP <- read_csv(LINC_POP)
|
||||
colnames(LINC_POP) <- c("Year","LINC_POP")
|
||||
LINC_POP <- LINC_POP %>% mutate(Year=year(Year))
|
||||
?forecast
|
||||
?forecast
|
||||
install.packages("sparsegl")
|
||||
plot(VAR1)
|
||||
|
||||
|
||||
WY_POP <- 'https://fred.stlouisfed.org/graph/fredgraph.csv?bgcolor=%23ebf3fb&chart_type=line&drp=0&fo=open%20sans&graph_bgcolor=%23ffffff&height=450&mode=fred&recession_bars=on&txtcolor=%23444444&ts=12&tts=12&width=1320&nt=0&thu=0&trc=0&show_legend=yes&show_axis_titles=yes&show_tooltip=yes&id=WYPOP&scale=left&cosd=1900-01-01&coed=2024-01-01&line_color=%230073e6&link_values=false&line_style=solid&mark_type=none&mw=3&lw=3&ost=-99999&oet=99999&mma=0&fml=a&fq=Annual&fam=avg&fgst=lin&fgsnd=2020-02-01&line_index=1&transformation=lin&vintage_date=2025-08-29&revision_date=2025-08-29&nd=1900-01-01'
|
||||
WY_POP <- read_csv(WY_POP)
|
||||
colnames(WY_POP) <- c("Year","WY_POP")
|
||||
WY_POP <- WY_POP %>% mutate(Year=year(Year))
|
||||
|
||||
LINC_PRIV_IND
|
||||
DF <- LINC_POP %>% left_join(WY_POP) %>% left_join(LINC_PRIV_IND) %>% left_join(LINC_LABOR_FORCE) %>% left_join(LINC_GOV_GDP) %>% left_join(LINC_GDP) %>% left_join(WY_POP)
|
||||
DF %>% select(LINC_POP,WY_POP)
|
||||
DF <- DF %>% mutate(WY_POP=WY_POP-LINC_POP)
|
||||
#Check a VAR it looks like lags on changes to Private industry could affect other variables
|
||||
#Idea check a SVAR placing limits on which shocks are first
|
||||
feols((LINC_POP)~(WY_POP)+log(LINC_PRIV_IND)+Year,data=DF)
|
||||
|
||||
26
Scripts/Functions.r
Normal file
26
Scripts/Functions.r
Normal file
@ -0,0 +1,26 @@
|
||||
#A function to extract date from FRED. Assumes the date is either annual or monthly.
|
||||
FRED_GET <- function(FRED_SERIES_ID,NAME=NA,ST_DATE='1890-01-91',END_DATE=Sys.Date(),ANNUAL_DATA=TRUE){
|
||||
NAME <- ifelse(is.na(NAME),FRED_SERIES_ID,NAME)
|
||||
DATA_TIME_FRAME <- ifelse(ANNUAL_DATA,"Annual","Monthly")
|
||||
URL <- paste0('https://fred.stlouisfed.org/graph/fredgraph.csv?bgcolor=%23ebf3fb&chart_type=line&drp=0&fo=open%20sans&graph_bgcolor=%23ffffff&height=450&mode=fred&recession_bars=off&txtcolor=%23444444&ts=12&tts=12&width=827&nt=0&thu=0&trc=0&show_legend=no&show_axis_titles=no&show_tooltip=no&id=',FRED_SERIES_ID,'&scale=left&cosd=',ST_DATE,'&coed=',END_DATE,'&line_color=%230073e6&link_values=false&line_style=solid&mark_type=none&mw=3&lw=3&ost=-99999&oet=99999&mma=0&fml=a&fq=',DATA_TIME_FRAME,'&fam=avg&fgst=lin&fgsnd=2020-02-01&line_index=1&transformation=lin&vintage_date=',END_DATE,'&revision_date=',END_DATE,'&nd=',END_DATE)
|
||||
DATA <- read_csv(URL)
|
||||
DATA <- DATA[which(!is.na(DATA[,2] )),]
|
||||
colnames(DATA)[2] <- NAME
|
||||
if(ANNUAL_DATA){
|
||||
colnames(DATA)[1] <- "YEAR"
|
||||
DATA$YEAR <- year(DATA$YEAR)
|
||||
}else{
|
||||
colnames(DATA)[1] <- "DATE"
|
||||
}
|
||||
return(DATA)
|
||||
}
|
||||
|
||||
CPI_ADJUST <- function(DATA){
|
||||
CPI <- FRED_GET("CPIAUCSL") %>% filter(!is.na(CPIAUCSL))
|
||||
CPI$CPI_ADJ <- as.numeric(CPI[which.max(CPI$YEAR),2])/CPI$CPIAUCSL
|
||||
CPI <- CPI %>% select(-"CPIAUCSL")
|
||||
DATA <- DATA %>% left_join(CPI)
|
||||
DATA[,2] <- DATA[,2]*DATA[,"CPI_ADJ"]
|
||||
DATA <- DATA%>% select(-"CPI_ADJ")
|
||||
return(DATA)
|
||||
}
|
||||
28
Scripts/Load_Wyoming_Web_Data.r
Normal file
28
Scripts/Load_Wyoming_Web_Data.r
Normal file
@ -0,0 +1,28 @@
|
||||
library(rvest)
|
||||
#Data found on the page http://eadiv.state.wy.us/pop/
|
||||
PAGE <- read_html("http://eadiv.state.wy.us/pop/BirthDeathMig.htm")
|
||||
NODE <- html_element(PAGE ,"table")
|
||||
TBL <- html_table(NODE)
|
||||
|
||||
ST <- which(toupper(TBL$X1)=="ALBANY")
|
||||
END <- which(toupper(TBL$X1)=="TOTAL")
|
||||
TYPES <- TBL[ST-2,1]
|
||||
|
||||
ST_YEAR <- 1971
|
||||
ALL_DATA <- list()
|
||||
TBL <- TBL[,c(1,which(!is.na(as.numeric(TBL[ST[1],]))))]
|
||||
TBL <- TBL[,-ncol(TBL)]
|
||||
colnames(TBL) <- c("County",(ST_YEAR:(ST_YEAR+ncol(TBL)-1)))
|
||||
TBL$Type <- NA
|
||||
for(i in 1:length(ST)){
|
||||
TBL[ST[i]:END[i],"Type"]<- as.character(TYPES[i,1])
|
||||
}
|
||||
TBL[ST[2]:END[2],"Type"] <- as.character(TYPES[2,1])
|
||||
TBL$Type
|
||||
TBL <- TBL %>% filter(!is.na(Type)) %>% select(County,Type,everything())
|
||||
GROUP <- colnames(TBL)[-1:-2]
|
||||
Data <- pivot_longer(TBL,all_of(GROUP),names_to="Year",values_to="Pop_Change")
|
||||
Data$County <- ifelse(toupper(Data$County)=="TOTAL","Wyoming",Data$County)
|
||||
WY_COUNTY_DATA_SET <- pivot_wider(Data,names_from=Type,values_from=Pop_Change)
|
||||
colnames(Data)[5] <-"Migration"
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user