From 6162487b957a12f2cb15820046b8e634e0827cb9 Mon Sep 17 00:00:00 2001 From: Alex Gebben Work Date: Tue, 9 Sep 2025 17:12:57 -0600 Subject: [PATCH] Made data cleaning easier. Added a VAR --- Popultaion.R | 134 ++++++++++++++++---------------- Scripts/Functions.r | 26 +++++++ Scripts/Load_Wyoming_Web_Data.r | 28 +++++++ 3 files changed, 123 insertions(+), 65 deletions(-) create mode 100644 Scripts/Functions.r create mode 100644 Scripts/Load_Wyoming_Web_Data.r diff --git a/Popultaion.R b/Popultaion.R index 94d07e7..f13b0a2 100644 --- a/Popultaion.R +++ b/Popultaion.R @@ -1,82 +1,86 @@ -library(rvest) library(tidyverse) library(fixest) -#Data found on the page http://eadiv.state.wy.us/pop/ -PAGE <- read_html("http://eadiv.state.wy.us/pop/BirthDeathMig.htm") -NODE <- html_element(PAGE ,"table") -TBL <- html_table(NODE) +source("Scripts/Functions.r") +#source("Scripts/Load_Wyoming_Web_Data.r") +DATA_TO_GATHER <- list() +DATA_TO_GATHER[[length(DATA_TO_GATHER)+1]] <- c("WYPOP","WY_POP",TRUE) -ST <- which(toupper(TBL$X1)=="ALBANY") -END <- which(toupper(TBL$X1)=="TOTAL") -TYPES <- TBL[ST-2,1] +DATA_TO_GATHER[[length(DATA_TO_GATHER)+1]] <- c("WYNQGSP","WY_GDP",TRUE) +DATA_TO_GATHER[[length(DATA_TO_GATHER)+1]] <- c("MEHOINUSWYA646N","WY_MED_INCOME",TRUE) +DATA_TO_GATHER[[length(DATA_TO_GATHER)+1]] <- c("BUSAPPWNSAWY","WY_BUISNESS_APPLICATIONS",FALSE) +DATA_TO_GATHER[[length(DATA_TO_GATHER)+1]] <- c('ACTLISCOUWY','WY_HOUSES_FOR_SALE',FALSE) +DATA_TO_GATHER[[length(DATA_TO_GATHER)+1]] <- c('WYRVAC','WY_RENTAL_VACANCY_RATE',FALSE) +DATA_TO_GATHER[[length(DATA_TO_GATHER)+1]] <- c('WYBPPRIVSA','WY_PRIVATE_HOUSING',FALSE) +#New Private Housing Units Authorized by Building Permits for Wyoming +DATA_TO_GATHER[[length(DATA_TO_GATHER)+1]] <- c('B03002006E056023','LN_FIVE_YEAR_POP',FALSE) +DATA_TO_GATHER[[length(DATA_TO_GATHER)+1]] <- c('GDPALL56023','LN_GDP',TRUE) +DATA_TO_GATHER[[length(DATA_TO_GATHER)+1]] <- c('WYLINC3POP','LN_POP',FALSE) +DATA_TO_GATHER[[length(DATA_TO_GATHER)+1]] <- c('LAUCN560230000000005','LN_EMPLOYMENT',FALSE) +DATA_TO_GATHER[[length(DATA_TO_GATHER)+1]] <- c('BPPRIV056023','LN_PRIVE_HOUSING',FALSE) -ST_YEAR <- 1971 -ALL_DATA <- list() -TBL <- TBL[,c(1,which(!is.na(as.numeric(TBL[ST[1],]))))] -TBL <- TBL[,-ncol(TBL)] -colnames(TBL) <- c("County",(ST_YEAR:(ST_YEAR+ncol(TBL)-1))) -TBL$Type <- NA -for(i in 1:length(ST)){ - TBL[ST[i]:END[i],"Type"]<- as.character(TYPES[i,1]) +DATA_TO_GATHER[[length(DATA_TO_GATHER)+1]] <- c('ENU5602320510','LN_NUM_ESTABLISHMENTS',FALSE) + +DATA_TO_GATHER[[length(DATA_TO_GATHER)+1]] <- c('GDPALL56041','UINTA_GDP',TRUE) +DATA_TO_GATHER[[length(DATA_TO_GATHER)+1]] <- c('WYUINT1POP','UINTA_POP',FALSE) +DATA_TO_GATHER[[length(DATA_TO_GATHER)+1]] <- c('WYSUBL5POP','SUBLETTE_POP',FALSE) +DATA_TO_GATHER[[length(DATA_TO_GATHER)+1]] <- c('WYSWEE7POP','SWEETWATER_POP',FALSE) +DATA_TO_GATHER[[length(DATA_TO_GATHER)+1]] <- c('WYTETO9POP','TETON_POP',FALSE) +##Idaho Counties +DATA_TO_GATHER[[length(DATA_TO_GATHER)+1]] <- c('IDBEAR7POP','BEAR_LAKE_POP',FALSE) +DATA_TO_GATHER[[length(DATA_TO_GATHER)+1]] <- c('IDCARI9POP','CARIBOU_POP',FALSE) + +DATA_TO_GATHER[[length(DATA_TO_GATHER)+1]] <- c('IDBONN0POP','BONNEVILLE_POP',FALSE) +###US Population +DATA_TO_GATHER[[length(DATA_TO_GATHER)+1]] <- c('POPTOTUSA647NWDB','US_POP',FALSE) + + + + +for(x in 1:length(DATA_TO_GATHER)){ + CURRENT <- DATA_TO_GATHER[[x]] + + if(CURRENT[3]){C_DATA <- CPI_ADJUST(FRED_GET(CURRENT[1],CURRENT[2]))}else{C_DATA <- FRED_GET(CURRENT[1],CURRENT[2])} + if(x==1){RES <- C_DATA}else{RES <- RES %>% full_join(C_DATA)} + rm(CURRENT,C_DATA) } -TBL[ST[2]:END[2],"Type"] <- as.character(TYPES[2,1]) -TBL$Type -TBL <- TBL %>% filter(!is.na(Type)) %>% select(County,Type,everything()) -GROUP <- colnames(TBL)[-1:-2] -Data <- pivot_longer(TBL,all_of(GROUP),names_to="Year",values_to="Pop_Change") -Data$County <- ifelse(toupper(Data$County)=="TOTAL","Wyoming",Data$County) -Data <- pivot_wider(Data,names_from=Type,values_from=Pop_Change) -colnames(Data)[5] <-"Migration" -HOUSE_INCOME <- read_csv("https://fred.stlouisfed.org/graph/fredgraph.csv?bgcolor=%23ebf3fb&chart_type=line&drp=0&fo=open%20sans&graph_bgcolor=%23ffffff&height=450&mode=fred&recession_bars=on&txtcolor=%23444444&ts=12&tts=12&width=1320&nt=0&thu=0&trc=0&show_legend=yes&show_axis_titles=yes&show_tooltip=yes&id=MEHOINUSWYA672N&scale=left&cosd=1984-01-01&coed=2023-01-01&line_color=%230073e6&link_values=false&line_style=solid&mark_type=none&mw=3&lw=3&ost=-99999&oet=99999&mma=0&fml=a&fq=Annual&fam=avg&fgst=lin&fgsnd=2020-02-01&line_index=1&transformation=lin&vintage_date=2025-08-29&revision_date=2025-08-29&nd=1984-01-01") -HOUSE_INCOME <- -colnames(HOUSE_INCOME) <- c("Year","WY_INCOME") -HOUSE_INCOME <- HOUSE_INCOME %>% mutate(Year=year(Year)) +DATA <- RES %>% mutate(US_POP=US_POP-WY_POP,WY_POP=WY_POP-LN_POP-UINTA_POP-SUBLETTE_POP-SWEETWATER_POP-TETON_POP) +colnames(DATA) +feols(log(LN_POP) ~log(US_POP)+log(UINTA_POP)+log(WY_POP)+log(SUBLETTE_POP)+log(SWEETWATER_POP)+log(TETON_POP)+log(BEAR_LAKE_POP)+log(CARIBOU_POP)+log(BONNEVILLE_POP)+YEAR,data=DATA) +TS_DATA_ORIG <- DATA %>% select(YEAR,LN_POP,US_POP,WY_POP,UINTA_POP,SUBLETTE_POP,SWEETWATER_POP,TETON_POP,BEAR_LAKE_POP,CARIBOU_POP,BONNEVILLE_POP) %>% filter(!is.na(LN_POP)) %>% arrange(YEAR) %>% select(-YEAR) -#Employment in construction of Wyoming -CON_EMP <- read_csv('https://fred.stlouisfed.org/graph/fredgraph.csv?bgcolor=%23ebf3fb&chart_type=line&drp=0&fo=open%20sans&graph_bgcolor=%23ffffff&height=450&mode=fred&recession_bars=on&txtcolor=%23444444&ts=12&tts=12&width=1320&nt=0&thu=0&trc=0&show_legend=yes&show_axis_titles=yes&show_tooltip=yes&id=WYCONS&scale=left&cosd=1990-01-01&coed=2025-07-01&line_color=%230073e6&link_values=false&line_style=solid&mark_type=none&mw=3&lw=3&ost=-99999&oet=99999&mma=0&fml=a&fq=Monthly&fam=avg&fgst=lin&fgsnd=2020-02-01&line_index=1&transformation=lin&vintage_date=2025-08-29&revision_date=2025-08-29&nd=1990-01-01') -colnames(CON_EMP) <- c("Year","WY_CON_EMP") -CON_EMP <- CON_EMP%>% mutate(Year=year(Year)) -LINC_UNEMP <- 'https://fred.stlouisfed.org/graph/fredgraph.csv?bgcolor=%23ebf3fb&chart_type=line&drp=0&fo=open%20sans&graph_bgcolor=%23ffffff&height=450&mode=fred&recession_bars=on&txtcolor=%23444444&ts=12&tts=12&width=1320&nt=0&thu=0&trc=0&show_legend=yes&show_axis_titles=yes&show_tooltip=yes&id=WYLINC3URN&scale=left&cosd=1990-01-01&coed=2025-07-01&line_color=%230073e6&link_values=false&line_style=solid&mark_type=none&mw=3&lw=3&ost=-99999&oet=99999&mma=0&fml=a&fq=Monthly&fam=avg&fgst=lin&fgsnd=2020-02-01&line_index=1&transformation=lin&vintage_date=2025-08-29&revision_date=2025-08-29&nd=1990-01-01' -LINC_GDP <- 'https://fred.stlouisfed.org/graph/fredgraph.csv?bgcolor=%23ebf3fb&chart_type=line&drp=0&fo=open%20sans&graph_bgcolor=%23ffffff&height=450&mode=fred&recession_bars=on&txtcolor=%23444444&ts=12&tts=12&width=1320&nt=0&thu=0&trc=0&show_legend=yes&show_axis_titles=yes&show_tooltip=yes&id=REALGDPALL56023&scale=left&cosd=2001-01-01&coed=2023-01-01&line_color=%230073e6&link_values=false&line_style=solid&mark_type=none&mw=3&lw=3&ost=-99999&oet=99999&mma=0&fml=a&fq=Annual&fam=avg&fgst=lin&fgsnd=2020-02-01&line_index=1&transformation=lin&vintage_date=2025-08-29&revision_date=2025-08-29&nd=2001-01-01' -LINC_GDP<- read_csv(LINC_GDP) -colnames(LINC_GDP) <- c("Year","LINC_GDP") -LINC_GDP <- LINC_GDP %>% mutate(Year=year(Year)) - - -LINC_PRIV_IND <- 'https://fred.stlouisfed.org/graph/fredgraph.csv?bgcolor=%23ebf3fb&chart_type=line&drp=0&fo=open%20sans&graph_bgcolor=%23ffffff&height=450&mode=fred&recession_bars=on&txtcolor=%23444444&ts=12&tts=12&width=1320&nt=0&thu=0&trc=0&show_legend=yes&show_axis_titles=yes&show_tooltip=yes&id=ENU5602320510&scale=left&cosd=1990-01-01&coed=2024-10-01&line_color=%230073e6&link_values=false&line_style=solid&mark_type=none&mw=3&lw=3&ost=-99999&oet=99999&mma=0&fml=a&fq=Annual&fam=avg&fgst=lin&fgsnd=2020-02-01&line_index=1&transformation=lin&vintage_date=2025-08-29&revision_date=2025-08-29&nd=1990-01-01' -LINC_PRIV_IND <- read_csv(LINC_PRIV_IND) -LINC_PRIV_IND -colnames(LINC_PRIV_IND) <- c("Year","LINC_PRIV_IND") -LINC_PRIV_IND <- LINC_PRIV_IND %>% mutate(Year=year(Year)) - -LINC_LABOR_FORCE <- 'https://fred.stlouisfed.org/graph/fredgraph.csv?bgcolor=%23ebf3fb&chart_type=line&drp=0&fo=open%20sans&graph_bgcolor=%23ffffff&height=450&mode=fred&recession_bars=on&txtcolor=%23444444&ts=12&tts=12&width=1320&nt=0&thu=0&trc=0&show_legend=yes&show_axis_titles=yes&show_tooltip=yes&id=WYLINC3LFN&scale=left&cosd=1990-01-01&coed=2025-07-01&line_color=%230073e6&link_values=false&line_style=solid&mark_type=none&mw=3&lw=3&ost=-99999&oet=99999&mma=0&fml=a&fq=Annual&fam=avg&fgst=lin&fgsnd=2020-02-01&line_index=1&transformation=lin&vintage_date=2025-08-29&revision_date=2025-08-29&nd=1990-01-01' -LINC_LABOR_FORCE<- read_csv(LINC_LABOR_FORCE) -colnames(LINC_LABOR_FORCE) <- c("Year","LINC_LABOR_FORCE") -LINC_LABOR_FORCE<- LINC_LABOR_FORCE%>% mutate(Year=year(Year)) +TS_DATA <- diff(log(ts(TS_DATA_ORIG,start=c(1970),end=c(2024),frequency=1))) +library("forecast") +library("vars") +VARselect(TS_DATA,lag.max=4,type="const") +VAR1 <- VAR(TS_DATA,p=3,type="const",season=NULL,exog=NULL) +plot(irf(VAR1,response="LN_POP")) +plot(forecast(VAR1,)) +RES <- (predict(VAR1, n.ahead = 20, ci = 0.95)) +names(RES ) +names(RES$fcst) +RES$fcst$LN_POP %>% as_tibble +CURRENT_POP <- max(DATA$LN_POP,na.rm=TRUE) +0.0157*CURRENT_POP*1000 +0.083*CURRENT_POP*1000 +-0.0489*CURRENT_POP*1000 -LINC_GOV_GDP <- 'https://fred.stlouisfed.org/graph/fredgraph.csv?bgcolor=%23ebf3fb&chart_type=line&drp=0&fo=open%20sans&graph_bgcolor=%23ffffff&height=450&mode=fred&recession_bars=on&txtcolor=%23444444&ts=12&tts=12&width=1320&nt=0&thu=0&trc=0&show_legend=yes&show_axis_titles=yes&show_tooltip=yes&id=GDPGOVT56023&scale=left&cosd=2001-01-01&coed=2023-01-01&line_color=%230073e6&link_values=false&line_style=solid&mark_type=none&mw=3&lw=3&ost=-99999&oet=99999&mma=0&fml=a&fq=Annual&fam=avg&fgst=lin&fgsnd=2020-02-01&line_index=1&transformation=lin&vintage_date=2025-08-29&revision_date=2025-08-29&nd=2001-01-01' -LINC_GOV_GDP <- read_csv(LINC_GOV_GDP) -colnames(LINC_GOV_GDP) <- c("Year","LINC_GOV_GDP") -LINC_GOV_GDP<-LINC_GOV_GDP%>% mutate(Year=year(Year)) + +# View the forecasted values and confidence intervals +print(forecast_results) + +# You can also plot the forecasts +plot(forecast_results) -LINC_POP <- 'https://fred.stlouisfed.org/graph/fredgraph.csv?bgcolor=%23ebf3fb&chart_type=line&drp=0&fo=open%20sans&graph_bgcolor=%23ffffff&height=450&mode=fred&recession_bars=on&txtcolor=%23444444&ts=12&tts=12&width=1320&nt=0&thu=0&trc=0&show_legend=yes&show_axis_titles=yes&show_tooltip=yes&id=WYLINC3POP&scale=left&cosd=1970-01-01&coed=2024-01-01&line_color=%230073e6&link_values=false&line_style=solid&mark_type=none&mw=3&lw=3&ost=-99999&oet=99999&mma=0&fml=a&fq=Annual&fam=avg&fgst=lin&fgsnd=2020-02-01&line_index=1&transformation=lin&vintage_date=2025-08-29&revision_date=2025-08-29&nd=1970-01-01' -LINC_POP <- read_csv(LINC_POP) -colnames(LINC_POP) <- c("Year","LINC_POP") -LINC_POP <- LINC_POP %>% mutate(Year=year(Year)) +?forecast +?forecast +install.packages("sparsegl") +plot(VAR1) -WY_POP <- 'https://fred.stlouisfed.org/graph/fredgraph.csv?bgcolor=%23ebf3fb&chart_type=line&drp=0&fo=open%20sans&graph_bgcolor=%23ffffff&height=450&mode=fred&recession_bars=on&txtcolor=%23444444&ts=12&tts=12&width=1320&nt=0&thu=0&trc=0&show_legend=yes&show_axis_titles=yes&show_tooltip=yes&id=WYPOP&scale=left&cosd=1900-01-01&coed=2024-01-01&line_color=%230073e6&link_values=false&line_style=solid&mark_type=none&mw=3&lw=3&ost=-99999&oet=99999&mma=0&fml=a&fq=Annual&fam=avg&fgst=lin&fgsnd=2020-02-01&line_index=1&transformation=lin&vintage_date=2025-08-29&revision_date=2025-08-29&nd=1900-01-01' -WY_POP <- read_csv(WY_POP) -colnames(WY_POP) <- c("Year","WY_POP") -WY_POP <- WY_POP %>% mutate(Year=year(Year)) -LINC_PRIV_IND -DF <- LINC_POP %>% left_join(WY_POP) %>% left_join(LINC_PRIV_IND) %>% left_join(LINC_LABOR_FORCE) %>% left_join(LINC_GOV_GDP) %>% left_join(LINC_GDP) %>% left_join(WY_POP) -DF %>% select(LINC_POP,WY_POP) -DF <- DF %>% mutate(WY_POP=WY_POP-LINC_POP) #Check a VAR it looks like lags on changes to Private industry could affect other variables #Idea check a SVAR placing limits on which shocks are first feols((LINC_POP)~(WY_POP)+log(LINC_PRIV_IND)+Year,data=DF) diff --git a/Scripts/Functions.r b/Scripts/Functions.r new file mode 100644 index 0000000..77e56ff --- /dev/null +++ b/Scripts/Functions.r @@ -0,0 +1,26 @@ +#A function to extract date from FRED. Assumes the date is either annual or monthly. +FRED_GET <- function(FRED_SERIES_ID,NAME=NA,ST_DATE='1890-01-91',END_DATE=Sys.Date(),ANNUAL_DATA=TRUE){ + NAME <- ifelse(is.na(NAME),FRED_SERIES_ID,NAME) + DATA_TIME_FRAME <- ifelse(ANNUAL_DATA,"Annual","Monthly") + URL <- paste0('https://fred.stlouisfed.org/graph/fredgraph.csv?bgcolor=%23ebf3fb&chart_type=line&drp=0&fo=open%20sans&graph_bgcolor=%23ffffff&height=450&mode=fred&recession_bars=off&txtcolor=%23444444&ts=12&tts=12&width=827&nt=0&thu=0&trc=0&show_legend=no&show_axis_titles=no&show_tooltip=no&id=',FRED_SERIES_ID,'&scale=left&cosd=',ST_DATE,'&coed=',END_DATE,'&line_color=%230073e6&link_values=false&line_style=solid&mark_type=none&mw=3&lw=3&ost=-99999&oet=99999&mma=0&fml=a&fq=',DATA_TIME_FRAME,'&fam=avg&fgst=lin&fgsnd=2020-02-01&line_index=1&transformation=lin&vintage_date=',END_DATE,'&revision_date=',END_DATE,'&nd=',END_DATE) + DATA <- read_csv(URL) + DATA <- DATA[which(!is.na(DATA[,2] )),] + colnames(DATA)[2] <- NAME + if(ANNUAL_DATA){ + colnames(DATA)[1] <- "YEAR" + DATA$YEAR <- year(DATA$YEAR) + }else{ + colnames(DATA)[1] <- "DATE" + } + return(DATA) +} + +CPI_ADJUST <- function(DATA){ + CPI <- FRED_GET("CPIAUCSL") %>% filter(!is.na(CPIAUCSL)) + CPI$CPI_ADJ <- as.numeric(CPI[which.max(CPI$YEAR),2])/CPI$CPIAUCSL + CPI <- CPI %>% select(-"CPIAUCSL") + DATA <- DATA %>% left_join(CPI) + DATA[,2] <- DATA[,2]*DATA[,"CPI_ADJ"] + DATA <- DATA%>% select(-"CPI_ADJ") + return(DATA) +} diff --git a/Scripts/Load_Wyoming_Web_Data.r b/Scripts/Load_Wyoming_Web_Data.r new file mode 100644 index 0000000..26ab369 --- /dev/null +++ b/Scripts/Load_Wyoming_Web_Data.r @@ -0,0 +1,28 @@ +library(rvest) +#Data found on the page http://eadiv.state.wy.us/pop/ +PAGE <- read_html("http://eadiv.state.wy.us/pop/BirthDeathMig.htm") +NODE <- html_element(PAGE ,"table") +TBL <- html_table(NODE) + +ST <- which(toupper(TBL$X1)=="ALBANY") +END <- which(toupper(TBL$X1)=="TOTAL") +TYPES <- TBL[ST-2,1] + +ST_YEAR <- 1971 +ALL_DATA <- list() +TBL <- TBL[,c(1,which(!is.na(as.numeric(TBL[ST[1],]))))] +TBL <- TBL[,-ncol(TBL)] +colnames(TBL) <- c("County",(ST_YEAR:(ST_YEAR+ncol(TBL)-1))) +TBL$Type <- NA +for(i in 1:length(ST)){ + TBL[ST[i]:END[i],"Type"]<- as.character(TYPES[i,1]) +} +TBL[ST[2]:END[2],"Type"] <- as.character(TYPES[2,1]) +TBL$Type +TBL <- TBL %>% filter(!is.na(Type)) %>% select(County,Type,everything()) +GROUP <- colnames(TBL)[-1:-2] +Data <- pivot_longer(TBL,all_of(GROUP),names_to="Year",values_to="Pop_Change") +Data$County <- ifelse(toupper(Data$County)=="TOTAL","Wyoming",Data$County) +WY_COUNTY_DATA_SET <- pivot_wider(Data,names_from=Type,values_from=Pop_Change) +colnames(Data)[5] <-"Migration" +