diff --git a/Data/API_CENSUS_CODES.csv b/Data/API_CENSUS_CODES.csv index 9ecdd5a..9c79810 100644 --- a/Data/API_CENSUS_CODES.csv +++ b/Data/API_CENSUS_CODES.csv @@ -5,7 +5,7 @@ B01001_003E,"Male",0,4 B01001_004E,"Male",5,9 B01001_005E,"Male",10,14 B01001_006E,"Male",15,17 -B01001_007E,"Male",18,18 +B01001_007E,"Male",18,19 B01001_008E,"Male",20,20 B01001_009E,"Male",21,21 B01001_010E,"Male",22,24 @@ -16,9 +16,9 @@ B01001_014E,"Male",40,44 B01001_015E,"Male",45,49 B01001_016E,"Male",50,54 B01001_017E,"Male",55,59 -B01001_018E,"Male",60,60 +B01001_018E,"Male",60,61 B01001_019E,"Male",62,64 -B01001_020E,"Male",65,65 +B01001_020E,"Male",65,66 B01001_021E,"Male",67,69 B01001_022E,"Male",70,74 B01001_023E,"Male",75,79 @@ -29,7 +29,7 @@ B01001_027E,"Female",0,4 B01001_028E,"Female",5,9 B01001_029E,"Female",10,14 B01001_030E,"Female",15,17 -B01001_031E,"Female",18,18 +B01001_031E,"Female",18,19 B01001_032E,"Female",20,20 B01001_033E,"Female",21,21 B01001_034E,"Female",22,24 @@ -40,9 +40,9 @@ B01001_038E,"Female",40,44 B01001_039E,"Female",45,49 B01001_040E,"Female",50,54 B01001_041E,"Female",55,59 -B01001_042E,"Female",60,60 +B01001_042E,"Female",60,61 B01001_043E,"Female",62,64 -B01001_044E,"Female",65,65 +B01001_044E,"Female",65,66 B01001_045E,"Female",67,69 B01001_046E,"Female",70,74 B01001_047E,"Female",75,79 diff --git a/Data/Cleaned_Data/Intiate_Simulation/Kemmerer_Demographic_Data.csv b/Data/Cleaned_Data/Intiate_Simulation/Kemmerer_Demographic_Data.csv new file mode 100644 index 0000000..816792c --- /dev/null +++ b/Data/Cleaned_Data/Intiate_Simulation/Kemmerer_Demographic_Data.csv @@ -0,0 +1,87 @@ +County,Year,Age,Num_Male,Num_Female +Kemmerer,2024,0,10.464965521092157,18.53827755476956 +Kemmerer,2024,1,10.73329797035093,26.12211837262983 +Kemmerer,2024,2,9.838856472821687,22.330197963699696 +Kemmerer,2024,3,11.717183617633099,26.3327806175704 +Kemmerer,2024,4,10.464965521092157,24.015495923224204 +Kemmerer,2024,5,15.460982984335295,16.925040008171884 +Kemmerer,2024,6,19.09886133359066,18.773993958644443 +Kemmerer,2024,7,20.78787628145922,22.045220178711276 +Kemmerer,2024,8,17.409846385722098,19.34290286648215 +Kemmerer,2024,9,21.17764896173658,20.05403900127929 +Kemmerer,2024,10,23.313248886019743,19.837665883519023 +Kemmerer,2024,11,24.63286674749256,19.692865402617425 +Kemmerer,2024,12,25.0727393679835,18.534461555404636 +Kemmerer,2024,13,24.19299412700162,23.023276463354193 +Kemmerer,2024,14,26.53898143628663,23.74727886786219 +Kemmerer,2024,15,13.763908906585888,13.325834953286826 +Kemmerer,2024,16,14.400390243306624,12.585510789215336 +Kemmerer,2024,17,15.116431747117451,11.919219041550994 +Kemmerer,2024,18,8.11409091515564,8.11283185840708 +Kemmerer,2024,19,6.633417390492201,5.672882427307205 +Kemmerer,2024,20,8.976744186046512,27.571428571428573 +Kemmerer,2024,21,0,6.411960132890365 +Kemmerer,2024,22,10.007317017094115,16.00986295681063 +Kemmerer,2024,23,9.285139500396603,13.795732973421927 +Kemmerer,2024,24,10.523158100449484,13.795732973421927 +Kemmerer,2024,25,14.229527197897555,14.473076411960134 +Kemmerer,2024,26,12.94653704071007,15.057847176079733 +Kemmerer,2024,27,13.179807978380522,13.888305647840532 +Kemmerer,2024,28,9.214202037982844,13.74211295681063 +Kemmerer,2024,29,12.94653704071007,15.935003322259135 +Kemmerer,2024,30,9.841972541328104,16.16915622314293 +Kemmerer,2024,31,10.64539887123244,16.16915622314293 +Kemmerer,2024,32,9.339831085137893,17.360567734321886 +Kemmerer,2024,33,10.24368570628027,17.360567734321886 +Kemmerer,2024,34,9.942400832566145,21.10500391231288 +Kemmerer,2024,35,23.090047712195066,13.797127159737228 +Kemmerer,2024,36,24.414886515353803,15.316076938790872 +Kemmerer,2024,37,21.386683536705274,16.20213097657216 +Kemmerer,2024,38,23.2793103983606,16.835026717844507 +Kemmerer,2024,39,29.33571635565767,16.07555182831769 +Kemmerer,2024,40,17.783950233132593,12.477327826165036 +Kemmerer,2024,41,17.654140377416297,12.581305558049745 +Kemmerer,2024,42,16.615661531685927,13.413127413127413 +Kemmerer,2024,43,18.30318965599778,15.388704318936878 +Kemmerer,2024,44,19.73109806887704,15.388704318936878 +Kemmerer,2024,45,9.96622215641455,21.42182039512059 +Kemmerer,2024,46,10.86595054553531,17.354386142882504 +Kemmerer,2024,47,10.796740669449097,16.812061575917422 +Kemmerer,2024,48,9.96622215641455,16.405318150693617 +Kemmerer,2024,49,8.097555502086824,16.812061575917422 +Kemmerer,2024,50,11.910119959416715,17.094436330798903 +Kemmerer,2024,51,12.541717229991843,18.692047202836182 +Kemmerer,2024,52,13.443999045099172,20.928702423688375 +Kemmerer,2024,53,12.000348140927445,20.928702423688375 +Kemmerer,2024,54,10.37624087373426,17.57371959241009 +Kemmerer,2024,55,27.330581312705572,17.79318936877076 +Kemmerer,2024,56,28.04355299912398,19.075581395348838 +Kemmerer,2024,57,24.71635179583808,19.716777408637874 +Kemmerer,2024,58,30.1824680583792,21.640365448504983 +Kemmerer,2024,59,33.034354804052825,21.80066445182724 +Kemmerer,2024,60,24.58166576527853,13.547851248526417 +Kemmerer,2024,61,25.752221277910838,15.305969349480229 +Kemmerer,2024,62,23.212805992871722,17.31788012362568 +Kemmerer,2024,63,23.212805992871722,16.868065055479555 +Kemmerer,2024,64,24.42654748269508,17.430333890662205 +Kemmerer,2024,65,17.826584994462902,31.169037340601193 +Kemmerer,2024,66,20.324577796234774,32.30936797501343 +Kemmerer,2024,67,31.745589072282296,31.707002858013322 +Kemmerer,2024,68,31.745589072282296,32.5413976700663 +Kemmerer,2024,69,29.161645775701185,31.289805451986826 +Kemmerer,2024,70,27.118003770017854,15.476896504155697 +Kemmerer,2024,71,25.59022890973516,15.122463759785719 +Kemmerer,2024,72,21.579819901493085,13.114011541689177 +Kemmerer,2024,73,20.43398875628106,12.405146052949224 +Kemmerer,2024,74,20.052045041210388,12.168857556702571 +Kemmerer,2024,75,27.418452872826624,9.03148043108338 +Kemmerer,2024,76,32.273803902389666,9.305161656267725 +Kemmerer,2024,77,19.421404118252187,5.199943278502552 +Kemmerer,2024,78,17.422141929608582,5.0174891283796565 +Kemmerer,2024,79,15.994097509148864,5.108716203441104 +Kemmerer,2024,80,7.746694691165502,5.8689063665333245 +Kemmerer,2024,81,5.325852600176282,5.103396840463761 +Kemmerer,2024,82,3.7523052410332904,5.358566682486948 +Kemmerer,2024,83,3.510221031934368,4.465472235405791 +Kemmerer,2024,84,3.389178927384908,4.2103023933826025 +Kemmerer,2024,85,20.83887043189369,18.274086378737543 diff --git a/Data/Cleaned_Data/Intiate_Simulation/Kemmerer_Summary_Start_Data.csv b/Data/Cleaned_Data/Intiate_Simulation/Kemmerer_Summary_Start_Data.csv new file mode 100644 index 0000000..cd50a0d --- /dev/null +++ b/Data/Cleaned_Data/Intiate_Simulation/Kemmerer_Summary_Start_Data.csv @@ -0,0 +1,2 @@ +Year,County,Population,Births,Deaths,Migration,Min_Birth_Group,PREV_BIRTH,PREV_TWO_BIRTH +2024,Kemmerer,2895,NA,NA,NA,125.89845094664372,29,37 diff --git a/Demographic_Split.r b/Demographic_Split.r new file mode 100644 index 0000000..ab96565 --- /dev/null +++ b/Demographic_Split.r @@ -0,0 +1,56 @@ +library(tidyverse) +library(scales) #For a pretty population Pyramid +ACS_FILE_LOC <- "Data/Cleaned_Data/ACS_Census_Demographic_Data.Rds" +ACS_YEAR <- 2023 #Year of the ACS data +if(!file.exists(ACS_FILE_LOC)){source("Scripts/Load_ACS_Census_Data.r")}else{AGE_DATA <- readRDS(ACS_FILE_LOC)} +LIN_DEMOGRAPHICS <- readRDS("Data/Cleaned_Data/Lincoln_Demographic_Data.Rds") %>% filter(Year==ACS_YEAR) +for(i in 1:nrow(AGE_DATA)){ + if(i==1 & exists("RES")){rm(RES)} + C_DEMO <- AGE_DATA[i,] + C_SEX <- C_DEMO$Sex + if(C_DEMO$Min_Age==C_DEMO$Max_Age){C_AGE_YEARLY_DATA <- LIN_DEMOGRAPHICS %>% filter(Age==C_DEMO$Min_Age)}else{ + C_AGE_YEARLY_DATA <- LIN_DEMOGRAPHICS %>% filter(Age>=C_DEMO$Min_Age,Age<=C_DEMO$Max_Age) + } + #Extract only the current sex being applied + if(C_SEX=='Female'){C_AGE_YEARLY_DATA <- C_AGE_YEARLY_DATA %>% select(Age,Num_Female) %>% rename(POP=Num_Female)}else{C_AGE_YEARLY_DATA <- C_AGE_YEARLY_DATA %>% select(Age,Num_Male) %>% rename(POP=Num_Male)} + C_AGE_YEARLY_DATA$Sex <- C_SEX + C_AGE_YEARLY_DATA$PER <- C_AGE_YEARLY_DATA$POP/sum(C_AGE_YEARLY_DATA$POP) + C_RES <- C_AGE_YEARLY_DATA%>% left_join(C_DEMO) %>% mutate(Population=PER*Population) %>% select(Age,Sex,IN_KEM,Population) + if(i==1){RES <- C_RES}else{RES <- rbind(RES,C_RES)} +} +RES <- RES %>% group_by(IN_KEM) %>% mutate(Per_Pop=Population/sum(Population)) %>% mutate(Region=ifelse(IN_KEM,'Kemmerere & Diamondville','Rest of the County')) %>% select(-Population) %>% ungroup +CURRENT_KEM_POP <- readRDS("Data/Cleaned_Data/Wyoming_City_Population.Rds") %>% filter(City %in% c('Kemmerer','Diamondville'),Year==max(Year)) %>% mutate(Population=as.numeric(Population)) %>% pull(Population) %>% sum() +KEM_DEMOGRAPHICS <- RES %>% filter(IN_KEM==1) %>% select(Age,Sex,Per_Pop) %>% mutate(Year=ACS_YEAR+1,County='Kemmerer',Population=Per_Pop*CURRENT_KEM_POP ) %>% select(-Per_Pop) %>% pivot_wider(values_from=Population,names_from=c(Sex),names_prefix="Num_") %>% select(County,Year,Age,Num_Male,Num_Female) + saveRDS(KEM_DEMOGRAPHICS,"Data/Cleaned_Data/Intiate_Simulation/Kemmerer_Demographic_Data.Rds") + write_csv(KEM_DEMOGRAPHICS,"Data/Cleaned_Data/Intiate_Simulation/Kemmerer_Demographic_Data.csv") + +##Create a simplfied summary of the Kemmerer/Diamondville population estimates, to start the Monte Carlo + ST_BIRTH_KEM <- round(sum((KEM_DEMOGRAPHICS%>% filter(Age==0))[4:5])) + ST_BIRTH_TWO_PREV_KEM <- round(sum((KEM_DEMOGRAPHICS%>% filter(Age==1))[4:5])) + INTIATE_KEMMER <- KEM_DEMOGRAPHICS %>% mutate(Male_Window=Age>=18 & Age<=30,Female_Window=Age>=18 & Age<=28) %>% group_by(County,Year) %>% summarize(Female_Birth_Group=sum(Num_Female*Female_Window),Male_Birth_Group=sum(Num_Male*Male_Window),Min_Birth_Group=ifelse(Female_Birth_Group% mutate(Births=NA,Deaths=NA,Migration=NA) %>% select(Year,County,Population,Births,Deaths,Migration,Min_Birth_Group,PREV_BIRTH,PREV_TWO_BIRTH) %>% ungroup + saveRDS(INTIATE_KEMMER ,"Data/Cleaned_Data/Intiate_Simulation/Kemmerer_Summary_Start_Data.Rds") + write_csv(INTIATE_KEMMER ,"Data/Cleaned_Data/Intiate_Simulation/Kemmerer_Summary_Start_Data.csv") +###Make a population Pyramid Graph +PY_KEM_DATA <- KEM_DEMOGRAPHICS %>% mutate(County='Kemmerer') %>% pivot_longer(cols=c(Num_Male,Num_Female),names_to="Sex",values_to="Population") %>% mutate(Sex=ifelse(Sex=='Num_Male','Male','Female')) +PY_LN_DATA <- readRDS("Data/Cleaned_Data/Wyoming_County_Demographic_Data.Rds") %>% filter(County=='Lincoln',Year==max(Year))%>% pivot_longer(cols=c(Num_Male,Num_Female),names_to="Sex",values_to="Population") %>% mutate(Sex=ifelse(Sex=='Num_Male','Male','Female')) + +#Population=ifelse(Sex=='Male',-Population,Population) +#Remove the Kemmerer Population estimate from Lincoln county +PY_LN_DATA <- PY_LN_DATA %>% left_join(PY_KEM_DATA %>% rename(POP_SHIFT=Population) %>% select(-County)) %>% mutate(Population=Population-POP_SHIFT) %>% select(-POP_SHIFT) + +LN_ONLY_POP <- sum(PY_LN_DATA$Population) +PY_LN_DATA$Percent <- PY_LN_DATA$Population/LN_ONLY_POP +#Find percent Kemmerer (nod adjustment needed) +KEM_ONLY_POP <- sum(PY_KEM_DATA$Population) +PY_KEM_DATA$Percent <- PY_KEM_DATA$Population/KEM_ONLY_POP + +PY_DATA <- rbind(PY_LN_DATA,PY_KEM_DATA) %>% mutate(Population=ifelse(County=='Kemmerer',Population,-Population),'Percent Population'=ifelse(County=='Kemmerer',Percent,-Percent),Age_Numeric=Age,Age=as.character(Age)) +PY_DATA[PY_DATA$Age=='85',"Age"] <- ">84" +ORD <- PY_DATA[,c("Age_Numeric","Age")] %>% unique %>% arrange(Age_Numeric) %>% pull(Age) +PY_DATA$Age <- factor(PY_DATA$Age,levels=ORD) + +RANGE <- c(pretty(range(PY_DATA$`Percent Population`),n=8)) +LAB <- percent(abs(RANGE),accuracy=0.1 ) + POP_PYRAMID <- ggplot(PY_DATA,aes(y=factor(Age),x=`Percent Population`,fill=County))+geom_col() +scale_x_continuous(breaks = RANGE,labels = LAB)+facet_grid(~Sex) +POP_PYRAMID + diff --git a/Scripts/Downshift_Population_Functions.r b/Scripts/Downshift_Population_Functions.r deleted file mode 100644 index 1109ddb..0000000 --- a/Scripts/Downshift_Population_Functions.r +++ /dev/null @@ -1,58 +0,0 @@ -#These functions are used to take census data from Kemmerer in 2020, and find the relative ratio of the total population in Lincoln. This allows for a heuristic of the current year age-sex demographic amounts in 2025 of Kemmerer to start the Monte Carlo with a list age people in each age group - -#A function to pull an estimate of the ratio between the Kemmerer are age-sex population cohort, and the Lincoln total. This allows the age-sex by year demographic data for Lincoln to be converted to Kemmerer numbers. Used as in input to TRANSPOSE_AGE_DEMOGRAPHIC_DATA_DOWN (see below) -GET_VALUE <- function(Age,SEX,DATA=AGE_DATA,County='Lincoln'){ - DATA <- DATA %>% filter(Region==County,Sex==SEX) - if(any(Age==DATA$Med_Age)){ - #If any exact matches in between the min and max ages pull the exact record - RES <- DATA[which(Age==DATA$Med_Age & DATA$Sex==SEX),] %>% pull(Per_Pop) -}else if(Age>=85){ - RES <- DATA[which(DATA$Min_Age==85 & DATA$Max_Age==Inf),] %>% pull(Per_Pop) - }else if(Age>=85){ - #If in the upper bound of age pull the single record - RES <- DATA[which(DATA$Min_Age==85 & DATA$Max_Age==Inf),] %>% pull(Per_Pop) - }else if(Age==0){ - #If in the lower bound of age pull the single record - RES <- DATA[which(DATA$Min_Age==0),] %>% pull(Per_Pop) - }else{ - #If no exact record is found perform a linear interpolation between the two age brackets bounding the point - TEMP <- DATA %>% arrange(Min_Age) %>% select(Med_Age,Min_Age,Max_Age,Per_Pop) %>% filter(Per_Pop!=0) - LOWER <- TEMP[max(max(which(TEMP$Med_AgeAge)),] - if(LOWER$Med_Age!=UPPER$Med_Age){ - #If the LOWER and UPPER are the same than the age must be less the first entry, if that is not true perform a linear interpolation between the two bounding entries - C <- LOWER$Med_Age - ST <- LOWER$Per_Pop - DELTA <- UPPER$Per_Pop-LOWER$Per_Pop - GAP <- UPPER$Med_Age-LOWER$Med_Age - RES <- (ST+(Age-C)*DELTA/GAP) - } else{RES <- UPPER %>% pull(Per_Pop)} - } - return(RES) -} -#A function which returns a tibble of all single year age-sex combinations for conversions of ratio in Lincoln to Kemmerer -INTERPOLATE_COUNTY_AGE_DEMOGRAPHIC_DATA_TO_CITY_LEVEL <- function(DATA,COUNTY='Lincoln',AGE_RANGE=0:85,YEARS_FORWARD=0){ - #Create a vector of all male ages, then all female ages - VALUES <- c(unlist((sapply(AGE_RANGE,function(x){GET_VALUE(x,'Male',DATA,COUNTY)}))),unlist((sapply(AGE_RANGE,function(x){GET_VALUE(x,'Female',DATA,COUNTY)})))) - #Turn into an easy to read tibble to merge later with the actual demographic data - RES <- cbind(rep(AGE_RANGE,2),c(rep("Male",length(VALUES)/2),rep("Female",length(VALUES)/2)),VALUES) %>% as_tibble - colnames(RES) <- c("Age","Sex","Conversion_Ratio") - RES <- RES %>% mutate(Age=as.numeric(Age),Conversion_Ratio=as.numeric(Conversion_Ratio)) -if(YEARS_FORWARD>0){ - MALE <- RES %>% filter(Sex=='Male') - FEMALE <- RES %>% filter(Sex=='Female') - STUB_AGE_MALE <- MALE[rep(1,YEARS_FORWARD),] - STUB_AGE_MALE$Age <- 0:(YEARS_FORWARD-1) - - STUB_AGE_FEMALE <- FEMALE[rep(1,YEARS_FORWARD),] - STUB_AGE_FEMALE$Age <- 0:(YEARS_FORWARD-1) - - MALE[,"Age"] <- MALE[,"Age"]+YEARS_FORWARD - FEMALE[,"Age"] <- FEMALE[,"Age"]+YEARS_FORWARD - MALE <- rbind(STUB_AGE_MALE,MALE)[1:nrow(MALE),] - FEMALE <- rbind(STUB_AGE_FEMALE,FEMALE)[1:nrow(FEMALE),] - RES <- rbind(MALE,FEMALE) -} - return(RES) -} - diff --git a/Scripts/Load_ACS_Census_Data.r b/Scripts/Load_ACS_Census_Data.r new file mode 100644 index 0000000..494adaa --- /dev/null +++ b/Scripts/Load_ACS_Census_Data.r @@ -0,0 +1,40 @@ +library(tidycensus) +library(zipcodeR) +#Pull the relevant median age variables the value moe (margine of error) can be converted to standard error, following the link below +#https://www.census.gov/content/dam/Census/library/publications/2018/acs/acs_general_handbook_2018_ch08.pdf +ACS_YEAR <- 2023 #most recent as of Nov 4 2025 +source("Scripts/Downshift_Population_Functions.r") +#Packages to instal on computer if zipcodeR won't install +#Sudo apt install libssl-dev libudunits2-dev libabsl-dev libcurl4-openssl-dev libgdal-dev cmake libfontconfig1-dev libharfbuzz-dev libfribidi-dev +#install.packages("zipcodeR") + +#Add API key if missing + #KEY <- '30e13ab22563318ff59286e433099f4174d4edd4' + #census_api_key(KEY, install = TRUE) +PROJ_TRACTS <- get_tracts(search_city('Kemmerer','WY')$zipcode) %>% full_join(get_tracts(search_city('Diamondville','WY')$zipcode)) +PROJ_TRACTS <- PROJ_TRACTS %>% select(GEOID) %>% mutate('IN_KEM'=1) %>% mutate(GEOID=as.character(GEOID)) + +MED_AGE_VAR <- cbind(c('B01002_001E','B01002_002E','B01002_003E'),c('Median_Age','Median_Age_Male','Median_Age_Female')) %>% as_tibble %>% rename(variable=V1,Data_Type=V2) + +###Load data manually created which links vairable names to sex-age census data +CODES <- read_csv("Data/API_CENSUS_CODES.csv",skip=1) %>% mutate(Med_Age=(Min_Age+Max_Age)/2) %>% rename(variable=Code) +#Testing age Comparison between the two +###Extract census data for all tracts in Lincoln county, clean up the data, and indicate if the tract is in Kemmerer/Diamondvile or not. +AGE_DATA <- get_acs(geography="tract",year=ACS_YEAR,variables=CODES$variable,state='WY',county='lincoln') %>% mutate(se=moe/1.64) %>% left_join(CODES %>% mutate(variable=gsub('E','',variable))) %>% select(-NAME,-moe) %>% left_join(PROJ_TRACTS) %>% mutate(IN_KEM=ifelse(is.na(IN_KEM),0,1)) %>% rename(Population=estimate) %>% select(-variable,-GEOID) %>% select(Sex,Min_Age,Max_Age,Med_Age,IN_KEM,Population,se) %>% filter(!(Min_Age==0& Max_Age==Inf)) +AGE_DATA <- AGE_DATA %>% group_by(Sex,Min_Age,Max_Age,Med_Age,IN_KEM)%>% summarize(Population=sum(Population)) %>% ungroup +#Add Descriptive age category to a clean graph +AGE_DATA$Ages <- paste(AGE_DATA$Min_Age,"to",AGE_DATA$Max_Age) +AGE_DATA[AGE_DATA$Max_Age==Inf,"Ages"] <- "85+" +AGE_DATA[AGE_DATA$Med_Age==18,"Ages"] <- "18" +AGE_DATA[AGE_DATA$Med_Age==20,"Ages"] <- "20" +AGE_DATA[AGE_DATA$Med_Age==21,"Ages"] <- "21" +AGE_DATA[AGE_DATA$Min_Age==0,"Ages"] <- "Under 5" +#Turn the ages into factors to keep the correct order in graphs +#Add the percent of total relative population in the region +ORD <- AGE_DATA %>% select(Min_Age,Ages) %>% unique %>% arrange(Min_Age) %>% pull(Ages) %>% unique +AGE_DATA$Ages <- factor(AGE_DATA$Ages,levels=ORD) +AGE_DATA$Year <- ACS_YEAR +AGE_DATA <-AGE_DATA %>% select(Year,Sex,Ages,IN_KEM,everything()) +if(!exists('ACS_FILE_LOC')){ACS_FILE_LOC <- "Data/Cleaned_Data/ACS_Census_Demographic_Data.Rds"} +saveRDS(AGE_DATA,ACS_FILE_LOC) + diff --git a/Zip_Code.r b/Zip_Code.r deleted file mode 100644 index cdd2efe..0000000 --- a/Zip_Code.r +++ /dev/null @@ -1,96 +0,0 @@ -library(tidyverse) -library(tidycensus) -library(zipcodeR) -library(scales) #For a pretty population Pyramid -source("Scripts/Downshift_Population_Functions.r") -#Packages to instal on computer if zipcodeR won't install -#Sudo apt install libssl-dev libudunits2-dev libabsl-dev libcurl4-openssl-dev libgdal-dev cmake libfontconfig1-dev libharfbuzz-dev libfribidi-dev -#install.packages("zipcodeR") - -#Add API key if missing - #KEY <- '30e13ab22563318ff59286e433099f4174d4edd4' - #census_api_key(KEY, install = TRUE) -PROJ_TRACTS <- get_tracts(search_city('Kemmerer','WY')$zipcode) %>% full_join(get_tracts(search_city('Diamondville','WY')$zipcode)) -PROJ_TRACTS <- PROJ_TRACTS %>% select(GEOID) %>% mutate('IN_KEM'=1) %>% mutate(GEOID=as.character(GEOID)) - -MED_AGE_VAR <- cbind(c('B01002_001E','B01002_002E','B01002_003E'),c('Median_Age','Median_Age_Male','Median_Age_Female')) %>% as_tibble %>% rename(variable=V1,Data_Type=V2) -#Pull the relevant median age variables the value moe (margine of error) can be converted to standard error, following the link below -#https://www.census.gov/content/dam/Census/library/publications/2018/acs/acs_general_handbook_2018_ch08.pdf - -MED_AGE_VALUES <- get_acs(geography="tract",variables=MED_AGE_VAR$variable,state='WY',county='lincoln') %>% mutate(se=moe/1.64) %>% left_join(MED_AGE_VAR %>% mutate(variable=gsub('E','',variable))) %>% select(-NAME,-moe) %>% left_join(PROJ_TRACTS) %>% mutate(IN_KEM=ifelse(is.na(IN_KEM),0,1)) -AGE_DIFF <- MED_AGE_VALUES %>% group_by(IN_KEM,Data_Type) %>% summarize(Age=mean(estimate)) %>% pivot_wider(names_from=IN_KEM,values_from=Age,names_prefix="In_Kemmerer_") -AGE_DIFF - -###Load data manually created which links vairable names to sex-age census data -CODES <- read_csv("Data/API_CENSUS_CODES.csv",skip=1) %>% mutate(Med_Age=(Min_Age+Max_Age)/2) %>% rename(variable=Code) - - - -#Testing age Comparison between the two -###Extract census data for all tracts in Lincoln county, clean up the data, and indicate if the tract is in Kemmerer/Diamondvile or not. -AGE_DATA <- get_acs(geography="tract",variables=CODES$variable,state='WY',county='lincoln') %>% mutate(se=moe/1.64) %>% left_join(CODES %>% mutate(variable=gsub('E','',variable))) %>% select(-NAME,-moe) %>% left_join(PROJ_TRACTS) %>% mutate(IN_KEM=ifelse(is.na(IN_KEM),0,1)) %>% rename(Population=estimate) %>% select(-variable,-GEOID) %>% select(Sex,Min_Age,Max_Age,Med_Age,IN_KEM,Population,se) %>% filter(!(Min_Age==0& Max_Age==Inf)) - -AGE_DATA <- AGE_DATA %>% group_by(Sex,Min_Age,Max_Age,Med_Age,IN_KEM)%>% summarize(Population=sum(Population)) %>% ungroup -#Add Descriptive age category to a clean graph -AGE_DATA$Ages <- paste(AGE_DATA$Min_Age,"to",AGE_DATA$Max_Age) -AGE_DATA[AGE_DATA$Max_Age==Inf,"Ages"] <- "85+" -AGE_DATA[AGE_DATA$Med_Age==18,"Ages"] <- "18" -AGE_DATA[AGE_DATA$Med_Age==20,"Ages"] <- "20" -AGE_DATA[AGE_DATA$Med_Age==21,"Ages"] <- "21" -AGE_DATA[AGE_DATA$Min_Age==0,"Ages"] <- "Under 5" -#Turn the ages into factors to keep the correct order in graphs -ORD <- AGE_DATA %>% select(Min_Age,Ages) %>% unique %>% arrange(Min_Age) %>% pull(Ages) %>% unique -AGE_DATA$Ages <- factor(AGE_DATA$Ages,levels=ORD) -#Add the percent of total relative population in the region -AGE_DATA <- AGE_DATA %>% mutate(Per_Pop=Population/sum(Population)) %>% group_by(IN_KEM) %>% mutate(Per_Pop_Region=Population/sum(Population)) %>% ungroup - -#Add a region name for clearer graphs -AGE_DATA <- AGE_DATA %>% mutate(Region=ifelse(IN_KEM==1,'Kemmerer','Lincoln')) -#AGE_DATA <- AGE_DATA %>% group_by(IN_KEM) %>% mutate(MORE_KEMMER=ifelse(sum(IN_KEM*Per_Pop_Region)>sum((1-IN_KEM)*Per_Pop_Region),1,0)) %>% ungroup - -#PLOT <- ggplot(AGE_DATA, aes(x =Ages, y = Per_Pop_Region)) + geom_line() + geom_point(aes(color = Region ), size = 5) + scale_color_brewer(palette = "Set1", direction = 1) + theme(legend.position = "bottom")+facet_wrap(~Sex,ncol=1) -#PLOT -LIN_TO_KEMMER_CONVERSION_RATIOS <- INTERPOLATE_COUNTY_AGE_DEMOGRAPHIC_DATA_TO_CITY_LEVEL(AGE_DATA,YEARS_FORWARD=1) %>% pivot_wider(names_from=Sex,values_from=Conversion_Ratio,names_prefix="Convert_") - -KEM_DEMO_DATA <- readRDS("Data/Cleaned_Data/Lincoln_Demographic_Data.Rds") %>% filter(Year==max(Year)) %>% left_join(LIN_TO_KEMMER_CONVERSION_RATIOS ) %>% mutate(Num_Male=Num_Male*Convert_Male,Num_Female=Num_Female*Convert_Female) %>% mutate(County='Kemmerer') %>% select(-Convert_Male,-Convert_Female) -NUM_KEM <- sum(KEM_DEMO_DATA[,4:5] ) -CURRENT_KEM_POP <- readRDS("Data/Cleaned_Data/Wyoming_City_Population.Rds") %>% filter(City=='Kemmerer' | City=='Diamondville') %>% group_by(City) %>% filter(Year==max(Year)) %>% ungroup %>% group_by(Year) %>% summarize(Population=sum(as.numeric(Population)),County='Kemmerer') - -SCALE_KEM_FACTOR <- CURRENT_KEM_POP$Population/NUM_KEM -KEM_NEW_DEMO_DATA <- KEM_DEMO_DATA -KEM_NEW_DEMO_DATA[,c("Num_Male","Num_Female")] <- round(SCALE_KEM_FACTOR*KEM_NEW_DEMO_DATA[,c("Num_Male","Num_Female")]) -##Create a simplfied summary of the Kemmerer/Diamondville population estimates, to start the Monte Carlo - ST_BIRTH_KEM <- sum((KEM_NEW_DEMO_DATA %>% filter(Age==0))[4:5]) - ST_BIRTH_TWO_PREV_KEM <- sum((KEM_NEW_DEMO_DATA %>% filter(Age==1))[4:5]) - -KEM_DEMO_DATA %>% filter(Age==0) - - ST_POP <- sum((KEM_NEW_DEMO_DATA)[4:5]) - INTIATE_KEMMER <- KEM_NEW_DEMO_DATA%>% mutate(Male_Window=Age>=18 & Age<=30,Female_Window=Age>=18 & Age<=28) %>% group_by(County,Year) %>% summarize(Female_Birth_Group=sum(Num_Female*Female_Window),Male_Birth_Group=sum(Num_Male*Male_Window),Min_Birth_Group=ifelse(Female_Birth_Group% mutate(Births=NA,Deaths=NA,Migration=NA) %>% select(Year,County,Population,Births,Deaths,Migration,Min_Birth_Group,PREV_BIRTH,PREV_TWO_BIRTH) %>% ungroup -INTIATE_KEMMER$County <- (readRDS("Data/Regression_Results/Birth_Regression_Data_Set.Rds") %>% filter(County=='Lincoln',Year==2020))$County #Making the factor the same for the regression - saveRDS(INTIATE_KEMMER ,"Data/Cleaned_Data/Kemmerer_Summary_Start_Data.Rds") - write_csv(INTIATE_KEMMER ,"Data/Cleaned_Data/Kemmerer_Summary_Start_Data.csv") - - saveRDS(KEM_NEW_DEMO_DATA,"Data/Cleaned_Data/Kemmerer_Demographic_Data.Rds") - write_csv(KEM_NEW_DEMO_DATA,"Data/Cleaned_Data/Kemmerer_Demographic_Data.csv") -#sum(KEM_NEW_DEMO_DATA[,4:5])-CURRENT_KEM_POP$Population -###Make a population Pyramid Graph -PY_KEM_DATA <- KEM_NEW_DEMO_DATA %>% pivot_longer(cols=c(Num_Male,Num_Female),names_to="Sex",values_to="Population") %>% mutate(Sex=ifelse(Sex=='Num_Male','Male','Female'),Population=ifelse(Sex=='Male',-Population,Population)) -LN_CURRENT_DEMO <- readRDS("Data/Cleaned_Data/Wyoming_County_Demographic_Data.Rds") %>% filter(County=='Lincoln',Year==max(Year)) - -PY_LN_DATA <- LN_CURRENT_DEMO %>% pivot_longer(cols=c(Num_Male,Num_Female),names_to="Sex",values_to="Population") %>% mutate(Sex=ifelse(Sex=='Num_Male','Male','Female'),Population=ifelse(Sex=='Male',-Population,Population)) -PY_LN_DATA <- PY_LN_DATA %>% left_join(PY_KEM_DATA %>% rename(POP_SHIFT=Population) %>% select(-County)) %>% mutate(Population=Population-POP_SHIFT) %>% select(-POP_SHIFT) -PY_LN_DATA$Population <- PY_LN_DATA$Population/sum(abs(PY_LN_DATA$Population )) - - -PY_KEM_PER <- PY_KEM_DATA -PY_KEM_PER$Population <- PY_KEM_PER$Population/sum(abs(PY_KEM_PER$Population )) -GRAPH_DATA <- full_join(PY_KEM_PER ,PY_LN_DATA) %>% mutate(Population=ifelse(County=='Kemmerer',abs(Population),-abs(Population))) - - -RANGE <- c(-0.015,pretty(range(GRAPH_DATA$Population),n=8)) -LAB <- percent(abs(RANGE),accuracy=0.1 ) - POP_PYRAMID <- ggplot(GRAPH_DATA,aes(y=factor(Age),x=Population,fill=County))+geom_col() +scale_x_continuous(breaks = RANGE,labels = LAB)+facet_grid(~Sex) -POP_PYRAMID - -