Cleaned up demographic match, saved for walk

2025-11-04 15:25:29 -07:00 · 2025-11-04 15:25:29 -07:00 · 7e43123840
commit 7e43123840
parent e60fa6fd53
7 changed files with 191 additions and 160 deletions
--- a/Data/API_CENSUS_CODES.csv
+++ b/Data/API_CENSUS_CODES.csv
@ -5,7 +5,7 @@ B01001_003E,"Male",0,4
 B01001_004E,"Male",5,9
 B01001_005E,"Male",10,14
 B01001_006E,"Male",15,17
-B01001_007E,"Male",18,18
+B01001_007E,"Male",18,19
 B01001_008E,"Male",20,20
 B01001_009E,"Male",21,21
 B01001_010E,"Male",22,24
@ -16,9 +16,9 @@ B01001_014E,"Male",40,44
 B01001_015E,"Male",45,49
 B01001_016E,"Male",50,54
 B01001_017E,"Male",55,59
-B01001_018E,"Male",60,60
+B01001_018E,"Male",60,61
 B01001_019E,"Male",62,64
-B01001_020E,"Male",65,65
+B01001_020E,"Male",65,66
 B01001_021E,"Male",67,69
 B01001_022E,"Male",70,74
 B01001_023E,"Male",75,79
@ -29,7 +29,7 @@ B01001_027E,"Female",0,4
 B01001_028E,"Female",5,9
 B01001_029E,"Female",10,14
 B01001_030E,"Female",15,17
-B01001_031E,"Female",18,18
+B01001_031E,"Female",18,19
 B01001_032E,"Female",20,20
 B01001_033E,"Female",21,21 
 B01001_034E,"Female",22,24
@ -40,9 +40,9 @@ B01001_038E,"Female",40,44
 B01001_039E,"Female",45,49
 B01001_040E,"Female",50,54
 B01001_041E,"Female",55,59
-B01001_042E,"Female",60,60
+B01001_042E,"Female",60,61
 B01001_043E,"Female",62,64
-B01001_044E,"Female",65,65
+B01001_044E,"Female",65,66
 B01001_045E,"Female",67,69
 B01001_046E,"Female",70,74
 B01001_047E,"Female",75,79
--- a/Data/Cleaned_Data/Intiate_Simulation/Kemmerer_Demographic_Data.csv
+++ b/Data/Cleaned_Data/Intiate_Simulation/Kemmerer_Demographic_Data.csv
@ -0,0 +1,87 @@
+County,Year,Age,Num_Male,Num_Female
+Kemmerer,2024,0,10.464965521092157,18.53827755476956
+Kemmerer,2024,1,10.73329797035093,26.12211837262983
+Kemmerer,2024,2,9.838856472821687,22.330197963699696
+Kemmerer,2024,3,11.717183617633099,26.3327806175704
+Kemmerer,2024,4,10.464965521092157,24.015495923224204
+Kemmerer,2024,5,15.460982984335295,16.925040008171884
+Kemmerer,2024,6,19.09886133359066,18.773993958644443
+Kemmerer,2024,7,20.78787628145922,22.045220178711276
+Kemmerer,2024,8,17.409846385722098,19.34290286648215
+Kemmerer,2024,9,21.17764896173658,20.05403900127929
+Kemmerer,2024,10,23.313248886019743,19.837665883519023
+Kemmerer,2024,11,24.63286674749256,19.692865402617425
+Kemmerer,2024,12,25.0727393679835,18.534461555404636
+Kemmerer,2024,13,24.19299412700162,23.023276463354193
+Kemmerer,2024,14,26.53898143628663,23.74727886786219
+Kemmerer,2024,15,13.763908906585888,13.325834953286826
+Kemmerer,2024,16,14.400390243306624,12.585510789215336
+Kemmerer,2024,17,15.116431747117451,11.919219041550994
+Kemmerer,2024,18,8.11409091515564,8.11283185840708
+Kemmerer,2024,19,6.633417390492201,5.672882427307205
+Kemmerer,2024,20,8.976744186046512,27.571428571428573
+Kemmerer,2024,21,0,6.411960132890365
+Kemmerer,2024,22,10.007317017094115,16.00986295681063
+Kemmerer,2024,23,9.285139500396603,13.795732973421927
+Kemmerer,2024,24,10.523158100449484,13.795732973421927
+Kemmerer,2024,25,14.229527197897555,14.473076411960134
+Kemmerer,2024,26,12.94653704071007,15.057847176079733
+Kemmerer,2024,27,13.179807978380522,13.888305647840532
+Kemmerer,2024,28,9.214202037982844,13.74211295681063
+Kemmerer,2024,29,12.94653704071007,15.935003322259135
+Kemmerer,2024,30,9.841972541328104,16.16915622314293
+Kemmerer,2024,31,10.64539887123244,16.16915622314293
+Kemmerer,2024,32,9.339831085137893,17.360567734321886
+Kemmerer,2024,33,10.24368570628027,17.360567734321886
+Kemmerer,2024,34,9.942400832566145,21.10500391231288
+Kemmerer,2024,35,23.090047712195066,13.797127159737228
+Kemmerer,2024,36,24.414886515353803,15.316076938790872
+Kemmerer,2024,37,21.386683536705274,16.20213097657216
+Kemmerer,2024,38,23.2793103983606,16.835026717844507
+Kemmerer,2024,39,29.33571635565767,16.07555182831769
+Kemmerer,2024,40,17.783950233132593,12.477327826165036
+Kemmerer,2024,41,17.654140377416297,12.581305558049745
+Kemmerer,2024,42,16.615661531685927,13.413127413127413
+Kemmerer,2024,43,18.30318965599778,15.388704318936878
+Kemmerer,2024,44,19.73109806887704,15.388704318936878
+Kemmerer,2024,45,9.96622215641455,21.42182039512059
+Kemmerer,2024,46,10.86595054553531,17.354386142882504
+Kemmerer,2024,47,10.796740669449097,16.812061575917422
+Kemmerer,2024,48,9.96622215641455,16.405318150693617
+Kemmerer,2024,49,8.097555502086824,16.812061575917422
+Kemmerer,2024,50,11.910119959416715,17.094436330798903
+Kemmerer,2024,51,12.541717229991843,18.692047202836182
+Kemmerer,2024,52,13.443999045099172,20.928702423688375
+Kemmerer,2024,53,12.000348140927445,20.928702423688375
+Kemmerer,2024,54,10.37624087373426,17.57371959241009
+Kemmerer,2024,55,27.330581312705572,17.79318936877076
+Kemmerer,2024,56,28.04355299912398,19.075581395348838
+Kemmerer,2024,57,24.71635179583808,19.716777408637874
+Kemmerer,2024,58,30.1824680583792,21.640365448504983
+Kemmerer,2024,59,33.034354804052825,21.80066445182724
+Kemmerer,2024,60,24.58166576527853,13.547851248526417
+Kemmerer,2024,61,25.752221277910838,15.305969349480229
+Kemmerer,2024,62,23.212805992871722,17.31788012362568
+Kemmerer,2024,63,23.212805992871722,16.868065055479555
+Kemmerer,2024,64,24.42654748269508,17.430333890662205
+Kemmerer,2024,65,17.826584994462902,31.169037340601193
+Kemmerer,2024,66,20.324577796234774,32.30936797501343
+Kemmerer,2024,67,31.745589072282296,31.707002858013322
+Kemmerer,2024,68,31.745589072282296,32.5413976700663
+Kemmerer,2024,69,29.161645775701185,31.289805451986826
+Kemmerer,2024,70,27.118003770017854,15.476896504155697
+Kemmerer,2024,71,25.59022890973516,15.122463759785719
+Kemmerer,2024,72,21.579819901493085,13.114011541689177
+Kemmerer,2024,73,20.43398875628106,12.405146052949224
+Kemmerer,2024,74,20.052045041210388,12.168857556702571
+Kemmerer,2024,75,27.418452872826624,9.03148043108338
+Kemmerer,2024,76,32.273803902389666,9.305161656267725
+Kemmerer,2024,77,19.421404118252187,5.199943278502552
+Kemmerer,2024,78,17.422141929608582,5.0174891283796565
+Kemmerer,2024,79,15.994097509148864,5.108716203441104
+Kemmerer,2024,80,7.746694691165502,5.8689063665333245
+Kemmerer,2024,81,5.325852600176282,5.103396840463761
+Kemmerer,2024,82,3.7523052410332904,5.358566682486948
+Kemmerer,2024,83,3.510221031934368,4.465472235405791
+Kemmerer,2024,84,3.389178927384908,4.2103023933826025
+Kemmerer,2024,85,20.83887043189369,18.274086378737543
--- a/Data/Cleaned_Data/Intiate_Simulation/Kemmerer_Summary_Start_Data.csv
+++ b/Data/Cleaned_Data/Intiate_Simulation/Kemmerer_Summary_Start_Data.csv
@ -0,0 +1,2 @@
+Year,County,Population,Births,Deaths,Migration,Min_Birth_Group,PREV_BIRTH,PREV_TWO_BIRTH
+2024,Kemmerer,2895,NA,NA,NA,125.89845094664372,29,37
--- a/Demographic_Split.r
+++ b/Demographic_Split.r
@ -0,0 +1,56 @@
+library(tidyverse)
+library(scales) #For a pretty population Pyramid
+ACS_FILE_LOC <- "Data/Cleaned_Data/ACS_Census_Demographic_Data.Rds"
+ACS_YEAR <- 2023 #Year of the ACS data
+if(!file.exists(ACS_FILE_LOC)){source("Scripts/Load_ACS_Census_Data.r")}else{AGE_DATA <- readRDS(ACS_FILE_LOC)}
+LIN_DEMOGRAPHICS <-    	readRDS("Data/Cleaned_Data/Lincoln_Demographic_Data.Rds") %>% filter(Year==ACS_YEAR)
+for(i in 1:nrow(AGE_DATA)){
+	if(i==1 & exists("RES")){rm(RES)}
+	C_DEMO <-	AGE_DATA[i,]
+	C_SEX <- C_DEMO$Sex 
+	if(C_DEMO$Min_Age==C_DEMO$Max_Age){C_AGE_YEARLY_DATA <- LIN_DEMOGRAPHICS %>% filter(Age==C_DEMO$Min_Age)}else{
+		C_AGE_YEARLY_DATA <- LIN_DEMOGRAPHICS %>% filter(Age>=C_DEMO$Min_Age,Age<=C_DEMO$Max_Age)
+	}
+	#Extract only the current sex being applied
+	if(C_SEX=='Female'){C_AGE_YEARLY_DATA  <- C_AGE_YEARLY_DATA %>% select(Age,Num_Female) %>% rename(POP=Num_Female)}else{C_AGE_YEARLY_DATA  <- C_AGE_YEARLY_DATA %>% select(Age,Num_Male) %>% rename(POP=Num_Male)}
+	C_AGE_YEARLY_DATA$Sex <- C_SEX
+	C_AGE_YEARLY_DATA$PER  <- C_AGE_YEARLY_DATA$POP/sum(C_AGE_YEARLY_DATA$POP)
+	C_RES <- C_AGE_YEARLY_DATA%>% left_join(C_DEMO) %>% mutate(Population=PER*Population) %>% select(Age,Sex,IN_KEM,Population)
+	if(i==1){RES <- C_RES}else{RES <- rbind(RES,C_RES)}
+}
+RES <- RES %>% group_by(IN_KEM) %>% mutate(Per_Pop=Population/sum(Population)) %>% mutate(Region=ifelse(IN_KEM,'Kemmerere & Diamondville','Rest of the County')) %>% select(-Population) %>% ungroup
+CURRENT_KEM_POP <- readRDS("Data/Cleaned_Data/Wyoming_City_Population.Rds")  %>% filter(City %in% c('Kemmerer','Diamondville'),Year==max(Year)) %>% mutate(Population=as.numeric(Population)) %>% pull(Population) %>% sum()
+KEM_DEMOGRAPHICS <- RES %>% filter(IN_KEM==1) %>% select(Age,Sex,Per_Pop) %>% mutate(Year=ACS_YEAR+1,County='Kemmerer',Population=Per_Pop*CURRENT_KEM_POP ) %>% select(-Per_Pop) %>% pivot_wider(values_from=Population,names_from=c(Sex),names_prefix="Num_") %>% select(County,Year,Age,Num_Male,Num_Female)
+	saveRDS(KEM_DEMOGRAPHICS,"Data/Cleaned_Data/Intiate_Simulation/Kemmerer_Demographic_Data.Rds")
+	write_csv(KEM_DEMOGRAPHICS,"Data/Cleaned_Data/Intiate_Simulation/Kemmerer_Demographic_Data.csv")
+
+##Create a simplfied summary of the Kemmerer/Diamondville population estimates, to start the Monte Carlo
+	ST_BIRTH_KEM <- round(sum((KEM_DEMOGRAPHICS%>% filter(Age==0))[4:5]))
+	ST_BIRTH_TWO_PREV_KEM <- round(sum((KEM_DEMOGRAPHICS%>% filter(Age==1))[4:5]))
+	INTIATE_KEMMER <- KEM_DEMOGRAPHICS %>% mutate(Male_Window=Age>=18 & Age<=30,Female_Window=Age>=18 & Age<=28) %>% group_by(County,Year) %>% summarize(Female_Birth_Group=sum(Num_Female*Female_Window),Male_Birth_Group=sum(Num_Male*Male_Window),Min_Birth_Group=ifelse(Female_Birth_Group<Male_Birth_Group,Female_Birth_Group,Male_Birth_Group),PREV_BIRTH=ST_BIRTH_KEM,PREV_TWO_BIRTH=ST_BIRTH_TWO_PREV_KEM,Population=CURRENT_KEM_POP) %>% mutate(Births=NA,Deaths=NA,Migration=NA) %>% select(Year,County,Population,Births,Deaths,Migration,Min_Birth_Group,PREV_BIRTH,PREV_TWO_BIRTH) %>% ungroup
+	saveRDS(INTIATE_KEMMER ,"Data/Cleaned_Data/Intiate_Simulation/Kemmerer_Summary_Start_Data.Rds")
+	write_csv(INTIATE_KEMMER ,"Data/Cleaned_Data/Intiate_Simulation/Kemmerer_Summary_Start_Data.csv")
+###Make a population Pyramid Graph
+PY_KEM_DATA <- KEM_DEMOGRAPHICS %>% mutate(County='Kemmerer') %>% pivot_longer(cols=c(Num_Male,Num_Female),names_to="Sex",values_to="Population") %>% mutate(Sex=ifelse(Sex=='Num_Male','Male','Female'))
+PY_LN_DATA <- readRDS("Data/Cleaned_Data/Wyoming_County_Demographic_Data.Rds") %>% filter(County=='Lincoln',Year==max(Year))%>% pivot_longer(cols=c(Num_Male,Num_Female),names_to="Sex",values_to="Population") %>% mutate(Sex=ifelse(Sex=='Num_Male','Male','Female'))
+
+#Population=ifelse(Sex=='Male',-Population,Population)
+#Remove the Kemmerer Population estimate from Lincoln county
+PY_LN_DATA <- PY_LN_DATA %>% left_join(PY_KEM_DATA %>% rename(POP_SHIFT=Population) %>% select(-County)) %>% mutate(Population=Population-POP_SHIFT) %>% select(-POP_SHIFT)
+
+LN_ONLY_POP <- sum(PY_LN_DATA$Population)
+PY_LN_DATA$Percent <- PY_LN_DATA$Population/LN_ONLY_POP 
+#Find percent Kemmerer (nod adjustment needed)
+KEM_ONLY_POP <- sum(PY_KEM_DATA$Population)
+PY_KEM_DATA$Percent <- PY_KEM_DATA$Population/KEM_ONLY_POP 
+
+PY_DATA <- rbind(PY_LN_DATA,PY_KEM_DATA) %>% mutate(Population=ifelse(County=='Kemmerer',Population,-Population),'Percent Population'=ifelse(County=='Kemmerer',Percent,-Percent),Age_Numeric=Age,Age=as.character(Age))
+PY_DATA[PY_DATA$Age=='85',"Age"] <- ">84"
+ORD <- PY_DATA[,c("Age_Numeric","Age")] %>% unique %>% arrange(Age_Numeric) %>% pull(Age)
+PY_DATA$Age <- factor(PY_DATA$Age,levels=ORD)
+
+RANGE <- c(pretty(range(PY_DATA$`Percent Population`),n=8))
+LAB <- 	percent(abs(RANGE),accuracy=0.1 )
+ POP_PYRAMID <- ggplot(PY_DATA,aes(y=factor(Age),x=`Percent Population`,fill=County))+geom_col() +scale_x_continuous(breaks  = RANGE,labels = LAB)+facet_grid(~Sex)
+POP_PYRAMID 
+
--- a/Scripts/Downshift_Population_Functions.r
+++ b/Scripts/Downshift_Population_Functions.r
@ -1,58 +0,0 @@
-#These functions are used to take census data from Kemmerer in 2020, and find the relative ratio of the total population in Lincoln. This allows for a heuristic of the current year age-sex demographic amounts in 2025 of Kemmerer to start the Monte Carlo with a list age people in each age group
-
-#A function to pull an estimate of the ratio between the Kemmerer are age-sex population cohort, and the Lincoln total. This allows the age-sex by year demographic data for Lincoln to be converted to Kemmerer numbers. Used as in input to TRANSPOSE_AGE_DEMOGRAPHIC_DATA_DOWN (see below)
-GET_VALUE <- function(Age,SEX,DATA=AGE_DATA,County='Lincoln'){
-	DATA <- DATA %>% filter(Region==County,Sex==SEX)
-	if(any(Age==DATA$Med_Age)){
-	#If any exact matches in between the min and max ages pull the exact record
-		RES <- DATA[which(Age==DATA$Med_Age & DATA$Sex==SEX),] %>% pull(Per_Pop)
-}else if(Age>=85){
-		   RES <- DATA[which(DATA$Min_Age==85 & DATA$Max_Age==Inf),] %>% pull(Per_Pop)
-	}else	if(Age>=85){
-	#If in the upper bound of age pull the single record
-		   RES <- DATA[which(DATA$Min_Age==85 & DATA$Max_Age==Inf),] %>% pull(Per_Pop)
-	}else if(Age==0){
-	#If in the lower bound of age pull the single record
-	   RES <- DATA[which(DATA$Min_Age==0),] %>% pull(Per_Pop)
-	}else{
-	#If no exact record is found perform a linear interpolation between the two age brackets bounding the point
-		TEMP <- DATA %>% arrange(Min_Age) %>% select(Med_Age,Min_Age,Max_Age,Per_Pop) %>% filter(Per_Pop!=0)
-		LOWER <-       	TEMP[max(max(which(TEMP$Med_Age<Age)),1),] #Find the lowest value in the list but if no match is found pull the first record
-		UPPER <- TEMP[min(which(TEMP$Med_Age>Age)),]
-		if(LOWER$Med_Age!=UPPER$Med_Age){
-			#If the LOWER and UPPER are the same than the age must be less the first entry, if that is not true perform a linear interpolation between the two bounding entries
-			C <- LOWER$Med_Age
-			ST <- LOWER$Per_Pop
-			DELTA <- UPPER$Per_Pop-LOWER$Per_Pop
-			GAP <- UPPER$Med_Age-LOWER$Med_Age
-			RES <- (ST+(Age-C)*DELTA/GAP)
-		} else{RES <- UPPER %>% pull(Per_Pop)}
-	}
-	return(RES)
-}
-#A function which returns a tibble of all single year age-sex combinations for conversions of ratio in Lincoln to Kemmerer
-INTERPOLATE_COUNTY_AGE_DEMOGRAPHIC_DATA_TO_CITY_LEVEL <- function(DATA,COUNTY='Lincoln',AGE_RANGE=0:85,YEARS_FORWARD=0){
-	#Create a vector of all male ages, then all female ages
-	VALUES <- 	 c(unlist((sapply(AGE_RANGE,function(x){GET_VALUE(x,'Male',DATA,COUNTY)}))),unlist((sapply(AGE_RANGE,function(x){GET_VALUE(x,'Female',DATA,COUNTY)}))))
-	#Turn into an easy to read tibble to merge later with the actual demographic data
-	RES <- cbind(rep(AGE_RANGE,2),c(rep("Male",length(VALUES)/2),rep("Female",length(VALUES)/2)),VALUES) %>% as_tibble
-	colnames(RES) <- c("Age","Sex","Conversion_Ratio")
-	RES <-	RES %>% mutate(Age=as.numeric(Age),Conversion_Ratio=as.numeric(Conversion_Ratio))
-if(YEARS_FORWARD>0){
-	MALE <- RES %>% filter(Sex=='Male')
-	FEMALE <- RES %>% filter(Sex=='Female')
-	STUB_AGE_MALE <- MALE[rep(1,YEARS_FORWARD),]
-	STUB_AGE_MALE$Age <- 0:(YEARS_FORWARD-1)
-
-	STUB_AGE_FEMALE <- FEMALE[rep(1,YEARS_FORWARD),]
-	STUB_AGE_FEMALE$Age <- 0:(YEARS_FORWARD-1)
-
-	MALE[,"Age"] <- MALE[,"Age"]+YEARS_FORWARD
-	FEMALE[,"Age"] <- FEMALE[,"Age"]+YEARS_FORWARD
-	MALE <- rbind(STUB_AGE_MALE,MALE)[1:nrow(MALE),]
-	FEMALE <- rbind(STUB_AGE_FEMALE,FEMALE)[1:nrow(FEMALE),]
-	RES <- rbind(MALE,FEMALE)
-}
-	return(RES)
-}
-
--- a/Scripts/Load_ACS_Census_Data.r
+++ b/Scripts/Load_ACS_Census_Data.r
@ -0,0 +1,40 @@
+library(tidycensus)
+library(zipcodeR)
+#Pull the relevant median age variables the value moe (margine of error) can be converted to standard error, following the link below
+#https://www.census.gov/content/dam/Census/library/publications/2018/acs/acs_general_handbook_2018_ch08.pdf
+ACS_YEAR <- 2023 #most recent as of Nov 4 2025
+source("Scripts/Downshift_Population_Functions.r")
+#Packages to instal on computer if zipcodeR won't install
+#Sudo apt install libssl-dev libudunits2-dev libabsl-dev libcurl4-openssl-dev libgdal-dev cmake libfontconfig1-dev libharfbuzz-dev libfribidi-dev
+#install.packages("zipcodeR")
+
+#Add API key if missing
+	#KEY <- '30e13ab22563318ff59286e433099f4174d4edd4'
+	#census_api_key(KEY, install = TRUE)
+PROJ_TRACTS <- get_tracts(search_city('Kemmerer','WY')$zipcode)  %>% full_join(get_tracts(search_city('Diamondville','WY')$zipcode))
+PROJ_TRACTS <- PROJ_TRACTS  %>% select(GEOID) %>% mutate('IN_KEM'=1) %>% mutate(GEOID=as.character(GEOID))
+
+MED_AGE_VAR <- cbind(c('B01002_001E','B01002_002E','B01002_003E'),c('Median_Age','Median_Age_Male','Median_Age_Female')) %>% as_tibble %>% rename(variable=V1,Data_Type=V2)
+
+###Load data manually created which links vairable names to sex-age census data
+CODES <- read_csv("Data/API_CENSUS_CODES.csv",skip=1) %>% mutate(Med_Age=(Min_Age+Max_Age)/2) %>% rename(variable=Code)
+#Testing age Comparison between the two
+###Extract census data for all tracts in Lincoln county, clean up the data, and indicate if the tract is in Kemmerer/Diamondvile or not.
+AGE_DATA <- 	get_acs(geography="tract",year=ACS_YEAR,variables=CODES$variable,state='WY',county='lincoln') %>% mutate(se=moe/1.64) %>% left_join(CODES %>% mutate(variable=gsub('E','',variable))) %>% select(-NAME,-moe) %>% left_join(PROJ_TRACTS) %>% mutate(IN_KEM=ifelse(is.na(IN_KEM),0,1)) %>% rename(Population=estimate) %>% select(-variable,-GEOID) %>% select(Sex,Min_Age,Max_Age,Med_Age,IN_KEM,Population,se) %>% filter(!(Min_Age==0& Max_Age==Inf))
+AGE_DATA <- AGE_DATA %>% group_by(Sex,Min_Age,Max_Age,Med_Age,IN_KEM)%>% summarize(Population=sum(Population)) %>% ungroup
+#Add Descriptive age category to a clean graph
+AGE_DATA$Ages <- paste(AGE_DATA$Min_Age,"to",AGE_DATA$Max_Age)
+AGE_DATA[AGE_DATA$Max_Age==Inf,"Ages"] <- "85+"
+AGE_DATA[AGE_DATA$Med_Age==18,"Ages"] <- "18"
+AGE_DATA[AGE_DATA$Med_Age==20,"Ages"] <- "20"
+AGE_DATA[AGE_DATA$Med_Age==21,"Ages"] <- "21"
+AGE_DATA[AGE_DATA$Min_Age==0,"Ages"] <- "Under 5"
+#Turn the ages into factors to keep the correct order in graphs
+#Add the percent of total relative population in the region
+ORD <- AGE_DATA %>% select(Min_Age,Ages) %>% unique %>% arrange(Min_Age) %>% pull(Ages) %>% unique
+AGE_DATA$Ages <- factor(AGE_DATA$Ages,levels=ORD)
+AGE_DATA$Year <- ACS_YEAR
+AGE_DATA <-AGE_DATA %>% select(Year,Sex,Ages,IN_KEM,everything())
+if(!exists('ACS_FILE_LOC')){ACS_FILE_LOC <- "Data/Cleaned_Data/ACS_Census_Demographic_Data.Rds"}
+saveRDS(AGE_DATA,ACS_FILE_LOC)
+
--- a/Zip_Code.r
+++ b/Zip_Code.r
@ -1,96 +0,0 @@
-library(tidyverse)
-library(tidycensus)
-library(zipcodeR)
-library(scales) #For a pretty population Pyramid
-source("Scripts/Downshift_Population_Functions.r")
-#Packages to instal on computer if zipcodeR won't install
-#Sudo apt install libssl-dev libudunits2-dev libabsl-dev libcurl4-openssl-dev libgdal-dev cmake libfontconfig1-dev libharfbuzz-dev libfribidi-dev
-#install.packages("zipcodeR")
-
-#Add API key if missing
-	#KEY <- '30e13ab22563318ff59286e433099f4174d4edd4'
-	#census_api_key(KEY, install = TRUE)
-PROJ_TRACTS <- get_tracts(search_city('Kemmerer','WY')$zipcode)  %>% full_join(get_tracts(search_city('Diamondville','WY')$zipcode))
-PROJ_TRACTS <- PROJ_TRACTS  %>% select(GEOID) %>% mutate('IN_KEM'=1) %>% mutate(GEOID=as.character(GEOID))
-
-MED_AGE_VAR <- cbind(c('B01002_001E','B01002_002E','B01002_003E'),c('Median_Age','Median_Age_Male','Median_Age_Female')) %>% as_tibble %>% rename(variable=V1,Data_Type=V2)
-#Pull the relevant median age variables the value moe (margine of error) can be converted to standard error, following the link below
-#https://www.census.gov/content/dam/Census/library/publications/2018/acs/acs_general_handbook_2018_ch08.pdf
-
-MED_AGE_VALUES <-  get_acs(geography="tract",variables=MED_AGE_VAR$variable,state='WY',county='lincoln')  %>% mutate(se=moe/1.64) %>% left_join(MED_AGE_VAR %>% mutate(variable=gsub('E','',variable))) %>% select(-NAME,-moe) %>% left_join(PROJ_TRACTS) %>% mutate(IN_KEM=ifelse(is.na(IN_KEM),0,1))
-AGE_DIFF <- MED_AGE_VALUES  %>% group_by(IN_KEM,Data_Type) %>% summarize(Age=mean(estimate)) %>% pivot_wider(names_from=IN_KEM,values_from=Age,names_prefix="In_Kemmerer_")
-AGE_DIFF 
-
-###Load data manually created which links vairable names to sex-age census data
-CODES <- read_csv("Data/API_CENSUS_CODES.csv",skip=1) %>% mutate(Med_Age=(Min_Age+Max_Age)/2) %>% rename(variable=Code)
-
-
-
-#Testing age Comparison between the two
-###Extract census data for all tracts in Lincoln county, clean up the data, and indicate if the tract is in Kemmerer/Diamondvile or not.
-AGE_DATA <- get_acs(geography="tract",variables=CODES$variable,state='WY',county='lincoln') %>% mutate(se=moe/1.64) %>% left_join(CODES %>% mutate(variable=gsub('E','',variable))) %>% select(-NAME,-moe) %>% left_join(PROJ_TRACTS) %>% mutate(IN_KEM=ifelse(is.na(IN_KEM),0,1)) %>% rename(Population=estimate) %>% select(-variable,-GEOID) %>% select(Sex,Min_Age,Max_Age,Med_Age,IN_KEM,Population,se) %>% filter(!(Min_Age==0& Max_Age==Inf))
-
-AGE_DATA <- AGE_DATA %>% group_by(Sex,Min_Age,Max_Age,Med_Age,IN_KEM)%>% summarize(Population=sum(Population)) %>% ungroup
-#Add Descriptive age category to a clean graph
-AGE_DATA$Ages <- paste(AGE_DATA$Min_Age,"to",AGE_DATA$Max_Age)
-AGE_DATA[AGE_DATA$Max_Age==Inf,"Ages"] <- "85+"
-AGE_DATA[AGE_DATA$Med_Age==18,"Ages"] <- "18"
-AGE_DATA[AGE_DATA$Med_Age==20,"Ages"] <- "20"
-AGE_DATA[AGE_DATA$Med_Age==21,"Ages"] <- "21"
-AGE_DATA[AGE_DATA$Min_Age==0,"Ages"] <- "Under 5"
-#Turn the ages into factors to keep the correct order in graphs
-ORD <- AGE_DATA %>% select(Min_Age,Ages) %>% unique %>% arrange(Min_Age) %>% pull(Ages) %>% unique
-AGE_DATA$Ages <- factor(AGE_DATA$Ages,levels=ORD)
-#Add the percent of total relative population in the region
-AGE_DATA <- AGE_DATA %>%  mutate(Per_Pop=Population/sum(Population)) %>% group_by(IN_KEM) %>% mutate(Per_Pop_Region=Population/sum(Population)) %>% ungroup
-
-#Add a region name for clearer graphs
-AGE_DATA <- AGE_DATA %>% mutate(Region=ifelse(IN_KEM==1,'Kemmerer','Lincoln')) 
-#AGE_DATA <- AGE_DATA %>% group_by(IN_KEM) %>% mutate(MORE_KEMMER=ifelse(sum(IN_KEM*Per_Pop_Region)>sum((1-IN_KEM)*Per_Pop_Region),1,0)) %>% ungroup 
-
-#PLOT <- ggplot(AGE_DATA, aes(x =Ages, y = Per_Pop_Region)) +  geom_line() +  geom_point(aes(color = Region ), size = 5) +  scale_color_brewer(palette = "Set1", direction = 1) +  theme(legend.position = "bottom")+facet_wrap(~Sex,ncol=1)
-#PLOT
-LIN_TO_KEMMER_CONVERSION_RATIOS <- INTERPOLATE_COUNTY_AGE_DEMOGRAPHIC_DATA_TO_CITY_LEVEL(AGE_DATA,YEARS_FORWARD=1) %>% pivot_wider(names_from=Sex,values_from=Conversion_Ratio,names_prefix="Convert_")
-
-KEM_DEMO_DATA <- readRDS("Data/Cleaned_Data/Lincoln_Demographic_Data.Rds")  %>% filter(Year==max(Year)) %>% left_join(LIN_TO_KEMMER_CONVERSION_RATIOS ) %>% mutate(Num_Male=Num_Male*Convert_Male,Num_Female=Num_Female*Convert_Female) %>% mutate(County='Kemmerer') %>% select(-Convert_Male,-Convert_Female)
-NUM_KEM <- sum(KEM_DEMO_DATA[,4:5] )
-CURRENT_KEM_POP <- readRDS("Data/Cleaned_Data/Wyoming_City_Population.Rds") %>% filter(City=='Kemmerer' | City=='Diamondville') %>% group_by(City) %>% filter(Year==max(Year)) %>% ungroup %>% group_by(Year) %>% summarize(Population=sum(as.numeric(Population)),County='Kemmerer')
-
-SCALE_KEM_FACTOR <- CURRENT_KEM_POP$Population/NUM_KEM
-KEM_NEW_DEMO_DATA <- KEM_DEMO_DATA
-KEM_NEW_DEMO_DATA[,c("Num_Male","Num_Female")] <- round(SCALE_KEM_FACTOR*KEM_NEW_DEMO_DATA[,c("Num_Male","Num_Female")])
-##Create a simplfied summary of the Kemmerer/Diamondville population estimates, to start the Monte Carlo
-	ST_BIRTH_KEM <- sum((KEM_NEW_DEMO_DATA %>% filter(Age==0))[4:5])
-	ST_BIRTH_TWO_PREV_KEM <- sum((KEM_NEW_DEMO_DATA %>% filter(Age==1))[4:5])
-
-KEM_DEMO_DATA %>% filter(Age==0)
-
-	ST_POP <- sum((KEM_NEW_DEMO_DATA)[4:5])
-	INTIATE_KEMMER <- KEM_NEW_DEMO_DATA%>% mutate(Male_Window=Age>=18 & Age<=30,Female_Window=Age>=18 & Age<=28) %>% group_by(County,Year) %>% summarize(Female_Birth_Group=sum(Num_Female*Female_Window),Male_Birth_Group=sum(Num_Male*Male_Window),Min_Birth_Group=ifelse(Female_Birth_Group<Male_Birth_Group,Female_Birth_Group,Male_Birth_Group),PREV_BIRTH=ST_BIRTH_KEM,PREV_TWO_BIRTH=ST_BIRTH_TWO_PREV_KEM,Population=ST_POP) %>% mutate(Births=NA,Deaths=NA,Migration=NA) %>% select(Year,County,Population,Births,Deaths,Migration,Min_Birth_Group,PREV_BIRTH,PREV_TWO_BIRTH) %>% ungroup
-INTIATE_KEMMER$County <- (readRDS("Data/Regression_Results/Birth_Regression_Data_Set.Rds") %>% filter(County=='Lincoln',Year==2020))$County #Making the factor the same for the regression
-	saveRDS(INTIATE_KEMMER ,"Data/Cleaned_Data/Kemmerer_Summary_Start_Data.Rds")
-	write_csv(INTIATE_KEMMER ,"Data/Cleaned_Data/Kemmerer_Summary_Start_Data.csv")
-
-	saveRDS(KEM_NEW_DEMO_DATA,"Data/Cleaned_Data/Kemmerer_Demographic_Data.Rds")
-	write_csv(KEM_NEW_DEMO_DATA,"Data/Cleaned_Data/Kemmerer_Demographic_Data.csv")
-#sum(KEM_NEW_DEMO_DATA[,4:5])-CURRENT_KEM_POP$Population
-###Make a population Pyramid Graph
-PY_KEM_DATA <- KEM_NEW_DEMO_DATA %>% pivot_longer(cols=c(Num_Male,Num_Female),names_to="Sex",values_to="Population") %>% mutate(Sex=ifelse(Sex=='Num_Male','Male','Female'),Population=ifelse(Sex=='Male',-Population,Population))
-LN_CURRENT_DEMO <- readRDS("Data/Cleaned_Data/Wyoming_County_Demographic_Data.Rds") %>% filter(County=='Lincoln',Year==max(Year))
-
-PY_LN_DATA <- LN_CURRENT_DEMO %>% pivot_longer(cols=c(Num_Male,Num_Female),names_to="Sex",values_to="Population") %>% mutate(Sex=ifelse(Sex=='Num_Male','Male','Female'),Population=ifelse(Sex=='Male',-Population,Population))
-PY_LN_DATA <- PY_LN_DATA %>% left_join(PY_KEM_DATA %>% rename(POP_SHIFT=Population) %>% select(-County)) %>% mutate(Population=Population-POP_SHIFT) %>% select(-POP_SHIFT)
-PY_LN_DATA$Population <- PY_LN_DATA$Population/sum(abs(PY_LN_DATA$Population ))
-
-
-PY_KEM_PER <- PY_KEM_DATA
-PY_KEM_PER$Population <- PY_KEM_PER$Population/sum(abs(PY_KEM_PER$Population ))
-GRAPH_DATA <- full_join(PY_KEM_PER ,PY_LN_DATA)   %>% mutate(Population=ifelse(County=='Kemmerer',abs(Population),-abs(Population)))
-
-
-RANGE <- c(-0.015,pretty(range(GRAPH_DATA$Population),n=8))
-LAB <- 	percent(abs(RANGE),accuracy=0.1 )
- POP_PYRAMID <- ggplot(GRAPH_DATA,aes(y=factor(Age),x=Population,fill=County))+geom_col() +scale_x_continuous(breaks  = RANGE,labels = LAB)+facet_grid(~Sex)
-POP_PYRAMID 
-
-