Cleaned up demographic match, saved for walk
This commit is contained in:
parent
e60fa6fd53
commit
7e43123840
@ -5,7 +5,7 @@ B01001_003E,"Male",0,4
|
||||
B01001_004E,"Male",5,9
|
||||
B01001_005E,"Male",10,14
|
||||
B01001_006E,"Male",15,17
|
||||
B01001_007E,"Male",18,18
|
||||
B01001_007E,"Male",18,19
|
||||
B01001_008E,"Male",20,20
|
||||
B01001_009E,"Male",21,21
|
||||
B01001_010E,"Male",22,24
|
||||
@ -16,9 +16,9 @@ B01001_014E,"Male",40,44
|
||||
B01001_015E,"Male",45,49
|
||||
B01001_016E,"Male",50,54
|
||||
B01001_017E,"Male",55,59
|
||||
B01001_018E,"Male",60,60
|
||||
B01001_018E,"Male",60,61
|
||||
B01001_019E,"Male",62,64
|
||||
B01001_020E,"Male",65,65
|
||||
B01001_020E,"Male",65,66
|
||||
B01001_021E,"Male",67,69
|
||||
B01001_022E,"Male",70,74
|
||||
B01001_023E,"Male",75,79
|
||||
@ -29,7 +29,7 @@ B01001_027E,"Female",0,4
|
||||
B01001_028E,"Female",5,9
|
||||
B01001_029E,"Female",10,14
|
||||
B01001_030E,"Female",15,17
|
||||
B01001_031E,"Female",18,18
|
||||
B01001_031E,"Female",18,19
|
||||
B01001_032E,"Female",20,20
|
||||
B01001_033E,"Female",21,21
|
||||
B01001_034E,"Female",22,24
|
||||
@ -40,9 +40,9 @@ B01001_038E,"Female",40,44
|
||||
B01001_039E,"Female",45,49
|
||||
B01001_040E,"Female",50,54
|
||||
B01001_041E,"Female",55,59
|
||||
B01001_042E,"Female",60,60
|
||||
B01001_042E,"Female",60,61
|
||||
B01001_043E,"Female",62,64
|
||||
B01001_044E,"Female",65,65
|
||||
B01001_044E,"Female",65,66
|
||||
B01001_045E,"Female",67,69
|
||||
B01001_046E,"Female",70,74
|
||||
B01001_047E,"Female",75,79
|
||||
|
||||
|
Can't render this file because it has a wrong number of fields in line 2.
|
@ -0,0 +1,87 @@
|
||||
County,Year,Age,Num_Male,Num_Female
|
||||
Kemmerer,2024,0,10.464965521092157,18.53827755476956
|
||||
Kemmerer,2024,1,10.73329797035093,26.12211837262983
|
||||
Kemmerer,2024,2,9.838856472821687,22.330197963699696
|
||||
Kemmerer,2024,3,11.717183617633099,26.3327806175704
|
||||
Kemmerer,2024,4,10.464965521092157,24.015495923224204
|
||||
Kemmerer,2024,5,15.460982984335295,16.925040008171884
|
||||
Kemmerer,2024,6,19.09886133359066,18.773993958644443
|
||||
Kemmerer,2024,7,20.78787628145922,22.045220178711276
|
||||
Kemmerer,2024,8,17.409846385722098,19.34290286648215
|
||||
Kemmerer,2024,9,21.17764896173658,20.05403900127929
|
||||
Kemmerer,2024,10,23.313248886019743,19.837665883519023
|
||||
Kemmerer,2024,11,24.63286674749256,19.692865402617425
|
||||
Kemmerer,2024,12,25.0727393679835,18.534461555404636
|
||||
Kemmerer,2024,13,24.19299412700162,23.023276463354193
|
||||
Kemmerer,2024,14,26.53898143628663,23.74727886786219
|
||||
Kemmerer,2024,15,13.763908906585888,13.325834953286826
|
||||
Kemmerer,2024,16,14.400390243306624,12.585510789215336
|
||||
Kemmerer,2024,17,15.116431747117451,11.919219041550994
|
||||
Kemmerer,2024,18,8.11409091515564,8.11283185840708
|
||||
Kemmerer,2024,19,6.633417390492201,5.672882427307205
|
||||
Kemmerer,2024,20,8.976744186046512,27.571428571428573
|
||||
Kemmerer,2024,21,0,6.411960132890365
|
||||
Kemmerer,2024,22,10.007317017094115,16.00986295681063
|
||||
Kemmerer,2024,23,9.285139500396603,13.795732973421927
|
||||
Kemmerer,2024,24,10.523158100449484,13.795732973421927
|
||||
Kemmerer,2024,25,14.229527197897555,14.473076411960134
|
||||
Kemmerer,2024,26,12.94653704071007,15.057847176079733
|
||||
Kemmerer,2024,27,13.179807978380522,13.888305647840532
|
||||
Kemmerer,2024,28,9.214202037982844,13.74211295681063
|
||||
Kemmerer,2024,29,12.94653704071007,15.935003322259135
|
||||
Kemmerer,2024,30,9.841972541328104,16.16915622314293
|
||||
Kemmerer,2024,31,10.64539887123244,16.16915622314293
|
||||
Kemmerer,2024,32,9.339831085137893,17.360567734321886
|
||||
Kemmerer,2024,33,10.24368570628027,17.360567734321886
|
||||
Kemmerer,2024,34,9.942400832566145,21.10500391231288
|
||||
Kemmerer,2024,35,23.090047712195066,13.797127159737228
|
||||
Kemmerer,2024,36,24.414886515353803,15.316076938790872
|
||||
Kemmerer,2024,37,21.386683536705274,16.20213097657216
|
||||
Kemmerer,2024,38,23.2793103983606,16.835026717844507
|
||||
Kemmerer,2024,39,29.33571635565767,16.07555182831769
|
||||
Kemmerer,2024,40,17.783950233132593,12.477327826165036
|
||||
Kemmerer,2024,41,17.654140377416297,12.581305558049745
|
||||
Kemmerer,2024,42,16.615661531685927,13.413127413127413
|
||||
Kemmerer,2024,43,18.30318965599778,15.388704318936878
|
||||
Kemmerer,2024,44,19.73109806887704,15.388704318936878
|
||||
Kemmerer,2024,45,9.96622215641455,21.42182039512059
|
||||
Kemmerer,2024,46,10.86595054553531,17.354386142882504
|
||||
Kemmerer,2024,47,10.796740669449097,16.812061575917422
|
||||
Kemmerer,2024,48,9.96622215641455,16.405318150693617
|
||||
Kemmerer,2024,49,8.097555502086824,16.812061575917422
|
||||
Kemmerer,2024,50,11.910119959416715,17.094436330798903
|
||||
Kemmerer,2024,51,12.541717229991843,18.692047202836182
|
||||
Kemmerer,2024,52,13.443999045099172,20.928702423688375
|
||||
Kemmerer,2024,53,12.000348140927445,20.928702423688375
|
||||
Kemmerer,2024,54,10.37624087373426,17.57371959241009
|
||||
Kemmerer,2024,55,27.330581312705572,17.79318936877076
|
||||
Kemmerer,2024,56,28.04355299912398,19.075581395348838
|
||||
Kemmerer,2024,57,24.71635179583808,19.716777408637874
|
||||
Kemmerer,2024,58,30.1824680583792,21.640365448504983
|
||||
Kemmerer,2024,59,33.034354804052825,21.80066445182724
|
||||
Kemmerer,2024,60,24.58166576527853,13.547851248526417
|
||||
Kemmerer,2024,61,25.752221277910838,15.305969349480229
|
||||
Kemmerer,2024,62,23.212805992871722,17.31788012362568
|
||||
Kemmerer,2024,63,23.212805992871722,16.868065055479555
|
||||
Kemmerer,2024,64,24.42654748269508,17.430333890662205
|
||||
Kemmerer,2024,65,17.826584994462902,31.169037340601193
|
||||
Kemmerer,2024,66,20.324577796234774,32.30936797501343
|
||||
Kemmerer,2024,67,31.745589072282296,31.707002858013322
|
||||
Kemmerer,2024,68,31.745589072282296,32.5413976700663
|
||||
Kemmerer,2024,69,29.161645775701185,31.289805451986826
|
||||
Kemmerer,2024,70,27.118003770017854,15.476896504155697
|
||||
Kemmerer,2024,71,25.59022890973516,15.122463759785719
|
||||
Kemmerer,2024,72,21.579819901493085,13.114011541689177
|
||||
Kemmerer,2024,73,20.43398875628106,12.405146052949224
|
||||
Kemmerer,2024,74,20.052045041210388,12.168857556702571
|
||||
Kemmerer,2024,75,27.418452872826624,9.03148043108338
|
||||
Kemmerer,2024,76,32.273803902389666,9.305161656267725
|
||||
Kemmerer,2024,77,19.421404118252187,5.199943278502552
|
||||
Kemmerer,2024,78,17.422141929608582,5.0174891283796565
|
||||
Kemmerer,2024,79,15.994097509148864,5.108716203441104
|
||||
Kemmerer,2024,80,7.746694691165502,5.8689063665333245
|
||||
Kemmerer,2024,81,5.325852600176282,5.103396840463761
|
||||
Kemmerer,2024,82,3.7523052410332904,5.358566682486948
|
||||
Kemmerer,2024,83,3.510221031934368,4.465472235405791
|
||||
Kemmerer,2024,84,3.389178927384908,4.2103023933826025
|
||||
Kemmerer,2024,85,20.83887043189369,18.274086378737543
|
||||
|
@ -0,0 +1,2 @@
|
||||
Year,County,Population,Births,Deaths,Migration,Min_Birth_Group,PREV_BIRTH,PREV_TWO_BIRTH
|
||||
2024,Kemmerer,2895,NA,NA,NA,125.89845094664372,29,37
|
||||
|
56
Demographic_Split.r
Normal file
56
Demographic_Split.r
Normal file
@ -0,0 +1,56 @@
|
||||
library(tidyverse)
|
||||
library(scales) #For a pretty population Pyramid
|
||||
ACS_FILE_LOC <- "Data/Cleaned_Data/ACS_Census_Demographic_Data.Rds"
|
||||
ACS_YEAR <- 2023 #Year of the ACS data
|
||||
if(!file.exists(ACS_FILE_LOC)){source("Scripts/Load_ACS_Census_Data.r")}else{AGE_DATA <- readRDS(ACS_FILE_LOC)}
|
||||
LIN_DEMOGRAPHICS <- readRDS("Data/Cleaned_Data/Lincoln_Demographic_Data.Rds") %>% filter(Year==ACS_YEAR)
|
||||
for(i in 1:nrow(AGE_DATA)){
|
||||
if(i==1 & exists("RES")){rm(RES)}
|
||||
C_DEMO <- AGE_DATA[i,]
|
||||
C_SEX <- C_DEMO$Sex
|
||||
if(C_DEMO$Min_Age==C_DEMO$Max_Age){C_AGE_YEARLY_DATA <- LIN_DEMOGRAPHICS %>% filter(Age==C_DEMO$Min_Age)}else{
|
||||
C_AGE_YEARLY_DATA <- LIN_DEMOGRAPHICS %>% filter(Age>=C_DEMO$Min_Age,Age<=C_DEMO$Max_Age)
|
||||
}
|
||||
#Extract only the current sex being applied
|
||||
if(C_SEX=='Female'){C_AGE_YEARLY_DATA <- C_AGE_YEARLY_DATA %>% select(Age,Num_Female) %>% rename(POP=Num_Female)}else{C_AGE_YEARLY_DATA <- C_AGE_YEARLY_DATA %>% select(Age,Num_Male) %>% rename(POP=Num_Male)}
|
||||
C_AGE_YEARLY_DATA$Sex <- C_SEX
|
||||
C_AGE_YEARLY_DATA$PER <- C_AGE_YEARLY_DATA$POP/sum(C_AGE_YEARLY_DATA$POP)
|
||||
C_RES <- C_AGE_YEARLY_DATA%>% left_join(C_DEMO) %>% mutate(Population=PER*Population) %>% select(Age,Sex,IN_KEM,Population)
|
||||
if(i==1){RES <- C_RES}else{RES <- rbind(RES,C_RES)}
|
||||
}
|
||||
RES <- RES %>% group_by(IN_KEM) %>% mutate(Per_Pop=Population/sum(Population)) %>% mutate(Region=ifelse(IN_KEM,'Kemmerere & Diamondville','Rest of the County')) %>% select(-Population) %>% ungroup
|
||||
CURRENT_KEM_POP <- readRDS("Data/Cleaned_Data/Wyoming_City_Population.Rds") %>% filter(City %in% c('Kemmerer','Diamondville'),Year==max(Year)) %>% mutate(Population=as.numeric(Population)) %>% pull(Population) %>% sum()
|
||||
KEM_DEMOGRAPHICS <- RES %>% filter(IN_KEM==1) %>% select(Age,Sex,Per_Pop) %>% mutate(Year=ACS_YEAR+1,County='Kemmerer',Population=Per_Pop*CURRENT_KEM_POP ) %>% select(-Per_Pop) %>% pivot_wider(values_from=Population,names_from=c(Sex),names_prefix="Num_") %>% select(County,Year,Age,Num_Male,Num_Female)
|
||||
saveRDS(KEM_DEMOGRAPHICS,"Data/Cleaned_Data/Intiate_Simulation/Kemmerer_Demographic_Data.Rds")
|
||||
write_csv(KEM_DEMOGRAPHICS,"Data/Cleaned_Data/Intiate_Simulation/Kemmerer_Demographic_Data.csv")
|
||||
|
||||
##Create a simplfied summary of the Kemmerer/Diamondville population estimates, to start the Monte Carlo
|
||||
ST_BIRTH_KEM <- round(sum((KEM_DEMOGRAPHICS%>% filter(Age==0))[4:5]))
|
||||
ST_BIRTH_TWO_PREV_KEM <- round(sum((KEM_DEMOGRAPHICS%>% filter(Age==1))[4:5]))
|
||||
INTIATE_KEMMER <- KEM_DEMOGRAPHICS %>% mutate(Male_Window=Age>=18 & Age<=30,Female_Window=Age>=18 & Age<=28) %>% group_by(County,Year) %>% summarize(Female_Birth_Group=sum(Num_Female*Female_Window),Male_Birth_Group=sum(Num_Male*Male_Window),Min_Birth_Group=ifelse(Female_Birth_Group<Male_Birth_Group,Female_Birth_Group,Male_Birth_Group),PREV_BIRTH=ST_BIRTH_KEM,PREV_TWO_BIRTH=ST_BIRTH_TWO_PREV_KEM,Population=CURRENT_KEM_POP) %>% mutate(Births=NA,Deaths=NA,Migration=NA) %>% select(Year,County,Population,Births,Deaths,Migration,Min_Birth_Group,PREV_BIRTH,PREV_TWO_BIRTH) %>% ungroup
|
||||
saveRDS(INTIATE_KEMMER ,"Data/Cleaned_Data/Intiate_Simulation/Kemmerer_Summary_Start_Data.Rds")
|
||||
write_csv(INTIATE_KEMMER ,"Data/Cleaned_Data/Intiate_Simulation/Kemmerer_Summary_Start_Data.csv")
|
||||
###Make a population Pyramid Graph
|
||||
PY_KEM_DATA <- KEM_DEMOGRAPHICS %>% mutate(County='Kemmerer') %>% pivot_longer(cols=c(Num_Male,Num_Female),names_to="Sex",values_to="Population") %>% mutate(Sex=ifelse(Sex=='Num_Male','Male','Female'))
|
||||
PY_LN_DATA <- readRDS("Data/Cleaned_Data/Wyoming_County_Demographic_Data.Rds") %>% filter(County=='Lincoln',Year==max(Year))%>% pivot_longer(cols=c(Num_Male,Num_Female),names_to="Sex",values_to="Population") %>% mutate(Sex=ifelse(Sex=='Num_Male','Male','Female'))
|
||||
|
||||
#Population=ifelse(Sex=='Male',-Population,Population)
|
||||
#Remove the Kemmerer Population estimate from Lincoln county
|
||||
PY_LN_DATA <- PY_LN_DATA %>% left_join(PY_KEM_DATA %>% rename(POP_SHIFT=Population) %>% select(-County)) %>% mutate(Population=Population-POP_SHIFT) %>% select(-POP_SHIFT)
|
||||
|
||||
LN_ONLY_POP <- sum(PY_LN_DATA$Population)
|
||||
PY_LN_DATA$Percent <- PY_LN_DATA$Population/LN_ONLY_POP
|
||||
#Find percent Kemmerer (nod adjustment needed)
|
||||
KEM_ONLY_POP <- sum(PY_KEM_DATA$Population)
|
||||
PY_KEM_DATA$Percent <- PY_KEM_DATA$Population/KEM_ONLY_POP
|
||||
|
||||
PY_DATA <- rbind(PY_LN_DATA,PY_KEM_DATA) %>% mutate(Population=ifelse(County=='Kemmerer',Population,-Population),'Percent Population'=ifelse(County=='Kemmerer',Percent,-Percent),Age_Numeric=Age,Age=as.character(Age))
|
||||
PY_DATA[PY_DATA$Age=='85',"Age"] <- ">84"
|
||||
ORD <- PY_DATA[,c("Age_Numeric","Age")] %>% unique %>% arrange(Age_Numeric) %>% pull(Age)
|
||||
PY_DATA$Age <- factor(PY_DATA$Age,levels=ORD)
|
||||
|
||||
RANGE <- c(pretty(range(PY_DATA$`Percent Population`),n=8))
|
||||
LAB <- percent(abs(RANGE),accuracy=0.1 )
|
||||
POP_PYRAMID <- ggplot(PY_DATA,aes(y=factor(Age),x=`Percent Population`,fill=County))+geom_col() +scale_x_continuous(breaks = RANGE,labels = LAB)+facet_grid(~Sex)
|
||||
POP_PYRAMID
|
||||
|
||||
@ -1,58 +0,0 @@
|
||||
#These functions are used to take census data from Kemmerer in 2020, and find the relative ratio of the total population in Lincoln. This allows for a heuristic of the current year age-sex demographic amounts in 2025 of Kemmerer to start the Monte Carlo with a list age people in each age group
|
||||
|
||||
#A function to pull an estimate of the ratio between the Kemmerer are age-sex population cohort, and the Lincoln total. This allows the age-sex by year demographic data for Lincoln to be converted to Kemmerer numbers. Used as in input to TRANSPOSE_AGE_DEMOGRAPHIC_DATA_DOWN (see below)
|
||||
GET_VALUE <- function(Age,SEX,DATA=AGE_DATA,County='Lincoln'){
|
||||
DATA <- DATA %>% filter(Region==County,Sex==SEX)
|
||||
if(any(Age==DATA$Med_Age)){
|
||||
#If any exact matches in between the min and max ages pull the exact record
|
||||
RES <- DATA[which(Age==DATA$Med_Age & DATA$Sex==SEX),] %>% pull(Per_Pop)
|
||||
}else if(Age>=85){
|
||||
RES <- DATA[which(DATA$Min_Age==85 & DATA$Max_Age==Inf),] %>% pull(Per_Pop)
|
||||
}else if(Age>=85){
|
||||
#If in the upper bound of age pull the single record
|
||||
RES <- DATA[which(DATA$Min_Age==85 & DATA$Max_Age==Inf),] %>% pull(Per_Pop)
|
||||
}else if(Age==0){
|
||||
#If in the lower bound of age pull the single record
|
||||
RES <- DATA[which(DATA$Min_Age==0),] %>% pull(Per_Pop)
|
||||
}else{
|
||||
#If no exact record is found perform a linear interpolation between the two age brackets bounding the point
|
||||
TEMP <- DATA %>% arrange(Min_Age) %>% select(Med_Age,Min_Age,Max_Age,Per_Pop) %>% filter(Per_Pop!=0)
|
||||
LOWER <- TEMP[max(max(which(TEMP$Med_Age<Age)),1),] #Find the lowest value in the list but if no match is found pull the first record
|
||||
UPPER <- TEMP[min(which(TEMP$Med_Age>Age)),]
|
||||
if(LOWER$Med_Age!=UPPER$Med_Age){
|
||||
#If the LOWER and UPPER are the same than the age must be less the first entry, if that is not true perform a linear interpolation between the two bounding entries
|
||||
C <- LOWER$Med_Age
|
||||
ST <- LOWER$Per_Pop
|
||||
DELTA <- UPPER$Per_Pop-LOWER$Per_Pop
|
||||
GAP <- UPPER$Med_Age-LOWER$Med_Age
|
||||
RES <- (ST+(Age-C)*DELTA/GAP)
|
||||
} else{RES <- UPPER %>% pull(Per_Pop)}
|
||||
}
|
||||
return(RES)
|
||||
}
|
||||
#A function which returns a tibble of all single year age-sex combinations for conversions of ratio in Lincoln to Kemmerer
|
||||
INTERPOLATE_COUNTY_AGE_DEMOGRAPHIC_DATA_TO_CITY_LEVEL <- function(DATA,COUNTY='Lincoln',AGE_RANGE=0:85,YEARS_FORWARD=0){
|
||||
#Create a vector of all male ages, then all female ages
|
||||
VALUES <- c(unlist((sapply(AGE_RANGE,function(x){GET_VALUE(x,'Male',DATA,COUNTY)}))),unlist((sapply(AGE_RANGE,function(x){GET_VALUE(x,'Female',DATA,COUNTY)}))))
|
||||
#Turn into an easy to read tibble to merge later with the actual demographic data
|
||||
RES <- cbind(rep(AGE_RANGE,2),c(rep("Male",length(VALUES)/2),rep("Female",length(VALUES)/2)),VALUES) %>% as_tibble
|
||||
colnames(RES) <- c("Age","Sex","Conversion_Ratio")
|
||||
RES <- RES %>% mutate(Age=as.numeric(Age),Conversion_Ratio=as.numeric(Conversion_Ratio))
|
||||
if(YEARS_FORWARD>0){
|
||||
MALE <- RES %>% filter(Sex=='Male')
|
||||
FEMALE <- RES %>% filter(Sex=='Female')
|
||||
STUB_AGE_MALE <- MALE[rep(1,YEARS_FORWARD),]
|
||||
STUB_AGE_MALE$Age <- 0:(YEARS_FORWARD-1)
|
||||
|
||||
STUB_AGE_FEMALE <- FEMALE[rep(1,YEARS_FORWARD),]
|
||||
STUB_AGE_FEMALE$Age <- 0:(YEARS_FORWARD-1)
|
||||
|
||||
MALE[,"Age"] <- MALE[,"Age"]+YEARS_FORWARD
|
||||
FEMALE[,"Age"] <- FEMALE[,"Age"]+YEARS_FORWARD
|
||||
MALE <- rbind(STUB_AGE_MALE,MALE)[1:nrow(MALE),]
|
||||
FEMALE <- rbind(STUB_AGE_FEMALE,FEMALE)[1:nrow(FEMALE),]
|
||||
RES <- rbind(MALE,FEMALE)
|
||||
}
|
||||
return(RES)
|
||||
}
|
||||
|
||||
40
Scripts/Load_ACS_Census_Data.r
Normal file
40
Scripts/Load_ACS_Census_Data.r
Normal file
@ -0,0 +1,40 @@
|
||||
library(tidycensus)
|
||||
library(zipcodeR)
|
||||
#Pull the relevant median age variables the value moe (margine of error) can be converted to standard error, following the link below
|
||||
#https://www.census.gov/content/dam/Census/library/publications/2018/acs/acs_general_handbook_2018_ch08.pdf
|
||||
ACS_YEAR <- 2023 #most recent as of Nov 4 2025
|
||||
source("Scripts/Downshift_Population_Functions.r")
|
||||
#Packages to instal on computer if zipcodeR won't install
|
||||
#Sudo apt install libssl-dev libudunits2-dev libabsl-dev libcurl4-openssl-dev libgdal-dev cmake libfontconfig1-dev libharfbuzz-dev libfribidi-dev
|
||||
#install.packages("zipcodeR")
|
||||
|
||||
#Add API key if missing
|
||||
#KEY <- '30e13ab22563318ff59286e433099f4174d4edd4'
|
||||
#census_api_key(KEY, install = TRUE)
|
||||
PROJ_TRACTS <- get_tracts(search_city('Kemmerer','WY')$zipcode) %>% full_join(get_tracts(search_city('Diamondville','WY')$zipcode))
|
||||
PROJ_TRACTS <- PROJ_TRACTS %>% select(GEOID) %>% mutate('IN_KEM'=1) %>% mutate(GEOID=as.character(GEOID))
|
||||
|
||||
MED_AGE_VAR <- cbind(c('B01002_001E','B01002_002E','B01002_003E'),c('Median_Age','Median_Age_Male','Median_Age_Female')) %>% as_tibble %>% rename(variable=V1,Data_Type=V2)
|
||||
|
||||
###Load data manually created which links vairable names to sex-age census data
|
||||
CODES <- read_csv("Data/API_CENSUS_CODES.csv",skip=1) %>% mutate(Med_Age=(Min_Age+Max_Age)/2) %>% rename(variable=Code)
|
||||
#Testing age Comparison between the two
|
||||
###Extract census data for all tracts in Lincoln county, clean up the data, and indicate if the tract is in Kemmerer/Diamondvile or not.
|
||||
AGE_DATA <- get_acs(geography="tract",year=ACS_YEAR,variables=CODES$variable,state='WY',county='lincoln') %>% mutate(se=moe/1.64) %>% left_join(CODES %>% mutate(variable=gsub('E','',variable))) %>% select(-NAME,-moe) %>% left_join(PROJ_TRACTS) %>% mutate(IN_KEM=ifelse(is.na(IN_KEM),0,1)) %>% rename(Population=estimate) %>% select(-variable,-GEOID) %>% select(Sex,Min_Age,Max_Age,Med_Age,IN_KEM,Population,se) %>% filter(!(Min_Age==0& Max_Age==Inf))
|
||||
AGE_DATA <- AGE_DATA %>% group_by(Sex,Min_Age,Max_Age,Med_Age,IN_KEM)%>% summarize(Population=sum(Population)) %>% ungroup
|
||||
#Add Descriptive age category to a clean graph
|
||||
AGE_DATA$Ages <- paste(AGE_DATA$Min_Age,"to",AGE_DATA$Max_Age)
|
||||
AGE_DATA[AGE_DATA$Max_Age==Inf,"Ages"] <- "85+"
|
||||
AGE_DATA[AGE_DATA$Med_Age==18,"Ages"] <- "18"
|
||||
AGE_DATA[AGE_DATA$Med_Age==20,"Ages"] <- "20"
|
||||
AGE_DATA[AGE_DATA$Med_Age==21,"Ages"] <- "21"
|
||||
AGE_DATA[AGE_DATA$Min_Age==0,"Ages"] <- "Under 5"
|
||||
#Turn the ages into factors to keep the correct order in graphs
|
||||
#Add the percent of total relative population in the region
|
||||
ORD <- AGE_DATA %>% select(Min_Age,Ages) %>% unique %>% arrange(Min_Age) %>% pull(Ages) %>% unique
|
||||
AGE_DATA$Ages <- factor(AGE_DATA$Ages,levels=ORD)
|
||||
AGE_DATA$Year <- ACS_YEAR
|
||||
AGE_DATA <-AGE_DATA %>% select(Year,Sex,Ages,IN_KEM,everything())
|
||||
if(!exists('ACS_FILE_LOC')){ACS_FILE_LOC <- "Data/Cleaned_Data/ACS_Census_Demographic_Data.Rds"}
|
||||
saveRDS(AGE_DATA,ACS_FILE_LOC)
|
||||
|
||||
96
Zip_Code.r
96
Zip_Code.r
@ -1,96 +0,0 @@
|
||||
library(tidyverse)
|
||||
library(tidycensus)
|
||||
library(zipcodeR)
|
||||
library(scales) #For a pretty population Pyramid
|
||||
source("Scripts/Downshift_Population_Functions.r")
|
||||
#Packages to instal on computer if zipcodeR won't install
|
||||
#Sudo apt install libssl-dev libudunits2-dev libabsl-dev libcurl4-openssl-dev libgdal-dev cmake libfontconfig1-dev libharfbuzz-dev libfribidi-dev
|
||||
#install.packages("zipcodeR")
|
||||
|
||||
#Add API key if missing
|
||||
#KEY <- '30e13ab22563318ff59286e433099f4174d4edd4'
|
||||
#census_api_key(KEY, install = TRUE)
|
||||
PROJ_TRACTS <- get_tracts(search_city('Kemmerer','WY')$zipcode) %>% full_join(get_tracts(search_city('Diamondville','WY')$zipcode))
|
||||
PROJ_TRACTS <- PROJ_TRACTS %>% select(GEOID) %>% mutate('IN_KEM'=1) %>% mutate(GEOID=as.character(GEOID))
|
||||
|
||||
MED_AGE_VAR <- cbind(c('B01002_001E','B01002_002E','B01002_003E'),c('Median_Age','Median_Age_Male','Median_Age_Female')) %>% as_tibble %>% rename(variable=V1,Data_Type=V2)
|
||||
#Pull the relevant median age variables the value moe (margine of error) can be converted to standard error, following the link below
|
||||
#https://www.census.gov/content/dam/Census/library/publications/2018/acs/acs_general_handbook_2018_ch08.pdf
|
||||
|
||||
MED_AGE_VALUES <- get_acs(geography="tract",variables=MED_AGE_VAR$variable,state='WY',county='lincoln') %>% mutate(se=moe/1.64) %>% left_join(MED_AGE_VAR %>% mutate(variable=gsub('E','',variable))) %>% select(-NAME,-moe) %>% left_join(PROJ_TRACTS) %>% mutate(IN_KEM=ifelse(is.na(IN_KEM),0,1))
|
||||
AGE_DIFF <- MED_AGE_VALUES %>% group_by(IN_KEM,Data_Type) %>% summarize(Age=mean(estimate)) %>% pivot_wider(names_from=IN_KEM,values_from=Age,names_prefix="In_Kemmerer_")
|
||||
AGE_DIFF
|
||||
|
||||
###Load data manually created which links vairable names to sex-age census data
|
||||
CODES <- read_csv("Data/API_CENSUS_CODES.csv",skip=1) %>% mutate(Med_Age=(Min_Age+Max_Age)/2) %>% rename(variable=Code)
|
||||
|
||||
|
||||
|
||||
#Testing age Comparison between the two
|
||||
###Extract census data for all tracts in Lincoln county, clean up the data, and indicate if the tract is in Kemmerer/Diamondvile or not.
|
||||
AGE_DATA <- get_acs(geography="tract",variables=CODES$variable,state='WY',county='lincoln') %>% mutate(se=moe/1.64) %>% left_join(CODES %>% mutate(variable=gsub('E','',variable))) %>% select(-NAME,-moe) %>% left_join(PROJ_TRACTS) %>% mutate(IN_KEM=ifelse(is.na(IN_KEM),0,1)) %>% rename(Population=estimate) %>% select(-variable,-GEOID) %>% select(Sex,Min_Age,Max_Age,Med_Age,IN_KEM,Population,se) %>% filter(!(Min_Age==0& Max_Age==Inf))
|
||||
|
||||
AGE_DATA <- AGE_DATA %>% group_by(Sex,Min_Age,Max_Age,Med_Age,IN_KEM)%>% summarize(Population=sum(Population)) %>% ungroup
|
||||
#Add Descriptive age category to a clean graph
|
||||
AGE_DATA$Ages <- paste(AGE_DATA$Min_Age,"to",AGE_DATA$Max_Age)
|
||||
AGE_DATA[AGE_DATA$Max_Age==Inf,"Ages"] <- "85+"
|
||||
AGE_DATA[AGE_DATA$Med_Age==18,"Ages"] <- "18"
|
||||
AGE_DATA[AGE_DATA$Med_Age==20,"Ages"] <- "20"
|
||||
AGE_DATA[AGE_DATA$Med_Age==21,"Ages"] <- "21"
|
||||
AGE_DATA[AGE_DATA$Min_Age==0,"Ages"] <- "Under 5"
|
||||
#Turn the ages into factors to keep the correct order in graphs
|
||||
ORD <- AGE_DATA %>% select(Min_Age,Ages) %>% unique %>% arrange(Min_Age) %>% pull(Ages) %>% unique
|
||||
AGE_DATA$Ages <- factor(AGE_DATA$Ages,levels=ORD)
|
||||
#Add the percent of total relative population in the region
|
||||
AGE_DATA <- AGE_DATA %>% mutate(Per_Pop=Population/sum(Population)) %>% group_by(IN_KEM) %>% mutate(Per_Pop_Region=Population/sum(Population)) %>% ungroup
|
||||
|
||||
#Add a region name for clearer graphs
|
||||
AGE_DATA <- AGE_DATA %>% mutate(Region=ifelse(IN_KEM==1,'Kemmerer','Lincoln'))
|
||||
#AGE_DATA <- AGE_DATA %>% group_by(IN_KEM) %>% mutate(MORE_KEMMER=ifelse(sum(IN_KEM*Per_Pop_Region)>sum((1-IN_KEM)*Per_Pop_Region),1,0)) %>% ungroup
|
||||
|
||||
#PLOT <- ggplot(AGE_DATA, aes(x =Ages, y = Per_Pop_Region)) + geom_line() + geom_point(aes(color = Region ), size = 5) + scale_color_brewer(palette = "Set1", direction = 1) + theme(legend.position = "bottom")+facet_wrap(~Sex,ncol=1)
|
||||
#PLOT
|
||||
LIN_TO_KEMMER_CONVERSION_RATIOS <- INTERPOLATE_COUNTY_AGE_DEMOGRAPHIC_DATA_TO_CITY_LEVEL(AGE_DATA,YEARS_FORWARD=1) %>% pivot_wider(names_from=Sex,values_from=Conversion_Ratio,names_prefix="Convert_")
|
||||
|
||||
KEM_DEMO_DATA <- readRDS("Data/Cleaned_Data/Lincoln_Demographic_Data.Rds") %>% filter(Year==max(Year)) %>% left_join(LIN_TO_KEMMER_CONVERSION_RATIOS ) %>% mutate(Num_Male=Num_Male*Convert_Male,Num_Female=Num_Female*Convert_Female) %>% mutate(County='Kemmerer') %>% select(-Convert_Male,-Convert_Female)
|
||||
NUM_KEM <- sum(KEM_DEMO_DATA[,4:5] )
|
||||
CURRENT_KEM_POP <- readRDS("Data/Cleaned_Data/Wyoming_City_Population.Rds") %>% filter(City=='Kemmerer' | City=='Diamondville') %>% group_by(City) %>% filter(Year==max(Year)) %>% ungroup %>% group_by(Year) %>% summarize(Population=sum(as.numeric(Population)),County='Kemmerer')
|
||||
|
||||
SCALE_KEM_FACTOR <- CURRENT_KEM_POP$Population/NUM_KEM
|
||||
KEM_NEW_DEMO_DATA <- KEM_DEMO_DATA
|
||||
KEM_NEW_DEMO_DATA[,c("Num_Male","Num_Female")] <- round(SCALE_KEM_FACTOR*KEM_NEW_DEMO_DATA[,c("Num_Male","Num_Female")])
|
||||
##Create a simplfied summary of the Kemmerer/Diamondville population estimates, to start the Monte Carlo
|
||||
ST_BIRTH_KEM <- sum((KEM_NEW_DEMO_DATA %>% filter(Age==0))[4:5])
|
||||
ST_BIRTH_TWO_PREV_KEM <- sum((KEM_NEW_DEMO_DATA %>% filter(Age==1))[4:5])
|
||||
|
||||
KEM_DEMO_DATA %>% filter(Age==0)
|
||||
|
||||
ST_POP <- sum((KEM_NEW_DEMO_DATA)[4:5])
|
||||
INTIATE_KEMMER <- KEM_NEW_DEMO_DATA%>% mutate(Male_Window=Age>=18 & Age<=30,Female_Window=Age>=18 & Age<=28) %>% group_by(County,Year) %>% summarize(Female_Birth_Group=sum(Num_Female*Female_Window),Male_Birth_Group=sum(Num_Male*Male_Window),Min_Birth_Group=ifelse(Female_Birth_Group<Male_Birth_Group,Female_Birth_Group,Male_Birth_Group),PREV_BIRTH=ST_BIRTH_KEM,PREV_TWO_BIRTH=ST_BIRTH_TWO_PREV_KEM,Population=ST_POP) %>% mutate(Births=NA,Deaths=NA,Migration=NA) %>% select(Year,County,Population,Births,Deaths,Migration,Min_Birth_Group,PREV_BIRTH,PREV_TWO_BIRTH) %>% ungroup
|
||||
INTIATE_KEMMER$County <- (readRDS("Data/Regression_Results/Birth_Regression_Data_Set.Rds") %>% filter(County=='Lincoln',Year==2020))$County #Making the factor the same for the regression
|
||||
saveRDS(INTIATE_KEMMER ,"Data/Cleaned_Data/Kemmerer_Summary_Start_Data.Rds")
|
||||
write_csv(INTIATE_KEMMER ,"Data/Cleaned_Data/Kemmerer_Summary_Start_Data.csv")
|
||||
|
||||
saveRDS(KEM_NEW_DEMO_DATA,"Data/Cleaned_Data/Kemmerer_Demographic_Data.Rds")
|
||||
write_csv(KEM_NEW_DEMO_DATA,"Data/Cleaned_Data/Kemmerer_Demographic_Data.csv")
|
||||
#sum(KEM_NEW_DEMO_DATA[,4:5])-CURRENT_KEM_POP$Population
|
||||
###Make a population Pyramid Graph
|
||||
PY_KEM_DATA <- KEM_NEW_DEMO_DATA %>% pivot_longer(cols=c(Num_Male,Num_Female),names_to="Sex",values_to="Population") %>% mutate(Sex=ifelse(Sex=='Num_Male','Male','Female'),Population=ifelse(Sex=='Male',-Population,Population))
|
||||
LN_CURRENT_DEMO <- readRDS("Data/Cleaned_Data/Wyoming_County_Demographic_Data.Rds") %>% filter(County=='Lincoln',Year==max(Year))
|
||||
|
||||
PY_LN_DATA <- LN_CURRENT_DEMO %>% pivot_longer(cols=c(Num_Male,Num_Female),names_to="Sex",values_to="Population") %>% mutate(Sex=ifelse(Sex=='Num_Male','Male','Female'),Population=ifelse(Sex=='Male',-Population,Population))
|
||||
PY_LN_DATA <- PY_LN_DATA %>% left_join(PY_KEM_DATA %>% rename(POP_SHIFT=Population) %>% select(-County)) %>% mutate(Population=Population-POP_SHIFT) %>% select(-POP_SHIFT)
|
||||
PY_LN_DATA$Population <- PY_LN_DATA$Population/sum(abs(PY_LN_DATA$Population ))
|
||||
|
||||
|
||||
PY_KEM_PER <- PY_KEM_DATA
|
||||
PY_KEM_PER$Population <- PY_KEM_PER$Population/sum(abs(PY_KEM_PER$Population ))
|
||||
GRAPH_DATA <- full_join(PY_KEM_PER ,PY_LN_DATA) %>% mutate(Population=ifelse(County=='Kemmerer',abs(Population),-abs(Population)))
|
||||
|
||||
|
||||
RANGE <- c(-0.015,pretty(range(GRAPH_DATA$Population),n=8))
|
||||
LAB <- percent(abs(RANGE),accuracy=0.1 )
|
||||
POP_PYRAMID <- ggplot(GRAPH_DATA,aes(y=factor(Age),x=Population,fill=County))+geom_col() +scale_x_continuous(breaks = RANGE,labels = LAB)+facet_grid(~Sex)
|
||||
POP_PYRAMID
|
||||
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user