20 lines
1.4 KiB
R
20 lines
1.4 KiB
R
# Summarise one column of a single occupation table
|
|
#' Summarise one column of an occupation table, pooling low-ranked rows.
#'
#' Ranks occupations by `COL_NAME` (largest value gets rank 1), relabels every
#' occupation whose rank exceeds `RANK_SIZE` as "Other Jobs", and sums the
#' column within each resulting label.
#'
#' @param COL_NAME Name (single string) of the column in `DATA` to summarise.
#' @param RANK_SIZE Occupations ranked above this number are pooled into
#'   "Other Jobs".
#' @param DATA Data frame containing an `Occupation` column and the column
#'   named by `COL_NAME`. The default, `OCC`, is only resolved when `DATA` is
#'   not supplied and must then exist in the environment where this function
#'   was defined.
#' @param SIG_FIGS Passed to `round()` as its `digits` argument, i.e. the
#'   number of decimal places (not significant figures).
#' @return A data frame with columns `Occupation`, `COL_NAME` and `Rank`
#'   (character), ordered by rank. When an "Other Jobs" row was created, its
#'   `Rank` is the smallest pooled rank followed by "+".
Occupation_Summary <- function(COL_NAME, RANK_SIZE = 5, DATA = OCC, SIG_FIGS = 0) {
  stopifnot(is.character(COL_NAME), length(COL_NAME) == 1)

  # Fail early with a clear message instead of a confusing downstream error.
  MISSING_COLS <- setdiff(c("Occupation", COL_NAME), colnames(DATA))
  if (length(MISSING_COLS) > 0) {
    stop(
      "DATA is missing required column(s): ",
      paste(MISSING_COLS, collapse = ", "),
      call. = FALSE
    )
  }

  # Work on a two-column copy with a fixed value-column name so the dplyr
  # verbs below do not need to know COL_NAME.
  DATA <- DATA[c("Occupation", COL_NAME)]
  colnames(DATA) <- c("Occupation", "TEMP_COL")

  # Negating the values makes rank() assign 1 to the largest value.
  # Remember whether anything is actually pooled: only then does an
  # "Other Jobs" row exist that should carry the "+" suffix.
  HAS_OTHER <- any(rank(-DATA$TEMP_COL) > RANK_SIZE)

  RES <- DATA %>%
    mutate(
      Rank = rank(-TEMP_COL),
      Occupation = ifelse(Rank > RANK_SIZE, "Other Jobs", Occupation)
    ) %>%
    group_by(Occupation) %>%
    summarize(
      TEMP_COL = round(sum(TEMP_COL), SIG_FIGS),
      Rank = as.character(min(Rank))
    ) %>%
    arrange(as.numeric(Rank))

  # Mark the pooled row as "N+". Previously the last row was always marked,
  # which mislabelled a real occupation whenever nothing had been pooled.
  if (HAS_OTHER) {
    OTHER_ROW <- which(RES$Occupation == "Other Jobs")
    RES$Rank[OTHER_ROW] <- paste0(RES$Rank[OTHER_ROW], "+")
  }

  colnames(RES)[2] <- COL_NAME
  RES
}
|
|
#### A function to automatically process all files in a given folder
|
|
#' Summarise every occupation CSV in a directory and write one result per file.
#'
#' For each `.csv` file in `OCCUPATION_DIR`, reads it with `OCC_NAMES` as the
#' column names, parses columns 5 to 7 as numbers, drops `Occupation_Code`,
#' runs `Occupation_Summary()` and writes the result to
#' `RES_PATH/<input name>_Results.csv` (spaces in the name become "_").
#'
#' @param RES_PATH Directory the result files are written to. Created if it
#'   does not exist.
#' @param OCCUPATION_DIR Directory that is searched for input `.csv` files.
#' @param OCC_NAMES Column names applied to each input file, in file order.
#'   Must include `Occupation_Code` and `Occupation`.
#' @param COL_NAME Column passed to `Occupation_Summary()` for summarising.
#' @param RANK_SIZE Rank cut-off passed to `Occupation_Summary()`.
#' @return Invisibly, the character vector of result file paths written
#'   (empty when no input files were found).
Occupation_Summary_Directory <- function(RES_PATH = "./Results",
                                         OCCUPATION_DIR = "Data/Occupation_IMPACTS_Tab",
                                         OCC_NAMES = c(
                                           "Occupation_Code",
                                           "Occupation",
                                           "Employment",
                                           "FTE",
                                           "Wage_and_Salary_Income",
                                           "Supplemental_Income",
                                           "Employee_Compensation",
                                           "Hours_Worked"
                                         ),
                                         COL_NAME = "Employment",
                                         RANK_SIZE = 10) {
  # `pattern` is a regular expression, not a glob: anchor on the extension so
  # names that merely contain "csv" (e.g. "x.csv.bak") are not picked up.
  OCCUPATION_FILES <- list.files(
    OCCUPATION_DIR,
    pattern = "\\.csv$",
    full.names = TRUE,
    ignore.case = TRUE
  )

  # Nothing to do; also avoids paste0() recycling an empty vector into a
  # single bogus "_Results.csv" path.
  if (length(OCCUPATION_FILES) == 0) {
    return(invisible(character(0)))
  }

  # Derive the output names from the listing above rather than listing the
  # directory a second time, so inputs and outputs cannot get out of step.
  RES_NAMES <- paste0(
    sub("\\.csv$", "", basename(OCCUPATION_FILES), ignore.case = TRUE),
    "_Results"
  )
  RES_FILE_PATH <- file.path(RES_PATH, paste0(gsub(" ", "_", RES_NAMES), ".csv"))

  if (!dir.exists(RES_PATH)) {
    dir.create(RES_PATH, recursive = TRUE)
  }

  for (i in seq_along(OCCUPATION_FILES)) {
    # skip = 1 drops the first line of the file (presumably its own header
    # row) because the column names are supplied via OCC_NAMES.
    # Columns 5:7 are selected by position; with the default OCC_NAMES these
    # are the three income/compensation columns.
    OCC <- read_csv(OCCUPATION_FILES[i], col_names = OCC_NAMES, skip = 1) %>%
      mutate_at(5:7, parse_number) %>%
      select(-Occupation_Code)

    write_csv(Occupation_Summary(COL_NAME, RANK_SIZE, OCC), RES_FILE_PATH[i])
  }

  invisible(RES_FILE_PATH)
}
|
|
|