diff --git a/R_Analysis.r b/R_Analysis.r
new file mode 100644
index 0000000..85bba50
--- /dev/null
+++ b/R_Analysis.r
@@ -0,0 +1,75 @@
+library(tidyverse)
+library(fedmatch)
+library(eia)
+library("stringdist")
+library(fixest)
+#The EIA API key is read from the EIA_KEY environment variable (for example set in ~/.Renviron) so that the secret is never stored in the repository.
+#NOTE(review): the key that was hard-coded here has been exposed in version control and should be revoked and reissued.
+EIA_KEY <- Sys.getenv("EIA_KEY")
+if(!nzchar(EIA_KEY)){stop("Set the EIA_KEY environment variable to a valid EIA API key before running R_Analysis.r")}
+eia_set_key(EIA_KEY)
+ASME <- readRDS("Data/PROCESSED_DATA/RDS/ASME.Rds")
+#nrow(ASME )/14865
+#Standardize the company names, and keep a copy of the standardized name in CO before near-duplicate names are merged
+ASME$CO_NAME <- clean_strings(ASME$CO_NAME)
+ASME <- ASME %>% mutate(CO=CO_NAME)
+#Jaro-Winkler distance between every pair of distinct company names. Pairs closer than 0.06, but not identical, are treated as two spellings of one company.
+CO_NAMES <- unique(ASME$CO_NAME)
+MAT <- stringdistmatrix(CO_NAMES,CO_NAMES,method="jw",nthread=23)
+MATCH <- which(MAT<0.06 & MAT>0,arr.ind = TRUE)
+MATCH_RES <- tibble(FROM=CO_NAMES[MATCH[,1]],TO=CO_NAMES[MATCH[,2]]) %>% unique
+#Rename one pair at a time, in order, so that later pairs see the names produced by earlier ones. seq_len() skips the loop when nothing matched.
+for(i in seq_len(nrow(MATCH_RES))){
+  ASME$CO_NAME[which(ASME$CO_NAME==MATCH_RES$FROM[i])] <- MATCH_RES$TO[i]
+}
+#All certificates at the same plant address are given a single company name (the smallest name in sort order)
+ASME <- ASME %>% group_by(PLANT_ADDRESS,COUNTRY,STATE,CITY) %>% mutate(CO_NAME=min(CO_NAME)) %>% ungroup
+
+#ISO 3166 country names and alpha-3 codes; the alpha-3 code is what is later passed to the EIA query as countryRegionId
+REGION_MATCH <- read_csv("https://raw.githubusercontent.com/lukes/ISO-3166-Countries-with-Regional-Codes/refs/heads/master/all/all.csv") %>% select(COUNTRY=name,COUNTRY_ID=`alpha-3`)
+#Harmonize the country spellings on both sides so the join on COUNTRY matches exactly
+ISO_RENAME <- c(
+  'United States of America'="United States",
+  'Korea, Republic of'="Republic of Korea",
+  'United Kingdom of Great Britain and Northern Ireland'='United Kingdom',
+  'Netherlands, Kingdom of the'='The Netherlands',
+  'Türkiye'='Turkey',
+  'Taiwan, Province of China'='Taiwan',
+  'Venezuela, Bolivarian Republic of'='Venezuela',
+  'Bolivia, Plurinational State of'='Bolivia'
+)
+ASME_RENAME <- c(
+  'People\'s Republic of China'="China",
+  'Wales'='United Kingdom',
+  'Northern Ireland'='United Kingdom',
+  'Scotland'='United Kingdom',
+  'Czech Republic'="Czechia",
+  'Trinidad And Tobago'='Trinidad and Tobago'
+)
+REGION_MATCH$COUNTRY <- recode(REGION_MATCH$COUNTRY,!!!ISO_RENAME)
+ASME$COUNTRY <- recode(ASME$COUNTRY,!!!ASME_RENAME)
+ASME <- ASME %>% left_join(REGION_MATCH,by="COUNTRY")
+#Countries without an ISO match have a missing COUNTRY_ID and cannot be queried
+COUNTRY_LIST <- ASME$COUNTRY_ID %>% unique
+COUNTRY_LIST <- COUNTRY_LIST[!is.na(COUNTRY_LIST)]
+#Download the annual EIA international series requested with productId 27 in TJ for one country (stored as NUC_GEN) and add its one, two and three year lags. A failed request gives a warning and returns NULL so that bind_rows() skips that country instead of hiding the error.
+GET_NUC_GEN <- function(ID){
+  tryCatch(
+    eia_data(dir="international",data="value",freq="annual",facets=list(productId="27",unit="TJ",countryRegionId=ID)) %>%
+      select(COUNTRY_ID=countryRegionId,AUTH_YEAR=period,NUC_GEN=value) %>%
+      mutate(AUTH_YEAR=as.numeric(AUTH_YEAR)) %>%
+      arrange(AUTH_YEAR) %>%
+      mutate(LAG_GEN=lag(NUC_GEN),LAG_TWO_GEN=lag(NUC_GEN,2),LAG_THREE_GEN=lag(NUC_GEN,3)),
+    error=function(e){
+      warning("EIA request failed for country ",ID,": ",conditionMessage(e),call.=FALSE)
+      NULL
+    }
+  )
+}
+NUC_DATA <- COUNTRY_LIST %>% map(GET_NUC_GEN) %>% bind_rows
+ASME <- ASME %>% left_join(NUC_DATA,by=c("COUNTRY_ID","AUTH_YEAR"))
+
+#Country by year panel: NUCLEAR is the number of certificates flagged NUCLEAR, OTHER is every remaining certificate, NUC_GEN is the mean of the joined EIA value
+COUNTRY_DATA <- ASME %>% group_by(COUNTRY,AUTH_YEAR) %>% summarize(NUCLEAR=sum(NUCLEAR),NUC_GEN=mean(NUC_GEN,na.rm=TRUE),OTHER=n()-NUCLEAR,.groups="drop")
+#Poisson regression of NUCLEAR on log(OTHER), NUC_GEN and COUNTRY with AUTH_YEAR fixed effects; standard errors are clustered by COUNTRY
+fepois(NUCLEAR~log(OTHER) +NUC_GEN +COUNTRY|AUTH_YEAR,data=COUNTRY_DATA,cluster=~COUNTRY)
diff --git a/R_Clean.r b/R_Clean.r index d073e58..354eacc 100644 --- a/R_Clean.r +++ b/R_Clean.r @@ 
-1,6 +1,4 @@ library(tidyverse) -library(zipcodeR) -require(usmap) #SAVE FOLDERS if(!file.exists("./Data/PROCESSED_DATA/RDS")){dir.create(path="./Data/PROCESSED_DATA/RDS",recursive=TRUE)} @@ -59,192 +57,36 @@ RES <- RES %>% filter(!is.na(CO_ADDR)) TBL <- TBL %>% filter(!(MAIN_CERT %in% (UPDATE %>% pull(MAIN_CERT))) ) TBL <- rbind(TBL,UPDATE) RES_NAMES <- RES %>% select(MAIN_CERT,CO_NAME) - DROP <- TBL %>% unique %>% group_by(MAIN_CERT) %>% filter(n()>1) %>% arrange(MAIN_CERT) %>% left_join(RES_NAMES) %>% select(MAIN_CERT,CO_NAME,CO_NAME2) %>% mutate(PREFER=ifelse(toupper(CO_NAME)==toupper(CO_NAME2),1,0)) %>% filter(PREFER==0) %>% select(MAIN_CERT,CO_NAME2) %>% ungroup TBL <- TBL %>% anti_join(DROP) %>% unique -##########Change later -TBL <- TBL %>% unique %>% group_by(MAIN_CERT) %>% filter(n()==1) -#### -RES %>% full_join(TBL) %>% filter(toupper(CO_NAME)!=toupper(CO_NAME2))%>% select(MAIN_CERT, CO_NAME,CO_NAME2) %>% filter(!(MAIN_CERT %in% c(58279,'QSC-580','QSC-852','N-4584','43305',38008,39360,61004,62372,62850,60884,60883,16963,42365,59238,60377,60677,60885,61003,61084,61248,61293,61992,62255,62373,62469,62593,62727,54539,'N-3796',40267,34632,46096,47226,51803,52376,47225,48967,39361,41570,47224,41583,41052,52912,52960,62154))) %>% select(MAIN_CERT,CO_NAME,CO_NAME2) - - - - MISSING_IN_TBL <- RES %>% full_join(TBL) %>% filter(is.na(CO_NAME2)) %>% select(CO_NAME) %>% unique -write_csv(MISSING_IN_TBL,"Missing_From_Table.csv") EXISTING_CERTS <- TBL %>% pull(MAIN_CERT) # UPDATE_TBL <- read_csv("Data/Raw_Data/Table_Data/Updated_Missing_Data.csv") %>% rename(CO_NAME2=`Company Name`,DIV_NAME2=`Division Name`,ABB2=Abbrev.,PLANT_ADDRESS=`Plant Address`,STATE2=`State/Province`,CERT_TYPE2=Type,CERT_STATUS2=Status,COUNTRY=`Country/Region`,MAIN_CERT=Certificate) %>% unique %>% filter(!(MAIN_CERT %in% EXISTING_CERTS)) TBL <- rbind(TBL,UPDATE_TBL) -MISSING_IN_TBL <- RES %>% full_join(TBL) %>% filter(is.na(CO_NAME2)) %>% select(CO_NAME) %>% unique 
-write_csv(MISSING_IN_TBL,"Missing_From_Table.csv") # EXISTING_CERTS <- TBL %>% pull(MAIN_CERT) -UPDATE_TBL <- read_csv("Data/Raw_Data/Table_Data/Updated_Missing_Data_2.csv") %>% rename(CO_NAME2=`Company Name`,DIV_NAME2=`Division Name`,ABB2=Abbrev.,PLANT_ADDRESS=`Plant Address`,STATE2=`State/Province`,CERT_TYPE2=Type,CERT_STATUS2=Status,COUNTRY=`Country/Region`,MAIN_CERT=Certificate) %>% unique %>% filter(!(MAIN_CERT %in% EXISTING_CERTS)) - -MISSING_IN_TBL <- RES %>% full_join(TBL) %>% filter(is.na(CO_NAME2)) %>% select(CO_NAME) %>% unique -write_csv(MISSING_IN_TBL,"Missing_From_Table.csv") +UPDATE_TBL <- read_csv("Data/Raw_Data/Table_Data/Updated_Missing_Data_2.csv")[,-1] %>% rename(CO_NAME2=`Company Name`,DIV_NAME2=`Division Name`,ABB2=Abbrev.,PLANT_ADDRESS=`Plant Address`,STATE2=`State/Province`,CERT_TYPE2=Type,CERT_STATUS2=Status,COUNTRY=`Country/Region`,MAIN_CERT=Certificate) %>% unique %>% filter(!(MAIN_CERT %in% EXISTING_CERTS)) +TBL <- rbind(TBL,UPDATE_TBL) # EXISTING_CERTS <- TBL %>% pull(MAIN_CERT) -UPDATE_TBL <- read_csv("Data/Raw_Data/Table_Data/Updated_Missing_Data_3.csv") %>% rename(CO_NAME2=`Company Name`,DIV_NAME2=`Division Name`,ABB2=Abbrev.,PLANT_ADDRESS=`Plant Address`,STATE2=`State/Province`,CERT_TYPE2=Type,CERT_STATUS2=Status,COUNTRY=`Country/Region`,MAIN_CERT=Certificate) %>% unique %>% filter(!(MAIN_CERT %in% EXISTING_CERTS)) +UPDATE_TBL <- read_csv("Data/Raw_Data/Table_Data/Updated_Missing_Data3.csv")[,-1] %>% rename(CO_NAME2=`Company Name`,DIV_NAME2=`Division Name`,ABB2=Abbrev.,PLANT_ADDRESS=`Plant Address`,STATE2=`State/Province`,CERT_TYPE2=Type,CERT_STATUS2=Status,COUNTRY=`Country/Region`,MAIN_CERT=Certificate) %>% unique %>% filter(!(MAIN_CERT %in% EXISTING_CERTS)) TBL <- rbind(TBL,UPDATE_TBL) -MISSING_IN_TBL <- RES %>% full_join(TBL) %>% filter(is.na(CO_NAME2)) %>% select(CO_NAME) %>% unique -RES %>% full_join(TBL) %>% filter(is.na(CO_NAME2)) %>% select(CO_NAME) %>% unique %>% print(100) 
-write_csv(MISSING_IN_TBL,"Missing_From_Table.csv") -# -EXISTING_CERTS <- TBL %>% pull(MAIN_CERT) -UPDATE_TBL <- read_csv("Data/Raw_Data/Table_Data/Updated_Missing_Data_4.csv") %>% rename(CO_NAME2=`Company Name`,DIV_NAME2=`Division Name`,ABB2=Abbrev.,PLANT_ADDRESS=`Plant Address`,STATE2=`State/Province`,CERT_TYPE2=Type,CERT_STATUS2=Status,COUNTRY=`Country/Region`,MAIN_CERT=Certificate) %>% unique %>% filter(!(MAIN_CERT %in% EXISTING_CERTS)) -TBL <- rbind(TBL,UPDATE_TBL) MISSING_IN_TBL <- RES %>% full_join(TBL) %>% filter(is.na(CO_NAME2)) %>% select(CO_NAME) %>% unique MISSING_IN_PDFS <- RES %>% full_join(TBL) %>% filter(is.na(CO_NAME)) %>% select(CO_NAME2) %>% unique - write_csv(MISSING_IN_TBL,"Missing_From_Table.csv") write_csv(MISSING_IN_PDFS,"Missing_From_PDS.csv") -RES %>% full_join(TBL) %>% filter(is.na(CO_NAME)) %>% select(CO_NAME2) %>% unique -RES <- RES %>% inner_join(TBL) -nrow(RES)/14854 - -RES <- RES %>% select(MAIN_CERT,CERT_NUM,CERT_TYPE,COUNTRY,STATE=STATE,CITY=City,CERT_TYPE,CERT_STATUS,ORIG_AUTH_DATE,AUTH_DATE,EXP_DATE,CO_NAME=CO_NAME2,PLANT_ADDRESS,SCOPE) - - +RES <- RES %>% select(MAIN_CERT,CERT_NUM,CERT_TYPE,COUNTRY,STATE=STATE2,CITY=City,CERT_TYPE,CERT_STATUS,ORIG_AUTH_DATE,AUTH_DATE,EXP_DATE,CO_NAME=CO_NAME2,PLANT_ADDRESS,SCOPE) RES$CERT_TYPE <- ifelse(RES$CERT_TYPE=='NAC (Quality Assurance Program)',"NUA",RES$CERT_TYPE) RES$CERT_TYPE <- ifelse(RES$CERT_TYPE=='NPT (Quality Assurance Program)',"NPT",RES$CERT_TYPE) RES <- RES %>% group_by(MAIN_CERT) %>% mutate(NUM_SUB_CERT=n()-1) %>% ungroup -RES %>% pull(COUNTRY) %>% unique -RES %>% filter(COUNTRY=='United States') %>% pull(STATE) -for(x in 1:length(state.name)){ - #Reversing the order of state names prefers other states over "Washington" many addresses are on "Washington St" so this maximizes the correct matches. 
- RES$STATE[grepl(toupper(rev(state.name))[x],toupper(RES$CO_ADDR) )] <- rev(state.abb)[x] - -} -#Function to clean up bad matches, and add zip codes -FIX_STATE <- function(RES){ - RES$STATE <- NA - RES$CO_ZIP_CODE <- NA - RES$COUNTRY <- NA - RES$CO_ZIP_CODE <- NA - for(x in 1:length(state.name)){ - #Reversing the order of state names prefers other states over "Washington" many addresses are on "Washington St" so this maximizes the correct matches. - RES$STATE[grepl(toupper(rev(state.name))[x],toupper(RES$CO_ADDR) )] <- rev(state.abb)[x] - - } - RES$COUNTRY[grepl(toupper("Puerto Rico"),toupper(RES$CO_ADDR) )] <- 'USA_OTHER' - RES$STATE[grepl(toupper("Puerto Rico"),toupper(RES$CO_ADDR) )] <- 'OTHER' - RES$STATE[grepl("ARKANSAS",toupper(RES$CO_ADDR) )] <- 'AR' - RES$STATE[grepl("WEST VIRGINIA",toupper(RES$CO_ADDR) )] <- 'WV' - RES[RES$CO_ADDR=='1106 Kansas Street Memphis Tennessee USA 38106',"STATE"] <- 'TN' - RES[RES$CO_ADDR=='5215 Arkansas Road Catoosa Oklahoma USA 74015',"STATE"] <- 'OK' - RES[RES$CO_ADDR=='1428 W. 9th Street Kansas City Missouri USA 64101',"STATE"] <- 'MO' - RES[RES$CO_ADDR=='1600 Warren Street North Kansas City Missouri USA 64116',"STATE"] <- 'MO' - RES[RES$CO_ADDR=='31 Maryland Avenue Paterson New Jersey USA 07503',"STATE"] <- 'NJ' - RES[RES$CO_ADDR=='327 North Maine Fallon Nevada USA 89406',"STATE"] <- 'NV' - RES[RES$CO_ADDR=='369 West Western Avenue Port Washington Wisconsin USA 53074-0993',"STATE"] <- 'WI' - RES[RES$CO_ADDR=='600 London Rd Delaware Ohio USA 43015',"STATE"] <- 'OH' - RES[RES$CO_ADDR=='801 Georgia Ave. Deer Park Texas USA 77536',"STATE"] <- 'TX' - RES[RES$CO_ADDR=='8116 Highway 166 Arkansas City Kansas USA 67005',"STATE"] <- 'KS' - RES[RES$CO_ADDR=='Calle Siete Sur No. 108 Ciudad Industrial-Nueva Tijuana Tijuana, Baja California MEX 22500',"STATE"] <-NA - RES[RES$CO_ADDR=='Carrera 22 No. 
3A-37 La Virginia, Risaralda COL',"STATE"] <- NA - RES[RES$CO_ADDR=='Km 20 Via Cali - Florida Corregimiento La Regina Candelaria Valle del Cauca COL',"STATE"] <-NA - RES[RES$CO_ADDR=='1133 California Way Longview Washington USA 98632',"STATE"] <- 'WA' - RES[RES$CO_ADDR=='13800 Wyandotte St. Kansas City Missouri USA 64145-1518',"STATE"] <- 'MO' - RES[RES$CO_ADDR=='2307 Oregon Street Oshkosh Wisconsin USA 54902',"STATE"] <- 'WI' - RES[RES$CO_ADDR=='2538 W. Kentucky Ave. Pampa Texas USA 79065',"STATE"] <- 'TX' - RES[RES$CO_ADDR=='2611 Southwest Blvd. Kansas City Missouri USA 64108',"STATE"] <- 'MO' - RES[RES$CO_ADDR=='2905 Maryland Avenue North Versailles Pennsylvania USA 15137',"STATE"] <- 'PA' - RES[RES$CO_ADDR=='307 Mississippi Ave Wichita Falls Texas USA 76301',"STATE"] <- 'TX' - RES[RES$CO_ADDR=='35006 Washington Avenue Honey Creek Wisconsin USA 53138',"STATE"] <- 'WI' - RES[RES$CO_ADDR=='3737 Old Iowa Park Road Wichita Falls Texas USA 76306',"STATE"] <- 'TX' - RES[RES$CO_ADDR=='3928 Bacon Switch Rd. Iowa Park Texas USA 76367',"STATE"] <- 'TX' - RES[RES$CO_ADDR=='4001 East 149th Street, Suite B Kansas City Missouri USA 64147',"STATE"] <- 'MO' - RES[RES$CO_ADDR=='455 Michigan Drive Oakville Ontario CAN L6L 0G4',"STATE"] <-NA - RES[RES$CO_ADDR=='5873 FM 369 North Iowa Park Texas USA 76367',"STATE"] <- 'TX' - RES[RES$CO_ADDR=='Calle Tijuana No. 16 Col. 
Baja California El Salto, Jalisco MEX 45692',"STATE"] <-NA - RES[RES$CO_ADDR=='Champ de Mass 5 Ave Anatole Paris New York FRA 75007',"STATE"] <- NA - RES[RES$CO_ADDR=='Idaho National Laboratory 2525 Fremont Ave Idaho Falls Idaho USA 83145',"STATE"] <- 'ID' - RES[RES$CO_ADDR=='Rua Chaves, 510 Jardim California Barueri, Sao Paulo BRA 06409-000',"STATE"] <- NA - RES[RES$CO_ADDR=='VIA POLENGHI 5 MONTANASO LOMBARDO ITA 26836',"STATE"] <- NA - - - - RES_NON_US <- RES %>% filter(is.na(STATE)) %>% select(-CO_ZIP_CODE) - RES <- RES %>% filter(!is.na(STATE),STATE!="OTHER") - - RES$CO_ZIP_CODE <- gsub("-","",substr(word(RES$CO_ADDR,-1),1,5)) - #One zip failing to match, used USPS lookup of address to change to the proper ZIP - RES[RES$CO_ZIP_CODE=='83145',"CO_ZIP_CODE"] <- '83402' - - RES <- RES%>% left_join(cbind(state.abb,state.name) %>% as_tibble %>% rename(STATE=state.abb,STATE_NAME=state.name) ) - ZIPS <- reverse_zipcode(RES$CO_ZIP_CODE) - ZIPS <- ZIPS %>% rename(STATE=state,CO_ZIP_CODE=zipcode) - RES <- RES %>% left_join(ZIPS) - - RES$FIPS <- NA - for(N in 1:nrow(RES)){ - try(RES$FIPS[N] <- fips(RES$STATE[N],RES$county[N])) - } - #Fix two missing codes. It looks like they use special characters - RES[RES$STATE=='NM' &RES$county=='Doa Ana County',"FIPS"] <- '35013' -# RES[RES$STATE=='LA' &RES$county=='St Mary Parish',"FIPS"] <- '22101' - RES <- RES %>% full_join(RES_NON_US) - return(RES) -} -RES <- FIX_STATE(RES) -RES$COUNTRY <- NA -#A function to identify the if a country abbreviation is in the address. If "WRITE" is TRUE the function updates the "COUNTRY" field to be the matched COUNTRY. 
-FIX_COUNTRY <- function( COUNTRY_ABBR,WRITE=FALSE,DF=RES){ - #Find the COUNTRY_ABBR somewhere in the middle of the company address - MATCH_AT_END <- grepl(toupper(paste0(COUNTRY_ABBR,"$")),word(DF$CO_ADDR,-1)) - #Find the COUNTRY_ABBR at the end of the company address - MATCH_IN_MIDDLE <- grepl(toupper(paste0(" ",COUNTRY_ABBR," ")),DF$CO_ADDR ) - #Remove records that have already been matched. Keep the remaining matches - MATCH <- MATCH_IN_MIDDLE | MATCH_AT_END - MATCH <- MATCH & is.na(DF$STATE)& is.na(DF$COUNTRY) - if(!WRITE){ - return(DF[MATCH,"CO_ADDR"] %>% unique ) - - } else{ - DF[MATCH,"COUNTRY"] <- COUNTRY_ABBR - return(DF) - } -} - -RES$COUNTRY <- ifelse(!is.na(RES$STATE),"USA",NA) - - -COUNTRY_ABBR <- c("CAN","EGY","KOR","JPN","DEU","CHN","IND","ZAF","ITA","ARG","FRA","ESP","TWN","AUT","SCO","BRA","SWE","TUR","SAU","SGP","PAK","ARE","PHL","BHR","OMN","CHE","ISR","LBN","AUS","MEX","GRC","THA","HUN","BEL","ROU","PJ","QAT","VNM","IRQ","MYS","IRL","POL","SVN","NLD","FIN","IDN","CZE","VEN","COL","CHL","NGA","ECU","GBR","DNK","TTO","UKR","SVK","BGR","KWT","KAZ","EST","GTM","RUS","BGD","UZB","PRT","NZL","TUN","JOR","URY","AZE","PER","NOR","BOL","SUR","HRV","CRI","BRN","LIE","HKG","DOM","PE","PK","SR","WAL","DB","LL") -for(x in COUNTRY_ABBR){ - RES <- FIX_COUNTRY(x,TRUE,DF=RES) -} -#Correct remaining issues - #PE means Peru, which also has PER -RES$COUNTRY <- ifelse(RES$COUNTRY=='PE','PER',RES$COUNTRY) -RES$COUNTRY <- ifelse(RES$COUNTRY=='PK','PAK',RES$COUNTRY) -RES$COUNTRY <- ifelse(RES$COUNTRY=='SR','SUR',RES$COUNTRY) -#Wales -RES$COUNTRY <- ifelse(RES$COUNTRY=='WAL','GBR',RES$COUNTRY) -#All these address in Netherlands -RES$COUNTRY <- ifelse(RES$COUNTRY=='DB','NLD',RES$COUNTRY) -RES$COUNTRY <- ifelse(RES$COUNTRY=='LL','NLD',RES$COUNTRY) -#In Slovakia -RES$COUNTRY <- ifelse(RES$CO_ADDR=='Jurská cesta 7 Levice 93401','SVK',RES$COUNTRY) -#In Northern Ireland -RES$COUNTRY <- ifelse(RES$CO_ADDR=='Lissue Industrial Estate Moira Road Lisburn NOT BT28 
2RF','IRL',RES$COUNTRY) -#Remove "null" addresses -RES <- RES %>% filter(CO_ADDR!="null") - -###MANUAL REVIEW OF MISSED ENTRIED -#RES %>% filter(is.na(COUNTRY)) %>% select(MAIN_CERT,CO_ADDR) %>% print(n=100) -#Add Year columns - RES <- RES %>% mutate(ORIG_AUTH_YEAR=year(ORIG_AUTH_DATE),AUTH_YEAR=year(AUTH_DATE),ORIG_AUTH_MONTH=year(ORIG_AUTH_DATE),AUTH_MONTH=year(AUTH_DATE)) #Add Nuclear indicator (all license with a nuclear related certificate) RES <- RES %>% mutate(NUCLEAR=CERT_TYPE %in%c("N","N3","NAC","NPT","NS","NUA","NV","OWN","G","GC","MO")) -RES - #RES <- RES %>% filter(CERT_TYPE!="OWN",CERT_TYPE!="NAC") ##SAVE saveRDS(RES,"./Data/PROCESSED_DATA/RDS/ASME.Rds")