# ASME_nuclear/R_Analysis.r
# 2026-01-15 17:08:46 -07:00
#
# 50 lines
# 3.6 KiB
# R

library(tidyverse)
library(fedmatch)
library(eia)
# Register the EIA API key for the eia_data() calls further down.
# The key is read from the EIA_KEY environment variable (e.g. set in
# ~/.Renviron) so that the secret is not stored in the script.
# NOTE(review): the key that used to be committed on this line should be
# treated as leaked and rotated.
if (!nzchar(Sys.getenv("EIA_KEY"))) {
  stop(
    "Environment variable EIA_KEY is not set; it is required to query the EIA API.",
    call. = FALSE
  )
}
eia_set_key(Sys.getenv("EIA_KEY"))
library("stringdist")
library(fixest)
# ---- Load ASME records and harmonize company names ----
ASME <- readRDS("Data/PROCESSED_DATA/RDS/ASME.Rds")
#nrow(ASME )/14865
ASME$CO_NAME <- clean_strings(ASME$CO_NAME)

# Keep the cleaned, pre-merge name in CO; CO_NAME itself is overwritten below.
ASME <- ASME %>% mutate(CO = CO_NAME)

# Pairwise Jaro-Winkler distances between the distinct cleaned names. Working
# on distinct names keeps the matrix at (#names x #names) rather than
# (#rows x #rows); unique() keeps first-occurrence order, so the pairs come out
# in the same order as they would from the full row-level matrix.
# The thread count is left at the stringdist default instead of a fixed value.
CO_NAMES_UNIQUE <- unique(ASME$CO_NAME)
MAT <- stringdistmatrix(CO_NAMES_UNIQUE, CO_NAMES_UNIQUE, method = "jw")

# Near-duplicates: distance below 0.06 but not identical (distance 0).
# Entries with NA distance are dropped by which().
MATCH <- which(MAT < 0.06 & MAT > 0, arr.ind = TRUE)

# One row per matched pair: V1 is the name to replace, V2 its replacement.
# Built in one vectorized step; yields a zero-row table when nothing matches.
MATCH_RES <- tibble(
  V1 = CO_NAMES_UNIQUE[MATCH[, 1]],
  V2 = CO_NAMES_UNIQUE[MATCH[, 2]]
)

# Apply the replacements one pair at a time, in pair order. Later pairs see the
# result of earlier ones, so the loop is intentionally sequential.
for (i in seq_len(nrow(MATCH_RES))) {
  ASME$CO_NAME <- ifelse(
    ASME$CO_NAME == MATCH_RES$V1[i],
    MATCH_RES$V2[i],
    ASME$CO_NAME
  )
}

# Within each plant location, collapse to a single name: the alphabetically
# smallest CO_NAME among the rows sharing that address/country/state/city.
ASME <- ASME %>%
  group_by(PLANT_ADDRESS, COUNTRY, STATE, CITY) %>%
  mutate(CO_NAME = min(CO_NAME)) %>%
  ungroup()
# ---- Attach ISO 3166 alpha-3 country codes to ASME ----
# Country name -> alpha-3 code table, downloaded from a public GitHub CSV.
REGION_MATCH <- read_csv("https://raw.githubusercontent.com/lukes/ISO-3166-Countries-with-Regional-Codes/refs/heads/master/all/all.csv") %>%
  select(COUNTRY = name, COUNTRY_ID = `alpha-3`)

# Names in the ISO table rewritten before joining (presumably to the spellings
# that appear in ASME$COUNTRY -- confirm against the data).
ISO_NAME_FIXES <- c(
  "United States of America" = "United States",
  "Korea, Republic of" = "Republic of Korea",
  "United Kingdom of Great Britain and Northern Ireland" = "United Kingdom",
  "Netherlands, Kingdom of the" = "The Netherlands",
  "Türkiye" = "Turkey",
  "Taiwan, Province of China" = "Taiwan",
  "Venezuela, Bolivarian Republic of" = "Venezuela",
  "Bolivia, Plurinational State of" = "Bolivia"
)
REGION_MATCH$COUNTRY <- recode(REGION_MATCH$COUNTRY, !!!ISO_NAME_FIXES)

# Names on the ASME side rewritten to match the ISO table; the UK constituent
# countries listed here are folded into "United Kingdom".
ASME_NAME_FIXES <- c(
  "People's Republic of China" = "China",
  "Wales" = "United Kingdom",
  "Northern Ireland" = "United Kingdom",
  "Scotland" = "United Kingdom",
  "Czech Republic" = "Czechia",
  "Trinidad And Tobago" = "Trinidad and Tobago"
)
ASME$COUNTRY <- recode(ASME$COUNTRY, !!!ASME_NAME_FIXES)

# Join on the country name only; rows whose name has no ISO match get an NA
# COUNTRY_ID.
ASME <- ASME %>% left_join(REGION_MATCH, by = "COUNTRY")
REGION_MATCH  # auto-print for interactive inspection
COUNTRY_LIST <- ASME$COUNTRY_ID %>% unique
ASME  # auto-print for interactive inspection
# ---- Download per-country annual series from the EIA API and merge ----

# Fetch the annual international series (productId "27", unit "TJ") for one
# country code and return it as COUNTRY_ID / AUTH_YEAR / NUC_GEN, sorted by
# year, with one-, two- and three-year lags of NUC_GEN.
# NOTE(review): productId "27" is presumably nuclear generation -- confirm
# against the EIA facet list.
# Returns NULL, with a warning naming the country, when the request or the
# reshaping fails, so that one bad country does not abort the whole download.
fetch_nuc_gen <- function(country_id) {
  tryCatch(
    eia_data(
      dir = "international",
      data = "value",
      freq = "annual",
      facets = list(productId = "27", unit = "TJ", countryRegionId = country_id)
    ) %>%
      select(COUNTRY_ID = countryRegionId, AUTH_YEAR = period, NUC_GEN = value) %>%
      mutate(AUTH_YEAR = as.numeric(AUTH_YEAR)) %>%
      arrange(AUTH_YEAR) %>%
      mutate(
        LAG_GEN = lag(NUC_GEN),
        LAG_TWO_GEN = lag(NUC_GEN, 2),
        LAG_THREE_GEN = lag(NUC_GEN, 3)
      ),
    error = function(e) {
      warning(
        "EIA request failed for country ", country_id, ": ", conditionMessage(e),
        call. = FALSE
      )
      NULL
    }
  )
}

# Countries without an ISO match carry an NA code; do not send those to the API.
COUNTRY_IDS <- COUNTRY_LIST[!is.na(COUNTRY_LIST)]

# Fetch every country the same way and stack the results once; failed
# countries (NULL) are dropped by bind_rows().
NUC_DATA <- bind_rows(lapply(COUNTRY_IDS, fetch_nuc_gen))
if (nrow(NUC_DATA) == 0) {
  stop("No EIA data could be downloaded for any country in COUNTRY_LIST.", call. = FALSE)
}

# Merge on country code and year; ASME rows without a matching series keep NA
# in NUC_GEN and the lag columns.
ASME <- ASME %>% left_join(NUC_DATA, by = c("COUNTRY_ID", "AUTH_YEAR"))

# Auto-print a narrow view of the merged columns for interactive inspection.
ASME %>% select(MAIN_CERT, COUNTRY, AUTH_YEAR, NUC_GEN)
# ---- Country-year panel and Poisson regression ----
# Collapse ASME to one row per COUNTRY x AUTH_YEAR. The result is returned
# ungrouped so later steps do not inherit a leftover COUNTRY grouping.
COUNTRY_DATA <- ASME %>%
  group_by(COUNTRY, AUTH_YEAR) %>%
  summarize(
    NUCLEAR = sum(NUCLEAR),
    NUC_GEN = mean(NUC_GEN, na.rm = TRUE),
    # NUCLEAR on the right-hand side is the group total computed just above,
    # so OTHER is the number of rows in the group minus that total.
    OTHER = n() - NUCLEAR,
    .groups = "drop"
  )

# Poisson regression of NUCLEAR on log(OTHER), NUC_GEN and COUNTRY with
# AUTH_YEAR fixed effects; standard errors clustered by COUNTRY. The fitted
# model is auto-printed, not stored.
fepois(
  NUCLEAR ~ log(OTHER) + NUC_GEN + COUNTRY | AUTH_YEAR,
  data = COUNTRY_DATA,
  cluster = ~COUNTRY
)