# ASME_nuclear/R_Analysis.r
# 2026-01-21 14:45:24 -07:00
#
# 57 lines
# 3.9 KiB
# R

library(tidyverse)
library(fedmatch)
library(eia)
# Register the EIA API key for this session.
# The key is read from the EIA_KEY environment variable (e.g. set in
# ~/.Renviron) rather than being committed to the repository. Any key that was
# previously stored in this file should be treated as leaked and rotated.
eia_api_key <- Sys.getenv("EIA_KEY")
if (!nzchar(eia_api_key)) {
  stop(
    "Set the EIA_KEY environment variable to your EIA API key.",
    call. = FALSE
  )
}
eia_set_key(eia_api_key)
library("stringdist")
library(fixest)
# ---- Load ASME records and harmonize company names ----
# Reads the processed ASME table, normalizes CO_NAME with clean_strings(),
# keeps that cleaned name in CO, folds near-identical spellings of CO_NAME
# together (Jaro-Winkler distance in (0, 0.06)), and finally gives every
# record at the same plant location a single company name.
ASME <- readRDS("Data/PROCESSED_DATA/RDS/ASME.Rds")
# nrow(ASME) / 14865
ASME$CO_NAME <- clean_strings(ASME$CO_NAME)

# CO preserves the cleaned name as it was before any fuzzy merging. No
# grouping is needed for a plain column copy, and leaving the table ungrouped
# avoids regrouping on every CO_NAME assignment below.
ASME <- ASME %>% mutate(CO = CO_NAME)

# Compare distinct names only. Identical names have distance 0 and are
# excluded by the `> 0` filter, so repeating a name once per row only inflates
# the distance matrix without adding any matched pair.
# NOTE: MAT and MATCH are therefore indexed by position in `co_names`, not by
# row of ASME.
co_names <- unique(ASME$CO_NAME)
MAT <- stringdistmatrix(co_names, co_names, method = "jw", nthread = 23)
MATCH <- which(MAT < 0.06 & MAT > 0, arr.ind = TRUE)

# One row per matched pair: V1 is the name to replace, V2 its replacement.
# which() reports pairs column by column, and that order is kept because the
# sequential replacement below depends on it.
MATCH_RES <- tibble(
  V1 = co_names[MATCH[, 1]],
  V2 = co_names[MATCH[, 2]]
) %>%
  unique()

# Apply the replacements one pair at a time, in order. seq_len() makes this a
# no-op (instead of an out-of-bounds error) when no near-duplicates exist.
for (i in seq_len(nrow(MATCH_RES))) {
  ASME$CO_NAME <- ifelse(
    ASME$CO_NAME == MATCH_RES$V1[i],
    MATCH_RES$V2[i],
    ASME$CO_NAME
  )
}

# Records sharing the same address, country, state and city get one name:
# the smallest CO_NAME in that group by string ordering.
ASME <- ASME %>%
  group_by(PLANT_ADDRESS, COUNTRY, STATE, CITY) %>%
  mutate(CO_NAME = min(CO_NAME)) %>%
  ungroup()
# ---- Attach ISO alpha-3 country codes to the ASME records ----
# The ISO table and the ASME table spell some countries differently, so each
# side is recoded toward a common spelling before joining. Names that are not
# listed in a map are left untouched.

# ISO lookup: keep the country name and its alpha-3 code, then rename the ISO
# spellings that differ from the ones used in ASME.
REGION_MATCH <- read_csv("https://raw.githubusercontent.com/lukes/ISO-3166-Countries-with-Regional-Codes/refs/heads/master/all/all.csv") %>%
  select(COUNTRY = name, COUNTRY_ID = `alpha-3`) %>%
  mutate(
    COUNTRY = recode(
      COUNTRY,
      "United States of America" = "United States",
      "Korea, Republic of" = "Republic of Korea",
      "United Kingdom of Great Britain and Northern Ireland" = "United Kingdom",
      "Netherlands, Kingdom of the" = "The Netherlands",
      "Türkiye" = "Turkey",
      "Taiwan, Province of China" = "Taiwan",
      "Venezuela, Bolivarian Republic of" = "Venezuela",
      "Bolivia, Plurinational State of" = "Bolivia"
    )
  )

# ASME side: fold UK constituent countries into "United Kingdom" and align the
# remaining spellings with the ISO table, then join. The join uses the columns
# the two tables have in common.
ASME <- ASME %>%
  mutate(
    COUNTRY = recode(
      COUNTRY,
      "People's Republic of China" = "China",
      "Wales" = "United Kingdom",
      "Northern Ireland" = "United Kingdom",
      "Scotland" = "United Kingdom",
      "Czech Republic" = "Czechia",
      "Trinidad And Tobago" = "Trinidad and Tobago"
    )
  ) %>%
  left_join(REGION_MATCH)
# ---- Download annual EIA series per country and join them onto ASME ----
# Distinct, non-missing country codes present in the ASME table.
COUNTRY_LIST <- unique(ASME$COUNTRY_ID)
COUNTRY_LIST <- COUNTRY_LIST[!is.na(COUNTRY_LIST)]

# Fail early with a clear message instead of sending malformed requests when
# no record carries a country code.
if (length(COUNTRY_LIST) == 0) {
  stop(
    "No ASME record has a COUNTRY_ID; nothing to request from the EIA API.",
    call. = FALSE
  )
}

# One request per country (international data, annual frequency, productId
# "27", unit TJ). Each result is sorted by year and given 1-, 2- and 3-row
# lags of the value before the per-country tables are stacked.
# NOTE(review): productId "27" is presumably nuclear generation, judging by the
# NUC_GEN name; confirm against the EIA facet list.
# NOTE(review): the lags are positional, so they equal "previous year(s)" only
# if the API returns exactly one row per consecutive year; confirm.
NUC_DATA <- lapply(COUNTRY_LIST, function(country_id) {
  eia_data(
    dir = "international",
    data = "value",
    freq = "annual",
    facets = list(
      productId = "27",
      unit = "TJ",
      countryRegionId = country_id
    )
  ) %>%
    select(COUNTRY_ID = countryRegionId, Year = period, NUC_GEN = value) %>%
    mutate(Year = as.numeric(Year), NUC_GEN = as.numeric(NUC_GEN)) %>%
    arrange(Year) %>%
    mutate(
      LAG_GEN = lag(NUC_GEN),
      LAG_TWO_GEN = lag(NUC_GEN, 2),
      LAG_THREE_GEN = lag(NUC_GEN, 3)
    )
}) %>%
  bind_rows()

# Match each record to the series value for its country and authorization
# year. The key is spelled out so an unrelated shared column name can never
# silently become part of the join.
ASME <- ASME %>%
  left_join(
    NUC_DATA %>% rename(ORIG_AUTH_YEAR = Year),
    by = c("COUNTRY_ID", "ORIG_AUTH_YEAR")
  )
# ---- Build the certificate event table ----
# Each certificate contributes signed events, with Year holding the event year:
#   * non-active certificates: +1 at ORIG_AUTH_YEAR and -1 at EXP_DATE;
#   * active certificates:     +1 at ORIG_AUTH_YEAR only (no expiry event).
# The active rows previously carried -1, which recorded every still-valid
# certificate as a removal at its authorization year; a grant is +1 here as it
# is for non-active certificates.
# Rows whose CERT_STATUS is NA match neither filter and are left out.
# NUCLEAR is multiplied by the event sign, CERT_VALUE is moved to the first
# column, and exact duplicate rows are removed.
# NOTE(review): Year mixes ORIG_AUTH_YEAR and EXP_DATE; confirm both columns
# hold the same kind of value (e.g. numeric years).
RES <- rbind(
  ASME %>%
    filter(CERT_STATUS != "Active") %>%
    mutate(Year = ORIG_AUTH_YEAR, CERT_VALUE = 1),
  ASME %>%
    filter(CERT_STATUS != "Active") %>%
    mutate(Year = EXP_DATE, CERT_VALUE = -1),
  ASME %>%
    filter(CERT_STATUS == "Active") %>%
    mutate(Year = ORIG_AUTH_YEAR, CERT_VALUE = 1)
) %>%
  select(CERT_VALUE, everything()) %>%
  mutate(NUCLEAR = NUCLEAR * CERT_VALUE) %>%
  unique()
# ---- Aggregate to one row per country and authorization year ----
# NUCLEAR is the sum of the NUCLEAR column in the cell and OTHER is the number
# of records in the cell minus that sum (inside summarize(), NUCLEAR already
# refers to the summed value when OTHER is computed).
# NUC_GEN and its lags were joined onto every record by country and year, so
# they are country-year values repeated on each row. They are averaged rather
# than summed: summing would multiply them by the number of records in the
# cell. This matches how NUC_GEN was already aggregated.
COUNTRY_DATA <- ASME %>%
  group_by(COUNTRY, ORIG_AUTH_YEAR) %>%
  summarize(
    NUCLEAR = sum(NUCLEAR),
    LAG_GEN = mean(LAG_GEN, na.rm = TRUE),
    LAG_TWO_GEN = mean(LAG_TWO_GEN, na.rm = TRUE),
    LAG_THREE_GEN = mean(LAG_THREE_GEN, na.rm = TRUE),
    NUC_GEN = mean(NUC_GEN, na.rm = TRUE),
    OTHER = n() - NUCLEAR,
    # Return an ungrouped table; nothing below needs the COUNTRY grouping.
    .groups = "drop"
  )
COUNTRY_DATA
# ---- Poisson fixed-effects models of NUCLEAR counts ----
# Both models absorb country and authorization-year fixed effects and cluster
# standard errors by country. The 0.0001 offset keeps log() finite at zero.

# Model 1: log of OTHER and log of the one-period lag of generation.
fepois(
  NUCLEAR ~ log(OTHER + 0.0001) + log(LAG_GEN + 0.0001) |
    COUNTRY + ORIG_AUTH_YEAR,
  data = COUNTRY_DATA,
  cluster = ~COUNTRY
)

# Model 2: adds OTHER in levels. The year fixed effect is ORIG_AUTH_YEAR;
# COUNTRY_DATA has no AUTH_YEAR column, so the previous formula could not be
# estimated.
# NOTE(review): log(OTHER) has no offset here, unlike model 1, so cells with
# OTHER == 0 evaluate to -Inf; confirm whether that is intended.
fepois(
  NUCLEAR ~ OTHER + log(OTHER) + log(LAG_GEN + 0.0001) |
    ORIG_AUTH_YEAR + COUNTRY,
  data = COUNTRY_DATA,
  cluster = ~COUNTRY
)