Updated cleaning

This commit is contained in:
Alex Gebben Work 2026-01-21 14:45:24 -07:00
parent 81bac25617
commit 4e0922f879

View File

@@ -10,9 +10,9 @@ ASME$CO_NAME <- clean_strings(ASME$CO_NAME)
# NOTE(review): this is a side-by-side diff rendering. Every line below carries
# the pre-commit statement followed by the post-commit statement, so it is not
# runnable as-is. Only the MATCH_RES allocation and the row assignment differ
# between the two sides.
#
# Purpose: list pairs of company names (CO_NAME) that are close to, but not
# identical to, one another, so near-duplicate spellings can be reconciled.
#
# Copy CO_NAME into CO while grouped by CO_NAME.
ASME <- ASME %>% group_by(CO_NAME) %>% mutate(CO=CO_NAME) ASME <- ASME %>% group_by(CO_NAME) %>% mutate(CO=CO_NAME)
# Pairwise string distance of every name against every name using method "jw"
# (Jaro-Winkler in stringdist; presumably plain Jaro with the default p = 0 -
# confirm that is intended).
# NOTE(review): nthread=23 is hard-coded - presumably machine-specific.
MAT <- stringdistmatrix(ASME$CO_NAME,ASME$CO_NAME,method="jw",nthread=23) MAT <- stringdistmatrix(ASME$CO_NAME,ASME$CO_NAME,method="jw",nthread=23)
# Row/column index pairs whose distance is strictly between 0 and 0.06; the
# MAT>0 condition drops exact matches.
MATCH <- which(MAT<0.06 & MAT>0,arr.ind = TRUE) MATCH <- which(MAT<0.06 & MAT>0,arr.ind = TRUE)
# Placeholder tibble with one row per index pair. The commit narrows it from
# three NA columns to two, matching the two names stored per pair.
MATCH_RES <- as_tibble(matrix(NA,nrow=nrow(MATCH),ncol=2)) MATCH_RES <- as_tibble(matrix(NA,nrow=nrow(MATCH),ncol=2))
# NOTE(review): 1:nrow(MATCH) iterates over c(1, 0) when MATCH has no rows;
# seq_len(nrow(MATCH)) would skip the loop in that case.
for(i in 1:nrow(MATCH)){ for(i in 1:nrow(MATCH)){
# Look up the two names behind index pair i. The old side assigned the whole
# row and had a stray trailing comma inside t(); the new side writes columns
# 1:2 explicitly.
MATCH_RES[i,] <- t(ASME$CO_NAME[MATCH[i,]],) MATCH_RES[i,1:2] <- t(ASME$CO_NAME[MATCH[i,]])
} }
# Drop repeated rows.
# NOTE(review): the names are compared against themselves, so presumably each
# pair appears in both orders; unique does not collapse (a, b) with (b, a).
MATCH_RES <- MATCH_RES %>% unique MATCH_RES <- MATCH_RES %>% unique
for(i in 1:nrow(MATCH_RES)){ for(i in 1:nrow(MATCH_RES)){
@@ -36,14 +36,21 @@ REGION_MATCH <- read_csv("https://raw.githubusercontent.com/lukes/ISO-3166-Count
# NOTE(review): this is a side-by-side diff rendering. Lines that hold two
# statements show the pre-commit statement first and the post-commit statement
# second; from the COUNTRY_LIST line onward the two sides are different
# statements (the columns are not aligned), and some lines exist on one side
# only. It is not runnable as-is.
#
# Purpose: attach per-country annual EIA values (NUC_GEN and three lags) to the
# ASME data, aggregate by country and year, and fit Poisson fixed-effects
# models with fepois.
#
# Respell two country names (unchanged by the commit), presumably so they
# match the spelling used in REGION_MATCH.
ASME$COUNTRY <- ifelse(ASME$COUNTRY=='Czech Republic',"Czechia",ASME$COUNTRY) ASME$COUNTRY <- ifelse(ASME$COUNTRY=='Czech Republic',"Czechia",ASME$COUNTRY)
ASME$COUNTRY <- ifelse(ASME$COUNTRY=='Trinidad And Tobago','Trinidad and Tobago',ASME$COUNTRY) ASME$COUNTRY <- ifelse(ASME$COUNTRY=='Trinidad And Tobago','Trinidad and Tobago',ASME$COUNTRY)
# NOTE(review): no join key is given, so the join uses whatever column names
# the two tables share - confirm that is only the intended key.
ASME <- ASME %>% left_join(REGION_MATCH) ASME <- ASME %>% left_join(REGION_MATCH)
# Old side: bare REGION_MATCH (console print, removed by the commit).
# New side: distinct COUNTRY_ID values to query.
REGION_MATCH COUNTRY_LIST <- ASME$COUNTRY_ID %>% unique
# Old side: the same COUNTRY_LIST statement at its previous position.
# New side: drop NA ids before they are sent to the API.
COUNTRY_LIST <- ASME$COUNTRY_ID %>% unique COUNTRY_LIST <- COUNTRY_LIST[!is.na(COUNTRY_LIST )]
# Old side only: bare ASME (console print, removed by the commit).
ASME
# Old side only (removed): fetch the first country, then rbind each remaining
# country inside try(), naming the period column AUTH_YEAR.
# NOTE(review): productId="27" / unit="TJ" presumably select nuclear generation
# in terajoules, going by the NUC_GEN name - confirm against the EIA facets.
NUC_DATA <- eia_data(dir="international",data="value",freq="annual",facets=list(productId="27",unit="TJ",countryRegionId=COUNTRY_LIST[1])) %>% select(COUNTRY_ID=countryRegionId,AUTH_YEAR=period,NUC_GEN=value) %>% mutate(AUTH_YEAR=as.numeric(AUTH_YEAR)) %>% arrange(AUTH_YEAR) %>% mutate(LAG_GEN=lag(NUC_GEN),LAG_TWO_GEN=lag(NUC_GEN,2),LAG_THREE_GEN=lag(NUC_GEN,3))
for(i in COUNTRY_LIST[-1]){try(NUC_DATA <- rbind(NUC_DATA,eia_data(dir="international",data="value",freq="annual",facets=list(productId="27",unit="TJ",countryRegionId=i)) %>% select(COUNTRY_ID=countryRegionId,AUTH_YEAR=period,NUC_GEN=value) %>% mutate(AUTH_YEAR=as.numeric(AUTH_YEAR)) %>% arrange(AUTH_YEAR) %>% mutate(LAG_GEN=lag(NUC_GEN),LAG_TWO_GEN=lag(NUC_GEN,2),LAG_THREE_GEN=lag(NUC_GEN,3))))}
# Old side only (removed): join the EIA data onto ASME, then print a few
# columns of the same join.
ASME <- ASME %>% left_join(NUC_DATA)
ASME %>% left_join(NUC_DATA) %>% select(MAIN_CERT,COUNTRY,AUTH_YEAR,NUC_GEN)
# Old side: country-by-AUTH_YEAR summary (removed).
# New side: one eia_data call per country inside lapply, stacked with
# do.call(rbind, ...). The period column is now named Year and NUC_GEN is
# converted to numeric. Lags are computed per call, after sorting by Year.
# NOTE(review): lag() shifts by row, so the lags equal "previous year" only if
# each call returns one row per consecutive year - confirm.
# NOTE(review): the try() guard from the old loop is gone; presumably one
# failing request now aborts the whole build.
COUNTRY_DATA <- ASME %>% group_by(COUNTRY,AUTH_YEAR) %>% summarize(NUCLEAR=sum(NUCLEAR),NUC_GEN=mean(NUC_GEN,na.rm=TRUE),OTHER=n()-NUCLEAR) NUC_DATA <- do.call(rbind,lapply(1:length(COUNTRY_LIST),function(x){eia_data(dir="international",data="value",freq="annual",facets=list(productId="27",unit="TJ",countryRegionId=COUNTRY_LIST[x])) %>% select(COUNTRY_ID=countryRegionId,Year=period,NUC_GEN=value) %>% mutate(Year=as.numeric(Year),NUC_GEN=as.numeric(NUC_GEN)) %>% arrange(Year) %>% mutate(LAG_GEN=lag(NUC_GEN),LAG_TWO_GEN=lag(NUC_GEN,2),LAG_THREE_GEN=lag(NUC_GEN,3))} ))
# Old side: Poisson model on the old summary (removed).
# New side: join the EIA data onto ASME with Year renamed to ORIG_AUTH_YEAR, so
# generation is matched on the original authorization year.
fepois(NUCLEAR~log(OTHER) +NUC_GEN +COUNTRY|AUTH_YEAR,cluster=~COUNTRY,COUNTRY_DATA) ASME <- ASME %>% left_join(NUC_DATA %>% rename(ORIG_AUTH_YEAR=Year))
# New side only: stack three copies of ASME - non-Active rows at ORIG_AUTH_YEAR
# with CERT_VALUE 1, non-Active rows at EXP_DATE with CERT_VALUE -1, and Active
# rows at ORIG_AUTH_YEAR with CERT_VALUE -1 - then sign NUCLEAR by CERT_VALUE
# and drop duplicate rows.
# NOTE(review): the Active rows get -1 at their authorization year, while
# non-Active rows get +1 there; this looks like it may be meant to be 1 - confirm.
# NOTE(review): RES is not used by any later line visible here.
RES <- rbind(ASME %>% filter(CERT_STATUS!='Active') %>% mutate(Year=ORIG_AUTH_YEAR,CERT_VALUE=1),ASME %>% filter(CERT_STATUS!='Active') %>% mutate(Year=EXP_DATE,CERT_VALUE=-1),ASME %>% filter(CERT_STATUS=='Active') %>% mutate(Year=ORIG_AUTH_YEAR,CERT_VALUE=-1)) %>% select(CERT_VALUE,everything()) %>% mutate(NUCLEAR=NUCLEAR*CERT_VALUE) %>% unique
# New side only: summary per COUNTRY and ORIG_AUTH_YEAR.
# NOTE(review): the lag columns use sum() without na.rm, so one NA makes the
# group NA, and summing repeats the joined value once per row in the group,
# whereas NUC_GEN uses mean(..., na.rm=TRUE) - confirm which is intended.
# NOTE(review): OTHER=n()-NUCLEAR presumably sees the NUCLEAR total defined
# earlier in the same summarize call - verify.
COUNTRY_DATA <- ASME %>% group_by(COUNTRY,ORIG_AUTH_YEAR) %>% summarize(NUCLEAR=sum(NUCLEAR),LAG_GEN=sum(LAG_GEN),LAG_TWO_GEN=sum(LAG_TWO_GEN),LAG_THREE_GEN=sum(LAG_THREE_GEN),NUC_GEN=mean(NUC_GEN,na.rm=TRUE),OTHER=n()-NUCLEAR)
# New side only: bare COUNTRY_DATA (console print).
COUNTRY_DATA
# New side only: Poisson model of NUCLEAR with COUNTRY and ORIG_AUTH_YEAR fixed
# effects, clustered by COUNTRY. The +0.0001 offsets presumably keep log()
# finite at zero.
fepois(NUCLEAR~log(OTHER+0.0001)+log(LAG_GEN+0.0001)|COUNTRY+ORIG_AUTH_YEAR,cluster=~COUNTRY,COUNTRY_DATA)
# NOTE(review): this model names AUTH_YEAR as a fixed effect, but the
# COUNTRY_DATA built above is grouped by COUNTRY and ORIG_AUTH_YEAR, so
# AUTH_YEAR is presumably not a column of it. log(OTHER) also has no offset
# here - confirm.
fepois(NUCLEAR~OTHER+log(OTHER) +log(LAG_GEN+0.0001)|AUTH_YEAR+COUNTRY,cluster=~COUNTRY,COUNTRY_DATA)