Updated cleaning
This commit is contained in:
parent
81bac25617
commit
4e0922f879
29
R_Analysis.r
29
R_Analysis.r
@ -10,9 +10,9 @@ ASME$CO_NAME <- clean_strings(ASME$CO_NAME)
|
||||
# Add a column CO holding a copy of CO_NAME. The result stays grouped by
# CO_NAME, exactly as the piped form left it.
ASME <- mutate(group_by(ASME, CO_NAME), CO = CO_NAME)
|
||||
# Distance matrix comparing every CO_NAME with every other CO_NAME, using
# stringdist's "jw" method on 23 threads.
MAT <- stringdistmatrix(
  a = ASME$CO_NAME,
  b = ASME$CO_NAME,
  method = "jw",
  nthread = 23
)
|
||||
# (row, col) index pairs whose distance is strictly positive but below 0.06:
# near-identical names, with exact duplicates (distance 0) excluded.
MATCH <- which(MAT > 0 & MAT < 0.06, arr.ind = TRUE)
|
||||
# One row per index pair in MATCH, holding the two company names that the pair
# refers to. Only the two-column layout is kept: the earlier three-column
# allocation and the assignment carrying a stray trailing comma in t() are
# superseded by these statements.
MATCH_RES <- as_tibble(matrix(NA, nrow = nrow(MATCH), ncol = 2))
# seq_len() gives an empty sequence when MATCH has no rows; 1:nrow(MATCH)
# would have iterated over c(1, 0) and indexed past the table.
for (i in seq_len(nrow(MATCH))) {
  MATCH_RES[i, 1:2] <- t(ASME$CO_NAME[MATCH[i, ]])
}
|
||||
# Drop repeated name pairs.
MATCH_RES <- unique(MATCH_RES)
|
||||
for(i in 1:nrow(MATCH_RES)){
|
||||
@ -36,14 +36,21 @@ REGION_MATCH <- read_csv("https://raw.githubusercontent.com/lukes/ISO-3166-Count
|
||||
# Recode two country spellings; rows with any other value are left unchanged.
# Presumably this aligns the names with those used in REGION_MATCH - TODO confirm.
ASME$COUNTRY <- ifelse(ASME$COUNTRY=='Czech Republic',"Czechia",ASME$COUNTRY)
|
||||
ASME$COUNTRY <- ifelse(ASME$COUNTRY=='Trinidad And Tobago','Trinidad and Tobago',ASME$COUNTRY)
|
||||
# No `by` is given, so the join keys are whatever column names ASME and
# REGION_MATCH have in common.
ASME <- ASME %>% left_join(REGION_MATCH)
|
||||
# Bare name: prints REGION_MATCH when the script is run at top level.
REGION_MATCH
|
||||
# Distinct values of ASME$COUNTRY_ID.
COUNTRY_LIST <- ASME$COUNTRY_ID %>% unique
|
||||
# Bare name: prints ASME when the script is run at top level.
ASME
|
||||
# Query eia_data() once per entry of COUNTRY_LIST (international, annual,
# productId "27", unit "TJ") and stack the per-country tables. Presumably this
# is nuclear generation, going by the NUC_GEN name - TODO confirm.
#
# Each country's rows are sorted by AUTH_YEAR before the lags are taken, so
# LAG_GEN / LAG_TWO_GEN / LAG_THREE_GEN are the values one, two and three rows
# earlier within that country.
#
# Every country goes through the same guarded request: a failure is reported
# as a warning naming the country and that country is left out, instead of the
# first country being unguarded and the rest wrapped in a bare try(). The
# pieces are bound once at the end rather than growing NUC_DATA with rbind()
# inside a loop.
NUC_DATA <- do.call(rbind, Filter(Negate(is.null), lapply(COUNTRY_LIST, function(country_id) {
  tryCatch(
    eia_data(
      dir = "international",
      data = "value",
      freq = "annual",
      facets = list(productId = "27", unit = "TJ", countryRegionId = country_id)
    ) %>%
      select(COUNTRY_ID = countryRegionId, AUTH_YEAR = period, NUC_GEN = value) %>%
      mutate(AUTH_YEAR = as.numeric(AUTH_YEAR)) %>%
      arrange(AUTH_YEAR) %>%
      mutate(
        LAG_GEN = lag(NUC_GEN),
        LAG_TWO_GEN = lag(NUC_GEN, 2),
        LAG_THREE_GEN = lag(NUC_GEN, 3)
      ),
    error = function(e) {
      warning(
        "EIA request failed for country ", country_id, ": ",
        conditionMessage(e),
        call. = FALSE
      )
      NULL
    }
  )
})))
|
||||
# No `by` is given, so the join keys are whatever column names ASME and
# NUC_DATA have in common.
ASME <- ASME %>% left_join(NUC_DATA)
|
||||
# Inspection only: the result is printed, not assigned.
# NOTE(review): ASME was already joined with NUC_DATA by the statement above.
ASME %>% left_join(NUC_DATA) %>% select(MAIN_CERT,COUNTRY,AUTH_YEAR,NUC_GEN)
|
||||
# Distinct values of ASME$COUNTRY_ID ...
COUNTRY_LIST <- ASME$COUNTRY_ID %>% unique
|
||||
# ... with missing ids removed.
COUNTRY_LIST <- COUNTRY_LIST[!is.na(COUNTRY_LIST )]
|
||||
|
||||
# Collapse ASME to one row per COUNTRY x AUTH_YEAR.
#   NUCLEAR - sum of the NUCLEAR column within the group
#   NUC_GEN - group mean of NUC_GEN, ignoring missing values
#   OTHER   - group row count minus the NUCLEAR total computed just above
#             (summarize() evaluates its arguments in order)
COUNTRY_DATA <- summarize(
  group_by(ASME, COUNTRY, AUTH_YEAR),
  NUCLEAR = sum(NUCLEAR),
  NUC_GEN = mean(NUC_GEN, na.rm = TRUE),
  OTHER = n() - NUCLEAR
)
|
||||
# Poisson regression of NUCLEAR on log(OTHER), NUC_GEN and COUNTRY, with
# AUTH_YEAR fixed effects and standard errors clustered by COUNTRY.
# The fit is not assigned; at top level it is printed.
fepois(
  fml = NUCLEAR ~ log(OTHER) + NUC_GEN + COUNTRY | AUTH_YEAR,
  data = COUNTRY_DATA,
  cluster = ~COUNTRY
)
|
||||
# Query eia_data() once per entry of COUNTRY_LIST (international, annual,
# productId "27", unit "TJ") and stack the per-country tables with rbind.
# Year and NUC_GEN are converted to numeric. Rows are sorted by Year within
# each country before the lags are taken, so LAG_GEN / LAG_TWO_GEN /
# LAG_THREE_GEN are the values one, two and three rows earlier for that
# country. NOTE(review): that equals "n years earlier" only if the series has
# no missing years - confirm.
#
# The list is iterated directly, so an empty COUNTRY_LIST issues no requests;
# 1:length(COUNTRY_LIST) would have produced the indices c(1, 0) instead.
NUC_DATA <- do.call(rbind, lapply(COUNTRY_LIST, function(country_id) {
  eia_data(
    dir = "international",
    data = "value",
    freq = "annual",
    facets = list(productId = "27", unit = "TJ", countryRegionId = country_id)
  ) %>%
    select(COUNTRY_ID = countryRegionId, Year = period, NUC_GEN = value) %>%
    mutate(Year = as.numeric(Year), NUC_GEN = as.numeric(NUC_GEN)) %>%
    arrange(Year) %>%
    mutate(
      LAG_GEN = lag(NUC_GEN),
      LAG_TWO_GEN = lag(NUC_GEN, 2),
      LAG_THREE_GEN = lag(NUC_GEN, 3)
    )
}))
|
||||
# Attach the generation columns to ASME. NUC_DATA's Year is renamed to
# ORIG_AUTH_YEAR first; no `by` is given, so the join keys are whatever column
# names the two tables then share.
ASME <- left_join(ASME, rename(NUC_DATA, ORIG_AUTH_YEAR = Year))
|
||||
|
||||
# Stack three copies of ASME rows into a table of signed certificate events,
# put CERT_VALUE first, multiply NUCLEAR by the sign, and drop duplicate rows.
RES <- rbind(
  # Certificates that are not 'Active': +1 at ORIG_AUTH_YEAR ...
  mutate(filter(ASME, CERT_STATUS != 'Active'), Year = ORIG_AUTH_YEAR, CERT_VALUE = 1),
  # ... and -1 at EXP_DATE.
  # NOTE(review): Year is taken from EXP_DATE here and from ORIG_AUTH_YEAR
  # elsewhere - confirm both hold the same kind of value.
  mutate(filter(ASME, CERT_STATUS != 'Active'), Year = EXP_DATE, CERT_VALUE = -1),
  # 'Active' certificates at ORIG_AUTH_YEAR.
  # NOTE(review): these are entered with -1, the same sign as an expiry -
  # confirm this is intended rather than +1.
  mutate(filter(ASME, CERT_STATUS == 'Active'), Year = ORIG_AUTH_YEAR, CERT_VALUE = -1)
) %>%
  select(CERT_VALUE, everything()) %>%
  mutate(NUCLEAR = NUCLEAR * CERT_VALUE) %>%
  unique
|
||||
|
||||
|
||||
# Collapse ASME to one row per COUNTRY x ORIG_AUTH_YEAR.
#   NUCLEAR        - sum of the NUCLEAR column within the group
#   LAG_*_GEN,
#   NUC_GEN        - group mean, ignoring missing values
#   OTHER          - group row count minus the NUCLEAR total computed just
#                    above (summarize() evaluates its arguments in order)
#
# The lag columns are averaged the same way as NUC_GEN. Summing them made the
# value grow with the number of ASME rows in the group, and without na.rm a
# single missing row turned the whole group NA.
# NOTE(review): assumes the generation columns are constant within a
# COUNTRY x ORIG_AUTH_YEAR group (they are joined in from NUC_DATA) - confirm.
COUNTRY_DATA <- ASME %>%
  group_by(COUNTRY, ORIG_AUTH_YEAR) %>%
  summarize(
    NUCLEAR = sum(NUCLEAR),
    LAG_GEN = mean(LAG_GEN, na.rm = TRUE),
    LAG_TWO_GEN = mean(LAG_TWO_GEN, na.rm = TRUE),
    LAG_THREE_GEN = mean(LAG_THREE_GEN, na.rm = TRUE),
    NUC_GEN = mean(NUC_GEN, na.rm = TRUE),
    OTHER = n() - NUCLEAR
  )
# Bare name: prints the summary when the script is run at top level.
COUNTRY_DATA
|
||||
|
||||
|
||||
|
||||
# Poisson regression of NUCLEAR on log(OTHER) and log(LAG_GEN), with COUNTRY
# and ORIG_AUTH_YEAR fixed effects and standard errors clustered by COUNTRY.
# 0.0001 is added inside each log so that zero values stay finite.
# The fit is not assigned; at top level it is printed.
fepois(
  fml = NUCLEAR ~ log(OTHER + 0.0001) + log(LAG_GEN + 0.0001) | COUNTRY + ORIG_AUTH_YEAR,
  data = COUNTRY_DATA,
  cluster = ~COUNTRY
)
|
||||
|
||||
# Poisson regression of NUCLEAR on OTHER, log(OTHER) and log(LAG_GEN), with
# AUTH_YEAR and COUNTRY fixed effects and standard errors clustered by COUNTRY.
# NOTE(review): this uses AUTH_YEAR and an unshifted log(OTHER), whereas the
# ORIG_AUTH_YEAR summary has no AUTH_YEAR column and the sibling model uses
# log(OTHER + 0.0001) - confirm which COUNTRY_DATA this is meant to run on.
fepois(
  fml = NUCLEAR ~ OTHER + log(OTHER) + log(LAG_GEN + 0.0001) | AUTH_YEAR + COUNTRY,
  data = COUNTRY_DATA,
  cluster = ~COUNTRY
)
|
||||
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user