diff --git a/Class4/Comp_Econ_Code/Code_Examples.r b/Class4/Comp_Econ_Code/Code_Examples.r index 770b85c..f23a5ca 100644 --- a/Class4/Comp_Econ_Code/Code_Examples.r +++ b/Class4/Comp_Econ_Code/Code_Examples.r @@ -1,8 +1,40 @@ -library(tidyverse) -getwd() +####Load libraries + library(tidyverse) #tidyverse is used to organize the data with read_csv() + library(fixest) #fixest is loaded so a fixed effect model can be used. +#Creating directories to store the raw data set, +# so the results can be reproduced more easily if the data changes. + dir.create("Data",showWarnings = FALSE) + dir.create("Data/Orig_Data",showWarnings = FALSE) + #Check if directory exists + dir.exists("Data/Orig_Data") + +# Help -------------------------------------------------------------------- + + +#List all files in the working directory +list.files() +#List all files in the working directory that end in png + list.files("./",pattern="*.png") +#list all files in the Data folder starting from the working directory + list.files("Data/") +#write a csv file to the directory + write_csv(diamonds, "Data/diamonds.csv") +#write a tab separated file to the directory (tsv) + write_tsv(diamonds, "Data/diamonds.tsv") +#list all files in the Data directory. Save in a variable. 
+ FILE_LIST <- list.files("Data/") + print(FILE_LIST) +#Check if a file exists in the Data directory +file.exists("Data/DIAMOND.csv") +#Check if the file in the list exists + file.exists(FILE_LIST[1]) +#Check if the file in the list exists using its path inside the Data directory + FILE_LOC <- paste0("./Data/",FILE_LIST[1]) + print(FILE_LOC) + file.exists(FILE_LOC) + ggplot(diamonds, aes(x = carat, y = price)) + geom_hex() ggsave("diamonds.png") dir.create("./data") -write_csv(diamonds, "data/diamonds.csv") \ No newline at end of file diff --git a/Class4/Comp_Econ_Code/diamonds.png b/Class4/Comp_Econ_Code/diamonds.png index 7748617..5ca4fd1 100644 Binary files a/Class4/Comp_Econ_Code/diamonds.png and b/Class4/Comp_Econ_Code/diamonds.png differ diff --git a/Class4/Slides/Bad_File_Names.png b/Class4/Slides/Bad_File_Names.png new file mode 100644 index 0000000..0c588f3 Binary files /dev/null and b/Class4/Slides/Bad_File_Names.png differ diff --git a/Class4/Slides/Best_Practice.tex b/Class4/Slides/Best_Practice.tex index f2a80f1..7bb5b99 100644 --- a/Class4/Slides/Best_Practice.tex +++ b/Class4/Slides/Best_Practice.tex @@ -18,6 +18,14 @@ \end{frame} %%%%%%%%%%%%%%%%%% +\begin{frame}{Shortcuts} + \begin{itemize} + \item \texttt{Alt + - (the minus sign)} creates an assignment arrow \texttt{<-} + \item{\texttt{Cmd/Ctrl + Shift + N} open a new Rscript} + \item{\texttt{Cmd/Ctrl + Enter.} execute the current line} + \item{\texttt{Cmd/Ctrl + Shift + S.} execute the entire script} + \end{itemize} +\end{frame} \begin{frame}{What problems have you run into?} \begin{centering} @@ -105,11 +113,12 @@ \end{frame} %%%%%%%%%%%%%%%%%% \begin{frame}{Class Exercise} - \huge - Download relevant data - - \normalsize - It is good practice to store the data is a separate folder in the location of the Rscript. Such as a "Data" folder. 
+ \centering + \begin{enumerate} + \item{Create a new project folder, make sure R does not auto-save.} + \item{Open R from your programs and run getwd()} + \item{Close R and open the R project, run getwd() again} + \end{enumerate} \end{frame} \begin{frame}{File Management} @@ -134,7 +143,7 @@ \only<1-14>{ \begin{columns}[T,onlytextwidth] \column{0.5\textwidth} - \textbf{Usefull Functions} + \textbf{Useful Functions} \begin{itemize} \onslide<2->{\item \texttt{create.dir()}} \onslide<3->{\item\texttt{dir.exists()}} @@ -155,17 +164,112 @@ } \end{columns} \vspace{1.5em} -\onslide<14>{Many other options in R review \texttt{help()} and the internet.} +\onslide<14->{For downloading raw files \texttt{download.file()} is great!} +\onslide<15>{Many other options exist in R; review \texttt{help()} and the internet.} } \only<15>{\includegraphics[width=\textwidth]{DIR.png}} \only<16>{\includegraphics[width=\textwidth]{DIR2.png}} \end{frame} - +%%%%%%%%%%% +\begin{frame}{Using Directories for Organization} + Keep your main working space clean, and separate what you can. +\vspace{1.5em} + \begin{enumerate} + \onslide<2->{\item Make different files for each main data step. Downloading, cleaning, various analyses.} + \onslide<3->{\item You can save the data downloading portion as a file called \texttt{1\_EIA\_Data\_Proc.R}} + \onslide<4->{\item In the main file you could load the code with \texttt{source(``1\_EIA\_Data\_Proc.R'')}} + \onslide<5->{R will run the code in the current directory even if the script is saved elsewhere.} +\end{enumerate} +\end{frame} +%%%%%%%%%%%%%%%%%%% +\begin{frame}{Class Exercise} + \centering + \begin{enumerate} + \item{Using the R project create a directory for data, with a sub-directory for raw data and cleaned data.} + \item{Using R create a directory for scripts} + \item{In R verify the directories were created using code.} + \item{All code should use relative paths. 
Make sure it can be run multiple times without errors.} + \end{enumerate} +\end{frame} +%%%%%%%%%%%% \begin{frame}{Naming Files} % \href{https://r4ds.hadley.nz/workflow-scripts.html#fn2'}{For an example DONT LEAVE AS A LINK FOR MY REFRENCE} - It might be tempting to name your files code.R or myscript.R, but you should think a bit harder before choosing a name for your file. Three important principles for file naming are as follows\: - File names should be machine readable\: avoid spaces, symbols, and special characters. Don’t rely on case sensitivity to distinguish files. - File names should be human readable\: use file names to describe what’s in the file. - File names should play well with default ordering\: start file names with numbers so that alphabetical sorting puts them in the order they get used. + File names should be \textbf{human readable}: + \begin{enumerate} + \onslide<2->{\item Use file names to describe what’s in the file.} + \onslide<3->{\item Don’t rely on case sensitivity to distinguish files.} + \onslide<4->{\item File names should play well with default ordering} + \onslide<5->{\item Start file names with numbers so that alphabetical sorting puts them in the order they get used.} + \onslide<6->{\item Include dates in the name of output files.} + \end{enumerate} + \onslide<7->{File names should be \textbf{computer readable}; avoid:} + \begin{enumerate} + \onslide<8->{\item spaces} + \onslide<9->{\item symbols } + \onslide<10->{\item special characters} + \end{enumerate} + \end{frame} +\begin{frame}{Class Exercise} + \only<1-7>{ + \only<1>{List as many issues as you can find in these file names. 
Explain why they might cause you problems in a long-term project.} + \only<2->{\includegraphics[width=\textwidth]{Bad_File_Names.png}} + \begin{itemize} + \onslide<3->{\item File names contain spaces} + \onslide<4->{\item Same name but different capitalization} + \onslide<5->{\item Names don’t describe their contents ``temp''} + \onslide<6->{\item Outputs should be in a separate directory from inputs} + \onslide<7->{\item Output files should have dates in the name} +\end{itemize} +} + \only<8>{\includegraphics[width=\textwidth]{Good_File_Names.png}} +\end{frame} +%%%%%%%%%%%%%%% +\begin{frame}{Commenting Code} + \only<1-3>{ + \onslide<1-3>{Comments are lines of code which do not run. These are \textbf{\emph{very}} important. } + \newline + \onslide<2-3>{ In R these are created by starting a line with ``\#''} +\onslide<3>{ + \begin{enumerate} + \item{You should add a comment to any line of code which is not highly simple.} + \item{Keep a clean comment style} + \item{Comment above a section of code. Indent the following code} + \item{Always include the why of your code. The what and how of your code are less critical} + \item{Think about what you would need to know if you forgot everything about the project} + \end{enumerate} +} +} +\only<4>{ + \includegraphics[width=0.7\textwidth]{What_Comments} + \includegraphics[width=\textwidth]{Why_Comments} + + } +\end{frame} +\begin{frame}{Section Comments} + Section comments visually separate chunks of code. It is an excellent practice to distinguish types of analysis, or groups of code. 
+ \newline +\vspace{1em} +\onslide<2->{\includegraphics[width=\textwidth]{Section_comments.png}} + \newline +\vspace{1em} +\onslide<3->{RStudio provides a keyboard shortcut to create these headers \texttt{(Cmd/Ctrl + Shift + R)}} +\end{frame} +%%%%%%%% +\begin{frame}{Class Exercise} + \begin{enumerate} +\item Identify a data set you are interested in on FRED + \item Load the data using a URL saved as a character variable + \item Create a raw data subdirectory and save the raw data into it + \item Clean the data, at least update the column names + \item Save the cleaned data as a csv and an RDS file in a cleaned data subdirectory with proper names + \item Include a comment on the first line of the file explaining what the code is used for and why. + \item Comment any other code as needed + \item Save the Rscript in a directory for scripts. Pay attention to the name + \item In the main directory create an Rscript and in that script start by loading the data loading script + \item After this add a section of code to find the mean and summary of the data +\end{enumerate} +\end{frame} + + \end{document} diff --git a/Class4/Slides/Good_File_Names.png b/Class4/Slides/Good_File_Names.png new file mode 100644 index 0000000..175a094 Binary files /dev/null and b/Class4/Slides/Good_File_Names.png differ diff --git a/Class4/Slides/Section_comments.png b/Class4/Slides/Section_comments.png new file mode 100644 index 0000000..2a31b06 Binary files /dev/null and b/Class4/Slides/Section_comments.png differ diff --git a/Class4/Slides/What_Comments.png b/Class4/Slides/What_Comments.png new file mode 100644 index 0000000..2ec77ea Binary files /dev/null and b/Class4/Slides/What_Comments.png differ diff --git a/Class4/Slides/Why_Comments.png b/Class4/Slides/Why_Comments.png new file mode 100644 index 0000000..3dd7f82 Binary files /dev/null and b/Class4/Slides/Why_Comments.png differ