275 lines
9.6 KiB
TeX
275 lines
9.6 KiB
TeX
\documentclass{beamer}
|
||
\usepackage{graphicx}
|
||
\usepackage{multicol}
|
||
\usepackage{hyperref}
|
||
\usepackage{lipsum} % for placeholder text
|
||
\graphicspath{{pdf_images/}}
|
||
|
||
|
||
\title{ECON 4530/5530 \\ Computational Economics}
|
||
\subtitle{Best Practice of Coding}
|
||
\author{Alex Gebben}
|
||
|
||
\begin{document}
|
||
|
||
% Title Slide
|
||
\begin{frame}
|
||
\titlepage
|
||
\end{frame}
|
||
|
||
%%%%%%%%%%%%%%%%%%
|
||
\begin{frame}{Shortcuts}
|
||
\begin{itemize}
|
||
\item \texttt{Alt + - (the minus sign)} creates an assignment arrow \texttt{<-}
|
||
\item{\texttt{Cmd/Ctrl + Shift + N} opens a new R script}
|
||
\item{\texttt{Cmd/Ctrl + Enter} executes the current line}
|
||
\item{\texttt{Cmd/Ctrl + Shift + S} executes the entire script}
|
||
\end{itemize}
|
||
\end{frame}
|
||
|
||
\begin{frame}{What problems have you run into?}
|
||
\begin{centering}
|
||
\begin{itemize}
|
||
\onslide<2->{\item Working directory location}
|
||
\onslide<3->{\item Backslash syntax \texttt{\textbackslash} vs \texttt{/}}
|
||
\onslide<4->{\item What data comes loaded on start?}
|
||
\onslide<5->{\item Forgetting what the code was for}
|
||
\onslide<6->{\item Need to reload from scratch}
|
||
\onslide<7->{\item What to do when the code fails}
|
||
\end{itemize}
|
||
\end{centering}
|
||
\end{frame}
|
||
|
||
|
||
\begin{frame}{Workflow}
|
||
\only<1-6>{
|
||
At some point you will:
|
||
\begin{itemize}
|
||
\onslide<2->{\item Reload an old project}
|
||
\onslide<3->{\item Share code with others}
|
||
\onslide<4->{\item Run multiple projects at once}
|
||
\onslide<5->{\item Need to load and export data}
|
||
\onslide<6->{\item Need to reproduce a result}
|
||
\end{itemize}
|
||
|
||
}
|
||
|
||
\only<7>{\huge Let's make life easier (and harder)}
|
||
\only<8>{
|
||
% Top half: two columns
|
||
\vspace{-0.5em}
|
||
\begin{columns}[T,onlytextwidth]
|
||
\column{0.5\textwidth}
|
||
\includegraphics[width=\textwidth]{clean-slate}
|
||
\column{0.5\textwidth}
|
||
\textbf{Never save an R session}
|
||
\begin{itemize}
|
||
\item{You won't get tangled by lines out of order}
|
||
\item{Makes code reproducible}
|
||
\item{You can update results more easily}
|
||
\end{itemize}
|
||
|
||
\end{columns}
|
||
}
|
||
\only<9->{
|
||
Alternatively, you can run code to clear all data at the start of each session \texttt{\\usethis::use\_blank\_slate()}, but you will need to install the library \texttt{"usethis"}:
|
||
\begin{enumerate}
|
||
\item \texttt{install.packages("usethis")}
|
||
\item \texttt{library("usethis")}
|
||
\end{enumerate}
|
||
}
|
||
|
||
|
||
|
||
\end{frame}
|
||
%%%%%%%%%%%%%%
|
||
\begin{frame}{RStudio Projects}
|
||
\only<1-5>{
|
||
If you are using RStudio, it is best practice to create a project file.
|
||
\begin{itemize}
|
||
\onslide<2->{\item Sets the working directory.}
|
||
\onslide<3->{\item Allows all code to use relative paths.}
|
||
\onslide<4->{\item \emph{Advanced:} Can store all packages with the library renv.}
|
||
\onslide<5->{\item \emph{Advanced:} Can use version control with Git (more on that later).}
|
||
\end{itemize}
|
||
}
|
||
\only<6>{ \includegraphics[width=0.9\textwidth]{new-project}}
|
||
\only<7->{
|
||
\begin{columns}[T,onlytextwidth]
|
||
\column{0.6\textwidth}
|
||
\begin{center}
|
||
\includegraphics[width=\textwidth]{Project_file.png}
|
||
\includegraphics[width=0.5\textwidth]{Project_file_Contents.png}
|
||
\end{center}
|
||
\column{0.4\textwidth}
|
||
\begin{enumerate}
|
||
\item Creates a .Rproj file.
|
||
\item Open this file from R.
|
||
\item Saved paths
|
||
\end{enumerate}
|
||
\end{columns}
|
||
|
||
}
|
||
\end{frame}
|
||
%%%%%%%%%%%%%%%%%%
|
||
\begin{frame}{Class Exercise}
|
||
\centering
|
||
\begin{enumerate}
|
||
\item{Create a new project folder, make sure R does not auto-save.}
|
||
\item{Open R from your programs and run \texttt{getwd()}.}
|
||
\item{Close R and open the R project, then run \texttt{getwd()} again.}
|
||
\end{enumerate}
|
||
\end{frame}
|
||
|
||
\begin{frame}{File Management}
|
||
\begin{itemize}
|
||
\onslide<1->{\item Create directories for data}
|
||
\onslide<2->{\item Create directories for scripts}
|
||
\onslide<3->{\item Make code do most of the cleaning}
|
||
\onslide<4->{\item Clear naming of files}
|
||
\onslide<5->{\item Document all files with a Readme file}
|
||
\end{itemize}
|
||
\end{frame}
|
||
\begin{frame}{Create Directories}
|
||
\begin{itemize}
|
||
\onslide<1->{\item Create directories for data}
|
||
\onslide<2->{\item Create directories for scripts}
|
||
\onslide<3->{\item Clear naming of files}
|
||
\onslide<4->{\item Document all files with a Readme file}
|
||
\end{itemize}
|
||
\end{frame}
|
||
\begin{frame}{Manage Directories}
|
||
|
||
\only<1-14>{
|
||
\begin{columns}[T,onlytextwidth]
|
||
\column{0.5\textwidth}
|
||
\textbf{Useful Functions}
|
||
\begin{itemize}
|
||
\onslide<2->{\item \texttt{dir.create()}}
|
||
\onslide<3->{\item\texttt{dir.exists()}}
|
||
\onslide<4->{\item\texttt{file.exists()}}
|
||
\onslide<5->{\item\texttt{list.files()}}
|
||
\end{itemize}
|
||
\column{0.5\textwidth}
|
||
\onslide<6->{\textbf{Saving Data}
|
||
\begin{itemize}
|
||
\onslide<7->{\item \texttt{write\_csv()}}
|
||
\onslide<8->{\item \texttt{write\_excel\_csv()}}
|
||
\onslide<9->{\item\texttt{write\_delim()}}
|
||
\onslide<10->{\item\texttt{saveRDS()}}
|
||
\onslide<11->{\item\texttt{ggsave()}}
|
||
\onslide<12->{\item\texttt{write.csv()}}
|
||
\end{itemize}
|
||
}
|
||
\end{columns}
|
||
\vspace{1.5em}
|
||
\onslide<13->{For downloading raw files \texttt{download.file()} is great!}
|
||
\onslide<14->{Many other options exist in R; review \texttt{help()} and the internet.}
|
||
}
|
||
\only<15>{\includegraphics[width=\textwidth]{DIR.png}}
|
||
\only<16>{\includegraphics[width=\textwidth]{DIR2.png}}
|
||
\end{frame}
|
||
%%%%%%%%%%%
|
||
\begin{frame}{Using Directories for Organization}
|
||
Keep your main working space clean, and separate what you can.
|
||
\vspace{1.5em}
|
||
\begin{enumerate}
|
||
\onslide<2->{\item Make different files for each main data step. Downloading, cleaning, various analyses.}
|
||
\onslide<3->{\item You can save the data downloading portion as a file called \texttt{1\_EIA\_Data\_Proc.R}}
|
||
\onslide<4->{\item In the main file you could load the code with \texttt{source("1\_EIA\_Data\_Proc.R")}}
|
||
\onslide<5->{R will run the code in the current directory even if the script is saved elsewhere.}
|
||
\end{enumerate}
|
||
\end{frame}
|
||
%%%%%%%%%%%%%%%%%%%
|
||
\begin{frame}{Class Exercise}
|
||
\centering
|
||
\begin{enumerate}
|
||
\item{Using the R project, create a directory for data, with sub-directories for raw data and cleaned data.}
|
||
\item{Using R, create a directory for scripts.}
|
||
\item{In R verify the directories were created using code.}
|
||
\item{All code should use relative paths. Make sure it can be run multiple times without errors.}
|
||
\end{enumerate}
|
||
\end{frame}
|
||
%%%%%%%%%%%%
|
||
\begin{frame}{Naming Files}
|
||
% \href{https://r4ds.hadley.nz/workflow-scripts.html#fn2'}{For an example DONT LEAVE AS A LINK FOR MY REFRENCE}
|
||
File names should be \textbf{human readable}:
|
||
\begin{enumerate}
|
||
\onslide<2->{\item Use file names to describe what’s in the file.}
|
||
\onslide<3->{\item Don’t rely on case sensitivity to distinguish files.}
|
||
\onslide<4->{\item File names should play well with default ordering}
|
||
\onslide<5->{\item Start file names with numbers so that alphabetical sorting puts them in the order they get used.}
|
||
\onslide<6->{\item Include dates in the name of output files.}
|
||
\end{enumerate}
|
||
\onslide<7->{File names should be \textbf{computer readable}; avoid:}
|
||
\begin{enumerate}
|
||
\onslide<8->{\item spaces}
|
||
\onslide<9->{\item symbols }
|
||
\onslide<10->{\item special characters}
|
||
\end{enumerate}
|
||
|
||
\end{frame}
|
||
\begin{frame}{Class Exercise}
|
||
\only<1-7>{
|
||
\only<1>{List as many issues as you can find in these file names. Explain why they might cause you problems in a long-term project.}
|
||
\only<2->{\includegraphics[width=\textwidth]{Bad_File_Names.png}}
|
||
\begin{itemize}
|
||
\onslide<3->{\item File names contain spaces}
|
||
\onslide<4->{\item Same name but different capitalization}
|
||
\onslide<5->{\item Names don't describe their contents (e.g.\ ``temp'')}
|
||
\onslide<6->{\item Outputs should be in a separate directory from inputs}
|
||
\onslide<7->{\item Output files should have dates in the name}
|
||
\end{itemize}
|
||
}
|
||
\only<8>{\includegraphics[width=\textwidth]{Good_File_Names.png}}
|
||
\end{frame}
|
||
%%%%%%%%%%%%%%%
|
||
\begin{frame}{Commenting Code}
|
||
\only<1-3>{
|
||
\onslide<1-3>{Comments are lines of code which do not run. These are \textbf{\emph{very}} important. }
|
||
\newline
|
||
\onslide<2-3>{ In R these are created by starting a line with ``\#''}
|
||
\onslide<3>{
|
||
\begin{enumerate}
|
||
\item{You should add a comment to any line of code that is not self-explanatory.}
|
||
\item{Keep a clean comment style}
|
||
\item{Comment above a section of code. Indent the following code}
|
||
\item{Always include the why of your code. The what and how of your code is less critical}
|
||
\item{Think about what you would need to know if you forgot everything about the project}
|
||
\end{enumerate}
|
||
}
|
||
}
|
||
\only<4>{
|
||
\includegraphics[width=0.7\textwidth]{What_Comments}
|
||
\includegraphics[width=\textwidth]{Why_Comments}
|
||
|
||
}
|
||
\end{frame}
|
||
\begin{frame}{Section Comments}
|
||
Section comments visually separate chunks of code. It is an excellent practice to distinguish types of analysis, or groups of code.
|
||
\newline
|
||
\vspace{1em}
|
||
\onslide<2->{\includegraphics[width=\textwidth]{Section_comments.png}}
|
||
\newline
|
||
\vspace{1em}
|
||
\onslide<3->{RStudio provides a keyboard shortcut to create these headers \texttt{(Cmd/Ctrl + Shift + R)}}
|
||
\end{frame}
|
||
%%%%%%%%
|
||
\begin{frame}{Class Exercise}
|
||
\begin{enumerate}
|
||
\item Identify a data set you are interested in on FRED
|
||
\item Load the data using a URL saved as a character variable
|
||
\item Create a raw data subdirectory and save the raw data into it
|
||
\item Clean the data, at least update the column names
|
||
\item Save the cleaned data as a CSV file and an RDS file in a cleaned data subdirectory with proper names
|
||
\item Include a comment on the first line of the file explaining what the code is used for and why.
|
||
\item Comment any other code as needed
|
||
\item Save the R script in a directory for scripts. Pay attention to the name
|
||
\item In the main directory create an R script, and in that script start by sourcing the data-loading script
|
||
\item After this add a section of code to find the mean, and summary of the data
|
||
\end{enumerate}
|
||
\end{frame}
|
||
|
||
|
||
\end{document}
|