Computational_Econ/Class4/Slides/Best_Practice.tex
Alex Gebben Work 8110df7d97 Created slides
2025-09-03 16:19:34 -06:00

276 lines
9.6 KiB
TeX
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

\documentclass{beamer}
\usepackage{graphicx}
\usepackage{multicol}
\usepackage{hyperref}
\usepackage{lipsum} % for placeholder text
\graphicspath{{pdf_images/}}
\title{ECON 4530/5530 \\ Computational Economics}
\subtitle{Best Practice of Coding}
\author{Alex Gebben}
\begin{document}
% Title Slide
\begin{frame}
\titlepage
\end{frame}
%%%%%%%%%%%%%%%%%%
\begin{frame}{Shortcuts}
\begin{itemize}
\item \texttt{Alt + -} (the minus sign) creates an assignment arrow \texttt{<-}
\item{\texttt{Cmd/Ctrl + Shift + N} open a new Rscript}
\item{\texttt{Cmd/Ctrl + Enter} execute the current line}
\item{\texttt{Cmd/Ctrl + Shift + S} execute the entire script}
\end{itemize}
\end{frame}
\begin{frame}{What problems have you run into?}
\begin{centering}
\begin{itemize}
\onslide<2->{\item Working directory location}
\onslide<3->{\item Backslash syntax \texttt{\textbackslash} vs \texttt{/}}
\onslide<4->{\item What data comes loaded on start?}
\onslide<5->{\item Forgetting what the code was for}
\onslide<6->{\item Need to reload from scratch}
\onslide<7->{\item What to do when the code fails}
\end{itemize}
\end{centering}
\end{frame}
\begin{frame}{Work flow}
\only<1-6>{
At some point you will:
\begin{itemize}
\onslide<2->{\item Reload an old project}
\onslide<3->{\item Share code with others}
\onslide<4->{\item Run multiple projects at once}
\onslide<5->{\item Need to load and export data}
\onslide<6->{\item Need to reproduce a result}
\end{itemize}
}
\only<7>{\huge Let's make life easier (and harder)}
\only<8>{
% Top half: two columns
\vspace{-0.5em}
\begin{columns}[T,onlytextwidth]
\column{0.5\textwidth}
\includegraphics[width=\textwidth]{clean-slate}
\column{0.5\textwidth}
\textbf{Never save an R session}
\begin{itemize}
\item{You won't get tangled by lines out of order}
\item{Makes code reproducible}
\item{You can update results more easily}
\end{itemize}
\end{columns}
}
\only<9->{
Alternatively, you can run code to clear all data at the start of each session, \texttt{usethis::use\_blank\_slate()}, but you will need to install the library \texttt{"usethis"}:
\begin{enumerate}
\item \texttt{install.packages("usethis")}
\item \texttt{library("usethis")}
\end{enumerate}
}
\end{frame}
%%%%%%%%%%%%%%
\begin{frame}{RStudio Projects}
\only<1-5>{
If you are using RStudio, it is best practice to create a project file.
\begin{itemize}
\onslide<2->{\item Sets the working directory.}
\onslide<3->{\item Allows all code to use relative paths.}
\onslide<4->{\item \emph{Advanced:} Can store all packages with the library renv.}
\onslide<5->{\item \emph{Advanced:} Can use version control with Git (more on that later).}
\end{itemize}
}
\only<6>{ \includegraphics[width=0.9\textwidth]{new-project}}
\only<7->{
\begin{columns}[T,onlytextwidth]
\column{0.6\textwidth}
\begin{center}
\includegraphics[width=\textwidth]{Project_file.png}
\includegraphics[width=0.5\textwidth]{Project_file_Contents.png}
\end{center}
\column{0.4\textwidth}
\begin{enumerate}
\item Creates a .Rproj file.
\item Open this file from R.
\item Saved paths
\end{enumerate}
\end{columns}
}
\end{frame}
%%%%%%%%%%%%%%%%%%
\begin{frame}{Class Exercise}
\centering
\begin{enumerate}
\item{Create a new project folder, make sure R does not auto-save.}
\item{Open R from your programs and run \texttt{getwd()}.}
\item{Close R and open the R project, then run \texttt{getwd()} again.}
\end{enumerate}
\end{frame}
\begin{frame}{File Management}
\begin{itemize}
\onslide<1->{\item Create directories for data}
\onslide<2->{\item Create directories for scripts}
\onslide<3->{\item Make code do most of the cleaning}
\onslide<4->{\item Clear naming of files}
\onslide<5->{\item Document all files with a Readme file}
\end{itemize}
\end{frame}
\begin{frame}{Create Directories}
\begin{itemize}
\onslide<1->{\item Create directories for data}
\onslide<2->{\item Create directories for scripts}
\onslide<3->{\item Clear naming of files}
\onslide<4->{\item Document all files with a Readme file}
\end{itemize}
\end{frame}
\begin{frame}{Manage Directories}
\only<1-14>{
\begin{columns}[T,onlytextwidth]
\column{0.5\textwidth}
\textbf{Useful Functions}
\begin{itemize}
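% NOTE(review): the original entry create.dir() is not a base R function (dir.create() is already listed below); file.create() is a guess at the intended entry -- confirm.
\onslide<2->{\item \texttt{file.create()}}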
\onslide<3->{\item\texttt{dir.exists()}}
\onslide<4->{\item\texttt{file.exists()}}
\onslide<5->{\item\texttt{list.files()}}
\onslide<6->{\item\texttt{dir.create()}}
\end{itemize}
\column{0.5\textwidth}
\onslide<7->{\textbf{Saving Data}
\begin{itemize}
\onslide<8->{\item \texttt{write\_csv()}}
\onslide<9->{\item \texttt{write\_excel\_csv()}}
\onslide<10->{\item\texttt{write\_delim()}}
\onslide<11->{\item\texttt{saveRDS()}}
\onslide<12->{\item\texttt{ggsave()}}
\onslide<13->{\item\texttt{write.csv()}}
\end{itemize}
}
\end{columns}
\vspace{1.5em}
\onslide<14->{For downloading raw files \texttt{download.file()} is great!}
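% Shown from step 14: the enclosing \only<1-14> hides this block on step 15, so \onslide<15> could never display.
\onslide<14->{There are many other options in R; review \texttt{help()} and the internet.}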
}
\only<15>{\includegraphics[width=\textwidth]{DIR.png}}
\only<16>{\includegraphics[width=\textwidth]{DIR2.png}}
\end{frame}
%%%%%%%%%%%
\begin{frame}{Using Directories for Organization}
Keep your main working space clean, and separate what you can.
\vspace{1.5em}
\begin{enumerate}
\onslide<2->{\item Make different files for each main data step: downloading, cleaning, and the various analyses.}
\onslide<3->{\item You can save the data downloading portion as a file called \texttt{1\_EIA\_Data\_Proc.R}}
\onslide<4->{\item In the main file you could load the code with \texttt{source("1\_EIA\_Data\_Proc.R")}}
\onslide<5->{R will run the code in the current directory even if the script is saved elsewhere.}
\end{enumerate}
\end{frame}
%%%%%%%%%%%%%%%%%%%
\begin{frame}{Class Exercise}
\centering
\begin{enumerate}
\item{Using the R project create a directory for data, with a sub-directory for raw data and cleaned data.}
\item{Using R, create a directory for scripts.}
\item{In R verify the directories were created using code.}
\item{All code should use relative paths. Make sure it can be run multiple times without errors.}
\end{enumerate}
\end{frame}
%%%%%%%%%%%%
\begin{frame}{Naming Files}
% \href{https://r4ds.hadley.nz/workflow-scripts.html#fn2'}{For an example DONT LEAVE AS A LINK FOR MY REFRENCE}
File names should be \textbf{human readable}:
\begin{enumerate}
\onslide<2->{\item Use file names to describe what's in the file.}
\onslide<3->{\item Don't rely on case sensitivity to distinguish files.}
\onslide<4->{\item File names should play well with default ordering}
\onslide<5->{\item Start file names with numbers so that alphabetical sorting puts them in the order they get used.}
\onslide<6->{\item Include dates in the name of output files.}
\end{enumerate}
\onslide<7->{File names should be \textbf{computer readable}; avoid:}
\begin{enumerate}
\onslide<8->{\item spaces}
\onslide<9->{\item symbols }
\onslide<10->{\item special characters}
\end{enumerate}
\end{frame}
\begin{frame}{Class Exercise}
\only<1-7>{
\only<1>{List as many issues as you can find in these file names. Explain why they might cause you problems in a long-term project.}
\only<2->{\includegraphics[width=\textwidth]{Bad_File_Names.png}}
\begin{itemize}
\onslide<3->{\item File names contain spaces}
\onslide<4->{\item Same name but different capitalization}
\onslide<5->{\item Names don't describe their contents, e.g.\ ``temp''}
\onslide<6->{\item Outputs should be in a separate directory from inputs}
\onslide<7->{\item Output files should have dates in the name}
\end{itemize}
}
\only<8>{\includegraphics[width=\textwidth]{Good_File_Names.png}}
\end{frame}
%%%%%%%%%%%%%%%
\begin{frame}{Commenting Code}
\only<1-3>{
\onslide<1-3>{Comments are lines of code which do not run. These are \textbf{\emph{very}} important. }
\newline
\onslide<2-3>{ In R these are created by starting a line with ``\#''}
\onslide<3>{
\begin{enumerate}
\item{You should add a comment to any line of code that is not immediately obvious.}
\item{Keep a clean comment style}
\item{Comment above a section of code. Indent the following code}
\item{Always include the why of your code. The what and how of your code is less critical}
\item{Think about what you would need to know if you forgot everything about the project}
\end{enumerate}
}
}
\only<4>{
\includegraphics[width=0.7\textwidth]{What_Comments}
\includegraphics[width=\textwidth]{Why_Comments}
}
\end{frame}
\begin{frame}{Section Comments}
Section comments visually separate chunks of code. It is an excellent practice to distinguish types of analysis, or groups of code.
\newline
\vspace{1em}
\onslide<2->{\includegraphics[width=\textwidth]{Section_comments.png}}
\newline
\vspace{1em}
\onslide<3->{RStudio provides a keyboard shortcut to create these headers \texttt{(Cmd/Ctrl + Shift + R)}}
\end{frame}
%%%%%%%%
\begin{frame}{Class Exercise}
\begin{enumerate}
\item Identify a data set you are interested in on FRED
\item Load the data using a URL saved as a character variable
\item Create a raw data subdirectory and save the raw data into it
\item Clean the data, at least update the column names
\item Save the cleaned data as csv and an RDS file in a cleaned data subdirectory with proper names
\item Include a comment on the first line of the file explaining what the code is used for and why
\item Comment any other code as needed
\item Save the Rscript in a directory for scripts. Pay attention to the name
\item In the main directory create an Rscript, and in that script start by loading the data-loading script
\item After this add a section of code to find the mean, and summary of the data
\end{enumerate}
\end{frame}
\end{document}