Created slides
@ -1,8 +1,40 @@
|
|||||||
library(tidyverse)
|
####Load libraries
|
||||||
getwd()
|
library(tidyverse) #tidyverse is used to organize the data with read_csv()
|
||||||
|
library(fixest) #Fixest is loaded so a fixed effect model can be used.
|
||||||
|
#Creating directories to store the raw data set,
|
||||||
|
# so the results can be reproduced more easily if the data changes.
|
||||||
|
dir.create("Data",showWarnings = FALSE)
|
||||||
|
dir.create("Data/Orig_Data",showWarnings = FALSE)
|
||||||
|
#Check if directory exists
|
||||||
|
dir.exists("Data/Orig_Data")
|
||||||
|
|
||||||
|
# Help --------------------------------------------------------------------
|
||||||
|
|
||||||
|
|
||||||
|
#List all files in the working directory
|
||||||
|
list.files()
|
||||||
|
#List all files in the working directory that end in png
|
||||||
|
list.files("./",pattern="*.png")
|
||||||
|
#list all files in the Data folder starting from the working directory
|
||||||
|
list.files("Data/")
|
||||||
|
#write a csv file to the directory
|
||||||
|
write_csv(diamonds, "Data/diamonds.csv")
|
||||||
|
#write a tab separated file to the directory (tsv)
|
||||||
|
write_tsv(diamonds, "Data/diamonds.tsv")
|
||||||
|
#list all files in the Data directory. Save in a variable.
|
||||||
|
FILE_LIST <- list.files("Data/")
|
||||||
|
print(FILE_LIST)
|
||||||
|
#Check if a file exists in the Data directory
|
||||||
|
file.exists("Data/DIAMOND.csv")
|
||||||
|
#Check if the file in the list exists
|
||||||
|
file.exists(FILE_LIST[1])
|
||||||
|
#Build the relative path to the file in the list, then check if it exists
|
||||||
|
FILE_LOC <- paste0("./Data/",FILE_LIST[1])
|
||||||
|
print(FILE_LOC)
|
||||||
|
file.exists(FILE_LOC)
|
||||||
|
|
||||||
|
|
||||||
ggplot(diamonds, aes(x = carat, y = price)) +
|
ggplot(diamonds, aes(x = carat, y = price)) +
|
||||||
geom_hex()
|
geom_hex()
|
||||||
ggsave("diamonds.png")
|
ggsave("diamonds.png")
|
||||||
dir.create("./data")
|
dir.create("./data")
|
||||||
write_csv(diamonds, "data/diamonds.csv")
|
|
||||||
|
Before Width: | Height: | Size: 169 KiB After Width: | Height: | Size: 152 KiB |
BIN
Class4/Slides/Bad_File_Names.png
Normal file
|
After Width: | Height: | Size: 42 KiB |
@ -18,6 +18,14 @@
|
|||||||
\end{frame}
|
\end{frame}
|
||||||
|
|
||||||
%%%%%%%%%%%%%%%%%%
|
%%%%%%%%%%%%%%%%%%
|
||||||
|
\begin{frame}{Shortcuts}
|
||||||
|
\begin{itemize}
|
||||||
|
\item \texttt{Alt + - (the minus sign)} creates the assignment arrow \texttt{<-}
|
||||||
|
\item{\texttt{Cmd/Ctrl + Shift + N} opens a new R script}
|
||||||
|
\item{\texttt{Cmd/Ctrl + Enter} executes the current line}
|
||||||
|
\item{\texttt{Cmd/Ctrl + Shift + S} executes the entire script}
|
||||||
|
\end{itemize}
|
||||||
|
\end{frame}
|
||||||
|
|
||||||
\begin{frame}{What problems have you run into?}
|
\begin{frame}{What problems have you run into?}
|
||||||
\begin{centering}
|
\begin{centering}
|
||||||
@ -105,11 +113,12 @@
|
|||||||
\end{frame}
|
\end{frame}
|
||||||
%%%%%%%%%%%%%%%%%%
|
%%%%%%%%%%%%%%%%%%
|
||||||
\begin{frame}{Class Exercise}
|
\begin{frame}{Class Exercise}
|
||||||
\huge
|
\centering
|
||||||
Download relevant data
|
\begin{enumerate}
|
||||||
|
\item{Create a new project folder, make sure R does not auto-save.}
|
||||||
\normalsize
|
\item{Open R from your programs and run getwd()}
|
||||||
It is good practice to store the data is a separate folder in the location of the Rscript. Such as a "Data" folder.
|
\item{Close R and open the R project, run getwd() again}
|
||||||
|
\end{enumerate}
|
||||||
\end{frame}
|
\end{frame}
|
||||||
|
|
||||||
\begin{frame}{File Management}
|
\begin{frame}{File Management}
|
||||||
@ -134,7 +143,7 @@
|
|||||||
\only<1-14>{
|
\only<1-14>{
|
||||||
\begin{columns}[T,onlytextwidth]
|
\begin{columns}[T,onlytextwidth]
|
||||||
\column{0.5\textwidth}
|
\column{0.5\textwidth}
|
||||||
\textbf{Usefull Functions}
|
\textbf{Useful Functions}
|
||||||
\begin{itemize}
|
\begin{itemize}
|
||||||
\onslide<2->{\item \texttt{dir.create()}}
|
\onslide<2->{\item \texttt{dir.create()}}
|
||||||
\onslide<3->{\item\texttt{dir.exists()}}
|
\onslide<3->{\item\texttt{dir.exists()}}
|
||||||
@ -155,17 +164,112 @@
|
|||||||
}
|
}
|
||||||
\end{columns}
|
\end{columns}
|
||||||
\vspace{1.5em}
|
\vspace{1.5em}
|
||||||
\onslide<14>{Many other options in R review \texttt{help()} and the internet.}
|
\onslide<14->{For downloading raw files \texttt{download.file()} is great!}
|
||||||
|
\onslide<15>{Many other options in R review \texttt{help()} and the internet.}
|
||||||
}
|
}
|
||||||
\only<15>{\includegraphics[width=\textwidth]{DIR.png}}
|
\only<15>{\includegraphics[width=\textwidth]{DIR.png}}
|
||||||
\only<16>{\includegraphics[width=\textwidth]{DIR2.png}}
|
\only<16>{\includegraphics[width=\textwidth]{DIR2.png}}
|
||||||
\end{frame}
|
\end{frame}
|
||||||
|
%%%%%%%%%%%
|
||||||
|
\begin{frame}{Using Directories for Organization}
|
||||||
|
Keep your main working space clean, and separate what you can.
|
||||||
|
\vspace{1.5em}
|
||||||
|
\begin{enumerate}
|
||||||
|
\onslide<2->{\item Make different files for each main data step: downloading, cleaning, and the various analyses.}
|
||||||
|
\onslide<3->{\item You can save the data downloading portion as a file called \texttt{1\_EIA\_Data\_Proc.R}}
|
||||||
|
\onslide<4->{\item In the main file you could load the code with \texttt{source(``1\_EIA\_Data\_Proc.R'')}}
|
||||||
|
\onslide<5->{R will run the sourced code relative to the current working directory, even if the script is saved elsewhere.}
|
||||||
|
\end{enumerate}
|
||||||
|
\end{frame}
|
||||||
|
%%%%%%%%%%%%%%%%%%%
|
||||||
|
\begin{frame}{Class Exercise}
|
||||||
|
\centering
|
||||||
|
\begin{enumerate}
|
||||||
|
\item{Using the R project create a directory for data, with a sub-directory for raw data and cleaned data.}
|
||||||
|
\item{Using R, create a directory for scripts.}
|
||||||
|
\item{In R verify the directories were created using code.}
|
||||||
|
\item{All code should use relative paths. Make sure it can be run multiple times without errors.}
|
||||||
|
\end{enumerate}
|
||||||
|
\end{frame}
|
||||||
|
%%%%%%%%%%%%
|
||||||
\begin{frame}{Naming Files}
|
\begin{frame}{Naming Files}
|
||||||
% \href{https://r4ds.hadley.nz/workflow-scripts.html#fn2'}{For an example DONT LEAVE AS A LINK FOR MY REFRENCE}
|
% \href{https://r4ds.hadley.nz/workflow-scripts.html#fn2'}{For an example DONT LEAVE AS A LINK FOR MY REFRENCE}
|
||||||
It might be tempting to name your files code.R or myscript.R, but you should think a bit harder before choosing a name for your file. Three important principles for file naming are as follows\:
|
File names should be \textbf{human readable}:
|
||||||
File names should be machine readable\: avoid spaces, symbols, and special characters. Don’t rely on case sensitivity to distinguish files.
|
\begin{enumerate}
|
||||||
File names should be human readable\: use file names to describe what’s in the file.
|
\onslide<2->{\item Use file names to describe what’s in the file.}
|
||||||
File names should play well with default ordering\: start file names with numbers so that alphabetical sorting puts them in the order they get used.
|
\onslide<3->{\item Don’t rely on case sensitivity to distinguish files.}
|
||||||
|
\onslide<4->{\item File names should play well with default ordering}
|
||||||
|
\onslide<5->{\item Start file names with numbers so that alphabetical sorting puts them in the order they get used.}
|
||||||
|
\onslide<6->{\item Include dates in the name of output files.}
|
||||||
|
\end{enumerate}
|
||||||
|
\onslide<7->{File names should be \textbf{computer readable}. Avoid:}
|
||||||
|
\begin{enumerate}
|
||||||
|
\onslide<8->{\item spaces}
|
||||||
|
\onslide<9->{\item symbols }
|
||||||
|
\onslide<10->{\item special characters}
|
||||||
|
\end{enumerate}
|
||||||
|
|
||||||
\end{frame}
|
\end{frame}
|
||||||
|
\begin{frame}{Class Exercise}
|
||||||
|
\only<1-7>{
|
||||||
|
\only<1>{List as many issues as you can find in these file names. Explain why they might cause you problems in a long-term project.}
|
||||||
|
\only<2->{\includegraphics[width=\textwidth]{Bad_File_Names.png}}
|
||||||
|
\begin{itemize}
|
||||||
|
\onslide<3->{\item File names contain spaces}
|
||||||
|
\onslide<4->{\item Same name but different capitalization}
|
||||||
|
\onslide<5->{\item Names don’t describe their contents (e.g., ``temp'')}
|
||||||
|
\onslide<6->{\item Outputs should be in a separate directory from inputs}
|
||||||
|
\onslide<7->{\item Output files should have dates in the name}
|
||||||
|
\end{itemize}
|
||||||
|
}
|
||||||
|
\only<8>{\includegraphics[width=\textwidth]{Good_File_Names.png}}
|
||||||
|
\end{frame}
|
||||||
|
%%%%%%%%%%%%%%%
|
||||||
|
\begin{frame}{Commenting Code}
|
||||||
|
\only<1-3>{
|
||||||
|
\onslide<1-3>{Comments are lines of code which do not run. These are \textbf{\emph{very}} important. }
|
||||||
|
\newline
|
||||||
|
\onslide<2-3>{ In R these are created by starting a line with ``\#''}
|
||||||
|
\onslide<3>{
|
||||||
|
\begin{enumerate}
|
||||||
|
\item{You should add a comment to any line of code which is not highly simple.}
|
||||||
|
\item{Keep a clean comment style}
|
||||||
|
\item{Comment above a section of code. Indent the following code}
|
||||||
|
\item{Always include the why of your code. The what and how of your code are less critical}
|
||||||
|
\item{Think about what you would need to know if you forgot everything about the project}
|
||||||
|
\end{enumerate}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
\only<4>{
|
||||||
|
\includegraphics[width=0.7\textwidth]{What_Comments}
|
||||||
|
\includegraphics[width=\textwidth]{Why_Comments}
|
||||||
|
|
||||||
|
}
|
||||||
|
\end{frame}
|
||||||
|
\begin{frame}{Section Comments}
|
||||||
|
Section comments visually separate chunks of code. It is an excellent practice to distinguish types of analysis, or groups of code.
|
||||||
|
\newline
|
||||||
|
\vspace{1em}
|
||||||
|
\onslide<2->{\includegraphics[width=\textwidth]{Section_comments.png}}
|
||||||
|
\newline
|
||||||
|
\vspace{1em}
|
||||||
|
\onslide<3->{RStudio provides a keyboard shortcut to create these headers \texttt{(Cmd/Ctrl + Shift + R)}}
|
||||||
|
\end{frame}
|
||||||
|
%%%%%%%%
|
||||||
|
\begin{frame}{Class Exercise}
|
||||||
|
\begin{enumerate}
|
||||||
|
\item Identify a data set you are interested in on FRED
|
||||||
|
\item Load the data using a URL saved as a character variable
|
||||||
|
\item Create a raw data subdirectory and save the raw data into it
|
||||||
|
\item Clean the data, at least update the column names
|
||||||
|
\item Save the cleaned data as a CSV file and an RDS file in a cleaned data subdirectory with proper names
|
||||||
|
\item Include a comment on the first line of the file explaining what the code is used for and why.
|
||||||
|
\item Comment any other code as needed
|
||||||
|
\item Save the Rscript in a directory for scripts. Pay attention to the name
|
||||||
|
\item In the main directory create an R script and, in that script, start by sourcing the data-loading script
|
||||||
|
\item After this, add a section of code to find the mean and a summary of the data
|
||||||
|
\end{enumerate}
|
||||||
|
\end{frame}
|
||||||
|
|
||||||
|
|
||||||
\end{document}
|
\end{document}
|
||||||
|
|||||||
BIN
Class4/Slides/Good_File_Names.png
Normal file
|
After Width: | Height: | Size: 54 KiB |
BIN
Class4/Slides/Section_comments.png
Normal file
|
After Width: | Height: | Size: 11 KiB |
BIN
Class4/Slides/What_Comments.png
Normal file
|
After Width: | Height: | Size: 28 KiB |
BIN
Class4/Slides/Why_Comments.png
Normal file
|
After Width: | Height: | Size: 56 KiB |