\documentclass{beamer}
\usepackage{graphicx}
\usepackage{multicol}
\usepackage{hyperref}
\usepackage{verbatim}
\usepackage{lipsum} % for placeholder text
\graphicspath{{pdf_images/}}


\title{ECON 4530/5530 \\ Computational Economics}
\subtitle{Data transformation and Loops}
\author{Alex Gebben}

\begin{document}

% Title Slide
\begin{frame}
\titlepage
\end{frame}
%%%%%%%%%%%%%%%%%%
\begin{frame}{Loops}
\only<1->{There are three types of loops in R:}
\begin{enumerate}
\item<2-> A \emph{for} loop runs for each value in a list
\begin{itemize}
\item<3-> \texttt{for(i in 1:10)\{Code to run\}}
\end{itemize}
\item<4-> A \emph{while} loop runs as long as a condition is met. \emph{Can lead to infinite loops}
\begin{itemize}
\item<5-> \texttt{while(a==b)\{Code to run\}}
\end{itemize}
\item<6-> \emph{apply} functions call a function on each element of a list or vector
\begin{itemize}
\item<7-> \texttt{sapply(1:10, function(i)\{Code to run\})}
\end{itemize}
\end{enumerate}
\end{frame}

\begin{frame}{Loop examples}
\only<1>{\includegraphics[width=\textwidth]{For_Loops.png}}
\only<2>{\includegraphics[width=\textwidth]{While_Loops.png}}
\only<3>{\includegraphics[width=0.5\textwidth]{While_Loops_Inf.png}}
\only<4>{\includegraphics[width=\textwidth]{apply_functions.png}}
\only<5>{\includegraphics[width=\textwidth]{sapply_loop.png}}
\end{frame}
\begin{frame}{Introduction}
\begin{itemize}
\item Data rarely comes in the form you need.
\item Transformation helps prepare data for analysis and visualization.
\item We'll use the \texttt{dplyr} package from the tidyverse.
\end{itemize}
\end{frame}

% Slide 2
\begin{frame}{Core dplyr Verbs}
\begin{itemize}
\item \texttt{filter()} – select rows based on conditions
\item \texttt{arrange()} – reorder rows
\item \texttt{select()} – choose columns
\item \texttt{mutate()} – add new columns
\item \texttt{summarize()} – reduce multiple values to one
\item \texttt{group\_by()} – group data for summary
\end{itemize}
\end{frame}

% Slide 3
% [fragile] is required because this frame contains a verbatim environment.
\begin{frame}[fragile]{Using the Pipe Operator}
\begin{itemize}
\item Pipe: \texttt{|>} passes output to the next function
\item Example:
\begin{verbatim}
flights |>
  filter(dest == "IAH") |>
  group_by(year, month, day) |>
  summarize(
    arr_delay = mean(arr_delay, na.rm = TRUE)
  )
\end{verbatim}
\end{itemize}
\end{frame}

% Slide 4
\begin{frame}{Working with Rows}
\begin{itemize}
\item \texttt{filter()} – keep rows meeting conditions
\item \texttt{arrange()} – sort rows
\item \texttt{distinct()} – remove duplicates
\end{itemize}
\end{frame}

% Slide 5
\begin{frame}{Working with Columns}
\begin{itemize}
\item \texttt{select()} – choose columns
\item \texttt{rename()} – rename columns
\item \texttt{mutate()} – create new columns
\end{itemize}
\end{frame}

% Slide 6
\begin{frame}{Grouped Operations}
\begin{itemize}
\item \texttt{group\_by()} – group data
\item \texttt{summarize()} – compute summaries per group
\item Useful for aggregation and comparisons
\end{itemize}
\end{frame}

% Slide 8
\begin{frame}{Class Example}

\textbf{Example:} dataset to apply dplyr: \texttt{airquality}
\begin{itemize}
\item Convert to a tibble
\item Remove any entries with NA values
\item Remove wind speed outliers
\item Calculate the average temperature in each month
\item Find the number of observations in each month
\item Find the max, min and standard deviation of Wind speed
\item Use pipes to complete all tasks in one line
\end{itemize}
\end{frame}
\begin{frame}{Why Join Data?}
\begin{itemize}
\item Real-world data often comes in multiple tables.
\item Joins combine related data based on common keys.
\item \texttt{dplyr} provides intuitive functions for joining.
\end{itemize}
\end{frame}

% Slide 2
\begin{frame}{Types of Joins}
\begin{itemize}
\item \texttt{left\_join()} – keep all rows from left table
\item \texttt{right\_join()} – keep all rows from right table
\item \texttt{inner\_join()} – keep only matching rows
\item \texttt{full\_join()} – keep all rows from both tables
\end{itemize}
\end{frame}

% Slide 3
\begin{frame}{left\_join()}

\texttt{left\_join(df1, df2, by = "id")}
\begin{itemize}
\item Keeps all rows from the left table.
\item Adds matching rows from the right table.
\item Missing matches are filled with \texttt{NA}.
\end{itemize}
\end{frame}

% Slide 4
\begin{frame}{right\_join()}

\texttt{right\_join(df1, df2, by = "id")}
\begin{itemize}
\item Keeps all rows from the right table.
\item Adds matching rows from the left table.
\end{itemize}
\end{frame}

% Slide 5
\begin{frame}{inner\_join()}
\texttt{inner\_join(df1, df2, by = "id")}
\begin{itemize}
\item Keeps only rows with matching keys in both tables.
\item Most commonly used for filtering to shared data.
\end{itemize}
\end{frame}

% Slide 6
\begin{frame}{full\_join()}
\texttt{full\_join(df1, df2, by = "id")}
\begin{itemize}
\item Keeps all rows from both tables.
\item Missing matches are filled with \texttt{NA}.
\end{itemize}
\end{frame}

% Slide 7
\begin{frame}{Common Issues}
\begin{itemize}
\item Mismatched column names
\item Duplicate keys – can lead to unexpected row duplication
\item Data types must match: both keys should be character, or both numeric
\item Missing value joins will introduce \texttt{NA}s
\end{itemize}
\end{frame}

% Slide 8
\begin{frame}{Best Practices}
\begin{itemize}
\item Inspect keys before joining: \texttt{unique()}
\item Use \texttt{anti\_join()} to find unmatched rows
\item Validate results with \texttt{summary()} and \texttt{count()}
\end{itemize}
\end{frame}

% Slide 9
\begin{frame}{Class Exercise}
\begin{itemize}
\item Joins are essential for combining data.
\item Choose the right join based on your goal.
\item Always check for common issues before and after joining.
\end{itemize}
\end{frame}

\end{document}