% Beamer slide deck: joining tables with dplyr and reshaping data with pivots.
%
% Source layout: one statement per line, and every `%` comment sits on its own
% line (or at the end of a complete statement) so it can never comment out
% live code that follows it.
\documentclass{beamer}

\usepackage{graphicx}
\usepackage{multicol}
\usepackage{hyperref}
\usepackage{verbatim}
\usepackage{lipsum} % for placeholder text

% Extra directory that \includegraphics searches for the images used below.
\graphicspath{{pdf_images/}}

\title{ECON 4530/5530 \\ Computational Economics}
\subtitle{Joining data and Data Transformation}
\author{Alex Gebben}

\begin{document}

% Title slide
\begin{frame}
  \titlepage
\end{frame}

%%%%%%%%%%%%%%%%%%
% Part 1: joins
%%%%%%%%%%%%%%%%%%

% Motivation for joins
\begin{frame}{Joining Data}
  \begin{itemize}
    \item Real-world data often comes in multiple tables.
    \item Joins combine related data based on common keys.
    \item \texttt{dplyr} provides intuitive functions for joining.
  \end{itemize}
\end{frame}

% Overview of the four mutating joins
\begin{frame}{Types of Joins}
  \begin{itemize}
    \item \texttt{left\_join()} -- keep all rows from left table
    \item \texttt{right\_join()} -- keep all rows from right table
    \item \texttt{inner\_join()} -- keep only matching rows
    \item \texttt{full\_join()} -- keep all rows from both tables
  \end{itemize}
\end{frame}

% left_join(): the piped form appears on slide 2, then one bullet per slide.
\begin{frame}{left\_join()}
  \texttt{left\_join(df1, df2, by = "id")}
  \newline OR \newline
  \onslide<2->{\texttt{df1 \%>\% left\_join(df2)}}
  \begin{itemize}
    \item<3-> Keeps all rows from the left table.
    \item<4-> Adds matching rows from the right table.
    \item<5-> Missing matches are filled with \texttt{NA}.
  \end{itemize}
\end{frame}

% right_join(): the piped form appears on slide 2, the whole list on slide 3.
% NOTE(review): the piped form swaps the tables and calls left_join();
% presumably this is meant to show the equivalence -- confirm it is intended.
\begin{frame}{right\_join()}
  \texttt{right\_join(df1, df2, by = "id")}
  \newline OR \newline
  \onslide<2->{\texttt{df2 \%>\% left\_join(df1)}}
  \onslide<3->{
    \begin{itemize}
      \item Keeps all rows from the right table.
      \item Adds matching rows from the left table.
    \end{itemize}
  }
\end{frame}

% inner_join()
\begin{frame}{inner\_join()}
  \texttt{inner\_join(df1, df2, by = "id")}
  \newline OR \newline
  \texttt{df1 \%>\% inner\_join(df2, by = "id")}
  \begin{itemize}
    \item Keeps only rows with matching keys in both tables.
    \item Most commonly used for filtering to shared data.
  \end{itemize}
\end{frame}

% full_join()
\begin{frame}{full\_join()}
  \texttt{full\_join(df1, df2, by = "id")}
  \newline OR \newline
  \texttt{df1 \%>\% full\_join(df2, by = "id")}
  \begin{itemize}
    \item Keeps all rows from both tables.
    \item Missing matches are filled with \texttt{NA}.
  \end{itemize}
\end{frame}

% Pitfalls to watch for when joining
\begin{frame}{Common Issues}
  \begin{itemize}
    \item Mismatched column names
    \item Duplicate keys -- can lead to unexpected row duplication
    \item Data types must match, both keys should be character or numeric
    \item Missing value joins will introduce \texttt{NA}s
  \end{itemize}
\end{frame}

% Full-slide illustrations (plain frames: no headline/footline decorations)
\begin{frame}[plain]
  \includegraphics[width=\textwidth]{venn.png}
\end{frame}

% Two illustrations side by side, each filling its own column
\begin{frame}[plain]
  \begin{columns}
    \begin{column}{0.5\textwidth}
      \includegraphics[width=\textwidth]{1_join}
    \end{column}
    \begin{column}{0.5\textwidth}
      \includegraphics[width=\textwidth]{2_join}
    \end{column}
  \end{columns}
\end{frame}

\begin{frame}[plain]
  \includegraphics[width=\textwidth]{3_join}
\end{frame}

\begin{frame}[plain]
  \includegraphics[width=\textwidth]{full.png}
\end{frame}

\begin{frame}[plain]
  \includegraphics[width=\textwidth]{left.png}
\end{frame}

\begin{frame}[plain]
  \includegraphics[width=\textwidth]{right.png}
\end{frame}

\begin{frame}[plain]
  \includegraphics[width=\textwidth]{match-types.png}
\end{frame}

% Filtering joins
\begin{frame}{Checking with Anti-Joins}
  \includegraphics[width=\textwidth]{anti.png}
\end{frame}

% NOTE(review): this frame is identical to the previous one (same title, same
% image). Possibly a copy-paste leftover or a placeholder for a second
% figure -- confirm before removing.
\begin{frame}{Checking with Anti-Joins}
  \includegraphics[width=\textwidth]{anti.png}
\end{frame}

\begin{frame}{Filtering with semi-joins}
  \includegraphics[width=\textwidth]{semi.png}
\end{frame}

\begin{frame}{Other non-equi joins}
  \includegraphics[width=\textwidth]{gte}
\end{frame}

% Recommended checks around a join
\begin{frame}{Best Practices}
  \begin{itemize}
    \item Inspect keys before joining: \texttt{unique()}
    \item Use \texttt{anti\_join()} to find unmatched rows
    \item Validate results with \texttt{summary()} and \texttt{count()}
  \end{itemize}
\end{frame}

% NOTE(review): the title says "Class Exercise" but the bullets read like a
% summary of the join material -- confirm the intended title/content.
\begin{frame}{Class Exercise}
  \begin{itemize}
    \item Joins are essential for combining data.
    \item Choose the right join based on your goal.
    \item Always check for common issues before and after joining.
  \end{itemize}
\end{frame}

%%%%%%%%%%%%%%%%%%
% Part 2: pivots
%%%%%%%%%%%%%%%%%%

\begin{frame}{Pivot}
  \centering
  \includegraphics[width=0.6\textwidth]{Pivot_Data.png}
\end{frame}

\begin{frame}{Pivot longer}
  \centering
  \includegraphics[width=\textwidth]{pivot_longer.png}
\end{frame}

\begin{frame}{Pivot longer}
  \centering
  \includegraphics[width=\textwidth]{column-names.png}
\end{frame}

\begin{frame}{Pivot longer}
  \centering
  \includegraphics[width=\textwidth]{cell-values.png}
\end{frame}

\end{document}