Working on class slides
8
Class8/Class_Examples/apply.r
Normal file
@ -0,0 +1,8 @@
|
|||||||
|
DAT <- 1:8
|
||||||
|
?apply
|
||||||
|
apply(DAT,mean)
|
||||||
|
?mapply
|
||||||
|
x <- as.data.frame(cbind(x1 = c(3,NA,12,7), x2 = c(4:1, 2:5)) )
|
||||||
|
dimnames(x)[[1]] <- letters[1:8]
|
||||||
|
apply(x, 2, mean, trim = .2,na.rm=TRUE)
|
||||||
|
?mean
|
||||||
BIN
Class8/Slides/1_join.png
Normal file
|
After Width: | Height: | Size: 15 KiB |
BIN
Class8/Slides/2_join.png
Normal file
|
After Width: | Height: | Size: 17 KiB |
BIN
Class8/Slides/3_join.png
Normal file
|
After Width: | Height: | Size: 42 KiB |
BIN
Class8/Slides/Filter_Example.png
Normal file
|
After Width: | Height: | Size: 51 KiB |
BIN
Class8/Slides/For_Loops.png
Normal file
|
After Width: | Height: | Size: 9.6 KiB |
BIN
Class8/Slides/Function.png
Normal file
|
After Width: | Height: | Size: 51 KiB |
BIN
Class8/Slides/If.png
Normal file
|
After Width: | Height: | Size: 43 KiB |
171
Class8/Slides/Joins.tex
Normal file
@ -0,0 +1,171 @@
|
|||||||
|
\documentclass{beamer}
|
||||||
|
\usepackage{graphicx}
|
||||||
|
\usepackage{multicol}
|
||||||
|
\usepackage{hyperref}
|
||||||
|
\usepackage{verbatim}
|
||||||
|
\usepackage{lipsum} % for placeholder text
|
||||||
|
\graphicspath{{pdf_images/}}
|
||||||
|
|
||||||
|
|
||||||
|
\title{ECON 4530/5530 \\ Computational Economics}
|
||||||
|
\subtitle{Joining Data and Data Transformation}
|
||||||
|
\author{Alex Gebben}
|
||||||
|
|
||||||
|
\begin{document}
|
||||||
|
|
||||||
|
% Title Slide
|
||||||
|
\begin{frame}
|
||||||
|
\titlepage
|
||||||
|
\end{frame}
|
||||||
|
%%%%%%%%%%%%%%%%%%
|
||||||
|
\begin{frame}{Joining Data}
|
||||||
|
\begin{itemize}
|
||||||
|
\item Real-world data often comes in multiple tables.
|
||||||
|
\item Joins combine related data based on common keys.
|
||||||
|
\item \texttt{dplyr} provides intuitive functions for joining.
|
||||||
|
\end{itemize}
|
||||||
|
\end{frame}
|
||||||
|
|
||||||
|
% Slide 2
|
||||||
|
\begin{frame}{Types of Joins}
|
||||||
|
\begin{itemize}
|
||||||
|
\item \texttt{left\_join()} – keep all rows from left table
|
||||||
|
\item \texttt{right\_join()} – keep all rows from right table
|
||||||
|
\item \texttt{inner\_join()} – keep only matching rows
|
||||||
|
\item \texttt{full\_join()} – keep all rows from both tables
|
||||||
|
\end{itemize}
|
||||||
|
\end{frame}
|
||||||
|
|
||||||
|
% Slide 3
|
||||||
|
\begin{frame}{left\_join()}
|
||||||
|
\texttt{left\_join(df1, df2, by = "id")}
|
||||||
|
\newline
|
||||||
|
OR
|
||||||
|
\newline
|
||||||
|
\onslide<2->{\texttt{df1 \%>\% left\_join(df2)}}
|
||||||
|
\begin{itemize}
|
||||||
|
\onslide<3->{ \item Keeps all rows from the left table.}
|
||||||
|
\onslide<4->{ \item Adds matching rows from the right table.}
|
||||||
|
\onslide<5->{ \item Missing matches are filled with \texttt{NA}.}
|
||||||
|
\end{itemize}
|
||||||
|
\end{frame}
|
||||||
|
|
||||||
|
% Slide 4
|
||||||
|
\begin{frame}{right\_join()}
|
||||||
|
|
||||||
|
\texttt{right\_join(df1, df2, by = "id")}
|
||||||
|
\newline
|
||||||
|
OR
|
||||||
|
\newline
|
||||||
|
\onslide<2->{\texttt{df2 \%>\% left\_join(df1)}}
|
||||||
|
\onslide<3->{
|
||||||
|
\begin{itemize}
|
||||||
|
\item Keeps all rows from the right table.
|
||||||
|
\item Adds matching rows from the left table.
|
||||||
|
\end{itemize}
|
||||||
|
}
|
||||||
|
\end{frame}
|
||||||
|
|
||||||
|
% Slide 5
|
||||||
|
\begin{frame}{inner\_join()}
|
||||||
|
\texttt{inner\_join(df1, df2, by = "id")}
|
||||||
|
\newline
|
||||||
|
OR
|
||||||
|
\newline
|
||||||
|
\texttt{df1 \%>\% inner\_join(df2, by = "id")}
|
||||||
|
\begin{itemize}
|
||||||
|
\item Keeps only rows with matching keys in both tables.
|
||||||
|
\item Most commonly used for filtering to shared data.
|
||||||
|
\end{itemize}
|
||||||
|
\end{frame}
|
||||||
|
|
||||||
|
% Slide 6
|
||||||
|
\begin{frame}{full\_join()}
|
||||||
|
\texttt{full\_join(df1, df2, by = "id")}
|
||||||
|
\newline
|
||||||
|
OR
|
||||||
|
\newline
|
||||||
|
\texttt{df1 \%>\% full\_join(df2, by = "id")}
|
||||||
|
|
||||||
|
\begin{itemize}
|
||||||
|
\item Keeps all rows from both tables.
|
||||||
|
\item Missing matches are filled with \texttt{NA}.
|
||||||
|
\end{itemize}
|
||||||
|
\end{frame}
|
||||||
|
\begin{frame}{Common Issues}
|
||||||
|
\begin{itemize}
|
||||||
|
\item Mismatched column names
|
||||||
|
\item Duplicate keys – can lead to unexpected row duplication
|
||||||
|
\item Key data types must match: both keys should be character, or both numeric
|
||||||
|
\item Missing value joins will introduce \texttt{NA}s
|
||||||
|
\end{itemize}
|
||||||
|
|
||||||
|
\end{frame}
|
||||||
|
|
||||||
|
\begin{frame}[plain]
|
||||||
|
\includegraphics[width=\textwidth]{venn.png}
|
||||||
|
\end{frame}
|
||||||
|
|
||||||
|
\begin{frame}[plain]
|
||||||
|
\begin{columns}
|
||||||
|
\begin{column}{0.5\textwidth}
|
||||||
|
\includegraphics[width=\textwidth]{1_join}
|
||||||
|
\end{column}
|
||||||
|
\begin{column}{0.5\textwidth}
|
||||||
|
\includegraphics[width=\textwidth]{2_join}
|
||||||
|
\end{column}
|
||||||
|
\end{columns}
|
||||||
|
\end{frame}
|
||||||
|
|
||||||
|
\begin{frame}[plain]
|
||||||
|
\includegraphics[width=\textwidth]{3_join}
|
||||||
|
\end{frame}
|
||||||
|
\begin{frame}[plain]
|
||||||
|
\includegraphics[width=\textwidth]{full.png}
|
||||||
|
\end{frame}
|
||||||
|
\begin{frame}[plain]
|
||||||
|
\includegraphics[width=\textwidth]{left.png}
|
||||||
|
\end{frame}
|
||||||
|
\begin{frame}[plain]
|
||||||
|
\includegraphics[width=\textwidth]{right.png}
|
||||||
|
\end{frame}
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
% Slide 7
|
||||||
|
\begin{frame}[plain]
|
||||||
|
\includegraphics[width=\textwidth]{match-types.png}
|
||||||
|
\end{frame}
|
||||||
|
\begin{frame}{Checking with Anti-Joins}
|
||||||
|
\includegraphics[width=\textwidth]{anti.png}
|
||||||
|
\end{frame}
|
||||||
|
\begin{frame}{Checking with Anti-Joins}
|
||||||
|
\includegraphics[width=\textwidth]{anti.png}
|
||||||
|
\end{frame}
|
||||||
|
\begin{frame}{Filtering with semi-joins}
|
||||||
|
\includegraphics[width=\textwidth]{semi.png}
|
||||||
|
\end{frame}
|
||||||
|
\begin{frame}{Other non-equi joins}
|
||||||
|
\includegraphics[width=\textwidth]{gte}
|
||||||
|
\end{frame}
|
||||||
|
|
||||||
|
% Slide 8
|
||||||
|
\begin{frame}{Best Practices}
|
||||||
|
\begin{itemize}
|
||||||
|
\item Inspect keys before joining: \texttt{unique()}
|
||||||
|
\item Use \texttt{anti\_join()} to find unmatched rows
|
||||||
|
\item Validate results with \texttt{summary()} and \texttt{count()}
|
||||||
|
\end{itemize}
|
||||||
|
\end{frame}
|
||||||
|
|
||||||
|
% Slide 9
|
||||||
|
\begin{frame}{Class Exercise}
|
||||||
|
\begin{itemize}
|
||||||
|
\item Joins are essential for combining data.
|
||||||
|
\item Choose the right join based on your goal.
|
||||||
|
\item Always check for common issues before and after joining.
|
||||||
|
\end{itemize}
|
||||||
|
\end{frame}
|
||||||
|
|
||||||
|
\end{document}
|
||||||
BIN
Class8/Slides/Pipe_Example.png
Normal file
|
After Width: | Height: | Size: 73 KiB |
BIN
Class8/Slides/Pivot_Data.png
Normal file
|
After Width: | Height: | Size: 26 KiB |
BIN
Class8/Slides/Section_comments.png
Normal file
|
After Width: | Height: | Size: 11 KiB |
BIN
Class8/Slides/What_Comments.png
Normal file
|
After Width: | Height: | Size: 28 KiB |
BIN
Class8/Slides/While_Loops.png
Normal file
|
After Width: | Height: | Size: 12 KiB |
BIN
Class8/Slides/While_Loops_Inf.png
Normal file
|
After Width: | Height: | Size: 22 KiB |
BIN
Class8/Slides/Why_Comments.png
Normal file
|
After Width: | Height: | Size: 56 KiB |
BIN
Class8/Slides/anti.png
Normal file
|
After Width: | Height: | Size: 37 KiB |
BIN
Class8/Slides/apply_functions.png
Normal file
|
After Width: | Height: | Size: 14 KiB |
BIN
Class8/Slides/full.png
Normal file
|
After Width: | Height: | Size: 55 KiB |
BIN
Class8/Slides/group_and Summarize.png
Normal file
|
After Width: | Height: | Size: 26 KiB |
BIN
Class8/Slides/gte.png
Normal file
|
After Width: | Height: | Size: 60 KiB |
BIN
Class8/Slides/left.png
Normal file
|
After Width: | Height: | Size: 49 KiB |
BIN
Class8/Slides/match-types.png
Normal file
|
After Width: | Height: | Size: 39 KiB |
BIN
Class8/Slides/pivot_longer.png
Normal file
|
After Width: | Height: | Size: 65 KiB |
BIN
Class8/Slides/right.png
Normal file
|
After Width: | Height: | Size: 53 KiB |
BIN
Class8/Slides/sapply_loop.png
Normal file
|
After Width: | Height: | Size: 15 KiB |
BIN
Class8/Slides/semi.png
Normal file
|
After Width: | Height: | Size: 39 KiB |
BIN
Class8/Slides/venn.png
Normal file
|
After Width: | Height: | Size: 59 KiB |