diff --git a/Class8/Class_Examples/apply.r b/Class8/Class_Examples/apply.r new file mode 100644 index 0000000..99d2e7c --- /dev/null +++ b/Class8/Class_Examples/apply.r @@ -0,0 +1,12 @@ +# Class examples for apply(); the ?topic lines open the matching help pages. +DAT <- 1:8 +?apply +# apply() needs an object with dimensions plus a MARGIN, so DAT is treated as a one-column matrix. +apply(as.matrix(DAT), 2, mean) +?mapply +# x1 has 4 values and is recycled by cbind() to line up with the 8 values of x2. +x <- as.data.frame(cbind(x1 = c(3,NA,12,7), x2 = c(4:1, 2:5)) ) +dimnames(x)[[1]] <- letters[1:8] +# Column-wise trimmed means (trim = .2) with the NA values in x1 dropped first. +apply(x, 2, mean, trim = .2,na.rm=TRUE) +?mean \ No newline at end of file diff --git a/Class8/Slides/1_join.png b/Class8/Slides/1_join.png new file mode 100644 index 0000000..0033216 Binary files /dev/null and b/Class8/Slides/1_join.png differ diff --git a/Class8/Slides/2_join.png b/Class8/Slides/2_join.png new file mode 100644 index 0000000..cb0d82e Binary files /dev/null and b/Class8/Slides/2_join.png differ diff --git a/Class8/Slides/3_join.png b/Class8/Slides/3_join.png new file mode 100644 index 0000000..7c6f9a8 Binary files /dev/null and b/Class8/Slides/3_join.png differ diff --git a/Class8/Slides/Filter_Example.png b/Class8/Slides/Filter_Example.png new file mode 100644 index 0000000..4d5040e Binary files /dev/null and b/Class8/Slides/Filter_Example.png differ diff --git a/Class8/Slides/For_Loops.png b/Class8/Slides/For_Loops.png new file mode 100644 index 0000000..5acb332 Binary files /dev/null and b/Class8/Slides/For_Loops.png differ diff --git a/Class8/Slides/Function.png b/Class8/Slides/Function.png new file mode 100644 index 0000000..4baf7df Binary files /dev/null and b/Class8/Slides/Function.png differ diff --git a/Class8/Slides/If.png b/Class8/Slides/If.png new file mode 100644 index 0000000..c545516 Binary files /dev/null and b/Class8/Slides/If.png differ diff --git a/Class8/Slides/Joins.tex b/Class8/Slides/Joins.tex new file mode 100644 index 0000000..bdad5c2 --- /dev/null +++ b/Class8/Slides/Joins.tex @@ -0,0 +1,171 @@ +\documentclass{beamer} +\usepackage{graphicx} +\usepackage{multicol} +\usepackage{hyperref} +\usepackage{verbatim} +\usepackage{lipsum} % for placeholder text +\graphicspath{{pdf_images/}} + + +\title{ECON
4530/5530 \\ Computational Economics} +\subtitle{Joining data and Data Transformation} +\author{Alex Gebben} + +\begin{document} + +% Title Slide +\begin{frame} + \titlepage +\end{frame} +%%%%%%%%%%%%%%%%%% +\begin{frame}{Joining Data} +\begin{itemize} + \item Real-world data often comes in multiple tables. + \item Joins combine related data based on common keys. + \item \texttt{dplyr} provides intuitive functions for joining. +\end{itemize} +\end{frame} + +% Slide 2 +\begin{frame}{Types of Joins} +\begin{itemize} + \item \texttt{left\_join()} – keep all rows from left table + \item \texttt{right\_join()} – keep all rows from right table + \item \texttt{inner\_join()} – keep only matching rows + \item \texttt{full\_join()} – keep all rows from both tables +\end{itemize} +\end{frame} + +% Slide 3 +\begin{frame}{left\_join()} +\texttt{left\_join(df1, df2, by = "id")} +\newline +OR +\newline +\onslide<2->{\texttt{df1 \%>\% left\_join(df2)}} +\begin{itemize} + \onslide<3->{ \item Keeps all rows from the left table.} + \onslide<4->{ \item Adds matching rows from the right table.} + \onslide<5->{ \item Missing matches are filled with \texttt{NA}.} +\end{itemize} +\end{frame} + +% Slide 4 +\begin{frame}{right\_join()} + +\texttt{right\_join(df1, df2, by = "id")} +\newline +OR +\newline +\onslide<2->{\texttt{df2 \%>\% left\_join(df1)}} +\onslide<3->{ +\begin{itemize} + \item Keeps all rows from the right table. + \item Adds matching rows from the left table. +\end{itemize} +} +\end{frame} + +% Slide 5 +\begin{frame}{inner\_join()} +\texttt{inner\_join(df1, df2, by = "id")} +\newline +OR +\newline +\texttt{df1 \%>\% inner\_join(df2, by = "id")} +\begin{itemize} + \item Keeps only rows with matching keys in both tables. + \item Most commonly used for filtering to shared data. 
+\end{itemize} +\end{frame} + +% Slide 6 +\begin{frame}{full\_join()} +\texttt{full\_join(df1, df2, by = "id")} +\newline +OR +\newline +\texttt{df1 \%>\% full\_join(df2, by = "id")} + +\begin{itemize} + \item Keeps all rows from both tables. + \item Missing matches are filled with \texttt{NA}. +\end{itemize} +\end{frame} +\begin{frame}{Common Issues} +\begin{itemize} + \item Mismatched column names + \item Duplicate keys – can lead to unexpected row duplication + \item Data types must match: both keys should be character, or both numeric + \item Missing value joins will introduce \texttt{NA}s +\end{itemize} + +\end{frame} + +\begin{frame}[plain] + \includegraphics[width=\textwidth]{venn.png} +\end{frame} + +\begin{frame}[plain] + \begin{columns} + \begin{column}{0.5\textwidth} + \includegraphics[width=\textwidth]{1_join} + \end{column} + \begin{column}{0.5\textwidth} + \includegraphics[width=\textwidth]{2_join} + \end{column} + \end{columns} +\end{frame} + +\begin{frame}[plain] + \includegraphics[width=\textwidth]{3_join} +\end{frame} +\begin{frame}[plain] + \includegraphics[width=\textwidth]{full.png} +\end{frame} +\begin{frame}[plain] + \includegraphics[width=\textwidth]{left.png} +\end{frame} +\begin{frame}[plain] + \includegraphics[width=\textwidth]{right.png} +\end{frame} + + + + +% Slide 7 +\begin{frame}[plain] + \includegraphics[width=\textwidth]{match-types.png} +\end{frame} +\begin{frame}{Checking with Anti-Joins} + \includegraphics[width=\textwidth]{anti.png} +\end{frame} +\begin{frame}{Checking with Anti-Joins} + \includegraphics[width=\textwidth]{anti.png} +\end{frame} +\begin{frame}{Filtering with semi-joins} + \includegraphics[width=\textwidth]{semi.png} +\end{frame} +\begin{frame}{Other non-equi joins} + \includegraphics[width=\textwidth]{gte} +\end{frame} + +% Slide 8 +\begin{frame}{Best Practices} +\begin{itemize} + \item Inspect keys before joining: \texttt{unique()} + \item Use \texttt{anti\_join()} to find unmatched rows + \item Validate results with 
\texttt{summary()} and \texttt{count()} +\end{itemize} +\end{frame} + +% Slide 9 +\begin{frame}{Class Exercise} +\begin{itemize} + \item Joins are essential for combining data. + \item Choose the right join based on your goal. + \item Always check for common issues before and after joining. +\end{itemize} +\end{frame} + +\end{document} diff --git a/Class8/Slides/Pipe_Example.png b/Class8/Slides/Pipe_Example.png new file mode 100644 index 0000000..ca919d2 Binary files /dev/null and b/Class8/Slides/Pipe_Example.png differ diff --git a/Class8/Slides/Pivot_Data.png b/Class8/Slides/Pivot_Data.png new file mode 100644 index 0000000..c15094c Binary files /dev/null and b/Class8/Slides/Pivot_Data.png differ diff --git a/Class8/Slides/Section_comments.png b/Class8/Slides/Section_comments.png new file mode 100644 index 0000000..2a31b06 Binary files /dev/null and b/Class8/Slides/Section_comments.png differ diff --git a/Class8/Slides/What_Comments.png b/Class8/Slides/What_Comments.png new file mode 100644 index 0000000..2ec77ea Binary files /dev/null and b/Class8/Slides/What_Comments.png differ diff --git a/Class8/Slides/While_Loops.png b/Class8/Slides/While_Loops.png new file mode 100644 index 0000000..303ca06 Binary files /dev/null and b/Class8/Slides/While_Loops.png differ diff --git a/Class8/Slides/While_Loops_Inf.png b/Class8/Slides/While_Loops_Inf.png new file mode 100644 index 0000000..9ca3174 Binary files /dev/null and b/Class8/Slides/While_Loops_Inf.png differ diff --git a/Class8/Slides/Why_Comments.png b/Class8/Slides/Why_Comments.png new file mode 100644 index 0000000..3dd7f82 Binary files /dev/null and b/Class8/Slides/Why_Comments.png differ diff --git a/Class8/Slides/anti.png b/Class8/Slides/anti.png new file mode 100644 index 0000000..1501157 Binary files /dev/null and b/Class8/Slides/anti.png differ diff --git a/Class8/Slides/apply_functions.png b/Class8/Slides/apply_functions.png new file mode 100644 index 0000000..b03ad4d Binary files /dev/null and 
b/Class8/Slides/apply_functions.png differ diff --git a/Class8/Slides/full.png b/Class8/Slides/full.png new file mode 100644 index 0000000..b0c63c1 Binary files /dev/null and b/Class8/Slides/full.png differ diff --git a/Class8/Slides/group_and Summarize.png b/Class8/Slides/group_and Summarize.png new file mode 100644 index 0000000..c31cfa5 Binary files /dev/null and b/Class8/Slides/group_and Summarize.png differ diff --git a/Class8/Slides/gte.png b/Class8/Slides/gte.png new file mode 100644 index 0000000..fdca916 Binary files /dev/null and b/Class8/Slides/gte.png differ diff --git a/Class8/Slides/left.png b/Class8/Slides/left.png new file mode 100644 index 0000000..4efb093 Binary files /dev/null and b/Class8/Slides/left.png differ diff --git a/Class8/Slides/match-types.png b/Class8/Slides/match-types.png new file mode 100644 index 0000000..1f9fe53 Binary files /dev/null and b/Class8/Slides/match-types.png differ diff --git a/Class8/Slides/pivot_longer.png b/Class8/Slides/pivot_longer.png new file mode 100644 index 0000000..72664ff Binary files /dev/null and b/Class8/Slides/pivot_longer.png differ diff --git a/Class8/Slides/right.png b/Class8/Slides/right.png new file mode 100644 index 0000000..5d8c6cd Binary files /dev/null and b/Class8/Slides/right.png differ diff --git a/Class8/Slides/sapply_loop.png b/Class8/Slides/sapply_loop.png new file mode 100644 index 0000000..33aea1e Binary files /dev/null and b/Class8/Slides/sapply_loop.png differ diff --git a/Class8/Slides/semi.png b/Class8/Slides/semi.png new file mode 100644 index 0000000..b76f211 Binary files /dev/null and b/Class8/Slides/semi.png differ diff --git a/Class8/Slides/venn.png b/Class8/Slides/venn.png new file mode 100644 index 0000000..c9d558f Binary files /dev/null and b/Class8/Slides/venn.png differ