This commit is contained in:
Alex Gebben Work 2025-09-15 17:43:46 -06:00
parent 0b5ec37716
commit 3a33a93592

View File

@ -29,9 +29,9 @@
\begin{itemize} \begin{itemize}
\onslide<5->{\item \texttt{while(a==b)\{Code to run\}}} \onslide<5->{\item \texttt{while(a==b)\{Code to run\}}}
\end{itemize} \end{itemize}
\onslide<4->{apply functions } \onslide<6->{\item apply functions }
\begin{itemize} \begin{itemize}
\onslide<5->{\item \texttt{while(a==b)\{Code to run\}}} \onslide<7->{\item \texttt{while(a==b)\{Code to run\}}}
\end{itemize} \end{itemize}
\end{enumerate} \end{enumerate}
\end{frame} \end{frame}
@ -43,7 +43,16 @@
\only<4>{\includegraphics[width=\textwidth]{apply_functions.png}} \only<4>{\includegraphics[width=\textwidth]{apply_functions.png}}
\only<5>{\includegraphics[width=\textwidth]{sapply_loop.png}} \only<5>{\includegraphics[width=\textwidth]{sapply_loop.png}}
\end{frame} \end{frame}
\begin{frame}{Introduction}
\begin{frame}{Class Exercise}
\begin{itemize}
\item Create a loop that downloads a set of files when given a list of URLs.
\item Name each file based on another list.
\item Repeat for each type of loop (for, while, apply).
\end{itemize}
\end{frame}
\begin{frame}{Data transformation}
\begin{itemize} \begin{itemize}
\item Data rarely comes in the form you need. \item Data rarely comes in the form you need.
\item Transformation helps prepare data for analysis and visualization. \item Transformation helps prepare data for analysis and visualization.
@ -66,14 +75,7 @@
% Slide 3 % Slide 3
\begin{frame}{Using the Pipe Operator} \begin{frame}{Using the Pipe Operator}
\begin{itemize} \begin{itemize}
\item Pipe: \texttt{|>} passes output to next function \item Pipe: \texttt{|>} or \texttt{\%>\%} passes output to next function
\item Example:
%\begin{verbatim}
%flights |>
% filter(dest == "IAH") |>
% group_by(year, month, day) |>
% summarize(arr_delay = mean(arr_delay, na.rm = TRUE))
% \end{verbatim}
\end{itemize} \end{itemize}
\end{frame} \end{frame}
@ -103,9 +105,14 @@
\item Useful for aggregation and comparisons \item Useful for aggregation and comparisons
\end{itemize} \end{itemize}
\end{frame} \end{frame}
\begin{frame}[plain]
\only<1>{\includegraphics[width=\textwidth]{Pipe_Example.png}}
\only<2>{\includegraphics[width=0.8\textwidth]{Filter_Example.png}}
\only<3>{\includegraphics[width=\textwidth]{group_and Summarize.png}}
\end{frame}
% Slide 8 % Slide 8
\begin{frame}{Class Example} \begin{frame}{Class Exercise}
\textbf{Example:} dataset to apply dplyr: \texttt{airquality} \textbf{Example:} dataset to apply dplyr: \texttt{airquality}
\begin{itemize} \begin{itemize}
@ -118,89 +125,4 @@
\item Use pipes to complete all tasks in one line \item Use pipes to complete all tasks in one line
\end{itemize} \end{itemize}
\end{frame} \end{frame}
\begin{frame}{Why Join Data?}
\begin{itemize}
\item Real-world data often comes in multiple tables.
\item Joins combine related data based on common keys.
\item \texttt{dplyr} provides intuitive functions for joining.
\end{itemize}
\end{frame}
% Slide 2
\begin{frame}{Types of Joins}
\begin{itemize}
\item \texttt{left\_join()}: keeps all rows from the left table
\item \texttt{right\_join()}: keeps all rows from the right table
\item \texttt{inner\_join()}: keeps only matching rows
\item \texttt{full\_join()}: keeps all rows from both tables
\end{itemize}
\end{frame}
% Slide 3
\begin{frame}{left\_join()}
\texttt{left\_join(df1, df2, by = "id")}
\begin{itemize}
\item Keeps all rows from the left table.
\item Adds matching rows from the right table.
\item Missing matches are filled with \texttt{NA}.
\end{itemize}
\end{frame}
% Slide 4
\begin{frame}{right\_join()}
\texttt{right\_join(df1, df2, by = "id")}
\begin{itemize}
\item Keeps all rows from the right table.
\item Adds matching rows from the left table.
\end{itemize}
\end{frame}
% Slide 5
\begin{frame}{inner\_join()}
\texttt{inner\_join(df1, df2, by = "id")}
\begin{itemize}
\item Keeps only rows with matching keys in both tables.
\item Most commonly used to restrict the result to records present in both tables.
\end{itemize}
\end{frame}
% Slide 6
\begin{frame}{full\_join()}
\texttt{full\_join(df1, df2, by = "id")}
\begin{itemize}
\item Keeps all rows from both tables.
\item Missing matches are filled with \texttt{NA}.
\end{itemize}
\end{frame}
% Slide 7
\begin{frame}{Common Issues}
\begin{itemize}
\item Mismatched column names
\item Duplicate keys can lead to unexpected row duplication
\item Key data types must match: both keys should be character, or both numeric
\item Missing value joins will introduce \texttt{NA}s
\end{itemize}
\end{frame}
% Slide 8
\begin{frame}{Best Practices}
\begin{itemize}
\item Inspect keys before joining: \texttt{unique()}
\item Use \texttt{anti\_join()} to find unmatched rows
\item Validate results with \texttt{summary()} and \texttt{count()}
\end{itemize}
\end{frame}
% Slide 9
\begin{frame}{Class Exercise}
\begin{itemize}
\item Joins are essential for combining data.
\item Choose the right join based on your goal.
\item Always check for common issues before and after joining.
\end{itemize}
\end{frame}
\end{document} \end{document}