Cleanup
This commit is contained in:
parent
0b5ec37716
commit
3a33a93592
@ -29,9 +29,9 @@
|
||||
\begin{itemize}
|
||||
\onslide<5->{\item \texttt{while(a==b)\{Code to run\}}}
|
||||
\end{itemize}
|
||||
\onslide<4->{apply functions }
|
||||
\onslide<6->{\item apply functions }
|
||||
\begin{itemize}
|
||||
\onslide<5->{\item \texttt{while(a==b)\{Code to run\}}}
|
||||
\onslide<7->{\item \texttt{sapply(X, FUN)}}
|
||||
\end{itemize}
|
||||
\end{enumerate}
|
||||
\end{frame}
|
||||
@ -43,7 +43,16 @@
|
||||
\only<4>{\includegraphics[width=\textwidth]{apply_functions.png}}
|
||||
\only<5>{\includegraphics[width=\textwidth]{sapply_loop.png}}
|
||||
\end{frame}
|
||||
\begin{frame}{Introduction}
|
||||
|
||||
\begin{frame}{Class Exercise}
|
||||
\begin{itemize}
|
||||
\item Create a loop that downloads a set of files when given a list of URLs.
|
||||
\item Name each file based on another list.
|
||||
\item Repeat for each type of loop (for, while, apply).
|
||||
\end{itemize}
|
||||
\end{frame}
|
||||
|
||||
\begin{frame}{Data transformation}
|
||||
\begin{itemize}
|
||||
\item Data rarely comes in the form you need.
|
||||
\item Transformation helps prepare data for analysis and visualization.
|
||||
@ -66,14 +75,7 @@
|
||||
% Slide 3
|
||||
\begin{frame}{Using the Pipe Operator}
|
||||
\begin{itemize}
|
||||
\item Pipe: \texttt{|>} passes output to next function
|
||||
\item Example:
|
||||
%\begin{verbatim}
|
||||
%flights |>
|
||||
% filter(dest == "IAH") |>
|
||||
% group_by(year, month, day) |>
|
||||
% summarize(arr_delay = mean(arr_delay, na.rm = TRUE))
|
||||
% \end{verbatim}
|
||||
\item Pipe: \texttt{|>} or \texttt{\%>\%} passes output to next function
|
||||
\end{itemize}
|
||||
\end{frame}
|
||||
|
||||
@ -103,9 +105,14 @@
|
||||
\item Useful for aggregation and comparisons
|
||||
\end{itemize}
|
||||
\end{frame}
|
||||
\begin{frame}[plain]
|
||||
\only<1>{\includegraphics[width=\textwidth]{Pipe_Example.png}}
|
||||
\only<2>{\includegraphics[width=0.8\textwidth]{Filter_Example.png}}
|
||||
\only<3>{\includegraphics[width=\textwidth]{group_and Summarize.png}}
|
||||
\end{frame}
|
||||
|
||||
% Slide 8
|
||||
\begin{frame}{Class Example}
|
||||
\begin{frame}{Class Exercise}
|
||||
|
||||
\textbf{Example:} apply \texttt{dplyr} to the \texttt{airquality} dataset
|
||||
\begin{itemize}
|
||||
@ -118,89 +125,4 @@
|
||||
\item Use pipes to complete all tasks in one line
|
||||
\end{itemize}
|
||||
\end{frame}
|
||||
\begin{frame}{Why Join Data?}
|
||||
\begin{itemize}
|
||||
\item Real-world data often comes in multiple tables.
|
||||
\item Joins combine related data based on common keys.
|
||||
\item \texttt{dplyr} provides intuitive functions for joining.
|
||||
\end{itemize}
|
||||
\end{frame}
|
||||
|
||||
% Slide 2
|
||||
\begin{frame}{Types of Joins}
|
||||
\begin{itemize}
|
||||
\item \texttt{left\_join()} – keep all rows from left table
|
||||
\item \texttt{right\_join()} – keep all rows from right table
|
||||
\item \texttt{inner\_join()} – keep only matching rows
|
||||
\item \texttt{full\_join()} – keep all rows from both tables
|
||||
\end{itemize}
|
||||
\end{frame}
|
||||
|
||||
% Slide 3
|
||||
\begin{frame}{left\_join()}
|
||||
|
||||
\texttt{left\_join(df1, df2, by = "id")}
|
||||
\begin{itemize}
|
||||
\item Keeps all rows from the left table.
|
||||
\item Adds matching rows from the right table.
|
||||
\item Missing matches are filled with \texttt{NA}.
|
||||
\end{itemize}
|
||||
\end{frame}
|
||||
|
||||
% Slide 4
|
||||
\begin{frame}{right\_join()}
|
||||
|
||||
\texttt{right\_join(df1, df2, by = "id")}
|
||||
\begin{itemize}
|
||||
\item Keeps all rows from the right table.
|
||||
\item Adds matching rows from the left table.
|
||||
\end{itemize}
|
||||
\end{frame}
|
||||
|
||||
% Slide 5
|
||||
\begin{frame}{inner\_join()}
|
||||
\texttt{inner\_join(df1, df2, by = "id")}
|
||||
\begin{itemize}
|
||||
\item Keeps only rows with matching keys in both tables.
|
||||
\item Most commonly used to restrict the result to data shared by both tables.
|
||||
\end{itemize}
|
||||
\end{frame}
|
||||
|
||||
% Slide 6
|
||||
\begin{frame}{full\_join()}
|
||||
\texttt{full\_join(df1, df2, by = "id")}
|
||||
\begin{itemize}
|
||||
\item Keeps all rows from both tables.
|
||||
\item Missing matches are filled with \texttt{NA}.
|
||||
\end{itemize}
|
||||
\end{frame}
|
||||
|
||||
% Slide 7
|
||||
\begin{frame}{Common Issues}
|
||||
\begin{itemize}
|
||||
\item Mismatched column names
|
||||
\item Duplicate keys – can lead to unexpected row duplication
|
||||
\item Key data types must match: both keys should be character, or both numeric
|
||||
\item Joins with missing matches will introduce \texttt{NA}s
|
||||
\end{itemize}
|
||||
\end{frame}
|
||||
|
||||
% Slide 8
|
||||
\begin{frame}{Best Practices}
|
||||
\begin{itemize}
|
||||
\item Inspect keys before joining: \texttt{unique()}
|
||||
\item Use \texttt{anti\_join()} to find unmatched rows
|
||||
\item Validate results with \texttt{summary()} and \texttt{count()}
|
||||
\end{itemize}
|
||||
\end{frame}
|
||||
|
||||
% Slide 9
|
||||
\begin{frame}{Summary}
|
||||
\begin{itemize}
|
||||
\item Joins are essential for combining data.
|
||||
\item Choose the right join based on your goal.
|
||||
\item Always check for common issues before and after joining.
|
||||
\end{itemize}
|
||||
\end{frame}
|
||||
|
||||
\end{document}
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user