Cleanup
This commit is contained in:
parent
0b5ec37716
commit
3a33a93592
@ -29,9 +29,9 @@
|
|||||||
\begin{itemize}
|
\begin{itemize}
|
||||||
\onslide<5->{\item \texttt{while(a==b)\{Code to run\}}}
|
\onslide<5->{\item \texttt{while(a==b)\{Code to run\}}}
|
||||||
\end{itemize}
|
\end{itemize}
|
||||||
\onslide<4->{apply functions }
|
\onslide<6->{\item apply functions }
|
||||||
\begin{itemize}
|
\begin{itemize}
|
||||||
\onslide<5->{\item \texttt{while(a==b)\{Code to run\}}}
|
\onslide<7->{\item \texttt{while(a==b)\{Code to run\}}}
|
||||||
\end{itemize}
|
\end{itemize}
|
||||||
\end{enumerate}
|
\end{enumerate}
|
||||||
\end{frame}
|
\end{frame}
|
||||||
@ -43,7 +43,16 @@
|
|||||||
\only<4>{\includegraphics[width=\textwidth]{apply_functions.png}}
|
\only<4>{\includegraphics[width=\textwidth]{apply_functions.png}}
|
||||||
\only<5>{\includegraphics[width=\textwidth]{sapply_loop.png}}
|
\only<5>{\includegraphics[width=\textwidth]{sapply_loop.png}}
|
||||||
\end{frame}
|
\end{frame}
|
||||||
\begin{frame}{Introduction}
|
|
||||||
|
\begin{frame}{Class Exercise}
|
||||||
|
\begin{itemize}
|
||||||
|
\item Create a loop that downloads a set of files when given a list of URLs.
|
||||||
|
\item Name each file based on another list.
|
||||||
|
\item Repeat for each type of loop (for, while, apply)
|
||||||
|
\end{itemize}
|
||||||
|
\end{frame}
|
||||||
|
|
||||||
|
\begin{frame}{Data transformation}
|
||||||
\begin{itemize}
|
\begin{itemize}
|
||||||
\item Data rarely comes in the form you need.
|
\item Data rarely comes in the form you need.
|
||||||
\item Transformation helps prepare data for analysis and visualization.
|
\item Transformation helps prepare data for analysis and visualization.
|
||||||
@ -66,14 +75,7 @@
|
|||||||
% Slide 3
|
% Slide 3
|
||||||
\begin{frame}{Using the Pipe Operator}
|
\begin{frame}{Using the Pipe Operator}
|
||||||
\begin{itemize}
|
\begin{itemize}
|
||||||
\item Pipe: \texttt{|>} passes output to next function
|
\item Pipe: \texttt{|>} or \texttt{\%>\%} passes output to next function
|
||||||
\item Example:
|
|
||||||
%\begin{verbatim}
|
|
||||||
%flights |>
|
|
||||||
% filter(dest == "IAH") |>
|
|
||||||
% group_by(year, month, day) |>
|
|
||||||
% summarize(arr_delay = mean(arr_delay, na.rm = TRUE))
|
|
||||||
% \end{verbatim}
|
|
||||||
\end{itemize}
|
\end{itemize}
|
||||||
\end{frame}
|
\end{frame}
|
||||||
|
|
||||||
@ -103,9 +105,14 @@
|
|||||||
\item Useful for aggregation and comparisons
|
\item Useful for aggregation and comparisons
|
||||||
\end{itemize}
|
\end{itemize}
|
||||||
\end{frame}
|
\end{frame}
|
||||||
|
\begin{frame}[plain]
|
||||||
|
\only<1>{\includegraphics[width=\textwidth]{Pipe_Example.png}}
|
||||||
|
\only<2>{\includegraphics[width=0.8\textwidth]{Filter_Example.png}}
|
||||||
|
\only<3>{\includegraphics[width=\textwidth]{group_and Summarize.png}}
|
||||||
|
\end{frame}
|
||||||
|
|
||||||
% Slide 8
|
% Slide 8
|
||||||
\begin{frame}{Class Example}
|
\begin{frame}{Class Exercise}
|
||||||
|
|
||||||
\textbf{Example:} a dataset for practicing dplyr: \texttt{airquality}
|
\textbf{Example:} a dataset for practicing dplyr: \texttt{airquality}
|
||||||
\begin{itemize}
|
\begin{itemize}
|
||||||
@ -118,89 +125,4 @@
|
|||||||
\item Use pipes to complete all tasks in one line
|
\item Use pipes to complete all tasks in one line
|
||||||
\end{itemize}
|
\end{itemize}
|
||||||
\end{frame}
|
\end{frame}
|
||||||
\begin{frame}{Why Join Data?}
|
|
||||||
\begin{itemize}
|
|
||||||
\item Real-world data often comes in multiple tables.
|
|
||||||
\item Joins combine related data based on common keys.
|
|
||||||
\item \texttt{dplyr} provides intuitive functions for joining.
|
|
||||||
\end{itemize}
|
|
||||||
\end{frame}
|
|
||||||
|
|
||||||
% Slide 2
|
|
||||||
\begin{frame}{Types of Joins}
|
|
||||||
\begin{itemize}
|
|
||||||
\item \texttt{left\_join()} – keep all rows from left table
|
|
||||||
\item \texttt{right\_join()} – keep all rows from right table
|
|
||||||
\item \texttt{inner\_join()} – keep only matching rows
|
|
||||||
\item \texttt{full\_join()} – keep all rows from both tables
|
|
||||||
\end{itemize}
|
|
||||||
\end{frame}
|
|
||||||
|
|
||||||
% Slide 3
|
|
||||||
\begin{frame}{left\_join()}
|
|
||||||
|
|
||||||
\texttt{left\_join(df1, df2, by = "id")}
|
|
||||||
\begin{itemize}
|
|
||||||
\item Keeps all rows from the left table.
|
|
||||||
\item Adds matching rows from the right table.
|
|
||||||
\item Missing matches are filled with \texttt{NA}.
|
|
||||||
\end{itemize}
|
|
||||||
\end{frame}
|
|
||||||
|
|
||||||
% Slide 4
|
|
||||||
\begin{frame}{right\_join()}
|
|
||||||
|
|
||||||
\texttt{right\_join(df1, df2, by = "id")}
|
|
||||||
\begin{itemize}
|
|
||||||
\item Keeps all rows from the right table.
|
|
||||||
\item Adds matching rows from the left table.
|
|
||||||
\end{itemize}
|
|
||||||
\end{frame}
|
|
||||||
|
|
||||||
% Slide 5
|
|
||||||
\begin{frame}{inner\_join()}
|
|
||||||
\texttt{inner\_join(df1, df2, by = "id")}
|
|
||||||
\begin{itemize}
|
|
||||||
\item Keeps only rows with matching keys in both tables.
|
|
||||||
\item Most commonly used for filtering to shared data.
|
|
||||||
\end{itemize}
|
|
||||||
\end{frame}
|
|
||||||
|
|
||||||
% Slide 6
|
|
||||||
\begin{frame}{full\_join()}
|
|
||||||
\texttt{full\_join(df1, df2, by = "id")}
|
|
||||||
\begin{itemize}
|
|
||||||
\item Keeps all rows from both tables.
|
|
||||||
\item Missing matches are filled with \texttt{NA}.
|
|
||||||
\end{itemize}
|
|
||||||
\end{frame}
|
|
||||||
|
|
||||||
% Slide 7
|
|
||||||
\begin{frame}{Common Issues}
|
|
||||||
\begin{itemize}
|
|
||||||
\item Mismatched column names
|
|
||||||
\item Duplicate keys – can lead to unexpected row duplication
|
|
||||||
\item Key data types must match: both keys should be character, or both numeric
|
|
||||||
\item Missing values in the join keys will introduce \texttt{NA}s
|
|
||||||
\end{itemize}
|
|
||||||
\end{frame}
|
|
||||||
|
|
||||||
% Slide 8
|
|
||||||
\begin{frame}{Best Practices}
|
|
||||||
\begin{itemize}
|
|
||||||
\item Inspect keys before joining: \texttt{unique()}
|
|
||||||
\item Use \texttt{anti\_join()} to find unmatched rows
|
|
||||||
\item Validate results with \texttt{summary()} and \texttt{count()}
|
|
||||||
\end{itemize}
|
|
||||||
\end{frame}
|
|
||||||
|
|
||||||
% Slide 9
|
|
||||||
\begin{frame}{Class Exercise}
|
|
||||||
\begin{itemize}
|
|
||||||
\item Joins are essential for combining data.
|
|
||||||
\item Choose the right join based on your goal.
|
|
||||||
\item Always check for common issues before and after joining.
|
|
||||||
\end{itemize}
|
|
||||||
\end{frame}
|
|
||||||
|
|
||||||
\end{document}
|
\end{document}
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user