2025-09-10 16:23:57 -06:00

207 lines
5.8 KiB
TeX
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

\documentclass{beamer}
\usepackage{graphicx}
\usepackage{multicol}
\usepackage{hyperref}
\usepackage{verbatim}
\usepackage{lipsum} % for placeholder text
\graphicspath{{pdf_images/}}
\title{ECON 4530/5530 \\ Computational Economics}
\subtitle{Data Transformation and Loops}
\author{Alex Gebben}
\begin{document}
% Title Slide
\begin{frame}
\titlepage
\end{frame}
%%%%%%%%%%%%%%%%%%
\begin{frame}{Loops}
\only<1->{There are three types of loops in R }
\begin{enumerate}
\onslide<2->{ \item{A \emph{for} loop runs for each value in a list}}
\begin{itemize}
\onslide<3->{\item \texttt{for(i in 1:10)\{Code to run\}}}
\end{itemize}
\onslide<4->{ \item{A \emph{while} loop runs as long as a condition is met. \emph{Can lead to infinite loops.}}}
\begin{itemize}
\onslide<5->{\item \texttt{while(a==b)\{Code to run\}}}
\end{itemize}
\onslide<6->{ \item{The \emph{apply} family of functions runs a function on each element of a list or vector}}
\begin{itemize}
\onslide<7->{\item \texttt{sapply(1:10, function(i)\{Code to run\})}}
\end{itemize}
\end{enumerate}
\end{frame}
\begin{frame}{Loop examples}
\only<1>{\includegraphics[width=\textwidth]{For_Loops.png}}
\only<2>{\includegraphics[width=\textwidth]{While_Loops.png}}
\only<3>{\includegraphics[width=0.5\textwidth]{While_Loops_Inf.png}}
\only<4>{\includegraphics[width=\textwidth]{apply_functions.png}}
\only<5>{\includegraphics[width=\textwidth]{sapply_loop.png}}
\end{frame}
\begin{frame}{Introduction}
\begin{itemize}
\item Data rarely comes in the form you need.
\item Transformation helps prepare data for analysis and visualization.
\item We'll use the \texttt{dplyr} package from the tidyverse.
\end{itemize}
\end{frame}
% Slide 2
\begin{frame}{Core dplyr Verbs}
\begin{itemize}
\item \texttt{filter()}: select rows based on conditions
\item \texttt{arrange()}: reorder rows
\item \texttt{select()}: choose columns
\item \texttt{mutate()}: add new columns
\item \texttt{summarize()}: reduce multiple values to one
\item \texttt{group\_by()}: group data for summary
\end{itemize}
\end{frame}
% Slide 3
\begin{frame}[fragile]{Using the Pipe Operator}
\begin{itemize}
\item Pipe: \texttt{|>} passes output to next function
\item Example:
\begin{verbatim}
flights |>
  filter(dest == "IAH") |>
  group_by(year, month, day) |>
  summarize(
    arr_delay = mean(arr_delay, na.rm = TRUE)
  )
\end{verbatim}
\end{itemize}
\end{frame}
% Slide 4
\begin{frame}{Working with Rows}
\begin{itemize}
\item \texttt{filter()}: keep rows meeting conditions
\item \texttt{arrange()}: sort rows
\item \texttt{distinct()}: remove duplicates
\end{itemize}
\end{frame}
% Slide 5
\begin{frame}{Working with Columns}
\begin{itemize}
\item \texttt{select()}: choose columns
\item \texttt{rename()}: rename columns
\item \texttt{mutate()}: create new columns
\end{itemize}
\end{frame}
% Slide 6
\begin{frame}{Grouped Operations}
\begin{itemize}
\item \texttt{group\_by()}: group data
\item \texttt{summarize()}: compute summaries per group
\item Useful for aggregation and comparisons
\end{itemize}
\end{frame}
% Slide 8
\begin{frame}{Class Example}
\textbf{Example:} apply dplyr to the \texttt{airquality} dataset
\begin{itemize}
\item Convert to a tibble
\item Remove any entries with NA values
\item Remove wind speed outliers
\item Calculate the average temperature in each month
\item Find the number of observations in each month
\item Find the max, min and standard deviation of Wind speed
\item Use pipes to complete all tasks in one line
\end{itemize}
\end{frame}
\begin{frame}{Why Join Data?}
\begin{itemize}
\item Real-world data often comes in multiple tables.
\item Joins combine related data based on common keys.
\item \texttt{dplyr} provides intuitive functions for joining.
\end{itemize}
\end{frame}
% Slide 2
\begin{frame}{Types of Joins}
\begin{itemize}
\item \texttt{left\_join()}: keep all rows from left table
\item \texttt{right\_join()}: keep all rows from right table
\item \texttt{inner\_join()}: keep only matching rows
\item \texttt{full\_join()}: keep all rows from both tables
\end{itemize}
\end{frame}
% Slide 3
\begin{frame}{left\_join()}
\texttt{left\_join(df1, df2, by = "id")}
\begin{itemize}
\item Keeps all rows from the left table.
\item Adds columns from the right table where the keys match.
\item Missing matches are filled with \texttt{NA}.
\end{itemize}
\end{frame}
% Slide 4
\begin{frame}{right\_join()}
\texttt{right\_join(df1, df2, by = "id")}
\begin{itemize}
\item Keeps all rows from the right table.
\item Adds columns from the left table where the keys match.
\end{itemize}
\end{frame}
% Slide 5
\begin{frame}{inner\_join()}
\texttt{inner\_join(df1, df2, by = "id")}
\begin{itemize}
\item Keeps only rows with matching keys in both tables.
\item Commonly used to restrict the data to observations present in both tables.
\end{itemize}
\end{frame}
% Slide 6
\begin{frame}{full\_join()}
\texttt{full\_join(df1, df2, by = "id")}
\begin{itemize}
\item Keeps all rows from both tables.
\item Missing matches are filled with \texttt{NA}.
\end{itemize}
\end{frame}
% Slide 7
\begin{frame}{Common Issues}
\begin{itemize}
\item Mismatched column names
\item Duplicate keys can lead to unexpected row duplication
\item Key data types must match: both keys should be character, or both numeric
\item Rows without a match will introduce \texttt{NA}s
\end{itemize}
\end{frame}
% Slide 8
\begin{frame}{Best Practices}
\begin{itemize}
\item Inspect keys before joining: \texttt{unique()}
\item Use \texttt{anti\_join()} to find unmatched rows
\item Validate results with \texttt{summary()} and \texttt{count()}
\end{itemize}
\end{frame}
% Slide 9
\begin{frame}{Class Exercise}
\begin{itemize}
\item Joins are essential for combining data.
\item Choose the right join based on your goal.
\item Always check for common issues before and after joining.
\end{itemize}
\end{frame}
\end{document}