diff --git a/Class7/Class_Examples/apply.r b/Class7/Class_Examples/apply.r new file mode 100644 index 0000000..99d2e7c --- /dev/null +++ b/Class7/Class_Examples/apply.r @@ -0,0 +1,8 @@ +DAT <- 1:8 +?apply +apply(DAT,mean) +?mapply +x <- as.data.frame(cbind(x1 = c(3,NA,12,7), x2 = c(4:1, 2:5)) ) +dimnames(x)[[1]] <- letters[1:8] +apply(x, 2, mean, trim = .2,na.rm=TRUE) +?mean \ No newline at end of file diff --git a/Class7/Slides/For_Loops.png b/Class7/Slides/For_Loops.png new file mode 100644 index 0000000..5acb332 Binary files /dev/null and b/Class7/Slides/For_Loops.png differ diff --git a/Class7/Slides/Function.png b/Class7/Slides/Function.png new file mode 100644 index 0000000..4baf7df Binary files /dev/null and b/Class7/Slides/Function.png differ diff --git a/Class7/Slides/If.png b/Class7/Slides/If.png new file mode 100644 index 0000000..c545516 Binary files /dev/null and b/Class7/Slides/If.png differ diff --git a/Class7/Slides/Loops_Tidy.tex b/Class7/Slides/Loops_Tidy.tex new file mode 100644 index 0000000..15be558 --- /dev/null +++ b/Class7/Slides/Loops_Tidy.tex @@ -0,0 +1,206 @@ +\documentclass{beamer} +\usepackage{graphicx} +\usepackage{multicol} +\usepackage{hyperref} +\usepackage{verbatim} +\usepackage{lipsum} % for placeholder text +\graphicspath{{pdf_images/}} + + +\title{ECON 4530/5530 \\ Computational Economics} +\subtitle{Data transformation and Loops} +\author{Alex Gebben} + +\begin{document} + +% Title Slide +\begin{frame} + \titlepage +\end{frame} +%%%%%%%%%%%%%%%%%% +\begin{frame}{Loops} + \only<1->{There are three types of loops in R } + \begin{enumerate} + \onslide<2->{ \item{A \emph{for} loop runs for each value in a list}} + \begin{itemize} + \onslide<3->{\item \texttt{for(i in 1:10)\{Code to run\}}} + \end{itemize} + \onslide<4->{ \item{A \emph{while} loop runs as long as a condition is met. \emph{Can lead to infinite loops.}}} + \begin{itemize} + \onslide<5->{\item \texttt{while(a==b)\{Code to run\}}} + \end{itemize} + 
\onslide<6->{ \item{The \emph{apply} family of functions runs a function on each element of an object}} + \begin{itemize} + \onslide<7->{\item \texttt{sapply(1:10, function(i)\{Code to run\})}} + \end{itemize} + \end{enumerate} +\end{frame} + +\begin{frame}{Loop examples} + \only<1>{\includegraphics[width=\textwidth]{For_Loops.png}} + \only<2>{\includegraphics[width=\textwidth]{While_Loops.png}} + \only<3>{\includegraphics[width=0.5\textwidth]{While_Loops_Inf.png}} + \only<4>{\includegraphics[width=\textwidth]{apply_functions.png}} + \only<5>{\includegraphics[width=\textwidth]{sapply_loop.png}} +\end{frame} +\begin{frame}{Introduction} +\begin{itemize} + \item Data rarely comes in the form you need. + \item Transformation helps prepare data for analysis and visualization. + \item We'll use the \texttt{dplyr} package from the tidyverse. +\end{itemize} +\end{frame} + +% Slide 2 +\begin{frame}{Core dplyr Verbs} +\begin{itemize} + \item \texttt{filter()} – select rows based on conditions + \item \texttt{arrange()} – reorder rows + \item \texttt{select()} – choose columns + \item \texttt{mutate()} – add new columns + \item \texttt{summarize()} – reduce multiple values to one + \item \texttt{group\_by()} – group data for summary +\end{itemize} +\end{frame} + +% Slide 3 +\begin{frame}{Using the Pipe Operator} +\begin{itemize} + \item Pipe: \texttt{|>} passes output to next function + \item Example: + %\begin{verbatim} +%flights |> +% filter(dest == "IAH") |> +% group_by(year, month, day) |> +% summarize(arr_delay = mean(arr_delay, na.rm = TRUE)) +% \end{verbatim} +\end{itemize} +\end{frame} + +% Slide 4 +\begin{frame}{Working with Rows} +\begin{itemize} + \item \texttt{filter()} – keep rows meeting conditions + \item \texttt{arrange()} – sort rows + \item \texttt{distinct()} – remove duplicates +\end{itemize} +\end{frame} + +% Slide 5 +\begin{frame}{Working with Columns} +\begin{itemize} + \item \texttt{select()} – choose columns + \item \texttt{rename()} – rename columns + \item \texttt{mutate()} – create new columns +\end{itemize} +\end{frame} + +% Slide 
6 +\begin{frame}{Grouped Operations} +\begin{itemize} + \item \texttt{group\_by()} – group data + \item \texttt{summarize()} – compute summaries per group + \item Useful for aggregation and comparisons +\end{itemize} +\end{frame} + +% Slide 8 +\begin{frame}{Class Example} + + \textbf{Example:} apply dplyr to the \texttt{airquality} dataset +\begin{itemize} + \item Convert to a tibble + \item Remove any entries with NA values + \item Remove wind speed outliers + \item Calculate the average temperature in each month + \item Find the number of observations in each month + \item Find the max, min and standard deviation of wind speed + \item Use pipes to complete all tasks in one line +\end{itemize} +\end{frame} +\begin{frame}{Why Join Data?} +\begin{itemize} + \item Real-world data often comes in multiple tables. + \item Joins combine related data based on common keys. + \item \texttt{dplyr} provides intuitive functions for joining. +\end{itemize} +\end{frame} + +% Slide 2 +\begin{frame}{Types of Joins} +\begin{itemize} + \item \texttt{left\_join()} – keep all rows from left table + \item \texttt{right\_join()} – keep all rows from right table + \item \texttt{inner\_join()} – keep only matching rows + \item \texttt{full\_join()} – keep all rows from both tables +\end{itemize} +\end{frame} + +% Slide 3 +\begin{frame}{left\_join()} + +\texttt{left\_join(df1, df2, by = "id")} +\begin{itemize} + \item Keeps all rows from the left table. + \item Adds matching rows from the right table. + \item Missing matches are filled with \texttt{NA}. +\end{itemize} +\end{frame} + +% Slide 4 +\begin{frame}{right\_join()} + +\texttt{right\_join(df1, df2, by = "id")} +\begin{itemize} + \item Keeps all rows from the right table. + \item Adds matching rows from the left table. +\end{itemize} +\end{frame} + +% Slide 5 +\begin{frame}{inner\_join()} +\texttt{inner\_join(df1, df2, by = "id")} +\begin{itemize} + \item Keeps only rows with matching keys in both tables. 
+ \item Most commonly used for filtering to shared data. +\end{itemize} +\end{frame} + +% Slide 6 +\begin{frame}{full\_join()} +\texttt{full\_join(df1, df2, by = "id")} +\begin{itemize} + \item Keeps all rows from both tables. + \item Missing matches are filled with \texttt{NA}. +\end{itemize} +\end{frame} + +% Slide 7 +\begin{frame}{Common Issues} +\begin{itemize} + \item Mismatched column names + \item Duplicate keys – can lead to unexpected row duplication + \item Key data types must match: both keys should be character, or both numeric + \item Missing value joins will introduce \texttt{NA}s +\end{itemize} +\end{frame} + +% Slide 8 +\begin{frame}{Best Practices} +\begin{itemize} + \item Inspect keys before joining: \texttt{unique()} + \item Use \texttt{anti\_join()} to find unmatched rows + \item Validate results with \texttt{summary()} and \texttt{count()} +\end{itemize} +\end{frame} + +% Slide 9 +\begin{frame}{Class Exercise} +\begin{itemize} + \item Joins are essential for combining data. + \item Choose the right join based on your goal. + \item Always check for common issues before and after joining. 
+\end{itemize} +\end{frame} + +\end{document} diff --git a/Class7/Slides/Section_comments.png b/Class7/Slides/Section_comments.png new file mode 100644 index 0000000..2a31b06 Binary files /dev/null and b/Class7/Slides/Section_comments.png differ diff --git a/Class7/Slides/What_Comments.png b/Class7/Slides/What_Comments.png new file mode 100644 index 0000000..2ec77ea Binary files /dev/null and b/Class7/Slides/What_Comments.png differ diff --git a/Class7/Slides/While_Loops.png b/Class7/Slides/While_Loops.png new file mode 100644 index 0000000..303ca06 Binary files /dev/null and b/Class7/Slides/While_Loops.png differ diff --git a/Class7/Slides/While_Loops_Inf.png b/Class7/Slides/While_Loops_Inf.png new file mode 100644 index 0000000..9ca3174 Binary files /dev/null and b/Class7/Slides/While_Loops_Inf.png differ diff --git a/Class7/Slides/Why_Comments.png b/Class7/Slides/Why_Comments.png new file mode 100644 index 0000000..3dd7f82 Binary files /dev/null and b/Class7/Slides/Why_Comments.png differ diff --git a/Class7/Slides/apply_functions.png b/Class7/Slides/apply_functions.png new file mode 100644 index 0000000..b03ad4d Binary files /dev/null and b/Class7/Slides/apply_functions.png differ diff --git a/Class7/Slides/sapply_loop.png b/Class7/Slides/sapply_loop.png new file mode 100644 index 0000000..33aea1e Binary files /dev/null and b/Class7/Slides/sapply_loop.png differ