Updating slides
8
Class7/Class_Examples/apply.r
Normal file
@ -0,0 +1,8 @@
|
||||
# Sample data: the integer vector 1, 2, ..., 8 used by the apply() call below.
DAT <- 1:8
|
||||
# Open the help page for apply() to check its arguments before calling it.
?apply
|
||||
# apply() requires an array/matrix plus a MARGIN argument, so apply(DAT, mean)
# fails on a plain vector (mean is taken as MARGIN and FUN is missing).
# sapply() is the apply-family function for element-wise work on a vector:
# it calls mean() on each element of DAT and returns a numeric vector.
sapply(DAT, mean)
|
||||
# Open the help page for mapply().
?mapply
|
||||
# Build an 8-row, two-column data frame. x1 has only 4 values while x2 has 8,
# so cbind() recycles x1 (3, NA, 12, 7, 3, NA, 12, 7) to match x2's length.
x <- as.data.frame(cbind(x1 = c(3,NA,12,7), x2 = c(4:1, 2:5)) )
|
||||
# Name the 8 rows "a" through "h" (element 1 of dimnames is the row names).
dimnames(x)[[1]] <- letters[1:8]
|
||||
# Column-wise means (MARGIN = 2). The extra arguments trim and na.rm are not
# used by apply() itself; they are passed through to mean() for each column,
# so NA values in x1 are dropped before the 20% trimmed mean is computed.
apply(x, 2, mean, trim = .2,na.rm=TRUE)
|
||||
# Open the help page for mean() to look up the trim and na.rm arguments used above.
?mean
|
||||
BIN
Class7/Slides/For_Loops.png
Normal file
|
After Width: | Height: | Size: 9.6 KiB |
BIN
Class7/Slides/Function.png
Normal file
|
After Width: | Height: | Size: 51 KiB |
BIN
Class7/Slides/If.png
Normal file
|
After Width: | Height: | Size: 43 KiB |
206
Class7/Slides/Loops_Tidy.tex
Normal file
@ -0,0 +1,206 @@
|
||||
\documentclass{beamer}
|
||||
\usepackage{graphicx}
|
||||
\usepackage{multicol}
|
||||
\usepackage{hyperref}
|
||||
\usepackage{verbatim}
|
||||
\usepackage{lipsum} % for placeholder text
|
||||
\graphicspath{{pdf_images/}}
|
||||
|
||||
|
||||
\title{ECON 4530/5530 \\ Computational Economics}
|
||||
\subtitle{Data transformation and Loops}
|
||||
\author{Alex Gebben}
|
||||
|
||||
\begin{document}
|
||||
|
||||
% Title Slide
|
||||
\begin{frame}
|
||||
\titlepage
|
||||
\end{frame}
|
||||
%%%%%%%%%%%%%%%%%%
|
||||
\begin{frame}{Loops}
|
||||
\only<1->{There are three types of loops in R }
|
||||
\begin{enumerate}
|
||||
\onslide<2->{ \item{A \emph{for} loop runs for each value in a list}}
|
||||
\begin{itemize}
|
||||
\onslide<3->{\item \texttt{for(i in 1:10)\{Code to run\}}}
|
||||
\end{itemize}
|
||||
\onslide<4->{ \item{A \emph{while} loop runs as long as a condition is met. \emph{Can lead to infinite loops.}}}
|
||||
\begin{itemize}
|
||||
\onslide<5->{\item \texttt{while(a==b)\{Code to run\}}}
|
||||
\end{itemize}
|
||||
\onslide<6->{ \item{The \emph{apply} family of functions runs a function over each element of a vector or list}}
|
||||
\begin{itemize}
|
||||
\onslide<7->{\item \texttt{sapply(1:10, function(i)\{Code to run\})}}
|
||||
\end{itemize}
|
||||
\end{enumerate}
|
||||
\end{frame}
|
||||
|
||||
\begin{frame}{Loop examples}
|
||||
\only<1>{\includegraphics[width=\textwidth]{For_Loops.png}}
|
||||
\only<2>{\includegraphics[width=\textwidth]{While_Loops.png}}
|
||||
\only<3>{\includegraphics[width=0.5\textwidth]{While_Loops_Inf.png}}
|
||||
\only<4>{\includegraphics[width=\textwidth]{apply_functions.png}}
|
||||
\only<5>{\includegraphics[width=\textwidth]{sapply_loop.png}}
|
||||
\end{frame}
|
||||
\begin{frame}{Introduction}
|
||||
\begin{itemize}
|
||||
\item Data rarely comes in the form you need.
|
||||
\item Transformation helps prepare data for analysis and visualization.
|
||||
\item We'll use the \texttt{dplyr} package from the tidyverse.
|
||||
\end{itemize}
|
||||
\end{frame}
|
||||
|
||||
% Slide 2
|
||||
\begin{frame}{Core dplyr Verbs}
|
||||
\begin{itemize}
|
||||
\item \texttt{filter()} – select rows based on conditions
|
||||
\item \texttt{arrange()} – reorder rows
|
||||
\item \texttt{select()} – choose columns
|
||||
\item \texttt{mutate()} – add new columns
|
||||
\item \texttt{summarize()} – reduce multiple values to one
|
||||
\item \texttt{group\_by()} – group data for summary
|
||||
\end{itemize}
|
||||
\end{frame}
|
||||
|
||||
% Slide 3
|
||||
\begin{frame}{Using the Pipe Operator}
|
||||
\begin{itemize}
|
||||
\item Pipe: \texttt{|>} passes output to next function
|
||||
\item Example:
|
||||
%\begin{verbatim}
|
||||
%flights |>
|
||||
% filter(dest == "IAH") |>
|
||||
% group_by(year, month, day) |>
|
||||
% summarize(arr_delay = mean(arr_delay, na.rm = TRUE))
|
||||
% \end{verbatim}
|
||||
\end{itemize}
|
||||
\end{frame}
|
||||
|
||||
% Slide 4
|
||||
\begin{frame}{Working with Rows}
|
||||
\begin{itemize}
|
||||
\item \texttt{filter()} – keep rows meeting conditions
|
||||
\item \texttt{arrange()} – sort rows
|
||||
\item \texttt{distinct()} – remove duplicates
|
||||
\end{itemize}
|
||||
\end{frame}
|
||||
|
||||
% Slide 5
|
||||
\begin{frame}{Working with Columns}
|
||||
\begin{itemize}
|
||||
\item \texttt{select()} – choose columns
|
||||
\item \texttt{rename()} – rename columns
|
||||
\item \texttt{mutate()} – create new columns
|
||||
\end{itemize}
|
||||
\end{frame}
|
||||
|
||||
% Slide 6
|
||||
\begin{frame}{Grouped Operations}
|
||||
\begin{itemize}
|
||||
\item \texttt{group\_by()} – group data
|
||||
\item \texttt{summarize()} – compute summaries per group
|
||||
\item Useful for aggregation and comparisons
|
||||
\end{itemize}
|
||||
\end{frame}
|
||||
|
||||
% Slide 8
|
||||
\begin{frame}{Class Example}
|
||||
|
||||
\textbf{Example:} apply \texttt{dplyr} to the \texttt{airquality} dataset
|
||||
\begin{itemize}
|
||||
\item Convert to a tibble
|
||||
\item Remove any entries with NA values
|
||||
\item Remove wind speed outliers
|
||||
\item Calculate the average temperature in each month
|
||||
\item Find the number of observations in each month
|
||||
\item Find the max, min, and standard deviation of wind speed
|
||||
\item Use pipes to complete all tasks in one line
|
||||
\end{itemize}
|
||||
\end{frame}
|
||||
\begin{frame}{Why Join Data?}
|
||||
\begin{itemize}
|
||||
\item Real-world data often comes in multiple tables.
|
||||
\item Joins combine related data based on common keys.
|
||||
\item \texttt{dplyr} provides intuitive functions for joining.
|
||||
\end{itemize}
|
||||
\end{frame}
|
||||
|
||||
% Slide 2
|
||||
\begin{frame}{Types of Joins}
|
||||
\begin{itemize}
|
||||
\item \texttt{left\_join()} – keep all rows from left table
|
||||
\item \texttt{right\_join()} – keep all rows from right table
|
||||
\item \texttt{inner\_join()} – keep only matching rows
|
||||
\item \texttt{full\_join()} – keep all rows from both tables
|
||||
\end{itemize}
|
||||
\end{frame}
|
||||
|
||||
% Slide 3
|
||||
\begin{frame}{left\_join()}
|
||||
|
||||
\texttt{left\_join(df1, df2, by = "id")}
|
||||
\begin{itemize}
|
||||
\item Keeps all rows from the left table.
|
||||
\item Adds matching rows from the right table.
|
||||
\item Missing matches are filled with \texttt{NA}.
|
||||
\end{itemize}
|
||||
\end{frame}
|
||||
|
||||
% Slide 4
|
||||
\begin{frame}{right\_join()}
|
||||
|
||||
\texttt{right\_join(df1, df2, by = "id")}
|
||||
\begin{itemize}
|
||||
\item Keeps all rows from the right table.
|
||||
\item Adds matching rows from the left table.
|
||||
\end{itemize}
|
||||
\end{frame}
|
||||
|
||||
% Slide 5
|
||||
\begin{frame}{inner\_join()}
|
||||
\texttt{inner\_join(df1, df2, by = "id")}
|
||||
\begin{itemize}
|
||||
\item Keeps only rows with matching keys in both tables.
|
||||
\item Most commonly used for filtering to shared data.
|
||||
\end{itemize}
|
||||
\end{frame}
|
||||
|
||||
% Slide 6
|
||||
\begin{frame}{full\_join()}
|
||||
\texttt{full\_join(df1, df2, by = "id")}
|
||||
\begin{itemize}
|
||||
\item Keeps all rows from both tables.
|
||||
\item Missing matches are filled with \texttt{NA}.
|
||||
\end{itemize}
|
||||
\end{frame}
|
||||
|
||||
% Slide 7
|
||||
\begin{frame}{Common Issues}
|
||||
\begin{itemize}
|
||||
\item Mismatched column names
|
||||
\item Duplicate keys – can lead to unexpected row duplication
|
||||
\item Data types must match: both keys should be character, or both numeric
|
||||
\item Missing value joins will introduce \texttt{NA}s
|
||||
\end{itemize}
|
||||
\end{frame}
|
||||
|
||||
% Slide 8
|
||||
\begin{frame}{Best Practices}
|
||||
\begin{itemize}
|
||||
\item Inspect keys before joining: \texttt{unique()}
|
||||
\item Use \texttt{anti\_join()} to find unmatched rows
|
||||
\item Validate results with \texttt{summary()} and \texttt{count()}
|
||||
\end{itemize}
|
||||
\end{frame}
|
||||
|
||||
% Slide 9
|
||||
\begin{frame}{Class Exercise}
|
||||
\begin{itemize}
|
||||
\item Joins are essential for combining data.
|
||||
\item Choose the right join based on your goal.
|
||||
\item Always check for common issues before and after joining.
|
||||
\end{itemize}
|
||||
\end{frame}
|
||||
|
||||
\end{document}
|
||||
BIN
Class7/Slides/Section_comments.png
Normal file
|
After Width: | Height: | Size: 11 KiB |
BIN
Class7/Slides/What_Comments.png
Normal file
|
After Width: | Height: | Size: 28 KiB |
BIN
Class7/Slides/While_Loops.png
Normal file
|
After Width: | Height: | Size: 12 KiB |
BIN
Class7/Slides/While_Loops_Inf.png
Normal file
|
After Width: | Height: | Size: 22 KiB |
BIN
Class7/Slides/Why_Comments.png
Normal file
|
After Width: | Height: | Size: 56 KiB |
BIN
Class7/Slides/apply_functions.png
Normal file
|
After Width: | Height: | Size: 14 KiB |
BIN
Class7/Slides/sapply_loop.png
Normal file
|
After Width: | Height: | Size: 15 KiB |