% Beamer slide deck: loading, inspecting, and cleaning data in R,
% organised around a class exercise on state-level oil production.
%
% Source layout: one construct per line. Comments always sit on their own
% line (or at the very end of one) so that a `%` can never swallow code.
\documentclass{beamer}

\usepackage{graphicx}
\usepackage{multicol}
\usepackage{hyperref}
\usepackage{lipsum} % for placeholder text

% \includegraphics looks for image files in this directory.
\graphicspath{{pdf_images/}}

\begin{document}

%%%%%%%%%%%%%%%%%%
\begin{frame}{Class Exercise}
  In this class we will be starting with the question

  \vspace{1em}
  ``Which states have oil production rates that are similar to Wyoming?''

  \begin{itemize}
    \onslide<2->{\item Method applied: correlation matrix}
    \onslide<3->{\item Data needed: Oil production volumes for each US state over time}
    \onslide<4->{\item \href{https://www.eia.gov/dnav/pet/pet_crd_crpdn_adc_mbbl_m.htm}{Petroleum \& Other Liquids from the EIA}}
  \end{itemize}
\end{frame}

\begin{frame}{Loading Data}
  % Top half: two columns
  \vspace{-0.5em}
  \begin{columns}[T,onlytextwidth]
    \column{0.5\textwidth}
    \textbf{Functions to read data}
    \begin{enumerate}
      \item \texttt{read.csv("FilePath")}
      \item \texttt{read.delim("FilePath")}
      \item \texttt{read.table("FilePath")}
    \end{enumerate}

    \column{0.48\textwidth}
    \textbf{Important inputs}
    \begin{enumerate}
      \item \texttt{file}
      \item \texttt{header}
      \item \texttt{skip}
      % The argument of read.table()/read.csv() is spelled col.names.
      \item \texttt{col.names}
    \end{enumerate}
  \end{columns}

  \vspace{1em}
  % Bottom half: full width
  \textbf{Notes}
  \begin{itemize}
    \item You can load data directly from a URL or point the file variable to the local computer.
    \item R uses \texttt{/} for file path, based on Unix standards.
    \item Enclose URLs or paths with single quotes when they include a space.
    \item You can see where R starts looking for a file (default working directory) with \texttt{getwd()}.
  \end{itemize}
\end{frame}

%%%%%%%%%%%%%%
\begin{frame}{Other data loading options}
  Other libraries can be installed to load data in other ways.
  Keep these in mind for future projects.
  \begin{itemize}
    \item \texttt{read\_xls(x)}: library \texttt{readxl}
    \item \texttt{getSymbols}: library \texttt{quantmod}
    \item \texttt{fredr}: library \texttt{fredr}
    \item \texttt{vroom}: library \texttt{vroom}
    \item \texttt{read\_html}: library \texttt{rvest}
    \item \texttt{read\_csv}: library \texttt{tidyverse}
    \item \texttt{read\_sheet}: library \texttt{googlesheets4}
  \end{itemize}

  \vspace{1em}
  Almost any type of data can be loaded.
  Search the web for the right library.
\end{frame}

%%%%%%%%%%%%%%%%%%
\begin{frame}{Class Exercise}
  % The \par inside the group makes the headline use \huge line spacing.
  {\huge Download relevant data\par}

  It is good practice to store the data in a separate folder in the location of the R script, such as a ``Data'' folder.
\end{frame}

%%%%%%%%
\begin{frame}{Variables}
  \onslide<1->{%
    Variables store any type of data.\\[1em]
    Can be a single value, a row of data, or a table of data.
    \vspace{1em}
  }
  \onslide<2->{%
    \begin{center}
      \includegraphics[width=0.3\textwidth]{image_16_1}
    \end{center}
  }
\end{frame}

%%%%%%%%%%%%%%%%%%%%%
\begin{frame}{Variable Tips}
  \begin{itemize}
    \item Variables can be created with \texttt{<-}, \texttt{->}, or \texttt{=}, but the arrows are best practice.
    \item You can view the names of all variables being used with \texttt{ls()}.
    \item You can delete a variable with \texttt{rm(variable)}.
    \item Variables can be re-assigned using themselves: \texttt{a = 1} then \texttt{a = a + 1} means \texttt{a = 2}.
  \end{itemize}
\end{frame}

%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{frame}{Data Frames}
  Think of data frames as a table, with rows and columns.
  The specific data in a data frame named \texttt{TBL} can be accessed with:
  \onslide<2->{%
    \begin{itemize}
      \onslide<2->{\item \texttt{TBL[row\_num, column\_num]}}
      \onslide<3->{\item \texttt{TBL[row\_num, "column\_name"]}}
      \onslide<4->{\item \texttt{TBL\$column\_name[row\_num]}}
    \end{itemize}
  }
  % Both closing remarks appear together from overlay 5 onwards.
  \onslide<5->{%
    You can use these to assign values just as you would a variable.\\
    \texttt{TBL[4,5] <- 7} will change the value of row 4, column 5 of the data frame to 7.\\[1em]
    Row or column names can be viewed or assigned with \texttt{colnames()} and \texttt{rownames()}.
  }
\end{frame}

%%%%%%%%%%%%%%%%%%%%%%%
\begin{frame}{Vectors}
  % Overlay 1: what a vector is
  \only<1>{%
    Think of vectors as single rows of data.\\
    Data frames are made up of many vectors put together.
  }
  % Overlay 2: extracting vectors from a data frame
  \only<2>{%
    You can pull out a vector from a data frame.
    To get a column you can use:
    \begin{itemize}
      \item \texttt{TBL\$column\_name}
      \item \texttt{TBL[, column\_num]}
      \item \texttt{TBL[, "column\_name"]}
    \end{itemize}
    Rows are typically not named, so to extract a row you can use:
    \begin{itemize}
      \item \texttt{TBL[row\_num, ]}
    \end{itemize}
    The \texttt{\$} extracts a column by name.\\
    Use \texttt{colnames()} or \texttt{names()} to view or modify column names.
  }
  % Overlay 3: illustration
  \only<3>{%
    \begin{center}
      % Replace with actual image if available
      \includegraphics[width=0.45\textwidth]{image_23_1}
    \end{center}
  }
  % Overlay 4: creating vectors
  \only<4>{%
    A vector can be created in several ways.
    The most common is:
    \begin{itemize}
      \item \texttt{c(item1, item2, item3, ...)}
      \item \texttt{1:100} creates a sequence from 1 to 100
      \item \texttt{seq(0, 100, by = 5)} creates a sequence from 0 to 100 by 5
      \item \texttt{rep("a", 100)} creates a vector of \texttt{"a"} repeated 100 times
    \end{itemize}
  }
  % Overlay 5: combining the constructors
  \only<5>{%
    These can be combined:
    \begin{itemize}
      \item \texttt{rep(1:10, 5)} creates a vector from 1 to 10 repeated 5 times (length 50)
      \item \texttt{c("a", rep(1:10, 5), "b")} starts with \texttt{"a"}, repeats 1 to 10 five times, ends with \texttt{"b"}
    \end{itemize}
  }
  % Overlay 6: indexing a vector
  \only<6>{%
    Like data frames, vector values can be accessed with \texttt{[ ]}, but vectors are one-dimensional.
    Example:
    \begin{itemize}
      \item \texttt{a = c(1, 3, 5.8, 7)}
      \item \texttt{a[2]} is 3
      \item \texttt{a[3]} is 5.8
      \item \texttt{a[2] * a[4]} is 21
    \end{itemize}
  }
\end{frame}

%%%%%%%%%%%%%%%%%%%%%
\begin{frame}{Class Exercise}
  {\huge Review existing column names\par}
\end{frame}

%%%%%%%%%%%%%%%%
\begin{frame}{Manipulating Data Frames}
  \only<1-7>{%
    Some useful R functions to review and update data frames.
    \begin{itemize}
      \onslide<2->{\item \texttt{ncol()}: Provides the number of columns}
      \onslide<3->{\item \texttt{nrow()}: Provides the number of rows}
      \onslide<4->{\item \texttt{length()}: For data frames, gives the number of columns; for matrices/vectors/lists, gives the number of entries}
      \onslide<5->{\item \texttt{rank()}: Provides an index score of a vector from smallest to largest}
      \onslide<6->{\item \texttt{order()}: Provides the index of a vector that goes from smallest to largest}
      \onslide<7->{\item \texttt{sort()}: Rearranges a vector from smallest to largest}
    \end{itemize}
  }
  \only<8->{%
    \begin{center}
      \includegraphics[width=0.45\textwidth]{image_57_1}
    \end{center}
  }
\end{frame}

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{frame}{Validate Data Frames}
  % Overlays 1-7: common problems, revealed one at a time
  \only<1-7>{%
    \onslide<1->{%
      After loading a data set, you need to look for errors.
      Common issues include:
    }
    \onslide<2->{%
      \begin{itemize}
        \onslide<2->{\item Missing values (\texttt{NA})}
        \onslide<3->{\item Wrong data type for the column (number, date, words)}
        \onslide<4->{\item Numbers treated as words}
        \onslide<5->{\item Values entered incorrectly}
        \onslide<6->{\item Implausible data (too large or too small)}
      \end{itemize}
    }
    \onslide<7->{%
      What is likely to be an issue varies case by case.\\
      Consider the unique issues for any data you are loading.
    }
  }
  % Overlay 8 onwards: functions for reviewing the data
  \only<8->{%
    \onslide<8->{%
      Some useful functions to review data:
      \begin{itemize}
        \item \texttt{summary()}: Provides summary statistics for a data frame
        \item \texttt{head()}, \texttt{tail()}: Show the top or bottom of a data frame
        \item \texttt{plot()}: Visually plots a vector (can be pulled from a data frame)
        \item \texttt{hist()}: Provides a histogram of values in a vector
        \item \texttt{class()}: Lists the data type of any object (check each column)
      \end{itemize}
    }
  }
\end{frame}

%%%%%%%%%%%%%%%%
\begin{frame}{Data Types}
  \begin{enumerate}
    \item \textbf{numeric} (All numbers)
    \item \textbf{integer} (Whole numbers)
    \item \textbf{character} (Letters and words)
    \item \textbf{logical} (True or False)
    \item \textbf{factor} (Categories)
    \item \textbf{date} (Time variables)
  \end{enumerate}

  \vspace{1em}
  You can convert one type to another with \texttt{as.type}.\\
  For example:
  \begin{itemize}
    \item \texttt{as.numeric(1)} is \texttt{1.00}
    \item \texttt{as.integer(1.00)} is \texttt{1}
  \end{itemize}
\end{frame}

%%%%%%%%%%%%%%%%%%%%%%%%%
% Numeric Slide
\begin{frame}{Numeric}
  \onslide<1->{All numbers. Very flexible but can slow down calculations.}

  \vspace{1em}
  \onslide<2->{Examples:
    \begin{itemize}
      \item \texttt{1.001}, \texttt{Inf}, \texttt{-Inf}, \texttt{0}, \texttt{1}, \texttt{-1.08}, \texttt{100.999999}
    \end{itemize}
  }

  \vspace{1em}
  \onslide<3->{\textbf{Note:} \texttt{"1.001"} is not numeric---it's a character.}
\end{frame}

% Integer Slide
\begin{frame}{Integer}
  \onslide<1->{All whole numbers.}

  \vspace{1em}
  \onslide<2->{Examples:
    \begin{itemize}
      \item \texttt{1}, \texttt{2}, \texttt{-100}
    \end{itemize}
  }
\end{frame}

% Logical Slide
\begin{frame}{Logical}
  \onslide<1->{A true or false value.\\[1em]
    Displays as \texttt{TRUE}/\texttt{FALSE}, but in the background is an integer:}
  \onslide<2->{%
    \begin{itemize}
      \item \texttt{TRUE} = 1
      \item \texttt{FALSE} = 0
    \end{itemize}
  }
  \onslide<3->{Examples:
    \begin{itemize}
      \item \texttt{TRUE}, \texttt{FALSE}, \texttt{1}, \texttt{0}
    \end{itemize}
  }
\end{frame}

% Character Slide
\begin{frame}{Character}
  \onslide<1->{All characters, treated as words.}

  \vspace{1em}
  \onslide<2->{For example, \texttt{1} can be a character if written as \texttt{'1'}.
    \begin{itemize}
      \item As an integer: \texttt{1 + 1 == 2} $\rightarrow$ \texttt{TRUE}
      \item As a character: \texttt{'1' + '1'} $\rightarrow$ error
    \end{itemize}
  }
  \onslide<3->{Examples:
    \begin{itemize}
      \item \texttt{m}, \texttt{M}, \texttt{' '}, \texttt{@}, \texttt{'1'}
    \end{itemize}
  }
\end{frame}

% Factor Slide
% Overlay steps run 1-5 without gaps, so no two consecutive slides are identical.
\begin{frame}{Factor}
  \onslide<1->{Levels of an input. Looks like a word or character but is treated as a distinct group.}

  \vspace{1em}
  \onslide<2->{In the background, an integer is assigned to each group, but R knows not to perform math on an unordered factor.}

  \vspace{1em}
  \onslide<3->{\textbf{Unordered examples:}
    \begin{itemize}
      \item \texttt{female/male}, \texttt{Wyoming/Colorado/Nebraska}
    \end{itemize}
  }
  \onslide<4->{\textbf{Ordered examples:}
    \begin{itemize}
      \item \texttt{Low/Medium/High}
    \end{itemize}
  }
  \onslide<5->{%
    \vspace{1em}
    Useful for analysis where different groups respond to a policy differently.}
\end{frame}

% Dates Slide
\begin{frame}{Dates}
  \onslide<1->{Dates are stored as a number in the background of R.
    A \texttt{Date} is the number of days since January 1, 1970; date-times (\texttt{POSIXct}) count the seconds since then.}

  \onslide<2->{%
    \vspace{1em}
    \begin{center}
      \includegraphics[width=0.9\textwidth]{image_37_1}
    \end{center}
    \vspace{1em}
    Dates can be printed in any format.
    R knows to display the date in a human-readable format.
  }
\end{frame}

% Other Non-Data Types Slide
% NOTE(review): every entry sits in its own itemize environment, presumably
% to get extra vertical space between entries; confirm before merging them.
\begin{frame}{Other Non-Data Types}
  \begin{itemize}
    \item \textbf{vector}
  \end{itemize}
  \begin{itemize}
    \item \textbf{data frame}
  \end{itemize}
  \begin{itemize}
    \item \textbf{matrix}: Like a data frame but must use the same data type
  \end{itemize}
  \begin{itemize}
    \item \textbf{list}: Like a vector but can store a data frame
  \end{itemize}
\end{frame}

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{frame}{Data Conversion}
  \only<1>{%
    You can convert data types in R using the \texttt{as.type} functions.
    Here are key points to understand:
    \begin{itemize}
      \small
      \item \texttt{as.numeric('1')} converts the character \texttt{'1'} to the numeric \texttt{1.00}.
      \item \texttt{as.integer(1.00)} converts the numeric \texttt{1.00} to the integer \texttt{1}.
      \item \texttt{as.logical('1')} will fail to convert directly, but \texttt{as.logical(as.numeric('1'))} will return \texttt{TRUE}.
    \end{itemize}
  }
  \only<2->{%
    R makes educated guesses when converting types, but not all types are compatible.
    If a conversion fails, R will insert \texttt{NA} values.

    \vspace{1em}
  }
  \only<3->{When converting a data frame to a matrix using \texttt{as.matrix()}, all elements must be of the same type.}

  \vspace{1em}
  \only<4->{For example, if a column contains the word \texttt{"zero"}, it will become \texttt{NA} in the matrix, not \texttt{0}.}
\end{frame}

%%%%%%%%%%%%%%%%%%%%%%%%
\begin{frame}{Working with Characters}
  \only<1-9>{%
    Key functions:
    \begin{itemize}
      \onslide<2->{\item \texttt{paste}: Combines two strings with spaces}
      \onslide<3->{\item \texttt{paste0}: Combines two strings without spaces}
      \onslide<4->{\item \texttt{grep}: Finds matching characters}
      \onslide<5->{\item \texttt{grepl}: Finds matching characters and returns \texttt{TRUE} or \texttt{FALSE}}
      \onslide<6->{\item \texttt{toupper}, \texttt{tolower}: Converts the whole string to upper or lower case}
      \onslide<7->{\item \texttt{substr}: Selects only part of the string}
      \onslide<8->{\item \texttt{sub}: Replaces the first occurrence}
      \onslide<9->{\item \texttt{gsub}: Replaces all occurrences}
    \end{itemize}
  }
  \only<10>{%
    \begin{center}
      % Replace with actual image if available
      \includegraphics[width=0.6\textwidth]{image_51_1}
    \end{center}
  }
\end{frame}

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{frame}{Using Logicals}
  \only<1>{%
    The logical data type can be used to conditionally perform calculations or to select data.

    \vspace{1em}
    Some important functions related to logicals include:
    \begin{itemize}
      \item \texttt{which}, \texttt{which.min}, \texttt{which.max}
      \item \texttt{is.na}
      \item \texttt{is.infinite}
      \item \texttt{is.nan}
      \item \texttt{==} (double equals)
      \item \texttt{>}, \texttt{<}, \texttt{>=}, \texttt{<=}
      \item \texttt{!} (logical NOT)
    \end{itemize}
  }
  \only<2->{%
    \begin{columns}
      \column{0.5\textwidth}
      \includegraphics[width=\textwidth]{image_41_1}
      \column{0.5\textwidth}
      \includegraphics[width=\textwidth]{image_41_2}
    \end{columns}
  }
\end{frame}

\begin{frame}{Class Exercise}
  {\huge Correct all observed issues\par}
\end{frame}

\end{document}